lyrics-transcriber: lyrics_transcriber-0.40.0-py3-none-any.whl → lyrics_transcriber-0.42.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. lyrics_transcriber/cli/cli_main.py +7 -0
  2. lyrics_transcriber/core/config.py +1 -0
  3. lyrics_transcriber/core/controller.py +30 -52
  4. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  5. lyrics_transcriber/correction/corrector.py +224 -107
  6. lyrics_transcriber/correction/handlers/base.py +28 -10
  7. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  8. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  9. lyrics_transcriber/correction/handlers/llm.py +290 -0
  10. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  11. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  12. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  13. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  14. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  15. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  16. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  17. lyrics_transcriber/correction/text_utils.py +3 -7
  18. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  19. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  20. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  21. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  22. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  23. lyrics_transcriber/frontend/dist/index.html +1 -1
  24. lyrics_transcriber/frontend/package.json +6 -2
  25. lyrics_transcriber/frontend/src/App.tsx +18 -2
  26. lyrics_transcriber/frontend/src/api.ts +103 -6
  27. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  28. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  30. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  31. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  33. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  35. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  36. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  37. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  38. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  39. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  40. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  41. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  42. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  43. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  44. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  45. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  47. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  48. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  49. lyrics_transcriber/frontend/src/types.js +2 -0
  50. lyrics_transcriber/frontend/src/types.ts +70 -49
  51. lyrics_transcriber/frontend/src/validation.ts +132 -0
  52. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  53. lyrics_transcriber/frontend/yarn.lock +3752 -0
  54. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  55. lyrics_transcriber/lyrics/file_provider.py +6 -5
  56. lyrics_transcriber/lyrics/genius.py +5 -2
  57. lyrics_transcriber/lyrics/spotify.py +58 -21
  58. lyrics_transcriber/output/ass/config.py +16 -5
  59. lyrics_transcriber/output/cdg.py +8 -8
  60. lyrics_transcriber/output/generator.py +29 -14
  61. lyrics_transcriber/output/plain_text.py +15 -10
  62. lyrics_transcriber/output/segment_resizer.py +16 -3
  63. lyrics_transcriber/output/subtitles.py +56 -2
  64. lyrics_transcriber/output/video.py +107 -1
  65. lyrics_transcriber/review/__init__.py +0 -1
  66. lyrics_transcriber/review/server.py +337 -164
  67. lyrics_transcriber/transcribers/audioshake.py +3 -0
  68. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  69. lyrics_transcriber/transcribers/whisper.py +11 -1
  70. lyrics_transcriber/types.py +151 -105
  71. lyrics_transcriber/utils/word_utils.py +27 -0
  72. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  73. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
  74. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  75. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  76. lyrics_transcriber/frontend/package-lock.json +0 -4260
  77. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  78. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  79. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,14 @@
1
1
  from dataclasses import dataclass
2
2
  import logging
3
- from typing import Optional, Dict, Any
3
+ from typing import Optional, Dict, Any, List
4
4
  import json
5
5
  import hashlib
6
6
  from pathlib import Path
7
7
  import os
8
8
  from abc import ABC, abstractmethod
9
- from lyrics_transcriber.types import LyricsData
9
+ from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
10
10
  from karaoke_lyrics_processor import KaraokeLyricsProcessor
11
+ from lyrics_transcriber.utils.word_utils import WordUtils
11
12
 
12
13
 
13
14
  @dataclass
@@ -41,20 +42,31 @@ class BaseLyricsProvider(ABC):
41
42
 
42
43
  # Use artist and title for cache key instead of audio file hash
43
44
  cache_key = self._get_artist_title_hash(artist, title)
44
- raw_cache_path = self._get_cache_path(cache_key, "raw")
45
45
 
46
- # Try to load from cache first
46
+ # Check converted cache first
47
+ converted_cache_path = self._get_cache_path(cache_key, "converted")
48
+ converted_data = self._load_from_cache(converted_cache_path)
49
+ if converted_data:
50
+ self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
51
+ return LyricsData.from_dict(converted_data)
52
+
53
+ # Check raw cache next
54
+ raw_cache_path = self._get_cache_path(cache_key, "raw")
47
55
  raw_data = self._load_from_cache(raw_cache_path)
48
- if raw_data is not None:
49
- self.logger.info(f"Using cached lyrics for {artist} - {title}")
50
- return self._save_and_convert_result(cache_key, raw_data)
56
+ if raw_data:
57
+ self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
58
+ converted_result = self._convert_result_format(raw_data)
59
+ self._save_to_cache(converted_cache_path, converted_result.to_dict())
60
+ return converted_result
51
61
 
52
62
  # If not in cache, fetch from source
53
63
  raw_result = self._fetch_data_from_source(artist, title)
54
64
  if raw_result:
55
65
  # Save raw API response
56
66
  self._save_to_cache(raw_cache_path, raw_result)
57
- return self._save_and_convert_result(cache_key, raw_result)
67
+ converted_result = self._convert_result_format(raw_result)
68
+ self._save_to_cache(converted_cache_path, converted_result.to_dict())
69
+ return converted_result
58
70
 
59
71
  return None
60
72
 
@@ -100,18 +112,69 @@ class BaseLyricsProvider(ABC):
100
112
  self.logger.warning(f"Cache file {cache_path} is corrupted")
101
113
  return None
102
114
 
115
+ def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
116
+ """Create LyricsSegment objects with properly formatted words from text.
117
+
118
+ Args:
119
+ text: Raw lyrics text
120
+ is_synced: Whether timing information is available
121
+
122
+ Returns:
123
+ List of LyricsSegment objects with unique IDs and Word objects
124
+ """
125
+ segments = []
126
+ lines = text.strip().split("\n")
127
+
128
+ for line in lines:
129
+ if not line.strip():
130
+ continue
131
+
132
+ # Split line into words
133
+ word_texts = line.strip().split()
134
+ if not word_texts:
135
+ continue
136
+
137
+ words = []
138
+ for word_text in word_texts:
139
+ word = Word(
140
+ id=WordUtils.generate_id(),
141
+ text=word_text,
142
+ start_time=0.0 if is_synced else None,
143
+ end_time=0.0 if is_synced else None,
144
+ confidence=1.0, # Reference lyrics are considered ground truth
145
+ created_during_correction=False,
146
+ )
147
+ words.append(word)
148
+
149
+ segment = LyricsSegment(
150
+ id=WordUtils.generate_id(),
151
+ text=line.strip(),
152
+ words=words,
153
+ start_time=words[0].start_time if is_synced else None,
154
+ end_time=words[-1].end_time if is_synced else None,
155
+ )
156
+ segments.append(segment)
157
+
158
+ return segments
159
+
103
160
  def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
104
- """Process lyrics using KaraokeLyricsProcessor."""
161
+ """Process lyrics using KaraokeLyricsProcessor and create proper segments."""
162
+ # Concatenate all segment texts to get the full lyrics
163
+ full_lyrics = lyrics_data.get_full_text()
164
+
105
165
  processor = KaraokeLyricsProcessor(
106
166
  log_level=self.logger.getEffectiveLevel(),
107
167
  log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
108
- input_lyrics_text=lyrics_data.lyrics,
168
+ input_lyrics_text=full_lyrics,
109
169
  max_line_length=self.max_line_length,
110
170
  )
111
171
  processed_text = processor.process()
112
172
 
113
- # Create new LyricsData with processed text
114
- return LyricsData(source=lyrics_data.source, lyrics=processed_text, segments=lyrics_data.segments, metadata=lyrics_data.metadata)
173
+ # Create segments with words from processed text
174
+ segments = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)
175
+
176
+ # Create new LyricsData with processed text and segments
177
+ return LyricsData(source=lyrics_data.source, segments=segments, metadata=lyrics_data.metadata)
115
178
 
116
179
  def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
117
180
  """Convert raw result to standardized format, process lyrics, save to cache, and return."""
@@ -67,7 +67,7 @@ class FileProvider(BaseLyricsProvider):
67
67
  self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")
68
68
 
69
69
  try:
70
- # Create metadata object like Genius provider does
70
+ # Create metadata object
71
71
  metadata = LyricsMetadata(
72
72
  source="file",
73
73
  track_name=self.title,
@@ -78,10 +78,11 @@ class FileProvider(BaseLyricsProvider):
78
78
  provider_metadata={"filepath": raw_data["filepath"]},
79
79
  )
80
80
 
81
- lyrics_data = LyricsData(
82
- source="file", lyrics=raw_data["text"], segments=[], metadata=metadata # No timing information from file
83
- )
84
- self.logger.debug(f"Created LyricsData object: {lyrics_data}")
81
+ # Create segments with words from the processed text
82
+ segments = self._create_segments_with_words(raw_data["text"], is_synced=False)
83
+
84
+ lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
85
+ self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
85
86
  return lyrics_data
86
87
 
87
88
  except Exception as e:
@@ -77,8 +77,11 @@ class GeniusProvider(BaseLyricsProvider):
77
77
  },
78
78
  )
79
79
 
80
- # Create result object
81
- return LyricsData(source="genius", lyrics=lyrics, segments=[], metadata=metadata)
80
+ # Create segments with words from cleaned lyrics
81
+ segments = self._create_segments_with_words(lyrics, is_synced=False)
82
+
83
+ # Create result object with segments
84
+ return LyricsData(source="genius", segments=segments, metadata=metadata)
82
85
 
83
86
  def _clean_lyrics(self, lyrics: str) -> str:
84
87
  """Clean and process lyrics from Genius to remove unwanted content."""
@@ -1,9 +1,11 @@
1
1
  import logging
2
2
  from typing import Optional, Dict, Any
3
3
  import syrics.api
4
+ import time
4
5
 
5
- from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
6
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
6
7
  from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
8
+ from lyrics_transcriber.utils.word_utils import WordUtils
7
9
 
8
10
 
9
11
  class SpotifyProvider(BaseLyricsProvider):
@@ -12,7 +14,22 @@ class SpotifyProvider(BaseLyricsProvider):
12
14
  def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
13
15
  super().__init__(config, logger)
14
16
  self.cookie = config.spotify_cookie
15
- self.client = syrics.api.Spotify(self.cookie) if self.cookie else None
17
+ self.client = None
18
+
19
+ if self.cookie:
20
+ max_retries = 5
21
+ retry_delay = 5 # seconds
22
+
23
+ for attempt in range(max_retries):
24
+ try:
25
+ self.client = syrics.api.Spotify(self.cookie)
26
+ break # Successfully initialized
27
+ except Exception as e:
28
+ if attempt == max_retries - 1: # Last attempt
29
+ self.logger.error(f"Failed to initialize Spotify client after {max_retries} attempts: {str(e)}")
30
+ break
31
+ self.logger.warning(f"Attempt {attempt + 1}/{max_retries} failed, retrying in {retry_delay} seconds...")
32
+ time.sleep(retry_delay)
16
33
 
17
34
  def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
18
35
  """Fetch raw data from Spotify APIs using syrics library."""
@@ -45,24 +62,6 @@ class SpotifyProvider(BaseLyricsProvider):
45
62
  track_data = raw_data["track_data"]
46
63
  lyrics_data = raw_data["lyrics_data"]["lyrics"]
47
64
 
48
- # Convert raw lines to LyricsSegment objects
49
- segments = []
50
- for line in lyrics_data.get("lines", []):
51
- if not line.get("words"):
52
- continue
53
-
54
- # Skip lines that are just musical notes
55
- if not self._clean_lyrics(line["words"]):
56
- continue
57
-
58
- segment = LyricsSegment(
59
- text=line["words"],
60
- words=[], # TODO: Could potentially split words if needed
61
- start_time=float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else None,
62
- end_time=float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else None,
63
- )
64
- segments.append(segment)
65
-
66
65
  # Create metadata object
67
66
  metadata = LyricsMetadata(
68
67
  source="spotify",
@@ -83,7 +82,45 @@ class SpotifyProvider(BaseLyricsProvider):
83
82
  },
84
83
  )
85
84
 
86
- return LyricsData(source="spotify", lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)
85
+ # Create segments with timing information
86
+ segments = []
87
+ for line in lyrics_data.get("lines", []):
88
+ if not line.get("words"):
89
+ continue
90
+
91
+ # Skip lines that are just musical notes
92
+ if not self._clean_lyrics(line["words"]):
93
+ continue
94
+
95
+ # Split line into words
96
+ word_texts = line["words"].strip().split()
97
+ if not word_texts:
98
+ continue
99
+
100
+ # Calculate approximate timing for each word
101
+ start_time = float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else 0.0
102
+ end_time = float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else 0.0
103
+ duration = end_time - start_time
104
+ word_duration = duration / len(word_texts)
105
+
106
+ words = []
107
+ for i, word_text in enumerate(word_texts):
108
+ word = Word(
109
+ id=WordUtils.generate_id(),
110
+ text=word_text,
111
+ start_time=start_time + (i * word_duration),
112
+ end_time=start_time + ((i + 1) * word_duration),
113
+ confidence=1.0,
114
+ created_during_correction=False,
115
+ )
116
+ words.append(word)
117
+
118
+ segment = LyricsSegment(
119
+ id=WordUtils.generate_id(), text=line["words"].strip(), words=words, start_time=start_time, end_time=end_time
120
+ )
121
+ segments.append(segment)
122
+
123
+ return LyricsData(source="spotify", segments=segments, metadata=metadata)
87
124
 
88
125
  def _clean_lyrics(self, lyrics: str) -> str:
89
126
  """Clean and process lyrics from Spotify to remove unwanted content."""
@@ -4,7 +4,18 @@ from dataclasses import dataclass
4
4
  class ScreenConfig:
5
5
  """Configuration for screen timing and layout."""
6
6
 
7
- def __init__(self, line_height: int = 50, max_visible_lines: int = 4, top_padding: int = None, video_width: int = 640, video_height: int = 360):
7
+ def __init__(
8
+ self,
9
+ line_height: int = 50,
10
+ max_visible_lines: int = 4,
11
+ top_padding: int = None,
12
+ video_width: int = 640,
13
+ video_height: int = 360,
14
+ screen_gap_threshold: float = 5.0,
15
+ post_roll_time: float = 1.0,
16
+ fade_in_ms: int = 200,
17
+ fade_out_ms: int = 300,
18
+ ):
8
19
  # Screen layout
9
20
  self.max_visible_lines = max_visible_lines
10
21
  self.line_height = line_height
@@ -12,10 +23,10 @@ class ScreenConfig:
12
23
  self.video_height = video_height
13
24
  self.video_width = video_width
14
25
  # Timing configuration
15
- self.screen_gap_threshold = 5.0
16
- self.post_roll_time = 1.0
17
- self.fade_in_ms = 200
18
- self.fade_out_ms = 300
26
+ self.screen_gap_threshold = screen_gap_threshold
27
+ self.post_roll_time = post_roll_time
28
+ self.fade_in_ms = fade_in_ms
29
+ self.fade_out_ms = fade_out_ms
19
30
 
20
31
 
21
32
  @dataclass
@@ -126,7 +126,7 @@ class CDGGenerator:
126
126
  cdg_styles: dict,
127
127
  ) -> str:
128
128
  """Create TOML configuration file for CDG generation."""
129
- safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "toml")
129
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke", "toml")
130
130
  toml_file = os.path.join(self.output_dir, safe_filename)
131
131
  self.logger.debug(f"Generating TOML file: {toml_file}")
132
132
 
@@ -161,7 +161,7 @@ class CDGGenerator:
161
161
  title=title,
162
162
  artist=artist,
163
163
  audio_file=audio_file,
164
- output_name=f"{artist} - {title} (Karaoke CDG)",
164
+ output_name=f"{artist} - {title} (Karaoke)",
165
165
  sync_times=sync_times,
166
166
  instrumentals=instrumentals,
167
167
  formatted_lyrics=formatted_lyrics,
@@ -190,11 +190,11 @@ class CDGGenerator:
190
190
  """Compose CDG using KaraokeComposer."""
191
191
  kc = KaraokeComposer.from_file(toml_file)
192
192
  kc.compose()
193
- kc.create_mp4(height=1080, fps=30)
193
+ # kc.create_mp4(height=1080, fps=30)
194
194
 
195
195
  def _find_cdg_zip(self, artist: str, title: str) -> str:
196
196
  """Find the generated CDG ZIP file."""
197
- safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "zip")
197
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke", "zip")
198
198
  output_zip = os.path.join(self.output_dir, safe_filename)
199
199
 
200
200
  self.logger.info(f"Looking for CDG ZIP file in output directory: {output_zip}")
@@ -216,12 +216,12 @@ class CDGGenerator:
216
216
 
217
217
  def _get_cdg_path(self, artist: str, title: str) -> str:
218
218
  """Get the path to the CDG file."""
219
- safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "cdg")
219
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke", "cdg")
220
220
  return os.path.join(self.output_dir, safe_filename)
221
221
 
222
222
  def _get_mp3_path(self, artist: str, title: str) -> str:
223
223
  """Get the path to the MP3 file."""
224
- safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "mp3")
224
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke", "mp3")
225
225
  return os.path.join(self.output_dir, safe_filename)
226
226
 
227
227
  def _verify_output_files(self, cdg_file: str, mp3_file: str) -> None:
@@ -376,7 +376,7 @@ class CDGGenerator:
376
376
  cdg_styles: dict,
377
377
  ) -> dict:
378
378
  """Create TOML data structure."""
379
- safe_output_name = self._get_safe_filename(artist, title, "Karaoke CDG")
379
+ safe_output_name = self._get_safe_filename(artist, title, "Karaoke")
380
380
  return {
381
381
  "title": title,
382
382
  "artist": artist,
@@ -496,7 +496,7 @@ class CDGGenerator:
496
496
  text = text[1:]
497
497
 
498
498
  current_line += text + " "
499
- self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
499
+ # self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
500
500
 
501
501
  is_last_before_instrumental = any(
502
502
  inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
@@ -78,6 +78,7 @@ class OutputGenerator:
78
78
  font_size=self.font_size,
79
79
  line_height=self.line_height,
80
80
  styles=self.config.styles,
81
+ subtitle_offset_ms=self.config.subtitle_offset_ms,
81
82
  logger=self.logger,
82
83
  )
83
84
 
@@ -96,28 +97,42 @@ class OutputGenerator:
96
97
  def generate_outputs(
97
98
  self,
98
99
  transcription_corrected: Optional[CorrectionResult],
99
- lyrics_results: List[LyricsData],
100
+ lyrics_results: dict[str, LyricsData],
100
101
  output_prefix: str,
101
102
  audio_filepath: str,
102
103
  artist: Optional[str] = None,
103
104
  title: Optional[str] = None,
105
+ preview_mode: bool = False,
104
106
  ) -> OutputPaths:
105
107
  """Generate all requested output formats."""
106
108
  outputs = OutputPaths()
107
109
 
108
110
  try:
109
- # Generate plain lyrics files for each provider
110
- for lyrics_data in lyrics_results:
111
- self.plain_text.write_lyrics(lyrics_data, output_prefix)
112
-
113
111
  # Only process transcription-related outputs if we have transcription data
114
112
  if transcription_corrected:
115
- # Write original (uncorrected) transcription
116
- outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
117
113
 
118
- # Resize corrected segments to ensure none are longer than max_line_length
114
+ # Resize corrected segments
119
115
  resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
120
116
  transcription_corrected.resized_segments = resized_segments
117
+
118
+ # For preview, we only need to generate ASS and video
119
+ if preview_mode:
120
+ # Generate ASS subtitles for preview
121
+ outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
122
+
123
+ # Generate preview video
124
+ outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
125
+
126
+ return outputs
127
+
128
+ # Normal output generation (non-preview mode)
129
+ # Generate plain lyrics files for each provider
130
+ for name, lyrics_data in lyrics_results.items():
131
+ self.plain_text.write_lyrics(lyrics_data, output_prefix)
132
+
133
+ # Write original (uncorrected) transcription
134
+ outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
135
+
121
136
  outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
122
137
 
123
138
  # Write corrected lyrics as plain text
@@ -161,12 +176,12 @@ class OutputGenerator:
161
176
  "720p": (1280, 720),
162
177
  "360p": (640, 360),
163
178
  }
164
-
179
+
165
180
  if resolution not in resolution_map:
166
181
  raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
167
-
182
+
168
183
  resolution_dims = resolution_map[resolution]
169
-
184
+
170
185
  # Default font sizes for each resolution
171
186
  default_font_sizes = {
172
187
  "4k": 250,
@@ -174,13 +189,13 @@ class OutputGenerator:
174
189
  "720p": 100,
175
190
  "360p": 40,
176
191
  }
177
-
192
+
178
193
  # Get font size from styles if available, otherwise use default
179
194
  font_size = self.config.styles.get("karaoke", {}).get("font_size", default_font_sizes[resolution])
180
-
195
+
181
196
  # Line height matches font size for all except 360p
182
197
  line_height = 50 if resolution == "360p" else font_size
183
-
198
+
184
199
  return resolution_dims, font_size, line_height
185
200
 
186
201
  def write_corrections_data(self, correction_result: CorrectionResult, output_prefix: str) -> str:
@@ -5,12 +5,13 @@ from typing import List, Optional
5
5
  from lyrics_transcriber.types import LyricsData, LyricsSegment
6
6
  from lyrics_transcriber.correction.corrector import CorrectionResult
7
7
 
8
+
8
9
  class PlainTextGenerator:
9
10
  """Handles generation of plain text output files for lyrics and transcriptions."""
10
11
 
11
12
  def __init__(self, output_dir: str, logger: Optional[logging.Logger] = None):
12
13
  """Initialize PlainTextGenerator.
13
-
14
+
14
15
  Args:
15
16
  output_dir: Directory where output files will be written
16
17
  logger: Optional logger instance
@@ -24,11 +25,11 @@ class PlainTextGenerator:
24
25
 
25
26
  def write_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
26
27
  """Write plain text lyrics file from provider data.
27
-
28
+
28
29
  Args:
29
30
  lyrics_data: LyricsData from a lyrics provider
30
31
  output_prefix: Prefix for output filename
31
-
32
+
32
33
  Returns:
33
34
  Path to generated file
34
35
  """
@@ -38,7 +39,9 @@ class PlainTextGenerator:
38
39
 
39
40
  try:
40
41
  with open(output_path, "w", encoding="utf-8") as f:
41
- f.write(lyrics_data.lyrics)
42
+ # Join segment texts with newlines
43
+ lyrics_text = "\n".join(segment.text for segment in lyrics_data.segments)
44
+ f.write(lyrics_text)
42
45
  self.logger.info(f"Plain lyrics file generated: {output_path}")
43
46
  return output_path
44
47
  except Exception as e:
@@ -47,11 +50,11 @@ class PlainTextGenerator:
47
50
 
48
51
  def write_corrected_lyrics(self, segments: List[LyricsSegment], output_prefix: str) -> str:
49
52
  """Write corrected lyrics as plain text file.
50
-
53
+
51
54
  Args:
52
55
  segments: List of corrected LyricsSegment objects
53
56
  output_prefix: Prefix for output filename
54
-
57
+
55
58
  Returns:
56
59
  Path to generated file
57
60
  """
@@ -70,22 +73,24 @@ class PlainTextGenerator:
70
73
 
71
74
  def write_original_transcription(self, correction_result: CorrectionResult, output_prefix: str) -> str:
72
75
  """Write original (uncorrected) transcription as plain text.
73
-
76
+
74
77
  Args:
75
78
  correction_result: CorrectionResult containing original transcription
76
79
  output_prefix: Prefix for output filename
77
-
80
+
78
81
  Returns:
79
82
  Path to generated file
80
83
  """
81
84
  self.logger.info("Writing original transcription file")
82
85
  output_path = self._get_output_path(f"{output_prefix} (Lyrics Uncorrected)", "txt")
83
86
 
87
+ transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in correction_result.original_segments)
88
+
84
89
  try:
85
90
  with open(output_path, "w", encoding="utf-8") as f:
86
- f.write(correction_result.transcribed_text)
91
+ f.write(transcribed_text)
87
92
  self.logger.info(f"Original transcription file generated: {output_path}")
88
93
  return output_path
89
94
  except Exception as e:
90
95
  self.logger.error(f"Failed to write original transcription file: {str(e)}")
91
- raise
96
+ raise
@@ -1,8 +1,9 @@
1
1
  import logging
2
2
  import re
3
- from typing import List, Optional, Tuple
3
+ from typing import List, Optional
4
4
 
5
5
  from lyrics_transcriber.types import LyricsSegment, Word
6
+ from lyrics_transcriber.utils.word_utils import WordUtils
6
7
 
7
8
 
8
9
  class SegmentResizer:
@@ -101,7 +102,13 @@ class SegmentResizer:
101
102
  Output: LyricsSegment(text="Hello World", words=[...])
102
103
  """
103
104
  cleaned_text = self._clean_text(segment.text)
104
- return LyricsSegment(text=cleaned_text, words=segment.words, start_time=segment.start_time, end_time=segment.end_time)
105
+ return LyricsSegment(
106
+ id=segment.id, # Preserve the original segment ID
107
+ text=cleaned_text,
108
+ words=segment.words,
109
+ start_time=segment.start_time,
110
+ end_time=segment.end_time,
111
+ )
105
112
 
106
113
  def _create_cleaned_word(self, word: Word) -> Word:
107
114
  """Create a new word with cleaned text."""
@@ -226,7 +233,13 @@ class SegmentResizer:
226
233
  def _create_segment_from_words(self, line: str, words: List[Word]) -> LyricsSegment:
227
234
  """Create a new segment from a list of words."""
228
235
  cleaned_text = self._clean_text(line)
229
- return LyricsSegment(text=cleaned_text, words=words, start_time=words[0].start_time, end_time=words[-1].end_time)
236
+ return LyricsSegment(
237
+ id=WordUtils.generate_id(), # Generate new ID for split segments
238
+ text=cleaned_text,
239
+ words=words,
240
+ start_time=words[0].start_time,
241
+ end_time=words[-1].end_time,
242
+ )
230
243
 
231
244
  def _process_segment_text(self, text: str) -> List[str]:
232
245
  """Process segment text to determine optimal split points."""