lyrics-transcriber 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. lyrics_transcriber/core/controller.py +30 -52
  2. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  3. lyrics_transcriber/correction/corrector.py +224 -107
  4. lyrics_transcriber/correction/handlers/base.py +28 -10
  5. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  6. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  7. lyrics_transcriber/correction/handlers/llm.py +290 -0
  8. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  9. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  10. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  11. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  12. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  13. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  14. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  15. lyrics_transcriber/correction/text_utils.py +3 -7
  16. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  17. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  18. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  19. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-D0Gr3Ep7.js} +16509 -9038
  20. lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +1 -0
  21. lyrics_transcriber/frontend/dist/index.html +1 -1
  22. lyrics_transcriber/frontend/package.json +6 -2
  23. lyrics_transcriber/frontend/src/App.tsx +18 -2
  24. lyrics_transcriber/frontend/src/api.ts +103 -6
  25. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -6
  26. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  27. lyrics_transcriber/frontend/src/components/EditModal.tsx +281 -63
  28. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  29. lyrics_transcriber/frontend/src/components/Header.tsx +249 -0
  30. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +320 -266
  31. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +120 -0
  32. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +174 -52
  33. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +158 -114
  34. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  35. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +39 -16
  36. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  37. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +134 -68
  38. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  39. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  40. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  41. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  42. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +67 -0
  43. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  44. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  45. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  47. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  48. lyrics_transcriber/frontend/src/types.js +2 -0
  49. lyrics_transcriber/frontend/src/types.ts +70 -49
  50. lyrics_transcriber/frontend/src/validation.ts +132 -0
  51. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  52. lyrics_transcriber/frontend/yarn.lock +3752 -0
  53. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  54. lyrics_transcriber/lyrics/file_provider.py +6 -5
  55. lyrics_transcriber/lyrics/genius.py +5 -2
  56. lyrics_transcriber/lyrics/spotify.py +58 -21
  57. lyrics_transcriber/output/ass/config.py +16 -5
  58. lyrics_transcriber/output/cdg.py +1 -1
  59. lyrics_transcriber/output/generator.py +22 -8
  60. lyrics_transcriber/output/plain_text.py +15 -10
  61. lyrics_transcriber/output/segment_resizer.py +16 -3
  62. lyrics_transcriber/output/subtitles.py +27 -1
  63. lyrics_transcriber/output/video.py +107 -1
  64. lyrics_transcriber/review/__init__.py +0 -1
  65. lyrics_transcriber/review/server.py +337 -164
  66. lyrics_transcriber/transcribers/audioshake.py +3 -0
  67. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  68. lyrics_transcriber/transcribers/whisper.py +11 -1
  69. lyrics_transcriber/types.py +151 -105
  70. lyrics_transcriber/utils/word_utils.py +27 -0
  71. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/METADATA +3 -1
  72. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/RECORD +75 -61
  73. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/WHEEL +1 -1
  74. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  75. lyrics_transcriber/frontend/package-lock.json +0 -4260
  76. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  77. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/LICENSE +0 -0
  78. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,14 @@
1
1
  from dataclasses import dataclass
2
2
  import logging
3
- from typing import Optional, Dict, Any
3
+ from typing import Optional, Dict, Any, List
4
4
  import json
5
5
  import hashlib
6
6
  from pathlib import Path
7
7
  import os
8
8
  from abc import ABC, abstractmethod
9
- from lyrics_transcriber.types import LyricsData
9
+ from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
10
10
  from karaoke_lyrics_processor import KaraokeLyricsProcessor
11
+ from lyrics_transcriber.utils.word_utils import WordUtils
11
12
 
12
13
 
13
14
  @dataclass
@@ -41,20 +42,31 @@ class BaseLyricsProvider(ABC):
41
42
 
42
43
  # Use artist and title for cache key instead of audio file hash
43
44
  cache_key = self._get_artist_title_hash(artist, title)
44
- raw_cache_path = self._get_cache_path(cache_key, "raw")
45
45
 
46
- # Try to load from cache first
46
+ # Check converted cache first
47
+ converted_cache_path = self._get_cache_path(cache_key, "converted")
48
+ converted_data = self._load_from_cache(converted_cache_path)
49
+ if converted_data:
50
+ self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
51
+ return LyricsData.from_dict(converted_data)
52
+
53
+ # Check raw cache next
54
+ raw_cache_path = self._get_cache_path(cache_key, "raw")
47
55
  raw_data = self._load_from_cache(raw_cache_path)
48
- if raw_data is not None:
49
- self.logger.info(f"Using cached lyrics for {artist} - {title}")
50
- return self._save_and_convert_result(cache_key, raw_data)
56
+ if raw_data:
57
+ self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
58
+ converted_result = self._convert_result_format(raw_data)
59
+ self._save_to_cache(converted_cache_path, converted_result.to_dict())
60
+ return converted_result
51
61
 
52
62
  # If not in cache, fetch from source
53
63
  raw_result = self._fetch_data_from_source(artist, title)
54
64
  if raw_result:
55
65
  # Save raw API response
56
66
  self._save_to_cache(raw_cache_path, raw_result)
57
- return self._save_and_convert_result(cache_key, raw_result)
67
+ converted_result = self._convert_result_format(raw_result)
68
+ self._save_to_cache(converted_cache_path, converted_result.to_dict())
69
+ return converted_result
58
70
 
59
71
  return None
60
72
 
@@ -100,18 +112,69 @@ class BaseLyricsProvider(ABC):
100
112
  self.logger.warning(f"Cache file {cache_path} is corrupted")
101
113
  return None
102
114
 
115
+ def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
116
+ """Create LyricsSegment objects with properly formatted words from text.
117
+
118
+ Args:
119
+ text: Raw lyrics text
120
+ is_synced: Whether timing information is available
121
+
122
+ Returns:
123
+ List of LyricsSegment objects with unique IDs and Word objects
124
+ """
125
+ segments = []
126
+ lines = text.strip().split("\n")
127
+
128
+ for line in lines:
129
+ if not line.strip():
130
+ continue
131
+
132
+ # Split line into words
133
+ word_texts = line.strip().split()
134
+ if not word_texts:
135
+ continue
136
+
137
+ words = []
138
+ for word_text in word_texts:
139
+ word = Word(
140
+ id=WordUtils.generate_id(),
141
+ text=word_text,
142
+ start_time=0.0 if is_synced else None,
143
+ end_time=0.0 if is_synced else None,
144
+ confidence=1.0, # Reference lyrics are considered ground truth
145
+ created_during_correction=False,
146
+ )
147
+ words.append(word)
148
+
149
+ segment = LyricsSegment(
150
+ id=WordUtils.generate_id(),
151
+ text=line.strip(),
152
+ words=words,
153
+ start_time=words[0].start_time if is_synced else None,
154
+ end_time=words[-1].end_time if is_synced else None,
155
+ )
156
+ segments.append(segment)
157
+
158
+ return segments
159
+
103
160
  def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
104
- """Process lyrics using KaraokeLyricsProcessor."""
161
+ """Process lyrics using KaraokeLyricsProcessor and create proper segments."""
162
+ # Concatenate all segment texts to get the full lyrics
163
+ full_lyrics = lyrics_data.get_full_text()
164
+
105
165
  processor = KaraokeLyricsProcessor(
106
166
  log_level=self.logger.getEffectiveLevel(),
107
167
  log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
108
- input_lyrics_text=lyrics_data.lyrics,
168
+ input_lyrics_text=full_lyrics,
109
169
  max_line_length=self.max_line_length,
110
170
  )
111
171
  processed_text = processor.process()
112
172
 
113
- # Create new LyricsData with processed text
114
- return LyricsData(source=lyrics_data.source, lyrics=processed_text, segments=lyrics_data.segments, metadata=lyrics_data.metadata)
173
+ # Create segments with words from processed text
174
+ segments = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)
175
+
176
+ # Create new LyricsData with processed text and segments
177
+ return LyricsData(source=lyrics_data.source, segments=segments, metadata=lyrics_data.metadata)
115
178
 
116
179
  def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
117
180
  """Convert raw result to standardized format, process lyrics, save to cache, and return."""
@@ -67,7 +67,7 @@ class FileProvider(BaseLyricsProvider):
67
67
  self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")
68
68
 
69
69
  try:
70
- # Create metadata object like Genius provider does
70
+ # Create metadata object
71
71
  metadata = LyricsMetadata(
72
72
  source="file",
73
73
  track_name=self.title,
@@ -78,10 +78,11 @@ class FileProvider(BaseLyricsProvider):
78
78
  provider_metadata={"filepath": raw_data["filepath"]},
79
79
  )
80
80
 
81
- lyrics_data = LyricsData(
82
- source="file", lyrics=raw_data["text"], segments=[], metadata=metadata # No timing information from file
83
- )
84
- self.logger.debug(f"Created LyricsData object: {lyrics_data}")
81
+ # Create segments with words from the processed text
82
+ segments = self._create_segments_with_words(raw_data["text"], is_synced=False)
83
+
84
+ lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
85
+ self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
85
86
  return lyrics_data
86
87
 
87
88
  except Exception as e:
@@ -77,8 +77,11 @@ class GeniusProvider(BaseLyricsProvider):
77
77
  },
78
78
  )
79
79
 
80
- # Create result object
81
- return LyricsData(source="genius", lyrics=lyrics, segments=[], metadata=metadata)
80
+ # Create segments with words from cleaned lyrics
81
+ segments = self._create_segments_with_words(lyrics, is_synced=False)
82
+
83
+ # Create result object with segments
84
+ return LyricsData(source="genius", segments=segments, metadata=metadata)
82
85
 
83
86
  def _clean_lyrics(self, lyrics: str) -> str:
84
87
  """Clean and process lyrics from Genius to remove unwanted content."""
@@ -1,9 +1,11 @@
1
1
  import logging
2
2
  from typing import Optional, Dict, Any
3
3
  import syrics.api
4
+ import time
4
5
 
5
- from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
6
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
6
7
  from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
8
+ from lyrics_transcriber.utils.word_utils import WordUtils
7
9
 
8
10
 
9
11
  class SpotifyProvider(BaseLyricsProvider):
@@ -12,7 +14,22 @@ class SpotifyProvider(BaseLyricsProvider):
12
14
  def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
13
15
  super().__init__(config, logger)
14
16
  self.cookie = config.spotify_cookie
15
- self.client = syrics.api.Spotify(self.cookie) if self.cookie else None
17
+ self.client = None
18
+
19
+ if self.cookie:
20
+ max_retries = 5
21
+ retry_delay = 5 # seconds
22
+
23
+ for attempt in range(max_retries):
24
+ try:
25
+ self.client = syrics.api.Spotify(self.cookie)
26
+ break # Successfully initialized
27
+ except Exception as e:
28
+ if attempt == max_retries - 1: # Last attempt
29
+ self.logger.error(f"Failed to initialize Spotify client after {max_retries} attempts: {str(e)}")
30
+ break
31
+ self.logger.warning(f"Attempt {attempt + 1}/{max_retries} failed, retrying in {retry_delay} seconds...")
32
+ time.sleep(retry_delay)
16
33
 
17
34
  def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
18
35
  """Fetch raw data from Spotify APIs using syrics library."""
@@ -45,24 +62,6 @@ class SpotifyProvider(BaseLyricsProvider):
45
62
  track_data = raw_data["track_data"]
46
63
  lyrics_data = raw_data["lyrics_data"]["lyrics"]
47
64
 
48
- # Convert raw lines to LyricsSegment objects
49
- segments = []
50
- for line in lyrics_data.get("lines", []):
51
- if not line.get("words"):
52
- continue
53
-
54
- # Skip lines that are just musical notes
55
- if not self._clean_lyrics(line["words"]):
56
- continue
57
-
58
- segment = LyricsSegment(
59
- text=line["words"],
60
- words=[], # TODO: Could potentially split words if needed
61
- start_time=float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else None,
62
- end_time=float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else None,
63
- )
64
- segments.append(segment)
65
-
66
65
  # Create metadata object
67
66
  metadata = LyricsMetadata(
68
67
  source="spotify",
@@ -83,7 +82,45 @@ class SpotifyProvider(BaseLyricsProvider):
83
82
  },
84
83
  )
85
84
 
86
- return LyricsData(source="spotify", lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)
85
+ # Create segments with timing information
86
+ segments = []
87
+ for line in lyrics_data.get("lines", []):
88
+ if not line.get("words"):
89
+ continue
90
+
91
+ # Skip lines that are just musical notes
92
+ if not self._clean_lyrics(line["words"]):
93
+ continue
94
+
95
+ # Split line into words
96
+ word_texts = line["words"].strip().split()
97
+ if not word_texts:
98
+ continue
99
+
100
+ # Calculate approximate timing for each word
101
+ start_time = float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else 0.0
102
+ end_time = float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else 0.0
103
+ duration = end_time - start_time
104
+ word_duration = duration / len(word_texts)
105
+
106
+ words = []
107
+ for i, word_text in enumerate(word_texts):
108
+ word = Word(
109
+ id=WordUtils.generate_id(),
110
+ text=word_text,
111
+ start_time=start_time + (i * word_duration),
112
+ end_time=start_time + ((i + 1) * word_duration),
113
+ confidence=1.0,
114
+ created_during_correction=False,
115
+ )
116
+ words.append(word)
117
+
118
+ segment = LyricsSegment(
119
+ id=WordUtils.generate_id(), text=line["words"].strip(), words=words, start_time=start_time, end_time=end_time
120
+ )
121
+ segments.append(segment)
122
+
123
+ return LyricsData(source="spotify", segments=segments, metadata=metadata)
87
124
 
88
125
  def _clean_lyrics(self, lyrics: str) -> str:
89
126
  """Clean and process lyrics from Spotify to remove unwanted content."""
@@ -4,7 +4,18 @@ from dataclasses import dataclass
4
4
  class ScreenConfig:
5
5
  """Configuration for screen timing and layout."""
6
6
 
7
- def __init__(self, line_height: int = 50, max_visible_lines: int = 4, top_padding: int = None, video_width: int = 640, video_height: int = 360):
7
+ def __init__(
8
+ self,
9
+ line_height: int = 50,
10
+ max_visible_lines: int = 4,
11
+ top_padding: int = None,
12
+ video_width: int = 640,
13
+ video_height: int = 360,
14
+ screen_gap_threshold: float = 5.0,
15
+ post_roll_time: float = 1.0,
16
+ fade_in_ms: int = 200,
17
+ fade_out_ms: int = 300,
18
+ ):
8
19
  # Screen layout
9
20
  self.max_visible_lines = max_visible_lines
10
21
  self.line_height = line_height
@@ -12,10 +23,10 @@ class ScreenConfig:
12
23
  self.video_height = video_height
13
24
  self.video_width = video_width
14
25
  # Timing configuration
15
- self.screen_gap_threshold = 5.0
16
- self.post_roll_time = 1.0
17
- self.fade_in_ms = 200
18
- self.fade_out_ms = 300
26
+ self.screen_gap_threshold = screen_gap_threshold
27
+ self.post_roll_time = post_roll_time
28
+ self.fade_in_ms = fade_in_ms
29
+ self.fade_out_ms = fade_out_ms
19
30
 
20
31
 
21
32
  @dataclass
@@ -496,7 +496,7 @@ class CDGGenerator:
496
496
  text = text[1:]
497
497
 
498
498
  current_line += text + " "
499
- self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
499
+ # self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
500
500
 
501
501
  is_last_before_instrumental = any(
502
502
  inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
@@ -97,28 +97,42 @@ class OutputGenerator:
97
97
  def generate_outputs(
98
98
  self,
99
99
  transcription_corrected: Optional[CorrectionResult],
100
- lyrics_results: List[LyricsData],
100
+ lyrics_results: dict[str, LyricsData],
101
101
  output_prefix: str,
102
102
  audio_filepath: str,
103
103
  artist: Optional[str] = None,
104
104
  title: Optional[str] = None,
105
+ preview_mode: bool = False,
105
106
  ) -> OutputPaths:
106
107
  """Generate all requested output formats."""
107
108
  outputs = OutputPaths()
108
109
 
109
110
  try:
110
- # Generate plain lyrics files for each provider
111
- for lyrics_data in lyrics_results:
112
- self.plain_text.write_lyrics(lyrics_data, output_prefix)
113
-
114
111
  # Only process transcription-related outputs if we have transcription data
115
112
  if transcription_corrected:
116
- # Write original (uncorrected) transcription
117
- outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
118
113
 
119
- # Resize corrected segments to ensure none are longer than max_line_length
114
+ # Resize corrected segments
120
115
  resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
121
116
  transcription_corrected.resized_segments = resized_segments
117
+
118
+ # For preview, we only need to generate ASS and video
119
+ if preview_mode:
120
+ # Generate ASS subtitles for preview
121
+ outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
122
+
123
+ # Generate preview video
124
+ outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
125
+
126
+ return outputs
127
+
128
+ # Normal output generation (non-preview mode)
129
+ # Generate plain lyrics files for each provider
130
+ for name, lyrics_data in lyrics_results.items():
131
+ self.plain_text.write_lyrics(lyrics_data, output_prefix)
132
+
133
+ # Write original (uncorrected) transcription
134
+ outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
135
+
122
136
  outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
123
137
 
124
138
  # Write corrected lyrics as plain text
@@ -5,12 +5,13 @@ from typing import List, Optional
5
5
  from lyrics_transcriber.types import LyricsData, LyricsSegment
6
6
  from lyrics_transcriber.correction.corrector import CorrectionResult
7
7
 
8
+
8
9
  class PlainTextGenerator:
9
10
  """Handles generation of plain text output files for lyrics and transcriptions."""
10
11
 
11
12
  def __init__(self, output_dir: str, logger: Optional[logging.Logger] = None):
12
13
  """Initialize PlainTextGenerator.
13
-
14
+
14
15
  Args:
15
16
  output_dir: Directory where output files will be written
16
17
  logger: Optional logger instance
@@ -24,11 +25,11 @@ class PlainTextGenerator:
24
25
 
25
26
  def write_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
26
27
  """Write plain text lyrics file from provider data.
27
-
28
+
28
29
  Args:
29
30
  lyrics_data: LyricsData from a lyrics provider
30
31
  output_prefix: Prefix for output filename
31
-
32
+
32
33
  Returns:
33
34
  Path to generated file
34
35
  """
@@ -38,7 +39,9 @@ class PlainTextGenerator:
38
39
 
39
40
  try:
40
41
  with open(output_path, "w", encoding="utf-8") as f:
41
- f.write(lyrics_data.lyrics)
42
+ # Join segment texts with newlines
43
+ lyrics_text = "\n".join(segment.text for segment in lyrics_data.segments)
44
+ f.write(lyrics_text)
42
45
  self.logger.info(f"Plain lyrics file generated: {output_path}")
43
46
  return output_path
44
47
  except Exception as e:
@@ -47,11 +50,11 @@ class PlainTextGenerator:
47
50
 
48
51
  def write_corrected_lyrics(self, segments: List[LyricsSegment], output_prefix: str) -> str:
49
52
  """Write corrected lyrics as plain text file.
50
-
53
+
51
54
  Args:
52
55
  segments: List of corrected LyricsSegment objects
53
56
  output_prefix: Prefix for output filename
54
-
57
+
55
58
  Returns:
56
59
  Path to generated file
57
60
  """
@@ -70,22 +73,24 @@ class PlainTextGenerator:
70
73
 
71
74
  def write_original_transcription(self, correction_result: CorrectionResult, output_prefix: str) -> str:
72
75
  """Write original (uncorrected) transcription as plain text.
73
-
76
+
74
77
  Args:
75
78
  correction_result: CorrectionResult containing original transcription
76
79
  output_prefix: Prefix for output filename
77
-
80
+
78
81
  Returns:
79
82
  Path to generated file
80
83
  """
81
84
  self.logger.info("Writing original transcription file")
82
85
  output_path = self._get_output_path(f"{output_prefix} (Lyrics Uncorrected)", "txt")
83
86
 
87
+ transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in correction_result.original_segments)
88
+
84
89
  try:
85
90
  with open(output_path, "w", encoding="utf-8") as f:
86
- f.write(correction_result.transcribed_text)
91
+ f.write(transcribed_text)
87
92
  self.logger.info(f"Original transcription file generated: {output_path}")
88
93
  return output_path
89
94
  except Exception as e:
90
95
  self.logger.error(f"Failed to write original transcription file: {str(e)}")
91
- raise
96
+ raise
@@ -1,8 +1,9 @@
1
1
  import logging
2
2
  import re
3
- from typing import List, Optional, Tuple
3
+ from typing import List, Optional
4
4
 
5
5
  from lyrics_transcriber.types import LyricsSegment, Word
6
+ from lyrics_transcriber.utils.word_utils import WordUtils
6
7
 
7
8
 
8
9
  class SegmentResizer:
@@ -101,7 +102,13 @@ class SegmentResizer:
101
102
  Output: LyricsSegment(text="Hello World", words=[...])
102
103
  """
103
104
  cleaned_text = self._clean_text(segment.text)
104
- return LyricsSegment(text=cleaned_text, words=segment.words, start_time=segment.start_time, end_time=segment.end_time)
105
+ return LyricsSegment(
106
+ id=segment.id, # Preserve the original segment ID
107
+ text=cleaned_text,
108
+ words=segment.words,
109
+ start_time=segment.start_time,
110
+ end_time=segment.end_time,
111
+ )
105
112
 
106
113
  def _create_cleaned_word(self, word: Word) -> Word:
107
114
  """Create a new word with cleaned text."""
@@ -226,7 +233,13 @@ class SegmentResizer:
226
233
  def _create_segment_from_words(self, line: str, words: List[Word]) -> LyricsSegment:
227
234
  """Create a new segment from a list of words."""
228
235
  cleaned_text = self._clean_text(line)
229
- return LyricsSegment(text=cleaned_text, words=words, start_time=words[0].start_time, end_time=words[-1].end_time)
236
+ return LyricsSegment(
237
+ id=WordUtils.generate_id(), # Generate new ID for split segments
238
+ text=cleaned_text,
239
+ words=words,
240
+ start_time=words[0].start_time,
241
+ end_time=words[-1].end_time,
242
+ )
230
243
 
231
244
  def _process_segment_text(self, text: str) -> List[str]:
232
245
  """Process segment text to determine optimal split points."""
@@ -44,7 +44,30 @@ class SubtitlesGenerator:
44
44
  self.font_size = font_size
45
45
  self.styles = styles
46
46
  self.subtitle_offset_ms = subtitle_offset_ms
47
- self.config = ScreenConfig(line_height=line_height, video_width=video_resolution[0], video_height=video_resolution[1])
47
+
48
+ # Create ScreenConfig with potential overrides from styles
49
+ karaoke_styles = styles.get("karaoke", {})
50
+ config_params = {
51
+ "line_height": line_height,
52
+ "video_width": video_resolution[0],
53
+ "video_height": video_resolution[1]
54
+ }
55
+
56
+ # Add any overrides from styles
57
+ screen_config_props = [
58
+ "max_visible_lines",
59
+ "top_padding",
60
+ "screen_gap_threshold",
61
+ "post_roll_time",
62
+ "fade_in_ms",
63
+ "fade_out_ms"
64
+ ]
65
+
66
+ for prop in screen_config_props:
67
+ if prop in karaoke_styles:
68
+ config_params[prop] = karaoke_styles[prop]
69
+
70
+ self.config = ScreenConfig(**config_params)
48
71
  self.logger = logger or logging.getLogger(__name__)
49
72
 
50
73
  def _get_output_path(self, output_prefix: str, extension: str) -> str:
@@ -102,13 +125,16 @@ class SubtitlesGenerator:
102
125
  offset_seconds = self.subtitle_offset_ms / 1000.0
103
126
  segments = [
104
127
  LyricsSegment(
128
+ id=seg.id, # Preserve original segment ID
105
129
  text=seg.text,
106
130
  words=[
107
131
  Word(
132
+ id=word.id, # Preserve original word ID
108
133
  text=word.text,
109
134
  start_time=max(0, word.start_time + offset_seconds),
110
135
  end_time=word.end_time + offset_seconds,
111
136
  confidence=word.confidence,
137
+ created_during_correction=getattr(word, "created_during_correction", False), # Preserve correction flag
112
138
  )
113
139
  for word in seg.words
114
140
  ],
@@ -88,6 +88,52 @@ class VideoGenerator:
88
88
  pass
89
89
  raise
90
90
 
91
+ def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
92
+ """Generate lower resolution MP4 preview video with lyrics overlay.
93
+
94
+ Args:
95
+ ass_path: Path to ASS subtitles file
96
+ audio_path: Path to audio file
97
+ output_prefix: Prefix for output filename
98
+
99
+ Returns:
100
+ Path to generated preview video file
101
+ """
102
+ self.logger.info("Generating preview video with lyrics overlay")
103
+ output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")
104
+
105
+ # Check input files exist before running FFmpeg
106
+ if not os.path.isfile(ass_path):
107
+ raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
108
+ if not os.path.isfile(audio_path):
109
+ raise FileNotFoundError(f"Audio file not found: {audio_path}")
110
+
111
+ try:
112
+ # Create a temporary copy of the ASS file with a safe filename
113
+ temp_ass_path = os.path.join(self.cache_dir, "temp_preview_subtitles.ass")
114
+ import shutil
115
+
116
+ shutil.copy2(ass_path, temp_ass_path)
117
+ self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
118
+
119
+ cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
120
+ self._run_ffmpeg_command(cmd)
121
+ self.logger.info(f"Preview video generated: {output_path}")
122
+
123
+ # Clean up temporary file
124
+ os.remove(temp_ass_path)
125
+ return output_path
126
+
127
+ except Exception as e:
128
+ self.logger.error(f"Failed to generate preview video: {str(e)}")
129
+ # Clean up temporary file in case of error
130
+ if "temp_ass_path" in locals():
131
+ try:
132
+ os.remove(temp_ass_path)
133
+ except:
134
+ pass
135
+ raise
136
+
91
137
  def _get_output_path(self, output_prefix: str, extension: str) -> str:
92
138
  """Generate full output path for a file."""
93
139
  return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
@@ -181,7 +227,7 @@ class VideoGenerator:
181
227
  "-vf", f"ass={ass_path}", # Add subtitles
182
228
  "-c:v", self._get_video_codec(),
183
229
  # Video quality settings
184
- "-preset", "slow", # Better compression efficiency
230
+ "-preset", "fast", # Better compression efficiency
185
231
  "-b:v", "5000k", # Base video bitrate
186
232
  "-minrate", "5000k", # Minimum bitrate
187
233
  "-maxrate", "20000k", # Maximum bitrate
@@ -196,6 +242,66 @@ class VideoGenerator:
196
242
 
197
243
  return cmd
198
244
 
245
+ def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
246
+ """Build FFmpeg command for preview video generation with optimized settings."""
247
+ # Use 360p resolution for preview
248
+ width, height = 640, 360
249
+
250
+ # fmt: off
251
+ cmd = [
252
+ "ffmpeg",
253
+ "-hide_banner",
254
+ "-loglevel", "error",
255
+ "-r", "30", # Set frame rate to 30 fps
256
+ ]
257
+
258
+ # Input source (background)
259
+ if self.background_image:
260
+ # Resize background image first
261
+ resized_bg = self._resize_background_image(self.background_image)
262
+ self.logger.debug(f"Using resized background image: {resized_bg}")
263
+ cmd.extend([
264
+ "-loop", "1", # Loop the image
265
+ "-i", resized_bg,
266
+ ])
267
+ else:
268
+ self.logger.debug(
269
+ f"Using solid {self.background_color} background "
270
+ f"with resolution: {width}x{height}"
271
+ )
272
+ cmd.extend([
273
+ "-f", "lavfi",
274
+ "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
275
+ ])
276
+
277
+ # Add audio input and subtitle overlay
278
+ cmd.extend([
279
+ "-i", audio_path,
280
+ "-c:a", "aac", # Use AAC for audio
281
+ "-b:a", "128k", # Audio bitrate
282
+ "-vf", f"ass={ass_path}", # Add subtitles
283
+ "-c:v", "libx264", # Use H.264 codec
284
+ "-profile:v", "baseline", # Most compatible H.264 profile
285
+ "-level", "3.0", # Compatibility level
286
+ "-pix_fmt", "yuv420p", # Required for browser compatibility
287
+ "-preset", "ultrafast",
288
+ "-b:v", "1000k", # Slightly higher bitrate
289
+ "-maxrate", "1500k",
290
+ "-bufsize", "2000k",
291
+ "-movflags", "+faststart+frag_keyframe+empty_moov", # Enhanced streaming flags
292
+ "-g", "30", # Keyframe every 30 frames (1 second)
293
+ "-keyint_min", "30", # Minimum keyframe interval
294
+ "-sc_threshold", "0", # Disable scene change detection
295
+ "-shortest",
296
+ "-y"
297
+ ])
298
+ # fmt: on
299
+
300
+ # Add output path
301
+ cmd.append(output_path)
302
+
303
+ return cmd
304
+
199
305
  def _get_video_codec(self) -> str:
200
306
  """Determine the best available video codec."""
201
307
  # try: