lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. lyrics_transcriber/cli/cli_main.py +7 -0
  2. lyrics_transcriber/core/config.py +1 -0
  3. lyrics_transcriber/core/controller.py +30 -52
  4. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  5. lyrics_transcriber/correction/corrector.py +224 -107
  6. lyrics_transcriber/correction/handlers/base.py +28 -10
  7. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  8. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  9. lyrics_transcriber/correction/handlers/llm.py +290 -0
  10. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  11. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  12. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  13. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  14. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  15. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  16. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  17. lyrics_transcriber/correction/text_utils.py +3 -7
  18. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  19. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  20. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  21. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  22. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  23. lyrics_transcriber/frontend/dist/index.html +1 -1
  24. lyrics_transcriber/frontend/package.json +6 -2
  25. lyrics_transcriber/frontend/src/App.tsx +18 -2
  26. lyrics_transcriber/frontend/src/api.ts +103 -6
  27. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  28. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  30. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  31. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  33. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  35. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  36. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  37. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  38. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  39. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  40. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  41. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  42. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  43. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  44. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  45. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  47. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  48. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  49. lyrics_transcriber/frontend/src/types.js +2 -0
  50. lyrics_transcriber/frontend/src/types.ts +70 -49
  51. lyrics_transcriber/frontend/src/validation.ts +132 -0
  52. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  53. lyrics_transcriber/frontend/yarn.lock +3752 -0
  54. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  55. lyrics_transcriber/lyrics/file_provider.py +6 -5
  56. lyrics_transcriber/lyrics/genius.py +5 -2
  57. lyrics_transcriber/lyrics/spotify.py +58 -21
  58. lyrics_transcriber/output/ass/config.py +16 -5
  59. lyrics_transcriber/output/cdg.py +8 -8
  60. lyrics_transcriber/output/generator.py +29 -14
  61. lyrics_transcriber/output/plain_text.py +15 -10
  62. lyrics_transcriber/output/segment_resizer.py +16 -3
  63. lyrics_transcriber/output/subtitles.py +56 -2
  64. lyrics_transcriber/output/video.py +107 -1
  65. lyrics_transcriber/review/__init__.py +0 -1
  66. lyrics_transcriber/review/server.py +337 -164
  67. lyrics_transcriber/transcribers/audioshake.py +3 -0
  68. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  69. lyrics_transcriber/transcribers/whisper.py +11 -1
  70. lyrics_transcriber/types.py +151 -105
  71. lyrics_transcriber/utils/word_utils.py +27 -0
  72. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  73. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
  74. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  75. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  76. lyrics_transcriber/frontend/package-lock.json +0 -4260
  77. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  78. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  79. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -5,7 +5,7 @@ import subprocess
5
5
  import json
6
6
 
7
7
  from lyrics_transcriber.output.ass.section_screen import SectionScreen
8
- from lyrics_transcriber.types import LyricsSegment
8
+ from lyrics_transcriber.types import LyricsSegment, Word
9
9
  from lyrics_transcriber.output.ass import LyricsScreen, LyricsLine
10
10
  from lyrics_transcriber.output.ass.ass import ASS
11
11
  from lyrics_transcriber.output.ass.style import Style
@@ -25,6 +25,7 @@ class SubtitlesGenerator:
25
25
  font_size: int,
26
26
  line_height: int,
27
27
  styles: dict,
28
+ subtitle_offset_ms: int = 0,
28
29
  logger: Optional[logging.Logger] = None,
29
30
  ):
30
31
  """Initialize SubtitleGenerator.
@@ -34,13 +35,39 @@ class SubtitlesGenerator:
34
35
  video_resolution: Tuple of (width, height) for video resolution
35
36
  font_size: Font size for subtitles
36
37
  line_height: Line height for subtitle positioning
38
+ styles: Dictionary of style configurations
39
+ subtitle_offset_ms: Offset for subtitle timing in milliseconds
37
40
  logger: Optional logger instance
38
41
  """
39
42
  self.output_dir = output_dir
40
43
  self.video_resolution = video_resolution
41
44
  self.font_size = font_size
42
45
  self.styles = styles
43
- self.config = ScreenConfig(line_height=line_height, video_width=video_resolution[0], video_height=video_resolution[1])
46
+ self.subtitle_offset_ms = subtitle_offset_ms
47
+
48
+ # Create ScreenConfig with potential overrides from styles
49
+ karaoke_styles = styles.get("karaoke", {})
50
+ config_params = {
51
+ "line_height": line_height,
52
+ "video_width": video_resolution[0],
53
+ "video_height": video_resolution[1]
54
+ }
55
+
56
+ # Add any overrides from styles
57
+ screen_config_props = [
58
+ "max_visible_lines",
59
+ "top_padding",
60
+ "screen_gap_threshold",
61
+ "post_roll_time",
62
+ "fade_in_ms",
63
+ "fade_out_ms"
64
+ ]
65
+
66
+ for prop in screen_config_props:
67
+ if prop in karaoke_styles:
68
+ config_params[prop] = karaoke_styles[prop]
69
+
70
+ self.config = ScreenConfig(**config_params)
44
71
  self.logger = logger or logging.getLogger(__name__)
45
72
 
46
73
  def _get_output_path(self, output_prefix: str, extension: str) -> str:
@@ -91,6 +118,33 @@ class SubtitlesGenerator:
91
118
  """Create screens from segments with detailed logging."""
92
119
  self.logger.debug("Creating screens from segments")
93
120
 
121
+ # Apply timing offset to segments if needed
122
+ if self.subtitle_offset_ms != 0:
123
+ self.logger.info(f"Subtitle offset: {self.subtitle_offset_ms}ms")
124
+
125
+ offset_seconds = self.subtitle_offset_ms / 1000.0
126
+ segments = [
127
+ LyricsSegment(
128
+ id=seg.id, # Preserve original segment ID
129
+ text=seg.text,
130
+ words=[
131
+ Word(
132
+ id=word.id, # Preserve original word ID
133
+ text=word.text,
134
+ start_time=max(0, word.start_time + offset_seconds),
135
+ end_time=word.end_time + offset_seconds,
136
+ confidence=word.confidence,
137
+ created_during_correction=getattr(word, "created_during_correction", False), # Preserve correction flag
138
+ )
139
+ for word in seg.words
140
+ ],
141
+ start_time=max(0, seg.start_time + offset_seconds),
142
+ end_time=seg.end_time + offset_seconds,
143
+ )
144
+ for seg in segments
145
+ ]
146
+ self.logger.info(f"Applied {self.subtitle_offset_ms}ms offset to segment timings")
147
+
94
148
  # Create section screens and get instrumental boundaries
95
149
  section_screens = self._create_section_screens(segments, song_duration)
96
150
  instrumental_times = self._get_instrumental_times(section_screens)
@@ -88,6 +88,52 @@ class VideoGenerator:
88
88
  pass
89
89
  raise
90
90
 
91
+ def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
92
+ """Generate lower resolution MP4 preview video with lyrics overlay.
93
+
94
+ Args:
95
+ ass_path: Path to ASS subtitles file
96
+ audio_path: Path to audio file
97
+ output_prefix: Prefix for output filename
98
+
99
+ Returns:
100
+ Path to generated preview video file
101
+ """
102
+ self.logger.info("Generating preview video with lyrics overlay")
103
+ output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")
104
+
105
+ # Check input files exist before running FFmpeg
106
+ if not os.path.isfile(ass_path):
107
+ raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
108
+ if not os.path.isfile(audio_path):
109
+ raise FileNotFoundError(f"Audio file not found: {audio_path}")
110
+
111
+ try:
112
+ # Create a temporary copy of the ASS file with a safe filename
113
+ temp_ass_path = os.path.join(self.cache_dir, "temp_preview_subtitles.ass")
114
+ import shutil
115
+
116
+ shutil.copy2(ass_path, temp_ass_path)
117
+ self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
118
+
119
+ cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
120
+ self._run_ffmpeg_command(cmd)
121
+ self.logger.info(f"Preview video generated: {output_path}")
122
+
123
+ # Clean up temporary file
124
+ os.remove(temp_ass_path)
125
+ return output_path
126
+
127
+ except Exception as e:
128
+ self.logger.error(f"Failed to generate preview video: {str(e)}")
129
+ # Clean up temporary file in case of error
130
+ if "temp_ass_path" in locals():
131
+ try:
132
+ os.remove(temp_ass_path)
133
+ except:
134
+ pass
135
+ raise
136
+
91
137
  def _get_output_path(self, output_prefix: str, extension: str) -> str:
92
138
  """Generate full output path for a file."""
93
139
  return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
@@ -181,7 +227,7 @@ class VideoGenerator:
181
227
  "-vf", f"ass={ass_path}", # Add subtitles
182
228
  "-c:v", self._get_video_codec(),
183
229
  # Video quality settings
184
- "-preset", "slow", # Better compression efficiency
230
+ "-preset", "fast", # Better compression efficiency
185
231
  "-b:v", "5000k", # Base video bitrate
186
232
  "-minrate", "5000k", # Minimum bitrate
187
233
  "-maxrate", "20000k", # Maximum bitrate
@@ -196,6 +242,66 @@ class VideoGenerator:
196
242
 
197
243
  return cmd
198
244
 
245
+ def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
246
+ """Build FFmpeg command for preview video generation with optimized settings."""
247
+ # Use 360p resolution for preview
248
+ width, height = 640, 360
249
+
250
+ # fmt: off
251
+ cmd = [
252
+ "ffmpeg",
253
+ "-hide_banner",
254
+ "-loglevel", "error",
255
+ "-r", "30", # Set frame rate to 30 fps
256
+ ]
257
+
258
+ # Input source (background)
259
+ if self.background_image:
260
+ # Resize background image first
261
+ resized_bg = self._resize_background_image(self.background_image)
262
+ self.logger.debug(f"Using resized background image: {resized_bg}")
263
+ cmd.extend([
264
+ "-loop", "1", # Loop the image
265
+ "-i", resized_bg,
266
+ ])
267
+ else:
268
+ self.logger.debug(
269
+ f"Using solid {self.background_color} background "
270
+ f"with resolution: {width}x{height}"
271
+ )
272
+ cmd.extend([
273
+ "-f", "lavfi",
274
+ "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
275
+ ])
276
+
277
+ # Add audio input and subtitle overlay
278
+ cmd.extend([
279
+ "-i", audio_path,
280
+ "-c:a", "aac", # Use AAC for audio
281
+ "-b:a", "128k", # Audio bitrate
282
+ "-vf", f"ass={ass_path}", # Add subtitles
283
+ "-c:v", "libx264", # Use H.264 codec
284
+ "-profile:v", "baseline", # Most compatible H.264 profile
285
+ "-level", "3.0", # Compatibility level
286
+ "-pix_fmt", "yuv420p", # Required for browser compatibility
287
+ "-preset", "ultrafast",
288
+ "-b:v", "1000k", # Slightly higher bitrate
289
+ "-maxrate", "1500k",
290
+ "-bufsize", "2000k",
291
+ "-movflags", "+faststart+frag_keyframe+empty_moov", # Enhanced streaming flags
292
+ "-g", "30", # Keyframe every 30 frames (1 second)
293
+ "-keyint_min", "30", # Minimum keyframe interval
294
+ "-sc_threshold", "0", # Disable scene change detection
295
+ "-shortest",
296
+ "-y"
297
+ ])
298
+ # fmt: on
299
+
300
+ # Add output path
301
+ cmd.append(output_path)
302
+
303
+ return cmd
304
+
199
305
  def _get_video_codec(self) -> str:
200
306
  """Determine the best available video codec."""
201
307
  # try:
@@ -1 +0,0 @@
1
- from .server import start_review_server, complete_review