lyrics-transcriber 0.43.1__py3-none-any.whl → 0.44.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. lyrics_transcriber/core/controller.py +58 -24
  2. lyrics_transcriber/correction/anchor_sequence.py +22 -8
  3. lyrics_transcriber/correction/corrector.py +47 -3
  4. lyrics_transcriber/correction/handlers/llm.py +15 -12
  5. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  6. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  7. lyrics_transcriber/frontend/dist/assets/{index-D0Gr3Ep7.js → index-DVoI6Z16.js} +10799 -7490
  8. lyrics_transcriber/frontend/dist/assets/index-DVoI6Z16.js.map +1 -0
  9. lyrics_transcriber/frontend/dist/index.html +1 -1
  10. lyrics_transcriber/frontend/src/App.tsx +4 -4
  11. lyrics_transcriber/frontend/src/api.ts +37 -0
  12. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  13. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -10
  14. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +62 -56
  15. lyrics_transcriber/frontend/src/components/EditModal.tsx +232 -237
  16. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  17. lyrics_transcriber/frontend/src/components/GlobalSyncEditor.tsx +675 -0
  18. lyrics_transcriber/frontend/src/components/Header.tsx +141 -101
  19. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +146 -80
  20. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +22 -13
  21. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +1 -0
  22. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +29 -12
  23. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +21 -4
  24. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +29 -15
  25. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +34 -16
  26. lyrics_transcriber/frontend/src/components/WordDivider.tsx +186 -0
  27. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +89 -41
  28. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +9 -2
  29. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +28 -3
  30. lyrics_transcriber/frontend/src/components/shared/types.ts +17 -2
  31. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +63 -14
  32. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +192 -0
  33. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +267 -0
  34. lyrics_transcriber/frontend/src/main.tsx +7 -1
  35. lyrics_transcriber/frontend/src/theme.ts +177 -0
  36. lyrics_transcriber/frontend/src/types.ts +1 -1
  37. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  38. lyrics_transcriber/lyrics/base_lyrics_provider.py +2 -2
  39. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  40. lyrics_transcriber/output/generator.py +40 -12
  41. lyrics_transcriber/review/server.py +238 -8
  42. {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/METADATA +3 -2
  43. {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/RECORD +46 -40
  44. lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +0 -1
  45. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +0 -252
  46. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +0 -110
  47. {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/LICENSE +0 -0
  48. {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/WHEEL +0 -0
  49. {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,177 @@
1
+ import { createTheme } from '@mui/material/styles';
2
+
3
+ // Create a theme with smaller typography and spacing
4
+ const theme = createTheme({
5
+ typography: {
6
+ // Scale down all typography by about 20%
7
+ fontSize: 14, // Default is 16
8
+ h1: {
9
+ fontSize: '2.5rem', // Default is ~3rem
10
+ },
11
+ h2: {
12
+ fontSize: '2rem', // Default is ~2.5rem
13
+ },
14
+ h3: {
15
+ fontSize: '1.5rem', // Default is ~1.75rem
16
+ },
17
+ h4: {
18
+ fontSize: '1.2rem', // Default is ~1.5rem
19
+ marginBottom: '0.5rem',
20
+ },
21
+ h5: {
22
+ fontSize: '1rem', // Default is ~1.25rem
23
+ },
24
+ h6: {
25
+ fontSize: '0.9rem', // Default is ~1.1rem
26
+ marginBottom: '0.5rem',
27
+ },
28
+ body1: {
29
+ fontSize: '0.85rem', // Default is ~1rem
30
+ },
31
+ body2: {
32
+ fontSize: '0.75rem', // Default is ~0.875rem
33
+ },
34
+ button: {
35
+ fontSize: '0.8rem', // Default is ~0.875rem
36
+ },
37
+ caption: {
38
+ fontSize: '0.7rem', // Default is ~0.75rem
39
+ },
40
+ },
41
+ components: {
42
+ MuiButton: {
43
+ styleOverrides: {
44
+ root: {
45
+ padding: '3px 10px', // Further reduced from 4px 12px
46
+ minHeight: '30px', // Further reduced from 32px
47
+ },
48
+ sizeSmall: {
49
+ padding: '1px 6px', // Further reduced from 2px 8px
50
+ minHeight: '24px', // Further reduced from 28px
51
+ },
52
+ },
53
+ },
54
+ MuiIconButton: {
55
+ styleOverrides: {
56
+ root: {
57
+ padding: '4px', // Further reduced from 6px
58
+ },
59
+ sizeSmall: {
60
+ padding: '2px', // Further reduced from 4px
61
+ },
62
+ },
63
+ },
64
+ MuiTextField: {
65
+ styleOverrides: {
66
+ root: {
67
+ '& .MuiInputBase-root': {
68
+ minHeight: '32px', // Further reduced from 36px
69
+ },
70
+ },
71
+ },
72
+ },
73
+ MuiDialog: {
74
+ styleOverrides: {
75
+ paper: {
76
+ padding: '8px', // Further reduced from 12px
77
+ },
78
+ },
79
+ },
80
+ MuiDialogTitle: {
81
+ styleOverrides: {
82
+ root: {
83
+ padding: '8px 12px', // Further reduced from 12px 16px
84
+ },
85
+ },
86
+ },
87
+ MuiDialogContent: {
88
+ styleOverrides: {
89
+ root: {
90
+ padding: '6px 12px', // Further reduced from 8px 16px
91
+ },
92
+ },
93
+ },
94
+ MuiDialogActions: {
95
+ styleOverrides: {
96
+ root: {
97
+ padding: '6px 12px', // Further reduced from 8px 16px
98
+ },
99
+ },
100
+ },
101
+ MuiPaper: {
102
+ styleOverrides: {
103
+ root: {
104
+ padding: '8px', // Further reduced from 12px
105
+ },
106
+ },
107
+ },
108
+ MuiList: {
109
+ styleOverrides: {
110
+ root: {
111
+ padding: '2px 0', // Further reduced from 4px 0
112
+ },
113
+ },
114
+ },
115
+ MuiListItem: {
116
+ styleOverrides: {
117
+ root: {
118
+ padding: '2px 8px', // Further reduced from 4px 12px
119
+ },
120
+ },
121
+ },
122
+ MuiTableCell: {
123
+ styleOverrides: {
124
+ root: {
125
+ padding: '4px 8px', // Further reduced from 8px 12px
126
+ },
127
+ },
128
+ },
129
+ MuiCard: {
130
+ styleOverrides: {
131
+ root: {
132
+ padding: '8px',
133
+ },
134
+ },
135
+ },
136
+ MuiCardContent: {
137
+ styleOverrides: {
138
+ root: {
139
+ padding: '8px',
140
+ '&:last-child': {
141
+ paddingBottom: '8px',
142
+ },
143
+ },
144
+ },
145
+ },
146
+ MuiCardHeader: {
147
+ styleOverrides: {
148
+ root: {
149
+ padding: '8px',
150
+ },
151
+ },
152
+ },
153
+ MuiCardActions: {
154
+ styleOverrides: {
155
+ root: {
156
+ padding: '4px 8px',
157
+ },
158
+ },
159
+ },
160
+ MuiGrid: {
161
+ styleOverrides: {
162
+ container: {
163
+ marginTop: '-4px',
164
+ marginLeft: '-4px',
165
+ width: 'calc(100% + 8px)',
166
+ },
167
+ item: {
168
+ paddingTop: '4px',
169
+ paddingLeft: '4px',
170
+ },
171
+ },
172
+ },
173
+ },
174
+ spacing: (factor: number) => `${0.6 * factor}rem`, // Further reduced from 0.8 * factor
175
+ });
176
+
177
+ export default theme;
@@ -136,4 +136,4 @@ export interface HighlightInfo {
136
136
  correction?: WordCorrection
137
137
  }
138
138
 
139
- export type InteractionMode = 'highlight' | 'details' | 'edit'
139
+ export type InteractionMode = 'highlight' | 'edit'
@@ -1 +1 @@
1
- {"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/validation.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/header.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/previewvideosection.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/keyboardhandlers.ts","./src/components/shared/utils/localstorage.ts","./src/components/shared/utils/referencelinecalculator.ts","./src/components/shared/utils/segmentoperations.ts","./src/components/shared/utils/wordutils.ts","./src/types/global.d.ts"],"version":"5.6.3"}
1
+ {"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/theme.ts","./src/types.ts","./src/validation.ts","./src/vite-env.d.ts","./src/components/addlyricsmodal.tsx","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/findreplacemodal.tsx","./src/components/globalsynceditor.tsx","./src/components/header.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/previewvideosection.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/worddivider.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/keyboardhandlers.ts","./src/components/shared/utils/localstorage.ts","./src/components/shared/utils/referencelinecalculator.ts","./src/components/shared/utils/segmentoperations.ts","./src/components/shared/utils/wordutils.ts","./src/hooks/usemanualsync.ts","./src/types/global.d.ts"],"version":"5.6.3"}
@@ -47,14 +47,14 @@ class BaseLyricsProvider(ABC):
47
47
  converted_cache_path = self._get_cache_path(cache_key, "converted")
48
48
  converted_data = self._load_from_cache(converted_cache_path)
49
49
  if converted_data:
50
- self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
50
+ self.logger.info(f"Using cached converted lyrics for {artist} - {title} from file: {converted_cache_path}")
51
51
  return LyricsData.from_dict(converted_data)
52
52
 
53
53
  # Check raw cache next
54
54
  raw_cache_path = self._get_cache_path(cache_key, "raw")
55
55
  raw_data = self._load_from_cache(raw_cache_path)
56
56
  if raw_data:
57
- self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
57
+ self.logger.info(f"Using cached raw lyrics for {artist} - {title} from file: {raw_cache_path}")
58
58
  converted_result = self._convert_result_format(raw_data)
59
59
  self._save_to_cache(converted_cache_path, converted_result.to_dict())
60
60
  return converted_result
@@ -0,0 +1,44 @@
1
+ from typing import Optional, Dict, Any
2
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
3
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata
4
+
5
+
6
+ class UserInputProvider(BaseLyricsProvider):
7
+ """Provider for manually input lyrics text."""
8
+
9
+ def __init__(self, lyrics_text: str, source_name: str, metadata: Dict[str, Any], *args, **kwargs):
10
+ """Initialize with the user's input text."""
11
+ super().__init__(LyricsProviderConfig(), *args, **kwargs)
12
+ self.lyrics_text = lyrics_text
13
+ self.source_name = source_name
14
+ self.input_metadata = metadata
15
+
16
+ def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
17
+ """Return the user's input text as raw data."""
18
+ return {"text": self.lyrics_text, "metadata": self.input_metadata}
19
+
20
+ def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
21
+ """Convert the raw text into LyricsData format."""
22
+ # Create segments with words from the text
23
+ segments = self._create_segments_with_words(raw_data["text"])
24
+
25
+ # Create metadata
26
+ metadata = LyricsMetadata(
27
+ source=self.source_name,
28
+ track_name=raw_data["metadata"].get("title", ""),
29
+ artist_names=raw_data["metadata"].get("artist", ""),
30
+ is_synced=False,
31
+ lyrics_provider="manual",
32
+ lyrics_provider_id="",
33
+ album_name=None,
34
+ duration_ms=None,
35
+ explicit=None,
36
+ language=None,
37
+ provider_metadata={},
38
+ )
39
+
40
+ return LyricsData(segments=segments, metadata=metadata, source=self.source_name)
41
+
42
+ def get_name(self) -> str:
43
+ """Return the provider name."""
44
+ return "UserInput"
@@ -37,6 +37,7 @@ class OutputGenerator:
37
37
  self,
38
38
  config: OutputConfig,
39
39
  logger: Optional[logging.Logger] = None,
40
+ preview_mode: bool = False,
40
41
  ):
41
42
  """
42
43
  Initialize OutputGenerator with configuration.
@@ -44,20 +45,12 @@ class OutputGenerator:
44
45
  Args:
45
46
  config: OutputConfig instance with required paths and settings
46
47
  logger: Optional logger instance
48
+ preview_mode: Boolean indicating if the generator is in preview mode
47
49
  """
48
50
  self.config = config
49
51
  self.logger = logger or logging.getLogger(__name__)
50
52
 
51
- self.logger.debug(f"Initializing OutputGenerator with config: {self.config}")
52
-
53
- # Set video resolution parameters
54
- self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
55
-
56
- self.segment_resizer = SegmentResizer(max_line_length=self.config.max_line_length, logger=self.logger)
57
-
58
- # Initialize generators
59
- self.plain_text = PlainTextGenerator(self.config.output_dir, self.logger)
60
- self.lyrics_file = LyricsFileGenerator(self.config.output_dir, self.logger)
53
+ self.logger.info(f"Initializing OutputGenerator with config: {self.config}")
61
54
 
62
55
  if self.config.render_video or self.config.generate_cdg:
63
56
  # Load output styles from JSON
@@ -68,10 +61,46 @@ class OutputGenerator:
68
61
  except Exception as e:
69
62
  raise ValueError(f"Failed to load output styles file: {str(e)}")
70
63
 
64
+ # Set video resolution parameters
65
+ self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
66
+ self.logger.info(f"Video resolution: {self.video_resolution_num}, font size: {self.font_size}, line height: {self.line_height}")
67
+
68
+ self.segment_resizer = SegmentResizer(max_line_length=self.config.max_line_length, logger=self.logger)
69
+
70
+ # Initialize generators
71
+ self.plain_text = PlainTextGenerator(self.config.output_dir, self.logger)
72
+ self.lyrics_file = LyricsFileGenerator(self.config.output_dir, self.logger)
73
+
71
74
  if self.config.generate_cdg:
72
75
  self.cdg = CDGGenerator(self.config.output_dir, self.logger)
73
76
 
77
+ self.preview_mode = preview_mode
74
78
  if self.config.render_video:
79
+ # Apply preview mode scaling if needed
80
+ if self.preview_mode:
81
+ # Scale down from 4K (2160p) to 360p - factor of 1/6
82
+ scale_factor = 1 / 6
83
+
84
+ # Scale down top padding for preview if it exists
85
+ if "karaoke" in self.config.styles and "top_padding" in self.config.styles["karaoke"]:
86
+ self.logger.info(f"Preview mode: Found top_padding: {self.config.styles['karaoke']['top_padding']}")
87
+ original_padding = self.config.styles["karaoke"]["top_padding"]
88
+ if original_padding is not None:
89
+ # Scale down from 4K (2160p) to 360p - factor of 1/6
90
+ self.config.styles["karaoke"]["top_padding"] = original_padding * scale_factor
91
+ self.logger.info(f"Preview mode: Scaled down top_padding to: {self.config.styles['karaoke']['top_padding']}")
92
+
93
+ # Scale down font size for preview if it exists
94
+ if "karaoke" in self.config.styles and "font_size" in self.config.styles["karaoke"]:
95
+ self.logger.info(f"Preview mode: Found font_size: {self.config.styles['karaoke']['font_size']}")
96
+ original_font_size = self.config.styles["karaoke"]["font_size"]
97
+ if original_font_size is not None:
98
+ # Scale down from 4K (2160p) to 360p - factor of 1/6
99
+ self.font_size = original_font_size * scale_factor
100
+ self.config.styles["karaoke"]["font_size"] = self.font_size
101
+ self.logger.info(f"Preview mode: Scaled down font_size to: {self.font_size}")
102
+
103
+ # Initialize subtitle generator with potentially scaled values
75
104
  self.subtitle = SubtitlesGenerator(
76
105
  output_dir=self.config.output_dir,
77
106
  video_resolution=self.video_resolution_num,
@@ -102,7 +131,6 @@ class OutputGenerator:
102
131
  audio_filepath: str,
103
132
  artist: Optional[str] = None,
104
133
  title: Optional[str] = None,
105
- preview_mode: bool = False,
106
134
  ) -> OutputPaths:
107
135
  """Generate all requested output formats."""
108
136
  outputs = OutputPaths()
@@ -116,7 +144,7 @@ class OutputGenerator:
116
144
  transcription_corrected.resized_segments = resized_segments
117
145
 
118
146
  # For preview, we only need to generate ASS and video
119
- if preview_mode:
147
+ if self.preview_mode:
120
148
  # Generate ASS subtitles for preview
121
149
  outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
122
150
 
@@ -2,8 +2,8 @@ import logging
2
2
  import socket
3
3
  from fastapi import FastAPI, Body, HTTPException
4
4
  from fastapi.middleware.cors import CORSMiddleware
5
- from typing import Dict, Any, List
6
- from lyrics_transcriber.types import CorrectionResult, WordCorrection, LyricsSegment
5
+ from typing import Dict, Any, List, Optional
6
+ from lyrics_transcriber.types import CorrectionResult, WordCorrection, LyricsSegment, LyricsData, LyricsMetadata, Word
7
7
  import time
8
8
  import os
9
9
  import urllib.parse
@@ -18,6 +18,7 @@ from lyrics_transcriber.output.generator import OutputGenerator
18
18
  import json
19
19
  from lyrics_transcriber.correction.corrector import LyricsCorrector
20
20
  from lyrics_transcriber.types import TranscriptionResult, TranscriptionData
21
+ from lyrics_transcriber.lyrics.user_input_provider import UserInputProvider
21
22
 
22
23
 
23
24
  class ReviewServer:
@@ -73,6 +74,7 @@ class ReviewServer:
73
74
  self.app.add_api_route("/api/audio/{audio_hash}", self.get_audio, methods=["GET"])
74
75
  self.app.add_api_route("/api/ping", self.ping, methods=["GET"])
75
76
  self.app.add_api_route("/api/handlers", self.update_handlers, methods=["POST"])
77
+ self.app.add_api_route("/api/add-lyrics", self.add_lyrics, methods=["POST"])
76
78
 
77
79
  async def get_correction_data(self):
78
80
  """Get the correction data."""
@@ -83,8 +85,8 @@ class ReviewServer:
83
85
  return CorrectionResult(
84
86
  corrections=[
85
87
  WordCorrection(
86
- original_word=c.get("original_word", ""),
87
- corrected_word=c.get("corrected_word", ""),
88
+ original_word=c.get("original_word", "").strip(),
89
+ corrected_word=c.get("corrected_word", "").strip(),
88
90
  original_position=c.get("original_position", 0),
89
91
  source=c.get("source", "review"),
90
92
  reason=c.get("reason", "manual_review"),
@@ -103,7 +105,26 @@ class ReviewServer:
103
105
  )
104
106
  for c in updated_data["corrections"]
105
107
  ],
106
- corrected_segments=[LyricsSegment.from_dict(s) for s in updated_data["corrected_segments"]],
108
+ corrected_segments=[
109
+ LyricsSegment(
110
+ id=s["id"],
111
+ text=s["text"].strip(),
112
+ words=[
113
+ Word(
114
+ id=w["id"],
115
+ text=w["text"].strip(),
116
+ start_time=w["start_time"],
117
+ end_time=w["end_time"],
118
+ confidence=w.get("confidence"),
119
+ created_during_correction=w.get("created_during_correction", False),
120
+ )
121
+ for w in s["words"]
122
+ ],
123
+ start_time=s["start_time"],
124
+ end_time=s["end_time"],
125
+ )
126
+ for s in updated_data["corrected_segments"]
127
+ ],
107
128
  # Copy existing fields from the base result
108
129
  original_segments=base_result.original_segments,
109
130
  corrections_made=len(updated_data["corrections"]),
@@ -166,7 +187,7 @@ class ReviewServer:
166
187
  styles=self.output_config.styles,
167
188
  max_line_length=self.output_config.max_line_length,
168
189
  )
169
- output_generator = OutputGenerator(config=preview_config, logger=self.logger)
190
+ output_generator = OutputGenerator(config=preview_config, logger=self.logger, preview_mode=True)
170
191
 
171
192
  # Generate preview outputs with unique prefix
172
193
  preview_outputs = output_generator.generate_outputs(
@@ -174,7 +195,6 @@ class ReviewServer:
174
195
  lyrics_results={}, # Empty dict since we don't need lyrics results for preview
175
196
  output_prefix=f"preview_{preview_hash}", # Include hash in filename
176
197
  audio_filepath=self.audio_filepath,
177
- preview_mode=True,
178
198
  )
179
199
 
180
200
  if not preview_outputs.video:
@@ -238,13 +258,58 @@ class ReviewServer:
238
258
  source="original",
239
259
  )
240
260
 
241
- # Run correction
261
+ # Get currently enabled handlers from metadata
262
+ enabled_handlers = None
263
+ if self.correction_result.metadata:
264
+ if "enabled_handlers" in self.correction_result.metadata:
265
+ enabled_handlers = self.correction_result.metadata["enabled_handlers"]
266
+ self.logger.info(f"Found existing enabled handlers in metadata: {enabled_handlers}")
267
+ elif "available_handlers" in self.correction_result.metadata:
268
+ # If no enabled_handlers but we have available_handlers, enable all default handlers
269
+ enabled_handlers = [
270
+ handler["id"] for handler in self.correction_result.metadata["available_handlers"] if handler.get("enabled", True)
271
+ ]
272
+ self.logger.info(f"No enabled handlers found in metadata, using default enabled handlers: {enabled_handlers}")
273
+ else:
274
+ self.logger.warning("No handler configuration found in metadata")
275
+
276
+ # Log reference sources before correction
277
+ for source, lyrics in self.correction_result.reference_lyrics.items():
278
+ word_count = sum(len(s.words) for s in lyrics.segments)
279
+ self.logger.info(f"Reference source '{source}': {word_count} words in {len(lyrics.segments)} segments")
280
+
281
+ # Rerun correction with updated reference lyrics
282
+ self.logger.info("Initializing LyricsCorrector for re-correction")
283
+ self.logger.info(f"Passing enabled handlers to corrector: {enabled_handlers or '[]'}")
284
+ corrector = LyricsCorrector(
285
+ cache_dir=self.output_config.cache_dir,
286
+ enabled_handlers=enabled_handlers, # Pass the preserved handlers or None to use defaults
287
+ logger=self.logger,
288
+ )
289
+
290
+ self.logger.info(f"Active correction handlers: {[h.__class__.__name__ for h in corrector.handlers]}")
291
+ self.logger.info("Running correction with updated reference lyrics")
242
292
  self.correction_result = corrector.run(
243
293
  transcription_results=[TranscriptionResult(name="original", priority=1, result=transcription_data)],
244
294
  lyrics_results=self.correction_result.reference_lyrics,
245
295
  metadata=self.correction_result.metadata,
246
296
  )
247
297
 
298
+ # Update metadata with the new handler state from corrector
299
+ if not self.correction_result.metadata:
300
+ self.correction_result.metadata = {}
301
+ self.correction_result.metadata.update(
302
+ {
303
+ "available_handlers": corrector.all_handlers,
304
+ "enabled_handlers": [getattr(handler, "name", handler.__class__.__name__) for handler in corrector.handlers],
305
+ }
306
+ )
307
+
308
+ self.logger.info("Correction process completed")
309
+ self.logger.info(
310
+ f"Updated metadata with {len(corrector.handlers)} enabled handlers: {self.correction_result.metadata['enabled_handlers']}"
311
+ )
312
+
248
313
  # Restore audio hash
249
314
  if audio_hash:
250
315
  if not self.correction_result.metadata:
@@ -256,6 +321,171 @@ class ReviewServer:
256
321
  self.logger.error(f"Failed to update handlers: {str(e)}")
257
322
  raise HTTPException(status_code=500, detail=str(e))
258
323
 
324
+ def _create_lyrics_data_from_text(self, text: str, source: str) -> LyricsData:
325
+ """Create LyricsData object from plain text lyrics."""
326
+ self.logger.info(f"Creating LyricsData for source '{source}'")
327
+
328
+ # Split text into lines and create segments
329
+ lines = [line.strip() for line in text.split("\n") if line.strip()]
330
+ self.logger.info(f"Found {len(lines)} non-empty lines in input text")
331
+
332
+ segments = []
333
+ for i, line in enumerate(lines):
334
+ # Split line into words
335
+ word_texts = line.strip().split()
336
+ words = []
337
+
338
+ for j, word_text in enumerate(word_texts):
339
+ word = Word(
340
+ id=f"manual_{source}_word_{i}_{j}", # Create unique ID for each word
341
+ text=word_text,
342
+ start_time=0.0, # Placeholder timing
343
+ end_time=0.0,
344
+ confidence=1.0, # Reference lyrics are considered ground truth
345
+ created_during_correction=False,
346
+ )
347
+ words.append(word)
348
+
349
+ segments.append(
350
+ LyricsSegment(
351
+ id=f"manual_{source}_{i}",
352
+ text=line,
353
+ words=words, # Now including the word objects
354
+ start_time=0.0, # Placeholder timing
355
+ end_time=0.0,
356
+ )
357
+ )
358
+
359
+ # Create metadata
360
+ self.logger.info("Creating metadata for LyricsData")
361
+ metadata = LyricsMetadata(
362
+ source=source,
363
+ track_name=self.correction_result.metadata.get("title", "") or "",
364
+ artist_names=self.correction_result.metadata.get("artist", "") or "",
365
+ is_synced=False,
366
+ lyrics_provider="manual",
367
+ lyrics_provider_id="",
368
+ album_name=None,
369
+ duration_ms=None,
370
+ explicit=None,
371
+ language=None,
372
+ provider_metadata={},
373
+ )
374
+ self.logger.info(f"Created metadata: {metadata}")
375
+
376
+ lyrics_data = LyricsData(segments=segments, metadata=metadata, source=source)
377
+ self.logger.info(f"Created LyricsData with {len(segments)} segments and {sum(len(s.words) for s in segments)} total words")
378
+
379
+ return lyrics_data
380
+
381
+ async def add_lyrics(self, data: Dict[str, str] = Body(...)):
382
+ """Add new lyrics source and rerun correction."""
383
+ try:
384
+ # Store existing audio hash
385
+ audio_hash = self.correction_result.metadata.get("audio_hash") if self.correction_result.metadata else None
386
+
387
+ source = data.get("source", "").strip()
388
+ lyrics_text = data.get("lyrics", "").strip()
389
+
390
+ self.logger.info(f"Received request to add lyrics source '{source}' with {len(lyrics_text)} characters")
391
+
392
+ if not source or not lyrics_text:
393
+ self.logger.warning("Invalid request: missing source or lyrics text")
394
+ raise HTTPException(status_code=400, detail="Source name and lyrics text are required")
395
+
396
+ # Validate source name isn't already used
397
+ if source in self.correction_result.reference_lyrics:
398
+ self.logger.warning(f"Source name '{source}' is already in use")
399
+ raise HTTPException(status_code=400, detail=f"Source name '{source}' is already in use")
400
+
401
+ # Create lyrics data using the provider
402
+ self.logger.info("Creating LyricsData using UserInputProvider")
403
+ provider = UserInputProvider(
404
+ lyrics_text=lyrics_text, source_name=source, metadata=self.correction_result.metadata or {}, logger=self.logger
405
+ )
406
+ lyrics_data = provider._convert_result_format({"text": lyrics_text, "metadata": self.correction_result.metadata or {}})
407
+ self.logger.info(f"Created LyricsData with {len(lyrics_data.segments)} segments")
408
+
409
+ # Add to reference lyrics
410
+ self.logger.info(f"Adding new source '{source}' to reference_lyrics")
411
+ self.correction_result.reference_lyrics[source] = lyrics_data
412
+ self.logger.info(f"Now have {len(self.correction_result.reference_lyrics)} total reference sources")
413
+
414
+ # Create TranscriptionData from original segments
415
+ self.logger.info("Creating TranscriptionData from original segments")
416
+ transcription_data = TranscriptionData(
417
+ segments=self.correction_result.original_segments,
418
+ words=[word for segment in self.correction_result.original_segments for word in segment.words],
419
+ text="\n".join(segment.text for segment in self.correction_result.original_segments),
420
+ source="original",
421
+ )
422
+
423
+ # Get currently enabled handlers from metadata
424
+ enabled_handlers = None
425
+ if self.correction_result.metadata:
426
+ if "enabled_handlers" in self.correction_result.metadata:
427
+ enabled_handlers = self.correction_result.metadata["enabled_handlers"]
428
+ self.logger.info(f"Found existing enabled handlers in metadata: {enabled_handlers}")
429
+ elif "available_handlers" in self.correction_result.metadata:
430
+ # If no enabled_handlers but we have available_handlers, enable all default handlers
431
+ enabled_handlers = [
432
+ handler["id"] for handler in self.correction_result.metadata["available_handlers"] if handler.get("enabled", True)
433
+ ]
434
+ self.logger.info(f"No enabled handlers found in metadata, using default enabled handlers: {enabled_handlers}")
435
+ else:
436
+ self.logger.warning("No handler configuration found in metadata")
437
+
438
+ # Log reference sources before correction
439
+ for source, lyrics in self.correction_result.reference_lyrics.items():
440
+ word_count = sum(len(s.words) for s in lyrics.segments)
441
+ self.logger.info(f"Reference source '{source}': {word_count} words in {len(lyrics.segments)} segments")
442
+
443
+ # Rerun correction with updated reference lyrics
444
+ self.logger.info("Initializing LyricsCorrector for re-correction")
445
+ self.logger.info(f"Passing enabled handlers to corrector: {enabled_handlers or '[]'}")
446
+ corrector = LyricsCorrector(
447
+ cache_dir=self.output_config.cache_dir,
448
+ enabled_handlers=enabled_handlers, # Pass the preserved handlers or None to use defaults
449
+ logger=self.logger,
450
+ )
451
+
452
+ self.logger.info(f"Active correction handlers: {[h.__class__.__name__ for h in corrector.handlers]}")
453
+ self.logger.info("Running correction with updated reference lyrics")
454
+ self.correction_result = corrector.run(
455
+ transcription_results=[TranscriptionResult(name="original", priority=1, result=transcription_data)],
456
+ lyrics_results=self.correction_result.reference_lyrics,
457
+ metadata=self.correction_result.metadata,
458
+ )
459
+
460
+ # Update metadata with the new handler state from corrector
461
+ if not self.correction_result.metadata:
462
+ self.correction_result.metadata = {}
463
+ self.correction_result.metadata.update(
464
+ {
465
+ "available_handlers": corrector.all_handlers,
466
+ "enabled_handlers": [getattr(handler, "name", handler.__class__.__name__) for handler in corrector.handlers],
467
+ }
468
+ )
469
+
470
+ # Restore audio hash
471
+ if audio_hash:
472
+ if not self.correction_result.metadata:
473
+ self.correction_result.metadata = {}
474
+ self.correction_result.metadata["audio_hash"] = audio_hash
475
+
476
+ self.logger.info("Correction process completed")
477
+ self.logger.info(
478
+ f"Updated metadata with {len(corrector.handlers)} enabled handlers: {self.correction_result.metadata['enabled_handlers']}"
479
+ )
480
+
481
+ return {"status": "success", "data": self.correction_result.to_dict()}
482
+
483
+ except HTTPException:
484
+ raise
485
+ except Exception as e:
486
+ self.logger.error(f"Failed to add lyrics: {str(e)}", exc_info=True)
487
+ raise HTTPException(status_code=500, detail=str(e))
488
+
259
489
  def start(self) -> CorrectionResult:
260
490
  """Start the review server and wait for completion."""
261
491
  # Generate audio hash if audio file exists