lyrics-transcriber 0.43.1__py3-none-any.whl → 0.44.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +58 -24
- lyrics_transcriber/correction/anchor_sequence.py +22 -8
- lyrics_transcriber/correction/corrector.py +47 -3
- lyrics_transcriber/correction/handlers/llm.py +15 -12
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/dist/assets/{index-D0Gr3Ep7.js → index-DVoI6Z16.js} +10799 -7490
- lyrics_transcriber/frontend/dist/assets/index-DVoI6Z16.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/src/App.tsx +4 -4
- lyrics_transcriber/frontend/src/api.ts +37 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -10
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +62 -56
- lyrics_transcriber/frontend/src/components/EditModal.tsx +232 -237
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/GlobalSyncEditor.tsx +675 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +141 -101
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +146 -80
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +22 -13
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +1 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +29 -12
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +21 -4
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +29 -15
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +34 -16
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +186 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +89 -41
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +9 -2
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +28 -3
- lyrics_transcriber/frontend/src/components/shared/types.ts +17 -2
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +63 -14
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +192 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +267 -0
- lyrics_transcriber/frontend/src/main.tsx +7 -1
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types.ts +1 -1
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/lyrics/base_lyrics_provider.py +2 -2
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/generator.py +40 -12
- lyrics_transcriber/review/server.py +238 -8
- {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/METADATA +3 -2
- {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/RECORD +46 -40
- lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +0 -1
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +0 -252
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +0 -110
- {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/WHEEL +0 -0
- {lyrics_transcriber-0.43.1.dist-info → lyrics_transcriber-0.44.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,177 @@
|
|
1
|
+
import { createTheme } from '@mui/material/styles';
|
2
|
+
|
3
|
+
// Compact application theme: an explicit (smaller) type scale, tighter
// padding on common MUI components, and a reduced spacing unit.

// Builds a component override that sets nothing but padding on the `root` slot.
// Used for every component below whose only customisation is its root padding.
const rootPadding = (padding: string) => ({
  styleOverrides: {
    root: { padding },
  },
});

const theme = createTheme({
  typography: {
    // NOTE(review): 14 is believed to already be MUI's default base font size
    // (htmlFontSize is 16) — confirm whether this line changes anything.
    fontSize: 14,

    // Explicit rem size per variant; headings h4 and h6 also get a bottom margin.
    h1: { fontSize: '2.5rem' },
    h2: { fontSize: '2rem' },
    h3: { fontSize: '1.5rem' },
    h4: { fontSize: '1.2rem', marginBottom: '0.5rem' },
    h5: { fontSize: '1rem' },
    h6: { fontSize: '0.9rem', marginBottom: '0.5rem' },
    body1: { fontSize: '0.85rem' },
    body2: { fontSize: '0.75rem' },
    button: { fontSize: '0.8rem' },
    caption: { fontSize: '0.7rem' },
  },

  components: {
    // Buttons: smaller hit area for both the regular and the small size.
    MuiButton: {
      styleOverrides: {
        root: {
          padding: '3px 10px',
          minHeight: '30px',
        },
        sizeSmall: {
          padding: '1px 6px',
          minHeight: '24px',
        },
      },
    },
    MuiIconButton: {
      styleOverrides: {
        root: { padding: '4px' },
        sizeSmall: { padding: '2px' },
      },
    },

    // Text fields: cap the minimum height of the inner input container.
    MuiTextField: {
      styleOverrides: {
        root: {
          '& .MuiInputBase-root': {
            minHeight: '32px',
          },
        },
      },
    },

    // Dialogs: the padding goes on the `paper` slot, not on `root`.
    MuiDialog: {
      styleOverrides: {
        paper: { padding: '8px' },
      },
    },
    MuiDialogTitle: rootPadding('8px 12px'),
    MuiDialogContent: rootPadding('6px 12px'),
    MuiDialogActions: rootPadding('6px 12px'),

    // NOTE(review): Paper is presumably the base surface for dialogs, cards,
    // menus, etc., so this padding may stack with their own overrides — confirm.
    MuiPaper: rootPadding('8px'),

    // Lists and tables.
    MuiList: rootPadding('2px 0'),
    MuiListItem: rootPadding('2px 8px'),
    MuiTableCell: rootPadding('4px 8px'),

    // Cards.
    MuiCard: rootPadding('8px'),
    MuiCardContent: {
      styleOverrides: {
        root: {
          padding: '8px',
          // Keep the bottom padding of a trailing CardContent at 8px as well.
          '&:last-child': {
            paddingBottom: '8px',
          },
        },
      },
    },
    MuiCardHeader: rootPadding('8px'),
    MuiCardActions: rootPadding('4px 8px'),

    // Grid: 4px gutters, with the container pulled out by the same amount so
    // that the outer edges of the items line up with the container's parent.
    MuiGrid: {
      styleOverrides: {
        container: {
          marginTop: '-4px',
          marginLeft: '-4px',
          width: 'calc(100% + 8px)',
        },
        item: {
          paddingTop: '4px',
          paddingLeft: '4px',
        },
      },
    },
  },

  // One spacing step is 0.6rem.
  spacing: (factor: number) => `${0.6 * factor}rem`,
});
|
176
|
+
|
177
|
+
export default theme;
|
@@ -1 +1 @@
|
|
1
|
-
{"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/validation.ts","./src/vite-env.d.ts","./src/components/
|
1
|
+
{"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/theme.ts","./src/types.ts","./src/validation.ts","./src/vite-env.d.ts","./src/components/addlyricsmodal.tsx","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/findreplacemodal.tsx","./src/components/globalsynceditor.tsx","./src/components/header.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/previewvideosection.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/worddivider.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/keyboardhandlers.ts","./src/components/shared/utils/localstorage.ts","./src/components/shared/utils/referencelinecalculator.ts","./src/components/shared/utils/segmentoperations.ts","./src/components/shared/utils/wordutils.ts","./src/hooks/usemanualsync.ts","./src/types/global.d.ts"],"version":"5.6.3"}
|
@@ -47,14 +47,14 @@ class BaseLyricsProvider(ABC):
|
|
47
47
|
converted_cache_path = self._get_cache_path(cache_key, "converted")
|
48
48
|
converted_data = self._load_from_cache(converted_cache_path)
|
49
49
|
if converted_data:
|
50
|
-
self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
|
50
|
+
self.logger.info(f"Using cached converted lyrics for {artist} - {title} from file: {converted_cache_path}")
|
51
51
|
return LyricsData.from_dict(converted_data)
|
52
52
|
|
53
53
|
# Check raw cache next
|
54
54
|
raw_cache_path = self._get_cache_path(cache_key, "raw")
|
55
55
|
raw_data = self._load_from_cache(raw_cache_path)
|
56
56
|
if raw_data:
|
57
|
-
self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
|
57
|
+
self.logger.info(f"Using cached raw lyrics for {artist} - {title} from file: {raw_cache_path}")
|
58
58
|
converted_result = self._convert_result_format(raw_data)
|
59
59
|
self._save_to_cache(converted_cache_path, converted_result.to_dict())
|
60
60
|
return converted_result
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from typing import Optional, Dict, Any
|
2
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
3
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata
|
4
|
+
|
5
|
+
|
6
|
+
class UserInputProvider(BaseLyricsProvider):
    """Lyrics provider whose "source" is text supplied directly by the user.

    Nothing is fetched remotely: the raw data is simply the text and metadata
    handed to the constructor, and conversion splits that text into segments
    using the base class helper.
    """

    def __init__(self, lyrics_text: str, source_name: str, metadata: Dict[str, Any], *args, **kwargs):
        """Store the user's lyrics text, the source label and its metadata.

        Args:
            lyrics_text: The lyrics exactly as entered by the user.
            source_name: Label recorded as the source of the resulting lyrics.
            metadata: Mapping that may carry "title" and "artist" entries.
            *args: Forwarded to the base provider after a default config.
            **kwargs: Forwarded to the base provider (e.g. ``logger``).
        """
        super().__init__(LyricsProviderConfig(), *args, **kwargs)
        self.lyrics_text = lyrics_text
        self.source_name = source_name
        self.input_metadata = metadata

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Return the stored text and metadata as the raw data payload.

        ``artist`` and ``title`` are accepted for interface compatibility and
        are not used.
        """
        return {"text": self.lyrics_text, "metadata": self.input_metadata}

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert a raw ``{"text", "metadata"}`` payload into ``LyricsData``.

        Args:
            raw_data: Mapping with a required "text" entry and an optional
                "metadata" mapping.

        Returns:
            LyricsData whose segments come from the text and whose metadata is
            marked as unsynced, manually provided lyrics.
        """
        segments = self._create_segments_with_words(raw_data["text"])

        # The metadata mapping may be absent or None, and its title/artist
        # values may themselves be None; fall back to empty strings so the
        # result always carries string names.
        source_metadata = raw_data.get("metadata") or {}

        metadata = LyricsMetadata(
            source=self.source_name,
            track_name=source_metadata.get("title") or "",
            artist_names=source_metadata.get("artist") or "",
            is_synced=False,
            lyrics_provider="manual",
            lyrics_provider_id="",
            album_name=None,
            duration_ms=None,
            explicit=None,
            language=None,
            provider_metadata={},
        )

        return LyricsData(segments=segments, metadata=metadata, source=self.source_name)

    def get_name(self) -> str:
        """Return the provider name."""
        return "UserInput"
|
@@ -37,6 +37,7 @@ class OutputGenerator:
|
|
37
37
|
self,
|
38
38
|
config: OutputConfig,
|
39
39
|
logger: Optional[logging.Logger] = None,
|
40
|
+
preview_mode: bool = False,
|
40
41
|
):
|
41
42
|
"""
|
42
43
|
Initialize OutputGenerator with configuration.
|
@@ -44,20 +45,12 @@ class OutputGenerator:
|
|
44
45
|
Args:
|
45
46
|
config: OutputConfig instance with required paths and settings
|
46
47
|
logger: Optional logger instance
|
48
|
+
preview_mode: Boolean indicating if the generator is in preview mode
|
47
49
|
"""
|
48
50
|
self.config = config
|
49
51
|
self.logger = logger or logging.getLogger(__name__)
|
50
52
|
|
51
|
-
self.logger.
|
52
|
-
|
53
|
-
# Set video resolution parameters
|
54
|
-
self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
|
55
|
-
|
56
|
-
self.segment_resizer = SegmentResizer(max_line_length=self.config.max_line_length, logger=self.logger)
|
57
|
-
|
58
|
-
# Initialize generators
|
59
|
-
self.plain_text = PlainTextGenerator(self.config.output_dir, self.logger)
|
60
|
-
self.lyrics_file = LyricsFileGenerator(self.config.output_dir, self.logger)
|
53
|
+
self.logger.info(f"Initializing OutputGenerator with config: {self.config}")
|
61
54
|
|
62
55
|
if self.config.render_video or self.config.generate_cdg:
|
63
56
|
# Load output styles from JSON
|
@@ -68,10 +61,46 @@ class OutputGenerator:
|
|
68
61
|
except Exception as e:
|
69
62
|
raise ValueError(f"Failed to load output styles file: {str(e)}")
|
70
63
|
|
64
|
+
# Set video resolution parameters
|
65
|
+
self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
|
66
|
+
self.logger.info(f"Video resolution: {self.video_resolution_num}, font size: {self.font_size}, line height: {self.line_height}")
|
67
|
+
|
68
|
+
self.segment_resizer = SegmentResizer(max_line_length=self.config.max_line_length, logger=self.logger)
|
69
|
+
|
70
|
+
# Initialize generators
|
71
|
+
self.plain_text = PlainTextGenerator(self.config.output_dir, self.logger)
|
72
|
+
self.lyrics_file = LyricsFileGenerator(self.config.output_dir, self.logger)
|
73
|
+
|
71
74
|
if self.config.generate_cdg:
|
72
75
|
self.cdg = CDGGenerator(self.config.output_dir, self.logger)
|
73
76
|
|
77
|
+
self.preview_mode = preview_mode
|
74
78
|
if self.config.render_video:
|
79
|
+
# Apply preview mode scaling if needed
|
80
|
+
if self.preview_mode:
|
81
|
+
# Scale down from 4K (2160p) to 360p - factor of 1/6
|
82
|
+
scale_factor = 1 / 6
|
83
|
+
|
84
|
+
# Scale down top padding for preview if it exists
|
85
|
+
if "karaoke" in self.config.styles and "top_padding" in self.config.styles["karaoke"]:
|
86
|
+
self.logger.info(f"Preview mode: Found top_padding: {self.config.styles['karaoke']['top_padding']}")
|
87
|
+
original_padding = self.config.styles["karaoke"]["top_padding"]
|
88
|
+
if original_padding is not None:
|
89
|
+
# Scale down from 4K (2160p) to 360p - factor of 1/6
|
90
|
+
self.config.styles["karaoke"]["top_padding"] = original_padding * scale_factor
|
91
|
+
self.logger.info(f"Preview mode: Scaled down top_padding to: {self.config.styles['karaoke']['top_padding']}")
|
92
|
+
|
93
|
+
# Scale down font size for preview if it exists
|
94
|
+
if "karaoke" in self.config.styles and "font_size" in self.config.styles["karaoke"]:
|
95
|
+
self.logger.info(f"Preview mode: Found font_size: {self.config.styles['karaoke']['font_size']}")
|
96
|
+
original_font_size = self.config.styles["karaoke"]["font_size"]
|
97
|
+
if original_font_size is not None:
|
98
|
+
# Scale down from 4K (2160p) to 360p - factor of 1/6
|
99
|
+
self.font_size = original_font_size * scale_factor
|
100
|
+
self.config.styles["karaoke"]["font_size"] = self.font_size
|
101
|
+
self.logger.info(f"Preview mode: Scaled down font_size to: {self.font_size}")
|
102
|
+
|
103
|
+
# Initialize subtitle generator with potentially scaled values
|
75
104
|
self.subtitle = SubtitlesGenerator(
|
76
105
|
output_dir=self.config.output_dir,
|
77
106
|
video_resolution=self.video_resolution_num,
|
@@ -102,7 +131,6 @@ class OutputGenerator:
|
|
102
131
|
audio_filepath: str,
|
103
132
|
artist: Optional[str] = None,
|
104
133
|
title: Optional[str] = None,
|
105
|
-
preview_mode: bool = False,
|
106
134
|
) -> OutputPaths:
|
107
135
|
"""Generate all requested output formats."""
|
108
136
|
outputs = OutputPaths()
|
@@ -116,7 +144,7 @@ class OutputGenerator:
|
|
116
144
|
transcription_corrected.resized_segments = resized_segments
|
117
145
|
|
118
146
|
# For preview, we only need to generate ASS and video
|
119
|
-
if preview_mode:
|
147
|
+
if self.preview_mode:
|
120
148
|
# Generate ASS subtitles for preview
|
121
149
|
outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
|
122
150
|
|
@@ -2,8 +2,8 @@ import logging
|
|
2
2
|
import socket
|
3
3
|
from fastapi import FastAPI, Body, HTTPException
|
4
4
|
from fastapi.middleware.cors import CORSMiddleware
|
5
|
-
from typing import Dict, Any, List
|
6
|
-
from lyrics_transcriber.types import CorrectionResult, WordCorrection, LyricsSegment
|
5
|
+
from typing import Dict, Any, List, Optional
|
6
|
+
from lyrics_transcriber.types import CorrectionResult, WordCorrection, LyricsSegment, LyricsData, LyricsMetadata, Word
|
7
7
|
import time
|
8
8
|
import os
|
9
9
|
import urllib.parse
|
@@ -18,6 +18,7 @@ from lyrics_transcriber.output.generator import OutputGenerator
|
|
18
18
|
import json
|
19
19
|
from lyrics_transcriber.correction.corrector import LyricsCorrector
|
20
20
|
from lyrics_transcriber.types import TranscriptionResult, TranscriptionData
|
21
|
+
from lyrics_transcriber.lyrics.user_input_provider import UserInputProvider
|
21
22
|
|
22
23
|
|
23
24
|
class ReviewServer:
|
@@ -73,6 +74,7 @@ class ReviewServer:
|
|
73
74
|
self.app.add_api_route("/api/audio/{audio_hash}", self.get_audio, methods=["GET"])
|
74
75
|
self.app.add_api_route("/api/ping", self.ping, methods=["GET"])
|
75
76
|
self.app.add_api_route("/api/handlers", self.update_handlers, methods=["POST"])
|
77
|
+
self.app.add_api_route("/api/add-lyrics", self.add_lyrics, methods=["POST"])
|
76
78
|
|
77
79
|
async def get_correction_data(self):
|
78
80
|
"""Get the correction data."""
|
@@ -83,8 +85,8 @@ class ReviewServer:
|
|
83
85
|
return CorrectionResult(
|
84
86
|
corrections=[
|
85
87
|
WordCorrection(
|
86
|
-
original_word=c.get("original_word", ""),
|
87
|
-
corrected_word=c.get("corrected_word", ""),
|
88
|
+
original_word=c.get("original_word", "").strip(),
|
89
|
+
corrected_word=c.get("corrected_word", "").strip(),
|
88
90
|
original_position=c.get("original_position", 0),
|
89
91
|
source=c.get("source", "review"),
|
90
92
|
reason=c.get("reason", "manual_review"),
|
@@ -103,7 +105,26 @@ class ReviewServer:
|
|
103
105
|
)
|
104
106
|
for c in updated_data["corrections"]
|
105
107
|
],
|
106
|
-
corrected_segments=[
|
108
|
+
corrected_segments=[
|
109
|
+
LyricsSegment(
|
110
|
+
id=s["id"],
|
111
|
+
text=s["text"].strip(),
|
112
|
+
words=[
|
113
|
+
Word(
|
114
|
+
id=w["id"],
|
115
|
+
text=w["text"].strip(),
|
116
|
+
start_time=w["start_time"],
|
117
|
+
end_time=w["end_time"],
|
118
|
+
confidence=w.get("confidence"),
|
119
|
+
created_during_correction=w.get("created_during_correction", False),
|
120
|
+
)
|
121
|
+
for w in s["words"]
|
122
|
+
],
|
123
|
+
start_time=s["start_time"],
|
124
|
+
end_time=s["end_time"],
|
125
|
+
)
|
126
|
+
for s in updated_data["corrected_segments"]
|
127
|
+
],
|
107
128
|
# Copy existing fields from the base result
|
108
129
|
original_segments=base_result.original_segments,
|
109
130
|
corrections_made=len(updated_data["corrections"]),
|
@@ -166,7 +187,7 @@ class ReviewServer:
|
|
166
187
|
styles=self.output_config.styles,
|
167
188
|
max_line_length=self.output_config.max_line_length,
|
168
189
|
)
|
169
|
-
output_generator = OutputGenerator(config=preview_config, logger=self.logger)
|
190
|
+
output_generator = OutputGenerator(config=preview_config, logger=self.logger, preview_mode=True)
|
170
191
|
|
171
192
|
# Generate preview outputs with unique prefix
|
172
193
|
preview_outputs = output_generator.generate_outputs(
|
@@ -174,7 +195,6 @@ class ReviewServer:
|
|
174
195
|
lyrics_results={}, # Empty dict since we don't need lyrics results for preview
|
175
196
|
output_prefix=f"preview_{preview_hash}", # Include hash in filename
|
176
197
|
audio_filepath=self.audio_filepath,
|
177
|
-
preview_mode=True,
|
178
198
|
)
|
179
199
|
|
180
200
|
if not preview_outputs.video:
|
@@ -238,13 +258,58 @@ class ReviewServer:
|
|
238
258
|
source="original",
|
239
259
|
)
|
240
260
|
|
241
|
-
#
|
261
|
+
# Get currently enabled handlers from metadata
|
262
|
+
enabled_handlers = None
|
263
|
+
if self.correction_result.metadata:
|
264
|
+
if "enabled_handlers" in self.correction_result.metadata:
|
265
|
+
enabled_handlers = self.correction_result.metadata["enabled_handlers"]
|
266
|
+
self.logger.info(f"Found existing enabled handlers in metadata: {enabled_handlers}")
|
267
|
+
elif "available_handlers" in self.correction_result.metadata:
|
268
|
+
# If no enabled_handlers but we have available_handlers, enable all default handlers
|
269
|
+
enabled_handlers = [
|
270
|
+
handler["id"] for handler in self.correction_result.metadata["available_handlers"] if handler.get("enabled", True)
|
271
|
+
]
|
272
|
+
self.logger.info(f"No enabled handlers found in metadata, using default enabled handlers: {enabled_handlers}")
|
273
|
+
else:
|
274
|
+
self.logger.warning("No handler configuration found in metadata")
|
275
|
+
|
276
|
+
# Log reference sources before correction
|
277
|
+
for source, lyrics in self.correction_result.reference_lyrics.items():
|
278
|
+
word_count = sum(len(s.words) for s in lyrics.segments)
|
279
|
+
self.logger.info(f"Reference source '{source}': {word_count} words in {len(lyrics.segments)} segments")
|
280
|
+
|
281
|
+
# Rerun correction with updated reference lyrics
|
282
|
+
self.logger.info("Initializing LyricsCorrector for re-correction")
|
283
|
+
self.logger.info(f"Passing enabled handlers to corrector: {enabled_handlers or '[]'}")
|
284
|
+
corrector = LyricsCorrector(
|
285
|
+
cache_dir=self.output_config.cache_dir,
|
286
|
+
enabled_handlers=enabled_handlers, # Pass the preserved handlers or None to use defaults
|
287
|
+
logger=self.logger,
|
288
|
+
)
|
289
|
+
|
290
|
+
self.logger.info(f"Active correction handlers: {[h.__class__.__name__ for h in corrector.handlers]}")
|
291
|
+
self.logger.info("Running correction with updated reference lyrics")
|
242
292
|
self.correction_result = corrector.run(
|
243
293
|
transcription_results=[TranscriptionResult(name="original", priority=1, result=transcription_data)],
|
244
294
|
lyrics_results=self.correction_result.reference_lyrics,
|
245
295
|
metadata=self.correction_result.metadata,
|
246
296
|
)
|
247
297
|
|
298
|
+
# Update metadata with the new handler state from corrector
|
299
|
+
if not self.correction_result.metadata:
|
300
|
+
self.correction_result.metadata = {}
|
301
|
+
self.correction_result.metadata.update(
|
302
|
+
{
|
303
|
+
"available_handlers": corrector.all_handlers,
|
304
|
+
"enabled_handlers": [getattr(handler, "name", handler.__class__.__name__) for handler in corrector.handlers],
|
305
|
+
}
|
306
|
+
)
|
307
|
+
|
308
|
+
self.logger.info("Correction process completed")
|
309
|
+
self.logger.info(
|
310
|
+
f"Updated metadata with {len(corrector.handlers)} enabled handlers: {self.correction_result.metadata['enabled_handlers']}"
|
311
|
+
)
|
312
|
+
|
248
313
|
# Restore audio hash
|
249
314
|
if audio_hash:
|
250
315
|
if not self.correction_result.metadata:
|
@@ -256,6 +321,171 @@ class ReviewServer:
|
|
256
321
|
self.logger.error(f"Failed to update handlers: {str(e)}")
|
257
322
|
raise HTTPException(status_code=500, detail=str(e))
|
258
323
|
|
324
|
+
def _create_lyrics_data_from_text(self, text: str, source: str) -> LyricsData:
    """Create a LyricsData object from plain text lyrics.

    Each non-empty line becomes one segment and each whitespace-separated
    token on that line becomes one word. All timings are 0.0 placeholders.

    Args:
        text: Plain lyrics text, one lyric line per text line.
        source: Source label; also embedded in the generated segment/word ids.

    Returns:
        LyricsData with the generated segments and "manual" provider metadata.
    """
    self.logger.info(f"Creating LyricsData for source '{source}'")

    # Keep only non-blank lines, already trimmed of surrounding whitespace.
    lines = [line.strip() for line in text.split("\n") if line.strip()]
    self.logger.info(f"Found {len(lines)} non-empty lines in input text")

    segments = []
    for i, line in enumerate(lines):
        words = [
            Word(
                id=f"manual_{source}_word_{i}_{j}",  # Unique per line/word position
                text=word_text,
                start_time=0.0,  # Placeholder timing
                end_time=0.0,
                confidence=1.0,  # Reference lyrics are considered ground truth
                created_during_correction=False,
            )
            for j, word_text in enumerate(line.split())
        ]
        segments.append(
            LyricsSegment(
                id=f"manual_{source}_{i}",
                text=line,
                words=words,
                start_time=0.0,  # Placeholder timing
                end_time=0.0,
            )
        )

    self.logger.info("Creating metadata for LyricsData")
    # The correction result's metadata may be None; treat that as "no metadata"
    # instead of failing on .get().
    result_metadata = self.correction_result.metadata or {}
    metadata = LyricsMetadata(
        source=source,
        track_name=result_metadata.get("title", "") or "",
        artist_names=result_metadata.get("artist", "") or "",
        is_synced=False,
        lyrics_provider="manual",
        lyrics_provider_id="",
        album_name=None,
        duration_ms=None,
        explicit=None,
        language=None,
        provider_metadata={},
    )
    self.logger.info(f"Created metadata: {metadata}")

    lyrics_data = LyricsData(segments=segments, metadata=metadata, source=source)
    self.logger.info(f"Created LyricsData with {len(segments)} segments and {sum(len(s.words) for s in segments)} total words")

    return lyrics_data
|
380
|
+
|
381
|
+
async def add_lyrics(self, data: Dict[str, str] = Body(...)):
    """Add a new reference lyrics source and rerun correction.

    Args:
        data: Request body with "source" (name for the new reference source)
            and "lyrics" (the plain lyrics text).

    Returns:
        ``{"status": "success", "data": <correction result as dict>}``.

    Raises:
        HTTPException: 400 when the source name or lyrics text is empty, or
            when the source name is already used; 500 for any other failure.
    """
    try:
        # Store existing audio hash so it can be restored on the new result
        audio_hash = self.correction_result.metadata.get("audio_hash") if self.correction_result.metadata else None

        source = data.get("source", "").strip()
        lyrics_text = data.get("lyrics", "").strip()

        self.logger.info(f"Received request to add lyrics source '{source}' with {len(lyrics_text)} characters")

        if not source or not lyrics_text:
            self.logger.warning("Invalid request: missing source or lyrics text")
            raise HTTPException(status_code=400, detail="Source name and lyrics text are required")

        # Validate source name isn't already used
        if source in self.correction_result.reference_lyrics:
            self.logger.warning(f"Source name '{source}' is already in use")
            raise HTTPException(status_code=400, detail=f"Source name '{source}' is already in use")

        # Create lyrics data using the provider
        self.logger.info("Creating LyricsData using UserInputProvider")
        provider = UserInputProvider(
            lyrics_text=lyrics_text, source_name=source, metadata=self.correction_result.metadata or {}, logger=self.logger
        )
        lyrics_data = provider._convert_result_format({"text": lyrics_text, "metadata": self.correction_result.metadata or {}})
        self.logger.info(f"Created LyricsData with {len(lyrics_data.segments)} segments")

        # Add to reference lyrics. Keep a handle on the current result so the
        # insertion can be undone if re-correction fails below.
        previous_result = self.correction_result
        self.logger.info(f"Adding new source '{source}' to reference_lyrics")
        previous_result.reference_lyrics[source] = lyrics_data
        self.logger.info(f"Now have {len(previous_result.reference_lyrics)} total reference sources")

        try:
            # Create TranscriptionData from original segments
            self.logger.info("Creating TranscriptionData from original segments")
            transcription_data = TranscriptionData(
                segments=previous_result.original_segments,
                words=[word for segment in previous_result.original_segments for word in segment.words],
                text="\n".join(segment.text for segment in previous_result.original_segments),
                source="original",
            )

            # Get currently enabled handlers from metadata
            enabled_handlers = None
            if previous_result.metadata:
                if "enabled_handlers" in previous_result.metadata:
                    enabled_handlers = previous_result.metadata["enabled_handlers"]
                    self.logger.info(f"Found existing enabled handlers in metadata: {enabled_handlers}")
                elif "available_handlers" in previous_result.metadata:
                    # If no enabled_handlers but we have available_handlers, enable all default handlers
                    enabled_handlers = [
                        handler["id"] for handler in previous_result.metadata["available_handlers"] if handler.get("enabled", True)
                    ]
                    self.logger.info(f"No enabled handlers found in metadata, using default enabled handlers: {enabled_handlers}")
                else:
                    self.logger.warning("No handler configuration found in metadata")

            # Log reference sources before correction. The loop variable is
            # deliberately not called `source`, so the request's source name
            # is not overwritten.
            for ref_source, ref_lyrics in previous_result.reference_lyrics.items():
                word_count = sum(len(s.words) for s in ref_lyrics.segments)
                self.logger.info(f"Reference source '{ref_source}': {word_count} words in {len(ref_lyrics.segments)} segments")

            # Rerun correction with updated reference lyrics
            self.logger.info("Initializing LyricsCorrector for re-correction")
            self.logger.info(f"Passing enabled handlers to corrector: {enabled_handlers or '[]'}")
            corrector = LyricsCorrector(
                cache_dir=self.output_config.cache_dir,
                enabled_handlers=enabled_handlers,  # Pass the preserved handlers or None to use defaults
                logger=self.logger,
            )

            self.logger.info(f"Active correction handlers: {[h.__class__.__name__ for h in corrector.handlers]}")
            self.logger.info("Running correction with updated reference lyrics")
            self.correction_result = corrector.run(
                transcription_results=[TranscriptionResult(name="original", priority=1, result=transcription_data)],
                lyrics_results=previous_result.reference_lyrics,
                metadata=previous_result.metadata,
            )
        except Exception:
            # Re-correction failed: remove the source that was just added so the
            # stored result is unchanged and the client can retry with the same
            # name instead of getting "already in use".
            previous_result.reference_lyrics.pop(source, None)
            raise

        # Update metadata with the new handler state from corrector
        if not self.correction_result.metadata:
            self.correction_result.metadata = {}
        self.correction_result.metadata.update(
            {
                "available_handlers": corrector.all_handlers,
                "enabled_handlers": [getattr(handler, "name", handler.__class__.__name__) for handler in corrector.handlers],
            }
        )

        # Restore audio hash (metadata is guaranteed to be a dict at this point)
        if audio_hash:
            self.correction_result.metadata["audio_hash"] = audio_hash

        self.logger.info("Correction process completed")
        self.logger.info(
            f"Updated metadata with {len(corrector.handlers)} enabled handlers: {self.correction_result.metadata['enabled_handlers']}"
        )

        return {"status": "success", "data": self.correction_result.to_dict()}

    except HTTPException:
        # Validation errors raised above already carry the right status code.
        raise
    except Exception as e:
        self.logger.error(f"Failed to add lyrics: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
488
|
+
|
259
489
|
def start(self) -> CorrectionResult:
|
260
490
|
"""Start the review server and wait for completion."""
|
261
491
|
# Generate audio hash if audio file exists
|