lyrics-transcriber 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-D0Gr3Ep7.js} +16509 -9038
- lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +281 -63
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +249 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +320 -266
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +120 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +174 -52
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +158 -114
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +39 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +134 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +67 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +1 -1
- lyrics_transcriber/output/generator.py +22 -8
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +27 -1
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/RECORD +75 -61
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/entry_points.txt +0 -0
@@ -1 +0,0 @@
|
|
1
|
-
from .server import start_review_server, complete_review
|
@@ -1,173 +1,346 @@
|
|
1
1
|
import logging
|
2
|
-
|
2
|
+
import socket
|
3
|
+
from fastapi import FastAPI, Body, HTTPException
|
3
4
|
from fastapi.middleware.cors import CORSMiddleware
|
4
|
-
from typing import
|
5
|
-
from
|
5
|
+
from typing import Dict, Any, List
|
6
|
+
from lyrics_transcriber.types import CorrectionResult, WordCorrection, LyricsSegment
|
6
7
|
import time
|
7
|
-
import subprocess
|
8
8
|
import os
|
9
|
-
import atexit
|
10
9
|
import urllib.parse
|
11
10
|
from fastapi.staticfiles import StaticFiles
|
12
11
|
from fastapi.responses import FileResponse
|
13
|
-
|
14
|
-
import
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
12
|
+
import hashlib
|
13
|
+
from lyrics_transcriber.core.config import OutputConfig
|
14
|
+
import uvicorn
|
15
|
+
import webbrowser
|
16
|
+
from threading import Thread
|
17
|
+
from lyrics_transcriber.output.generator import OutputGenerator
|
18
|
+
import json
|
19
|
+
from lyrics_transcriber.correction.corrector import LyricsCorrector
|
20
|
+
from lyrics_transcriber.types import TranscriptionResult, TranscriptionData
|
21
|
+
|
22
|
+
|
23
|
+
class ReviewServer:
    """Handles the review process through a web interface.

    The server exposes a small JSON API under ``/api`` plus the bundled
    frontend assets, opens a browser pointing at itself, and blocks in
    :meth:`start` until the frontend posts to ``/api/complete``.
    """

    # Address the server binds to; the same pair is probed for availability.
    HOST = "127.0.0.1"
    PORT = 8000
    # Seconds to sleep between availability checks while the port is occupied.
    PORT_RETRY_SECONDS = 30
    # Read size used when hashing the audio file, so it is never fully loaded.
    HASH_CHUNK_BYTES = 1024 * 1024

    def __init__(
        self,
        correction_result: CorrectionResult,
        output_config: OutputConfig,
        audio_filepath: str,
        logger: logging.Logger,
    ):
        """Initialize the review server.

        Args:
            correction_result: Correction data served to, and later replaced by, the frontend.
            output_config: Output settings; reused when rendering preview videos and
                when re-running correction (``cache_dir``).
            audio_filepath: Path of the audio file served by ``/api/audio/{audio_hash}``.
            logger: Logger to use; falls back to a module logger when falsy.

        Raises:
            FileNotFoundError: If the built frontend assets directory does not exist.
        """
        self.correction_result = correction_result
        self.output_config = output_config
        self.audio_filepath = audio_filepath
        self.logger = logger or logging.getLogger(__name__)
        self.review_completed = False
        # Maps preview hash -> generated preview video path. Created eagerly so the
        # request handlers never need hasattr() checks.
        self.preview_videos: Dict[str, Any] = {}

        # Create FastAPI instance and configure.
        # NOTE(review): API routes are registered before the static mount at "/";
        # presumably the order matters for route matching - confirm before reordering.
        self.app = FastAPI()
        self._configure_cors()
        self._register_routes()
        self._mount_frontend()

    def _configure_cors(self) -> None:
        """Configure CORS middleware for local development origins."""
        # range() excludes its end, so ports 3000..5173 are allowed on both hostnames.
        ports = range(3000, 5174)
        allowed_origins = [f"http://localhost:{port}" for port in ports]
        allowed_origins += [f"http://127.0.0.1:{port}" for port in ports]
        self.app.add_middleware(
            CORSMiddleware,
            allow_origins=allowed_origins,
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

    def _mount_frontend(self) -> None:
        """Mount the frontend static files at the application root.

        Raises:
            FileNotFoundError: If ``../frontend/dist`` (relative to this module) is missing.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        frontend_dir = os.path.abspath(os.path.join(current_dir, "../frontend/dist"))

        if not os.path.exists(frontend_dir):
            raise FileNotFoundError(f"Frontend assets not found at {frontend_dir}")

        self.app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")

    def _register_routes(self) -> None:
        """Register API routes."""
        routes = (
            ("/api/correction-data", self.get_correction_data, "GET"),
            ("/api/complete", self.complete_review, "POST"),
            ("/api/preview-video", self.generate_preview_video, "POST"),
            ("/api/preview-video/{preview_hash}", self.get_preview_video, "GET"),
            ("/api/audio/{audio_hash}", self.get_audio, "GET"),
            ("/api/ping", self.ping, "GET"),
            ("/api/handlers", self.update_handlers, "POST"),
        )
        for path, endpoint, method in routes:
            self.app.add_api_route(path, endpoint, methods=[method])

    async def get_correction_data(self):
        """Return the current correction result as a dictionary."""
        return self.correction_result.to_dict()

    def _update_correction_result(self, base_result: CorrectionResult, updated_data: Dict[str, Any]) -> CorrectionResult:
        """Build a new CorrectionResult from ``base_result`` and frontend-supplied data.

        Args:
            base_result: Result whose non-edited fields are carried over unchanged.
            updated_data: Payload containing ``"corrections"`` (list of dicts) and
                ``"corrected_segments"`` (list of segment dicts).

        Returns:
            A new CorrectionResult; ``resized_segments`` is reset to ``None``.

        Raises:
            KeyError: If ``"corrections"`` or ``"corrected_segments"`` is missing.
        """
        raw_corrections = updated_data["corrections"]
        corrections = [
            WordCorrection(
                original_word=c.get("original_word", ""),
                corrected_word=c.get("corrected_word", ""),
                original_position=c.get("original_position", 0),
                source=c.get("source", "review"),
                reason=c.get("reason", "manual_review"),
                segment_index=c.get("segment_index", 0),
                confidence=c.get("confidence"),
                alternatives=c.get("alternatives", {}),
                is_deletion=c.get("is_deletion", False),
                split_index=c.get("split_index"),
                split_total=c.get("split_total"),
                corrected_position=c.get("corrected_position"),
                reference_positions=c.get("reference_positions"),
                length=c.get("length", 1),
                handler=c.get("handler"),
                word_id=c.get("word_id"),
                corrected_word_id=c.get("corrected_word_id"),
            )
            for c in raw_corrections
        ]
        corrected_segments = [LyricsSegment.from_dict(s) for s in updated_data["corrected_segments"]]

        return CorrectionResult(
            corrections=corrections,
            corrected_segments=corrected_segments,
            # Copy existing fields from the base result
            original_segments=base_result.original_segments,
            corrections_made=len(raw_corrections),
            confidence=base_result.confidence,
            reference_lyrics=base_result.reference_lyrics,
            anchor_sequences=base_result.anchor_sequences,
            gap_sequences=base_result.gap_sequences,
            resized_segments=None,  # Will be generated if needed
            metadata=base_result.metadata,
            correction_steps=base_result.correction_steps,
            word_id_map=base_result.word_id_map,
            segment_id_map=base_result.segment_id_map,
        )

    async def complete_review(self, updated_data: Dict[str, Any] = Body(...)):
        """Store the reviewed corrections and mark the review as completed.

        Failures are reported in the response body (``status: "error"``) rather
        than as an HTTP error status; ``review_completed`` stays ``False`` then.
        """
        try:
            self.correction_result = self._update_correction_result(self.correction_result, updated_data)
            self.review_completed = True
            return {"status": "success"}
        except Exception as e:
            self.logger.error(f"Failed to update correction data: {str(e)}")
            return {"status": "error", "message": str(e)}

    async def ping(self):
        """Simple ping endpoint for testing."""
        return {"status": "ok"}

    async def get_audio(self, audio_hash: str):
        """Serve the audio file when ``audio_hash`` matches the stored metadata hash.

        Raises:
            HTTPException: 404 when the file is missing, the hash does not match,
                or any other error occurs while preparing the response.
        """
        try:
            metadata = self.correction_result.metadata
            if (
                not self.audio_filepath
                or not os.path.exists(self.audio_filepath)
                or not metadata
                or metadata.get("audio_hash") != audio_hash
            ):
                raise FileNotFoundError("Audio file not found")

            return FileResponse(self.audio_filepath, media_type="audio/mpeg", filename=os.path.basename(self.audio_filepath))
        except Exception as e:
            # Log the cause (as get_preview_video does) instead of discarding it.
            self.logger.error(f"Failed to stream audio: {str(e)}")
            raise HTTPException(status_code=404, detail="Audio file not found") from e

    async def generate_preview_video(self, updated_data: Dict[str, Any] = Body(...)):
        """Generate a preview video with the current corrections.

        Returns:
            ``{"status": "success", "preview_hash": <hash>}``; the hash is the key
            to pass to ``/api/preview-video/{preview_hash}``.

        Raises:
            HTTPException: 500 when generation fails for any reason.
        """
        try:
            # Create temporary correction result with updated data
            temp_correction = self._update_correction_result(self.correction_result, updated_data)

            # Generate a unique hash for this preview
            preview_data = json.dumps(updated_data, sort_keys=True).encode("utf-8")
            preview_hash = hashlib.md5(preview_data).hexdigest()[:12]  # Use first 12 chars for shorter filename

            # Initialize output generator with preview settings
            preview_config = OutputConfig(
                output_dir=self.output_config.output_dir,
                cache_dir=self.output_config.cache_dir,
                output_styles_json=self.output_config.output_styles_json,
                video_resolution="360p",  # Force 360p for preview
                styles=self.output_config.styles,
                max_line_length=self.output_config.max_line_length,
            )
            output_generator = OutputGenerator(config=preview_config, logger=self.logger)

            # Generate preview outputs with unique prefix
            preview_outputs = output_generator.generate_outputs(
                transcription_corrected=temp_correction,
                lyrics_results={},  # Empty dict since we don't need lyrics results for preview
                output_prefix=f"preview_{preview_hash}",  # Include hash in filename
                audio_filepath=self.audio_filepath,
                preview_mode=True,
            )

            if not preview_outputs.video:
                raise ValueError("Preview video generation failed")

            # Store the path for later retrieval
            self.preview_videos[preview_hash] = preview_outputs.video

            return {"status": "success", "preview_hash": preview_hash}

        except Exception as e:
            self.logger.error(f"Failed to generate preview video: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

    async def get_preview_video(self, preview_hash: str):
        """Serve a previously generated preview video.

        Raises:
            HTTPException: 404 when the hash is unknown, the file no longer
                exists, or any other error occurs while preparing the response.
        """
        try:
            if preview_hash not in self.preview_videos:
                raise FileNotFoundError("Preview video not found")

            video_path = self.preview_videos[preview_hash]
            if not os.path.exists(video_path):
                raise FileNotFoundError("Preview video file not found")

            return FileResponse(
                video_path,
                media_type="video/mp4",
                filename=os.path.basename(video_path),
                headers={
                    "Accept-Ranges": "bytes",
                    "Content-Disposition": "inline",
                    "Cache-Control": "no-cache",
                    "X-Content-Type-Options": "nosniff",
                },
            )
        except Exception as e:
            self.logger.error(f"Failed to stream preview video: {str(e)}")
            raise HTTPException(status_code=404, detail="Preview video not found")

    async def update_handlers(self, enabled_handlers: List[str] = Body(...)):
        """Update enabled correction handlers and rerun correction.

        The stored correction result is replaced by the corrector's output; the
        previous ``audio_hash`` metadata entry (if any) is written back afterwards.

        Raises:
            HTTPException: 500 when re-running correction fails.
        """
        try:
            # Store existing audio hash
            audio_hash = self.correction_result.metadata.get("audio_hash") if self.correction_result.metadata else None

            # Update metadata with new handler configuration
            if not self.correction_result.metadata:
                self.correction_result.metadata = {}
            self.correction_result.metadata["enabled_handlers"] = enabled_handlers

            # Rerun correction with updated handlers
            corrector = LyricsCorrector(cache_dir=self.output_config.cache_dir, enabled_handlers=enabled_handlers, logger=self.logger)

            # Create proper TranscriptionData from original segments
            original_segments = self.correction_result.original_segments
            transcription_data = TranscriptionData(
                segments=original_segments,
                words=[word for segment in original_segments for word in segment.words],
                text="\n".join(segment.text for segment in original_segments),
                source="original",
            )

            # Run correction
            self.correction_result = corrector.run(
                transcription_results=[TranscriptionResult(name="original", priority=1, result=transcription_data)],
                lyrics_results=self.correction_result.reference_lyrics,
                metadata=self.correction_result.metadata,
            )

            # Restore audio hash
            if audio_hash:
                if not self.correction_result.metadata:
                    self.correction_result.metadata = {}
                self.correction_result.metadata["audio_hash"] = audio_hash

            return {"status": "success", "data": self.correction_result.to_dict()}
        except Exception as e:
            self.logger.error(f"Failed to update handlers: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

    def _hash_audio_file(self) -> str:
        """Return the MD5 hex digest of the audio file, read in fixed-size chunks."""
        digest = hashlib.md5()
        with open(self.audio_filepath, "rb") as f:
            for chunk in iter(lambda: f.read(self.HASH_CHUNK_BYTES), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def _describe_port_owner(self) -> str:
        """Return ``lsof`` output for the server port, or "" when unavailable."""
        if os.name == "nt":  # lsof is only attempted on Unix-like systems
            return ""
        try:
            with os.popen(f"lsof -i:{self.PORT}") as pipe:
                return pipe.read().strip()
        except Exception:
            # Best-effort diagnostics only; never block startup on this.
            return ""

    def _wait_for_port(self) -> None:
        """Block until nothing accepts connections on ``HOST:PORT``."""
        while True:
            # The context manager closes the probe socket on every path.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.settimeout(1)
                port_in_use = probe.connect_ex((self.HOST, self.PORT)) == 0

            if not port_in_use:
                return

            process_info = self._describe_port_owner()
            self.logger.warning(
                f"Port {self.PORT} is in use. Waiting for it to become available...\n"
                f"Process using port {self.PORT}:\n{process_info}\n"
                f"To manually free the port, you can run: lsof -ti:{self.PORT} | xargs kill -9"
            )
            time.sleep(self.PORT_RETRY_SECONDS)

    def start(self) -> CorrectionResult:
        """Start the review server and wait for completion.

        Stores the audio file's MD5 in ``correction_result.metadata["audio_hash"]``
        (when the file exists), waits for the port to be free, runs uvicorn in a
        daemon thread, opens the browser, and polls until ``review_completed``.

        Returns:
            The correction result as last updated by the review endpoints.

        Raises:
            KeyboardInterrupt: Re-raised after logging when the user interrupts.
            Exception: Any other error is logged and re-raised.
        """
        # Generate audio hash if audio file exists
        if self.audio_filepath and os.path.exists(self.audio_filepath):
            audio_hash = self._hash_audio_file()
            if not self.correction_result.metadata:
                self.correction_result.metadata = {}
            self.correction_result.metadata["audio_hash"] = audio_hash

        server = None
        server_thread = None

        try:
            # Check port availability
            self._wait_for_port()

            # Start server
            config = uvicorn.Config(self.app, host=self.HOST, port=self.PORT, log_level="error")
            server = uvicorn.Server(config)
            server_thread = Thread(target=server.run, daemon=True)
            server_thread.start()
            time.sleep(0.5)  # Give the server thread a moment before opening the browser

            # Open browser and wait for completion
            base_api_url = f"http://localhost:{self.PORT}/api"
            encoded_api_url = urllib.parse.quote(base_api_url, safe="")
            audio_hash_param = (
                f"&audioHash={self.correction_result.metadata.get('audio_hash', '')}"
                if self.correction_result.metadata and "audio_hash" in self.correction_result.metadata
                else ""
            )
            webbrowser.open(f"http://localhost:{self.PORT}?baseApiUrl={encoded_api_url}{audio_hash_param}")

            while not self.review_completed:
                time.sleep(0.1)

            return self.correction_result

        except KeyboardInterrupt:
            self.logger.info("Received interrupt, shutting down server...")
            raise
        except Exception as e:
            self.logger.error(f"Error during review server operation: {e}")
            raise
        finally:
            # Ask uvicorn to stop and give its thread a short grace period.
            if server:
                server.should_exit = True

            if server_thread and server_thread.is_alive():
                server_thread.join(timeout=1)

            # Force cleanup any remaining server resources
            # NOTE(review): this overwrites a private attribute of a stdlib module;
            # confirm it is still required before relying on it.
            try:
                import multiprocessing.resource_tracker

                multiprocessing.resource_tracker._resource_tracker = None
            except Exception as cleanup_error:
                self.logger.debug(f"Resource tracker cleanup skipped: {cleanup_error}")
|
@@ -6,6 +6,7 @@ from typing import Dict, Optional, Any, Union
|
|
6
6
|
from pathlib import Path
|
7
7
|
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
8
8
|
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
9
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
9
10
|
|
10
11
|
|
11
12
|
@dataclass
|
@@ -183,6 +184,7 @@ class AudioShakeTranscriber(BaseTranscriber):
|
|
183
184
|
for line in transcription_data.get("lines", []):
|
184
185
|
words = [
|
185
186
|
Word(
|
187
|
+
id=WordUtils.generate_id(), # Generate unique ID for each word
|
186
188
|
text=word["text"].strip(" "),
|
187
189
|
start_time=word.get("start", 0.0),
|
188
190
|
end_time=word.get("end", 0.0),
|
@@ -193,6 +195,7 @@ class AudioShakeTranscriber(BaseTranscriber):
|
|
193
195
|
|
194
196
|
segments.append(
|
195
197
|
LyricsSegment(
|
198
|
+
id=WordUtils.generate_id(), # Generate unique ID for each segment
|
196
199
|
text=line.get("text", " ".join(w.text for w in words)),
|
197
200
|
words=words,
|
198
201
|
start_time=min((w.start_time for w in words), default=0.0),
|
@@ -94,14 +94,22 @@ class BaseTranscriber(ABC):
|
|
94
94
|
self._validate_audio_file(audio_filepath)
|
95
95
|
self.logger.debug("Audio file validation passed")
|
96
96
|
|
97
|
-
# Check cache first
|
97
|
+
# Check converted cache first
|
98
98
|
file_hash = self._get_file_hash(audio_filepath)
|
99
|
-
|
99
|
+
converted_cache_path = self._get_cache_path(file_hash, "converted")
|
100
|
+
converted_data = self._load_from_cache(converted_cache_path)
|
101
|
+
if converted_data:
|
102
|
+
self.logger.info(f"Using cached converted data for {audio_filepath}")
|
103
|
+
return TranscriptionData.from_dict(converted_data)
|
100
104
|
|
105
|
+
# Check raw cache next
|
106
|
+
raw_cache_path = self._get_cache_path(file_hash, "raw")
|
101
107
|
raw_data = self._load_from_cache(raw_cache_path)
|
102
108
|
if raw_data:
|
103
109
|
self.logger.info(f"Using cached raw data for {audio_filepath}")
|
104
|
-
|
110
|
+
converted_result = self._convert_result_format(raw_data)
|
111
|
+
self._save_to_cache(converted_cache_path, converted_result.to_dict())
|
112
|
+
return converted_result
|
105
113
|
|
106
114
|
# If not in cache, perform transcription
|
107
115
|
self.logger.info(f"No cache found, transcribing {audio_filepath}")
|
@@ -11,6 +11,7 @@ from pathlib import Path
|
|
11
11
|
from pydub import AudioSegment
|
12
12
|
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
13
13
|
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
14
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
14
15
|
|
15
16
|
|
16
17
|
@dataclass
|
@@ -262,6 +263,7 @@ class WhisperTranscriber(BaseTranscriber):
|
|
262
263
|
# First collect all words from word_timestamps
|
263
264
|
word_list = [
|
264
265
|
Word(
|
266
|
+
id=WordUtils.generate_id(), # Generate unique ID for each word
|
265
267
|
text=word["word"].strip(),
|
266
268
|
start_time=word["start"],
|
267
269
|
end_time=word["end"],
|
@@ -275,7 +277,15 @@ class WhisperTranscriber(BaseTranscriber):
|
|
275
277
|
segments = []
|
276
278
|
for seg in raw_data["segments"]:
|
277
279
|
segment_words = [word for word in word_list if seg["start"] <= word.start_time < seg["end"]]
|
278
|
-
segments.append(
|
280
|
+
segments.append(
|
281
|
+
LyricsSegment(
|
282
|
+
id=WordUtils.generate_id(), # Generate unique ID for each segment
|
283
|
+
text=seg["text"].strip(),
|
284
|
+
words=segment_words,
|
285
|
+
start_time=seg["start"],
|
286
|
+
end_time=seg["end"],
|
287
|
+
)
|
288
|
+
)
|
279
289
|
|
280
290
|
return TranscriptionData(
|
281
291
|
segments=segments,
|