lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. lyrics_transcriber/cli/cli_main.py +7 -0
  2. lyrics_transcriber/core/config.py +1 -0
  3. lyrics_transcriber/core/controller.py +30 -52
  4. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  5. lyrics_transcriber/correction/corrector.py +224 -107
  6. lyrics_transcriber/correction/handlers/base.py +28 -10
  7. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  8. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  9. lyrics_transcriber/correction/handlers/llm.py +290 -0
  10. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  11. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  12. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  13. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  14. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  15. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  16. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  17. lyrics_transcriber/correction/text_utils.py +3 -7
  18. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  19. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  20. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  21. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  22. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  23. lyrics_transcriber/frontend/dist/index.html +1 -1
  24. lyrics_transcriber/frontend/package.json +6 -2
  25. lyrics_transcriber/frontend/src/App.tsx +18 -2
  26. lyrics_transcriber/frontend/src/api.ts +103 -6
  27. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  28. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  30. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  31. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  33. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  35. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  36. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  37. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  38. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  39. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  40. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  41. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  42. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  43. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  44. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  45. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  47. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  48. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  49. lyrics_transcriber/frontend/src/types.js +2 -0
  50. lyrics_transcriber/frontend/src/types.ts +70 -49
  51. lyrics_transcriber/frontend/src/validation.ts +132 -0
  52. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  53. lyrics_transcriber/frontend/yarn.lock +3752 -0
  54. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  55. lyrics_transcriber/lyrics/file_provider.py +6 -5
  56. lyrics_transcriber/lyrics/genius.py +5 -2
  57. lyrics_transcriber/lyrics/spotify.py +58 -21
  58. lyrics_transcriber/output/ass/config.py +16 -5
  59. lyrics_transcriber/output/cdg.py +8 -8
  60. lyrics_transcriber/output/generator.py +29 -14
  61. lyrics_transcriber/output/plain_text.py +15 -10
  62. lyrics_transcriber/output/segment_resizer.py +16 -3
  63. lyrics_transcriber/output/subtitles.py +56 -2
  64. lyrics_transcriber/output/video.py +107 -1
  65. lyrics_transcriber/review/__init__.py +0 -1
  66. lyrics_transcriber/review/server.py +337 -164
  67. lyrics_transcriber/transcribers/audioshake.py +3 -0
  68. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  69. lyrics_transcriber/transcribers/whisper.py +11 -1
  70. lyrics_transcriber/types.py +151 -105
  71. lyrics_transcriber/utils/word_utils.py +27 -0
  72. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  73. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
  74. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  75. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  76. lyrics_transcriber/frontend/package-lock.json +0 -4260
  77. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  78. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  79. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,173 +1,346 @@
1
1
  import logging
2
- from fastapi import FastAPI, Body
2
+ import socket
3
+ from fastapi import FastAPI, Body, HTTPException
3
4
  from fastapi.middleware.cors import CORSMiddleware
4
- from typing import Optional, Dict, Any
5
- from ..types import CorrectionResult, WordCorrection, LyricsSegment
5
+ from typing import Dict, Any, List
6
+ from lyrics_transcriber.types import CorrectionResult, WordCorrection, LyricsSegment
6
7
  import time
7
- import subprocess
8
8
  import os
9
- import atexit
10
9
  import urllib.parse
11
10
  from fastapi.staticfiles import StaticFiles
12
11
  from fastapi.responses import FileResponse
13
- from pathlib import Path
14
- import socket
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
- app = FastAPI()
19
-
20
- # Configure CORS for development
21
- app.add_middleware(
22
- CORSMiddleware,
23
- allow_origins=[f"http://localhost:{port}" for port in range(3000, 5174)] # Common development ports
24
- + [f"http://127.0.0.1:{port}" for port in range(3000, 5174)],
25
- allow_credentials=True,
26
- allow_methods=["*"],
27
- allow_headers=["*"],
28
- )
29
-
30
- # Global state for the review process
31
- current_review: Optional[CorrectionResult] = None
32
- review_completed = False
33
- vite_process: Optional[subprocess.Popen] = None
34
- audio_filepath: Optional[str] = None # Add this new global variable
35
-
36
-
37
- def start_vite_server():
38
- """Get path to the built frontend assets."""
39
- global vite_process # We'll keep this for backwards compatibility
40
-
41
- # Get the path to the built frontend assets
42
- current_dir = os.path.dirname(os.path.abspath(__file__))
43
- frontend_dir = os.path.abspath(os.path.join(current_dir, "../frontend/dist"))
44
-
45
- if not os.path.exists(frontend_dir):
46
- raise FileNotFoundError(f"Frontend assets not found at {frontend_dir}. Ensure the package was built correctly.")
47
-
48
- # Mount the static files
49
- app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")
50
-
51
- logger.info(f"Mounted frontend assets from {frontend_dir}")
52
- return None # No process to return since we're serving static files
53
-
54
-
55
- @app.get("/api/correction-data")
56
- async def get_correction_data():
57
- """Get the current correction data for review."""
58
- if current_review is None:
59
- return {"error": "No review in progress"}
60
- return current_review.to_dict()
61
-
62
-
63
- @app.post("/api/complete")
64
- async def complete_review(updated_data: Dict[str, Any] = Body(...)):
65
- """
66
- Mark the review as complete and update the correction data.
67
-
68
- Args:
69
- updated_data: Dictionary containing corrections and corrected_segments
70
- """
71
- global review_completed, current_review
72
-
73
- logger.info("Received updated correction data")
74
-
75
- try:
76
- # Only update the specific fields that were modified
77
- if current_review is None:
78
- raise ValueError("No review in progress")
79
-
80
- # Update only the corrections and corrected_segments
81
- current_review.corrections = [WordCorrection.from_dict(c) for c in updated_data["corrections"]]
82
- current_review.corrected_segments = [LyricsSegment.from_dict(s) for s in updated_data["corrected_segments"]]
83
- current_review.corrections_made = len(current_review.corrections)
84
-
85
- logger.info(f"Successfully updated correction data with {len(current_review.corrections)} corrections")
86
-
87
- review_completed = True
88
- return {"status": "success"}
89
- except Exception as e:
90
- logger.error(f"Failed to update correction data: {str(e)}")
91
- return {"status": "error", "message": str(e)}
92
-
93
-
94
- @app.get("/api/audio")
95
- async def get_audio():
96
- """Stream the audio file for playback in the browser."""
97
- if not audio_filepath or not os.path.exists(audio_filepath):
98
- logger.error(f"Audio file not found at {audio_filepath}")
99
- return {"error": "Audio file not found"}
100
-
101
- return FileResponse(
102
- audio_filepath,
103
- media_type="audio/mpeg",
104
- headers={"Accept-Ranges": "bytes", "Content-Disposition": f"attachment; filename={Path(audio_filepath).name}"},
105
- )
106
-
107
-
108
- def start_review_server(correction_result: CorrectionResult) -> CorrectionResult:
109
- """
110
- Start the review server and wait for completion.
111
-
112
- Args:
113
- correction_result: The correction result to review
114
-
115
- Returns:
116
- The potentially modified correction result after review
117
- """
118
- import uvicorn
119
- import webbrowser
120
- from threading import Thread
121
- import signal
122
- import sys
123
-
124
- global current_review, review_completed, audio_filepath
125
- current_review = correction_result
126
- review_completed = False
127
-
128
- audio_filepath = correction_result.metadata.get("audio_filepath") if correction_result.metadata else None
129
-
130
- logger.info("Starting review server...")
131
-
132
- # Start Vite dev server (now just mounts static files)
133
- start_vite_server()
134
- logger.info("Frontend assets mounted")
12
+ import hashlib
13
+ from lyrics_transcriber.core.config import OutputConfig
14
+ import uvicorn
15
+ import webbrowser
16
+ from threading import Thread
17
+ from lyrics_transcriber.output.generator import OutputGenerator
18
+ import json
19
+ from lyrics_transcriber.correction.corrector import LyricsCorrector
20
+ from lyrics_transcriber.types import TranscriptionResult, TranscriptionData
21
+
22
+
23
+ class ReviewServer:
24
+ """Handles the review process through a web interface."""
25
+
26
+ def __init__(
27
+ self,
28
+ correction_result: CorrectionResult,
29
+ output_config: OutputConfig,
30
+ audio_filepath: str,
31
+ logger: logging.Logger,
32
+ ):
33
+ """Initialize the review server."""
34
+ self.correction_result = correction_result
35
+ self.output_config = output_config
36
+ self.audio_filepath = audio_filepath
37
+ self.logger = logger or logging.getLogger(__name__)
38
+ self.review_completed = False
39
+
40
+ # Create FastAPI instance and configure
41
+ self.app = FastAPI()
42
+ self._configure_cors()
43
+ self._register_routes()
44
+ self._mount_frontend()
45
+
46
+ def _configure_cors(self) -> None:
47
+ """Configure CORS middleware."""
48
+ self.app.add_middleware(
49
+ CORSMiddleware,
50
+ allow_origins=[f"http://localhost:{port}" for port in range(3000, 5174)]
51
+ + [f"http://127.0.0.1:{port}" for port in range(3000, 5174)],
52
+ allow_credentials=True,
53
+ allow_methods=["*"],
54
+ allow_headers=["*"],
55
+ )
56
+
57
+ def _mount_frontend(self) -> None:
58
+ """Mount the frontend static files."""
59
+ current_dir = os.path.dirname(os.path.abspath(__file__))
60
+ frontend_dir = os.path.abspath(os.path.join(current_dir, "../frontend/dist"))
61
+
62
+ if not os.path.exists(frontend_dir):
63
+ raise FileNotFoundError(f"Frontend assets not found at {frontend_dir}")
64
+
65
+ self.app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")
66
+
67
+ def _register_routes(self) -> None:
68
+ """Register API routes."""
69
+ self.app.add_api_route("/api/correction-data", self.get_correction_data, methods=["GET"])
70
+ self.app.add_api_route("/api/complete", self.complete_review, methods=["POST"])
71
+ self.app.add_api_route("/api/preview-video", self.generate_preview_video, methods=["POST"])
72
+ self.app.add_api_route("/api/preview-video/{preview_hash}", self.get_preview_video, methods=["GET"])
73
+ self.app.add_api_route("/api/audio/{audio_hash}", self.get_audio, methods=["GET"])
74
+ self.app.add_api_route("/api/ping", self.ping, methods=["GET"])
75
+ self.app.add_api_route("/api/handlers", self.update_handlers, methods=["POST"])
76
+
77
+ async def get_correction_data(self):
78
+ """Get the correction data."""
79
+ return self.correction_result.to_dict()
80
+
81
+ def _update_correction_result(self, base_result: CorrectionResult, updated_data: Dict[str, Any]) -> CorrectionResult:
82
+ """Update a CorrectionResult with new correction data."""
83
+ return CorrectionResult(
84
+ corrections=[
85
+ WordCorrection(
86
+ original_word=c.get("original_word", ""),
87
+ corrected_word=c.get("corrected_word", ""),
88
+ original_position=c.get("original_position", 0),
89
+ source=c.get("source", "review"),
90
+ reason=c.get("reason", "manual_review"),
91
+ segment_index=c.get("segment_index", 0),
92
+ confidence=c.get("confidence"),
93
+ alternatives=c.get("alternatives", {}),
94
+ is_deletion=c.get("is_deletion", False),
95
+ split_index=c.get("split_index"),
96
+ split_total=c.get("split_total"),
97
+ corrected_position=c.get("corrected_position"),
98
+ reference_positions=c.get("reference_positions"),
99
+ length=c.get("length", 1),
100
+ handler=c.get("handler"),
101
+ word_id=c.get("word_id"),
102
+ corrected_word_id=c.get("corrected_word_id"),
103
+ )
104
+ for c in updated_data["corrections"]
105
+ ],
106
+ corrected_segments=[LyricsSegment.from_dict(s) for s in updated_data["corrected_segments"]],
107
+ # Copy existing fields from the base result
108
+ original_segments=base_result.original_segments,
109
+ corrections_made=len(updated_data["corrections"]),
110
+ confidence=base_result.confidence,
111
+ reference_lyrics=base_result.reference_lyrics,
112
+ anchor_sequences=base_result.anchor_sequences,
113
+ gap_sequences=base_result.gap_sequences,
114
+ resized_segments=None, # Will be generated if needed
115
+ metadata=base_result.metadata,
116
+ correction_steps=base_result.correction_steps,
117
+ word_id_map=base_result.word_id_map,
118
+ segment_id_map=base_result.segment_id_map,
119
+ )
120
+
121
+ async def complete_review(self, updated_data: Dict[str, Any] = Body(...)):
122
+ """Complete the review process."""
123
+ try:
124
+ self.correction_result = self._update_correction_result(self.correction_result, updated_data)
125
+ self.review_completed = True
126
+ return {"status": "success"}
127
+ except Exception as e:
128
+ self.logger.error(f"Failed to update correction data: {str(e)}")
129
+ return {"status": "error", "message": str(e)}
130
+
131
+ async def ping(self):
132
+ """Simple ping endpoint for testing."""
133
+ return {"status": "ok"}
134
+
135
+ async def get_audio(self, audio_hash: str):
136
+ """Stream the audio file."""
137
+ try:
138
+ if (
139
+ not self.audio_filepath
140
+ or not os.path.exists(self.audio_filepath)
141
+ or not self.correction_result.metadata
142
+ or self.correction_result.metadata.get("audio_hash") != audio_hash
143
+ ):
144
+ raise FileNotFoundError("Audio file not found")
145
+
146
+ return FileResponse(self.audio_filepath, media_type="audio/mpeg", filename=os.path.basename(self.audio_filepath))
147
+ except Exception as e:
148
+ raise HTTPException(status_code=404, detail="Audio file not found")
149
+
150
+ async def generate_preview_video(self, updated_data: Dict[str, Any] = Body(...)):
151
+ """Generate a preview video with the current corrections."""
152
+ try:
153
+ # Create temporary correction result with updated data
154
+ temp_correction = self._update_correction_result(self.correction_result, updated_data)
155
+
156
+ # Generate a unique hash for this preview
157
+ preview_data = json.dumps(updated_data, sort_keys=True).encode("utf-8")
158
+ preview_hash = hashlib.md5(preview_data).hexdigest()[:12] # Use first 12 chars for shorter filename
159
+
160
+ # Initialize output generator with preview settings
161
+ preview_config = OutputConfig(
162
+ output_dir=self.output_config.output_dir,
163
+ cache_dir=self.output_config.cache_dir,
164
+ output_styles_json=self.output_config.output_styles_json,
165
+ video_resolution="360p", # Force 360p for preview
166
+ styles=self.output_config.styles,
167
+ max_line_length=self.output_config.max_line_length,
168
+ )
169
+ output_generator = OutputGenerator(config=preview_config, logger=self.logger)
170
+
171
+ # Generate preview outputs with unique prefix
172
+ preview_outputs = output_generator.generate_outputs(
173
+ transcription_corrected=temp_correction,
174
+ lyrics_results={}, # Empty dict since we don't need lyrics results for preview
175
+ output_prefix=f"preview_{preview_hash}", # Include hash in filename
176
+ audio_filepath=self.audio_filepath,
177
+ preview_mode=True,
178
+ )
179
+
180
+ if not preview_outputs.video:
181
+ raise ValueError("Preview video generation failed")
182
+
183
+ # Store the path for later retrieval
184
+ if not hasattr(self, "preview_videos"):
185
+ self.preview_videos = {}
186
+ self.preview_videos[preview_hash] = preview_outputs.video
187
+
188
+ return {"status": "success", "preview_hash": preview_hash}
189
+
190
+ except Exception as e:
191
+ self.logger.error(f"Failed to generate preview video: {str(e)}")
192
+ raise HTTPException(status_code=500, detail=str(e))
193
+
194
+ async def get_preview_video(self, preview_hash: str):
195
+ """Stream the preview video."""
196
+ try:
197
+ if not hasattr(self, "preview_videos") or preview_hash not in self.preview_videos:
198
+ raise FileNotFoundError("Preview video not found")
199
+
200
+ video_path = self.preview_videos[preview_hash]
201
+ if not os.path.exists(video_path):
202
+ raise FileNotFoundError("Preview video file not found")
203
+
204
+ return FileResponse(
205
+ video_path,
206
+ media_type="video/mp4",
207
+ filename=os.path.basename(video_path),
208
+ headers={
209
+ "Accept-Ranges": "bytes",
210
+ "Content-Disposition": "inline",
211
+ "Cache-Control": "no-cache",
212
+ "X-Content-Type-Options": "nosniff",
213
+ },
214
+ )
215
+ except Exception as e:
216
+ self.logger.error(f"Failed to stream preview video: {str(e)}")
217
+ raise HTTPException(status_code=404, detail="Preview video not found")
218
+
219
+ async def update_handlers(self, enabled_handlers: List[str] = Body(...)):
220
+ """Update enabled correction handlers and rerun correction."""
221
+ try:
222
+ # Store existing audio hash
223
+ audio_hash = self.correction_result.metadata.get("audio_hash") if self.correction_result.metadata else None
224
+
225
+ # Update metadata with new handler configuration
226
+ if not self.correction_result.metadata:
227
+ self.correction_result.metadata = {}
228
+ self.correction_result.metadata["enabled_handlers"] = enabled_handlers
229
+
230
+ # Rerun correction with updated handlers
231
+ corrector = LyricsCorrector(cache_dir=self.output_config.cache_dir, enabled_handlers=enabled_handlers, logger=self.logger)
232
+
233
+ # Create proper TranscriptionData from original segments
234
+ transcription_data = TranscriptionData(
235
+ segments=self.correction_result.original_segments,
236
+ words=[word for segment in self.correction_result.original_segments for word in segment.words],
237
+ text="\n".join(segment.text for segment in self.correction_result.original_segments),
238
+ source="original",
239
+ )
240
+
241
+ # Run correction
242
+ self.correction_result = corrector.run(
243
+ transcription_results=[TranscriptionResult(name="original", priority=1, result=transcription_data)],
244
+ lyrics_results=self.correction_result.reference_lyrics,
245
+ metadata=self.correction_result.metadata,
246
+ )
247
+
248
+ # Restore audio hash
249
+ if audio_hash:
250
+ if not self.correction_result.metadata:
251
+ self.correction_result.metadata = {}
252
+ self.correction_result.metadata["audio_hash"] = audio_hash
253
+
254
+ return {"status": "success", "data": self.correction_result.to_dict()}
255
+ except Exception as e:
256
+ self.logger.error(f"Failed to update handlers: {str(e)}")
257
+ raise HTTPException(status_code=500, detail=str(e))
258
+
259
+ def start(self) -> CorrectionResult:
260
+ """Start the review server and wait for completion."""
261
+ # Generate audio hash if audio file exists
262
+ if self.audio_filepath and os.path.exists(self.audio_filepath):
263
+ with open(self.audio_filepath, "rb") as f:
264
+ audio_hash = hashlib.md5(f.read()).hexdigest()
265
+ if not self.correction_result.metadata:
266
+ self.correction_result.metadata = {}
267
+ self.correction_result.metadata["audio_hash"] = audio_hash
268
+
269
+ server = None
270
+ server_thread = None
271
+ sock = None
135
272
 
136
- # Wait for default port (8000) to become available
137
- DEFAULT_PORT = 8000
138
- while True:
139
273
  try:
140
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
141
- s.bind(("127.0.0.1", DEFAULT_PORT))
142
- break
143
- except OSError:
144
- logger.info(f"Port {DEFAULT_PORT} is occupied, waiting 10 seconds before retrying...")
145
- time.sleep(10)
146
-
147
- logger.info(f"Port {DEFAULT_PORT} is available, starting server")
148
-
149
- # Create server config with default port
150
- config = uvicorn.Config(app, host="127.0.0.1", port=DEFAULT_PORT, log_level="info")
151
- server = uvicorn.Server(config)
152
-
153
- # Start FastAPI server in a separate thread
154
- server_thread = Thread(target=server.run, daemon=True)
155
- server_thread.start()
156
- logger.info("Server thread started")
157
-
158
- # Open browser with the correct port
159
- base_api_url = f"http://localhost:{DEFAULT_PORT}/api"
160
- encoded_api_url = urllib.parse.quote(base_api_url, safe="")
161
- webbrowser.open(f"http://localhost:{DEFAULT_PORT}?baseApiUrl={encoded_api_url}")
162
- logger.info("Opened browser for review")
163
-
164
- # Wait for review to complete
165
- start_time = time.time()
166
- while not review_completed:
167
- time.sleep(0.1)
168
-
169
- logger.info("Review completed, shutting down server...")
170
- server.should_exit = True
171
- server_thread.join(timeout=5) # Wait up to 5 seconds for server to shut down
172
-
173
- return current_review
274
+ # Check port availability
275
+ while True:
276
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
277
+ sock.settimeout(1)
278
+ if sock.connect_ex(("127.0.0.1", 8000)) == 0:
279
+ # Port is in use, get process info
280
+ process_info = ""
281
+ if os.name != "nt": # Unix-like systems
282
+ try:
283
+ process_info = os.popen("lsof -i:8000").read().strip()
284
+ except:
285
+ pass
286
+
287
+ self.logger.warning(
288
+ f"Port 8000 is in use. Waiting for it to become available...\n"
289
+ f"Process using port 8000:\n{process_info}\n"
290
+ f"To manually free the port, you can run: lsof -ti:8000 | xargs kill -9"
291
+ )
292
+ sock.close()
293
+ time.sleep(30)
294
+ else:
295
+ sock.close()
296
+ break
297
+
298
+ # Start server
299
+ config = uvicorn.Config(self.app, host="127.0.0.1", port=8000, log_level="error")
300
+ server = uvicorn.Server(config)
301
+ server_thread = Thread(target=server.run, daemon=True)
302
+ server_thread.start()
303
+ time.sleep(0.5) # Reduced wait time
304
+
305
+ # Open browser and wait for completion
306
+ base_api_url = "http://localhost:8000/api"
307
+ encoded_api_url = urllib.parse.quote(base_api_url, safe="")
308
+ audio_hash_param = (
309
+ f"&audioHash={self.correction_result.metadata.get('audio_hash', '')}"
310
+ if self.correction_result.metadata and "audio_hash" in self.correction_result.metadata
311
+ else ""
312
+ )
313
+ webbrowser.open(f"http://localhost:8000?baseApiUrl={encoded_api_url}{audio_hash_param}")
314
+
315
+ while not self.review_completed:
316
+ time.sleep(0.1)
317
+
318
+ return self.correction_result
319
+
320
+ except KeyboardInterrupt:
321
+ self.logger.info("Received interrupt, shutting down server...")
322
+ raise
323
+ except Exception as e:
324
+ self.logger.error(f"Error during review server operation: {e}")
325
+ raise
326
+ finally:
327
+ # Comprehensive cleanup
328
+ if sock:
329
+ try:
330
+ sock.close()
331
+ except:
332
+ pass
333
+
334
+ if server:
335
+ server.should_exit = True
336
+
337
+ if server_thread and server_thread.is_alive():
338
+ server_thread.join(timeout=1)
339
+
340
+ # Force cleanup any remaining server resources
341
+ try:
342
+ import multiprocessing.resource_tracker
343
+
344
+ multiprocessing.resource_tracker._resource_tracker = None
345
+ except:
346
+ pass
@@ -6,6 +6,7 @@ from typing import Dict, Optional, Any, Union
6
6
  from pathlib import Path
7
7
  from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
8
8
  from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
9
+ from lyrics_transcriber.utils.word_utils import WordUtils
9
10
 
10
11
 
11
12
  @dataclass
@@ -183,6 +184,7 @@ class AudioShakeTranscriber(BaseTranscriber):
183
184
  for line in transcription_data.get("lines", []):
184
185
  words = [
185
186
  Word(
187
+ id=WordUtils.generate_id(), # Generate unique ID for each word
186
188
  text=word["text"].strip(" "),
187
189
  start_time=word.get("start", 0.0),
188
190
  end_time=word.get("end", 0.0),
@@ -193,6 +195,7 @@ class AudioShakeTranscriber(BaseTranscriber):
193
195
 
194
196
  segments.append(
195
197
  LyricsSegment(
198
+ id=WordUtils.generate_id(), # Generate unique ID for each segment
196
199
  text=line.get("text", " ".join(w.text for w in words)),
197
200
  words=words,
198
201
  start_time=min((w.start_time for w in words), default=0.0),
@@ -94,14 +94,22 @@ class BaseTranscriber(ABC):
94
94
  self._validate_audio_file(audio_filepath)
95
95
  self.logger.debug("Audio file validation passed")
96
96
 
97
- # Check cache first
97
+ # Check converted cache first
98
98
  file_hash = self._get_file_hash(audio_filepath)
99
- raw_cache_path = self._get_cache_path(file_hash, "raw")
99
+ converted_cache_path = self._get_cache_path(file_hash, "converted")
100
+ converted_data = self._load_from_cache(converted_cache_path)
101
+ if converted_data:
102
+ self.logger.info(f"Using cached converted data for {audio_filepath}")
103
+ return TranscriptionData.from_dict(converted_data)
100
104
 
105
+ # Check raw cache next
106
+ raw_cache_path = self._get_cache_path(file_hash, "raw")
101
107
  raw_data = self._load_from_cache(raw_cache_path)
102
108
  if raw_data:
103
109
  self.logger.info(f"Using cached raw data for {audio_filepath}")
104
- return self._save_and_convert_result(file_hash, raw_data)
110
+ converted_result = self._convert_result_format(raw_data)
111
+ self._save_to_cache(converted_cache_path, converted_result.to_dict())
112
+ return converted_result
105
113
 
106
114
  # If not in cache, perform transcription
107
115
  self.logger.info(f"No cache found, transcribing {audio_filepath}")
@@ -11,6 +11,7 @@ from pathlib import Path
11
11
  from pydub import AudioSegment
12
12
  from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
13
13
  from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
14
+ from lyrics_transcriber.utils.word_utils import WordUtils
14
15
 
15
16
 
16
17
  @dataclass
@@ -262,6 +263,7 @@ class WhisperTranscriber(BaseTranscriber):
262
263
  # First collect all words from word_timestamps
263
264
  word_list = [
264
265
  Word(
266
+ id=WordUtils.generate_id(), # Generate unique ID for each word
265
267
  text=word["word"].strip(),
266
268
  start_time=word["start"],
267
269
  end_time=word["end"],
@@ -275,7 +277,15 @@ class WhisperTranscriber(BaseTranscriber):
275
277
  segments = []
276
278
  for seg in raw_data["segments"]:
277
279
  segment_words = [word for word in word_list if seg["start"] <= word.start_time < seg["end"]]
278
- segments.append(LyricsSegment(text=seg["text"].strip(), words=segment_words, start_time=seg["start"], end_time=seg["end"]))
280
+ segments.append(
281
+ LyricsSegment(
282
+ id=WordUtils.generate_id(), # Generate unique ID for each segment
283
+ text=seg["text"].strip(),
284
+ words=segment_words,
285
+ start_time=seg["start"],
286
+ end_time=seg["end"],
287
+ )
288
+ )
279
289
 
280
290
  return TranscriptionData(
281
291
  segments=segments,