lyrics-transcriber 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-D0Gr3Ep7.js} +16509 -9038
- lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +281 -63
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +249 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +320 -266
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +120 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +174 -52
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +158 -114
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +39 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +134 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +67 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +1 -1
- lyrics_transcriber/output/generator.py +22 -8
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +27 -1
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/RECORD +75 -61
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,3 @@
|
|
1
|
-
import difflib
|
2
1
|
import os
|
3
2
|
import logging
|
4
3
|
from dataclasses import dataclass, field
|
@@ -21,7 +20,7 @@ class LyricsControllerResult:
|
|
21
20
|
"""Holds the results of the transcription and correction process."""
|
22
21
|
|
23
22
|
# Results from different sources
|
24
|
-
lyrics_results:
|
23
|
+
lyrics_results: dict[str, LyricsData] = field(default_factory=dict)
|
25
24
|
transcription_results: List[TranscriptionResult] = field(default_factory=list)
|
26
25
|
|
27
26
|
# Corrected results
|
@@ -260,7 +259,7 @@ class LyricsTranscriber:
|
|
260
259
|
try:
|
261
260
|
result = provider.fetch_lyrics(self.artist, self.title)
|
262
261
|
if result:
|
263
|
-
self.results.lyrics_results
|
262
|
+
self.results.lyrics_results[name] = result
|
264
263
|
self.logger.info(f"Successfully fetched lyrics from {name}")
|
265
264
|
|
266
265
|
except Exception as e:
|
@@ -303,12 +302,10 @@ class LyricsTranscriber:
|
|
303
302
|
self.results.transcription_corrected = CorrectionResult(
|
304
303
|
original_segments=best_transcription.result.segments,
|
305
304
|
corrected_segments=best_transcription.result.segments,
|
306
|
-
corrected_text="", # Will be generated from segments
|
307
305
|
corrections=[], # No corrections made
|
308
306
|
corrections_made=0, # No corrections made
|
309
307
|
confidence=1.0, # Full confidence since we're using original
|
310
|
-
|
311
|
-
reference_texts={},
|
308
|
+
reference_lyrics={},
|
312
309
|
anchor_sequences=[],
|
313
310
|
gap_sequences=[],
|
314
311
|
resized_segments=[], # Will be populated later
|
@@ -320,15 +317,24 @@ class LyricsTranscriber:
|
|
320
317
|
)
|
321
318
|
return
|
322
319
|
|
323
|
-
#
|
324
|
-
|
325
|
-
|
326
|
-
|
320
|
+
# Create metadata dict with song info
|
321
|
+
metadata = {
|
322
|
+
"artist": self.artist,
|
323
|
+
"title": self.title,
|
324
|
+
"full_reference_texts": {source: lyrics.get_full_text() for source, lyrics in self.results.lyrics_results.items()},
|
325
|
+
}
|
326
|
+
|
327
|
+
# Get enabled handlers from metadata if available
|
328
|
+
enabled_handlers = metadata.get("enabled_handlers", None)
|
329
|
+
|
330
|
+
# Create corrector with enabled handlers
|
331
|
+
corrector = LyricsCorrector(cache_dir=self.output_config.cache_dir, enabled_handlers=enabled_handlers, logger=self.logger)
|
327
332
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
333
|
+
corrected_data = corrector.run(
|
334
|
+
transcription_results=self.results.transcription_results,
|
335
|
+
lyrics_results=self.results.lyrics_results,
|
336
|
+
metadata=metadata,
|
337
|
+
)
|
332
338
|
|
333
339
|
# Store corrected results
|
334
340
|
self.results.transcription_corrected = corrected_data
|
@@ -336,49 +342,21 @@ class LyricsTranscriber:
|
|
336
342
|
|
337
343
|
# Add human review step
|
338
344
|
if self.output_config.enable_review:
|
339
|
-
from
|
340
|
-
import json
|
341
|
-
from copy import deepcopy
|
345
|
+
from lyrics_transcriber.review.server import ReviewServer
|
342
346
|
|
343
347
|
self.logger.info("Starting human review process")
|
344
348
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
elif isinstance(data_dict, float):
|
352
|
-
# Convert whole number floats to integers
|
353
|
-
if data_dict.is_integer():
|
354
|
-
return int(data_dict)
|
355
|
-
return data_dict
|
356
|
-
return data_dict
|
357
|
-
|
358
|
-
# Normalize and convert auto-corrected data
|
359
|
-
auto_data = normalize_data(deepcopy(self.results.transcription_corrected.to_dict()))
|
360
|
-
auto_corrected_json = json.dumps(auto_data, indent=4).splitlines()
|
361
|
-
|
362
|
-
# Pass through review server
|
363
|
-
reviewed_data = start_review_server(self.results.transcription_corrected)
|
364
|
-
|
365
|
-
# Normalize and convert reviewed data
|
366
|
-
human_data = normalize_data(deepcopy(reviewed_data.to_dict()))
|
367
|
-
human_corrected_json = json.dumps(human_data, indent=4).splitlines()
|
368
|
-
|
369
|
-
self.logger.info("Human review completed")
|
370
|
-
|
371
|
-
# Compare the normalized JSON strings
|
372
|
-
diff = list(
|
373
|
-
difflib.unified_diff(auto_corrected_json, human_corrected_json, fromfile="auto-corrected", tofile="human-corrected")
|
349
|
+
# Create and start review server
|
350
|
+
review_server = ReviewServer(
|
351
|
+
correction_result=self.results.transcription_corrected,
|
352
|
+
output_config=self.output_config,
|
353
|
+
audio_filepath=self.audio_filepath,
|
354
|
+
logger=self.logger,
|
374
355
|
)
|
356
|
+
reviewed_data = review_server.start()
|
375
357
|
|
376
|
-
|
377
|
-
|
378
|
-
for line in diff:
|
379
|
-
self.logger.warning(line.rstrip())
|
380
|
-
|
381
|
-
# exit(1)
|
358
|
+
self.logger.info("Human review completed, updated transcription_corrected with reviewed_data")
|
359
|
+
self.results.transcription_corrected = reviewed_data
|
382
360
|
|
383
361
|
def generate_outputs(self) -> None:
|
384
362
|
"""Generate output files based on enabled features and available data."""
|