lyrics-transcriber 0.65.0__tar.gz → 0.66.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/PKG-INFO +1 -1
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/anchor_sequence.py +156 -59
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber-0.65.0/lyrics_transcriber/frontend/web_assets/assets/index-CSX3zHKu.js → lyrics_transcriber-0.66.0/lyrics_transcriber/frontend/web_assets/assets/index-BMWgZ3MR.js +2 -2
- lyrics_transcriber-0.65.0/lyrics_transcriber/frontend/web_assets/assets/index-CSX3zHKu.js.map → lyrics_transcriber-0.66.0/lyrics_transcriber/frontend/web_assets/assets/index-BMWgZ3MR.js.map +1 -1
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/index.html +1 -1
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/pyproject.toml +1 -1
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/LICENSE +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/README.md +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/cli/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/cli/cli_main.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/core/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/core/config.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/core/controller.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/corrector.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/base.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/extend_anchor.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/levenshtein.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/llm.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/llm_providers.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/no_space_punct_match.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/repeat.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/sound_alike.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/syllables_match.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/word_count_match.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/handlers/word_operations.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/operations.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/phrase_analyzer.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/correction/text_utils.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/.gitignore +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/.yarnrc.yml +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/README.md +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/eslint.config.js +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/index.html +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/favicon.ico +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/App.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/api.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/EditActionBar.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/EditModal.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/EditWordList.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/FileUpload.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/Header.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/ModeSelector.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/ReferenceView.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/WordDivider.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/constants.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/styles.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/types.js +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/types.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/hooks/useManualSync.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/main.tsx +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/theme.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/types/global.d.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/types.js +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/types.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/validation.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/src/vite-env.d.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/tsconfig.app.json +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/tsconfig.json +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/tsconfig.node.json +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/tsconfig.tsbuildinfo +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/update_version.js +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/vite.config.d.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/vite.config.js +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/vite.config.ts +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/frontend/yarn.lock +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/lyrics/base_lyrics_provider.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/lyrics/file_provider.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/lyrics/genius.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/lyrics/musixmatch.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/lyrics/spotify.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/lyrics/user_input_provider.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/ass.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/ass_specs.txt +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/config.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/constants.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/event.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/formatters.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/lyrics_line.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/lyrics_screen.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/section_detector.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/section_screen.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/ass/style.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdg.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/cdg.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/composer.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/config.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/pack.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/render.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/cdgmaker/utils.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/arial.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/generator.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/lrc_to_cdg.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/lyrics_file.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/plain_text.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/segment_resizer.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/subtitles.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/output/video.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/review/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/review/server.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/storage/__init__.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/storage/dropbox.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/transcribers/audioshake.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/transcribers/base_transcriber.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/transcribers/whisper.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/types.py +0 -0
- {lyrics_transcriber-0.65.0 → lyrics_transcriber-0.66.0}/lyrics_transcriber/utils/word_utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.65.0
|
3
|
+
Version: 0.66.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
License: MIT
|
6
6
|
Author: Andrew Beveridge
|
@@ -28,7 +28,7 @@ class AnchorSequenceFinder:
|
|
28
28
|
cache_dir: Union[str, Path],
|
29
29
|
min_sequence_length: int = 3,
|
30
30
|
min_sources: int = 1,
|
31
|
-
timeout_seconds: int =
|
31
|
+
timeout_seconds: int = 600, # 10 minutes default timeout
|
32
32
|
max_iterations_per_ngram: int = 1000, # Maximum iterations for while loop
|
33
33
|
progress_check_interval: int = 50, # Check progress every N iterations
|
34
34
|
logger: Optional[logging.Logger] = None,
|
@@ -194,6 +194,10 @@ class AnchorSequenceFinder:
|
|
194
194
|
min_sources: int,
|
195
195
|
) -> List[AnchorSequence]:
|
196
196
|
"""Process a single n-gram length to find matching sequences with timeout and early termination."""
|
197
|
+
self.logger.info(f"🔍 N-GRAM {n}: Starting processing with {len(trans_words)} transcription words")
|
198
|
+
self.logger.info(f"🔍 N-GRAM {n}: Reference sources: {list(ref_texts_clean.keys())}")
|
199
|
+
self.logger.info(f"🔍 N-GRAM {n}: Max iterations limit: {self.max_iterations_per_ngram}")
|
200
|
+
|
197
201
|
candidate_anchors = []
|
198
202
|
used_positions = {source: set() for source in ref_texts_clean.keys()}
|
199
203
|
used_trans_positions = set()
|
@@ -203,20 +207,31 @@ class AnchorSequenceFinder:
|
|
203
207
|
last_anchor_count = 0
|
204
208
|
stagnation_count = 0
|
205
209
|
|
206
|
-
self.logger.debug(f"Processing n-gram length {n} with max {self.max_iterations_per_ngram} iterations")
|
210
|
+
self.logger.debug(f"🔍 N-GRAM {n}: Processing n-gram length {n} with max {self.max_iterations_per_ngram} iterations")
|
211
|
+
|
212
|
+
# Generate n-grams from transcribed text once
|
213
|
+
trans_ngrams = self._find_ngrams(trans_words, n)
|
214
|
+
self.logger.info(f"🔍 N-GRAM {n}: Generated {len(trans_ngrams)} n-grams for processing")
|
207
215
|
|
216
|
+
# Process all n-grams efficiently in multiple passes
|
208
217
|
found_new_match = True
|
209
218
|
while found_new_match and iteration_count < self.max_iterations_per_ngram:
|
210
219
|
found_new_match = False
|
211
220
|
iteration_count += 1
|
221
|
+
anchors_found_this_iteration = 0
|
222
|
+
|
223
|
+
# Log every 10th iteration to track progress
|
224
|
+
if iteration_count % 10 == 0:
|
225
|
+
self.logger.debug(f"🔍 N-GRAM {n}: Iteration {iteration_count}, anchors found: {len(candidate_anchors)}")
|
212
226
|
|
213
227
|
# Check for progress stagnation every N iterations
|
214
228
|
if iteration_count - last_progress_check >= self.progress_check_interval:
|
215
229
|
current_anchor_count = len(candidate_anchors)
|
216
230
|
if current_anchor_count == last_anchor_count:
|
217
231
|
stagnation_count += 1
|
232
|
+
self.logger.debug(f"🔍 N-GRAM {n}: Stagnation check {stagnation_count}/3 at iteration {iteration_count}")
|
218
233
|
if stagnation_count >= 3: # No progress for 3 consecutive checks
|
219
|
-
self.logger.
|
234
|
+
self.logger.info(f"🔍 N-GRAM {n}: ⏹️ Early termination due to stagnation after {iteration_count} iterations")
|
220
235
|
break
|
221
236
|
else:
|
222
237
|
stagnation_count = 0 # Reset stagnation counter
|
@@ -224,11 +239,9 @@ class AnchorSequenceFinder:
|
|
224
239
|
last_anchor_count = current_anchor_count
|
225
240
|
last_progress_check = iteration_count
|
226
241
|
|
227
|
-
self.logger.debug(f"
|
228
|
-
|
229
|
-
# Generate n-grams from transcribed text
|
230
|
-
trans_ngrams = self._find_ngrams(trans_words, n)
|
242
|
+
self.logger.debug(f"🔍 N-GRAM {n}: iteration {iteration_count}, anchors: {current_anchor_count}, stagnation: {stagnation_count}")
|
231
243
|
|
244
|
+
# Process all n-grams in this iteration
|
232
245
|
for ngram, trans_pos in trans_ngrams:
|
233
246
|
# Skip if we've already used this transcription position
|
234
247
|
if trans_pos in used_trans_positions:
|
@@ -239,11 +252,11 @@ class AnchorSequenceFinder:
|
|
239
252
|
ngram_words = [w.lower() for w in ngram]
|
240
253
|
|
241
254
|
if actual_words != ngram_words:
|
242
|
-
self.logger.error(f"Mismatch between ngram and actual words at position {trans_pos}:")
|
243
|
-
self.logger.error(f"Ngram words: {ngram_words}")
|
244
|
-
self.logger.error(f"Actual words: {actual_words}")
|
245
|
-
self.logger.error(f"Full trans_words: {trans_words}")
|
246
|
-
self.logger.error(f"Full all_words: {[w.text for w in all_words]}")
|
255
|
+
self.logger.error(f"🔍 N-GRAM {n}: ❌ Mismatch between ngram and actual words at position {trans_pos}:")
|
256
|
+
self.logger.error(f"🔍 N-GRAM {n}: Ngram words: {ngram_words}")
|
257
|
+
self.logger.error(f"🔍 N-GRAM {n}: Actual words: {actual_words}")
|
258
|
+
self.logger.error(f"🔍 N-GRAM {n}: Full trans_words: {trans_words}")
|
259
|
+
self.logger.error(f"🔍 N-GRAM {n}: Full all_words: {[w.text for w in all_words]}")
|
247
260
|
raise AssertionError(
|
248
261
|
f"Ngram words don't match actual words at position {trans_pos}. "
|
249
262
|
f"This should never happen as trans_words should be derived from all_words."
|
@@ -251,6 +264,10 @@ class AnchorSequenceFinder:
|
|
251
264
|
|
252
265
|
matches = self._find_matching_sources(ngram, ref_texts_clean, n)
|
253
266
|
if len(matches) >= min_sources:
|
267
|
+
# Log successful match
|
268
|
+
if len(candidate_anchors) < 5: # Only log first few matches to avoid spam
|
269
|
+
self.logger.debug(f"🔍 N-GRAM {n}: ✅ Found match: '{' '.join(ngram)}' at pos {trans_pos} with {len(matches)} sources")
|
270
|
+
|
254
271
|
# Get Word IDs for transcribed words
|
255
272
|
transcribed_word_ids = [w.id for w in all_words[trans_pos : trans_pos + n]]
|
256
273
|
|
@@ -271,13 +288,26 @@ class AnchorSequenceFinder:
|
|
271
288
|
confidence=len(matches) / len(ref_texts_clean),
|
272
289
|
)
|
273
290
|
candidate_anchors.append(anchor)
|
291
|
+
anchors_found_this_iteration += 1
|
274
292
|
found_new_match = True
|
275
|
-
|
293
|
+
|
294
|
+
# For efficiency, if we have very low iteration limits, find one match per iteration
|
295
|
+
if self.max_iterations_per_ngram <= 10:
|
296
|
+
break
|
297
|
+
|
298
|
+
# Log progress for this iteration
|
299
|
+
if anchors_found_this_iteration > 0:
|
300
|
+
self.logger.debug(f"🔍 N-GRAM {n}: Found {anchors_found_this_iteration} anchors in iteration {iteration_count}")
|
301
|
+
|
302
|
+
# Early termination if we've found enough anchors or processed all positions
|
303
|
+
if len(used_trans_positions) >= len(trans_ngrams) or len(candidate_anchors) >= len(trans_ngrams):
|
304
|
+
self.logger.info(f"🔍 N-GRAM {n}: ⏹️ Early termination - processed all positions after {iteration_count} iterations")
|
305
|
+
break
|
276
306
|
|
277
307
|
if iteration_count >= self.max_iterations_per_ngram:
|
278
|
-
self.logger.warning(f"
|
308
|
+
self.logger.warning(f"🔍 N-GRAM {n}: ⏰ Processing terminated after reaching max iterations ({self.max_iterations_per_ngram})")
|
279
309
|
|
280
|
-
self.logger.
|
310
|
+
self.logger.info(f"🔍 N-GRAM {n}: ✅ Completed processing after {iteration_count} iterations, found {len(candidate_anchors)} anchors")
|
281
311
|
return candidate_anchors
|
282
312
|
|
283
313
|
def find_anchors(
|
@@ -290,19 +320,25 @@ class AnchorSequenceFinder:
|
|
290
320
|
start_time = time.time()
|
291
321
|
|
292
322
|
try:
|
323
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Starting find_anchors with timeout {self.timeout_seconds}s")
|
324
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Transcribed text length: {len(transcribed)}")
|
325
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Reference sources: {list(references.keys())}")
|
326
|
+
|
293
327
|
cache_key = self._get_cache_key(transcribed, references, transcription_result)
|
294
328
|
cache_path = self.cache_dir / f"anchors_{cache_key}.json"
|
329
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Cache key: {cache_key}")
|
295
330
|
|
296
331
|
# Try to load from cache
|
332
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Checking cache at {cache_path}")
|
297
333
|
if cached_data := self._load_from_cache(cache_path):
|
298
|
-
self.logger.info("Loading anchors from cache")
|
334
|
+
self.logger.info("🔍 ANCHOR SEARCH: ✅ Cache hit! Loading anchors from cache")
|
299
335
|
try:
|
300
336
|
# Convert cached_data to dictionary before logging
|
301
337
|
if cached_data:
|
302
338
|
first_anchor = {"anchor": cached_data[0].anchor.to_dict(), "phrase_score": cached_data[0].phrase_score.to_dict()}
|
303
339
|
return cached_data
|
304
340
|
except Exception as e:
|
305
|
-
self.logger.error(f"
|
341
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: ❌ Error loading cache: {type(e).__name__}: {e}")
|
306
342
|
if cached_data:
|
307
343
|
try:
|
308
344
|
first_anchor = {"anchor": cached_data[0].anchor.to_dict(), "phrase_score": cached_data[0].phrase_score.to_dict()}
|
@@ -311,43 +347,57 @@ class AnchorSequenceFinder:
|
|
311
347
|
self.logger.error("Could not serialize first cached anchor for logging")
|
312
348
|
|
313
349
|
# If not in cache or cache format invalid, perform the computation
|
314
|
-
self.logger.info(f"
|
315
|
-
self.logger.info(f"Finding anchor sequences for transcription with length {len(transcribed)}")
|
350
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ❌ Cache miss - computing anchors with timeout {self.timeout_seconds}s")
|
351
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Finding anchor sequences for transcription with length {len(transcribed)}")
|
316
352
|
|
317
353
|
# Check timeout before starting computation
|
318
354
|
self._check_timeout(start_time, "anchor computation initialization")
|
355
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Timeout check passed - initialization")
|
319
356
|
|
320
357
|
# Get all words from transcription
|
358
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Extracting words from transcription result...")
|
321
359
|
all_words = []
|
322
360
|
for segment in transcription_result.result.segments:
|
323
361
|
all_words.extend(segment.words)
|
362
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Extracted {len(all_words)} words from transcription")
|
324
363
|
|
325
364
|
# Clean and split texts
|
365
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Cleaning transcription words...")
|
326
366
|
trans_words = [w.text.lower().strip('.,?!"\n') for w in all_words]
|
367
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Cleaned {len(trans_words)} transcription words")
|
368
|
+
|
369
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Processing reference sources...")
|
327
370
|
ref_texts_clean = {
|
328
371
|
source: self._clean_text(" ".join(w.text for s in lyrics.segments for w in s.words)).split()
|
329
372
|
for source, lyrics in references.items()
|
330
373
|
}
|
331
374
|
ref_words = {source: [w for s in lyrics.segments for w in s.words] for source, lyrics in references.items()}
|
375
|
+
|
376
|
+
for source, words in ref_texts_clean.items():
|
377
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Reference '{source}': {len(words)} words")
|
332
378
|
|
333
379
|
# Check timeout after preprocessing
|
334
380
|
self._check_timeout(start_time, "anchor computation preprocessing")
|
381
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Timeout check passed - preprocessing")
|
335
382
|
|
336
383
|
# Filter out very short reference sources for n-gram length calculation
|
384
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Calculating n-gram lengths...")
|
337
385
|
valid_ref_lengths = [
|
338
386
|
len(words) for words in ref_texts_clean.values()
|
339
387
|
if len(words) >= self.min_sequence_length
|
340
388
|
]
|
341
389
|
|
342
390
|
if not valid_ref_lengths:
|
343
|
-
self.logger.warning("No reference sources long enough for anchor detection")
|
391
|
+
self.logger.warning("🔍 ANCHOR SEARCH: ❌ No reference sources long enough for anchor detection")
|
344
392
|
return []
|
345
393
|
|
346
394
|
# Calculate max length using only valid reference sources
|
347
395
|
max_length = min(len(trans_words), min(valid_ref_lengths))
|
348
396
|
n_gram_lengths = range(max_length, self.min_sequence_length - 1, -1)
|
397
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: N-gram lengths to process: {list(n_gram_lengths)} (max_length: {max_length})")
|
349
398
|
|
350
399
|
# Process n-gram lengths in parallel with timeout
|
400
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Setting up parallel processing...")
|
351
401
|
process_length_partial = partial(
|
352
402
|
self._process_ngram_length,
|
353
403
|
trans_words=trans_words,
|
@@ -363,83 +413,115 @@ class AnchorSequenceFinder:
|
|
363
413
|
|
364
414
|
# Check timeout before parallel processing
|
365
415
|
self._check_timeout(start_time, "parallel processing start")
|
416
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Timeout check passed - about to start parallel processing")
|
366
417
|
|
367
418
|
try:
|
419
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: 🚀 Starting parallel processing with {max(cpu_count() - 1, 1)} processes, pool timeout: {pool_timeout}s")
|
368
420
|
with Pool(processes=max(cpu_count() - 1, 1)) as pool:
|
369
|
-
self.logger.debug(f"
|
421
|
+
self.logger.debug(f"🔍 ANCHOR SEARCH: Pool created successfully")
|
370
422
|
results = []
|
371
423
|
|
372
424
|
# Submit all jobs first
|
373
|
-
|
425
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: Submitting {len(n_gram_lengths)} n-gram processing jobs...")
|
426
|
+
async_results = []
|
427
|
+
for i, n in enumerate(n_gram_lengths):
|
428
|
+
self.logger.debug(f"🔍 ANCHOR SEARCH: Submitting job {i+1}/{len(n_gram_lengths)} for n-gram length {n}")
|
429
|
+
async_result = pool.apply_async(process_length_partial, (n,))
|
430
|
+
async_results.append(async_result)
|
431
|
+
|
432
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ All {len(async_results)} jobs submitted")
|
374
433
|
|
375
434
|
# Collect results with individual timeouts
|
376
435
|
for i, async_result in enumerate(async_results):
|
436
|
+
n_gram_length = n_gram_lengths[i]
|
377
437
|
try:
|
378
|
-
|
379
|
-
self._check_timeout(start_time, f"collecting n-gram {n_gram_lengths[i]} results")
|
438
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ⏳ Collecting result {i+1}/{len(async_results)} for n-gram length {n_gram_length}")
|
380
439
|
|
381
|
-
# Check remaining time for pool timeout
|
440
|
+
# Check remaining time for pool timeout (more lenient than overall timeout)
|
382
441
|
elapsed_time = time.time() - start_time
|
383
442
|
remaining_time = max(10, self.timeout_seconds - elapsed_time) if self.timeout_seconds > 0 else pool_timeout
|
384
443
|
|
385
|
-
|
444
|
+
self.logger.debug(f"🔍 ANCHOR SEARCH: Remaining time for n-gram {n_gram_length}: {remaining_time}s")
|
445
|
+
|
446
|
+
# Use a more lenient timeout for individual results to allow fallback
|
447
|
+
individual_timeout = min(pool_timeout, remaining_time) if self.timeout_seconds > 0 else pool_timeout
|
448
|
+
|
449
|
+
result = async_result.get(timeout=individual_timeout)
|
386
450
|
results.append(result)
|
387
451
|
|
388
|
-
self.logger.
|
452
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Completed n-gram length {n_gram_length} ({i+1}/{len(n_gram_lengths)}) - found {len(result)} anchors")
|
389
453
|
|
390
|
-
except AnchorSequenceTimeoutError:
|
391
|
-
# Re-raise timeout errors
|
392
|
-
raise
|
393
454
|
except Exception as e:
|
394
|
-
self.logger.warning(f"n-gram length {
|
455
|
+
self.logger.warning(f"🔍 ANCHOR SEARCH: ⚠️ n-gram length {n_gram_length} failed or timed out: {str(e)}")
|
395
456
|
results.append([]) # Add empty result to maintain order
|
457
|
+
|
458
|
+
# If we're running short on time, trigger fallback early
|
459
|
+
if self.timeout_seconds > 0 and (time.time() - start_time) > (self.timeout_seconds * 0.8):
|
460
|
+
self.logger.warning(f"🔍 ANCHOR SEARCH: ⚠️ Approaching timeout limit, triggering early fallback")
|
461
|
+
# Raise exception to trigger fallback to sequential processing
|
462
|
+
raise Exception("Parallel processing timeout, triggering fallback")
|
396
463
|
|
464
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Parallel processing completed, combining results...")
|
397
465
|
for anchors in results:
|
398
466
|
candidate_anchors.extend(anchors)
|
399
467
|
|
400
468
|
except AnchorSequenceTimeoutError:
|
469
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: ❌ Parallel processing timed out")
|
401
470
|
# Re-raise timeout errors
|
402
471
|
raise
|
403
472
|
except Exception as e:
|
404
|
-
self.logger.error(f"Parallel processing failed: {str(e)}")
|
473
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: ❌ Parallel processing failed: {str(e)}")
|
405
474
|
# Fall back to sequential processing with timeout checks
|
406
|
-
self.logger.info("Falling back to sequential processing")
|
475
|
+
self.logger.info("🔍 ANCHOR SEARCH: 🔄 Falling back to sequential processing")
|
407
476
|
for n in n_gram_lengths:
|
408
477
|
try:
|
409
|
-
# Check timeout
|
410
|
-
self.
|
478
|
+
# Check timeout more leniently during sequential processing
|
479
|
+
if self.timeout_seconds > 0:
|
480
|
+
elapsed_time = time.time() - start_time
|
481
|
+
# Allow more time for sequential processing (up to 2x the original timeout)
|
482
|
+
if elapsed_time > (self.timeout_seconds * 2.0):
|
483
|
+
self.logger.warning(f"🔍 ANCHOR SEARCH: ⏰ Sequential processing timeout for n-gram {n}")
|
484
|
+
break
|
485
|
+
|
486
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: 🔄 Sequential processing n-gram length {n}")
|
411
487
|
|
412
488
|
anchors = self._process_ngram_length(
|
413
489
|
n, trans_words, all_words, ref_texts_clean, ref_words, self.min_sources
|
414
490
|
)
|
415
491
|
candidate_anchors.extend(anchors)
|
416
|
-
|
417
|
-
# Re-raise timeout errors
|
418
|
-
raise
|
492
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Sequential n-gram {n} completed - found {len(anchors)} anchors")
|
419
493
|
except Exception as e:
|
420
|
-
self.logger.warning(f"Sequential processing failed for n-gram length {n}: {str(e)}")
|
494
|
+
self.logger.warning(f"🔍 ANCHOR SEARCH: ⚠️ Sequential processing failed for n-gram length {n}: {str(e)}")
|
421
495
|
continue
|
422
496
|
|
423
|
-
self.logger.info(f"Found {len(candidate_anchors)} candidate anchors in {time.time() - start_time:.1f}s")
|
497
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Found {len(candidate_anchors)} candidate anchors in {time.time() - start_time:.1f}s")
|
424
498
|
|
425
499
|
# Check timeout before expensive filtering operation
|
426
500
|
self._check_timeout(start_time, "overlap filtering start")
|
501
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: 🔄 Starting overlap filtering...")
|
427
502
|
|
428
503
|
filtered_anchors = self._remove_overlapping_sequences(candidate_anchors, transcribed, transcription_result)
|
504
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: ✅ Filtering completed - {len(filtered_anchors)} final anchors")
|
429
505
|
|
430
506
|
# Save to cache
|
507
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: 💾 Saving results to cache...")
|
431
508
|
self._save_to_cache(cache_path, filtered_anchors)
|
432
509
|
|
433
510
|
total_time = time.time() - start_time
|
434
|
-
self.logger.info(f"Anchor sequence computation completed in {total_time:.1f}s")
|
511
|
+
self.logger.info(f"🔍 ANCHOR SEARCH: 🎉 Anchor sequence computation completed successfully in {total_time:.1f}s")
|
435
512
|
|
436
513
|
return filtered_anchors
|
437
514
|
|
438
515
|
except AnchorSequenceTimeoutError:
|
439
|
-
|
516
|
+
elapsed_time = time.time() - start_time
|
517
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: ⏰ TIMEOUT after {elapsed_time:.1f}s (limit: {self.timeout_seconds}s)")
|
440
518
|
raise
|
441
519
|
except Exception as e:
|
442
|
-
|
520
|
+
elapsed_time = time.time() - start_time
|
521
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: ❌ FAILED after {elapsed_time:.1f}s: {str(e)}")
|
522
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: Exception type: {type(e).__name__}")
|
523
|
+
import traceback
|
524
|
+
self.logger.error(f"🔍 ANCHOR SEARCH: Traceback: {traceback.format_exc()}")
|
443
525
|
raise
|
444
526
|
finally:
|
445
527
|
# No cleanup needed for time-based timeout checks
|
@@ -506,16 +588,20 @@ class AnchorSequenceFinder:
|
|
506
588
|
transcription_result: TranscriptionResult,
|
507
589
|
) -> List[ScoredAnchor]:
|
508
590
|
"""Remove overlapping sequences using phrase analysis with timeout protection."""
|
591
|
+
self.logger.info(f"🔍 FILTERING: Starting overlap removal for {len(anchors)} anchors")
|
592
|
+
|
509
593
|
if not anchors:
|
594
|
+
self.logger.info(f"🔍 FILTERING: No anchors to process")
|
510
595
|
return []
|
511
596
|
|
512
|
-
self.logger.info(f"Scoring {len(anchors)} anchors")
|
597
|
+
self.logger.info(f"🔍 FILTERING: Scoring {len(anchors)} anchors")
|
513
598
|
|
514
599
|
# Create word map for scoring
|
515
600
|
word_map = {w.id: w for s in transcription_result.result.segments for w in s.words}
|
601
|
+
self.logger.debug(f"🔍 FILTERING: Created word map with {len(word_map)} words")
|
516
602
|
|
517
603
|
# Add word map to each anchor for scoring
|
518
|
-
for anchor in anchors:
|
604
|
+
for i, anchor in enumerate(anchors):
|
519
605
|
# For backwards compatibility, only add transcribed_words if all IDs exist in word_map
|
520
606
|
try:
|
521
607
|
anchor.transcribed_words = [word_map[word_id] for word_id in anchor.transcribed_word_ids]
|
@@ -555,7 +641,7 @@ class AnchorSequenceFinder:
|
|
555
641
|
|
556
642
|
# Try different pool sizes with timeout
|
557
643
|
num_processes = max(cpu_count() - 1, 1) # Leave one CPU free
|
558
|
-
self.logger.info(f"Using {num_processes} processes for scoring")
|
644
|
+
self.logger.info(f"🔍 FILTERING: Using {num_processes} processes for scoring")
|
559
645
|
|
560
646
|
# Create a partial function with the context parameter fixed
|
561
647
|
score_anchor_partial = partial(self._score_anchor_static, context=context)
|
@@ -565,24 +651,29 @@ class AnchorSequenceFinder:
|
|
565
651
|
pool_timeout = 300 # 5 minutes for scoring phase
|
566
652
|
|
567
653
|
try:
|
654
|
+
self.logger.info(f"🔍 FILTERING: 🚀 Starting parallel scoring with timeout {pool_timeout}s")
|
568
655
|
with Pool(processes=num_processes) as pool:
|
569
656
|
# Submit scoring jobs with timeout
|
570
657
|
async_results = []
|
571
658
|
batch_size = 50
|
572
659
|
|
660
|
+
self.logger.info(f"🔍 FILTERING: Splitting {len(anchors)} anchors into batches of {batch_size}")
|
573
661
|
for i in range(0, len(anchors), batch_size):
|
574
662
|
batch = anchors[i:i + batch_size]
|
575
663
|
async_result = pool.apply_async(self._score_batch_static, (batch, context))
|
576
664
|
async_results.append(async_result)
|
577
665
|
|
666
|
+
self.logger.info(f"🔍 FILTERING: Submitted {len(async_results)} scoring batches")
|
667
|
+
|
578
668
|
# Collect results with timeout
|
579
669
|
for i, async_result in enumerate(async_results):
|
580
670
|
try:
|
671
|
+
self.logger.debug(f"🔍 FILTERING: ⏳ Collecting batch {i+1}/{len(async_results)}")
|
581
672
|
batch_results = async_result.get(timeout=pool_timeout)
|
582
673
|
scored_anchors.extend(batch_results)
|
583
|
-
self.logger.debug(f"Completed scoring batch {i+1}/{len(async_results)}")
|
674
|
+
self.logger.debug(f"🔍 FILTERING: ✅ Completed scoring batch {i+1}/{len(async_results)}")
|
584
675
|
except Exception as e:
|
585
|
-
self.logger.warning(f"Scoring batch {i+1} failed or timed out: {str(e)}")
|
676
|
+
self.logger.warning(f"🔍 FILTERING: ⚠️ Scoring batch {i+1} failed or timed out: {str(e)}")
|
586
677
|
# Add basic scores for failed batch
|
587
678
|
start_idx = i * batch_size
|
588
679
|
end_idx = min((i + 1) * batch_size, len(anchors))
|
@@ -599,7 +690,7 @@ class AnchorSequenceFinder:
|
|
599
690
|
continue
|
600
691
|
|
601
692
|
except Exception as e:
|
602
|
-
self.logger.warning(f"Parallel scoring failed: {str(e)}, falling back to basic scoring")
|
693
|
+
self.logger.warning(f"🔍 FILTERING: ❌ Parallel scoring failed: {str(e)}, falling back to basic scoring")
|
603
694
|
# Fall back to basic scoring
|
604
695
|
for anchor in anchors:
|
605
696
|
try:
|
@@ -613,22 +704,28 @@ class AnchorSequenceFinder:
|
|
613
704
|
continue
|
614
705
|
|
615
706
|
parallel_time = time.time() - start_time
|
616
|
-
self.logger.info(f"Parallel scoring
|
707
|
+
self.logger.info(f"🔍 FILTERING: ✅ Parallel scoring completed in {parallel_time:.2f}s, scored {len(scored_anchors)} anchors")
|
617
708
|
|
618
709
|
# Sort and filter as before
|
710
|
+
self.logger.info(f"🔍 FILTERING: 🔄 Sorting anchors by priority...")
|
619
711
|
scored_anchors.sort(key=self._get_sequence_priority, reverse=True)
|
712
|
+
self.logger.info(f"🔍 FILTERING: ✅ Sorting completed")
|
620
713
|
|
621
|
-
self.logger.info(f"Filtering {len(scored_anchors)} overlapping sequences")
|
714
|
+
self.logger.info(f"🔍 FILTERING: 🔄 Filtering {len(scored_anchors)} overlapping sequences")
|
622
715
|
filtered_scored = []
|
623
716
|
|
624
717
|
for i, scored_anchor in enumerate(scored_anchors):
|
625
|
-
# Check timeout every 100 anchors using our timeout mechanism
|
626
|
-
if i % 100 == 0:
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
718
|
+
# Check timeout every 100 anchors using our timeout mechanism (more lenient)
|
719
|
+
if i % 100 == 0 and i > 0:
|
720
|
+
# Only check timeout if we're significantly over the limit
|
721
|
+
if self.timeout_seconds > 0:
|
722
|
+
elapsed_time = time.time() - start_time
|
723
|
+
# Use a more lenient timeout for filtering (allow 50% more time)
|
724
|
+
if elapsed_time > (self.timeout_seconds * 1.5):
|
725
|
+
self.logger.warning(f"🔍 FILTERING: ⏰ Filtering timed out, returning {len(filtered_scored)} anchors out of {len(scored_anchors)}")
|
726
|
+
break
|
727
|
+
|
728
|
+
self.logger.debug(f"🔍 FILTERING: Progress: {i}/{len(scored_anchors)} processed, {len(filtered_scored)} kept")
|
632
729
|
|
633
730
|
overlaps = False
|
634
731
|
for existing in filtered_scored:
|
@@ -639,7 +736,7 @@ class AnchorSequenceFinder:
|
|
639
736
|
if not overlaps:
|
640
737
|
filtered_scored.append(scored_anchor)
|
641
738
|
|
642
|
-
self.logger.info(f"
|
739
|
+
self.logger.info(f"🔍 FILTERING: ✅ Filtering completed - kept {len(filtered_scored)} non-overlapping anchors out of {len(scored_anchors)}")
|
643
740
|
return filtered_scored
|
644
741
|
|
645
742
|
@staticmethod
|
@@ -38915,7 +38915,7 @@ const theme = createTheme({
|
|
38915
38915
|
spacing: (factor) => `${0.6 * factor}rem`
|
38916
38916
|
// Further reduced from 0.8 * factor
|
38917
38917
|
});
|
38918
|
-
const version = "0.
|
38918
|
+
const version = "0.66.0";
|
38919
38919
|
const packageJson = {
|
38920
38920
|
version
|
38921
38921
|
};
|
@@ -38926,4 +38926,4 @@ ReactDOM$1.createRoot(document.getElementById("root")).render(
|
|
38926
38926
|
/* @__PURE__ */ jsxRuntimeExports.jsx(App, {})
|
38927
38927
|
] })
|
38928
38928
|
);
|
38929
|
-
//# sourceMappingURL=index-
|
38929
|
+
//# sourceMappingURL=index-BMWgZ3MR.js.map
|