lyrics-transcriber 0.52.0__tar.gz → 0.53.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/PKG-INFO +6 -3
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/README.md +5 -2
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/cli/cli_main.py +1 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/anchor_sequence.py +38 -5
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/corrector.py +4 -3
- lyrics_transcriber-0.53.0/lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/text_utils.py +7 -3
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/package.json +1 -1
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/main.tsx +5 -0
- lyrics_transcriber-0.53.0/lyrics_transcriber/frontend/update_version.js +11 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/lyrics/file_provider.py +7 -2
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/segment_resizer.py +1 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/types.py +165 -21
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/pyproject.toml +12 -2
- lyrics_transcriber-0.52.0/lyrics_transcriber/correction/handlers/extend_anchor.py +0 -134
- lyrics_transcriber-0.52.0/lyrics_transcriber/frontend/dist/assets/index-C5ftSgQo.js +0 -38924
- lyrics_transcriber-0.52.0/lyrics_transcriber/frontend/dist/assets/index-C5ftSgQo.js.map +0 -1
- lyrics_transcriber-0.52.0/lyrics_transcriber/frontend/dist/index.html +0 -13
- lyrics_transcriber-0.52.0/lyrics_transcriber/frontend/public/vite.svg +0 -1
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/LICENSE +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/cli/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/core/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/core/config.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/core/controller.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/base.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/levenshtein.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/llm.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/llm_providers.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/no_space_punct_match.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/repeat.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/sound_alike.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/syllables_match.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/word_count_match.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/handlers/word_operations.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/phrase_analyzer.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/.gitignore +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/.yarnrc.yml +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/README.md +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/eslint.config.js +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/index.html +0 -0
- {lyrics_transcriber-0.52.0/lyrics_transcriber/frontend/dist → lyrics_transcriber-0.53.0/lyrics_transcriber/frontend/public}/vite.svg +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/App.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/api.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/EditActionBar.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/EditModal.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/EditWordList.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/FileUpload.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/Header.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/ModeSelector.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/ReferenceView.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/WordDivider.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/constants.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/styles.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/types.js +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/types.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/hooks/useManualSync.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/theme.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/types/global.d.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/types.js +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/types.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/validation.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/vite-env.d.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/tsconfig.app.json +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/tsconfig.json +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/tsconfig.node.json +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/tsconfig.tsbuildinfo +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/vite.config.d.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/vite.config.js +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/vite.config.ts +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/yarn.lock +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/lyrics/base_lyrics_provider.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/lyrics/genius.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/lyrics/spotify.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/lyrics/user_input_provider.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/ass.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/ass_specs.txt +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/config.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/constants.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/event.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/formatters.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/lyrics_line.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/lyrics_screen.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/section_detector.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/section_screen.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/ass/style.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdg.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/cdg.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/composer.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/config.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/pack.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/render.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/cdgmaker/utils.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/arial.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/generator.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/lrc_to_cdg.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/lyrics_file.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/plain_text.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/subtitles.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/video.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/review/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/review/server.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/storage/__init__.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/storage/dropbox.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/transcribers/audioshake.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/transcribers/base_transcriber.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/transcribers/whisper.py +0 -0
- {lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/utils/word_utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.52.0
|
3
|
+
Version: 0.53.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
License: MIT
|
6
6
|
Author: Andrew Beveridge
|
@@ -47,7 +47,10 @@ Description-Content-Type: text/markdown
|
|
47
47
|
|
48
48
|
# Lyrics Transcriber 🎶
|
49
49
|
|
50
|
-
|
50
|
+

|
51
|
+

|
52
|
+
[](https://github.com/nomadkaraoke/python-lyrics-transcriber/actions/workflows/test-and-publish.yml)
|
53
|
+
[](https://codecov.io/gh/nomadkaraoke/python-lyrics-transcriber)
|
51
54
|
|
52
55
|
Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using OpenAI Whisper and lyrics from Genius and Spotify, for convenience in use cases such as karaoke video production.
|
53
56
|
|
@@ -63,7 +66,7 @@ Automatically create synchronised lyrics files in ASS and MidiCo LRC formats wit
|
|
63
66
|
|
64
67
|
### Prerequisites
|
65
68
|
|
66
|
-
- Python 3.
|
69
|
+
- Python 3.10 or higher
|
67
70
|
- [Optional] Genius API token if you want to fetch lyrics from Genius
|
68
71
|
- [Optional] Spotify cookie value if you want to fetch lyrics from Spotify
|
69
72
|
- [Optional] OpenAI API token if you want to use LLM correction of the transcribed lyrics
|
@@ -1,6 +1,9 @@
|
|
1
1
|
# Lyrics Transcriber 🎶
|
2
2
|
|
3
|
-
|
3
|
+

|
4
|
+

|
5
|
+
[](https://github.com/nomadkaraoke/python-lyrics-transcriber/actions/workflows/test-and-publish.yml)
|
6
|
+
[](https://codecov.io/gh/nomadkaraoke/python-lyrics-transcriber)
|
4
7
|
|
5
8
|
Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using OpenAI Whisper and lyrics from Genius and Spotify, for convenience in use cases such as karaoke video production.
|
6
9
|
|
@@ -16,7 +19,7 @@ Automatically create synchronised lyrics files in ASS and MidiCo LRC formats wit
|
|
16
19
|
|
17
20
|
### Prerequisites
|
18
21
|
|
19
|
-
- Python 3.
|
22
|
+
- Python 3.10 or higher
|
20
23
|
- [Optional] Genius API token if you want to fetch lyrics from Genius
|
21
24
|
- [Optional] Spotify cookie value if you want to fetch lyrics from Spotify
|
22
25
|
- [Optional] OpenAI API token if you want to use LLM correction of the transcribed lyrics
|
@@ -18,6 +18,7 @@ from lyrics_transcriber.core.controller import TranscriberConfig, LyricsConfig,
|
|
18
18
|
def create_arg_parser() -> argparse.ArgumentParser:
|
19
19
|
"""Create and configure the argument parser."""
|
20
20
|
parser = argparse.ArgumentParser(
|
21
|
+
prog="lyrics-transcriber",
|
21
22
|
description="Create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps",
|
22
23
|
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=52),
|
23
24
|
)
|
@@ -104,7 +104,7 @@ class AnchorSequenceFinder:
|
|
104
104
|
ref_texts.append(f"{source}:{','.join(words_with_ids)}")
|
105
105
|
|
106
106
|
# Also include transcription word IDs to ensure complete matching
|
107
|
-
trans_words_with_ids = [f"{w.text}:{w.id}" for s in transcription_result.segments for w in s.words]
|
107
|
+
trans_words_with_ids = [f"{w.text}:{w.id}" for s in transcription_result.result.segments for w in s.words]
|
108
108
|
|
109
109
|
input_str = f"{transcribed}|" f"{','.join(trans_words_with_ids)}|" f"{','.join(ref_texts)}"
|
110
110
|
return hashlib.md5(input_str.encode()).hexdigest()
|
@@ -259,7 +259,7 @@ class AnchorSequenceFinder:
|
|
259
259
|
|
260
260
|
# Get all words from transcription
|
261
261
|
all_words = []
|
262
|
-
for segment in transcription_result.segments:
|
262
|
+
for segment in transcription_result.result.segments:
|
263
263
|
all_words.extend(segment.words)
|
264
264
|
|
265
265
|
# Clean and split texts
|
@@ -381,11 +381,44 @@ class AnchorSequenceFinder:
|
|
381
381
|
self.logger.info(f"Scoring {len(anchors)} anchors")
|
382
382
|
|
383
383
|
# Create word map for scoring
|
384
|
-
word_map = {w.id: w for s in transcription_result.segments for w in s.words}
|
384
|
+
word_map = {w.id: w for s in transcription_result.result.segments for w in s.words}
|
385
385
|
|
386
386
|
# Add word map to each anchor for scoring
|
387
387
|
for anchor in anchors:
|
388
|
-
|
388
|
+
# For backwards compatibility, only add transcribed_words if all IDs exist in word_map
|
389
|
+
try:
|
390
|
+
anchor.transcribed_words = [word_map[word_id] for word_id in anchor.transcribed_word_ids]
|
391
|
+
# Also set _words for backwards compatibility with text display
|
392
|
+
anchor._words = [word_map[word_id].text for word_id in anchor.transcribed_word_ids]
|
393
|
+
except KeyError:
|
394
|
+
# This can happen in tests using backwards compatible constructors
|
395
|
+
# Create dummy Word objects with the text from _words if available
|
396
|
+
if hasattr(anchor, '_words') and anchor._words is not None:
|
397
|
+
from lyrics_transcriber.types import Word
|
398
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
399
|
+
anchor.transcribed_words = [
|
400
|
+
Word(
|
401
|
+
id=word_id,
|
402
|
+
text=text,
|
403
|
+
start_time=i * 1.0,
|
404
|
+
end_time=(i + 1) * 1.0,
|
405
|
+
confidence=1.0
|
406
|
+
)
|
407
|
+
for i, (word_id, text) in enumerate(zip(anchor.transcribed_word_ids, anchor._words))
|
408
|
+
]
|
409
|
+
else:
|
410
|
+
# Create generic word objects for scoring
|
411
|
+
from lyrics_transcriber.types import Word
|
412
|
+
anchor.transcribed_words = [
|
413
|
+
Word(
|
414
|
+
id=word_id,
|
415
|
+
text=f"word_{i}",
|
416
|
+
start_time=i * 1.0,
|
417
|
+
end_time=(i + 1) * 1.0,
|
418
|
+
confidence=1.0
|
419
|
+
)
|
420
|
+
for i, word_id in enumerate(anchor.transcribed_word_ids)
|
421
|
+
]
|
389
422
|
|
390
423
|
start_time = time.time()
|
391
424
|
|
@@ -469,7 +502,7 @@ class AnchorSequenceFinder:
|
|
469
502
|
"""Find gaps between anchor sequences in the transcribed text."""
|
470
503
|
# Get all words from transcription
|
471
504
|
all_words = []
|
472
|
-
for segment in transcription_result.segments:
|
505
|
+
for segment in transcription_result.result.segments:
|
473
506
|
all_words.extend(segment.words)
|
474
507
|
|
475
508
|
# Clean and split reference texts
|
{lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/corrector.py
RENAMED
@@ -150,13 +150,14 @@ class LyricsCorrector:
|
|
150
150
|
self.reference_lyrics = lyrics_results
|
151
151
|
|
152
152
|
# Get primary transcription
|
153
|
-
|
153
|
+
primary_transcription_result = sorted(transcription_results, key=lambda x: x.priority)[0]
|
154
|
+
primary_transcription = primary_transcription_result.result
|
154
155
|
transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in primary_transcription.segments)
|
155
156
|
|
156
157
|
# Find anchor sequences and gaps
|
157
158
|
self.logger.debug("Finding anchor sequences and gaps")
|
158
|
-
anchor_sequences = self.anchor_finder.find_anchors(transcribed_text, lyrics_results, primary_transcription)
|
159
|
-
gap_sequences = self.anchor_finder.find_gaps(transcribed_text, anchor_sequences, lyrics_results, primary_transcription)
|
159
|
+
anchor_sequences = self.anchor_finder.find_anchors(transcribed_text, lyrics_results, primary_transcription_result)
|
160
|
+
gap_sequences = self.anchor_finder.find_gaps(transcribed_text, anchor_sequences, lyrics_results, primary_transcription_result)
|
160
161
|
|
161
162
|
# Store anchor sequences for use in correction handlers
|
162
163
|
self._anchor_sequences = anchor_sequences
|
@@ -0,0 +1,149 @@
|
|
1
|
+
from typing import List, Optional, Tuple, Dict, Any
|
2
|
+
import logging
|
3
|
+
|
4
|
+
from lyrics_transcriber.types import GapSequence, WordCorrection, Word
|
5
|
+
from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
|
6
|
+
from lyrics_transcriber.correction.handlers.word_operations import WordOperations
|
7
|
+
|
8
|
+
|
9
|
+
class ExtendAnchorHandler(GapCorrectionHandler):
|
10
|
+
"""Handles gaps where some words match reference text but there are extra words.
|
11
|
+
|
12
|
+
This handler looks for cases where:
|
13
|
+
1. One or more words in the gap match words in the same position in at least one reference source
|
14
|
+
2. The gap may contain additional words that aren't in the reference
|
15
|
+
|
16
|
+
When such matches are found, it:
|
17
|
+
1. Validates all matching words (creates corrections that keep the same words)
|
18
|
+
2. Leaves all non-matching words unchanged for other handlers to process
|
19
|
+
|
20
|
+
The confidence of validations is based on the ratio of reference sources that agree.
|
21
|
+
For example, if 2 out of 4 sources have the matching word, confidence will be 0.5.
|
22
|
+
|
23
|
+
Examples:
|
24
|
+
Gap: "hello world extra words"
|
25
|
+
References:
|
26
|
+
genius: ["hello", "world"]
|
27
|
+
spotify: ["hello", "world"]
|
28
|
+
Result:
|
29
|
+
- Validate "hello" (confidence=1.0)
|
30
|
+
- Validate "world" (confidence=1.0)
|
31
|
+
- Leave "extra" and "words" unchanged
|
32
|
+
|
33
|
+
Gap: "martyr youre a"
|
34
|
+
References:
|
35
|
+
genius: ["martyr"]
|
36
|
+
spotify: ["mother"]
|
37
|
+
Result:
|
38
|
+
- Validate "martyr" (confidence=0.5, source="genius")
|
39
|
+
- Leave "youre" and "a" unchanged
|
40
|
+
"""
|
41
|
+
|
42
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
43
|
+
self.logger = logger or logging.getLogger(__name__)
|
44
|
+
|
45
|
+
def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
|
46
|
+
"""Check if this gap can be handled by extending anchor sequences."""
|
47
|
+
# Must have reference word IDs
|
48
|
+
if not gap.reference_word_ids:
|
49
|
+
self.logger.debug("No reference word IDs available.")
|
50
|
+
return False, {}
|
51
|
+
|
52
|
+
# Gap must have word IDs
|
53
|
+
if not gap.transcribed_word_ids:
|
54
|
+
self.logger.debug("No word IDs in the gap to process.")
|
55
|
+
return False, {}
|
56
|
+
|
57
|
+
# Must have word map to resolve IDs to actual words
|
58
|
+
if not self._validate_data(data):
|
59
|
+
return False, {}
|
60
|
+
|
61
|
+
word_map = data["word_map"]
|
62
|
+
|
63
|
+
# At least one word must match between gap and any reference source by text content
|
64
|
+
has_match = False
|
65
|
+
for i, trans_word_id in enumerate(gap.transcribed_word_ids):
|
66
|
+
if trans_word_id not in word_map:
|
67
|
+
continue
|
68
|
+
trans_word = word_map[trans_word_id]
|
69
|
+
|
70
|
+
# Check if this word matches any reference word at the same position
|
71
|
+
for ref_word_ids in gap.reference_word_ids.values():
|
72
|
+
if i < len(ref_word_ids):
|
73
|
+
ref_word_id = ref_word_ids[i]
|
74
|
+
if ref_word_id in word_map:
|
75
|
+
ref_word = word_map[ref_word_id]
|
76
|
+
if trans_word.text.lower() == ref_word.text.lower():
|
77
|
+
has_match = True
|
78
|
+
break
|
79
|
+
if has_match:
|
80
|
+
break
|
81
|
+
|
82
|
+
self.logger.debug(f"Can handle gap: {has_match}")
|
83
|
+
return has_match, {"word_map": word_map}
|
84
|
+
|
85
|
+
def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
|
86
|
+
corrections = []
|
87
|
+
|
88
|
+
# Get word lookup map from data
|
89
|
+
if not self._validate_data(data):
|
90
|
+
return []
|
91
|
+
|
92
|
+
word_map = data["word_map"]
|
93
|
+
|
94
|
+
# Process each word in the gap that has a corresponding reference position
|
95
|
+
for i, word_id in enumerate(gap.transcribed_word_ids):
|
96
|
+
# Get the actual word object
|
97
|
+
if word_id not in word_map:
|
98
|
+
self.logger.error(f"Word ID {word_id} not found in word_map")
|
99
|
+
continue
|
100
|
+
word = word_map[word_id]
|
101
|
+
|
102
|
+
# Find reference sources that have a matching word (by text) at this position
|
103
|
+
matching_sources = []
|
104
|
+
corrected_word_id = None
|
105
|
+
|
106
|
+
for source, ref_word_ids in gap.reference_word_ids.items():
|
107
|
+
if i < len(ref_word_ids):
|
108
|
+
ref_word_id = ref_word_ids[i]
|
109
|
+
if ref_word_id in word_map:
|
110
|
+
ref_word = word_map[ref_word_id]
|
111
|
+
if word.text.lower() == ref_word.text.lower():
|
112
|
+
matching_sources.append(source)
|
113
|
+
if corrected_word_id is None:
|
114
|
+
corrected_word_id = ref_word_id
|
115
|
+
|
116
|
+
if not matching_sources:
|
117
|
+
self.logger.debug(f"Skipping word '{word.text}' at position {i} - no matching references")
|
118
|
+
continue
|
119
|
+
|
120
|
+
# Word matches reference(s) at this position - validate it
|
121
|
+
confidence = len(matching_sources) / len(gap.reference_word_ids)
|
122
|
+
sources = ", ".join(matching_sources)
|
123
|
+
|
124
|
+
# Get base reference positions
|
125
|
+
base_reference_positions = WordOperations.calculate_reference_positions(gap, matching_sources)
|
126
|
+
|
127
|
+
# Adjust reference positions based on the word's position in the reference text
|
128
|
+
reference_positions = {}
|
129
|
+
for source in matching_sources:
|
130
|
+
if source in base_reference_positions:
|
131
|
+
reference_positions[source] = base_reference_positions[source] + i
|
132
|
+
|
133
|
+
corrections.append(
|
134
|
+
WordOperations.create_word_replacement_correction(
|
135
|
+
original_word=word.text,
|
136
|
+
corrected_word=word.text,
|
137
|
+
original_position=gap.transcription_position + i,
|
138
|
+
source=sources,
|
139
|
+
confidence=confidence,
|
140
|
+
reason="Matched reference source(s)",
|
141
|
+
reference_positions=reference_positions,
|
142
|
+
handler="ExtendAnchorHandler",
|
143
|
+
original_word_id=word_id,
|
144
|
+
corrected_word_id=corrected_word_id,
|
145
|
+
)
|
146
|
+
)
|
147
|
+
self.logger.debug(f"Validated word '{word.text}' with confidence {confidence} from sources: {sources}")
|
148
|
+
|
149
|
+
return corrections
|
{lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/correction/text_utils.py
RENAMED
@@ -12,13 +12,17 @@ def clean_text(text: str) -> str:
|
|
12
12
|
- All text converted to lowercase
|
13
13
|
- Multiple spaces/whitespace collapsed to single space
|
14
14
|
- Leading/trailing whitespace removed
|
15
|
-
-
|
15
|
+
- Hyphens and forward slashes replaced with spaces
|
16
|
+
- Apostrophes and other punctuation removed
|
16
17
|
"""
|
17
18
|
# Convert to lowercase
|
18
19
|
text = text.lower()
|
19
20
|
|
20
|
-
#
|
21
|
-
text = re.sub(r"
|
21
|
+
# Replace hyphens and forward slashes with spaces
|
22
|
+
text = re.sub(r"[-/]", " ", text)
|
23
|
+
|
24
|
+
# Remove apostrophes and other punctuation
|
25
|
+
text = re.sub(r"[^\w\s]", "", text)
|
22
26
|
|
23
27
|
# Normalize whitespace (collapse multiple spaces, remove leading/trailing)
|
24
28
|
text = " ".join(text.split())
|
Binary file
|
{lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/frontend/src/main.tsx
RENAMED
@@ -3,6 +3,11 @@ import { ThemeProvider } from '@mui/material/styles'
|
|
3
3
|
import CssBaseline from '@mui/material/CssBaseline'
|
4
4
|
import App from './App'
|
5
5
|
import theme from './theme'
|
6
|
+
// Import version from package.json
|
7
|
+
import packageJson from '../package.json'
|
8
|
+
|
9
|
+
// Log the frontend version when the app loads
|
10
|
+
console.log(`🎵 Lyrics Transcriber Frontend v${packageJson.version}`)
|
6
11
|
|
7
12
|
ReactDOM.createRoot(document.getElementById('root')!).render(
|
8
13
|
<ThemeProvider theme={theme}>
|
@@ -0,0 +1,11 @@
|
|
1
|
+
// Sets the "version" field of the package.json next to this script.
//
// Usage: node update_version.js <new-version>
const fs = require('fs');
const path = require('path');

const newVersion = process.argv[2];

// Refuse to run without a version: assigning `undefined` would make
// JSON.stringify drop the "version" key and silently corrupt package.json.
if (typeof newVersion !== 'string' || newVersion.trim() === '') {
  console.error('Usage: node update_version.js <new-version>');
  process.exit(1);
}

const packageJsonPath = path.join(__dirname, 'package.json');
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));

packageJson.version = newVersion;

// Keep the two-space indentation and trailing newline used by the file.
fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2) + '\n');
console.log(`✅ Updated package.json version to ${newVersion}`);
|
{lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/lyrics/file_provider.py
RENAMED
@@ -20,7 +20,7 @@ class FileProvider(BaseLyricsProvider):
|
|
20
20
|
"""Get lyrics for the specified artist and title."""
|
21
21
|
self.title = title # Store title for use in other methods
|
22
22
|
self.artist = artist # Store artist for use in other methods
|
23
|
-
return super().
|
23
|
+
return super().fetch_lyrics(artist, title)
|
24
24
|
|
25
25
|
def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
|
26
26
|
"""Load lyrics from the specified file."""
|
@@ -41,9 +41,14 @@ class FileProvider(BaseLyricsProvider):
|
|
41
41
|
self.logger.debug(f"File size: {lyrics_file.stat().st_size} bytes")
|
42
42
|
|
43
43
|
try:
|
44
|
+
# Get formatter safely
|
45
|
+
formatter = None
|
46
|
+
if self.logger.handlers and len(self.logger.handlers) > 0 and hasattr(self.logger.handlers[0], 'formatter'):
|
47
|
+
formatter = self.logger.handlers[0].formatter
|
48
|
+
|
44
49
|
processor = KaraokeLyricsProcessor(
|
45
50
|
log_level=self.logger.getEffectiveLevel(),
|
46
|
-
log_formatter=
|
51
|
+
log_formatter=formatter,
|
47
52
|
input_filename=str(lyrics_file),
|
48
53
|
max_line_length=self.max_line_length,
|
49
54
|
)
|
{lyrics_transcriber-0.52.0 → lyrics_transcriber-0.53.0}/lyrics_transcriber/output/segment_resizer.py
RENAMED
@@ -114,6 +114,7 @@ class SegmentResizer:
|
|
114
114
|
"""Create a new word with cleaned text."""
|
115
115
|
cleaned_text = self._clean_text(word.text)
|
116
116
|
return Word(
|
117
|
+
id=word.id, # Preserve the original word ID
|
117
118
|
text=cleaned_text,
|
118
119
|
start_time=word.start_time,
|
119
120
|
end_time=word.end_time,
|
@@ -269,12 +269,67 @@ class AnchorSequence:
|
|
269
269
|
reference_positions: Dict[str, int] # Source -> position mapping
|
270
270
|
reference_word_ids: Dict[str, List[str]] # Source -> list of Word IDs from reference
|
271
271
|
confidence: float
|
272
|
+
|
273
|
+
# Backwards compatibility: store original words as text for tests
|
274
|
+
_words: Optional[List[str]] = field(default=None, repr=False)
|
275
|
+
|
276
|
+
def __init__(self, *args, **kwargs):
    """Build an anchor from either the legacy word-list call or the ID-based call.

    Legacy form (kept so older callers and tests keep working)::

        AnchorSequence(words, transcription_position, reference_positions, confidence)
        AnchorSequence(words=[...], transcription_position=..., ...)

    ``words`` is a list or tuple of strings. Any trailing legacy argument may
    be omitted or given by keyword; missing values default to position ``0``,
    no reference positions and confidence ``0.0``. Fresh IDs are generated
    for the anchor (unless ``id`` is passed), for every transcribed word and
    for every word under each source in ``reference_positions``.

    Current form::

        AnchorSequence(id, transcribed_word_ids, transcription_position,
                       reference_positions, reference_word_ids, confidence)

    Every field may also be given by keyword; a keyword wins over the
    positional value in the same slot.
    """
    legacy_fields = ("words", "transcription_position", "reference_positions", "confidence")

    if "words" in kwargs:
        # Keyword-style legacy call: every legacy value comes from kwargs.
        legacy = kwargs
    elif args and isinstance(args[0], (list, tuple)):
        # Positional legacy call. A leading list/tuple can never be a valid
        # ``id``, so it is safe to treat it as the word list regardless of
        # how many further positionals follow.
        legacy = {**kwargs, **dict(zip(legacy_fields, args))}
    else:
        legacy = None

    if legacy is not None:
        words = legacy["words"]
        if isinstance(words, tuple):
            words = list(words)
        reference_positions = legacy.get("reference_positions", {})

        # Keep the raw text so ``words`` / ``text`` still work without a word map.
        self._words = words

        self.id = kwargs.get("id", WordUtils.generate_id())
        self.transcribed_word_ids = [WordUtils.generate_id() for _ in words]
        self.transcription_position = legacy.get("transcription_position", 0)
        self.reference_positions = reference_positions
        # One generated ID per word for every source that has a position.
        self.reference_word_ids = {
            source: [WordUtils.generate_id() for _ in words]
            for source in reference_positions
        }
        self.confidence = legacy.get("confidence", 0.0)
        return

    # ID-based API: keyword first, then the positional slot, then a default.
    self.id = kwargs.get("id", args[0] if len(args) > 0 else WordUtils.generate_id())
    self.transcribed_word_ids = kwargs.get("transcribed_word_ids", args[1] if len(args) > 1 else [])
    self.transcription_position = kwargs.get("transcription_position", args[2] if len(args) > 2 else 0)
    self.reference_positions = kwargs.get("reference_positions", args[3] if len(args) > 3 else {})
    self.reference_word_ids = kwargs.get("reference_word_ids", args[4] if len(args) > 4 else {})
    self.confidence = kwargs.get("confidence", args[5] if len(args) > 5 else 0.0)
    self._words = kwargs.get("_words", None)
|
320
|
+
|
321
|
+
@property
def words(self) -> List[str]:
    """Return the anchor's words as plain strings (legacy accessor).

    When the anchor was built with word text, that stored list is returned
    as-is. Otherwise only word IDs are known, and they cannot be resolved
    without a word map, so one ``word_<index>`` placeholder is returned per
    transcribed word ID.
    """
    stored_words = self._words
    if stored_words is None:
        return [f"word_{index}" for index, _ in enumerate(self.transcribed_word_ids)]
    return stored_words
|
272
329
|
|
273
330
|
@property
def text(self) -> str:
    """Return the anchor's words joined by single spaces."""
    separator = " "
    return separator.join(self.words)
|
279
334
|
|
280
335
|
@property
|
@@ -284,6 +339,18 @@ class AnchorSequence:
|
|
284
339
|
|
285
340
|
def to_dict(self) -> Dict[str, Any]:
|
286
341
|
"""Convert the anchor sequence to a JSON-serializable dictionary."""
|
342
|
+
# For backwards compatibility, return old format when _words is present
|
343
|
+
if self._words is not None:
|
344
|
+
return {
|
345
|
+
"words": self._words,
|
346
|
+
"text": self.text,
|
347
|
+
"length": self.length,
|
348
|
+
"transcription_position": self.transcription_position,
|
349
|
+
"reference_positions": self.reference_positions,
|
350
|
+
"confidence": self.confidence,
|
351
|
+
}
|
352
|
+
|
353
|
+
# New format
|
287
354
|
return {
|
288
355
|
"id": self.id,
|
289
356
|
"transcribed_word_ids": self.transcribed_word_ids,
|
@@ -296,14 +363,26 @@ class AnchorSequence:
|
|
296
363
|
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "AnchorSequence":
    """Rebuild an AnchorSequence from a dictionary in either supported layout.

    A dictionary with a ``"words"`` key uses the legacy layout and goes
    through the positional constructor. Any other dictionary must carry the
    ID-based fields. In both layouts a missing ``"id"`` is replaced by a
    newly generated one.
    """
    if "words" not in data:
        # ID-based layout: every field below is required.
        id_based_fields = (
            "transcribed_word_ids",
            "transcription_position",
            "reference_positions",
            "reference_word_ids",
            "confidence",
        )
        return cls(
            id=data.get("id", WordUtils.generate_id()),
            **{field_name: data[field_name] for field_name in id_based_fields},
        )

    # Legacy layout: words plus positions, passed positionally.
    legacy_args = [
        data[key]
        for key in ("words", "transcription_position", "reference_positions", "confidence")
    ]
    return cls(*legacy_args, id=data.get("id", WordUtils.generate_id()))
|
307
386
|
|
308
387
|
|
309
388
|
@dataclass
|
@@ -354,11 +433,53 @@ class GapSequence:
|
|
354
433
|
reference_word_ids: Dict[str, List[str]] # Source -> list of Word IDs from reference
|
355
434
|
_corrected_positions: Set[int] = field(default_factory=set, repr=False)
|
356
435
|
_position_offset: int = field(default=0, repr=False) # Track cumulative position changes
|
436
|
+
|
437
|
+
# Backwards compatibility: store original words as text for tests
|
438
|
+
_words: Optional[List[str]] = field(default=None, repr=False)
|
439
|
+
|
440
|
+
def __init__(self, *args, **kwargs):
    """Build a gap from either the legacy word-list call or the ID-based call.

    Legacy form (kept so older callers and tests keep working)::

        GapSequence(words, transcription_position, preceding_anchor,
                    following_anchor, reference_words)

    ``words`` is a list or tuple of strings, the anchors are objects with an
    ``id`` attribute (or ``None``), and ``reference_words`` maps a source
    name to its list of words. Any trailing legacy argument may be omitted
    or given by keyword, and the whole call may use keywords
    (``words=...``). Fresh IDs are generated for the gap (unless ``id`` is
    passed), for every transcribed word and for every reference word.

    Current form::

        GapSequence(id, transcribed_word_ids, transcription_position,
                    preceding_anchor_id, following_anchor_id, reference_word_ids)

    Every field may also be given by keyword; a keyword wins over the
    positional value in the same slot.
    """
    legacy_fields = (
        "words",
        "transcription_position",
        "preceding_anchor",
        "following_anchor",
        "reference_words",
    )

    if "words" in kwargs:
        # Keyword-style legacy call: every legacy value comes from kwargs.
        legacy = kwargs
    elif args and isinstance(args[0], (list, tuple)):
        # Positional legacy call. A leading list/tuple can never be a valid
        # ``id``, so it is treated as the word list even when fewer than
        # five positionals are supplied.
        legacy = {**kwargs, **dict(zip(legacy_fields, args))}
    else:
        legacy = None

    if legacy is not None:
        words = legacy["words"]
        preceding_anchor = legacy.get("preceding_anchor")
        following_anchor = legacy.get("following_anchor")
        reference_words = legacy.get("reference_words", {})

        # Keep the raw text so ``words`` / ``text`` still work without a word map.
        self._words = list(words) if isinstance(words, tuple) else words

        self.id = kwargs.get("id", WordUtils.generate_id())
        self.transcribed_word_ids = [WordUtils.generate_id() for _ in self._words]
        self.transcription_position = legacy.get("transcription_position", 0)
        # Only the anchors' IDs are kept; a falsy anchor means "no neighbour".
        self.preceding_anchor_id = getattr(preceding_anchor, "id", None) if preceding_anchor else None
        self.following_anchor_id = getattr(following_anchor, "id", None) if following_anchor else None
        # One generated ID per reference word, grouped by source.
        self.reference_word_ids = {
            source: [WordUtils.generate_id() for _ in ref_words]
            for source, ref_words in reference_words.items()
        }
        self._corrected_positions = set()
        self._position_offset = 0
        return

    # ID-based API: keyword first, then the positional slot, then a default.
    self.id = kwargs.get("id", args[0] if len(args) > 0 else WordUtils.generate_id())
    self.transcribed_word_ids = kwargs.get("transcribed_word_ids", args[1] if len(args) > 1 else [])
    self.transcription_position = kwargs.get("transcription_position", args[2] if len(args) > 2 else 0)
    self.preceding_anchor_id = kwargs.get("preceding_anchor_id", args[3] if len(args) > 3 else None)
    self.following_anchor_id = kwargs.get("following_anchor_id", args[4] if len(args) > 4 else None)
    self.reference_word_ids = kwargs.get("reference_word_ids", args[5] if len(args) > 5 else {})
    self._corrected_positions = kwargs.get("_corrected_positions", set())
    self._position_offset = kwargs.get("_position_offset", 0)
    self._words = kwargs.get("_words", None)
|
471
|
+
|
472
|
+
@property
def words(self) -> List[str]:
    """Return the gap's words as plain strings (legacy accessor).

    The stored word text is returned when the gap was built with it.
    Without it the word IDs cannot be resolved here (no word map), so a
    ``word_<index>`` placeholder is produced for each transcribed word ID.
    """
    if self._words is None:
        placeholder_count = len(self.transcribed_word_ids)
        return [f"word_{position}" for position in range(placeholder_count)]
    return self._words
|
357
479
|
|
358
480
|
@property
def text(self) -> str:
    """Return the gap's words joined by single spaces."""
    gap_words = self.words
    return " ".join(gap_words)
|
363
484
|
|
364
485
|
@property
|
@@ -368,7 +489,7 @@ class GapSequence:
|
|
368
489
|
|
369
490
|
def to_dict(self) -> Dict[str, Any]:
|
370
491
|
"""Convert the gap sequence to a JSON-serializable dictionary."""
|
371
|
-
|
492
|
+
result = {
|
372
493
|
"id": self.id,
|
373
494
|
"transcribed_word_ids": self.transcribed_word_ids,
|
374
495
|
"transcription_position": self.transcription_position,
|
@@ -376,19 +497,42 @@ class GapSequence:
|
|
376
497
|
"following_anchor_id": self.following_anchor_id,
|
377
498
|
"reference_word_ids": self.reference_word_ids,
|
378
499
|
}
|
500
|
+
|
501
|
+
# For backwards compatibility, include words and text in dict
|
502
|
+
if self._words is not None:
|
503
|
+
result.update({
|
504
|
+
"words": self._words,
|
505
|
+
"text": self.text,
|
506
|
+
"length": self.length,
|
507
|
+
})
|
508
|
+
|
509
|
+
return result
|
379
510
|
|
380
511
|
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GapSequence":
    """Rebuild a GapSequence from a dictionary in either supported layout.

    ``to_dict`` adds ``"words"`` on top of the ID-based fields whenever the
    gap carries stored word text, so the presence of ``"words"`` alone does
    not identify a legacy dictionary. The ID-based layout is therefore used
    whenever ``"transcribed_word_ids"`` is present, which keeps the word
    IDs, anchor IDs and reference word IDs intact across a
    ``to_dict`` / ``from_dict`` round trip; any ``"words"`` entry is
    restored as the stored text.

    Only a dictionary that has ``"words"`` and no ``"transcribed_word_ids"``
    is treated as legacy. It goes through the positional constructor with
    no neighbouring anchors and with ``"reference_words"`` defaulting to
    empty.

    Raises:
        KeyError: if a required field of the selected layout is missing.
    """
    if "words" in data and "transcribed_word_ids" not in data:
        # Legacy layout: text only, anchors are not recoverable from it.
        return cls(
            data["words"],
            data["transcription_position"],
            None,  # preceding_anchor
            None,  # following_anchor
            data.get("reference_words", {}),
            id=data.get("id", WordUtils.generate_id()),
        )

    # ID-based layout, optionally accompanied by the original word text.
    return cls(
        id=data.get("id", WordUtils.generate_id()),
        transcribed_word_ids=data["transcribed_word_ids"],
        transcription_position=data["transcription_position"],
        preceding_anchor_id=data["preceding_anchor_id"],
        following_anchor_id=data["following_anchor_id"],
        reference_word_ids=data["reference_word_ids"],
        _words=data.get("words"),
    )
|
392
536
|
|
393
537
|
|
394
538
|
@dataclass
|