karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/audio_fetcher.py +461 -0
- karaoke_gen/audio_processor.py +407 -30
- karaoke_gen/config.py +62 -113
- karaoke_gen/file_handler.py +32 -59
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
- karaoke_gen/karaoke_gen.py +270 -61
- karaoke_gen/lyrics_processor.py +13 -1
- karaoke_gen/metadata.py +78 -73
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/bulk_cli.py +6 -0
- karaoke_gen/utils/cli_args.py +424 -0
- karaoke_gen/utils/gen_cli.py +26 -261
- karaoke_gen/utils/remote_cli.py +1965 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen-0.71.27.dist-info/METADATA +610 -0
- karaoke_gen-0.71.27.dist-info/RECORD +275 -0
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +520 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +1043 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +212 -0
- lyrics_transcriber/frontend/src/api.ts +239 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +267 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +290 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +648 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
- karaoke_gen-0.57.0.dist-info/METADATA +0 -167
- karaoke_gen-0.57.0.dist-info/RECORD +0 -23
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,760 @@
|
|
|
1
|
+
from typing import List, Optional, Tuple, Union, Dict, Any
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
import os
|
|
6
|
+
import shortuuid
|
|
7
|
+
|
|
8
|
+
from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
|
|
9
|
+
from lyrics_transcriber.correction.handlers.llm import LLMHandler
|
|
10
|
+
from lyrics_transcriber.correction.handlers.no_space_punct_match import NoSpacePunctuationMatchHandler
|
|
11
|
+
from lyrics_transcriber.correction.handlers.relaxed_word_count_match import RelaxedWordCountMatchHandler
|
|
12
|
+
from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
|
|
13
|
+
from lyrics_transcriber.correction.handlers.sound_alike import SoundAlikeHandler
|
|
14
|
+
from lyrics_transcriber.correction.handlers.syllables_match import SyllablesMatchHandler
|
|
15
|
+
from lyrics_transcriber.correction.handlers.word_count_match import WordCountMatchHandler
|
|
16
|
+
from lyrics_transcriber.types import (
|
|
17
|
+
CorrectionStep,
|
|
18
|
+
GapSequence,
|
|
19
|
+
LyricsData,
|
|
20
|
+
TranscriptionResult,
|
|
21
|
+
CorrectionResult,
|
|
22
|
+
LyricsSegment,
|
|
23
|
+
WordCorrection,
|
|
24
|
+
Word,
|
|
25
|
+
)
|
|
26
|
+
from lyrics_transcriber.correction.anchor_sequence import AnchorSequenceFinder
|
|
27
|
+
from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
|
|
28
|
+
from lyrics_transcriber.correction.handlers.extend_anchor import ExtendAnchorHandler
|
|
29
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
|
30
|
+
from lyrics_transcriber.correction.handlers.llm_providers import OllamaProvider, OpenAIProvider
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class LyricsCorrector:
    """
    Coordinates the lyrics correction process using multiple correction handlers.

    The corrector builds a chain of gap-correction handlers (exact/relaxed word-count
    matchers, syllable matcher, and optional LLM-backed handlers), finds anchor
    sequences and gaps between the transcription and reference lyrics via
    AnchorSequenceFinder, then applies the handlers to produce a CorrectionResult.
    """
|
|
37
|
+
|
|
38
|
+
def __init__(
    self,
    cache_dir: Union[str, Path],
    handlers: Optional[List[GapCorrectionHandler]] = None,
    enabled_handlers: Optional[List[str]] = None,
    anchor_finder: Optional[AnchorSequenceFinder] = None,
    logger: Optional[logging.Logger] = None,
):
    """Initialize the corrector and build its gap-correction handler chain.

    Args:
        cache_dir: Directory used for handler and anchor-finder caches.
        handlers: Pre-built handler instances; when provided they are used
            as-is and ``enabled_handlers`` plays no part in selection.
        enabled_handlers: IDs of handlers to enable. ``None`` selects the
            default (non-LLM) set; an empty list enables no handlers.
        anchor_finder: Optional pre-built AnchorSequenceFinder; created
            lazily on first access of ``self.anchor_finder`` when omitted.
        logger: Logger instance; defaults to a module-level logger.
    """
    self.logger = logger or logging.getLogger(__name__)
    self._anchor_finder = anchor_finder
    self._cache_dir = Path(cache_dir)

    # Default enabled handlers — deliberately excludes the LLM, Repeat,
    # SoundAlike, and Levenshtein handlers, which are opt-in.
    DEFAULT_ENABLED_HANDLERS = [
        "ExtendAnchorHandler",
        "WordCountMatchHandler",
        "SyllablesMatchHandler",
        "RelaxedWordCountMatchHandler",
        "NoSpacePunctuationMatchHandler",
    ]

    # Create all handlers; enabling/filtering happens below.
    all_handlers = [
        ("ExtendAnchorHandler", ExtendAnchorHandler(logger=self.logger)),
        ("WordCountMatchHandler", WordCountMatchHandler(logger=self.logger)),
        ("SyllablesMatchHandler", SyllablesMatchHandler(logger=self.logger)),
        ("RelaxedWordCountMatchHandler", RelaxedWordCountMatchHandler(logger=self.logger)),
        ("NoSpacePunctuationMatchHandler", NoSpacePunctuationMatchHandler(logger=self.logger)),
        (
            "LLMHandler_Ollama_R17B",
            LLMHandler(
                provider=OllamaProvider(model="deepseek-r1:7b", logger=self.logger),
                name="LLMHandler_Ollama_R17B",
                logger=self.logger,
                cache_dir=self._cache_dir,
            ),
        ),
        ("RepeatCorrectionHandler", RepeatCorrectionHandler(logger=self.logger)),
        ("SoundAlikeHandler", SoundAlikeHandler(logger=self.logger)),
        ("LevenshteinHandler", LevenshteinHandler(logger=self.logger)),
    ]

    # Add OpenRouter handlers only if an API key is available.
    # Read the key once instead of re-reading the environment per handler.
    openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_api_key:
        for handler_name, model in (
            ("LLMHandler_OpenRouter_Sonnet", "anthropic/claude-3-sonnet"),
            ("LLMHandler_OpenRouter_R1", "deepseek/deepseek-r1"),
        ):
            all_handlers.append(
                (
                    handler_name,
                    LLMHandler(
                        provider=OpenAIProvider(
                            model=model,
                            api_key=openrouter_api_key,
                            base_url="https://openrouter.ai/api/v1",
                            logger=self.logger,
                        ),
                        name=handler_name,
                        logger=self.logger,
                        cache_dir=self._cache_dir,
                    ),
                )
            )

    # IDs that should be flagged as enabled (set for O(1) membership tests).
    enabled_ids = set(enabled_handlers if enabled_handlers is not None else DEFAULT_ENABLED_HANDLERS)

    # Store metadata for every available handler (e.g. for UI display).
    self.all_handlers = [
        {
            "id": handler_id,
            "name": handler_id,
            "description": handler.__class__.__doc__ or "",
            "enabled": handler_id in enabled_ids,
        }
        for handler_id, handler in all_handlers
    ]

    # Advertise the AgenticCorrector when agentic AI is enabled via env.
    # Parsing matches run(): any of "1"/"true"/"yes" (case-insensitive) enables it.
    use_agentic_env = os.getenv("USE_AGENTIC_AI", "").lower() in {"1", "true", "yes"}
    if use_agentic_env:
        self.all_handlers.append({
            "id": "AgenticCorrector",
            "name": "Agentic AI Corrector",
            "description": "AI-powered classification and correction of lyric gaps using LLM reasoning",
            "enabled": True,
        })

    if handlers:
        self.handlers = handlers
    else:
        # Use provided enabled_handlers if available, otherwise use defaults
        self.handlers = [h[1] for h in all_handlers if h[0] in enabled_ids]
|
|
141
|
+
|
|
142
|
+
@property
def anchor_finder(self) -> AnchorSequenceFinder:
    """Return the anchor finder, creating and caching one on first access."""
    finder = self._anchor_finder
    if finder is None:
        # Lazy construction: reuse the corrector's cache dir and logger.
        finder = AnchorSequenceFinder(cache_dir=self._cache_dir, logger=self.logger)
        self._anchor_finder = finder
    return finder
|
|
148
|
+
|
|
149
|
+
def run(
    self,
    transcription_results: List[TranscriptionResult],
    lyrics_results: Dict[str, LyricsData],
    metadata: Optional[Dict[str, Any]] = None,
) -> CorrectionResult:
    """Execute the correction process.

    Args:
        transcription_results: Candidate transcriptions; the one with the
            lowest ``priority`` value is used as the primary transcription.
        lyrics_results: Reference lyrics keyed by source name.
        metadata: Optional extra data forwarded to correction handlers
            (e.g. an audio file hash).

    Returns:
        CorrectionResult bundling original and corrected segments, the
        individual word corrections, anchor/gap sequences, and run metadata.

    Raises:
        ValueError: If ``transcription_results`` is empty.
    """
    # Optional agentic routing flag from environment; default off for safety
    agentic_enabled = os.getenv("USE_AGENTIC_AI", "").lower() in {"1", "true", "yes"}
    self.logger.info(f"🤖 AGENTIC MODE: {'ENABLED' if agentic_enabled else 'DISABLED'} (USE_AGENTIC_AI={os.getenv('USE_AGENTIC_AI', 'NOT_SET')})")
    if not transcription_results:
        self.logger.error("No transcription results available")
        raise ValueError("No primary transcription data available")

    # Store reference lyrics for use in word map
    self.reference_lyrics = lyrics_results

    # Get primary transcription — lowest priority value wins
    primary_transcription_result = sorted(transcription_results, key=lambda x: x.priority)[0]
    primary_transcription = primary_transcription_result.result
    # Flatten all segment words into one space-separated string for anchor finding
    transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in primary_transcription.segments)

    # Find anchor sequences and gaps
    self.logger.debug("Finding anchor sequences and gaps")
    anchor_sequences = self.anchor_finder.find_anchors(transcribed_text, lyrics_results, primary_transcription_result)
    gap_sequences = self.anchor_finder.find_gaps(transcribed_text, anchor_sequences, lyrics_results, primary_transcription_result)

    # Store anchor sequences for use in correction handlers
    self._anchor_sequences = anchor_sequences

    # Process corrections with metadata
    corrections, corrected_segments, correction_steps, word_id_map, segment_id_map = self._process_corrections(
        primary_transcription.segments, gap_sequences, metadata=metadata
    )

    # Calculate correction ratio: 1.0 means no words were corrected;
    # also used as the result's confidence value below.
    total_words = sum(len(segment.words) for segment in corrected_segments)
    corrections_made = len(corrections)
    correction_ratio = 1 - (corrections_made / total_words if total_words > 0 else 0)

    # Get the currently enabled handler IDs using the handler's name attribute if available
    enabled_handlers = [getattr(handler, "name", handler.__class__.__name__) for handler in self.handlers]

    result = CorrectionResult(
        original_segments=primary_transcription.segments,
        corrected_segments=corrected_segments,
        corrections=corrections,
        corrections_made=corrections_made,
        confidence=correction_ratio,
        reference_lyrics=lyrics_results,
        anchor_sequences=anchor_sequences,
        # resized_segments is intentionally empty here; presumably filled by a
        # later resizing step elsewhere — not populated in this method.
        resized_segments=[],
        gap_sequences=gap_sequences,
        metadata={
            "anchor_sequences_count": len(anchor_sequences),
            "gap_sequences_count": len(gap_sequences),
            "total_words": total_words,
            "correction_ratio": correction_ratio,
            "available_handlers": self.all_handlers,
            "enabled_handlers": enabled_handlers,
            "agentic_routing": "agentic" if agentic_enabled else "rule-based",
        },
        correction_steps=correction_steps,
        word_id_map=word_id_map,
        segment_id_map=segment_id_map,
    )
    return result
|
|
216
|
+
|
|
217
|
+
def _preserve_formatting(self, original: str, new_word: str) -> str:
|
|
218
|
+
"""Preserve original word's formatting when applying correction."""
|
|
219
|
+
# Find leading/trailing whitespace
|
|
220
|
+
leading_space = " " if original != original.lstrip() else ""
|
|
221
|
+
trailing_space = " " if original != original.rstrip() else ""
|
|
222
|
+
return leading_space + new_word.strip() + trailing_space
|
|
223
|
+
|
|
224
|
+
def _process_corrections(
|
|
225
|
+
self, segments: List[LyricsSegment], gap_sequences: List[GapSequence], metadata: Optional[Dict[str, Any]] = None
|
|
226
|
+
) -> Tuple[List[WordCorrection], List[LyricsSegment], List[CorrectionStep], Dict[str, str], Dict[str, str]]:
|
|
227
|
+
"""Process corrections using handlers.
|
|
228
|
+
|
|
229
|
+
The correction flow works as follows:
|
|
230
|
+
1. First pass: Process all gaps
|
|
231
|
+
- Iterate through each gap sequence
|
|
232
|
+
- Try handlers until one can handle the gap
|
|
233
|
+
- Store all corrections in the gap
|
|
234
|
+
2. Second pass: Apply corrections to segments
|
|
235
|
+
- Iterate through segments and words
|
|
236
|
+
- Look up any corrections that apply to each word
|
|
237
|
+
- Create new segments with corrected words
|
|
238
|
+
|
|
239
|
+
This two-pass approach separates the concerns of:
|
|
240
|
+
a) Finding and making corrections (gap-centric)
|
|
241
|
+
b) Applying those corrections to the original text (segment-centric)
|
|
242
|
+
"""
|
|
243
|
+
# Generate a unique session ID for this correction task
|
|
244
|
+
# This groups all traces in Langfuse for easy debugging
|
|
245
|
+
session_id = f"lyrics-correction-{shortuuid.uuid()}"
|
|
246
|
+
self.logger.info(f"Starting correction process with {len(gap_sequences)} gaps (session: {session_id})")
|
|
247
|
+
|
|
248
|
+
correction_steps = []
|
|
249
|
+
all_corrections = []
|
|
250
|
+
word_id_map = {}
|
|
251
|
+
segment_id_map = {}
|
|
252
|
+
|
|
253
|
+
# Create word map for handlers - include both transcribed and reference words
|
|
254
|
+
word_map = {w.id: w for s in segments for w in s.words} # Transcribed words
|
|
255
|
+
|
|
256
|
+
# Add reference words from all sources
|
|
257
|
+
for source, lyrics_data in self.reference_lyrics.items():
|
|
258
|
+
for segment in lyrics_data.segments:
|
|
259
|
+
for word in segment.words:
|
|
260
|
+
if word.id not in word_map: # Don't overwrite transcribed words
|
|
261
|
+
word_map[word.id] = word
|
|
262
|
+
|
|
263
|
+
# Build a linear position map for words to support agentic proposals
|
|
264
|
+
linear_position_map = {}
|
|
265
|
+
_pos_idx = 0
|
|
266
|
+
for s in segments:
|
|
267
|
+
for w in s.words:
|
|
268
|
+
linear_position_map[w.id] = _pos_idx
|
|
269
|
+
_pos_idx += 1
|
|
270
|
+
|
|
271
|
+
# Base handler data that all handlers need
|
|
272
|
+
base_handler_data = {
|
|
273
|
+
"word_map": word_map,
|
|
274
|
+
"anchor_sequences": self._anchor_sequences,
|
|
275
|
+
"audio_file_hash": metadata.get("audio_file_hash") if metadata else None,
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
# Check if we're in agentic-only mode
|
|
279
|
+
use_agentic_env = os.getenv("USE_AGENTIC_AI", "").lower() in {"1", "true", "yes"}
|
|
280
|
+
|
|
281
|
+
# Import agentic modules once if needed
|
|
282
|
+
_AgenticCorrector = None
|
|
283
|
+
_adapt = None
|
|
284
|
+
_ModelRouter = None
|
|
285
|
+
|
|
286
|
+
if use_agentic_env:
|
|
287
|
+
try:
|
|
288
|
+
from lyrics_transcriber.correction.agentic.agent import AgenticCorrector as _AgenticCorrector
|
|
289
|
+
from lyrics_transcriber.correction.agentic.adapter import adapt_proposals_to_word_corrections as _adapt
|
|
290
|
+
from lyrics_transcriber.correction.agentic.router import ModelRouter as _ModelRouter
|
|
291
|
+
self.logger.info("🤖 Agentic modules imported successfully - running in AGENTIC-ONLY mode")
|
|
292
|
+
except Exception as e:
|
|
293
|
+
self.logger.error(f"🤖 Failed to import agentic modules but USE_AGENTIC_AI=1: {e}")
|
|
294
|
+
raise RuntimeError(f"Agentic AI correction is enabled but required modules could not be imported: {e}") from e
|
|
295
|
+
|
|
296
|
+
# === TEMPORARY: Gap extraction for manual review ===
|
|
297
|
+
if os.getenv("DUMP_GAPS") == "1":
|
|
298
|
+
import yaml
|
|
299
|
+
|
|
300
|
+
# Build a flat list of all transcribed words for context
|
|
301
|
+
all_transcribed_words = []
|
|
302
|
+
for seg in segments:
|
|
303
|
+
all_transcribed_words.extend(seg.words)
|
|
304
|
+
|
|
305
|
+
# Create word position map
|
|
306
|
+
word_position = {w.id: idx for idx, w in enumerate(all_transcribed_words)}
|
|
307
|
+
|
|
308
|
+
gaps_data = []
|
|
309
|
+
for i, gap in enumerate(gap_sequences, 1):
|
|
310
|
+
gap_words = []
|
|
311
|
+
gap_positions = []
|
|
312
|
+
|
|
313
|
+
for word_id in gap.transcribed_word_ids:
|
|
314
|
+
if word_id in word_map:
|
|
315
|
+
word = word_map[word_id]
|
|
316
|
+
gap_words.append({
|
|
317
|
+
"id": word_id,
|
|
318
|
+
"text": word.text,
|
|
319
|
+
"start_time": round(getattr(word, 'start_time', 0), 3),
|
|
320
|
+
"end_time": round(getattr(word, 'end_time', 0), 3)
|
|
321
|
+
})
|
|
322
|
+
if word_id in word_position:
|
|
323
|
+
gap_positions.append(word_position[word_id])
|
|
324
|
+
|
|
325
|
+
# Get context words (10 before and 10 after)
|
|
326
|
+
preceding_words_list = []
|
|
327
|
+
following_words_list = []
|
|
328
|
+
|
|
329
|
+
if gap_positions:
|
|
330
|
+
first_gap_pos = min(gap_positions)
|
|
331
|
+
last_gap_pos = max(gap_positions)
|
|
332
|
+
|
|
333
|
+
# Get 10 words before the gap
|
|
334
|
+
start_pos = max(0, first_gap_pos - 10)
|
|
335
|
+
if start_pos == 0:
|
|
336
|
+
preceding_words_list.append("<song_start>")
|
|
337
|
+
for idx in range(start_pos, first_gap_pos):
|
|
338
|
+
if idx < len(all_transcribed_words):
|
|
339
|
+
preceding_words_list.append(all_transcribed_words[idx].text)
|
|
340
|
+
|
|
341
|
+
# Get 10 words after the gap
|
|
342
|
+
end_pos = min(len(all_transcribed_words), last_gap_pos + 11)
|
|
343
|
+
for idx in range(last_gap_pos + 1, end_pos):
|
|
344
|
+
if idx < len(all_transcribed_words):
|
|
345
|
+
following_words_list.append(all_transcribed_words[idx].text)
|
|
346
|
+
if end_pos == len(all_transcribed_words):
|
|
347
|
+
following_words_list.append("<song_end>")
|
|
348
|
+
|
|
349
|
+
# Convert to strings
|
|
350
|
+
preceding_words = " ".join(preceding_words_list)
|
|
351
|
+
following_words = " ".join(following_words_list)
|
|
352
|
+
|
|
353
|
+
# Get reference context from all sources using anchor sequences
|
|
354
|
+
reference_contexts = {}
|
|
355
|
+
|
|
356
|
+
# Find which anchor sequence this gap belongs to
|
|
357
|
+
parent_anchor = None
|
|
358
|
+
for anchor in self._anchor_sequences:
|
|
359
|
+
if hasattr(anchor, 'gaps') and gap in anchor.gaps:
|
|
360
|
+
parent_anchor = anchor
|
|
361
|
+
break
|
|
362
|
+
|
|
363
|
+
for source, lyrics_data in self.reference_lyrics.items():
|
|
364
|
+
if lyrics_data and lyrics_data.segments:
|
|
365
|
+
# Get all reference words
|
|
366
|
+
ref_words = []
|
|
367
|
+
for seg in lyrics_data.segments:
|
|
368
|
+
ref_words.extend([w.text for w in seg.words])
|
|
369
|
+
|
|
370
|
+
if parent_anchor and hasattr(parent_anchor, 'reference_word_ids'):
|
|
371
|
+
# Use anchor's reference word IDs to find the correct position
|
|
372
|
+
# Get the reference words from this anchor's context
|
|
373
|
+
anchor_ref_word_ids = parent_anchor.reference_word_ids.get(source, [])
|
|
374
|
+
|
|
375
|
+
if anchor_ref_word_ids:
|
|
376
|
+
# Find position of anchor's reference words
|
|
377
|
+
ref_word_map = {w.id: idx for idx, w in enumerate(
|
|
378
|
+
[w for seg in lyrics_data.segments for w in seg.words]
|
|
379
|
+
)}
|
|
380
|
+
|
|
381
|
+
# Get indices of anchor words in reference
|
|
382
|
+
anchor_indices = [ref_word_map[wid] for wid in anchor_ref_word_ids if wid in ref_word_map]
|
|
383
|
+
|
|
384
|
+
if anchor_indices:
|
|
385
|
+
# Use the anchor position to get context
|
|
386
|
+
anchor_start = min(anchor_indices)
|
|
387
|
+
anchor_end = max(anchor_indices)
|
|
388
|
+
|
|
389
|
+
# Get 20 words before and after the anchor region
|
|
390
|
+
context_start = max(0, anchor_start - 20)
|
|
391
|
+
context_end = min(len(ref_words), anchor_end + 21)
|
|
392
|
+
|
|
393
|
+
context_words = ref_words[context_start:context_end]
|
|
394
|
+
reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in context_words])
|
|
395
|
+
continue
|
|
396
|
+
|
|
397
|
+
# Fallback: estimate position by time percentage
|
|
398
|
+
if gap_words and gap_words[0].get('start_time'):
|
|
399
|
+
# Try to get song duration from segments
|
|
400
|
+
last_word_time = 0
|
|
401
|
+
for seg in segments:
|
|
402
|
+
if seg.words:
|
|
403
|
+
last_word_time = max(last_word_time, seg.words[-1].end_time)
|
|
404
|
+
|
|
405
|
+
if last_word_time > 0:
|
|
406
|
+
gap_time = gap_words[0]['start_time']
|
|
407
|
+
time_percentage = gap_time / last_word_time
|
|
408
|
+
|
|
409
|
+
# Use percentage to estimate position in reference
|
|
410
|
+
estimated_idx = int(len(ref_words) * time_percentage)
|
|
411
|
+
context_start = max(0, estimated_idx - 20)
|
|
412
|
+
context_end = min(len(ref_words), estimated_idx + 21)
|
|
413
|
+
|
|
414
|
+
context_words = ref_words[context_start:context_end]
|
|
415
|
+
reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in context_words])
|
|
416
|
+
else:
|
|
417
|
+
# Ultimate fallback: entire reference lyrics
|
|
418
|
+
reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in ref_words])
|
|
419
|
+
else:
|
|
420
|
+
# No time info, use entire reference lyrics
|
|
421
|
+
reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in ref_words])
|
|
422
|
+
|
|
423
|
+
gap_text = " ".join([w["text"] for w in gap_words])
|
|
424
|
+
|
|
425
|
+
gaps_data.append({
|
|
426
|
+
"gap_id": i,
|
|
427
|
+
"position": gap.transcription_position,
|
|
428
|
+
"preceding_words": preceding_words,
|
|
429
|
+
"gap_text": gap_text,
|
|
430
|
+
"following_words": following_words,
|
|
431
|
+
"transcribed_words": gap_words,
|
|
432
|
+
"reference_contexts": reference_contexts,
|
|
433
|
+
"word_count": len(gap_words),
|
|
434
|
+
"annotations": {
|
|
435
|
+
"your_decision": "",
|
|
436
|
+
"action_type": "# NO_ACTION | REPLACE | DELETE | INSERT | MERGE | SPLIT",
|
|
437
|
+
"target_word_ids": [],
|
|
438
|
+
"replacement_text": "",
|
|
439
|
+
"notes": ""
|
|
440
|
+
}
|
|
441
|
+
})
|
|
442
|
+
|
|
443
|
+
with open("gaps_review.yaml", 'w') as f:
|
|
444
|
+
f.write("# Gap Review Data for Manual Annotation\n")
|
|
445
|
+
f.write(f"# Total gaps: {len(gaps_data)}\n")
|
|
446
|
+
f.write("#\n")
|
|
447
|
+
f.write("# For each gap, fill in the annotations section:\n")
|
|
448
|
+
f.write("# your_decision: Brief description of what should happen\n")
|
|
449
|
+
f.write("# action_type: NO_ACTION | REPLACE | DELETE | INSERT | MERGE | SPLIT\n")
|
|
450
|
+
f.write("# target_word_ids: Which word IDs to operate on (from transcribed_words)\n")
|
|
451
|
+
f.write("# replacement_text: The corrected text (if applicable)\n")
|
|
452
|
+
f.write("# notes: Any additional reasoning or context\n")
|
|
453
|
+
f.write("#\n\n")
|
|
454
|
+
yaml.dump({"gaps": gaps_data}, f, default_flow_style=False, allow_unicode=True, width=120, sort_keys=False)
|
|
455
|
+
|
|
456
|
+
self.logger.info(f"📝 Dumped {len(gaps_data)} gaps to gaps_review.yaml - review and annotate!")
|
|
457
|
+
import sys
|
|
458
|
+
sys.exit(0)
|
|
459
|
+
# === END TEMPORARY CODE ===
|
|
460
|
+
|
|
461
|
+
for i, gap in enumerate(gap_sequences, 1):
|
|
462
|
+
self.logger.info(f"Processing gap {i}/{len(gap_sequences)} at position {gap.transcription_position}")
|
|
463
|
+
|
|
464
|
+
# Get the actual words for logging
|
|
465
|
+
gap_words = [word_map[word_id] for word_id in gap.transcribed_word_ids]
|
|
466
|
+
self.logger.debug(f"Gap text: '{' '.join(w.text for w in gap_words)}'")
|
|
467
|
+
|
|
468
|
+
# AGENTIC-ONLY MODE: Use agentic correction exclusively
|
|
469
|
+
if use_agentic_env:
|
|
470
|
+
self.logger.info(f"🤖 Attempting agentic correction for gap {i}/{len(gap_sequences)}")
|
|
471
|
+
try:
|
|
472
|
+
# Prepare gap data for classification-first workflow
|
|
473
|
+
gap_words_data = []
|
|
474
|
+
for word_id in gap.transcribed_word_ids:
|
|
475
|
+
if word_id in word_map:
|
|
476
|
+
word = word_map[word_id]
|
|
477
|
+
gap_words_data.append({
|
|
478
|
+
"id": word_id,
|
|
479
|
+
"text": word.text,
|
|
480
|
+
"start_time": getattr(word, 'start_time', 0),
|
|
481
|
+
"end_time": getattr(word, 'end_time', 0)
|
|
482
|
+
})
|
|
483
|
+
|
|
484
|
+
# Get context words
|
|
485
|
+
all_transcribed_words = []
|
|
486
|
+
for seg in segments:
|
|
487
|
+
all_transcribed_words.extend(seg.words)
|
|
488
|
+
word_position = {w.id: idx for idx, w in enumerate(all_transcribed_words)}
|
|
489
|
+
|
|
490
|
+
gap_positions = [word_position[wid] for wid in gap.transcribed_word_ids if wid in word_position]
|
|
491
|
+
preceding_words = ""
|
|
492
|
+
following_words = ""
|
|
493
|
+
|
|
494
|
+
if gap_positions:
|
|
495
|
+
first_gap_pos = min(gap_positions)
|
|
496
|
+
last_gap_pos = max(gap_positions)
|
|
497
|
+
|
|
498
|
+
# Get 10 words before
|
|
499
|
+
start_pos = max(0, first_gap_pos - 10)
|
|
500
|
+
preceding_list = [all_transcribed_words[idx].text for idx in range(start_pos, first_gap_pos) if idx < len(all_transcribed_words)]
|
|
501
|
+
preceding_words = " ".join(preceding_list)
|
|
502
|
+
|
|
503
|
+
# Get 10 words after
|
|
504
|
+
end_pos = min(len(all_transcribed_words), last_gap_pos + 11)
|
|
505
|
+
following_list = [all_transcribed_words[idx].text for idx in range(last_gap_pos + 1, end_pos) if idx < len(all_transcribed_words)]
|
|
506
|
+
following_words = " ".join(following_list)
|
|
507
|
+
|
|
508
|
+
# Get reference contexts from all sources
|
|
509
|
+
reference_contexts = {}
|
|
510
|
+
for source, lyrics_data in self.reference_lyrics.items():
|
|
511
|
+
if lyrics_data and lyrics_data.segments:
|
|
512
|
+
ref_words = []
|
|
513
|
+
for seg in lyrics_data.segments:
|
|
514
|
+
ref_words.extend([w.text for w in seg.words])
|
|
515
|
+
# For now, use full text (handlers will extract relevant portions)
|
|
516
|
+
reference_contexts[source] = " ".join(ref_words)
|
|
517
|
+
|
|
518
|
+
# Get artist and title from metadata
|
|
519
|
+
artist = metadata.get("artist") if metadata else None
|
|
520
|
+
title = metadata.get("title") if metadata else None
|
|
521
|
+
|
|
522
|
+
# Choose model via router
|
|
523
|
+
_router = _ModelRouter()
|
|
524
|
+
uncertainty = 0.3 if len(gap_words_data) <= 2 else 0.7
|
|
525
|
+
model_id = _router.choose_model("gap", uncertainty)
|
|
526
|
+
self.logger.debug(f"🤖 Router selected model: {model_id}")
|
|
527
|
+
|
|
528
|
+
# Create agent and use new classification-first workflow
|
|
529
|
+
self.logger.debug(f"🤖 Creating AgenticCorrector with model: {model_id}")
|
|
530
|
+
_agent = _AgenticCorrector.from_model(
|
|
531
|
+
model=model_id,
|
|
532
|
+
session_id=session_id,
|
|
533
|
+
cache_dir=str(self._cache_dir)
|
|
534
|
+
)
|
|
535
|
+
|
|
536
|
+
# Use new propose_for_gap method
|
|
537
|
+
self.logger.debug(f"🤖 Calling agent.propose_for_gap() for gap {i}")
|
|
538
|
+
_proposals = _agent.propose_for_gap(
|
|
539
|
+
gap_id=f"gap_{i}",
|
|
540
|
+
gap_words=gap_words_data,
|
|
541
|
+
preceding_words=preceding_words,
|
|
542
|
+
following_words=following_words,
|
|
543
|
+
reference_contexts=reference_contexts,
|
|
544
|
+
artist=artist,
|
|
545
|
+
title=title
|
|
546
|
+
)
|
|
547
|
+
self.logger.debug(f"🤖 Agent returned {len(_proposals) if _proposals else 0} proposals")
|
|
548
|
+
_agentic_corrections = _adapt(_proposals, word_map, linear_position_map) if _proposals else []
|
|
549
|
+
self.logger.debug(f"🤖 Adapter returned {len(_agentic_corrections)} corrections")
|
|
550
|
+
|
|
551
|
+
if _agentic_corrections:
|
|
552
|
+
self.logger.info(f"🤖 Applying {len(_agentic_corrections)} agentic corrections for gap {i}")
|
|
553
|
+
affected_word_ids = [w.id for w in self._get_affected_words(gap, segments)]
|
|
554
|
+
affected_segment_ids = [s.id for s in self._get_affected_segments(gap, segments)]
|
|
555
|
+
updated_segments = self._apply_corrections_to_segments(self._get_affected_segments(gap, segments), _agentic_corrections)
|
|
556
|
+
for correction in _agentic_corrections:
|
|
557
|
+
if correction.word_id and correction.corrected_word_id:
|
|
558
|
+
word_id_map[correction.word_id] = correction.corrected_word_id
|
|
559
|
+
for old_seg, new_seg in zip(self._get_affected_segments(gap, segments), updated_segments):
|
|
560
|
+
segment_id_map[old_seg.id] = new_seg.id
|
|
561
|
+
step = CorrectionStep(
|
|
562
|
+
handler_name="AgenticCorrector",
|
|
563
|
+
affected_word_ids=affected_word_ids,
|
|
564
|
+
affected_segment_ids=affected_segment_ids,
|
|
565
|
+
corrections=_agentic_corrections,
|
|
566
|
+
segments_before=self._get_affected_segments(gap, segments),
|
|
567
|
+
segments_after=updated_segments,
|
|
568
|
+
created_word_ids=[w.id for w in self._get_new_words(updated_segments, affected_word_ids)],
|
|
569
|
+
deleted_word_ids=[id for id in affected_word_ids if not self._word_exists(id, updated_segments)],
|
|
570
|
+
)
|
|
571
|
+
correction_steps.append(step)
|
|
572
|
+
all_corrections.extend(_agentic_corrections)
|
|
573
|
+
# Log corrections made
|
|
574
|
+
for correction in _agentic_corrections:
|
|
575
|
+
self.logger.info(
|
|
576
|
+
f"Made correction: '{correction.original_word}' -> '{correction.corrected_word}' "
|
|
577
|
+
f"(confidence: {correction.confidence:.2f}, reason: {correction.reason})"
|
|
578
|
+
)
|
|
579
|
+
else:
|
|
580
|
+
self.logger.info(f"🤖 No agentic corrections needed for gap {i}")
|
|
581
|
+
|
|
582
|
+
except Exception as e:
|
|
583
|
+
# In agentic-only mode, fail fast instead of falling back
|
|
584
|
+
self.logger.error(f"🤖 Agentic correction failed for gap {i}: {e}", exc_info=True)
|
|
585
|
+
raise RuntimeError(f"Agentic AI correction failed for gap {i}: {e}") from e
|
|
586
|
+
|
|
587
|
+
# Skip rule-based handlers completely in agentic mode
|
|
588
|
+
continue
|
|
589
|
+
|
|
590
|
+
# RULE-BASED MODE: Try each handler in order
|
|
591
|
+
for handler in self.handlers:
|
|
592
|
+
handler_name = handler.__class__.__name__
|
|
593
|
+
can_handle, handler_data = handler.can_handle(gap, base_handler_data)
|
|
594
|
+
|
|
595
|
+
if can_handle:
|
|
596
|
+
# Merge base handler data with specific handler data
|
|
597
|
+
handler_data = {**base_handler_data, **(handler_data or {})}
|
|
598
|
+
|
|
599
|
+
corrections = handler.handle(gap, handler_data)
|
|
600
|
+
if corrections:
|
|
601
|
+
self.logger.info(f"Handler {handler_name} made {len(corrections)} corrections")
|
|
602
|
+
# Track affected IDs
|
|
603
|
+
affected_word_ids = [w.id for w in self._get_affected_words(gap, segments)]
|
|
604
|
+
affected_segment_ids = [s.id for s in self._get_affected_segments(gap, segments)]
|
|
605
|
+
|
|
606
|
+
# Apply corrections and get updated segments
|
|
607
|
+
updated_segments = self._apply_corrections_to_segments(self._get_affected_segments(gap, segments), corrections)
|
|
608
|
+
|
|
609
|
+
# Update ID maps
|
|
610
|
+
for correction in corrections:
|
|
611
|
+
if correction.word_id and correction.corrected_word_id:
|
|
612
|
+
word_id_map[correction.word_id] = correction.corrected_word_id
|
|
613
|
+
|
|
614
|
+
# Map segment IDs
|
|
615
|
+
for old_seg, new_seg in zip(self._get_affected_segments(gap, segments), updated_segments):
|
|
616
|
+
segment_id_map[old_seg.id] = new_seg.id
|
|
617
|
+
|
|
618
|
+
# Create correction step
|
|
619
|
+
step = CorrectionStep(
|
|
620
|
+
handler_name=handler_name,
|
|
621
|
+
affected_word_ids=affected_word_ids,
|
|
622
|
+
affected_segment_ids=affected_segment_ids,
|
|
623
|
+
corrections=corrections,
|
|
624
|
+
segments_before=self._get_affected_segments(gap, segments),
|
|
625
|
+
segments_after=updated_segments,
|
|
626
|
+
created_word_ids=[w.id for w in self._get_new_words(updated_segments, affected_word_ids)],
|
|
627
|
+
deleted_word_ids=[id for id in affected_word_ids if not self._word_exists(id, updated_segments)],
|
|
628
|
+
)
|
|
629
|
+
correction_steps.append(step)
|
|
630
|
+
all_corrections.extend(corrections)
|
|
631
|
+
|
|
632
|
+
# Log correction details
|
|
633
|
+
for correction in corrections:
|
|
634
|
+
self.logger.info(
|
|
635
|
+
f"Made correction: '{correction.original_word}' -> '{correction.corrected_word}' "
|
|
636
|
+
f"(confidence: {correction.confidence:.2f}, reason: {correction.reason})"
|
|
637
|
+
)
|
|
638
|
+
break # Stop trying other handlers once we've made corrections
|
|
639
|
+
else:
|
|
640
|
+
self.logger.debug(f"Handler {handler_name} found no corrections needed")
|
|
641
|
+
else:
|
|
642
|
+
self.logger.debug(f"Handler {handler_name} cannot handle gap")
|
|
643
|
+
|
|
644
|
+
# Create final result with correction history
|
|
645
|
+
corrected_segments = self._apply_all_corrections(segments, all_corrections)
|
|
646
|
+
self.logger.info(f"Correction process completed with {len(all_corrections)} total corrections")
|
|
647
|
+
return all_corrections, corrected_segments, correction_steps, word_id_map, segment_id_map
|
|
648
|
+
|
|
649
|
+
def _get_new_words(self, segments: List[LyricsSegment], original_word_ids: List[str]) -> List[Word]:
|
|
650
|
+
"""Find words that were created during correction."""
|
|
651
|
+
return [w for s in segments for w in s.words if w.id not in original_word_ids]
|
|
652
|
+
|
|
653
|
+
def _word_exists(self, word_id: str, segments: List[LyricsSegment]) -> bool:
|
|
654
|
+
"""Check if a word ID still exists in the segments."""
|
|
655
|
+
return any(w.id == word_id for s in segments for w in s.words)
|
|
656
|
+
|
|
657
|
+
def _apply_corrections_to_segments(self, segments: List[LyricsSegment], corrections: List[WordCorrection]) -> List[LyricsSegment]:
    """Apply corrections to create new segments.

    Walks every word across ``segments`` in order, tracking its linear index,
    and rebuilds each segment with corrected words. Three correction shapes
    are supported per word position:

    * split  -- several corrections share one ``original_position`` and at
      least one carries ``split_total``; the original word's time span is
      divided evenly between the new words.
    * replace -- a single non-deletion correction; the new word keeps the
      original word's time span.
    * delete -- a single correction with ``is_deletion`` set; the word is
      dropped, and a segment left with no words is dropped entirely.

    Words with no correction at their position are carried over unchanged.

    Args:
        segments: Segments whose words should be corrected.
        corrections: Corrections keyed by each word's linear position.

    Returns:
        Newly built segments; original segment IDs are preserved.
    """
    # Group corrections by original_position so multi-way splits (several
    # corrections targeting one word) are handled together.
    # NOTE(review): assumes original_position is the word's linear index
    # across all segments — confirm with the handlers that produce corrections.
    correction_map = {}
    for c in corrections:
        correction_map.setdefault(c.original_position, []).append(c)

    corrected_segments = []
    current_word_idx = 0  # linear index of the current word across all segments

    for segment in segments:
        corrected_words = []
        for word in segment.words:
            if current_word_idx in correction_map:
                # Order split pieces by split_index (None sorts as 0).
                word_corrections = sorted(correction_map[current_word_idx], key=lambda x: x.split_index or 0)

                # Check if any correction has a valid split_total
                total_splits = next((c.split_total for c in word_corrections if c.split_total is not None), None)

                if total_splits:
                    # Handle word split: divide the original span into equal slices.
                    split_duration = (word.end_time - word.start_time) / total_splits

                    for i, correction in enumerate(word_corrections):
                        start_time = word.start_time + (i * split_duration)
                        end_time = start_time + split_duration

                        # Update corrected_position as we create new words
                        correction.corrected_position = len(corrected_words)
                        new_word = Word(
                            id=correction.corrected_word_id or WordUtils.generate_id(),
                            text=self._preserve_formatting(correction.original_word, correction.corrected_word),
                            start_time=start_time,
                            end_time=end_time,
                            confidence=correction.confidence,
                            created_during_correction=True,
                        )
                        corrected_words.append(new_word)
                else:
                    # Handle single word replacement (or deletion, which emits nothing).
                    correction = word_corrections[0]
                    if not correction.is_deletion:
                        # Update corrected_position
                        correction.corrected_position = len(corrected_words)
                        new_word = Word(
                            id=correction.corrected_word_id or WordUtils.generate_id(),
                            text=self._preserve_formatting(correction.original_word, correction.corrected_word),
                            start_time=word.start_time,
                            end_time=word.end_time,
                            confidence=correction.confidence,
                            created_during_correction=True,
                        )
                        corrected_words.append(new_word)
            else:
                corrected_words.append(word)
            current_word_idx += 1

        if corrected_words:
            corrected_segments.append(
                LyricsSegment(
                    id=segment.id,  # Preserve original segment ID
                    text=" ".join(w.text for w in corrected_words),
                    words=corrected_words,
                    start_time=segment.start_time,
                    end_time=segment.end_time,
                )
            )

    return corrected_segments
|
|
731
|
+
|
|
732
|
+
def _get_affected_segments(self, gap: GapSequence, segments: List[LyricsSegment]) -> List[LyricsSegment]:
|
|
733
|
+
"""Get segments that contain words from the gap sequence."""
|
|
734
|
+
affected_segments = []
|
|
735
|
+
gap_word_ids = set(gap.transcribed_word_ids)
|
|
736
|
+
|
|
737
|
+
for segment in segments:
|
|
738
|
+
# Check if any words in this segment are part of the gap
|
|
739
|
+
if any(w.id in gap_word_ids for w in segment.words):
|
|
740
|
+
affected_segments.append(segment)
|
|
741
|
+
elif affected_segments: # We've passed the gap
|
|
742
|
+
break
|
|
743
|
+
|
|
744
|
+
return affected_segments
|
|
745
|
+
|
|
746
|
+
def _get_affected_words(self, gap: GapSequence, segments: List[LyricsSegment]) -> List[Word]:
|
|
747
|
+
"""Get words that are part of the gap sequence."""
|
|
748
|
+
# Create a map of word IDs to Word objects for quick lookup
|
|
749
|
+
word_map = {w.id: w for s in segments for w in s.words}
|
|
750
|
+
|
|
751
|
+
# Get the actual Word objects using the IDs
|
|
752
|
+
return [word_map[word_id] for word_id in gap.transcribed_word_ids]
|
|
753
|
+
|
|
754
|
+
def _apply_all_corrections(self, segments: List[LyricsSegment], corrections: List[WordCorrection]) -> List[LyricsSegment]:
|
|
755
|
+
"""Apply all corrections to create final corrected segments."""
|
|
756
|
+
# Make a deep copy to avoid modifying original segments
|
|
757
|
+
working_segments = deepcopy(segments)
|
|
758
|
+
|
|
759
|
+
# Apply corrections in order
|
|
760
|
+
return self._apply_corrections_to_segments(working_segments, corrections)
|