karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
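
The single hunk that follows is the largest individual addition in this release; judging by its contents (the `LyricsCorrector` class) and its 760 added lines, it appears to be the new `lyrics_transcriber/correction/corrector.py` listed as entry 88 above. Two behaviours worth noting before reading it are restated here as a minimal runnable sketch that assumes nothing beyond what the diff itself shows (the stub handler objects are purely illustrative): only five rule-based handlers are enabled by default, and agentic correction is an environment-variable opt-in.

```python
# Minimal sketch of the default handler filter and the agentic opt-in flag,
# mirroring the logic in the corrector diff below. Handler instances are
# stubbed as strings here; in the package they are real handler objects.
import os

DEFAULT_ENABLED_HANDLERS = [
    "ExtendAnchorHandler",
    "WordCountMatchHandler",
    "SyllablesMatchHandler",
    "RelaxedWordCountMatchHandler",
    "NoSpacePunctuationMatchHandler",
]
ALL_HANDLER_IDS = DEFAULT_ENABLED_HANDLERS + [
    "LLMHandler_Ollama_R17B",
    "RepeatCorrectionHandler",
    "SoundAlikeHandler",
    "LevenshteinHandler",
]
all_handlers = [(handler_id, f"<{handler_id} instance>") for handler_id in ALL_HANDLER_IDS]

# With no explicit enabled_handlers argument, only the defaults survive the filter;
# the LLM, Repeat, SoundAlike and Levenshtein handlers are constructed but left off.
enabled_handlers = None
handler_filter = enabled_handlers if enabled_handlers is not None else DEFAULT_ENABLED_HANDLERS
handlers = [h[1] for h in all_handlers if h[0] in handler_filter]
assert len(handlers) == 5

# Agentic-only mode is opted into via USE_AGENTIC_AI; "1", "true" or "yes" all enable it.
os.environ["USE_AGENTIC_AI"] = "true"
assert os.getenv("USE_AGENTIC_AI", "0").lower() in {"1", "true", "yes"}
```
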
@@ -0,0 +1,760 @@
+from typing import List, Optional, Tuple, Union, Dict, Any
+import logging
+from pathlib import Path
+from copy import deepcopy
+import os
+import shortuuid
+
+from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
+from lyrics_transcriber.correction.handlers.llm import LLMHandler
+from lyrics_transcriber.correction.handlers.no_space_punct_match import NoSpacePunctuationMatchHandler
+from lyrics_transcriber.correction.handlers.relaxed_word_count_match import RelaxedWordCountMatchHandler
+from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
+from lyrics_transcriber.correction.handlers.sound_alike import SoundAlikeHandler
+from lyrics_transcriber.correction.handlers.syllables_match import SyllablesMatchHandler
+from lyrics_transcriber.correction.handlers.word_count_match import WordCountMatchHandler
+from lyrics_transcriber.types import (
+    CorrectionStep,
+    GapSequence,
+    LyricsData,
+    TranscriptionResult,
+    CorrectionResult,
+    LyricsSegment,
+    WordCorrection,
+    Word,
+)
+from lyrics_transcriber.correction.anchor_sequence import AnchorSequenceFinder
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.extend_anchor import ExtendAnchorHandler
+from lyrics_transcriber.utils.word_utils import WordUtils
+from lyrics_transcriber.correction.handlers.llm_providers import OllamaProvider, OpenAIProvider
+
+
+class LyricsCorrector:
+    """
+    Coordinates lyrics correction process using multiple correction handlers.
+    """
+
+    def __init__(
+        self,
+        cache_dir: Union[str, Path],
+        handlers: Optional[List[GapCorrectionHandler]] = None,
+        enabled_handlers: Optional[List[str]] = None,
+        anchor_finder: Optional[AnchorSequenceFinder] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
+        self.logger = logger or logging.getLogger(__name__)
+        self._anchor_finder = anchor_finder
+        self._cache_dir = Path(cache_dir)
+
+        # Define default enabled handlers - excluding LLM, Repeat, SoundAlike, and Levenshtein
+        DEFAULT_ENABLED_HANDLERS = [
+            "ExtendAnchorHandler",
+            "WordCountMatchHandler",
+            "SyllablesMatchHandler",
+            "RelaxedWordCountMatchHandler",
+            "NoSpacePunctuationMatchHandler",
+        ]
+
+        # Create all handlers but respect enabled_handlers if provided
+        all_handlers = [
+            ("ExtendAnchorHandler", ExtendAnchorHandler(logger=self.logger)),
+            ("WordCountMatchHandler", WordCountMatchHandler(logger=self.logger)),
+            ("SyllablesMatchHandler", SyllablesMatchHandler(logger=self.logger)),
+            ("RelaxedWordCountMatchHandler", RelaxedWordCountMatchHandler(logger=self.logger)),
+            ("NoSpacePunctuationMatchHandler", NoSpacePunctuationMatchHandler(logger=self.logger)),
+            (
+                "LLMHandler_Ollama_R17B",
+                LLMHandler(
+                    provider=OllamaProvider(model="deepseek-r1:7b", logger=self.logger),
+                    name="LLMHandler_Ollama_R17B",
+                    logger=self.logger,
+                    cache_dir=self._cache_dir,
+                ),
+            ),
+            ("RepeatCorrectionHandler", RepeatCorrectionHandler(logger=self.logger)),
+            ("SoundAlikeHandler", SoundAlikeHandler(logger=self.logger)),
+            ("LevenshteinHandler", LevenshteinHandler(logger=self.logger)),
+        ]
+
+        # Add OpenRouter handlers only if API key is available
+        if os.getenv("OPENROUTER_API_KEY"):
+            openrouter_handlers = [
+                (
+                    "LLMHandler_OpenRouter_Sonnet",
+                    LLMHandler(
+                        provider=OpenAIProvider(
+                            model="anthropic/claude-3-sonnet",
+                            api_key=os.getenv("OPENROUTER_API_KEY"),
+                            base_url="https://openrouter.ai/api/v1",
+                            logger=self.logger,
+                        ),
+                        name="LLMHandler_OpenRouter_Sonnet",
+                        logger=self.logger,
+                        cache_dir=self._cache_dir,
+                    ),
+                ),
+                (
+                    "LLMHandler_OpenRouter_R1",
+                    LLMHandler(
+                        provider=OpenAIProvider(
+                            model="deepseek/deepseek-r1",
+                            api_key=os.getenv("OPENROUTER_API_KEY"),
+                            base_url="https://openrouter.ai/api/v1",
+                            logger=self.logger,
+                        ),
+                        name="LLMHandler_OpenRouter_R1",
+                        logger=self.logger,
+                        cache_dir=self._cache_dir,
+                    ),
+                ),
+            ]
+            all_handlers.extend(openrouter_handlers)
+
+        # Store all handler information
+        self.all_handlers = [
+            {
+                "id": handler_id,
+                "name": handler_id,
+                "description": handler.__class__.__doc__ or "",
+                "enabled": handler_id in (enabled_handlers if enabled_handlers is not None else DEFAULT_ENABLED_HANDLERS),
+            }
+            for handler_id, handler in all_handlers
+        ]
+
+        # Add AgenticCorrector if agentic AI is enabled
+        use_agentic_env = os.getenv("USE_AGENTIC_AI", "0").lower() in {"1", "true", "yes"}
+        if use_agentic_env:
+            self.all_handlers.append({
+                "id": "AgenticCorrector",
+                "name": "Agentic AI Corrector",
+                "description": "AI-powered classification and correction of lyric gaps using LLM reasoning",
+                "enabled": True,
+            })
+
+        if handlers:
+            self.handlers = handlers
+        else:
+            # Use provided enabled_handlers if available, otherwise use defaults
+            handler_filter = enabled_handlers if enabled_handlers is not None else DEFAULT_ENABLED_HANDLERS
+            self.handlers = [h[1] for h in all_handlers if h[0] in handler_filter]
+
+    @property
+    def anchor_finder(self) -> AnchorSequenceFinder:
+        """Lazy load the anchor finder instance, initializing it if not already set."""
+        if self._anchor_finder is None:
+            self._anchor_finder = AnchorSequenceFinder(cache_dir=self._cache_dir, logger=self.logger)
+        return self._anchor_finder
+
+    def run(
+        self,
+        transcription_results: List[TranscriptionResult],
+        lyrics_results: Dict[str, LyricsData],
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> CorrectionResult:
+        """Execute the correction process."""
+        # Optional agentic routing flag from environment; default off for safety
+        agentic_enabled = os.getenv("USE_AGENTIC_AI", "").lower() in {"1", "true", "yes"}
+        self.logger.info(f"🤖 AGENTIC MODE: {'ENABLED' if agentic_enabled else 'DISABLED'} (USE_AGENTIC_AI={os.getenv('USE_AGENTIC_AI', 'NOT_SET')})")
+        if not transcription_results:
+            self.logger.error("No transcription results available")
+            raise ValueError("No primary transcription data available")
+
+        # Store reference lyrics for use in word map
+        self.reference_lyrics = lyrics_results
+
+        # Get primary transcription
+        primary_transcription_result = sorted(transcription_results, key=lambda x: x.priority)[0]
+        primary_transcription = primary_transcription_result.result
+        transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in primary_transcription.segments)
+
+        # Find anchor sequences and gaps
+        self.logger.debug("Finding anchor sequences and gaps")
+        anchor_sequences = self.anchor_finder.find_anchors(transcribed_text, lyrics_results, primary_transcription_result)
+        gap_sequences = self.anchor_finder.find_gaps(transcribed_text, anchor_sequences, lyrics_results, primary_transcription_result)
+
+        # Store anchor sequences for use in correction handlers
+        self._anchor_sequences = anchor_sequences
+
+        # Process corrections with metadata
+        corrections, corrected_segments, correction_steps, word_id_map, segment_id_map = self._process_corrections(
+            primary_transcription.segments, gap_sequences, metadata=metadata
+        )
+
+        # Calculate correction ratio
+        total_words = sum(len(segment.words) for segment in corrected_segments)
+        corrections_made = len(corrections)
+        correction_ratio = 1 - (corrections_made / total_words if total_words > 0 else 0)
+
+        # Get the currently enabled handler IDs using the handler's name attribute if available
+        enabled_handlers = [getattr(handler, "name", handler.__class__.__name__) for handler in self.handlers]
+
+        result = CorrectionResult(
+            original_segments=primary_transcription.segments,
+            corrected_segments=corrected_segments,
+            corrections=corrections,
+            corrections_made=corrections_made,
+            confidence=correction_ratio,
+            reference_lyrics=lyrics_results,
+            anchor_sequences=anchor_sequences,
+            resized_segments=[],
+            gap_sequences=gap_sequences,
+            metadata={
+                "anchor_sequences_count": len(anchor_sequences),
+                "gap_sequences_count": len(gap_sequences),
+                "total_words": total_words,
+                "correction_ratio": correction_ratio,
+                "available_handlers": self.all_handlers,
+                "enabled_handlers": enabled_handlers,
+                "agentic_routing": "agentic" if agentic_enabled else "rule-based",
+            },
+            correction_steps=correction_steps,
+            word_id_map=word_id_map,
+            segment_id_map=segment_id_map,
+        )
+        return result
+
+    def _preserve_formatting(self, original: str, new_word: str) -> str:
+        """Preserve original word's formatting when applying correction."""
+        # Find leading/trailing whitespace
+        leading_space = " " if original != original.lstrip() else ""
+        trailing_space = " " if original != original.rstrip() else ""
+        return leading_space + new_word.strip() + trailing_space
+
+    def _process_corrections(
+        self, segments: List[LyricsSegment], gap_sequences: List[GapSequence], metadata: Optional[Dict[str, Any]] = None
+    ) -> Tuple[List[WordCorrection], List[LyricsSegment], List[CorrectionStep], Dict[str, str], Dict[str, str]]:
+        """Process corrections using handlers.
+
+        The correction flow works as follows:
+        1. First pass: Process all gaps
+           - Iterate through each gap sequence
+           - Try handlers until one can handle the gap
+           - Store all corrections in the gap
+        2. Second pass: Apply corrections to segments
+           - Iterate through segments and words
+           - Look up any corrections that apply to each word
+           - Create new segments with corrected words
+
+        This two-pass approach separates the concerns of:
+        a) Finding and making corrections (gap-centric)
+        b) Applying those corrections to the original text (segment-centric)
+        """
+        # Generate a unique session ID for this correction task
+        # This groups all traces in Langfuse for easy debugging
+        session_id = f"lyrics-correction-{shortuuid.uuid()}"
+        self.logger.info(f"Starting correction process with {len(gap_sequences)} gaps (session: {session_id})")
+
+        correction_steps = []
+        all_corrections = []
+        word_id_map = {}
+        segment_id_map = {}
+
+        # Create word map for handlers - include both transcribed and reference words
+        word_map = {w.id: w for s in segments for w in s.words}  # Transcribed words
+
+        # Add reference words from all sources
+        for source, lyrics_data in self.reference_lyrics.items():
+            for segment in lyrics_data.segments:
+                for word in segment.words:
+                    if word.id not in word_map:  # Don't overwrite transcribed words
+                        word_map[word.id] = word
+
+        # Build a linear position map for words to support agentic proposals
+        linear_position_map = {}
+        _pos_idx = 0
+        for s in segments:
+            for w in s.words:
+                linear_position_map[w.id] = _pos_idx
+                _pos_idx += 1
+
+        # Base handler data that all handlers need
+        base_handler_data = {
+            "word_map": word_map,
+            "anchor_sequences": self._anchor_sequences,
+            "audio_file_hash": metadata.get("audio_file_hash") if metadata else None,
+        }
+
+        # Check if we're in agentic-only mode
+        use_agentic_env = os.getenv("USE_AGENTIC_AI", "").lower() in {"1", "true", "yes"}
+
+        # Import agentic modules once if needed
+        _AgenticCorrector = None
+        _adapt = None
+        _ModelRouter = None
+
+        if use_agentic_env:
+            try:
+                from lyrics_transcriber.correction.agentic.agent import AgenticCorrector as _AgenticCorrector
+                from lyrics_transcriber.correction.agentic.adapter import adapt_proposals_to_word_corrections as _adapt
+                from lyrics_transcriber.correction.agentic.router import ModelRouter as _ModelRouter
+                self.logger.info("🤖 Agentic modules imported successfully - running in AGENTIC-ONLY mode")
+            except Exception as e:
+                self.logger.error(f"🤖 Failed to import agentic modules but USE_AGENTIC_AI=1: {e}")
+                raise RuntimeError(f"Agentic AI correction is enabled but required modules could not be imported: {e}") from e
+
+        # === TEMPORARY: Gap extraction for manual review ===
+        if os.getenv("DUMP_GAPS") == "1":
+            import yaml
+
+            # Build a flat list of all transcribed words for context
+            all_transcribed_words = []
+            for seg in segments:
+                all_transcribed_words.extend(seg.words)
+
+            # Create word position map
+            word_position = {w.id: idx for idx, w in enumerate(all_transcribed_words)}
+
+            gaps_data = []
+            for i, gap in enumerate(gap_sequences, 1):
+                gap_words = []
+                gap_positions = []
+
+                for word_id in gap.transcribed_word_ids:
+                    if word_id in word_map:
+                        word = word_map[word_id]
+                        gap_words.append({
+                            "id": word_id,
+                            "text": word.text,
+                            "start_time": round(getattr(word, 'start_time', 0), 3),
+                            "end_time": round(getattr(word, 'end_time', 0), 3)
+                        })
+                        if word_id in word_position:
+                            gap_positions.append(word_position[word_id])
+
+                # Get context words (10 before and 10 after)
+                preceding_words_list = []
+                following_words_list = []
+
+                if gap_positions:
+                    first_gap_pos = min(gap_positions)
+                    last_gap_pos = max(gap_positions)
+
+                    # Get 10 words before the gap
+                    start_pos = max(0, first_gap_pos - 10)
+                    if start_pos == 0:
+                        preceding_words_list.append("<song_start>")
+                    for idx in range(start_pos, first_gap_pos):
+                        if idx < len(all_transcribed_words):
+                            preceding_words_list.append(all_transcribed_words[idx].text)
+
+                    # Get 10 words after the gap
+                    end_pos = min(len(all_transcribed_words), last_gap_pos + 11)
+                    for idx in range(last_gap_pos + 1, end_pos):
+                        if idx < len(all_transcribed_words):
+                            following_words_list.append(all_transcribed_words[idx].text)
+                    if end_pos == len(all_transcribed_words):
+                        following_words_list.append("<song_end>")
+
+                # Convert to strings
+                preceding_words = " ".join(preceding_words_list)
+                following_words = " ".join(following_words_list)
+
+                # Get reference context from all sources using anchor sequences
+                reference_contexts = {}
+
+                # Find which anchor sequence this gap belongs to
+                parent_anchor = None
+                for anchor in self._anchor_sequences:
+                    if hasattr(anchor, 'gaps') and gap in anchor.gaps:
+                        parent_anchor = anchor
+                        break
+
+                for source, lyrics_data in self.reference_lyrics.items():
+                    if lyrics_data and lyrics_data.segments:
+                        # Get all reference words
+                        ref_words = []
+                        for seg in lyrics_data.segments:
+                            ref_words.extend([w.text for w in seg.words])
+
+                        if parent_anchor and hasattr(parent_anchor, 'reference_word_ids'):
+                            # Use anchor's reference word IDs to find the correct position
+                            # Get the reference words from this anchor's context
+                            anchor_ref_word_ids = parent_anchor.reference_word_ids.get(source, [])
+
+                            if anchor_ref_word_ids:
+                                # Find position of anchor's reference words
+                                ref_word_map = {w.id: idx for idx, w in enumerate(
+                                    [w for seg in lyrics_data.segments for w in seg.words]
+                                )}
+
+                                # Get indices of anchor words in reference
+                                anchor_indices = [ref_word_map[wid] for wid in anchor_ref_word_ids if wid in ref_word_map]
+
+                                if anchor_indices:
+                                    # Use the anchor position to get context
+                                    anchor_start = min(anchor_indices)
+                                    anchor_end = max(anchor_indices)
+
+                                    # Get 20 words before and after the anchor region
+                                    context_start = max(0, anchor_start - 20)
+                                    context_end = min(len(ref_words), anchor_end + 21)
+
+                                    context_words = ref_words[context_start:context_end]
+                                    reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in context_words])
+                                    continue
+
+                        # Fallback: estimate position by time percentage
+                        if gap_words and gap_words[0].get('start_time'):
+                            # Try to get song duration from segments
+                            last_word_time = 0
+                            for seg in segments:
+                                if seg.words:
+                                    last_word_time = max(last_word_time, seg.words[-1].end_time)
+
+                            if last_word_time > 0:
+                                gap_time = gap_words[0]['start_time']
+                                time_percentage = gap_time / last_word_time
+
+                                # Use percentage to estimate position in reference
+                                estimated_idx = int(len(ref_words) * time_percentage)
+                                context_start = max(0, estimated_idx - 20)
+                                context_end = min(len(ref_words), estimated_idx + 21)
+
+                                context_words = ref_words[context_start:context_end]
+                                reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in context_words])
+                            else:
+                                # Ultimate fallback: entire reference lyrics
+                                reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in ref_words])
+                        else:
+                            # No time info, use entire reference lyrics
+                            reference_contexts[source] = " ".join([w.text if hasattr(w, 'text') else str(w) for w in ref_words])
+
+                gap_text = " ".join([w["text"] for w in gap_words])
+
+                gaps_data.append({
+                    "gap_id": i,
+                    "position": gap.transcription_position,
+                    "preceding_words": preceding_words,
+                    "gap_text": gap_text,
+                    "following_words": following_words,
+                    "transcribed_words": gap_words,
+                    "reference_contexts": reference_contexts,
+                    "word_count": len(gap_words),
+                    "annotations": {
+                        "your_decision": "",
+                        "action_type": "# NO_ACTION | REPLACE | DELETE | INSERT | MERGE | SPLIT",
+                        "target_word_ids": [],
+                        "replacement_text": "",
+                        "notes": ""
+                    }
+                })
+
+            with open("gaps_review.yaml", 'w') as f:
+                f.write("# Gap Review Data for Manual Annotation\n")
+                f.write(f"# Total gaps: {len(gaps_data)}\n")
+                f.write("#\n")
+                f.write("# For each gap, fill in the annotations section:\n")
+                f.write("# your_decision: Brief description of what should happen\n")
+                f.write("# action_type: NO_ACTION | REPLACE | DELETE | INSERT | MERGE | SPLIT\n")
+                f.write("# target_word_ids: Which word IDs to operate on (from transcribed_words)\n")
+                f.write("# replacement_text: The corrected text (if applicable)\n")
+                f.write("# notes: Any additional reasoning or context\n")
+                f.write("#\n\n")
+                yaml.dump({"gaps": gaps_data}, f, default_flow_style=False, allow_unicode=True, width=120, sort_keys=False)
+
+            self.logger.info(f"📝 Dumped {len(gaps_data)} gaps to gaps_review.yaml - review and annotate!")
+            import sys
+            sys.exit(0)
+        # === END TEMPORARY CODE ===
+
+        for i, gap in enumerate(gap_sequences, 1):
+            self.logger.info(f"Processing gap {i}/{len(gap_sequences)} at position {gap.transcription_position}")
+
+            # Get the actual words for logging
+            gap_words = [word_map[word_id] for word_id in gap.transcribed_word_ids]
+            self.logger.debug(f"Gap text: '{' '.join(w.text for w in gap_words)}'")
+
+            # AGENTIC-ONLY MODE: Use agentic correction exclusively
+            if use_agentic_env:
+                self.logger.info(f"🤖 Attempting agentic correction for gap {i}/{len(gap_sequences)}")
+                try:
+                    # Prepare gap data for classification-first workflow
+                    gap_words_data = []
+                    for word_id in gap.transcribed_word_ids:
+                        if word_id in word_map:
+                            word = word_map[word_id]
+                            gap_words_data.append({
+                                "id": word_id,
+                                "text": word.text,
+                                "start_time": getattr(word, 'start_time', 0),
+                                "end_time": getattr(word, 'end_time', 0)
+                            })
+
+                    # Get context words
+                    all_transcribed_words = []
+                    for seg in segments:
+                        all_transcribed_words.extend(seg.words)
+                    word_position = {w.id: idx for idx, w in enumerate(all_transcribed_words)}
+
+                    gap_positions = [word_position[wid] for wid in gap.transcribed_word_ids if wid in word_position]
+                    preceding_words = ""
+                    following_words = ""
+
+                    if gap_positions:
+                        first_gap_pos = min(gap_positions)
+                        last_gap_pos = max(gap_positions)
+
+                        # Get 10 words before
+                        start_pos = max(0, first_gap_pos - 10)
+                        preceding_list = [all_transcribed_words[idx].text for idx in range(start_pos, first_gap_pos) if idx < len(all_transcribed_words)]
+                        preceding_words = " ".join(preceding_list)
+
+                        # Get 10 words after
+                        end_pos = min(len(all_transcribed_words), last_gap_pos + 11)
+                        following_list = [all_transcribed_words[idx].text for idx in range(last_gap_pos + 1, end_pos) if idx < len(all_transcribed_words)]
+                        following_words = " ".join(following_list)
+
+                    # Get reference contexts from all sources
+                    reference_contexts = {}
+                    for source, lyrics_data in self.reference_lyrics.items():
+                        if lyrics_data and lyrics_data.segments:
+                            ref_words = []
+                            for seg in lyrics_data.segments:
+                                ref_words.extend([w.text for w in seg.words])
+                            # For now, use full text (handlers will extract relevant portions)
+                            reference_contexts[source] = " ".join(ref_words)
+
+                    # Get artist and title from metadata
+                    artist = metadata.get("artist") if metadata else None
+                    title = metadata.get("title") if metadata else None
+
+                    # Choose model via router
+                    _router = _ModelRouter()
+                    uncertainty = 0.3 if len(gap_words_data) <= 2 else 0.7
+                    model_id = _router.choose_model("gap", uncertainty)
+                    self.logger.debug(f"🤖 Router selected model: {model_id}")
+
+                    # Create agent and use new classification-first workflow
+                    self.logger.debug(f"🤖 Creating AgenticCorrector with model: {model_id}")
+                    _agent = _AgenticCorrector.from_model(
+                        model=model_id,
+                        session_id=session_id,
+                        cache_dir=str(self._cache_dir)
+                    )
+
+                    # Use new propose_for_gap method
+                    self.logger.debug(f"🤖 Calling agent.propose_for_gap() for gap {i}")
+                    _proposals = _agent.propose_for_gap(
+                        gap_id=f"gap_{i}",
+                        gap_words=gap_words_data,
+                        preceding_words=preceding_words,
+                        following_words=following_words,
+                        reference_contexts=reference_contexts,
+                        artist=artist,
+                        title=title
+                    )
+                    self.logger.debug(f"🤖 Agent returned {len(_proposals) if _proposals else 0} proposals")
+                    _agentic_corrections = _adapt(_proposals, word_map, linear_position_map) if _proposals else []
+                    self.logger.debug(f"🤖 Adapter returned {len(_agentic_corrections)} corrections")
+
+                    if _agentic_corrections:
+                        self.logger.info(f"🤖 Applying {len(_agentic_corrections)} agentic corrections for gap {i}")
+                        affected_word_ids = [w.id for w in self._get_affected_words(gap, segments)]
+                        affected_segment_ids = [s.id for s in self._get_affected_segments(gap, segments)]
+                        updated_segments = self._apply_corrections_to_segments(self._get_affected_segments(gap, segments), _agentic_corrections)
+                        for correction in _agentic_corrections:
+                            if correction.word_id and correction.corrected_word_id:
+                                word_id_map[correction.word_id] = correction.corrected_word_id
+                        for old_seg, new_seg in zip(self._get_affected_segments(gap, segments), updated_segments):
+                            segment_id_map[old_seg.id] = new_seg.id
+                        step = CorrectionStep(
+                            handler_name="AgenticCorrector",
+                            affected_word_ids=affected_word_ids,
+                            affected_segment_ids=affected_segment_ids,
+                            corrections=_agentic_corrections,
+                            segments_before=self._get_affected_segments(gap, segments),
+                            segments_after=updated_segments,
+                            created_word_ids=[w.id for w in self._get_new_words(updated_segments, affected_word_ids)],
+                            deleted_word_ids=[id for id in affected_word_ids if not self._word_exists(id, updated_segments)],
+                        )
+                        correction_steps.append(step)
+                        all_corrections.extend(_agentic_corrections)
+                        # Log corrections made
+                        for correction in _agentic_corrections:
+                            self.logger.info(
+                                f"Made correction: '{correction.original_word}' -> '{correction.corrected_word}' "
+                                f"(confidence: {correction.confidence:.2f}, reason: {correction.reason})"
+                            )
+                    else:
+                        self.logger.info(f"🤖 No agentic corrections needed for gap {i}")
+
+                except Exception as e:
+                    # In agentic-only mode, fail fast instead of falling back
+                    self.logger.error(f"🤖 Agentic correction failed for gap {i}: {e}", exc_info=True)
+                    raise RuntimeError(f"Agentic AI correction failed for gap {i}: {e}") from e
+
+                # Skip rule-based handlers completely in agentic mode
+                continue
+
+            # RULE-BASED MODE: Try each handler in order
+            for handler in self.handlers:
+                handler_name = handler.__class__.__name__
+                can_handle, handler_data = handler.can_handle(gap, base_handler_data)
+
+                if can_handle:
+                    # Merge base handler data with specific handler data
+                    handler_data = {**base_handler_data, **(handler_data or {})}
+
+                    corrections = handler.handle(gap, handler_data)
+                    if corrections:
+                        self.logger.info(f"Handler {handler_name} made {len(corrections)} corrections")
+                        # Track affected IDs
+                        affected_word_ids = [w.id for w in self._get_affected_words(gap, segments)]
+                        affected_segment_ids = [s.id for s in self._get_affected_segments(gap, segments)]
+
+                        # Apply corrections and get updated segments
+                        updated_segments = self._apply_corrections_to_segments(self._get_affected_segments(gap, segments), corrections)
+
+                        # Update ID maps
+                        for correction in corrections:
+                            if correction.word_id and correction.corrected_word_id:
+                                word_id_map[correction.word_id] = correction.corrected_word_id
+
+                        # Map segment IDs
+                        for old_seg, new_seg in zip(self._get_affected_segments(gap, segments), updated_segments):
+                            segment_id_map[old_seg.id] = new_seg.id
+
+                        # Create correction step
+                        step = CorrectionStep(
+                            handler_name=handler_name,
+                            affected_word_ids=affected_word_ids,
+                            affected_segment_ids=affected_segment_ids,
+                            corrections=corrections,
+                            segments_before=self._get_affected_segments(gap, segments),
+                            segments_after=updated_segments,
+                            created_word_ids=[w.id for w in self._get_new_words(updated_segments, affected_word_ids)],
+                            deleted_word_ids=[id for id in affected_word_ids if not self._word_exists(id, updated_segments)],
+                        )
+                        correction_steps.append(step)
+                        all_corrections.extend(corrections)
+
+                        # Log correction details
+                        for correction in corrections:
+                            self.logger.info(
+                                f"Made correction: '{correction.original_word}' -> '{correction.corrected_word}' "
+                                f"(confidence: {correction.confidence:.2f}, reason: {correction.reason})"
+                            )
+                        break  # Stop trying other handlers once we've made corrections
+                    else:
+                        self.logger.debug(f"Handler {handler_name} found no corrections needed")
+                else:
+                    self.logger.debug(f"Handler {handler_name} cannot handle gap")
+
+        # Create final result with correction history
+        corrected_segments = self._apply_all_corrections(segments, all_corrections)
+        self.logger.info(f"Correction process completed with {len(all_corrections)} total corrections")
+        return all_corrections, corrected_segments, correction_steps, word_id_map, segment_id_map
+
+    def _get_new_words(self, segments: List[LyricsSegment], original_word_ids: List[str]) -> List[Word]:
+        """Find words that were created during correction."""
+        return [w for s in segments for w in s.words if w.id not in original_word_ids]
+
+    def _word_exists(self, word_id: str, segments: List[LyricsSegment]) -> bool:
+        """Check if a word ID still exists in the segments."""
+        return any(w.id == word_id for s in segments for w in s.words)
+
+    def _apply_corrections_to_segments(self, segments: List[LyricsSegment], corrections: List[WordCorrection]) -> List[LyricsSegment]:
+        """Apply corrections to create new segments."""
+        # Create word ID map for quick lookup
+        word_map = {w.id: w for s in segments for w in s.words}
+
+        # Group corrections by original_position to handle splits
+        correction_map = {}
+        for c in corrections:
+            if c.original_position not in correction_map:
+                correction_map[c.original_position] = []
+            correction_map[c.original_position].append(c)
+
+        corrected_segments = []
+        current_word_idx = 0
+
+        for segment in segments:
+            corrected_words = []
+            for word in segment.words:
+                if current_word_idx in correction_map:
+                    word_corrections = sorted(correction_map[current_word_idx], key=lambda x: x.split_index or 0)
+
+                    # Check if any correction has a valid split_total
+                    total_splits = next((c.split_total for c in word_corrections if c.split_total is not None), None)
+
+                    if total_splits:
+                        # Handle word split
+                        split_duration = (word.end_time - word.start_time) / total_splits
+
+                        for i, correction in enumerate(word_corrections):
+                            start_time = word.start_time + (i * split_duration)
+                            end_time = start_time + split_duration
+
+                            # Update corrected_position as we create new words
+                            correction.corrected_position = len(corrected_words)
+                            new_word = Word(
+                                id=correction.corrected_word_id or WordUtils.generate_id(),
+                                text=self._preserve_formatting(correction.original_word, correction.corrected_word),
+                                start_time=start_time,
+                                end_time=end_time,
+                                confidence=correction.confidence,
+                                created_during_correction=True,
+                            )
+                            corrected_words.append(new_word)
+                    else:
+                        # Handle single word replacement
+                        correction = word_corrections[0]
+                        if not correction.is_deletion:
+                            # Update corrected_position
+                            correction.corrected_position = len(corrected_words)
+                            new_word = Word(
+                                id=correction.corrected_word_id or WordUtils.generate_id(),
+                                text=self._preserve_formatting(correction.original_word, correction.corrected_word),
+                                start_time=word.start_time,
+                                end_time=word.end_time,
+                                confidence=correction.confidence,
+                                created_during_correction=True,
+                            )
+                            corrected_words.append(new_word)
+                else:
+                    corrected_words.append(word)
+                current_word_idx += 1
+
+            if corrected_words:
+                corrected_segments.append(
+                    LyricsSegment(
+                        id=segment.id,  # Preserve original segment ID
+                        text=" ".join(w.text for w in corrected_words),
+                        words=corrected_words,
+                        start_time=segment.start_time,
+                        end_time=segment.end_time,
+                    )
+                )
+
+        return corrected_segments
+
+    def _get_affected_segments(self, gap: GapSequence, segments: List[LyricsSegment]) -> List[LyricsSegment]:
+        """Get segments that contain words from the gap sequence."""
+        affected_segments = []
+        gap_word_ids = set(gap.transcribed_word_ids)
+
+        for segment in segments:
+            # Check if any words in this segment are part of the gap
+            if any(w.id in gap_word_ids for w in segment.words):
+                affected_segments.append(segment)
+            elif affected_segments:  # We've passed the gap
+                break
+
+        return affected_segments
+
+    def _get_affected_words(self, gap: GapSequence, segments: List[LyricsSegment]) -> List[Word]:
+        """Get words that are part of the gap sequence."""
+        # Create a map of word IDs to Word objects for quick lookup
+        word_map = {w.id: w for s in segments for w in s.words}
+
+        # Get the actual Word objects using the IDs
+        return [word_map[word_id] for word_id in gap.transcribed_word_ids]
+
+    def _apply_all_corrections(self, segments: List[LyricsSegment], corrections: List[WordCorrection]) -> List[LyricsSegment]:
+        """Apply all corrections to create final corrected segments."""
+        # Make a deep copy to avoid modifying original segments
+        working_segments = deepcopy(segments)
+
+        # Apply corrections in order
+        return self._apply_corrections_to_segments(working_segments, corrections)
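
Two small pieces of the corrector are easy to sanity-check in isolation: whitespace preservation when a word is replaced, and the confidence figure reported in the result metadata. The sketch below restates them as standalone functions; the free-function names are illustrative and not part of the package API, but the bodies mirror the logic in the diff above.

```python
# Standalone restatement of two behaviours from the corrector diff above;
# function names here are illustrative, not part of the package API.

def preserve_formatting(original: str, new_word: str) -> str:
    """Keep a single leading/trailing space when the original word had surrounding whitespace."""
    leading_space = " " if original != original.lstrip() else ""
    trailing_space = " " if original != original.rstrip() else ""
    return leading_space + new_word.strip() + trailing_space


def correction_confidence(corrections_made: int, total_words: int) -> float:
    """confidence = 1 - corrections_made / total_words, i.e. 1.0 when nothing was corrected."""
    return 1 - (corrections_made / total_words if total_words > 0 else 0)


if __name__ == "__main__":
    assert preserve_formatting(" hello ", "world") == " world "
    assert preserve_formatting("hello", " world ") == "world"
    # e.g. 25 corrections over a 100-word transcription -> confidence 0.75
    assert correction_confidence(25, 100) == 0.75
    assert correction_confidence(0, 0) == 1
```
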