karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,74 @@
1
+ """Handler for extra filler words at sentence starts."""
2
+
3
+ from typing import List, Dict, Any
4
+ from .base import BaseHandler
5
+ from ..models.schemas import CorrectionProposal, GapCategory
6
+
7
+
8
class ExtraWordsHandler(BaseHandler):
    """Handles gaps with extra filler words like 'And', 'But', 'Well'.

    A filler word is proposed for deletion only when it opens the gap or
    directly follows a gap word whose text ends in sentence punctuation.
    When no word qualifies, the whole gap is flagged for human review.
    """

    # Common filler words that are often incorrectly added by transcription
    FILLER_WORDS = {'and', 'but', 'well', 'so', 'or', 'then', 'now'}

    # Characters that close a sentence when they end the previous word.
    _SENTENCE_ENDINGS = ('.', '!', '?')

    # Trailing punctuation ignored when comparing a word with FILLER_WORDS.
    _TRAILING_PUNCTUATION = ',.!?;:'

    @property
    def category(self) -> GapCategory:
        """Gap category served by this handler."""
        return GapCategory.EXTRA_WORDS

    def _starts_sentence(self, gap_words: List[Dict[str, Any]], index: int) -> bool:
        """Return True when the word at ``index`` opens the gap or follows sentence-ending punctuation."""
        if index == 0:
            return True
        previous_text = (gap_words[index - 1].get('text') or '').strip()
        # endswith() is False for an empty string. The earlier
        # "previous_text[-1:] in '.!?'" test was True in that case, because
        # the empty string is a substring of every string.
        return previous_text.endswith(self._SENTENCE_ENDINGS)

    def handle(
        self,
        gap_id: str,
        gap_words: List[Dict[str, Any]],
        preceding_words: str,
        following_words: str,
        reference_contexts: Dict[str, str],
        classification_reasoning: str = ""
    ) -> List[CorrectionProposal]:
        """Propose deletion of filler words.

        Args:
            gap_id: Identifier of the gap (not read by this handler).
            gap_words: Word dicts in the gap; each is read via 'text' and 'id'.
            preceding_words: Text before the gap (not read by this handler).
            following_words: Text after the gap (not read by this handler).
            reference_contexts: Reference lyrics per source (not read by this handler).
            classification_reasoning: Free text appended to each proposal's reason.

        Returns:
            One "DeleteWord" proposal per qualifying filler word, or a single
            "Flag" proposal covering the whole gap when none qualifies. An
            empty list when ``gap_words`` is empty.
        """
        if not gap_words:
            return []

        proposals = []

        for i, word in enumerate(gap_words):
            # Compare case-insensitively and without trailing punctuation.
            text = (word.get('text') or '').strip().lower().rstrip(self._TRAILING_PUNCTUATION)

            if text not in self.FILLER_WORDS:
                continue
            if not self._starts_sentence(gap_words, i):
                continue

            proposals.append(
                CorrectionProposal(
                    word_id=word['id'],
                    action="DeleteWord",
                    confidence=0.80,
                    reason=f"Extra filler word '{word.get('text')}' at sentence start not in reference. {classification_reasoning}",
                    gap_category=self.category,
                    requires_human_review=False,
                    artist=self.artist,
                    title=self.title
                )
            )

        # If no filler words found, flag for review
        if not proposals:
            proposals.append(
                CorrectionProposal(
                    word_ids=[w['id'] for w in gap_words],
                    action="Flag",
                    confidence=0.5,
                    reason=f"Classified as extra words but no obvious fillers found. {classification_reasoning}",
                    gap_category=self.category,
                    requires_human_review=True,
                    artist=self.artist,
                    title=self.title
                )
            )

        return proposals
74
+
@@ -0,0 +1,42 @@
1
+ """Handler for gaps where transcription matches at least one reference source."""
2
+
3
+ from typing import List, Dict, Any
4
+ from .base import BaseHandler
5
+ from ..models.schemas import CorrectionProposal, GapCategory
6
+
7
+
8
class NoErrorHandler(BaseHandler):
    """Handler for gaps whose transcription already agrees with a reference source.

    It never changes a word: the whole gap is covered by one "NoAction"
    proposal.
    """

    @property
    def category(self) -> GapCategory:
        """Gap category served by this handler."""
        return GapCategory.NO_ERROR

    def handle(
        self,
        gap_id: str,
        gap_words: List[Dict[str, Any]],
        preceding_words: str,
        following_words: str,
        reference_contexts: Dict[str, str],
        classification_reasoning: str = ""
    ) -> List[CorrectionProposal]:
        """Build the single "NoAction" proposal for a gap.

        Args:
            gap_id: Identifier of the gap (not read here).
            gap_words: Word dicts in the gap; only each 'id' is read.
            preceding_words: Text before the gap (not read here).
            following_words: Text after the gap (not read here).
            reference_contexts: Reference lyrics per source (not read here).
            classification_reasoning: Free text appended to the reason.

        Returns:
            A one-element list holding the proposal, or an empty list when
            ``gap_words`` is empty.
        """
        if not gap_words:
            return []

        covered_ids = [entry['id'] for entry in gap_words]
        return [
            CorrectionProposal(
                word_ids=covered_ids,
                action="NoAction",
                confidence=0.99,
                reason=f"Transcription matches at least one reference source. {classification_reasoning}",
                gap_category=self.category,
                requires_human_review=False,
                artist=self.artist,
                title=self.title
            )
        ]
42
+
@@ -0,0 +1,44 @@
1
+ """Handler for punctuation-only differences."""
2
+
3
+ from typing import List, Dict, Any
4
+ from .base import BaseHandler
5
+ from ..models.schemas import CorrectionProposal, GapCategory
6
+
7
+
8
class PunctuationHandler(BaseHandler):
    """Handler for gaps that differ from the references only in punctuation or capitalization.

    Such gaps are treated as correct transcription in a different style, so
    the handler emits one "NoAction" proposal for the whole gap.
    """

    @property
    def category(self) -> GapCategory:
        """Gap category served by this handler."""
        return GapCategory.PUNCTUATION_ONLY

    def handle(
        self,
        gap_id: str,
        gap_words: List[Dict[str, Any]],
        preceding_words: str,
        following_words: str,
        reference_contexts: Dict[str, str],
        classification_reasoning: str = ""
    ) -> List[CorrectionProposal]:
        """Build the single "NoAction" proposal for a punctuation-only gap.

        Args:
            gap_id: Identifier of the gap (not read here).
            gap_words: Word dicts in the gap; only each 'id' is read.
            preceding_words: Text before the gap (not read here).
            following_words: Text after the gap (not read here).
            reference_contexts: Reference lyrics per source (not read here).
            classification_reasoning: Free text appended to the reason.

        Returns:
            A one-element list holding the proposal, or an empty list when
            ``gap_words`` is empty.
        """
        if not gap_words:
            return []

        # One proposal spans every word in the gap.
        proposal_fields = {
            'word_ids': [item['id'] for item in gap_words],
            'action': "NoAction",
            'confidence': 0.95,
            'reason': f"Punctuation/style difference only. {classification_reasoning}",
            'gap_category': self.category,
            'requires_human_review': False,
            'artist': self.artist,
            'title': self.title,
        }
        return [CorrectionProposal(**proposal_fields)]
44
+
@@ -0,0 +1,60 @@
1
+ """Registry for mapping gap categories to handlers."""
2
+
3
+ from typing import Dict, Type
4
+ from .base import BaseHandler
5
+ from .punctuation import PunctuationHandler
6
+ from .sound_alike import SoundAlikeHandler
7
+ from .background_vocals import BackgroundVocalsHandler
8
+ from .extra_words import ExtraWordsHandler
9
+ from .repeated_section import RepeatedSectionHandler
10
+ from .complex_multi_error import ComplexMultiErrorHandler
11
+ from .ambiguous import AmbiguousHandler
12
+ from .no_error import NoErrorHandler
13
+ from ..models.schemas import GapCategory
14
+
15
+
16
class HandlerRegistry:
    """Lookup table from gap category to the handler class that serves it.

    The table is a class-level dict, so a handler registered through
    ``register_handler`` is visible to every later ``get_handler`` call.
    """

    _handlers: Dict[GapCategory, Type[BaseHandler]] = {
        GapCategory.PUNCTUATION_ONLY: PunctuationHandler,
        GapCategory.SOUND_ALIKE: SoundAlikeHandler,
        GapCategory.BACKGROUND_VOCALS: BackgroundVocalsHandler,
        GapCategory.EXTRA_WORDS: ExtraWordsHandler,
        GapCategory.REPEATED_SECTION: RepeatedSectionHandler,
        GapCategory.COMPLEX_MULTI_ERROR: ComplexMultiErrorHandler,
        GapCategory.AMBIGUOUS: AmbiguousHandler,
        GapCategory.NO_ERROR: NoErrorHandler,
    }

    @classmethod
    def get_handler(cls, category: GapCategory, artist: str = None, title: str = None) -> BaseHandler:
        """Instantiate the handler registered for ``category``.

        Args:
            category: Gap category to look up.
            artist: Song artist name, forwarded to the handler constructor.
            title: Song title, forwarded to the handler constructor.

        Returns:
            A new handler instance for the category.

        Raises:
            ValueError: If no handler is registered for the category.
        """
        registered = cls._handlers.get(category)
        if registered:
            return registered(artist=artist, title=title)

        raise ValueError(f"No handler registered for category: {category}")

    @classmethod
    def register_handler(cls, category: GapCategory, handler_class: Type[BaseHandler]):
        """Register ``handler_class`` for ``category``, replacing any existing entry.

        Args:
            category: Gap category to register under.
            handler_class: Handler class to instantiate for that category.
        """
        table = cls._handlers
        table[category] = handler_class
60
+
@@ -0,0 +1,44 @@
1
+ """Handler for repeated sections (chorus, verse repetitions)."""
2
+
3
+ from typing import List, Dict, Any
4
+ from .base import BaseHandler
5
+ from ..models.schemas import CorrectionProposal, GapCategory
6
+
7
+
8
class RepeatedSectionHandler(BaseHandler):
    """Handles gaps where transcription includes repeated sections not in condensed references.

    The handler never edits words. It emits one "Flag" proposal covering the
    whole gap, with ``requires_human_review`` set.
    """

    # Maximum number of gap-text characters quoted in the proposal reason.
    _PREVIEW_CHARS = 100

    @property
    def category(self) -> GapCategory:
        """Gap category served by this handler."""
        return GapCategory.REPEATED_SECTION

    def handle(
        self,
        gap_id: str,
        gap_words: List[Dict[str, Any]],
        preceding_words: str,
        following_words: str,
        reference_contexts: Dict[str, str],
        classification_reasoning: str = ""
    ) -> List[CorrectionProposal]:
        """Flag repeated sections for human review.

        Args:
            gap_id: Identifier of the gap (not read here).
            gap_words: Word dicts in the gap; each is read via 'text' and 'id'.
            preceding_words: Text before the gap (not read here).
            following_words: Text after the gap (not read here).
            reference_contexts: Reference lyrics per source (not read here).
            classification_reasoning: Free text appended to the reason.

        Returns:
            A one-element list holding the "Flag" proposal, or an empty list
            when ``gap_words`` is empty.
        """
        if not gap_words:
            return []

        # Repeated sections need audio verification - always flag for review
        gap_text = ' '.join(w.get('text', '') for w in gap_words)

        # Add the ellipsis only when the quoted text was actually cut short,
        # so a short gap is not shown as if it had been truncated.
        preview = gap_text[:self._PREVIEW_CHARS]
        if len(gap_text) > self._PREVIEW_CHARS:
            preview += '...'

        proposal = CorrectionProposal(
            word_ids=[w['id'] for w in gap_words],
            action="Flag",
            confidence=0.5,
            reason=f"Repeated section detected: '{preview}'. Reference lyrics may be condensed. Requires audio verification. {classification_reasoning}",
            gap_category=self.category,
            requires_human_review=True,
            artist=self.artist,
            title=self.title
        )

        return [proposal]
44
+
@@ -0,0 +1,126 @@
1
+ """Handler for sound-alike transcription errors."""
2
+
3
+ from typing import List, Dict, Any, Optional
4
+ from .base import BaseHandler
5
+ from ..models.schemas import CorrectionProposal, GapCategory
6
+ import re
7
+
8
+
9
class SoundAlikeHandler(BaseHandler):
    """Handles gaps with sound-alike errors (homophones, similar-sounding phrases).

    The handler looks in each reference text for the words around the gap and
    proposes whatever the reference has between them. When no reference
    yields a replacement, the gap is flagged for human review.
    """

    # Number of context words taken from each side of the gap.
    _CONTEXT_WORDS = 5

    # Typographic apostrophes folded to ASCII so "don’t" and "don't"
    # normalize to the same token instead of "don t" versus "don't".
    _APOSTROPHE_TABLE = str.maketrans({'\u2018': "'", '\u2019': "'", '\u02bc': "'"})

    @property
    def category(self) -> GapCategory:
        """Gap category served by this handler."""
        return GapCategory.SOUND_ALIKE

    @staticmethod
    def _find_token_run(tokens: List[str], run: List[str], start: int = 0) -> int:
        """Return the first index at or after ``start`` where ``run`` occurs in ``tokens``, or -1."""
        width = len(run)
        for idx in range(start, len(tokens) - width + 1):
            if tokens[idx:idx + width] == run:
                return idx
        return -1

    def _extract_replacement_from_references(
        self,
        gap_words: List[Dict[str, Any]],
        reference_contexts: Dict[str, str],
        preceding_words: str,
        following_words: str
    ) -> Optional[str]:
        """Try to extract the correct text from reference lyrics.

        Matching is done on whole normalized words, so the surrounding
        context cannot anchor in the middle of a longer reference word.

        Args:
            gap_words: Words in the gap (kept for interface compatibility; not read).
            reference_contexts: Reference lyrics from each source
            preceding_words: Words before gap
            following_words: Words after gap

        Returns:
            Replacement text if found, None otherwise. Both a preceding and
            a following context are required for a match.
        """
        if not reference_contexts:
            return None

        # Take last few words of preceding and first few words of following
        preceding_tokens = self._normalize_text(preceding_words).split()[-self._CONTEXT_WORDS:]
        following_tokens = self._normalize_text(following_words).split()[:self._CONTEXT_WORDS]

        # Without context on both sides the gap cannot be delimited.
        if not preceding_tokens or not following_tokens:
            return None

        for ref_text in reference_contexts.values():
            ref_tokens = self._normalize_text(ref_text).split()

            context_start = self._find_token_run(ref_tokens, preceding_tokens)
            if context_start < 0:
                continue

            gap_start = context_start + len(preceding_tokens)
            gap_end = self._find_token_run(ref_tokens, following_tokens, gap_start)

            # gap_end == gap_start means the reference has nothing between
            # the two contexts; -1 means the following context is absent.
            if gap_end > gap_start:
                return ' '.join(ref_tokens[gap_start:gap_end])

        return None

    def _normalize_text(self, text: str) -> str:
        """Normalize text for comparison (lowercase, remove punctuation).

        Apostrophes are kept, and typographic apostrophes are first converted
        to ASCII. A falsy ``text`` (``None`` or empty) normalizes to ``''``.
        """
        if not text:
            return ''
        text = text.translate(self._APOSTROPHE_TABLE).lower()
        # Remove punctuation except apostrophes in contractions
        text = re.sub(r'[^\w\s\']', ' ', text)
        # Normalize whitespace
        return ' '.join(text.split())

    def handle(
        self,
        gap_id: str,
        gap_words: List[Dict[str, Any]],
        preceding_words: str,
        following_words: str,
        reference_contexts: Dict[str, str],
        classification_reasoning: str = ""
    ) -> List[CorrectionProposal]:
        """Propose replacement based on reference lyrics.

        Args:
            gap_id: Identifier of the gap (not read here).
            gap_words: Word dicts in the gap; each is read via 'text' and 'id'.
            preceding_words: Text before the gap, used as left context.
            following_words: Text after the gap, used as right context.
            reference_contexts: Reference lyrics per source.
            classification_reasoning: Free text appended to the reason.

        Returns:
            A one-element list: a "ReplaceWord" proposal when a replacement
            was extracted, otherwise a "Flag" proposal requiring human
            review. An empty list when ``gap_words`` is empty.
        """
        if not gap_words:
            return []

        # Try to extract the correct replacement from references
        replacement_text = self._extract_replacement_from_references(
            gap_words,
            reference_contexts,
            preceding_words,
            following_words
        )

        if replacement_text:
            # Found a replacement in reference lyrics
            proposal = CorrectionProposal(
                word_ids=[w['id'] for w in gap_words],
                action="ReplaceWord",
                replacement_text=replacement_text,
                confidence=0.75,
                reason=f"Sound-alike error. Reference suggests: '{replacement_text}'. {classification_reasoning}",
                gap_category=self.category,
                requires_human_review=False,
                artist=self.artist,
                title=self.title
            )
            return [proposal]

        # Could not extract replacement, flag for human review
        gap_text = ' '.join(w.get('text', '') for w in gap_words)
        proposal = CorrectionProposal(
            word_ids=[w['id'] for w in gap_words],
            action="Flag",
            confidence=0.6,
            reason=f"Sound-alike error detected for '{gap_text}' but could not extract replacement from references. {classification_reasoning}",
            gap_category=self.category,
            requires_human_review=True,
            artist=self.artist,
            title=self.title
        )
        return [proposal]
126
+
@@ -0,0 +1,5 @@
1
+ """Models and schemas for agentic correction (to be implemented via TDD)."""
2
+
3
+ __all__ = []
4
+
5
+
@@ -0,0 +1,31 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
+ from .enums import CorrectionType
6
+
7
+
8
@dataclass
class AICorrection:
    """Record of one AI-produced correction, with a ``validate`` consistency check."""

    id: str
    # Text before and after the correction; validate() requires them to differ.
    original_text: str
    corrected_text: str
    # validate() requires this to lie within [0.0, 1.0].
    confidence_score: float
    reasoning: str
    model_used: str
    correction_type: CorrectionType
    # validate() requires this to be strictly positive.
    processing_time_ms: int
    tokens_used: int
    created_at: datetime
    word_position: int
    session_id: str

    def validate(self) -> None:
        """Check the record's invariants.

        Raises:
            ValueError: If ``confidence_score`` is outside [0.0, 1.0], if the
                original and corrected text are equal, or if
                ``processing_time_ms`` is not positive. Checks run in that
                order and the first failure is raised.
        """
        score_in_range = 0.0 <= self.confidence_score <= 1.0
        if not score_in_range:
            raise ValueError("confidence_score must be between 0.0 and 1.0")

        if self.original_text == self.corrected_text:
            raise ValueError("original_text and corrected_text must differ")

        elapsed_ms = self.processing_time_ms
        if elapsed_ms <= 0:
            raise ValueError("processing_time_ms must be positive")
30
+
31
+
@@ -0,0 +1,30 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional, Dict
4
+
5
+ from .enums import SessionType, SessionStatus
6
+
7
+
8
@dataclass
class CorrectionSession:
    """Summary record for one correction session.

    Construction performs no checks; call :meth:`validate` explicitly.
    """

    id: str
    audio_file_hash: str
    session_type: SessionType
    ai_model_config: Dict[str, object]
    total_corrections: int
    accepted_corrections: int
    human_modifications: int
    session_duration_ms: int
    accuracy_improvement: float
    started_at: datetime
    completed_at: Optional[datetime]
    status: SessionStatus

    def validate(self) -> None:
        """Check the basic invariants of the session record.

        Raises:
            ValueError: If any of the three correction counters is negative,
                or if ``completed_at`` is set and earlier than ``started_at``.
        """
        counters = (
            self.total_corrections,
            self.accepted_corrections,
            self.human_modifications,
        )
        for counter in counters:
            if counter < 0:
                raise ValueError("correction counts must be non-negative")

        finished = self.completed_at
        # An unfinished session (completed_at is None) skips the ordering check.
        if finished is not None and finished < self.started_at:
            raise ValueError("completed_at must be after started_at")
29
+
30
+
@@ -0,0 +1,38 @@
1
+ from enum import Enum
2
+
3
+
4
# String-valued enum: each member's value equals its name, and members
# compare equal to plain strings because the class also derives from str.
class CorrectionType(str, Enum):
    WORD_SUBSTITUTION = "WORD_SUBSTITUTION"
    WORD_INSERTION = "WORD_INSERTION"
    WORD_DELETION = "WORD_DELETION"
    PUNCTUATION = "PUNCTUATION"
    TIMING_ADJUSTMENT = "TIMING_ADJUSTMENT"
    LINGUISTIC_IMPROVEMENT = "LINGUISTIC_IMPROVEMENT"
11
+
12
+
13
# String-valued enum with three members; values equal the member names.
class ReviewerAction(str, Enum):
    ACCEPT = "ACCEPT"
    REJECT = "REJECT"
    MODIFY = "MODIFY"
17
+
18
+
19
# String-valued enum with five members; values equal the member names.
class FeedbackCategory(str, Enum):
    AI_CORRECT = "AI_CORRECT"
    AI_INCORRECT = "AI_INCORRECT"
    AI_SUBOPTIMAL = "AI_SUBOPTIMAL"
    CONTEXT_NEEDED = "CONTEXT_NEEDED"
    SUBJECTIVE_PREFERENCE = "SUBJECTIVE_PREFERENCE"
25
+
26
+
27
# String-valued enum with three members; values equal the member names.
class SessionType(str, Enum):
    FULL_CORRECTION = "FULL_CORRECTION"
    PARTIAL_REVIEW = "PARTIAL_REVIEW"
    REPROCESSING = "REPROCESSING"
31
+
32
+
33
# String-valued enum with three members; values equal the member names.
class SessionStatus(str, Enum):
    IN_PROGRESS = "IN_PROGRESS"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"
37
+
38
+
@@ -0,0 +1,30 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
+ from .enums import ReviewerAction, FeedbackCategory
6
+
7
+
8
@dataclass
class HumanFeedback:
    """A reviewer's feedback on one AI correction.

    Construction performs no checks; call :meth:`validate` explicitly.
    """

    id: str
    ai_correction_id: str
    reviewer_action: ReviewerAction
    final_text: Optional[str]
    reason_category: FeedbackCategory
    reason_detail: Optional[str]
    reviewer_confidence: float
    review_time_ms: int
    reviewer_id: Optional[str]
    created_at: datetime
    session_id: str

    def validate(self) -> None:
        """Check field invariants, raising on the first violation.

        Raises:
            ValueError: If the action is ``MODIFY`` and ``final_text`` is
                missing or empty, if ``reviewer_confidence`` is set and
                outside [0.0, 1.0], or if ``review_time_ms`` is not strictly
                positive.
        """
        is_modification = self.reviewer_action == ReviewerAction.MODIFY
        if is_modification and not self.final_text:
            raise ValueError("final_text required when action is MODIFY")

        confidence = self.reviewer_confidence
        # A None confidence is tolerated and skips the range check.
        if confidence is not None:
            if not (0.0 <= confidence <= 1.0):
                raise ValueError("reviewer_confidence must be between 0.0 and 1.0")

        if self.review_time_ms <= 0:
            raise ValueError("review_time_ms must be positive")
29
+
30
+
@@ -0,0 +1,26 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime, timedelta
3
+ from typing import Dict
4
+
5
+
6
@dataclass
class LearningData:
    """Aggregated learning data tied to a session, with an expiry timestamp.

    Construction performs no checks; call :meth:`validate` explicitly.
    """

    id: str
    session_id: str
    error_patterns: Dict[str, int]
    correction_strategies: Dict[str, int]
    model_performance: Dict[str, float]
    feedback_trends: Dict[str, int]
    improvement_metrics: Dict[str, float]
    data_quality_score: float
    created_at: datetime
    expires_at: datetime

    def validate(self) -> None:
        """Check the quality score range and the minimum retention span.

        Raises:
            ValueError: If ``data_quality_score`` is outside [0.0, 1.0], or if
                ``expires_at`` is less than 3 * 365 days after ``created_at``.
        """
        quality_ok = 0.0 <= self.data_quality_score <= 1.0
        if not quality_ok:
            raise ValueError("data_quality_score must be between 0.0 and 1.0")

        # "3 years" is approximated as 3 * 365 days (leap days are ignored);
        # the exact business rule may differ.
        minimum_retention = timedelta(days=365 * 3)
        retention = self.expires_at - self.created_at
        if retention < minimum_retention:
            raise ValueError("expires_at must be at least 3 years from created_at")
25
+
26
+
@@ -0,0 +1,28 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Dict
4
+
5
+
6
@dataclass
class ObservabilityMetrics:
    """Metrics snapshot recorded for a session.

    Construction performs no checks; call :meth:`validate` explicitly.
    """

    id: str
    session_id: str
    ai_correction_accuracy: float
    processing_time_breakdown: Dict[str, int]
    human_review_duration: int
    model_response_times: Dict[str, int]
    error_reduction_percentage: float
    cost_tracking: Dict[str, float]
    system_health_indicators: Dict[str, float]
    improvement_trends: Dict[str, float]
    recorded_at: datetime

    def validate(self) -> None:
        """Check the percentage fields and the review duration.

        Raises:
            ValueError: If ``ai_correction_accuracy`` or
                ``error_reduction_percentage`` is outside [0.0, 100.0], or if
                ``human_review_duration`` is negative.
        """
        # Both percentage-style fields share the same 0-100 bound; they are
        # checked in declaration order so the first failure wins.
        percentage_checks = (
            (self.ai_correction_accuracy, "ai_correction_accuracy must be 0-100"),
            (self.error_reduction_percentage, "error_reduction_percentage must be 0-100"),
        )
        for value, message in percentage_checks:
            if not (0.0 <= value <= 100.0):
                raise ValueError(message)

        if self.human_review_duration < 0:
            raise ValueError("human_review_duration must be non-negative")
27
+
28
+
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List
4
+ from pydantic import BaseModel, Field, conint, confloat
5
+ from enum import Enum
6
+
7
+
8
class GapCategory(str, Enum):
    """Categories for gap classification in transcription correction."""

    # Each member's value equals its name; members also behave as plain str.
    PUNCTUATION_ONLY = "PUNCTUATION_ONLY"
    SOUND_ALIKE = "SOUND_ALIKE"
    BACKGROUND_VOCALS = "BACKGROUND_VOCALS"
    EXTRA_WORDS = "EXTRA_WORDS"
    REPEATED_SECTION = "REPEATED_SECTION"
    COMPLEX_MULTI_ERROR = "COMPLEX_MULTI_ERROR"
    AMBIGUOUS = "AMBIGUOUS"
    NO_ERROR = "NO_ERROR"
18
+
19
+
20
class GapClassification(BaseModel):
    """Classification result for a gap in the transcription."""

    # Every field except suggested_handler is required (default is `...`).
    gap_id: str = Field(
        ...,
        description="Unique identifier for the gap",
    )
    category: GapCategory = Field(
        ...,
        description="Classification category",
    )
    # Constrained to the closed interval [0.0, 1.0].
    confidence: confloat(ge=0.0, le=1.0) = Field(
        ...,
        description="Confidence in classification (0-1)",
    )
    reasoning: str = Field(
        ...,
        description="Explanation for the classification",
    )
    suggested_handler: Optional[str] = Field(
        None,
        description="Recommended handler for this gap",
    )
27
+
28
+
29
# A single proposed correction. Only `action` and `reason` are required;
# every other field has a default. No class docstring is added on purpose,
# so the model's __doc__ stays exactly as it was.
class CorrectionProposal(BaseModel):
    # Target word(s): a single id and/or a list of ids, both optional.
    word_id: Optional[str] = Field(
        None,
        description="ID of the word to correct",
    )
    word_ids: Optional[List[str]] = Field(
        None,
        description="IDs of multiple words when applicable",
    )
    # Free-form string; the description lists the expected action names.
    action: str = Field(
        ...,
        description="ReplaceWord|SplitWord|DeleteWord|AdjustTiming|NoAction|Flag",
    )
    replacement_text: Optional[str] = Field(
        None,
        description="Text to insert/replace with",
    )
    # Optional timing shift, constrained to [-1000, 1000].
    timing_delta_ms: Optional[conint(ge=-1000, le=1000)] = None
    # Constrained to [0.0, 1.0]; defaults to 0.0 when omitted.
    confidence: confloat(ge=0.0, le=1.0) = 0.0
    reason: str = Field(
        ...,
        description="Short rationale for the proposal",
    )
    gap_category: Optional[GapCategory] = Field(
        None,
        description="Classification category of the gap",
    )
    requires_human_review: bool = Field(
        False,
        description="Whether this proposal needs human review",
    )
    artist: Optional[str] = Field(
        None,
        description="Song artist for context",
    )
    title: Optional[str] = Field(
        None,
        description="Song title for context",
    )
41
+
42
+
43
# Wrapper model holding a required list of CorrectionProposal items.
class CorrectionProposalList(BaseModel):
    proposals: List[CorrectionProposal]
45
+
46
+
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, is_dataclass
4
+ from typing import Any, Dict
5
+
6
+
7
def to_serializable_dict(obj: Any) -> Dict[str, Any]:
    """Serialize a dataclass instance or dict to a plain dict for JSON.

    This avoids pulling in runtime deps for Pydantic here; enforcement occurs in
    workflow layers using Instructor/pydantic-ai as per guidance.

    Args:
        obj: A dataclass instance or a ``dict``.

    Returns:
        For a dataclass instance, the result of ``dataclasses.asdict`` (a new
        dict, converted recursively). A ``dict`` argument is returned as-is,
        i.e. the very same object rather than a copy.

    Raises:
        TypeError: If ``obj`` is neither a dataclass instance nor a ``dict``.
            This includes dataclass *classes* passed instead of instances.
    """
    # is_dataclass() is also true for dataclass classes, which asdict()
    # rejects with its own message; exclude types so that callers always get
    # this function's "Unsupported object type" error.
    if is_dataclass(obj) and not isinstance(obj, type):
        return asdict(obj)
    if isinstance(obj, dict):
        return obj
    raise TypeError(f"Unsupported object type for serialization: {type(obj)!r}")
18
+
19
+
@@ -0,0 +1,5 @@
1
+ """Observability hooks and initialization for agentic correction."""
2
+
3
# Empty export list: a star-import of this package brings in no names.
__all__ = []
4
+
5
+