karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,28 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Dict
4
+
5
+
6
@dataclass
class ObservabilityMetrics:
    """Observability record associated with a ``session_id``.

    The fields are stored exactly as supplied; nothing is checked at
    construction time. Call :meth:`validate` to enforce the range rules.
    """

    id: str
    session_id: str
    ai_correction_accuracy: float
    processing_time_breakdown: Dict[str, int]
    human_review_duration: int
    model_response_times: Dict[str, int]
    error_reduction_percentage: float
    cost_tracking: Dict[str, float]
    system_health_indicators: Dict[str, float]
    improvement_trends: Dict[str, float]
    recorded_at: datetime

    def validate(self) -> None:
        """Check the bounded fields and raise on the first violation.

        Raises:
            ValueError: If ``ai_correction_accuracy`` or
                ``error_reduction_percentage`` is not within 0-100
                (inclusive), or if ``human_review_duration`` is negative.
        """
        percentage_checks = (
            (self.ai_correction_accuracy, "ai_correction_accuracy must be 0-100"),
            (self.error_reduction_percentage, "error_reduction_percentage must be 0-100"),
        )
        for value, message in percentage_checks:
            # Written as a negated chained comparison so NaN is rejected too.
            if not (0.0 <= value <= 100.0):
                raise ValueError(message)

        if self.human_review_duration < 0:
            raise ValueError("human_review_duration must be non-negative")
27
+
28
+
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List
4
+ from pydantic import BaseModel, Field, conint, confloat
5
+ from enum import Enum
6
+
7
+
8
class GapCategory(str, Enum):
    """Categories for gap classification in transcription correction."""
    # Every member's value is the same text as its name. Because the enum
    # also subclasses ``str``, members compare equal to those plain strings.
    PUNCTUATION_ONLY = "PUNCTUATION_ONLY"
    SOUND_ALIKE = "SOUND_ALIKE"
    BACKGROUND_VOCALS = "BACKGROUND_VOCALS"
    EXTRA_WORDS = "EXTRA_WORDS"
    REPEATED_SECTION = "REPEATED_SECTION"
    COMPLEX_MULTI_ERROR = "COMPLEX_MULTI_ERROR"
    AMBIGUOUS = "AMBIGUOUS"
    NO_ERROR = "NO_ERROR"
18
+
19
+
20
class GapClassification(BaseModel):
    """Classification result for a gap in the transcription."""
    # All fields except ``suggested_handler`` are required (declared with ``...``).
    gap_id: str = Field(..., description="Unique identifier for the gap")
    category: GapCategory = Field(..., description="Classification category")
    # Constrained to the closed interval [0.0, 1.0].
    confidence: confloat(ge=0.0, le=1.0) = Field(..., description="Confidence in classification (0-1)")
    reasoning: str = Field(..., description="Explanation for the classification")
    # Optional; defaults to None when omitted.
    suggested_handler: Optional[str] = Field(None, description="Recommended handler for this gap")
27
+
28
+
29
class CorrectionProposal(BaseModel):
    """A single proposed correction for one word or a group of words.

    Only ``action`` and ``reason`` are required; every other field has a
    default.
    """
    word_id: Optional[str] = Field(None, description="ID of the word to correct")
    word_ids: Optional[List[str]] = Field(None, description="IDs of multiple words when applicable")
    # Typed as a free-form string; the description lists the action names but
    # nothing in this model restricts the value to them.
    action: str = Field(..., description="ReplaceWord|SplitWord|DeleteWord|AdjustTiming|NoAction|Flag")
    replacement_text: Optional[str] = Field(None, description="Text to insert/replace with")
    # When given, must be an integer within [-1000, 1000].
    timing_delta_ms: Optional[conint(ge=-1000, le=1000)] = None
    # Constrained to [0.0, 1.0]; defaults to 0.0 when omitted.
    confidence: confloat(ge=0.0, le=1.0) = 0.0
    reason: str = Field(..., description="Short rationale for the proposal")
    gap_category: Optional[GapCategory] = Field(None, description="Classification category of the gap")
    requires_human_review: bool = Field(False, description="Whether this proposal needs human review")
    artist: Optional[str] = Field(None, description="Song artist for context")
    title: Optional[str] = Field(None, description="Song title for context")
41
+
42
+
43
class CorrectionProposalList(BaseModel):
    """Wrapper model holding a list of ``CorrectionProposal`` items."""
    # Required field: there is no default.
    proposals: List[CorrectionProposal]
45
+
46
+
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, is_dataclass
4
+ from typing import Any, Dict
5
+
6
+
7
def to_serializable_dict(obj: Any) -> Dict[str, Any]:
    """Serialize a dataclass instance or dict-like object to a plain dict for JSON.

    This avoids pulling in runtime deps for Pydantic here; enforcement occurs in
    workflow layers using Instructor/pydantic-ai as per guidance.

    Args:
        obj: A dataclass instance, a ``dict``, or any other mapping.

    Returns:
        ``dataclasses.asdict(obj)`` for a dataclass instance; ``obj`` itself
        (not a copy) when it already is a ``dict``; ``dict(obj)`` for any
        other mapping.

    Raises:
        TypeError: If ``obj`` is none of the above. This includes a dataclass
            *class* passed instead of an instance.
    """
    from collections.abc import Mapping

    # is_dataclass() is also true for dataclass classes, which asdict()
    # rejects with its own message; route those to the TypeError below.
    if is_dataclass(obj) and not isinstance(obj, type):
        return asdict(obj)
    if isinstance(obj, dict):
        return obj
    # Honour the "dict-like" promise for read-only/custom mappings.
    if isinstance(obj, Mapping):
        return dict(obj)
    raise TypeError(f"Unsupported object type for serialization: {type(obj)!r}")
18
+
19
+
@@ -0,0 +1,5 @@
1
+ """Observability hooks and initialization for agentic correction."""
2
+
3
+ __all__ = []
4
+
5
+
@@ -0,0 +1,35 @@
1
+ from typing import Optional, Dict, Any
2
+ import os
3
+ import threading
4
+
5
+
6
def setup_langfuse(client_name: str = "agentic-corrector") -> Optional[object]:
    """Build a Langfuse client from environment variables, if configured.

    Reads ``LANGFUSE_SECRET_KEY``, ``LANGFUSE_PUBLIC_KEY`` and
    ``LANGFUSE_HOST`` (defaulting to ``https://cloud.langfuse.com``). The
    ``langfuse`` package is imported lazily so it is not required at import
    time.

    Args:
        client_name: Passed to the client as ``sdk_integration``.

    Returns:
        The client, or ``None`` when either key is missing/empty or when
        importing or constructing the client raises.
    """
    credentials = {
        "secret_key": os.getenv("LANGFUSE_SECRET_KEY"),
        "public_key": os.getenv("LANGFUSE_PUBLIC_KEY"),
    }
    endpoint = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")

    # Both keys must be present and non-empty; otherwise observability is off.
    if not all(credentials.values()):
        return None

    try:
        from langfuse import Langfuse  # type: ignore

        return Langfuse(host=endpoint, sdk_integration=client_name, **credentials)
    except Exception:
        # Missing package or bad configuration: callers treat None as a no-op.
        return None
24
+
25
+
26
def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any]) -> None:
    """Send custom metrics to Langfuse as a trace; do nothing without a client.

    Args:
        client: Object exposing ``trace(name=..., metadata=...)``, or ``None``.
        name: Trace name.
        metrics: Mapping forwarded as the trace ``metadata``.

    Any exception raised while recording is suppressed.
    """
    if client is not None:
        try:
            # Minimal shape to avoid strict coupling; callers can extend
            client.trace(name=name, metadata=metrics)
        except Exception:
            # Swallow observability errors to never impact core flow
            pass
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, Any
5
+
6
+
7
@dataclass
class MetricsAggregator:
    """In-memory metrics aggregator for agentic correction API.

    Keeps running totals only; nothing is persisted or time-windowed.
    """

    total_sessions: int = 0
    total_processing_time_ms: int = 0
    total_feedback: int = 0
    model_counts: Dict[str, int] = field(default_factory=dict)
    model_total_time_ms: Dict[str, int] = field(default_factory=dict)
    fallback_count: int = 0

    def record_session(self, model_id: str, processing_time_ms: int, fallback_used: bool) -> None:
        """Add one session to the running totals.

        Args:
            model_id: Model identifier; per-model stats are skipped when empty.
            processing_time_ms: Session duration. Coerced with ``int()`` and
                clamped so negative values count as 0.
            fallback_used: Whether to increment the fallback counter.
        """
        elapsed = max(0, int(processing_time_ms))
        self.total_sessions += 1
        self.total_processing_time_ms += elapsed
        if model_id:
            self.model_counts[model_id] = self.model_counts.get(model_id, 0) + 1
            self.model_total_time_ms[model_id] = self.model_total_time_ms.get(model_id, 0) + elapsed
        if fallback_used:
            self.fallback_count += 1

    def record_feedback(self) -> None:
        """Increment the feedback counter by one."""
        self.total_feedback += 1

    def snapshot(self, time_range: str = "day", session_id: str | None = None) -> Dict[str, Any]:
        """Return a point-in-time summary of the collected metrics.

        Args:
            time_range: Echoed back as ``"timeRange"``; it does not filter data.
            session_id: Accepted for interface compatibility; currently unused.

        Returns:
            A dict with totals, the average processing time (truncated to an
            int, 0 when there are no sessions) and per-model counts/latencies.
            The returned containers are independent of the aggregator, so
            later ``record_session`` calls do not alter an earlier snapshot.
        """
        avg_time = int(self.total_processing_time_ms / self.total_sessions) if self.total_sessions else 0
        # Compute simple per-model avg latencies
        per_model_avg = {
            model: int(self.model_total_time_ms.get(model, 0) / count) if count else 0
            for model, count in self.model_counts.items()
        }
        # Placeholders for accuracy/cost until we collect these
        return {
            "timeRange": time_range,
            "totalSessions": self.total_sessions,
            "averageAccuracy": 0.0,
            "errorReduction": 0.0,
            "averageProcessingTime": avg_time,
            "modelPerformance": {
                # Copy so the snapshot does not alias live, mutable state.
                "counts": dict(self.model_counts),
                "avgLatencyMs": per_model_avg,
                "fallbacks": self.fallback_count,
            },
            "costSummary": {},
            "userSatisfaction": 0.0,
        }
45
+
46
+
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from contextlib import contextmanager
5
+ from typing import Iterator
6
+
7
+
8
@contextmanager
def timer() -> Iterator[float]:
    """Context manager that yields the ``time.time()`` value taken on entry.

    Nothing is done on exit; use the yielded value to compute an elapsed
    duration afterwards.
    """
    yield time.time()
15
+
16
def elapsed_ms(start: float) -> int:
    """Return whole milliseconds between ``start`` and the current ``time.time()``.

    Args:
        start: An earlier ``time.time()`` reading, in seconds.

    Returns:
        The difference in milliseconds, truncated toward zero by ``int()``.
    """
    delta_seconds = time.time() - start
    return int(delta_seconds * 1000)
18
+
19
+
@@ -0,0 +1,2 @@
1
+ """Prompt templates for agentic correction."""
2
+
@@ -0,0 +1,227 @@
1
+ """Gap classification prompt builder for agentic correction."""
2
+
3
+ from typing import Dict, List, Optional
4
+ import yaml
5
+ import os
6
+ from pathlib import Path
7
+
8
+
9
def load_few_shot_examples() -> Dict[str, List[Dict]]:
    """Load few-shot examples from examples.yaml if it exists.

    The file is looked up next to this module and read as UTF-8.

    Returns:
        The mapping stored under ``examples_by_category`` in the YAML file,
        or an empty dict when that key is absent. The hardcoded examples are
        returned instead when the file is missing, cannot be read or parsed,
        does not contain a mapping at the top level, or stores a non-mapping
        under ``examples_by_category``.
    """
    examples_path = Path(__file__).parent / "examples.yaml"

    if not examples_path.exists():
        return get_hardcoded_examples()

    try:
        # Explicit encoding: lyric text may be non-ASCII and the platform
        # default encoding is not guaranteed to be UTF-8.
        with open(examples_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        # Best-effort: any read/parse failure falls back to built-in examples.
        return get_hardcoded_examples()

    # An empty or scalar/list YAML document has no categories to offer.
    if not isinstance(data, dict):
        return get_hardcoded_examples()

    categories = data.get("examples_by_category", {})
    # Guard against `examples_by_category: null` (or a list/scalar), which
    # would otherwise reach callers that iterate with `.items()`.
    if not isinstance(categories, dict):
        return get_hardcoded_examples()
    return categories
22
+
23
+
24
def get_hardcoded_examples() -> Dict[str, List[Dict]]:
    """Hardcoded examples from gaps_review.yaml for initial training.

    Returns:
        A freshly built dict mapping a lowercase category key to a list of
        example dicts. Category order and per-example key order are fixed.
    """
    sound_alike = [
        dict(
            gap_text="out, I'm starting over",
            preceding="Oh no, was it worth it? Starting",
            following="gonna sleep With the next person",
            reference="Starting now I'm starting over",
            reasoning="Transcription heard 'out' but reference lyrics show 'now' - common sound-alike error",
            action="REPLACE 'out' with 'now'",
        ),
        dict(
            gap_text="And you said to watch it",
            preceding="You're a time, uh, uh, uh",
            following="just in time But to wreck",
            reference="You set the watch You're just in time",
            reasoning="Transcription heard 'And you said to watch it' but reference shows 'You set the watch You're' - sound-alike with extra word 'And'",
            action="REPLACE with reference text",
        ),
    ]

    background_vocals = [
        dict(
            gap_text="it? (Big business)",
            preceding="Oh no, was it worth it? Was it worth",
            following="Was it worth it? (Was it worth",
            reference="was it worth what you did to big business?",
            reasoning="Words in parentheses are background vocals not in reference lyrics",
            action="DELETE words in parentheses",
        ),
        dict(
            gap_text="(Was it worth it?) Was",
            preceding="it? (Big business) Was it worth it?",
            following="it worth it? (Your friends)",
            reference="Was it worth what you did to big business?",
            reasoning="Parenthesized phrase is backing vocal repetition",
            action="DELETE parenthesized words",
        ),
    ]

    extra_words = [
        dict(
            gap_text="But to wreck my life",
            preceding="said to watch it just in time",
            following="To bring back what I left",
            reference="You're just in time To wreck my life",
            reasoning="Transcription adds filler word 'But' not in reference lyrics",
            action="DELETE 'But'",
        ),
    ]

    punctuation_only = [
        dict(
            gap_text="Tick- tock, you're",
            preceding="They got no, they got no concept of time",
            following="not a clock You're a time bomb",
            reference="Tick tock, you're not a clock",
            reasoning="Only difference is hyphen in 'Tick-tock' vs 'Tick tock' - stylistic",
            action="NO_ACTION",
        ),
    ]

    # This example carries two per-source references instead of a single
    # "reference" key.
    no_error = [
        dict(
            gap_text="you're telling lies Well,",
            preceding="You swore together forever Now",
            following="tell me your words They got",
            reference_genius="Now you're telling lies",
            reference_lrclib="Now you're telling me lies",
            reasoning="Genius reference matches transcription exactly (without 'me'), so transcription is correct",
            action="NO_ACTION",
        ),
    ]

    repeated_section = [
        dict(
            gap_text="You're a time bomb, baby You're",
            preceding="Tick-tock, you're not a clock",
            following="a time bomb, baby, oh",
            reference="You're a time bomb baby",
            reasoning="Reference lyrics don't show repetition, but cannot confirm without audio",
            action="FLAG for human review",
        ),
    ]

    # This example has no "preceding"/"following" context keys.
    complex_multi_error = [
        dict(
            gap_text="Right here, did you dance for later? That's what you said? Well, here's an answer You're out in life You have to try",
            reference="Five years and you fell for a waiter I'm sure he says he's an actor So you're acting like",
            reasoning="50-word gap with multiple sound-alike errors throughout, too complex for automatic correction",
            action="FLAG for human review",
        ),
    ]

    return {
        "sound_alike": sound_alike,
        "background_vocals": background_vocals,
        "extra_words": extra_words,
        "punctuation_only": punctuation_only,
        "no_error": no_error,
        "repeated_section": repeated_section,
        "complex_multi_error": complex_multi_error,
    }
113
+
114
+
115
def _format_few_shot_example(example: Dict[str, str]) -> str:
    """Render a single few-shot example as a markdown snippet.

    Besides the generic ``reference`` key, per-source keys of the form
    ``reference_<source>`` (for example ``reference_genius``) are rendered
    as well, so that reasoning which cites a specific source is backed by
    text that actually appears in the prompt.

    Args:
        example: Example dict; ``gap_text``, ``reasoning`` and ``action`` are
            required, ``preceding``/``following``/``reference*`` are optional.

    Returns:
        The formatted example, terminated by a blank line.
    """
    lines = [
        f"**Gap:** {example['gap_text']}",
        f"**Context:** ...{example.get('preceding', '')}... [GAP] ...{example.get('following', '')}...",
    ]
    for key, value in example.items():
        if key == "reference":
            lines.append(f"**Reference:** {value}")
        elif key.startswith("reference_"):
            source = key[len("reference_"):].upper()
            lines.append(f"**Reference ({source}):** {value}")
    lines.append(f"**Reasoning:** {example['reasoning']}")
    lines.append(f"**Action:** {example['action']}")
    return "\n".join(lines) + "\n\n"


def build_classification_prompt(
    gap_text: str,
    preceding_words: str,
    following_words: str,
    reference_contexts: Dict[str, str],
    artist: Optional[str] = None,
    title: Optional[str] = None,
    gap_id: Optional[str] = None
) -> str:
    """Build a prompt for classifying a gap in the transcription.

    The prompt is assembled from (in order) an optional song-context section,
    the list of category definitions, few-shot examples obtained from
    ``load_few_shot_examples()``, the gap itself with its surrounding words,
    the available reference lyrics, and the JSON response schema.

    Args:
        gap_text: The text of the gap that needs classification
        preceding_words: Text immediately before the gap
        following_words: Text immediately after the gap
        reference_contexts: Dictionary of reference lyrics from each source
        artist: Song artist name for context (only used when title is also given)
        title: Song title for context (only used when artist is also given)
        gap_id: Identifier for the gap; rendered as 'unknown' when missing

    Returns:
        Formatted prompt string for the LLM
    """
    examples = load_few_shot_examples()

    # Build few-shot examples section
    example_parts = ["## Example Classifications\n\n"]
    for category, category_examples in examples.items():
        if not category_examples:
            continue
        example_parts.append(f"### {category.upper().replace('_', ' ')}\n\n")
        # Limit to 2 examples per category to keep the prompt compact
        example_parts.extend(
            _format_few_shot_example(ex) for ex in category_examples[:2]
        )
    examples_text = "".join(example_parts)

    # Build reference lyrics section (omitted entirely when there are none)
    references_text = ""
    if reference_contexts:
        references_text = "## Available Reference Lyrics\n\n" + "".join(
            f"**{source.upper()}:** {context}\n\n"
            for source, context in reference_contexts.items()
        )

    # Build song context
    song_context = ""
    if artist and title:
        song_context = f"\n## Song Context\n\n**Artist:** {artist}\n**Title:** {title}\n\nNote: The song title and artist name may help identify proper nouns or unusual words that could be mis-heard.\n"

    prompt = f"""You are an expert at analyzing transcription errors in song lyrics. Your task is to classify gaps (mismatches between transcription and reference lyrics) into categories to determine the best correction approach.

{song_context}

## Categories

Use these EXACT category names in your response:

1. **PUNCTUATION_ONLY**: Only difference is punctuation, capitalization, or symbols (hyphens, quotes). No text changes needed.

2. **SOUND_ALIKE**: Transcription mis-heard words that sound similar (e.g., "out" vs "now", "said to watch" vs "set the watch"). Common for homophones or similar-sounding phrases.

3. **BACKGROUND_VOCALS**: Transcription includes backing vocals (usually in parentheses) that aren't in the main reference lyrics. Should typically be removed for karaoke.

4. **EXTRA_WORDS**: Transcription adds common filler words like "And", "But", "Well" at sentence starts that aren't in reference lyrics.

5. **REPEATED_SECTION**: Transcription shows repeated chorus/lyrics that may or may not appear in condensed reference lyrics. Often needs human verification via audio.

6. **COMPLEX_MULTI_ERROR**: Large gaps (many words) with multiple different error types. Too complex for automatic correction.

7. **NO_ERROR**: At least one reference source matches the transcription exactly, indicating the transcription is correct and other references are incomplete/wrong.

8. **AMBIGUOUS**: Cannot determine correct action without listening to audio. Similar to repeated sections but less clear.

{examples_text}

## Gap to Classify

**Gap ID:** {gap_id or 'unknown'}

**Preceding Context:** {preceding_words}

**Gap Text:** {gap_text}

**Following Context:** {following_words}

{references_text}

## Important Guidelines

- If ANY reference source matches the gap text exactly (ignoring punctuation), classify as **NO_ERROR**
- Consider whether the song title/artist contains words that might appear in the gap
- Parentheses in transcription usually indicate background vocals
- Sound-alike errors are very common in song transcription
- Flag for human review when uncertain

## Your Task

Analyze this gap and respond with a JSON object matching this schema:

{{
  "gap_id": "{gap_id or 'unknown'}",
  "category": "<one of the 8 categories above>",
  "confidence": <float between 0 and 1>,
  "reasoning": "<detailed explanation for your classification>",
  "suggested_handler": "<name of handler or null>"
}}

Provide ONLY the JSON response, no other text.
"""

    return prompt
227
+
@@ -0,0 +1,6 @@
1
+ """AI provider scaffolding for agentic correction (config, health checks)."""
2
+
3
+ __all__ = [
4
+ ]
5
+
6
+
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List, Dict, Any
5
+
6
+
7
class BaseAIProvider(ABC):
    """Contract that every AI backend producing correction proposals must meet.

    Concrete providers are expected to respect the timeout and retry settings
    described by ProviderConfig, and to hand back structured proposals that
    are validated further up the call chain.
    """

    @abstractmethod
    def name(self) -> str:
        """Return the provider's name; concrete subclasses must override this."""
        raise NotImplementedError

    @abstractmethod
    def generate_correction_proposals(
        self,
        prompt: str,
        schema: Dict[str, Any],
        session_id: str | None = None
    ) -> List[Dict[str, Any]]:
        """Produce correction proposals as dictionaries shaped like `schema`.

        The schema is passed in so that an implementation can steer the model
        towards structured output.

        Args:
            prompt: The correction prompt
            schema: JSON schema describing the expected output structure
            session_id: Optional Langfuse session ID used to group traces

        Returns:
            A list of proposal dictionaries.
        """
        raise NotImplementedError
35
+
36
+
@@ -0,0 +1,145 @@
1
+ """Circuit breaker pattern implementation for AI provider reliability."""
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ import logging
6
+ from typing import Dict
7
+
8
+ from .config import ProviderConfig
9
+
10
logger = logging.getLogger(__name__)


class CircuitBreaker:
    """Per-model circuit breaker guarding against cascading failures.

    Failures are counted per model identifier. Once the count reaches the
    configured threshold the circuit "opens" and callers are told to stop
    sending requests until a cool-down period has elapsed, after which the
    circuit closes again on the next `is_open` check.

    Single Responsibility: Failure tracking and circuit state management only.
    """

    def __init__(self, config: ProviderConfig):
        """Create a breaker with no recorded failures and no open circuits.

        Args:
            config: Provider configuration supplying
                `circuit_breaker_failure_threshold` and
                `circuit_breaker_open_seconds`
        """
        self._config = config
        # model -> number of failures recorded since the last reset
        self._failures: Dict[str, int] = {}
        # model -> time.time() value at which the open circuit expires
        self._open_until: Dict[str, float] = {}

    def is_open(self, model: str) -> bool:
        """Tell whether requests for this model should currently be rejected.

        When the cool-down of a previously opened circuit has expired, the
        circuit is closed and the model's failure count is reset as a side
        effect of this call.

        Args:
            model: Model identifier to check

        Returns:
            True if circuit is open (reject requests), False if closed (allow)
        """
        current = time.time()
        deadline = self._open_until.get(model, 0)

        if current >= deadline:
            # Either the circuit was never opened, or its timeout has expired;
            # in the latter case close it and start counting from zero again.
            if model in self._open_until:
                logger.info(f"🤖 Circuit breaker closed for {model} (timeout expired)")
                self._open_until.pop(model)
                self._failures[model] = 0
            return False

        seconds_left = int(deadline - current)
        logger.debug(
            f"🤖 Circuit breaker open for {model}, "
            f"retry in {seconds_left}s"
        )
        return True

    def get_open_until(self, model: str) -> float:
        """Report when the circuit for this model is due to close.

        Args:
            model: Model identifier

        Returns:
            Unix timestamp when circuit will close, or 0 if not open
        """
        return self._open_until.get(model, 0)

    def record_failure(self, model: str) -> None:
        """Count one more failure for the model, opening the circuit if needed.

        Args:
            model: Model identifier that failed
        """
        count = self._failures.get(model, 0) + 1
        self._failures[model] = count

        logger.debug(
            f"🤖 Recorded failure for {model}, "
            f"total: {count}"
        )

        # Trip the breaker once the configured threshold has been reached
        if count >= self._config.circuit_breaker_failure_threshold:
            self._open_circuit(model)

    def record_success(self, model: str) -> None:
        """Note a successful call and clear the model's failure count.

        Args:
            model: Model identifier that succeeded
        """
        previous = self._failures.get(model, 0)
        if previous > 0:
            logger.debug(
                f"🤖 Reset failure count for {model} "
                f"(was {previous})"
            )
            self._failures[model] = 0

    def _open_circuit(self, model: str) -> None:
        """Open the circuit for the model for the configured number of seconds.

        Args:
            model: Model identifier to open circuit for
        """
        cooldown = self._config.circuit_breaker_open_seconds
        self._open_until[model] = time.time() + cooldown

        logger.warning(
            f"🤖 Circuit breaker opened for {model} "
            f"({self._failures[model]} failures >= "
            f"{self._config.circuit_breaker_failure_threshold} threshold), "
            f"will retry in {cooldown}s"
        )

    def reset(self, model: str) -> None:
        """Force the circuit closed and zero the failure count for a model.

        Useful for testing or administrative reset.

        Args:
            model: Model identifier to reset
        """
        self._failures[model] = 0
        self._open_until.pop(model, None)
        logger.info(f"🤖 Circuit breaker manually reset for {model}")

    def get_failure_count(self, model: str) -> int:
        """Return the failure count currently held for a model.

        Args:
            model: Model identifier

        Returns:
            Number of failures recorded since the count was last reset
            (0 for a model that has never failed)
        """
        return self._failures.get(model, 0)
145
+