karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,189 @@
1
+ from typing import List, Optional, Tuple, Dict, Any
2
+ import string
3
+ import Levenshtein
4
+ import logging
5
+
6
+ from lyrics_transcriber.types import GapSequence, WordCorrection
7
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
8
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
9
+
10
+
11
class LevenshteinHandler(GapCorrectionHandler):
    """Handles corrections based on Levenshtein (edit distance) similarity between words.

    This handler looks for words that are similar in spelling to reference words in the same position.
    The similarity calculation includes:
    1. Basic Levenshtein ratio
    2. Bonus for words starting with the same letter
    3. Penalty for words starting with different letters
    4. Bonus for similar length words

    Examples:
        Gap: "wold" (misspelling)
        References:
            genius: ["world"]
            spotify: ["world"]
        Result:
            - Correct "wold" to "world" (high confidence due to small edit distance)

        Gap: "worde" (misspelling)
        References:
            genius: ["world"]
            spotify: ["words"]
        Result:
            - Correct "worde" to "world" (lower confidence due to disagreeing sources)
    """

    def __init__(self, similarity_threshold: float = 0.65, logger: Optional[logging.Logger] = None):
        """Create the handler.

        Args:
            similarity_threshold: Minimum adjusted similarity (see ``_get_string_similarity``)
                a reference word must reach to be considered a match.
            logger: Logger to use; defaults to this module's logger.
        """
        # Initialise the base class the same way the sibling LLMHandler does.
        super().__init__(logger)
        self.similarity_threshold = similarity_threshold
        self.logger = logger or logging.getLogger(__name__)

    @staticmethod
    def _reference_word_at(ref_word_ids: List[str], index: int, word_map: Dict[str, Any]) -> Optional[Tuple[str, Any]]:
        """Return ``(ref_word_id, ref_word)`` for position ``index``, or None if unavailable.

        A reference is unavailable when the source has fewer than ``index + 1`` words
        or when the ID at that position is missing from ``word_map``.
        """
        if index >= len(ref_word_ids):
            return None
        ref_word_id = ref_word_ids[index]
        if ref_word_id not in word_map:
            return None
        return ref_word_id, word_map[ref_word_id]

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Check if we can handle this gap - we'll try if there are reference words.

        Returns ``(True, {})`` as soon as any transcribed word reaches the similarity
        threshold against the reference word at the same position in any source,
        otherwise ``(False, {})``.
        """
        if not data or "word_map" not in data:
            self.logger.error("No word_map provided in data")
            return False, {}

        word_map = data["word_map"]

        if not gap.reference_word_ids:
            self.logger.debug("No reference words available")
            return False, {}

        if not gap.transcribed_word_ids:
            self.logger.debug("No gap words available")
            return False, {}

        # Check if any word has sufficient similarity to reference
        for i, word_id in enumerate(gap.transcribed_word_ids):
            if word_id not in word_map:
                continue
            word = word_map[word_id]

            for ref_word_ids in gap.reference_word_ids.values():
                found = self._reference_word_at(ref_word_ids, i, word_map)
                if found is None:
                    continue
                _, ref_word = found

                similarity = self._get_string_similarity(word.text, ref_word.text)
                if similarity >= self.similarity_threshold:
                    self.logger.debug(f"Found similar word: '{word.text}' -> '{ref_word.text}' ({similarity:.2f})")
                    return True, {}

        self.logger.debug("No words meet similarity threshold")
        return False, {}

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Try to correct words based on string similarity.

        For each transcribed word, the reference word at the same position in each
        source is scored. Words that already match a reference (case-insensitively)
        are left alone. Otherwise the reference text backed by the most sources
        (ties broken by similarity) becomes the correction, with confidence
        ``similarity * (agreeing sources / total sources)``.

        Returns:
            The list of corrections; empty when nothing could be corrected.
        """
        if not data or "word_map" not in data:
            self.logger.error("No word_map provided in data")
            return []

        # Mirror the guards in can_handle so a direct call cannot fail on an empty gap.
        if not gap.reference_word_ids or not gap.transcribed_word_ids:
            return []

        word_map = data["word_map"]
        corrections = []

        # Process each word in the gap
        for i, word_id in enumerate(gap.transcribed_word_ids):
            if word_id not in word_map:
                continue
            word = word_map[word_id]

            # Skip empty / whitespace-only words. Punctuation-only words are not
            # skipped here, but they score 0.0 in _get_string_similarity.
            if not word.text.strip():
                continue

            # Collect the reference word at this position from every source that has one.
            candidates = []
            for source, ref_word_ids in gap.reference_word_ids.items():
                found = self._reference_word_at(ref_word_ids, i, word_map)
                if found is not None:
                    candidates.append((source, *found))

            # Skip exact matches
            lowered = word.text.lower()
            if any(ref_word.text.lower() == lowered for _, _, ref_word in candidates):
                continue

            # Find matching reference words at this position
            matches: Dict[str, Tuple[List[str], float, str]] = {}  # word -> (sources, similarity, word_id)

            for source, ref_word_id, ref_word in candidates:
                similarity = self._get_string_similarity(word.text, ref_word.text)

                if similarity >= self.similarity_threshold:
                    self.logger.debug(f"Found match: '{word.text}' -> '{ref_word.text}' ({similarity:.2f})")
                    if ref_word.text not in matches:
                        matches[ref_word.text] = ([], similarity, ref_word_id)
                    matches[ref_word.text][0].append(source)

            if not matches:
                continue

            # Create correction for best match: most sources first, then highest similarity
            best_match, (sources, similarity, ref_word_id) = max(
                matches.items(), key=lambda x: (len(x[1][0]), x[1][1])
            )

            source_confidence = len(sources) / len(gap.reference_word_ids)
            final_confidence = similarity * source_confidence

            # Calculate reference positions
            reference_positions = WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", []))

            self.logger.debug(f"Creating correction: {word.text} -> {best_match} (confidence: {final_confidence})")
            corrections.append(
                WordCorrection(
                    original_word=word.text,
                    corrected_word=best_match,
                    segment_index=0,
                    original_position=gap.transcription_position + i,
                    confidence=final_confidence,
                    source=", ".join(sources),
                    reason=f"String similarity ({final_confidence:.2f})",
                    alternatives={k: len(v[0]) for k, v in matches.items()},
                    is_deletion=False,
                    reference_positions=reference_positions,
                    length=1,
                    handler="LevenshteinHandler",
                    word_id=word_id,
                    corrected_word_id=ref_word_id,
                )
            )

        return corrections

    def _clean_word(self, word: str) -> str:
        """Remove punctuation and standardize for comparison.

        Strips surrounding whitespace, lowercases, then strips leading/trailing
        ASCII punctuation. Punctuation inside the word is kept.
        """
        return word.strip().lower().strip(string.punctuation)

    def _get_string_similarity(self, word1: str, word2: str) -> float:
        """Calculate string similarity using Levenshtein ratio with adjustments.

        Returns 0.0 when either word is empty after cleaning.
        """
        # Clean words
        w1, w2 = self._clean_word(word1), self._clean_word(word2)
        if not w1 or not w2:
            return 0.0

        # Calculate Levenshtein ratio
        similarity = Levenshtein.ratio(w1, w2)

        if w1[0] == w2[0]:
            # Boost similarity for words starting with the same letter
            similarity = (similarity + 1) / 2
        else:
            # Penalize words starting with different letters
            similarity = similarity * 0.9

        # Boost for similar length words: average with the shorter/longer length ratio
        length_ratio = min(len(w1), len(w2)) / max(len(w1), len(w2))
        similarity = (similarity + length_ratio) / 2

        return similarity
@@ -0,0 +1,293 @@
1
+ from typing import List, Optional, Tuple, Dict, Any, Union
2
+ import logging
3
+ import json
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ from lyrics_transcriber.types import GapSequence, WordCorrection
8
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
9
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
10
+ from lyrics_transcriber.correction.handlers.llm_providers import LLMProvider
11
+
12
+
13
+ class LLMHandler(GapCorrectionHandler):
14
+ """Uses an LLM to analyze and correct gaps by comparing with reference lyrics."""
15
+
16
def __init__(
    self,
    provider: LLMProvider,
    name: str,
    logger: Optional[logging.Logger] = None,
    cache_dir: Optional[Union[str, Path]] = None,
):
    """Set up the handler with its LLM backend and optional debug-output location.

    Args:
        provider: Backend used to generate responses for prompts.
        name: Name stored on the handler as ``self.name``.
        logger: Logger to use; falls back to this module's logger.
        cache_dir: Directory under which debug files are written. A falsy value
            (None or an empty string) disables debug output.
    """
    super().__init__(logger)
    self.provider = provider
    self.name = name
    self.logger = logger if logger else logging.getLogger(__name__)
    # Normalise to a Path so later code can use the "/" operator.
    self.cache_dir = None if not cache_dir else Path(cache_dir)
24
+
25
def _format_prompt(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> str:
    """Format the prompt for the LLM with context about the gap and reference lyrics.

    Args:
        gap: The gap to describe. Its transcribed word IDs, per-source reference
            word IDs and preceding anchor ID are read.
        data: Optional context dict. Keys read here: ``"word_map"`` (ID -> word
            object with a ``text`` attribute), ``"metadata"`` (``artist``,
            ``title``, ``full_reference_texts``) and ``"anchor_sequences"``.

    Returns:
        The prompt text, or an empty string (after logging an error) when no
        word map is available.
    """
    # Treat a missing data dict like an empty one instead of failing on None.get.
    data = data or {}
    word_map = data.get("word_map", {})
    metadata = data.get("metadata") or {}

    if not word_map:
        self.logger.error("No word_map provided in data")
        return ""

    def word_lines(word_ids):
        """Render one bullet line per ID that is present in word_map."""
        return [f"- ID: {word_id}, Text: '{word_map[word_id].text}'\n" for word_id in word_ids if word_id in word_map]

    # Collect fragments and join once at the end rather than growing a string with +=.
    parts = [
        "You are a lyrics correction expert. You will be given transcribed lyrics that may contain errors "
        "and reference lyrics from multiple sources. Your task is to analyze each word in the transcribed text "
        "and suggest specific corrections based on the reference lyrics.\n\n"
        "Each word has a unique ID. When suggesting corrections, you must specify the ID of the word being corrected. "
        "This ensures accuracy in applying your corrections.\n\n"
        "For each correction, specify:\n"
        "1. The word ID being corrected\n"
        "2. The correction type ('replace', 'split', 'combine', or 'delete')\n"
        "3. The corrected text\n"
        "4. Your confidence level\n"
        "5. The reason for the correction\n\n"
    ]

    # Add song context if available
    if metadata.get("artist") and metadata.get("title"):
        parts.append(f"Song: {metadata['title']}\nArtist: {metadata['artist']}\n\n")

    # Format transcribed words with IDs
    parts.append("Transcribed words:\n")
    parts.extend(word_lines(gap.transcribed_word_ids))

    parts.append("\nReference lyrics from different sources:\n")

    # Add each reference source with words and their IDs
    full_texts = metadata.get("full_reference_texts") or {}
    for source, word_ids in gap.reference_word_ids.items():
        parts.append(f"\n{source} immediate context:\n")
        parts.extend(word_lines(word_ids))

        # Add full lyrics if available
        if full_texts.get(source):
            parts.append(f"\nFull {source} lyrics:\n{full_texts[source]}\n")

    # Add context about surrounding anchors if available
    if gap.preceding_anchor_id:
        preceding_anchor = next(
            (a.anchor for a in data.get("anchor_sequences", []) if a.anchor.id == gap.preceding_anchor_id),
            None,
        )
        if preceding_anchor:
            parts.append("\nPreceding correct words:\n")
            parts.extend(word_lines(preceding_anchor.transcribed_word_ids))

    parts.append(
        "\nProvide corrections in the following JSON format:\n"
        "{\n"
        ' "corrections": [\n'
        " {\n"
        ' "word_id": "id_of_word_to_correct",\n'
        ' "type": "replace|split|combine|delete",\n'
        ' "corrected_text": "new text",\n'
        ' "reference_word_id": "id_from_reference_lyrics", // Optional, use when matching a specific reference word\n'
        ' "confidence": 0.9,\n'
        ' "reason": "explanation of correction"\n'
        " }\n"
        " ]\n"
        "}\n\n"
        "Important rules:\n"
        "1. Always include the word_id for each correction\n"
        "2. For 'split' type, corrected_text should contain the space-separated words\n"
        "3. For 'combine' type, word_id should be the first word to combine\n"
        "4. Include reference_word_id when the correction matches a specific reference word\n"
        "5. Only suggest corrections when you're confident they improve the lyrics\n"
        "6. Preserve any existing words that match the reference lyrics\n"
        "7. Respond ONLY with the JSON object, no other text"
    )

    return "".join(parts)
111
+
112
def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
    """Report whether the LLM should be tried on this gap.

    Any gap that has reference word IDs qualifies; ``data`` is not consulted.

    Returns:
        ``(True, {})`` when the gap has reference words, otherwise ``(False, {})``.
    """
    if gap.reference_word_ids:
        return True, {}

    self.logger.debug("No reference words available")
    return False, {}
119
+
120
def _write_debug_info(self, prompt: str, response: str, gap_index: int, audio_file_hash: Optional[str] = None) -> None:
    """Write prompt and response to a debug file (best effort).

    The file is created as
    ``<cache_dir>/llm_debug/llm_debug_[<audio_file_hash>_]<gap_index>_<timestamp>.txt``.
    Nothing is written, and a warning is logged, when no cache directory is set.
    Any filesystem error (creating the directory or writing the file) is logged
    and swallowed so that debug output can never abort the caller.

    Args:
        prompt: Prompt text that was sent to the LLM.
        response: Raw response text received from the LLM.
        gap_index: Index used in the file name to identify the gap.
        audio_file_hash: Optional hash prepended to the file name.
    """
    if not self.cache_dir:
        self.logger.warning("No cache directory provided, skipping LLM debug output")
        return

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    debug_dir = self.cache_dir / "llm_debug"

    hash_prefix = f"{audio_file_hash}_" if audio_file_hash else ""
    filename = debug_dir / f"llm_debug_{hash_prefix}{gap_index}_{timestamp}.txt"

    debug_content = "=== LLM PROMPT ===\n" f"{prompt}\n\n" "=== LLM RESPONSE ===\n" f"{response}\n"

    try:
        # Directory creation is inside the try so a failure here is handled
        # the same way as a failed write.
        debug_dir.mkdir(exist_ok=True, parents=True)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(debug_content)
    except OSError as e:  # IOError is an alias of OSError in Python 3
        self.logger.error(f"Failed to write LLM debug file: {e}")
140
+
141
def _corrections_from_llm_suggestion(
    self,
    correction: Dict[str, Any],
    gap: GapSequence,
    word_map: Dict[str, Any],
    reference_positions: Dict[Any, Any],
) -> List[WordCorrection]:
    """Translate a single LLM suggestion into zero or more WordCorrection objects.

    Args:
        correction: One entry of the ``"corrections"`` list from the LLM response.
        gap: The gap sequence the suggestion refers to.
        word_map: Mapping of word ID to word object (objects expose ``.text``).
        reference_positions: Reference positions passed through to the created corrections.

    Returns:
        The corrections created for this suggestion; an empty list when the
        suggestion is rejected (word not in the gap, not enough words to
        combine, unsupported type).

    Raises:
        KeyError, TypeError, AttributeError: when the suggestion lacks a
            required field or a field has an unusable type. The caller treats
            these as "skip this suggestion".
    """
    word_id = correction["word_id"]

    # Validate word_id exists in gap
    if word_id not in gap.transcribed_word_ids:
        self.logger.error(f"LLM suggested correction for word_id {word_id} which is not in the gap")
        return []

    # Get original word from word map
    original_word = word_map[word_id]
    word_index = gap.transcribed_word_ids.index(word_id)
    position = gap.transcription_position + word_index

    self.logger.debug(f"Processing correction: {correction}")

    correction_type = correction["type"]

    if correction_type == "replace":
        self.logger.debug(
            f"Creating replacement: '{original_word.text}' -> '{correction['corrected_text']}' at position {position}"
        )
        return [
            WordOperations.create_word_replacement_correction(
                original_word=original_word.text,
                corrected_word=correction["corrected_text"],
                original_position=position,
                source="LLM",
                confidence=correction["confidence"],
                reason=correction["reason"],
                handler=self.name,
                reference_positions=reference_positions,
                original_word_id=word_id,
                corrected_word_id=correction.get("reference_word_id"),
            )
        ]

    if correction_type == "split":
        split_words = correction["corrected_text"].split()
        self.logger.debug(f"Creating split: '{original_word.text}' -> {split_words} at position {position}")

        # Get reference word IDs if provided
        reference_word_ids = correction.get("reference_word_ids", [None] * len(split_words))

        return list(
            WordOperations.create_word_split_corrections(
                original_word=original_word.text,
                reference_words=split_words,
                original_position=position,
                source="LLM",
                confidence=correction["confidence"],
                reason=correction["reason"],
                handler=self.name,
                reference_positions=reference_positions,
                original_word_id=word_id,
                corrected_word_ids=reference_word_ids,
            )
        )

    if correction_type == "combine":
        # NOTE(review): the number of transcribed words to merge is taken from the
        # number of space-separated words in corrected_text - confirm this is the
        # intended count; it is kept exactly as in the previous implementation.
        words_needed = len(correction["corrected_text"].split())

        if word_index + words_needed > len(gap.transcribed_word_ids):
            self.logger.error(f"Not enough words available to combine at position {position}")
            return []

        # Get all word IDs to combine
        word_ids_to_combine = gap.transcribed_word_ids[word_index : word_index + words_needed]
        words_to_combine = [word_map[combine_id].text for combine_id in word_ids_to_combine]

        self.logger.debug(f"Creating combine: {words_to_combine} -> '{correction['corrected_text']}' at position {position}")

        return list(
            WordOperations.create_word_combine_corrections(
                original_words=words_to_combine,
                reference_word=correction["corrected_text"],
                original_position=position,
                source="LLM",
                confidence=correction["confidence"],
                combine_reason=correction["reason"],
                delete_reason=f"Part of combining words: {correction['reason']}",
                handler=self.name,
                reference_positions=reference_positions,
                original_word_ids=word_ids_to_combine,
                corrected_word_id=correction.get("reference_word_id"),
            )
        )

    if correction_type == "delete":
        self.logger.debug(f"Creating deletion: '{original_word.text}' at position {position}")
        return [
            WordCorrection(
                original_word=original_word.text,
                corrected_word="",
                segment_index=0,
                original_position=position,
                confidence=correction["confidence"],
                source="LLM",
                reason=correction["reason"],
                alternatives={},
                is_deletion=True,
                handler=self.name,
                reference_positions=reference_positions,
                word_id=word_id,
                corrected_word_id=None,
            )
        ]

    # Types outside the documented replace|split|combine|delete set produce nothing.
    self.logger.warning(f"Ignoring LLM correction with unsupported type: {correction_type!r}")
    return []

def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
    """Process the gap using the LLM and create corrections based on its response.

    The method builds a prompt for the gap, sends it to ``self.provider``,
    records prompt and response through ``_write_debug_info``, parses the
    response as JSON and converts each entry of its ``"corrections"`` list into
    WordCorrection objects.

    Each suggestion is converted independently: a suggestion that is missing a
    required field or is otherwise malformed is logged and skipped, and the
    remaining valid suggestions are still returned.

    Args:
        gap: The gap sequence to correct.
        data: Shared handler data. Must contain ``"word_map"``; the optional
            keys ``"anchor_sequences"`` and ``"audio_file_hash"`` are read too.

    Returns:
        The list of corrections created from the LLM response. Empty when
        ``word_map`` is missing, no prompt could be built, the response cannot
        be parsed, the LLM suggests nothing, or an unexpected error occurs.
    """
    if not data or "word_map" not in data:
        self.logger.error("No word_map provided in data")
        return []

    word_map = data["word_map"]
    transcribed_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids if word_id in word_map]

    # Calculate reference positions using the centralized method
    reference_positions = (
        WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", [])) or {}
    )  # Ensure empty dict if None

    prompt = self._format_prompt(gap, data)
    if not prompt:
        return []

    # Get a unique index for this gap based on its position
    gap_index = gap.transcription_position

    try:
        self.logger.debug(f"Processing gap words: {transcribed_words}")
        self.logger.debug(f"Reference word IDs: {gap.reference_word_ids}")

        response = self.provider.generate_response(prompt)

        # Write debug info to files
        self._write_debug_info(prompt, response, gap_index, audio_file_hash=data.get("audio_file_hash"))

        try:
            corrections_data = json.loads(response)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a provider that returned None instead of text.
            self.logger.error(f"Failed to parse LLM response as JSON: {e}")
            self.logger.error(f"Raw response content: {response}")
            return []

        if not isinstance(corrections_data, dict):
            self.logger.error(f"LLM response is not a JSON object: {type(corrections_data).__name__}")
            return []

        # Check if corrections exist and are non-empty
        suggestions = corrections_data.get("corrections")
        if not suggestions:
            self.logger.debug("No corrections suggested by LLM")
            return []

        if not isinstance(suggestions, list):
            self.logger.error(f"LLM 'corrections' field is not a list: {type(suggestions).__name__}")
            return []

        corrections = []
        for correction in suggestions:
            try:
                corrections.extend(self._corrections_from_llm_suggestion(correction, gap, word_map, reference_positions))
            except (KeyError, TypeError, AttributeError) as e:
                # One malformed suggestion must not discard the valid ones.
                self.logger.error(f"Skipping malformed LLM correction {correction!r}: {e!r}")

        self.logger.debug(f"Created {len(corrections)} corrections: {[f'{c.original_word}->{c.corrected_word}' for c in corrections]}")
        return corrections

    except Exception as e:
        # Top-level boundary: the LLM handler must never break the correction pipeline.
        self.logger.error(f"Unexpected error in LLM handler: {e}")
        return []
@@ -0,0 +1,60 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+ import logging
4
+ from ollama import chat as ollama_chat
5
+ import openai
6
+
7
+
8
class LLMProvider(ABC):
    """Common interface that every LLM backend implements."""

    def __init__(self, logger: Optional[logging.Logger] = None):
        # Fall back to this module's logger when the caller supplies none.
        self.logger = logger if logger else logging.getLogger(__name__)

    @abstractmethod
    def generate_response(self, prompt: str, **kwargs) -> str:
        """Send ``prompt`` to the model and return its reply.

        Args:
            prompt: The prompt to send to the LLM
            **kwargs: Additional provider-specific parameters

        Returns:
            str: The LLM's response
        """
26
+
27
+
28
class OllamaProvider(LLMProvider):
    """LLM backend for local Ollama models."""

    def __init__(self, model: str, logger: Optional[logging.Logger] = None):
        super().__init__(logger)
        self.model = model

    def generate_response(self, prompt: str, **kwargs) -> str:
        """Send ``prompt`` as a single user message and return the reply content.

        The chat call is made with ``format="json"``. ``**kwargs`` is accepted
        for interface compatibility but is not forwarded to Ollama.

        Raises:
            Exception: any error raised by the chat call is logged and re-raised.
        """
        messages = [{"role": "user", "content": prompt}]
        try:
            reply = ollama_chat(model=self.model, messages=messages, format="json")
            return reply.message.content
        except Exception as e:
            self.logger.error(f"Error generating Ollama response: {e}")
            raise
42
+
43
+
44
class OpenAIProvider(LLMProvider):
    """Provider for OpenAI-compatible APIs (including OpenRouter)."""

    def __init__(self, model: str, api_key: str, base_url: Optional[str] = None, logger: Optional[logging.Logger] = None):
        super().__init__(logger)
        self.model = model
        self.client = openai.OpenAI(api_key=api_key, base_url=base_url)

    def generate_response(self, prompt: str, **kwargs) -> str:
        """Send ``prompt`` as a single user message and return the first choice's content.

        Args:
            prompt: The prompt to send to the LLM
            **kwargs: Extra keyword arguments forwarded to
                ``chat.completions.create``. ``response_format`` defaults to
                ``{"type": "json_object"}`` and may be overridden here.

        Returns:
            The message content of the first choice in the response.

        Raises:
            Exception: any error raised by the API call is logged and re-raised.
        """
        # Apply JSON-object mode as a default rather than a fixed keyword: passing
        # response_format both literally and through **kwargs raises TypeError.
        kwargs.setdefault("response_format", {"type": "json_object"})
        try:
            response = self.client.chat.completions.create(
                model=self.model, messages=[{"role": "user", "content": prompt}], **kwargs
            )
            return response.choices[0].message.content
        except Exception as e:
            self.logger.error(f"Error generating OpenAI response: {e}")
            raise