karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,154 @@
1
+ from typing import List, Optional, Tuple, Dict, Any
2
+ import logging
3
+ import re
4
+
5
+ from lyrics_transcriber.types import GapSequence, WordCorrection
6
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
7
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
8
+
9
+
10
class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
    """Handles gaps where reference text matches when spaces and punctuation are removed.

    The transcribed words of a gap and the words of each reference source are
    joined, lower-cased and stripped of whitespace and punctuation. When the
    two normalized strings are equal, the gap differs from that source only in
    word boundaries, casing or punctuation, and corrections are built through
    ``WordOperations`` with a confidence of 1.0.
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Create the handler.

        Args:
            logger: Logger to use; defaults to this module's logger.
        """
        super().__init__(logger)
        self.logger = logger or logging.getLogger(__name__)

    def _remove_spaces_and_punct(self, words: List[str]) -> str:
        """Join words and remove all whitespace and punctuation.

        Args:
            words: Word texts to normalize.

        Returns:
            The lower-cased concatenation of ``words`` with every character
            that is not a word character removed. Underscores are kept because
            the regex class ``\\w`` includes them.
        """
        text = "".join(words).lower()
        # ``\W`` also covers whitespace, so spaces embedded in a word's text
        # (e.g. a leading space on a transcribed token) are stripped together
        # with punctuation, as the method name promises.
        return re.sub(r"\W+", "", text)

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Check whether any reference source equals the gap text after normalization.

        Args:
            gap: The gap to inspect.
            data: Must contain ``"word_map"``, a mapping from word ID to an
                object exposing a ``text`` attribute.

        Returns:
            ``(True, handler_data)`` for the first matching source, where
            ``handler_data`` holds ``"matching_source"``,
            ``"reference_word_ids"`` and ``"word_map"``; otherwise
            ``(False, {})``.
        """
        # Must have reference words
        if not gap.reference_word_ids:
            self.logger.debug("No reference word IDs available.")
            return False, {}

        # Get word lookup map from data
        if not data or "word_map" not in data:
            self.logger.error("No word_map provided in data")
            return False, {}

        word_map = data["word_map"]

        # Resolve the transcribed word IDs; a single unknown ID makes the gap unusable
        gap_words = []
        for word_id in gap.transcribed_word_ids:
            if word_id not in word_map:
                self.logger.error(f"Word ID {word_id} not found in word_map")
                return False, {}
            gap_words.append(word_map[word_id].text)

        gap_text = self._remove_spaces_and_punct(gap_words)

        # Check if any reference source matches when spaces and punctuation are removed
        for source, ref_word_ids in gap.reference_word_ids.items():
            missing_ids = [word_id for word_id in ref_word_ids if word_id not in word_map]
            if missing_ids:
                # handle() looks up every returned reference ID in word_map, so a
                # source with unresolved IDs must not be reported as a match.
                for word_id in missing_ids:
                    self.logger.error(f"Reference word ID {word_id} not found in word_map")
                continue

            if not ref_word_ids:
                continue

            ref_words = [word_map[word_id].text for word_id in ref_word_ids]
            ref_text = self._remove_spaces_and_punct(ref_words)
            if gap_text == ref_text:
                self.logger.debug("Found a matching reference source with spaces and punctuation removed.")
                return True, {
                    "matching_source": source,
                    "reference_word_ids": ref_word_ids,
                    "word_map": word_map,
                }

        self.logger.debug("No matching reference source found with spaces and punctuation removed.")
        return False, {}

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Handle the gap using no-space punctuation matching.

        Args:
            gap: The gap to correct.
            data: Normally the dictionary returned by ``can_handle``. When the
                match keys are absent, ``can_handle`` is run with the supplied
                ``data`` to derive them.

        Returns:
            Combine corrections when the gap has more words than the
            reference, split corrections when it has fewer, and per-word
            replacement corrections (only for words whose lower-cased text
            differs) when the counts are equal. An empty list when no source
            matches.
        """
        # Forward `data` so the word_map is available to can_handle; calling it
        # without data could never succeed.
        if not data or "matching_source" not in data or "reference_word_ids" not in data:
            can_handle, data = self.can_handle(gap, data)
            if not can_handle:
                return []

        corrections = []
        matching_source = data["matching_source"]
        reference_word_ids = data["reference_word_ids"]
        word_map = data["word_map"]

        # Calculate reference positions for the matching source
        reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])

        # Handle cases where number of words differ
        if len(gap.transcribed_word_ids) > len(reference_word_ids):
            # Multiple transcribed words -> fewer reference words
            gap_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids]
            # NOTE(review): only the first reference word is used as the combine
            # target, even if the source has several words - confirm intended.
            ref_word = word_map[reference_word_ids[0]].text

            corrections.extend(
                WordOperations.create_word_combine_corrections(
                    original_words=gap_words,
                    reference_word=ref_word,
                    original_position=gap.transcription_position,
                    source=matching_source,
                    confidence=1.0,
                    combine_reason="Words combined based on text match",
                    delete_reason="Word removed as part of text match combination",
                    reference_positions=reference_positions,
                    handler="NoSpacePunctuationMatchHandler",
                    original_word_ids=gap.transcribed_word_ids,
                    corrected_word_id=reference_word_ids[0],  # Use the reference word's ID
                )
            )
            self.logger.debug(f"Combined words into '{ref_word}'.")

        elif len(gap.transcribed_word_ids) < len(reference_word_ids):
            # Fewer transcribed words -> more reference words; the first
            # transcribed word is the one that gets split.
            gap_word = word_map[gap.transcribed_word_ids[0]].text
            ref_words = [word_map[word_id].text for word_id in reference_word_ids]

            corrections.extend(
                WordOperations.create_word_split_corrections(
                    original_word=gap_word,
                    reference_words=ref_words,
                    original_position=gap.transcription_position,
                    source=matching_source,
                    confidence=1.0,
                    reason="Split word based on text match",
                    reference_positions=reference_positions,
                    handler="NoSpacePunctuationMatchHandler",
                    original_word_id=gap.transcribed_word_ids[0],
                    corrected_word_ids=reference_word_ids,  # Use the reference word IDs
                )
            )
            self.logger.debug(f"Split word '{gap_word}' into {ref_words}.")

        else:
            # One-to-one replacement
            for i, (orig_word_id, ref_word_id) in enumerate(zip(gap.transcribed_word_ids, reference_word_ids)):
                orig_word = word_map[orig_word_id]
                ref_word = word_map[ref_word_id]

                # Words that differ only by case need no correction
                if orig_word.text.lower() != ref_word.text.lower():
                    correction = WordOperations.create_word_replacement_correction(
                        original_word=orig_word.text,
                        corrected_word=ref_word.text,
                        original_position=gap.transcription_position + i,
                        source=matching_source,
                        confidence=1.0,
                        reason=f"Source '{matching_source}' matched when spaces and punctuation removed",
                        reference_positions=reference_positions,
                        handler="NoSpacePunctuationMatchHandler",
                        original_word_id=orig_word_id,
                        corrected_word_id=ref_word_id,
                    )
                    corrections.append(correction)
                    self.logger.debug(f"Correction made: {correction}")

        return corrections
@@ -0,0 +1,85 @@
1
+ from typing import List, Tuple, Dict, Any, Optional
2
+ import logging
3
+
4
+ from lyrics_transcriber.types import GapSequence, WordCorrection
5
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
6
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
7
+
8
+
9
class RelaxedWordCountMatchHandler(GapCorrectionHandler):
    """Handles gaps where at least one reference source has matching word count.

    The first reference source whose number of word IDs equals ``gap.length``
    is selected, and the gap's words are replaced position by position with
    that source's words.
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Create the handler, defaulting to this module's logger."""
        super().__init__(logger)
        self.logger = logger if logger else logging.getLogger(__name__)

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Report whether some reference source has as many words as the gap.

        Returns:
            ``(True, handler_data)`` for the first source whose word count
            equals ``gap.length``; ``(False, {})`` when there are no reference
            words, ``data`` fails validation, or no source qualifies.
        """
        # Must have reference words
        if not gap.reference_word_ids:
            self.logger.debug("No reference word IDs available.")
            return False, {}

        if not self._validate_data(data):
            return False, {}

        # Pick the first source (in mapping order) with a matching word count
        first_match = next(
            ((name, ids) for name, ids in gap.reference_word_ids.items() if len(ids) == gap.length),
            None,
        )
        if first_match is None:
            self.logger.debug("No source with matching word count found.")
            return False, {}

        source, ref_word_ids = first_match
        self.logger.debug(f"Source '{source}' has matching word count.")
        handler_data = {
            "matching_source": source,
            "reference_word_ids": ref_word_ids,
            "word_map": data["word_map"],
            "anchor_sequences": data.get("anchor_sequences", []),
        }
        return True, handler_data

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Handle the gap using word count matching.

        Args:
            gap: The gap to correct.
            data: The dictionary produced by ``can_handle``.

        Returns:
            One replacement correction per position whose transcribed and
            reference texts differ when lower-cased; an empty list when
            ``data`` fails validation.
        """
        if not self._validate_data(data):
            return []

        source_name = data["matching_source"]
        ref_ids = data["reference_word_ids"]
        words_by_id = data["word_map"]
        anchors = data.get("anchor_sequences", [])

        # Use the centralized method to calculate reference positions
        reference_positions = WordOperations.calculate_reference_positions(
            gap, sources=[source_name], anchor_sequences=anchors
        )
        self.logger.debug(f"Calculated reference positions: {reference_positions}")

        results = []
        paired_ids = zip(gap.transcribed_word_ids, ref_ids)
        # The offset keeps advancing for skipped pairs so positions stay aligned
        for offset, (transcribed_id, reference_id) in enumerate(paired_ids):
            if transcribed_id not in words_by_id:
                self.logger.error(f"Original word ID {transcribed_id} not found in word_map")
                continue
            transcribed = words_by_id[transcribed_id]

            if reference_id not in words_by_id:
                self.logger.error(f"Reference word ID {reference_id} not found in word_map")
                continue
            reference = words_by_id[reference_id]

            # Case-insensitively equal words need no correction
            if transcribed.text.lower() == reference.text.lower():
                continue

            replacement = WordOperations.create_word_replacement_correction(
                original_word=transcribed.text,
                corrected_word=reference.text,
                original_position=gap.transcription_position + offset,
                source=source_name,
                confidence=1.0,
                reason=f"Source '{source_name}' had matching word count",
                reference_positions=reference_positions,
                handler="RelaxedWordCountMatchHandler",
                original_word_id=transcribed_id,
                corrected_word_id=reference_id,  # Use the reference word's ID
            )
            results.append(replacement)
            self.logger.debug(f"Correction made: {replacement}")

        return results
@@ -0,0 +1,88 @@
1
+ from typing import List, Dict, Optional, Tuple, Any
2
+ from lyrics_transcriber.types import GapSequence, WordCorrection
3
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
4
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
5
+ import logging
6
+
7
+
8
class RepeatCorrectionHandler(GapCorrectionHandler):
    """Handler that applies corrections that were previously made by other handlers.

    Corrections supplied through ``set_previous_corrections`` are indexed by
    their lower-cased original word. Each gap word with an entry in that index
    is given the most frequent (then most confident) earlier correction, at
    90% of its confidence.
    """

    def __init__(self, logger: Optional[logging.Logger] = None, confidence_threshold: float = 0.7):
        """Create the handler.

        Args:
            logger: Logger to use; defaults to this module's logger.
            confidence_threshold: Minimum confidence a previous correction
                needs in order to be reused.
        """
        super().__init__(logger)
        self.logger = logger or logging.getLogger(__name__)
        self.confidence_threshold = confidence_threshold
        self.previous_corrections: List[WordCorrection] = []

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Report whether there are previous corrections to draw on.

        The gap's words are not inspected here; per-word matching happens in
        ``handle``.

        Returns:
            ``(False, {})`` when ``data`` fails validation. Otherwise a flag
            that is true when at least one previous correction is stored,
            paired with the ``word_map`` and ``anchor_sequences`` from ``data``.
        """
        if not self._validate_data(data):
            return False, {}

        return bool(self.previous_corrections), {"word_map": data["word_map"], "anchor_sequences": data.get("anchor_sequences", [])}

    def set_previous_corrections(self, corrections: List[WordCorrection]) -> None:
        """Store corrections from previous handlers to use as reference."""
        # The list is stored as given (not copied).
        self.previous_corrections = corrections

    @staticmethod
    def _select_best_correction(candidates: List[WordCorrection]) -> WordCorrection:
        """Return the candidate whose corrected word is most common, ties broken by confidence.

        When several candidates share the best (count, confidence) pair, the
        earliest one in ``candidates`` wins, since ``max`` keeps the first
        maximal item it encounters.
        """
        # Count each corrected word once up front instead of re-scanning the
        # candidate list inside the key function.
        votes: Dict[str, int] = {}
        for candidate in candidates:
            votes[candidate.corrected_word] = votes.get(candidate.corrected_word, 0) + 1
        return max(candidates, key=lambda c: (votes[c.corrected_word], c.confidence))

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Apply previous corrections to matching words in the current gap.

        Args:
            gap: The gap to correct.
            data: Must pass ``_validate_data`` and contain ``"word_map"``;
                ``"anchor_sequences"`` is optional.

        Returns:
            One correction per gap word whose lower-cased text equals the
            original word of a sufficiently confident previous correction;
            an empty list when ``data`` fails validation.
        """
        if not self._validate_data(data):
            return []

        word_map = data["word_map"]
        corrections = []

        # Use the centralized method to calculate reference positions
        reference_positions = WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", []))

        # Build a map of original words to their corrections
        correction_map: Dict[str, List[WordCorrection]] = {}
        for corr in self.previous_corrections:
            if corr.confidence >= self.confidence_threshold:
                correction_map.setdefault(corr.original_word.lower(), []).append(corr)

        # Remember the winner per word so a word repeated in the gap is voted on once
        best_by_word: Dict[str, WordCorrection] = {}

        # Check each word in the gap
        for i, word_id in enumerate(gap.transcribed_word_ids):
            if word_id not in word_map:
                self.logger.error(f"Word ID {word_id} not found in word map")
                continue

            word = word_map[word_id]
            word_lower = word.text.lower()

            if word_lower not in correction_map:
                continue

            # Get the most common correction for this word
            if word_lower not in best_by_word:
                best_by_word[word_lower] = self._select_best_correction(correction_map[word_lower])
            best_correction = best_by_word[word_lower]

            self.logger.debug(
                f"Applying previous correction: {word.text} -> {best_correction.corrected_word} "
                f"(confidence: {best_correction.confidence:.2f})"
            )

            corrections.append(
                WordCorrection(
                    original_word=word.text,
                    corrected_word=best_correction.corrected_word,
                    segment_index=0,
                    original_position=gap.transcription_position + i,
                    confidence=best_correction.confidence * 0.9,  # Slightly lower confidence for repeats
                    source=best_correction.source,
                    reason="RepeatCorrectionHandler: Matches previous correction",
                    alternatives={best_correction.corrected_word: 1},
                    is_deletion=best_correction.is_deletion,
                    reference_positions=reference_positions,
                    length=best_correction.length,
                    split_index=best_correction.split_index,
                    split_total=best_correction.split_total,
                    handler="RepeatCorrectionHandler",
                    word_id=word_id,
                    corrected_word_id=best_correction.corrected_word_id,
                )
            )

        return corrections
@@ -0,0 +1,259 @@
1
+ from typing import List, Dict, Tuple, Optional, Any
2
+ import logging
3
+ from metaphone import doublemetaphone
4
+ from lyrics_transcriber.types import GapSequence, WordCorrection
5
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
6
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
7
+
8
+
9
class SoundAlikeHandler(GapCorrectionHandler):
    """Handles gaps where words sound similar to reference words but are spelled differently.

    Uses the Double Metaphone algorithm to detect sound-alike words. For each word in the gap,
    it compares the word's phonetic codes against the codes of every reference word.

    The confidence of a correction is the product of:
        1. The similarity of the metaphone codes (see ``_codes_match``), with a small boost
           when both primary codes share their first two characters.
        2. A position multiplier: 0.8 when the reference word sits at a different index than
           the gap word, 1.0 otherwise (very short codes are never penalised).
        3. The ratio of reference sources agreeing on the correction.

    Examples:
        Gap: "shush look deep"
        References:
            genius: ["search", "look", "deep"]
            spotify: ["search", "look", "deep"]
        Result:
            - Correct "shush" to "search" (confidence based on metaphone similarity)
            - Skip "look" and "deep" (exact matches at the same position)
    """

    def __init__(self, logger: Optional[logging.Logger] = None, similarity_threshold: float = 0.6):
        """Initialize the handler.

        Args:
            logger: Optional logger instance
            similarity_threshold: Minimum confidence threshold for matches (default: 0.6)
        """
        self.logger = logger or logging.getLogger(__name__)
        self.similarity_threshold = similarity_threshold

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Check if any gap word has a metaphone match with any reference word.

        Args:
            gap: The gap sequence to inspect.
            data: Shared handler data; must pass ``_validate_data`` and contain ``"word_map"``.

        Returns:
            ``(True, {})`` as soon as one gap word has a non-zero metaphone match quality
            against any reference word, otherwise ``(False, {})``.
        """
        if not self._validate_data(data):
            return False, {}

        word_map = data["word_map"]

        # Must have reference words
        if not gap.reference_word_ids:
            self.logger.debug("No reference words available")
            return False, {}

        # Gap must have words
        if not gap.transcribed_word_ids:
            self.logger.debug("No gap words available")
            return False, {}

        # Check if any gap word has a metaphone match with any reference word
        for word_id in gap.transcribed_word_ids:
            if word_id not in word_map:
                continue
            word = word_map[word_id]
            word_codes = doublemetaphone(word.text)
            self.logger.debug(f"Gap word '{word.text}' has metaphone codes: {word_codes}")

            for source, ref_word_ids in gap.reference_word_ids.items():
                for ref_word_id in ref_word_ids:
                    if ref_word_id not in word_map:
                        continue
                    ref_word = word_map[ref_word_id]
                    ref_codes = doublemetaphone(ref_word.text)
                    self.logger.debug(f"Reference word '{ref_word.text}' has metaphone codes: {ref_codes}")
                    # _codes_match returns a quality score; any non-zero score counts as a match here
                    if self._codes_match(word_codes, ref_codes):
                        self.logger.debug(f"Found metaphone match between '{word.text}' and '{ref_word.text}'")
                        return True, {}

        self.logger.debug("No metaphone matches found")
        return False, {}

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Process the gap and create corrections for sound-alike matches.

        Args:
            gap: The gap sequence to correct.
            data: Shared handler data; must pass ``_validate_data`` and contain ``"word_map"``.
                An optional ``"anchor_sequences"`` entry is forwarded to
                ``WordOperations.calculate_reference_positions``.

        Returns:
            One ``WordCorrection`` per gap word for which a reference word was found whose
            position-adjusted phonetic confidence reaches ``similarity_threshold``. Gap words
            that exactly match (case-insensitively) the reference word at the same index in
            any source are skipped.
        """
        if not self._validate_data(data):
            return []

        word_map = data["word_map"]
        corrections = []

        # Use the centralized method to calculate reference positions
        reference_positions = WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", []))

        # Metaphone codes of the reference words do not depend on the gap word,
        # so compute them once instead of once per gap word.
        reference_entries = []
        for source, ref_word_ids in gap.reference_word_ids.items():
            for j, ref_word_id in enumerate(ref_word_ids):
                if ref_word_id not in word_map:
                    continue
                ref_word = word_map[ref_word_id]
                reference_entries.append((source, j, ref_word_id, ref_word, doublemetaphone(ref_word.text)))

        # For each word in the gap
        for i, word_id in enumerate(gap.transcribed_word_ids):
            if word_id not in word_map:
                continue
            word = word_map[word_id]
            word_codes = doublemetaphone(word.text)
            self.logger.debug(f"Processing '{word.text}' (codes: {word_codes})")

            # Skip if word exactly matches the reference word at the same position in any source
            exact_match = False
            for source, ref_word_ids in gap.reference_word_ids.items():
                if i < len(ref_word_ids):
                    ref_word_id = ref_word_ids[i]
                    if ref_word_id in word_map:
                        ref_word = word_map[ref_word_id]
                        if word.text.lower() == ref_word.text.lower():
                            exact_match = True
                            break
            if exact_match:
                continue

            # Find sound-alike matches in references.
            # Maps reference word text -> (agreeing sources, confidence of first hit, reference word id)
            matches: Dict[str, Tuple[List[str], float, str]] = {}

            for source, j, ref_word_id, ref_word, ref_codes in reference_entries:
                match_confidence = self._get_match_confidence(word_codes, ref_codes)
                if match_confidence < self.similarity_threshold:
                    continue

                # Special handling for short codes - don't apply position penalty
                is_short_code = any(len(c) <= 2 for c in word_codes if c) or any(len(c) <= 2 for c in ref_codes if c)
                position_multiplier = 1.0 if is_short_code or i == j else 0.8

                adjusted_confidence = match_confidence * position_multiplier
                if adjusted_confidence < self.similarity_threshold:
                    continue

                if ref_word.text not in matches:
                    matches[ref_word.text] = ([], adjusted_confidence, ref_word_id)
                # Count each source at most once per candidate word: a word repeated within a
                # single source must not inflate the agreement ratio beyond 1.0.
                agreeing_sources = matches[ref_word.text][0]
                if source not in agreeing_sources:
                    agreeing_sources.append(source)

            # Create correction for best match if any found
            if matches:
                # Prefer the candidate backed by the most sources, then the highest confidence
                best_match, (sources, base_confidence, ref_word_id) = max(matches.items(), key=lambda x: (len(x[1][0]), x[1][1]))

                source_confidence = len(sources) / len(gap.reference_word_ids)
                final_confidence = base_confidence * source_confidence

                self.logger.debug(f"Found match: {word.text} -> {best_match} " f"(confidence: {final_confidence:.2f}, sources: {sources})")

                corrections.append(
                    WordCorrection(
                        original_word=word.text,
                        corrected_word=best_match,
                        segment_index=0,
                        original_position=gap.transcription_position + i,
                        confidence=final_confidence,
                        source=", ".join(sources),
                        reason=f"SoundAlikeHandler: Phonetic match ({final_confidence:.2f} confidence)",
                        alternatives={k: len(v[0]) for k, v in matches.items()},
                        is_deletion=False,
                        reference_positions=reference_positions,
                        length=1,
                        handler="SoundAlikeHandler",
                        word_id=word_id,
                        corrected_word_id=ref_word_id,
                    )
                )

        return corrections

    def _codes_match(self, codes1: Tuple[str, str], codes2: Tuple[str, str]) -> float:
        """Check if two sets of metaphone codes match and return match quality.

        Every non-empty code of ``codes1`` is compared with every non-empty code of
        ``codes2``; the best quality over all pairs is returned.

        Args:
            codes1: Metaphone codes of the first word (empty strings are ignored).
            codes2: Metaphone codes of the second word (empty strings are ignored).

        Returns:
            0.0 when either side has no non-empty code or no pair is similar, otherwise
            the best pairwise quality (1.0 for identical codes).
        """
        # Get all non-empty codes
        codes1_set = {c for c in codes1 if c}
        codes2_set = {c for c in codes2 if c}

        if not codes1_set or not codes2_set:
            return 0.0

        best_match = 0.0
        for code1 in codes1_set:
            for code2 in codes2_set:
                # Special case for very short codes (like 'A' for 'you')
                if len(code1) <= 2 or len(code2) <= 2:
                    if code1 == code2:
                        best_match = max(best_match, 1.0)
                    elif code1 in code2 or code2 in code1:
                        best_match = max(best_match, 0.8)
                    elif code1[0] == code2[0]:  # Match first character
                        best_match = max(best_match, 0.7)
                    continue

                # From here on both codes have at least 3 characters.

                # Skip if codes are too different in length
                if abs(len(code1) - len(code2)) > 3:
                    continue

                # Exact match
                if code1 == code2:
                    best_match = max(best_match, 1.0)
                    continue

                min_len = min(len(code1), len(code2))
                max_len = max(len(code1), len(code2))

                # Check for shared characters in any position
                shared_chars = sum(1 for c in code1 if c in code2)
                if shared_chars >= min(2, min_len):
                    match_quality = 0.7 + (0.1 * shared_chars / max_len)
                    best_match = max(best_match, match_quality)
                    continue

                # Compare aligned characters (allow 1-2 character differences)
                differences = sum(1 for a, b in zip(code1[:min_len], code2[:min_len]) if a != b)
                if differences <= 2:
                    match_quality = 0.85 - (differences * 0.1)
                    best_match = max(best_match, match_quality)
                    continue

                # Common prefix/suffix match
                common_prefix_len = 0
                for a, b in zip(code1, code2):
                    if a != b:
                        break
                    common_prefix_len += 1

                common_suffix_len = 0
                for a, b in zip(code1[::-1], code2[::-1]):
                    if a != b:
                        break
                    common_suffix_len += 1

                if common_prefix_len >= 1 or common_suffix_len >= 1:
                    match_quality = 0.7 + (0.1 * max(common_prefix_len, common_suffix_len))
                    best_match = max(best_match, match_quality)

                # NOTE: no separate substring check is needed. Reaching this point means
                # fewer than two characters of code1 occur in code2, so the codes cannot
                # share a substring of length two or more.

        return best_match

    def _get_match_confidence(self, codes1: Tuple[str, str], codes2: Tuple[str, str]) -> float:
        """Calculate confidence score for a metaphone code match.

        Args:
            codes1: Metaphone codes of the first word; index 0 is used as its primary code.
            codes2: Metaphone codes of the second word; index 0 is used as its primary code.

        Returns:
            0.0 when ``_codes_match`` finds no similarity; otherwise the match quality,
            raised by 0.1 (capped at 1.0) when both primary codes have at least two
            characters and share their first two.
        """
        match_quality = self._codes_match(codes1, codes2)
        if match_quality == 0:
            return 0.0

        # Get primary codes (first code of each tuple)
        code1, code2 = codes1[0], codes2[0]

        # Boost confidence for codes that share prefixes
        if code1 and code2 and len(code1) >= 2 and len(code2) >= 2:
            if code1[:2] == code2[:2]:
                match_quality = min(1.0, match_quality + 0.1)

        return match_quality