karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1815 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.23.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.23.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,252 @@
1
+ from typing import List, Tuple, Dict, Any, Optional
2
+ import spacy
3
+ import logging
4
+ import pyphen
5
+ import nltk
6
+ from nltk.corpus import cmudict
7
+ import syllables
8
+ from spacy_syllables import SpacySyllables
9
+
10
+ from lyrics_transcriber.types import GapSequence, WordCorrection
11
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
12
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
13
+
14
+
15
class SyllablesMatchHandler(GapCorrectionHandler):
    """Handles gaps where number of syllables in reference text matches number of syllables in transcription.

    Syllables are counted with four independent methods (the spaCy
    "syllables" pipe, Pyphen hyphenation, NLTK's CMU dictionary and the
    ``syllables`` package). A gap can be handled when any count obtained for
    the transcribed words equals any count obtained for the words of a
    reference source.
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Load the spaCy model, the Pyphen dictionary and the CMU dictionary.

        Args:
            logger: Logger to use; falls back to this module's logger.

        Raises:
            OSError: If ``en_core_web_sm`` is not installed and the download
                command exits with a non-zero status.
        """
        super().__init__(logger)
        self.logger = logger or logging.getLogger(__name__)

        # Marking SpacySyllables as used to prevent unused import warning
        _ = SpacySyllables

        # Load spacy model with syllables pipeline
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            self.logger.info("Language model 'en_core_web_sm' not found. Attempting to download...")
            import subprocess
            import sys

            try:
                # Run the download with the interpreter that is executing this
                # code: a bare "python" may be absent from PATH or belong to a
                # different environment than the one spaCy is installed in.
                subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
                self.nlp = spacy.load("en_core_web_sm")
                self.logger.info("Successfully downloaded and loaded en_core_web_sm")
            except subprocess.CalledProcessError as e:
                raise OSError(
                    "Language model 'en_core_web_sm' could not be downloaded. "
                    "Please install it manually with: python -m spacy download en_core_web_sm"
                ) from e

        # Add syllables component to pipeline if not already present
        if "syllables" not in self.nlp.pipe_names:
            self.nlp.add_pipe("syllables", after="tagger")

        # Initialize Pyphen for English
        self.dic = pyphen.Pyphen(lang="en_US")

        # Initialize NLTK's CMU dictionary, downloading the corpus on first use
        try:
            self.cmudict = cmudict.dict()
        except LookupError:
            nltk.download("cmudict")
            self.cmudict = cmudict.dict()

    def _count_syllables_spacy(self, words: List[str]) -> int:
        """Count syllables using spacy_syllables.

        Tokens for which the pipe reports no count (``None`` or 0) are
        counted as one syllable.
        """
        text = " ".join(words)
        doc = self.nlp(text)
        total_syllables = sum(token._.syllables_count or 1 for token in doc)
        return total_syllables

    def _count_syllables_pyphen(self, words: List[str]) -> int:
        """Count syllables using pyphen.

        Each word contributes the number of "-"-separated pieces in its
        hyphenated form, or one when the hyphenated form is empty.
        """
        total_syllables = 0
        for word in words:
            hyphenated = self.dic.inserted(word)
            syllables_count = len(hyphenated.split("-")) if hyphenated else 1
            total_syllables += syllables_count
        return total_syllables

    def _count_syllables_nltk(self, words: List[str]) -> int:
        """Count syllables using NLTK's CMU dictionary.

        For a known word, the phonemes of its first listed pronunciation that
        end in a digit are counted; a word not in the dictionary counts as one
        syllable.
        """
        total_syllables = 0
        for word in words:
            word = word.lower()
            if word in self.cmudict:
                syllables_count = len([ph for ph in self.cmudict[word][0] if ph[-1].isdigit()])
                total_syllables += syllables_count
            else:
                total_syllables += 1
        return total_syllables

    def _count_syllables_lib(self, words: List[str]) -> int:
        """Count syllables using the syllables library."""
        total_syllables = 0
        for word in words:
            syllables_count = syllables.estimate(word)
            total_syllables += syllables_count
        return total_syllables

    def _count_syllables(self, words: List[str]) -> List[int]:
        """Count syllables using multiple methods.

        Returns:
            The counts in the order ``[spacy, pyphen, nltk, syllables]``.
        """
        spacy_count = self._count_syllables_spacy(words)
        pyphen_count = self._count_syllables_pyphen(words)
        nltk_count = self._count_syllables_nltk(words)
        syllables_count = self._count_syllables_lib(words)

        text = " ".join(words)
        self.logger.debug(
            f"Syllable counts for '{text}': spacy={spacy_count}, pyphen={pyphen_count}, nltk={nltk_count}, syllables={syllables_count}"
        )
        return [spacy_count, pyphen_count, nltk_count, syllables_count]

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Check whether any reference source matches the gap's syllable count.

        Args:
            gap: Gap whose transcribed and reference word IDs are examined.
            data: Must contain ``"word_map"``, a mapping from word ID to a
                word object exposing ``.text``.

        Returns:
            ``(True, details)`` for the first source where any counting method
            for the gap equals any counting method for the reference, with
            ``details`` holding ``gap_syllables``, ``matching_source``,
            ``reference_word_ids`` and ``word_map``; otherwise ``(False, {})``.
        """
        # Must have reference words
        if not gap.reference_word_ids:
            self.logger.debug("No reference word IDs available")
            return False, {}

        # Get word lookup map from data
        if not data or "word_map" not in data:
            self.logger.error("No word_map provided in data")
            return False, {}

        word_map = data["word_map"]

        # Get actual words from word IDs
        gap_words = []
        for word_id in gap.transcribed_word_ids:
            if word_id not in word_map:
                self.logger.error(f"Word ID {word_id} not found in word_map")
                return False, {}
            gap_words.append(word_map[word_id].text)

        # Get syllable counts for gap text using different methods
        gap_syllables = self._count_syllables(gap_words)

        # Check if any reference source has matching syllable count with any method
        for source, ref_word_ids in gap.reference_word_ids.items():
            # Resolve reference words, remembering which IDs were actually found
            ref_words = []
            resolved_ref_word_ids = []
            for word_id in ref_word_ids:
                if word_id not in word_map:
                    self.logger.error(f"Reference word ID {word_id} not found in word_map")
                    continue
                ref_words.append(word_map[word_id].text)
                resolved_ref_word_ids.append(word_id)

            if not ref_words:
                continue

            ref_syllables = self._count_syllables(ref_words)

            # If any counting method matches between gap and reference, we can handle it
            if any(gap_count == ref_count for gap_count in gap_syllables for ref_count in ref_syllables):
                self.logger.debug(f"Found matching syllable count in source '{source}'")
                return True, {
                    "gap_syllables": gap_syllables,
                    "matching_source": source,
                    # Only the IDs the match was computed from: handle() indexes
                    # word_map with every ID in this list, so an unresolved ID
                    # would raise KeyError there.
                    "reference_word_ids": resolved_ref_word_ids,
                    "word_map": word_map,
                }

        self.logger.debug("No reference source had matching syllable count")
        return False, {}

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Handle the gap using syllable matching.

        Args:
            gap: Gap to correct.
            data: The details dict returned by ``can_handle`` (keys
                ``matching_source``, ``reference_word_ids``, ``word_map``).
                When falsy, ``can_handle`` is invoked without data.

        Returns:
            Combine corrections when the gap has more words than the
            reference, split corrections when it has fewer, and one
            replacement per differing word pair when the counts are equal.
            Empty when the gap cannot be handled or either word list is empty.
        """
        if not data:
            can_handle, data = self.can_handle(gap)
            if not can_handle:
                return []

        corrections = []
        matching_source = data["matching_source"]
        reference_word_ids = data["reference_word_ids"]
        word_map = data["word_map"]

        # Get the actual words from word IDs
        gap_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids]
        ref_words = [word_map[word_id].text for word_id in reference_word_ids]

        # Nothing to align if either side is empty; this also keeps the ratio
        # computations below from dividing by zero.
        if not gap_words or not ref_words:
            return corrections

        # Use the centralized method to calculate reference positions
        reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])

        # Since we matched syllable counts for the entire gap, we should handle all words
        if len(gap_words) > len(ref_words):
            # Multiple transcribed words -> fewer reference words
            # Try to distribute the reference words across the gap words
            words_per_ref = len(gap_words) / len(ref_words)

            for ref_idx, ref_word_id in enumerate(reference_word_ids):
                start_idx = int(ref_idx * words_per_ref)
                end_idx = int((ref_idx + 1) * words_per_ref)

                # Get the group of words to combine
                words_to_combine = gap_words[start_idx:end_idx]
                word_ids_to_combine = gap.transcribed_word_ids[start_idx:end_idx]
                corrections.extend(
                    WordOperations.create_word_combine_corrections(
                        original_words=words_to_combine,
                        reference_word=word_map[ref_word_id].text,
                        original_position=gap.transcription_position + start_idx,
                        source=matching_source,
                        confidence=0.8,
                        combine_reason="Words combined based on syllable match",
                        delete_reason="Word removed as part of syllable match combination",
                        reference_positions=reference_positions,
                        handler="SyllablesMatchHandler",
                        original_word_ids=word_ids_to_combine,
                        corrected_word_id=ref_word_id,
                    )
                )

        elif len(gap_words) < len(ref_words):
            # Single transcribed word -> multiple reference words
            words_per_gap = len(ref_words) / len(gap_words)

            for i, word_id in enumerate(gap.transcribed_word_ids):
                start_idx = int(i * words_per_gap)
                end_idx = int((i + 1) * words_per_gap)
                ref_word_ids_for_split = reference_word_ids[start_idx:end_idx]
                ref_words_for_split = [word_map[ref_id].text for ref_id in ref_word_ids_for_split]

                corrections.extend(
                    WordOperations.create_word_split_corrections(
                        original_word=word_map[word_id].text,
                        reference_words=ref_words_for_split,
                        original_position=gap.transcription_position + i,
                        source=matching_source,
                        confidence=0.8,
                        reason="Split word based on syllable match",
                        reference_positions=reference_positions,
                        handler="SyllablesMatchHandler",
                        original_word_id=word_id,
                        corrected_word_ids=ref_word_ids_for_split,
                    )
                )

        else:
            # One-to-one replacement
            for i, (orig_word_id, ref_word_id) in enumerate(zip(gap.transcribed_word_ids, reference_word_ids)):
                orig_word = word_map[orig_word_id]
                ref_word = word_map[ref_word_id]

                # Words that already agree (ignoring case) need no correction
                if orig_word.text.lower() != ref_word.text.lower():
                    corrections.append(
                        WordOperations.create_word_replacement_correction(
                            original_word=orig_word.text,
                            corrected_word=ref_word.text,
                            original_position=gap.transcription_position + i,
                            source=matching_source,
                            confidence=0.8,
                            reason=f"Source '{matching_source}' had matching syllable count",
                            reference_positions=reference_positions,
                            handler="SyllablesMatchHandler",
                            original_word_id=orig_word_id,
                            corrected_word_id=ref_word_id,
                        )
                    )

        return corrections
@@ -0,0 +1,80 @@
1
+ from typing import List, Tuple, Dict, Any, Optional
2
+ import logging
3
+
4
+ from lyrics_transcriber.types import GapSequence, WordCorrection
5
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
6
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
7
+
8
+
9
class WordCountMatchHandler(GapCorrectionHandler):
    """Handles gaps where reference sources agree and have matching word counts."""

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Store the given logger, or this module's logger when none is given."""
        super().__init__(logger)
        self.logger = logger if logger else logging.getLogger(__name__)

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Decide whether every reference source lines up word-for-word with the gap.

        Returns ``(True, {"word_map": ...})`` only when the gap has reference
        word IDs, ``data`` passes ``_validate_data``, every source has exactly
        ``gap.length`` IDs, and all sources list the same IDs. Otherwise
        returns ``(False, {})``.
        """
        rejected: Tuple[bool, Dict[str, Any]] = (False, {})

        # A gap without any reference words cannot be matched
        if not gap.reference_word_ids:
            self.logger.debug("No reference word IDs available.")
            return rejected

        if not self._validate_data(data):
            return rejected

        id_lists = list(gap.reference_word_ids.values())

        # Any source whose word count differs from the gap disqualifies it
        if any(len(ids) != gap.length for ids in id_lists):
            self.logger.debug("Not all sources have the same number of words as the gap.")
            return rejected

        # With several sources, each one must equal the first
        if len(id_lists) > 1 and any(ids != id_lists[0] for ids in id_lists[1:]):
            self.logger.debug("Not all sources agree on the words.")
            return rejected

        self.logger.debug("All sources agree and have matching word counts.")
        return True, {"word_map": data["word_map"]}

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Build a replacement correction for each transcribed word that differs from its reference.

        Words are paired positionally with the first source's reference IDs.
        Pairs whose texts are equal ignoring case are left alone, and pairs
        with an ID missing from the word map are logged and skipped. Returns
        an empty list when ``data`` fails ``_validate_data``.
        """
        if not self._validate_data(data):
            return []

        lookup = data["word_map"]
        source_names = list(gap.reference_word_ids.keys())
        first_source_ids = gap.reference_word_ids[source_names[0]]
        # Every source is credited, although the IDs come from the first one
        source_label = ", ".join(source_names)

        positions = WordOperations.calculate_reference_positions(gap)

        results = []
        paired_ids = zip(gap.transcribed_word_ids, first_source_ids)
        for offset, (transcribed_id, reference_id) in enumerate(paired_ids):
            # Both IDs must resolve to actual words
            if transcribed_id not in lookup:
                self.logger.error(f"Original word ID {transcribed_id} not found in word_map")
                continue
            transcribed = lookup[transcribed_id]

            if reference_id not in lookup:
                self.logger.error(f"Reference word ID {reference_id} not found in word_map")
                continue
            reference = lookup[reference_id]

            # Words that already agree (ignoring case) need no correction
            if transcribed.text.lower() == reference.text.lower():
                continue

            fix = WordOperations.create_word_replacement_correction(
                original_word=transcribed.text,
                corrected_word=reference.text,
                original_position=gap.transcription_position + offset,
                source=source_label,
                confidence=1.0,
                reason="Reference sources had same word count as gap",
                reference_positions=positions,
                handler="WordCountMatchHandler",
                original_word_id=transcribed_id,
                corrected_word_id=reference_id,  # Use the reference word's ID
            )
            results.append(fix)
            self.logger.debug(f"Correction made: {fix}")

        return results
@@ -0,0 +1,187 @@
1
+ from typing import List, Optional, Dict, Any
2
+ from lyrics_transcriber.types import WordCorrection, GapSequence
3
+ from lyrics_transcriber.utils.word_utils import WordUtils
4
+
5
+
6
class WordOperations:
    """Utility class for common word manipulation operations used by correction handlers."""

    @staticmethod
    def calculate_reference_positions(
        gap: GapSequence, sources: Optional[List[str]] = None, anchor_sequences: Optional[List[Any]] = None
    ) -> Dict[str, int]:
        """Calculate reference positions for given sources based on preceding anchor.

        For each source, the position is the preceding anchor's reference
        position for that source plus the number of reference word IDs the
        anchor holds for it.

        Args:
            gap: The gap sequence containing the preceding anchor ID.
            sources: Optional list of sources to calculate positions for.
                If None (or empty), all sources in ``gap.reference_word_ids``
                are used.
            anchor_sequences: Items exposing an ``anchor`` attribute, searched
                for the anchor whose ``id`` equals ``gap.preceding_anchor_id``.

        Returns:
            Dictionary mapping source names to their reference positions.
            Empty when the gap has no preceding anchor ID, when no anchor
            sequences are given, or when the anchor cannot be found. Sources
            missing from the anchor's ``reference_positions`` are omitted.
        """
        if not gap.preceding_anchor_id or not anchor_sequences:
            return {}

        # Find the preceding anchor in the sequences
        preceding_anchor = next(
            (scored_anchor.anchor for scored_anchor in anchor_sequences if scored_anchor.anchor.id == gap.preceding_anchor_id),
            None,
        )
        if not preceding_anchor:
            return {}

        # If no sources specified, use all sources from reference words
        sources_to_check = sources or list(gap.reference_word_ids.keys())

        reference_positions = {}
        for source in sources_to_check:
            if source in preceding_anchor.reference_positions:
                anchor_pos = preceding_anchor.reference_positions[source]
                # Anchor start plus anchor length for this source.
                reference_positions[source] = anchor_pos + len(preceding_anchor.reference_word_ids[source])

        return reference_positions

    @staticmethod
    def create_word_replacement_correction(
        original_word: str,
        corrected_word: str,
        original_position: int,
        source: str,
        confidence: float,
        reason: str,
        handler: str,
        reference_positions: Optional[Dict[str, int]] = None,
        original_word_id: Optional[str] = None,
        corrected_word_id: Optional[str] = None,
    ) -> WordCorrection:
        """Creates a correction for replacing a single word with another word.

        Args:
            original_word: Text of the word being replaced.
            corrected_word: Replacement text.
            original_position: Position recorded on the correction.
            source: Source name(s) recorded on the correction.
            confidence: Confidence value recorded on the correction.
            reason: Explanation recorded on the correction.
            handler: Name of the handler producing the correction.
            reference_positions: Optional per-source reference positions.
            original_word_id: ID stored as the correction's ``word_id``.
            corrected_word_id: ID for the corrected word. When None, a new ID
                is generated if ``corrected_word`` is non-empty, otherwise
                the field stays None.

        Returns:
            A single ``WordCorrection`` with ``length=1`` and ``segment_index=0``.
        """
        if corrected_word_id is None and corrected_word:
            corrected_word_id = WordUtils.generate_id()

        return WordCorrection(
            original_word=original_word,
            corrected_word=corrected_word,
            segment_index=0,
            original_position=original_position,
            confidence=confidence,
            source=source,
            reason=reason,
            alternatives={},
            reference_positions=reference_positions,
            length=1,
            handler=handler,
            word_id=original_word_id,
            corrected_word_id=corrected_word_id,
        )

    @staticmethod
    def create_word_split_corrections(
        original_word: str,
        reference_words: List[str],
        original_position: int,
        source: str,
        confidence: float,
        reason: str,
        handler: str,
        reference_positions: Optional[Dict[str, int]] = None,
        original_word_id: Optional[str] = None,
        corrected_word_ids: Optional[List[str]] = None,
    ) -> List[WordCorrection]:
        """Creates corrections for splitting a single word into multiple words.

        One correction is produced per entry of ``reference_words``; each
        carries its ``split_index`` and the shared ``split_total``.

        Args:
            original_word: Text of the word being split.
            reference_words: The words the original is split into.
            original_position: Position recorded on every correction.
            source: Source name(s) recorded on the corrections.
            confidence: Confidence value recorded on the corrections.
            reason: Explanation recorded on the corrections.
            handler: Name of the handler producing the corrections.
            reference_positions: Optional per-source reference positions.
            original_word_id: Accepted for interface compatibility; each split
                correction gets a freshly generated ``word_id`` instead.
                NOTE(review): confirm that ignoring this ID is intended.
            corrected_word_ids: Optional IDs for the resulting words. Missing
                entries (None, or a list shorter than ``reference_words``)
                are filled with generated IDs; surplus entries are ignored.

        Returns:
            List of ``WordCorrection`` objects, one per reference word.
        """
        # Pad rather than zip-truncate: a short ID list must not silently drop
        # split words while split_total still reports the full count.
        split_ids = list(corrected_word_ids) if corrected_word_ids is not None else []
        missing = len(reference_words) - len(split_ids)
        split_ids.extend(WordUtils.generate_id() for _ in range(missing))

        split_total = len(reference_words)
        corrections = []
        for split_idx, (ref_word, word_id) in enumerate(zip(reference_words, split_ids)):
            corrections.append(
                WordCorrection(
                    original_word=original_word,
                    corrected_word=ref_word,
                    segment_index=0,
                    original_position=original_position,
                    confidence=confidence,
                    source=source,
                    reason=reason,
                    alternatives={},
                    split_index=split_idx,
                    split_total=split_total,
                    reference_positions=reference_positions,
                    length=1,  # Each split word is length 1
                    handler=handler,
                    word_id=WordUtils.generate_id(),  # Generate new ID for each split
                    corrected_word_id=word_id,
                )
            )
        return corrections

    @staticmethod
    def create_word_combine_corrections(
        original_words: List[str],
        reference_word: str,
        original_position: int,
        source: str,
        confidence: float,
        combine_reason: str,
        delete_reason: str,
        handler: str,
        reference_positions: Optional[Dict[str, int]] = None,
        original_word_ids: Optional[List[str]] = None,
        corrected_word_id: Optional[str] = None,
    ) -> List[WordCorrection]:
        """Creates corrections for combining multiple words into a single word.

        The first original word is replaced by ``reference_word`` (with
        ``length`` spanning all original words); every following original
        word gets a deletion correction at its own position.

        Args:
            original_words: The words being combined; must be non-empty
                (an empty list raises ``IndexError``).
            reference_word: The single word that replaces them.
            original_position: Position of the first original word.
            source: Source name(s) recorded on the corrections.
            confidence: Confidence value recorded on the corrections.
            combine_reason: Explanation recorded on the replacement correction.
            delete_reason: Explanation recorded on the deletion corrections.
            handler: Name of the handler producing the corrections.
            reference_positions: Optional per-source reference positions.
            original_word_ids: Accepted for interface compatibility; every
                correction gets a freshly generated ``word_id`` instead.
                NOTE(review): confirm that ignoring these IDs is intended.
            corrected_word_id: ID for the combined word; generated when falsy.

        Returns:
            List with one replacement correction followed by one deletion
            correction per additional original word.
        """
        final_word_id = corrected_word_id or WordUtils.generate_id()

        # First word gets replaced
        corrections = [
            WordCorrection(
                original_word=original_words[0],
                corrected_word=reference_word,
                segment_index=0,
                original_position=original_position,
                confidence=confidence,
                source=source,
                reason=combine_reason,
                alternatives={},
                reference_positions=reference_positions,
                length=len(original_words),  # Combined word spans all original words
                handler=handler,
                word_id=WordUtils.generate_id(),  # Generate new ID for combined word
                corrected_word_id=final_word_id,
            )
        ]

        # Additional words get marked for deletion. Iterate the words directly so
        # a short original_word_ids list cannot drop deletions.
        for i, word in enumerate(original_words[1:], start=1):
            corrections.append(
                WordCorrection(
                    original_word=word,
                    corrected_word="",
                    segment_index=0,
                    original_position=original_position + i,
                    confidence=confidence,
                    source=source,
                    reason=delete_reason,
                    alternatives={},
                    is_deletion=True,
                    reference_positions=reference_positions,
                    length=1,  # Deleted words are length 1
                    handler=handler,
                    word_id=WordUtils.generate_id(),  # Generate new ID for each deleted word
                    corrected_word_id=None,  # Deleted words don't need a corrected ID
                )
            )

        return corrections