karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,50 @@
1
+ import os
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Dict, Optional
4
+
5
+
6
@dataclass
class TranscriberConfig:
    """Credentials and endpoint identifiers for the transcription backends.

    Every field is optional and defaults to ``None``.
    """

    # Token for the AudioShake API.
    audioshake_api_token: Optional[str] = field(default=None)
    # API key for RunPod.
    runpod_api_key: Optional[str] = field(default=None)
    # RunPod endpoint id for Whisper (presumably the endpoint hosting the model).
    whisper_runpod_id: Optional[str] = field(default=None)
13
+
14
+
15
@dataclass
class LyricsConfig:
    """Credentials and inputs for the lyrics sources.

    Every field is optional and defaults to ``None``.
    """

    # Token for the Genius API.
    genius_api_token: Optional[str] = field(default=None)
    # RapidAPI key.
    rapidapi_key: Optional[str] = field(default=None)
    # Spotify cookie value.
    spotify_cookie: Optional[str] = field(default=None)
    # Path to a lyrics file.
    lyrics_file: Optional[str] = field(default=None)
23
+
24
def _default_cache_dir() -> str:
    """Return ``$LYRICS_TRANSCRIBER_CACHE_DIR`` if set, else ``~/lyrics-transcriber-cache``."""
    return os.getenv(
        "LYRICS_TRANSCRIBER_CACHE_DIR",
        os.path.join(os.path.expanduser("~"), "lyrics-transcriber-cache"),
    )


@dataclass
class OutputConfig:
    """Configuration for output generation.

    All fields have defaults, so ``OutputConfig()`` is valid. The directory
    defaults are resolved when an instance is created rather than when the
    module is imported, so they follow the current working directory and the
    ``LYRICS_TRANSCRIBER_CACHE_DIR`` environment variable at that moment.
    """

    # Path to the styles JSON file; an empty string means "no styles file".
    output_styles_json: str = ""
    # Fallback line length used when the styles do not specify one.
    default_max_line_length: int = 36
    # Parsed styles; starts empty.
    styles: Dict[str, Any] = field(default_factory=dict)
    # Directory for generated files; defaults to the current working directory.
    output_dir: Optional[str] = field(default_factory=os.getcwd)
    # Directory for cached data; see _default_cache_dir().
    cache_dir: str = field(default_factory=_default_cache_dir)

    # Workflow stage toggles.
    fetch_lyrics: bool = True
    run_transcription: bool = True
    run_correction: bool = True
    enable_review: bool = True

    # Output format toggles and settings.
    generate_plain_text: bool = True
    generate_lrc: bool = True
    generate_cdg: bool = True
    render_video: bool = True
    video_resolution: str = "360p"
    subtitle_offset_ms: int = 0

    # Countdown feature for songs that start too quickly
    add_countdown: bool = True
@@ -0,0 +1,594 @@
1
+ import os
2
+ import logging
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, Optional, List
6
+ from lyrics_transcriber.types import LyricsData, TranscriptionResult, CorrectionResult
7
+ from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
8
+ from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
9
+ from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
10
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
11
+ from lyrics_transcriber.lyrics.genius import GeniusProvider
12
+ from lyrics_transcriber.lyrics.spotify import SpotifyProvider
13
+ from lyrics_transcriber.lyrics.musixmatch import MusixmatchProvider
14
+ from lyrics_transcriber.lyrics.lrclib import LRCLIBProvider
15
+ from lyrics_transcriber.output.generator import OutputGenerator
16
+ from lyrics_transcriber.correction.corrector import LyricsCorrector
17
+ from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
18
+ from lyrics_transcriber.lyrics.file_provider import FileProvider
19
+
20
+
21
@dataclass
class LyricsControllerResult:
    """Container for everything the transcription/correction run produces."""

    # --- Raw inputs gathered from the individual sources ---
    # Fetched lyrics keyed by a string (presumably the provider name).
    lyrics_results: dict[str, LyricsData] = field(default_factory=dict)
    # One entry per transcription result.
    transcription_results: List[TranscriptionResult] = field(default_factory=list)

    # --- Result of the correction step (None until it has run) ---
    transcription_corrected: Optional[CorrectionResult] = None

    # --- Paths of generated files (None when not produced) ---
    lrc_filepath: Optional[str] = None
    ass_filepath: Optional[str] = None
    video_filepath: Optional[str] = None
    mp3_filepath: Optional[str] = None
    cdg_filepath: Optional[str] = None
    cdg_zip_filepath: Optional[str] = None
    original_txt: Optional[str] = None
    corrected_txt: Optional[str] = None
    corrections_json: Optional[str] = None

    # --- Countdown padding details, kept so the same padding can be
    # applied to other audio files ---
    countdown_padding_added: bool = False
    countdown_padding_seconds: float = 0.0
    padded_audio_filepath: Optional[str] = None
47
+
48
+
49
+ class LyricsTranscriber:
50
+ """
51
+ Controller class that orchestrates the lyrics transcription workflow:
52
+ 1. Fetch lyrics from internet sources
53
+ 2. Run multiple transcription methods
54
+ 3. Correct transcribed lyrics using fetched lyrics
55
+ 4. Generate output formats (LRC, ASS, video)
56
+ """
57
+
58
+ def __init__(
59
+ self,
60
+ audio_filepath: str,
61
+ artist: Optional[str] = None,
62
+ title: Optional[str] = None,
63
+ transcriber_config: Optional[TranscriberConfig] = None,
64
+ lyrics_config: Optional[LyricsConfig] = None,
65
+ output_config: Optional[OutputConfig] = None,
66
+ transcribers: Optional[Dict[str, BaseTranscriber]] = None,
67
+ lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
68
+ corrector: Optional[LyricsCorrector] = None,
69
+ output_generator: Optional[OutputGenerator] = None,
70
+ logger: Optional[logging.Logger] = None,
71
+ log_level: int = logging.DEBUG,
72
+ log_formatter: Optional[logging.Formatter] = None,
73
+ ):
74
+ # Set up logging
75
+ self.logger = logger or logging.getLogger(__name__)
76
+ if not logger:
77
+ self.logger.setLevel(log_level)
78
+ if not self.logger.handlers:
79
+ handler = logging.StreamHandler()
80
+ formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
81
+ handler.setFormatter(formatter)
82
+ self.logger.addHandler(handler)
83
+
84
+ self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")
85
+
86
+ # Store configs (with defaults if not provided)
87
+ self.transcriber_config = transcriber_config or TranscriberConfig()
88
+ self.lyrics_config = lyrics_config or LyricsConfig()
89
+ self.output_config = output_config or OutputConfig()
90
+
91
+ # Check if styles JSON is available for CDG and video features
92
+ if not self.output_config.output_styles_json or not os.path.exists(self.output_config.output_styles_json):
93
+ if self.output_config.generate_cdg or self.output_config.render_video:
94
+ self.logger.warning(
95
+ f"Output styles JSON file not found: {self.output_config.output_styles_json}. "
96
+ "CDG and video generation will be disabled."
97
+ )
98
+ self.output_config.generate_cdg = False
99
+ self.output_config.render_video = False
100
+
101
+ # Basic settings with sanitized filenames
102
+ self.audio_filepath = audio_filepath
103
+ self.artist = artist
104
+ self.title = title
105
+ self.output_prefix = self._create_sanitized_output_prefix(artist, title)
106
+
107
+ # Add after creating necessary folders
108
+ self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
109
+ self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
110
+
111
+ # Create necessary folders
112
+ os.makedirs(self.output_config.cache_dir, exist_ok=True)
113
+ os.makedirs(self.output_config.output_dir, exist_ok=True)
114
+
115
+ # Initialize results
116
+ self.results = LyricsControllerResult()
117
+
118
+ # Load styles early so lyrics providers can use them
119
+ self._load_styles()
120
+
121
+ # Initialize components (with dependency injection)
122
+ self.transcribers = transcribers or self._initialize_transcribers()
123
+ self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
124
+ self.corrector = corrector or LyricsCorrector(cache_dir=self.output_config.cache_dir, logger=self.logger)
125
+ self.output_generator = output_generator or self._initialize_output_generator()
126
+
127
+ # Log enabled features
128
+ self.logger.info("Enabled features:")
129
+ self.logger.info(f" Lyrics fetching: {'enabled' if self.output_config.fetch_lyrics else 'disabled'}")
130
+ self.logger.info(f" Transcription: {'enabled' if self.output_config.run_transcription else 'disabled'}")
131
+ self.logger.info(f" Lyrics correction: {'enabled' if self.output_config.run_correction else 'disabled'}")
132
+ self.logger.info(f" Plain text output: {'enabled' if self.output_config.generate_plain_text else 'disabled'}")
133
+ self.logger.info(f" LRC file generation: {'enabled' if self.output_config.generate_lrc else 'disabled'}")
134
+ self.logger.info(f" CDG file generation: {'enabled' if self.output_config.generate_cdg else 'disabled'}")
135
+ self.logger.info(f" Video rendering: {'enabled' if self.output_config.render_video else 'disabled'}")
136
+ if self.output_config.render_video:
137
+ self.logger.info(f" Video resolution: {self.output_config.video_resolution}")
138
+
139
+ def _load_styles(self) -> None:
140
+ """Load styles from JSON file if available."""
141
+ if self.output_config.output_styles_json and os.path.exists(self.output_config.output_styles_json):
142
+ try:
143
+ with open(self.output_config.output_styles_json, "r") as f:
144
+ self.output_config.styles = json.load(f)
145
+ self.logger.debug(f"Loaded output styles from: {self.output_config.output_styles_json}")
146
+ except Exception as e:
147
+ self.logger.warning(f"Failed to load output styles file: {str(e)}")
148
+ self.output_config.styles = {}
149
+ else:
150
+ self.logger.debug("No styles JSON file provided or file does not exist")
151
+ self.output_config.styles = {}
152
+
153
+ def _sanitize_filename(self, filename: str) -> str:
154
+ """Replace or remove characters that are unsafe for filenames."""
155
+ if not filename:
156
+ return ""
157
+ # Replace problematic characters with underscores
158
+ for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
159
+ filename = filename.replace(char, "_")
160
+ # Remove any trailing spaces
161
+ filename = filename.rstrip(" ")
162
+ return filename
163
+
164
+ def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
165
+ """Create a sanitized output prefix from artist and title."""
166
+ if artist and title:
167
+ sanitized_artist = self._sanitize_filename(artist)
168
+ sanitized_title = self._sanitize_filename(title)
169
+ return f"{sanitized_artist} - {sanitized_title}"
170
+ else:
171
+ return self._sanitize_filename(os.path.splitext(os.path.basename(self.audio_filepath))[0])
172
+
173
+ def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
174
+ """Initialize available transcription services."""
175
+ transcribers = {}
176
+
177
+ # Add debug logging for config values
178
+ self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
179
+ self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
180
+
181
+ if self.transcriber_config.audioshake_api_token:
182
+ self.logger.debug("Initializing AudioShake transcriber")
183
+ transcribers["audioshake"] = {
184
+ "instance": AudioShakeTranscriber(
185
+ cache_dir=self.output_config.cache_dir,
186
+ config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
187
+ logger=self.logger,
188
+ ),
189
+ "priority": 1, # AudioShake has highest priority
190
+ }
191
+ else:
192
+ self.logger.debug("Skipping AudioShake transcriber - no API token provided")
193
+
194
+ if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
195
+ self.logger.debug("Initializing Whisper transcriber")
196
+ transcribers["whisper"] = {
197
+ "instance": WhisperTranscriber(
198
+ cache_dir=self.output_config.cache_dir,
199
+ config=WhisperConfig(
200
+ runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
201
+ ),
202
+ logger=self.logger,
203
+ ),
204
+ "priority": 2, # Whisper has lower priority
205
+ }
206
+ else:
207
+ self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
208
+
209
+ return transcribers
210
+
211
+ def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
212
+ """Initialize available lyrics providers."""
213
+ providers = {}
214
+
215
+ # Get max_line_length from styles if available, otherwise use config default
216
+ max_line_length = self.output_config.styles.get("karaoke", {}).get("max_line_length", self.output_config.default_max_line_length)
217
+ self.logger.info(f"Using max_line_length for lyrics providers: {max_line_length}")
218
+
219
+ # Create provider config with all necessary parameters
220
+ provider_config = LyricsProviderConfig(
221
+ genius_api_token=self.lyrics_config.genius_api_token,
222
+ rapidapi_key=self.lyrics_config.rapidapi_key,
223
+ spotify_cookie=self.lyrics_config.spotify_cookie,
224
+ lyrics_file=self.lyrics_config.lyrics_file,
225
+ cache_dir=self.output_config.cache_dir,
226
+ audio_filepath=self.audio_filepath,
227
+ max_line_length=max_line_length,
228
+ )
229
+
230
+ if provider_config.lyrics_file and os.path.exists(provider_config.lyrics_file):
231
+ self.logger.debug(f"Initializing File lyrics provider with file: {provider_config.lyrics_file}")
232
+ providers["file"] = FileProvider(config=provider_config, logger=self.logger)
233
+ return providers
234
+
235
+ # LRCLIB - always enabled (no API key required)
236
+ self.logger.debug("Initializing LRCLIB lyrics provider")
237
+ providers["lrclib"] = LRCLIBProvider(config=provider_config, logger=self.logger)
238
+
239
+ if provider_config.genius_api_token:
240
+ self.logger.debug("Initializing Genius lyrics provider")
241
+ providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
242
+ else:
243
+ self.logger.debug("Skipping Genius provider - no API token provided")
244
+
245
+ if provider_config.spotify_cookie:
246
+ self.logger.debug("Initializing Spotify lyrics provider")
247
+ providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
248
+ else:
249
+ self.logger.debug("Skipping Spotify provider - no cookie provided")
250
+
251
+ if provider_config.rapidapi_key:
252
+ self.logger.debug("Initializing Musixmatch lyrics provider")
253
+ providers["musixmatch"] = MusixmatchProvider(config=provider_config, logger=self.logger)
254
+ else:
255
+ self.logger.debug("Skipping Musixmatch provider - no RapidAPI key provided")
256
+
257
+ return providers
258
+
259
def _initialize_output_generator(self) -> OutputGenerator:
    """Create the OutputGenerator from this controller's output config and logger."""
    generator = OutputGenerator(config=self.output_config, logger=self.logger)
    return generator
262
+
263
def process(self) -> LyricsControllerResult:
    """Run the whole lyrics workflow and return the accumulated results.

    If a "(Lyrics Corrections).json" file for this output prefix already
    exists in the output directory, it is loaded and outputs are generated
    straight from it (creating countdown-padded audio when the loaded
    corrections carry a countdown). If any step of that shortcut fails, the
    controller state it touched is rolled back and the normal flow runs
    instead: fetch lyrics, transcribe, correct, generate outputs.

    Returns:
        The controller's results object (``self.results``).
    """
    self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")

    # Debug: Log package version and environment variables
    try:
        import lyrics_transcriber

        package_version = getattr(lyrics_transcriber, "__version__", "unknown")
        self.logger.info(f"LyricsTranscriber package version: {package_version}")
    except Exception as e:
        self.logger.warning(f"Could not get package version: {e}")

    # Debug: Log environment variables (first 3 characters only for security)
    # NOTE(review): this still writes the leading characters of secrets (and the
    # whole value when it is 3 characters or shorter) to the log at INFO level -
    # confirm that is acceptable for the deployment's log retention.
    env_vars = {}
    for key, value in os.environ.items():
        if value:
            env_vars[key] = value[:3] + "..." if len(value) > 3 else value
        else:
            env_vars[key] = "(empty)"

    self.logger.info(f"Environment variables count: {len(env_vars)}")

    # Log specific API-related variables
    api_vars = {k: v for k, v in env_vars.items() if any(keyword in k.upper() for keyword in ["API", "TOKEN", "KEY", "SECRET"])}
    if api_vars:
        self.logger.info(f"API-related environment variables: {api_vars}")
    else:
        self.logger.warning("No API-related environment variables found")

    # Log all env vars if in debug mode
    if self.logger.getEffectiveLevel() <= logging.DEBUG:
        self.logger.debug(f"All environment variables: {env_vars}")

    # Check for existing corrections JSON
    corrections_json_path = os.path.join(self.output_config.output_dir, f"{self.output_prefix} (Lyrics Corrections).json")

    if os.path.exists(corrections_json_path):
        self.logger.info(f"Found existing corrections JSON: {corrections_json_path}")

        # Snapshot everything the shortcut may mutate. Without a rollback, a
        # failure after the audio path was swapped for the padded file would
        # make the normal flow below transcribe (and possibly pad again) the
        # already-padded audio.
        audio_filepath_before = self.audio_filepath
        corrected_before = getattr(self.results, "transcription_corrected", None)
        padding_fields = ("countdown_padding_added", "countdown_padding_seconds", "padded_audio_filepath")
        padding_before = None  # filled in only if the padding fields get touched

        try:
            with open(corrections_json_path, "r", encoding="utf-8") as f:
                corrections_data = json.load(f)

            # Reconstruct CorrectionResult from JSON
            self.results.transcription_corrected = CorrectionResult.from_dict(corrections_data)
            self.logger.info("Successfully loaded existing corrections data")

            # Check if the loaded corrections have countdown padding applied
            # This is important because the video needs to use padded audio to sync
            # with the countdown-adjusted timestamps in the ASS subtitles
            if self.output_config.add_countdown:
                from lyrics_transcriber.output.countdown_processor import CountdownProcessor

                countdown_processor = CountdownProcessor(
                    cache_dir=self.output_config.cache_dir,
                    logger=self.logger,
                )

                if countdown_processor.has_countdown(self.results.transcription_corrected):
                    self.logger.info("Loaded corrections have countdown - creating padded audio for video sync")
                    # Create padded audio file to match the countdown-adjusted timestamps
                    padded_audio_path = countdown_processor.create_padded_audio_only(self.audio_filepath)

                    # Fields missing on the results object are restored as None
                    # (assumes they normally exist with falsy defaults - TODO confirm).
                    padding_before = {name: getattr(self.results, name, None) for name in padding_fields}
                    self.audio_filepath = padded_audio_path

                    # Set countdown padding attributes on results
                    self.results.countdown_padding_added = True
                    self.results.countdown_padding_seconds = countdown_processor.COUNTDOWN_PADDING_SECONDS
                    self.results.padded_audio_filepath = padded_audio_path

                    self.logger.info(
                        f"Countdown padding applied: {countdown_processor.COUNTDOWN_PADDING_SECONDS}s. "
                        f"Using padded audio: {padded_audio_path}"
                    )
                else:
                    self.logger.info("Loaded corrections do not have countdown - no padding needed")

            # Skip to output generation
            self.generate_outputs()
            self.logger.info("Processing completed successfully using existing corrections")
            return self.results

        except Exception as e:
            self.logger.error(f"Failed to load existing corrections JSON: {str(e)}")
            # Undo the shortcut's partial state so normal processing starts clean.
            self.audio_filepath = audio_filepath_before
            self.results.transcription_corrected = corrected_before
            if padding_before is not None:
                for name, value in padding_before.items():
                    setattr(self.results, name, value)
            # Continue with normal processing if loading fails

    # Normal processing flow continues...
    if self.output_config.fetch_lyrics and self.artist and self.title:
        self.fetch_lyrics()
    else:
        self.logger.info("Skipping lyrics fetching - no artist/title provided or fetching disabled")

    # Step 2: Run transcription if enabled
    if self.output_config.run_transcription:
        self.transcribe()
    else:
        self.logger.info("Skipping transcription - transcription disabled")

    # Step 3: Process and correct lyrics if enabled AND we have transcription results
    if self.output_config.run_correction and self.results.transcription_results:
        self.correct_lyrics()
    elif self.output_config.run_correction:
        self.logger.info("Skipping lyrics correction - no transcription results available")

    # Step 4: Generate outputs based on what we have
    if self.results.transcription_corrected or self.results.lyrics_results:
        self.generate_outputs()
    else:
        self.logger.warning("No corrected transcription or lyrics available. Skipping output generation.")

    self.logger.info("Processing completed successfully")
    return self.results
376
+
377
def fetch_lyrics(self) -> None:
    """Query every configured lyrics provider and keep each non-empty result.

    Results are stored in ``self.results.lyrics_results`` keyed by provider
    name. A provider that raises is logged and skipped so the remaining
    providers still run; a warning is logged if nothing was found at all.
    """
    self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")

    for source_name, source in self.lyrics_providers.items():
        try:
            fetched = source.fetch_lyrics(self.artist, self.title)
            if not fetched:
                # Provider answered but had nothing for this song.
                continue
            self.results.lyrics_results[source_name] = fetched
            self.logger.info(f"Successfully fetched lyrics from {source_name}")
        except Exception as exc:
            self.logger.error(f"Failed to fetch lyrics from {source_name}: {str(exc)}")

    if self.results.lyrics_results:
        return
    self.logger.warning("No lyrics found from any source")
394
+
395
def transcribe(self) -> None:
    """Run every configured transcriber against the current audio file.

    Each non-empty result is appended to ``self.results.transcription_results``
    wrapped in a TranscriptionResult carrying the provider name and priority.
    Warnings are logged when no provider is configured and when no provider
    produced a result. Exceptions raised by a transcriber are not caught here.
    """
    provider_names = list(self.transcribers)

    if provider_names:
        self.logger.info(f"Starting transcription with providers: {provider_names}")
    else:
        self.logger.warning(
            "Starting transcription with providers: [] - NO TRANSCRIPTION PROVIDERS CONFIGURED!\n"
            "\n"
            "This means no word-level timing data will be generated, and synchronized karaoke "
            "lyrics cannot be created. The output will lack the '(With Vocals).mkv' video file.\n"
            "\n"
            "To enable transcription, configure at least one provider:\n"
            " - AudioShake: Set AUDIOSHAKE_API_TOKEN environment variable\n"
            " - Whisper/RunPod: Set RUNPOD_API_KEY and WHISPER_RUNPOD_ID environment variables\n"
            "\n"
            "See README.md 'Transcription Providers' section for detailed setup instructions."
        )
    if provider_names:
        self._log_provider_configuration_status()

    for provider_name, provider_entry in self.transcribers.items():
        self.logger.info(f"Running transcription with {provider_name}")
        transcript = provider_entry["instance"].transcribe(self.audio_filepath)
        if not transcript:
            continue
        # Tag the raw result with the provider's name and priority.
        tagged = TranscriptionResult(name=provider_name, priority=provider_entry["priority"], result=transcript)
        self.results.transcription_results.append(tagged)
        self.logger.debug(f"Transcription completed for {provider_name}")

    if self.results.transcription_results:
        return
    self.logger.warning(
        "No successful transcriptions from any provider. "
        "Check that your API tokens are valid and the services are accessible."
    )
431
+
432
def _log_provider_configuration_status(self) -> None:
    """Emit debug lines describing which transcription providers are configured.

    AudioShake counts as configured when its API token is set. Whisper
    (RunPod) needs both the RunPod API key and the Whisper endpoint ID, and is
    reported as partially configured when only one of the two is present.
    """
    log_debug = self.logger.debug
    log_debug("Transcription provider configuration status:")

    # AudioShake status
    audioshake_line = (
        " - AudioShake: CONFIGURED (API token provided)"
        if self.transcriber_config.audioshake_api_token
        else " - AudioShake: NOT CONFIGURED (missing AUDIOSHAKE_API_TOKEN)"
    )
    log_debug(audioshake_line)

    # Whisper/RunPod status, keyed by (has API key, has endpoint ID)
    whisper_lines = {
        (True, True): " - Whisper (RunPod): CONFIGURED (API key and endpoint ID provided)",
        (True, False): " - Whisper (RunPod): PARTIALLY CONFIGURED (missing WHISPER_RUNPOD_ID)",
        (False, True): " - Whisper (RunPod): PARTIALLY CONFIGURED (missing RUNPOD_API_KEY)",
        (False, False): " - Whisper (RunPod): NOT CONFIGURED (missing RUNPOD_API_KEY and WHISPER_RUNPOD_ID)",
    }
    whisper_state = (
        bool(self.transcriber_config.runpod_api_key),
        bool(self.transcriber_config.whisper_runpod_id),
    )
    log_debug(whisper_lines[whisper_state])
454
+
455
def correct_lyrics(self) -> None:
    """Produce ``self.results.transcription_corrected`` and post-process it.

    Steps:
      1. With reference lyrics available, run LyricsCorrector over the
         transcription results. Without them, wrap the best (lowest priority
         number) transcription in a CorrectionResult that records no corrections.
      2. If review is enabled and a corrected result exists, run the human
         review server and replace the result with the reviewed data.
      3. If countdown is enabled and a corrected result exists, run the
         countdown processor, which may replace both the corrected result and
         ``self.audio_filepath``; the padding details are recorded on the results.

    When there are neither reference lyrics nor transcription results, nothing
    is produced and steps 2 and 3 are skipped.
    """
    self.logger.info("Starting lyrics correction process")

    # Check if we have reference lyrics to work with
    if not self.results.lyrics_results:
        self.logger.warning("No reference lyrics available for correction - using raw transcription")
        # Use the highest priority transcription result as the "corrected" version
        if self.results.transcription_results:
            # min() returns the first entry with the lowest priority number,
            # which is the same element a stable sort would put first.
            best_transcription = min(self.results.transcription_results, key=lambda x: x.priority)
            segments = best_transcription.result.segments

            # Count total words in the transcription
            total_words = sum(len(segment.words) for segment in segments)

            # Create a CorrectionResult with no corrections
            self.results.transcription_corrected = CorrectionResult(
                original_segments=segments,
                corrected_segments=segments,
                corrections=[],  # No corrections made
                corrections_made=0,  # No corrections made
                confidence=1.0,  # Full confidence since we're using original
                reference_lyrics={},
                anchor_sequences=[],
                gap_sequences=[],
                resized_segments=[],
                correction_steps=[],
                word_id_map={},
                segment_id_map={},
                metadata={
                    "correction_type": "none",
                    "reason": "no_reference_lyrics",
                    "audio_filepath": self.audio_filepath,
                    "anchor_sequences_count": 0,
                    "gap_sequences_count": 0,
                    "total_words": total_words,
                    "correction_ratio": 0.0,
                    "available_handlers": [],
                    "enabled_handlers": [],
                },
            )
    else:
        # Create metadata dict with song info
        metadata = {
            "artist": self.artist,
            "title": self.title,
            "full_reference_texts": {source: lyrics.get_full_text() for source, lyrics in self.results.lyrics_results.items()},
        }

        # The metadata built above never carries "enabled_handlers", so the
        # corrector is always created with enabled_handlers=None.
        corrector = LyricsCorrector(cache_dir=self.output_config.cache_dir, enabled_handlers=None, logger=self.logger)

        corrected_data = corrector.run(
            transcription_results=self.results.transcription_results,
            lyrics_results=self.results.lyrics_results,
            metadata=metadata,
        )

        # Store corrected results
        self.results.transcription_corrected = corrected_data
        self.logger.info("Lyrics correction completed")

    # Add human review step (moved outside the else block)
    if self.output_config.enable_review:
        if self.results.transcription_corrected is not None:
            from lyrics_transcriber.review.server import ReviewServer

            self.logger.info("Starting human review process")

            # Create and start review server
            review_server = ReviewServer(
                correction_result=self.results.transcription_corrected,
                output_config=self.output_config,
                audio_filepath=self.audio_filepath,
                logger=self.logger,
            )
            reviewed_data = review_server.start()

            self.logger.info("Human review completed, updated transcription_corrected with reviewed_data")
            self.results.transcription_corrected = reviewed_data
        else:
            # Nothing was produced above, so there is nothing for a reviewer to look at.
            self.logger.warning("Skipping human review - no corrected transcription available")

    # Add countdown intro if enabled and needed (after review, before output generation)
    if self.output_config.add_countdown and self.results.transcription_corrected:
        from lyrics_transcriber.output.countdown_processor import CountdownProcessor

        self.logger.info("Processing countdown intro (if needed)")
        countdown_processor = CountdownProcessor(
            cache_dir=self.output_config.cache_dir,
            logger=self.logger,
        )

        # Process and potentially modify the correction result and audio filepath
        (
            self.results.transcription_corrected,
            self.audio_filepath,
            padding_added,
            padding_seconds,
        ) = countdown_processor.process(
            correction_result=self.results.transcription_corrected,
            audio_filepath=self.audio_filepath,
        )

        # Store padding information in results for parent code to use
        self.results.countdown_padding_added = padding_added
        self.results.countdown_padding_seconds = padding_seconds
        if padding_added:
            self.results.padded_audio_filepath = self.audio_filepath
            self.logger.info(
                f"Countdown padding applied: {padding_seconds}s added to audio. "
                f"Padded audio: {self.audio_filepath}"
            )
568
+
569
def generate_outputs(self) -> None:
    """Ask the output generator for all output files and record their paths.

    The corrected transcription is passed through only when it is truthy;
    otherwise ``None`` is passed so the generator works from lyrics alone.
    Every path on the returned object is copied onto ``self.results``.
    """
    self.logger.info("Generating output files")

    # Only proceed with outputs that make sense based on what we have
    corrected = self.results.transcription_corrected
    if not corrected:
        corrected = None

    generated = self.output_generator.generate_outputs(
        transcription_corrected=corrected,
        lyrics_results=self.results.lyrics_results,
        output_prefix=self.output_prefix,
        audio_filepath=self.audio_filepath,
        artist=self.artist,
        title=self.title,
    )

    # (attribute on self.results, attribute on the generator's return value)
    path_fields = (
        ("lrc_filepath", "lrc"),
        ("ass_filepath", "ass"),
        ("video_filepath", "video"),
        ("original_txt", "original_txt"),
        ("corrected_txt", "corrected_txt"),
        ("corrections_json", "corrections_json"),
        ("cdg_filepath", "cdg"),
        ("mp3_filepath", "mp3"),
        ("cdg_zip_filepath", "cdg_zip"),
    )
    for result_attr, output_attr in path_fields:
        setattr(self.results, result_attr, getattr(generated, output_attr))
File without changes
@@ -0,0 +1,9 @@
1
+ """Agentic AI correction system scaffold.
2
+
3
+ This package will contain the semi-agentic correction workflows, providers,
4
+ observability, and feedback modules. Implementation follows TDD; tests come first.
5
+ """
6
+
7
# The scaffold exports no public names yet, so a star-import of this package yields nothing.
__all__ = []
8
+
9
+