karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,306 @@
1
+ """Handles adding countdown intro to songs that start too quickly for karaoke singers."""
2
+
3
+ import logging
4
+ import os
5
+ import subprocess
6
+ from typing import List, Optional, Tuple
7
+ from copy import deepcopy
8
+
9
+ from lyrics_transcriber.types import CorrectionResult, LyricsSegment, Word
10
+ from lyrics_transcriber.utils.word_utils import WordUtils
11
+
12
+
13
class CountdownProcessor:
    """
    Processes corrected lyrics and audio to add countdown intro for songs that start too quickly.

    For songs where vocals start within the first 3 seconds, this processor:
    - Adds 3 seconds of silence to the start of the audio file
    - Shifts all timestamps in corrected lyrics by 3 seconds
    - Adds a countdown segment "3... 2... 1..." spanning 0.1s to 2.9s
    """

    # Configuration constants
    COUNTDOWN_THRESHOLD_SECONDS = 3.0  # Trigger countdown if first word is within this time
    COUNTDOWN_PADDING_SECONDS = 3.0  # Amount of silence to add
    COUNTDOWN_START_TIME = 0.1  # When countdown text starts
    COUNTDOWN_END_TIME = 2.9  # When countdown text ends
    COUNTDOWN_TEXT = "3... 2... 1..."  # The countdown text to display

    def __init__(
        self,
        cache_dir: str,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize CountdownProcessor.

        Args:
            cache_dir: Directory for temporary files (padded audio). Created
                if it does not exist yet.
            logger: Optional logger instance; defaults to this module's logger.
        """
        self.cache_dir = cache_dir
        self.logger = logger or logging.getLogger(__name__)

        # Ensure cache directory exists
        os.makedirs(self.cache_dir, exist_ok=True)

    def process(
        self,
        correction_result: CorrectionResult,
        audio_filepath: str,
    ) -> Tuple[CorrectionResult, str, bool, float]:
        """
        Process correction result and audio file, adding countdown if needed.

        Args:
            correction_result: The CorrectionResult to potentially modify
            audio_filepath: Path to the original audio file

        Returns:
            Tuple of:
            - potentially modified CorrectionResult (the input object itself
              when no countdown is needed, otherwise a shifted deep copy)
            - potentially padded audio filepath
            - whether padding was added (bool)
            - amount of padding in seconds (float)

        Raises:
            FileNotFoundError: If the input audio file doesn't exist
            RuntimeError: If the ffmpeg command fails
        """
        # Check if countdown is needed
        if not self._needs_countdown(correction_result):
            self.logger.info(
                f"First word starts after {self.COUNTDOWN_THRESHOLD_SECONDS}s - "
                "no countdown needed"
            )
            return correction_result, audio_filepath, False, 0.0

        self.logger.info(
            f"First word starts within {self.COUNTDOWN_THRESHOLD_SECONDS}s - "
            "adding countdown intro"
        )

        # Create the padded audio first: if ffmpeg fails we raise before
        # building a shifted result that would not match any audio file.
        padded_audio_path = self._create_padded_audio(audio_filepath)

        # Create modified correction result with adjusted timestamps
        modified_result = self._add_countdown_to_result(correction_result)

        self.logger.info(
            f"Countdown intro added successfully. "
            f"Padded audio: {os.path.basename(padded_audio_path)}"
        )

        return modified_result, padded_audio_path, True, self.COUNTDOWN_PADDING_SECONDS

    def _needs_countdown(self, correction_result: CorrectionResult) -> bool:
        """
        Check if the song needs a countdown intro.

        Only the first segment that actually contains words is inspected.

        Args:
            correction_result: The correction result to check

        Returns:
            True if first word starts within threshold, False otherwise
            (including when there are no segments or no words at all)
        """
        if not correction_result.corrected_segments:
            return False

        # Find the first segment with words
        for segment in correction_result.corrected_segments:
            if segment.words:
                first_word_start = segment.words[0].start_time
                return first_word_start < self.COUNTDOWN_THRESHOLD_SECONDS

        return False

    def _create_padded_audio(self, audio_filepath: str) -> str:
        """
        Create a new audio file with silence prepended.

        The output is written to ``<cache_dir>/<input stem>_padded.flac`` and
        overwrites any existing file at that path (ffmpeg ``-y``).

        Args:
            audio_filepath: Path to original audio file

        Returns:
            Path to padded audio file

        Raises:
            FileNotFoundError: If input audio file doesn't exist. Note that
                subprocess raises the same exception type when the ``ffmpeg``
                executable itself cannot be found; that case is not
                translated here.
            RuntimeError: If ffmpeg command fails (chained to the original
                CalledProcessError) or reports success without producing
                the output file
        """
        if not os.path.isfile(audio_filepath):
            raise FileNotFoundError(f"Audio file not found: {audio_filepath}")

        # Create output path in cache directory
        # Always use .flac extension since we encode with FLAC codec for quality
        basename = os.path.basename(audio_filepath)
        name, _ = os.path.splitext(basename)
        padded_filename = f"{name}_padded.flac"
        padded_filepath = os.path.join(self.cache_dir, padded_filename)

        self.logger.info(f"Creating padded audio file: {padded_filename}")

        # Build ffmpeg command to prepend silence
        # We use the anullsrc filter to generate silence and concat it with the original audio
        cmd = [
            "ffmpeg",
            "-y",  # Overwrite output file if it exists
            "-hide_banner",
            "-loglevel", "error",
            "-f", "lavfi",
            "-t", str(self.COUNTDOWN_PADDING_SECONDS),
            "-i", "anullsrc=channel_layout=stereo:sample_rate=44100",
            "-i", audio_filepath,
            "-filter_complex", "[0:a][1:a]concat=n=2:v=0:a=1[out]",
            "-map", "[out]",
            "-c:a", "flac",  # Use FLAC to preserve quality
            padded_filepath,
        ]

        try:
            self.logger.debug(f"Running ffmpeg command: {' '.join(cmd)}")
            output = subprocess.check_output(
                cmd,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
            )
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Failed to create padded audio: {e.output}")
            # Chain the cause so the return code / traceback is not lost.
            raise RuntimeError(f"ffmpeg command failed: {e.output}") from e

        self.logger.debug(f"ffmpeg output: {output}")

        if not os.path.isfile(padded_filepath):
            raise RuntimeError(
                f"ffmpeg command succeeded but output file not created: {padded_filepath}"
            )

        return padded_filepath

    def _add_countdown_to_result(self, correction_result: CorrectionResult) -> CorrectionResult:
        """
        Create a new CorrectionResult with countdown segment and adjusted timestamps.

        The input is deep-copied and never modified. Both ``corrected_segments``
        and (when non-empty) ``resized_segments`` of the copy are shifted by
        COUNTDOWN_PADDING_SECONDS and get a countdown segment prepended.

        Args:
            correction_result: The original correction result

        Returns:
            A new CorrectionResult with countdown and shifted timestamps
        """
        # Deep copy the result to avoid modifying the original
        modified_result = deepcopy(correction_result)

        corrected = modified_result.corrected_segments
        resized = modified_result.resized_segments
        # deepcopy preserves aliasing, so the two attributes may be the very
        # same list; in that case it must only be processed once.
        has_separate_resized = bool(resized) and resized is not corrected

        # One id-set shared by both passes: a segment or word object that is
        # reachable from both lists must be shifted exactly once.
        already_shifted: set = set()

        # Shift all timestamps in corrected_segments
        self._shift_segments_timestamps(
            corrected,
            self.COUNTDOWN_PADDING_SECONDS,
            already_shifted,
        )

        # Shift timestamps in resized_segments if they exist
        if has_separate_resized:
            self._shift_segments_timestamps(
                resized,
                self.COUNTDOWN_PADDING_SECONDS,
                already_shifted,
            )

        # Count before inserting so the log does not include the countdown itself
        shifted_count = len(corrected)

        # Create and prepend countdown segment
        countdown_segment = self._create_countdown_segment()
        corrected.insert(0, countdown_segment)

        # Also add to resized_segments if present. Use an independent copy
        # (same ids and text) so later in-place edits of one list cannot
        # leak into the other.
        if has_separate_resized:
            resized.insert(0, deepcopy(countdown_segment))

        self.logger.debug(
            f"Added countdown segment and shifted {shifted_count} segments "
            f"by {self.COUNTDOWN_PADDING_SECONDS}s"
        )

        return modified_result

    def _shift_segments_timestamps(
        self,
        segments: List[LyricsSegment],
        offset_seconds: float,
        already_shifted: Optional[set] = None,
    ) -> None:
        """
        Shift all timestamps in segments by the given offset (in-place).

        Each distinct segment / word object is shifted at most once, even if
        it is referenced from several places.

        Args:
            segments: List of segments to modify
            offset_seconds: Amount to shift timestamps (in seconds)
            already_shifted: Optional set of ``id()`` values of objects that
                were shifted by an earlier call; it is updated in place.
                Pass the same set to several calls to de-duplicate across
                lists. Defaults to a fresh set per call.
        """
        if already_shifted is None:
            already_shifted = set()

        for segment in segments:
            # Shift segment timestamps
            if id(segment) not in already_shifted:
                already_shifted.add(id(segment))
                segment.start_time += offset_seconds
                segment.end_time += offset_seconds

            # Shift all word timestamps
            for word in segment.words:
                if id(word) in already_shifted:
                    continue
                already_shifted.add(id(word))
                word.start_time += offset_seconds
                word.end_time += offset_seconds

    def _create_countdown_segment(self) -> LyricsSegment:
        """
        Create a countdown segment with the countdown text.

        The segment holds a single word carrying the whole countdown text;
        word and segment both span COUNTDOWN_START_TIME..COUNTDOWN_END_TIME.

        Returns:
            A LyricsSegment containing the countdown
        """
        # Create a single word for the countdown text
        countdown_word = Word(
            id=WordUtils.generate_id(),
            text=self.COUNTDOWN_TEXT,
            start_time=self.COUNTDOWN_START_TIME,
            end_time=self.COUNTDOWN_END_TIME,
            confidence=1.0,
            created_during_correction=True,
        )

        # Create the segment
        countdown_segment = LyricsSegment(
            id=WordUtils.generate_id(),
            text=self.COUNTDOWN_TEXT,
            words=[countdown_word],
            start_time=self.COUNTDOWN_START_TIME,
            end_time=self.COUNTDOWN_END_TIME,
        )

        return countdown_segment

    def has_countdown(self, correction_result: CorrectionResult) -> bool:
        """
        Check if a CorrectionResult already has a countdown segment.

        This is used to detect if countdown padding was applied to corrections
        that were loaded from a saved JSON file (where the padding state is not
        explicitly stored). Detection is purely textual: the first segment's
        text is compared with COUNTDOWN_TEXT.

        Args:
            correction_result: The correction result to check

        Returns:
            True if the first segment is a countdown, False otherwise
        """
        if not correction_result.corrected_segments:
            return False

        first_segment = correction_result.corrected_segments[0]
        return first_segment.text == self.COUNTDOWN_TEXT

    def create_padded_audio_only(self, audio_filepath: str) -> str:
        """
        Create a padded audio file without modifying the correction result.

        This is used when loading existing corrections that already have countdown
        timestamps, but we need to create the padded audio file for video rendering.

        Args:
            audio_filepath: Path to original audio file

        Returns:
            Path to padded audio file

        Raises:
            FileNotFoundError: If input audio file doesn't exist
            RuntimeError: If ffmpeg command fails
        """
        return self._create_padded_audio(audio_filepath)
306
+
@@ -0,0 +1,257 @@
1
+ from dataclasses import dataclass
2
+ import os
3
+ import logging
4
+ from typing import List, Optional
5
+ import json
6
+
7
+ from lyrics_transcriber.types import LyricsData
8
+ from lyrics_transcriber.correction.corrector import CorrectionResult
9
+ from lyrics_transcriber.output.plain_text import PlainTextGenerator
10
+ from lyrics_transcriber.output.lyrics_file import LyricsFileGenerator
11
+ from lyrics_transcriber.output.subtitles import SubtitlesGenerator
12
+ from lyrics_transcriber.output.video import VideoGenerator
13
+ from lyrics_transcriber.output.segment_resizer import SegmentResizer
14
+ from lyrics_transcriber.output.cdg import CDGGenerator
15
+ from lyrics_transcriber.core.config import OutputConfig
16
+
17
+
18
@dataclass
class OutputPaths:
    """Paths of the files produced by an output-generation run.

    Every field defaults to ``None`` and remains ``None`` unless the
    corresponding output is generated and its path assigned.
    """

    # Path returned by the LRC lyrics file generator.
    lrc: Optional[str] = None
    # Path returned by the ASS subtitles generator.
    ass: Optional[str] = None
    # Path returned by the video (or preview video) generator.
    video: Optional[str] = None
    # Plain-text file with the original (uncorrected) transcription.
    original_txt: Optional[str] = None
    # Plain-text file with the corrected lyrics.
    corrected_txt: Optional[str] = None
    # JSON file holding the serialized corrections data.
    corrections_json: Optional[str] = None
    # The three paths returned together by the CDG generator.
    cdg: Optional[str] = None
    mp3: Optional[str] = None
    cdg_zip: Optional[str] = None
31
+
32
+
33
class OutputGenerator:
    """Handles generation of various lyrics output formats.

    Depending on the configuration, an instance writes plain-text lyrics,
    a corrections JSON file, an LRC file, CDG output and/or ASS subtitles
    plus a rendered video. In preview mode only ASS subtitles and a preview
    video are produced.
    """

    # Scale down from 4K (2160p) to 360p - factor of 1/6
    _PREVIEW_SCALE_FACTOR = 1 / 6

    # Supported resolution names mapped to (width, height).
    _RESOLUTION_DIMS = {
        "4k": (3840, 2160),
        "1080p": (1920, 1080),
        "720p": (1280, 720),
        "360p": (640, 360),
    }

    # Default font sizes for each resolution
    _DEFAULT_FONT_SIZES = {
        "4k": 250,
        "1080p": 120,
        "720p": 100,
        "360p": 40,
    }

    def __init__(
        self,
        config: OutputConfig,
        logger: Optional[logging.Logger] = None,
        preview_mode: bool = False,
    ):
        """
        Initialize OutputGenerator with configuration.

        Args:
            config: OutputConfig instance with required paths and settings.
                Its ``styles`` attribute is overwritten with the loaded
                styles (or an empty dict).
            logger: Optional logger instance
            preview_mode: Boolean indicating if the generator is in preview mode

        Raises:
            ValueError: If video or CDG output is requested but the styles
                file is missing or cannot be loaded, or if the configured
                video resolution is not supported.
        """
        self.config = config
        self.logger = logger or logging.getLogger(__name__)

        self.logger.info(f"Initializing OutputGenerator with config: {self.config}")

        # Load output styles from JSON if provided
        self.config.styles = self._load_styles()

        # Set video resolution parameters
        self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
        self.logger.info(f"Video resolution: {self.video_resolution_num}, font size: {self.font_size}, line height: {self.line_height}")

        # Initialize generators
        self.plain_text = PlainTextGenerator(self.config.output_dir, self.logger)
        self.lyrics_file = LyricsFileGenerator(self.config.output_dir, self.logger)

        if self.config.generate_cdg:
            self.cdg = CDGGenerator(self.config.output_dir, self.logger)

        self.preview_mode = preview_mode
        if self.config.render_video and self.preview_mode:
            # Must run before the subtitle generator is built so that it
            # receives the scaled font size and styles.
            self._apply_preview_scaling()

        # Get max_line_length from styles if available, otherwise use config default
        max_line_length = self.config.styles.get("karaoke", {}).get("max_line_length", self.config.default_max_line_length)
        self.logger.info(f"Using max_line_length: {max_line_length}")
        self.segment_resizer = SegmentResizer(max_line_length=max_line_length, logger=self.logger)

        if self.config.render_video:
            # Initialize subtitle generator with potentially scaled values
            self.subtitle = SubtitlesGenerator(
                output_dir=self.config.output_dir,
                video_resolution=self.video_resolution_num,
                font_size=self.font_size,
                line_height=self.line_height,
                styles=self.config.styles,
                subtitle_offset_ms=self.config.subtitle_offset_ms,
                logger=self.logger,
            )

            self.video = VideoGenerator(
                output_dir=self.config.output_dir,
                cache_dir=self.config.cache_dir,
                video_resolution=self.video_resolution_num,
                styles=self.config.styles,
                logger=self.logger,
            )

        # Log the configured directories
        self.logger.debug(f"Initialized OutputGenerator with output_dir: {self.config.output_dir}")
        self.logger.debug(f"Using cache_dir: {self.config.cache_dir}")

    def _load_styles(self) -> dict:
        """Load the output styles JSON named by the config.

        Returns:
            The parsed styles, or an empty dict when no usable styles file
            exists and neither video nor CDG output was requested.

        Raises:
            ValueError: If video or CDG output is requested and the styles
                file is missing or fails to load. A load failure is chained
                to the underlying exception.
        """
        styles_path = self.config.output_styles_json
        # Only video/CDG generation strictly requires styles.
        styles_required = self.config.render_video or self.config.generate_cdg

        if not (styles_path and os.path.exists(styles_path)):
            # No styles file provided or doesn't exist
            if styles_required:
                raise ValueError(f"Output styles file required for video/CDG generation but not found: {styles_path}")
            return {}

        try:
            # JSON is read as UTF-8 explicitly so the result does not depend
            # on the platform's default locale encoding.
            with open(styles_path, "r", encoding="utf-8") as f:
                styles = json.load(f)
        except Exception as e:
            if styles_required:
                raise ValueError(f"Failed to load output styles file: {str(e)}") from e
            # For other outputs, just log warning and continue with empty styles
            self.logger.warning(f"Failed to load output styles file: {str(e)}")
            return {}

        self.logger.debug(f"Loaded output styles from: {styles_path}")
        return styles

    def _apply_preview_scaling(self) -> None:
        """Scale karaoke ``top_padding`` and ``font_size`` styles for preview.

        Values are multiplied by the preview scale factor in place in
        ``self.config.styles``; ``self.font_size`` is updated to the scaled
        font size. Keys that are absent or ``None`` are left untouched.
        """
        styles = self.config.styles
        if "karaoke" not in styles:
            return
        karaoke = styles["karaoke"]
        scale_factor = self._PREVIEW_SCALE_FACTOR

        # Scale down top padding for preview if it exists
        if "top_padding" in karaoke:
            self.logger.info(f"Preview mode: Found top_padding: {karaoke['top_padding']}")
            original_padding = karaoke["top_padding"]
            if original_padding is not None:
                karaoke["top_padding"] = original_padding * scale_factor
                self.logger.info(f"Preview mode: Scaled down top_padding to: {karaoke['top_padding']}")

        # Scale down font size for preview if it exists
        if "font_size" in karaoke:
            self.logger.info(f"Preview mode: Found font_size: {karaoke['font_size']}")
            original_font_size = karaoke["font_size"]
            if original_font_size is not None:
                self.font_size = original_font_size * scale_factor
                karaoke["font_size"] = self.font_size
                self.logger.info(f"Preview mode: Scaled down font_size to: {self.font_size}")

    def generate_outputs(
        self,
        transcription_corrected: Optional[CorrectionResult],
        lyrics_results: dict[str, LyricsData],
        output_prefix: str,
        audio_filepath: str,
        artist: Optional[str] = None,
        title: Optional[str] = None,
    ) -> OutputPaths:
        """Generate all requested output formats.

        Args:
            transcription_corrected: Correction result to render; when falsy
                no outputs are generated. Its ``resized_segments`` attribute
                is set as a side effect.
            lyrics_results: Lyrics data per provider; each entry is written
                as a plain lyrics file (non-preview mode only).
            output_prefix: Prefix used for generated file names.
            audio_filepath: Audio file passed to the subtitle, video and CDG
                generators.
            artist: Optional artist for CDG output (defaults to "").
            title: Optional title for CDG output (defaults to output_prefix).

        Returns:
            OutputPaths with the fields of the generated outputs filled in.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        outputs = OutputPaths()

        try:
            # Only process transcription-related outputs if we have transcription data
            if not transcription_corrected:
                return outputs

            # Resize corrected segments
            resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
            transcription_corrected.resized_segments = resized_segments

            # For preview, we only need to generate ASS and video
            if self.preview_mode:
                # Generate ASS subtitles for preview
                outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)

                # Generate preview video
                outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)

                return outputs

            # Normal output generation (non-preview mode)
            # Generate plain lyrics files for each provider
            for lyrics_data in lyrics_results.values():
                self.plain_text.write_lyrics(lyrics_data, output_prefix)

            # Write original (uncorrected) transcription
            outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)

            outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)

            # Write corrected lyrics as plain text
            outputs.corrected_txt = self.plain_text.write_corrected_lyrics(resized_segments, output_prefix)

            # Generate LRC using LyricsFileGenerator
            outputs.lrc = self.lyrics_file.generate_lrc(resized_segments, output_prefix)

            # Generate CDG file if requested
            if self.config.generate_cdg:
                outputs.cdg, outputs.mp3, outputs.cdg_zip = self.cdg.generate_cdg(
                    segments=resized_segments,
                    audio_file=audio_filepath,
                    title=title or output_prefix,
                    artist=artist or "",
                    cdg_styles=self.config.styles["cdg"],
                )

            # Generate video if requested
            if self.config.render_video:
                # Generate ASS subtitles
                outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix, audio_filepath)
                outputs.video = self.video.generate_video(outputs.ass, audio_filepath, output_prefix)

            return outputs

        except Exception as e:
            self.logger.error(f"Failed to generate outputs: {str(e)}")
            raise

    def _get_output_path(self, output_prefix: str, extension: str) -> str:
        """Generate full output path for a file.

        The file is placed in ``config.output_dir``, falling back to
        ``config.cache_dir`` when no output directory is set.
        """
        target_dir = self.config.output_dir or self.config.cache_dir
        return os.path.join(target_dir, f"{output_prefix}.{extension}")

    def _get_video_params(self, resolution: str) -> tuple:
        """Get video parameters: (width, height), font_size, line_height based on video resolution config.

        Args:
            resolution: One of "4k", "1080p", "720p" or "360p".

        Returns:
            A ``(resolution_dims, font_size, line_height)`` tuple. The font
            size comes from the karaoke styles when present, otherwise from
            the per-resolution default.

        Raises:
            ValueError: If the resolution is not one of the supported names.
        """
        if resolution not in self._RESOLUTION_DIMS:
            raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")

        # Get font size from styles if available, otherwise use default
        font_size = self.config.styles.get("karaoke", {}).get("font_size", self._DEFAULT_FONT_SIZES[resolution])

        # Line height matches font size for all except 360p
        line_height = 50 if resolution == "360p" else font_size

        return self._RESOLUTION_DIMS[resolution], font_size, line_height

    def write_corrections_data(self, correction_result: CorrectionResult, output_prefix: str) -> str:
        """Write corrections data to JSON file.

        Args:
            correction_result: Object whose ``to_dict()`` result is serialized.
            output_prefix: Prefix of the output file name; the suffix
                " (Lyrics Corrections)" and a ``.json`` extension are appended.

        Returns:
            Path of the written JSON file.

        Raises:
            Exception: Any failure while writing is logged and re-raised.
        """
        self.logger.info("Writing corrections data JSON")
        output_path = self._get_output_path(f"{output_prefix} (Lyrics Corrections)", "json")

        try:
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(correction_result.to_dict(), f, indent=2, ensure_ascii=False)
            self.logger.info(f"Corrections data JSON generated: {output_path}")
            return output_path
        except Exception as e:
            self.logger.error(f"Failed to write corrections data JSON: {str(e)}")
            raise
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import logging
4
+ import argparse
5
+ import json
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ from lyrics_transcriber.output.cdg import CDGGenerator
10
+
11
# Module-level logger shared by the CLI entry point below.
logger = logging.getLogger(__name__)
12
+
13
+
14
def cli_main():
    """Command-line interface entry point for the lrc2cdg tool.

    Parses the command line (LRC file, audio file, ``--title``, ``--artist``
    and ``--style_params_json``, all required), loads the "cdg" section of
    the style configuration JSON and generates CDG output next to the LRC
    file via ``CDGGenerator.generate_cdg_from_lrc``.

    Every handled failure is logged and terminates the process with exit
    status 1: a missing style file, invalid JSON, a style file without a
    usable "cdg" section, or any error raised during generation.
    """
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    parser = argparse.ArgumentParser(description="Convert LRC file to CDG")
    parser.add_argument("lrc_file", help="Path to the LRC file")
    parser.add_argument("audio_file", help="Path to the audio file")
    parser.add_argument("--title", required=True, help="Title of the song")
    parser.add_argument("--artist", required=True, help="Artist of the song")
    parser.add_argument("--style_params_json", required=True, help="Path to JSON file containing CDG style configuration")

    args = parser.parse_args()

    try:
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's default locale encoding.
        with open(args.style_params_json, "r", encoding="utf-8") as f:
            style_params = json.load(f)
        cdg_styles = style_params["cdg"]
    except FileNotFoundError:
        logger.error(f"Style configuration file not found: {args.style_params_json}")
        sys.exit(1)
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in style configuration file: {e}")
        sys.exit(1)
    except (KeyError, TypeError):
        # KeyError: top-level object lacks "cdg"; TypeError: the top-level
        # JSON value is not an object that can be indexed by "cdg".
        logger.error(f"Style configuration file has no 'cdg' section: {args.style_params_json}")
        sys.exit(1)

    try:
        # Output files are written alongside the input LRC file.
        output_dir = str(Path(args.lrc_file).parent)
        generator = CDGGenerator(output_dir=output_dir, logger=logger)

        cdg_file, mp3_file, zip_file = generator.generate_cdg_from_lrc(
            lrc_file=args.lrc_file,
            audio_file=args.audio_file,
            title=args.title,
            artist=args.artist,
            cdg_styles=cdg_styles,
        )

        logger.info(f"Generated files:\nCDG: {cdg_file}\nMP3: {mp3_file}\nZIP: {zip_file}")

    except ValueError as e:
        logger.error(f"Invalid style configuration: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error generating CDG: {e}")
        sys.exit(1)
58
+
59
+
60
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    cli_main()