karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,397 @@
1
+ import os
2
+ import logging
3
+ from typing import List, Optional, Tuple, Union
4
+ import subprocess
5
+ import json
6
+
7
+ from lyrics_transcriber.output.ass.section_screen import SectionScreen
8
+ from lyrics_transcriber.types import LyricsSegment, Word
9
+ from lyrics_transcriber.output.ass import LyricsScreen, LyricsLine
10
+ from lyrics_transcriber.output.ass.ass import ASS
11
+ from lyrics_transcriber.output.ass.style import Style
12
+ from lyrics_transcriber.output.ass.constants import ALIGN_TOP_CENTER
13
+ from lyrics_transcriber.output.ass import LyricsScreen
14
+ from lyrics_transcriber.output.ass.section_detector import SectionDetector
15
+ from lyrics_transcriber.output.ass.config import ScreenConfig
16
+
17
+
18
+ class SubtitlesGenerator:
19
+ """Handles generation of subtitle files in various formats."""
20
+
21
def __init__(
    self,
    output_dir: str,
    video_resolution: Tuple[int, int],
    font_size: int,
    line_height: int,
    styles: dict,
    subtitle_offset_ms: int = 0,
    logger: Optional[logging.Logger] = None,
):
    """Store generator settings and build the screen configuration.

    Args:
        output_dir: Directory where output files will be written
        video_resolution: Tuple of (width, height) for video resolution
        font_size: Font size for subtitles
        line_height: Line height for subtitle positioning
        styles: Dictionary of style configurations; entries under its
            "karaoke" key may override selected ScreenConfig settings
        subtitle_offset_ms: Offset for subtitle timing in milliseconds
        logger: Optional logger instance; a module-level logger is used
            when none is given
    """
    self.output_dir = output_dir
    self.video_resolution = video_resolution
    self.font_size = font_size
    self.styles = styles
    self.subtitle_offset_ms = subtitle_offset_ms

    # ScreenConfig settings that a "karaoke" style entry is allowed to override.
    overridable_settings = (
        "max_visible_lines",
        "top_padding",
        "screen_gap_threshold",
        "post_roll_time",
        "fade_in_ms",
        "fade_out_ms",
        "lead_in_color",
        "text_case_transform",
        # Lead-in indicator configuration options
        "lead_in_enabled",
        "lead_in_width_percent",
        "lead_in_height_percent",
        "lead_in_opacity_percent",
        "lead_in_outline_thickness",
        "lead_in_outline_color",
        "lead_in_gap_threshold",
        "lead_in_horiz_offset_percent",
        "lead_in_vert_offset_percent",
    )

    style_overrides = styles.get("karaoke", {})

    # Mandatory values first, then only the overrides actually present in the styles.
    screen_kwargs = {
        "line_height": line_height,
        "video_width": video_resolution[0],
        "video_height": video_resolution[1],
    }
    screen_kwargs.update(
        {name: style_overrides[name] for name in overridable_settings if name in style_overrides}
    )

    self.config = ScreenConfig(**screen_kwargs)

    if not logger:
        logger = logging.getLogger(__name__)
    self.logger = logger
84
+
85
+ def _get_output_path(self, output_prefix: str, extension: str) -> str:
86
+ """Generate full output path for a file."""
87
+ return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
88
+
89
+ def _get_audio_duration(self, audio_filepath: str, segments: Optional[List[LyricsSegment]] = None) -> float:
90
+ """Get audio duration using ffprobe."""
91
+ try:
92
+ probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", audio_filepath]
93
+ probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
94
+ probe_data = json.loads(probe_output)
95
+ duration = float(probe_data["format"]["duration"])
96
+ self.logger.debug(f"Detected audio duration: {duration:.2f}s")
97
+ return duration
98
+ except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e:
99
+ self.logger.error(f"Failed to get audio duration: {e}")
100
+ # Fallback to last segment end time plus buffer
101
+ if segments:
102
+ duration = segments[-1].end_time + 30.0
103
+ self.logger.warning(f"Using fallback duration: {duration:.2f}s")
104
+ return duration
105
+ return 0.0
106
+
107
def generate_ass(self, segments: List[LyricsSegment], output_prefix: str, audio_filepath: str) -> str:
    """Write the segments out as an ASS karaoke subtitle file.

    The file is named "<output_prefix> (Karaoke).ass" inside the output
    directory. Any failure is logged with its traceback and re-raised.

    Args:
        segments: Lyric segments to render.
        output_prefix: Base name used for the output file.
        audio_filepath: Audio file whose duration is looked up for screen creation.

    Returns:
        Path of the ASS file that was written.
    """
    log = self.logger
    log.info("Generating ASS format subtitles")
    ass_path = self._get_output_path(f"{output_prefix} (Karaoke)", "ass")

    try:
        log.debug(f"Processing {len(segments)} segments")
        duration = self._get_audio_duration(audio_filepath, segments)

        all_screens = self._create_screens(segments, duration)
        log.debug(f"Created {len(all_screens)} initial screens")

        styled_ass = self._create_styled_subtitles(all_screens, self.video_resolution, self.font_size)
        log.debug("Created styled subtitles")

        styled_ass.write(ass_path)
        log.info(f"ASS file generated: {ass_path}")
    except Exception as err:
        log.error(f"Failed to generate ASS file: {str(err)}", exc_info=True)
        raise

    return ass_path
128
+
129
def _create_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[LyricsScreen]:
    """Build the chronological list of section and lyric screens.

    When ``self.subtitle_offset_ms`` is non-zero, the segments are first
    copied with every word and segment time shifted by that offset. Both
    start and end times are clamped at 0 so that a negative offset cannot
    produce a negative end time (or an end time earlier than its start).

    Args:
        segments: Lyric segments to lay out.
        song_duration: Song duration in seconds, passed on to section detection.

    Returns:
        All screens, merged and ordered by ``_merge_and_process_screens``.
    """
    self.logger.debug("Creating screens from segments")

    # Apply timing offset to segments if needed
    if self.subtitle_offset_ms != 0:
        self.logger.info(f"Subtitle offset: {self.subtitle_offset_ms}ms")

        offset_seconds = self.subtitle_offset_ms / 1000.0
        segments = [
            LyricsSegment(
                id=seg.id,  # Preserve original segment ID
                text=seg.text,
                words=[
                    Word(
                        id=word.id,  # Preserve original word ID
                        text=word.text,
                        start_time=max(0, word.start_time + offset_seconds),
                        # Clamp the end as well: a negative offset must not yield a negative end time
                        end_time=max(0, word.end_time + offset_seconds),
                        confidence=word.confidence,
                        created_during_correction=getattr(word, "created_during_correction", False),  # Preserve correction flag
                    )
                    for word in seg.words
                ],
                start_time=max(0, seg.start_time + offset_seconds),
                # Same clamp for the segment boundary
                end_time=max(0, seg.end_time + offset_seconds),
            )
            for seg in segments
        ]
        self.logger.info(f"Applied {self.subtitle_offset_ms}ms offset to segment timings")

    # Create section screens and get instrumental boundaries
    section_screens = self._create_section_screens(segments, song_duration)
    instrumental_times = self._get_instrumental_times(section_screens)

    # Create regular lyric screens
    lyric_screens = self._create_lyric_screens(segments, instrumental_times)

    # Merge and process all screens
    all_screens = self._merge_and_process_screens(section_screens, lyric_screens)

    # Log final results
    self._log_final_screens(all_screens)

    return all_screens
174
+
175
def _create_section_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[SectionScreen]:
    """Run a SectionDetector over the segments and return the section screens it produces."""
    detector = SectionDetector(logger=self.logger)
    section_screens = detector.process_segments(
        segments,
        self.video_resolution,
        self.config.line_height,
        song_duration,
    )
    return section_screens
179
+
180
def _get_instrumental_times(self, section_screens: List[SectionScreen]) -> List[Tuple[float, float]]:
    """Collect (start_time, end_time) pairs for every INSTRUMENTAL section screen.

    Entries that are not SectionScreen instances, or whose section type is
    something else, are ignored. The boundaries found are logged at debug level.
    """
    boundaries = []
    for screen in section_screens:
        if isinstance(screen, SectionScreen) and screen.section_type == "INSTRUMENTAL":
            boundaries.append((screen.start_time, screen.end_time))

    self.logger.debug(f"Found {len(boundaries)} instrumental sections:")
    for begin, finish in boundaries:
        self.logger.debug(f" {begin:.2f}s - {finish:.2f}s")

    return boundaries
191
+
192
def _create_lyric_screens(self, segments: List[LyricsSegment], instrumental_times: List[Tuple[float, float]]) -> List[LyricsScreen]:
    """Group lyric segments into screens, leaving out segments inside instrumentals.

    Segments are visited in order. A segment that starts inside an
    instrumental section is dropped; every other segment becomes a line on
    the current screen, with a fresh screen opened whenever
    ``_should_start_new_screen`` says so.
    """
    lyric_screens: List[LyricsScreen] = []
    active_screen: Optional[LyricsScreen] = None

    for index, seg in enumerate(segments):
        self.logger.debug(f"Processing segment {index}: {seg.start_time:.2f}s - {seg.end_time:.2f}s")

        if not self._is_in_instrumental_section(seg, instrumental_times):
            # Open a new screen when there is none yet, the current one is full,
            # or an instrumental separates this segment from the previous line.
            if self._should_start_new_screen(active_screen, seg, instrumental_times):
                active_screen = LyricsScreen(
                    video_size=self.video_resolution,
                    line_height=self.config.line_height,
                    config=self.config,
                    logger=self.logger,
                )
                lyric_screens.append(active_screen)
                self.logger.debug(" Created new screen")

            new_line = LyricsLine(logger=self.logger, segment=seg, screen_config=self.config)
            active_screen.lines.append(new_line)
            self.logger.debug(f" Added line to screen (now has {len(active_screen.lines)} lines)")

    return lyric_screens
223
+
224
+ def _is_in_instrumental_section(self, segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]) -> bool:
225
+ """Check if a segment falls within any instrumental section."""
226
+ for inst_start, inst_end in instrumental_times:
227
+ if segment.start_time >= inst_start and segment.start_time < inst_end:
228
+ self.logger.debug(f" Skipping segment - falls within instrumental {inst_start:.2f}s - {inst_end:.2f}s")
229
+ return True
230
+ return False
231
+
232
+ def _should_start_new_screen(
233
+ self, current_screen: Optional[LyricsScreen], segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]
234
+ ) -> bool:
235
+ """Determine if a new screen should be started."""
236
+ if current_screen is None:
237
+ return True
238
+
239
+ if len(current_screen.lines) >= self.config.max_visible_lines:
240
+ return True
241
+
242
+ # Check if this segment is first after any instrumental section
243
+ if current_screen.lines:
244
+ prev_segment = current_screen.lines[-1].segment
245
+ for inst_start, inst_end in instrumental_times:
246
+ if prev_segment.end_time <= inst_start and segment.start_time >= inst_end:
247
+ self.logger.debug(f" Forcing new screen - first segment after instrumental {inst_start:.2f}s - {inst_end:.2f}s")
248
+ return True
249
+
250
+ return False
251
+
252
+ def _merge_and_process_screens(
253
+ self, section_screens: List[SectionScreen], lyric_screens: List[LyricsScreen]
254
+ ) -> List[Union[SectionScreen, LyricsScreen]]:
255
+ """Merge section and lyric screens in chronological order."""
256
+ # Sort all screens by start time
257
+ return sorted(section_screens + lyric_screens, key=lambda s: s.start_ts)
258
+
259
def _log_final_screens(self, screens: List[Union[SectionScreen, LyricsScreen]]) -> None:
    """Write a debug-level summary of every screen.

    Section screens are reported with their type, text and time range;
    any other screen is reported with its line count and one entry per line.
    """
    log_debug = self.logger.debug
    log_debug("Final screens created:")

    for screen_number, screen in enumerate(screens, start=1):
        log_debug(f"Screen {screen_number}:")

        if isinstance(screen, SectionScreen):
            log_debug(f" Section: {screen.section_type}")
            log_debug(f" Text: {screen.text}")
            log_debug(f" Time: {screen.start_time:.2f}s - {screen.end_time:.2f}s")
            continue

        log_debug(f" Number of lines: {len(screen.lines)}")
        for line_number, line in enumerate(screen.lines, start=1):
            log_debug(f" Line {line_number} ({line.segment.start_time:.2f}s - {line.segment.end_time:.2f}s): {line}")
272
+
273
+ def _create_styled_ass_instance(self, resolution, fontsize):
274
+ a = ASS()
275
+ a.set_resolution(resolution)
276
+
277
+ a.styles_format = [
278
+ "Name", # The name of the Style. Case sensitive. Cannot include commas.
279
+ "Fontname", # The fontname as used by Windows. Case-sensitive.
280
+ "Fontpath", # The path to the font file.
281
+ "Fontsize", # Font size
282
+ "PrimaryColour", # This is the colour that a subtitle will normally appear in.
283
+ "SecondaryColour", # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
284
+ "OutlineColour", # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
285
+ "BackColour", # This is the colour of the subtitle outline or shadow, if these are used
286
+ "Bold", # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
287
+ "Italic", # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
288
+ "Underline", # [-1 or 0]
289
+ "StrikeOut", # [-1 or 0]
290
+ "ScaleX", # Modifies the width of the font. [percent]
291
+ "ScaleY", # Modifies the height of the font. [percent]
292
+ "Spacing", # Extra space between characters. [pixels]
293
+ "Angle", # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
294
+ "BorderStyle", # 1=Outline + drop shadow, 3=Opaque box
295
+ "Outline", # If BorderStyle is 1, then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
296
+ "Shadow", # If BorderStyle is 1, then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
297
+ "Alignment", # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
298
+ "MarginL", # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen.The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
299
+ "MarginR", # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
300
+ "MarginV", # This defines the vertical margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred.
301
+ "Encoding", #
302
+ ]
303
+
304
+ # Get font settings from styles
305
+ karaoke_styles = self.styles.get("karaoke", {})
306
+ font_path = karaoke_styles.get("font_path")
307
+
308
+ style = Style()
309
+
310
+ style.type = "Style"
311
+ style.Name = self.styles["karaoke"]["ass_name"]
312
+ style.Fontname = self.styles["karaoke"]["font"]
313
+ style.Fontpath = font_path
314
+ style.Fontsize = fontsize
315
+
316
+ style.Alignment = ALIGN_TOP_CENTER
317
+
318
+ # Convert color strings to tuples of integers
319
+ def parse_color(color_str):
320
+ return tuple(int(x.strip()) for x in color_str.split(","))
321
+
322
+ style.PrimaryColour = parse_color(self.styles["karaoke"]["primary_color"])
323
+ style.SecondaryColour = parse_color(self.styles["karaoke"]["secondary_color"])
324
+ style.OutlineColour = parse_color(self.styles["karaoke"]["outline_color"])
325
+ style.BackColour = parse_color(self.styles["karaoke"]["back_color"])
326
+
327
+ # Convert boolean strings to integers (-1 for True, 0 for False)
328
+ def parse_bool(value):
329
+ return -1 if value else 0
330
+
331
+ style.Bold = parse_bool(self.styles["karaoke"]["bold"])
332
+ style.Italic = parse_bool(self.styles["karaoke"]["italic"])
333
+ style.Underline = parse_bool(self.styles["karaoke"]["underline"])
334
+ style.StrikeOut = parse_bool(self.styles["karaoke"]["strike_out"])
335
+
336
+ # Convert numeric strings to appropriate types
337
+ style.ScaleX = int(self.styles["karaoke"]["scale_x"])
338
+ style.ScaleY = int(self.styles["karaoke"]["scale_y"])
339
+ style.Spacing = int(self.styles["karaoke"]["spacing"])
340
+ style.Angle = float(self.styles["karaoke"]["angle"])
341
+ style.BorderStyle = int(self.styles["karaoke"]["border_style"])
342
+ style.Outline = int(self.styles["karaoke"]["outline"])
343
+ style.Shadow = int(self.styles["karaoke"]["shadow"])
344
+ style.MarginL = int(self.styles["karaoke"]["margin_l"])
345
+ style.MarginR = int(self.styles["karaoke"]["margin_r"])
346
+ style.MarginV = int(self.styles["karaoke"]["margin_v"])
347
+ style.Encoding = int(self.styles["karaoke"]["encoding"])
348
+
349
+ a.add_style(style)
350
+
351
+ a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
352
+ return a, style
353
+
354
def _create_styled_subtitles(
    self,
    screens: List[Union[SectionScreen, LyricsScreen]],
    resolution: Tuple[int, int],
    fontsize: int,
) -> ASS:
    """Assemble one styled ASS document from an ordered sequence of screens.

    Args:
        screens: Screens to convert, processed in the order given.
        resolution: Forwarded unchanged to ``_create_styled_ass_instance``.
        fontsize: Forwarded unchanged to ``_create_styled_ass_instance``.

    Returns:
        The ASS object produced by ``_create_styled_ass_instance`` with the
        events of every screen added to it.
    """
    document, karaoke_style = self._create_styled_ass_instance(resolution, fontsize)

    def append_events(new_events):
        # Events are added one at a time, in the order the screen produced them.
        for item in new_events:
            document.add(item)

    # State carried from one screen to the next.
    carried_lines = []
    previous_instrumental_end = None

    for screen in screens:
        if isinstance(screen, SectionScreen):
            # Section screens only contribute their marker events; the second
            # element of the returned pair is not used here.
            marker_events, _ = screen.as_ass_events(style=karaoke_style)
            append_events(marker_events)

            # Remember where the section ended and start the next lyrics
            # screen with no carried-over lines.
            previous_instrumental_end = screen.end_time
            carried_lines = []
            self.logger.debug(f"Found instrumental section ending at {screen.end_time:.2f}s")
        else:
            self.logger.debug(f"Processing screen with instrumental_end={previous_instrumental_end}")
            screen_events, carried_lines = screen.as_ass_events(
                style=karaoke_style,
                previous_active_lines=carried_lines,
                previous_instrumental_end=previous_instrumental_end,
            )

            # The instrumental end time is handed to exactly one screen: the
            # first one processed after the section. Clear it afterwards.
            if previous_instrumental_end is not None:
                self.logger.debug("Clearing instrumental end time after processing post-instrumental screen")
                previous_instrumental_end = None

            append_events(screen_events)

    return document