karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,409 @@
1
+ """
2
+ Waveform visualization generator for audio files.
3
+
4
+ This module provides the WaveformGenerator class which creates waveform
5
+ images suitable for display in the instrumental review UI.
6
+ """
7
+
8
+ import logging
9
+ import math
10
+ from pathlib import Path
11
+ from typing import List, Optional, Tuple
12
+
13
+ import matplotlib
14
+ matplotlib.use('Agg') # Use non-interactive backend
15
+ import matplotlib.pyplot as plt
16
+ import numpy as np
17
+ from pydub import AudioSegment
18
+
19
+ from .models import AudibleSegment, MuteRegion
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class WaveformGenerator:
    """
    Generates waveform visualization images from audio files.

    This class creates PNG images showing the amplitude envelope (in dB) of
    an audio file over time. It can highlight detected audible segments
    and mute regions with different colors.

    The generated images are suitable for display in web UIs and can
    be used for interactive seeking (click-to-seek) functionality.

    Attributes:
        width: Width of the output image in pixels (default: 1200)
        height: Height of the output image in pixels (default: 200)
        background_color: Background color (default: "#1a1a2e")
        waveform_color: Main waveform color (default: "#4a90d9")
        segment_color: Color for audible segments (default: "#e94560")
        mute_color: Color for mute regions (default: "#ff6b6b")
        time_axis_color: Color for time axis (default: "#ffffff")
        dpi: DPI used for the matplotlib figure (default: 100)

    Example:
        >>> generator = WaveformGenerator(width=1200, height=200)
        >>> generator.generate(
        ...     audio_path="/path/to/backing_vocals.flac",
        ...     output_path="/path/to/waveform.png",
        ...     segments=analysis_result.audible_segments
        ... )
    """

    # Level reported for windows with rms == 0, where log10 is undefined.
    _SILENCE_FLOOR_DB = -100.0
    # Bottom of the plotted / normalized dB range; the top is 0 dB.
    _MIN_DISPLAY_DB = -60.0

    def __init__(
        self,
        width: int = 1200,
        height: int = 200,
        background_color: str = "#1a1a2e",
        waveform_color: str = "#4a90d9",
        segment_color: str = "#e94560",
        mute_color: str = "#ff6b6b",
        time_axis_color: str = "#ffffff",
        dpi: int = 100,
    ):
        """
        Initialize the waveform generator.

        Args:
            width: Width of the output image in pixels
            height: Height of the output image in pixels
            background_color: Background color (hex or named color)
            waveform_color: Main waveform color
            segment_color: Color for highlighting audible segments
            mute_color: Color for highlighting mute regions
            time_axis_color: Color for time axis labels
            dpi: DPI for the output image
        """
        self.width = width
        self.height = height
        self.background_color = background_color
        self.waveform_color = waveform_color
        self.segment_color = segment_color
        self.mute_color = mute_color
        self.time_axis_color = time_axis_color
        self.dpi = dpi

    def generate(
        self,
        audio_path: str,
        output_path: str,
        segments: "Optional[List[AudibleSegment]]" = None,
        mute_regions: "Optional[List[MuteRegion]]" = None,
        show_time_axis: bool = True,
        silence_threshold_db: float = -40.0,
    ) -> str:
        """
        Generate a waveform image from an audio file.

        Args:
            audio_path: Path to the audio file
            output_path: Path where the PNG image will be saved
            segments: Optional list of audible segments to highlight
            mute_regions: Optional list of mute regions to highlight
            show_time_axis: Whether to show time axis labels
            silence_threshold_db: Threshold for visual reference line

        Returns:
            Path to the generated image file

        Raises:
            FileNotFoundError: If the audio file doesn't exist
        """
        self._require_audio_file(audio_path)
        logger.info("Generating waveform for: %s", audio_path)

        audio = self._load_mono(audio_path)
        duration_seconds = len(audio) / 1000.0
        envelope = self._get_envelope(audio)

        fig, ax = self._create_figure(duration_seconds, show_time_axis)
        try:
            self._draw_waveform(ax, envelope, duration_seconds)

            # Mute overlays are created with zorder=0, so they render behind
            # the waveform regardless of the order in which they are added.
            if mute_regions:
                self._draw_mute_regions(ax, mute_regions, duration_seconds)

            if segments:
                self._draw_segments(ax, segments, envelope, duration_seconds)

            self._draw_threshold_line(ax, silence_threshold_db, duration_seconds)

            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(
                output_path,
                facecolor=self.background_color,
                edgecolor='none',
                bbox_inches='tight',
                pad_inches=0.1,
            )
        finally:
            # pyplot keeps every open figure registered; always release it,
            # even when drawing or saving fails.
            plt.close(fig)

        logger.info("Waveform saved to: %s", output_path)
        return output_path

    def generate_data_only(
        self,
        audio_path: str,
        num_points: int = 500,
    ) -> Tuple[List[float], float]:
        """
        Generate waveform data without creating an image.

        This is useful for sending data to a frontend that will
        render the waveform itself (e.g., using Canvas or SVG).

        Args:
            audio_path: Path to the audio file
            num_points: Maximum number of data points to return. Audio shorter
                than ``num_points`` milliseconds yields one point per
                millisecond.

        Returns:
            Tuple of (amplitude_values, duration_seconds)
            Amplitude values are normalized to 0.0-1.0 range.

        Raises:
            FileNotFoundError: If the audio file doesn't exist
            ValueError: If ``num_points`` is less than 1
        """
        self._require_audio_file(audio_path)

        audio = self._load_mono(audio_path)
        duration_seconds = len(audio) / 1000.0

        return self._normalized_amplitudes(audio, num_points), duration_seconds

    @staticmethod
    def _require_audio_file(audio_path: str) -> None:
        """Raise FileNotFoundError unless ``audio_path`` exists."""
        if not Path(audio_path).exists():
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

    @staticmethod
    def _load_mono(audio_path: str) -> "AudioSegment":
        """Load ``audio_path`` and down-mix it to a single channel if needed."""
        audio = AudioSegment.from_file(audio_path)
        if audio.channels > 1:
            audio = audio.set_channels(1)
        return audio

    @classmethod
    def _window_db(cls, window: "AudioSegment") -> float:
        """Return the RMS level of ``window`` in dB relative to full scale."""
        if window.rms > 0:
            return 20 * math.log10(window.rms / window.max_possible_amplitude)
        return cls._SILENCE_FLOOR_DB

    def _window_levels_db(self, audio: "AudioSegment", window_ms: int) -> List[float]:
        """Return the dB level of each consecutive ``window_ms`` slice of ``audio``."""
        duration_ms = len(audio)
        return [
            self._window_db(audio[start_ms:min(start_ms + window_ms, duration_ms)])
            for start_ms in range(0, duration_ms, window_ms)
        ]

    def _normalized_amplitudes(self, audio: "AudioSegment", num_points: int) -> List[float]:
        """
        Return at most ``num_points`` amplitudes of ``audio`` scaled to 0.0-1.0.

        The -60 dB..0 dB range is mapped linearly onto 0..1 and clamped.
        """
        if num_points < 1:
            raise ValueError(f"num_points must be at least 1, got {num_points}")

        # Ceiling division: a floored window would produce more windows than
        # requested whenever the duration is not a multiple of num_points.
        window_ms = max(1, -(-len(audio) // num_points))

        span = -self._MIN_DISPLAY_DB
        return [
            max(0.0, min(1.0, (db + span) / span))
            for db in self._window_levels_db(audio, window_ms)
        ]

    def _get_envelope(
        self,
        audio: "AudioSegment",
        window_ms: int = 50,
    ) -> np.ndarray:
        """
        Extract amplitude envelope from audio.

        Returns array of amplitude values in dB, one per ``window_ms`` window.
        """
        return np.array(self._window_levels_db(audio, window_ms))

    def _create_figure(
        self,
        duration_seconds: float,
        show_time_axis: bool,
    ) -> "Tuple[plt.Figure, plt.Axes]":
        """
        Create matplotlib figure and axes.

        The x axis spans 0..duration_seconds and the y axis spans -60..0 dB.
        """
        fig_width = self.width / self.dpi
        fig_height = self.height / self.dpi

        fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=self.dpi)

        # Set background
        fig.patch.set_facecolor(self.background_color)
        ax.set_facecolor(self.background_color)

        # Configure axes
        ax.set_xlim(0, duration_seconds)
        ax.set_ylim(self._MIN_DISPLAY_DB, 0)  # dB range

        # Remove spines
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Configure ticks
        if show_time_axis:
            ax.tick_params(
                axis='x',
                colors=self.time_axis_color,
                labelsize=8,
            )
            ax.tick_params(axis='y', left=False, labelleft=False)

            # Set time axis ticks
            self._set_time_ticks(ax, duration_seconds)
        else:
            ax.tick_params(
                axis='both',
                left=False,
                bottom=False,
                labelleft=False,
                labelbottom=False,
            )

        return fig, ax

    def _set_time_ticks(self, ax: "plt.Axes", duration_seconds: float):
        """
        Set appropriate time axis tick marks, labelled as M:SS.

        Ticks are placed every 10 s (up to 1 minute), 30 s (up to 5 minutes)
        or 60 s (longer audio), and never beyond ``duration_seconds``.
        """
        if duration_seconds <= 60:
            tick_interval = 10
        elif duration_seconds <= 300:
            tick_interval = 30
        else:
            tick_interval = 60

        # Only multiples of the interval that lie within the audio: a tick
        # past the end would make set_xticks widen the x-limits, so the
        # waveform would no longer span the full image width.
        tick_count = int(duration_seconds // tick_interval) + 1
        ticks = np.arange(tick_count) * tick_interval
        ax.set_xticks(ticks)

        # Format tick labels as MM:SS
        labels = []
        for t in ticks:
            minutes, seconds = divmod(int(t), 60)
            labels.append(f"{minutes}:{seconds:02d}")
        ax.set_xticklabels(labels)

    def _draw_waveform(
        self,
        ax: "plt.Axes",
        envelope: np.ndarray,
        duration_seconds: float,
    ):
        """
        Draw the main waveform as a filled area with a thin outline.
        """
        time_points = np.linspace(0, duration_seconds, len(envelope))

        # Draw as filled area
        ax.fill_between(
            time_points,
            envelope,
            self._MIN_DISPLAY_DB,  # Bottom of range
            color=self.waveform_color,
            alpha=0.7,
        )

        # Draw outline
        ax.plot(
            time_points,
            envelope,
            color=self.waveform_color,
            linewidth=0.5,
            alpha=0.9,
        )

    def _draw_segments(
        self,
        ax: "plt.Axes",
        segments: "List[AudibleSegment]",
        envelope: np.ndarray,
        duration_seconds: float,
    ):
        """
        Highlight audible segments on the waveform.

        Each segment's ``start_seconds``/``end_seconds`` is mapped onto the
        envelope indices and that slice is re-filled in ``segment_color``.
        """
        if duration_seconds <= 0:
            # Nothing to map segments onto (and avoids dividing by zero).
            return

        num_points = len(envelope)
        time_points = np.linspace(0, duration_seconds, num_points)

        for segment in segments:
            # Find indices corresponding to this segment
            start_idx = int(segment.start_seconds / duration_seconds * num_points)
            end_idx = int(segment.end_seconds / duration_seconds * num_points)

            start_idx = max(0, min(start_idx, num_points - 1))
            end_idx = max(0, min(end_idx, num_points))

            if start_idx >= end_idx:
                continue

            # Highlight this segment with a different color
            ax.fill_between(
                time_points[start_idx:end_idx],
                envelope[start_idx:end_idx],
                self._MIN_DISPLAY_DB,
                color=self.segment_color,
                alpha=0.6,
            )

    def _draw_mute_regions(
        self,
        ax: "plt.Axes",
        mute_regions: "List[MuteRegion]",
        duration_seconds: float,
    ):
        """
        Draw mute region overlays as translucent vertical spans.
        """
        for region in mute_regions:
            ax.axvspan(
                region.start_seconds,
                region.end_seconds,
                color=self.mute_color,
                alpha=0.3,
                zorder=0,  # keep the overlay behind the waveform
            )

    def _draw_threshold_line(
        self,
        ax: "plt.Axes",
        threshold_db: float,
        duration_seconds: float,
    ):
        """
        Draw a dashed reference line at the silence threshold.
        """
        ax.axhline(
            y=threshold_db,
            color=self.time_axis_color,
            linestyle='--',
            linewidth=0.5,
            alpha=0.3,
        )
@@ -0,0 +1 @@
1
+ from .karaoke_finalise import KaraokeFinalise