karaoke-gen 0.75.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of karaoke-gen might be problematic. Click here for more details.
- karaoke_gen/__init__.py +38 -0
- karaoke_gen/audio_fetcher.py +1614 -0
- karaoke_gen/audio_processor.py +790 -0
- karaoke_gen/config.py +83 -0
- karaoke_gen/file_handler.py +387 -0
- karaoke_gen/instrumental_review/__init__.py +45 -0
- karaoke_gen/instrumental_review/analyzer.py +408 -0
- karaoke_gen/instrumental_review/editor.py +322 -0
- karaoke_gen/instrumental_review/models.py +171 -0
- karaoke_gen/instrumental_review/server.py +475 -0
- karaoke_gen/instrumental_review/static/index.html +1529 -0
- karaoke_gen/instrumental_review/waveform.py +409 -0
- karaoke_gen/karaoke_finalise/__init__.py +1 -0
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
- karaoke_gen/karaoke_gen.py +1026 -0
- karaoke_gen/lyrics_processor.py +474 -0
- karaoke_gen/metadata.py +160 -0
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
- karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
- karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/__init__.py +18 -0
- karaoke_gen/utils/bulk_cli.py +492 -0
- karaoke_gen/utils/cli_args.py +432 -0
- karaoke_gen/utils/gen_cli.py +978 -0
- karaoke_gen/utils/remote_cli.py +3268 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen/video_generator.py +424 -0
- karaoke_gen-0.75.54.dist-info/METADATA +718 -0
- karaoke_gen-0.75.54.dist-info/RECORD +287 -0
- karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
- karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
- karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +594 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +919 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +214 -0
- lyrics_transcriber/frontend/src/api.ts +254 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +306 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +379 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +650 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Waveform visualization generator for audio files.
|
|
3
|
+
|
|
4
|
+
This module provides the WaveformGenerator class which creates waveform
|
|
5
|
+
images suitable for display in the instrumental review UI.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import math
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
import matplotlib
|
|
14
|
+
matplotlib.use('Agg') # Use non-interactive backend
|
|
15
|
+
import matplotlib.pyplot as plt
|
|
16
|
+
import numpy as np
|
|
17
|
+
from pydub import AudioSegment
|
|
18
|
+
|
|
19
|
+
from .models import AudibleSegment, MuteRegion
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class WaveformGenerator:
    """
    Generates waveform visualization images from audio files.

    This class creates PNG images showing the amplitude envelope (in dB)
    of an audio file over time. It can highlight detected audible segments
    and mute regions with different colors.

    The generated images are suitable for display in web UIs and can
    be used for interactive seeking (click-to-seek) functionality.

    Attributes:
        width: Width of the output image in pixels (default: 1200)
        height: Height of the output image in pixels (default: 200)
        background_color: Background color (default: "#1a1a2e")
        waveform_color: Main waveform color (default: "#4a90d9")
        segment_color: Color for audible segments (default: "#e94560")
        mute_color: Color for mute regions (default: "#ff6b6b")
        time_axis_color: Color for time axis (default: "#ffffff")
        dpi: DPI used to convert the pixel size to a figure size (default: 100)

    Example:
        >>> generator = WaveformGenerator(width=1200, height=200)
        >>> generator.generate(
        ...     audio_path="/path/to/backing_vocals.flac",
        ...     output_path="/path/to/waveform.png",
        ...     segments=analysis_result.audible_segments
        ... )
    """

    # Lowest level shown on the plot and used as the zero point when
    # normalizing amplitudes to the 0.0-1.0 range.
    _DISPLAY_FLOOR_DB = -60.0
    # Level reported for a window whose RMS is zero (log10 is undefined there).
    _SILENCE_DB = -100.0

    def __init__(
        self,
        width: int = 1200,
        height: int = 200,
        background_color: str = "#1a1a2e",
        waveform_color: str = "#4a90d9",
        segment_color: str = "#e94560",
        mute_color: str = "#ff6b6b",
        time_axis_color: str = "#ffffff",
        dpi: int = 100,
    ):
        """
        Initialize the waveform generator.

        Args:
            width: Width of the output image in pixels
            height: Height of the output image in pixels
            background_color: Background color (hex or named color)
            waveform_color: Main waveform color
            segment_color: Color for highlighting audible segments
            mute_color: Color for highlighting mute regions
            time_axis_color: Color for time axis labels
            dpi: DPI for the output image
        """
        self.width = width
        self.height = height
        self.background_color = background_color
        self.waveform_color = waveform_color
        self.segment_color = segment_color
        self.mute_color = mute_color
        self.time_axis_color = time_axis_color
        self.dpi = dpi

    def generate(
        self,
        audio_path: str,
        output_path: str,
        segments: Optional[List[AudibleSegment]] = None,
        mute_regions: Optional[List[MuteRegion]] = None,
        show_time_axis: bool = True,
        silence_threshold_db: float = -40.0,
    ) -> str:
        """
        Generate a waveform image from an audio file.

        Args:
            audio_path: Path to the audio file
            output_path: Path where the PNG image will be saved; missing
                parent directories are created
            segments: Optional list of audible segments to highlight
            mute_regions: Optional list of mute regions to highlight
            show_time_axis: Whether to show time axis labels
            silence_threshold_db: Threshold for visual reference line

        Returns:
            Path to the generated image file

        Raises:
            FileNotFoundError: If the audio file doesn't exist
        """
        self._require_file(audio_path)

        logger.info("Generating waveform for: %s", audio_path)

        audio, duration_seconds = self._load_mono(audio_path)
        envelope = self._get_envelope(audio)

        fig, ax = self._create_figure(duration_seconds, show_time_axis)
        try:
            self._draw_waveform(ax, envelope, duration_seconds)

            # Mute regions are drawn with zorder=0, so they sit beneath the
            # waveform even though they are added after it.
            if mute_regions:
                self._draw_mute_regions(ax, mute_regions, duration_seconds)

            if segments:
                self._draw_segments(ax, segments, envelope, duration_seconds)

            self._draw_threshold_line(ax, silence_threshold_db, duration_seconds)

            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            fig.savefig(
                output_path,
                facecolor=self.background_color,
                edgecolor='none',
                bbox_inches='tight',
                pad_inches=0.1,
            )
        finally:
            # pyplot keeps a reference to every open figure; close it even
            # when drawing or saving fails so repeated calls do not leak.
            plt.close(fig)

        logger.info("Waveform saved to: %s", output_path)
        return output_path

    def generate_data_only(
        self,
        audio_path: str,
        num_points: int = 500,
    ) -> Tuple[List[float], float]:
        """
        Generate waveform data without creating an image.

        This is useful for sending data to a frontend that will
        render the waveform itself (e.g., using Canvas or SVG).

        Args:
            audio_path: Path to the audio file
            num_points: Target number of data points. The window length is
                a whole number of milliseconds (at least 1), so the number
                of values actually returned can differ from this target.

        Returns:
            Tuple of (amplitude_values, duration_seconds)
            Amplitude values are normalized to 0.0-1.0 range
            (-60 dB and below map to 0.0, 0 dB maps to 1.0).

        Raises:
            FileNotFoundError: If the audio file doesn't exist
            ValueError: If num_points is not positive
        """
        self._require_file(audio_path)
        if num_points <= 0:
            raise ValueError(f"num_points must be positive, got {num_points}")

        audio, duration_seconds = self._load_mono(audio_path)

        # Calculate window size to get roughly the desired number of points
        window_ms = max(1, len(audio) // num_points)

        floor_db = self._DISPLAY_FLOOR_DB
        amplitudes = [
            max(0.0, min(1.0, (db - floor_db) / -floor_db))
            for db in self._window_levels_db(audio, window_ms)
        ]

        return amplitudes, duration_seconds

    @staticmethod
    def _require_file(audio_path: str) -> None:
        """
        Raise FileNotFoundError if audio_path does not exist.
        """
        if not Path(audio_path).exists():
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

    @staticmethod
    def _load_mono(audio_path: str) -> Tuple[AudioSegment, float]:
        """
        Load an audio file and return (mono_audio, duration_seconds).
        """
        audio = AudioSegment.from_file(audio_path)
        # len() of an AudioSegment is used as milliseconds here.
        duration_seconds = len(audio) / 1000.0

        if audio.channels > 1:
            audio = audio.set_channels(1)

        return audio, duration_seconds

    @classmethod
    def _window_levels_db(cls, audio: AudioSegment, window_ms: int) -> List[float]:
        """
        Return the RMS level in dB of each consecutive window of the audio.

        Levels are relative to the segment's maximum possible amplitude;
        windows with zero RMS are reported as -100.0 dB.
        """
        duration_ms = len(audio)
        levels = []

        for start_ms in range(0, duration_ms, window_ms):
            end_ms = min(start_ms + window_ms, duration_ms)
            window = audio[start_ms:end_ms]

            # Read rms once; it is needed for both the test and the ratio.
            rms = window.rms
            if rms > 0:
                levels.append(20 * math.log10(rms / window.max_possible_amplitude))
            else:
                levels.append(cls._SILENCE_DB)

        return levels

    def _get_envelope(
        self,
        audio: AudioSegment,
        window_ms: int = 50,
    ) -> np.ndarray:
        """
        Extract amplitude envelope from audio.

        Args:
            audio: Audio to analyze
            window_ms: Length of each analysis window in milliseconds

        Returns:
            Array of amplitude values in dB, one per window.
        """
        return np.array(self._window_levels_db(audio, window_ms))

    def _create_figure(
        self,
        duration_seconds: float,
        show_time_axis: bool,
    ) -> Tuple[plt.Figure, plt.Axes]:
        """
        Create matplotlib figure and axes.

        The x axis spans 0..duration_seconds and the y axis spans
        -60..0 dB. The caller is responsible for closing the figure.
        """
        fig_width = self.width / self.dpi
        fig_height = self.height / self.dpi

        fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=self.dpi)

        try:
            # Set background
            fig.patch.set_facecolor(self.background_color)
            ax.set_facecolor(self.background_color)

            # Configure axes
            ax.set_xlim(0, duration_seconds)
            ax.set_ylim(self._DISPLAY_FLOOR_DB, 0)  # dB range

            # Remove spines
            for spine in ax.spines.values():
                spine.set_visible(False)

            # Configure ticks
            if show_time_axis:
                ax.tick_params(
                    axis='x',
                    colors=self.time_axis_color,
                    labelsize=8,
                )
                ax.tick_params(axis='y', left=False, labelleft=False)

                # Set time axis ticks
                self._set_time_ticks(ax, duration_seconds)
            else:
                ax.tick_params(
                    axis='both',
                    left=False,
                    bottom=False,
                    labelleft=False,
                    labelbottom=False,
                )
        except Exception:
            # The caller never receives the figure on failure, so close it here.
            plt.close(fig)
            raise

        return fig, ax

    def _set_time_ticks(self, ax: plt.Axes, duration_seconds: float):
        """
        Set appropriate time axis tick marks, labelled as M:SS.

        Ticks are spaced every 10 s (up to 1 minute), 30 s (up to
        5 minutes) or 60 s (longer audio).
        """
        if duration_seconds <= 60:
            tick_interval = 10
        elif duration_seconds <= 300:
            tick_interval = 30
        else:
            tick_interval = 60

        # The +1 lets arange include a tick that falls exactly on the end of
        # the audio. Ticks past the end are then dropped: set_xticks widens
        # the view limits to cover every tick, which would stretch the x axis
        # beyond the audio duration.
        ticks = np.arange(0, duration_seconds + 1, tick_interval)
        ticks = ticks[ticks <= duration_seconds]
        ax.set_xticks(ticks)

        # Format tick labels as M:SS
        labels = [f"{int(t // 60)}:{int(t % 60):02d}" for t in ticks]
        ax.set_xticklabels(labels)

    def _draw_waveform(
        self,
        ax: plt.Axes,
        envelope: np.ndarray,
        duration_seconds: float,
    ):
        """
        Draw the main waveform as a filled area with an outline.
        """
        time_points = np.linspace(0, duration_seconds, len(envelope))

        # Draw as filled area down to the bottom of the dB range
        ax.fill_between(
            time_points,
            envelope,
            self._DISPLAY_FLOOR_DB,
            color=self.waveform_color,
            alpha=0.7,
        )

        # Draw outline
        ax.plot(
            time_points,
            envelope,
            color=self.waveform_color,
            linewidth=0.5,
            alpha=0.9,
        )

    def _draw_segments(
        self,
        ax: plt.Axes,
        segments: List[AudibleSegment],
        envelope: np.ndarray,
        duration_seconds: float,
    ):
        """
        Highlight audible segments on the waveform.

        Each segment's start_seconds/end_seconds are mapped to envelope
        indices and that slice is re-filled in the segment color.
        """
        num_points = len(envelope)
        # Nothing to map onto for empty audio; also avoids dividing by zero.
        if num_points == 0 or duration_seconds <= 0:
            return

        time_points = np.linspace(0, duration_seconds, num_points)

        for segment in segments:
            # Find indices corresponding to this segment
            start_idx = int(segment.start_seconds / duration_seconds * num_points)
            end_idx = int(segment.end_seconds / duration_seconds * num_points)

            # Clamp to the envelope; end_idx is an exclusive slice bound
            start_idx = max(0, min(start_idx, num_points - 1))
            end_idx = max(0, min(end_idx, num_points))

            if start_idx >= end_idx:
                continue

            # Highlight this segment with a different color
            ax.fill_between(
                time_points[start_idx:end_idx],
                envelope[start_idx:end_idx],
                self._DISPLAY_FLOOR_DB,
                color=self.segment_color,
                alpha=0.6,
            )

    def _draw_mute_regions(
        self,
        ax: plt.Axes,
        mute_regions: List[MuteRegion],
        duration_seconds: float,
    ):
        """
        Draw mute region overlays as translucent vertical bands.

        duration_seconds is accepted for signature parity with the other
        drawing helpers and is not used.
        """
        for region in mute_regions:
            ax.axvspan(
                region.start_seconds,
                region.end_seconds,
                color=self.mute_color,
                alpha=0.3,
                zorder=0,  # keep the band beneath the waveform
            )

    def _draw_threshold_line(
        self,
        ax: plt.Axes,
        threshold_db: float,
        duration_seconds: float,
    ):
        """
        Draw a dashed reference line at the silence threshold.

        duration_seconds is accepted for signature parity with the other
        drawing helpers and is not used.
        """
        ax.axhline(
            y=threshold_db,
            color=self.time_axis_color,
            linestyle='--',
            linewidth=0.5,
            alpha=0.3,
        )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .karaoke_finalise import KaraokeFinalise
|