karaoke-gen 0.75.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of karaoke-gen might be problematic. Click here for more details.
- karaoke_gen/__init__.py +38 -0
- karaoke_gen/audio_fetcher.py +1614 -0
- karaoke_gen/audio_processor.py +790 -0
- karaoke_gen/config.py +83 -0
- karaoke_gen/file_handler.py +387 -0
- karaoke_gen/instrumental_review/__init__.py +45 -0
- karaoke_gen/instrumental_review/analyzer.py +408 -0
- karaoke_gen/instrumental_review/editor.py +322 -0
- karaoke_gen/instrumental_review/models.py +171 -0
- karaoke_gen/instrumental_review/server.py +475 -0
- karaoke_gen/instrumental_review/static/index.html +1529 -0
- karaoke_gen/instrumental_review/waveform.py +409 -0
- karaoke_gen/karaoke_finalise/__init__.py +1 -0
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
- karaoke_gen/karaoke_gen.py +1026 -0
- karaoke_gen/lyrics_processor.py +474 -0
- karaoke_gen/metadata.py +160 -0
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
- karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
- karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/__init__.py +18 -0
- karaoke_gen/utils/bulk_cli.py +492 -0
- karaoke_gen/utils/cli_args.py +432 -0
- karaoke_gen/utils/gen_cli.py +978 -0
- karaoke_gen/utils/remote_cli.py +3268 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen/video_generator.py +424 -0
- karaoke_gen-0.75.54.dist-info/METADATA +718 -0
- karaoke_gen-0.75.54.dist-info/RECORD +287 -0
- karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
- karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
- karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +594 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +919 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +214 -0
- lyrics_transcriber/frontend/src/api.ts +254 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +306 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +379 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +650 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class TranscriberConfig:
    """Credentials used to enable the transcription backends.

    Every field is optional and defaults to ``None``; a backend whose
    credentials are missing is simply not configured by this object.
    """

    # API token for the AudioShake transcription service.
    audioshake_api_token: Optional[str] = None

    # RunPod API key; used together with ``whisper_runpod_id``.
    runpod_api_key: Optional[str] = None

    # Identifier of the RunPod endpoint used for Whisper.
    whisper_runpod_id: Optional[str] = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class LyricsConfig:
    """Credentials and inputs used to enable the lyrics sources.

    Every field is optional and defaults to ``None``.
    """

    # API token for Genius.
    genius_api_token: Optional[str] = None

    # RapidAPI key.
    rapidapi_key: Optional[str] = None

    # Spotify cookie value.
    spotify_cookie: Optional[str] = None

    # Path to a local lyrics file.
    lyrics_file: Optional[str] = None
|
|
23
|
+
|
|
24
|
+
@dataclass
class OutputConfig:
    """Configuration for output generation.

    The ``output_dir`` and ``cache_dir`` defaults are resolved each time an
    instance is created (not once at import time), so they follow the current
    working directory and the current value of the
    ``LYRICS_TRANSCRIBER_CACHE_DIR`` environment variable.
    """

    # Path to the output styles JSON file. Optional so that ``OutputConfig()``
    # can be constructed without arguments; ``None`` means "no styles file".
    output_styles_json: Optional[str] = None
    default_max_line_length: int = 36
    # A fresh dict per instance (never shared between instances).
    styles: Dict[str, Any] = field(default_factory=dict)
    # Defaults to the working directory at instantiation time.
    output_dir: Optional[str] = field(default_factory=os.getcwd)
    # Defaults to $LYRICS_TRANSCRIBER_CACHE_DIR when set, otherwise to
    # "~/lyrics-transcriber-cache"; looked up at instantiation time.
    cache_dir: str = field(
        default_factory=lambda: os.getenv(
            "LYRICS_TRANSCRIBER_CACHE_DIR",
            os.path.join(os.path.expanduser("~"), "lyrics-transcriber-cache"),
        )
    )

    # Workflow toggles
    fetch_lyrics: bool = True
    run_transcription: bool = True
    run_correction: bool = True
    enable_review: bool = True

    # Output toggles and settings
    generate_plain_text: bool = True
    generate_lrc: bool = True
    generate_cdg: bool = True
    render_video: bool = True
    video_resolution: str = "360p"
    subtitle_offset_ms: int = 0

    # Countdown feature for songs that start too quickly
    add_countdown: bool = True
|
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, Optional, List
|
|
6
|
+
from lyrics_transcriber.types import LyricsData, TranscriptionResult, CorrectionResult
|
|
7
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
|
|
8
|
+
from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
|
|
9
|
+
from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
|
|
10
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
|
11
|
+
from lyrics_transcriber.lyrics.genius import GeniusProvider
|
|
12
|
+
from lyrics_transcriber.lyrics.spotify import SpotifyProvider
|
|
13
|
+
from lyrics_transcriber.lyrics.musixmatch import MusixmatchProvider
|
|
14
|
+
from lyrics_transcriber.lyrics.lrclib import LRCLIBProvider
|
|
15
|
+
from lyrics_transcriber.output.generator import OutputGenerator
|
|
16
|
+
from lyrics_transcriber.correction.corrector import LyricsCorrector
|
|
17
|
+
from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
|
|
18
|
+
from lyrics_transcriber.lyrics.file_provider import FileProvider
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class LyricsControllerResult:
    """Container for everything produced by a transcription/correction run.

    All fields have defaults, so an empty result can be created up front and
    filled in as the workflow progresses.
    """

    # --- Raw results, per source ---
    # Fetched lyrics keyed by a string (one entry per source).
    lyrics_results: dict[str, LyricsData] = field(default_factory=dict)
    # Transcription results collected from the transcribers.
    transcription_results: List[TranscriptionResult] = field(default_factory=list)

    # --- Corrected result (None until correction has produced one) ---
    transcription_corrected: Optional[CorrectionResult] = None

    # --- Output file paths (None until the corresponding file is set) ---
    lrc_filepath: Optional[str] = None
    ass_filepath: Optional[str] = None
    video_filepath: Optional[str] = None
    mp3_filepath: Optional[str] = None
    cdg_filepath: Optional[str] = None
    cdg_zip_filepath: Optional[str] = None
    original_txt: Optional[str] = None
    corrected_txt: Optional[str] = None
    corrections_json: Optional[str] = None

    # --- Countdown padding info, kept so the same padding can be applied
    # --- to other audio files ---
    countdown_padding_added: bool = False
    countdown_padding_seconds: float = 0.0
    padded_audio_filepath: Optional[str] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class LyricsTranscriber:
|
|
50
|
+
"""
|
|
51
|
+
Controller class that orchestrates the lyrics transcription workflow:
|
|
52
|
+
1. Fetch lyrics from internet sources
|
|
53
|
+
2. Run multiple transcription methods
|
|
54
|
+
3. Correct transcribed lyrics using fetched lyrics
|
|
55
|
+
4. Generate output formats (LRC, ASS, video)
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(
    self,
    audio_filepath: str,
    artist: Optional[str] = None,
    title: Optional[str] = None,
    transcriber_config: Optional[TranscriberConfig] = None,
    lyrics_config: Optional[LyricsConfig] = None,
    output_config: Optional[OutputConfig] = None,
    transcribers: Optional[Dict[str, BaseTranscriber]] = None,
    lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
    corrector: Optional[LyricsCorrector] = None,
    output_generator: Optional[OutputGenerator] = None,
    logger: Optional[logging.Logger] = None,
    log_level: int = logging.DEBUG,
    log_formatter: Optional[logging.Formatter] = None,
):
    """Set up logging, configuration, folders and workflow components.

    Args:
        audio_filepath: Path of the audio file to process.
        artist: Artist name; used with ``title`` to build the output prefix.
        title: Song title; used with ``artist`` to build the output prefix.
        transcriber_config: Transcriber settings; a default
            ``TranscriberConfig`` is used when omitted.
        lyrics_config: Lyrics settings; a default ``LyricsConfig`` is used
            when omitted.
        output_config: Output settings. When omitted, a config without a
            styles file is used, which disables CDG and video generation.
            Note that a passed-in config object is modified in place
            (``generate_cdg``/``render_video`` may be switched off and
            ``styles`` is replaced).
        transcribers: Pre-built transcribers; built from config if falsy.
        lyrics_providers: Pre-built lyrics providers; built from config if
            falsy.
        corrector: Pre-built corrector; a ``LyricsCorrector`` using the
            cache directory is created when omitted.
        output_generator: Pre-built output generator; built from config
            when omitted.
        logger: Logger to use. When omitted, the module logger is used, its
            level is set to ``log_level``, and a stream handler is attached
            if it has no handlers yet.
        log_level: Level applied to the module logger when ``logger`` is
            not given.
        log_formatter: Formatter for the stream handler created when
            ``logger`` is not given.

    Side effects: creates the cache and output directories if missing.
    """
    # Set up logging
    self.logger = logger or logging.getLogger(__name__)
    if not logger:
        self.logger.setLevel(log_level)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")

    # Store configs (with defaults if not provided)
    self.transcriber_config = transcriber_config or TranscriberConfig()
    self.lyrics_config = lyrics_config or LyricsConfig()
    # OutputConfig declares output_styles_json as a required field, so a bare
    # OutputConfig() would raise TypeError; pass an explicit empty path, which
    # the check below treats as "no styles file".
    self.output_config = output_config or OutputConfig(output_styles_json="")

    # Check if styles JSON is available for CDG and video features
    if not self.output_config.output_styles_json or not os.path.exists(self.output_config.output_styles_json):
        if self.output_config.generate_cdg or self.output_config.render_video:
            self.logger.warning(
                f"Output styles JSON file not found: {self.output_config.output_styles_json}. "
                "CDG and video generation will be disabled."
            )
            self.output_config.generate_cdg = False
            self.output_config.render_video = False

    # Basic settings with sanitized filenames.
    # audio_filepath must be stored first: the prefix helper falls back to it
    # when artist/title are not both provided.
    self.audio_filepath = audio_filepath
    self.artist = artist
    self.title = title
    self.output_prefix = self._create_sanitized_output_prefix(artist, title)

    # Report which directories will be used, then make sure they exist
    self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
    self.logger.debug(f"Using output directory: {self.output_config.output_dir}")

    # Create necessary folders
    os.makedirs(self.output_config.cache_dir, exist_ok=True)
    os.makedirs(self.output_config.output_dir, exist_ok=True)

    # Initialize results
    self.results = LyricsControllerResult()

    # Load styles early so lyrics providers can use them
    self._load_styles()

    # Initialize components (with dependency injection)
    self.transcribers = transcribers or self._initialize_transcribers()
    self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
    self.corrector = corrector or LyricsCorrector(cache_dir=self.output_config.cache_dir, logger=self.logger)
    self.output_generator = output_generator or self._initialize_output_generator()

    # Log enabled features
    self.logger.info("Enabled features:")
    self.logger.info(f" Lyrics fetching: {'enabled' if self.output_config.fetch_lyrics else 'disabled'}")
    self.logger.info(f" Transcription: {'enabled' if self.output_config.run_transcription else 'disabled'}")
    self.logger.info(f" Lyrics correction: {'enabled' if self.output_config.run_correction else 'disabled'}")
    self.logger.info(f" Plain text output: {'enabled' if self.output_config.generate_plain_text else 'disabled'}")
    self.logger.info(f" LRC file generation: {'enabled' if self.output_config.generate_lrc else 'disabled'}")
    self.logger.info(f" CDG file generation: {'enabled' if self.output_config.generate_cdg else 'disabled'}")
    self.logger.info(f" Video rendering: {'enabled' if self.output_config.render_video else 'disabled'}")
    if self.output_config.render_video:
        self.logger.info(f" Video resolution: {self.output_config.video_resolution}")
|
|
138
|
+
|
|
139
|
+
def _load_styles(self) -> None:
|
|
140
|
+
"""Load styles from JSON file if available."""
|
|
141
|
+
if self.output_config.output_styles_json and os.path.exists(self.output_config.output_styles_json):
|
|
142
|
+
try:
|
|
143
|
+
with open(self.output_config.output_styles_json, "r") as f:
|
|
144
|
+
self.output_config.styles = json.load(f)
|
|
145
|
+
self.logger.debug(f"Loaded output styles from: {self.output_config.output_styles_json}")
|
|
146
|
+
except Exception as e:
|
|
147
|
+
self.logger.warning(f"Failed to load output styles file: {str(e)}")
|
|
148
|
+
self.output_config.styles = {}
|
|
149
|
+
else:
|
|
150
|
+
self.logger.debug("No styles JSON file provided or file does not exist")
|
|
151
|
+
self.output_config.styles = {}
|
|
152
|
+
|
|
153
|
+
def _sanitize_filename(self, filename: str) -> str:
|
|
154
|
+
"""Replace or remove characters that are unsafe for filenames."""
|
|
155
|
+
if not filename:
|
|
156
|
+
return ""
|
|
157
|
+
# Replace problematic characters with underscores
|
|
158
|
+
for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
|
|
159
|
+
filename = filename.replace(char, "_")
|
|
160
|
+
# Remove any trailing spaces
|
|
161
|
+
filename = filename.rstrip(" ")
|
|
162
|
+
return filename
|
|
163
|
+
|
|
164
|
+
def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
|
|
165
|
+
"""Create a sanitized output prefix from artist and title."""
|
|
166
|
+
if artist and title:
|
|
167
|
+
sanitized_artist = self._sanitize_filename(artist)
|
|
168
|
+
sanitized_title = self._sanitize_filename(title)
|
|
169
|
+
return f"{sanitized_artist} - {sanitized_title}"
|
|
170
|
+
else:
|
|
171
|
+
return self._sanitize_filename(os.path.splitext(os.path.basename(self.audio_filepath))[0])
|
|
172
|
+
|
|
173
|
+
def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
    """Create the transcribers for which credentials are configured.

    Returns a dict keyed by transcriber name (``"audioshake"``,
    ``"whisper"``). Each value is itself a dict with two keys:
    ``"instance"`` (the transcriber object) and ``"priority"`` (an int;
    AudioShake gets 1, Whisper gets 2). The dict is empty when no
    credentials are configured.
    """
    credentials = self.transcriber_config
    cache_dir = self.output_config.cache_dir
    available = {}

    # Add debug logging for config values
    self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
    self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")

    # AudioShake needs only an API token.
    if not credentials.audioshake_api_token:
        self.logger.debug("Skipping AudioShake transcriber - no API token provided")
    else:
        self.logger.debug("Initializing AudioShake transcriber")
        audioshake = AudioShakeTranscriber(
            cache_dir=cache_dir,
            config=AudioShakeConfig(api_token=credentials.audioshake_api_token),
            logger=self.logger,
        )
        # AudioShake has highest priority
        available["audioshake"] = {"instance": audioshake, "priority": 1}

    # Whisper needs both the RunPod API key and the endpoint id.
    if not (credentials.runpod_api_key and credentials.whisper_runpod_id):
        self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
    else:
        self.logger.debug("Initializing Whisper transcriber")
        whisper = WhisperTranscriber(
            cache_dir=cache_dir,
            config=WhisperConfig(
                runpod_api_key=credentials.runpod_api_key,
                endpoint_id=credentials.whisper_runpod_id,
            ),
            logger=self.logger,
        )
        # Whisper has lower priority
        available["whisper"] = {"instance": whisper, "priority": 2}

    return available
|
|
210
|
+
|
|
211
|
+
def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
    """Create the lyrics providers that can be used for this run.

    A local lyrics file, when configured and present on disk, is used as the
    only provider. Otherwise LRCLIB is always registered, and Genius,
    Spotify and Musixmatch are registered when their credential is set.

    Returns:
        A dict mapping provider name to provider instance.
    """
    out_cfg = self.output_config
    lyr_cfg = self.lyrics_config

    # A "karaoke" style entry may override the configured default line length.
    karaoke_style = out_cfg.styles.get("karaoke", {})
    max_line_length = karaoke_style.get("max_line_length", out_cfg.default_max_line_length)
    self.logger.info(f"Using max_line_length for lyrics providers: {max_line_length}")

    # One shared config object is handed to every provider.
    provider_config = LyricsProviderConfig(
        genius_api_token=lyr_cfg.genius_api_token,
        rapidapi_key=lyr_cfg.rapidapi_key,
        spotify_cookie=lyr_cfg.spotify_cookie,
        lyrics_file=lyr_cfg.lyrics_file,
        cache_dir=out_cfg.cache_dir,
        audio_filepath=self.audio_filepath,
        max_line_length=max_line_length,
    )

    # An existing local lyrics file short-circuits every online provider.
    if provider_config.lyrics_file and os.path.exists(provider_config.lyrics_file):
        self.logger.debug(f"Initializing File lyrics provider with file: {provider_config.lyrics_file}")
        return {"file": FileProvider(config=provider_config, logger=self.logger)}

    registry = {}

    # LRCLIB - always enabled (no API key required)
    self.logger.debug("Initializing LRCLIB lyrics provider")
    registry["lrclib"] = LRCLIBProvider(config=provider_config, logger=self.logger)

    # (registry key, credential attribute, provider class, init message, skip message)
    credentialed = (
        (
            "genius",
            "genius_api_token",
            GeniusProvider,
            "Initializing Genius lyrics provider",
            "Skipping Genius provider - no API token provided",
        ),
        (
            "spotify",
            "spotify_cookie",
            SpotifyProvider,
            "Initializing Spotify lyrics provider",
            "Skipping Spotify provider - no cookie provided",
        ),
        (
            "musixmatch",
            "rapidapi_key",
            MusixmatchProvider,
            "Initializing Musixmatch lyrics provider",
            "Skipping Musixmatch provider - no RapidAPI key provided",
        ),
    )
    for key, credential_attr, provider_cls, init_msg, skip_msg in credentialed:
        if getattr(provider_config, credential_attr):
            self.logger.debug(init_msg)
            registry[key] = provider_cls(config=provider_config, logger=self.logger)
        else:
            self.logger.debug(skip_msg)

    return registry
|
|
258
|
+
|
|
259
|
+
def _initialize_output_generator(self) -> OutputGenerator:
    """Create the OutputGenerator from this controller's output config and logger."""
    generator = OutputGenerator(config=self.output_config, logger=self.logger)
    return generator
|
|
262
|
+
|
|
263
|
+
def process(self) -> LyricsControllerResult:
    """Orchestrate the whole workflow and return the accumulated results.

    If ``<output_dir>/<output_prefix> (Lyrics Corrections).json`` exists, it
    is loaded, padded audio is created when countdown is enabled and the
    loaded corrections already contain one, outputs are generated and the
    method returns early. If anything in that path raises, the error is
    logged and the normal flow runs instead: fetch lyrics, transcribe,
    correct, generate outputs - each stage gated by ``output_config`` flags
    and by the data produced so far.

    Returns:
        ``self.results``, populated by whichever stages ran.
    """
    self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")

    # Diagnostics only: a failure to determine the version must not abort the run.
    try:
        import lyrics_transcriber

        package_version = getattr(lyrics_transcriber, "__version__", "unknown")
        self.logger.info(f"LyricsTranscriber package version: {package_version}")
    except Exception as e:
        self.logger.warning(f"Could not get package version: {e}")

    # Build a masked snapshot of the environment for the log. Only a
    # three-character prefix is kept; values that are three characters or
    # shorter are hidden entirely, since a "prefix" would be the whole value.
    env_vars = {}
    for key, value in os.environ.items():
        if not value:
            env_vars[key] = "(empty)"
        elif len(value) > 3:
            env_vars[key] = value[:3] + "..."
        else:
            env_vars[key] = "***"

    self.logger.info(f"Environment variables count: {len(env_vars)}")

    # Surface the variables whose names look credential-related.
    api_vars = {k: v for k, v in env_vars.items() if any(keyword in k.upper() for keyword in ["API", "TOKEN", "KEY", "SECRET"])}
    if api_vars:
        self.logger.info(f"API-related environment variables: {api_vars}")
    else:
        self.logger.warning("No API-related environment variables found")

    # The full (masked) dump is only emitted when the logger is at DEBUG.
    if self.logger.getEffectiveLevel() <= logging.DEBUG:
        self.logger.debug(f"All environment variables: {env_vars}")

    # Check for existing corrections JSON
    corrections_json_path = os.path.join(self.output_config.output_dir, f"{self.output_prefix} (Lyrics Corrections).json")

    if os.path.exists(corrections_json_path):
        self.logger.info(f"Found existing corrections JSON: {corrections_json_path}")
        # NOTE(review): this try also covers padded-audio creation and
        # generate_outputs(), so a failure there is reported as a JSON load
        # failure and the normal flow then runs with whatever state
        # (transcription_corrected, audio_filepath) was already changed.
        # Left as-is to preserve the existing fallback behaviour - confirm
        # whether that is intended.
        try:
            with open(corrections_json_path, "r", encoding="utf-8") as f:
                corrections_data = json.load(f)

            # Reconstruct CorrectionResult from JSON
            self.results.transcription_corrected = CorrectionResult.from_dict(corrections_data)
            self.logger.info("Successfully loaded existing corrections data")

            # Check if the loaded corrections have countdown padding applied.
            # This is important because the video needs to use padded audio
            # to sync with the countdown-adjusted timestamps in the ASS subtitles.
            if self.output_config.add_countdown:
                from lyrics_transcriber.output.countdown_processor import CountdownProcessor

                countdown_processor = CountdownProcessor(
                    cache_dir=self.output_config.cache_dir,
                    logger=self.logger,
                )

                if countdown_processor.has_countdown(self.results.transcription_corrected):
                    self.logger.info(
                        "Loaded corrections have countdown - creating padded audio for video sync"
                    )
                    # Create padded audio file to match the countdown-adjusted timestamps
                    padded_audio_path = countdown_processor.create_padded_audio_only(self.audio_filepath)
                    self.audio_filepath = padded_audio_path

                    # Set countdown padding attributes on results
                    self.results.countdown_padding_added = True
                    self.results.countdown_padding_seconds = countdown_processor.COUNTDOWN_PADDING_SECONDS
                    self.results.padded_audio_filepath = padded_audio_path

                    self.logger.info(
                        f"Countdown padding applied: {countdown_processor.COUNTDOWN_PADDING_SECONDS}s. "
                        f"Using padded audio: {padded_audio_path}"
                    )
                else:
                    self.logger.info("Loaded corrections do not have countdown - no padding needed")

            # Skip to output generation
            self.generate_outputs()
            self.logger.info("Processing completed successfully using existing corrections")
            return self.results

        except Exception as e:
            self.logger.error(f"Failed to load existing corrections JSON: {str(e)}")
            # Continue with normal processing if loading fails

    # Step 1: Fetch reference lyrics when enabled and the song is identified
    if self.output_config.fetch_lyrics and self.artist and self.title:
        self.fetch_lyrics()
    else:
        self.logger.info("Skipping lyrics fetching - no artist/title provided or fetching disabled")

    # Step 2: Run transcription if enabled
    if self.output_config.run_transcription:
        self.transcribe()
    else:
        self.logger.info("Skipping transcription - transcription disabled")

    # Step 3: Process and correct lyrics if enabled AND we have transcription results
    if self.output_config.run_correction and self.results.transcription_results:
        self.correct_lyrics()
    elif self.output_config.run_correction:
        self.logger.info("Skipping lyrics correction - no transcription results available")

    # Step 4: Generate outputs based on what we have
    if self.results.transcription_corrected or self.results.lyrics_results:
        self.generate_outputs()
    else:
        self.logger.warning("No corrected transcription or lyrics available. Skipping output generation.")

    self.logger.info("Processing completed successfully")
    return self.results
|
|
376
|
+
|
|
377
|
+
def fetch_lyrics(self) -> None:
    """Query every configured lyrics provider for the current artist/title.

    Each non-empty result is stored in ``self.results.lyrics_results`` under
    the provider's name. A provider that raises is logged and skipped so the
    remaining providers are still tried.
    """
    self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")

    found = self.results.lyrics_results
    for source, fetcher in self.lyrics_providers.items():
        try:
            lyrics = fetcher.fetch_lyrics(self.artist, self.title)
            if not lyrics:
                # Provider answered but had nothing for this song.
                continue
            found[source] = lyrics
            self.logger.info(f"Successfully fetched lyrics from {source}")
        except Exception as e:
            self.logger.error(f"Failed to fetch lyrics from {source}: {str(e)}")

    if not self.results.lyrics_results:
        self.logger.warning("No lyrics found from any source")
|
|
394
|
+
|
|
395
|
+
def transcribe(self) -> None:
    """Run every configured transcriber against ``self.audio_filepath``.

    Each truthy result is wrapped in a ``TranscriptionResult`` carrying the
    provider's name and priority and appended to
    ``self.results.transcription_results``. Warnings are logged when no
    provider is configured and when no provider produced a result.
    """
    configured = list(self.transcribers.keys())

    if configured:
        self.logger.info(f"Starting transcription with providers: {configured}")
        self._log_provider_configuration_status()
    else:
        # Nothing to run: explain the consequence and how to configure a provider.
        setup_help = (
            "Starting transcription with providers: [] - NO TRANSCRIPTION PROVIDERS CONFIGURED!\n"
            "\n"
            "This means no word-level timing data will be generated, and synchronized karaoke "
            "lyrics cannot be created. The output will lack the '(With Vocals).mkv' video file.\n"
            "\n"
            "To enable transcription, configure at least one provider:\n"
            " - AudioShake: Set AUDIOSHAKE_API_TOKEN environment variable\n"
            " - Whisper/RunPod: Set RUNPOD_API_KEY and WHISPER_RUNPOD_ID environment variables\n"
            "\n"
            "See README.md 'Transcription Providers' section for detailed setup instructions."
        )
        self.logger.warning(setup_help)

    collected = self.results.transcription_results
    for provider, entry in self.transcribers.items():
        self.logger.info(f"Running transcription with {provider}")
        outcome = entry["instance"].transcribe(self.audio_filepath)
        if not outcome:
            continue
        # Tag the raw result with the provider's name and priority.
        tagged = TranscriptionResult(name=provider, priority=entry["priority"], result=outcome)
        collected.append(tagged)
        self.logger.debug(f"Transcription completed for {provider}")

    if not self.results.transcription_results:
        self.logger.warning(
            "No successful transcriptions from any provider. "
            "Check that your API tokens are valid and the services are accessible."
        )
|
|
431
|
+
|
|
432
|
+
def _log_provider_configuration_status(self) -> None:
    """Write one debug line per transcription provider describing its credentials.

    AudioShake is reported as configured or not; Whisper (RunPod) is
    additionally reported as partially configured when only one of its two
    settings is present.
    """
    debug = self.logger.debug
    cfg = self.transcriber_config

    debug("Transcription provider configuration status:")

    # AudioShake needs a single API token.
    if cfg.audioshake_api_token:
        audioshake_line = " - AudioShake: CONFIGURED (API token provided)"
    else:
        audioshake_line = " - AudioShake: NOT CONFIGURED (missing AUDIOSHAKE_API_TOKEN)"
    debug(audioshake_line)

    # Whisper needs both values; index the message by (has key, has endpoint id).
    whisper_lines = {
        (True, True): " - Whisper (RunPod): CONFIGURED (API key and endpoint ID provided)",
        (True, False): " - Whisper (RunPod): PARTIALLY CONFIGURED (missing WHISPER_RUNPOD_ID)",
        (False, True): " - Whisper (RunPod): PARTIALLY CONFIGURED (missing RUNPOD_API_KEY)",
        (False, False): " - Whisper (RunPod): NOT CONFIGURED (missing RUNPOD_API_KEY and WHISPER_RUNPOD_ID)",
    }
    state = (bool(cfg.runpod_api_key), bool(cfg.whisper_runpod_id))
    debug(whisper_lines[state])
|
|
454
|
+
|
|
455
|
+
def correct_lyrics(self) -> None:
    """Produce ``self.results.transcription_corrected`` and post-process it.

    Steps, in order:

    1. Correction. With reference lyrics available, ``LyricsCorrector`` is
       run over the transcription and lyrics results. Without them, the
       transcription with the lowest ``priority`` value is wrapped in a
       ``CorrectionResult`` that records zero corrections.
    2. Optional human review (``output_config.enable_review``): a
       ``ReviewServer`` is started and its return value replaces the
       correction result. Skipped when there is no correction result.
    3. Optional countdown intro (``output_config.add_countdown``):
       ``CountdownProcessor.process`` may replace both the correction result
       and ``self.audio_filepath``; the padding details are recorded on
       ``self.results``.
    """
    self.logger.info("Starting lyrics correction process")

    # Check if we have reference lyrics to work with
    if not self.results.lyrics_results:
        self.logger.warning("No reference lyrics available for correction - using raw transcription")
        # Use the highest priority transcription result as the "corrected" version
        if self.results.transcription_results:
            # Lowest number wins; min() returns the first of equal minima,
            # matching a stable sort followed by [0].
            best_transcription = min(self.results.transcription_results, key=lambda x: x.priority)
            segments = best_transcription.result.segments

            # Count total words in the transcription
            total_words = sum(len(segment.words) for segment in segments)

            # Create a CorrectionResult with no corrections.
            # NOTE(review): original_segments and corrected_segments are the
            # same list object, so an in-place edit of one shows in the other.
            self.results.transcription_corrected = CorrectionResult(
                original_segments=segments,
                corrected_segments=segments,
                corrections=[],  # No corrections made
                corrections_made=0,  # No corrections made
                confidence=1.0,  # Full confidence since we're using original
                reference_lyrics={},
                anchor_sequences=[],
                gap_sequences=[],
                resized_segments=[],
                correction_steps=[],
                word_id_map={},
                segment_id_map={},
                metadata={
                    "correction_type": "none",
                    "reason": "no_reference_lyrics",
                    "audio_filepath": self.audio_filepath,
                    "anchor_sequences_count": 0,
                    "gap_sequences_count": 0,
                    "total_words": total_words,
                    "correction_ratio": 0.0,
                    "available_handlers": [],
                    "enabled_handlers": [],
                },
            )
    else:
        # Create metadata dict with song info
        metadata = {
            "artist": self.artist,
            "title": self.title,
            "full_reference_texts": {source: lyrics.get_full_text() for source, lyrics in self.results.lyrics_results.items()},
        }

        # The metadata built above never carries an "enabled_handlers" entry,
        # so the corrector is always created with enabled_handlers=None.
        corrector = LyricsCorrector(cache_dir=self.output_config.cache_dir, enabled_handlers=None, logger=self.logger)

        corrected_data = corrector.run(
            transcription_results=self.results.transcription_results,
            lyrics_results=self.results.lyrics_results,
            metadata=metadata,
        )

        # Store corrected results
        self.results.transcription_corrected = corrected_data
        self.logger.info("Lyrics correction completed")

    # Human review runs for both branches above, but only when there is a
    # correction result to review (same guard as the countdown step below).
    if self.output_config.enable_review:
        if self.results.transcription_corrected:
            from lyrics_transcriber.review.server import ReviewServer

            self.logger.info("Starting human review process")

            # Create and start review server
            review_server = ReviewServer(
                correction_result=self.results.transcription_corrected,
                output_config=self.output_config,
                audio_filepath=self.audio_filepath,
                logger=self.logger,
            )
            reviewed_data = review_server.start()

            self.results.transcription_corrected = reviewed_data
            self.logger.info("Human review completed, updated transcription_corrected with reviewed_data")
        else:
            self.logger.warning("Skipping human review - no correction result available")

    # Add countdown intro if enabled and needed (after review, before output generation)
    if self.output_config.add_countdown and self.results.transcription_corrected:
        from lyrics_transcriber.output.countdown_processor import CountdownProcessor

        self.logger.info("Processing countdown intro (if needed)")
        countdown_processor = CountdownProcessor(
            cache_dir=self.output_config.cache_dir,
            logger=self.logger,
        )

        # Process and potentially modify the correction result and audio filepath
        (
            self.results.transcription_corrected,
            self.audio_filepath,
            padding_added,
            padding_seconds,
        ) = countdown_processor.process(
            correction_result=self.results.transcription_corrected,
            audio_filepath=self.audio_filepath,
        )

        # Store padding information in results for parent code to use
        self.results.countdown_padding_added = padding_added
        self.results.countdown_padding_seconds = padding_seconds
        if padding_added:
            self.results.padded_audio_filepath = self.audio_filepath
            self.logger.info(
                f"Countdown padding applied: {padding_seconds}s added to audio. "
                f"Padded audio: {self.audio_filepath}"
            )
|
|
568
|
+
|
|
569
|
+
def generate_outputs(self) -> None:
    """Run the output generator and record the produced file paths on ``self.results``.

    The corrected transcription is passed through only when it is truthy;
    otherwise the generator receives ``None`` for it.
    """
    self.logger.info("Generating output files")

    results = self.results

    # Only proceed with outputs that make sense based on what we have
    corrected = results.transcription_corrected or None

    output_files = self.output_generator.generate_outputs(
        transcription_corrected=corrected,
        lyrics_results=results.lyrics_results,
        output_prefix=self.output_prefix,
        audio_filepath=self.audio_filepath,
        artist=self.artist,
        title=self.title,
    )

    # (attribute on self.results, attribute on the generator's return value)
    path_fields = (
        ("lrc_filepath", "lrc"),
        ("ass_filepath", "ass"),
        ("video_filepath", "video"),
        ("original_txt", "original_txt"),
        ("corrected_txt", "corrected_txt"),
        ("corrections_json", "corrections_json"),
        ("cdg_filepath", "cdg"),
        ("mp3_filepath", "mp3"),
        ("cdg_zip_filepath", "cdg_zip"),
    )
    for result_attr, output_attr in path_fields:
        setattr(results, result_attr, getattr(output_files, output_attr))
|
|
File without changes
|