karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/audio_fetcher.py +461 -0
- karaoke_gen/audio_processor.py +407 -30
- karaoke_gen/config.py +62 -113
- karaoke_gen/file_handler.py +32 -59
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
- karaoke_gen/karaoke_gen.py +270 -61
- karaoke_gen/lyrics_processor.py +13 -1
- karaoke_gen/metadata.py +78 -73
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/bulk_cli.py +6 -0
- karaoke_gen/utils/cli_args.py +424 -0
- karaoke_gen/utils/gen_cli.py +26 -261
- karaoke_gen/utils/remote_cli.py +1815 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen-0.71.23.dist-info/METADATA +610 -0
- karaoke_gen-0.71.23.dist-info/RECORD +275 -0
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +520 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +1043 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +212 -0
- lyrics_transcriber/frontend/src/api.ts +239 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +267 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +290 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +648 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
- karaoke_gen-0.57.0.dist-info/METADATA +0 -167
- karaoke_gen-0.57.0.dist-info/RECORD +0 -23
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0
|
File without changes
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Optional, Dict, Any, List
|
|
4
|
+
import json
|
|
5
|
+
import hashlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import os
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
|
|
10
|
+
from karaoke_lyrics_processor import KaraokeLyricsProcessor
|
|
11
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class LyricsProviderConfig:
    """Settings bundle passed to lyrics provider constructors.

    Every field has a default, so a bare ``LyricsProviderConfig()`` is valid.
    Field order is part of the positional constructor signature.
    """

    # Provider credentials (presumably consumed by the matching provider
    # implementations; not read in this module -- verify against callers).
    genius_api_token: Optional[str] = None
    rapidapi_key: Optional[str] = None
    spotify_cookie: Optional[str] = None

    # Path to a local lyrics file.
    lyrics_file: Optional[str] = None

    # Directory for cached provider results; caching is skipped when unset.
    cache_dir: Optional[str] = None

    # Path of the audio file the lyrics belong to.
    audio_filepath: Optional[str] = None

    # Config parameter for KaraokeLyricsProcessor
    max_line_length: int = 36
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BaseLyricsProvider(ABC):
    """Abstract foundation shared by all lyrics providers.

    Subclasses implement ``_fetch_data_from_source`` and
    ``_convert_result_format``. This class layers optional on-disk JSON
    caching (one file for the raw payload, one for the converted payload)
    on top of them and offers hashing and segment-building helpers.
    """

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Store settings from ``config`` and prepare the cache directory.

        Args:
            config: Provider settings; ``cache_dir``, ``audio_filepath`` and
                ``max_line_length`` are read here.
            logger: Logger to use; a module-level logger is used when omitted.
        """
        self.logger = logger or logging.getLogger(__name__)
        self.audio_filepath = config.audio_filepath
        self.max_line_length = config.max_line_length

        self.cache_dir = None
        if config.cache_dir:
            self.cache_dir = Path(config.cache_dir)
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")

    def fetch_lyrics(self, artist: str, title: str) -> Optional[LyricsData]:
        """Return lyrics for ``artist`` / ``title``, consulting the cache first.

        Lookup order when a cache directory is configured: converted cache,
        raw cache, then the provider's source. Without a cache directory the
        source is queried directly.

        Returns:
            The lyrics, or ``None`` when the source yields nothing.
        """
        if not self.cache_dir:
            return self._fetch_and_convert_result(artist, title)

        # The cache key is derived from artist and title, not from the audio file
        key = self._get_artist_title_hash(artist, title)

        # 1) Already-converted result on disk
        converted_cache_path = self._get_cache_path(key, "converted")
        cached_converted = self._load_from_cache(converted_cache_path)
        if cached_converted:
            self.logger.info(f"Using cached converted lyrics for {artist} - {title} from file: {converted_cache_path}")
            return LyricsData.from_dict(cached_converted)

        # 2) Raw payload on disk, otherwise 3) ask the source and persist it
        raw_cache_path = self._get_cache_path(key, "raw")
        payload = self._load_from_cache(raw_cache_path)
        if payload:
            self.logger.info(f"Using cached raw lyrics for {artist} - {title} from file: {raw_cache_path}")
        else:
            payload = self._fetch_data_from_source(artist, title)
            if not payload:
                return None
            self._save_to_cache(raw_cache_path, payload)

        # Both remaining paths end the same way: convert, persist, return
        lyrics = self._convert_result_format(payload)
        self._save_to_cache(converted_cache_path, lyrics.to_dict())
        return lyrics

    def _get_file_hash(self, filepath: str) -> str:
        """Return the hex MD5 digest of the file at ``filepath``."""
        self.logger.debug(f"Calculating hash for file: {filepath}")
        digest = hashlib.md5()
        with open(filepath, "rb") as stream:
            # Read in 4 KiB pieces so the whole file is never held in memory
            while True:
                piece = stream.read(4096)
                if piece == b"":
                    break
                digest.update(piece)
        hash_result = digest.hexdigest()
        self.logger.debug(f"File hash: {hash_result}")
        return hash_result

    def _get_artist_title_hash(self, artist: str, title: str) -> str:
        """Return the hex MD5 digest of the lower-cased ``artist_title`` pair."""
        key_text = "_".join((artist.lower(), title.lower()))
        return hashlib.md5(key_text.encode()).hexdigest()

    def _get_cache_path(self, cache_key: str, suffix: str) -> str:
        """Build the cache file path for ``cache_key`` and ``suffix``.

        The file name has the form ``<provider>_<cache_key>_<suffix>.json``,
        where ``<provider>`` is the lower-cased result of ``get_name()``.
        """
        file_name = f"{self.get_name().lower()}_{cache_key}_{suffix}.json"
        return os.path.join(self.cache_dir, file_name)

    def _save_to_cache(self, cache_path: str, data: Dict[str, Any]) -> None:
        """Write ``data`` as indented UTF-8 JSON to ``cache_path``."""
        self.logger.debug(f"Saving lyrics to cache: {cache_path}")
        with open(cache_path, "w", encoding="utf-8") as sink:
            json.dump(data, sink, indent=2, ensure_ascii=False)
        self.logger.debug("Cache save completed")

    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
        """Read JSON from ``cache_path``.

        Returns:
            The parsed data, or ``None`` when the file is missing or is not
            valid JSON.
        """
        self.logger.debug(f"Attempting to load from cache: {cache_path}")
        try:
            with open(cache_path, "r", encoding="utf-8") as source:
                payload = json.load(source)
        except FileNotFoundError:
            self.logger.debug("Cache file not found")
            return None
        except json.JSONDecodeError:
            self.logger.warning(f"Cache file {cache_path} is corrupted")
            return None

        self.logger.debug("Lyrics loaded from cache")
        return payload

    def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
        """Turn plain lyrics text into one ``LyricsSegment`` per non-blank line.

        Args:
            text: Raw lyrics text, lines separated by newlines.
            is_synced: When true, word times are set to ``0.0`` instead of
                ``None`` and segment times are taken from the first/last word.

        Returns:
            Segments whose words and segment IDs come from
            ``WordUtils.generate_id()``.
        """
        result = []

        for raw_line in text.strip().split("\n"):
            # split() already ignores surrounding whitespace, so an empty
            # token list means the line is blank.
            tokens = raw_line.split()
            if not tokens:
                continue

            stamp = 0.0 if is_synced else None
            line_words = [
                Word(
                    id=WordUtils.generate_id(),
                    text=token,
                    start_time=stamp,
                    end_time=stamp,
                    confidence=1.0,  # Reference lyrics are considered ground truth
                    created_during_correction=False,
                )
                for token in tokens
            ]

            if is_synced:
                seg_start = line_words[0].start_time
                seg_end = line_words[-1].end_time
            else:
                seg_start = None
                seg_end = None

            result.append(
                LyricsSegment(
                    id=WordUtils.generate_id(),
                    text=raw_line.strip(),
                    words=line_words,
                    start_time=seg_start,
                    end_time=seg_end,
                )
            )

        return result

    def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
        """Run the lyrics through ``KaraokeLyricsProcessor`` and rebuild segments.

        The source and metadata of ``lyrics_data`` are carried over unchanged;
        only the segments are replaced.
        """
        # Full lyrics text as provided by the incoming data object
        full_lyrics = lyrics_data.get_full_text()

        # Reuse the first handler's formatter, if this logger has any handlers
        attached = self.logger.handlers
        formatter = attached[0].formatter if attached else None

        processed_text = KaraokeLyricsProcessor(
            log_level=self.logger.getEffectiveLevel(),
            log_formatter=formatter,
            input_lyrics_text=full_lyrics,
            max_line_length=self.max_line_length,
        ).process()

        rebuilt = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)

        return LyricsData(source=lyrics_data.source, segments=rebuilt, metadata=lyrics_data.metadata)

    def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert ``raw_data``, process the lyrics, cache the result and return it."""
        converted_cache_path = self._get_cache_path(cache_key, "converted")

        # Convert first, then run the karaoke line processing on the result
        processed = self._process_lyrics(self._convert_result_format(raw_data))

        # The cache stores plain dictionaries, not LyricsData objects
        self._save_to_cache(converted_cache_path, processed.to_dict())
        return processed

    def _fetch_and_convert_result(self, artist: str, title: str) -> Optional[LyricsData]:
        """Fetch from the source and convert, bypassing the cache entirely."""
        payload = self._fetch_data_from_source(artist, title)
        if not payload:
            return None
        return self._convert_result_format(payload)

    @abstractmethod
    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Return the provider's raw data for ``artist`` / ``title`` (subclass hook)."""
        raise NotImplementedError("Subclasses must implement _fetch_data_from_source")  # pragma: no cover

    @abstractmethod
    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert the provider's raw data into ``LyricsData`` (subclass hook)."""
        raise NotImplementedError("Subclasses must implement _convert_result_format")  # pragma: no cover

    def get_name(self) -> str:
        """Return the class name with every ``"Provider"`` occurrence removed."""
        class_name = self.__class__.__name__
        return class_name.replace("Provider", "")
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Optional, Dict, Any
|
|
4
|
+
from .base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
|
5
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata
|
|
6
|
+
from karaoke_lyrics_processor import KaraokeLyricsProcessor
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FileProvider(BaseLyricsProvider):
    """Provider that loads lyrics from a local file.

    NOTE(review): the inherited cache is keyed on artist/title only, so when a
    cache directory is configured, editing the lyrics file does not invalidate
    results already cached for the same artist/title.
    """

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Store the config and reset the remembered artist/title.

        Args:
            config: Provider settings; ``lyrics_file`` names the file to load.
            logger: Logger to use; the base class picks a default when omitted.
        """
        super().__init__(config, logger)
        self.config = config  # Store the config for use in other methods
        # Log only the paths: the full config repr also contains API tokens
        # and cookies, which should not end up in debug logs.
        self.logger.debug(
            f"FileProvider initialized with lyrics_file={config.lyrics_file!r}, cache_dir={config.cache_dir!r}"
        )
        self.title = None  # Set by fetch_lyrics / get_lyrics
        self.artist = None  # Set by fetch_lyrics / get_lyrics

    def fetch_lyrics(self, artist: str, title: str) -> Optional[LyricsData]:
        """Fetch lyrics, remembering artist/title for the metadata.

        ``_convert_result_format`` reads ``self.title`` and ``self.artist``,
        so they must be recorded on every entry point, not only in
        ``get_lyrics``; otherwise the metadata would carry ``None`` values.
        """
        self.title = title
        self.artist = artist
        return super().fetch_lyrics(artist, title)

    def get_lyrics(self, artist: str, title: str) -> Optional[LyricsData]:
        """Get lyrics for the specified artist and title."""
        return self.fetch_lyrics(artist, title)

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Load and line-process the configured lyrics file.

        Returns:
            A dict with ``text``, ``source`` and ``filepath`` keys, or ``None``
            when no file is configured, the path is not an existing regular
            file, or processing fails.
        """
        self.logger.info(f"Attempting to fetch lyrics from file for {artist} - {title}")

        if not self.config.lyrics_file:
            self.logger.warning("No lyrics file specified in config")
            return None

        lyrics_file = Path(self.config.lyrics_file)
        self.logger.debug(f"Looking for lyrics file at: {lyrics_file} (absolute: {lyrics_file.absolute()})")

        # is_file() also rejects directories, which exists() would let through
        if not lyrics_file.is_file():
            self.logger.error(f"Lyrics file not found: {lyrics_file}")
            return None

        self.logger.info(f"Found lyrics file: {lyrics_file}")
        self.logger.debug(f"File size: {lyrics_file.stat().st_size} bytes")

        try:
            # Reuse the first handler's formatter when the logger has one
            handlers = self.logger.handlers
            formatter = getattr(handlers[0], "formatter", None) if handlers else None

            processor = KaraokeLyricsProcessor(
                log_level=self.logger.getEffectiveLevel(),
                log_formatter=formatter,
                input_filename=str(lyrics_file),
                max_line_length=self.max_line_length,
            )

            self.logger.debug("Created KaraokeLyricsProcessor instance")
            processed_text = processor.process()
            self.logger.debug(f"Processed text length: {len(processed_text)} characters")
            self.logger.debug(f"First 100 characters of processed text: {processed_text[:100]}...")

            result = {"text": processed_text, "source": "file", "filepath": str(lyrics_file)}
            self.logger.info("Successfully processed lyrics file")
            self.logger.debug(f"Returning result dictionary: {result}")
            return result

        except Exception as e:
            # Best-effort provider: report the failure and yield no lyrics
            self.logger.error(f"Error processing lyrics file: {str(e)}", exc_info=True)
            return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert the raw file data to LyricsData format.

        Args:
            raw_data: Dict with ``text`` and ``filepath`` keys, as produced by
                ``_fetch_data_from_source``.

        Raises:
            Exception: Any conversion error is logged and re-raised.
        """
        self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")

        try:
            metadata = LyricsMetadata(
                source="file",
                track_name=self.title,
                artist_names=self.artist,
                lyrics_provider="file",
                lyrics_provider_id=raw_data["filepath"],
                is_synced=False,
                provider_metadata={"filepath": raw_data["filepath"]},
            )

            # Create segments with words from the processed text
            segments = self._create_segments_with_words(raw_data["text"], is_synced=False)

            lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
            self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
            return lyrics_data

        except Exception as e:
            self.logger.error(f"Error converting result format: {str(e)}", exc_info=True)
            raise
|