lyrics-transcriber 0.30.0__tar.gz → 0.32.1__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/PKG-INFO +14 -3
- lyrics_transcriber-0.32.1/lyrics_transcriber/__init__.py +4 -0
- lyrics_transcriber-0.30.0/lyrics_transcriber/cli/main.py → lyrics_transcriber-0.32.1/lyrics_transcriber/cli/cli_main.py +47 -14
- lyrics_transcriber-0.32.1/lyrics_transcriber/core/config.py +35 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/core/controller.py +281 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/anchor_sequence.py +471 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/corrector.py +256 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/base.py +30 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/repeat.py +71 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/word_operations.py +135 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/phrase_analyzer.py +426 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/lyrics/genius.py +73 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/lyrics/spotify.py +82 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/__init__.py +21 -0
- {lyrics_transcriber-0.30.0/lyrics_transcriber/output → lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass}/ass.py +150 -690
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/config.py +37 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/lyrics_line.py +219 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdg.py +503 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/generator.py +179 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/plain_text.py +91 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/segment_resizer.py +416 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/subtitles.py +331 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/output/video.py +219 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/review/__init__.py +1 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/review/server.py +138 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/transcribers/audioshake.py +217 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/transcribers/base_transcriber.py +149 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/transcribers/whisper.py +320 -0
- lyrics_transcriber-0.32.1/lyrics_transcriber/types.py +454 -0
- {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/pyproject.toml +14 -3
- lyrics_transcriber-0.30.0/lyrics_transcriber/__init__.py +0 -3
- lyrics_transcriber-0.30.0/lyrics_transcriber/core/controller.py +0 -283
- lyrics_transcriber-0.30.0/lyrics_transcriber/core/corrector.py +0 -56
- lyrics_transcriber-0.30.0/lyrics_transcriber/core/fetcher.py +0 -143
- lyrics_transcriber-0.30.0/lyrics_transcriber/output/generator.py +0 -210
- lyrics_transcriber-0.30.0/lyrics_transcriber/output/subtitles.py +0 -305
- lyrics_transcriber-0.30.0/lyrics_transcriber/storage/dropbox.py +0 -249
- lyrics_transcriber-0.30.0/lyrics_transcriber/storage/tokens.py +0 -116
- lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/audioshake.py +0 -151
- lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/base.py +0 -31
- lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/whisper.py +0 -186
- {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/LICENSE +0 -0
- {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/README.md +0 -0
- {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/lyrics_transcriber/cli/__init__.py +0 -0
- {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/lyrics_transcriber/core/__init__.py +0 -0
- {lyrics_transcriber-0.30.0/lyrics_transcriber/output → lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers}/__init__.py +0 -0
- {lyrics_transcriber-0.30.0/lyrics_transcriber/storage → lyrics_transcriber-0.32.1/lyrics_transcriber/output}/__init__.py +0 -0
@@ -1,8 +1,7 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.3
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.30.0
|
3
|
+
Version: 0.32.1
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
|
-
Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
|
6
5
|
License: MIT
|
7
6
|
Author: Andrew Beveridge
|
8
7
|
Author-email: andrew@beveridge.uk
|
@@ -14,13 +13,25 @@ Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
15
14
|
Classifier: Programming Language :: Python :: 3.12
|
16
15
|
Requires-Dist: dropbox (>=12)
|
16
|
+
Requires-Dist: fastapi (>=0.115.6,<0.116.0)
|
17
17
|
Requires-Dist: karaoke-lyrics-processor (>=0.4)
|
18
18
|
Requires-Dist: lyricsgenius (>=3)
|
19
|
+
Requires-Dist: metaphone (>=0.6,<0.7)
|
20
|
+
Requires-Dist: nltk (>=3.9.1,<4.0.0)
|
19
21
|
Requires-Dist: pydub (>=0.25)
|
20
22
|
Requires-Dist: python-dotenv (>=1)
|
23
|
+
Requires-Dist: python-levenshtein (>=0.26.1,<0.27.0)
|
21
24
|
Requires-Dist: python-slugify (>=8)
|
25
|
+
Requires-Dist: spacy (>=3.8.3,<4.0.0)
|
26
|
+
Requires-Dist: spacy-syllables (>=3.0.2,<4.0.0)
|
27
|
+
Requires-Dist: syllables (>=1.0.9,<2.0.0)
|
22
28
|
Requires-Dist: syrics (>=0)
|
29
|
+
Requires-Dist: torch (>=2.5.1,<3.0.0)
|
30
|
+
Requires-Dist: tqdm (>=4.67.1,<5.0.0)
|
31
|
+
Requires-Dist: transformers (>=4.47.1,<5.0.0)
|
32
|
+
Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
|
23
33
|
Project-URL: Documentation, https://github.com/karaokenerds/python-lyrics-transcriber/blob/main/README.md
|
34
|
+
Project-URL: Homepage, https://github.com/karaokenerds/python-lyrics-transcriber
|
24
35
|
Project-URL: Repository, https://github.com/karaokenerds/python-lyrics-transcriber
|
25
36
|
Description-Content-Type: text/markdown
|
26
37
|
|
@@ -60,9 +60,14 @@ def create_arg_parser() -> argparse.ArgumentParser:
|
|
60
60
|
output_group.add_argument(
|
61
61
|
"--cache_dir",
|
62
62
|
type=Path,
|
63
|
-
default=Path("/tmp/lyrics-transcriber-cache/"),
|
64
63
|
help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
|
65
64
|
)
|
65
|
+
output_group.add_argument(
|
66
|
+
"--output_styles_json",
|
67
|
+
type=Path,
|
68
|
+
help="JSON file containing output style configurations for CDG and video generation",
|
69
|
+
)
|
70
|
+
output_group.add_argument("--generate_cdg", action="store_true", help="Generate CDG karaoke files")
|
66
71
|
|
67
72
|
# Video options
|
68
73
|
video_group = parser.add_argument_group("Video Options")
|
@@ -70,16 +75,22 @@ def create_arg_parser() -> argparse.ArgumentParser:
|
|
70
75
|
video_group.add_argument(
|
71
76
|
"--video_resolution", choices=["4k", "1080p", "720p", "360p"], default="360p", help="Resolution of the karaoke video. Default: 360p"
|
72
77
|
)
|
73
|
-
video_group.add_argument("--video_background_image", type=Path, help="Image file to use for karaoke video background")
|
74
|
-
video_group.add_argument(
|
75
|
-
"--video_background_color",
|
76
|
-
default="black",
|
77
|
-
help="Color for karaoke video background (hex format or FFmpeg color name). Default: black",
|
78
|
-
)
|
79
78
|
|
80
79
|
return parser
|
81
80
|
|
82
81
|
|
82
|
+
def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
|
83
|
+
"""Parse and process command line arguments."""
|
84
|
+
# Use provided args_list for testing, otherwise use sys.argv
|
85
|
+
args = parser.parse_args(args_list)
|
86
|
+
|
87
|
+
# Set default cache_dir if not provided
|
88
|
+
if not hasattr(args, "cache_dir") or args.cache_dir is None:
|
89
|
+
args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
|
90
|
+
|
91
|
+
return args
|
92
|
+
|
93
|
+
|
83
94
|
def get_config_from_env() -> Dict[str, str]:
|
84
95
|
"""Load configuration from environment variables."""
|
85
96
|
load_dotenv()
|
@@ -121,12 +132,12 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
|
|
121
132
|
)
|
122
133
|
|
123
134
|
output_config = OutputConfig(
|
124
|
-
|
135
|
+
output_styles_json=str(args.output_styles_json),
|
136
|
+
output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
|
125
137
|
cache_dir=str(args.cache_dir),
|
126
138
|
render_video=args.render_video,
|
139
|
+
generate_cdg=args.generate_cdg,
|
127
140
|
video_resolution=args.video_resolution,
|
128
|
-
video_background_image=str(args.video_background_image) if args.video_background_image else None,
|
129
|
-
video_background_color=args.video_background_color,
|
130
141
|
)
|
131
142
|
|
132
143
|
return transcriber_config, lyrics_config, output_config
|
@@ -151,7 +162,7 @@ def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, log
|
|
151
162
|
def main() -> None:
|
152
163
|
"""Main entry point for the CLI."""
|
153
164
|
parser = create_arg_parser()
|
154
|
-
args = parser.parse_args()
|
165
|
+
args = parse_args(parser)
|
155
166
|
|
156
167
|
# Set up logging first
|
157
168
|
logger = setup_logging(args.log_level)
|
@@ -182,13 +193,35 @@ def main() -> None:
|
|
182
193
|
# Log results
|
183
194
|
logger.info("*** Success! ***")
|
184
195
|
|
196
|
+
# Log all generated output files
|
197
|
+
if results.original_txt:
|
198
|
+
logger.info(f"Generated original transcription: {results.original_txt}")
|
199
|
+
if results.corrections_json:
|
200
|
+
logger.info(f"Generated corrections data: {results.corrections_json}")
|
201
|
+
|
202
|
+
if results.corrected_txt:
|
203
|
+
logger.info(f"Generated corrected lyrics: {results.corrected_txt}")
|
185
204
|
if results.lrc_filepath:
|
186
205
|
logger.info(f"Generated LRC file: {results.lrc_filepath}")
|
206
|
+
|
207
|
+
if results.cdg_filepath:
|
208
|
+
logger.info(f"Generated CDG file: {results.cdg_filepath}")
|
209
|
+
if results.mp3_filepath:
|
210
|
+
logger.info(f"Generated MP3 file: {results.mp3_filepath}")
|
211
|
+
if results.cdg_zip_filepath:
|
212
|
+
logger.info(f"Generated CDG ZIP archive: {results.cdg_zip_filepath}")
|
213
|
+
|
187
214
|
if results.ass_filepath:
|
188
|
-
logger.info(f"Generated ASS
|
215
|
+
logger.info(f"Generated ASS subtitles: {results.ass_filepath}")
|
189
216
|
if results.video_filepath:
|
190
|
-
logger.info(f"Generated video
|
217
|
+
logger.info(f"Generated video: {results.video_filepath}")
|
191
218
|
|
192
219
|
except Exception as e:
|
193
|
-
|
220
|
+
# Get the full exception traceback
|
221
|
+
import traceback
|
222
|
+
|
223
|
+
error_details = traceback.format_exc()
|
224
|
+
|
225
|
+
# Log both the error message and the full traceback
|
226
|
+
logger.error(f"Processing failed: {str(e)}\n\nFull traceback:\n{error_details}")
|
194
227
|
exit(1)
|
@@ -0,0 +1,35 @@
|
|
1
|
+
import os
|
2
|
+
from dataclasses import dataclass, field
|
3
|
+
from typing import Any, Dict, Optional
|
4
|
+
|
5
|
+
|
6
|
+
@dataclass
|
7
|
+
class TranscriberConfig:
|
8
|
+
"""Configuration for transcription services."""
|
9
|
+
|
10
|
+
audioshake_api_token: Optional[str] = None
|
11
|
+
runpod_api_key: Optional[str] = None
|
12
|
+
whisper_runpod_id: Optional[str] = None
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
|
16
|
+
class LyricsConfig:
|
17
|
+
"""Configuration for lyrics services."""
|
18
|
+
|
19
|
+
genius_api_token: Optional[str] = None
|
20
|
+
spotify_cookie: Optional[str] = None
|
21
|
+
|
22
|
+
|
23
|
+
@dataclass
|
24
|
+
class OutputConfig:
|
25
|
+
"""Configuration for output generation."""
|
26
|
+
|
27
|
+
output_styles_json: str
|
28
|
+
max_line_length: int = 36
|
29
|
+
styles: Dict[str, Any] = field(default_factory=dict)
|
30
|
+
output_dir: Optional[str] = os.getcwd()
|
31
|
+
cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
|
32
|
+
render_video: bool = False
|
33
|
+
generate_cdg: bool = False
|
34
|
+
video_resolution: str = "360p"
|
35
|
+
enable_review: bool = True
|
@@ -0,0 +1,281 @@
|
|
1
|
+
import os
|
2
|
+
import logging
|
3
|
+
from dataclasses import dataclass, field
|
4
|
+
from typing import Dict, Optional, List
|
5
|
+
from lyrics_transcriber.types import (
|
6
|
+
LyricsData,
|
7
|
+
TranscriptionResult,
|
8
|
+
CorrectionResult,
|
9
|
+
)
|
10
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
|
11
|
+
from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
|
12
|
+
from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
|
13
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
14
|
+
from lyrics_transcriber.lyrics.genius import GeniusProvider
|
15
|
+
from lyrics_transcriber.lyrics.spotify import SpotifyProvider
|
16
|
+
from lyrics_transcriber.output.generator import OutputGenerator
|
17
|
+
from lyrics_transcriber.correction.corrector import LyricsCorrector
|
18
|
+
from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
|
19
|
+
|
20
|
+
|
21
|
+
@dataclass
|
22
|
+
class LyricsControllerResult:
|
23
|
+
"""Holds the results of the transcription and correction process."""
|
24
|
+
|
25
|
+
# Results from different sources
|
26
|
+
lyrics_results: List[LyricsData] = field(default_factory=list)
|
27
|
+
transcription_results: List[TranscriptionResult] = field(default_factory=list)
|
28
|
+
|
29
|
+
# Corrected results
|
30
|
+
transcription_corrected: Optional[CorrectionResult] = None
|
31
|
+
|
32
|
+
# Output files
|
33
|
+
lrc_filepath: Optional[str] = None
|
34
|
+
ass_filepath: Optional[str] = None
|
35
|
+
video_filepath: Optional[str] = None
|
36
|
+
mp3_filepath: Optional[str] = None
|
37
|
+
cdg_filepath: Optional[str] = None
|
38
|
+
cdg_zip_filepath: Optional[str] = None
|
39
|
+
original_txt: Optional[str] = None
|
40
|
+
corrected_txt: Optional[str] = None
|
41
|
+
corrections_json: Optional[str] = None
|
42
|
+
|
43
|
+
|
44
|
+
class LyricsTranscriber:
|
45
|
+
"""
|
46
|
+
Controller class that orchestrates the lyrics transcription workflow:
|
47
|
+
1. Fetch lyrics from internet sources
|
48
|
+
2. Run multiple transcription methods
|
49
|
+
3. Correct transcribed lyrics using fetched lyrics
|
50
|
+
4. Generate output formats (LRC, ASS, video)
|
51
|
+
"""
|
52
|
+
|
53
|
+
def __init__(
|
54
|
+
self,
|
55
|
+
audio_filepath: str,
|
56
|
+
artist: Optional[str] = None,
|
57
|
+
title: Optional[str] = None,
|
58
|
+
transcriber_config: Optional[TranscriberConfig] = None,
|
59
|
+
lyrics_config: Optional[LyricsConfig] = None,
|
60
|
+
output_config: Optional[OutputConfig] = None,
|
61
|
+
transcribers: Optional[Dict[str, BaseTranscriber]] = None,
|
62
|
+
lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
|
63
|
+
corrector: Optional[LyricsCorrector] = None,
|
64
|
+
output_generator: Optional[OutputGenerator] = None,
|
65
|
+
logger: Optional[logging.Logger] = None,
|
66
|
+
log_level: int = logging.DEBUG,
|
67
|
+
log_formatter: Optional[logging.Formatter] = None,
|
68
|
+
):
|
69
|
+
# Set up logging
|
70
|
+
self.logger = logger or logging.getLogger(__name__)
|
71
|
+
if not logger:
|
72
|
+
self.logger.setLevel(log_level)
|
73
|
+
if not self.logger.handlers:
|
74
|
+
handler = logging.StreamHandler()
|
75
|
+
formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
|
76
|
+
handler.setFormatter(formatter)
|
77
|
+
self.logger.addHandler(handler)
|
78
|
+
|
79
|
+
self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")
|
80
|
+
|
81
|
+
# Store configs (with defaults if not provided)
|
82
|
+
self.transcriber_config = transcriber_config or TranscriberConfig()
|
83
|
+
self.lyrics_config = lyrics_config or LyricsConfig()
|
84
|
+
self.output_config = output_config or OutputConfig()
|
85
|
+
|
86
|
+
# Basic settings
|
87
|
+
self.audio_filepath = audio_filepath
|
88
|
+
self.artist = artist
|
89
|
+
self.title = title
|
90
|
+
self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
|
91
|
+
|
92
|
+
# Add after creating necessary folders
|
93
|
+
self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
|
94
|
+
self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
|
95
|
+
|
96
|
+
# Create necessary folders
|
97
|
+
os.makedirs(self.output_config.cache_dir, exist_ok=True)
|
98
|
+
os.makedirs(self.output_config.output_dir, exist_ok=True)
|
99
|
+
|
100
|
+
# Initialize results
|
101
|
+
self.results = LyricsControllerResult()
|
102
|
+
|
103
|
+
# Initialize components (with dependency injection)
|
104
|
+
self.transcribers = transcribers or self._initialize_transcribers()
|
105
|
+
self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
|
106
|
+
self.corrector = corrector or LyricsCorrector(cache_dir=self.output_config.cache_dir, logger=self.logger)
|
107
|
+
self.output_generator = output_generator or self._initialize_output_generator()
|
108
|
+
|
109
|
+
def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
|
110
|
+
"""Initialize available transcription services."""
|
111
|
+
transcribers = {}
|
112
|
+
|
113
|
+
# Add debug logging for config values
|
114
|
+
self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
|
115
|
+
self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
|
116
|
+
|
117
|
+
if self.transcriber_config.audioshake_api_token:
|
118
|
+
self.logger.debug("Initializing AudioShake transcriber")
|
119
|
+
transcribers["audioshake"] = {
|
120
|
+
"instance": AudioShakeTranscriber(
|
121
|
+
cache_dir=self.output_config.cache_dir,
|
122
|
+
config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
|
123
|
+
logger=self.logger,
|
124
|
+
),
|
125
|
+
"priority": 1, # AudioShake has highest priority
|
126
|
+
}
|
127
|
+
else:
|
128
|
+
self.logger.debug("Skipping AudioShake transcriber - no API token provided")
|
129
|
+
|
130
|
+
if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
|
131
|
+
self.logger.debug("Initializing Whisper transcriber")
|
132
|
+
transcribers["whisper"] = {
|
133
|
+
"instance": WhisperTranscriber(
|
134
|
+
cache_dir=self.output_config.cache_dir,
|
135
|
+
config=WhisperConfig(
|
136
|
+
runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
|
137
|
+
),
|
138
|
+
logger=self.logger,
|
139
|
+
),
|
140
|
+
"priority": 2, # Whisper has lower priority
|
141
|
+
}
|
142
|
+
else:
|
143
|
+
self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
|
144
|
+
|
145
|
+
return transcribers
|
146
|
+
|
147
|
+
def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
|
148
|
+
"""Initialize available lyrics providers."""
|
149
|
+
providers = {}
|
150
|
+
|
151
|
+
# Create provider config with all necessary parameters
|
152
|
+
provider_config = LyricsProviderConfig(
|
153
|
+
genius_api_token=self.lyrics_config.genius_api_token,
|
154
|
+
spotify_cookie=self.lyrics_config.spotify_cookie,
|
155
|
+
cache_dir=self.output_config.cache_dir,
|
156
|
+
audio_filepath=self.audio_filepath,
|
157
|
+
)
|
158
|
+
|
159
|
+
if provider_config.genius_api_token:
|
160
|
+
self.logger.debug("Initializing Genius lyrics provider")
|
161
|
+
providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
|
162
|
+
else:
|
163
|
+
self.logger.debug("Skipping Genius provider - no API token provided")
|
164
|
+
|
165
|
+
if provider_config.spotify_cookie:
|
166
|
+
self.logger.debug("Initializing Spotify lyrics provider")
|
167
|
+
providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
|
168
|
+
else:
|
169
|
+
self.logger.debug("Skipping Spotify provider - no cookie provided")
|
170
|
+
|
171
|
+
return providers
|
172
|
+
|
173
|
+
def _initialize_output_generator(self) -> OutputGenerator:
|
174
|
+
"""Initialize output generation service."""
|
175
|
+
return OutputGenerator(config=self.output_config, logger=self.logger)
|
176
|
+
|
177
|
+
def process(self) -> LyricsControllerResult:
|
178
|
+
"""
|
179
|
+
Main processing method that orchestrates the entire workflow.
|
180
|
+
|
181
|
+
Returns:
|
182
|
+
LyricsControllerResult containing all outputs and generated files.
|
183
|
+
|
184
|
+
Raises:
|
185
|
+
Exception: If a critical error occurs during processing.
|
186
|
+
"""
|
187
|
+
# Step 1: Fetch lyrics if artist and title are provided
|
188
|
+
if self.artist and self.title:
|
189
|
+
self.fetch_lyrics()
|
190
|
+
|
191
|
+
# Step 2: Run transcription
|
192
|
+
self.transcribe()
|
193
|
+
|
194
|
+
# Step 3: Process and correct lyrics
|
195
|
+
self.correct_lyrics()
|
196
|
+
|
197
|
+
# Step 4: Generate outputs
|
198
|
+
self.generate_outputs()
|
199
|
+
|
200
|
+
self.logger.info("Processing completed successfully")
|
201
|
+
return self.results
|
202
|
+
|
203
|
+
def fetch_lyrics(self) -> None:
|
204
|
+
"""Fetch lyrics from available providers."""
|
205
|
+
self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")
|
206
|
+
|
207
|
+
for name, provider in self.lyrics_providers.items():
|
208
|
+
try:
|
209
|
+
result = provider.fetch_lyrics(self.artist, self.title)
|
210
|
+
if result:
|
211
|
+
self.results.lyrics_results.append(result)
|
212
|
+
self.logger.info(f"Successfully fetched lyrics from {name}")
|
213
|
+
|
214
|
+
except Exception as e:
|
215
|
+
self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")
|
216
|
+
continue
|
217
|
+
|
218
|
+
if not self.results.lyrics_results:
|
219
|
+
self.logger.warning("No lyrics found from any source")
|
220
|
+
|
221
|
+
def transcribe(self) -> None:
|
222
|
+
"""Run transcription using all available transcribers."""
|
223
|
+
self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")
|
224
|
+
|
225
|
+
for name, transcriber_info in self.transcribers.items():
|
226
|
+
self.logger.info(f"Running transcription with {name}")
|
227
|
+
result = transcriber_info["instance"].transcribe(self.audio_filepath)
|
228
|
+
if result:
|
229
|
+
# Add the transcriber name and priority to the result
|
230
|
+
self.results.transcription_results.append(
|
231
|
+
TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
|
232
|
+
)
|
233
|
+
self.logger.debug(f"Transcription completed for {name}")
|
234
|
+
|
235
|
+
if not self.results.transcription_results:
|
236
|
+
self.logger.warning("No successful transcriptions from any provider")
|
237
|
+
|
238
|
+
def correct_lyrics(self) -> None:
|
239
|
+
"""Run lyrics correction using transcription and internet lyrics."""
|
240
|
+
self.logger.info("Starting lyrics correction process")
|
241
|
+
|
242
|
+
# Run correction
|
243
|
+
corrected_data = self.corrector.run(
|
244
|
+
transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
|
245
|
+
)
|
246
|
+
|
247
|
+
# Store corrected results
|
248
|
+
self.results.transcription_corrected = corrected_data
|
249
|
+
self.logger.info("Lyrics correction completed")
|
250
|
+
|
251
|
+
# Add human review step
|
252
|
+
if self.output_config.enable_review: # We'll need to add this config option
|
253
|
+
from ..review import start_review_server
|
254
|
+
|
255
|
+
self.logger.info("Starting human review process")
|
256
|
+
self.results.transcription_corrected = start_review_server(corrected_data)
|
257
|
+
self.logger.info("Human review completed")
|
258
|
+
|
259
|
+
def generate_outputs(self) -> None:
|
260
|
+
"""Generate output files."""
|
261
|
+
self.logger.info("Generating output files")
|
262
|
+
|
263
|
+
output_files = self.output_generator.generate_outputs(
|
264
|
+
transcription_corrected=self.results.transcription_corrected,
|
265
|
+
lyrics_results=self.results.lyrics_results,
|
266
|
+
output_prefix=self.output_prefix,
|
267
|
+
audio_filepath=self.audio_filepath,
|
268
|
+
artist=self.artist,
|
269
|
+
title=self.title,
|
270
|
+
)
|
271
|
+
|
272
|
+
# Store all output paths in results
|
273
|
+
self.results.lrc_filepath = output_files.lrc
|
274
|
+
self.results.ass_filepath = output_files.ass
|
275
|
+
self.results.video_filepath = output_files.video
|
276
|
+
self.results.original_txt = output_files.original_txt
|
277
|
+
self.results.corrected_txt = output_files.corrected_txt
|
278
|
+
self.results.corrections_json = output_files.corrections_json
|
279
|
+
self.results.cdg_filepath = output_files.cdg
|
280
|
+
self.results.mp3_filepath = output_files.mp3
|
281
|
+
self.results.cdg_zip_filepath = output_files.cdg_zip
|