lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/__init__.py +2 -5
- lyrics_transcriber/cli/cli_main.py +206 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/controller.py +317 -0
- lyrics_transcriber/correction/base_strategy.py +29 -0
- lyrics_transcriber/correction/corrector.py +52 -0
- lyrics_transcriber/correction/strategy_diff.py +263 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
- lyrics_transcriber/lyrics/genius.py +70 -0
- lyrics_transcriber/lyrics/spotify.py +82 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/generator.py +271 -0
- lyrics_transcriber/{utils → output}/subtitles.py +12 -12
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/audioshake.py +216 -0
- lyrics_transcriber/transcribers/base_transcriber.py +186 -0
- lyrics_transcriber/transcribers/whisper.py +321 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +5 -16
- lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
- lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
- lyrics_transcriber/audioshake_transcriber.py +0 -122
- lyrics_transcriber/corrector.py +0 -57
- lyrics_transcriber/llm_prompts/README.md +0 -10
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
- lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
- lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
- lyrics_transcriber/transcriber.py +0 -934
- lyrics_transcriber/utils/cli.py +0 -179
- lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
- lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
- /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
- /lyrics_transcriber/{utils → output}/ass.py +0 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0
lyrics_transcriber/__init__.py
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
import
|
2
|
-
|
3
|
-
warnings.simplefilter("ignore")
|
4
|
-
|
5
|
-
from .transcriber import LyricsTranscriber
|
1
|
+
from .core.controller import LyricsTranscriber, TranscriberConfig, LyricsConfig, OutputConfig
|
6
2
|
|
3
|
+
__all__ = ["LyricsTranscriber", "TranscriberConfig", "LyricsConfig", "OutputConfig"]
|
@@ -0,0 +1,206 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
import argparse
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Dict
|
7
|
+
from importlib.metadata import version
|
8
|
+
from dotenv import load_dotenv
|
9
|
+
|
10
|
+
from lyrics_transcriber import LyricsTranscriber
|
11
|
+
from lyrics_transcriber.core.controller import TranscriberConfig, LyricsConfig, OutputConfig
|
12
|
+
|
13
|
+
|
14
|
+
def create_arg_parser() -> argparse.ArgumentParser:
    """Create and configure the command-line argument parser.

    The positional ``audio_filepath`` is declared optional (``nargs="?"``) with
    ``argparse.SUPPRESS`` as its default, so the attribute is simply absent from
    the parsed namespace when the user omits it; ``validate_args`` relies on that.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    # Local import: only this function needs the exception type.
    from importlib.metadata import PackageNotFoundError

    parser = argparse.ArgumentParser(
        description="Create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps",
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=52),
    )

    # Required arguments
    parser.add_argument(
        "audio_filepath",
        nargs="?",
        help="The audio file path to transcribe lyrics for.",
        default=argparse.SUPPRESS,
    )

    # Version
    # The distribution metadata is missing when the code runs from a source
    # checkout that was never installed; do not let that break the whole CLI.
    try:
        package_version = version("lyrics-transcriber")
    except PackageNotFoundError:
        package_version = "unknown"
    parser.add_argument("-v", "--version", action="version", version=f"%(prog)s {package_version}")

    # Optional arguments
    parser.add_argument(
        "--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Logging level. Default: INFO"
    )

    # Song identification
    song_group = parser.add_argument_group("Song Identification")
    song_group.add_argument("--artist", help="Song artist for lyrics lookup and auto-correction")
    song_group.add_argument("--title", help="Song title for lyrics lookup and auto-correction")

    # API Credentials
    api_group = parser.add_argument_group("API Credentials")
    api_group.add_argument(
        "--audioshake_api_token", help="AudioShake API token for lyrics transcription. Can also use AUDIOSHAKE_API_TOKEN env var."
    )
    api_group.add_argument("--genius_api_token", help="Genius API token for lyrics fetching. Can also use GENIUS_API_TOKEN env var.")
    api_group.add_argument(
        "--spotify_cookie", help="Spotify sp_dc cookie value for lyrics fetching. Can also use SPOTIFY_COOKIE_SP_DC env var."
    )
    api_group.add_argument("--runpod_api_key", help="RunPod API key for Whisper transcription. Can also use RUNPOD_API_KEY env var.")
    api_group.add_argument(
        "--whisper_runpod_id", help="RunPod endpoint ID for Whisper transcription. Can also use WHISPER_RUNPOD_ID env var."
    )

    # Output options
    output_group = parser.add_argument_group("Output Options")
    output_group.add_argument("--output_dir", type=Path, help="Directory where output files will be saved. Default: current directory")

    output_group.add_argument(
        "--cache_dir",
        type=Path,
        help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
    )

    # Video options
    video_group = parser.add_argument_group("Video Options")
    video_group.add_argument("--render_video", action="store_true", help="Render a karaoke video with the generated lyrics")
    video_group.add_argument(
        "--video_resolution", choices=["4k", "1080p", "720p", "360p"], default="360p", help="Resolution of the karaoke video. Default: 360p"
    )
    video_group.add_argument("--video_background_image", type=Path, help="Image file to use for karaoke video background")
    video_group.add_argument(
        "--video_background_color",
        default="black",
        help="Color for karaoke video background (hex format or FFmpeg color name). Default: black",
    )

    return parser
|
81
|
+
|
82
|
+
|
83
|
+
def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
|
84
|
+
"""Parse and process command line arguments."""
|
85
|
+
# Use provided args_list for testing, otherwise use sys.argv
|
86
|
+
args = parser.parse_args(args_list)
|
87
|
+
|
88
|
+
# Set default cache_dir if not provided
|
89
|
+
if not hasattr(args, "cache_dir") or args.cache_dir is None:
|
90
|
+
args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
|
91
|
+
|
92
|
+
return args
|
93
|
+
|
94
|
+
|
95
|
+
def get_config_from_env() -> Dict[str, str]:
    """Collect API credentials from the environment.

    ``load_dotenv()`` is called first, then each credential is read with
    ``os.getenv``; a variable that is not set yields ``None`` for its key.

    Returns:
        Mapping from option name (matching the CLI option names) to the value
        of the corresponding environment variable.
    """
    load_dotenv()

    # option name -> environment variable holding its value
    env_var_for_option = {
        "audioshake_api_token": "AUDIOSHAKE_API_TOKEN",
        "genius_api_token": "GENIUS_API_TOKEN",
        "spotify_cookie": "SPOTIFY_COOKIE_SP_DC",
        "runpod_api_key": "RUNPOD_API_KEY",
        "whisper_runpod_id": "WHISPER_RUNPOD_ID",
    }
    return {option: os.getenv(env_var) for option, env_var in env_var_for_option.items()}
|
105
|
+
|
106
|
+
|
107
|
+
def setup_logging(log_level: str) -> logging.Logger:
    """Return the ``lyrics_transcriber`` logger set to the requested level.

    Args:
        log_level: Name of a ``logging`` level (case-insensitive), e.g. ``"debug"``.

    Returns:
        The package logger. A stream handler with the package format is attached
        only if the logger has no handlers yet, so repeated calls do not
        duplicate output.
    """
    package_logger = logging.getLogger("lyrics_transcriber")
    package_logger.setLevel(getattr(logging, log_level.upper()))

    # Already configured by an earlier call (or by the host application).
    if package_logger.handlers:
        return package_logger

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(fmt="%(asctime)s.%(msecs)03d - %(levelname)s - %(module)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    )
    package_logger.addHandler(stream_handler)
    return package_logger
|
120
|
+
|
121
|
+
|
122
|
+
def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tuple[TranscriberConfig, LyricsConfig, OutputConfig]:
    """Build the three configuration objects for the transcriber.

    Args:
        args: Parsed command-line arguments.
        env_config: Credentials read from the environment, keyed by option name.

    Returns:
        ``(transcriber_config, lyrics_config, output_config)``.
    """

    def cli_or_env(option: str):
        # A truthy command-line value wins; otherwise use the environment value.
        return getattr(args, option) or env_config.get(option)

    transcription_settings = TranscriberConfig(
        audioshake_api_token=cli_or_env("audioshake_api_token"),
        runpod_api_key=cli_or_env("runpod_api_key"),
        whisper_runpod_id=cli_or_env("whisper_runpod_id"),
    )

    lyrics_settings = LyricsConfig(
        genius_api_token=cli_or_env("genius_api_token"),
        spotify_cookie=cli_or_env("spotify_cookie"),
    )

    # Path arguments are handed over as plain strings.
    if args.output_dir:
        output_dir = str(args.output_dir)
    else:
        output_dir = os.getcwd()

    output_settings = OutputConfig(
        output_dir=output_dir,
        cache_dir=str(args.cache_dir),
        render_video=args.render_video,
        video_resolution=args.video_resolution,
        video_background_image=str(args.video_background_image) if args.video_background_image else None,
        video_background_color=args.video_background_color,
    )

    return transcription_settings, lyrics_settings, output_settings
|
145
|
+
|
146
|
+
|
147
|
+
def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, logger: logging.Logger) -> None:
    """Validate command line arguments, terminating the process on failure.

    Args:
        args: Parsed command-line arguments.
        parser: Parser, used to print the help text when no audio file is given.
        logger: Logger that receives the error message.

    Raises:
        SystemExit: With exit code 1 when the audio file path is missing, the
            file does not exist, or only one of artist/title was supplied.
    """
    # ``SystemExit`` is raised directly instead of calling ``exit()``: that
    # helper is injected by the ``site`` module and is absent under
    # ``python -S`` or in frozen executables.
    if not hasattr(args, "audio_filepath"):
        parser.print_help()
        logger.error("No audio filepath provided")
        raise SystemExit(1)

    if not os.path.exists(args.audio_filepath):
        logger.error(f"Audio file not found: {args.audio_filepath}")
        raise SystemExit(1)

    # Artist and title are only useful as a pair: reject exactly-one-given.
    if bool(args.artist) != bool(args.title):
        logger.error("Both artist and title must be provided together")
        raise SystemExit(1)
|
161
|
+
|
162
|
+
|
163
|
+
def main() -> None:
    """Main entry point for the CLI.

    Parses and validates the arguments, builds the configuration objects, runs
    the transcriber and logs the paths of the generated files.

    Raises:
        SystemExit: With exit code 1 when validation or processing fails.
    """
    parser = create_arg_parser()
    args = parse_args(parser)

    # Logging is configured before validation so validation errors are reported
    logger = setup_logging(args.log_level)

    # Validate arguments
    validate_args(args, parser, logger)

    # Load environment variables
    env_config = get_config_from_env()

    # Create configuration objects
    transcriber_config, lyrics_config, output_config = create_configs(args, env_config)

    try:
        # Initialize and run transcriber
        transcriber = LyricsTranscriber(
            audio_filepath=args.audio_filepath,
            artist=args.artist,
            title=args.title,
            transcriber_config=transcriber_config,
            lyrics_config=lyrics_config,
            output_config=output_config,
            logger=logger,
        )

        results = transcriber.process()

        # Log results
        logger.info("*** Success! ***")

        if results.lrc_filepath:
            logger.info(f"Generated LRC file: {results.lrc_filepath}")
        if results.ass_filepath:
            logger.info(f"Generated ASS file: {results.ass_filepath}")
        if results.video_filepath:
            logger.info(f"Generated video file: {results.video_filepath}")

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")
        # Raise SystemExit directly: the ``exit()`` helper comes from the
        # ``site`` module and is not guaranteed to exist (``python -S``,
        # frozen executables). Chaining keeps the cause for debuggers.
        raise SystemExit(1) from e
|
File without changes
|
@@ -0,0 +1,317 @@
|
|
1
|
+
import os
|
2
|
+
import logging
|
3
|
+
from dataclasses import dataclass, field
|
4
|
+
from typing import Dict, Optional, Any, List
|
5
|
+
from ..transcribers.base_transcriber import BaseTranscriber, TranscriptionResult
|
6
|
+
from ..transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
|
7
|
+
from ..transcribers.whisper import WhisperTranscriber, WhisperConfig
|
8
|
+
from ..lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig, LyricsData
|
9
|
+
from ..lyrics.genius import GeniusProvider
|
10
|
+
from ..lyrics.spotify import SpotifyProvider
|
11
|
+
from ..output.generator import OutputGenerator, OutputGeneratorConfig
|
12
|
+
from ..correction.corrector import LyricsCorrector, CorrectionResult
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
class TranscriberConfig:
    """Credentials for the transcription back-ends; every field is optional."""

    # Token for the AudioShake transcriber
    audioshake_api_token: Optional[str] = None
    # RunPod API key for the Whisper transcriber
    runpod_api_key: Optional[str] = None
    # RunPod endpoint id for the Whisper transcriber
    whisper_runpod_id: Optional[str] = None
|
22
|
+
|
23
|
+
|
24
|
+
@dataclass
class LyricsConfig:
    """Credentials for the lyrics providers; every field is optional."""

    # Token for the Genius lyrics provider
    genius_api_token: Optional[str] = None
    # Cookie value for the Spotify lyrics provider
    spotify_cookie: Optional[str] = None
|
30
|
+
|
31
|
+
|
32
|
+
@dataclass
class OutputConfig:
    """Configuration for output generation.

    ``output_dir`` and ``cache_dir`` are resolved when an instance is created
    (not when this module is imported), so a changed working directory or a
    ``LYRICS_TRANSCRIBER_CACHE_DIR`` value set after import is honoured.
    """

    # Directory for generated files; defaults to the current working directory.
    output_dir: Optional[str] = field(default_factory=os.getcwd)
    # Cache directory; defaults to $LYRICS_TRANSCRIBER_CACHE_DIR or the /tmp path below.
    cache_dir: str = field(default_factory=lambda: os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
    render_video: bool = False
    video_resolution: str = "360p"
    video_background_image: Optional[str] = None
    video_background_color: str = "black"
|
42
|
+
|
43
|
+
|
44
|
+
@dataclass
class LyricsControllerResult:
    """Accumulates everything produced while processing one audio file."""

    # Raw results, one entry per provider / transcriber that returned data
    lyrics_results: List[LyricsData] = field(default_factory=list)
    transcription_results: List[TranscriptionResult] = field(default_factory=list)

    # Output of the correction step (None until it has run successfully)
    transcription_corrected: Optional[CorrectionResult] = None

    # Paths of the generated files (None when a file was not produced)
    lrc_filepath: Optional[str] = None
    ass_filepath: Optional[str] = None
    video_filepath: Optional[str] = None
|
59
|
+
|
60
|
+
|
61
|
+
class LyricsTranscriber:
    """
    Controller class that orchestrates the lyrics transcription workflow:
    1. Fetch lyrics from internet sources
    2. Run multiple transcription methods
    3. Correct transcribed lyrics using fetched lyrics
    4. Generate output formats (LRC, ASS, video)
    """

    def __init__(
        self,
        audio_filepath: str,
        artist: Optional[str] = None,
        title: Optional[str] = None,
        transcriber_config: Optional[TranscriberConfig] = None,
        lyrics_config: Optional[LyricsConfig] = None,
        output_config: Optional[OutputConfig] = None,
        transcribers: Optional[Dict[str, Dict[str, Any]]] = None,
        lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
        corrector: Optional[LyricsCorrector] = None,
        output_generator: Optional[OutputGenerator] = None,
        logger: Optional[logging.Logger] = None,
        log_level: int = logging.DEBUG,
        log_formatter: Optional[logging.Formatter] = None,
    ):
        """Set up logging, configuration, directories and collaborating components.

        Args:
            audio_filepath: Path of the audio file to process.
            artist: Song artist; lyrics are only fetched when artist and title are both set.
            title: Song title; see ``artist``.
            transcriber_config: Transcription credentials (defaults to an empty config).
            lyrics_config: Lyrics provider credentials (defaults to an empty config).
            output_config: Output settings (defaults to ``OutputConfig()``).
            transcribers: Optional injected transcribers, keyed by name. Each value is a
                dict with an ``"instance"`` (object exposing ``transcribe(path)``) and a
                ``"priority"`` entry, matching what ``_initialize_transcribers`` builds.
            lyrics_providers: Optional injected lyrics providers, keyed by name.
            corrector: Optional injected corrector.
            output_generator: Optional injected output generator.
            logger: Logger to use; when omitted a module logger is configured here.
            log_level: Level applied to the module logger (ignored when ``logger`` is given).
            log_formatter: Formatter for the handler added to the module logger.
        """
        # Set up logging
        self.logger = logger or logging.getLogger(__name__)
        if not logger:
            self.logger.setLevel(log_level)
            if not self.logger.handlers:
                handler = logging.StreamHandler()
                formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
                handler.setFormatter(formatter)
                self.logger.addHandler(handler)

        self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")

        # Store configs (with defaults if not provided)
        self.transcriber_config = transcriber_config or TranscriberConfig()
        self.lyrics_config = lyrics_config or LyricsConfig()
        self.output_config = output_config or OutputConfig()

        # output_dir is typed Optional[str]; os.makedirs(None) would raise
        # TypeError, so an explicit None falls back to the working directory.
        if self.output_config.output_dir is None:
            self.output_config.output_dir = os.getcwd()

        # Basic settings
        self.audio_filepath = audio_filepath
        self.artist = artist
        self.title = title
        # Output files are named "<artist> - <title>" when both are known,
        # otherwise after the audio file (without its extension).
        self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]

        # Report the directories that are about to be created
        self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
        self.logger.debug(f"Using output directory: {self.output_config.output_dir}")

        # Create necessary folders
        os.makedirs(self.output_config.cache_dir, exist_ok=True)
        os.makedirs(self.output_config.output_dir, exist_ok=True)

        # Initialize results
        self.results = LyricsControllerResult()

        # Initialize components (with dependency injection).
        # Note: a falsy injected value (e.g. an empty dict) also triggers the default.
        self.transcribers = transcribers or self._initialize_transcribers()
        self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
        self.corrector = corrector or LyricsCorrector(logger=self.logger)
        self.output_generator = output_generator or self._initialize_output_generator()

    def _initialize_transcribers(self) -> Dict[str, Dict[str, Any]]:
        """Initialize available transcription services.

        Returns:
            Mapping from transcriber name to ``{"instance": ..., "priority": ...}``.
            A service is only included when its credentials are configured.
        """
        transcribers = {}

        # Add debug logging for config values
        self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
        self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")

        if self.transcriber_config.audioshake_api_token:
            self.logger.debug("Initializing AudioShake transcriber")
            transcribers["audioshake"] = {
                "instance": AudioShakeTranscriber(
                    cache_dir=self.output_config.cache_dir,
                    config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
                    logger=self.logger,
                ),
                "priority": 1,  # AudioShake has highest priority
            }
        else:
            self.logger.debug("Skipping AudioShake transcriber - no API token provided")

        # Whisper needs both the RunPod key and the endpoint id
        if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
            self.logger.debug("Initializing Whisper transcriber")
            transcribers["whisper"] = {
                "instance": WhisperTranscriber(
                    cache_dir=self.output_config.cache_dir,
                    config=WhisperConfig(
                        runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
                    ),
                    logger=self.logger,
                ),
                "priority": 2,  # Whisper has lower priority
            }
        else:
            self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")

        return transcribers

    def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
        """Initialize available lyrics providers.

        Returns:
            Mapping from provider name to provider instance; a provider is only
            included when its credential is configured.
        """
        providers = {}

        # Create provider config with all necessary parameters
        provider_config = LyricsProviderConfig(
            genius_api_token=self.lyrics_config.genius_api_token,
            spotify_cookie=self.lyrics_config.spotify_cookie,
            cache_dir=self.output_config.cache_dir,
            audio_filepath=self.audio_filepath,
        )

        if provider_config.genius_api_token:
            self.logger.debug("Initializing Genius lyrics provider")
            providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
        else:
            self.logger.debug("Skipping Genius provider - no API token provided")

        if provider_config.spotify_cookie:
            self.logger.debug("Initializing Spotify lyrics provider")
            providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
        else:
            self.logger.debug("Skipping Spotify provider - no cookie provided")

        return providers

    def _initialize_output_generator(self) -> OutputGenerator:
        """Initialize output generation service."""

        # Convert OutputConfig to OutputGeneratorConfig
        # NOTE(review): output_config.render_video is not forwarded here - confirm
        # whether OutputGenerator is expected to receive it.
        generator_config = OutputGeneratorConfig(
            output_dir=self.output_config.output_dir,
            cache_dir=self.output_config.cache_dir,
            video_resolution=self.output_config.video_resolution,
            video_background_image=self.output_config.video_background_image,
            video_background_color=self.output_config.video_background_color,
        )

        # Initialize output generator
        return OutputGenerator(config=generator_config, logger=self.logger)

    def process(self) -> LyricsControllerResult:
        """
        Main processing method that orchestrates the entire workflow.

        Returns:
            LyricsControllerResult containing all outputs and generated files.

        Raises:
            Exception: If a critical error occurs during processing.
        """
        try:
            # Step 1: Fetch lyrics if artist and title are provided
            if self.artist and self.title:
                self.fetch_lyrics()

            # Step 2: Run transcription
            self.transcribe()

            # Step 3: Process and correct lyrics
            self.correct_lyrics()

            # Step 4: Generate outputs
            self.generate_outputs()

            self.logger.info("Processing completed successfully")
            return self.results

        except Exception as e:
            self.logger.error(f"Error during processing: {str(e)}")
            raise

    def fetch_lyrics(self) -> None:
        """Fetch lyrics from available providers.

        Each provider is tried independently; a failing provider is logged and
        skipped. This method never raises - processing can continue without lyrics.
        """
        self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")

        try:
            for name, provider in self.lyrics_providers.items():
                try:
                    result = provider.fetch_lyrics(self.artist, self.title)
                    if result:
                        self.results.lyrics_results.append(result)
                        self.logger.info(f"Successfully fetched lyrics from {name}")

                except Exception as e:
                    self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")
                    continue

            if not self.results.lyrics_results:
                self.logger.warning("No lyrics found from any source")

        except Exception as e:
            self.logger.error(f"Failed to fetch lyrics: {str(e)}")
            # Don't raise - we can continue without lyrics

    def transcribe(self) -> None:
        """Run transcription using all available transcribers.

        A failing transcriber is logged (with traceback) and skipped; only a
        warning is emitted when none of them produced a result.
        """
        self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")

        for name, transcriber_info in self.transcribers.items():
            self.logger.info(f"Running transcription with {name}")
            try:
                result = transcriber_info["instance"].transcribe(self.audio_filepath)
                if result:
                    # Add the transcriber name and priority to the result
                    self.results.transcription_results.append(
                        TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
                    )
                    self.logger.debug(f"Transcription completed for {name}")

            except Exception as e:
                self.logger.error(f"Transcription failed for {name}: {str(e)}", exc_info=True)
                continue

        if not self.results.transcription_results:
            self.logger.warning("No successful transcriptions from any provider")

    def correct_lyrics(self) -> None:
        """Run lyrics correction using transcription and internet lyrics.

        Errors are logged and swallowed, in which case
        ``results.transcription_corrected`` keeps its previous value.
        """
        self.logger.info("Starting lyrics correction process")

        try:
            # Run correction
            corrected_data = self.corrector.run(
                transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
            )

            # Store corrected results
            self.results.transcription_corrected = corrected_data
            self.logger.info("Lyrics correction completed")

        except Exception as e:
            self.logger.error(f"Failed to correct lyrics: {str(e)}", exc_info=True)

    def generate_outputs(self) -> None:
        """Generate output files and record their paths on ``self.results``.

        Raises:
            Exception: Any error raised by the output generator is logged and re-raised.
        """
        self.logger.info("Generating output files")

        try:
            output_files = self.output_generator.generate_outputs(
                transcription_corrected=self.results.transcription_corrected,
                lyrics_results=self.results.lyrics_results,
                output_prefix=self.output_prefix,
                audio_filepath=self.audio_filepath,
            )

            # Store output paths - access attributes directly instead of using .get()
            self.results.lrc_filepath = output_files.lrc
            self.results.ass_filepath = output_files.ass
            self.results.video_filepath = output_files.video

        except Exception as e:
            self.logger.error(f"Failed to generate outputs: {str(e)}")
            raise
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import Any, Dict, List, Protocol
|
3
|
+
|
4
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
|
5
|
+
from ..transcribers.base_transcriber import LyricsSegment, TranscriptionResult
|
6
|
+
|
7
|
+
|
8
|
+
@dataclass
class CorrectionResult:
    """Outcome of one correction run; all fields are required."""

    # Corrected lyrics as segments
    segments: List[LyricsSegment]
    # Corrected lyrics as plain text
    text: str
    confidence: float
    # Number of corrections applied
    corrections_made: int
    # Maps corrected words to their source
    source_mapping: Dict[str, str]
    metadata: Dict[str, Any]
|
18
|
+
|
19
|
+
|
20
|
+
class CorrectionStrategy(Protocol):
    """Structural interface that every lyrics correction strategy satisfies."""

    def correct(
        self,
        transcription_results: List[TranscriptionResult],
        lyrics_results: List[LyricsData],
    ) -> CorrectionResult:
        """Produce a ``CorrectionResult`` from the transcriptions and the fetched lyrics."""
        ...  # pragma: no cover
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import Any, Dict, List, Optional, Protocol
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from lyrics_transcriber.transcribers.base_transcriber import TranscriptionResult
|
6
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
|
7
|
+
from .strategy_diff import DiffBasedCorrector
|
8
|
+
from .base_strategy import CorrectionResult, CorrectionStrategy
|
9
|
+
|
10
|
+
|
11
|
+
class LyricsCorrector:
    """
    Coordinates lyrics correction process using multiple data sources
    and correction strategies.
    """

    def __init__(
        self,
        correction_strategy: Optional[CorrectionStrategy] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Store the logger and strategy.

        Args:
            correction_strategy: Strategy to apply; defaults to ``DiffBasedCorrector``.
            logger: Logger to use; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        self.correction_strategy = correction_strategy or DiffBasedCorrector(logger=self.logger)

    def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
        """Execute the correction process using configured strategy.

        Args:
            transcription_results: Transcriptions to correct; must not be empty.
            lyrics_results: Reference lyrics passed to the strategy.

        Returns:
            The strategy's result. If the strategy raises, the error is logged and
            the highest-priority transcription is returned uncorrected.

        Raises:
            ValueError: If ``transcription_results`` is empty.
        """
        if not transcription_results:
            self.logger.error("No transcription results available")
            raise ValueError("No primary transcription data available")

        try:
            self.logger.debug(f"Running correction with strategy: {self.correction_strategy.__class__.__name__}")

            result = self.correction_strategy.correct(
                transcription_results=transcription_results,
                lyrics_results=lyrics_results,
            )

            self.logger.debug(f"Correction completed. Made {result.corrections_made} corrections")
            return result

        except Exception as e:
            self.logger.error(f"Correction failed: {str(e)}", exc_info=True)
            # Return uncorrected transcription as fallback.
            # Pick by priority rather than list position: the lowest priority
            # number is the preferred transcription (ties keep the earliest).
            primary = min(transcription_results, key=lambda item: item.priority).result
            return CorrectionResult(
                segments=primary.segments,
                text=primary.text,
                confidence=1.0,
                corrections_made=0,
                source_mapping={},
                metadata=primary.metadata or {},
            )
|