lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. lyrics_transcriber/__init__.py +2 -5
  2. lyrics_transcriber/cli/cli_main.py +206 -0
  3. lyrics_transcriber/core/__init__.py +0 -0
  4. lyrics_transcriber/core/controller.py +317 -0
  5. lyrics_transcriber/correction/base_strategy.py +29 -0
  6. lyrics_transcriber/correction/corrector.py +52 -0
  7. lyrics_transcriber/correction/strategy_diff.py +263 -0
  8. lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
  9. lyrics_transcriber/lyrics/genius.py +70 -0
  10. lyrics_transcriber/lyrics/spotify.py +82 -0
  11. lyrics_transcriber/output/__init__.py +0 -0
  12. lyrics_transcriber/output/generator.py +271 -0
  13. lyrics_transcriber/{utils → output}/subtitles.py +12 -12
  14. lyrics_transcriber/storage/__init__.py +0 -0
  15. lyrics_transcriber/storage/dropbox.py +225 -0
  16. lyrics_transcriber/transcribers/audioshake.py +216 -0
  17. lyrics_transcriber/transcribers/base_transcriber.py +186 -0
  18. lyrics_transcriber/transcribers/whisper.py +321 -0
  19. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +5 -16
  20. lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
  21. lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
  22. lyrics_transcriber/audioshake_transcriber.py +0 -122
  23. lyrics_transcriber/corrector.py +0 -57
  24. lyrics_transcriber/llm_prompts/README.md +0 -10
  25. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
  26. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
  27. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
  28. lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
  29. lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
  30. lyrics_transcriber/transcriber.py +0 -934
  31. lyrics_transcriber/utils/cli.py +0 -179
  32. lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
  33. lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
  34. /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
  35. /lyrics_transcriber/{utils → output}/ass.py +0 -0
  36. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
  37. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0
@@ -1,6 +1,3 @@
1
- import warnings
2
-
3
- warnings.simplefilter("ignore")
4
-
5
- from .transcriber import LyricsTranscriber
1
+ from .core.controller import LyricsTranscriber, TranscriberConfig, LyricsConfig, OutputConfig
6
2
 
3
+ __all__ = ["LyricsTranscriber", "TranscriberConfig", "LyricsConfig", "OutputConfig"]
@@ -0,0 +1,206 @@
1
+ #!/usr/bin/env python
2
+ import argparse
3
+ import logging
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Dict
7
+ from importlib.metadata import version
8
+ from dotenv import load_dotenv
9
+
10
+ from lyrics_transcriber import LyricsTranscriber
11
+ from lyrics_transcriber.core.controller import TranscriberConfig, LyricsConfig, OutputConfig
12
+
13
+
14
def create_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the lyrics-transcriber CLI.

    Returns:
        A parser exposing the positional ``audio_filepath`` plus option
        groups for song identification, API credentials, output locations
        and video rendering.
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import list.
    from importlib.metadata import PackageNotFoundError

    parser = argparse.ArgumentParser(
        description="Create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps",
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=52),
    )

    # Required arguments
    # nargs="?" with default=SUPPRESS means the attribute is simply absent
    # from the namespace when no path is given; validate_args() relies on
    # hasattr() to detect that case and print the help text.
    parser.add_argument(
        "audio_filepath",
        nargs="?",
        help="The audio file path to transcribe lyrics for.",
        default=argparse.SUPPRESS,
    )

    # Version
    # The distribution metadata is missing when the code runs without being
    # installed; building the parser must not fail just because of that.
    try:
        package_version = version("lyrics-transcriber")
    except PackageNotFoundError:
        package_version = "unknown"
    parser.add_argument("-v", "--version", action="version", version=f"%(prog)s {package_version}")

    # Optional arguments
    parser.add_argument(
        "--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Logging level. Default: INFO"
    )

    # Song identification
    song_group = parser.add_argument_group("Song Identification")
    song_group.add_argument("--artist", help="Song artist for lyrics lookup and auto-correction")
    song_group.add_argument("--title", help="Song title for lyrics lookup and auto-correction")

    # API Credentials
    api_group = parser.add_argument_group("API Credentials")
    api_group.add_argument(
        "--audioshake_api_token", help="AudioShake API token for lyrics transcription. Can also use AUDIOSHAKE_API_TOKEN env var."
    )
    api_group.add_argument("--genius_api_token", help="Genius API token for lyrics fetching. Can also use GENIUS_API_TOKEN env var.")
    api_group.add_argument(
        "--spotify_cookie", help="Spotify sp_dc cookie value for lyrics fetching. Can also use SPOTIFY_COOKIE_SP_DC env var."
    )
    api_group.add_argument("--runpod_api_key", help="RunPod API key for Whisper transcription. Can also use RUNPOD_API_KEY env var.")
    api_group.add_argument(
        "--whisper_runpod_id", help="RunPod endpoint ID for Whisper transcription. Can also use WHISPER_RUNPOD_ID env var."
    )

    # Output options
    output_group = parser.add_argument_group("Output Options")
    output_group.add_argument("--output_dir", type=Path, help="Directory where output files will be saved. Default: current directory")

    output_group.add_argument(
        "--cache_dir",
        type=Path,
        help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
    )

    # Video options
    video_group = parser.add_argument_group("Video Options")
    video_group.add_argument("--render_video", action="store_true", help="Render a karaoke video with the generated lyrics")
    video_group.add_argument(
        "--video_resolution", choices=["4k", "1080p", "720p", "360p"], default="360p", help="Resolution of the karaoke video. Default: 360p"
    )
    video_group.add_argument("--video_background_image", type=Path, help="Image file to use for karaoke video background")
    video_group.add_argument(
        "--video_background_color",
        default="black",
        help="Color for karaoke video background (hex format or FFmpeg color name). Default: black",
    )

    return parser
81
+
82
+
83
+ def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
84
+ """Parse and process command line arguments."""
85
+ # Use provided args_list for testing, otherwise use sys.argv
86
+ args = parser.parse_args(args_list)
87
+
88
+ # Set default cache_dir if not provided
89
+ if not hasattr(args, "cache_dir") or args.cache_dir is None:
90
+ args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
91
+
92
+ return args
93
+
94
+
95
def get_config_from_env() -> Dict[str, str | None]:
    """Collect API credentials from environment variables.

    ``load_dotenv()`` is called first so that values from a dotenv file
    (if one is found by python-dotenv) are visible through ``os.getenv``.

    Returns:
        A dict keyed by the CLI option names. A value is ``None`` when the
        corresponding environment variable is not set.
    """
    load_dotenv()

    # Result key -> environment variable it is read from.
    env_var_by_key = {
        "audioshake_api_token": "AUDIOSHAKE_API_TOKEN",
        "genius_api_token": "GENIUS_API_TOKEN",
        "spotify_cookie": "SPOTIFY_COOKIE_SP_DC",
        "runpod_api_key": "RUNPOD_API_KEY",
        "whisper_runpod_id": "WHISPER_RUNPOD_ID",
    }
    return {key: os.getenv(env_var) for key, env_var in env_var_by_key.items()}
105
+
106
+
107
def setup_logging(log_level: str) -> logging.Logger:
    """Return the ``lyrics_transcriber`` logger set to ``log_level``.

    Args:
        log_level: Name of a ``logging`` level (case-insensitive), e.g.
            ``"info"`` or ``"DEBUG"``.

    Returns:
        The configured logger. A stream handler with a timestamped format
        is attached only if the logger has no handlers yet, so repeated
        calls do not duplicate output.
    """
    package_logger = logging.getLogger("lyrics_transcriber")
    package_logger.setLevel(getattr(logging, log_level.upper()))

    # Already wired up by an earlier call: only the level needed updating.
    if package_logger.handlers:
        return package_logger

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s.%(msecs)03d - %(levelname)s - %(module)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    package_logger.addHandler(stream_handler)
    return package_logger
120
+
121
+
122
def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tuple[TranscriberConfig, LyricsConfig, OutputConfig]:
    """Build the three config objects from CLI arguments and env values.

    For every credential, a truthy command-line value wins; otherwise the
    value from ``env_config`` (possibly ``None``) is used.

    Args:
        args: Parsed command-line namespace.
        env_config: Credentials keyed by the same names as the CLI options.

    Returns:
        ``(transcriber_config, lyrics_config, output_config)``.
    """

    def _cli_or_env(option: str):
        # CLI attribute names and env_config keys are identical.
        return getattr(args, option) or env_config.get(option)

    if args.output_dir:
        output_dir = str(args.output_dir)
    else:
        output_dir = os.getcwd()

    if args.video_background_image:
        background_image = str(args.video_background_image)
    else:
        background_image = None

    transcriber_config = TranscriberConfig(
        audioshake_api_token=_cli_or_env("audioshake_api_token"),
        runpod_api_key=_cli_or_env("runpod_api_key"),
        whisper_runpod_id=_cli_or_env("whisper_runpod_id"),
    )
    lyrics_config = LyricsConfig(
        genius_api_token=_cli_or_env("genius_api_token"),
        spotify_cookie=_cli_or_env("spotify_cookie"),
    )
    output_config = OutputConfig(
        output_dir=output_dir,
        cache_dir=str(args.cache_dir),
        render_video=args.render_video,
        video_resolution=args.video_resolution,
        video_background_image=background_image,
        video_background_color=args.video_background_color,
    )

    return transcriber_config, lyrics_config, output_config
145
+
146
+
147
def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, logger: logging.Logger) -> None:
    """Validate command line arguments, terminating the process on failure.

    Args:
        args: Parsed command-line namespace.
        parser: Parser whose help text is printed when no audio file is given.
        logger: Logger that receives the error message.

    Raises:
        SystemExit: With exit code 1 when the audio path is missing, the
            path does not exist, or only one of artist/title is supplied.
    """
    # ``raise SystemExit`` is used instead of the ``exit()`` helper: that
    # helper is injected by the ``site`` module and is not guaranteed to
    # exist (e.g. ``python -S`` or frozen executables).
    if not hasattr(args, "audio_filepath"):
        parser.print_help()
        logger.error("No audio filepath provided")
        raise SystemExit(1)

    if not os.path.exists(args.audio_filepath):
        logger.error(f"Audio file not found: {args.audio_filepath}")
        raise SystemExit(1)

    # Artist and title are only useful as a pair: exactly one being set is an error.
    if bool(args.artist) != bool(args.title):
        logger.error("Both artist and title must be provided together")
        raise SystemExit(1)
161
+
162
+
163
def main() -> None:
    """Main entry point for the CLI.

    Parses and validates the arguments, builds the configuration objects,
    runs ``LyricsTranscriber.process()`` and logs the generated file paths.

    Raises:
        SystemExit: With exit code 1 when validation or processing fails.
    """
    parser = create_arg_parser()
    args = parse_args(parser)

    # Set up logging first so validation errors are reported consistently
    logger = setup_logging(args.log_level)

    # Validate arguments (exits the process on invalid input)
    validate_args(args, parser, logger)

    # Load environment variables
    env_config = get_config_from_env()

    # Create configuration objects
    transcriber_config, lyrics_config, output_config = create_configs(args, env_config)

    try:
        # Initialize and run transcriber
        transcriber = LyricsTranscriber(
            audio_filepath=args.audio_filepath,
            artist=args.artist,
            title=args.title,
            transcriber_config=transcriber_config,
            lyrics_config=lyrics_config,
            output_config=output_config,
            logger=logger,
        )

        results = transcriber.process()

        # Log results
        logger.info("*** Success! ***")

        if results.lrc_filepath:
            logger.info(f"Generated LRC file: {results.lrc_filepath}")
        if results.ass_filepath:
            logger.info(f"Generated ASS file: {results.ass_filepath}")
        if results.video_filepath:
            logger.info(f"Generated video file: {results.video_filepath}")

    except Exception as e:
        # Keep normal output to a single line, but include the traceback
        # when the user asked for DEBUG logging.
        logger.error(f"Processing failed: {str(e)}", exc_info=logger.isEnabledFor(logging.DEBUG))
        # SystemExit instead of the site-provided ``exit()`` helper, which is
        # not available in every interpreter configuration.
        raise SystemExit(1) from e
File without changes
@@ -0,0 +1,317 @@
1
+ import os
2
+ import logging
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, Optional, Any, List
5
+ from ..transcribers.base_transcriber import BaseTranscriber, TranscriptionResult
6
+ from ..transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
7
+ from ..transcribers.whisper import WhisperTranscriber, WhisperConfig
8
+ from ..lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig, LyricsData
9
+ from ..lyrics.genius import GeniusProvider
10
+ from ..lyrics.spotify import SpotifyProvider
11
+ from ..output.generator import OutputGenerator, OutputGeneratorConfig
12
+ from ..correction.corrector import LyricsCorrector, CorrectionResult
13
+
14
+
15
@dataclass
class TranscriberConfig:
    """Credentials for the transcription backends.

    Every field defaults to ``None``; a backend is only initialized by the
    controller when its credentials are present.
    """

    # AudioShake API token.
    audioshake_api_token: Optional[str] = None
    # RunPod API key; used together with whisper_runpod_id for Whisper.
    runpod_api_key: Optional[str] = None
    # RunPod endpoint ID for the Whisper transcriber.
    whisper_runpod_id: Optional[str] = None
22
+
23
+
24
@dataclass
class LyricsConfig:
    """Credentials for the lyrics providers.

    Both fields default to ``None``; a provider is only initialized by the
    controller when its credential is present.
    """

    # Genius API token.
    genius_api_token: Optional[str] = None
    # Spotify cookie value used for lyrics fetching.
    spotify_cookie: Optional[str] = None
30
+
31
+
32
@dataclass
class OutputConfig:
    """Configuration for output generation.

    ``output_dir`` and ``cache_dir`` use default factories so that the
    current working directory and the ``LYRICS_TRANSCRIBER_CACHE_DIR``
    environment variable are read when an instance is created, not once
    when this module is imported.
    """

    # Directory for generated files; defaults to the working directory at
    # instantiation time.
    output_dir: Optional[str] = field(default_factory=os.getcwd)
    # Cache directory; defaults to $LYRICS_TRANSCRIBER_CACHE_DIR or a /tmp path.
    cache_dir: str = field(default_factory=lambda: os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
    # Whether a karaoke video should be rendered.
    render_video: bool = False
    # Video resolution label (the CLI offers "4k", "1080p", "720p", "360p").
    video_resolution: str = "360p"
    # Optional path to a background image for the video.
    video_background_image: Optional[str] = None
    # Background color for the video.
    video_background_color: str = "black"
42
+
43
+
44
@dataclass
class LyricsControllerResult:
    """Accumulates everything produced by one ``LyricsTranscriber`` run."""

    # Raw inputs gathered from the lyrics providers and the transcribers;
    # each instance gets its own fresh list.
    lyrics_results: List[LyricsData] = field(default_factory=list)
    transcription_results: List[TranscriptionResult] = field(default_factory=list)

    # Output of the correction step; stays None until correction succeeds.
    transcription_corrected: Optional[CorrectionResult] = None

    # Paths of the generated files; None for anything not (yet) produced.
    lrc_filepath: Optional[str] = None
    ass_filepath: Optional[str] = None
    video_filepath: Optional[str] = None
59
+
60
+
61
class LyricsTranscriber:
    """
    Controller class that orchestrates the lyrics transcription workflow:
    1. Fetch lyrics from internet sources
    2. Run multiple transcription methods
    3. Correct transcribed lyrics using fetched lyrics
    4. Generate output formats (LRC, ASS, video)

    Collaborators (transcribers, lyrics providers, corrector, output
    generator) may be injected; any that are omitted are built from the
    supplied configuration objects.
    """

    def __init__(
        self,
        audio_filepath: str,
        artist: Optional[str] = None,
        title: Optional[str] = None,
        transcriber_config: Optional[TranscriberConfig] = None,
        lyrics_config: Optional[LyricsConfig] = None,
        output_config: Optional[OutputConfig] = None,
        transcribers: Optional[Dict[str, Dict[str, Any]]] = None,
        lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
        corrector: Optional[LyricsCorrector] = None,
        output_generator: Optional[OutputGenerator] = None,
        logger: Optional[logging.Logger] = None,
        log_level: int = logging.DEBUG,
        log_formatter: Optional[logging.Formatter] = None,
    ):
        """Store settings, create working directories and wire up components.

        Args:
            audio_filepath: Path of the audio file to transcribe.
            artist: Song artist; lyrics are only fetched when both artist
                and title are given.
            title: Song title.
            transcriber_config: Credentials for transcription services.
            lyrics_config: Credentials for lyrics providers.
            output_config: Output/cache directories and video settings.
            transcribers: Optional pre-built transcribers, keyed by name.
                Each value is a dict with an ``"instance"`` (object with a
                ``transcribe(audio_filepath)`` method) and a ``"priority"``.
            lyrics_providers: Optional pre-built providers, keyed by name.
            corrector: Optional pre-built corrector.
            output_generator: Optional pre-built output generator.
            logger: Logger to use; when omitted a module logger is
                configured with ``log_level`` and ``log_formatter``.
            log_level: Level applied only when no logger is passed in.
            log_formatter: Formatter applied only when this class attaches
                its own handler.
        """
        # Set up logging; a caller-supplied logger is used untouched.
        self.logger = logger or logging.getLogger(__name__)
        if not logger:
            self.logger.setLevel(log_level)
            if not self.logger.handlers:
                handler = logging.StreamHandler()
                formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
                handler.setFormatter(formatter)
                self.logger.addHandler(handler)

        self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")

        # Store configs (with defaults if not provided)
        self.transcriber_config = transcriber_config or TranscriberConfig()
        self.lyrics_config = lyrics_config or LyricsConfig()
        self.output_config = output_config or OutputConfig()

        # Basic settings
        self.audio_filepath = audio_filepath
        self.artist = artist
        self.title = title
        # Output files are named "<artist> - <title>" when both are known,
        # otherwise after the audio file (without its extension).
        self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]

        # Log the directories in use before creating them
        self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
        self.logger.debug(f"Using output directory: {self.output_config.output_dir}")

        # Create necessary folders
        os.makedirs(self.output_config.cache_dir, exist_ok=True)
        os.makedirs(self.output_config.output_dir, exist_ok=True)

        # Initialize results
        self.results = LyricsControllerResult()

        # Initialize components (with dependency injection).
        # Note: ``or`` means an injected empty dict is treated like "not
        # provided" and the defaults are built instead.
        self.transcribers = transcribers or self._initialize_transcribers()
        self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
        self.corrector = corrector or LyricsCorrector(logger=self.logger)
        self.output_generator = output_generator or self._initialize_output_generator()

    def _initialize_transcribers(self) -> Dict[str, Dict[str, Any]]:
        """Initialize available transcription services.

        Returns:
            A dict keyed by service name whose values are
            ``{"instance": <transcriber>, "priority": <int>}``. Services
            without credentials are skipped.
        """
        transcribers = {}
        config = self.transcriber_config

        # Only report whether each credential is present: dumping the config
        # object itself would write API tokens into the debug log.
        self.logger.debug(
            "Initializing transcribers (audioshake_api_token set: %s, runpod_api_key set: %s, whisper_runpod_id set: %s)",
            bool(config.audioshake_api_token),
            bool(config.runpod_api_key),
            bool(config.whisper_runpod_id),
        )
        self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")

        if config.audioshake_api_token:
            self.logger.debug("Initializing AudioShake transcriber")
            transcribers["audioshake"] = {
                "instance": AudioShakeTranscriber(
                    cache_dir=self.output_config.cache_dir,
                    config=AudioShakeConfig(api_token=config.audioshake_api_token),
                    logger=self.logger,
                ),
                "priority": 1,  # AudioShake has highest priority
            }
        else:
            self.logger.debug("Skipping AudioShake transcriber - no API token provided")

        # Whisper needs both the RunPod key and the endpoint ID.
        if config.runpod_api_key and config.whisper_runpod_id:
            self.logger.debug("Initializing Whisper transcriber")
            transcribers["whisper"] = {
                "instance": WhisperTranscriber(
                    cache_dir=self.output_config.cache_dir,
                    config=WhisperConfig(runpod_api_key=config.runpod_api_key, endpoint_id=config.whisper_runpod_id),
                    logger=self.logger,
                ),
                "priority": 2,  # Whisper has lower priority
            }
        else:
            self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")

        return transcribers

    def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
        """Initialize available lyrics providers.

        Returns:
            A dict keyed by provider name; providers without credentials
            are skipped.
        """
        providers = {}

        # Create provider config with all necessary parameters
        provider_config = LyricsProviderConfig(
            genius_api_token=self.lyrics_config.genius_api_token,
            spotify_cookie=self.lyrics_config.spotify_cookie,
            cache_dir=self.output_config.cache_dir,
            audio_filepath=self.audio_filepath,
        )

        if provider_config.genius_api_token:
            self.logger.debug("Initializing Genius lyrics provider")
            providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
        else:
            self.logger.debug("Skipping Genius provider - no API token provided")

        if provider_config.spotify_cookie:
            self.logger.debug("Initializing Spotify lyrics provider")
            providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
        else:
            self.logger.debug("Skipping Spotify provider - no cookie provided")

        return providers

    def _initialize_output_generator(self) -> OutputGenerator:
        """Initialize output generation service from ``self.output_config``."""

        # Convert OutputConfig to OutputGeneratorConfig
        # NOTE(review): output_config.render_video is not forwarded here -
        # confirm whether OutputGeneratorConfig is expected to receive it.
        generator_config = OutputGeneratorConfig(
            output_dir=self.output_config.output_dir,
            cache_dir=self.output_config.cache_dir,
            video_resolution=self.output_config.video_resolution,
            video_background_image=self.output_config.video_background_image,
            video_background_color=self.output_config.video_background_color,
        )

        # Initialize output generator
        return OutputGenerator(config=generator_config, logger=self.logger)

    def process(self) -> LyricsControllerResult:
        """
        Main processing method that orchestrates the entire workflow.

        Returns:
            LyricsControllerResult containing all outputs and generated files.

        Raises:
            Exception: If a critical error occurs during processing. The
                error is logged and then re-raised unchanged.
        """
        try:
            # Step 1: Fetch lyrics if artist and title are provided
            if self.artist and self.title:
                self.fetch_lyrics()

            # Step 2: Run transcription
            self.transcribe()

            # Step 3: Process and correct lyrics
            self.correct_lyrics()

            # Step 4: Generate outputs
            self.generate_outputs()

            self.logger.info("Processing completed successfully")
            return self.results

        except Exception as e:
            self.logger.error(f"Error during processing: {str(e)}")
            raise

    def fetch_lyrics(self) -> None:
        """Fetch lyrics from available providers.

        Successful results are appended to ``self.results.lyrics_results``.
        Failures are logged and never raised, because the workflow can
        continue without reference lyrics.
        """
        self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")

        try:
            for name, provider in self.lyrics_providers.items():
                try:
                    result = provider.fetch_lyrics(self.artist, self.title)
                    if result:
                        self.results.lyrics_results.append(result)
                        self.logger.info(f"Successfully fetched lyrics from {name}")

                except Exception as e:
                    # One failing provider must not stop the others.
                    self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")

            if not self.results.lyrics_results:
                self.logger.warning("No lyrics found from any source")

        except Exception as e:
            self.logger.error(f"Failed to fetch lyrics: {str(e)}")
            # Don't raise - we can continue without lyrics

    def transcribe(self) -> None:
        """Run transcription using all available transcribers.

        Each truthy result is wrapped in a ``TranscriptionResult`` carrying
        the transcriber's name and priority and appended to
        ``self.results.transcription_results``. Per-transcriber failures
        are logged with traceback and skipped.
        """
        self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")

        for name, transcriber_info in self.transcribers.items():
            self.logger.info(f"Running transcription with {name}")
            try:
                result = transcriber_info["instance"].transcribe(self.audio_filepath)
                if result:
                    # Add the transcriber name and priority to the result
                    self.results.transcription_results.append(
                        TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
                    )
                    self.logger.debug(f"Transcription completed for {name}")

            except Exception as e:
                self.logger.error(f"Transcription failed for {name}: {str(e)}", exc_info=True)

        if not self.results.transcription_results:
            self.logger.warning("No successful transcriptions from any provider")

    def correct_lyrics(self) -> None:
        """Run lyrics correction using transcription and internet lyrics.

        On success the result is stored in
        ``self.results.transcription_corrected``. On failure the error is
        logged with traceback and the attribute keeps its previous value.
        """
        self.logger.info("Starting lyrics correction process")

        try:
            # Run correction
            corrected_data = self.corrector.run(
                transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
            )

            # Store corrected results
            self.results.transcription_corrected = corrected_data
            self.logger.info("Lyrics correction completed")

        except Exception as e:
            self.logger.error(f"Failed to correct lyrics: {str(e)}", exc_info=True)

    def generate_outputs(self) -> None:
        """Generate output files and record their paths on ``self.results``.

        Raises:
            Exception: Any error from the output generator is logged and
                re-raised.
        """
        self.logger.info("Generating output files")

        try:
            output_files = self.output_generator.generate_outputs(
                transcription_corrected=self.results.transcription_corrected,
                lyrics_results=self.results.lyrics_results,
                output_prefix=self.output_prefix,
                audio_filepath=self.audio_filepath,
            )

            # Store output paths - access attributes directly instead of using .get()
            self.results.lrc_filepath = output_files.lrc
            self.results.ass_filepath = output_files.ass
            self.results.video_filepath = output_files.video

        except Exception as e:
            self.logger.error(f"Failed to generate outputs: {str(e)}")
            raise
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Dict, List, Protocol
3
+
4
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
5
+ from ..transcribers.base_transcriber import LyricsSegment, TranscriptionResult
6
+
7
+
8
@dataclass
class CorrectionResult:
    """Value object returned by a correction strategy."""

    # Lyrics segments after correction.
    segments: List[LyricsSegment]
    # Full lyrics text.
    text: str
    # Confidence score for the result.
    confidence: float
    # Number of corrections that were applied.
    corrections_made: int
    # Maps corrected words to their source.
    source_mapping: Dict[str, str]
    # Free-form extra information about the result.
    metadata: Dict[str, Any]
18
+
19
+
20
class CorrectionStrategy(Protocol):
    """Structural interface that every lyrics correction strategy satisfies.

    Any object with a matching ``correct`` method conforms; no inheritance
    is required.
    """

    def correct(
        self,
        transcription_results: List[TranscriptionResult],
        lyrics_results: List[LyricsData],
    ) -> CorrectionResult:
        """Produce corrected lyrics from transcriptions and reference lyrics.

        Args:
            transcription_results: Results from the transcribers.
            lyrics_results: Lyrics fetched from the lyrics providers.

        Returns:
            The outcome of the correction as a ``CorrectionResult``.
        """
        ...  # pragma: no cover
@@ -0,0 +1,52 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Dict, List, Optional, Protocol
3
+ import logging
4
+
5
+ from lyrics_transcriber.transcribers.base_transcriber import TranscriptionResult
6
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
7
+ from .strategy_diff import DiffBasedCorrector
8
+ from .base_strategy import CorrectionResult, CorrectionStrategy
9
+
10
+
11
class LyricsCorrector:
    """
    Runs a pluggable correction strategy over the transcription results
    and reference lyrics, falling back to the uncorrected transcription
    when the strategy fails.
    """

    def __init__(
        self,
        correction_strategy: Optional[CorrectionStrategy] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Store the logger and strategy.

        Args:
            correction_strategy: Strategy to apply; a ``DiffBasedCorrector``
                is created when none is given.
            logger: Logger to use; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        self.correction_strategy = correction_strategy or DiffBasedCorrector(logger=self.logger)

    def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
        """Apply the configured strategy and return its result.

        Args:
            transcription_results: Results from the transcribers; must not
                be empty.
            lyrics_results: Reference lyrics from the providers.

        Returns:
            The strategy's result, or - if the strategy raises - a result
            built from the first transcription with zero corrections.

        Raises:
            ValueError: If ``transcription_results`` is empty.
        """
        if not transcription_results:
            self.logger.error("No transcription results available")
            raise ValueError("No primary transcription data available")

        try:
            strategy_name = self.correction_strategy.__class__.__name__
            self.logger.debug(f"Running correction with strategy: {strategy_name}")

            outcome = self.correction_strategy.correct(
                transcription_results=transcription_results,
                lyrics_results=lyrics_results,
            )

            self.logger.debug(f"Correction completed. Made {outcome.corrections_made} corrections")
            return outcome

        except Exception as e:
            self.logger.error(f"Correction failed: {str(e)}", exc_info=True)

            # Fall back to the first transcription, passed through unchanged.
            # NOTE(review): confidence is reported as 1.0 even though the
            # correction failed - confirm this is intended.
            primary = transcription_results[0].result
            return CorrectionResult(
                segments=primary.segments,
                text=primary.text,
                confidence=1.0,
                corrections_made=0,
                source_mapping={},
                metadata=primary.metadata or {},
            )