lyrics-transcriber 0.30.0__tar.gz → 0.30.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/PKG-INFO +1 -1
  2. lyrics_transcriber-0.30.0/lyrics_transcriber/cli/main.py → lyrics_transcriber-0.30.1/lyrics_transcriber/cli/cli_main.py +15 -3
  3. lyrics_transcriber-0.30.1/lyrics_transcriber/core/controller.py +317 -0
  4. lyrics_transcriber-0.30.1/lyrics_transcriber/correction/base_strategy.py +29 -0
  5. lyrics_transcriber-0.30.1/lyrics_transcriber/correction/corrector.py +52 -0
  6. lyrics_transcriber-0.30.1/lyrics_transcriber/correction/strategy_diff.py +263 -0
  7. lyrics_transcriber-0.30.1/lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
  8. lyrics_transcriber-0.30.1/lyrics_transcriber/lyrics/genius.py +70 -0
  9. lyrics_transcriber-0.30.1/lyrics_transcriber/lyrics/spotify.py +82 -0
  10. lyrics_transcriber-0.30.1/lyrics_transcriber/output/generator.py +271 -0
  11. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/output/subtitles.py +12 -12
  12. lyrics_transcriber-0.30.1/lyrics_transcriber/storage/dropbox.py +225 -0
  13. lyrics_transcriber-0.30.1/lyrics_transcriber/transcribers/audioshake.py +216 -0
  14. lyrics_transcriber-0.30.1/lyrics_transcriber/transcribers/base_transcriber.py +186 -0
  15. lyrics_transcriber-0.30.1/lyrics_transcriber/transcribers/whisper.py +321 -0
  16. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/pyproject.toml +2 -2
  17. lyrics_transcriber-0.30.0/lyrics_transcriber/core/controller.py +0 -283
  18. lyrics_transcriber-0.30.0/lyrics_transcriber/core/corrector.py +0 -56
  19. lyrics_transcriber-0.30.0/lyrics_transcriber/core/fetcher.py +0 -143
  20. lyrics_transcriber-0.30.0/lyrics_transcriber/output/generator.py +0 -210
  21. lyrics_transcriber-0.30.0/lyrics_transcriber/storage/dropbox.py +0 -249
  22. lyrics_transcriber-0.30.0/lyrics_transcriber/storage/tokens.py +0 -116
  23. lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/audioshake.py +0 -151
  24. lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/base.py +0 -31
  25. lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/whisper.py +0 -186
  26. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/LICENSE +0 -0
  27. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/README.md +0 -0
  28. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/__init__.py +0 -0
  29. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/cli/__init__.py +0 -0
  30. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/core/__init__.py +0 -0
  31. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/output/__init__.py +0 -0
  32. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/output/ass.py +0 -0
  33. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/lyrics_transcriber/storage/__init__.py +0 -0
{lyrics_transcriber-0.30.0 → lyrics_transcriber-0.30.1}/PKG-INFO
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lyrics-transcriber
3
- Version: 0.30.0
3
+ Version: 0.30.1
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
6
  License: MIT
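lyrics_transcriber-0.30.0/lyrics_transcriber/cli/main.py → lyrics_transcriber-0.30.1/lyrics_transcriber/cli/cli_main.py
@@ -57,10 +57,10 @@ def create_arg_parser() -> argparse.ArgumentParser: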
57
57
  # Output options
58
58
  output_group = parser.add_argument_group("Output Options")
59
59
  output_group.add_argument("--output_dir", type=Path, help="Directory where output files will be saved. Default: current directory")
60
+
60
61
  output_group.add_argument(
61
62
  "--cache_dir",
62
63
  type=Path,
63
- default=Path("/tmp/lyrics-transcriber-cache/"),
64
64
  help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
65
65
  )
66
66
 
@@ -80,6 +80,18 @@ def create_arg_parser() -> argparse.ArgumentParser:
80
80
  return parser
81
81
 
82
82
 
83
+ def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
84
+ """Parse and process command line arguments."""
85
+ # Use provided args_list for testing, otherwise use sys.argv
86
+ args = parser.parse_args(args_list)
87
+
88
+ # Set default cache_dir if not provided
89
+ if not hasattr(args, "cache_dir") or args.cache_dir is None:
90
+ args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
91
+
92
+ return args
93
+
94
+
83
95
  def get_config_from_env() -> Dict[str, str]:
84
96
  """Load configuration from environment variables."""
85
97
  load_dotenv()
@@ -121,7 +133,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
121
133
  )
122
134
 
123
135
  output_config = OutputConfig(
124
- output_dir=str(args.output_dir) if args.output_dir else None,
136
+ output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
125
137
  cache_dir=str(args.cache_dir),
126
138
  render_video=args.render_video,
127
139
  video_resolution=args.video_resolution,
@@ -151,7 +163,7 @@ def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, log
151
163
  def main() -> None:
152
164
  """Main entry point for the CLI."""
153
165
  parser = create_arg_parser()
154
- args = parser.parse_args()
166
+ args = parse_args(parser)
155
167
 
156
168
  # Set up logging first
157
169
  logger = setup_logging(args.log_level)
lyrics_transcriber-0.30.1/lyrics_transcriber/core/controller.py
@@ -0,0 +1,317 @@
1
+ import os
2
+ import logging
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, Optional, Any, List
5
+ from ..transcribers.base_transcriber import BaseTranscriber, TranscriptionResult
6
+ from ..transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
7
+ from ..transcribers.whisper import WhisperTranscriber, WhisperConfig
8
+ from ..lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig, LyricsData
9
+ from ..lyrics.genius import GeniusProvider
10
+ from ..lyrics.spotify import SpotifyProvider
11
+ from ..output.generator import OutputGenerator, OutputGeneratorConfig
12
+ from ..correction.corrector import LyricsCorrector, CorrectionResult
13
+
14
+
15
+ @dataclass
16
+ class TranscriberConfig:
17
+ """Configuration for transcription services."""
18
+
19
+ audioshake_api_token: Optional[str] = None
20
+ runpod_api_key: Optional[str] = None
21
+ whisper_runpod_id: Optional[str] = None
22
+
23
+
24
+ @dataclass
25
+ class LyricsConfig:
26
+ """Configuration for lyrics services."""
27
+
28
+ genius_api_token: Optional[str] = None
29
+ spotify_cookie: Optional[str] = None
30
+
31
+
32
+ @dataclass
33
+ class OutputConfig:
34
+ """Configuration for output generation."""
35
+
36
+ output_dir: Optional[str] = os.getcwd()
37
+ cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
38
+ render_video: bool = False
39
+ video_resolution: str = "360p"
40
+ video_background_image: Optional[str] = None
41
+ video_background_color: str = "black"
42
+
43
+
44
+ @dataclass
45
+ class LyricsControllerResult:
46
+ """Holds the results of the transcription and correction process."""
47
+
48
+ # Results from different sources
49
+ lyrics_results: List[LyricsData] = field(default_factory=list)
50
+ transcription_results: List[TranscriptionResult] = field(default_factory=list)
51
+
52
+ # Corrected results
53
+ transcription_corrected: Optional[CorrectionResult] = None
54
+
55
+ # Output files
56
+ lrc_filepath: Optional[str] = None
57
+ ass_filepath: Optional[str] = None
58
+ video_filepath: Optional[str] = None
59
+
60
+
61
+ class LyricsTranscriber:
62
+ """
63
+ Controller class that orchestrates the lyrics transcription workflow:
64
+ 1. Fetch lyrics from internet sources
65
+ 2. Run multiple transcription methods
66
+ 3. Correct transcribed lyrics using fetched lyrics
67
+ 4. Generate output formats (LRC, ASS, video)
68
+ """
69
+
70
+ def __init__(
71
+ self,
72
+ audio_filepath: str,
73
+ artist: Optional[str] = None,
74
+ title: Optional[str] = None,
75
+ transcriber_config: Optional[TranscriberConfig] = None,
76
+ lyrics_config: Optional[LyricsConfig] = None,
77
+ output_config: Optional[OutputConfig] = None,
78
+ transcribers: Optional[Dict[str, BaseTranscriber]] = None,
79
+ lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
80
+ corrector: Optional[LyricsCorrector] = None,
81
+ output_generator: Optional[OutputGenerator] = None,
82
+ logger: Optional[logging.Logger] = None,
83
+ log_level: int = logging.DEBUG,
84
+ log_formatter: Optional[logging.Formatter] = None,
85
+ ):
86
+ # Set up logging
87
+ self.logger = logger or logging.getLogger(__name__)
88
+ if not logger:
89
+ self.logger.setLevel(log_level)
90
+ if not self.logger.handlers:
91
+ handler = logging.StreamHandler()
92
+ formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
93
+ handler.setFormatter(formatter)
94
+ self.logger.addHandler(handler)
95
+
96
+ self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")
97
+
98
+ # Store configs (with defaults if not provided)
99
+ self.transcriber_config = transcriber_config or TranscriberConfig()
100
+ self.lyrics_config = lyrics_config or LyricsConfig()
101
+ self.output_config = output_config or OutputConfig()
102
+
103
+ # Basic settings
104
+ self.audio_filepath = audio_filepath
105
+ self.artist = artist
106
+ self.title = title
107
+ self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
108
+
109
+ # Add after creating necessary folders
110
+ self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
111
+ self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
112
+
113
+ # Create necessary folders
114
+ os.makedirs(self.output_config.cache_dir, exist_ok=True)
115
+ os.makedirs(self.output_config.output_dir, exist_ok=True)
116
+
117
+ # Initialize results
118
+ self.results = LyricsControllerResult()
119
+
120
+ # Initialize components (with dependency injection)
121
+ self.transcribers = transcribers or self._initialize_transcribers()
122
+ self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
123
+ self.corrector = corrector or LyricsCorrector(logger=self.logger)
124
+ self.output_generator = output_generator or self._initialize_output_generator()
125
+
126
+ def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
127
+ """Initialize available transcription services."""
128
+ transcribers = {}
129
+
130
+ # Add debug logging for config values
131
+ self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
132
+ self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
133
+
134
+ if self.transcriber_config.audioshake_api_token:
135
+ self.logger.debug("Initializing AudioShake transcriber")
136
+ transcribers["audioshake"] = {
137
+ "instance": AudioShakeTranscriber(
138
+ cache_dir=self.output_config.cache_dir,
139
+ config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
140
+ logger=self.logger,
141
+ ),
142
+ "priority": 1, # AudioShake has highest priority
143
+ }
144
+ else:
145
+ self.logger.debug("Skipping AudioShake transcriber - no API token provided")
146
+
147
+ if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
148
+ self.logger.debug("Initializing Whisper transcriber")
149
+ transcribers["whisper"] = {
150
+ "instance": WhisperTranscriber(
151
+ cache_dir=self.output_config.cache_dir,
152
+ config=WhisperConfig(
153
+ runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
154
+ ),
155
+ logger=self.logger,
156
+ ),
157
+ "priority": 2, # Whisper has lower priority
158
+ }
159
+ else:
160
+ self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
161
+
162
+ return transcribers
163
+
164
+ def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
165
+ """Initialize available lyrics providers."""
166
+ providers = {}
167
+
168
+ # Create provider config with all necessary parameters
169
+ provider_config = LyricsProviderConfig(
170
+ genius_api_token=self.lyrics_config.genius_api_token,
171
+ spotify_cookie=self.lyrics_config.spotify_cookie,
172
+ cache_dir=self.output_config.cache_dir,
173
+ audio_filepath=self.audio_filepath,
174
+ )
175
+
176
+ if provider_config.genius_api_token:
177
+ self.logger.debug("Initializing Genius lyrics provider")
178
+ providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
179
+ else:
180
+ self.logger.debug("Skipping Genius provider - no API token provided")
181
+
182
+ if provider_config.spotify_cookie:
183
+ self.logger.debug("Initializing Spotify lyrics provider")
184
+ providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
185
+ else:
186
+ self.logger.debug("Skipping Spotify provider - no cookie provided")
187
+
188
+ return providers
189
+
190
+ def _initialize_output_generator(self) -> OutputGenerator:
191
+ """Initialize output generation service."""
192
+
193
+ # Convert OutputConfig to OutputGeneratorConfig
194
+ generator_config = OutputGeneratorConfig(
195
+ output_dir=self.output_config.output_dir,
196
+ cache_dir=self.output_config.cache_dir,
197
+ video_resolution=self.output_config.video_resolution,
198
+ video_background_image=self.output_config.video_background_image,
199
+ video_background_color=self.output_config.video_background_color,
200
+ )
201
+
202
+ # Initialize output generator
203
+ return OutputGenerator(config=generator_config, logger=self.logger)
204
+
205
+ def process(self) -> LyricsControllerResult:
206
+ """
207
+ Main processing method that orchestrates the entire workflow.
208
+
209
+ Returns:
210
+ LyricsControllerResult containing all outputs and generated files.
211
+
212
+ Raises:
213
+ Exception: If a critical error occurs during processing.
214
+ """
215
+ try:
216
+ # Step 1: Fetch lyrics if artist and title are provided
217
+ if self.artist and self.title:
218
+ self.fetch_lyrics()
219
+
220
+ # Step 2: Run transcription
221
+ self.transcribe()
222
+
223
+ # Step 3: Process and correct lyrics
224
+ self.correct_lyrics()
225
+
226
+ # Step 4: Generate outputs
227
+ self.generate_outputs()
228
+
229
+ self.logger.info("Processing completed successfully")
230
+ return self.results
231
+
232
+ except Exception as e:
233
+ self.logger.error(f"Error during processing: {str(e)}")
234
+ raise
235
+
236
+ def fetch_lyrics(self) -> None:
237
+ """Fetch lyrics from available providers."""
238
+ self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")
239
+
240
+ try:
241
+ for name, provider in self.lyrics_providers.items():
242
+ try:
243
+ result = provider.fetch_lyrics(self.artist, self.title)
244
+ if result:
245
+ self.results.lyrics_results.append(result)
246
+ self.logger.info(f"Successfully fetched lyrics from {name}")
247
+
248
+ except Exception as e:
249
+ self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")
250
+ continue
251
+
252
+ if not self.results.lyrics_results:
253
+ self.logger.warning("No lyrics found from any source")
254
+
255
+ except Exception as e:
256
+ self.logger.error(f"Failed to fetch lyrics: {str(e)}")
257
+ # Don't raise - we can continue without lyrics
258
+
259
+ def transcribe(self) -> None:
260
+ """Run transcription using all available transcribers."""
261
+ self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")
262
+
263
+ for name, transcriber_info in self.transcribers.items():
264
+ self.logger.info(f"Running transcription with {name}")
265
+ try:
266
+ result = transcriber_info["instance"].transcribe(self.audio_filepath)
267
+ if result:
268
+ # Add the transcriber name and priority to the result
269
+ self.results.transcription_results.append(
270
+ TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
271
+ )
272
+ self.logger.debug(f"Transcription completed for {name}")
273
+
274
+ except Exception as e:
275
+ self.logger.error(f"Transcription failed for {name}: {str(e)}", exc_info=True)
276
+ continue
277
+
278
+ if not self.results.transcription_results:
279
+ self.logger.warning("No successful transcriptions from any provider")
280
+
281
+ def correct_lyrics(self) -> None:
282
+ """Run lyrics correction using transcription and internet lyrics."""
283
+ self.logger.info("Starting lyrics correction process")
284
+
285
+ try:
286
+ # Run correction
287
+ corrected_data = self.corrector.run(
288
+ transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
289
+ )
290
+
291
+ # Store corrected results
292
+ self.results.transcription_corrected = corrected_data
293
+ self.logger.info("Lyrics correction completed")
294
+
295
+ except Exception as e:
296
+ self.logger.error(f"Failed to correct lyrics: {str(e)}", exc_info=True)
297
+
298
+ def generate_outputs(self) -> None:
299
+ """Generate output files."""
300
+ self.logger.info("Generating output files")
301
+
302
+ try:
303
+ output_files = self.output_generator.generate_outputs(
304
+ transcription_corrected=self.results.transcription_corrected,
305
+ lyrics_results=self.results.lyrics_results,
306
+ output_prefix=self.output_prefix,
307
+ audio_filepath=self.audio_filepath,
308
+ )
309
+
310
+ # Store output paths - access attributes directly instead of using .get()
311
+ self.results.lrc_filepath = output_files.lrc
312
+ self.results.ass_filepath = output_files.ass
313
+ self.results.video_filepath = output_files.video
314
+
315
+ except Exception as e:
316
+ self.logger.error(f"Failed to generate outputs: {str(e)}")
317
+ raise
lyrics_transcriber-0.30.1/lyrics_transcriber/correction/base_strategy.py
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Dict, List, Protocol
3
+
4
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
5
+ from ..transcribers.base_transcriber import LyricsSegment, TranscriptionResult
6
+
7
+
8
+ @dataclass
9
+ class CorrectionResult:
10
+ """Container for correction results."""
11
+
12
+ segments: List[LyricsSegment]
13
+ text: str
14
+ confidence: float
15
+ corrections_made: int
16
+ source_mapping: Dict[str, str] # Maps corrected words to their source
17
+ metadata: Dict[str, Any]
18
+
19
+
20
+ class CorrectionStrategy(Protocol):
21
+ """Interface for different lyrics correction strategies."""
22
+
23
+ def correct(
24
+ self,
25
+ transcription_results: List[TranscriptionResult],
26
+ lyrics_results: List[LyricsData],
27
+ ) -> CorrectionResult:
28
+ """Apply correction strategy to transcribed lyrics."""
29
+ ... # pragma: no cover
lyrics_transcriber-0.30.1/lyrics_transcriber/correction/corrector.py
@@ -0,0 +1,52 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Dict, List, Optional, Protocol
3
+ import logging
4
+
5
+ from lyrics_transcriber.transcribers.base_transcriber import TranscriptionResult
6
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
7
+ from .strategy_diff import DiffBasedCorrector
8
+ from .base_strategy import CorrectionResult, CorrectionStrategy
9
+
10
+
11
+ class LyricsCorrector:
12
+ """
13
+ Coordinates lyrics correction process using multiple data sources
14
+ and correction strategies.
15
+ """
16
+
17
+ def __init__(
18
+ self,
19
+ correction_strategy: Optional[CorrectionStrategy] = None,
20
+ logger: Optional[logging.Logger] = None,
21
+ ):
22
+ self.logger = logger or logging.getLogger(__name__)
23
+ self.correction_strategy = correction_strategy or DiffBasedCorrector(logger=self.logger)
24
+
25
+ def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
26
+ """Execute the correction process using configured strategy."""
27
+ if not transcription_results:
28
+ self.logger.error("No transcription results available")
29
+ raise ValueError("No primary transcription data available")
30
+
31
+ try:
32
+ self.logger.debug(f"Running correction with strategy: {self.correction_strategy.__class__.__name__}")
33
+
34
+ result = self.correction_strategy.correct(
35
+ transcription_results=transcription_results,
36
+ lyrics_results=lyrics_results,
37
+ )
38
+
39
+ self.logger.debug(f"Correction completed. Made {result.corrections_made} corrections")
40
+ return result
41
+
42
+ except Exception as e:
43
+ self.logger.error(f"Correction failed: {str(e)}", exc_info=True)
44
+ # Return uncorrected transcription as fallback
45
+ return CorrectionResult(
46
+ segments=transcription_results[0].result.segments,
47
+ text=transcription_results[0].result.text,
48
+ confidence=1.0,
49
+ corrections_made=0,
50
+ source_mapping={},
51
+ metadata=transcription_results[0].result.metadata or {},
52
+ )