lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,10 +57,10 @@ def create_arg_parser() -> argparse.ArgumentParser:
57
57
  # Output options
58
58
  output_group = parser.add_argument_group("Output Options")
59
59
  output_group.add_argument("--output_dir", type=Path, help="Directory where output files will be saved. Default: current directory")
60
+
60
61
  output_group.add_argument(
61
62
  "--cache_dir",
62
63
  type=Path,
63
- default=Path("/tmp/lyrics-transcriber-cache/"),
64
64
  help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
65
65
  )
66
66
 
@@ -80,6 +80,18 @@ def create_arg_parser() -> argparse.ArgumentParser:
80
80
  return parser
81
81
 
82
82
 
83
+ def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
84
+ """Parse and process command line arguments."""
85
+ # Use provided args_list for testing, otherwise use sys.argv
86
+ args = parser.parse_args(args_list)
87
+
88
+ # Set default cache_dir if not provided
89
+ if not hasattr(args, "cache_dir") or args.cache_dir is None:
90
+ args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
91
+
92
+ return args
93
+
94
+
83
95
  def get_config_from_env() -> Dict[str, str]:
84
96
  """Load configuration from environment variables."""
85
97
  load_dotenv()
@@ -121,7 +133,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
121
133
  )
122
134
 
123
135
  output_config = OutputConfig(
124
- output_dir=str(args.output_dir) if args.output_dir else None,
136
+ output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
125
137
  cache_dir=str(args.cache_dir),
126
138
  render_video=args.render_video,
127
139
  video_resolution=args.video_resolution,
@@ -151,7 +163,7 @@ def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, log
151
163
  def main() -> None:
152
164
  """Main entry point for the CLI."""
153
165
  parser = create_arg_parser()
154
- args = parser.parse_args()
166
+ args = parse_args(parser)
155
167
 
156
168
  # Set up logging first
157
169
  logger = setup_logging(args.log_level)
@@ -1,13 +1,15 @@
1
1
  import os
2
2
  import logging
3
- from dataclasses import dataclass
4
- from typing import Dict, Optional, List
5
- from ..transcribers.base import BaseTranscriber
6
- from ..transcribers.audioshake import AudioShakeTranscriber
7
- from ..transcribers.whisper import WhisperTranscriber
8
- from .fetcher import LyricsFetcher
9
- from ..output.generator import OutputGenerator
10
- from .corrector import LyricsTranscriptionCorrector
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, Optional, Any, List
5
+ from ..transcribers.base_transcriber import BaseTranscriber, TranscriptionResult
6
+ from ..transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
7
+ from ..transcribers.whisper import WhisperTranscriber, WhisperConfig
8
+ from ..lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig, LyricsData
9
+ from ..lyrics.genius import GeniusProvider
10
+ from ..lyrics.spotify import SpotifyProvider
11
+ from ..output.generator import OutputGenerator, OutputGeneratorConfig
12
+ from ..correction.corrector import LyricsCorrector, CorrectionResult
11
13
 
12
14
 
13
15
  @dataclass
@@ -31,8 +33,8 @@ class LyricsConfig:
31
33
  class OutputConfig:
32
34
  """Configuration for output generation."""
33
35
 
34
- output_dir: Optional[str] = None
35
- cache_dir: str = "/tmp/lyrics-transcriber-cache/"
36
+ output_dir: Optional[str] = os.getcwd()
37
+ cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
36
38
  render_video: bool = False
37
39
  video_resolution: str = "360p"
38
40
  video_background_image: Optional[str] = None
@@ -40,21 +42,15 @@ class OutputConfig:
40
42
 
41
43
 
42
44
  @dataclass
43
- class TranscriptionResult:
45
+ class LyricsControllerResult:
44
46
  """Holds the results of the transcription and correction process."""
45
47
 
46
- # Lyrics from internet sources
47
- lyrics_text: Optional[str] = None
48
- lyrics_source: Optional[str] = None
49
- lyrics_genius: Optional[str] = None
50
- lyrics_spotify: Optional[str] = None
51
- spotify_lyrics_data: Optional[Dict] = None
48
+ # Results from different sources
49
+ lyrics_results: List[LyricsData] = field(default_factory=list)
50
+ transcription_results: List[TranscriptionResult] = field(default_factory=list)
52
51
 
53
- # Transcription results
54
- transcription_whisper: Optional[Dict] = None
55
- transcription_audioshake: Optional[Dict] = None
56
- transcription_primary: Optional[Dict] = None
57
- transcription_corrected: Optional[Dict] = None
52
+ # Corrected results
53
+ transcription_corrected: Optional[CorrectionResult] = None
58
54
 
59
55
  # Output files
60
56
  lrc_filepath: Optional[str] = None
@@ -79,8 +75,9 @@ class LyricsTranscriber:
79
75
  transcriber_config: Optional[TranscriberConfig] = None,
80
76
  lyrics_config: Optional[LyricsConfig] = None,
81
77
  output_config: Optional[OutputConfig] = None,
82
- lyrics_fetcher: Optional[LyricsFetcher] = None,
83
- corrector: Optional[LyricsTranscriptionCorrector] = None,
78
+ transcribers: Optional[Dict[str, BaseTranscriber]] = None,
79
+ lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
80
+ corrector: Optional[LyricsCorrector] = None,
84
81
  output_generator: Optional[OutputGenerator] = None,
85
82
  logger: Optional[logging.Logger] = None,
86
83
  log_level: int = logging.DEBUG,
@@ -109,46 +106,92 @@ class LyricsTranscriber:
109
106
  self.title = title
110
107
  self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
111
108
 
109
+ # Add after creating necessary folders
110
+ self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
111
+ self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
112
+
112
113
  # Create necessary folders
113
114
  os.makedirs(self.output_config.cache_dir, exist_ok=True)
114
- if self.output_config.output_dir:
115
- os.makedirs(self.output_config.output_dir, exist_ok=True)
115
+ os.makedirs(self.output_config.output_dir, exist_ok=True)
116
116
 
117
117
  # Initialize results
118
- self.results = TranscriptionResult()
118
+ self.results = LyricsControllerResult()
119
119
 
120
120
  # Initialize components (with dependency injection)
121
- self.transcribers = self._initialize_transcribers()
122
- self.lyrics_fetcher = lyrics_fetcher or self._initialize_lyrics_fetcher()
123
- self.corrector = corrector or LyricsTranscriptionCorrector(logger=self.logger)
121
+ self.transcribers = transcribers or self._initialize_transcribers()
122
+ self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
123
+ self.corrector = corrector or LyricsCorrector(logger=self.logger)
124
124
  self.output_generator = output_generator or self._initialize_output_generator()
125
125
 
126
126
  def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
127
127
  """Initialize available transcription services."""
128
128
  transcribers = {}
129
129
 
130
+ # Add debug logging for config values
131
+ self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
132
+ self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
133
+
130
134
  if self.transcriber_config.audioshake_api_token:
131
- transcribers["audioshake"] = AudioShakeTranscriber(api_token=self.transcriber_config.audioshake_api_token, logger=self.logger)
135
+ self.logger.debug("Initializing AudioShake transcriber")
136
+ transcribers["audioshake"] = {
137
+ "instance": AudioShakeTranscriber(
138
+ cache_dir=self.output_config.cache_dir,
139
+ config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
140
+ logger=self.logger,
141
+ ),
142
+ "priority": 1, # AudioShake has highest priority
143
+ }
144
+ else:
145
+ self.logger.debug("Skipping AudioShake transcriber - no API token provided")
132
146
 
133
147
  if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
134
- transcribers["whisper"] = WhisperTranscriber(
135
- logger=self.logger,
136
- runpod_api_key=self.transcriber_config.runpod_api_key,
137
- endpoint_id=self.transcriber_config.whisper_runpod_id,
138
- )
148
+ self.logger.debug("Initializing Whisper transcriber")
149
+ transcribers["whisper"] = {
150
+ "instance": WhisperTranscriber(
151
+ cache_dir=self.output_config.cache_dir,
152
+ config=WhisperConfig(
153
+ runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
154
+ ),
155
+ logger=self.logger,
156
+ ),
157
+ "priority": 2, # Whisper has lower priority
158
+ }
159
+ else:
160
+ self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
139
161
 
140
162
  return transcribers
141
163
 
142
- def _initialize_lyrics_fetcher(self) -> LyricsFetcher:
143
- """Initialize lyrics fetching service."""
144
- return LyricsFetcher(
145
- genius_api_token=self.lyrics_config.genius_api_token, spotify_cookie=self.lyrics_config.spotify_cookie, logger=self.logger
164
def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
    """Build the lyrics providers whose credentials are configured.

    One ``LyricsProviderConfig`` is created and shared by every provider.
    A provider is instantiated only when its credential on that config is
    truthy; otherwise a debug message records that it was skipped.

    Returns:
        Mapping of provider name (``"genius"``, ``"spotify"``) to its
        instance. Empty when neither credential is set.
    """
    shared_config = LyricsProviderConfig(
        genius_api_token=self.lyrics_config.genius_api_token,
        spotify_cookie=self.lyrics_config.spotify_cookie,
        cache_dir=self.output_config.cache_dir,
        audio_filepath=self.audio_filepath,
    )

    # (name, credential attribute, provider class, "enabled" log, "skipped" log)
    catalogue = (
        (
            "genius",
            "genius_api_token",
            GeniusProvider,
            "Initializing Genius lyrics provider",
            "Skipping Genius provider - no API token provided",
        ),
        (
            "spotify",
            "spotify_cookie",
            SpotifyProvider,
            "Initializing Spotify lyrics provider",
            "Skipping Spotify provider - no cookie provided",
        ),
    )

    enabled = {}
    for name, credential_attr, provider_cls, start_msg, skip_msg in catalogue:
        # The credential is read lazily, one provider at a time, in order.
        if not getattr(shared_config, credential_attr):
            self.logger.debug(skip_msg)
            continue
        self.logger.debug(start_msg)
        enabled[name] = provider_cls(config=shared_config, logger=self.logger)

    return enabled
189
+
148
190
  def _initialize_output_generator(self) -> OutputGenerator:
149
191
  """Initialize output generation service."""
150
- return OutputGenerator(
151
- logger=self.logger,
192
+
193
+ # Convert OutputConfig to OutputGeneratorConfig
194
+ generator_config = OutputGeneratorConfig(
152
195
  output_dir=self.output_config.output_dir,
153
196
  cache_dir=self.output_config.cache_dir,
154
197
  video_resolution=self.output_config.video_resolution,
@@ -156,12 +199,15 @@ class LyricsTranscriber:
156
199
  video_background_color=self.output_config.video_background_color,
157
200
  )
158
201
 
159
- def process(self) -> TranscriptionResult:
202
+ # Initialize output generator
203
+ return OutputGenerator(config=generator_config, logger=self.logger)
204
+
205
+ def process(self) -> LyricsControllerResult:
160
206
  """
161
207
  Main processing method that orchestrates the entire workflow.
162
208
 
163
209
  Returns:
164
- TranscriptionResult containing all outputs and generated files.
210
+ LyricsControllerResult containing all outputs and generated files.
165
211
 
166
212
  Raises:
167
213
  Exception: If a critical error occurs during processing.
@@ -175,12 +221,10 @@ class LyricsTranscriber:
175
221
  self.transcribe()
176
222
 
177
223
  # Step 3: Process and correct lyrics
178
- if self.results.transcription_primary:
179
- self.correct_lyrics()
224
+ self.correct_lyrics()
180
225
 
181
226
  # Step 4: Generate outputs
182
- if self.results.transcription_corrected:
183
- self.generate_outputs()
227
+ self.generate_outputs()
184
228
 
185
229
  self.logger.info("Processing completed successfully")
186
230
  return self.results
@@ -190,22 +234,22 @@ class LyricsTranscriber:
190
234
  raise
191
235
 
192
236
  def fetch_lyrics(self) -> None:
193
- """Fetch lyrics from online sources."""
237
+ """Fetch lyrics from available providers."""
194
238
  self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")
195
239
 
196
240
  try:
197
- lyrics_result = self.lyrics_fetcher.fetch_lyrics(self.artist, self.title)
198
-
199
- # Update results
200
- self.results.lyrics_text = lyrics_result["lyrics"]
201
- self.results.lyrics_source = lyrics_result["source"]
202
- self.results.lyrics_genius = lyrics_result["genius_lyrics"]
203
- self.results.lyrics_spotify = lyrics_result["spotify_lyrics"]
204
- self.results.spotify_lyrics_data = lyrics_result.get("spotify_lyrics_data")
205
-
206
- if lyrics_result["lyrics"]:
207
- self.logger.info(f"Successfully fetched lyrics from {lyrics_result['source']}")
208
- else:
241
+ for name, provider in self.lyrics_providers.items():
242
+ try:
243
+ result = provider.fetch_lyrics(self.artist, self.title)
244
+ if result:
245
+ self.results.lyrics_results.append(result)
246
+ self.logger.info(f"Successfully fetched lyrics from {name}")
247
+
248
+ except Exception as e:
249
+ self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")
250
+ continue
251
+
252
+ if not self.results.lyrics_results:
209
253
  self.logger.warning("No lyrics found from any source")
210
254
 
211
255
  except Exception as e:
@@ -214,52 +258,42 @@ class LyricsTranscriber:
214
258
 
215
259
  def transcribe(self) -> None:
216
260
  """Run transcription using all available transcribers."""
217
- self.logger.info("Starting transcription process")
261
+ self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")
218
262
 
219
- for name, transcriber in self.transcribers.items():
263
+ for name, transcriber_info in self.transcribers.items():
264
+ self.logger.info(f"Running transcription with {name}")
220
265
  try:
221
- result = transcriber.transcribe(self.audio_filepath)
222
-
223
- # Store result based on transcriber type
224
- if name == "whisper":
225
- self.results.transcription_whisper = result
226
- elif name == "audioshake":
227
- self.results.transcription_audioshake = result
228
-
229
- # Use first successful transcription as primary
230
- if not self.results.transcription_primary:
231
- self.results.transcription_primary = result
266
+ result = transcriber_info["instance"].transcribe(self.audio_filepath)
267
+ if result:
268
+ # Add the transcriber name and priority to the result
269
+ self.results.transcription_results.append(
270
+ TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
271
+ )
272
+ self.logger.debug(f"Transcription completed for {name}")
232
273
 
233
274
  except Exception as e:
234
- self.logger.error(f"Transcription failed for {name}: {str(e)}")
275
+ self.logger.error(f"Transcription failed for {name}: {str(e)}", exc_info=True)
235
276
  continue
236
277
 
278
+ if not self.results.transcription_results:
279
+ self.logger.warning("No successful transcriptions from any provider")
280
+
237
281
  def correct_lyrics(self) -> None:
238
282
  """Run lyrics correction using transcription and internet lyrics."""
239
283
  self.logger.info("Starting lyrics correction process")
240
284
 
241
285
  try:
242
- # Set input data for correction
243
- self.corrector.set_input_data(
244
- spotify_lyrics_data_dict=self.results.spotify_lyrics_data,
245
- spotify_lyrics_text=self.results.lyrics_spotify,
246
- genius_lyrics_text=self.results.lyrics_genius,
247
- transcription_data_dict_whisper=self.results.transcription_whisper,
248
- transcription_data_dict_audioshake=self.results.transcription_audioshake,
249
- )
250
-
251
286
  # Run correction
252
- corrected_data = self.corrector.run_corrector()
287
+ corrected_data = self.corrector.run(
288
+ transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
289
+ )
253
290
 
254
291
  # Store corrected results
255
292
  self.results.transcription_corrected = corrected_data
256
293
  self.logger.info("Lyrics correction completed")
257
294
 
258
295
  except Exception as e:
259
- self.logger.error(f"Failed to correct lyrics: {str(e)}")
260
- # Use uncorrected transcription as fallback
261
- self.results.transcription_corrected = self.results.transcription_primary
262
- self.logger.warning("Using uncorrected transcription as fallback")
296
+ self.logger.error(f"Failed to correct lyrics: {str(e)}", exc_info=True)
263
297
 
264
298
  def generate_outputs(self) -> None:
265
299
  """Generate output files."""
@@ -267,16 +301,16 @@ class LyricsTranscriber:
267
301
 
268
302
  try:
269
303
  output_files = self.output_generator.generate_outputs(
270
- transcription_data=self.results.transcription_corrected,
304
+ transcription_corrected=self.results.transcription_corrected,
305
+ lyrics_results=self.results.lyrics_results,
271
306
  output_prefix=self.output_prefix,
272
307
  audio_filepath=self.audio_filepath,
273
- render_video=self.output_config.render_video,
274
308
  )
275
309
 
276
- # Store output paths
277
- self.results.lrc_filepath = output_files.get("lrc")
278
- self.results.ass_filepath = output_files.get("ass")
279
- self.results.video_filepath = output_files.get("video")
310
+ # Store output paths - access attributes directly instead of using .get()
311
+ self.results.lrc_filepath = output_files.lrc
312
+ self.results.ass_filepath = output_files.ass
313
+ self.results.video_filepath = output_files.video
280
314
 
281
315
  except Exception as e:
282
316
  self.logger.error(f"Failed to generate outputs: {str(e)}")
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Dict, List, Protocol
3
+
4
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
5
+ from ..transcribers.base_transcriber import LyricsSegment, TranscriptionResult
6
+
7
+
8
@dataclass
class CorrectionResult:
    """Container for correction results.

    This is the return type of ``CorrectionStrategy.correct``. Every field is
    required; none has a default.
    """

    # Lyrics segments that make up the result.
    segments: List[LyricsSegment]
    # Lyrics as a single string.
    text: str
    # Confidence score for the result (scale not visible here; TODO confirm range).
    confidence: float
    # Count of corrections applied while producing this result.
    corrections_made: int
    source_mapping: Dict[str, str]  # Maps corrected words to their source
    # Free-form extra data; the expected keys are not defined in this module.
    metadata: Dict[str, Any]
18
+
19
+
20
class CorrectionStrategy(Protocol):
    """Interface for different lyrics correction strategies.

    Declared as a ``typing.Protocol``, so conformance is structural: any
    object with a compatible ``correct`` method satisfies it without
    inheriting from this class.
    """

    def correct(
        self,
        transcription_results: List[TranscriptionResult],
        lyrics_results: List[LyricsData],
    ) -> CorrectionResult:
        """Apply correction strategy to transcribed lyrics.

        Args:
            transcription_results: Transcriptions to be corrected.
            lyrics_results: Reference lyrics to correct against.

        Returns:
            A ``CorrectionResult`` describing the corrected lyrics.
        """
        ...  # pragma: no cover
@@ -0,0 +1,52 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Dict, List, Optional, Protocol
3
+ import logging
4
+
5
+ from lyrics_transcriber.transcribers.base_transcriber import TranscriptionResult
6
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
7
+ from .strategy_diff import DiffBasedCorrector
8
+ from .base_strategy import CorrectionResult, CorrectionStrategy
9
+
10
+
11
class LyricsCorrector:
    """
    Coordinates lyrics correction process using multiple data sources
    and correction strategies.
    """

    def __init__(
        self,
        correction_strategy: Optional[CorrectionStrategy] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Set up the corrector.

        Args:
            correction_strategy: Strategy used by ``run``. When ``None`` a
                ``DiffBasedCorrector`` is created.
            logger: Logger to use. When ``None`` the module logger is used.
        """
        self.logger = logger or logging.getLogger(__name__)
        # Compare against None explicitly: an injected strategy that happens
        # to be falsy (e.g. defines __len__ or __bool__) must not be replaced.
        if correction_strategy is None:
            correction_strategy = DiffBasedCorrector(logger=self.logger)
        self.correction_strategy = correction_strategy

    @staticmethod
    def _primary_transcription(transcription_results: List[TranscriptionResult]) -> TranscriptionResult:
        """Pick the transcription to fall back on when correction fails.

        The entry with the lowest ``priority`` value wins (presumably 1 is the
        preferred transcriber; confirm against where priorities are assigned).
        ``min`` returns the first of equal candidates, so ties keep list order.
        """
        try:
            return min(transcription_results, key=lambda item: item.priority)
        except (AttributeError, TypeError):
            # Priorities missing or not comparable: fall back to the first entry.
            return transcription_results[0]

    def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
        """Execute the correction process using configured strategy.

        Args:
            transcription_results: Available transcriptions; must be non-empty.
            lyrics_results: Reference lyrics passed to the strategy.

        Returns:
            The strategy's result. If the strategy raises, an uncorrected
            result built from the highest-priority transcription is returned.

        Raises:
            ValueError: If ``transcription_results`` is empty.
        """
        if not transcription_results:
            self.logger.error("No transcription results available")
            raise ValueError("No primary transcription data available")

        try:
            self.logger.debug(f"Running correction with strategy: {self.correction_strategy.__class__.__name__}")

            result = self.correction_strategy.correct(
                transcription_results=transcription_results,
                lyrics_results=lyrics_results,
            )

            self.logger.debug(f"Correction completed. Made {result.corrections_made} corrections")
            return result

        except Exception as e:
            self.logger.error(f"Correction failed: {str(e)}", exc_info=True)
            # Return uncorrected transcription as fallback. Choose it by
            # priority rather than list position, so the preferred
            # transcriber wins regardless of the order results were appended.
            primary = self._primary_transcription(transcription_results).result
            return CorrectionResult(
                segments=primary.segments,
                text=primary.text,
                # NOTE(review): 1.0 is reported even though nothing was
                # corrected or verified; confirm this is what consumers expect.
                confidence=1.0,
                corrections_made=0,
                source_mapping={},
                metadata=primary.metadata or {},
            )