lyrics-transcriber 0.30.0__tar.gz → 0.32.1__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (96)
  1. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/PKG-INFO +14 -3
  2. lyrics_transcriber-0.32.1/lyrics_transcriber/__init__.py +4 -0
  3. lyrics_transcriber-0.30.0/lyrics_transcriber/cli/main.py → lyrics_transcriber-0.32.1/lyrics_transcriber/cli/cli_main.py +47 -14
  4. lyrics_transcriber-0.32.1/lyrics_transcriber/core/config.py +35 -0
  5. lyrics_transcriber-0.32.1/lyrics_transcriber/core/controller.py +281 -0
  6. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/anchor_sequence.py +471 -0
  7. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/corrector.py +256 -0
  8. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/base.py +30 -0
  9. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
  10. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
  11. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
  12. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
  13. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/repeat.py +71 -0
  14. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
  15. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
  16. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
  17. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers/word_operations.py +135 -0
  18. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/phrase_analyzer.py +426 -0
  19. lyrics_transcriber-0.32.1/lyrics_transcriber/correction/text_utils.py +30 -0
  20. lyrics_transcriber-0.32.1/lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
  21. lyrics_transcriber-0.32.1/lyrics_transcriber/lyrics/genius.py +73 -0
  22. lyrics_transcriber-0.32.1/lyrics_transcriber/lyrics/spotify.py +82 -0
  23. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/__init__.py +21 -0
  24. {lyrics_transcriber-0.30.0/lyrics_transcriber/output → lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass}/ass.py +150 -690
  25. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  26. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/config.py +37 -0
  27. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/constants.py +23 -0
  28. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/event.py +94 -0
  29. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/formatters.py +132 -0
  30. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/lyrics_line.py +219 -0
  31. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  32. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/section_detector.py +89 -0
  33. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/section_screen.py +106 -0
  34. lyrics_transcriber-0.32.1/lyrics_transcriber/output/ass/style.py +187 -0
  35. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdg.py +503 -0
  36. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  37. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  38. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
  39. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/config.py +151 -0
  40. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  41. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  42. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  43. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/render.py +346 -0
  44. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  45. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  46. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  47. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  48. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  49. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  50. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  51. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  52. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  53. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  54. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  55. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  56. lyrics_transcriber-0.32.1/lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  57. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  58. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  59. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  60. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  61. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  62. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/arial.ttf +0 -0
  63. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  64. lyrics_transcriber-0.32.1/lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  65. lyrics_transcriber-0.32.1/lyrics_transcriber/output/generator.py +179 -0
  66. lyrics_transcriber-0.32.1/lyrics_transcriber/output/lyrics_file.py +102 -0
  67. lyrics_transcriber-0.32.1/lyrics_transcriber/output/plain_text.py +91 -0
  68. lyrics_transcriber-0.32.1/lyrics_transcriber/output/segment_resizer.py +416 -0
  69. lyrics_transcriber-0.32.1/lyrics_transcriber/output/subtitles.py +331 -0
  70. lyrics_transcriber-0.32.1/lyrics_transcriber/output/video.py +219 -0
  71. lyrics_transcriber-0.32.1/lyrics_transcriber/review/__init__.py +1 -0
  72. lyrics_transcriber-0.32.1/lyrics_transcriber/review/server.py +138 -0
  73. lyrics_transcriber-0.32.1/lyrics_transcriber/storage/__init__.py +0 -0
  74. lyrics_transcriber-0.32.1/lyrics_transcriber/storage/dropbox.py +225 -0
  75. lyrics_transcriber-0.32.1/lyrics_transcriber/transcribers/audioshake.py +217 -0
  76. lyrics_transcriber-0.32.1/lyrics_transcriber/transcribers/base_transcriber.py +149 -0
  77. lyrics_transcriber-0.32.1/lyrics_transcriber/transcribers/whisper.py +320 -0
  78. lyrics_transcriber-0.32.1/lyrics_transcriber/types.py +454 -0
  79. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/pyproject.toml +14 -3
  80. lyrics_transcriber-0.30.0/lyrics_transcriber/__init__.py +0 -3
  81. lyrics_transcriber-0.30.0/lyrics_transcriber/core/controller.py +0 -283
  82. lyrics_transcriber-0.30.0/lyrics_transcriber/core/corrector.py +0 -56
  83. lyrics_transcriber-0.30.0/lyrics_transcriber/core/fetcher.py +0 -143
  84. lyrics_transcriber-0.30.0/lyrics_transcriber/output/generator.py +0 -210
  85. lyrics_transcriber-0.30.0/lyrics_transcriber/output/subtitles.py +0 -305
  86. lyrics_transcriber-0.30.0/lyrics_transcriber/storage/dropbox.py +0 -249
  87. lyrics_transcriber-0.30.0/lyrics_transcriber/storage/tokens.py +0 -116
  88. lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/audioshake.py +0 -151
  89. lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/base.py +0 -31
  90. lyrics_transcriber-0.30.0/lyrics_transcriber/transcribers/whisper.py +0 -186
  91. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/LICENSE +0 -0
  92. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/README.md +0 -0
  93. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/lyrics_transcriber/cli/__init__.py +0 -0
  94. {lyrics_transcriber-0.30.0 → lyrics_transcriber-0.32.1}/lyrics_transcriber/core/__init__.py +0 -0
  95. {lyrics_transcriber-0.30.0/lyrics_transcriber/output → lyrics_transcriber-0.32.1/lyrics_transcriber/correction/handlers}/__init__.py +0 -0
  96. {lyrics_transcriber-0.30.0/lyrics_transcriber/storage → lyrics_transcriber-0.32.1/lyrics_transcriber/output}/__init__.py +0 -0
@@ -1,8 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: lyrics-transcriber
3
- Version: 0.30.0
3
+ Version: 0.32.1
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
- Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
5
  License: MIT
7
6
  Author: Andrew Beveridge
8
7
  Author-email: andrew@beveridge.uk
@@ -14,13 +13,25 @@ Classifier: Programming Language :: Python :: 3.10
14
13
  Classifier: Programming Language :: Python :: 3.11
15
14
  Classifier: Programming Language :: Python :: 3.12
16
15
  Requires-Dist: dropbox (>=12)
16
+ Requires-Dist: fastapi (>=0.115.6,<0.116.0)
17
17
  Requires-Dist: karaoke-lyrics-processor (>=0.4)
18
18
  Requires-Dist: lyricsgenius (>=3)
19
+ Requires-Dist: metaphone (>=0.6,<0.7)
20
+ Requires-Dist: nltk (>=3.9.1,<4.0.0)
19
21
  Requires-Dist: pydub (>=0.25)
20
22
  Requires-Dist: python-dotenv (>=1)
23
+ Requires-Dist: python-levenshtein (>=0.26.1,<0.27.0)
21
24
  Requires-Dist: python-slugify (>=8)
25
+ Requires-Dist: spacy (>=3.8.3,<4.0.0)
26
+ Requires-Dist: spacy-syllables (>=3.0.2,<4.0.0)
27
+ Requires-Dist: syllables (>=1.0.9,<2.0.0)
22
28
  Requires-Dist: syrics (>=0)
29
+ Requires-Dist: torch (>=2.5.1,<3.0.0)
30
+ Requires-Dist: tqdm (>=4.67.1,<5.0.0)
31
+ Requires-Dist: transformers (>=4.47.1,<5.0.0)
32
+ Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
23
33
  Project-URL: Documentation, https://github.com/karaokenerds/python-lyrics-transcriber/blob/main/README.md
34
+ Project-URL: Homepage, https://github.com/karaokenerds/python-lyrics-transcriber
24
35
  Project-URL: Repository, https://github.com/karaokenerds/python-lyrics-transcriber
25
36
  Description-Content-Type: text/markdown
26
37
 
@@ -0,0 +1,4 @@
1
+ from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
2
+ from lyrics_transcriber.core.controller import LyricsTranscriber
3
+
4
+ __all__ = ["LyricsTranscriber", "TranscriberConfig", "LyricsConfig", "OutputConfig"]
@@ -60,9 +60,14 @@ def create_arg_parser() -> argparse.ArgumentParser:
60
60
  output_group.add_argument(
61
61
  "--cache_dir",
62
62
  type=Path,
63
- default=Path("/tmp/lyrics-transcriber-cache/"),
64
63
  help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
65
64
  )
65
+ output_group.add_argument(
66
+ "--output_styles_json",
67
+ type=Path,
68
+ help="JSON file containing output style configurations for CDG and video generation",
69
+ )
70
+ output_group.add_argument("--generate_cdg", action="store_true", help="Generate CDG karaoke files")
66
71
 
67
72
  # Video options
68
73
  video_group = parser.add_argument_group("Video Options")
@@ -70,16 +75,22 @@ def create_arg_parser() -> argparse.ArgumentParser:
70
75
  video_group.add_argument(
71
76
  "--video_resolution", choices=["4k", "1080p", "720p", "360p"], default="360p", help="Resolution of the karaoke video. Default: 360p"
72
77
  )
73
- video_group.add_argument("--video_background_image", type=Path, help="Image file to use for karaoke video background")
74
- video_group.add_argument(
75
- "--video_background_color",
76
- default="black",
77
- help="Color for karaoke video background (hex format or FFmpeg color name). Default: black",
78
- )
79
78
 
80
79
  return parser
81
80
 
82
81
 
82
+ def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
83
+ """Parse and process command line arguments."""
84
+ # Use provided args_list for testing, otherwise use sys.argv
85
+ args = parser.parse_args(args_list)
86
+
87
+ # Set default cache_dir if not provided
88
+ if not hasattr(args, "cache_dir") or args.cache_dir is None:
89
+ args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
90
+
91
+ return args
92
+
93
+
83
94
  def get_config_from_env() -> Dict[str, str]:
84
95
  """Load configuration from environment variables."""
85
96
  load_dotenv()
@@ -121,12 +132,12 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
121
132
  )
122
133
 
123
134
  output_config = OutputConfig(
124
- output_dir=str(args.output_dir) if args.output_dir else None,
135
+ output_styles_json=str(args.output_styles_json),
136
+ output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
125
137
  cache_dir=str(args.cache_dir),
126
138
  render_video=args.render_video,
139
+ generate_cdg=args.generate_cdg,
127
140
  video_resolution=args.video_resolution,
128
- video_background_image=str(args.video_background_image) if args.video_background_image else None,
129
- video_background_color=args.video_background_color,
130
141
  )
131
142
 
132
143
  return transcriber_config, lyrics_config, output_config
@@ -151,7 +162,7 @@ def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, log
151
162
  def main() -> None:
152
163
  """Main entry point for the CLI."""
153
164
  parser = create_arg_parser()
154
- args = parser.parse_args()
165
+ args = parse_args(parser)
155
166
 
156
167
  # Set up logging first
157
168
  logger = setup_logging(args.log_level)
@@ -182,13 +193,35 @@ def main() -> None:
182
193
  # Log results
183
194
  logger.info("*** Success! ***")
184
195
 
196
+ # Log all generated output files
197
+ if results.original_txt:
198
+ logger.info(f"Generated original transcription: {results.original_txt}")
199
+ if results.corrections_json:
200
+ logger.info(f"Generated corrections data: {results.corrections_json}")
201
+
202
+ if results.corrected_txt:
203
+ logger.info(f"Generated corrected lyrics: {results.corrected_txt}")
185
204
  if results.lrc_filepath:
186
205
  logger.info(f"Generated LRC file: {results.lrc_filepath}")
206
+
207
+ if results.cdg_filepath:
208
+ logger.info(f"Generated CDG file: {results.cdg_filepath}")
209
+ if results.mp3_filepath:
210
+ logger.info(f"Generated MP3 file: {results.mp3_filepath}")
211
+ if results.cdg_zip_filepath:
212
+ logger.info(f"Generated CDG ZIP archive: {results.cdg_zip_filepath}")
213
+
187
214
  if results.ass_filepath:
188
- logger.info(f"Generated ASS file: {results.ass_filepath}")
215
+ logger.info(f"Generated ASS subtitles: {results.ass_filepath}")
189
216
  if results.video_filepath:
190
- logger.info(f"Generated video file: {results.video_filepath}")
217
+ logger.info(f"Generated video: {results.video_filepath}")
191
218
 
192
219
  except Exception as e:
193
- logger.error(f"Processing failed: {str(e)}")
220
+ # Get the full exception traceback
221
+ import traceback
222
+
223
+ error_details = traceback.format_exc()
224
+
225
+ # Log both the error message and the full traceback
226
+ logger.error(f"Processing failed: {str(e)}\n\nFull traceback:\n{error_details}")
194
227
  exit(1)
@@ -0,0 +1,35 @@
1
+ import os
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Dict, Optional
4
+
5
+
6
+ @dataclass
7
+ class TranscriberConfig:
8
+ """Configuration for transcription services."""
9
+
10
+ audioshake_api_token: Optional[str] = None
11
+ runpod_api_key: Optional[str] = None
12
+ whisper_runpod_id: Optional[str] = None
13
+
14
+
15
+ @dataclass
16
+ class LyricsConfig:
17
+ """Configuration for lyrics services."""
18
+
19
+ genius_api_token: Optional[str] = None
20
+ spotify_cookie: Optional[str] = None
21
+
22
+
23
+ @dataclass
24
+ class OutputConfig:
25
+ """Configuration for output generation."""
26
+
27
+ output_styles_json: str
28
+ max_line_length: int = 36
29
+ styles: Dict[str, Any] = field(default_factory=dict)
30
+ output_dir: Optional[str] = os.getcwd()
31
+ cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
32
+ render_video: bool = False
33
+ generate_cdg: bool = False
34
+ video_resolution: str = "360p"
35
+ enable_review: bool = True
@@ -0,0 +1,281 @@
1
+ import os
2
+ import logging
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, Optional, List
5
+ from lyrics_transcriber.types import (
6
+ LyricsData,
7
+ TranscriptionResult,
8
+ CorrectionResult,
9
+ )
10
+ from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
11
+ from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
12
+ from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
13
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
14
+ from lyrics_transcriber.lyrics.genius import GeniusProvider
15
+ from lyrics_transcriber.lyrics.spotify import SpotifyProvider
16
+ from lyrics_transcriber.output.generator import OutputGenerator
17
+ from lyrics_transcriber.correction.corrector import LyricsCorrector
18
+ from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
19
+
20
+
21
+ @dataclass
22
+ class LyricsControllerResult:
23
+ """Holds the results of the transcription and correction process."""
24
+
25
+ # Results from different sources
26
+ lyrics_results: List[LyricsData] = field(default_factory=list)
27
+ transcription_results: List[TranscriptionResult] = field(default_factory=list)
28
+
29
+ # Corrected results
30
+ transcription_corrected: Optional[CorrectionResult] = None
31
+
32
+ # Output files
33
+ lrc_filepath: Optional[str] = None
34
+ ass_filepath: Optional[str] = None
35
+ video_filepath: Optional[str] = None
36
+ mp3_filepath: Optional[str] = None
37
+ cdg_filepath: Optional[str] = None
38
+ cdg_zip_filepath: Optional[str] = None
39
+ original_txt: Optional[str] = None
40
+ corrected_txt: Optional[str] = None
41
+ corrections_json: Optional[str] = None
42
+
43
+
44
+ class LyricsTranscriber:
45
+ """
46
+ Controller class that orchestrates the lyrics transcription workflow:
47
+ 1. Fetch lyrics from internet sources
48
+ 2. Run multiple transcription methods
49
+ 3. Correct transcribed lyrics using fetched lyrics
50
+ 4. Generate output formats (LRC, ASS, video)
51
+ """
52
+
53
+ def __init__(
54
+ self,
55
+ audio_filepath: str,
56
+ artist: Optional[str] = None,
57
+ title: Optional[str] = None,
58
+ transcriber_config: Optional[TranscriberConfig] = None,
59
+ lyrics_config: Optional[LyricsConfig] = None,
60
+ output_config: Optional[OutputConfig] = None,
61
+ transcribers: Optional[Dict[str, BaseTranscriber]] = None,
62
+ lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
63
+ corrector: Optional[LyricsCorrector] = None,
64
+ output_generator: Optional[OutputGenerator] = None,
65
+ logger: Optional[logging.Logger] = None,
66
+ log_level: int = logging.DEBUG,
67
+ log_formatter: Optional[logging.Formatter] = None,
68
+ ):
69
+ # Set up logging
70
+ self.logger = logger or logging.getLogger(__name__)
71
+ if not logger:
72
+ self.logger.setLevel(log_level)
73
+ if not self.logger.handlers:
74
+ handler = logging.StreamHandler()
75
+ formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
76
+ handler.setFormatter(formatter)
77
+ self.logger.addHandler(handler)
78
+
79
+ self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")
80
+
81
+ # Store configs (with defaults if not provided)
82
+ self.transcriber_config = transcriber_config or TranscriberConfig()
83
+ self.lyrics_config = lyrics_config or LyricsConfig()
84
+ self.output_config = output_config or OutputConfig()
85
+
86
+ # Basic settings
87
+ self.audio_filepath = audio_filepath
88
+ self.artist = artist
89
+ self.title = title
90
+ self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
91
+
92
+ # Add after creating necessary folders
93
+ self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
94
+ self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
95
+
96
+ # Create necessary folders
97
+ os.makedirs(self.output_config.cache_dir, exist_ok=True)
98
+ os.makedirs(self.output_config.output_dir, exist_ok=True)
99
+
100
+ # Initialize results
101
+ self.results = LyricsControllerResult()
102
+
103
+ # Initialize components (with dependency injection)
104
+ self.transcribers = transcribers or self._initialize_transcribers()
105
+ self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
106
+ self.corrector = corrector or LyricsCorrector(cache_dir=self.output_config.cache_dir, logger=self.logger)
107
+ self.output_generator = output_generator or self._initialize_output_generator()
108
+
109
+ def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
110
+ """Initialize available transcription services."""
111
+ transcribers = {}
112
+
113
+ # Add debug logging for config values
114
+ self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
115
+ self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
116
+
117
+ if self.transcriber_config.audioshake_api_token:
118
+ self.logger.debug("Initializing AudioShake transcriber")
119
+ transcribers["audioshake"] = {
120
+ "instance": AudioShakeTranscriber(
121
+ cache_dir=self.output_config.cache_dir,
122
+ config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
123
+ logger=self.logger,
124
+ ),
125
+ "priority": 1, # AudioShake has highest priority
126
+ }
127
+ else:
128
+ self.logger.debug("Skipping AudioShake transcriber - no API token provided")
129
+
130
+ if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
131
+ self.logger.debug("Initializing Whisper transcriber")
132
+ transcribers["whisper"] = {
133
+ "instance": WhisperTranscriber(
134
+ cache_dir=self.output_config.cache_dir,
135
+ config=WhisperConfig(
136
+ runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
137
+ ),
138
+ logger=self.logger,
139
+ ),
140
+ "priority": 2, # Whisper has lower priority
141
+ }
142
+ else:
143
+ self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
144
+
145
+ return transcribers
146
+
147
+ def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
148
+ """Initialize available lyrics providers."""
149
+ providers = {}
150
+
151
+ # Create provider config with all necessary parameters
152
+ provider_config = LyricsProviderConfig(
153
+ genius_api_token=self.lyrics_config.genius_api_token,
154
+ spotify_cookie=self.lyrics_config.spotify_cookie,
155
+ cache_dir=self.output_config.cache_dir,
156
+ audio_filepath=self.audio_filepath,
157
+ )
158
+
159
+ if provider_config.genius_api_token:
160
+ self.logger.debug("Initializing Genius lyrics provider")
161
+ providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
162
+ else:
163
+ self.logger.debug("Skipping Genius provider - no API token provided")
164
+
165
+ if provider_config.spotify_cookie:
166
+ self.logger.debug("Initializing Spotify lyrics provider")
167
+ providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
168
+ else:
169
+ self.logger.debug("Skipping Spotify provider - no cookie provided")
170
+
171
+ return providers
172
+
173
+ def _initialize_output_generator(self) -> OutputGenerator:
174
+ """Initialize output generation service."""
175
+ return OutputGenerator(config=self.output_config, logger=self.logger)
176
+
177
+ def process(self) -> LyricsControllerResult:
178
+ """
179
+ Main processing method that orchestrates the entire workflow.
180
+
181
+ Returns:
182
+ LyricsControllerResult containing all outputs and generated files.
183
+
184
+ Raises:
185
+ Exception: If a critical error occurs during processing.
186
+ """
187
+ # Step 1: Fetch lyrics if artist and title are provided
188
+ if self.artist and self.title:
189
+ self.fetch_lyrics()
190
+
191
+ # Step 2: Run transcription
192
+ self.transcribe()
193
+
194
+ # Step 3: Process and correct lyrics
195
+ self.correct_lyrics()
196
+
197
+ # Step 4: Generate outputs
198
+ self.generate_outputs()
199
+
200
+ self.logger.info("Processing completed successfully")
201
+ return self.results
202
+
203
+ def fetch_lyrics(self) -> None:
204
+ """Fetch lyrics from available providers."""
205
+ self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")
206
+
207
+ for name, provider in self.lyrics_providers.items():
208
+ try:
209
+ result = provider.fetch_lyrics(self.artist, self.title)
210
+ if result:
211
+ self.results.lyrics_results.append(result)
212
+ self.logger.info(f"Successfully fetched lyrics from {name}")
213
+
214
+ except Exception as e:
215
+ self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")
216
+ continue
217
+
218
+ if not self.results.lyrics_results:
219
+ self.logger.warning("No lyrics found from any source")
220
+
221
+ def transcribe(self) -> None:
222
+ """Run transcription using all available transcribers."""
223
+ self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")
224
+
225
+ for name, transcriber_info in self.transcribers.items():
226
+ self.logger.info(f"Running transcription with {name}")
227
+ result = transcriber_info["instance"].transcribe(self.audio_filepath)
228
+ if result:
229
+ # Add the transcriber name and priority to the result
230
+ self.results.transcription_results.append(
231
+ TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
232
+ )
233
+ self.logger.debug(f"Transcription completed for {name}")
234
+
235
+ if not self.results.transcription_results:
236
+ self.logger.warning("No successful transcriptions from any provider")
237
+
238
+ def correct_lyrics(self) -> None:
239
+ """Run lyrics correction using transcription and internet lyrics."""
240
+ self.logger.info("Starting lyrics correction process")
241
+
242
+ # Run correction
243
+ corrected_data = self.corrector.run(
244
+ transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
245
+ )
246
+
247
+ # Store corrected results
248
+ self.results.transcription_corrected = corrected_data
249
+ self.logger.info("Lyrics correction completed")
250
+
251
+ # Add human review step
252
+ if self.output_config.enable_review: # We'll need to add this config option
253
+ from ..review import start_review_server
254
+
255
+ self.logger.info("Starting human review process")
256
+ self.results.transcription_corrected = start_review_server(corrected_data)
257
+ self.logger.info("Human review completed")
258
+
259
+ def generate_outputs(self) -> None:
260
+ """Generate output files."""
261
+ self.logger.info("Generating output files")
262
+
263
+ output_files = self.output_generator.generate_outputs(
264
+ transcription_corrected=self.results.transcription_corrected,
265
+ lyrics_results=self.results.lyrics_results,
266
+ output_prefix=self.output_prefix,
267
+ audio_filepath=self.audio_filepath,
268
+ artist=self.artist,
269
+ title=self.title,
270
+ )
271
+
272
+ # Store all output paths in results
273
+ self.results.lrc_filepath = output_files.lrc
274
+ self.results.ass_filepath = output_files.ass
275
+ self.results.video_filepath = output_files.video
276
+ self.results.original_txt = output_files.original_txt
277
+ self.results.corrected_txt = output_files.corrected_txt
278
+ self.results.corrections_json = output_files.corrections_json
279
+ self.results.cdg_filepath = output_files.cdg
280
+ self.results.mp3_filepath = output_files.mp3
281
+ self.results.cdg_zip_filepath = output_files.cdg_zip