lyrics-transcriber 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -11,8 +11,9 @@ class AudioShakeTranscriber:
         self.logger = logger
         self.output_prefix = output_prefix

-    def transcribe(self, audio_filepath):
-        self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
+    def start_transcription(self, audio_filepath):
+        """Starts the transcription job and returns the job ID without waiting for completion"""
+        self.logger.info(f"Starting transcription for {audio_filepath} using AudioShake API")

         # Step 1: Upload the audio file
         asset_id = self._upload_file(audio_filepath)
@@ -22,6 +23,12 @@ class AudioShakeTranscriber:
         job_id = self._create_job(asset_id)
         self.logger.info(f"Job created successfully. Job ID: {job_id}")

+        return job_id
+
+    def get_transcription_result(self, job_id):
+        """Gets the results for a previously started job"""
+        self.logger.info(f"Getting results for job ID: {job_id}")
+
         # Step 3: Wait for the job to complete and get the results
         result = self._get_job_result(job_id)
         self.logger.info(f"Job completed. Processing results...")
@@ -29,6 +36,11 @@ class AudioShakeTranscriber:
         # Step 4: Process the result and return in the required format
         return self._process_result(result)

+    def transcribe(self, audio_filepath):
+        """Original method now just combines the two steps"""
+        job_id = self.start_transcription(audio_filepath)
+        return self.get_transcription_result(job_id)
+
     def _upload_file(self, filepath):
         self.logger.info(f"Uploading {filepath} to AudioShake")
         url = f"{self.base_url}/upload"
@@ -77,13 +89,10 @@ class AudioShakeTranscriber:
         output_assets = job_data.get("outputAssets", [])
         self.logger.debug(f"Output assets: {output_assets}")

-        output_asset = next((asset for asset in output_assets if asset["name"] == "transcription.json"), None)
-        if not output_asset:
-            self.logger.warning("'transcription.json' not found, looking for 'alignment.json'")
-            output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
+        output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)

         if not output_asset:
-            self.logger.error("Neither 'transcription.json' nor 'alignment.json' found in job results")
+            self.logger.error("'alignment.json' not found in job results")
             self.logger.error(f"Available output assets: {[asset['name'] for asset in output_assets]}")
             raise Exception("Required output not found in job results")

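The old blocking transcribe() is now split so a caller can start the remote job, do other work, and collect the result later. A minimal sketch of the intended call pattern, using only the constructor arguments and methods shown in this diff (the token and file path are placeholders):

import logging

from lyrics_transcriber.audioshake_transcriber import AudioShakeTranscriber

logger = logging.getLogger(__name__)
audioshake = AudioShakeTranscriber(api_token="YOUR_AUDIOSHAKE_TOKEN", logger=logger, output_prefix="Artist - Title")

# Kick off the remote job; this returns a job ID without waiting for completion
job_id = audioshake.start_transcription("song.flac")

# ... do other work here, e.g. run a local Whisper transcription ...

# Block until the job completes and fetch the processed result
result = audioshake.get_transcription_result(job_id)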
@@ -0,0 +1,57 @@
+import json
+import logging
+from openai import OpenAI
+from typing import Dict, Optional
+
+
+class LyricsTranscriptionCorrector:
+    def __init__(
+        self,
+        logger: Optional[logging.Logger] = None,
+    ):
+        self.logger = logger or logging.getLogger(__name__)
+
+        # Initialize instance variables for input data
+        self.spotify_lyrics_data_dict = None
+        self.spotify_lyrics_text = None
+        self.genius_lyrics_text = None
+        self.transcription_data_dict_whisper = None
+        self.transcription_data_dict_audioshake = None
+
+    def set_input_data(
+        self,
+        spotify_lyrics_data_dict: Optional[Dict] = None,
+        spotify_lyrics_text: Optional[str] = None,
+        genius_lyrics_text: Optional[str] = None,
+        transcription_data_dict_whisper: Optional[Dict] = None,
+        transcription_data_dict_audioshake: Optional[Dict] = None,
+    ) -> None:
+        """Store the input data as instance variables"""
+        self.spotify_lyrics_data_dict = spotify_lyrics_data_dict
+        self.spotify_lyrics_text = spotify_lyrics_text
+        self.genius_lyrics_text = genius_lyrics_text
+        self.transcription_data_dict_whisper = transcription_data_dict_whisper
+        self.transcription_data_dict_audioshake = transcription_data_dict_audioshake
+
+    def run_corrector(self) -> Dict:
+        """
+        Test implementation that replaces every third word with 'YOLO' in the AudioShake transcription.
+        """
+        self.logger.info("Running corrector (test implementation - replacing every 3rd word with YOLO)")
+
+        # Create a deep copy to avoid modifying the original
+        modified_data = json.loads(json.dumps(self.transcription_data_dict_audioshake))
+
+        # Process each segment
+        for segment in modified_data["segments"]:
+            # Replace every third word in the words list
+            for i in range(2, len(segment["words"]), 3):
+                segment["words"][i]["text"] = "YOLO"
+
+            # Reconstruct the segment text from the modified words
+            segment["text"] = " ".join(word["text"] for word in segment["words"])
+
+        # Reconstruct the full text from all segments
+        modified_data["text"] = "".join(segment["text"] for segment in modified_data["segments"])
+
+        return modified_data
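A hedged usage sketch for the new class; the segment/word dictionary shape is inferred from run_corrector() above, and the sample data is invented:

from lyrics_transcriber.corrector import LyricsTranscriptionCorrector

corrector = LyricsTranscriptionCorrector()
corrector.set_input_data(
    transcription_data_dict_audioshake={
        "text": "hello there dear friend of mine",
        "segments": [
            {
                "text": "hello there dear friend of mine",
                "words": [{"text": w} for w in "hello there dear friend of mine".split()],
            }
        ],
    }
)
result = corrector.run_corrector()
# Words at indices 2, 5, ... are replaced: "hello there YOLO friend of YOLO"
print(result["text"])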
@@ -13,9 +13,10 @@ import syrics.api
 from datetime import timedelta
 from .utils import subtitles
 from typing import List, Optional
-from openai import OpenAI
 from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type
 import requests
+from karaoke_lyrics_processor import KaraokeLyricsProcessor
+from .corrector import LyricsTranscriptionCorrector


 class LyricsTranscriber:
@@ -24,18 +25,15 @@ class LyricsTranscriber:
         audio_filepath,
         artist=None,
         title=None,
-        openai_api_key=None,
         audioshake_api_token=None,
         genius_api_token=None,
         spotify_cookie=None,
+        skip_transcription=False,
         output_dir=None,
         cache_dir="/tmp/lyrics-transcriber-cache/",
         log_level=logging.DEBUG,
         log_formatter=None,
         transcription_model="medium",
-        llm_model="gpt-4o",
-        llm_prompt_matching=None,
-        llm_prompt_correction=None,
         render_video=False,
         video_resolution="360p",
         video_background_image=None,
@@ -63,47 +61,11 @@ class LyricsTranscriber:
         self.title = title
         self.song_known = self.artist is not None and self.title is not None

-        self.openai_api_key = os.getenv("OPENAI_API_KEY", default=openai_api_key)
+        self.audioshake_api_token = os.getenv("AUDIOSHAKE_API_TOKEN", default=audioshake_api_token)
         self.genius_api_token = os.getenv("GENIUS_API_TOKEN", default=genius_api_token)
         self.spotify_cookie = os.getenv("SPOTIFY_COOKIE_SP_DC", default=spotify_cookie)
-        self.audioshake_api_token = os.getenv("AUDIOSHAKE_TOKEN", default=audioshake_api_token)

         self.transcription_model = transcription_model
-        self.llm_model = llm_model
-
-        # Use package-relative paths for prompt files
-        if llm_prompt_matching is None:
-            llm_prompt_matching = os.path.join(
-                os.path.dirname(__file__), "llm_prompts", "llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt"
-            )
-        if llm_prompt_correction is None:
-            llm_prompt_correction = os.path.join(
-                os.path.dirname(__file__), "llm_prompts", "llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt"
-            )
-
-        self.llm_prompt_matching = llm_prompt_matching
-        self.llm_prompt_correction = llm_prompt_correction
-
-        if not os.path.exists(self.llm_prompt_matching):
-            raise FileNotFoundError(f"LLM prompt file not found: {self.llm_prompt_matching}")
-        if not os.path.exists(self.llm_prompt_correction):
-            raise FileNotFoundError(f"LLM prompt file not found: {self.llm_prompt_correction}")
-
-        self.openai_client = None
-
-        if self.openai_api_key:
-            self.openai_client = OpenAI(api_key=self.openai_api_key)
-
-            # Uncomment for local models e.g. with ollama
-            # self.openai_client = OpenAI(
-            #     base_url="http://localhost:11434/v1",
-            #     api_key="ollama",
-            # )
-
-            self.openai_client.log = self.log_level
-        else:
-            self.logger.error("No OpenAI API key found, no correction will be applied to transcription")
-
         self.render_video = render_video
         self.video_resolution = video_resolution
         self.video_background_image = video_background_image
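Note the environment variable was renamed from AUDIOSHAKE_TOKEN to AUDIOSHAKE_API_TOKEN; as before, the environment value takes precedence over the constructor argument. A quick illustration of the os.getenv fallback pattern:

import os

os.environ["AUDIOSHAKE_API_TOKEN"] = "token-from-env"

# The environment variable wins; the default is only used when the variable is unset
token = os.getenv("AUDIOSHAKE_API_TOKEN", default="token-from-constructor")
print(token)  # token-from-env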
@@ -137,19 +99,25 @@ class LyricsTranscriber:
             raise FileNotFoundError(f"video_background is not a valid file path: {self.video_background_image}")

         self.outputs = {
-            "transcription_data_dict": None,
-            "transcription_data_filepath": None,
-            "transcribed_lyrics_text": None,
-            "transcribed_lyrics_text_filepath": None,
+            "transcription_data_dict_whisper": None,
+            "transcription_data_whisper_filepath": None,
+            "transcribed_lyrics_text_whisper": None,
+            "transcribed_lyrics_text_whisper_filepath": None,
+            "transcription_data_dict_audioshake": None,
+            "transcription_data_audioshake_filepath": None,
+            "transcribed_lyrics_text_audioshake": None,
+            "transcribed_lyrics_text_audioshake_filepath": None,
+            "transcription_data_dict_primary": None,
+            "transcription_data_primary_filepath": None,
+            "transcribed_lyrics_text_primary": None,
+            "transcribed_lyrics_text_primary_filepath": None,
             "genius_lyrics_text": None,
-            "genius_lyrics_filepath": None,
+            "genius_lyrics_text_filepath": None,
+            "genius_lyrics_processed_filepath": None,
             "spotify_lyrics_data_dict": None,
             "spotify_lyrics_data_filepath": None,
             "spotify_lyrics_text_filepath": None,
-            "llm_token_usage": {"input": 0, "output": 0},
-            "llm_costs_usd": {"input": 0.0, "output": 0.0, "total": 0.0},
-            "llm_transcript": None,
-            "llm_transcript_filepath": None,
+            "spotify_lyrics_processed_filepath": None,
             "corrected_lyrics_text": None,
             "corrected_lyrics_text_filepath": None,
             "midico_lrc_filepath": None,
@@ -168,40 +136,47 @@ class LyricsTranscriber:
         self.output_prefix = f"{artist} - {title}"

+        self.skip_transcription = skip_transcription
+
     def generate(self):
-        self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")
+        self.logger.debug(f"Starting generate() with cache_dir: {self.cache_dir} and output_dir: {self.output_dir}")

-        self.transcribe()
-        self.write_transcribed_lyrics_plain_text()
+        self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")

-        self.write_genius_lyrics_file()
         self.write_spotify_lyrics_data_file()
         self.write_spotify_lyrics_plain_text()
+        if self.outputs["spotify_lyrics_text_filepath"]:
+            self.outputs["spotify_lyrics_processed_filepath"] = os.path.join(
+                self.cache_dir, self.get_output_filename(" (Lyrics Spotify Processed).txt")
+            )
+            self.write_processed_lyrics(self.outputs["spotify_lyrics_text_filepath"], self.outputs["spotify_lyrics_processed_filepath"])
+
+        self.write_genius_lyrics_file()
+        if self.outputs["genius_lyrics_text_filepath"]:
+            self.outputs["genius_lyrics_processed_filepath"] = os.path.join(
+                self.cache_dir, self.get_output_filename(" (Lyrics Genius Processed).txt")
+            )
+            self.write_processed_lyrics(self.outputs["genius_lyrics_text_filepath"], self.outputs["genius_lyrics_processed_filepath"])

-        self.validate_lyrics_match_song()
+        if not self.skip_transcription:
+            self.transcribe()
+            self.validate_lyrics_match_song()

-        if self.openai_client:
-            self.write_corrected_lyrics_data_file()
-            self.write_corrected_lyrics_plain_text()
-        else:
-            self.logger.warning("Skipping LLM correction as no OpenAI client is available")
-            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
-            self.write_corrected_lyrics_plain_text()
+            self.correct_lyrics_transcription()

-        self.calculate_singing_percentage()
+            self.calculate_singing_percentage()

-        self.write_midico_lrc_file()
-        self.write_ass_file()
+            self.write_midico_lrc_file()
+            self.write_ass_file()

-        if self.render_video:
-            self.outputs["karaoke_video_filepath"] = self.get_cache_filepath(".mp4")
-            self.create_video()
+            if self.render_video:
+                self.outputs["karaoke_video_filepath"] = self.get_cache_filepath(".mp4")
+                self.create_video()
+        else:
+            self.outputs["corrected_lyrics_text_filepath"] = self.outputs["genius_lyrics_text_filepath"]
+            self.outputs["corrected_lyrics_text"] = self.outputs["genius_lyrics_text"]

         self.copy_files_to_output_dir()
-        self.calculate_llm_costs()
-
-        if self.openai_client:
-            self.openai_client.close()

         return self.outputs

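With the new skip_transcription flag, generate() can run in a lyrics-only mode: it fetches and processes online lyrics but skips transcription, correction, LRC/ASS output, and video rendering. A hedged sketch of such a run (file paths and tokens are placeholders):

from lyrics_transcriber.transcriber import LyricsTranscriber

# Lyrics-only run: fetch Spotify/Genius lyrics and process them for karaoke,
# skipping the Whisper/AudioShake transcription steps entirely
transcriber = LyricsTranscriber(
    "song.flac",
    artist="ABBA",
    title="Under Attack",
    genius_api_token="YOUR_GENIUS_TOKEN",
    skip_transcription=True,
    output_dir="./out",
)
outputs = transcriber.generate()
print(outputs["corrected_lyrics_text_filepath"])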
@@ -210,20 +185,21 @@ class LyricsTranscriber:
             self.output_dir = os.getcwd()

         self.logger.debug(f"copying temporary files to output dir: {self.output_dir}")
-
-        for key in self.outputs:
+        self.logger.debug("Files to copy:")
+        for key, value in self.outputs.items():
             if key.endswith("_filepath"):
-                if self.outputs[key] and os.path.isfile(self.outputs[key]):
-                    shutil.copy(self.outputs[key], self.output_dir)
+                self.logger.debug(f"  {key}: {value}")
+                if value and os.path.isfile(value):
+                    self.logger.debug(f"  File exists, copying to {self.output_dir}")
+                    shutil.copy(value, self.output_dir)
+                else:
+                    self.logger.debug(f"  File doesn't exist or is None")

         self.outputs["output_dir"] = self.output_dir

     def validate_lyrics_match_song(self):
         at_least_one_online_lyrics_validated = False

-        with open(self.llm_prompt_matching, "r") as file:
-            llm_matching_instructions = file.read()
-
         for online_lyrics_source in ["genius", "spotify"]:
             self.logger.debug(f"validating transcribed lyrics match lyrics from {online_lyrics_source}")
@@ -233,52 +209,21 @@ class LyricsTranscriber:
             if online_lyrics_text_key not in self.outputs or self.outputs[online_lyrics_text_key] is None:
                 continue

-            if self.openai_client:
-                data_input_str = (
-                    f'Data input 1:\n{self.outputs["transcribed_lyrics_text"]}\nData input 2:\n{self.outputs[online_lyrics_text_key]}\n'
-                )
+            self.logger.debug(f"Using primitive word matching to validate {online_lyrics_source} lyrics match")
+            transcribed_words = set(self.outputs["transcribed_lyrics_text_primary"].split())
+            online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
+            common_words = transcribed_words & online_lyrics_words
+            match_percentage = len(common_words) / len(online_lyrics_words) * 100

-                self.logger.debug(f"making API call to LLM model {self.llm_model} to validate {online_lyrics_source} lyrics match")
-                response = self.openai_client.chat.completions.create(
-                    model=self.llm_model,
-                    messages=[{"role": "system", "content": llm_matching_instructions}, {"role": "user", "content": data_input_str}],
+            if match_percentage >= 50:
+                self.logger.info(
+                    f"{online_lyrics_source} lyrics successfully validated to match transcription with {match_percentage:.2f}% word match"
                 )
-
-                message = response.choices[0].message.content
-                finish_reason = response.choices[0].finish_reason
-
-                self.outputs["llm_token_usage"]["input"] += response.usage.prompt_tokens
-                self.outputs["llm_token_usage"]["output"] += response.usage.completion_tokens
-
-                if finish_reason == "stop":
-                    if message == "Yes":
-                        self.logger.info(f"{online_lyrics_source} lyrics successfully validated to match transcription")
-                        at_least_one_online_lyrics_validated = True
-                    elif message == "No":
-                        self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
-                        self.outputs[online_lyrics_text_key] = None
-                        self.outputs[online_lyrics_filepath_key] = None
-                    else:
-                        self.logger.error(f"Unexpected response from LLM: {message}")
-                else:
-                    self.logger.warning(f"OpenAI API call did not finish successfully, finish_reason: {finish_reason}")
+                at_least_one_online_lyrics_validated = True
             else:
-                # Fallback primitive word matching
-                self.logger.debug(f"Using primitive word matching to validate {online_lyrics_source} lyrics match")
-                transcribed_words = set(self.outputs["transcribed_lyrics_text"].split())
-                online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
-                common_words = transcribed_words & online_lyrics_words
-                match_percentage = len(common_words) / len(online_lyrics_words) * 100
-
-                if match_percentage >= 50:
-                    self.logger.info(
-                        f"{online_lyrics_source} lyrics successfully validated to match transcription with {match_percentage:.2f}% word match"
-                    )
-                    at_least_one_online_lyrics_validated = True
-                else:
-                    self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
-                    self.outputs[online_lyrics_text_key] = None
-                    self.outputs[online_lyrics_filepath_key] = None
+                self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
+                self.outputs[online_lyrics_text_key] = None
+                self.outputs[online_lyrics_filepath_key] = None

         self.logger.info(
             f"Completed validation of transcription using online lyrics sources. Match found: {at_least_one_online_lyrics_validated}"
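Validation now always uses the set-overlap heuristic that was previously only the fallback: at least 50% of the online source's unique words must also appear in the primary transcription. A worked example of the arithmetic, with invented lyrics:

transcribed = "hello darkness my old friend I've come to talk with you again"
online = "hello darkness my old friend a vision softly creeping"

transcribed_words = set(transcribed.split())            # 12 unique words
online_lyrics_words = set(online.split())               # 9 unique words
common_words = transcribed_words & online_lyrics_words  # 5 words in both sets
match_percentage = len(common_words) / len(online_lyrics_words) * 100
print(f"{match_percentage:.2f}%")  # 55.56% -> passes the >= 50 threshold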
@@ -289,178 +234,37 @@ class LyricsTranscriber:
             f"Lyrics from Genius and Spotify did not match the transcription. Please check artist and title are set correctly."
         )

-    def write_corrected_lyrics_data_file(self):
-        if not self.openai_client:
-            self.logger.warning("Skipping LLM correction as no OpenAI client is available")
-            return
-
-        self.logger.debug("write_corrected_lyrics_data_file initiating OpenAI client")
-
+    def correct_lyrics_transcription(self):
         corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))

         if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
-            self.logger.debug(
+            self.logger.info(
                 f"found existing file at corrected_lyrics_data_json_cache_filepath, reading: {corrected_lyrics_data_json_cache_filepath}"
             )

             with open(corrected_lyrics_data_json_cache_filepath, "r") as corrected_lyrics_data_json:
                 self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath
-
-                corrected_lyrics_data_dict = json.load(corrected_lyrics_data_json)
-                self.outputs["corrected_lyrics_data_dict"] = corrected_lyrics_data_dict
+                self.outputs["corrected_lyrics_data_dict"] = json.load(corrected_lyrics_data_json)
             return

-        reference_lyrics = self.outputs.get("genius_lyrics_text") or self.outputs.get("spotify_lyrics_text")
-
-        if not reference_lyrics:
-            self.logger.warning("No reference lyrics found from Genius or Spotify. Skipping LLM correction.")
-            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
-            return
-
-        self.logger.debug(
-            f"no cached lyrics found at corrected_lyrics_data_json_cache_filepath: {corrected_lyrics_data_json_cache_filepath}, attempting to run correction using LLM"
+        lyrics_corrector = LyricsTranscriptionCorrector(logger=self.logger)
+        lyrics_corrector.set_input_data(
+            spotify_lyrics_data_dict=self.outputs["spotify_lyrics_data_dict"],
+            spotify_lyrics_text=self.outputs["spotify_lyrics_text"],
+            genius_lyrics_text=self.outputs["genius_lyrics_text"],
+            transcription_data_dict_whisper=self.outputs["transcription_data_dict_whisper"],
+            transcription_data_dict_audioshake=self.outputs["transcription_data_dict_audioshake"],
         )
+        self.outputs["corrected_lyrics_data_dict"] = lyrics_corrector.run_corrector()

-        corrected_lyrics_dict = {"segments": []}
-
-        with open(self.llm_prompt_correction, "r") as file:
-            system_prompt_template = file.read()
-
-        system_prompt = system_prompt_template.replace("{{reference_lyrics}}", reference_lyrics)
-
-        # TODO: Test if results are cleaner when using the vocal file from a background vocal audio separation model
-        # TODO: Record more info about the correction process (e.g before/after diffs for each segment) to a file for debugging
-        # TODO: Possibly add a step after segment-based correct to get the LLM to self-analyse the diff
-
-        self.outputs["llm_transcript"] = ""
-        self.outputs["llm_transcript_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (LLM Transcript).txt"))
-
-        total_segments = len(self.outputs["transcription_data_dict"]["segments"])
-        self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")
-
-        with open(self.outputs["llm_transcript_filepath"], "a", buffering=1, encoding="utf-8") as llm_transcript_file:
-            self.logger.debug(f"writing LLM chat instructions: {self.outputs['llm_transcript_filepath']}")
-
-            llm_transcript_header = f"--- SYSTEM instructions passed in for all segments ---:\n\n{system_prompt}\n"
-            self.outputs["llm_transcript"] += llm_transcript_header
-            llm_transcript_file.write(llm_transcript_header)
-
-            for segment in self.outputs["transcription_data_dict"]["segments"]:
-                # # Don't waste OpenAI dollars when testing!
-                # if segment["id"] > 10:
-                #     continue
-                # if segment["id"] < 20 or segment["id"] > 24:
-                #     continue
-
-                llm_transcript_segment = ""
-                segment_input = json.dumps(
-                    {
-                        "id": segment["id"],
-                        "start": segment["start"],
-                        "end": segment["end"],
-                        "confidence": segment["confidence"],
-                        "text": segment["text"],
-                        "words": segment["words"],
-                    }
-                )
-
-                previous_two_corrected_lines = ""
-                upcoming_two_uncorrected_lines = ""
-
-                for previous_segment in corrected_lyrics_dict["segments"]:
-                    if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
-                        previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
-
-                for next_segment in self.outputs["transcription_data_dict"]["segments"]:
-                    if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
-                        upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
-
-                llm_transcript_segment += f"--- Segment {segment['id']} / {total_segments} ---\n"
-                llm_transcript_segment += f"Previous two corrected lines:\n\n{previous_two_corrected_lines}\nUpcoming two uncorrected lines:\n\n{upcoming_two_uncorrected_lines}\nData input:\n\n{segment_input}\n"
-
-                # fmt: off
-                segment_prompt = system_prompt_template.replace(
-                    "{{previous_two_corrected_lines}}", previous_two_corrected_lines
-                ).replace(
-                    "{{upcoming_two_uncorrected_lines}}", upcoming_two_uncorrected_lines
-                ).replace(
-                    "{{segment_input}}", segment_input
-                )
-
-                self.logger.info(
-                    f'Calling completion model {self.llm_model} with instructions and data input for segment {segment["id"]} / {total_segments}:'
-                )
-
-                response = self.openai_client.chat.completions.create(
-                    model=self.llm_model,
-                    response_format={"type": "json_object"},
-                    seed=10,
-                    temperature=0.4,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": segment_prompt
-                        }
-                    ],
-                )
-                # fmt: on
-
-                message = response.choices[0].message.content
-                finish_reason = response.choices[0].finish_reason
-
-                llm_transcript_segment += f"\n--- RESPONSE for segment {segment['id']} ---:\n\n"
-                llm_transcript_segment += message
-                llm_transcript_segment += f"\n--- END segment {segment['id']} / {total_segments} ---:\n\n"
-
-                self.logger.debug(f"writing LLM chat transcript for segment to: {self.outputs['llm_transcript_filepath']}")
-                llm_transcript_file.write(llm_transcript_segment)
-                self.outputs["llm_transcript"] += llm_transcript_segment
-
-                self.outputs["llm_token_usage"]["input"] += response.usage.prompt_tokens
-                self.outputs["llm_token_usage"]["output"] += response.usage.completion_tokens
-
-                # self.logger.debug(f"response finish_reason: {finish_reason} message: \n{message}")
-
-                if finish_reason == "stop":
-                    try:
-                        corrected_segment_dict = json.loads(message)
-                        corrected_lyrics_dict["segments"].append(corrected_segment_dict)
-                        self.logger.info("Successfully parsed response from GPT as JSON and appended to corrected_lyrics_dict.segments")
-                    except json.JSONDecodeError as e:
-                        raise Exception("Failed to parse response from GPT as JSON") from e
-                else:
-                    self.logger.warning(f"OpenAI API call did not finish successfully, finish_reason: {finish_reason}")
-
-        self.logger.info(f'Successfully processed correction for all {len(corrected_lyrics_dict["segments"])} lyrics segments')
-
-        self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}")
-        with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as corrected_lyrics_data_json_cache_file:
-            corrected_lyrics_data_json_cache_file.write(json.dumps(corrected_lyrics_dict, indent=4))
+        # Save the corrected lyrics to output JSON file
+        self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}")
+        with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as f:
+            f.write(json.dumps(self.outputs["corrected_lyrics_data_dict"], indent=4))

         self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath
-        self.outputs["corrected_lyrics_data_dict"] = corrected_lyrics_dict
-
-    def calculate_llm_costs(self):
-        price_dollars_per_1000_tokens = {
-            "gpt-3.5-turbo-1106": {
-                "input": 0.0010,
-                "output": 0.0020,
-            },
-            "gpt-4-1106-preview": {
-                "input": 0.01,
-                "output": 0.03,
-            },
-        }

-        input_price = price_dollars_per_1000_tokens.get(self.llm_model, {"input": 0, "output": 0})["input"]
-        output_price = price_dollars_per_1000_tokens.get(self.llm_model, {"input": 0, "output": 0})["output"]
-
-        input_cost = input_price * (self.outputs["llm_token_usage"]["input"] / 1000)
-        output_cost = output_price * (self.outputs["llm_token_usage"]["output"] / 1000)
-
-        self.outputs["llm_costs_usd"]["input"] = round(input_cost, 3)
-        self.outputs["llm_costs_usd"]["output"] = round(output_cost, 3)
-        self.outputs["llm_costs_usd"]["total"] = round(input_cost + output_cost, 3)
+        self.write_corrected_lyrics_plain_text()

     def write_corrected_lyrics_plain_text(self):
         if self.outputs["corrected_lyrics_data_dict"]:
@@ -569,30 +373,39 @@ class LyricsTranscriber:

         genius_lyrics_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Genius).txt"))

+        # Check cache first
         if os.path.isfile(genius_lyrics_cache_filepath):
             self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")

             with open(genius_lyrics_cache_filepath, "r") as cached_lyrics:
-                self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
+                self.outputs["genius_lyrics_text_filepath"] = genius_lyrics_cache_filepath
                 self.outputs["genius_lyrics_text"] = cached_lyrics.read()
             return
-
         self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")
-        genius = lyricsgenius.Genius(self.genius_api_token, verbose=(self.log_level == logging.DEBUG))
+
+        # Initialize Genius with better defaults
+        genius = lyricsgenius.Genius(
+            self.genius_api_token,
+            verbose=(self.log_level == logging.DEBUG),
+            remove_section_headers=True,
+        )

         try:
             song = self.fetch_genius_lyrics(genius, self.title, self.artist)
             if song is None:
                 self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
-                return
+                return None
+
             lyrics = self.clean_genius_lyrics(song.lyrics)

             self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
             with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
                 f.write(lyrics)

-            self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
+            self.outputs["genius_lyrics_text_filepath"] = genius_lyrics_cache_filepath
             self.outputs["genius_lyrics_text"] = lyrics
+            return lyrics.split("\n")  # Return lines like write_lyrics_from_genius
+
         except requests.exceptions.RequestException as e:
             self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}")
             raise
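remove_section_headers=True asks lyricsgenius to strip [Verse]/[Chorus]-style markers at fetch time, so less boilerplate reaches clean_genius_lyrics(). A standalone sketch of the equivalent fetch (the token is a placeholder):

import lyricsgenius

genius = lyricsgenius.Genius(
    "YOUR_GENIUS_TOKEN",
    verbose=False,
    remove_section_headers=True,  # strip [Verse 1], [Chorus], etc. from lyrics
)
song = genius.search_song("Under Attack", "ABBA")
if song is not None:
    print(song.lyrics)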
@@ -600,8 +413,13 @@ class LyricsTranscriber:
     def clean_genius_lyrics(self, lyrics):
         lyrics = lyrics.replace("\\n", "\n")
         lyrics = re.sub(r"You might also like", "", lyrics)
-        # Remove the song name and word "Lyrics" if this has a non-newline char at the start
-        lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
+        lyrics = re.sub(
+            r".*?Lyrics([A-Z])", r"\1", lyrics
+        )  # Remove the song name and word "Lyrics" if this has a non-newline char at the start
+        lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)  # Remove this example: 27 ContributorsSex Bomb Lyrics
+        lyrics = re.sub(
+            r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
+        )  # Remove this example: See Tom Jones LiveGet tickets as low as $71
         lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)  # Remove the word "Embed" at end of line with preceding numbers if found
         lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)  # Remove the word "Embed" if it has been tacked onto a word at the end of a line
         lyrics = re.sub(r"^Embed$", r"", lyrics)  # Remove the word "Embed" if it appears on its own line
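Applied in sequence, these substitutions strip the boilerplate Genius wraps around lyrics. A quick illustration of two of them on an invented sample:

import re

raw = "27 ContributorsSex Bomb LyricsSpy on me baby\nAha\n73Embed"
cleaned = re.sub(r"^[0-9]* Contributors.*Lyrics", "", raw)  # drop the contributor header
cleaned = re.sub(r"[0-9]+Embed$", "", cleaned)              # drop the trailing "73Embed"
print(cleaned)  # "Spy on me baby\nAha\n"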
@@ -611,7 +429,9 @@ class LyricsTranscriber:

     def calculate_singing_percentage(self):
         # Calculate total seconds of singing using timings from whisper transcription results
-        total_singing_duration = sum(segment["end"] - segment["start"] for segment in self.outputs["transcription_data_dict"]["segments"])
+        total_singing_duration = sum(
+            segment["end"] - segment["start"] for segment in self.outputs["transcription_data_dict_primary"]["segments"]
+        )

         self.logger.debug(f"calculated total_singing_duration: {int(total_singing_duration)} seconds, now running ffprobe")

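The duration is simply the sum of the per-segment (end - start) spans from the primary transcription. For example:

segments = [
    {"start": 10.0, "end": 14.5},
    {"start": 20.0, "end": 26.0},
]
total_singing_duration = sum(s["end"] - s["start"] for s in segments)
print(total_singing_duration)  # 10.5 seconds of singing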
@@ -641,9 +461,7 @@ class LyricsTranscriber:
     # then loops over each word and writes all words with MidiCo segment start/end formatting
     # and word-level timestamps to a MidiCo-compatible LRC file
     def write_midico_lrc_file(self):
-        self.outputs["midico_lrc_filepath"] = os.path.join(
-            self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc")  # Updated suffix
-        )
+        self.outputs["midico_lrc_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc"))

         lrc_filename = self.outputs["midico_lrc_filepath"]
         self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
@@ -660,7 +478,7 @@ class LyricsTranscriber:
                 f.write(line)

     def create_screens(self):
-        self.logger.debug("create_screens beginning generation of screens from whisper results")
+        self.logger.debug("create_screens beginning generation of screens from transcription results")
         screens: List[subtitles.LyricsScreen] = []
         screen: Optional[subtitles.LyricsScreen] = None

@@ -725,8 +543,8 @@ class LyricsTranscriber:
         ass_filepath = self.outputs["ass_subtitles_filepath"]
         self.logger.debug(f"writing ASS formatted subtitle file: {ass_filepath}")

-        intial_screens = self.create_screens()
-        screens = subtitles.set_segment_end_times(intial_screens, int(self.outputs["song_duration"]))
+        initial_screens = self.create_screens()
+        screens = subtitles.set_segment_end_times(initial_screens, int(self.outputs["song_duration"]))
         screens = subtitles.set_screen_start_times(screens)
         lyric_subtitles_ass = subtitles.create_styled_subtitles(screens, self.video_resolution_num, self.font_size)
         lyric_subtitles_ass.write(ass_filepath)
@@ -845,22 +663,29 @@ class LyricsTranscriber:
         return formatted_time

     def write_transcribed_lyrics_plain_text(self):
-        if self.outputs["transcription_data_dict"]:
-            transcription_cache_suffix = " (Lyrics AudioShake).txt" if self.audioshake_api_token else " (Lyrics Whisper).txt"
-            self.logger.debug(f"transcription_cache_suffix: {transcription_cache_suffix}")
-
-            transcribed_lyrics_text_filepath = os.path.join(self.cache_dir, self.get_output_filename(transcription_cache_suffix))
-            self.outputs["transcribed_lyrics_text_filepath"] = transcribed_lyrics_text_filepath
+        if self.outputs["transcription_data_dict_whisper"]:
+            transcribed_lyrics_text_whisper_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Whisper).txt"))
+            self.logger.debug(f"Setting Whisper text filepath to: {transcribed_lyrics_text_whisper_filepath}")
+            self.outputs["transcribed_lyrics_text_whisper_filepath"] = transcribed_lyrics_text_whisper_filepath
+            self.outputs["transcribed_lyrics_text_whisper"] = ""
+
+            self.logger.debug(f"Writing Whisper lyrics to: {transcribed_lyrics_text_whisper_filepath}")
+            with open(transcribed_lyrics_text_whisper_filepath, "w", encoding="utf-8") as f:
+                for segment in self.outputs["transcription_data_dict_whisper"]["segments"]:
+                    self.outputs["transcribed_lyrics_text_whisper"] += segment["text"] + "\n"
+                    f.write(segment["text"].strip() + "\n")
+            self.logger.debug(f"Finished writing Whisper lyrics, file exists: {os.path.exists(transcribed_lyrics_text_whisper_filepath)}")

-            self.outputs["transcribed_lyrics_text"] = ""
+        if self.outputs["transcription_data_dict_audioshake"]:
+            transcribed_lyrics_text_audioshake_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics AudioShake).txt"))
+            self.outputs["transcribed_lyrics_text_audioshake_filepath"] = transcribed_lyrics_text_audioshake_filepath
+            self.outputs["transcribed_lyrics_text_audioshake"] = ""

-            self.logger.debug(f"writing lyrics plain text to transcribed_lyrics_text_filepath: {transcribed_lyrics_text_filepath}")
-            with open(transcribed_lyrics_text_filepath, "w", encoding="utf-8") as f:
-                for segment in self.outputs["transcription_data_dict"]["segments"]:
-                    self.outputs["transcribed_lyrics_text"] += segment["text"] + "\n"
+            self.logger.debug(f"Writing AudioShake lyrics to: {transcribed_lyrics_text_audioshake_filepath}")
+            with open(transcribed_lyrics_text_audioshake_filepath, "w", encoding="utf-8") as f:
+                for segment in self.outputs["transcription_data_dict_audioshake"]["segments"]:
+                    self.outputs["transcribed_lyrics_text_audioshake"] += segment["text"] + "\n"
                     f.write(segment["text"].strip() + "\n")
-        else:
-            raise Exception("Cannot write transcribed lyrics plain text as transcription_data_dict is not set")

     def find_best_split_point(self, text, max_length):
         self.logger.debug(f"Finding best split point for text: '{text}' (max_length: {max_length})")
@@ -963,45 +788,122 @@ class LyricsTranscriber:
         return new_segments

     def transcribe(self):
-        transcription_cache_suffix = " (AudioShake).json" if self.audioshake_api_token else " (Whisper).json"
-        self.outputs["transcription_data_filepath"] = self.get_cache_filepath(transcription_cache_suffix)
-
-        transcription_cache_filepath = self.outputs["transcription_data_filepath"]
-        if os.path.isfile(transcription_cache_filepath):
-            self.logger.debug(f"transcribe found existing file at transcription_cache_filepath, reading: {transcription_cache_filepath}")
-            with open(transcription_cache_filepath, "r") as cache_file:
-                self.outputs["transcription_data_dict"] = json.load(cache_file)
-                return
+        # Check cache first
+        transcription_cache_filepath_whisper = self.get_cache_filepath(" (Lyrics Whisper).json")
+        transcription_cache_filepath_audioshake = self.get_cache_filepath(" (Lyrics AudioShake).json")
+
+        self.logger.debug(f"Cache directory: {self.cache_dir}")
+        self.logger.debug(f"Output directory: {self.output_dir}")
+
+        if os.path.isfile(transcription_cache_filepath_whisper):
+            self.logger.debug(f"Found existing Whisper transcription, reading: {transcription_cache_filepath_whisper}")
+            with open(transcription_cache_filepath_whisper, "r") as cache_file:
+                self.outputs["transcription_data_dict_whisper"] = json.load(cache_file)
+                self.outputs["transcription_data_whisper_filepath"] = transcription_cache_filepath_whisper
+                self.logger.debug(f"Loaded Whisper data and set filepath to: {self.outputs['transcription_data_whisper_filepath']}")
+
+        if os.path.isfile(transcription_cache_filepath_audioshake):
+            self.logger.debug(f"Found existing AudioShake transcription, reading: {transcription_cache_filepath_audioshake}")
+            with open(transcription_cache_filepath_audioshake, "r") as cache_file:
+                self.outputs["transcription_data_dict_audioshake"] = json.load(cache_file)
+                self.outputs["transcription_data_audioshake_filepath"] = transcription_cache_filepath_audioshake
+
+        # If we have both cached transcriptions, set primary and return early
+        if self.outputs["transcription_data_dict_whisper"] and self.outputs["transcription_data_dict_audioshake"]:
+            self.set_primary_transcription()
+            return
+        # If we have Whisper cached and AudioShake isn't available, set primary and return early
+        elif self.outputs["transcription_data_dict_whisper"] and not self.audioshake_api_token:
+            self.set_primary_transcription()
+            return

-        if self.audioshake_api_token:
-            self.logger.debug(f"Using AudioShake API for transcription")
+        # Continue with transcription for any missing data...
+        audioshake_job_id = None
+        if self.audioshake_api_token and not self.outputs["transcription_data_dict_audioshake"]:
+            self.logger.debug(f"Starting AudioShake transcription")
             from .audioshake_transcriber import AudioShakeTranscriber

             audioshake = AudioShakeTranscriber(api_token=self.audioshake_api_token, logger=self.logger, output_prefix=self.output_prefix)
-            transcription_data = audioshake.transcribe(self.audio_filepath)
-        else:
+            audioshake_job_id = audioshake.start_transcription(self.audio_filepath)
+
+        # Run Whisper transcription if needed while AudioShake processes
+        if not self.outputs["transcription_data_dict_whisper"]:
             self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
             audio = whisper.load_audio(self.audio_filepath)
             model = whisper.load_model(self.transcription_model, device="cpu")
-            transcription_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
-
-            # auditok is needed for voice activity detection, but it has OS package dependencies that are hard to install on some platforms
-            # transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
+            whisper_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)

             # Remove segments with no words, only music
-        transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]
-        self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(transcription_data['segments'])}")
+            whisper_data["segments"] = [segment for segment in whisper_data["segments"] if segment["text"].strip() != "Music"]
+            self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(whisper_data['segments'])}")

             # Split long segments
             self.logger.debug("Starting to split long segments")
-        transcription_data["segments"] = self.split_long_segments(transcription_data["segments"], max_length=36)
-        self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(transcription_data['segments'])}")
+            whisper_data["segments"] = self.split_long_segments(whisper_data["segments"], max_length=36)
+            self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(whisper_data['segments'])}")
+
+            # Store Whisper results
+            self.outputs["transcription_data_dict_whisper"] = whisper_data
+            self.outputs["transcription_data_whisper_filepath"] = transcription_cache_filepath_whisper
+            with open(transcription_cache_filepath_whisper, "w") as cache_file:
+                json.dump(whisper_data, cache_file, indent=4)
+
+        # Now that Whisper is done, get AudioShake results if available
+        if audioshake_job_id:
+            self.logger.debug("Getting AudioShake results")
+            audioshake_data = audioshake.get_transcription_result(audioshake_job_id)
+            self.outputs["transcription_data_dict_audioshake"] = audioshake_data
+            self.outputs["transcription_data_audioshake_filepath"] = transcription_cache_filepath_audioshake
+            with open(transcription_cache_filepath_audioshake, "w") as cache_file:
+                json.dump(audioshake_data, cache_file, indent=4)
+
+        # Set the primary transcription source
+        self.set_primary_transcription()
+
+        # Write the text files
+        self.write_transcribed_lyrics_plain_text()

-        self.logger.debug(f"writing transcription data JSON to cache file: {transcription_cache_filepath}")
-        with open(transcription_cache_filepath, "w") as cache_file:
-            json.dump(transcription_data, cache_file, indent=4)
+    def set_primary_transcription(self):
+        """Set the primary transcription source (AudioShake if available, otherwise Whisper)"""
+        if self.outputs["transcription_data_dict_audioshake"]:
+            self.logger.info("Using AudioShake as primary transcription source")
+            self.outputs["transcription_data_dict_primary"] = self.outputs["transcription_data_dict_audioshake"]
+            self.outputs["transcription_data_primary_filepath"] = self.outputs["transcription_data_audioshake_filepath"]
+
+            # Set the primary text content
+            if "transcribed_lyrics_text_audioshake" not in self.outputs or not self.outputs["transcribed_lyrics_text_audioshake"]:
+                self.outputs["transcribed_lyrics_text_audioshake"] = "\n".join(
+                    segment["text"].strip() for segment in self.outputs["transcription_data_dict_audioshake"]["segments"]
+                )
+            self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_audioshake"]
+            self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_audioshake_filepath"]
+        else:
+            self.logger.info("Using Whisper as primary transcription source")
+            self.outputs["transcription_data_dict_primary"] = self.outputs["transcription_data_dict_whisper"]
+            self.outputs["transcription_data_primary_filepath"] = self.outputs["transcription_data_whisper_filepath"]
+
+            # Set the primary text content
+            if "transcribed_lyrics_text_whisper" not in self.outputs or not self.outputs["transcribed_lyrics_text_whisper"]:
+                self.outputs["transcribed_lyrics_text_whisper"] = "\n".join(
+                    segment["text"].strip() for segment in self.outputs["transcription_data_dict_whisper"]["segments"]
+                )
+            self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_whisper"]
+            self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_whisper_filepath"]
+
+    def write_processed_lyrics(self, lyrics_file, processed_lyrics_file):
+        self.logger.info(f"Processing lyrics from {lyrics_file} and writing to {processed_lyrics_file}")
+
+        processor = KaraokeLyricsProcessor(
+            log_level=self.log_level,
+            log_formatter=self.log_formatter,
+            input_filename=lyrics_file,
+            output_filename=processed_lyrics_file,
+            max_line_length=36,
+        )
+        processor.process()
+        processor.write_to_output_file()

-        self.outputs["transcription_data_dict"] = transcription_data
+        self.logger.info(f"Lyrics processing complete, processed lyrics written to: {processed_lyrics_file}")

     def get_cache_filepath(self, extension):
         # Instead of using slugify and hash, use the consistent naming pattern
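write_processed_lyrics() delegates line wrapping to the new karaoke-lyrics-processor dependency. A standalone sketch of the same call, using only the constructor arguments shown above (file names are placeholders):

import logging

from karaoke_lyrics_processor import KaraokeLyricsProcessor

processor = KaraokeLyricsProcessor(
    log_level=logging.DEBUG,
    log_formatter=None,
    input_filename="Artist - Title (Lyrics Genius).txt",
    output_filename="Artist - Title (Lyrics Genius Processed).txt",
    max_line_length=36,  # matches the 36-character limit used when splitting long segments
)
processor.process()
processor.write_to_output_file()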
@@ -1,21 +1,21 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.19.0
+Version: 0.20.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT
 Author: Andrew Beveridge
 Author-email: andrew@beveridge.uk
-Requires-Python: >=3.9
+Requires-Python: >=3.9,<3.13
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: Cython (>=0)
 Requires-Dist: dtw-python (>=1)
+Requires-Dist: karaoke-lyrics-processor (>=0.4.1)
 Requires-Dist: llvmlite (>=0)
 Requires-Dist: lyricsgenius (>=3)
 Requires-Dist: numba (>=0.57)
@@ -1,18 +1,19 @@
 lyrics_transcriber/__init__.py,sha256=bIRjsXAzlghS1rQxWNLU0wppZy0T_iciN9EclHLwNrQ,94
-lyrics_transcriber/audioshake_transcriber.py,sha256=MdlDv58-l5yL1QPuToc6pxaW7TXHVip1GxbPgrXTk9c,4960
+lyrics_transcriber/audioshake_transcriber.py,sha256=AbIkghvguI1PV0fCMUHGRnidQwLPM_pQ96FI0Qk-aI0,5221
+lyrics_transcriber/corrector.py,sha256=LVicUYBCz2TpzzPUbzgLfNYebYJLj7yVvbERMHuXzTY,2300
 lyrics_transcriber/llm_prompts/README.md,sha256=DPAGRDVGt9ZNcQAAoQGFhwesLY3D6hD8apL71yHP4yo,196
 lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt,sha256=a3XjAYfyhWt1uCKKqm_n2Pc0STdmBdiHHtJ7ODP99Nk,4046
 lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt,sha256=r6HN3DD_3gwh3B_JPd2R0I4lDXuB5iy7B90J9agOxbQ,2369
 lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt,sha256=hvk2Vs3M3Q4zGQsiQnXvnpd8wXWfwsudYeqN5qFyNWs,1754
 lyrics_transcriber/llm_prompts/promptfooconfig.yaml,sha256=O4YxlLV7XSUiSw_1Q9G7ELC2VAbrYUV_N5QxrPbd1jE,3735
 lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt,sha256=8d-RvZtyINKUlpQLwMi-VD--Y59J-epPt7SZSqjFbPI,1690
-lyrics_transcriber/transcriber.py,sha256=4Z9ugLG_LmQ3kw_GZMYeA4TVrZjPuCI8yru44iFUOyQ,51190
+lyrics_transcriber/transcriber.py,sha256=SrZLY4zEqSd--jgXqRUtgX6oyhM8POpL91AMas_Dpzw,47897
 lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lyrics_transcriber/utils/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
 lyrics_transcriber/utils/cli.py,sha256=8Poba_9wQw0VmOK73vuK-w-abR9QmO4y4FYDHiAQbc0,6972
 lyrics_transcriber/utils/subtitles.py,sha256=_WG0pFoZMXcrGe6gbARkC9KrWzFNTMOsiqQwNL-H2lU,11812
-lyrics_transcriber-0.19.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
-lyrics_transcriber-0.19.0.dist-info/METADATA,sha256=IrVopVhJauL3M2GDjBtXq3dPjBakkJ_l_u6V5T0GCwY,5825
-lyrics_transcriber-0.19.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-lyrics_transcriber-0.19.0.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
-lyrics_transcriber-0.19.0.dist-info/RECORD,,
+lyrics_transcriber-0.20.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
+lyrics_transcriber-0.20.0.dist-info/METADATA,sha256=1mOcGn2Hb5Nw3nKH0Cc41Zv7_gp4a-H4DLDnktEeRNs,5830
+lyrics_transcriber-0.20.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+lyrics_transcriber-0.20.0.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
+lyrics_transcriber-0.20.0.dist-info/RECORD,,