lyrics-transcriber 0.18.0__py3-none-any.whl → 0.19.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,13 +5,15 @@ import json
5
5
 
6
6
 
7
7
  class AudioShakeTranscriber:
8
- def __init__(self, api_token, logger):
8
+ def __init__(self, api_token, logger, output_prefix):
9
9
  self.api_token = api_token
10
10
  self.base_url = "https://groovy.audioshake.ai"
11
11
  self.logger = logger
12
+ self.output_prefix = output_prefix
12
13
 
13
- def transcribe(self, audio_filepath):
14
- self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
14
+ def start_transcription(self, audio_filepath):
15
+ """Starts the transcription job and returns the job ID without waiting for completion"""
16
+ self.logger.info(f"Starting transcription for {audio_filepath} using AudioShake API")
15
17
 
16
18
  # Step 1: Upload the audio file
17
19
  asset_id = self._upload_file(audio_filepath)
@@ -21,6 +23,12 @@ class AudioShakeTranscriber:
21
23
  job_id = self._create_job(asset_id)
22
24
  self.logger.info(f"Job created successfully. Job ID: {job_id}")
23
25
 
26
+ return job_id
27
+
28
+ def get_transcription_result(self, job_id):
29
+ """Gets the results for a previously started job"""
30
+ self.logger.info(f"Getting results for job ID: {job_id}")
31
+
24
32
  # Step 3: Wait for the job to complete and get the results
25
33
  result = self._get_job_result(job_id)
26
34
  self.logger.info(f"Job completed. Processing results...")
@@ -28,6 +36,11 @@ class AudioShakeTranscriber:
28
36
  # Step 4: Process the result and return in the required format
29
37
  return self._process_result(result)
30
38
 
39
+ def transcribe(self, audio_filepath):
40
+ """Original method now just combines the two steps"""
41
+ job_id = self.start_transcription(audio_filepath)
42
+ return self.get_transcription_result(job_id)
43
+
31
44
  def _upload_file(self, filepath):
32
45
  self.logger.info(f"Uploading {filepath} to AudioShake")
33
46
  url = f"{self.base_url}/upload"
@@ -76,13 +89,10 @@ class AudioShakeTranscriber:
76
89
  output_assets = job_data.get("outputAssets", [])
77
90
  self.logger.debug(f"Output assets: {output_assets}")
78
91
 
79
- output_asset = next((asset for asset in output_assets if asset["name"] == "transcription.json"), None)
80
- if not output_asset:
81
- self.logger.warning("'transcription.json' not found, looking for 'alignment.json'")
82
- output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
92
+ output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
83
93
 
84
94
  if not output_asset:
85
- self.logger.error("Neither 'transcription.json' nor 'alignment.json' found in job results")
95
+ self.logger.error("'alignment.json' not found in job results")
86
96
  self.logger.error(f"Available output assets: {[asset['name'] for asset in output_assets]}")
87
97
  raise Exception("Required output not found in job results")
88
98
 
@@ -103,4 +113,10 @@ class AudioShakeTranscriber:
103
113
  if "text" not in segment:
104
114
  segment["text"] = " ".join(word["text"] for word in segment["words"])
105
115
 
116
+ transcription_data["output_filename"] = self.get_output_filename(" (AudioShake)")
117
+
106
118
  return transcription_data
119
+
120
+ def get_output_filename(self, suffix):
121
+ """Generate consistent filename with (Purpose) suffix pattern"""
122
+ return f"{self.output_prefix}{suffix}"
@@ -66,7 +66,7 @@ class LyricsTranscriber:
66
66
  self.openai_api_key = os.getenv("OPENAI_API_KEY", default=openai_api_key)
67
67
  self.genius_api_token = os.getenv("GENIUS_API_TOKEN", default=genius_api_token)
68
68
  self.spotify_cookie = os.getenv("SPOTIFY_COOKIE_SP_DC", default=spotify_cookie)
69
- self.audioshake_api_token = os.getenv("AUDIOSHAKE_TOKEN", default=audioshake_api_token)
69
+ self.audioshake_api_token = os.getenv("AUDIOSHAKE_API_TOKEN", default=audioshake_api_token)
70
70
 
71
71
  self.transcription_model = transcription_model
72
72
  self.llm_model = llm_model
@@ -102,7 +102,7 @@ class LyricsTranscriber:
102
102
 
103
103
  self.openai_client.log = self.log_level
104
104
  else:
105
- self.logger.error("No OpenAI API key found, no correction will be applied to transcription")
105
+ self.logger.warning("No OpenAI API key found, no correction will be applied to transcription")
106
106
 
107
107
  self.render_video = render_video
108
108
  self.video_resolution = video_resolution
@@ -137,10 +137,18 @@ class LyricsTranscriber:
137
137
  raise FileNotFoundError(f"video_background is not a valid file path: {self.video_background_image}")
138
138
 
139
139
  self.outputs = {
140
- "transcription_data_dict": None,
141
- "transcription_data_filepath": None,
142
- "transcribed_lyrics_text": None,
143
- "transcribed_lyrics_text_filepath": None,
140
+ "transcription_data_dict_whisper": None,
141
+ "transcription_data_whisper_filepath": None,
142
+ "transcribed_lyrics_text_whisper": None,
143
+ "transcribed_lyrics_text_whisper_filepath": None,
144
+ "transcription_data_dict_audioshake": None,
145
+ "transcription_data_audioshake_filepath": None,
146
+ "transcribed_lyrics_text_audioshake": None,
147
+ "transcribed_lyrics_text_audioshake_filepath": None,
148
+ "transcription_data_dict_primary": None,
149
+ "transcription_data_primary_filepath": None,
150
+ "transcribed_lyrics_text_primary": None,
151
+ "transcribed_lyrics_text_primary_filepath": None,
144
152
  "genius_lyrics_text": None,
145
153
  "genius_lyrics_filepath": None,
146
154
  "spotify_lyrics_data_dict": None,
@@ -166,10 +174,15 @@ class LyricsTranscriber:
166
174
 
167
175
  self.create_folders()
168
176
 
177
+ self.output_prefix = f"{artist} - {title}"
178
+
169
179
  def generate(self):
180
+ self.logger.debug(f"Starting generate() with cache_dir: {self.cache_dir} and output_dir: {self.output_dir}")
181
+
170
182
  self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")
171
183
 
172
184
  self.transcribe()
185
+
173
186
  self.write_transcribed_lyrics_plain_text()
174
187
 
175
188
  self.write_genius_lyrics_file()
@@ -183,7 +196,7 @@ class LyricsTranscriber:
183
196
  self.write_corrected_lyrics_plain_text()
184
197
  else:
185
198
  self.logger.warning("Skipping LLM correction as no OpenAI client is available")
186
- self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
199
+ self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict_primary"]
187
200
  self.write_corrected_lyrics_plain_text()
188
201
 
189
202
  self.calculate_singing_percentage()
@@ -208,11 +221,15 @@ class LyricsTranscriber:
208
221
  self.output_dir = os.getcwd()
209
222
 
210
223
  self.logger.debug(f"copying temporary files to output dir: {self.output_dir}")
211
-
212
- for key in self.outputs:
224
+ self.logger.debug("Files to copy:")
225
+ for key, value in self.outputs.items():
213
226
  if key.endswith("_filepath"):
214
- if self.outputs[key] and os.path.isfile(self.outputs[key]):
215
- shutil.copy(self.outputs[key], self.output_dir)
227
+ self.logger.debug(f" {key}: {value}")
228
+ if value and os.path.isfile(value):
229
+ self.logger.debug(f" File exists, copying to {self.output_dir}")
230
+ shutil.copy(value, self.output_dir)
231
+ else:
232
+ self.logger.debug(f" File doesn't exist or is None")
216
233
 
217
234
  self.outputs["output_dir"] = self.output_dir
218
235
 
@@ -232,9 +249,7 @@ class LyricsTranscriber:
232
249
  continue
233
250
 
234
251
  if self.openai_client:
235
- data_input_str = (
236
- f'Data input 1:\n{self.outputs["transcribed_lyrics_text"]}\nData input 2:\n{self.outputs[online_lyrics_text_key]}\n'
237
- )
252
+ data_input_str = f'Data input 1:\n{self.outputs["transcribed_lyrics_text_primary"]}\nData input 2:\n{self.outputs[online_lyrics_text_key]}\n'
238
253
 
239
254
  self.logger.debug(f"making API call to LLM model {self.llm_model} to validate {online_lyrics_source} lyrics match")
240
255
  response = self.openai_client.chat.completions.create(
@@ -263,7 +278,7 @@ class LyricsTranscriber:
263
278
  else:
264
279
  # Fallback primitive word matching
265
280
  self.logger.debug(f"Using primitive word matching to validate {online_lyrics_source} lyrics match")
266
- transcribed_words = set(self.outputs["transcribed_lyrics_text"].split())
281
+ transcribed_words = set(self.outputs["transcribed_lyrics_text_primary"].split())
267
282
  online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
268
283
  common_words = transcribed_words & online_lyrics_words
269
284
  match_percentage = len(common_words) / len(online_lyrics_words) * 100
@@ -294,7 +309,7 @@ class LyricsTranscriber:
294
309
 
295
310
  self.logger.debug("write_corrected_lyrics_data_file initiating OpenAI client")
296
311
 
297
- corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-corrected.json")
312
+ corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))
298
313
 
299
314
  if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
300
315
  self.logger.debug(
@@ -312,7 +327,7 @@ class LyricsTranscriber:
312
327
 
313
328
  if not reference_lyrics:
314
329
  self.logger.warning("No reference lyrics found from Genius or Spotify. Skipping LLM correction.")
315
- self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
330
+ self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict_primary"]
316
331
  return
317
332
 
318
333
  self.logger.debug(
@@ -331,11 +346,9 @@ class LyricsTranscriber:
331
346
  # TODO: Possibly add a step after segment-based correct to get the LLM to self-analyse the diff
332
347
 
333
348
  self.outputs["llm_transcript"] = ""
334
- self.outputs["llm_transcript_filepath"] = os.path.join(
335
- self.cache_dir, "lyrics-" + self.get_song_slug() + "-llm-correction-transcript.txt"
336
- )
349
+ self.outputs["llm_transcript_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (LLM Transcript).txt"))
337
350
 
338
- total_segments = len(self.outputs["transcription_data_dict"]["segments"])
351
+ total_segments = len(self.outputs["transcription_data_dict_primary"]["segments"])
339
352
  self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")
340
353
 
341
354
  with open(self.outputs["llm_transcript_filepath"], "a", buffering=1, encoding="utf-8") as llm_transcript_file:
@@ -345,7 +358,7 @@ class LyricsTranscriber:
345
358
  self.outputs["llm_transcript"] += llm_transcript_header
346
359
  llm_transcript_file.write(llm_transcript_header)
347
360
 
348
- for segment in self.outputs["transcription_data_dict"]["segments"]:
361
+ for segment in self.outputs["transcription_data_dict_primary"]["segments"]:
349
362
  # # Don't waste OpenAI dollars when testing!
350
363
  # if segment["id"] > 10:
351
364
  # continue
@@ -371,7 +384,7 @@ class LyricsTranscriber:
371
384
  if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
372
385
  previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
373
386
 
374
- for next_segment in self.outputs["transcription_data_dict"]["segments"]:
387
+ for next_segment in self.outputs["transcription_data_dict_primary"]["segments"]:
375
388
  if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
376
389
  upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
377
390
 
@@ -466,7 +479,9 @@ class LyricsTranscriber:
466
479
  if self.outputs["corrected_lyrics_data_dict"]:
467
480
  self.logger.debug(f"corrected_lyrics_data_dict exists, writing plain text lyrics file")
468
481
 
469
- corrected_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-corrected.txt")
482
+ corrected_lyrics_text_filepath = os.path.join(
483
+ self.cache_dir, self.get_output_filename(" (Lyrics Corrected).txt") # Updated to use consistent naming
484
+ )
470
485
  self.outputs["corrected_lyrics_text_filepath"] = corrected_lyrics_text_filepath
471
486
 
472
487
  self.outputs["corrected_lyrics_text"] = ""
@@ -475,7 +490,7 @@ class LyricsTranscriber:
475
490
  with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as f:
476
491
  for corrected_segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
477
492
  self.outputs["corrected_lyrics_text"] += corrected_segment["text"].strip() + "\n"
478
- f.write(corrected_segment["text".strip()] + "\n")
493
+ f.write(corrected_segment["text"].strip() + "\n")
479
494
 
480
495
  def write_spotify_lyrics_data_file(self):
481
496
  if self.spotify_cookie and self.song_known:
@@ -484,7 +499,9 @@ class LyricsTranscriber:
484
499
  self.logger.warning(f"skipping spotify fetch as not all spotify params were set")
485
500
  return
486
501
 
487
- spotify_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-spotify.json")
502
+ spotify_lyrics_data_json_cache_filepath = os.path.join(
503
+ self.cache_dir, self.get_output_filename(" (Lyrics Spotify).json") # Updated to use consistent naming
504
+ )
488
505
 
489
506
  if os.path.isfile(spotify_lyrics_data_json_cache_filepath):
490
507
  self.logger.debug(
@@ -531,7 +548,9 @@ class LyricsTranscriber:
531
548
  if self.outputs["spotify_lyrics_data_dict"]:
532
549
  self.logger.debug(f"spotify_lyrics data found, checking/writing plain text lyrics file")
533
550
 
534
- spotify_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-spotify.txt")
551
+ spotify_lyrics_text_filepath = os.path.join(
552
+ self.cache_dir, self.get_output_filename(" (Lyrics Spotify).txt") # Updated to use consistent naming
553
+ )
535
554
  self.outputs["spotify_lyrics_text_filepath"] = spotify_lyrics_text_filepath
536
555
 
537
556
  lines = self.outputs["spotify_lyrics_data_dict"]["lyrics"]["lines"]
@@ -561,8 +580,9 @@ class LyricsTranscriber:
561
580
  self.logger.warning(f"skipping genius fetch as not all genius params were set")
562
581
  return
563
582
 
564
- genius_lyrics_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-genius.txt")
583
+ genius_lyrics_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Genius).txt"))
565
584
 
585
+ # Check cache first
566
586
  if os.path.isfile(genius_lyrics_cache_filepath):
567
587
  self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")
568
588
 
@@ -570,15 +590,21 @@ class LyricsTranscriber:
570
590
  self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
571
591
  self.outputs["genius_lyrics_text"] = cached_lyrics.read()
572
592
  return
573
-
574
593
  self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")
575
- genius = lyricsgenius.Genius(self.genius_api_token, verbose=(self.log_level == logging.DEBUG))
594
+
595
+ # Initialize Genius with better defaults
596
+ genius = lyricsgenius.Genius(
597
+ self.genius_api_token,
598
+ verbose=(self.log_level == logging.DEBUG),
599
+ remove_section_headers=True,
600
+ )
576
601
 
577
602
  try:
578
603
  song = self.fetch_genius_lyrics(genius, self.title, self.artist)
579
604
  if song is None:
580
605
  self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
581
- return
606
+ return None
607
+
582
608
  lyrics = self.clean_genius_lyrics(song.lyrics)
583
609
 
584
610
  self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
@@ -587,6 +613,8 @@ class LyricsTranscriber:
587
613
 
588
614
  self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
589
615
  self.outputs["genius_lyrics_text"] = lyrics
616
+ return lyrics.split("\n") # Return lines like write_lyrics_from_genius
617
+
590
618
  except requests.exceptions.RequestException as e:
591
619
  self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}")
592
620
  raise
@@ -594,8 +622,13 @@ class LyricsTranscriber:
594
622
  def clean_genius_lyrics(self, lyrics):
595
623
  lyrics = lyrics.replace("\\n", "\n")
596
624
  lyrics = re.sub(r"You might also like", "", lyrics)
597
- # Remove the song name and word "Lyrics" if this has a non-newline char at the start
598
- lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
625
+ lyrics = re.sub(
626
+ r".*?Lyrics([A-Z])", r"\1", lyrics
627
+ ) # Remove the song name and word "Lyrics" if this has a non-newline char at the start
628
+ lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics) # Remove this example: 27 ContributorsSex Bomb Lyrics
629
+ lyrics = re.sub(
630
+ r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
631
+ ) # Remove this example: See Tom Jones LiveGet tickets as low as $71
599
632
  lyrics = re.sub(r"[0-9]+Embed$", "", lyrics) # Remove the word "Embed" at end of line with preceding numbers if found
600
633
  lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
601
634
  lyrics = re.sub(r"^Embed$", r"", lyrics) # Remove lines consisting solely of the word "Embed"
@@ -605,7 +638,9 @@ class LyricsTranscriber:
605
638
 
606
639
  def calculate_singing_percentage(self):
607
640
  # Calculate total seconds of singing using timings from whisper transcription results
608
- total_singing_duration = sum(segment["end"] - segment["start"] for segment in self.outputs["transcription_data_dict"]["segments"])
641
+ total_singing_duration = sum(
642
+ segment["end"] - segment["start"] for segment in self.outputs["transcription_data_dict_primary"]["segments"]
643
+ )
609
644
 
610
645
  self.logger.debug(f"calculated total_singing_duration: {int(total_singing_duration)} seconds, now running ffprobe")
611
646
 
@@ -635,7 +670,7 @@ class LyricsTranscriber:
635
670
  # then loops over each word and writes all words with MidiCo segment start/end formatting
636
671
  # and word-level timestamps to a MidiCo-compatible LRC file
637
672
  def write_midico_lrc_file(self):
638
- self.outputs["midico_lrc_filepath"] = self.get_cache_filepath(".lrc")
673
+ self.outputs["midico_lrc_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc"))
639
674
 
640
675
  lrc_filename = self.outputs["midico_lrc_filepath"]
641
676
  self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
@@ -652,7 +687,7 @@ class LyricsTranscriber:
652
687
  f.write(line)
653
688
 
654
689
  def create_screens(self):
655
- self.logger.debug("create_screens beginning generation of screens from whisper results")
690
+ self.logger.debug("create_screens beginning generation of screens from transcription results")
656
691
  screens: List[subtitles.LyricsScreen] = []
657
692
  screen: Optional[subtitles.LyricsScreen] = None
658
693
 
@@ -692,9 +727,15 @@ class LyricsTranscriber:
692
727
  self.logger.debug("Reset current line")
693
728
 
694
729
  current_line_text += (" " if current_line_text else "") + word["text"]
730
+
731
+ # fmt: off
695
732
  lyric_segment = subtitles.LyricSegment(
696
- text=word["text"], ts=timedelta(seconds=word["start"]), end_ts=timedelta(seconds=word["end"])
733
+ text=word["text"],
734
+ ts=timedelta(seconds=word["start"]),
735
+ end_ts=timedelta(seconds=word["end"])
697
736
  )
737
+ # fmt: on
738
+
698
739
  current_line.segments.append(lyric_segment)
699
740
  self.logger.debug(f"Added word to current line. Current line: '{current_line_text}'")
700
741
 
@@ -706,13 +747,13 @@ class LyricsTranscriber:
706
747
  return screens
707
748
 
708
749
  def write_ass_file(self):
709
- self.outputs["ass_subtitles_filepath"] = self.get_cache_filepath(".ass")
750
+ self.outputs["ass_subtitles_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).ass"))
710
751
 
711
752
  ass_filepath = self.outputs["ass_subtitles_filepath"]
712
753
  self.logger.debug(f"writing ASS formatted subtitle file: {ass_filepath}")
713
754
 
714
- intial_screens = self.create_screens()
715
- screens = subtitles.set_segment_end_times(intial_screens, int(self.outputs["song_duration"]))
755
+ initial_screens = self.create_screens()
756
+ screens = subtitles.set_segment_end_times(initial_screens, int(self.outputs["song_duration"]))
716
757
  screens = subtitles.set_screen_start_times(screens)
717
758
  lyric_subtitles_ass = subtitles.create_styled_subtitles(screens, self.video_resolution_num, self.font_size)
718
759
  lyric_subtitles_ass.write(ass_filepath)
@@ -831,22 +872,29 @@ class LyricsTranscriber:
831
872
  return formatted_time
832
873
 
833
874
  def write_transcribed_lyrics_plain_text(self):
834
- if self.outputs["transcription_data_dict"]:
835
- transcription_cache_suffix = "-audioshake-transcribed.txt" if self.audioshake_api_token else "-whisper-transcribed.txt"
836
- self.logger.debug(f"transcription_cache_suffix: {transcription_cache_suffix}")
837
-
838
- transcribed_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + transcription_cache_suffix)
839
- self.outputs["transcribed_lyrics_text_filepath"] = transcribed_lyrics_text_filepath
875
+ if self.outputs["transcription_data_dict_whisper"]:
876
+ transcribed_lyrics_text_whisper_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Whisper).txt"))
877
+ self.logger.debug(f"Setting Whisper text filepath to: {transcribed_lyrics_text_whisper_filepath}")
878
+ self.outputs["transcribed_lyrics_text_whisper_filepath"] = transcribed_lyrics_text_whisper_filepath
879
+ self.outputs["transcribed_lyrics_text_whisper"] = ""
880
+
881
+ self.logger.debug(f"Writing Whisper lyrics to: {transcribed_lyrics_text_whisper_filepath}")
882
+ with open(transcribed_lyrics_text_whisper_filepath, "w", encoding="utf-8") as f:
883
+ for segment in self.outputs["transcription_data_dict_whisper"]["segments"]:
884
+ self.outputs["transcribed_lyrics_text_whisper"] += segment["text"] + "\n"
885
+ f.write(segment["text"].strip() + "\n")
886
+ self.logger.debug(f"Finished writing Whisper lyrics, file exists: {os.path.exists(transcribed_lyrics_text_whisper_filepath)}")
840
887
 
841
- self.outputs["transcribed_lyrics_text"] = ""
888
+ if self.outputs["transcription_data_dict_audioshake"]:
889
+ transcribed_lyrics_text_audioshake_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics AudioShake).txt"))
890
+ self.outputs["transcribed_lyrics_text_audioshake_filepath"] = transcribed_lyrics_text_audioshake_filepath
891
+ self.outputs["transcribed_lyrics_text_audioshake"] = ""
842
892
 
843
- self.logger.debug(f"writing lyrics plain text to transcribed_lyrics_text_filepath: {transcribed_lyrics_text_filepath}")
844
- with open(transcribed_lyrics_text_filepath, "w", encoding="utf-8") as f:
845
- for segment in self.outputs["transcription_data_dict"]["segments"]:
846
- self.outputs["transcribed_lyrics_text"] += segment["text"] + "\n"
893
+ self.logger.debug(f"Writing AudioShake lyrics to: {transcribed_lyrics_text_audioshake_filepath}")
894
+ with open(transcribed_lyrics_text_audioshake_filepath, "w", encoding="utf-8") as f:
895
+ for segment in self.outputs["transcription_data_dict_audioshake"]["segments"]:
896
+ self.outputs["transcribed_lyrics_text_audioshake"] += segment["text"] + "\n"
847
897
  f.write(segment["text"].strip() + "\n")
848
- else:
849
- raise Exception("Cannot write transcribed lyrics plain text as transcription_data_dict is not set")
850
898
 
851
899
  def find_best_split_point(self, text, max_length):
852
900
  self.logger.debug(f"Finding best split point for text: '{text}' (max_length: {max_length})")
@@ -949,51 +997,111 @@ class LyricsTranscriber:
949
997
  return new_segments
950
998
 
951
999
  def transcribe(self):
952
- transcription_cache_suffix = "-audioshake" if self.audioshake_api_token else "-whisper"
953
- self.outputs["transcription_data_filepath"] = self.get_cache_filepath(f"{transcription_cache_suffix}.json")
954
-
955
- transcription_cache_filepath = self.outputs["transcription_data_filepath"]
956
- if os.path.isfile(transcription_cache_filepath):
957
- self.logger.debug(f"transcribe found existing file at transcription_cache_filepath, reading: {transcription_cache_filepath}")
958
- with open(transcription_cache_filepath, "r") as cache_file:
959
- self.outputs["transcription_data_dict"] = json.load(cache_file)
960
- return
1000
+ # Check cache first
1001
+ transcription_cache_filepath_whisper = self.get_cache_filepath(" (Lyrics Whisper).json")
1002
+ transcription_cache_filepath_audioshake = self.get_cache_filepath(" (Lyrics AudioShake).json")
1003
+
1004
+ self.logger.debug(f"Cache directory: {self.cache_dir}")
1005
+ self.logger.debug(f"Output directory: {self.output_dir}")
1006
+
1007
+ if os.path.isfile(transcription_cache_filepath_whisper):
1008
+ self.logger.debug(f"Found existing Whisper transcription, reading: {transcription_cache_filepath_whisper}")
1009
+ with open(transcription_cache_filepath_whisper, "r") as cache_file:
1010
+ self.outputs["transcription_data_dict_whisper"] = json.load(cache_file)
1011
+ self.outputs["transcription_data_whisper_filepath"] = transcription_cache_filepath_whisper
1012
+ self.logger.debug(f"Loaded Whisper data and set filepath to: {self.outputs['transcription_data_whisper_filepath']}")
1013
+
1014
+ if os.path.isfile(transcription_cache_filepath_audioshake):
1015
+ self.logger.debug(f"Found existing AudioShake transcription, reading: {transcription_cache_filepath_audioshake}")
1016
+ with open(transcription_cache_filepath_audioshake, "r") as cache_file:
1017
+ self.outputs["transcription_data_dict_audioshake"] = json.load(cache_file)
1018
+ self.outputs["transcription_data_audioshake_filepath"] = transcription_cache_filepath_audioshake
1019
+
1020
+ # If we have both cached transcriptions, set primary and return early
1021
+ if self.outputs["transcription_data_dict_whisper"] and self.outputs["transcription_data_dict_audioshake"]:
1022
+ self.set_primary_transcription()
1023
+ return
1024
+ # If we have Whisper cached and AudioShake isn't available, set primary and return early
1025
+ elif self.outputs["transcription_data_dict_whisper"] and not self.audioshake_api_token:
1026
+ self.set_primary_transcription()
1027
+ return
961
1028
 
962
- if self.audioshake_api_token:
963
- self.logger.debug(f"Using AudioShake API for transcription")
1029
+ # Continue with transcription for any missing data...
1030
+ audioshake_job_id = None
1031
+ if self.audioshake_api_token and not self.outputs["transcription_data_dict_audioshake"]:
1032
+ self.logger.debug(f"Starting AudioShake transcription")
964
1033
  from .audioshake_transcriber import AudioShakeTranscriber
965
1034
 
966
- audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
967
- transcription_data = audioshake.transcribe(self.audio_filepath)
968
- else:
1035
+ audioshake = AudioShakeTranscriber(api_token=self.audioshake_api_token, logger=self.logger, output_prefix=self.output_prefix)
1036
+ audioshake_job_id = audioshake.start_transcription(self.audio_filepath)
1037
+
1038
+ # Run Whisper transcription if needed while AudioShake processes
1039
+ if not self.outputs["transcription_data_dict_whisper"]:
969
1040
  self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
970
1041
  audio = whisper.load_audio(self.audio_filepath)
971
1042
  model = whisper.load_model(self.transcription_model, device="cpu")
972
- transcription_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
973
-
974
- # auditok is needed for voice activity detection, but it has OS package dependencies that are hard to install on some platforms
975
- # transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
1043
+ whisper_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
976
1044
 
977
1045
  # Remove segments with no words, only music
978
- transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]
979
- self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(transcription_data['segments'])}")
1046
+ whisper_data["segments"] = [segment for segment in whisper_data["segments"] if segment["text"].strip() != "Music"]
1047
+ self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(whisper_data['segments'])}")
980
1048
 
981
1049
  # Split long segments
982
1050
  self.logger.debug("Starting to split long segments")
983
- transcription_data["segments"] = self.split_long_segments(transcription_data["segments"], max_length=36)
984
- self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(transcription_data['segments'])}")
985
-
986
- self.logger.debug(f"writing transcription data JSON to cache file: {transcription_cache_filepath}")
987
- with open(transcription_cache_filepath, "w") as cache_file:
988
- json.dump(transcription_data, cache_file, indent=4)
1051
+ whisper_data["segments"] = self.split_long_segments(whisper_data["segments"], max_length=36)
1052
+ self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(whisper_data['segments'])}")
1053
+
1054
+ # Store Whisper results
1055
+ self.outputs["transcription_data_dict_whisper"] = whisper_data
1056
+ self.outputs["transcription_data_whisper_filepath"] = transcription_cache_filepath_whisper
1057
+ with open(transcription_cache_filepath_whisper, "w") as cache_file:
1058
+ json.dump(whisper_data, cache_file, indent=4)
1059
+
1060
+ # Now that Whisper is done, get AudioShake results if available
1061
+ if audioshake_job_id:
1062
+ self.logger.debug("Getting AudioShake results")
1063
+ audioshake_data = audioshake.get_transcription_result(audioshake_job_id)
1064
+ self.outputs["transcription_data_dict_audioshake"] = audioshake_data
1065
+ self.outputs["transcription_data_audioshake_filepath"] = transcription_cache_filepath_audioshake
1066
+ with open(transcription_cache_filepath_audioshake, "w") as cache_file:
1067
+ json.dump(audioshake_data, cache_file, indent=4)
1068
+
1069
+ # Set the primary transcription source
1070
+ self.set_primary_transcription()
1071
+
1072
+ # Write the text files
1073
+ self.write_transcribed_lyrics_plain_text()
989
1074
 
990
- self.outputs["transcription_data_dict"] = transcription_data
1075
+ def set_primary_transcription(self):
1076
+ """Set the primary transcription source (AudioShake if available, otherwise Whisper)"""
1077
+ if self.outputs["transcription_data_dict_audioshake"]:
1078
+ self.logger.info("Using AudioShake as primary transcription source")
1079
+ self.outputs["transcription_data_dict_primary"] = self.outputs["transcription_data_dict_audioshake"]
1080
+ self.outputs["transcription_data_primary_filepath"] = self.outputs["transcription_data_audioshake_filepath"]
1081
+
1082
+ # Set the primary text content
1083
+ if "transcribed_lyrics_text_audioshake" not in self.outputs or not self.outputs["transcribed_lyrics_text_audioshake"]:
1084
+ self.outputs["transcribed_lyrics_text_audioshake"] = "\n".join(
1085
+ segment["text"].strip() for segment in self.outputs["transcription_data_dict_audioshake"]["segments"]
1086
+ )
1087
+ self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_audioshake"]
1088
+ self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_audioshake_filepath"]
1089
+ else:
1090
+ self.logger.info("Using Whisper as primary transcription source")
1091
+ self.outputs["transcription_data_dict_primary"] = self.outputs["transcription_data_dict_whisper"]
1092
+ self.outputs["transcription_data_primary_filepath"] = self.outputs["transcription_data_whisper_filepath"]
1093
+
1094
+ # Set the primary text content
1095
+ if "transcribed_lyrics_text_whisper" not in self.outputs or not self.outputs["transcribed_lyrics_text_whisper"]:
1096
+ self.outputs["transcribed_lyrics_text_whisper"] = "\n".join(
1097
+ segment["text"].strip() for segment in self.outputs["transcription_data_dict_whisper"]["segments"]
1098
+ )
1099
+ self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_whisper"]
1100
+ self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_whisper_filepath"]
991
1101
 
992
1102
  def get_cache_filepath(self, extension):
993
- filename = os.path.split(self.audio_filepath)[1]
994
- filename_slug = slugify.slugify(filename, lowercase=False)
995
- hash_value = self.get_file_hash(self.audio_filepath)
996
- cache_filepath = os.path.join(self.cache_dir, filename_slug + "_" + hash_value + extension)
1103
+ # Instead of using slugify and hash, use the consistent naming pattern
1104
+ cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(extension))
997
1105
  self.logger.debug(f"get_cache_filepath returning cache_filepath: {cache_filepath}")
998
1106
  return cache_filepath
999
1107
 
@@ -1014,3 +1122,7 @@ class LyricsTranscriber:
1014
1122
 
1015
1123
  if self.output_dir is not None:
1016
1124
  os.makedirs(self.output_dir, exist_ok=True)
1125
+
1126
+ def get_output_filename(self, suffix):
1127
+ """Generate consistent filename with (Purpose) suffix pattern"""
1128
+ return f"{self.output_prefix}{suffix}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lyrics-transcriber
3
- Version: 0.18.0
3
+ Version: 0.19.2
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
6
  License: MIT
@@ -1,18 +1,18 @@
1
1
  lyrics_transcriber/__init__.py,sha256=bIRjsXAzlghS1rQxWNLU0wppZy0T_iciN9EclHLwNrQ,94
2
- lyrics_transcriber/audioshake_transcriber.py,sha256=rfbBS7K99hYLVyOqTuhK0eigopSqXsc2Zfgg4lZz41A,4647
2
+ lyrics_transcriber/audioshake_transcriber.py,sha256=AbIkghvguI1PV0fCMUHGRnidQwLPM_pQ96FI0Qk-aI0,5221
3
3
  lyrics_transcriber/llm_prompts/README.md,sha256=DPAGRDVGt9ZNcQAAoQGFhwesLY3D6hD8apL71yHP4yo,196
4
4
  lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt,sha256=a3XjAYfyhWt1uCKKqm_n2Pc0STdmBdiHHtJ7ODP99Nk,4046
5
5
  lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt,sha256=r6HN3DD_3gwh3B_JPd2R0I4lDXuB5iy7B90J9agOxbQ,2369
6
6
  lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt,sha256=hvk2Vs3M3Q4zGQsiQnXvnpd8wXWfwsudYeqN5qFyNWs,1754
7
7
  lyrics_transcriber/llm_prompts/promptfooconfig.yaml,sha256=O4YxlLV7XSUiSw_1Q9G7ELC2VAbrYUV_N5QxrPbd1jE,3735
8
8
  lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt,sha256=8d-RvZtyINKUlpQLwMi-VD--Y59J-epPt7SZSqjFbPI,1690
9
- lyrics_transcriber/transcriber.py,sha256=W-XXNDVgS25JLvfZL8bx9kRtdVD3ZpNqyt-1Qp4eCak,50681
9
+ lyrics_transcriber/transcriber.py,sha256=AhovzKfAWJERjqBseItHk3rnj3M1rlPHRhbMk0ogqxQ,57568
10
10
  lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  lyrics_transcriber/utils/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
12
12
  lyrics_transcriber/utils/cli.py,sha256=8Poba_9wQw0VmOK73vuK-w-abR9QmO4y4FYDHiAQbc0,6972
13
13
  lyrics_transcriber/utils/subtitles.py,sha256=_WG0pFoZMXcrGe6gbARkC9KrWzFNTMOsiqQwNL-H2lU,11812
14
- lyrics_transcriber-0.18.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
15
- lyrics_transcriber-0.18.0.dist-info/METADATA,sha256=K8IY-6Vy5Wa6X5VKCg_sDgjvzfyiiyOBOo8mbyOUNi0,5825
16
- lyrics_transcriber-0.18.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
17
- lyrics_transcriber-0.18.0.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
18
- lyrics_transcriber-0.18.0.dist-info/RECORD,,
14
+ lyrics_transcriber-0.19.2.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
15
+ lyrics_transcriber-0.19.2.dist-info/METADATA,sha256=J1tlv6r2va-7Q_ygGvEPJ9DrIGa_hzCGWufVhXo6Vcc,5825
16
+ lyrics_transcriber-0.19.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
17
+ lyrics_transcriber-0.19.2.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
18
+ lyrics_transcriber-0.19.2.dist-info/RECORD,,