lyrics-transcriber 0.18.0__tar.gz → 0.19.2__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/PKG-INFO +1 -1
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/audioshake_transcriber.py +24 -8
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/transcriber.py +196 -84
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/pyproject.toml +1 -1
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/LICENSE +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/README.md +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/__init__.py +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/README.md +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/__init__.py +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/ass.py +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/cli.py +0 -0
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/subtitles.py +0 -0
{lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.18.0
+Version: 0.19.2
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT
{lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/audioshake_transcriber.py RENAMED
@@ -5,13 +5,15 @@ import json
 
 
 class AudioShakeTranscriber:
-    def __init__(self, api_token, logger):
+    def __init__(self, api_token, logger, output_prefix):
         self.api_token = api_token
         self.base_url = "https://groovy.audioshake.ai"
         self.logger = logger
+        self.output_prefix = output_prefix
 
-    def transcribe(self, audio_filepath):
-
+    def start_transcription(self, audio_filepath):
+        """Starts the transcription job and returns the job ID without waiting for completion"""
+        self.logger.info(f"Starting transcription for {audio_filepath} using AudioShake API")
 
         # Step 1: Upload the audio file
         asset_id = self._upload_file(audio_filepath)
@@ -21,6 +23,12 @@ class AudioShakeTranscriber:
         job_id = self._create_job(asset_id)
         self.logger.info(f"Job created successfully. Job ID: {job_id}")
 
+        return job_id
+
+    def get_transcription_result(self, job_id):
+        """Gets the results for a previously started job"""
+        self.logger.info(f"Getting results for job ID: {job_id}")
+
         # Step 3: Wait for the job to complete and get the results
         result = self._get_job_result(job_id)
         self.logger.info(f"Job completed. Processing results...")
@@ -28,6 +36,11 @@ class AudioShakeTranscriber:
         # Step 4: Process the result and return in the required format
         return self._process_result(result)
 
+    def transcribe(self, audio_filepath):
+        """Original method now just combines the two steps"""
+        job_id = self.start_transcription(audio_filepath)
+        return self.get_transcription_result(job_id)
+
     def _upload_file(self, filepath):
         self.logger.info(f"Uploading {filepath} to AudioShake")
         url = f"{self.base_url}/upload"
@@ -76,13 +89,10 @@ class AudioShakeTranscriber:
         output_assets = job_data.get("outputAssets", [])
         self.logger.debug(f"Output assets: {output_assets}")
 
-        output_asset = next((asset for asset in output_assets if asset["name"] == "transcription.json"), None)
-        if not output_asset:
-            self.logger.warning("'transcription.json' not found, looking for 'alignment.json'")
-            output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
+        output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
 
         if not output_asset:
-            self.logger.error("
+            self.logger.error("'alignment.json' not found in job results")
             self.logger.error(f"Available output assets: {[asset['name'] for asset in output_assets]}")
             raise Exception("Required output not found in job results")
 
@@ -103,4 +113,10 @@ class AudioShakeTranscriber:
             if "text" not in segment:
                 segment["text"] = " ".join(word["text"] for word in segment["words"])
 
+        transcription_data["output_filename"] = self.get_output_filename(" (AudioShake)")
+
         return transcription_data
+
+    def get_output_filename(self, suffix):
+        """Generate consistent filename with (Purpose) suffix pattern"""
+        return f"{self.output_prefix}{suffix}"
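The audioshake_transcriber.py changes above split the old blocking transcribe() call into a job-start step and a result-fetch step, so a caller can do other work while AudioShake processes the file remotely. A minimal usage sketch of the new call pattern (the token, logger setup, and file path are illustrative, not from the package):

    import logging

    from lyrics_transcriber.audioshake_transcriber import AudioShakeTranscriber

    logger = logging.getLogger(__name__)

    transcriber = AudioShakeTranscriber(
        api_token="your-audioshake-token",  # hypothetical token
        logger=logger,
        output_prefix="Artist - Title",  # consumed by get_output_filename()
    )

    job_id = transcriber.start_transcription("song.flac")  # returns as soon as the job is created
    # ... do other work here, e.g. a local Whisper pass ...
    result = transcriber.get_transcription_result(job_id)  # waits for the job and processes the output

The old single-call behaviour is preserved: transcribe() now just runs these two steps back to back.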
{lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/transcriber.py RENAMED

@@ -66,7 +66,7 @@ class LyricsTranscriber:
         self.openai_api_key = os.getenv("OPENAI_API_KEY", default=openai_api_key)
         self.genius_api_token = os.getenv("GENIUS_API_TOKEN", default=genius_api_token)
         self.spotify_cookie = os.getenv("SPOTIFY_COOKIE_SP_DC", default=spotify_cookie)
-        self.audioshake_api_token = os.getenv("
+        self.audioshake_api_token = os.getenv("AUDIOSHAKE_API_TOKEN", default=audioshake_api_token)
 
         self.transcription_model = transcription_model
         self.llm_model = llm_model
@@ -102,7 +102,7 @@ class LyricsTranscriber:
 
             self.openai_client.log = self.log_level
         else:
-            self.logger.
+            self.logger.warning("No OpenAI API key found, no correction will be applied to transcription")
 
         self.render_video = render_video
         self.video_resolution = video_resolution
@@ -137,10 +137,18 @@ class LyricsTranscriber:
             raise FileNotFoundError(f"video_background is not a valid file path: {self.video_background_image}")
 
         self.outputs = {
-            "
-            "
-            "
-            "
+            "transcription_data_dict_whisper": None,
+            "transcription_data_whisper_filepath": None,
+            "transcribed_lyrics_text_whisper": None,
+            "transcribed_lyrics_text_whisper_filepath": None,
+            "transcription_data_dict_audioshake": None,
+            "transcription_data_audioshake_filepath": None,
+            "transcribed_lyrics_text_audioshake": None,
+            "transcribed_lyrics_text_audioshake_filepath": None,
+            "transcription_data_dict_primary": None,
+            "transcription_data_primary_filepath": None,
+            "transcribed_lyrics_text_primary": None,
+            "transcribed_lyrics_text_primary_filepath": None,
             "genius_lyrics_text": None,
             "genius_lyrics_filepath": None,
             "spotify_lyrics_data_dict": None,
@@ -166,10 +174,15 @@ class LyricsTranscriber:
 
         self.create_folders()
 
+        self.output_prefix = f"{artist} - {title}"
+
     def generate(self):
+        self.logger.debug(f"Starting generate() with cache_dir: {self.cache_dir} and output_dir: {self.output_dir}")
+
         self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")
 
         self.transcribe()
+
         self.write_transcribed_lyrics_plain_text()
 
         self.write_genius_lyrics_file()
@@ -183,7 +196,7 @@ class LyricsTranscriber:
             self.write_corrected_lyrics_plain_text()
         else:
             self.logger.warning("Skipping LLM correction as no OpenAI client is available")
-            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
+            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict_primary"]
             self.write_corrected_lyrics_plain_text()
 
         self.calculate_singing_percentage()
@@ -208,11 +221,15 @@ class LyricsTranscriber:
             self.output_dir = os.getcwd()
 
         self.logger.debug(f"copying temporary files to output dir: {self.output_dir}")
-
-        for key in self.outputs:
+        self.logger.debug("Files to copy:")
+        for key, value in self.outputs.items():
             if key.endswith("_filepath"):
-
-
+                self.logger.debug(f" {key}: {value}")
+                if value and os.path.isfile(value):
+                    self.logger.debug(f" File exists, copying to {self.output_dir}")
+                    shutil.copy(value, self.output_dir)
+                else:
+                    self.logger.debug(f" File doesn't exist or is None")
 
         self.outputs["output_dir"] = self.output_dir
 
@@ -232,9 +249,7 @@ class LyricsTranscriber:
                 continue
 
             if self.openai_client:
-                data_input_str = (
-                    f'Data input 1:\n{self.outputs["transcribed_lyrics_text"]}\nData input 2:\n{self.outputs[online_lyrics_text_key]}\n'
-                )
+                data_input_str = f'Data input 1:\n{self.outputs["transcribed_lyrics_text_primary"]}\nData input 2:\n{self.outputs[online_lyrics_text_key]}\n'
 
                 self.logger.debug(f"making API call to LLM model {self.llm_model} to validate {online_lyrics_source} lyrics match")
                 response = self.openai_client.chat.completions.create(
@@ -263,7 +278,7 @@ class LyricsTranscriber:
             else:
                 # Fallback primitive word matching
                 self.logger.debug(f"Using primitive word matching to validate {online_lyrics_source} lyrics match")
-                transcribed_words = set(self.outputs["transcribed_lyrics_text"].split())
+                transcribed_words = set(self.outputs["transcribed_lyrics_text_primary"].split())
                 online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
                 common_words = transcribed_words & online_lyrics_words
                 match_percentage = len(common_words) / len(online_lyrics_words) * 100
@@ -294,7 +309,7 @@ class LyricsTranscriber:
 
         self.logger.debug("write_corrected_lyrics_data_file initiating OpenAI client")
 
-        corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir,
+        corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))
 
         if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
             self.logger.debug(
@@ -312,7 +327,7 @@ class LyricsTranscriber:
 
         if not reference_lyrics:
             self.logger.warning("No reference lyrics found from Genius or Spotify. Skipping LLM correction.")
-            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
+            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict_primary"]
             return
 
         self.logger.debug(
@@ -331,11 +346,9 @@ class LyricsTranscriber:
         # TODO: Possibly add a step after segment-based correct to get the LLM to self-analyse the diff
 
         self.outputs["llm_transcript"] = ""
-        self.outputs["llm_transcript_filepath"] = os.path.join(
-            self.cache_dir, "lyrics-" + self.get_song_slug() + "-llm-correction-transcript.txt"
-        )
+        self.outputs["llm_transcript_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (LLM Transcript).txt"))
 
-        total_segments = len(self.outputs["transcription_data_dict"]["segments"])
+        total_segments = len(self.outputs["transcription_data_dict_primary"]["segments"])
         self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")
 
         with open(self.outputs["llm_transcript_filepath"], "a", buffering=1, encoding="utf-8") as llm_transcript_file:
@@ -345,7 +358,7 @@ class LyricsTranscriber:
             self.outputs["llm_transcript"] += llm_transcript_header
             llm_transcript_file.write(llm_transcript_header)
 
-            for segment in self.outputs["transcription_data_dict"]["segments"]:
+            for segment in self.outputs["transcription_data_dict_primary"]["segments"]:
                 # # Don't waste OpenAI dollars when testing!
                 # if segment["id"] > 10:
                 #     continue
@@ -371,7 +384,7 @@ class LyricsTranscriber:
                     if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
                         previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
 
-                for next_segment in self.outputs["transcription_data_dict"]["segments"]:
+                for next_segment in self.outputs["transcription_data_dict_primary"]["segments"]:
                     if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
                         upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
 
@@ -466,7 +479,9 @@ class LyricsTranscriber:
         if self.outputs["corrected_lyrics_data_dict"]:
             self.logger.debug(f"corrected_lyrics_data_dict exists, writing plain text lyrics file")
 
-            corrected_lyrics_text_filepath = os.path.join(
+            corrected_lyrics_text_filepath = os.path.join(
+                self.cache_dir, self.get_output_filename(" (Lyrics Corrected).txt")  # Updated to use consistent naming
+            )
             self.outputs["corrected_lyrics_text_filepath"] = corrected_lyrics_text_filepath
 
             self.outputs["corrected_lyrics_text"] = ""
@@ -475,7 +490,7 @@ class LyricsTranscriber:
             with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as f:
                 for corrected_segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
                     self.outputs["corrected_lyrics_text"] += corrected_segment["text"].strip() + "\n"
-                    f.write(corrected_segment["text".strip()
+                    f.write(corrected_segment["text"].strip() + "\n")
 
     def write_spotify_lyrics_data_file(self):
         if self.spotify_cookie and self.song_known:
@@ -484,7 +499,9 @@ class LyricsTranscriber:
             self.logger.warning(f"skipping spotify fetch as not all spotify params were set")
             return
 
-        spotify_lyrics_data_json_cache_filepath = os.path.join(
+        spotify_lyrics_data_json_cache_filepath = os.path.join(
+            self.cache_dir, self.get_output_filename(" (Lyrics Spotify).json")  # Updated to use consistent naming
+        )
 
         if os.path.isfile(spotify_lyrics_data_json_cache_filepath):
             self.logger.debug(
@@ -531,7 +548,9 @@ class LyricsTranscriber:
         if self.outputs["spotify_lyrics_data_dict"]:
             self.logger.debug(f"spotify_lyrics data found, checking/writing plain text lyrics file")
 
-            spotify_lyrics_text_filepath = os.path.join(
+            spotify_lyrics_text_filepath = os.path.join(
+                self.cache_dir, self.get_output_filename(" (Lyrics Spotify).txt")  # Updated to use consistent naming
+            )
             self.outputs["spotify_lyrics_text_filepath"] = spotify_lyrics_text_filepath
 
             lines = self.outputs["spotify_lyrics_data_dict"]["lyrics"]["lines"]
@@ -561,8 +580,9 @@ class LyricsTranscriber:
             self.logger.warning(f"skipping genius fetch as not all genius params were set")
             return
 
-        genius_lyrics_cache_filepath = os.path.join(self.cache_dir,
+        genius_lyrics_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Genius).txt"))
 
+        # Check cache first
         if os.path.isfile(genius_lyrics_cache_filepath):
            self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")
 
@@ -570,15 +590,21 @@ class LyricsTranscriber:
             self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
             self.outputs["genius_lyrics_text"] = cached_lyrics.read()
             return
-
         self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")
-
+
+        # Initialize Genius with better defaults
+        genius = lyricsgenius.Genius(
+            self.genius_api_token,
+            verbose=(self.log_level == logging.DEBUG),
+            remove_section_headers=True,
+        )
 
         try:
             song = self.fetch_genius_lyrics(genius, self.title, self.artist)
             if song is None:
                 self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
-                return
+                return None
+
             lyrics = self.clean_genius_lyrics(song.lyrics)
 
             self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
@@ -587,6 +613,8 @@ class LyricsTranscriber:
 
             self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
             self.outputs["genius_lyrics_text"] = lyrics
+            return lyrics.split("\n")  # Return lines like write_lyrics_from_genius
+
         except requests.exceptions.RequestException as e:
             self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}")
             raise
@@ -594,8 +622,13 @@ class LyricsTranscriber:
     def clean_genius_lyrics(self, lyrics):
         lyrics = lyrics.replace("\\n", "\n")
         lyrics = re.sub(r"You might also like", "", lyrics)
-
-
+        lyrics = re.sub(
+            r".*?Lyrics([A-Z])", r"\1", lyrics
+        )  # Remove the song name and word "Lyrics" if this has a non-newline char at the start
+        lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)  # Remove this example: 27 ContributorsSex Bomb Lyrics
+        lyrics = re.sub(
+            r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
+        )  # Remove this example: See Tom Jones LiveGet tickets as low as $71
         lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)  # Remove the word "Embed" at end of line with preceding numbers if found
         lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)  # Remove the word "Embed" if it has been tacked onto a word at the end of a line
         lyrics = re.sub(r"^Embed$", r"", lyrics)  # Remove the word "Embed" if it has been tacked onto a word at the end of a line
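The new substitutions extend clean_genius_lyrics() to strip more Genius page furniture (contributor counts, the repeated song title plus the word "Lyrics", and ticket ads) from the scraped text. A rough illustration of a subset of the rules on an invented input string; in the real method the non-greedy .*?Lyrics([A-Z]) rule would fire first:

    import re

    raw = "27 ContributorsSex Bomb LyricsSex bomb, sex bomb, you're my sex bomb\nYou might also like3Embed"

    cleaned = re.sub(r"You might also like", "", raw)
    cleaned = re.sub(r"^[0-9]* Contributors.*Lyrics", "", cleaned)  # strips "27 ContributorsSex Bomb Lyrics"
    cleaned = re.sub(r"[0-9]+Embed$", "", cleaned)                  # strips the trailing "3Embed"

    print(cleaned)  # -> "Sex bomb, sex bomb, you're my sex bomb\n"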
@@ -605,7 +638,9 @@ class LyricsTranscriber:
 
     def calculate_singing_percentage(self):
         # Calculate total seconds of singing using timings from whisper transcription results
-        total_singing_duration = sum(
+        total_singing_duration = sum(
+            segment["end"] - segment["start"] for segment in self.outputs["transcription_data_dict_primary"]["segments"]
+        )
 
         self.logger.debug(f"calculated total_singing_duration: {int(total_singing_duration)} seconds, now running ffprobe")
 
@@ -635,7 +670,7 @@ class LyricsTranscriber:
     # then loops over each word and writes all words with MidiCo segment start/end formatting
    # and word-level timestamps to a MidiCo-compatible LRC file
     def write_midico_lrc_file(self):
-        self.outputs["midico_lrc_filepath"] = self.
+        self.outputs["midico_lrc_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc"))
 
         lrc_filename = self.outputs["midico_lrc_filepath"]
         self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
@@ -652,7 +687,7 @@ class LyricsTranscriber:
             f.write(line)
 
     def create_screens(self):
-        self.logger.debug("create_screens beginning generation of screens from
+        self.logger.debug("create_screens beginning generation of screens from transcription results")
         screens: List[subtitles.LyricsScreen] = []
         screen: Optional[subtitles.LyricsScreen] = None
 
@@ -692,9 +727,15 @@ class LyricsTranscriber:
                     self.logger.debug("Reset current line")
 
                 current_line_text += (" " if current_line_text else "") + word["text"]
+
+                # fmt: off
                 lyric_segment = subtitles.LyricSegment(
-                    text=word["text"],
+                    text=word["text"],
+                    ts=timedelta(seconds=word["start"]),
+                    end_ts=timedelta(seconds=word["end"])
                 )
+                # fmt: on
+
                 current_line.segments.append(lyric_segment)
                 self.logger.debug(f"Added word to current line. Current line: '{current_line_text}'")
 
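The # fmt: off / # fmt: on pair added here fences the LyricSegment(...) call so that an auto-formatter such as Black leaves the hand-laid-out keyword arguments alone; the directives have no effect at runtime.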
@@ -706,13 +747,13 @@ class LyricsTranscriber:
         return screens
 
     def write_ass_file(self):
-        self.outputs["ass_subtitles_filepath"] = self.
+        self.outputs["ass_subtitles_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).ass"))
 
         ass_filepath = self.outputs["ass_subtitles_filepath"]
         self.logger.debug(f"writing ASS formatted subtitle file: {ass_filepath}")
 
-
-        screens = subtitles.set_segment_end_times(
+        initial_screens = self.create_screens()
+        screens = subtitles.set_segment_end_times(initial_screens, int(self.outputs["song_duration"]))
         screens = subtitles.set_screen_start_times(screens)
         lyric_subtitles_ass = subtitles.create_styled_subtitles(screens, self.video_resolution_num, self.font_size)
         lyric_subtitles_ass.write(ass_filepath)
@@ -831,22 +872,29 @@ class LyricsTranscriber:
         return formatted_time
 
     def write_transcribed_lyrics_plain_text(self):
-        if self.outputs["
-
-        self.logger.debug(f"
-
-
-
+        if self.outputs["transcription_data_dict_whisper"]:
+            transcribed_lyrics_text_whisper_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Whisper).txt"))
+            self.logger.debug(f"Setting Whisper text filepath to: {transcribed_lyrics_text_whisper_filepath}")
+            self.outputs["transcribed_lyrics_text_whisper_filepath"] = transcribed_lyrics_text_whisper_filepath
+            self.outputs["transcribed_lyrics_text_whisper"] = ""
+
+            self.logger.debug(f"Writing Whisper lyrics to: {transcribed_lyrics_text_whisper_filepath}")
+            with open(transcribed_lyrics_text_whisper_filepath, "w", encoding="utf-8") as f:
+                for segment in self.outputs["transcription_data_dict_whisper"]["segments"]:
+                    self.outputs["transcribed_lyrics_text_whisper"] += segment["text"] + "\n"
+                    f.write(segment["text"].strip() + "\n")
+            self.logger.debug(f"Finished writing Whisper lyrics, file exists: {os.path.exists(transcribed_lyrics_text_whisper_filepath)}")
 
-
+        if self.outputs["transcription_data_dict_audioshake"]:
+            transcribed_lyrics_text_audioshake_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics AudioShake).txt"))
+            self.outputs["transcribed_lyrics_text_audioshake_filepath"] = transcribed_lyrics_text_audioshake_filepath
+            self.outputs["transcribed_lyrics_text_audioshake"] = ""
 
-        self.logger.debug(f"
-        with open(
-            for segment in self.outputs["
-                self.outputs["
+            self.logger.debug(f"Writing AudioShake lyrics to: {transcribed_lyrics_text_audioshake_filepath}")
+            with open(transcribed_lyrics_text_audioshake_filepath, "w", encoding="utf-8") as f:
+                for segment in self.outputs["transcription_data_dict_audioshake"]["segments"]:
+                    self.outputs["transcribed_lyrics_text_audioshake"] += segment["text"] + "\n"
                     f.write(segment["text"].strip() + "\n")
-        else:
-            raise Exception("Cannot write transcribed lyrics plain text as transcription_data_dict is not set")
 
     def find_best_split_point(self, text, max_length):
         self.logger.debug(f"Finding best split point for text: '{text}' (max_length: {max_length})")
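Note the behavioural change in write_transcribed_lyrics_plain_text(): the old version raised an exception when no transcription data was set, while the rewrite writes a plain-text file for whichever sources (Whisper, AudioShake) are present and silently does nothing otherwise.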
@@ -949,51 +997,111 @@ class LyricsTranscriber:
         return new_segments
 
     def transcribe(self):
-
-
-
-
-
-
-
-
-
+        # Check cache first
+        transcription_cache_filepath_whisper = self.get_cache_filepath(" (Lyrics Whisper).json")
+        transcription_cache_filepath_audioshake = self.get_cache_filepath(" (Lyrics AudioShake).json")
+
+        self.logger.debug(f"Cache directory: {self.cache_dir}")
+        self.logger.debug(f"Output directory: {self.output_dir}")
+
+        if os.path.isfile(transcription_cache_filepath_whisper):
+            self.logger.debug(f"Found existing Whisper transcription, reading: {transcription_cache_filepath_whisper}")
+            with open(transcription_cache_filepath_whisper, "r") as cache_file:
+                self.outputs["transcription_data_dict_whisper"] = json.load(cache_file)
+                self.outputs["transcription_data_whisper_filepath"] = transcription_cache_filepath_whisper
+                self.logger.debug(f"Loaded Whisper data and set filepath to: {self.outputs['transcription_data_whisper_filepath']}")
+
+        if os.path.isfile(transcription_cache_filepath_audioshake):
+            self.logger.debug(f"Found existing AudioShake transcription, reading: {transcription_cache_filepath_audioshake}")
+            with open(transcription_cache_filepath_audioshake, "r") as cache_file:
+                self.outputs["transcription_data_dict_audioshake"] = json.load(cache_file)
+                self.outputs["transcription_data_audioshake_filepath"] = transcription_cache_filepath_audioshake
+
+        # If we have both cached transcriptions, set primary and return early
+        if self.outputs["transcription_data_dict_whisper"] and self.outputs["transcription_data_dict_audioshake"]:
+            self.set_primary_transcription()
+            return
+        # If we have Whisper cached and AudioShake isn't available, set primary and return early
+        elif self.outputs["transcription_data_dict_whisper"] and not self.audioshake_api_token:
+            self.set_primary_transcription()
+            return
 
-
-
+        # Continue with transcription for any missing data...
+        audioshake_job_id = None
+        if self.audioshake_api_token and not self.outputs["transcription_data_dict_audioshake"]:
+            self.logger.debug(f"Starting AudioShake transcription")
             from .audioshake_transcriber import AudioShakeTranscriber
 
-            audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
-
-
+            audioshake = AudioShakeTranscriber(api_token=self.audioshake_api_token, logger=self.logger, output_prefix=self.output_prefix)
+            audioshake_job_id = audioshake.start_transcription(self.audio_filepath)
+
+        # Run Whisper transcription if needed while AudioShake processes
+        if not self.outputs["transcription_data_dict_whisper"]:
             self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
             audio = whisper.load_audio(self.audio_filepath)
             model = whisper.load_model(self.transcription_model, device="cpu")
-
-
-            # auditok is needed for voice activity detection, but it has OS package dependencies that are hard to install on some platforms
-            # transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
+            whisper_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
 
             # Remove segments with no words, only music
-
-            self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(
+            whisper_data["segments"] = [segment for segment in whisper_data["segments"] if segment["text"].strip() != "Music"]
+            self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(whisper_data['segments'])}")
 
             # Split long segments
             self.logger.debug("Starting to split long segments")
-
-            self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(
-
-
-
-
+            whisper_data["segments"] = self.split_long_segments(whisper_data["segments"], max_length=36)
+            self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(whisper_data['segments'])}")
+
+            # Store Whisper results
+            self.outputs["transcription_data_dict_whisper"] = whisper_data
+            self.outputs["transcription_data_whisper_filepath"] = transcription_cache_filepath_whisper
+            with open(transcription_cache_filepath_whisper, "w") as cache_file:
+                json.dump(whisper_data, cache_file, indent=4)
+
+        # Now that Whisper is done, get AudioShake results if available
+        if audioshake_job_id:
+            self.logger.debug("Getting AudioShake results")
+            audioshake_data = audioshake.get_transcription_result(audioshake_job_id)
+            self.outputs["transcription_data_dict_audioshake"] = audioshake_data
+            self.outputs["transcription_data_audioshake_filepath"] = transcription_cache_filepath_audioshake
+            with open(transcription_cache_filepath_audioshake, "w") as cache_file:
+                json.dump(audioshake_data, cache_file, indent=4)
+
+        # Set the primary transcription source
+        self.set_primary_transcription()
+
+        # Write the text files
+        self.write_transcribed_lyrics_plain_text()
 
-
+    def set_primary_transcription(self):
+        """Set the primary transcription source (AudioShake if available, otherwise Whisper)"""
+        if self.outputs["transcription_data_dict_audioshake"]:
+            self.logger.info("Using AudioShake as primary transcription source")
+            self.outputs["transcription_data_dict_primary"] = self.outputs["transcription_data_dict_audioshake"]
+            self.outputs["transcription_data_primary_filepath"] = self.outputs["transcription_data_audioshake_filepath"]
+
+            # Set the primary text content
+            if "transcribed_lyrics_text_audioshake" not in self.outputs or not self.outputs["transcribed_lyrics_text_audioshake"]:
+                self.outputs["transcribed_lyrics_text_audioshake"] = "\n".join(
+                    segment["text"].strip() for segment in self.outputs["transcription_data_dict_audioshake"]["segments"]
+                )
+            self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_audioshake"]
+            self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_audioshake_filepath"]
+        else:
+            self.logger.info("Using Whisper as primary transcription source")
+            self.outputs["transcription_data_dict_primary"] = self.outputs["transcription_data_dict_whisper"]
+            self.outputs["transcription_data_primary_filepath"] = self.outputs["transcription_data_whisper_filepath"]
+
+            # Set the primary text content
+            if "transcribed_lyrics_text_whisper" not in self.outputs or not self.outputs["transcribed_lyrics_text_whisper"]:
+                self.outputs["transcribed_lyrics_text_whisper"] = "\n".join(
+                    segment["text"].strip() for segment in self.outputs["transcription_data_dict_whisper"]["segments"]
+                )
+            self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_whisper"]
+            self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_whisper_filepath"]
 
     def get_cache_filepath(self, extension):
-
-
-        hash_value = self.get_file_hash(self.audio_filepath)
-        cache_filepath = os.path.join(self.cache_dir, filename_slug + "_" + hash_value + extension)
+        # Instead of using slugify and hash, use the consistent naming pattern
+        cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(extension))
         self.logger.debug(f"get_cache_filepath returning cache_filepath: {cache_filepath}")
         return cache_filepath
 
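The reworked transcribe() is the payoff of the two-phase AudioShake API: the remote job is started first, the local Whisper pass runs while it is in flight, and the remote result is only collected at the end. Reduced to a schematic (the function parameters here are illustrative stand-ins, not the module's code):

    def transcribe_with_overlap(start_remote, run_local, fetch_remote):
        # Kick off the slow, server-side job first; it runs while we work locally.
        job_id = start_remote()
        # Do the CPU-bound local work while the remote job is in flight.
        local_result = run_local()
        # Only now block on the remote job; much of its latency is already hidden.
        remote_result = fetch_remote(job_id) if job_id else None
        # Prefer the remote result when present, mirroring set_primary_transcription().
        return remote_result or local_result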
@@ -1014,3 +1122,7 @@ class LyricsTranscriber:
 
         if self.output_dir is not None:
             os.makedirs(self.output_dir, exist_ok=True)
+
+    def get_output_filename(self, suffix):
+        """Generate consistent filename with (Purpose) suffix pattern"""
+        return f"{self.output_prefix}{suffix}"
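Both classes now expose the same get_output_filename() helper, so every artifact, cache files included via the rewritten get_cache_filepath(), is named as the output prefix, f"{artist} - {title}", plus a "(Purpose)" suffix. For example, with the artist and title borrowed from the bundled test data (the resulting names are illustrative):

    output_prefix = "ABBA - Under Attack"  # set in __init__ as f"{artist} - {title}"

    print(output_prefix + " (Lyrics Whisper).txt")    # ABBA - Under Attack (Lyrics Whisper).txt
    print(output_prefix + " (Lyrics Genius).txt")     # ABBA - Under Attack (Lyrics Genius).txt
    print(output_prefix + " (Lyrics Corrected).lrc")  # ABBA - Under Attack (Lyrics Corrected).lrc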
{lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/pyproject.toml RENAMED

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.18.0"
+version = "0.19.2"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
Files without changes:

- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/LICENSE
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/README.md
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/__init__.py
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/README.md
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/promptfooconfig.yaml
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/__init__.py
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/ass.py
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/cli.py
- {lyrics_transcriber-0.18.0 → lyrics_transcriber-0.19.2}/lyrics_transcriber/utils/subtitles.py