PyPI - lyrics-transcriber - Versions diffs - 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl - Mend

lyrics-transcriber 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

lyrics_transcriber/audioshake_transcriber.py CHANGED Viewed

@@ -5,10 +5,11 @@ import json
 class AudioShakeTranscriber:
-    def __init__(self, api_token, logger):
+    def __init__(self, api_token, logger, output_prefix):
         self.api_token = api_token
         self.base_url = "https://groovy.audioshake.ai"
         self.logger = logger
+        self.output_prefix = output_prefix
     def transcribe(self, audio_filepath):
         self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
@@ -103,4 +104,10 @@ class AudioShakeTranscriber:
             if "text" not in segment:
                 segment["text"] = " ".join(word["text"] for word in segment["words"])
+        transcription_data["output_filename"] = self.get_output_filename(" (AudioShake)")
         return transcription_data
+    def get_output_filename(self, suffix):
+        """Generate consistent filename with (Purpose) suffix pattern"""
+        return f"{self.output_prefix}{suffix}"

lyrics_transcriber/transcriber.py CHANGED Viewed

@@ -166,6 +166,8 @@ class LyricsTranscriber:
         self.create_folders()
+        self.output_prefix = f"{artist} - {title}"
     def generate(self):
         self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")
@@ -294,7 +296,7 @@ class LyricsTranscriber:
         self.logger.debug("write_corrected_lyrics_data_file initiating OpenAI client")
-        corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-corrected.json")
+        corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))
         if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
             self.logger.debug(
@@ -331,9 +333,7 @@ class LyricsTranscriber:
         # TODO: Possibly add a step after segment-based correct to get the LLM to self-analyse the diff
         self.outputs["llm_transcript"] = ""
-        self.outputs["llm_transcript_filepath"] = os.path.join(
-            self.cache_dir, "lyrics-" + self.get_song_slug() + "-llm-correction-transcript.txt"
-        )
+        self.outputs["llm_transcript_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (LLM Transcript).txt"))
         total_segments = len(self.outputs["transcription_data_dict"]["segments"])
         self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")
@@ -466,7 +466,9 @@ class LyricsTranscriber:
         if self.outputs["corrected_lyrics_data_dict"]:
             self.logger.debug(f"corrected_lyrics_data_dict exists, writing plain text lyrics file")
-            corrected_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-corrected.txt")
+            corrected_lyrics_text_filepath = os.path.join(
+                self.cache_dir, self.get_output_filename(" (Lyrics Corrected).txt")  # Updated to use consistent naming
+            )
             self.outputs["corrected_lyrics_text_filepath"] = corrected_lyrics_text_filepath
             self.outputs["corrected_lyrics_text"] = ""
@@ -475,7 +477,7 @@ class LyricsTranscriber:
             with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as f:
                 for corrected_segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
                     self.outputs["corrected_lyrics_text"] += corrected_segment["text"].strip() + "\n"
-                    f.write(corrected_segment["text".strip()] + "\n")
+                    f.write(corrected_segment["text"].strip() + "\n")
     def write_spotify_lyrics_data_file(self):
         if self.spotify_cookie and self.song_known:
@@ -484,7 +486,9 @@ class LyricsTranscriber:
             self.logger.warning(f"skipping spotify fetch as not all spotify params were set")
             return
-        spotify_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-spotify.json")
+        spotify_lyrics_data_json_cache_filepath = os.path.join(
+            self.cache_dir, self.get_output_filename(" (Lyrics Spotify).json")  # Updated to use consistent naming
+        )
         if os.path.isfile(spotify_lyrics_data_json_cache_filepath):
             self.logger.debug(
@@ -531,7 +535,9 @@ class LyricsTranscriber:
         if self.outputs["spotify_lyrics_data_dict"]:
             self.logger.debug(f"spotify_lyrics data found, checking/writing plain text lyrics file")
-            spotify_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-spotify.txt")
+            spotify_lyrics_text_filepath = os.path.join(
+                self.cache_dir, self.get_output_filename(" (Lyrics Spotify).txt")  # Updated to use consistent naming
+            )
             self.outputs["spotify_lyrics_text_filepath"] = spotify_lyrics_text_filepath
             lines = self.outputs["spotify_lyrics_data_dict"]["lyrics"]["lines"]
@@ -561,7 +567,7 @@ class LyricsTranscriber:
             self.logger.warning(f"skipping genius fetch as not all genius params were set")
             return
-        genius_lyrics_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-genius.txt")
+        genius_lyrics_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Genius).txt"))
         if os.path.isfile(genius_lyrics_cache_filepath):
             self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")
@@ -635,7 +641,9 @@ class LyricsTranscriber:
     # then loops over each word and writes all words with MidiCo segment start/end formatting
     # and word-level timestamps to a MidiCo-compatible LRC file
     def write_midico_lrc_file(self):
-        self.outputs["midico_lrc_filepath"] = self.get_cache_filepath(".lrc")
+        self.outputs["midico_lrc_filepath"] = os.path.join(
+            self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc")  # Updated suffix
+        )
         lrc_filename = self.outputs["midico_lrc_filepath"]
         self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
@@ -692,9 +700,15 @@ class LyricsTranscriber:
                     self.logger.debug("Reset current line")
                 current_line_text += (" " if current_line_text else "") + word["text"]
+                # fmt: off
                 lyric_segment = subtitles.LyricSegment(
-                    text=word["text"], ts=timedelta(seconds=word["start"]), end_ts=timedelta(seconds=word["end"])
+                    text=word["text"],
+                    ts=timedelta(seconds=word["start"]),
+                    end_ts=timedelta(seconds=word["end"])
                 )
+                # fmt: on
                 current_line.segments.append(lyric_segment)
                 self.logger.debug(f"Added word to current line. Current line: '{current_line_text}'")
@@ -706,7 +720,7 @@ class LyricsTranscriber:
         return screens
     def write_ass_file(self):
-        self.outputs["ass_subtitles_filepath"] = self.get_cache_filepath(".ass")
+        self.outputs["ass_subtitles_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).ass"))
         ass_filepath = self.outputs["ass_subtitles_filepath"]
         self.logger.debug(f"writing ASS formatted subtitle file: {ass_filepath}")
@@ -832,10 +846,10 @@ class LyricsTranscriber:
     def write_transcribed_lyrics_plain_text(self):
         if self.outputs["transcription_data_dict"]:
-            transcription_cache_suffix = "-audioshake-transcribed.txt" if self.audioshake_api_token else "-whisper-transcribed.txt"
+            transcription_cache_suffix = " (Lyrics AudioShake).txt" if self.audioshake_api_token else " (Lyrics Whisper).txt"
             self.logger.debug(f"transcription_cache_suffix: {transcription_cache_suffix}")
-            transcribed_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + transcription_cache_suffix)
+            transcribed_lyrics_text_filepath = os.path.join(self.cache_dir, self.get_output_filename(transcription_cache_suffix))
             self.outputs["transcribed_lyrics_text_filepath"] = transcribed_lyrics_text_filepath
             self.outputs["transcribed_lyrics_text"] = ""
@@ -949,8 +963,8 @@ class LyricsTranscriber:
         return new_segments
     def transcribe(self):
-        transcription_cache_suffix = "-audioshake" if self.audioshake_api_token else "-whisper"
-        self.outputs["transcription_data_filepath"] = self.get_cache_filepath(f"{transcription_cache_suffix}.json")
+        transcription_cache_suffix = " (AudioShake).json" if self.audioshake_api_token else " (Whisper).json"
+        self.outputs["transcription_data_filepath"] = self.get_cache_filepath(transcription_cache_suffix)
         transcription_cache_filepath = self.outputs["transcription_data_filepath"]
         if os.path.isfile(transcription_cache_filepath):
@@ -963,14 +977,14 @@ class LyricsTranscriber:
             self.logger.debug(f"Using AudioShake API for transcription")
             from .audioshake_transcriber import AudioShakeTranscriber
-            audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
+            audioshake = AudioShakeTranscriber(api_token=self.audioshake_api_token, logger=self.logger, output_prefix=self.output_prefix)
             transcription_data = audioshake.transcribe(self.audio_filepath)
         else:
             self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
             audio = whisper.load_audio(self.audio_filepath)
             model = whisper.load_model(self.transcription_model, device="cpu")
             transcription_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
             # auditok is needed for voice activity detection, but it has OS package dependencies that are hard to install on some platforms
             # transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
@@ -990,10 +1004,8 @@ class LyricsTranscriber:
         self.outputs["transcription_data_dict"] = transcription_data
     def get_cache_filepath(self, extension):
-        filename = os.path.split(self.audio_filepath)[1]
-        filename_slug = slugify.slugify(filename, lowercase=False)
-        hash_value = self.get_file_hash(self.audio_filepath)
-        cache_filepath = os.path.join(self.cache_dir, filename_slug + "_" + hash_value + extension)
+        # Instead of using slugify and hash, use the consistent naming pattern
+        cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(extension))
         self.logger.debug(f"get_cache_filepath returning cache_filepath: {cache_filepath}")
         return cache_filepath
@@ -1014,3 +1026,7 @@ class LyricsTranscriber:
         if self.output_dir is not None:
             os.makedirs(self.output_dir, exist_ok=True)
+    def get_output_filename(self, suffix):
+        """Generate consistent filename with (Purpose) suffix pattern"""
+        return f"{self.output_prefix}{suffix}"

{lyrics_transcriber-0.18.0.dist-info → lyrics_transcriber-0.19.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.18.0
+Version: 0.19.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT

{lyrics_transcriber-0.18.0.dist-info → lyrics_transcriber-0.19.0.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
 lyrics_transcriber/__init__.py,sha256=bIRjsXAzlghS1rQxWNLU0wppZy0T_iciN9EclHLwNrQ,94
-lyrics_transcriber/audioshake_transcriber.py,sha256=rfbBS7K99hYLVyOqTuhK0eigopSqXsc2Zfgg4lZz41A,4647
+lyrics_transcriber/audioshake_transcriber.py,sha256=MdlDv58-l5yL1QPuToc6pxaW7TXHVip1GxbPgrXTk9c,4960
 lyrics_transcriber/llm_prompts/README.md,sha256=DPAGRDVGt9ZNcQAAoQGFhwesLY3D6hD8apL71yHP4yo,196
 lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt,sha256=a3XjAYfyhWt1uCKKqm_n2Pc0STdmBdiHHtJ7ODP99Nk,4046
 lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt,sha256=r6HN3DD_3gwh3B_JPd2R0I4lDXuB5iy7B90J9agOxbQ,2369
 lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt,sha256=hvk2Vs3M3Q4zGQsiQnXvnpd8wXWfwsudYeqN5qFyNWs,1754
 lyrics_transcriber/llm_prompts/promptfooconfig.yaml,sha256=O4YxlLV7XSUiSw_1Q9G7ELC2VAbrYUV_N5QxrPbd1jE,3735
 lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt,sha256=8d-RvZtyINKUlpQLwMi-VD--Y59J-epPt7SZSqjFbPI,1690
-lyrics_transcriber/transcriber.py,sha256=W-XXNDVgS25JLvfZL8bx9kRtdVD3ZpNqyt-1Qp4eCak,50681
+lyrics_transcriber/transcriber.py,sha256=4Z9ugLG_LmQ3kw_GZMYeA4TVrZjPuCI8yru44iFUOyQ,51190
 lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lyrics_transcriber/utils/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
 lyrics_transcriber/utils/cli.py,sha256=8Poba_9wQw0VmOK73vuK-w-abR9QmO4y4FYDHiAQbc0,6972
 lyrics_transcriber/utils/subtitles.py,sha256=_WG0pFoZMXcrGe6gbARkC9KrWzFNTMOsiqQwNL-H2lU,11812
-lyrics_transcriber-0.18.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
-lyrics_transcriber-0.18.0.dist-info/METADATA,sha256=K8IY-6Vy5Wa6X5VKCg_sDgjvzfyiiyOBOo8mbyOUNi0,5825
-lyrics_transcriber-0.18.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-lyrics_transcriber-0.18.0.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
-lyrics_transcriber-0.18.0.dist-info/RECORD,,
+lyrics_transcriber-0.19.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
+lyrics_transcriber-0.19.0.dist-info/METADATA,sha256=IrVopVhJauL3M2GDjBtXq3dPjBakkJ_l_u6V5T0GCwY,5825
+lyrics_transcriber-0.19.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+lyrics_transcriber-0.19.0.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
+lyrics_transcriber-0.19.0.dist-info/RECORD,,

{lyrics_transcriber-0.18.0.dist-info → lyrics_transcriber-0.19.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{lyrics_transcriber-0.18.0.dist-info → lyrics_transcriber-0.19.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{lyrics_transcriber-0.18.0.dist-info → lyrics_transcriber-0.19.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

lyrics-transcriber 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

lyrics-transcriber 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl