PyPI - lyrics-transcriber - Versions diffs - 0.17.1__tar.gz → 0.18.0__tar.gz - Mend

lyrics-transcriber 0.17.1 (tar.gz) → 0.18.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{lyrics_transcriber-0.17.1 → lyrics_transcriber-0.18.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.17.1
+Version: 0.18.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT
@@ -13,8 +13,8 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: Cython (>=0)
-Requires-Dist: auditok (>=0.2)
 Requires-Dist: dtw-python (>=1)
 Requires-Dist: llvmlite (>=0)
 Requires-Dist: lyricsgenius (>=3)

{lyrics_transcriber-0.17.1 → lyrics_transcriber-0.18.0}/lyrics_transcriber/transcriber.py RENAMED Viewed

@@ -308,6 +308,13 @@ class LyricsTranscriber:
                 self.outputs["corrected_lyrics_data_dict"] = corrected_lyrics_data_dict
                 return
+        reference_lyrics = self.outputs.get("genius_lyrics_text") or self.outputs.get("spotify_lyrics_text")
+        if not reference_lyrics:
+            self.logger.warning("No reference lyrics found from Genius or Spotify. Skipping LLM correction.")
+            self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
+            return
         self.logger.debug(
             f"no cached lyrics found at corrected_lyrics_data_json_cache_filepath: {corrected_lyrics_data_json_cache_filepath}, attempting to run correction using LLM"
         )
@@ -317,7 +324,6 @@ class LyricsTranscriber:
         with open(self.llm_prompt_correction, "r") as file:
             system_prompt_template = file.read()
-        reference_lyrics = self.outputs["genius_lyrics_text"] or self.outputs["spotify_lyrics_text"]
         system_prompt = system_prompt_template.replace("{{reference_lyrics}}", reference_lyrics)
         # TODO: Test if results are cleaner when using the vocal file from a background vocal audio separation model
@@ -639,7 +645,9 @@ class LyricsTranscriber:
                 for i, word in enumerate(segment["words"]):
                     start_time = self.format_time_lrc(word["start"])
                     if i != len(segment["words"]) - 1:
-                        word["text"] += " "
+                        if not word["text"].endswith(" "):
+                            self.logger.debug(f"word '{word['text']}' does not end with a space, adding one")
+                            word["text"] += " "
                     line = "[{}]1:{}{}\n".format(start_time, "/" if i == 0 else "", word["text"])
                     f.write(line)
@@ -961,7 +969,10 @@ class LyricsTranscriber:
             self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
             audio = whisper.load_audio(self.audio_filepath)
             model = whisper.load_model(self.transcription_model, device="cpu")
-            transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
+            transcription_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
+            # auditok is needed for voice activity detection, but it has OS package dependencies that are hard to install on some platforms
+            # transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
             # Remove segments with no words, only music
             transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]

{lyrics_transcriber-0.17.1 → lyrics_transcriber-0.18.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.17.1"
+version = "0.18.0"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
@@ -26,13 +26,12 @@ lyricsgenius = ">=3"
 python-slugify = ">=8"
 syrics = ">=0"
 openai = "^1"
-openai-whisper = ">=20231117"
 transformers = ">=4"
-auditok = ">=0.2"
 whisper-timestamped = ">=1"
 tenacity = ">=8"
+openai-whisper = ">=20231117"
 # Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton
-# from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.
+# from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper
 # This was the only way I was able to get a working cross-platform build published to PyPI.
 # To update the lockfile and install/upgrade dependencies, modify the dependency list above then run:
 # poetry lock; patch -p0 poetry.lock <.github/removetriton.patch; poetry install