PyPI - lyrics-transcriber - Versions diffs - 0.13.1__tar.gz → 0.15.0__tar.gz - Mend

lyrics-transcriber 0.13.1.tar.gz → 0.15.0.tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (16) hide show

{lyrics_transcriber-0.13.1 → lyrics_transcriber-0.15.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.13.1
+Version: 0.15.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: Cython (>=0)
+Requires-Dist: auditok (>=0.2)
 Requires-Dist: dtw-python (>=1)
 Requires-Dist: llvmlite (>=0)
 Requires-Dist: lyricsgenius (>=3)

{lyrics_transcriber-0.13.1 → lyrics_transcriber-0.15.0}/lyrics_transcriber/transcriber.py RENAMED Viewed

@@ -29,7 +29,7 @@ class LyricsTranscriber:
         log_level=logging.DEBUG,
         log_formatter=None,
         transcription_model="medium",
-        llm_model="gpt-4-1106-preview",
+        llm_model="gpt-4o",
         llm_prompt_matching="lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt",
         llm_prompt_correction="lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt",
         render_video=False,
@@ -66,7 +66,15 @@ class LyricsTranscriber:
         self.llm_model = llm_model
         self.llm_prompt_matching = llm_prompt_matching
         self.llm_prompt_correction = llm_prompt_correction
         self.openai_client = OpenAI()
+        # Uncomment for local models e.g. with ollama
+        # self.openai_client = OpenAI(
+        #     base_url="http://localhost:11434/v1",
+        #     api_key="ollama",
+        # )
         self.openai_client.log = self.log_level
         self.render_video = render_video
@@ -391,8 +399,11 @@ class LyricsTranscriber:
             },
         }
-        input_cost = price_dollars_per_1000_tokens[self.llm_model]["input"] * (self.outputs["llm_token_usage"]["input"] / 1000)
-        output_cost = price_dollars_per_1000_tokens[self.llm_model]["output"] * (self.outputs["llm_token_usage"]["output"] / 1000)
+        input_price = price_dollars_per_1000_tokens.get(self.llm_model, {"input": 0, "output": 0})["input"]
+        output_price = price_dollars_per_1000_tokens.get(self.llm_model, {"input": 0, "output": 0})["output"]
+        input_cost = input_price * (self.outputs["llm_token_usage"]["input"] / 1000)
+        output_cost = output_price * (self.outputs["llm_token_usage"]["output"] / 1000)
         self.outputs["llm_costs_usd"]["input"] = round(input_cost, 3)
         self.outputs["llm_costs_usd"]["output"] = round(output_cost, 3)
@@ -775,7 +786,7 @@ class LyricsTranscriber:
         self.logger.debug(f"no cached transcription file found, running whisper transcribe with model: {self.transcription_model}")
         audio = whisper.load_audio(self.audio_filepath)
         model = whisper.load_model(self.transcription_model, device="cpu")
-        result = whisper.transcribe(model, audio, language="en")
+        result = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
         self.logger.debug(f"transcription complete, performing post-processing cleanup")

{lyrics_transcriber-0.13.1 → lyrics_transcriber-0.15.0}/lyrics_transcriber/utils/cli.py RENAMED Viewed

@@ -65,8 +65,8 @@ def main():
     parser.add_argument(
         "--llm_model",
-        default="gpt-4-1106-preview",
-        help="Optional: LLM model to use (currently only supports OpenAI chat completion models, e.g. gpt-4-1106-preview). Default: gpt-3.5-turbo-1106",
+        default="gpt-4o",
+        help="Optional: LLM model to use (currently only supports OpenAI chat completion compatible models",
     )
     parser.add_argument(

{lyrics_transcriber-0.13.1 → lyrics_transcriber-0.15.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.13.1"
+version = "0.15.0"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
@@ -28,6 +28,7 @@ syrics = ">=0"
 openai = "^1"
 openai-whisper = ">=20231117"
 transformers = ">=4"
+auditok = ">=0.2"
 whisper-timestamped = ">=1"
 # Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton
 # from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.