lyrics-transcriber 0.16.4__tar.gz → 0.17.0__tar.gz

This diff shows the content changes between two package versions as they were publicly released to a supported registry, and is provided for informational purposes only.
Files changed (18):
  1. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/PKG-INFO +1 -1
  2. lyrics_transcriber-0.17.0/lyrics_transcriber/audioshake_transcriber.py +93 -0
  3. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/transcriber.py +15 -15
  4. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/pyproject.toml +1 -1
  5. lyrics_transcriber-0.16.4/lyrics_transcriber/audioshake_transcriber.py +0 -35
  6. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/LICENSE +0 -0
  7. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/README.md +0 -0
  8. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/__init__.py +0 -0
  9. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/README.md +0 -0
  10. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -0
  11. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -0
  12. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -0
  13. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -0
  14. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -0
  15. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/__init__.py +0 -0
  16. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/ass.py +0 -0
  17. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/cli.py +0 -0
  18. {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/subtitles.py +0 -0
{lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lyrics-transcriber
- Version: 0.16.4
+ Version: 0.17.0
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
  License: MIT
lyrics_transcriber-0.17.0/lyrics_transcriber/audioshake_transcriber.py (new file)
@@ -0,0 +1,93 @@
+ import requests
+ import time
+ import os
+ import json
+
+
+ class AudioShakeTranscriber:
+     def __init__(self, api_token, logger):
+         self.api_token = api_token
+         self.base_url = "https://groovy.audioshake.ai"
+         self.logger = logger
+
+     def transcribe(self, audio_filepath):
+         self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
+
+         # Step 1: Upload the audio file
+         asset_id = self._upload_file(audio_filepath)
+         self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
+
+         # Step 2: Create a job for transcription and alignment
+         job_id = self._create_job(asset_id)
+         self.logger.debug(f"Job created successfully. Job ID: {job_id}")
+
+         # Step 3: Wait for the job to complete and get the results
+         result = self._get_job_result(job_id)
+         self.logger.debug(f"Job completed. Processing results...")
+
+         # Step 4: Process the result and return in the required format
+         return self._process_result(result)
+
+     def _upload_file(self, filepath):
+         self.logger.debug(f"Uploading {filepath} to AudioShake")
+         url = f"{self.base_url}/upload"
+         headers = {"Authorization": f"Bearer {self.api_token}"}
+         with open(filepath, "rb") as file:
+             files = {"file": (os.path.basename(filepath), file)}
+             response = requests.post(url, headers=headers, files=files)
+
+         self.logger.debug(f"Upload response status code: {response.status_code}")
+         self.logger.debug(f"Upload response content: {response.text}")
+
+         response.raise_for_status()
+         return response.json()["id"]
+
+     def _create_job(self, asset_id):
+         self.logger.debug(f"Creating job for asset {asset_id}")
+         url = f"{self.base_url}/job/"
+         headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
+         data = {
+             "metadata": {"format": "json", "name": "alignment", "language": "en"},
+             "callbackUrl": "https://example.com/webhook/alignment",
+             "assetId": asset_id,
+         }
+         response = requests.post(url, headers=headers, json=data)
+         response.raise_for_status()
+         return response.json()["job"]["id"]
+
+     def _get_job_result(self, job_id):
+         self.logger.debug(f"Getting job result for job {job_id}")
+         url = f"{self.base_url}/job/{job_id}"
+         headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
+         while True:
+             response = requests.get(url, headers=headers)
+             response.raise_for_status()
+             job_data = response.json()["job"]
+             if job_data["status"] == "completed":
+                 return job_data
+             elif job_data["status"] == "failed":
+                 raise Exception("Job failed")
+             time.sleep(5)  # Wait 5 seconds before checking again
+
+     def _process_result(self, job_data):
+         self.logger.debug(f"Processing result for job {job_data}")
+         output_asset = next((asset for asset in job_data["outputAssets"] if asset["name"] == "transcription.json"), None)
+
+         if not output_asset:
+             raise Exception("Transcription output not found in job results")
+
+         transcription_url = output_asset["link"]
+         response = requests.get(transcription_url)
+         response.raise_for_status()
+         transcription_data = response.json()
+
+         transcription_data = {"segments": transcription_data.get("lines", []), "text": transcription_data.get("text", "")}
+
+         # Ensure each segment has the required fields
+         for segment in transcription_data["segments"]:
+             if "words" not in segment:
+                 segment["words"] = []
+             if "text" not in segment:
+                 segment["text"] = " ".join(word["text"] for word in segment["words"])
+
+         return transcription_data
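
The rewritten class no longer builds its own logger from a log_level; it takes a caller-supplied logger, which is how transcriber.py constructs it in the next hunk. A minimal usage sketch under that contract — the logger setup, audio file name, and AUDIOSHAKE_API_TOKEN environment variable are illustrative, not part of the package:

import logging
import os

from lyrics_transcriber.audioshake_transcriber import AudioShakeTranscriber

# Any standard logging.Logger satisfies the constructor's logger argument.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("audioshake-demo")

# Hypothetical env var; the class only needs the bearer token string.
transcriber = AudioShakeTranscriber(os.environ["AUDIOSHAKE_API_TOKEN"], logger=logger)

# Returns {"segments": [...], "text": "..."} as built in _process_result above.
data = transcriber.transcribe("song.flac")
for segment in data["segments"]:
    print(segment["text"])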
{lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/transcriber.py
@@ -944,10 +944,10 @@ class LyricsTranscriber:
          transcription_cache_suffix = "-audioshake" if self.audioshake_api_token else "-whisper"
          self.outputs["transcription_data_filepath"] = self.get_cache_filepath(f"{transcription_cache_suffix}.json")

-         whisper_cache_filepath = self.outputs["transcription_data_filepath"]
-         if os.path.isfile(whisper_cache_filepath):
-             self.logger.debug(f"transcribe found existing file at whisper_cache_filepath, reading: {whisper_cache_filepath}")
-             with open(whisper_cache_filepath, "r") as cache_file:
+         transcription_cache_filepath = self.outputs["transcription_data_filepath"]
+         if os.path.isfile(transcription_cache_filepath):
+             self.logger.debug(f"transcribe found existing file at transcription_cache_filepath, reading: {transcription_cache_filepath}")
+             with open(transcription_cache_filepath, "r") as cache_file:
                  self.outputs["transcription_data_dict"] = json.load(cache_file)
              return

@@ -955,28 +955,28 @@ class LyricsTranscriber:
              self.logger.debug(f"Using AudioShake API for transcription")
              from .audioshake_transcriber import AudioShakeTranscriber

-             audioshake = AudioShakeTranscriber(self.audioshake_api_token, log_level=self.log_level)
-             result = audioshake.transcribe(self.audio_filepath)
+             audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
+             transcription_data = audioshake.transcribe(self.audio_filepath)
          else:
              self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
              audio = whisper.load_audio(self.audio_filepath)
              model = whisper.load_model(self.transcription_model, device="cpu")
-             result = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
+             transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)

          # Remove segments with no words, only music
-         result["segments"] = [segment for segment in result["segments"] if segment["text"].strip() != "Music"]
-         self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(result['segments'])}")
+         transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]
+         self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(transcription_data['segments'])}")

          # Split long segments
          self.logger.debug("Starting to split long segments")
-         result["segments"] = self.split_long_segments(result["segments"], max_length=36)
-         self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(result['segments'])}")
+         transcription_data["segments"] = self.split_long_segments(transcription_data["segments"], max_length=36)
+         self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(transcription_data['segments'])}")

-         self.logger.debug(f"writing transcription data JSON to cache file: {whisper_cache_filepath}")
-         with open(whisper_cache_filepath, "w") as cache_file:
-             json.dump(result, cache_file, indent=4)
+         self.logger.debug(f"writing transcription data JSON to cache file: {transcription_cache_filepath}")
+         with open(transcription_cache_filepath, "w") as cache_file:
+             json.dump(transcription_data, cache_file, indent=4)

-         self.outputs["transcription_data_dict"] = result
+         self.outputs["transcription_data_dict"] = transcription_data

      def get_cache_filepath(self, extension):
          filename = os.path.split(self.audio_filepath)[1]
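
The rename from whisper_cache_filepath to transcription_cache_filepath follows from the backend-specific suffix chosen at line 944: AudioShake and Whisper results for the same audio file now cache under different names instead of overwriting each other. A standalone sketch of just that naming behaviour — the helper below is a hypothetical reduction of get_cache_filepath; only the suffix selection is taken verbatim from the diff, the cache directory and filename composition are assumptions:

import os

def cache_filepath(audio_filepath, audioshake_api_token, cache_dir="/tmp"):
    # Suffix selection copied from the diff; everything else is assumed.
    suffix = "-audioshake" if audioshake_api_token else "-whisper"
    filename = os.path.split(audio_filepath)[1]
    return os.path.join(cache_dir, f"{filename}{suffix}.json")

print(cache_filepath("song.flac", None))       # /tmp/song.flac-whisper.json
print(cache_filepath("song.flac", "token"))    # /tmp/song.flac-audioshake.json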
{lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "lyrics-transcriber"
- version = "0.16.4"
+ version = "0.17.0"
  description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
  authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
  license = "MIT"
lyrics_transcriber-0.16.4/lyrics_transcriber/audioshake_transcriber.py (deleted)
@@ -1,35 +0,0 @@
- import logging
- import requests
-
-
- class AudioShakeTranscriber:
-     def __init__(self, api_token, log_level=logging.DEBUG):
-         self.api_token = api_token
-         self.logger = logging.getLogger(__name__)
-         self.logger.setLevel(log_level)
-
-     def transcribe(self, audio_filepath):
-         # This is a placeholder for the actual AudioShake API implementation
-         self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
-
-         self.logger.debug(f"AudioShake API token: {self.api_token}")
-         # TODO: Implement the actual API call to AudioShake
-         # For now, we'll return a dummy result
-         return {
-             "transcription_data_dict": {
-                 "segments": [
-                     {
-                         "start": 0,
-                         "end": 5,
-                         "text": "This is a dummy transcription",
-                         "words": [
-                             {"text": "This", "start": 0, "end": 1},
-                             {"text": "is", "start": 1, "end": 2},
-                             {"text": "a", "start": 2, "end": 3},
-                             {"text": "dummy", "start": 3, "end": 4},
-                             {"text": "transcription", "start": 4, "end": 5},
-                         ],
-                     }
-                 ]
-             }
-         }
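
Note the return-shape change between the two versions: the deleted placeholder wrapped its dummy payload in a transcription_data_dict key, whereas the new implementation returns the segments/text dict directly, matching Whisper's output shape, which is why transcriber.py can assign the return value straight to self.outputs["transcription_data_dict"]. Sketched side by side, with dummy values for illustration:

# 0.16.4 placeholder: payload nested one level deep, so a caller had to unwrap it.
old_result = {"transcription_data_dict": {"segments": [{"text": "dummy", "words": []}]}}

# 0.17.0: payload returned directly, same top-level keys as whisper.transcribe().
new_result = {"segments": [{"text": "dummy", "words": []}], "text": "dummy"}

assert old_result["transcription_data_dict"]["segments"][0]["text"] == new_result["segments"][0]["text"]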