lyrics-transcriber 0.17.0__tar.gz → 0.17.2__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (17)
  1. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/PKG-INFO +1 -1
  2. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/audioshake_transcriber.py +24 -11
  3. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/transcriber.py +10 -2
  4. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/pyproject.toml +1 -1
  5. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/LICENSE +0 -0
  6. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/README.md +0 -0
  7. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/__init__.py +0 -0
  8. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/llm_prompts/README.md +0 -0
  9. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -0
  10. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -0
  11. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -0
  12. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -0
  13. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -0
  14. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/utils/__init__.py +0 -0
  15. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/utils/ass.py +0 -0
  16. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/utils/cli.py +0 -0
  17. {lyrics_transcriber-0.17.0 → lyrics_transcriber-0.17.2}/lyrics_transcriber/utils/subtitles.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lyrics-transcriber
3
- Version: 0.17.0
3
+ Version: 0.17.2
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
6
  License: MIT
@@ -15,35 +15,35 @@ class AudioShakeTranscriber:
15
15
 
16
16
  # Step 1: Upload the audio file
17
17
  asset_id = self._upload_file(audio_filepath)
18
- self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
18
+ self.logger.info(f"File uploaded successfully. Asset ID: {asset_id}")
19
19
 
20
20
  # Step 2: Create a job for transcription and alignment
21
21
  job_id = self._create_job(asset_id)
22
- self.logger.debug(f"Job created successfully. Job ID: {job_id}")
22
+ self.logger.info(f"Job created successfully. Job ID: {job_id}")
23
23
 
24
24
  # Step 3: Wait for the job to complete and get the results
25
25
  result = self._get_job_result(job_id)
26
- self.logger.debug(f"Job completed. Processing results...")
26
+ self.logger.info(f"Job completed. Processing results...")
27
27
 
28
28
  # Step 4: Process the result and return in the required format
29
29
  return self._process_result(result)
30
30
 
31
31
  def _upload_file(self, filepath):
32
- self.logger.debug(f"Uploading {filepath} to AudioShake")
32
+ self.logger.info(f"Uploading {filepath} to AudioShake")
33
33
  url = f"{self.base_url}/upload"
34
34
  headers = {"Authorization": f"Bearer {self.api_token}"}
35
35
  with open(filepath, "rb") as file:
36
36
  files = {"file": (os.path.basename(filepath), file)}
37
37
  response = requests.post(url, headers=headers, files=files)
38
38
 
39
- self.logger.debug(f"Upload response status code: {response.status_code}")
40
- self.logger.debug(f"Upload response content: {response.text}")
39
+ self.logger.info(f"Upload response status code: {response.status_code}")
40
+ self.logger.info(f"Upload response content: {response.text}")
41
41
 
42
42
  response.raise_for_status()
43
43
  return response.json()["id"]
44
44
 
45
45
  def _create_job(self, asset_id):
46
- self.logger.debug(f"Creating job for asset {asset_id}")
46
+ self.logger.info(f"Creating job for asset {asset_id}")
47
47
  url = f"{self.base_url}/job/"
48
48
  headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
49
49
  data = {
@@ -56,7 +56,7 @@ class AudioShakeTranscriber:
56
56
  return response.json()["job"]["id"]
57
57
 
58
58
  def _get_job_result(self, job_id):
59
- self.logger.debug(f"Getting job result for job {job_id}")
59
+ self.logger.info(f"Getting job result for job {job_id}")
60
60
  url = f"{self.base_url}/job/{job_id}"
61
61
  headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
62
62
  while True:
@@ -70,16 +70,29 @@ class AudioShakeTranscriber:
70
70
  time.sleep(5) # Wait 5 seconds before checking again
71
71
 
72
72
  def _process_result(self, job_data):
73
- self.logger.debug(f"Processing result for job {job_data}")
74
- output_asset = next((asset for asset in job_data["outputAssets"] if asset["name"] == "transcription.json"), None)
73
+ self.logger.debug(f"Processing result for job {job_data['id']}")
74
+ self.logger.debug(f"Job data: {json.dumps(job_data, indent=2)}")
75
+
76
+ output_assets = job_data.get("outputAssets", [])
77
+ self.logger.debug(f"Output assets: {output_assets}")
78
+
79
+ output_asset = next((asset for asset in output_assets if asset["name"] == "transcription.json"), None)
80
+ if not output_asset:
81
+ self.logger.warning("'transcription.json' not found, looking for 'alignment.json'")
82
+ output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
75
83
 
76
84
  if not output_asset:
77
- raise Exception("Transcription output not found in job results")
85
+ self.logger.error("Neither 'transcription.json' nor 'alignment.json' found in job results")
86
+ self.logger.error(f"Available output assets: {[asset['name'] for asset in output_assets]}")
87
+ raise Exception("Required output not found in job results")
78
88
 
79
89
  transcription_url = output_asset["link"]
90
+ self.logger.debug(f"Output URL: {transcription_url}")
91
+
80
92
  response = requests.get(transcription_url)
81
93
  response.raise_for_status()
82
94
  transcription_data = response.json()
95
+ self.logger.debug(f"Output data: {json.dumps(transcription_data, indent=2)}")
83
96
 
84
97
  transcription_data = {"segments": transcription_data.get("lines", []), "text": transcription_data.get("text", "")}
85
98
 
@@ -308,6 +308,13 @@ class LyricsTranscriber:
308
308
  self.outputs["corrected_lyrics_data_dict"] = corrected_lyrics_data_dict
309
309
  return
310
310
 
311
+ reference_lyrics = self.outputs.get("genius_lyrics_text") or self.outputs.get("spotify_lyrics_text")
312
+
313
+ if not reference_lyrics:
314
+ self.logger.warning("No reference lyrics found from Genius or Spotify. Skipping LLM correction.")
315
+ self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict"]
316
+ return
317
+
311
318
  self.logger.debug(
312
319
  f"no cached lyrics found at corrected_lyrics_data_json_cache_filepath: {corrected_lyrics_data_json_cache_filepath}, attempting to run correction using LLM"
313
320
  )
@@ -317,7 +324,6 @@ class LyricsTranscriber:
317
324
  with open(self.llm_prompt_correction, "r") as file:
318
325
  system_prompt_template = file.read()
319
326
 
320
- reference_lyrics = self.outputs["genius_lyrics_text"] or self.outputs["spotify_lyrics_text"]
321
327
  system_prompt = system_prompt_template.replace("{{reference_lyrics}}", reference_lyrics)
322
328
 
323
329
  # TODO: Test if results are cleaner when using the vocal file from a background vocal audio separation model
@@ -639,7 +645,9 @@ class LyricsTranscriber:
639
645
  for i, word in enumerate(segment["words"]):
640
646
  start_time = self.format_time_lrc(word["start"])
641
647
  if i != len(segment["words"]) - 1:
642
- word["text"] += " "
648
+ if not word["text"].endswith(" "):
649
+ self.logger.debug(f"word '{word['text']}' does not end with a space, adding one")
650
+ word["text"] += " "
643
651
  line = "[{}]1:{}{}\n".format(start_time, "/" if i == 0 else "", word["text"])
644
652
  f.write(line)
645
653
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "lyrics-transcriber"
3
- version = "0.17.0"
3
+ version = "0.17.2"
4
4
  description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
5
5
  authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
6
6
  license = "MIT"