lyrics-transcriber 0.16.3__tar.gz → 0.17.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/PKG-INFO +2 -1
- lyrics_transcriber-0.17.0/lyrics_transcriber/audioshake_transcriber.py +93 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/transcriber.py +41 -25
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/pyproject.toml +3 -2
- lyrics_transcriber-0.16.3/lyrics_transcriber/audioshake_transcriber.py +0 -35
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/LICENSE +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/README.md +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/__init__.py +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/README.md +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/__init__.py +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/ass.py +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/cli.py +0 -0
- {lyrics_transcriber-0.16.3 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/subtitles.py +0 -0
--- lyrics_transcriber-0.16.3/PKG-INFO
+++ lyrics_transcriber-0.17.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.16.3
+Version: 0.17.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT
@@ -26,6 +26,7 @@ Requires-Dist: openai (>=1,<2)
 Requires-Dist: openai-whisper (>=20231117)
 Requires-Dist: python-slugify (>=8)
 Requires-Dist: syrics (>=0)
+Requires-Dist: tenacity (>=8)
 Requires-Dist: torch (>=1)
 Requires-Dist: tqdm (>=4)
 Requires-Dist: transformers (>=4)
--- /dev/null
+++ lyrics_transcriber-0.17.0/lyrics_transcriber/audioshake_transcriber.py
@@ -0,0 +1,93 @@
+import requests
+import time
+import os
+import json
+
+
+class AudioShakeTranscriber:
+    def __init__(self, api_token, logger):
+        self.api_token = api_token
+        self.base_url = "https://groovy.audioshake.ai"
+        self.logger = logger
+
+    def transcribe(self, audio_filepath):
+        self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
+
+        # Step 1: Upload the audio file
+        asset_id = self._upload_file(audio_filepath)
+        self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
+
+        # Step 2: Create a job for transcription and alignment
+        job_id = self._create_job(asset_id)
+        self.logger.debug(f"Job created successfully. Job ID: {job_id}")
+
+        # Step 3: Wait for the job to complete and get the results
+        result = self._get_job_result(job_id)
+        self.logger.debug(f"Job completed. Processing results...")
+
+        # Step 4: Process the result and return in the required format
+        return self._process_result(result)
+
+    def _upload_file(self, filepath):
+        self.logger.debug(f"Uploading {filepath} to AudioShake")
+        url = f"{self.base_url}/upload"
+        headers = {"Authorization": f"Bearer {self.api_token}"}
+        with open(filepath, "rb") as file:
+            files = {"file": (os.path.basename(filepath), file)}
+            response = requests.post(url, headers=headers, files=files)
+
+        self.logger.debug(f"Upload response status code: {response.status_code}")
+        self.logger.debug(f"Upload response content: {response.text}")
+
+        response.raise_for_status()
+        return response.json()["id"]
+
+    def _create_job(self, asset_id):
+        self.logger.debug(f"Creating job for asset {asset_id}")
+        url = f"{self.base_url}/job/"
+        headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
+        data = {
+            "metadata": {"format": "json", "name": "alignment", "language": "en"},
+            "callbackUrl": "https://example.com/webhook/alignment",
+            "assetId": asset_id,
+        }
+        response = requests.post(url, headers=headers, json=data)
+        response.raise_for_status()
+        return response.json()["job"]["id"]
+
+    def _get_job_result(self, job_id):
+        self.logger.debug(f"Getting job result for job {job_id}")
+        url = f"{self.base_url}/job/{job_id}"
+        headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
+        while True:
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+            job_data = response.json()["job"]
+            if job_data["status"] == "completed":
+                return job_data
+            elif job_data["status"] == "failed":
+                raise Exception("Job failed")
+            time.sleep(5)  # Wait 5 seconds before checking again
+
+    def _process_result(self, job_data):
+        self.logger.debug(f"Processing result for job {job_data}")
+        output_asset = next((asset for asset in job_data["outputAssets"] if asset["name"] == "transcription.json"), None)
+
+        if not output_asset:
+            raise Exception("Transcription output not found in job results")
+
+        transcription_url = output_asset["link"]
+        response = requests.get(transcription_url)
+        response.raise_for_status()
+        transcription_data = response.json()
+
+        transcription_data = {"segments": transcription_data.get("lines", []), "text": transcription_data.get("text", "")}
+
+        # Ensure each segment has the required fields
+        for segment in transcription_data["segments"]:
+            if "words" not in segment:
+                segment["words"] = []
+            if "text" not in segment:
+                segment["text"] = " ".join(word["text"] for word in segment["words"])
+
+        return transcription_data
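For context, the new AudioShakeTranscriber wraps the whole upload → job → poll → download workflow behind a single transcribe() call and returns a Whisper-style dict with "text" and "segments". A minimal usage sketch follows; it is not part of the package, and the environment variable name, logger setup, and audio path are illustrative assumptions:

import logging
import os

from lyrics_transcriber.audioshake_transcriber import AudioShakeTranscriber

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("audioshake-example")

# Hypothetical token lookup and audio path, purely for illustration
transcriber = AudioShakeTranscriber(api_token=os.environ["AUDIOSHAKE_API_TOKEN"], logger=logger)
result = transcriber.transcribe("/path/to/song.flac")  # blocks, polling the job every 5 seconds

# Each segment is guaranteed to carry "text" and a "words" list by _process_result
for segment in result["segments"]:
    print(segment["text"], [word["text"] for word in segment["words"]])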
--- lyrics_transcriber-0.16.3/lyrics_transcriber/transcriber.py
+++ lyrics_transcriber-0.17.0/lyrics_transcriber/transcriber.py
@@ -14,6 +14,8 @@ from datetime import timedelta
 from .utils import subtitles
 from typing import List, Optional
 from openai import OpenAI
+from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type
+import requests
 
 
 class LyricsTranscriber:
@@ -536,6 +538,16 @@ class LyricsTranscriber:
                     self.outputs["spotify_lyrics_text"] += line["words"].strip() + "\n"
                     f.write(line["words"].strip() + "\n")
 
+    @retry(
+        stop=stop_after_delay(120),  # Stop after 2 minutes
+        wait=wait_exponential(multiplier=1, min=4, max=60),  # Exponential backoff starting at 4 seconds
+        retry=retry_if_exception_type(requests.exceptions.RequestException),  # Retry on request exceptions
+        reraise=True,  # Reraise the last exception if all retries fail
+    )
+    def fetch_genius_lyrics(self, genius, title, artist):
+        self.logger.debug(f"fetch_genius_lyrics attempting to fetch lyrics from Genius for {title} by {artist}")
+        return genius.search_song(title, artist)
+
     def write_genius_lyrics_file(self):
         if self.genius_api_token and self.song_known:
             self.logger.debug(f"attempting genius fetch as genius_api_token and song name was set")
@@ -556,18 +568,22 @@
             self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")
             genius = lyricsgenius.Genius(self.genius_api_token, verbose=(self.log_level == logging.DEBUG))
 
-
-
-
-
-
+            try:
+                song = self.fetch_genius_lyrics(genius, self.title, self.artist)
+                if song is None:
+                    self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
+                    return
+                lyrics = self.clean_genius_lyrics(song.lyrics)
 
-
-
-
+                self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
+                with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
+                    f.write(lyrics)
 
-
-
+                self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
+                self.outputs["genius_lyrics_text"] = lyrics
+            except requests.exceptions.RequestException as e:
+                self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}")
+                raise
 
     def clean_genius_lyrics(self, lyrics):
         lyrics = lyrics.replace("\\n", "\n")
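The Genius lookup is now routed through the tenacity-decorated fetch_genius_lyrics helper, with the caller catching RequestException once the retries are exhausted. A minimal, self-contained sketch of the same retry policy applied to an arbitrary HTTP call; flaky_fetch and its URL are hypothetical and only illustrate how the decorator behaves:

import requests
from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type

@retry(
    stop=stop_after_delay(120),  # give up after 2 minutes overall
    wait=wait_exponential(multiplier=1, min=4, max=60),  # back off 4s, 8s, 16s, ... capped at 60s
    retry=retry_if_exception_type(requests.exceptions.RequestException),  # only retry network-level failures
    reraise=True,  # surface the last exception to the caller instead of a RetryError
)
def flaky_fetch(url):
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # HTTPError is a RequestException subclass, so it is retried too
    return response.json()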
@@ -928,10 +944,10 @@ class LyricsTranscriber:
         transcription_cache_suffix = "-audioshake" if self.audioshake_api_token else "-whisper"
         self.outputs["transcription_data_filepath"] = self.get_cache_filepath(f"{transcription_cache_suffix}.json")
 
-
-        if os.path.isfile(
-            self.logger.debug(f"transcribe found existing file at
-            with open(
+        transcription_cache_filepath = self.outputs["transcription_data_filepath"]
+        if os.path.isfile(transcription_cache_filepath):
+            self.logger.debug(f"transcribe found existing file at transcription_cache_filepath, reading: {transcription_cache_filepath}")
+            with open(transcription_cache_filepath, "r") as cache_file:
                 self.outputs["transcription_data_dict"] = json.load(cache_file)
                 return
 
@@ -939,28 +955,28 @@
             self.logger.debug(f"Using AudioShake API for transcription")
             from .audioshake_transcriber import AudioShakeTranscriber
 
-            audioshake = AudioShakeTranscriber(self.audioshake_api_token,
-
+            audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
+            transcription_data = audioshake.transcribe(self.audio_filepath)
         else:
             self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
             audio = whisper.load_audio(self.audio_filepath)
             model = whisper.load_model(self.transcription_model, device="cpu")
-
+            transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
 
         # Remove segments with no words, only music
-
-        self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(
+        transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]
+        self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(transcription_data['segments'])}")
 
         # Split long segments
         self.logger.debug("Starting to split long segments")
-
-        self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(
+        transcription_data["segments"] = self.split_long_segments(transcription_data["segments"], max_length=36)
+        self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(transcription_data['segments'])}")
 
-        self.logger.debug(f"writing transcription data JSON to cache file: {
-        with open(
-            json.dump(
+        self.logger.debug(f"writing transcription data JSON to cache file: {transcription_cache_filepath}")
+        with open(transcription_cache_filepath, "w") as cache_file:
+            json.dump(transcription_data, cache_file, indent=4)
 
-        self.outputs["transcription_data_dict"] =
+        self.outputs["transcription_data_dict"] = transcription_data
 
     def get_cache_filepath(self, extension):
         filename = os.path.split(self.audio_filepath)[1]
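The transcribe() refactor builds one local transcription_data dict regardless of backend, drops instrumental-only segments, and caches the result as JSON so repeat runs can skip the expensive step. A self-contained sketch of those post-processing and caching steps, using a hand-made dict in place of real Whisper/AudioShake output; the temp-file path is an illustrative assumption:

import json
import os
import tempfile

transcription_data = {
    "text": "Music Hello world",
    "segments": [
        {"text": " Music ", "words": []},
        {"text": "Hello world", "words": [{"text": "Hello"}, {"text": "world"}]},
    ],
}

# Drop instrumental-only segments, matching the literal "Music" check in the diff above
transcription_data["segments"] = [s for s in transcription_data["segments"] if s["text"].strip() != "Music"]

# Persist the cleaned result as JSON so a later run can short-circuit transcription
cache_filepath = os.path.join(tempfile.gettempdir(), "example-transcription.json")
with open(cache_filepath, "w") as cache_file:
    json.dump(transcription_data, cache_file, indent=4)

with open(cache_filepath, "r") as cache_file:
    print(json.load(cache_file)["segments"])  # only the "Hello world" segment remains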
--- lyrics_transcriber-0.16.3/pyproject.toml
+++ lyrics_transcriber-0.17.0/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.16.3"
+version = "0.17.0"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
@@ -30,6 +30,7 @@ openai-whisper = ">=20231117"
 transformers = ">=4"
 auditok = ">=0.2"
 whisper-timestamped = ">=1"
+tenacity = ">=8"
 # Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton
 # from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.
 # This was the only way I was able to get a working cross-platform build published to PyPI.
@@ -47,4 +48,4 @@ lyrics-transcriber = 'lyrics_transcriber.utils.cli:main'
 
 [build-system]
 requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+build-backend = "poetry.core.masonry.api"
@@ -1,35 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import requests
|
3
|
-
|
4
|
-
|
5
|
-
class AudioShakeTranscriber:
|
6
|
-
def __init__(self, api_token, log_level=logging.DEBUG):
|
7
|
-
self.api_token = api_token
|
8
|
-
self.logger = logging.getLogger(__name__)
|
9
|
-
self.logger.setLevel(log_level)
|
10
|
-
|
11
|
-
def transcribe(self, audio_filepath):
|
12
|
-
# This is a placeholder for the actual AudioShake API implementation
|
13
|
-
self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
|
14
|
-
|
15
|
-
self.logger.debug(f"AudioShake API token: {self.api_token}")
|
16
|
-
# TODO: Implement the actual API call to AudioShake
|
17
|
-
# For now, we'll return a dummy result
|
18
|
-
return {
|
19
|
-
"transcription_data_dict": {
|
20
|
-
"segments": [
|
21
|
-
{
|
22
|
-
"start": 0,
|
23
|
-
"end": 5,
|
24
|
-
"text": "This is a dummy transcription",
|
25
|
-
"words": [
|
26
|
-
{"text": "This", "start": 0, "end": 1},
|
27
|
-
{"text": "is", "start": 1, "end": 2},
|
28
|
-
{"text": "a", "start": 2, "end": 3},
|
29
|
-
{"text": "dummy", "start": 3, "end": 4},
|
30
|
-
{"text": "transcription", "start": 4, "end": 5},
|
31
|
-
],
|
32
|
-
}
|
33
|
-
]
|
34
|
-
}
|
35
|
-
}
|