lyrics-transcriber 0.16.4__tar.gz → 0.17.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/PKG-INFO +1 -1
- lyrics_transcriber-0.17.0/lyrics_transcriber/audioshake_transcriber.py +93 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/transcriber.py +15 -15
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/pyproject.toml +1 -1
- lyrics_transcriber-0.16.4/lyrics_transcriber/audioshake_transcriber.py +0 -35
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/LICENSE +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/README.md +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/__init__.py +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/README.md +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/__init__.py +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/ass.py +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/cli.py +0 -0
- {lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/subtitles.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.17.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
|
6
6
|
License: MIT
|
@@ -0,0 +1,93 @@
|
|
1
|
+
import requests
|
2
|
+
import time
|
3
|
+
import os
|
4
|
+
import json
|
5
|
+
|
6
|
+
|
7
|
+
class AudioShakeTranscriber:
    """Transcribe an audio file using the AudioShake alignment API.

    Workflow: upload the audio file as an asset, create an alignment job,
    poll until the job completes, then download the transcription output
    and normalise it into the ``{"segments": [...], "text": ...}`` shape
    the rest of the pipeline expects.
    """

    def __init__(self, api_token, logger):
        # Bearer token used on every request to the AudioShake API.
        self.api_token = api_token
        self.base_url = "https://groovy.audioshake.ai"
        self.logger = logger

    def transcribe(self, audio_filepath):
        """Run the full upload -> create job -> poll -> process pipeline.

        Returns a dict with "segments" (each guaranteed to have "text" and
        "words" keys) and a top-level "text" key.

        Raises:
            requests.HTTPError: on any non-2xx API response.
            Exception: if the job reports a failed status, or the job
                results contain no transcription output.
        """
        self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")

        # Step 1: Upload the audio file
        asset_id = self._upload_file(audio_filepath)
        self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")

        # Step 2: Create a job for transcription and alignment
        job_id = self._create_job(asset_id)
        self.logger.debug(f"Job created successfully. Job ID: {job_id}")

        # Step 3: Wait for the job to complete and get the results
        result = self._get_job_result(job_id)
        self.logger.debug("Job completed. Processing results...")

        # Step 4: Process the result and return in the required format
        return self._process_result(result)

    def _upload_file(self, filepath):
        """Upload *filepath* as an AudioShake asset; return the asset id."""
        self.logger.debug(f"Uploading {filepath} to AudioShake")
        url = f"{self.base_url}/upload"
        headers = {"Authorization": f"Bearer {self.api_token}"}
        with open(filepath, "rb") as file:
            files = {"file": (os.path.basename(filepath), file)}
            response = requests.post(url, headers=headers, files=files)

        self.logger.debug(f"Upload response status code: {response.status_code}")
        self.logger.debug(f"Upload response content: {response.text}")

        response.raise_for_status()
        return response.json()["id"]

    def _create_job(self, asset_id):
        """Create an alignment job for *asset_id*; return the job id."""
        self.logger.debug(f"Creating job for asset {asset_id}")
        url = f"{self.base_url}/job/"
        headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
        data = {
            "metadata": {"format": "json", "name": "alignment", "language": "en"},
            # NOTE(review): placeholder callback URL — results are fetched by
            # polling below, so this appears unused; confirm the API requires it.
            "callbackUrl": "https://example.com/webhook/alignment",
            "assetId": asset_id,
        }
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
        return response.json()["job"]["id"]

    def _get_job_result(self, job_id):
        """Poll the job endpoint until completion; return the job payload.

        NOTE(review): there is no overall timeout — polling continues
        indefinitely at 5-second intervals until the job completes or fails.
        """
        self.logger.debug(f"Getting job result for job {job_id}")
        url = f"{self.base_url}/job/{job_id}"
        headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
        while True:
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            job_data = response.json()["job"]
            if job_data["status"] == "completed":
                return job_data
            elif job_data["status"] == "failed":
                # Include the job id so failures are traceable in logs.
                raise Exception(f"AudioShake transcription job {job_id} failed")
            time.sleep(5)  # Wait 5 seconds before checking again

    def _process_result(self, job_data):
        """Download the transcription output asset and normalise its shape."""
        self.logger.debug(f"Processing result for job {job_data}")
        output_asset = next((asset for asset in job_data["outputAssets"] if asset["name"] == "transcription.json"), None)

        if not output_asset:
            raise Exception("Transcription output not found in job results")

        transcription_url = output_asset["link"]
        response = requests.get(transcription_url)
        response.raise_for_status()
        transcription_data = response.json()

        # The API returns "lines"; downstream code expects "segments".
        transcription_data = {"segments": transcription_data.get("lines", []), "text": transcription_data.get("text", "")}

        # Ensure each segment has the required fields
        for segment in transcription_data["segments"]:
            if "words" not in segment:
                segment["words"] = []
            if "text" not in segment:
                segment["text"] = " ".join(word["text"] for word in segment["words"])

        return transcription_data
|
@@ -944,10 +944,10 @@ class LyricsTranscriber:
|
|
944
944
|
transcription_cache_suffix = "-audioshake" if self.audioshake_api_token else "-whisper"
|
945
945
|
self.outputs["transcription_data_filepath"] = self.get_cache_filepath(f"{transcription_cache_suffix}.json")
|
946
946
|
|
947
|
-
|
948
|
-
if os.path.isfile(
|
949
|
-
self.logger.debug(f"transcribe found existing file at
|
950
|
-
with open(
|
947
|
+
transcription_cache_filepath = self.outputs["transcription_data_filepath"]
|
948
|
+
if os.path.isfile(transcription_cache_filepath):
|
949
|
+
self.logger.debug(f"transcribe found existing file at transcription_cache_filepath, reading: {transcription_cache_filepath}")
|
950
|
+
with open(transcription_cache_filepath, "r") as cache_file:
|
951
951
|
self.outputs["transcription_data_dict"] = json.load(cache_file)
|
952
952
|
return
|
953
953
|
|
@@ -955,28 +955,28 @@ class LyricsTranscriber:
|
|
955
955
|
self.logger.debug(f"Using AudioShake API for transcription")
|
956
956
|
from .audioshake_transcriber import AudioShakeTranscriber
|
957
957
|
|
958
|
-
audioshake = AudioShakeTranscriber(self.audioshake_api_token,
|
959
|
-
|
958
|
+
audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
|
959
|
+
transcription_data = audioshake.transcribe(self.audio_filepath)
|
960
960
|
else:
|
961
961
|
self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
|
962
962
|
audio = whisper.load_audio(self.audio_filepath)
|
963
963
|
model = whisper.load_model(self.transcription_model, device="cpu")
|
964
|
-
|
964
|
+
transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
|
965
965
|
|
966
966
|
# Remove segments with no words, only music
|
967
|
-
|
968
|
-
self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(
|
967
|
+
transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]
|
968
|
+
self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(transcription_data['segments'])}")
|
969
969
|
|
970
970
|
# Split long segments
|
971
971
|
self.logger.debug("Starting to split long segments")
|
972
|
-
|
973
|
-
self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(
|
972
|
+
transcription_data["segments"] = self.split_long_segments(transcription_data["segments"], max_length=36)
|
973
|
+
self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(transcription_data['segments'])}")
|
974
974
|
|
975
|
-
self.logger.debug(f"writing transcription data JSON to cache file: {
|
976
|
-
with open(
|
977
|
-
json.dump(
|
975
|
+
self.logger.debug(f"writing transcription data JSON to cache file: {transcription_cache_filepath}")
|
976
|
+
with open(transcription_cache_filepath, "w") as cache_file:
|
977
|
+
json.dump(transcription_data, cache_file, indent=4)
|
978
978
|
|
979
|
-
self.outputs["transcription_data_dict"] =
|
979
|
+
self.outputs["transcription_data_dict"] = transcription_data
|
980
980
|
|
981
981
|
def get_cache_filepath(self, extension):
|
982
982
|
filename = os.path.split(self.audio_filepath)[1]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "lyrics-transcriber"
|
3
|
-
version = "0.
|
3
|
+
version = "0.17.0"
|
4
4
|
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
|
5
5
|
authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
|
6
6
|
license = "MIT"
|
@@ -1,35 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import requests
|
3
|
-
|
4
|
-
|
5
|
-
class AudioShakeTranscriber:
|
6
|
-
def __init__(self, api_token, log_level=logging.DEBUG):
|
7
|
-
self.api_token = api_token
|
8
|
-
self.logger = logging.getLogger(__name__)
|
9
|
-
self.logger.setLevel(log_level)
|
10
|
-
|
11
|
-
def transcribe(self, audio_filepath):
|
12
|
-
# This is a placeholder for the actual AudioShake API implementation
|
13
|
-
self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
|
14
|
-
|
15
|
-
self.logger.debug(f"AudioShake API token: {self.api_token}")
|
16
|
-
# TODO: Implement the actual API call to AudioShake
|
17
|
-
# For now, we'll return a dummy result
|
18
|
-
return {
|
19
|
-
"transcription_data_dict": {
|
20
|
-
"segments": [
|
21
|
-
{
|
22
|
-
"start": 0,
|
23
|
-
"end": 5,
|
24
|
-
"text": "This is a dummy transcription",
|
25
|
-
"words": [
|
26
|
-
{"text": "This", "start": 0, "end": 1},
|
27
|
-
{"text": "is", "start": 1, "end": 2},
|
28
|
-
{"text": "a", "start": 2, "end": 3},
|
29
|
-
{"text": "dummy", "start": 3, "end": 4},
|
30
|
-
{"text": "transcription", "start": 4, "end": 5},
|
31
|
-
],
|
32
|
-
}
|
33
|
-
]
|
34
|
-
}
|
35
|
-
}
|
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/llm_prompts/README.md
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.16.4 → lyrics_transcriber-0.17.0}/lyrics_transcriber/utils/subtitles.py
RENAMED
File without changes
|