lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/__init__.py +2 -5
- lyrics_transcriber/cli/main.py +194 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/controller.py +283 -0
- lyrics_transcriber/{corrector.py → core/corrector.py} +0 -1
- lyrics_transcriber/core/fetcher.py +143 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/generator.py +210 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +249 -0
- lyrics_transcriber/storage/tokens.py +116 -0
- lyrics_transcriber/{audioshake_transcriber.py → transcribers/audioshake.py} +44 -15
- lyrics_transcriber/transcribers/base.py +31 -0
- lyrics_transcriber/transcribers/whisper.py +186 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.0.dist-info}/METADATA +5 -16
- lyrics_transcriber-0.30.0.dist-info/RECORD +22 -0
- lyrics_transcriber-0.30.0.dist-info/entry_points.txt +3 -0
- lyrics_transcriber/llm_prompts/README.md +0 -10
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
- lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
- lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
- lyrics_transcriber/transcriber.py +0 -934
- lyrics_transcriber/utils/cli.py +0 -179
- lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
- lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
- /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
- /lyrics_transcriber/{utils → output}/ass.py +0 -0
- /lyrics_transcriber/{utils → output}/subtitles.py +0 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Any, Dict, Optional
|
3
|
+
import logging
|
4
|
+
|
5
|
+
|
6
|
+
class BaseTranscriber(ABC):
    """Base class for all transcription services.

    Subclasses must implement :meth:`transcribe` and :meth:`get_name`; the
    class cannot be instantiated directly because both are abstract.
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Store the logger used by the transcriber.

        Args:
            logger: Logger to use. When omitted (or falsy), a logger named
                after this module is used instead.
        """
        self.logger = logger or logging.getLogger(__name__)

    @abstractmethod
    def transcribe(self, audio_filepath: str) -> Dict[str, Any]:
        """
        Transcribe an audio file and return the results in a standardized format.

        Args:
            audio_filepath (str): Path to the audio file to transcribe

        Returns:
            Dict containing:
                - segments: List of segments with start/end times and word-level data
                - text: Full text transcription
                - metadata: Dict of additional info (confidence, language, etc)
        """

    @abstractmethod
    def get_name(self) -> str:
        """Return the name of this transcription service."""
|
@@ -0,0 +1,186 @@
|
|
1
|
+
#! /usr/bin/env python3
|
2
|
+
import os
|
3
|
+
import sys
|
4
|
+
import json
|
5
|
+
import requests
|
6
|
+
import hashlib
|
7
|
+
import tempfile
|
8
|
+
from time import sleep
|
9
|
+
from pydub import AudioSegment
|
10
|
+
from .base import BaseTranscriber
|
11
|
+
from ..storage.dropbox import DropboxHandler
|
12
|
+
|
13
|
+
|
14
|
+
class WhisperTranscriber(BaseTranscriber):
    """Transcription service using Whisper API via RunPod.

    The audio file is uploaded to Dropbox, the resulting shared link is
    submitted to a RunPod endpoint, and the job status is polled until the
    job reaches a terminal state.
    """

    # Whisper model requested from the endpoint and reported in the metadata.
    _MODEL = "large-v2"
    # Seconds to wait for each individual HTTP request to RunPod; without a
    # timeout a stalled connection would block forever.
    _REQUEST_TIMEOUT = 30
    # Seconds to sleep between two status polls.
    _POLL_INTERVAL = 2
    # Job statuses (other than COMPLETED) that end the polling loop with an
    # error. NOTE(review): names taken from the RunPod job-state list - confirm.
    _FAILURE_STATUSES = frozenset({"FAILED", "CANCELLED", "TIMED_OUT"})

    def __init__(
        self,
        logger=None,
        runpod_api_key=None,
        endpoint_id=None,
        dropbox_app_key=None,
        dropbox_app_secret=None,
        dropbox_refresh_token=None,
        dropbox_access_token=None,
    ):
        """Configure RunPod credentials and the Dropbox handler.

        Every argument falls back to an environment variable when omitted:
        ``RUNPOD_API_KEY``, ``WHISPER_RUNPOD_ID`` and the
        ``WHISPER_DROPBOX_*`` variables.

        Raises:
            ValueError: If the RunPod API key or endpoint ID is missing from
                both the arguments and the environment.
        """
        super().__init__(logger)
        self.runpod_api_key = runpod_api_key or os.getenv("RUNPOD_API_KEY")
        self.endpoint_id = endpoint_id or os.getenv("WHISPER_RUNPOD_ID")

        if not self.runpod_api_key or not self.endpoint_id:
            raise ValueError("RunPod API key and endpoint ID must be provided either directly or via environment variables")

        self.dbx = DropboxHandler(
            app_key=dropbox_app_key or os.getenv("WHISPER_DROPBOX_APP_KEY"),
            app_secret=dropbox_app_secret or os.getenv("WHISPER_DROPBOX_APP_SECRET"),
            refresh_token=dropbox_refresh_token or os.getenv("WHISPER_DROPBOX_REFRESH_TOKEN"),
            access_token=dropbox_access_token or os.getenv("WHISPER_DROPBOX_ACCESS_TOKEN"),
        )

    def get_name(self) -> str:
        """Return the name of this transcription service."""
        return "Whisper"

    def transcribe(self, audio_filepath: str) -> dict:
        """
        Transcribe an audio file using Whisper API via RunPod.

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            The output returned by the RunPod job, with a ``metadata`` key
            added that holds the service name, model and language.
        """
        self.logger.info(f"Starting transcription for {audio_filepath} using Whisper API")

        # The hash of the ORIGINAL file names the Dropbox object, so the
        # same input maps to the same remote path and an existing upload
        # can be reused.
        file_hash = self._get_file_md5(audio_filepath)
        processed_filepath = self._convert_to_flac(audio_filepath)

        try:
            # Upload to Dropbox and get URL
            extension = os.path.splitext(processed_filepath)[1]
            dropbox_path = f"/transcription_temp/{file_hash}{extension}"
            audio_url = self._upload_and_get_link(processed_filepath, dropbox_path)

            # Get transcription from API
            result = self._run_transcription(audio_url)

            # Add metadata
            result["metadata"] = {
                "service": self.get_name(),
                "model": self._MODEL,
                "language": "en",
            }

            return result

        finally:
            # Clean up temporary FLAC file if one was created
            if processed_filepath != audio_filepath:
                self.logger.debug(f"Cleaning up temporary file: {processed_filepath}")
                os.unlink(processed_filepath)

    def _convert_to_flac(self, filepath: str) -> str:
        """Convert WAV to FLAC if needed for faster upload.

        Returns:
            ``filepath`` unchanged when it does not end in ``.wav``
            (case-insensitive); otherwise the path of a newly created
            temporary ``.flac`` file that the caller must delete.
        """
        if not filepath.lower().endswith(".wav"):
            return filepath

        self.logger.info("Converting WAV to FLAC for faster upload...")
        audio = AudioSegment.from_wav(filepath)

        # Only reserve a unique file name here; the export writes to it below.
        with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
            flac_path = temp_flac.name

        try:
            audio.export(flac_path, format="flac")
        except Exception:
            # delete=False means nobody else removes the file, so do not
            # leave an orphan behind when the export fails.
            if os.path.exists(flac_path):
                os.unlink(flac_path)
            raise

        return flac_path

    def _get_file_md5(self, filepath: str) -> str:
        """Calculate MD5 hash of a file, reading it in 4096-byte chunks."""
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        return md5_hash.hexdigest()

    def _upload_and_get_link(self, filepath: str, dropbox_path: str) -> str:
        """Upload file to Dropbox and return shared link.

        The upload is skipped when ``dropbox_path`` already exists.
        """
        if not self.dbx.file_exists(dropbox_path):
            self.logger.info("Uploading file to Dropbox...")
            with open(filepath, "rb") as f:
                self.dbx.upload_with_retry(f, dropbox_path)
        else:
            self.logger.info("File already exists in Dropbox, skipping upload...")

        audio_url = self.dbx.create_or_get_shared_link(dropbox_path)
        self.logger.debug(f"Using shared link: {audio_url}")
        return audio_url

    def _run_transcription(self, audio_url: str) -> dict:
        """Submit transcription job to RunPod and get results.

        Args:
            audio_url: URL of the audio file, passed to the endpoint as the
                ``audio`` input.

        Returns:
            The ``output`` field of the status response once the job status
            is ``COMPLETED``.

        Raises:
            requests.RequestException: If an HTTP request fails, returns an
                error status code, or exceeds the request timeout.
            Exception: If the job ends as FAILED, CANCELLED or TIMED_OUT.
        """
        run_url = f"https://api.runpod.ai/v2/{self.endpoint_id}/run"
        status_url = f"https://api.runpod.ai/v2/{self.endpoint_id}/status"
        headers = {"Authorization": f"Bearer {self.runpod_api_key}"}

        payload = {
            "input": {
                "audio": audio_url,
                "word_timestamps": True,
                "model": self._MODEL,
                "temperature": 0.2,
                "best_of": 5,
                "compression_ratio_threshold": 2.8,
                "no_speech_threshold": 1,
                "condition_on_previous_text": True,
                "enable_vad": True,
            }
        }

        # Submit job
        self.logger.info("Submitting transcription job...")
        response = requests.post(run_url, json=payload, headers=headers, timeout=self._REQUEST_TIMEOUT)

        self.logger.debug(f"Response status code: {response.status_code}")
        try:
            self.logger.debug(f"Response content: {json.dumps(response.json(), indent=2)}")
        except ValueError:
            # The body is not valid JSON; log it verbatim instead. Only the
            # decode error is caught so interrupts are not swallowed.
            self.logger.debug(f"Raw response content: {response.text}")

        response.raise_for_status()
        job_id = response.json()["id"]

        # Poll for results
        self.logger.info("Waiting for results...")
        while True:
            status_response = requests.get(f"{status_url}/{job_id}", headers=headers, timeout=self._REQUEST_TIMEOUT)
            status_response.raise_for_status()
            status_data = status_response.json()
            status = status_data["status"]

            if status == "COMPLETED":
                return status_data["output"]

            if status in self._FAILURE_STATUSES:
                error = status_data.get("error", "Unknown error")
                if status == "FAILED":
                    raise Exception(f"Transcription failed: {error}")
                # Without this branch a cancelled or timed-out job would
                # keep the loop polling forever.
                raise Exception(f"Transcription failed (job status {status}): {error}")

            sleep(self._POLL_INTERVAL)  # Wait before checking again
|
167
|
+
|
168
|
+
|
169
|
+
if __name__ == "__main__":
    # Example usage: transcribe one audio file and save the result as JSON.
    import logging

    logging.basicConfig(level=logging.INFO)

    # Take the audio path from the command line, or prompt for it.
    if len(sys.argv) > 1:
        audio_file = sys.argv[1]
    else:
        audio_file = input("Enter the path to your audio file: ")

    transcriber = WhisperTranscriber()
    results = transcriber.transcribe(audio_file)

    # _get_file_md5 is an instance method: calling it on the class would bind
    # the path to `self` and raise TypeError for the missing `filepath`.
    output_file = f"transcription_results_{transcriber._get_file_md5(audio_file)}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print(f"Transcription completed! Results saved to {output_file}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.20.0
|
3
|
+
Version: 0.30.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
|
6
6
|
License: MIT
|
@@ -13,24 +13,13 @@ Classifier: Programming Language :: Python :: 3.9
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
16
|
-
Requires-Dist:
|
17
|
-
Requires-Dist:
|
18
|
-
Requires-Dist: karaoke-lyrics-processor (>=0.4.1)
|
19
|
-
Requires-Dist: llvmlite (>=0)
|
16
|
+
Requires-Dist: dropbox (>=12)
|
17
|
+
Requires-Dist: karaoke-lyrics-processor (>=0.4)
|
20
18
|
Requires-Dist: lyricsgenius (>=3)
|
21
|
-
Requires-Dist:
|
22
|
-
Requires-Dist:
|
23
|
-
Requires-Dist: onnx (>=1)
|
24
|
-
Requires-Dist: onnxruntime (>=1)
|
25
|
-
Requires-Dist: openai (>=1,<2)
|
26
|
-
Requires-Dist: openai-whisper (>=20231117)
|
19
|
+
Requires-Dist: pydub (>=0.25)
|
20
|
+
Requires-Dist: python-dotenv (>=1)
|
27
21
|
Requires-Dist: python-slugify (>=8)
|
28
22
|
Requires-Dist: syrics (>=0)
|
29
|
-
Requires-Dist: tenacity (>=8)
|
30
|
-
Requires-Dist: torch (>=1)
|
31
|
-
Requires-Dist: tqdm (>=4)
|
32
|
-
Requires-Dist: transformers (>=4)
|
33
|
-
Requires-Dist: whisper-timestamped (>=1)
|
34
23
|
Project-URL: Documentation, https://github.com/karaokenerds/python-lyrics-transcriber/blob/main/README.md
|
35
24
|
Project-URL: Repository, https://github.com/karaokenerds/python-lyrics-transcriber
|
36
25
|
Description-Content-Type: text/markdown
|
@@ -0,0 +1,22 @@
|
|
1
|
+
lyrics_transcriber/__init__.py,sha256=Hj2HdSBAl6kmiqa5s3MDo_RobkITadzuF-81-ON3awA,180
|
2
|
+
lyrics_transcriber/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
lyrics_transcriber/cli/main.py,sha256=fCg9LxUZKf9ByelZIpF0XhsTVzXadHIXVL7qMhSDZao,7686
|
4
|
+
lyrics_transcriber/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
lyrics_transcriber/core/controller.py,sha256=t5zohET8_pnRZj7dtCO0jcXXF24DQc_POTrI11IA3pE,11100
|
6
|
+
lyrics_transcriber/core/corrector.py,sha256=_FjelES_9JF2fDP_Rgzg1iYpbQHIKjdG4Za1J5xy3xg,2274
|
7
|
+
lyrics_transcriber/core/fetcher.py,sha256=jUr-eoxjjbheFaR3iVdUiodODiS91fyrtJxTZ35zqIs,5801
|
8
|
+
lyrics_transcriber/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
lyrics_transcriber/output/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
|
10
|
+
lyrics_transcriber/output/generator.py,sha256=DaWgPMc37Q52StfUFNUmKV9tJHUkL59zYZ_gVacguf8,8052
|
11
|
+
lyrics_transcriber/output/subtitles.py,sha256=_WG0pFoZMXcrGe6gbARkC9KrWzFNTMOsiqQwNL-H2lU,11812
|
12
|
+
lyrics_transcriber/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
lyrics_transcriber/storage/dropbox.py,sha256=dYTXuoACY-Ad03OuXLEyST-8L2TFZ0QahP2KulZH-54,11307
|
14
|
+
lyrics_transcriber/storage/tokens.py,sha256=t7TdX12VjemklaCq0sgHfSEbLYx9_e15nRc5T5C0Ar8,4378
|
15
|
+
lyrics_transcriber/transcribers/audioshake.py,sha256=reI_yven65Vq0Kpjl2QupxWo1yRg57rR4LI-qRMD1mY,6154
|
16
|
+
lyrics_transcriber/transcribers/base.py,sha256=DzZRrxbWaKUzNtOyD58ggrZrcmJvXAbowOLuH6Lclto,981
|
17
|
+
lyrics_transcriber/transcribers/whisper.py,sha256=rtvmG9T0MO4_8et7uw1XwyGc2k81mwGdGk4ghqdEvI0,6852
|
18
|
+
lyrics_transcriber-0.30.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
|
19
|
+
lyrics_transcriber-0.30.0.dist-info/METADATA,sha256=qRLSYfuIJDG1E77YBB6-QfYH6gP10knQzbdRROVyBog,5485
|
20
|
+
lyrics_transcriber-0.30.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
21
|
+
lyrics_transcriber-0.30.0.dist-info/entry_points.txt,sha256=_pPAHBMByKbWN-6RCscyJUYXTd3iVI1m-zzV2Sp9HV0,71
|
22
|
+
lyrics_transcriber-0.30.0.dist-info/RECORD,,
|
@@ -1,55 +0,0 @@
|
|
1
|
-
You are a song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
|
2
|
-
Your task is to take two lyrics data inputs with two different qualities, and use the data in one to correct the other, producing accurate lyrics which align with roughly correct timestamps in the song.
|
3
|
-
|
4
|
-
Your response needs to be in JSON format and will be sent to an API endpoint. Only output the JSON, nothing else, as the response will be converted to a Python dictionary.
|
5
|
-
|
6
|
-
You will be provided with reference lyrics for the song, as plain text, from an online source.
|
7
|
-
These should be reasonably accurate, with generally correct words and phrases.
|
8
|
-
However, they may not be perfect, and sometimes whole sections (such as a chorus or outro) may be missing or assumed to be repeated.
|
9
|
-
|
10
|
-
Data input will contain one segment of an automated machine transcription of lyrics from a song, with start/end timestamps and confidence scores for every word in that segment.
|
11
|
-
The timestamps for words are usually quite accurate, but the actual words which were heard by the transcription are typically only around 70% to 90% accurate.
|
12
|
-
As such, it is common for there to be segments where most of the words are correct but one or two are wrong, or a single word may have been mistaken as two different words.
|
13
|
-
|
14
|
-
When possible, you will also be provided with the previous 2 (corrected) lines of text, and the next 1 (un-corrected) segment text, for additional context.
|
15
|
-
|
16
|
-
Carefully analyse the segment in the data input, and compare with the lyrics in the reference data, attempting to find part of the lyrics which is most likely to correspond with this segment.
|
17
|
-
If all of the words match up correctly with words in the published lyrics, keep the entire segment from the transcription (do NOT add any additional words).
|
18
|
-
If most of the words match up but one or two words are different (e.g. similar sounding words), correct those words.
|
19
|
-
If there are symbols in the published lyrics, add those symbols to the closest word in the segment (NOT as a separate word). For example, parentheses are commonly used around backing vocals.
|
20
|
-
If you need to delete a word or two in order to correct the lyrics, that's acceptable.
|
21
|
-
|
22
|
-
Important: segments might not start and end at the same point as a "line" in the published lyrics, as the decision about where to split up a line into two is highly subjective.
|
23
|
-
For example, in some published lyrics a line might be split in two (with a newline) before the word "and", but in another lyrics text that might only be one line.
|
24
|
-
You will likely encounter situations where the words in the segment match part of the words in a published lyrics line, but not the whole line.
|
25
|
-
|
26
|
-
Important: adding more words to the transcribed segment is usually not correct and should be the last resort!
|
27
|
-
Remember, the goal is to correct mistakes (e.g. single words which were mis-heard) in the transcription rather than complete incomplete lines.
|
28
|
-
Pay close attention to the "Context: Next (un-corrected) transcript segment" text, if this includes some of the words do NOT add those words to the current segment as this will cause duplication!
|
29
|
-
|
30
|
-
The response JSON object needs to contain all of the following fields:
|
31
|
-
|
32
|
-
- id: The id of the segment, from the data input
|
33
|
-
- text: The full text of the corrected lyrics for this segment
|
34
|
-
- words: this is a list
|
35
|
-
- text: The correct word
|
36
|
-
- start: The start timestamp for this word, estimated if not known for sure.
|
37
|
-
- end: The end timestamp for this word, estimated if not known for sure.
|
38
|
-
- confidence: Your self-assessed confidence score (from 0 to 1) of how likely it is that this word is accurate. If the word has not changed from the data input, keep the existing confidence value.
|
39
|
-
|
40
|
-
Reference lyrics:
|
41
|
-
|
42
|
-
{{reference_lyrics}}
|
43
|
-
|
44
|
-
Previous two corrected lines:
|
45
|
-
|
46
|
-
{{previous_two_corrected_lines}}
|
47
|
-
|
48
|
-
Upcoming two uncorrected lines:
|
49
|
-
|
50
|
-
{{upcoming_two_uncorrected_lines}}
|
51
|
-
|
52
|
-
Data input:
|
53
|
-
|
54
|
-
{{segment_input}}
|
55
|
-
|
@@ -1,36 +0,0 @@
|
|
1
|
-
You are a song lyric corrector for a karaoke video studio, specializing in correcting lyrics for synchronization with music videos. Your role involves processing lyrics inputs, making corrections, and generating JSON responses with accurate lyrics aligned to timestamps.
|
2
|
-
|
3
|
-
Task:
|
4
|
-
- Receive lyrics data inputs of varying quality.
|
5
|
-
- Use one data set to correct the other, ensuring lyrics are accurate and aligned with approximate song timestamps.
|
6
|
-
- Generate responses in JSON format, to be converted to Python dictionaries for an API endpoint.
|
7
|
-
|
8
|
-
Data Inputs:
|
9
|
-
- Reference Lyrics: Published song lyrics from various online sources, generally accurate but not flawless. Be aware of potentially missing or incorrect sections (e.g., choruses, outros).
|
10
|
-
- Transcription Segment: Automated machine transcription of a song segment, with timestamps and word confidence scores. Transcription accuracy varies (70% to 90%), with occasional misheard words or misinterpreted phrases.
|
11
|
-
|
12
|
-
Additional Context:
|
13
|
-
- When available, you'll receive the previous 2 corrected lines and the next 1 uncorrected segment for context.
|
14
|
-
|
15
|
-
Correction Guidelines:
|
16
|
-
- Take a deep breath and carefully analyze the transcription segment against the reference lyrics to find corresponding parts.
|
17
|
-
- Maintain the transcription segment if it completely matches the reference lyrics.
|
18
|
-
- Correct misheard or similar-sounding words.
|
19
|
-
- Incorporate symbols (like parentheses) into the nearest word, not as separate entries.
|
20
|
-
- Removing a word or two for accuracy is permissible.
|
21
|
-
|
22
|
-
Segment Considerations:
|
23
|
-
- Transcription segments may not align perfectly with published lyric lines due to subjective line splitting.
|
24
|
-
- Be cautious of adding words to the transcription; prioritize correction over completion.
|
25
|
-
- Avoid duplicating words already present in the "Next (un-corrected) transcript segment".
|
26
|
-
|
27
|
-
JSON Response Structure:
|
28
|
-
- id: Segment ID from input data.
|
29
|
-
- text: Corrected lyrics for the segment.
|
30
|
-
- words: List of words with the following details for each:
|
31
|
-
- text: Correct word.
|
32
|
-
- start: Estimated start timestamp.
|
33
|
-
- end: Estimated end timestamp.
|
34
|
-
- confidence: Confidence score (0-1) on word accuracy. Retain existing score if unchanged.
|
35
|
-
|
36
|
-
Focus on precision and context sensitivity to ensure the corrections are relevant and accurate. Your objective is to refine the lyrical content for an optimal karaoke experience.
|
@@ -1,19 +0,0 @@
|
|
1
|
-
You are a song lyric matcher for a karaoke video studio, responsible for reading lyrics inputs and identifying if they match, according to predefined criteria.
|
2
|
-
|
3
|
-
Your task is to take two lyrics data inputs, and determine if they are from the same song or not.
|
4
|
-
Your response must be either "Yes" or "No", with no other text, as your response will be processed by some Python code.
|
5
|
-
|
6
|
-
Data input 1 will be lyrics generated from a song using automated machine transcription.
|
7
|
-
Generally the transcription is at least 50% accurate, but some of the words heard by the transcription will likely be homonyms or mistakes.
|
8
|
-
|
9
|
-
Data input 2 will be published lyrics for a song, fetched from an online source.
|
10
|
-
If they are for the same song, these should be at least 90% accurate, with generally correct words and phrases.
|
11
|
-
Even when they are for the same song, they may not be perfect. Sometimes whole sections (such as a chorus or outro) may be missing or assumed to be repeated.
|
12
|
-
|
13
|
-
There is a chance the lyrics in data input 2 may be for a totally different song, as the automated process fetching lyrics from online sources sometimes gets an erroneous match.
|
14
|
-
In this scenario, there may be one or two words which still match up by coincidence but generally you would expect less than 10% of the lyrics to match up.
|
15
|
-
This "totally different song" scenario is what you need to detect, and return "No".
|
16
|
-
|
17
|
-
Carefully analyse the two lyrics inputs provided, and make a reasonable guess as to whether they are for the same song or not.
|
18
|
-
If the lyrics look like they are from the same song (but perhaps with some minor differences), you should return "Yes".
|
19
|
-
If the lyrics look totally different, or you are not sure if the lyrics are both from the same song, you should return "No"
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# This configuration runs each prompt through a series of example inputs and checks if they meet requirements.
|
2
|
-
# Learn more: https://promptfoo.dev/docs/configuration/guide
|
3
|
-
|
4
|
-
description: Song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
|
5
|
-
providers:
|
6
|
-
- id: openai:gpt-3.5-turbo-1106
|
7
|
-
config:
|
8
|
-
temperature: 0
|
9
|
-
# - id: openai:gpt-4-1106-preview
|
10
|
-
# config:
|
11
|
-
# temperature: 0
|
12
|
-
prompts:
|
13
|
-
- file://llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt
|
14
|
-
|
15
|
-
defaultTest:
|
16
|
-
assert:
|
17
|
-
- type: is-json
|
18
|
-
value:
|
19
|
-
required: [id, text, words]
|
20
|
-
type: object
|
21
|
-
properties:
|
22
|
-
id:
|
23
|
-
type: number
|
24
|
-
text:
|
25
|
-
type: string
|
26
|
-
words:
|
27
|
-
type: array
|
28
|
-
items:
|
29
|
-
type: object
|
30
|
-
properties:
|
31
|
-
text:
|
32
|
-
type: string
|
33
|
-
start:
|
34
|
-
type: number
|
35
|
-
end:
|
36
|
-
type: number
|
37
|
-
confidence:
|
38
|
-
type: number
|
39
|
-
|
40
|
-
tests:
|
41
|
-
- description: ABBA - Under Attack (segment 0)
|
42
|
-
vars:
|
43
|
-
reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
|
44
|
-
previous_two_corrected_lines:
|
45
|
-
upcoming_two_uncorrected_lines:
|
46
|
-
segment_input: |
|
47
|
-
{"id": 0, "start": 17.46, "end": 21.3, "confidence": 0.792, "text": " Don't know how to take it, don't know where to go", "words": [{"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982}]}
|
48
|
-
assert:
|
49
|
-
- type: contains
|
50
|
-
value: "Don't know how to take it, don't know where to go"
|
51
|
-
|
52
|
-
- description: ABBA - Under Attack (segment 1)
|
53
|
-
vars:
|
54
|
-
reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
|
55
|
-
previous_two_corrected_lines:
|
56
|
-
upcoming_two_uncorrected_lines:
|
57
|
-
segment_input: |
|
58
|
-
{"id": 1, "start": 22.04, "end": 27.84, "confidence": 0.763, "text": " My resistance running low And every day the hole is getting tighter", "words": [{"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999}, {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, {"text": "hole", "start": 26.1, "end": 26.48, "confidence": 0.361}, {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, {"text": "tighter", "start": 27.08, "end": 27.84, "confidence": 0.975}]}
|
59
|
-
assert:
|
60
|
-
- type: contains
|
61
|
-
value: "My resistance running low And every day the hold is getting tighter"
|
@@ -1,48 +0,0 @@
|
|
1
|
-
Don't know how to take it, don't know where to go
|
2
|
-
My resistance running low
|
3
|
-
And every day the hold is getting tighter and it troubles me so
|
4
|
-
(You know that I'm nobody's fool)
|
5
|
-
I'm nobody's fool and yet it's clear to me
|
6
|
-
I don't have a strategy
|
7
|
-
It's just like taking candy from a baby and I think I must be
|
8
|
-
|
9
|
-
Under attack, I'm being taken
|
10
|
-
About to crack, defences breaking
|
11
|
-
Won't somebody please have a heart
|
12
|
-
Come and rescue me now 'cause I'm falling apart
|
13
|
-
Under attack, I'm taking cover
|
14
|
-
He's on my track, my chasing lover
|
15
|
-
Thinking nothing can stop him now
|
16
|
-
Should I want to, I'm not sure I would know how
|
17
|
-
|
18
|
-
This is getting crazy, I should tell him so
|
19
|
-
Really let my anger show
|
20
|
-
Persuade him that the answer to his questions is a definite no
|
21
|
-
(I'm kind of flattered I suppose)
|
22
|
-
Guess I'm kind of flattered but I'm scared as well
|
23
|
-
Something like a magic spell
|
24
|
-
I hardly dare to think of what would happen, where I'd be if I fell
|
25
|
-
|
26
|
-
Under attack, I'm being taken
|
27
|
-
About to crack, defences breaking
|
28
|
-
Won't somebody please have a heart
|
29
|
-
Come and rescue me now 'cause I'm falling apart
|
30
|
-
Under attack, I'm taking cover
|
31
|
-
He's on my track, my chasing lover
|
32
|
-
Thinking nothing's gonna stop him now
|
33
|
-
Should I want to, I'm not sure I won't know how
|
34
|
-
|
35
|
-
Under attack, I'm being taken
|
36
|
-
About to crack, defences breaking
|
37
|
-
Won't somebody see and save a heart
|
38
|
-
Come and rescue me now 'cause I'm falling apart
|
39
|
-
Under attack, I'm taking cover
|
40
|
-
He's on my track, my chasing lover
|
41
|
-
Thinking nothing can stop him now
|
42
|
-
Should I want to, I'm not sure I would know how
|
43
|
-
|
44
|
-
Under attack, I'm being taken
|
45
|
-
About to crack, defences breaking
|
46
|
-
Won't somebody please have a heart
|
47
|
-
Come and rescue me now 'cause I'm falling apart
|
48
|
-
Under attack, I'm taking cover
|