lyrics-transcriber 0.19.2__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. lyrics_transcriber/__init__.py +2 -5
  2. lyrics_transcriber/cli/main.py +194 -0
  3. lyrics_transcriber/core/__init__.py +0 -0
  4. lyrics_transcriber/core/controller.py +283 -0
  5. lyrics_transcriber/core/corrector.py +56 -0
  6. lyrics_transcriber/core/fetcher.py +143 -0
  7. lyrics_transcriber/output/__init__.py +0 -0
  8. lyrics_transcriber/output/generator.py +210 -0
  9. lyrics_transcriber/storage/__init__.py +0 -0
  10. lyrics_transcriber/storage/dropbox.py +249 -0
  11. lyrics_transcriber/storage/tokens.py +116 -0
  12. lyrics_transcriber/{audioshake_transcriber.py → transcribers/audioshake.py} +44 -15
  13. lyrics_transcriber/transcribers/base.py +31 -0
  14. lyrics_transcriber/transcribers/whisper.py +186 -0
  15. {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.30.0.dist-info}/METADATA +6 -17
  16. lyrics_transcriber-0.30.0.dist-info/RECORD +22 -0
  17. lyrics_transcriber-0.30.0.dist-info/entry_points.txt +3 -0
  18. lyrics_transcriber/llm_prompts/README.md +0 -10
  19. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
  20. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
  21. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
  22. lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
  23. lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
  24. lyrics_transcriber/transcriber.py +0 -1128
  25. lyrics_transcriber/utils/cli.py +0 -179
  26. lyrics_transcriber-0.19.2.dist-info/RECORD +0 -18
  27. lyrics_transcriber-0.19.2.dist-info/entry_points.txt +0 -3
  28. /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
  29. /lyrics_transcriber/{utils → output}/ass.py +0 -0
  30. /lyrics_transcriber/{utils → output}/subtitles.py +0 -0
  31. {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.30.0.dist-info}/LICENSE +0 -0
  32. {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.30.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,31 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any
3
+ import logging
4
+
5
+
6
class BaseTranscriber(ABC):
    """Abstract interface shared by every transcription backend.

    Concrete subclasses must implement ``transcribe`` and ``get_name``;
    the class cannot be instantiated until both are provided.
    """

    def __init__(self, logger: logging.Logger = None):
        """Store the logger to use, defaulting to this module's logger.

        Args:
            logger: Logger for diagnostic output. When omitted (or falsy),
                ``logging.getLogger(__name__)`` is used instead.
        """
        if not logger:
            logger = logging.getLogger(__name__)
        self.logger = logger

    @abstractmethod
    def transcribe(self, audio_filepath: str) -> Dict[str, Any]:
        """
        Transcribe an audio file and return the results in a standardized format.

        Args:
            audio_filepath (str): Path to the audio file to transcribe

        Returns:
            Dict containing:
                - segments: List of segments with start/end times and word-level data
                - text: Full text transcription
                - metadata: Dict of additional info (confidence, language, etc)
        """
        return None

    @abstractmethod
    def get_name(self) -> str:
        """Return the name of this transcription service."""
        return None
@@ -0,0 +1,186 @@
1
+ #! /usr/bin/env python3
2
+ import os
3
+ import sys
4
+ import json
5
+ import requests
6
+ import hashlib
7
+ import tempfile
8
+ from time import sleep
9
+ from pydub import AudioSegment
10
+ from .base import BaseTranscriber
11
+ from ..storage.dropbox import DropboxHandler
12
+
13
+
14
class WhisperTranscriber(BaseTranscriber):
    """Transcription service using Whisper API via RunPod.

    The audio file is (optionally) converted to FLAC, uploaded to Dropbox,
    and the resulting shared link is handed to a RunPod endpoint which is
    then polled until the job reaches a terminal state.
    """

    # Upper bound, in seconds, for any single HTTP call to RunPod. Without a
    # timeout ``requests`` waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT_SECONDS = 60

    # Delay, in seconds, between two status polls.
    POLL_INTERVAL_SECONDS = 2

    # Terminal job states other than COMPLETED/FAILED. Treated as errors so
    # the polling loop cannot spin forever on a job that will never complete.
    # NOTE(review): state names taken from RunPod's job-status documentation;
    # confirm against the deployed endpoint.
    _ABORTED_JOB_STATES = ("CANCELLED", "TIMED_OUT")

    def __init__(
        self,
        logger=None,
        runpod_api_key=None,
        endpoint_id=None,
        dropbox_app_key=None,
        dropbox_app_secret=None,
        dropbox_refresh_token=None,
        dropbox_access_token=None,
    ):
        """Configure RunPod credentials and the Dropbox handler.

        Every argument left as ``None`` falls back to an environment
        variable (``RUNPOD_API_KEY``, ``WHISPER_RUNPOD_ID``,
        ``WHISPER_DROPBOX_APP_KEY``, ``WHISPER_DROPBOX_APP_SECRET``,
        ``WHISPER_DROPBOX_REFRESH_TOKEN``, ``WHISPER_DROPBOX_ACCESS_TOKEN``).

        Raises:
            ValueError: If the RunPod API key or endpoint ID is missing from
                both the arguments and the environment.
        """
        super().__init__(logger)
        self.runpod_api_key = runpod_api_key or os.getenv("RUNPOD_API_KEY")
        self.endpoint_id = endpoint_id or os.getenv("WHISPER_RUNPOD_ID")

        if not self.runpod_api_key or not self.endpoint_id:
            raise ValueError("RunPod API key and endpoint ID must be provided either directly or via environment variables")

        self.dbx = DropboxHandler(
            app_key=dropbox_app_key or os.getenv("WHISPER_DROPBOX_APP_KEY"),
            app_secret=dropbox_app_secret or os.getenv("WHISPER_DROPBOX_APP_SECRET"),
            refresh_token=dropbox_refresh_token or os.getenv("WHISPER_DROPBOX_REFRESH_TOKEN"),
            access_token=dropbox_access_token or os.getenv("WHISPER_DROPBOX_ACCESS_TOKEN"),
        )

    def get_name(self) -> str:
        """Return the name of this transcription service."""
        return "Whisper"

    def transcribe(self, audio_filepath: str) -> dict:
        """
        Transcribe an audio file using Whisper API via RunPod.

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            The RunPod job output with a ``metadata`` entry added. The output
            is presumably a dict with ``segments`` and ``text`` keys (verify
            against the endpoint); ``metadata`` holds the service name plus
            the hard-coded model ("large-v2") and language ("en").

        Raises:
            requests.HTTPError: If RunPod answers with an error status.
            Exception: If the job ends in a non-successful terminal state.
        """
        self.logger.info(f"Starting transcription for {audio_filepath} using Whisper API")

        # The hash of the *original* file names the upload, so the same input
        # always maps to the same Dropbox path.
        file_hash = self._get_file_md5(audio_filepath)
        processed_filepath = self._convert_to_flac(audio_filepath)

        try:
            # Upload to Dropbox and get URL
            dropbox_path = f"/transcription_temp/{file_hash}{os.path.splitext(processed_filepath)[1]}"
            audio_url = self._upload_and_get_link(processed_filepath, dropbox_path)

            # Get transcription from API
            result = self._run_transcription(audio_url)

            # Add metadata (model and language are fixed values, not detected)
            result["metadata"] = {
                "service": self.get_name(),
                "model": "large-v2",
                "language": "en",
            }

            return result

        finally:
            # Clean up temporary FLAC file if one was created
            if processed_filepath != audio_filepath:
                self.logger.debug(f"Cleaning up temporary file: {processed_filepath}")
                try:
                    os.unlink(processed_filepath)
                except OSError as cleanup_error:
                    # A failed cleanup must not mask the real error or discard
                    # a successful transcription result.
                    self.logger.warning(f"Could not remove temporary file {processed_filepath}: {cleanup_error}")

    def _convert_to_flac(self, filepath: str) -> str:
        """Convert WAV to FLAC if needed for faster upload.

        Args:
            filepath: Path to the input audio file.

        Returns:
            ``filepath`` unchanged when it does not end in ``.wav``
            (case-insensitive); otherwise the path of a newly created
            temporary ``.flac`` file that the caller must delete.
        """
        if not filepath.lower().endswith(".wav"):
            return filepath

        self.logger.info("Converting WAV to FLAC for faster upload...")
        audio = AudioSegment.from_wav(filepath)

        # delete=False keeps the file after the handle closes; we only need a
        # unique path here, so close our handle before the export writes to it.
        with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
            flac_path = temp_flac.name

        try:
            audio.export(flac_path, format="flac")
        except Exception:
            # The caller never learns this path when the export fails, so
            # remove the placeholder here instead of leaking it.
            try:
                os.unlink(flac_path)
            except OSError:
                pass
            raise

        return flac_path

    def _get_file_md5(self, filepath: str) -> str:
        """Calculate MD5 hash of a file.

        The file is read in 4096-byte chunks so it is never loaded into
        memory in full.

        Returns:
            The hexadecimal MD5 digest of the file's contents.
        """
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        return md5_hash.hexdigest()

    def _upload_and_get_link(self, filepath: str, dropbox_path: str) -> str:
        """Upload file to Dropbox and return shared link.

        The upload is skipped when the handler reports that ``dropbox_path``
        already exists.

        Args:
            filepath: Local file to upload.
            dropbox_path: Destination path inside Dropbox.

        Returns:
            The value returned by the handler's ``create_or_get_shared_link``
            (presumably a URL the RunPod worker can download — verify).
        """
        if not self.dbx.file_exists(dropbox_path):
            self.logger.info("Uploading file to Dropbox...")
            with open(filepath, "rb") as f:
                self.dbx.upload_with_retry(f, dropbox_path)
        else:
            self.logger.info("File already exists in Dropbox, skipping upload...")

        audio_url = self.dbx.create_or_get_shared_link(dropbox_path)
        self.logger.debug(f"Using shared link: {audio_url}")
        return audio_url

    def _run_transcription(self, audio_url: str) -> dict:
        """Submit transcription job to RunPod and get results.

        Args:
            audio_url: Link to the audio, sent as the job's ``audio`` input.

        Returns:
            The ``output`` field of the completed job's status response.

        Raises:
            requests.HTTPError: If submitting or polling returns an error status.
            requests.Timeout: If a single HTTP call exceeds
                ``REQUEST_TIMEOUT_SECONDS``.
            Exception: If the job reports FAILED, CANCELLED or TIMED_OUT.
        """
        run_url = f"https://api.runpod.ai/v2/{self.endpoint_id}/run"
        status_url = f"https://api.runpod.ai/v2/{self.endpoint_id}/status"
        headers = {"Authorization": f"Bearer {self.runpod_api_key}"}

        payload = {
            "input": {
                "audio": audio_url,
                "word_timestamps": True,
                "model": "large-v2",
                "temperature": 0.2,
                "best_of": 5,
                "compression_ratio_threshold": 2.8,
                "no_speech_threshold": 1,
                "condition_on_previous_text": True,
                "enable_vad": True,
            }
        }

        # Submit job
        self.logger.info("Submitting transcription job...")
        response = requests.post(run_url, json=payload, headers=headers, timeout=self.REQUEST_TIMEOUT_SECONDS)

        self.logger.debug(f"Response status code: {response.status_code}")
        try:
            self.logger.debug(f"Response content: {json.dumps(response.json(), indent=2)}")
        except ValueError:
            # response.json() signals an undecodable body with a ValueError
            # subclass; fall back to logging the raw text.
            self.logger.debug(f"Raw response content: {response.text}")

        response.raise_for_status()
        job_id = response.json()["id"]

        # Poll for results
        self.logger.info("Waiting for results...")
        while True:
            status_response = requests.get(
                f"{status_url}/{job_id}", headers=headers, timeout=self.REQUEST_TIMEOUT_SECONDS
            )
            status_response.raise_for_status()
            status_data = status_response.json()
            status = status_data["status"]

            if status == "COMPLETED":
                return status_data["output"]
            if status == "FAILED":
                raise Exception(f"Transcription failed: {status_data.get('error', 'Unknown error')}")
            if status in self._ABORTED_JOB_STATES:
                # These jobs will never reach COMPLETED; stop polling.
                raise Exception(
                    f"Transcription job ended with status {status}: {status_data.get('error', 'Unknown error')}"
                )

            sleep(self.POLL_INTERVAL_SECONDS)  # Wait before checking again
167
+
168
+
169
if __name__ == "__main__":
    # Example usage: transcribe the file given as the first command-line
    # argument (or prompted for interactively) and save the result as JSON.
    import logging

    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) > 1:
        audio_file = sys.argv[1]
    else:
        audio_file = input("Enter the path to your audio file: ")

    transcriber = WhisperTranscriber()
    results = transcriber.transcribe(audio_file)

    # _get_file_md5 is an instance method, so it must be called on the
    # instance; calling it on the class with a single argument raises
    # TypeError after the transcription has already finished.
    output_file = f"transcription_results_{transcriber._get_file_md5(audio_file)}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"Transcription completed! Results saved to {output_file}")
@@ -1,36 +1,25 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lyrics-transcriber
3
- Version: 0.19.2
3
+ Version: 0.30.0
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
6
  License: MIT
7
7
  Author: Andrew Beveridge
8
8
  Author-email: andrew@beveridge.uk
9
- Requires-Python: >=3.9
9
+ Requires-Python: >=3.9,<3.13
10
10
  Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
- Classifier: Programming Language :: Python :: 3.13
17
- Requires-Dist: Cython (>=0)
18
- Requires-Dist: dtw-python (>=1)
19
- Requires-Dist: llvmlite (>=0)
16
+ Requires-Dist: dropbox (>=12)
17
+ Requires-Dist: karaoke-lyrics-processor (>=0.4)
20
18
  Requires-Dist: lyricsgenius (>=3)
21
- Requires-Dist: numba (>=0.57)
22
- Requires-Dist: numpy (>=1)
23
- Requires-Dist: onnx (>=1)
24
- Requires-Dist: onnxruntime (>=1)
25
- Requires-Dist: openai (>=1,<2)
26
- Requires-Dist: openai-whisper (>=20231117)
19
+ Requires-Dist: pydub (>=0.25)
20
+ Requires-Dist: python-dotenv (>=1)
27
21
  Requires-Dist: python-slugify (>=8)
28
22
  Requires-Dist: syrics (>=0)
29
- Requires-Dist: tenacity (>=8)
30
- Requires-Dist: torch (>=1)
31
- Requires-Dist: tqdm (>=4)
32
- Requires-Dist: transformers (>=4)
33
- Requires-Dist: whisper-timestamped (>=1)
34
23
  Project-URL: Documentation, https://github.com/karaokenerds/python-lyrics-transcriber/blob/main/README.md
35
24
  Project-URL: Repository, https://github.com/karaokenerds/python-lyrics-transcriber
36
25
  Description-Content-Type: text/markdown
@@ -0,0 +1,22 @@
1
+ lyrics_transcriber/__init__.py,sha256=Hj2HdSBAl6kmiqa5s3MDo_RobkITadzuF-81-ON3awA,180
2
+ lyrics_transcriber/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ lyrics_transcriber/cli/main.py,sha256=fCg9LxUZKf9ByelZIpF0XhsTVzXadHIXVL7qMhSDZao,7686
4
+ lyrics_transcriber/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ lyrics_transcriber/core/controller.py,sha256=t5zohET8_pnRZj7dtCO0jcXXF24DQc_POTrI11IA3pE,11100
6
+ lyrics_transcriber/core/corrector.py,sha256=_FjelES_9JF2fDP_Rgzg1iYpbQHIKjdG4Za1J5xy3xg,2274
7
+ lyrics_transcriber/core/fetcher.py,sha256=jUr-eoxjjbheFaR3iVdUiodODiS91fyrtJxTZ35zqIs,5801
8
+ lyrics_transcriber/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ lyrics_transcriber/output/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
10
+ lyrics_transcriber/output/generator.py,sha256=DaWgPMc37Q52StfUFNUmKV9tJHUkL59zYZ_gVacguf8,8052
11
+ lyrics_transcriber/output/subtitles.py,sha256=_WG0pFoZMXcrGe6gbARkC9KrWzFNTMOsiqQwNL-H2lU,11812
12
+ lyrics_transcriber/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ lyrics_transcriber/storage/dropbox.py,sha256=dYTXuoACY-Ad03OuXLEyST-8L2TFZ0QahP2KulZH-54,11307
14
+ lyrics_transcriber/storage/tokens.py,sha256=t7TdX12VjemklaCq0sgHfSEbLYx9_e15nRc5T5C0Ar8,4378
15
+ lyrics_transcriber/transcribers/audioshake.py,sha256=reI_yven65Vq0Kpjl2QupxWo1yRg57rR4LI-qRMD1mY,6154
16
+ lyrics_transcriber/transcribers/base.py,sha256=DzZRrxbWaKUzNtOyD58ggrZrcmJvXAbowOLuH6Lclto,981
17
+ lyrics_transcriber/transcribers/whisper.py,sha256=rtvmG9T0MO4_8et7uw1XwyGc2k81mwGdGk4ghqdEvI0,6852
18
+ lyrics_transcriber-0.30.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
19
+ lyrics_transcriber-0.30.0.dist-info/METADATA,sha256=qRLSYfuIJDG1E77YBB6-QfYH6gP10knQzbdRROVyBog,5485
20
+ lyrics_transcriber-0.30.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
21
+ lyrics_transcriber-0.30.0.dist-info/entry_points.txt,sha256=_pPAHBMByKbWN-6RCscyJUYXTd3iVI1m-zzV2Sp9HV0,71
22
+ lyrics_transcriber-0.30.0.dist-info/RECORD,,
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ lyrics-transcriber=lyrics_transcriber.cli.main:main
3
+
@@ -1,10 +0,0 @@
1
- To get started, set your OPENAI_API_KEY environment variable.
2
-
3
- Next, edit promptfooconfig.yaml.
4
-
5
- Then run:
6
- ```
7
- promptfoo eval
8
- ```
9
-
10
- Afterwards, you can view the results by running `promptfoo view`
@@ -1,55 +0,0 @@
1
- You are a song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
2
- Your task is to take two lyrics data inputs with two different qualities, and use the data in one to correct the other, producing accurate lyrics which align with roughly correct timestamps in the song.
3
-
4
- Your response needs to be in JSON format and will be sent to an API endpoint. Only output the JSON, nothing else, as the response will be converted to a Python dictionary.
5
-
6
- You will be provided with reference lyrics for the song, as plain text, from an online source.
7
- These should be reasonably accurate, with generally correct words and phrases.
8
- However, they may not be perfect, and sometimes whole sections (such as a chorus or outro) may be missing or assumed to be repeated.
9
-
10
- Data input will contain one segment of an automated machine transcription of lyrics from a song, with start/end timestamps and confidence scores for every word in that segment.
11
- The timestamps for words are usually quite accurate, but the actual words which were heard by the transcription are typically only around 70% to 90% accurate.
12
- As such, it is common for there to be segments where most of the words are correct but one or two are wrong, or a single word may have been mistaken as two different words.
13
-
14
- When possible, you will also be provided with the previous 2 (corrected) lines of text, and the next 1 (un-corrected) segment text, for additional context.
15
-
16
- Carefully analyse the segment in the data input, and compare with the lyrics in the reference data, attempting to find part of the lyrics which is most likely to correspond with this segment.
17
- If all of the words match up correctly with words in the published lyrics, keep the entire segment from the transcription (do NOT add any additional words).
18
- If most of the words match up but one or two words are different (e.g. similar sounding words), correct those words.
19
- If there are symbols in the published lyrics, add those symbols to the closest word in the segment (NOT as a separate word). For example, parentheses are commonly used around backing vocals.
20
- If you need to delete a word or two in order to correct the lyrics, that's acceptable.
21
-
22
- Important: segments might not start and end at the same point as a "line" in the published lyrics, as the decision about where to split up a line into two is highly subjective.
23
- For example, in some published lyrics a line might be split in two (with a newline) before the word "and", but in another lyrics text that might only be one line.
24
- You will likely encounter situations where the words in the segment match part of the words in a published lyrics line, but not the whole line.
25
-
26
- Important: adding more words to the transcribed segment is usually not correct and should be the last resort!
27
- Remember, the goal is to correct mistakes (e.g. single words which were mis-heard) in the transcription rather than complete incomplete lines.
28
- Pay close attention to the "Context: Next (un-corrected) transcript segment" text, if this includes some of the words do NOT add those words to the current segment as this will cause duplication!
29
-
30
- The response JSON object needs to contain all of the following fields:
31
-
32
- - id: The id of the segment, from the data input
33
- - text: The full text of the corrected lyrics for this segment
34
- - words: this is a list
35
- - text: The correct word
36
- - start: The start timestamp for this word, estimated if not known for sure.
37
- - end: The end timestamp for this word, estimated if not known for sure.
38
- - confidence: Your self-assessed confidence score (from 0 to 1) of how likely it is that this word is accurate. If the word has not changed from the data input, keep the existing confidence value.
39
-
40
- Reference lyrics:
41
-
42
- {{reference_lyrics}}
43
-
44
- Previous two corrected lines:
45
-
46
- {{previous_two_corrected_lines}}
47
-
48
- Upcoming two uncorrected lines:
49
-
50
- {{upcoming_two_uncorrected_lines}}
51
-
52
- Data input:
53
-
54
- {{segment_input}}
55
-
@@ -1,36 +0,0 @@
1
- You are a song lyric corrector for a karaoke video studio, specializing in correcting lyrics for synchronization with music videos. Your role involves processing lyrics inputs, making corrections, and generating JSON responses with accurate lyrics aligned to timestamps.
2
-
3
- Task:
4
- - Receive lyrics data inputs of varying quality.
5
- - Use one data set to correct the other, ensuring lyrics are accurate and aligned with approximate song timestamps.
6
- - Generate responses in JSON format, to be converted to Python dictionaries for an API endpoint.
7
-
8
- Data Inputs:
9
- - Reference Lyrics: Published song lyrics from various online sources, generally accurate but not flawless. Be aware of potentially missing or incorrect sections (e.g., choruses, outros).
10
- - Transcription Segment: Automated machine transcription of a song segment, with timestamps and word confidence scores. Transcription accuracy varies (70% to 90%), with occasional misheard words or misinterpreted phrases.
11
-
12
- Additional Context:
13
- - When available, you'll receive the previous 2 corrected lines and the next 1 uncorrected segment for context.
14
-
15
- Correction Guidelines:
16
- - Take a deep breath and carefully analyze the transcription segment against the reference lyrics to find corresponding parts.
17
- - Maintain the transcription segment if it completely matches the reference lyrics.
18
- - Correct misheard or similar-sounding words.
19
- - Incorporate symbols (like parentheses) into the nearest word, not as separate entries.
20
- - Removing a word or two for accuracy is permissible.
21
-
22
- Segment Considerations:
23
- - Transcription segments may not align perfectly with published lyric lines due to subjective line splitting.
24
- - Be cautious of adding words to the transcription; prioritize correction over completion.
25
- - Avoid duplicating words already present in the "Next (un-corrected) transcript segment".
26
-
27
- JSON Response Structure:
28
- - id: Segment ID from input data.
29
- - text: Corrected lyrics for the segment.
30
- - words: List of words with the following details for each:
31
- - text: Correct word.
32
- - start: Estimated start timestamp.
33
- - end: Estimated end timestamp.
34
- - confidence: Confidence score (0-1) on word accuracy. Retain existing score if unchanged.
35
-
36
- Focus on precision and context sensitivity to ensure the corrections are relevant and accurate. Your objective is to refine the lyrical content for an optimal karaoke experience.
@@ -1,19 +0,0 @@
1
- You are a song lyric matcher for a karaoke video studio, responsible for reading lyrics inputs and identifying if they match, according to predefined criteria.
2
-
3
- Your task is to take two lyrics data inputs, and determine if they are from the same song or not.
4
- Your response must be either "Yes" or "No", with no other text, as your response will be processed by some Python code.
5
-
6
- Data input 1 will be lyrics generated from a song using automated machine transcription.
7
- Generally the transcription is at least 50% accurate, but some of the words heard by the transcription will likely be homonyms or mistakes.
8
-
9
- Data input 2 will be published lyrics for a song, fetched from an online source.
10
- If they are for the same song, these should be at least 90% accurate, with generally correct words and phrases.
11
- Even when they are for the same song, they may not be perfect. Sometimes whole sections (such as a chorus or outro) may be missing or assumed to be repeated.
12
-
13
- There is a chance the lyrics in data input 2 may be for a totally different song, as the automated process fetching lyrics from online sources sometimes gets an erroneous match.
14
- In this scenario, there may be one or two words which still match up by coincidence but generally you would expect less than 10% of the lyrics to match up.
15
- This "totally different song" scenario is what you need to detect, and return "No".
16
-
17
- Carefully analyse the two lyrics inputs provided, and make a reasonable guess as to whether they are for the same song or not.
18
- If the lyrics look like they are from the same song (but perhaps with some minor differences), you should return "Yes".
19
- If the lyrics look totally different, or you are not sure if the lyrics are both from the same song, you should return "No"
@@ -1,61 +0,0 @@
1
- # This configuration runs each prompt through a series of example inputs and checks if they meet requirements.
2
- # Learn more: https://promptfoo.dev/docs/configuration/guide
3
-
4
- description: Song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
5
- providers:
6
- - id: openai:gpt-3.5-turbo-1106
7
- config:
8
- temperature: 0
9
- # - id: openai:gpt-4-1106-preview
10
- # config:
11
- # temperature: 0
12
- prompts:
13
- - file://llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt
14
-
15
- defaultTest:
16
- assert:
17
- - type: is-json
18
- value:
19
- required: [id, text, words]
20
- type: object
21
- properties:
22
- id:
23
- type: number
24
- text:
25
- type: string
26
- words:
27
- type: array
28
- items:
29
- type: object
30
- properties:
31
- text:
32
- type: string
33
- start:
34
- type: number
35
- end:
36
- type: number
37
- confidence:
38
- type: number
39
-
40
- tests:
41
- - description: ABBA - Under Attack (segment 0)
42
- vars:
43
- reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
44
- previous_two_corrected_lines:
45
- upcoming_two_uncorrected_lines:
46
- segment_input: |
47
- {"id": 0, "start": 17.46, "end": 21.3, "confidence": 0.792, "text": " Don't know how to take it, don't know where to go", "words": [{"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982}]}
48
- assert:
49
- - type: contains
50
- value: "Don't know how to take it, don't know where to go"
51
-
52
- - description: ABBA - Under Attack (segment 1)
53
- vars:
54
- reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
55
- previous_two_corrected_lines:
56
- upcoming_two_uncorrected_lines:
57
- segment_input: |
58
- {"id": 1, "start": 22.04, "end": 27.84, "confidence": 0.763, "text": " My resistance running low And every day the hole is getting tighter", "words": [{"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999}, {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, {"text": "hole", "start": 26.1, "end": 26.48, "confidence": 0.361}, {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, {"text": "tighter", "start": 27.08, "end": 27.84, "confidence": 0.975}]}
59
- assert:
60
- - type: contains
61
- value: "My resistance running low And every day the hold is getting tighter"
@@ -1,48 +0,0 @@
1
- Don't know how to take it, don't know where to go
2
- My resistance running low
3
- And every day the hold is getting tighter and it troubles me so
4
- (You know that I'm nobody's fool)
5
- I'm nobody's fool and yet it's clear to me
6
- I don't have a strategy
7
- It's just like taking candy from a baby and I think I must be
8
-
9
- Under attack, I'm being taken
10
- About to crack, defences breaking
11
- Won't somebody please have a heart
12
- Come and rescue me now 'cause I'm falling apart
13
- Under attack, I'm taking cover
14
- He's on my track, my chasing lover
15
- Thinking nothing can stop him now
16
- Should I want to, I'm not sure I would know how
17
-
18
- This is getting crazy, I should tell him so
19
- Really let my anger show
20
- Persuade him that the answer to his questions is a definite no
21
- (I'm kind of flattered I suppose)
22
- Guess I'm kind of flattered but I'm scared as well
23
- Something like a magic spell
24
- I hardly dare to think of what would happen, where I'd be if I fell
25
-
26
- Under attack, I'm being taken
27
- About to crack, defences breaking
28
- Won't somebody please have a heart
29
- Come and rescue me now 'cause I'm falling apart
30
- Under attack, I'm taking cover
31
- He's on my track, my chasing lover
32
- Thinking nothing's gonna stop him now
33
- Should I want to, I'm not sure I won't know how
34
-
35
- Under attack, I'm being taken
36
- About to crack, defences breaking
37
- Won't somebody see and save a heart
38
- Come and rescue me now 'cause I'm falling apart
39
- Under attack, I'm taking cover
40
- He's on my track, my chasing lover
41
- Thinking nothing can stop him now
42
- Should I want to, I'm not sure I would know how
43
-
44
- Under attack, I'm being taken
45
- About to crack, defences breaking
46
- Won't somebody please have a heart
47
- Come and rescue me now 'cause I'm falling apart
48
- Under attack, I'm taking cover