lyrics-transcriber 0.19.2__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/corrector.py +57 -0
- lyrics_transcriber/transcriber.py +79 -273
- {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.20.0.dist-info}/METADATA +3 -3
- {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.20.0.dist-info}/RECORD +7 -6
- {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.20.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.20.0.dist-info}/WHEEL +0 -0
- {lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.20.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,57 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from openai import OpenAI
|
4
|
+
from typing import Dict, Optional
|
5
|
+
|
6
|
+
|
7
|
+
class LyricsTranscriptionCorrector:
    """Holds lyrics/transcription inputs and produces a "corrected" transcription.

    The current ``run_corrector`` is a placeholder (test) implementation: it does
    not consult the Spotify, Genius or Whisper inputs, it only rewrites the
    AudioShake transcription.
    """

    def __init__(
        self,
        logger: Optional[logging.Logger] = None,
    ):
        """Create a corrector.

        Args:
            logger: Logger to report progress on; a module-level logger is used
                when omitted.
        """
        self.logger = logger or logging.getLogger(__name__)

        # Input data; populated later through set_input_data().
        self.spotify_lyrics_data_dict = None
        self.spotify_lyrics_text = None
        self.genius_lyrics_text = None
        self.transcription_data_dict_whisper = None
        self.transcription_data_dict_audioshake = None

    def set_input_data(
        self,
        spotify_lyrics_data_dict: Optional[Dict] = None,
        spotify_lyrics_text: Optional[str] = None,
        genius_lyrics_text: Optional[str] = None,
        transcription_data_dict_whisper: Optional[Dict] = None,
        transcription_data_dict_audioshake: Optional[Dict] = None,
    ) -> None:
        """Store the input data as instance variables.

        Every argument is optional; anything not supplied is stored as ``None``,
        replacing whatever value was set by a previous call.
        """
        self.spotify_lyrics_data_dict = spotify_lyrics_data_dict
        self.spotify_lyrics_text = spotify_lyrics_text
        self.genius_lyrics_text = genius_lyrics_text
        self.transcription_data_dict_whisper = transcription_data_dict_whisper
        self.transcription_data_dict_audioshake = transcription_data_dict_audioshake

    def run_corrector(self) -> Dict:
        """
        Test implementation that replaces every third word with 'YOLO' in the AudioShake transcription.

        Returns:
            A modified copy of the AudioShake transcription dict. Each segment's
            ``text`` is rebuilt from its ``words`` and the top-level ``text`` is
            rebuilt from the segment texts. The stored input is left untouched.

        Raises:
            ValueError: if no AudioShake transcription data has been set.
            KeyError: if the data lacks the ``segments`` / ``words`` / ``text``
                keys this method reads.
        """
        self.logger.info("Running corrector (test implementation - replacing every 3rd word with YOLO)")

        if self.transcription_data_dict_audioshake is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object is not subscriptable" raised further down.
            raise ValueError("No AudioShake transcription data set; call set_input_data() before run_corrector()")

        # Create a deep copy (via a JSON round-trip) to avoid modifying the original
        modified_data = json.loads(json.dumps(self.transcription_data_dict_audioshake))

        # Process each segment
        for segment in modified_data["segments"]:
            # Replace every third word (indices 2, 5, 8, ...) in the words list
            for word in segment["words"][2::3]:
                word["text"] = "YOLO"

            # Reconstruct the segment text from the modified words
            segment["text"] = " ".join(word["text"] for word in segment["words"])

        # Reconstruct the full text from all segments. The rebuilt segment texts
        # carry no leading/trailing whitespace, so they must be joined with a
        # space; an empty separator would glue neighbouring segments together.
        modified_data["text"] = " ".join(segment["text"] for segment in modified_data["segments"])

        return modified_data
|
@@ -13,9 +13,10 @@ import syrics.api
|
|
13
13
|
from datetime import timedelta
|
14
14
|
from .utils import subtitles
|
15
15
|
from typing import List, Optional
|
16
|
-
from openai import OpenAI
|
17
16
|
from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type
|
18
17
|
import requests
|
18
|
+
from karaoke_lyrics_processor import KaraokeLyricsProcessor
|
19
|
+
from .corrector import LyricsTranscriptionCorrector
|
19
20
|
|
20
21
|
|
21
22
|
class LyricsTranscriber:
|
@@ -24,18 +25,15 @@ class LyricsTranscriber:
|
|
24
25
|
audio_filepath,
|
25
26
|
artist=None,
|
26
27
|
title=None,
|
27
|
-
openai_api_key=None,
|
28
28
|
audioshake_api_token=None,
|
29
29
|
genius_api_token=None,
|
30
30
|
spotify_cookie=None,
|
31
|
+
skip_transcription=False,
|
31
32
|
output_dir=None,
|
32
33
|
cache_dir="/tmp/lyrics-transcriber-cache/",
|
33
34
|
log_level=logging.DEBUG,
|
34
35
|
log_formatter=None,
|
35
36
|
transcription_model="medium",
|
36
|
-
llm_model="gpt-4o",
|
37
|
-
llm_prompt_matching=None,
|
38
|
-
llm_prompt_correction=None,
|
39
37
|
render_video=False,
|
40
38
|
video_resolution="360p",
|
41
39
|
video_background_image=None,
|
@@ -63,47 +61,11 @@ class LyricsTranscriber:
|
|
63
61
|
self.title = title
|
64
62
|
self.song_known = self.artist is not None and self.title is not None
|
65
63
|
|
66
|
-
self.
|
64
|
+
self.audioshake_api_token = os.getenv("AUDIOSHAKE_API_TOKEN", default=audioshake_api_token)
|
67
65
|
self.genius_api_token = os.getenv("GENIUS_API_TOKEN", default=genius_api_token)
|
68
66
|
self.spotify_cookie = os.getenv("SPOTIFY_COOKIE_SP_DC", default=spotify_cookie)
|
69
|
-
self.audioshake_api_token = os.getenv("AUDIOSHAKE_API_TOKEN", default=audioshake_api_token)
|
70
67
|
|
71
68
|
self.transcription_model = transcription_model
|
72
|
-
self.llm_model = llm_model
|
73
|
-
|
74
|
-
# Use package-relative paths for prompt files
|
75
|
-
if llm_prompt_matching is None:
|
76
|
-
llm_prompt_matching = os.path.join(
|
77
|
-
os.path.dirname(__file__), "llm_prompts", "llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt"
|
78
|
-
)
|
79
|
-
if llm_prompt_correction is None:
|
80
|
-
llm_prompt_correction = os.path.join(
|
81
|
-
os.path.dirname(__file__), "llm_prompts", "llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt"
|
82
|
-
)
|
83
|
-
|
84
|
-
self.llm_prompt_matching = llm_prompt_matching
|
85
|
-
self.llm_prompt_correction = llm_prompt_correction
|
86
|
-
|
87
|
-
if not os.path.exists(self.llm_prompt_matching):
|
88
|
-
raise FileNotFoundError(f"LLM prompt file not found: {self.llm_prompt_matching}")
|
89
|
-
if not os.path.exists(self.llm_prompt_correction):
|
90
|
-
raise FileNotFoundError(f"LLM prompt file not found: {self.llm_prompt_correction}")
|
91
|
-
|
92
|
-
self.openai_client = None
|
93
|
-
|
94
|
-
if self.openai_api_key:
|
95
|
-
self.openai_client = OpenAI(api_key=self.openai_api_key)
|
96
|
-
|
97
|
-
# Uncomment for local models e.g. with ollama
|
98
|
-
# self.openai_client = OpenAI(
|
99
|
-
# base_url="http://localhost:11434/v1",
|
100
|
-
# api_key="ollama",
|
101
|
-
# )
|
102
|
-
|
103
|
-
self.openai_client.log = self.log_level
|
104
|
-
else:
|
105
|
-
self.logger.warning("No OpenAI API key found, no correction will be applied to transcription")
|
106
|
-
|
107
69
|
self.render_video = render_video
|
108
70
|
self.video_resolution = video_resolution
|
109
71
|
self.video_background_image = video_background_image
|
@@ -150,14 +112,12 @@ class LyricsTranscriber:
|
|
150
112
|
"transcribed_lyrics_text_primary": None,
|
151
113
|
"transcribed_lyrics_text_primary_filepath": None,
|
152
114
|
"genius_lyrics_text": None,
|
153
|
-
"
|
115
|
+
"genius_lyrics_text_filepath": None,
|
116
|
+
"genius_lyrics_processed_filepath": None,
|
154
117
|
"spotify_lyrics_data_dict": None,
|
155
118
|
"spotify_lyrics_data_filepath": None,
|
156
119
|
"spotify_lyrics_text_filepath": None,
|
157
|
-
"
|
158
|
-
"llm_costs_usd": {"input": 0.0, "output": 0.0, "total": 0.0},
|
159
|
-
"llm_transcript": None,
|
160
|
-
"llm_transcript_filepath": None,
|
120
|
+
"spotify_lyrics_processed_filepath": None,
|
161
121
|
"corrected_lyrics_text": None,
|
162
122
|
"corrected_lyrics_text_filepath": None,
|
163
123
|
"midico_lrc_filepath": None,
|
@@ -176,43 +136,47 @@ class LyricsTranscriber:
|
|
176
136
|
|
177
137
|
self.output_prefix = f"{artist} - {title}"
|
178
138
|
|
139
|
+
self.skip_transcription = skip_transcription
|
140
|
+
|
179
141
|
def generate(self):
|
180
142
|
self.logger.debug(f"Starting generate() with cache_dir: {self.cache_dir} and output_dir: {self.output_dir}")
|
181
143
|
|
182
144
|
self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")
|
183
145
|
|
184
|
-
self.transcribe()
|
185
|
-
|
186
|
-
self.write_transcribed_lyrics_plain_text()
|
187
|
-
|
188
|
-
self.write_genius_lyrics_file()
|
189
146
|
self.write_spotify_lyrics_data_file()
|
190
147
|
self.write_spotify_lyrics_plain_text()
|
148
|
+
if self.outputs["spotify_lyrics_text_filepath"]:
|
149
|
+
self.outputs["spotify_lyrics_processed_filepath"] = os.path.join(
|
150
|
+
self.cache_dir, self.get_output_filename(" (Lyrics Spotify Processed).txt")
|
151
|
+
)
|
152
|
+
self.write_processed_lyrics(self.outputs["spotify_lyrics_text_filepath"], self.outputs["spotify_lyrics_processed_filepath"])
|
153
|
+
|
154
|
+
self.write_genius_lyrics_file()
|
155
|
+
if self.outputs["genius_lyrics_text_filepath"]:
|
156
|
+
self.outputs["genius_lyrics_processed_filepath"] = os.path.join(
|
157
|
+
self.cache_dir, self.get_output_filename(" (Lyrics Genius Processed).txt")
|
158
|
+
)
|
159
|
+
self.write_processed_lyrics(self.outputs["genius_lyrics_text_filepath"], self.outputs["genius_lyrics_processed_filepath"])
|
191
160
|
|
192
|
-
self.
|
161
|
+
if not self.skip_transcription:
|
162
|
+
self.transcribe()
|
163
|
+
self.validate_lyrics_match_song()
|
193
164
|
|
194
|
-
|
195
|
-
self.write_corrected_lyrics_data_file()
|
196
|
-
self.write_corrected_lyrics_plain_text()
|
197
|
-
else:
|
198
|
-
self.logger.warning("Skipping LLM correction as no OpenAI client is available")
|
199
|
-
self.outputs["corrected_lyrics_data_dict"] = self.outputs["transcription_data_dict_primary"]
|
200
|
-
self.write_corrected_lyrics_plain_text()
|
165
|
+
self.correct_lyrics_transcription()
|
201
166
|
|
202
|
-
|
167
|
+
self.calculate_singing_percentage()
|
203
168
|
|
204
|
-
|
205
|
-
|
169
|
+
self.write_midico_lrc_file()
|
170
|
+
self.write_ass_file()
|
206
171
|
|
207
|
-
|
208
|
-
|
209
|
-
|
172
|
+
if self.render_video:
|
173
|
+
self.outputs["karaoke_video_filepath"] = self.get_cache_filepath(".mp4")
|
174
|
+
self.create_video()
|
175
|
+
else:
|
176
|
+
self.outputs["corrected_lyrics_text_filepath"] = self.outputs["genius_lyrics_text_filepath"]
|
177
|
+
self.outputs["corrected_lyrics_text"] = self.outputs["genius_lyrics_text"]
|
210
178
|
|
211
179
|
self.copy_files_to_output_dir()
|
212
|
-
self.calculate_llm_costs()
|
213
|
-
|
214
|
-
if self.openai_client:
|
215
|
-
self.openai_client.close()
|
216
180
|
|
217
181
|
return self.outputs
|
218
182
|
|
@@ -236,9 +200,6 @@ class LyricsTranscriber:
|
|
236
200
|
def validate_lyrics_match_song(self):
|
237
201
|
at_least_one_online_lyrics_validated = False
|
238
202
|
|
239
|
-
with open(self.llm_prompt_matching, "r") as file:
|
240
|
-
llm_matching_instructions = file.read()
|
241
|
-
|
242
203
|
for online_lyrics_source in ["genius", "spotify"]:
|
243
204
|
self.logger.debug(f"validating transcribed lyrics match lyrics from {online_lyrics_source}")
|
244
205
|
|
@@ -248,50 +209,21 @@ class LyricsTranscriber:
|
|
248
209
|
if online_lyrics_text_key not in self.outputs or self.outputs[online_lyrics_text_key] is None:
|
249
210
|
continue
|
250
211
|
|
251
|
-
|
252
|
-
|
212
|
+
self.logger.debug(f"Using primitive word matching to validate {online_lyrics_source} lyrics match")
|
213
|
+
transcribed_words = set(self.outputs["transcribed_lyrics_text_primary"].split())
|
214
|
+
online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
|
215
|
+
common_words = transcribed_words & online_lyrics_words
|
216
|
+
match_percentage = len(common_words) / len(online_lyrics_words) * 100
|
253
217
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
messages=[{"role": "system", "content": llm_matching_instructions}, {"role": "user", "content": data_input_str}],
|
218
|
+
if match_percentage >= 50:
|
219
|
+
self.logger.info(
|
220
|
+
f"{online_lyrics_source} lyrics successfully validated to match transcription with {match_percentage:.2f}% word match"
|
258
221
|
)
|
259
|
-
|
260
|
-
message = response.choices[0].message.content
|
261
|
-
finish_reason = response.choices[0].finish_reason
|
262
|
-
|
263
|
-
self.outputs["llm_token_usage"]["input"] += response.usage.prompt_tokens
|
264
|
-
self.outputs["llm_token_usage"]["output"] += response.usage.completion_tokens
|
265
|
-
|
266
|
-
if finish_reason == "stop":
|
267
|
-
if message == "Yes":
|
268
|
-
self.logger.info(f"{online_lyrics_source} lyrics successfully validated to match transcription")
|
269
|
-
at_least_one_online_lyrics_validated = True
|
270
|
-
elif message == "No":
|
271
|
-
self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
|
272
|
-
self.outputs[online_lyrics_text_key] = None
|
273
|
-
self.outputs[online_lyrics_filepath_key] = None
|
274
|
-
else:
|
275
|
-
self.logger.error(f"Unexpected response from LLM: {message}")
|
276
|
-
else:
|
277
|
-
self.logger.warning(f"OpenAI API call did not finish successfully, finish_reason: {finish_reason}")
|
222
|
+
at_least_one_online_lyrics_validated = True
|
278
223
|
else:
|
279
|
-
|
280
|
-
self.
|
281
|
-
|
282
|
-
online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
|
283
|
-
common_words = transcribed_words & online_lyrics_words
|
284
|
-
match_percentage = len(common_words) / len(online_lyrics_words) * 100
|
285
|
-
|
286
|
-
if match_percentage >= 50:
|
287
|
-
self.logger.info(
|
288
|
-
f"{online_lyrics_source} lyrics successfully validated to match transcription with {match_percentage:.2f}% word match"
|
289
|
-
)
|
290
|
-
at_least_one_online_lyrics_validated = True
|
291
|
-
else:
|
292
|
-
self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
|
293
|
-
self.outputs[online_lyrics_text_key] = None
|
294
|
-
self.outputs[online_lyrics_filepath_key] = None
|
224
|
+
self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
|
225
|
+
self.outputs[online_lyrics_text_key] = None
|
226
|
+
self.outputs[online_lyrics_filepath_key] = None
|
295
227
|
|
296
228
|
self.logger.info(
|
297
229
|
f"Completed validation of transcription using online lyrics sources. Match found: {at_least_one_online_lyrics_validated}"
|
@@ -302,178 +234,37 @@ class LyricsTranscriber:
|
|
302
234
|
f"Lyrics from Genius and Spotify did not match the transcription. Please check artist and title are set correctly."
|
303
235
|
)
|
304
236
|
|
305
|
-
def
|
306
|
-
if not self.openai_client:
|
307
|
-
self.logger.warning("Skipping LLM correction as no OpenAI client is available")
|
308
|
-
return
|
309
|
-
|
310
|
-
self.logger.debug("write_corrected_lyrics_data_file initiating OpenAI client")
|
311
|
-
|
237
|
+
def correct_lyrics_transcription(self):
|
312
238
|
corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))
|
313
239
|
|
314
240
|
if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
|
315
|
-
self.logger.
|
241
|
+
self.logger.info(
|
316
242
|
f"found existing file at corrected_lyrics_data_json_cache_filepath, reading: {corrected_lyrics_data_json_cache_filepath}"
|
317
243
|
)
|
318
244
|
|
319
245
|
with open(corrected_lyrics_data_json_cache_filepath, "r") as corrected_lyrics_data_json:
|
320
246
|
self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath
|
321
|
-
|
322
|
-
corrected_lyrics_data_dict = json.load(corrected_lyrics_data_json)
|
323
|
-
self.outputs["corrected_lyrics_data_dict"] = corrected_lyrics_data_dict
|
247
|
+
self.outputs["corrected_lyrics_data_dict"] = json.load(corrected_lyrics_data_json)
|
324
248
|
return
|
325
249
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
self.
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
self.logger.debug(
|
334
|
-
f"no cached lyrics found at corrected_lyrics_data_json_cache_filepath: {corrected_lyrics_data_json_cache_filepath}, attempting to run correction using LLM"
|
250
|
+
lyrics_corrector = LyricsTranscriptionCorrector(logger=self.logger)
|
251
|
+
lyrics_corrector.set_input_data(
|
252
|
+
spotify_lyrics_data_dict=self.outputs["spotify_lyrics_data_dict"],
|
253
|
+
spotify_lyrics_text=self.outputs["spotify_lyrics_text"],
|
254
|
+
genius_lyrics_text=self.outputs["genius_lyrics_text"],
|
255
|
+
transcription_data_dict_whisper=self.outputs["transcription_data_dict_whisper"],
|
256
|
+
transcription_data_dict_audioshake=self.outputs["transcription_data_dict_audioshake"],
|
335
257
|
)
|
258
|
+
self.outputs["corrected_lyrics_data_dict"] = lyrics_corrector.run_corrector()
|
336
259
|
|
337
|
-
|
338
|
-
|
339
|
-
with open(
|
340
|
-
|
341
|
-
|
342
|
-
system_prompt = system_prompt_template.replace("{{reference_lyrics}}", reference_lyrics)
|
343
|
-
|
344
|
-
# TODO: Test if results are cleaner when using the vocal file from a background vocal audio separation model
|
345
|
-
# TODO: Record more info about the correction process (e.g before/after diffs for each segment) to a file for debugging
|
346
|
-
# TODO: Possibly add a step after segment-based correct to get the LLM to self-analyse the diff
|
347
|
-
|
348
|
-
self.outputs["llm_transcript"] = ""
|
349
|
-
self.outputs["llm_transcript_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (LLM Transcript).txt"))
|
350
|
-
|
351
|
-
total_segments = len(self.outputs["transcription_data_dict_primary"]["segments"])
|
352
|
-
self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")
|
353
|
-
|
354
|
-
with open(self.outputs["llm_transcript_filepath"], "a", buffering=1, encoding="utf-8") as llm_transcript_file:
|
355
|
-
self.logger.debug(f"writing LLM chat instructions: {self.outputs['llm_transcript_filepath']}")
|
356
|
-
|
357
|
-
llm_transcript_header = f"--- SYSTEM instructions passed in for all segments ---:\n\n{system_prompt}\n"
|
358
|
-
self.outputs["llm_transcript"] += llm_transcript_header
|
359
|
-
llm_transcript_file.write(llm_transcript_header)
|
360
|
-
|
361
|
-
for segment in self.outputs["transcription_data_dict_primary"]["segments"]:
|
362
|
-
# # Don't waste OpenAI dollars when testing!
|
363
|
-
# if segment["id"] > 10:
|
364
|
-
# continue
|
365
|
-
# if segment["id"] < 20 or segment["id"] > 24:
|
366
|
-
# continue
|
367
|
-
|
368
|
-
llm_transcript_segment = ""
|
369
|
-
segment_input = json.dumps(
|
370
|
-
{
|
371
|
-
"id": segment["id"],
|
372
|
-
"start": segment["start"],
|
373
|
-
"end": segment["end"],
|
374
|
-
"confidence": segment["confidence"],
|
375
|
-
"text": segment["text"],
|
376
|
-
"words": segment["words"],
|
377
|
-
}
|
378
|
-
)
|
379
|
-
|
380
|
-
previous_two_corrected_lines = ""
|
381
|
-
upcoming_two_uncorrected_lines = ""
|
382
|
-
|
383
|
-
for previous_segment in corrected_lyrics_dict["segments"]:
|
384
|
-
if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
|
385
|
-
previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
|
386
|
-
|
387
|
-
for next_segment in self.outputs["transcription_data_dict_primary"]["segments"]:
|
388
|
-
if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
|
389
|
-
upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
|
390
|
-
|
391
|
-
llm_transcript_segment += f"--- Segment {segment['id']} / {total_segments} ---\n"
|
392
|
-
llm_transcript_segment += f"Previous two corrected lines:\n\n{previous_two_corrected_lines}\nUpcoming two uncorrected lines:\n\n{upcoming_two_uncorrected_lines}\nData input:\n\n{segment_input}\n"
|
393
|
-
|
394
|
-
# fmt: off
|
395
|
-
segment_prompt = system_prompt_template.replace(
|
396
|
-
"{{previous_two_corrected_lines}}", previous_two_corrected_lines
|
397
|
-
).replace(
|
398
|
-
"{{upcoming_two_uncorrected_lines}}", upcoming_two_uncorrected_lines
|
399
|
-
).replace(
|
400
|
-
"{{segment_input}}", segment_input
|
401
|
-
)
|
402
|
-
|
403
|
-
self.logger.info(
|
404
|
-
f'Calling completion model {self.llm_model} with instructions and data input for segment {segment["id"]} / {total_segments}:'
|
405
|
-
)
|
406
|
-
|
407
|
-
response = self.openai_client.chat.completions.create(
|
408
|
-
model=self.llm_model,
|
409
|
-
response_format={"type": "json_object"},
|
410
|
-
seed=10,
|
411
|
-
temperature=0.4,
|
412
|
-
messages=[
|
413
|
-
{
|
414
|
-
"role": "user",
|
415
|
-
"content": segment_prompt
|
416
|
-
}
|
417
|
-
],
|
418
|
-
)
|
419
|
-
# fmt: on
|
420
|
-
|
421
|
-
message = response.choices[0].message.content
|
422
|
-
finish_reason = response.choices[0].finish_reason
|
423
|
-
|
424
|
-
llm_transcript_segment += f"\n--- RESPONSE for segment {segment['id']} ---:\n\n"
|
425
|
-
llm_transcript_segment += message
|
426
|
-
llm_transcript_segment += f"\n--- END segment {segment['id']} / {total_segments} ---:\n\n"
|
427
|
-
|
428
|
-
self.logger.debug(f"writing LLM chat transcript for segment to: {self.outputs['llm_transcript_filepath']}")
|
429
|
-
llm_transcript_file.write(llm_transcript_segment)
|
430
|
-
self.outputs["llm_transcript"] += llm_transcript_segment
|
431
|
-
|
432
|
-
self.outputs["llm_token_usage"]["input"] += response.usage.prompt_tokens
|
433
|
-
self.outputs["llm_token_usage"]["output"] += response.usage.completion_tokens
|
434
|
-
|
435
|
-
# self.logger.debug(f"response finish_reason: {finish_reason} message: \n{message}")
|
436
|
-
|
437
|
-
if finish_reason == "stop":
|
438
|
-
try:
|
439
|
-
corrected_segment_dict = json.loads(message)
|
440
|
-
corrected_lyrics_dict["segments"].append(corrected_segment_dict)
|
441
|
-
self.logger.info("Successfully parsed response from GPT as JSON and appended to corrected_lyrics_dict.segments")
|
442
|
-
except json.JSONDecodeError as e:
|
443
|
-
raise Exception("Failed to parse response from GPT as JSON") from e
|
444
|
-
else:
|
445
|
-
self.logger.warning(f"OpenAI API call did not finish successfully, finish_reason: {finish_reason}")
|
446
|
-
|
447
|
-
self.logger.info(f'Successfully processed correction for all {len(corrected_lyrics_dict["segments"])} lyrics segments')
|
448
|
-
|
449
|
-
self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}")
|
450
|
-
with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as corrected_lyrics_data_json_cache_file:
|
451
|
-
corrected_lyrics_data_json_cache_file.write(json.dumps(corrected_lyrics_dict, indent=4))
|
260
|
+
# Save the corrected lyrics to output JSON file
|
261
|
+
self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}")
|
262
|
+
with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as f:
|
263
|
+
f.write(json.dumps(self.outputs["corrected_lyrics_data_dict"], indent=4))
|
452
264
|
|
453
265
|
self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath
|
454
|
-
self.outputs["corrected_lyrics_data_dict"] = corrected_lyrics_dict
|
455
|
-
|
456
|
-
def calculate_llm_costs(self):
|
457
|
-
price_dollars_per_1000_tokens = {
|
458
|
-
"gpt-3.5-turbo-1106": {
|
459
|
-
"input": 0.0010,
|
460
|
-
"output": 0.0020,
|
461
|
-
},
|
462
|
-
"gpt-4-1106-preview": {
|
463
|
-
"input": 0.01,
|
464
|
-
"output": 0.03,
|
465
|
-
},
|
466
|
-
}
|
467
|
-
|
468
|
-
input_price = price_dollars_per_1000_tokens.get(self.llm_model, {"input": 0, "output": 0})["input"]
|
469
|
-
output_price = price_dollars_per_1000_tokens.get(self.llm_model, {"input": 0, "output": 0})["output"]
|
470
|
-
|
471
|
-
input_cost = input_price * (self.outputs["llm_token_usage"]["input"] / 1000)
|
472
|
-
output_cost = output_price * (self.outputs["llm_token_usage"]["output"] / 1000)
|
473
266
|
|
474
|
-
self.
|
475
|
-
self.outputs["llm_costs_usd"]["output"] = round(output_cost, 3)
|
476
|
-
self.outputs["llm_costs_usd"]["total"] = round(input_cost + output_cost, 3)
|
267
|
+
self.write_corrected_lyrics_plain_text()
|
477
268
|
|
478
269
|
def write_corrected_lyrics_plain_text(self):
|
479
270
|
if self.outputs["corrected_lyrics_data_dict"]:
|
@@ -587,7 +378,7 @@ class LyricsTranscriber:
|
|
587
378
|
self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")
|
588
379
|
|
589
380
|
with open(genius_lyrics_cache_filepath, "r") as cached_lyrics:
|
590
|
-
self.outputs["
|
381
|
+
self.outputs["genius_lyrics_text_filepath"] = genius_lyrics_cache_filepath
|
591
382
|
self.outputs["genius_lyrics_text"] = cached_lyrics.read()
|
592
383
|
return
|
593
384
|
self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")
|
@@ -611,7 +402,7 @@ class LyricsTranscriber:
|
|
611
402
|
with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
|
612
403
|
f.write(lyrics)
|
613
404
|
|
614
|
-
self.outputs["
|
405
|
+
self.outputs["genius_lyrics_text_filepath"] = genius_lyrics_cache_filepath
|
615
406
|
self.outputs["genius_lyrics_text"] = lyrics
|
616
407
|
return lyrics.split("\n") # Return lines like write_lyrics_from_genius
|
617
408
|
|
@@ -1099,6 +890,21 @@ class LyricsTranscriber:
|
|
1099
890
|
self.outputs["transcribed_lyrics_text_primary"] = self.outputs["transcribed_lyrics_text_whisper"]
|
1100
891
|
self.outputs["transcribed_lyrics_text_primary_filepath"] = self.outputs["transcribed_lyrics_text_whisper_filepath"]
|
1101
892
|
|
893
|
+
def write_processed_lyrics(self, lyrics_file, processed_lyrics_file):
    """Run a lyrics file through KaraokeLyricsProcessor and write the result.

    Args:
        lyrics_file: Path handed to the processor as its input filename.
        processed_lyrics_file: Path handed to the processor as its output filename.

    The processor is configured with this object's log level and formatter and a
    maximum line length of 36, then asked to process and to write its output.
    """
    self.logger.info(f"Processing lyrics from {lyrics_file} and writing to {processed_lyrics_file}")

    # Collect the processor configuration first so the construction reads as one step.
    processor_options = {
        "log_level": self.log_level,
        "log_formatter": self.log_formatter,
        "input_filename": lyrics_file,
        "output_filename": processed_lyrics_file,
        "max_line_length": 36,
    }
    lyrics_processor = KaraokeLyricsProcessor(**processor_options)

    lyrics_processor.process()
    lyrics_processor.write_to_output_file()

    self.logger.info(f"Lyrics processing complete, processed lyrics written to: {processed_lyrics_file}")
|
907
|
+
|
1102
908
|
def get_cache_filepath(self, extension):
|
1103
909
|
# Instead of using slugify and hash, use the consistent naming pattern
|
1104
910
|
cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(extension))
|
@@ -1,21 +1,21 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.20.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
|
6
6
|
License: MIT
|
7
7
|
Author: Andrew Beveridge
|
8
8
|
Author-email: andrew@beveridge.uk
|
9
|
-
Requires-Python: >=3.9
|
9
|
+
Requires-Python: >=3.9,<3.13
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
12
|
Classifier: Programming Language :: Python :: 3.9
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
16
|
-
Classifier: Programming Language :: Python :: 3.13
|
17
16
|
Requires-Dist: Cython (>=0)
|
18
17
|
Requires-Dist: dtw-python (>=1)
|
18
|
+
Requires-Dist: karaoke-lyrics-processor (>=0.4.1)
|
19
19
|
Requires-Dist: llvmlite (>=0)
|
20
20
|
Requires-Dist: lyricsgenius (>=3)
|
21
21
|
Requires-Dist: numba (>=0.57)
|
@@ -1,18 +1,19 @@
|
|
1
1
|
lyrics_transcriber/__init__.py,sha256=bIRjsXAzlghS1rQxWNLU0wppZy0T_iciN9EclHLwNrQ,94
|
2
2
|
lyrics_transcriber/audioshake_transcriber.py,sha256=AbIkghvguI1PV0fCMUHGRnidQwLPM_pQ96FI0Qk-aI0,5221
|
3
|
+
lyrics_transcriber/corrector.py,sha256=LVicUYBCz2TpzzPUbzgLfNYebYJLj7yVvbERMHuXzTY,2300
|
3
4
|
lyrics_transcriber/llm_prompts/README.md,sha256=DPAGRDVGt9ZNcQAAoQGFhwesLY3D6hD8apL71yHP4yo,196
|
4
5
|
lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt,sha256=a3XjAYfyhWt1uCKKqm_n2Pc0STdmBdiHHtJ7ODP99Nk,4046
|
5
6
|
lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt,sha256=r6HN3DD_3gwh3B_JPd2R0I4lDXuB5iy7B90J9agOxbQ,2369
|
6
7
|
lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt,sha256=hvk2Vs3M3Q4zGQsiQnXvnpd8wXWfwsudYeqN5qFyNWs,1754
|
7
8
|
lyrics_transcriber/llm_prompts/promptfooconfig.yaml,sha256=O4YxlLV7XSUiSw_1Q9G7ELC2VAbrYUV_N5QxrPbd1jE,3735
|
8
9
|
lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt,sha256=8d-RvZtyINKUlpQLwMi-VD--Y59J-epPt7SZSqjFbPI,1690
|
9
|
-
lyrics_transcriber/transcriber.py,sha256=
|
10
|
+
lyrics_transcriber/transcriber.py,sha256=SrZLY4zEqSd--jgXqRUtgX6oyhM8POpL91AMas_Dpzw,47897
|
10
11
|
lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
12
|
lyrics_transcriber/utils/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
|
12
13
|
lyrics_transcriber/utils/cli.py,sha256=8Poba_9wQw0VmOK73vuK-w-abR9QmO4y4FYDHiAQbc0,6972
|
13
14
|
lyrics_transcriber/utils/subtitles.py,sha256=_WG0pFoZMXcrGe6gbARkC9KrWzFNTMOsiqQwNL-H2lU,11812
|
14
|
-
lyrics_transcriber-0.
|
15
|
-
lyrics_transcriber-0.
|
16
|
-
lyrics_transcriber-0.
|
17
|
-
lyrics_transcriber-0.
|
18
|
-
lyrics_transcriber-0.
|
15
|
+
lyrics_transcriber-0.20.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
|
16
|
+
lyrics_transcriber-0.20.0.dist-info/METADATA,sha256=1mOcGn2Hb5Nw3nKH0Cc41Zv7_gp4a-H4DLDnktEeRNs,5830
|
17
|
+
lyrics_transcriber-0.20.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
18
|
+
lyrics_transcriber-0.20.0.dist-info/entry_points.txt,sha256=lh6L-iR5CGELaNcouDK94X78eS5Ua_tK9lI4UEkza-k,72
|
19
|
+
lyrics_transcriber-0.20.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.19.2.dist-info → lyrics_transcriber-0.20.0.dist-info}/entry_points.txt
RENAMED
File without changes
|