lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. lyrics_transcriber/__init__.py +2 -5
  2. lyrics_transcriber/cli/cli_main.py +206 -0
  3. lyrics_transcriber/core/__init__.py +0 -0
  4. lyrics_transcriber/core/controller.py +317 -0
  5. lyrics_transcriber/correction/base_strategy.py +29 -0
  6. lyrics_transcriber/correction/corrector.py +52 -0
  7. lyrics_transcriber/correction/strategy_diff.py +263 -0
  8. lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
  9. lyrics_transcriber/lyrics/genius.py +70 -0
  10. lyrics_transcriber/lyrics/spotify.py +82 -0
  11. lyrics_transcriber/output/__init__.py +0 -0
  12. lyrics_transcriber/output/generator.py +271 -0
  13. lyrics_transcriber/{utils → output}/subtitles.py +12 -12
  14. lyrics_transcriber/storage/__init__.py +0 -0
  15. lyrics_transcriber/storage/dropbox.py +225 -0
  16. lyrics_transcriber/transcribers/audioshake.py +216 -0
  17. lyrics_transcriber/transcribers/base_transcriber.py +186 -0
  18. lyrics_transcriber/transcribers/whisper.py +321 -0
  19. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +5 -16
  20. lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
  21. lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
  22. lyrics_transcriber/audioshake_transcriber.py +0 -122
  23. lyrics_transcriber/corrector.py +0 -57
  24. lyrics_transcriber/llm_prompts/README.md +0 -10
  25. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
  26. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
  27. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
  28. lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
  29. lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
  30. lyrics_transcriber/transcriber.py +0 -934
  31. lyrics_transcriber/utils/cli.py +0 -179
  32. lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
  33. lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
  34. /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
  35. /lyrics_transcriber/{utils → output}/ass.py +0 -0
  36. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
  37. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0
@@ -1,934 +0,0 @@
1
- import os
2
- import sys
3
- import re
4
- import json
5
- import logging
6
- import shutil
7
- import hashlib
8
- import subprocess
9
- import slugify
10
- import whisper_timestamped as whisper
11
- import lyricsgenius
12
- import syrics.api
13
- from datetime import timedelta
14
- from .utils import subtitles
15
- from typing import List, Optional
16
- from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type
17
- import requests
18
- from karaoke_lyrics_processor import KaraokeLyricsProcessor
19
- from .corrector import LyricsTranscriptionCorrector
20
-
21
-
22
- class LyricsTranscriber:
23
def __init__(
    self,
    audio_filepath,
    artist=None,
    title=None,
    audioshake_api_token=None,
    genius_api_token=None,
    spotify_cookie=None,
    skip_transcription=False,
    output_dir=None,
    cache_dir="/tmp/lyrics-transcriber-cache/",
    log_level=logging.DEBUG,
    log_formatter=None,
    transcription_model="medium",
    render_video=False,
    video_resolution="360p",
    video_background_image=None,
    video_background_color="black",
):
    """Store configuration, set up logging and initialise the outputs registry.

    Args:
        audio_filepath: Path of the audio file to transcribe (required).
        artist: Artist name; together with ``title`` it marks the song as known.
        title: Song title.
        audioshake_api_token: Fallback used when ``AUDIOSHAKE_API_TOKEN`` is not
            set in the environment (the environment variable wins when present).
        genius_api_token: Fallback for the ``GENIUS_API_TOKEN`` environment variable.
        spotify_cookie: Fallback for the ``SPOTIFY_COOKIE_SP_DC`` environment variable.
        skip_transcription: Stored for ``generate()`` to decide whether to transcribe.
        output_dir: Directory the generated files are copied to.
        cache_dir: Directory used for intermediate/cached files.
        log_level: Level applied to this module's logger.
        log_formatter: Optional ``logging.Formatter``; a default one is built if None.
        transcription_model: Name of the transcription model to use.
        render_video: Whether ``generate()`` should also render a video.
        video_resolution: One of ``"4k"``, ``"1080p"``, ``"720p"``, ``"360p"``.
        video_background_image: Optional path to an existing background image.
        video_background_color: Background colour used when no image is given.

    Raises:
        ValueError: ``video_resolution`` is not a supported value, or
            ``audio_filepath`` is None.
        FileNotFoundError: ``video_background_image`` is set but is not a file.
    """
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(log_level)
    self.log_level = log_level
    self.log_formatter = log_formatter

    self.log_handler = logging.StreamHandler()

    if self.log_formatter is None:
        self.log_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")

    self.log_handler.setFormatter(self.log_formatter)

    # The module logger is shared by every instance. Drop handlers installed by
    # earlier instances before adding ours, otherwise each new instance makes
    # every log line appear one more time.
    for stale_handler in list(self.logger.handlers):
        if getattr(stale_handler, "_lyrics_transcriber_owned", False):
            self.logger.removeHandler(stale_handler)
    self.log_handler._lyrics_transcriber_owned = True
    self.logger.addHandler(self.log_handler)

    self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")

    self.cache_dir = cache_dir
    self.output_dir = output_dir
    self.audio_filepath = audio_filepath
    self.artist = artist
    self.title = title
    self.song_known = self.artist is not None and self.title is not None

    # Environment variables take precedence over the constructor arguments.
    self.audioshake_api_token = os.getenv("AUDIOSHAKE_API_TOKEN", default=audioshake_api_token)
    self.genius_api_token = os.getenv("GENIUS_API_TOKEN", default=genius_api_token)
    self.spotify_cookie = os.getenv("SPOTIFY_COOKIE_SP_DC", default=spotify_cookie)

    self.transcription_model = transcription_model
    self.render_video = render_video
    self.video_resolution = video_resolution
    self.video_background_image = video_background_image
    self.video_background_color = video_background_color

    # resolution name -> ((width, height), font size); line height equals font size.
    resolution_presets = {
        "4k": ((3840, 2160), 250),
        "1080p": ((1920, 1080), 120),
        "720p": ((1280, 720), 100),
        "360p": ((640, 360), 50),
    }
    if video_resolution not in resolution_presets:
        raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
    self.video_resolution_num, self.font_size = resolution_presets[video_resolution]
    self.line_height = self.font_size

    # If a video background is provided, validate file exists
    if self.video_background_image is not None:
        if os.path.isfile(self.video_background_image):
            self.logger.debug(f"video_background is valid file path: {self.video_background_image}")
        else:
            raise FileNotFoundError(f"video_background is not a valid file path: {self.video_background_image}")

    # Registry of everything generate() produces; keys ending in "_filepath"
    # are the ones copy_files_to_output_dir() copies.
    self.outputs = dict.fromkeys(
        (
            "transcription_data_dict_whisper",
            "transcription_data_whisper_filepath",
            "transcribed_lyrics_text_whisper",
            "transcribed_lyrics_text_whisper_filepath",
            "transcription_data_dict_audioshake",
            "transcription_data_audioshake_filepath",
            "transcribed_lyrics_text_audioshake",
            "transcribed_lyrics_text_audioshake_filepath",
            "transcription_data_dict_primary",
            "transcription_data_primary_filepath",
            "transcribed_lyrics_text_primary",
            "transcribed_lyrics_text_primary_filepath",
            "genius_lyrics_text",
            "genius_lyrics_text_filepath",
            "genius_lyrics_processed_filepath",
            "spotify_lyrics_data_dict",
            "spotify_lyrics_data_filepath",
            "spotify_lyrics_text_filepath",
            "spotify_lyrics_processed_filepath",
            "corrected_lyrics_text",
            "corrected_lyrics_text_filepath",
            "midico_lrc_filepath",
            "ass_subtitles_filepath",
            "karaoke_video_filepath",
            "singing_percentage",
            "total_singing_duration",
            "song_duration",
            "output_dir",
        )
    )

    if self.audio_filepath is None:
        # ValueError is still an Exception, so existing `except Exception` callers keep working.
        raise ValueError("audio_filepath must be specified as the input source to transcribe")

    self.create_folders()

    self.output_prefix = f"{artist} - {title}"

    self.skip_transcription = skip_transcription
140
-
141
def generate(self):
    """Run the whole pipeline and return the ``outputs`` dictionary.

    Fetches Spotify and Genius lyrics, then (unless ``skip_transcription`` is
    set) transcribes, validates, corrects and writes the LRC/ASS files and the
    optional video. Finally copies the produced files to the output directory.
    """
    # NOTE(review): the source this was restored from had lost its indentation;
    # the `else` below is read as belonging to the skip_transcription check.
    self.logger.debug(f"Starting generate() with cache_dir: {self.cache_dir} and output_dir: {self.output_dir}")

    self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")

    # --- Spotify lyrics ---
    self.write_spotify_lyrics_data_file()
    self.write_spotify_lyrics_plain_text()
    if self.outputs["spotify_lyrics_text_filepath"]:
        spotify_processed_name = self.get_output_filename(" (Lyrics Spotify Processed).txt")
        self.outputs["spotify_lyrics_processed_filepath"] = os.path.join(self.cache_dir, spotify_processed_name)
        self.write_processed_lyrics(
            self.outputs["spotify_lyrics_text_filepath"],
            self.outputs["spotify_lyrics_processed_filepath"],
        )

    # --- Genius lyrics ---
    self.write_genius_lyrics_file()
    if self.outputs["genius_lyrics_text_filepath"]:
        genius_processed_name = self.get_output_filename(" (Lyrics Genius Processed).txt")
        self.outputs["genius_lyrics_processed_filepath"] = os.path.join(self.cache_dir, genius_processed_name)
        self.write_processed_lyrics(
            self.outputs["genius_lyrics_text_filepath"],
            self.outputs["genius_lyrics_processed_filepath"],
        )

    if self.skip_transcription:
        # Without a transcription the Genius lyrics stand in for the corrected lyrics.
        self.outputs["corrected_lyrics_text_filepath"] = self.outputs["genius_lyrics_text_filepath"]
        self.outputs["corrected_lyrics_text"] = self.outputs["genius_lyrics_text"]
    else:
        self.transcribe()
        self.validate_lyrics_match_song()

        self.correct_lyrics_transcription()

        self.calculate_singing_percentage()

        self.write_midico_lrc_file()
        self.write_ass_file()

        if self.render_video:
            self.outputs["karaoke_video_filepath"] = self.get_cache_filepath(".mp4")
            self.create_video()

    self.copy_files_to_output_dir()

    return self.outputs
182
-
183
def copy_files_to_output_dir(self):
    """Copy every existing ``*_filepath`` output into the output directory.

    Falls back to the current working directory when no output directory was
    configured, and records the directory used in ``outputs["output_dir"]``.
    """
    if self.output_dir is None:
        self.output_dir = os.getcwd()

    self.logger.debug(f"copying temporary files to output dir: {self.output_dir}")
    self.logger.debug("Files to copy:")
    for output_name, output_value in self.outputs.items():
        # Only entries named "*_filepath" refer to files on disk.
        if not output_name.endswith("_filepath"):
            continue

        self.logger.debug(f" {output_name}: {output_value}")
        copyable = bool(output_value) and os.path.isfile(output_value)
        if not copyable:
            self.logger.debug(f" File doesn't exist or is None")
            continue

        self.logger.debug(f" File exists, copying to {self.output_dir}")
        shutil.copy(output_value, self.output_dir)

    self.outputs["output_dir"] = self.output_dir
199
-
200
def validate_lyrics_match_song(self):
    """Discard online lyrics that do not resemble the primary transcription.

    For each of the Genius and Spotify sources that has lyrics text, the share
    of distinct online-lyrics words that also occur in the transcription is
    computed. Below 50% the source's text and text-file path are reset to None
    so later stages ignore it. An error is logged if no source validates.
    """
    at_least_one_online_lyrics_validated = False

    # A missing transcription is treated as "no words" instead of crashing on None.split().
    transcribed_words = set((self.outputs.get("transcribed_lyrics_text_primary") or "").split())

    for online_lyrics_source in ["genius", "spotify"]:
        self.logger.debug(f"validating transcribed lyrics match lyrics from {online_lyrics_source}")

        online_lyrics_text_key = f"{online_lyrics_source}_lyrics_text"
        # The outputs registry names these "<source>_lyrics_text_filepath"; the
        # previous "<source>_lyrics_filepath" key never existed, so the stale
        # path was left in place and a bogus key was created.
        online_lyrics_filepath_key = f"{online_lyrics_source}_lyrics_text_filepath"

        if self.outputs.get(online_lyrics_text_key) is None:
            continue

        self.logger.debug(f"Using primitive word matching to validate {online_lyrics_source} lyrics match")
        online_lyrics_words = set(self.outputs[online_lyrics_text_key].split())
        common_words = transcribed_words & online_lyrics_words
        # Whitespace-only lyrics have no words: count that as 0% rather than dividing by zero.
        if online_lyrics_words:
            match_percentage = len(common_words) / len(online_lyrics_words) * 100
        else:
            match_percentage = 0.0

        if match_percentage >= 50:
            self.logger.info(
                f"{online_lyrics_source} lyrics successfully validated to match transcription with {match_percentage:.2f}% word match"
            )
            at_least_one_online_lyrics_validated = True
        else:
            self.logger.warning(f"{online_lyrics_source} lyrics do not match transcription, deleting that source from outputs")
            self.outputs[online_lyrics_text_key] = None
            self.outputs[online_lyrics_filepath_key] = None

    self.logger.info(
        f"Completed validation of transcription using online lyrics sources. Match found: {at_least_one_online_lyrics_validated}"
    )

    if not at_least_one_online_lyrics_validated:
        self.logger.error(
            f"Lyrics from Genius and Spotify did not match the transcription. Please check artist and title are set correctly."
        )
236
-
237
def correct_lyrics_transcription(self):
    """Produce the corrected lyrics data, using the JSON cache when available.

    On a cache hit the cached JSON is loaded; otherwise the corrector is run
    with the fetched lyrics and transcription data and its result is written to
    the cache. In both cases the plain-text version is (re)generated so the
    ``corrected_lyrics_text*`` outputs do not depend on the cache state.
    """
    corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))

    if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
        self.logger.info(
            f"found existing file at corrected_lyrics_data_json_cache_filepath, reading: {corrected_lyrics_data_json_cache_filepath}"
        )

        # Read with the same encoding the cache file is written with below.
        with open(corrected_lyrics_data_json_cache_filepath, "r", encoding="utf-8") as corrected_lyrics_data_json:
            self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath
            self.outputs["corrected_lyrics_data_dict"] = json.load(corrected_lyrics_data_json)

        # Previously skipped on a cache hit, leaving the text outputs as None.
        self.write_corrected_lyrics_plain_text()
        return

    lyrics_corrector = LyricsTranscriptionCorrector(logger=self.logger)
    lyrics_corrector.set_input_data(
        spotify_lyrics_data_dict=self.outputs["spotify_lyrics_data_dict"],
        # "spotify_lyrics_text" only exists once Spotify lyrics were written,
        # so a plain subscript raised KeyError whenever Spotify was skipped.
        spotify_lyrics_text=self.outputs.get("spotify_lyrics_text"),
        genius_lyrics_text=self.outputs["genius_lyrics_text"],
        transcription_data_dict_whisper=self.outputs["transcription_data_dict_whisper"],
        transcription_data_dict_audioshake=self.outputs["transcription_data_dict_audioshake"],
    )
    self.outputs["corrected_lyrics_data_dict"] = lyrics_corrector.run_corrector()

    # Save the corrected lyrics to output JSON file
    self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}")
    with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as f:
        f.write(json.dumps(self.outputs["corrected_lyrics_data_dict"], indent=4))

    self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath

    self.write_corrected_lyrics_plain_text()
268
-
269
def write_corrected_lyrics_plain_text(self):
    """Write the corrected segments as plain text, one stripped segment per line.

    Does nothing when there is no corrected lyrics data. Otherwise stores the
    file path and the accumulated text in the outputs registry.
    """
    corrected_data = self.outputs["corrected_lyrics_data_dict"]
    if not corrected_data:
        return

    self.logger.debug(f"corrected_lyrics_data_dict exists, writing plain text lyrics file")

    target_name = self.get_output_filename(" (Lyrics Corrected).txt")
    corrected_lyrics_text_filepath = os.path.join(self.cache_dir, target_name)
    self.outputs["corrected_lyrics_text_filepath"] = corrected_lyrics_text_filepath

    self.outputs["corrected_lyrics_text"] = ""

    self.logger.debug(f"writing lyrics plain text to corrected_lyrics_text_filepath: {corrected_lyrics_text_filepath}")
    with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as text_file:
        for segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
            text_line = segment["text"].strip() + "\n"
            self.outputs["corrected_lyrics_text"] += text_line
            text_file.write(text_line)
285
-
286
def write_spotify_lyrics_data_file(self):
    """Load Spotify lyrics data from the cache, or fetch and cache it.

    Skipped when no Spotify cookie is configured or the song is unknown. On
    success the data and the JSON file path are stored in the outputs registry.
    A failed fetch is logged and leaves the outputs untouched.
    """
    if self.spotify_cookie and self.song_known:
        self.logger.debug(f"attempting spotify fetch as spotify_cookie and song name was set")
    else:
        self.logger.warning(f"skipping spotify fetch as not all spotify params were set")
        return

    spotify_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Spotify).json"))

    if os.path.isfile(spotify_lyrics_data_json_cache_filepath):
        self.logger.debug(
            f"found existing file at spotify_lyrics_data_json_cache_filepath, reading: {spotify_lyrics_data_json_cache_filepath}"
        )

        # Read with the same encoding the cache file is written with below.
        with open(spotify_lyrics_data_json_cache_filepath, "r", encoding="utf-8") as spotify_lyrics_data_json:
            spotify_lyrics_data_dict = json.load(spotify_lyrics_data_json)
            self.outputs["spotify_lyrics_data_filepath"] = spotify_lyrics_data_json_cache_filepath
            self.outputs["spotify_lyrics_data_dict"] = spotify_lyrics_data_dict
            return

    self.logger.debug(
        f"no cached lyrics found at spotify_lyrics_data_json_cache_filepath: {spotify_lyrics_data_json_cache_filepath}, attempting to fetch from spotify"
    )

    try:
        spotify_client = syrics.api.Spotify(self.spotify_cookie)
        spotify_search_query = f"{self.title} - {self.artist}"
        spotify_search_results = spotify_client.search(spotify_search_query, type="track", limit=5)

        spotify_top_result = spotify_search_results["tracks"]["items"][0]
        self.logger.debug(
            f"spotify_top_result: {spotify_top_result['artists'][0]['name']} - {spotify_top_result['name']} ({spotify_top_result['external_urls']['spotify']})"
        )

        spotify_lyrics_dict = spotify_client.get_lyrics(spotify_top_result["id"])
        spotify_lyrics_json = json.dumps(spotify_lyrics_dict, indent=4)

        self.logger.debug(
            f"writing lyrics data JSON to spotify_lyrics_data_json_cache_filepath: {spotify_lyrics_data_json_cache_filepath}"
        )
        with open(spotify_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as f:
            f.write(spotify_lyrics_json)
    except Exception as e:
        # Best-effort fetch: log and carry on without Spotify lyrics. The old
        # code passed `e` as a stray logging argument (a format error) and then
        # hit an unbound `spotify_lyrics_dict` right after this handler.
        self.logger.warning(f"caught exception while attempting to fetch from spotify: {e}")
        return

    # Only reached after the file was written, so the path always exists.
    self.outputs["spotify_lyrics_data_filepath"] = spotify_lyrics_data_json_cache_filepath
    self.outputs["spotify_lyrics_data_dict"] = spotify_lyrics_dict
337
-
338
def write_spotify_lyrics_plain_text(self):
    """Write the Spotify lyric lines as plain text, one stripped line each.

    Does nothing when no Spotify lyrics data is present. Otherwise stores the
    file path and the accumulated text in the outputs registry.
    """
    if not self.outputs["spotify_lyrics_data_dict"]:
        return

    self.logger.debug(f"spotify_lyrics data found, checking/writing plain text lyrics file")

    target_name = self.get_output_filename(" (Lyrics Spotify).txt")
    spotify_lyrics_text_filepath = os.path.join(self.cache_dir, target_name)
    self.outputs["spotify_lyrics_text_filepath"] = spotify_lyrics_text_filepath

    lyric_lines = self.outputs["spotify_lyrics_data_dict"]["lyrics"]["lines"]

    self.outputs["spotify_lyrics_text"] = ""

    self.logger.debug(f"writing lyrics plain text to spotify_lyrics_text_filepath: {spotify_lyrics_text_filepath}")
    with open(spotify_lyrics_text_filepath, "w", encoding="utf-8") as text_file:
        for lyric_line in lyric_lines:
            text_line = lyric_line["words"].strip() + "\n"
            self.outputs["spotify_lyrics_text"] += text_line
            text_file.write(text_line)
356
-
357
@retry(
    # Give up once two minutes have elapsed.
    stop=stop_after_delay(120),
    # Exponential backoff between attempts, bounded to 4..60 seconds.
    wait=wait_exponential(multiplier=1, min=4, max=60),
    # Only request-level failures trigger another attempt.
    retry=retry_if_exception_type(requests.exceptions.RequestException),
    # Surface the last exception itself when the retries are exhausted.
    reraise=True,
)
def fetch_genius_lyrics(self, genius, title, artist):
    """Search Genius for ``title`` by ``artist`` via the given client, with retries."""
    self.logger.debug(f"fetch_genius_lyrics attempting to fetch lyrics from Genius for {title} by {artist}")
    song = genius.search_song(title, artist)
    return song
366
-
367
def write_genius_lyrics_file(self):
    """Load Genius lyrics from the cache, or fetch, clean and cache them.

    Skipped when no Genius API token is configured or the song is unknown.

    Returns:
        The lyrics split into lines, or None when the fetch was skipped or
        Genius had no match for the song.

    Raises:
        requests.exceptions.RequestException: the fetch kept failing after retries.
    """
    if self.genius_api_token and self.song_known:
        self.logger.debug(f"attempting genius fetch as genius_api_token and song name was set")
    else:
        self.logger.warning(f"skipping genius fetch as not all genius params were set")
        return

    genius_lyrics_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Genius).txt"))

    # Check cache first
    if os.path.isfile(genius_lyrics_cache_filepath):
        self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")

        # The cache is written as UTF-8 below; reading it with the platform
        # default encoding can fail or garble non-ASCII lyrics.
        with open(genius_lyrics_cache_filepath, "r", encoding="utf-8") as cached_lyrics:
            lyrics = cached_lyrics.read()

        self.outputs["genius_lyrics_text_filepath"] = genius_lyrics_cache_filepath
        self.outputs["genius_lyrics_text"] = lyrics
        # Same return shape as a fresh fetch (this path used to return None).
        return lyrics.split("\n")
    self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")

    # Initialize Genius with better defaults
    genius = lyricsgenius.Genius(
        self.genius_api_token,
        verbose=(self.log_level == logging.DEBUG),
        remove_section_headers=True,
    )

    try:
        song = self.fetch_genius_lyrics(genius, self.title, self.artist)
        if song is None:
            self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
            return None

        lyrics = self.clean_genius_lyrics(song.lyrics)

        self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
        with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
            f.write(lyrics)

        self.outputs["genius_lyrics_text_filepath"] = genius_lyrics_cache_filepath
        self.outputs["genius_lyrics_text"] = lyrics
        return lyrics.split("\n")

    except requests.exceptions.RequestException as e:
        self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}")
        raise
412
-
413
def clean_genius_lyrics(self, lyrics):
    """Strip Genius page artefacts from raw lyrics text and return the result.

    Literal backslash-n sequences are first turned into real newlines, then a
    fixed sequence of regex substitutions is applied in order. None of the
    patterns use MULTILINE, so ``^`` and ``$`` anchor to the whole text.
    """
    cleaned = lyrics.replace("\\n", "\n")

    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        # Promotional text Genius injects into the lyrics.
        (r"You might also like", ""),
        # "<title> Lyrics" glued directly onto a capitalised first lyric word.
        (r".*?Lyrics([A-Z])", r"\1"),
        # Leading contributor banner, e.g. "27 ContributorsSex Bomb Lyrics".
        (r"^[0-9]* Contributors.*Lyrics", ""),
        # Ticket advert, e.g. "See Tom Jones LiveGet tickets as low as $71".
        (r"See.*Live.*Get tickets as low as \$[0-9]+", ""),
        # Trailing "<digits>Embed" at the very end of the text.
        (r"[0-9]+Embed$", ""),
        # Trailing "Embed" glued onto the last word of the text.
        (r"(\S)Embed$", r"\1"),
        # Text that consists of nothing but "Embed".
        (r"^Embed$", r""),
        # Bracketed tags such as "[Chorus]", together with anything before
        # them on the same line; text after the closing bracket is kept.
        (r".*?\[.*?\].*?", ""),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # add any additional cleaning rules here
    return cleaned
429
-
430
def calculate_singing_percentage(self):
    """Compute how much of the song is sung and store the figures in outputs.

    The singing duration is the sum of the primary transcription's segment
    lengths; the song duration is read from ``ffprobe``. Stores
    ``singing_percentage`` (truncated to int), ``total_singing_duration`` and
    ``song_duration``.
    """
    # Total seconds of singing, taken from the transcription segment timings.
    primary_segments = self.outputs["transcription_data_dict_primary"]["segments"]
    total_singing_duration = sum(segment["end"] - segment["start"] for segment in primary_segments)

    self.logger.debug(f"calculated total_singing_duration: {int(total_singing_duration)} seconds, now running ffprobe")

    # Ask ffprobe for the container duration, printed as a bare number.
    duration_command = ["ffprobe", "-i", self.audio_filepath]
    duration_command += ["-show_entries", "format=duration"]
    duration_command += ["-v", "quiet"]
    duration_command += ["-of", "csv=p=0"]
    song_duration = float(subprocess.check_output(duration_command, universal_newlines=True))

    self.outputs["singing_percentage"] = int((total_singing_duration / song_duration) * 100)
    self.outputs["total_singing_duration"] = total_singing_duration
    self.outputs["song_duration"] = song_duration
459
-
460
def write_midico_lrc_file(self):
    """Write the corrected lyrics as a MidiCo-style LRC file with word timestamps.

    Every word of every corrected segment becomes one ``[mm:ss.mmm]1:`` line;
    the first word of a segment is prefixed with ``/``. Words other than the
    last of a segment get a trailing space appended, and that change is made
    in place on the corrected lyrics data, so later readers of the data see it.
    """
    lrc_filename = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc"))
    self.outputs["midico_lrc_filepath"] = lrc_filename

    self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
    with open(lrc_filename, "w", encoding="utf-8") as lrc_file:
        lrc_file.write("[re:MidiCo]\n")
        for segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
            for position, word in enumerate(segment["words"]):
                start_time = self.format_time_lrc(word["start"])

                is_last_word = position == len(segment["words"]) - 1
                if not is_last_word and not word["text"].endswith(" "):
                    self.logger.debug(f"word '{word['text']}' does not end with a space, adding one")
                    word["text"] += " "

                segment_marker = "/" if position == 0 else ""
                lrc_file.write(f"[{start_time}]1:{segment_marker}{word['text']}\n")
479
-
480
def create_screens(self):
    """Lay the corrected lyrics out into screens of at most four lines.

    Each segment starts a new line. Within a segment a new line is started
    when adding the next word would exceed 36 characters, or when the word
    begins with an uppercase letter and the current line is not empty. A new
    screen is started whenever the current one already holds four lines.

    Returns:
        The list of ``subtitles.LyricsScreen`` objects, in order.
    """
    self.logger.debug("create_screens beginning generation of screens from transcription results")
    screens: List[subtitles.LyricsScreen] = []
    screen: Optional[subtitles.LyricsScreen] = None

    max_lines_per_screen = 4
    max_line_length = 36  # Maximum characters per line
    self.logger.debug(f"Max lines per screen: {max_lines_per_screen}, Max line length: {max_line_length}")

    for segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
        self.logger.debug(f"Processing segment: {segment['text']}")
        if screen is None or len(screen.lines) >= max_lines_per_screen:
            screen = subtitles.LyricsScreen(video_size=self.video_resolution_num, line_height=self.line_height, logger=self.logger)
            screens.append(screen)
            self.logger.debug(f"Created new screen. Total screens: {len(screens)}")

        words = segment["words"]
        current_line = subtitles.LyricsLine()
        current_line_text = ""
        self.logger.debug(f"Processing {len(words)} words in segment")

        for word in words:
            self.logger.debug(f"Processing word: '{word['text']}'")

            would_overflow = len(current_line_text) + len(word["text"]) + 1 > max_line_length
            # Slice instead of [0] so an empty word text cannot raise IndexError.
            starts_capitalised = bool(current_line_text) and word["text"][:1].isupper()

            if would_overflow or starts_capitalised:
                self.logger.debug(f"Current line would exceed max length or new capitalized word. Line: '{current_line_text}'")
                if current_line.segments:
                    screen.lines.append(current_line)
                    self.logger.debug(f"Added line to screen. Lines on current screen: {len(screen.lines)}")
                    if len(screen.lines) >= max_lines_per_screen:
                        screen = subtitles.LyricsScreen(
                            video_size=self.video_resolution_num,
                            line_height=self.line_height,
                            logger=self.logger,
                        )
                        screens.append(screen)
                        self.logger.debug(f"Screen full, created new screen. Total screens: {len(screens)}")
                current_line = subtitles.LyricsLine()
                current_line_text = ""
                self.logger.debug("Reset current line")

            current_line_text += (" " if current_line_text else "") + word["text"]

            lyric_segment = subtitles.LyricSegment(
                text=word["text"],
                ts=timedelta(seconds=word["start"]),
                end_ts=timedelta(seconds=word["end"]),
            )

            current_line.segments.append(lyric_segment)
            self.logger.debug(f"Added word to current line. Current line: '{current_line_text}'")

        # Flush whatever is left of the segment as its final line.
        if current_line.segments:
            screen.lines.append(current_line)
            self.logger.debug(f"Added final line of segment to screen. Lines on current screen: {len(screen.lines)}")

    self.logger.debug(f"Finished creating screens. Total screens created: {len(screens)}")
    return screens
539
-
540
def write_ass_file(self):
    """Build the lyric screens and write them out as an ASS subtitle file.

    The target path is recorded in ``outputs["ass_subtitles_filepath"]``. The
    song duration from the outputs registry, truncated to whole seconds, is
    passed on when setting segment end times.
    """
    ass_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).ass"))
    self.outputs["ass_subtitles_filepath"] = ass_filepath
    self.logger.debug(f"writing ASS formatted subtitle file: {ass_filepath}")

    song_seconds = int(self.outputs["song_duration"])
    timed_screens = subtitles.set_segment_end_times(self.create_screens(), song_seconds)
    timed_screens = subtitles.set_screen_start_times(timed_screens)

    styled_subtitles = subtitles.create_styled_subtitles(timed_screens, self.video_resolution_num, self.font_size)
    styled_subtitles.write(ass_filepath)
551
-
552
def resize_background_image(self):
    """Scale the background image to the video resolution with ffmpeg.

    A previously resized image found at the cache path is reused.

    Returns:
        Path of the resized PNG.

    Raises:
        FileNotFoundError: ffmpeg ran but the resized file does not exist.
    """
    self.logger.debug(
        f"resize_background_image attempting to resize background image: {self.video_background_image} to resolution: {self.video_resolution}"
    )
    background_image_resized = self.get_cache_filepath(f"-{self.video_resolution}.png")

    if os.path.isfile(background_image_resized):
        self.logger.debug(
            f"resize_background_image found existing resized background image, skipping resize: {background_image_resized}"
        )
        return background_image_resized

    width, height = self.video_resolution_num[0], self.video_resolution_num[1]
    resize_command = [
        "ffmpeg",
        "-i",
        self.video_background_image,
        "-vf",
        f"scale={width}x{height}",
        background_image_resized,
    ]
    subprocess.check_output(resize_command, universal_newlines=True)

    if os.path.isfile(background_image_resized):
        return background_image_resized

    raise FileNotFoundError(
        f"background_image_resized was not a valid file after running ffmpeg to resize: {background_image_resized}"
    )
576
-
577
def create_video(self):
    """Render the video with ffmpeg and return ffmpeg's captured stdout.

    The video combines a background (the resized image, or a solid colour),
    the input audio re-encoded as AAC, and the ASS subtitles. The output path
    is taken from ``outputs["karaoke_video_filepath"]``.
    """
    self.logger.debug(f"create_video attempting to generate video file: {self.outputs['karaoke_video_filepath']}")

    # Optional artist/title tags for the output container.
    video_metadata = []
    if self.artist:
        video_metadata += ["-metadata", f"artist={self.artist}"]
    if self.title:
        video_metadata += ["-metadata", f"title={self.title}"]

    # Frame rate of 30 fps.
    ffmpeg_cmd = ["ffmpeg", "-r", "30"]

    if self.video_background_image:
        self.logger.debug(f"background image set: {self.video_background_image}, resizing to resolution: {self.video_resolution}")

        background_image_resized = self.resize_background_image()

        # Loop the still image as the video input.
        ffmpeg_cmd += ["-loop", "1", "-i", background_image_resized]
    else:
        self.logger.debug(f"background not set, using solid {self.video_background_color} background with resolution: {self.video_resolution}")
        ffmpeg_cmd += ["-f", "lavfi"]
        ffmpeg_cmd += ["-i", f"color=c={self.video_background_color}:s={self.video_resolution_num[0]}x{self.video_resolution_num[1]}:r=30"]

    # Prefer the hardware accelerated H.264 encoder when ffmpeg lists it.
    video_codec = "libx264"
    ffmpeg_codes = subprocess.getoutput("ffmpeg -codecs")

    if "h264_videotoolbox" in ffmpeg_codes:
        video_codec = "h264_videotoolbox"
        self.logger.info(f"video codec set to hardware accelerated h264_videotoolbox")

    # Audio input, re-encoded as AAC.
    ffmpeg_cmd += ["-i", self.audio_filepath]
    ffmpeg_cmd += ["-c:a", "aac"]
    # Burn in the subtitles.
    ffmpeg_cmd += ["-vf", "ass=" + self.outputs["ass_subtitles_filepath"]]
    ffmpeg_cmd += ["-c:v", video_codec]
    # Slower preset for better compression efficiency.
    ffmpeg_cmd += ["-preset", "slow"]
    # Target, minimum and maximum video bitrate, and rate-control buffer size.
    ffmpeg_cmd += ["-b:v", "5000k"]
    ffmpeg_cmd += ["-minrate", "5000k"]
    ffmpeg_cmd += ["-maxrate", "20000k"]
    ffmpeg_cmd += ["-bufsize", "10000k"]
    # Stop at the end of the shortest stream; overwrite output without asking.
    ffmpeg_cmd += ["-shortest", "-y"]
    ffmpeg_cmd += video_metadata
    ffmpeg_cmd += [self.outputs["karaoke_video_filepath"]]

    self.logger.debug(f"running ffmpeg command to generate video: {' '.join(ffmpeg_cmd)}")
    return subprocess.check_output(ffmpeg_cmd, universal_newlines=True)
657
-
658
def format_time_lrc(self, duration):
    """Format a duration given in seconds as an ``MM:SS.mmm`` timestamp.

    The duration is converted to a whole number of milliseconds once, up
    front, and then split with integer arithmetic. Deriving the millisecond
    field from ``(duration % 1) * 1000`` and truncating it loses a
    millisecond whenever the float sits just below the intended value
    (for example ``1.001`` would render as ``00:01.000``).

    Args:
        duration: Time offset in seconds (int or float), expected to be
            non-negative.

    Returns:
        The formatted timestamp, e.g. ``"01:15.500"`` for ``75.5``. Minutes
        are not wrapped into hours, so values past 99 minutes simply use
        more digits.
    """
    total_milliseconds = int(round(duration * 1000))
    minutes, remainder = divmod(total_milliseconds, 60_000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
664
-
665
def write_transcribed_lyrics_plain_text(self):
    """Write each available transcription to a plain-text file, one segment per line.

    For Whisper and AudioShake independently, when the corresponding
    ``transcription_data_dict_*`` entry in ``self.outputs`` is truthy:

    * the target path inside ``self.cache_dir`` is stored under
      ``transcribed_lyrics_text_<source>_filepath``;
    * the concatenated segment texts (unstripped, each followed by a
      newline) are stored under ``transcribed_lyrics_text_<source>``;
    * the stripped segment texts are written to the file, one per line.
    """
    whisper_data = self.outputs["transcription_data_dict_whisper"]
    if whisper_data:
        whisper_txt_path = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Whisper).txt"))
        self.logger.debug(f"Setting Whisper text filepath to: {whisper_txt_path}")
        self.outputs["transcribed_lyrics_text_whisper_filepath"] = whisper_txt_path
        self.outputs["transcribed_lyrics_text_whisper"] = ""

        self.logger.debug(f"Writing Whisper lyrics to: {whisper_txt_path}")
        with open(whisper_txt_path, "w", encoding="utf-8") as txt_file:
            for entry in whisper_data["segments"]:
                raw_line = entry["text"]
                # The in-memory copy keeps the raw text; only the file is stripped.
                self.outputs["transcribed_lyrics_text_whisper"] += raw_line + "\n"
                txt_file.write(raw_line.strip() + "\n")
        self.logger.debug(f"Finished writing Whisper lyrics, file exists: {os.path.exists(whisper_txt_path)}")

    audioshake_data = self.outputs["transcription_data_dict_audioshake"]
    if audioshake_data:
        audioshake_txt_path = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics AudioShake).txt"))
        self.outputs["transcribed_lyrics_text_audioshake_filepath"] = audioshake_txt_path
        self.outputs["transcribed_lyrics_text_audioshake"] = ""

        self.logger.debug(f"Writing AudioShake lyrics to: {audioshake_txt_path}")
        with open(audioshake_txt_path, "w", encoding="utf-8") as txt_file:
            for entry in audioshake_data["segments"]:
                raw_line = entry["text"]
                self.outputs["transcribed_lyrics_text_audioshake"] += raw_line + "\n"
                txt_file.write(raw_line.strip() + "\n")
689
-
690
def find_best_split_point(self, text, max_length):
    """Pick a character index at which ``text`` should be broken in two.

    Candidates are tried in this order, and the first one whose left-hand
    part (after stripping) fits in ``max_length`` wins:

    1. a comma within 20 characters of the midpoint (commas are scanned
       left to right; the comma stays in the first part);
    2. an occurrence of ``" and "``, nearest to the midpoint first (the
       split lands after the word and its trailing space);
    3. whitespace preceding a capitalised word, nearest to the midpoint first;
    4. the boundary after the middle word, when there are more than two words;
    5. ``max_length`` itself as a forced split.

    Args:
        text: The line to split.
        max_length: Maximum allowed length of the first part.

    Returns:
        The index in ``text`` at which to split.
    """
    log = self.logger.debug
    log(f"Finding best split point for text: '{text}' (max_length: {max_length})")
    tokens = text.split()
    half = len(tokens) // 2
    # Character offset of the end of the first half of the words.
    mid_point = len(" ".join(tokens[:half]))
    log(f"Mid point is at character {mid_point}")

    def distance_from_middle(position):
        return abs(position - mid_point)

    # Check for a comma within one or two words of the middle word
    if "," in text:
        comma_positions = [pos for pos, ch in enumerate(text) if ch == ","]
        log(f"Found commas at indices: {comma_positions}")
        for pos in comma_positions:
            cut = pos + 1  # Include the comma in the first part
            if distance_from_middle(pos) < 20 and len(text[:cut].strip()) <= max_length:
                log(f"Choosing comma at index {pos} as split point")
                return cut

    # Check for 'and'
    conjunction = " and "
    if conjunction in text:
        and_positions = [match.start() for match in re.finditer(conjunction, text)]
        log(f"Found 'and' at indices: {and_positions}")
        for pos in sorted(and_positions, key=distance_from_middle):
            cut = pos + len(conjunction)
            if len(text[:cut].strip()) <= max_length:
                log(f"Choosing 'and' at index {pos} as split point")
                return cut

    # Check for words starting with a capital letter
    capital_positions = [match.start() for match in re.finditer(r"\s[A-Z]", text)]
    log(f"Found capital words at indices: {capital_positions}")
    for pos in sorted(capital_positions, key=distance_from_middle):
        if pos > 0 and len(text[:pos].strip()) <= max_length:
            log(f"Choosing capital word at index {pos} as split point")
            return pos

    # If no better split point is found, try splitting at the middle word
    if len(tokens) > 2 and half > 0 and mid_point <= max_length:
        log(f"Choosing middle word split at index {mid_point}")
        return mid_point

    # If the text is still too long, forcibly split at the maximum length
    log(f"No suitable split point found, forcibly splitting at max_length {max_length}")
    return max_length
733
-
734
def split_long_segments(self, segments, max_length):
    """Split segments whose text exceeds ``max_length`` into shorter segments.

    Segments at or under the limit are passed through unchanged. A longer
    segment is rebuilt word by word from its ``"words"`` list, and the
    segment being built is closed whenever either:

    * the accumulated text grows past ``max_length`` (the word that caused
      the overflow stays in the segment being closed), or
    * a word other than the first begins with an uppercase letter (when
      there is text before it, that word is carried over and opens the
      next segment).

    Args:
        segments: Iterable of dicts with ``"text"``, ``"start"``, ``"end"``
            and ``"words"`` keys, where each word is a dict with ``"text"``,
            ``"start"`` and ``"end"``.
        max_length: Character count above which a segment is split.

    Returns:
        A new list of segment dicts. Word dicts are shared with the input,
        not copied.
    """
    self.logger.debug(f"Splitting long segments (max_length: {max_length})")
    new_segments = []
    for segment in segments:
        text = segment["text"]
        self.logger.debug(f"Processing segment: '{text}' (length: {len(text)})")
        if len(text) <= max_length:
            self.logger.debug("Segment is within max_length, keeping as is")
            new_segments.append(segment)
            continue

        self.logger.debug("Segment exceeds max_length, splitting")
        current_text = ""
        current_start = segment["start"]
        current_words = []

        for i, meta in enumerate(segment["words"]):
            word = meta["text"]
            # After an overflow split the next piece starts at the first
            # word actually placed in it, not at the word that was just
            # consumed by the previous piece.
            if current_start is None:
                current_start = meta["start"]
            if current_text:
                current_text += " "
            current_text += word
            current_words.append(meta)

            # word[:1] instead of word[0] so an empty word text cannot raise.
            starts_upper = word[:1].isupper()
            if not (len(current_text) > max_length or (i > 0 and starts_upper)):
                continue

            self.logger.debug(f"Splitting at: '{current_text}'")
            # If splitting due to capitalization, don't include the capitalized word
            carry_word = starts_upper and len(current_text.strip()) > len(word)
            if carry_word:
                split_text = current_text[: -(len(word) + 1)].strip()
                current_words = current_words[:-1]
            else:
                split_text = current_text.strip()

            new_segment = {"text": split_text, "start": current_start, "end": current_words[-1]["end"], "words": current_words}
            new_segments.append(new_segment)
            self.logger.debug(f"Added new segment: {new_segment}")

            # Reset for next segment
            if carry_word:
                current_text = word
                current_words = [meta]
                current_start = meta["start"]
            else:
                current_text = ""
                current_words = []
                current_start = None

        # Add any remaining text as a final segment
        if current_text:
            self.logger.debug(f"Adding final segment: '{current_text}'")
            new_segments.append(
                {"text": current_text.strip(), "start": current_start, "end": segment["end"], "words": current_words}
            )

    self.logger.debug(f"Splitting complete. Original segments: {len(segments)}, New segments: {len(new_segments)}")
    return new_segments
789
-
790
def transcribe(self):
    """Populate ``self.outputs`` with Whisper and, when configured, AudioShake transcriptions.

    Cached JSON results are loaded first. If everything that can be
    produced is already cached (both sources, or Whisper alone when no
    AudioShake API token is set), only the primary transcription is
    selected and the method returns. Otherwise the AudioShake job is
    started first, Whisper runs locally while that job is in progress, the
    AudioShake result is fetched, both results are written to the cache,
    the primary transcription is selected and the plain-text files are
    written.
    """
    # Check cache first
    whisper_cache_path = self.get_cache_filepath(" (Lyrics Whisper).json")
    audioshake_cache_path = self.get_cache_filepath(" (Lyrics AudioShake).json")

    self.logger.debug(f"Cache directory: {self.cache_dir}")
    self.logger.debug(f"Output directory: {self.output_dir}")

    if os.path.isfile(whisper_cache_path):
        self.logger.debug(f"Found existing Whisper transcription, reading: {whisper_cache_path}")
        with open(whisper_cache_path, "r") as cached:
            self.outputs["transcription_data_dict_whisper"] = json.load(cached)
        self.outputs["transcription_data_whisper_filepath"] = whisper_cache_path
        self.logger.debug(f"Loaded Whisper data and set filepath to: {self.outputs['transcription_data_whisper_filepath']}")

    if os.path.isfile(audioshake_cache_path):
        self.logger.debug(f"Found existing AudioShake transcription, reading: {audioshake_cache_path}")
        with open(audioshake_cache_path, "r") as cached:
            self.outputs["transcription_data_dict_audioshake"] = json.load(cached)
        self.outputs["transcription_data_audioshake_filepath"] = audioshake_cache_path

    # Nothing left to compute when Whisper is cached and AudioShake is
    # either cached too or not available at all.
    if self.outputs["transcription_data_dict_whisper"] and (
        self.outputs["transcription_data_dict_audioshake"] or not self.audioshake_api_token
    ):
        self.set_primary_transcription()
        return

    # Continue with transcription for any missing data...
    audioshake_job_id = None
    if self.audioshake_api_token and not self.outputs["transcription_data_dict_audioshake"]:
        self.logger.debug("Starting AudioShake transcription")
        from .audioshake_transcriber import AudioShakeTranscriber

        audioshake = AudioShakeTranscriber(api_token=self.audioshake_api_token, logger=self.logger, output_prefix=self.output_prefix)
        audioshake_job_id = audioshake.start_transcription(self.audio_filepath)

    # Run Whisper transcription if needed while AudioShake processes
    if not self.outputs["transcription_data_dict_whisper"]:
        self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
        audio = whisper.load_audio(self.audio_filepath)
        model = whisper.load_model(self.transcription_model, device="cpu")
        whisper_result = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)

        # Remove segments with no words, only music
        lyric_segments = [seg for seg in whisper_result["segments"] if seg["text"].strip() != "Music"]
        whisper_result["segments"] = lyric_segments
        self.logger.debug(f"Removed 'Music' segments. Remaining segments: {len(whisper_result['segments'])}")

        # Split long segments
        self.logger.debug("Starting to split long segments")
        whisper_result["segments"] = self.split_long_segments(whisper_result["segments"], max_length=36)
        self.logger.debug(f"Finished splitting segments. Total segments after splitting: {len(whisper_result['segments'])}")

        # Store Whisper results
        self.outputs["transcription_data_dict_whisper"] = whisper_result
        self.outputs["transcription_data_whisper_filepath"] = whisper_cache_path
        with open(whisper_cache_path, "w") as cache_out:
            json.dump(whisper_result, cache_out, indent=4)

    # Now that Whisper is done, get AudioShake results if available
    if audioshake_job_id:
        self.logger.debug("Getting AudioShake results")
        audioshake_result = audioshake.get_transcription_result(audioshake_job_id)
        self.outputs["transcription_data_dict_audioshake"] = audioshake_result
        self.outputs["transcription_data_audioshake_filepath"] = audioshake_cache_path
        with open(audioshake_cache_path, "w") as cache_out:
            json.dump(audioshake_result, cache_out, indent=4)

    # Set the primary transcription source
    self.set_primary_transcription()

    # Write the text files
    self.write_transcribed_lyrics_plain_text()
865
-
866
def set_primary_transcription(self):
    """Set the primary transcription source (AudioShake if available, otherwise Whisper).

    Copies the chosen source's data dict, data filepath, text and text
    filepath into the corresponding ``*_primary`` entries of
    ``self.outputs``. If the chosen source has no text yet, it is built by
    joining the stripped segment texts with newlines.
    """
    results = self.outputs

    if results["transcription_data_dict_audioshake"]:
        self.logger.info("Using AudioShake as primary transcription source")
        results["transcription_data_dict_primary"] = results["transcription_data_dict_audioshake"]
        results["transcription_data_primary_filepath"] = results["transcription_data_audioshake_filepath"]

        # Set the primary text content
        if not results.get("transcribed_lyrics_text_audioshake"):
            stripped_lines = [seg["text"].strip() for seg in results["transcription_data_dict_audioshake"]["segments"]]
            results["transcribed_lyrics_text_audioshake"] = "\n".join(stripped_lines)
        results["transcribed_lyrics_text_primary"] = results["transcribed_lyrics_text_audioshake"]
        results["transcribed_lyrics_text_primary_filepath"] = results["transcribed_lyrics_text_audioshake_filepath"]
        return

    self.logger.info("Using Whisper as primary transcription source")
    results["transcription_data_dict_primary"] = results["transcription_data_dict_whisper"]
    results["transcription_data_primary_filepath"] = results["transcription_data_whisper_filepath"]

    # Set the primary text content
    if not results.get("transcribed_lyrics_text_whisper"):
        stripped_lines = [seg["text"].strip() for seg in results["transcription_data_dict_whisper"]["segments"]]
        results["transcribed_lyrics_text_whisper"] = "\n".join(stripped_lines)
    results["transcribed_lyrics_text_primary"] = results["transcribed_lyrics_text_whisper"]
    results["transcribed_lyrics_text_primary_filepath"] = results["transcribed_lyrics_text_whisper_filepath"]
892
-
893
def write_processed_lyrics(self, lyrics_file, processed_lyrics_file):
    """Run ``KaraokeLyricsProcessor`` over ``lyrics_file`` and write the result.

    Args:
        lyrics_file: Path passed to the processor as its input filename.
        processed_lyrics_file: Path passed to the processor as its output filename.
    """
    self.logger.info(f"Processing lyrics from {lyrics_file} and writing to {processed_lyrics_file}")

    processor_options = {
        "log_level": self.log_level,
        "log_formatter": self.log_formatter,
        "input_filename": lyrics_file,
        "output_filename": processed_lyrics_file,
        "max_line_length": 36,
    }
    lyrics_processor = KaraokeLyricsProcessor(**processor_options)
    lyrics_processor.process()
    lyrics_processor.write_to_output_file()

    self.logger.info(f"Lyrics processing complete, processed lyrics written to: {processed_lyrics_file}")
907
-
908
def get_cache_filepath(self, extension):
    """Return the path inside ``self.cache_dir`` for the output file ending in ``extension``.

    The filename comes from ``get_output_filename`` so cache files follow
    the same naming pattern as every other output (no slug or hash).
    """
    filename = self.get_output_filename(extension)
    cache_filepath = os.path.join(self.cache_dir, filename)
    self.logger.debug(f"get_cache_filepath returning cache_filepath: {cache_filepath}")
    return cache_filepath
913
-
914
def get_song_slug(self):
    """Return a slug identifying the song.

    With neither artist nor title set, the slug is ``"unknown_song_"``
    followed by the hash of the audio file. Otherwise it is the slugified
    artist and title (case preserved) joined by a hyphen, with
    ``"unknown_artist"`` / ``"unknown_title"`` standing in for a missing one.
    """
    if not (self.artist or self.title):
        return "unknown_song_" + self.get_file_hash(self.audio_filepath)

    slug_parts = [
        slugify.slugify(self.artist or "unknown_artist", lowercase=False),
        slugify.slugify(self.title or "unknown_title", lowercase=False),
    ]
    return "-".join(slug_parts)
921
-
922
def get_file_hash(self, filepath):
    """Return the hexadecimal MD5 digest of the file at ``filepath``.

    The file is opened in a context manager so the handle is always closed,
    and it is hashed in fixed-size chunks so large audio files are never
    loaded into memory in one piece. The digest is identical to hashing the
    whole content at once.

    Args:
        filepath: Path of the file to hash.

    Returns:
        The 32-character lowercase hex digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filepath, "rb") as file_handle:
        for chunk in iter(lambda: file_handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()
924
-
925
def create_folders(self):
    """Create the cache and output directories, skipping any that is ``None``.

    Missing parent directories are created as well, and directories that
    already exist are left alone.
    """
    for directory in (self.cache_dir, self.output_dir):
        if directory is None:
            continue
        os.makedirs(directory, exist_ok=True)
931
-
932
def get_output_filename(self, suffix):
    """Generate consistent filename with (Purpose) suffix pattern.

    Returns ``self.output_prefix`` immediately followed by ``suffix``; no
    separator is inserted, so the suffix carries its own leading space and
    file extension.
    """
    return "{}{}".format(self.output_prefix, suffix)