karaoke-gen 0.60.0__tar.gz → 0.61.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (23) hide show
  1. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/PKG-INFO +42 -1
  2. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/README.md +41 -0
  3. karaoke_gen-0.61.0/karaoke_gen/audio_processor.py +719 -0
  4. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/karaoke_gen.py +9 -9
  5. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/pyproject.toml +1 -1
  6. karaoke_gen-0.60.0/karaoke_gen/audio_processor.py +0 -401
  7. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/LICENSE +0 -0
  8. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/__init__.py +0 -0
  9. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/config.py +0 -0
  10. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/file_handler.py +0 -0
  11. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/karaoke_finalise/__init__.py +0 -0
  12. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/karaoke_finalise/karaoke_finalise.py +0 -0
  13. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/lyrics_processor.py +0 -0
  14. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/metadata.py +0 -0
  15. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  16. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  17. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  18. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  19. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  20. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/utils/__init__.py +0 -0
  21. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/utils/bulk_cli.py +0 -0
  22. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/utils/gen_cli.py +0 -0
  23. {karaoke_gen-0.60.0 → karaoke_gen-0.61.0}/karaoke_gen/video_generator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: karaoke-gen
3
- Version: 0.60.0
3
+ Version: 0.61.0
4
4
  Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
5
  License: MIT
6
6
  Author: Andrew Beveridge
@@ -72,6 +72,47 @@ Karaoke Generator is a comprehensive tool for creating high-quality karaoke vide
72
72
  pip install karaoke-gen
73
73
  ```
74
74
 
75
+ ## Remote Audio Separation 🌐
76
+
77
+ Karaoke Generator now supports remote audio separation using the Audio Separator API. This allows you to offload the compute-intensive audio separation to a remote GPU server while keeping the rest of the workflow local.
78
+
79
+ ### Benefits of Remote Processing
80
+ - **Save Local Resources**: No more laptop CPU/GPU consumption during separation
81
+ - **Faster Processing**: GPU-accelerated separation on dedicated hardware
82
+ - **Cost Effective**: ~$0.019 per separation job on Modal.com (with $30/month free credits)
83
+ - **Multiple Models**: Process with multiple separation models efficiently
84
+
85
+ ### Setup Remote Processing
86
+
87
+ 1. **Deploy Audio Separator API** (using Modal.com):
88
+ ```bash
89
+ pip install modal
90
+ modal setup
91
+ modal deploy audio_separator/remote/deploy_modal.py
92
+ ```
93
+
94
+ 2. **Set Environment Variable**:
95
+ ```bash
96
+ export AUDIO_SEPARATOR_API_URL="https://USERNAME--audio-separator-api.modal.run"
97
+ ```
98
+
99
+ 3. **Run Karaoke Generator Normally**:
100
+ ```bash
101
+ karaoke-gen "Rick Astley" "Never Gonna Give You Up"
102
+ ```
103
+
104
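+ The tool will automatically detect the `AUDIO_SEPARATOR_API_URL` environment variable and use remote processing instead of local separation. If the remote client is not installed, or a remote job fails (for example, because of a network error), it falls back to local processing. The exception is a remote job that completes but yields no usable files: that is reported as an error rather than retried locally.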
105
+
106
+ ### Remote vs Local Processing
107
+
108
+ | Aspect | Remote Processing | Local Processing |
109
+ |--------|------------------|------------------|
110
+ | **Resource Usage** | Minimal local CPU/GPU | High local CPU/GPU |
111
+ | **Processing Time** | ~2-5 minutes | ~15-45 minutes |
112
+ | **Cost** | ~$0.019 per job | Free (but uses local resources) |
113
+ | **Requirements** | Internet connection | Local GPU recommended |
114
+ | **Setup** | One-time API deployment | Audio separator models download |
115
+
75
116
  ## Quick Start
76
117
 
77
118
  ```bash
@@ -24,6 +24,47 @@ Karaoke Generator is a comprehensive tool for creating high-quality karaoke vide
24
24
  pip install karaoke-gen
25
25
  ```
26
26
 
27
+ ## Remote Audio Separation 🌐
28
+
29
+ Karaoke Generator now supports remote audio separation using the Audio Separator API. This allows you to offload the compute-intensive audio separation to a remote GPU server while keeping the rest of the workflow local.
30
+
31
+ ### Benefits of Remote Processing
32
+ - **Save Local Resources**: No more laptop CPU/GPU consumption during separation
33
+ - **Faster Processing**: GPU-accelerated separation on dedicated hardware
34
+ - **Cost Effective**: ~$0.019 per separation job on Modal.com (with $30/month free credits)
35
+ - **Multiple Models**: Process with multiple separation models efficiently
36
+
37
+ ### Setup Remote Processing
38
+
39
+ 1. **Deploy Audio Separator API** (using Modal.com):
40
+ ```bash
41
+ pip install modal
42
+ modal setup
43
+ modal deploy audio_separator/remote/deploy_modal.py
44
+ ```
45
+
46
+ 2. **Set Environment Variable**:
47
+ ```bash
48
+ export AUDIO_SEPARATOR_API_URL="https://USERNAME--audio-separator-api.modal.run"
49
+ ```
50
+
51
+ 3. **Run Karaoke Generator Normally**:
52
+ ```bash
53
+ karaoke-gen "Rick Astley" "Never Gonna Give You Up"
54
+ ```
55
+
56
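+ The tool will automatically detect the `AUDIO_SEPARATOR_API_URL` environment variable and use remote processing instead of local separation. If the remote client is not installed, or a remote job fails (for example, because of a network error), it falls back to local processing. The exception is a remote job that completes but yields no usable files: that is reported as an error rather than retried locally.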
57
+
58
+ ### Remote vs Local Processing
59
+
60
+ | Aspect | Remote Processing | Local Processing |
61
+ |--------|------------------|------------------|
62
+ | **Resource Usage** | Minimal local CPU/GPU | High local CPU/GPU |
63
+ | **Processing Time** | ~2-5 minutes | ~15-45 minutes |
64
+ | **Cost** | ~$0.019 per job | Free (but uses local resources) |
65
+ | **Requirements** | Internet connection | Local GPU recommended |
66
+ | **Setup** | One-time API deployment | Audio separator models download |
67
+
27
68
  ## Quick Start
28
69
 
29
70
  ```bash
@@ -0,0 +1,719 @@
1
+ import os
2
+ import sys
3
+ import json
4
+ import logging
5
+ import glob
6
+ import shutil
7
+ import tempfile
8
+ import time
9
+ import fcntl
10
+ import errno
11
+ import psutil
12
+ from datetime import datetime
13
+ from pydub import AudioSegment
14
+
15
+ # Try to import the remote API client if available
16
+ try:
17
+ from audio_separator.remote import AudioSeparatorAPIClient
18
+ REMOTE_API_AVAILABLE = True
19
+ except ImportError:
20
+ REMOTE_API_AVAILABLE = False
21
+ AudioSeparatorAPIClient = None
22
+
23
+
24
+ # Audio separation (local models or a remote Audio Separator API) and stem organisation for karaoke tracks
25
+ class AudioProcessor:
26
+ def __init__(
27
+ self,
28
+ logger,
29
+ log_level,
30
+ log_formatter,
31
+ model_file_dir,
32
+ lossless_output_format,
33
+ clean_instrumental_model,
34
+ backing_vocals_models,
35
+ other_stems_models,
36
+ ffmpeg_base_command,
37
+ ):
38
+ self.logger = logger
39
+ self.log_level = log_level
40
+ self.log_formatter = log_formatter
41
+ self.model_file_dir = model_file_dir
42
+ self.lossless_output_format = lossless_output_format
43
+ self.clean_instrumental_model = clean_instrumental_model
44
+ self.backing_vocals_models = backing_vocals_models
45
+ self.other_stems_models = other_stems_models
46
+ self.ffmpeg_base_command = ffmpeg_base_command # Needed for combined instrumentals
47
+
48
+ def _file_exists(self, file_path):
49
+ """Check if a file exists and log the result."""
50
+ exists = os.path.isfile(file_path)
51
+ if exists:
52
+ self.logger.info(f"File already exists, skipping creation: {file_path}")
53
+ return exists
54
+
55
def separate_audio(self, audio_file, model_name, artist_title, track_output_dir, instrumental_path, vocals_path):
    """Separate *audio_file* with one model and move the produced files into place.

    Outputs whose name contains "(Vocals)" or "(Instrumental)" are moved to
    *vocals_path* / *instrumental_path*. Any other output whose name contains the
    model name (with or without its extension) is moved into *track_output_dir*
    as "<artist_title> (<stem> <model_name>).<format>".

    Raises:
        Exception: If *audio_file* is None or is not an existing file.
    """
    source_is_valid = audio_file is not None and os.path.isfile(audio_file)
    if not source_is_valid:
        raise Exception("Error: Invalid audio source provided.")

    self.logger.debug(f"audio_file is valid file: {audio_file}")

    self.logger.info(
        f"instantiating Separator with model_file_dir: {self.model_file_dir}, model_filename: {model_name} output_format: {self.lossless_output_format}"
    )

    from audio_separator.separator import Separator

    separator = Separator(
        log_level=self.log_level,
        log_formatter=self.log_formatter,
        model_file_dir=self.model_file_dir,
        output_format=self.lossless_output_format,
    )
    separator.load_model(model_filename=model_name)
    output_files = separator.separate(audio_file)

    self.logger.debug(f"Separator output files: {output_files}")

    # Output names may embed the model with or without its extension; the full
    # name is tried first, exactly as the extension-less form is the fallback.
    model_markers = (model_name, os.path.splitext(model_name)[0])

    for produced in output_files:
        if "(Vocals)" in produced:
            self.logger.info(f"Moving Vocals file {produced} to {vocals_path}")
            shutil.move(produced, vocals_path)
            continue

        if "(Instrumental)" in produced:
            self.logger.info(f"Moving Instrumental file {produced} to {instrumental_path}")
            shutil.move(produced, instrumental_path)
            continue

        marker = next((m for m in model_markers if m in produced), None)
        if marker is None:
            continue

        # Example filename 1: "Freddie Jackson - All I'll Ever Ask (feat. Najee) (Local)_(Piano)_htdemucs_6s.flac"
        # Example filename 2: "Freddie Jackson - All I'll Ever Ask (feat. Najee) (Local)_(Guitar)_htdemucs_6s.flac"
        # The stem name in these examples would be "Piano" or "Guitar"
        before_model = produced.split(f"_{marker}")[0]
        stem_name = before_model.split("_")[-1].strip("()")  # Remove parentheses if present

        other_stem_path = os.path.join(
            track_output_dir, f"{artist_title} ({stem_name} {model_name}).{self.lossless_output_format}"
        )
        self.logger.info(f"Moving other stem file {produced} to {other_stem_path}")
        shutil.move(produced, other_stem_path)

    self.logger.info(f"Separation complete! Output file(s): {vocals_path} {instrumental_path}")
113
+
114
def process_audio_separation(self, audio_file, artist_title, track_output_dir):
    """Separate a track into stems, remotely when configured and with local models otherwise.

    When the AUDIO_SEPARATOR_API_URL environment variable is set and the remote
    client could be imported, the work is delegated to
    ``_process_audio_separation_remote``. A remote failure falls back to local
    processing, except for errors whose message contains "no files were
    downloaded" or "failed to produce essential", which are re-raised.

    Local processing is serialised across processes with an exclusive flock on
    "audio_separator.lock" in the system temp directory.

    Args:
        audio_file: Path of the audio file to separate.
        artist_title: Track label used in output file names and log messages.
        track_output_dir: Output directory; a "stems" subfolder is created inside it.

    Returns:
        dict with the keys "clean_instrumental", "other_stems", "backing_vocals"
        and "combined_instrumentals", plus "audacity_lof" when a LOF file was
        written. All four are empty when KARAOKE_GEN_SKIP_AUDIO_SEPARATION is set.
    """
    import subprocess

    # Check if we should use remote API
    remote_api_url = os.environ.get("AUDIO_SEPARATOR_API_URL")
    if remote_api_url:
        if not REMOTE_API_AVAILABLE:
            self.logger.warning("AUDIO_SEPARATOR_API_URL is set but remote API client is not available. "
                                "Please ensure audio-separator is updated to a version that includes remote API support. "
                                "Falling back to local processing.")
        else:
            self.logger.info(f"Using remote audio separator API at: {remote_api_url}")
            try:
                return self._process_audio_separation_remote(audio_file, artist_title, track_output_dir, remote_api_url)
            except Exception as e:
                error_str = str(e)
                # Don't fall back for download failures - these indicate API issues that should be fixed
                if "no files were downloaded" in error_str or "failed to produce essential" in error_str:
                    self.logger.error(f"Remote API processing failed with download/file organization issue: {error_str}")
                    self.logger.error("This indicates an audio-separator API issue that should be fixed. Not falling back to local processing.")
                    raise
                # Fall back for other types of errors (network issues, etc.)
                self.logger.error(f"Remote API processing failed: {error_str}")
                self.logger.info("Falling back to local audio separation")
    else:
        self.logger.info("AUDIO_SEPARATOR_API_URL not set, using local audio separation. "
                         "Set this environment variable to use remote GPU processing.")

    from audio_separator.separator import Separator

    self.logger.info(f"Starting local audio separation process for {artist_title}")

    # Lock file lives in the system temp directory so every process agrees on it.
    lock_file_path = os.path.join(tempfile.gettempdir(), "audio_separator.lock")
    lock_file = self._acquire_separation_lock(lock_file_path, artist_title)

    try:
        separator = Separator(
            log_level=self.log_level,
            log_formatter=self.log_formatter,
            model_file_dir=self.model_file_dir,
            output_format=self.lossless_output_format,
        )

        stems_dir = self._create_stems_directory(track_output_dir)
        result = {"clean_instrumental": {}, "other_stems": {}, "backing_vocals": {}, "combined_instrumentals": {}}

        if os.environ.get("KARAOKE_GEN_SKIP_AUDIO_SEPARATION"):
            return result

        result["clean_instrumental"] = self._separate_clean_instrumental(
            separator, audio_file, artist_title, track_output_dir, stems_dir
        )
        result["other_stems"] = self._separate_other_stems(separator, audio_file, artist_title, stems_dir)
        result["backing_vocals"] = self._separate_backing_vocals(
            separator, result["clean_instrumental"]["vocals"], artist_title, stems_dir
        )
        result["combined_instrumentals"] = self._generate_combined_instrumentals(
            result["clean_instrumental"]["instrumental"], result["backing_vocals"], artist_title, track_output_dir
        )
        self._normalize_audio_files(result, artist_title, track_output_dir)

        # Stays None when there are no backing vocals, so the launch check below
        # never touches an unbound name.
        lof_path = None

        # Create Audacity LOF file
        if result["backing_vocals"]:
            lof_path = os.path.join(stems_dir, f"{artist_title} (Audacity).lof")
            first_model = list(result["backing_vocals"].keys())[0]

            files_to_include = [
                audio_file,  # Original audio
                result["clean_instrumental"]["instrumental"],  # Clean instrumental
                result["backing_vocals"][first_model]["backing_vocals"],  # Backing vocals
                result["combined_instrumentals"][first_model],  # Combined instrumental+BV
            ]

            # Convert to absolute paths
            files_to_include = [os.path.abspath(f) for f in files_to_include]

            with open(lof_path, "w") as lof:
                for file_path in files_to_include:
                    lof.write(f'file "{file_path}"\n')

            self.logger.info(f"Created Audacity LOF file: {lof_path}")
            result["audacity_lof"] = lof_path

        # Launch Audacity with multiple tracks
        if sys.platform == "darwin":  # Check if we're on macOS
            if lof_path and os.path.exists(lof_path):
                self.logger.info(f"Launching Audacity with LOF file: {lof_path}")
                # Argument list instead of a shell string: the path contains the
                # track title, which must never be interpreted by a shell.
                try:
                    subprocess.run(["open", "-a", "Audacity", lof_path], check=False)
                except OSError as e:
                    self.logger.warning(f"Could not launch Audacity: {e}")
            else:
                self.logger.debug("Audacity LOF file not available or not found")

        self.logger.info("Audio separation, combination, and normalization process completed")
        return result
    finally:
        # Unlink the lock file while the flock is still held. A waiter that had
        # already opened the old file detects the swap after locking and retries,
        # so two processes can never both believe they own the lock.
        try:
            os.remove(lock_file_path)
        except OSError:
            pass
        try:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
        finally:
            lock_file.close()

def _acquire_separation_lock(self, lock_file_path, artist_title):
    """Block until this process holds the exclusive separation lock; return the open lock file."""
    while True:
        # "a+" creates the file when missing but never truncates it, so the
        # current holder's metadata survives while we are only waiting.
        lock_file = open(lock_file_path, "a+")
        try:
            try:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            except OSError as e:
                if e.errno not in (errno.EAGAIN, errno.EACCES):
                    raise
                # Lock is held by another process
                lock_file.close()
                self._log_separation_lock_holder(lock_file_path, artist_title)
                time.sleep(30)  # Wait 30 seconds before trying again
                continue

            # The previous holder unlinks the path before unlocking; if the path
            # no longer refers to the file we locked, this lock is worthless.
            try:
                still_current = os.path.samestat(os.fstat(lock_file.fileno()), os.stat(lock_file_path))
            except OSError:
                still_current = False
            if not still_current:
                lock_file.close()
                continue

            # We own the lock: replace whatever metadata a previous holder left.
            lock_file.seek(0)
            lock_file.truncate()
            lock_data = {
                "pid": os.getpid(),
                "start_time": datetime.now().isoformat(),
                "track": f"{artist_title}",
            }
            json.dump(lock_data, lock_file)
            lock_file.flush()
            return lock_file
        except BaseException:
            # Never leak the descriptor (and with it the lock) on unexpected errors.
            lock_file.close()
            raise

def _log_separation_lock_holder(self, lock_file_path, artist_title):
    """Log which process holds the separation lock, as far as the lock file metadata reveals."""
    try:
        with open(lock_file_path, "r") as f:
            lock_data = json.load(f)
        pid = lock_data.get("pid")
        start_time = datetime.fromisoformat(lock_data.get("start_time"))
        running_track = lock_data.get("track")
    except (OSError, json.JSONDecodeError, AttributeError, KeyError, TypeError, ValueError) as e:
        # The holder may not have written its metadata yet, or the file is gone.
        self.logger.info(
            f"Waiting for other audio separation process to complete before starting separation for {artist_title}... "
            f"(lock metadata unavailable: {e})"
        )
        return

    # Calculate runtime
    runtime_mins = (datetime.now() - start_time).total_seconds() / 60

    # Get process command line
    try:
        cmdline_args = psutil.Process(pid).cmdline()
        # Handle potential bytes in cmdline args (cross-platform compatibility)
        cmd = " ".join(arg.decode('utf-8', errors='replace') if isinstance(arg, bytes) else arg for arg in cmdline_args)
    except (psutil.Error, TypeError, ValueError):
        cmd = "<command unavailable>"

    self.logger.info(
        f"Waiting for other audio separation process to complete before starting separation for {artist_title}...\n"
        f"Currently running process details:\n"
        f"  Track: {running_track}\n"
        f"  PID: {pid}\n"
        f"  Running time: {runtime_mins:.1f} minutes\n"
        f"  Command: {cmd}\n"
        f"To force clear the lock and kill the process, run:\n"
        f"  kill {pid} && rm {lock_file_path}"
    )
280
+
281
def _process_audio_separation_remote(self, audio_file, artist_title, track_output_dir, remote_api_url):
    """Process audio separation using remote API with proper two-stage workflow.

    Stage 1 submits the original audio with the clean instrumental model plus
    the other-stems models. Stage 2 submits the resulting clean vocals with the
    backing vocals models. Downloads land in the "stems" subfolder and are then
    renamed by the ``_organize_stage*_remote_results`` helpers.

    Args:
        audio_file: Path of the audio file to separate.
        artist_title: Track label used in output file names and log messages.
        track_output_dir: Output directory; a "stems" subfolder is created inside it.
        remote_api_url: Base URL handed to the remote API client.

    Returns:
        dict with the keys "clean_instrumental", "other_stems", "backing_vocals"
        and "combined_instrumentals", plus "audacity_lof" when a LOF file was
        written. All four are empty when KARAOKE_GEN_SKIP_AUDIO_SEPARATION is set.

    Raises:
        Exception: When a stage does not complete, downloads no files, or stage 1
            lacks the clean vocals or clean instrumental. The caller inspects
            the message text to decide whether to fall back to local processing.
    """
    import subprocess

    self.logger.info(f"Starting remote audio separation process for {artist_title}")

    # Initialize the API client
    api_client = AudioSeparatorAPIClient(remote_api_url, self.logger)

    stems_dir = self._create_stems_directory(track_output_dir)
    result = {"clean_instrumental": {}, "other_stems": {}, "backing_vocals": {}, "combined_instrumentals": {}}

    if os.environ.get("KARAOKE_GEN_SKIP_AUDIO_SEPARATION"):
        return result

    try:
        # Stage 1: Process original song with clean instrumental model + other stems models
        stage1_models = []
        if self.clean_instrumental_model:
            stage1_models.append(self.clean_instrumental_model)
        stage1_models.extend(self.other_stems_models)

        self.logger.info(f"Stage 1: Submitting audio separation job with models: {stage1_models}")

        # Submit the first stage job
        stage1_result = api_client.separate_audio_and_wait(
            audio_file,
            models=stage1_models,
            timeout=1800,  # 30 minutes timeout
            poll_interval=15,  # Check every 15 seconds
            download=True,
            output_dir=stems_dir,
            output_format=self.lossless_output_format.lower()
        )

        if stage1_result["status"] != "completed":
            raise Exception(f"Stage 1 remote audio separation failed: {stage1_result.get('error', 'Unknown error')}")

        self.logger.info(f"Stage 1 completed. Downloaded {len(stage1_result['downloaded_files'])} files")

        # Check if we actually got the expected files for Stage 1
        if len(stage1_result["downloaded_files"]) == 0:
            error_msg = ("Stage 1 audio separation completed successfully but no files were downloaded. "
                         "This indicates a filename encoding or API issue in the audio-separator remote service. "
                         f"Expected files for models {stage1_models} but got 0.")
            self.logger.error(error_msg)
            raise Exception(error_msg)

        # Organize the stage 1 results
        result = self._organize_stage1_remote_results(
            stage1_result["downloaded_files"], artist_title, track_output_dir, stems_dir
        )

        # Validate that we got the essential clean instrumental outputs
        if not result["clean_instrumental"].get("vocals") or not result["clean_instrumental"].get("instrumental"):
            missing = []
            if not result["clean_instrumental"].get("vocals"):
                missing.append("clean vocals")
            if not result["clean_instrumental"].get("instrumental"):
                missing.append("clean instrumental")
            error_msg = (f"Stage 1 completed but failed to produce essential clean instrumental outputs: {', '.join(missing)}. "
                         "This may indicate a model naming or file organization issue in the remote API.")
            self.logger.error(error_msg)
            raise Exception(error_msg)

        # Stage 2: Process clean vocals with backing vocals models (if we have both)
        if result["clean_instrumental"].get("vocals") and self.backing_vocals_models:
            self.logger.info("Stage 2: Processing clean vocals for backing vocals separation...")
            vocals_path = result["clean_instrumental"]["vocals"]

            stage2_result = api_client.separate_audio_and_wait(
                vocals_path,
                models=self.backing_vocals_models,
                timeout=900,  # 15 minutes timeout for backing vocals
                poll_interval=10,
                download=True,
                output_dir=stems_dir,
                output_format=self.lossless_output_format.lower()
            )

            if stage2_result["status"] != "completed":
                error_msg = f"Stage 2 backing vocals separation failed: {stage2_result.get('error', 'Unknown error')}"
                self.logger.error(error_msg)
                raise Exception(error_msg)

            self.logger.info(f"Stage 2 completed. Downloaded {len(stage2_result['downloaded_files'])} files")

            # Check if we actually got the expected files
            if len(stage2_result["downloaded_files"]) == 0:
                error_msg = ("Stage 2 backing vocals separation completed successfully but no files were downloaded. "
                             "This indicates a filename encoding or API issue in the audio-separator remote service. "
                             "Expected 2 files (lead vocals + backing vocals) but got 0.")
                self.logger.error(error_msg)
                raise Exception(error_msg)

            # Organize the stage 2 results (backing vocals)
            result["backing_vocals"] = self._organize_stage2_remote_results(
                stage2_result["downloaded_files"], artist_title, stems_dir
            )
        else:
            result["backing_vocals"] = {}

        # Generate combined instrumentals
        if result["clean_instrumental"].get("instrumental") and result["backing_vocals"]:
            result["combined_instrumentals"] = self._generate_combined_instrumentals(
                result["clean_instrumental"]["instrumental"], result["backing_vocals"], artist_title, track_output_dir
            )
        else:
            result["combined_instrumentals"] = {}

        # Normalize audio files
        self._normalize_audio_files(result, artist_title, track_output_dir)

        # Stays None when there are no backing vocals, so the launch check below
        # never touches an unbound name.
        lof_path = None

        # Create Audacity LOF file
        if result["backing_vocals"]:
            lof_path = os.path.join(stems_dir, f"{artist_title} (Audacity).lof")
            first_model = list(result["backing_vocals"].keys())[0]

            files_to_include = [
                audio_file,  # Original audio
                result["clean_instrumental"]["instrumental"],  # Clean instrumental
                result["backing_vocals"][first_model]["backing_vocals"],  # Backing vocals
                result["combined_instrumentals"][first_model],  # Combined instrumental+BV
            ]

            # Convert to absolute paths
            files_to_include = [os.path.abspath(f) for f in files_to_include]

            with open(lof_path, "w") as lof:
                for file_path in files_to_include:
                    lof.write(f'file "{file_path}"\n')

            self.logger.info(f"Created Audacity LOF file: {lof_path}")
            result["audacity_lof"] = lof_path

        # Launch Audacity with multiple tracks
        if sys.platform == "darwin":  # Check if we're on macOS
            if lof_path and os.path.exists(lof_path):
                self.logger.info(f"Launching Audacity with LOF file: {lof_path}")
                # Argument list instead of a shell string: the path contains the
                # track title, which must never be interpreted by a shell.
                try:
                    subprocess.run(["open", "-a", "Audacity", lof_path], check=False)
                except OSError as e:
                    self.logger.warning(f"Could not launch Audacity: {e}")
            else:
                self.logger.debug("Audacity LOF file not available or not found")

        self.logger.info("Remote audio separation, combination, and normalization process completed")
        return result

    except Exception as e:
        self.logger.error(f"Error during remote audio separation: {str(e)}")
        raise
429
+
430
+ def _organize_stage1_remote_results(self, downloaded_files, artist_title, track_output_dir, stems_dir):
431
+ """Organize stage 1 separation results (clean instrumental + other stems)."""
432
+ result = {"clean_instrumental": {}, "other_stems": {}}
433
+
434
+ for file_path in downloaded_files:
435
+ filename = os.path.basename(file_path)
436
+ self.logger.debug(f"Stage 1 - Processing downloaded file: {filename}")
437
+
438
+ # Determine which model and stem type this file represents
439
+ model_name = None
440
+ stem_type = None
441
+
442
+ # Extract model name and stem type from filename
443
+ # Expected format: "audio_(StemType)_modelname.ext"
444
+ if "_(Vocals)_" in filename:
445
+ stem_type = "Vocals"
446
+ model_name = filename.split("_(Vocals)_")[1].split(".")[0]
447
+ elif "_(Instrumental)_" in filename:
448
+ stem_type = "Instrumental"
449
+ model_name = filename.split("_(Instrumental)_")[1].split(".")[0]
450
+ elif "_(Drums)_" in filename:
451
+ stem_type = "Drums"
452
+ model_name = filename.split("_(Drums)_")[1].split(".")[0]
453
+ elif "_(Bass)_" in filename:
454
+ stem_type = "Bass"
455
+ model_name = filename.split("_(Bass)_")[1].split(".")[0]
456
+ elif "_(Other)_" in filename:
457
+ stem_type = "Other"
458
+ model_name = filename.split("_(Other)_")[1].split(".")[0]
459
+ elif "_(Guitar)_" in filename:
460
+ stem_type = "Guitar"
461
+ model_name = filename.split("_(Guitar)_")[1].split(".")[0]
462
+ elif "_(Piano)_" in filename:
463
+ stem_type = "Piano"
464
+ model_name = filename.split("_(Piano)_")[1].split(".")[0]
465
+ else:
466
+ # Try to extract stem type from parentheses
467
+ import re
468
+ match = re.search(r'_\(([^)]+)\)_([^.]+)', filename)
469
+ if match:
470
+ stem_type = match.group(1)
471
+ model_name = match.group(2)
472
+ else:
473
+ self.logger.warning(f"Could not parse stem type and model from filename: {filename}")
474
+ continue
475
+
476
+ # Check if this model name matches the clean instrumental model
477
+ is_clean_instrumental_model = (
478
+ model_name == self.clean_instrumental_model or
479
+ self.clean_instrumental_model.startswith(model_name) or
480
+ model_name.startswith(self.clean_instrumental_model.split('.')[0])
481
+ )
482
+
483
+ if is_clean_instrumental_model:
484
+ if stem_type == "Vocals":
485
+ target_path = os.path.join(stems_dir, f"{artist_title} (Vocals {self.clean_instrumental_model}).{self.lossless_output_format}")
486
+ shutil.move(file_path, target_path)
487
+ result["clean_instrumental"]["vocals"] = target_path
488
+ elif stem_type == "Instrumental":
489
+ target_path = os.path.join(track_output_dir, f"{artist_title} (Instrumental {self.clean_instrumental_model}).{self.lossless_output_format}")
490
+ shutil.move(file_path, target_path)
491
+ result["clean_instrumental"]["instrumental"] = target_path
492
+
493
+ elif any(model_name == os_model or os_model.startswith(model_name) or model_name.startswith(os_model.split('.')[0]) for os_model in self.other_stems_models):
494
+ # Find the matching other stems model
495
+ matching_os_model = None
496
+ for os_model in self.other_stems_models:
497
+ if model_name == os_model or os_model.startswith(model_name) or model_name.startswith(os_model.split('.')[0]):
498
+ matching_os_model = os_model
499
+ break
500
+
501
+ if matching_os_model:
502
+ if matching_os_model not in result["other_stems"]:
503
+ result["other_stems"][matching_os_model] = {}
504
+
505
+ target_path = os.path.join(stems_dir, f"{artist_title} ({stem_type} {matching_os_model}).{self.lossless_output_format}")
506
+ shutil.move(file_path, target_path)
507
+ result["other_stems"][matching_os_model][stem_type] = target_path
508
+
509
+ return result
510
+
511
+ def _organize_stage2_remote_results(self, downloaded_files, artist_title, stems_dir):
512
+ """Organize stage 2 separation results (backing vocals)."""
513
+ result = {}
514
+
515
+ for file_path in downloaded_files:
516
+ filename = os.path.basename(file_path)
517
+ self.logger.debug(f"Stage 2 - Processing downloaded file: {filename}")
518
+
519
+ # Determine which model and stem type this file represents
520
+ model_name = None
521
+ stem_type = None
522
+
523
+ # Extract model name and stem type from filename
524
+ if "_(Vocals)_" in filename:
525
+ stem_type = "Vocals"
526
+ model_name = filename.split("_(Vocals)_")[1].split(".")[0]
527
+ elif "_(Instrumental)_" in filename:
528
+ stem_type = "Instrumental"
529
+ model_name = filename.split("_(Instrumental)_")[1].split(".")[0]
530
+ else:
531
+ # Try to extract stem type from parentheses
532
+ import re
533
+ match = re.search(r'_\(([^)]+)\)_([^.]+)', filename)
534
+ if match:
535
+ stem_type = match.group(1)
536
+ model_name = match.group(2)
537
+ else:
538
+ self.logger.warning(f"Could not parse stem type and model from filename: {filename}")
539
+ continue
540
+
541
+ # Find the matching backing vocals model
542
+ matching_bv_model = None
543
+ for bv_model in self.backing_vocals_models:
544
+ if model_name == bv_model or bv_model.startswith(model_name) or model_name.startswith(bv_model.split('.')[0]):
545
+ matching_bv_model = bv_model
546
+ break
547
+
548
+ if matching_bv_model:
549
+ if matching_bv_model not in result:
550
+ result[matching_bv_model] = {}
551
+
552
+ if stem_type == "Vocals":
553
+ target_path = os.path.join(stems_dir, f"{artist_title} (Lead Vocals {matching_bv_model}).{self.lossless_output_format}")
554
+ shutil.move(file_path, target_path)
555
+ result[matching_bv_model]["lead_vocals"] = target_path
556
+ elif stem_type == "Instrumental":
557
+ target_path = os.path.join(stems_dir, f"{artist_title} (Backing Vocals {matching_bv_model}).{self.lossless_output_format}")
558
+ shutil.move(file_path, target_path)
559
+ result[matching_bv_model]["backing_vocals"] = target_path
560
+
561
+ return result
562
+
563
+ def _create_stems_directory(self, track_output_dir):
564
+ stems_dir = os.path.join(track_output_dir, "stems")
565
+ os.makedirs(stems_dir, exist_ok=True)
566
+ self.logger.info(f"Created stems directory: {stems_dir}")
567
+ return stems_dir
568
+
569
def _separate_clean_instrumental(self, separator, audio_file, artist_title, track_output_dir, stems_dir):
    """Split *audio_file* into a clean instrumental and a vocals stem.

    The instrumental is stored in *track_output_dir* and the vocals stem in
    *stems_dir*; both file names embed ``self.clean_instrumental_model`` and
    use ``self.lossless_output_format`` as extension. The separator is only
    run when at least one of the two files is missing, and a file that is
    already present is never overwritten.

    Args:
        separator: Object exposing ``load_model(model_filename=...)`` and
            ``separate(path)`` (the latter returns the produced file paths).
        audio_file: Path of the audio to separate.
        artist_title: Prefix used for the output file names.
        track_output_dir: Destination directory for the instrumental.
        stems_dir: Destination directory for the vocals stem.

    Returns:
        dict: ``"vocals"`` and ``"instrumental"`` mapped to their paths. A key
        is present whenever its file already existed or was produced by this
        call (previously a pre-existing file was omitted when only the other
        one had to be regenerated, which made callers fail with ``KeyError``).
    """
    self.logger.info(f"Separating using clean instrumental model: {self.clean_instrumental_model}")
    instrumental_path = os.path.join(
        track_output_dir, f"{artist_title} (Instrumental {self.clean_instrumental_model}).{self.lossless_output_format}"
    )
    vocals_path = os.path.join(stems_dir, f"{artist_title} (Vocals {self.clean_instrumental_model}).{self.lossless_output_format}")

    # Start from whatever is already on disk so cached files are always reported.
    result = {}
    if self._file_exists(vocals_path):
        result["vocals"] = vocals_path
    if self._file_exists(instrumental_path):
        result["instrumental"] = instrumental_path
    if len(result) == 2:
        return result

    separator.load_model(model_filename=self.clean_instrumental_model)
    for file in separator.separate(audio_file):
        # Only fill in the stems that are still missing; never overwrite.
        if "(Vocals)" in file and "vocals" not in result:
            shutil.move(file, vocals_path)
            result["vocals"] = vocals_path
        elif "(Instrumental)" in file and "instrumental" not in result:
            shutil.move(file, instrumental_path)
            result["instrumental"] = instrumental_path

    return result
593
+
594
def _separate_other_stems(self, separator, audio_file, artist_title, stems_dir):
    """Produce the extra stems for every model in ``self.other_stems_models``.

    For each model, stem files already present in *stems_dir* (named
    ``"<artist_title> (<stem> <model>).<ext>"``) are reused; otherwise the
    model is loaded, *audio_file* is separated and every output is moved into
    *stems_dir* under that naming scheme.

    Args:
        separator: Object exposing ``load_model(model_filename=...)`` and
            ``separate(path)`` (the latter returns the produced file paths).
        audio_file: Path of the audio to separate.
        artist_title: Prefix used for the output file names.
        stems_dir: Directory where the stems are stored.

    Returns:
        dict: ``{model: {stem_name: path}}``. ``stem_name`` is the bare stem
        label (e.g. ``"Piano"``) both for freshly separated and for reused
        files; previously the reused branch derived it from the first
        parenthesis in the file name, yielding ``"Piano <model>"`` or a piece
        of the title.
    """
    self.logger.info(f"Separating using other stems models: {self.other_stems_models}")
    result = {}
    for model in self.other_stems_models:
        self.logger.info(f"Processing with model: {model}")
        result[model] = {}

        # Fixed parts of "<artist_title> (<stem> <model>).<ext>"
        prefix = f"{artist_title} ("
        suffix = f" {model}).{self.lossless_output_format}"

        # Check if any stem files for this model already exist. Titles and
        # directories routinely contain glob metacharacters ("[Live]", "?"),
        # so everything except the wildcard itself must be escaped.
        pattern = os.path.join(glob.escape(stems_dir), f"{glob.escape(prefix)}*{glob.escape(suffix)}")
        existing_stems = glob.glob(pattern)

        if existing_stems:
            self.logger.info(f"Found existing stem files for model {model}, skipping separation")
            for stem_file in existing_stems:
                # Strip the known prefix/suffix instead of splitting on "(",
                # which breaks for titles such as "Song (feat. X)".
                stem_name = os.path.basename(stem_file)[len(prefix) : -len(suffix)].strip()
                result[model][stem_name] = stem_file
        else:
            separator.load_model(model_filename=model)
            other_stems_output = separator.separate(audio_file)

            for file in other_stems_output:
                file_name = os.path.basename(file)
                # Separator outputs look like "<input>_(<Stem>)_<model>.<ext>".
                stem_name = file_name[file_name.rfind("_(") + 2 : file_name.rfind(")_")]
                new_filename = f"{artist_title} ({stem_name} {model}).{self.lossless_output_format}"
                other_stem_path = os.path.join(stems_dir, new_filename)
                if not self._file_exists(other_stem_path):
                    shutil.move(file, other_stem_path)
                result[model][stem_name] = other_stem_path

    return result
623
+
624
def _separate_backing_vocals(self, separator, vocals_path, artist_title, stems_dir):
    """Split the vocals stem into lead and backing vocals for each model.

    For every model in ``self.backing_vocals_models`` the separator's
    ``(Vocals)`` output is stored as "Lead Vocals" and its ``(Instrumental)``
    output as "Backing Vocals" inside *stems_dir*. A model is only run when
    at least one of its two files is missing, and existing files are never
    overwritten.

    Args:
        separator: Object exposing ``load_model(model_filename=...)`` and
            ``separate(path)`` (the latter returns the produced file paths).
        vocals_path: Path of the vocals stem to split.
        artist_title: Prefix used for the output file names.
        stems_dir: Directory where the stems are stored.

    Returns:
        dict: ``{model: {"lead_vocals": path, "backing_vocals": path}}``. A
        key is present whenever its file already existed or was produced by
        this call (previously a pre-existing file was left out when only its
        sibling had to be regenerated, so consumers reading
        ``["backing_vocals"]`` raised ``KeyError``).
    """
    self.logger.info(f"Separating clean vocals using backing vocals models: {self.backing_vocals_models}")
    result = {}
    for model in self.backing_vocals_models:
        self.logger.info(f"Processing with model: {model}")
        lead_vocals_path = os.path.join(stems_dir, f"{artist_title} (Lead Vocals {model}).{self.lossless_output_format}")
        backing_vocals_path = os.path.join(stems_dir, f"{artist_title} (Backing Vocals {model}).{self.lossless_output_format}")

        # Report cached files up front so partial caches stay complete.
        stems = {}
        if self._file_exists(lead_vocals_path):
            stems["lead_vocals"] = lead_vocals_path
        if self._file_exists(backing_vocals_path):
            stems["backing_vocals"] = backing_vocals_path
        result[model] = stems
        if len(stems) == 2:
            continue

        separator.load_model(model_filename=model)
        for file in separator.separate(vocals_path):
            # Only fill in what is still missing; never overwrite.
            if "(Vocals)" in file and "lead_vocals" not in stems:
                shutil.move(file, lead_vocals_path)
                stems["lead_vocals"] = lead_vocals_path
            elif "(Instrumental)" in file and "backing_vocals" not in stems:
                shutil.move(file, backing_vocals_path)
                stems["backing_vocals"] = backing_vocals_path
    return result
648
+
649
def _generate_combined_instrumentals(self, instrumental_path, backing_vocals_result, artist_title, track_output_dir):
    """Mix the clean instrumental with each model's backing vocals.

    For every model in *backing_vocals_result* an "Instrumental +BV" file is
    created in *track_output_dir* by running ffmpeg's ``amix`` filter over
    the instrumental and that model's backing vocals. Existing combined
    files are left untouched.

    ffmpeg is started with an argument list rather than a shell command
    line, so quotes, ``$`` or backticks in file names are passed through
    literally. A failing ffmpeg run is logged but does not raise.

    Args:
        instrumental_path: Path of the clean instrumental.
        backing_vocals_result: Mapping ``{model: {"backing_vocals": path, ...}}``.
        artist_title: Prefix used for the output file names.
        track_output_dir: Directory that receives the combined files.

    Returns:
        dict: ``{model: combined_path}`` for every model in the input.

    Raises:
        KeyError: If a model entry has no ``"backing_vocals"`` path.
    """
    # Local imports: only this method needs them.
    import shlex
    import subprocess

    self.logger.info("Generating combined instrumental tracks with backing vocals")
    result = {}
    for model, paths in backing_vocals_result.items():
        backing_vocals_path = paths["backing_vocals"]
        combined_path = os.path.join(track_output_dir, f"{artist_title} (Instrumental +BV {model}).{self.lossless_output_format}")

        if not self._file_exists(combined_path):
            ffmpeg_command = [
                # The base command is configured as a single string with flags.
                *shlex.split(self.ffmpeg_base_command),
                "-i",
                instrumental_path,
                "-i",
                backing_vocals_path,
                "-filter_complex",
                "[0:a][1:a]amix=inputs=2:duration=longest:weights=1 1",
                "-c:a",
                self.lossless_output_format.lower(),
                combined_path,
            ]

            self.logger.debug(f"Running command: {shlex.join(ffmpeg_command)}")
            try:
                completed = subprocess.run(ffmpeg_command, check=False)
            except OSError as e:
                # e.g. ffmpeg executable not found; stay best-effort like before.
                self.logger.error(f"Failed to run ffmpeg for {combined_path}: {e}")
            else:
                if completed.returncode != 0:
                    self.logger.error(f"ffmpeg exited with status {completed.returncode} while creating {combined_path}")

        result[model] = combined_path
    return result
668
+
669
def _normalize_audio_files(self, separation_result, artist_title, track_output_dir):
    """Normalize the clean instrumental and all combined instrumentals.

    Each file is normalized via ``self._normalize_audio`` into a temporary
    sibling file which replaces the original only after it has been written
    and found non-empty. If normalization fails the original file is
    therefore really left unchanged, as the warning message states
    (previously the file was overwritten in place, so a failed export could
    destroy it). Failures are logged and never propagated.

    Args:
        separation_result: Mapping providing
            ``["clean_instrumental"]["instrumental"]`` (a path) and
            ``["combined_instrumentals"]`` (a mapping whose values are paths).
        artist_title: Unused; kept for interface compatibility.
        track_output_dir: Unused; kept for interface compatibility.
    """
    self.logger.info("Normalizing clean instrumental and combined instrumentals")

    files_to_normalize = [separation_result["clean_instrumental"]["instrumental"]]
    files_to_normalize.extend(separation_result["combined_instrumentals"].values())

    for file_path in files_to_normalize:
        if not self._file_exists(file_path):
            self.logger.warning(f"File not found for normalization: {file_path}")
            continue

        # Keep the extension so the temp file looks like the same audio type.
        root, ext = os.path.splitext(file_path)
        temp_path = f"{root}.normalizing{ext}"
        try:
            self._normalize_audio(file_path, temp_path)

            # Verify the normalized file before it replaces the original
            if os.path.getsize(temp_path) == 0:
                raise Exception("Normalized file is empty")

            os.replace(temp_path, file_path)
            self.logger.info(f"Successfully normalized: {file_path}")

        except Exception as e:
            self.logger.error(f"Error during normalization of {file_path}: {e}")
            self.logger.warning(f"Normalization failed for {file_path}. Original file remains unchanged.")
        finally:
            # After a successful replace the temp file is gone; otherwise clean up.
            if os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                except OSError:
                    self.logger.warning(f"Could not remove temporary file: {temp_path}")

    self.logger.info("Audio normalization process completed")
694
+
695
def _normalize_audio(self, input_path, output_path, target_level=0.0):
    """Peak-normalize *input_path* and write the result to *output_path*.

    The file is loaded with ``AudioSegment.from_file`` and the gain needed
    to bring its peak (``max_dBFS``) to *target_level* is applied before it
    is exported in ``self.lossless_output_format``.

    Completely silent input reports a peak of ``-inf``; the gain would then
    be infinite, so such audio is exported unchanged instead of being passed
    through ``apply_gain``.

    Args:
        input_path: Audio file to read.
        output_path: Destination file (may be the same as *input_path*).
        target_level: Desired peak level; defaults to ``0.0``.
    """
    self.logger.info(f"Normalizing audio file: {input_path}")

    # Load audio file
    audio = AudioSegment.from_file(input_path, format=self.lossless_output_format.lower())

    # Calculate the peak amplitude
    peak_amplitude = float(audio.max_dBFS)

    if peak_amplitude == float("-inf"):
        # No signal at all: there is no finite gain to apply.
        self.logger.warning(f"Normalized audio is silent for {input_path}. Using original audio.")
        gain_db = 0.0
        normalized_audio = audio
    else:
        # Calculate and apply the necessary gain
        gain_db = target_level - peak_amplitude
        normalized_audio = audio.apply_gain(gain_db)

        # Ensure the audio is not completely silent
        if normalized_audio.rms == 0:
            self.logger.warning(f"Normalized audio is silent for {input_path}. Using original audio.")
            normalized_audio = audio

    # Export normalized audio to the requested destination
    normalized_audio.export(output_path, format=self.lossless_output_format.lower())

    self.logger.info(f"Normalized audio saved, replacing: {output_path}")
    self.logger.debug(f"Original peak: {peak_amplitude} dB, Applied gain: {gain_db} dB")
@@ -566,16 +566,16 @@ class KaraokePrep:
566
566
  "instrumental": instrumental_path,
567
567
  "vocals": None,
568
568
  }
569
+ elif "separated_audio" not in processed_track or not processed_track["separated_audio"]:
570
+ # Only run separation if it wasn't already done in parallel processing
571
+ self.logger.info(f"Separation was not completed in parallel processing, running separation for track: {self.title} by {self.artist}")
572
+ # Delegate to AudioProcessor (called directly, not in thread here)
573
+ separation_results = self.audio_processor.process_audio_separation(
574
+ audio_file=processed_track["input_audio_wav"], artist_title=artist_title, track_output_dir=track_output_dir
575
+ )
576
+ processed_track["separated_audio"] = separation_results
569
577
  else:
570
- # Only run separation if not skipped
571
- if not self.skip_separation:
572
- self.logger.info(f"Separating audio for track: {self.title} by {self.artist}")
573
- # Delegate to AudioProcessor (called directly, not in thread here)
574
- separation_results = self.audio_processor.process_audio_separation(
575
- audio_file=processed_track["input_audio_wav"], artist_title=artist_title, track_output_dir=track_output_dir
576
- )
577
- processed_track["separated_audio"] = separation_results
578
- # We don't need an else here, if skip_separation is true, separated_audio remains the default empty dict
578
+ self.logger.info("Audio separation was already completed in parallel processing, skipping duplicate separation.")
579
579
 
580
580
  self.logger.info("Script finished, audio downloaded, lyrics fetched and audio separated!")
581
581
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "karaoke-gen"
3
- version = "0.60.0"
3
+ version = "0.61.0"
4
4
  description = "Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens."
5
5
  authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
6
6
  license = "MIT"
@@ -1,401 +0,0 @@
1
- import os
2
- import sys
3
- import json
4
- import logging
5
- import glob
6
- import shutil
7
- import tempfile
8
- import time
9
- import fcntl
10
- import errno
11
- import psutil
12
- from datetime import datetime
13
- from pydub import AudioSegment
14
-
15
-
16
- # Placeholder class or functions for audio processing
17
- class AudioProcessor:
18
- def __init__(
19
- self,
20
- logger,
21
- log_level,
22
- log_formatter,
23
- model_file_dir,
24
- lossless_output_format,
25
- clean_instrumental_model,
26
- backing_vocals_models,
27
- other_stems_models,
28
- ffmpeg_base_command,
29
- ):
30
- self.logger = logger
31
- self.log_level = log_level
32
- self.log_formatter = log_formatter
33
- self.model_file_dir = model_file_dir
34
- self.lossless_output_format = lossless_output_format
35
- self.clean_instrumental_model = clean_instrumental_model
36
- self.backing_vocals_models = backing_vocals_models
37
- self.other_stems_models = other_stems_models
38
- self.ffmpeg_base_command = ffmpeg_base_command # Needed for combined instrumentals
39
-
40
- def _file_exists(self, file_path):
41
- """Check if a file exists and log the result."""
42
- exists = os.path.isfile(file_path)
43
- if exists:
44
- self.logger.info(f"File already exists, skipping creation: {file_path}")
45
- return exists
46
-
47
- def separate_audio(self, audio_file, model_name, artist_title, track_output_dir, instrumental_path, vocals_path):
48
- if audio_file is None or not os.path.isfile(audio_file):
49
- raise Exception("Error: Invalid audio source provided.")
50
-
51
- self.logger.debug(f"audio_file is valid file: {audio_file}")
52
-
53
- self.logger.info(
54
- f"instantiating Separator with model_file_dir: {self.model_file_dir}, model_filename: {model_name} output_format: {self.lossless_output_format}"
55
- )
56
-
57
- from audio_separator.separator import Separator
58
-
59
- separator = Separator(
60
- log_level=self.log_level,
61
- log_formatter=self.log_formatter,
62
- model_file_dir=self.model_file_dir,
63
- output_format=self.lossless_output_format,
64
- )
65
-
66
- separator.load_model(model_filename=model_name)
67
- output_files = separator.separate(audio_file)
68
-
69
- self.logger.debug(f"Separator output files: {output_files}")
70
-
71
- model_name_no_extension = os.path.splitext(model_name)[0]
72
-
73
- for file in output_files:
74
- if "(Vocals)" in file:
75
- self.logger.info(f"Moving Vocals file {file} to {vocals_path}")
76
- shutil.move(file, vocals_path)
77
- elif "(Instrumental)" in file:
78
- self.logger.info(f"Moving Instrumental file {file} to {instrumental_path}")
79
- shutil.move(file, instrumental_path)
80
- elif model_name in file:
81
- # Example filename 1: "Freddie Jackson - All I'll Ever Ask (feat. Najee) (Local)_(Piano)_htdemucs_6s.flac"
82
- # Example filename 2: "Freddie Jackson - All I'll Ever Ask (feat. Najee) (Local)_(Guitar)_htdemucs_6s.flac"
83
- # The stem name in these examples would be "Piano" or "Guitar"
84
- # Extract stem_name from the filename
85
- stem_name = file.split(f"_{model_name}")[0].split("_")[-1]
86
- stem_name = stem_name.strip("()") # Remove parentheses if present
87
-
88
- other_stem_path = os.path.join(track_output_dir, f"{artist_title} ({stem_name} {model_name}).{self.lossless_output_format}")
89
- self.logger.info(f"Moving other stem file {file} to {other_stem_path}")
90
- shutil.move(file, other_stem_path)
91
-
92
- elif model_name_no_extension in file:
93
- # Example filename 1: "Freddie Jackson - All I'll Ever Ask (feat. Najee) (Local)_(Piano)_htdemucs_6s.flac"
94
- # Example filename 2: "Freddie Jackson - All I'll Ever Ask (feat. Najee) (Local)_(Guitar)_htdemucs_6s.flac"
95
- # The stem name in these examples would be "Piano" or "Guitar"
96
- # Extract stem_name from the filename
97
- stem_name = file.split(f"_{model_name_no_extension}")[0].split("_")[-1]
98
- stem_name = stem_name.strip("()") # Remove parentheses if present
99
-
100
- other_stem_path = os.path.join(track_output_dir, f"{artist_title} ({stem_name} {model_name}).{self.lossless_output_format}")
101
- self.logger.info(f"Moving other stem file {file} to {other_stem_path}")
102
- shutil.move(file, other_stem_path)
103
-
104
- self.logger.info(f"Separation complete! Output file(s): {vocals_path} {instrumental_path}")
105
-
106
- def process_audio_separation(self, audio_file, artist_title, track_output_dir):
107
- from audio_separator.separator import Separator
108
-
109
- self.logger.info(f"Starting audio separation process for {artist_title}")
110
-
111
- # Define lock file path in system temp directory
112
- lock_file_path = os.path.join(tempfile.gettempdir(), "audio_separator.lock")
113
-
114
- # Try to acquire lock
115
- while True:
116
- try:
117
- # First check if there's a stale lock
118
- if os.path.exists(lock_file_path):
119
- try:
120
- with open(lock_file_path, "r") as f:
121
- lock_data = json.load(f)
122
- pid = lock_data.get("pid")
123
- start_time = datetime.fromisoformat(lock_data.get("start_time"))
124
- running_track = lock_data.get("track")
125
-
126
- # Check if process is still running
127
- if not psutil.pid_exists(pid):
128
- self.logger.warning(f"Found stale lock from dead process {pid}, removing...")
129
- os.remove(lock_file_path)
130
- else:
131
- # Calculate runtime
132
- runtime = datetime.now() - start_time
133
- runtime_mins = runtime.total_seconds() / 60
134
-
135
- # Get process command line
136
- try:
137
- proc = psutil.Process(pid)
138
- cmdline_args = proc.cmdline()
139
- # Handle potential bytes in cmdline args (cross-platform compatibility)
140
- cmd = " ".join(arg.decode('utf-8', errors='replace') if isinstance(arg, bytes) else arg for arg in cmdline_args)
141
- except (psutil.AccessDenied, psutil.NoSuchProcess):
142
- cmd = "<command unavailable>"
143
-
144
- self.logger.info(
145
- f"Waiting for other audio separation process to complete before starting separation for {artist_title}...\n"
146
- f"Currently running process details:\n"
147
- f" Track: {running_track}\n"
148
- f" PID: {pid}\n"
149
- f" Running time: {runtime_mins:.1f} minutes\n"
150
- f" Command: {cmd}\n"
151
- f"To force clear the lock and kill the process, run:\n"
152
- f" kill {pid} && rm {lock_file_path}"
153
- )
154
- except (json.JSONDecodeError, KeyError, ValueError) as e:
155
- self.logger.warning(f"Found invalid lock file, removing: {e}")
156
- os.remove(lock_file_path)
157
-
158
- # Try to acquire lock
159
- lock_file = open(lock_file_path, "w")
160
- fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
161
-
162
- # Write metadata to lock file
163
- lock_data = {
164
- "pid": os.getpid(),
165
- "start_time": datetime.now().isoformat(),
166
- "track": f"{artist_title}",
167
- }
168
- json.dump(lock_data, lock_file)
169
- lock_file.flush()
170
- break
171
-
172
- except IOError as e:
173
- if e.errno != errno.EAGAIN:
174
- raise
175
- # Lock is held by another process
176
- time.sleep(30) # Wait 30 seconds before trying again
177
- continue
178
-
179
- try:
180
- separator = Separator(
181
- log_level=self.log_level,
182
- log_formatter=self.log_formatter,
183
- model_file_dir=self.model_file_dir,
184
- output_format=self.lossless_output_format,
185
- )
186
-
187
- stems_dir = self._create_stems_directory(track_output_dir)
188
- result = {"clean_instrumental": {}, "other_stems": {}, "backing_vocals": {}, "combined_instrumentals": {}}
189
-
190
- if os.environ.get("KARAOKE_GEN_SKIP_AUDIO_SEPARATION"):
191
- return result
192
-
193
- result["clean_instrumental"] = self._separate_clean_instrumental(
194
- separator, audio_file, artist_title, track_output_dir, stems_dir
195
- )
196
- result["other_stems"] = self._separate_other_stems(separator, audio_file, artist_title, stems_dir)
197
- result["backing_vocals"] = self._separate_backing_vocals(
198
- separator, result["clean_instrumental"]["vocals"], artist_title, stems_dir
199
- )
200
- result["combined_instrumentals"] = self._generate_combined_instrumentals(
201
- result["clean_instrumental"]["instrumental"], result["backing_vocals"], artist_title, track_output_dir
202
- )
203
- self._normalize_audio_files(result, artist_title, track_output_dir)
204
-
205
- # Create Audacity LOF file
206
- lof_path = os.path.join(stems_dir, f"{artist_title} (Audacity).lof")
207
- first_model = list(result["backing_vocals"].keys())[0]
208
-
209
- files_to_include = [
210
- audio_file, # Original audio
211
- result["clean_instrumental"]["instrumental"], # Clean instrumental
212
- result["backing_vocals"][first_model]["backing_vocals"], # Backing vocals
213
- result["combined_instrumentals"][first_model], # Combined instrumental+BV
214
- ]
215
-
216
- # Convert to absolute paths
217
- files_to_include = [os.path.abspath(f) for f in files_to_include]
218
-
219
- with open(lof_path, "w") as lof:
220
- for file_path in files_to_include:
221
- lof.write(f'file "{file_path}"\n')
222
-
223
- self.logger.info(f"Created Audacity LOF file: {lof_path}")
224
- result["audacity_lof"] = lof_path
225
-
226
- # Launch Audacity with multiple tracks
227
- if sys.platform == "darwin": # Check if we're on macOS
228
- if lof_path and os.path.exists(lof_path):
229
- self.logger.info(f"Launching Audacity with LOF file: {lof_path}")
230
- os.system(f'open -a Audacity "{lof_path}"')
231
- else:
232
- self.logger.debug("Audacity LOF file not available or not found")
233
-
234
- self.logger.info("Audio separation, combination, and normalization process completed")
235
- return result
236
- finally:
237
- # Release lock
238
- fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
239
- lock_file.close()
240
- try:
241
- os.remove(lock_file_path)
242
- except OSError:
243
- pass
244
-
245
- def _create_stems_directory(self, track_output_dir):
246
- stems_dir = os.path.join(track_output_dir, "stems")
247
- os.makedirs(stems_dir, exist_ok=True)
248
- self.logger.info(f"Created stems directory: {stems_dir}")
249
- return stems_dir
250
-
251
- def _separate_clean_instrumental(self, separator, audio_file, artist_title, track_output_dir, stems_dir):
252
- self.logger.info(f"Separating using clean instrumental model: {self.clean_instrumental_model}")
253
- instrumental_path = os.path.join(
254
- track_output_dir, f"{artist_title} (Instrumental {self.clean_instrumental_model}).{self.lossless_output_format}"
255
- )
256
- vocals_path = os.path.join(stems_dir, f"{artist_title} (Vocals {self.clean_instrumental_model}).{self.lossless_output_format}")
257
-
258
- result = {}
259
- if not self._file_exists(instrumental_path) or not self._file_exists(vocals_path):
260
- separator.load_model(model_filename=self.clean_instrumental_model)
261
- clean_output_files = separator.separate(audio_file)
262
-
263
- for file in clean_output_files:
264
- if "(Vocals)" in file and not self._file_exists(vocals_path):
265
- shutil.move(file, vocals_path)
266
- result["vocals"] = vocals_path
267
- elif "(Instrumental)" in file and not self._file_exists(instrumental_path):
268
- shutil.move(file, instrumental_path)
269
- result["instrumental"] = instrumental_path
270
- else:
271
- result["vocals"] = vocals_path
272
- result["instrumental"] = instrumental_path
273
-
274
- return result
275
-
276
- def _separate_other_stems(self, separator, audio_file, artist_title, stems_dir):
277
- self.logger.info(f"Separating using other stems models: {self.other_stems_models}")
278
- result = {}
279
- for model in self.other_stems_models:
280
- self.logger.info(f"Processing with model: {model}")
281
- result[model] = {}
282
-
283
- # Check if any stem files for this model already exist
284
- existing_stems = glob.glob(os.path.join(stems_dir, f"{artist_title} (*{model}).{self.lossless_output_format}"))
285
-
286
- if existing_stems:
287
- self.logger.info(f"Found existing stem files for model {model}, skipping separation")
288
- for stem_file in existing_stems:
289
- stem_name = os.path.basename(stem_file).split("(")[1].split(")")[0].strip()
290
- result[model][stem_name] = stem_file
291
- else:
292
- separator.load_model(model_filename=model)
293
- other_stems_output = separator.separate(audio_file)
294
-
295
- for file in other_stems_output:
296
- file_name = os.path.basename(file)
297
- stem_name = file_name[file_name.rfind("_(") + 2 : file_name.rfind(")_")]
298
- new_filename = f"{artist_title} ({stem_name} {model}).{self.lossless_output_format}"
299
- other_stem_path = os.path.join(stems_dir, new_filename)
300
- if not self._file_exists(other_stem_path):
301
- shutil.move(file, other_stem_path)
302
- result[model][stem_name] = other_stem_path
303
-
304
- return result
305
-
306
- def _separate_backing_vocals(self, separator, vocals_path, artist_title, stems_dir):
307
- self.logger.info(f"Separating clean vocals using backing vocals models: {self.backing_vocals_models}")
308
- result = {}
309
- for model in self.backing_vocals_models:
310
- self.logger.info(f"Processing with model: {model}")
311
- result[model] = {}
312
- lead_vocals_path = os.path.join(stems_dir, f"{artist_title} (Lead Vocals {model}).{self.lossless_output_format}")
313
- backing_vocals_path = os.path.join(stems_dir, f"{artist_title} (Backing Vocals {model}).{self.lossless_output_format}")
314
-
315
- if not self._file_exists(lead_vocals_path) or not self._file_exists(backing_vocals_path):
316
- separator.load_model(model_filename=model)
317
- backing_vocals_output = separator.separate(vocals_path)
318
-
319
- for file in backing_vocals_output:
320
- if "(Vocals)" in file and not self._file_exists(lead_vocals_path):
321
- shutil.move(file, lead_vocals_path)
322
- result[model]["lead_vocals"] = lead_vocals_path
323
- elif "(Instrumental)" in file and not self._file_exists(backing_vocals_path):
324
- shutil.move(file, backing_vocals_path)
325
- result[model]["backing_vocals"] = backing_vocals_path
326
- else:
327
- result[model]["lead_vocals"] = lead_vocals_path
328
- result[model]["backing_vocals"] = backing_vocals_path
329
- return result
330
-
331
- def _generate_combined_instrumentals(self, instrumental_path, backing_vocals_result, artist_title, track_output_dir):
332
- self.logger.info("Generating combined instrumental tracks with backing vocals")
333
- result = {}
334
- for model, paths in backing_vocals_result.items():
335
- backing_vocals_path = paths["backing_vocals"]
336
- combined_path = os.path.join(track_output_dir, f"{artist_title} (Instrumental +BV {model}).{self.lossless_output_format}")
337
-
338
- if not self._file_exists(combined_path):
339
- ffmpeg_command = (
340
- f'{self.ffmpeg_base_command} -i "{instrumental_path}" -i "{backing_vocals_path}" '
341
- f'-filter_complex "[0:a][1:a]amix=inputs=2:duration=longest:weights=1 1" '
342
- f'-c:a {self.lossless_output_format.lower()} "{combined_path}"'
343
- )
344
-
345
- self.logger.debug(f"Running command: {ffmpeg_command}")
346
- os.system(ffmpeg_command)
347
-
348
- result[model] = combined_path
349
- return result
350
-
351
- def _normalize_audio_files(self, separation_result, artist_title, track_output_dir):
352
- self.logger.info("Normalizing clean instrumental and combined instrumentals")
353
-
354
- files_to_normalize = [
355
- ("clean_instrumental", separation_result["clean_instrumental"]["instrumental"]),
356
- ] + [("combined_instrumentals", path) for path in separation_result["combined_instrumentals"].values()]
357
-
358
- for key, file_path in files_to_normalize:
359
- if self._file_exists(file_path):
360
- try:
361
- self._normalize_audio(file_path, file_path) # Normalize in-place
362
-
363
- # Verify the normalized file
364
- if os.path.getsize(file_path) > 0:
365
- self.logger.info(f"Successfully normalized: {file_path}")
366
- else:
367
- raise Exception("Normalized file is empty")
368
-
369
- except Exception as e:
370
- self.logger.error(f"Error during normalization of {file_path}: {e}")
371
- self.logger.warning(f"Normalization failed for {file_path}. Original file remains unchanged.")
372
- else:
373
- self.logger.warning(f"File not found for normalization: {file_path}")
374
-
375
- self.logger.info("Audio normalization process completed")
376
-
377
- def _normalize_audio(self, input_path, output_path, target_level=0.0):
378
- self.logger.info(f"Normalizing audio file: {input_path}")
379
-
380
- # Load audio file
381
- audio = AudioSegment.from_file(input_path, format=self.lossless_output_format.lower())
382
-
383
- # Calculate the peak amplitude
384
- peak_amplitude = float(audio.max_dBFS)
385
-
386
- # Calculate the necessary gain
387
- gain_db = target_level - peak_amplitude
388
-
389
- # Apply gain
390
- normalized_audio = audio.apply_gain(gain_db)
391
-
392
- # Ensure the audio is not completely silent
393
- if normalized_audio.rms == 0:
394
- self.logger.warning(f"Normalized audio is silent for {input_path}. Using original audio.")
395
- normalized_audio = audio
396
-
397
- # Export normalized audio, overwriting the original file
398
- normalized_audio.export(output_path, format=self.lossless_output_format.lower())
399
-
400
- self.logger.info(f"Normalized audio saved, replacing: {output_path}")
401
- self.logger.debug(f"Original peak: {peak_amplitude} dB, Applied gain: {gain_db} dB")
File without changes