karaoke-gen 0.71.27__py3-none-any.whl → 0.71.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,12 @@ import shutil
5
5
  import tempfile
6
6
  from .utils import sanitize_filename
7
7
 
8
+ try:
9
+ import yt_dlp
10
+ YT_DLP_AVAILABLE = True
11
+ except ImportError:
12
+ YT_DLP_AVAILABLE = False
13
+
8
14
 
9
15
  # Placeholder class or functions for file handling
10
16
  class FileHandler:
@@ -71,6 +77,192 @@ class FileHandler:
71
77
 
72
78
  return target_path
73
79
 
80
def download_video(self, url, output_filename_no_extension, cookies_str=None):
    """
    Download audio from a URL (YouTube, etc.) using yt-dlp.

    Downloads the best available audio stream from the URL and saves it
    next to ``output_filename_no_extension`` with whatever extension the
    extractor / FFmpeg post-processor produces.

    Args:
        url: URL to download from (YouTube, Vimeo, etc.)
        output_filename_no_extension: Output filename without extension
        cookies_str: Optional cookies string (written to a temporary
            cookie file) for authenticated downloads

    Returns:
        Path to downloaded audio file, or None if failed
    """
    if not YT_DLP_AVAILABLE:
        self.logger.error("yt-dlp is not installed. Install with: pip install yt-dlp")
        return None

    self.logger.info(f"Downloading audio from URL: {url}")

    # Single source of truth for the audio extensions we expect yt-dlp /
    # ffmpeg to produce; used twice below when locating the output file.
    audio_extensions = ['m4a', 'opus', 'webm', 'mp3', 'flac', 'wav', 'ogg', 'aac']

    # Configure yt-dlp options
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_filename_no_extension + '.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'best',
            'preferredquality': '0',  # Best quality
        }],
        'quiet': True,
        'no_warnings': True,
        'extract_flat': False,
        # Anti-detection options
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'retries': 3,
        'fragment_retries': 3,
        'http_headers': {
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        },
    }

    # Handle cookies if provided. BUGFIX: record the temp file's path
    # *before* writing so the finally-block below can clean it up even if
    # the write itself raises (the original captured the name only after
    # the write succeeded, leaking the file on failure).
    cookie_file_path = None
    if cookies_str:
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as cookie_file:
                cookie_file_path = cookie_file.name
                cookie_file.write(cookies_str)
            ydl_opts['cookiefile'] = cookie_file_path
        except Exception as e:
            self.logger.warning(f"Failed to write cookies file: {e}")

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Extract info first to get actual filename
            info = ydl.extract_info(url, download=True)

            if info is None:
                self.logger.error("Failed to extract info from URL")
                return None

            # The actual filename may differ from the output template due
            # to post-processing, so probe the known extensions first.
            downloaded_file = None
            for ext in audio_extensions:
                candidate = f"{output_filename_no_extension}.{ext}"
                if os.path.exists(candidate):
                    downloaded_file = candidate
                    break

            if downloaded_file is None:
                # Fall back to any file with the expected prefix and an
                # audio extension.
                import glob
                for match in glob.glob(f"{output_filename_no_extension}.*"):
                    if any(match.endswith('.' + ext) for ext in audio_extensions):
                        downloaded_file = match
                        break

            if downloaded_file and os.path.exists(downloaded_file):
                self.logger.info(f"Successfully downloaded: {downloaded_file}")
                return downloaded_file
            else:
                self.logger.error("Downloaded file not found after yt-dlp completed")
                return None

    except yt_dlp.DownloadError as e:
        self.logger.error(f"yt-dlp download error: {e}")
        return None
    except Exception as e:
        self.logger.error(f"Failed to download from URL: {e}")
        return None
    finally:
        # Clean up cookie file if we created one
        if cookie_file_path is not None:
            try:
                os.unlink(cookie_file_path)
            except Exception:
                pass
187
+
188
def extract_metadata_from_url(self, url):
    """
    Extract metadata (artist, title) from a URL without downloading.

    Uses yt-dlp to fetch video metadata including title, uploader/artist,
    and other information that can be used for the karaoke generation.

    Args:
        url: URL to extract metadata from

    Returns:
        Dict with 'artist', 'title', 'duration', and 'raw_info', or None if failed
    """
    if not YT_DLP_AVAILABLE:
        self.logger.error("yt-dlp is not installed. Install with: pip install yt-dlp")
        return None

    self.logger.info(f"Extracting metadata from URL: {url}")

    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'extract_flat': False,
        'skip_download': True,  # metadata only — never fetch media
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

            if info is None:
                self.logger.error("Failed to extract metadata from URL")
                return None

            # Try to extract artist and title from various fields
            raw_title = info.get('title', '')
            uploader = info.get('uploader', '') or info.get('channel', '') or info.get('artist', '')
            duration = info.get('duration', 0)

            # Attempt to parse "Artist - Title" format from title.
            # split with maxsplit=1 always yields exactly two parts when
            # the separator is present, so no length check is needed.
            artist = None
            title = raw_title
            if ' - ' in raw_title:
                left, right = raw_title.split(' - ', 1)
                artist = left.strip()
                title = right.strip()

            # Fall back to uploader as artist if not found in title
            if not artist:
                artist = uploader

            # Clean up title (remove common suffixes like "(Official Video)")
            title_cleanup_patterns = [
                '(official video)', '(official music video)', '(official audio)',
                '(lyric video)', '(lyrics)', '(visualizer)', '(music video)',
                '[official video]', '[official music video]', '[official audio]',
                '(hd)', '(4k)', '(remastered)', '| official video', '| official audio',
            ]
            title_lower = title.lower()
            for pattern in title_cleanup_patterns:
                if pattern in title_lower:
                    idx = title_lower.find(pattern)
                    title = title[:idx].strip()
                    title_lower = title.lower()

            # BUGFIX/robustness: cleanup can leave a dangling separator
            # ("Song |") or strip the title to an empty string (e.g. a
            # title that is just "(Official Video)"). Trim stray
            # separators and fall back to the raw title rather than
            # returning an empty one.
            title = title.rstrip('|- ').strip() or raw_title

            return {
                'artist': artist,
                'title': title,
                'duration': duration,
                'raw_info': info,
            }

    except Exception as e:
        self.logger.error(f"Failed to extract metadata from URL: {e}")
        return None
265
+
74
266
  def extract_still_image_from_video(self, input_filename, output_filename_no_extension):
75
267
  output_filename = output_filename_no_extension + ".png"
76
268
  self.logger.info(f"Extracting still image from position 30s input media")
@@ -0,0 +1,45 @@
1
"""
Instrumental Review Module - Shared core for both local and remote CLI.

This module provides audio analysis and editing functionality for instrumental
selection in karaoke generation. It's designed to be:
- Pure Python with no cloud dependencies (GCS, etc.)
- Reusable by both local CLI (karaoke-gen) and remote backend (Cloud Run)
- Easy to test without mocking cloud services

Classes:
    AudioAnalyzer: Analyzes backing vocals audio for audible content
    AudioEditor: Creates custom instrumentals by muting regions
    WaveformGenerator: Generates waveform visualization images
    InstrumentalReviewServer: Local HTTP server for browser-based review

Models:
    AnalysisResult: Result of audio analysis
    AudibleSegment: A detected segment of audible content
    MuteRegion: A region to mute in the backing vocals
    RecommendedSelection: Enum of selection recommendations
"""

# Re-export the public API at the package root so consumers can import
# these names directly from the package instead of its submodules.
from .models import (
    AnalysisResult,
    AudibleSegment,
    MuteRegion,
    RecommendedSelection,
)
from .analyzer import AudioAnalyzer
from .editor import AudioEditor
from .waveform import WaveformGenerator
from .server import InstrumentalReviewServer

# Explicit public API: star-imports and doc tools honour this list.
__all__ = [
    # Models
    "AnalysisResult",
    "AudibleSegment",
    "MuteRegion",
    "RecommendedSelection",
    # Classes
    "AudioAnalyzer",
    "AudioEditor",
    "WaveformGenerator",
    "InstrumentalReviewServer",
]
@@ -0,0 +1,408 @@
1
+ """
2
+ Audio analyzer for detecting audible content in backing vocals.
3
+
4
+ This module provides the AudioAnalyzer class which analyzes audio files
5
+ to detect segments of audible content above a silence threshold. It's used
6
+ to help determine whether backing vocals should be included in the final
7
+ karaoke instrumental.
8
+ """
9
+
10
+ import logging
11
+ import math
12
+ from pathlib import Path
13
+ from typing import List, Optional, Tuple
14
+
15
+ from pydub import AudioSegment
16
+
17
+ from .models import AnalysisResult, AudibleSegment, RecommendedSelection
18
+
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class AudioAnalyzer:
    """
    Analyzes audio files for backing vocals content.

    This class is pure Python with no cloud dependencies. It works with
    local file paths and uses pydub for audio loading and analysis.

    The analyzer detects segments of audible content (above a silence threshold)
    and provides recommendations for instrumental selection based on the
    analysis results.

    Attributes:
        silence_threshold_db: Amplitude threshold below which audio is
            considered silent (default: -40.0 dB)
        min_segment_duration_ms: Minimum duration for a segment to be
            considered audible (default: 100ms)
        merge_gap_ms: Maximum gap between segments to merge them
            (default: 500ms)
        window_ms: Analysis window size in milliseconds (default: 50ms)

    Example:
        >>> analyzer = AudioAnalyzer(silence_threshold_db=-40.0)
        >>> result = analyzer.analyze("/path/to/backing_vocals.flac")
        >>> if result.has_audible_content:
        ...     print(f"Found {result.segment_count} audible segments")
        ...     for seg in result.audible_segments:
        ...         print(f"  {seg.start_seconds:.1f}s - {seg.end_seconds:.1f}s")
    """

    def __init__(
        self,
        silence_threshold_db: float = -40.0,
        min_segment_duration_ms: int = 100,
        merge_gap_ms: int = 500,
        window_ms: int = 50,
    ):
        """
        Initialize the audio analyzer.

        Args:
            silence_threshold_db: Amplitude threshold below which audio is
                considered silent. Default is -40.0 dB.
            min_segment_duration_ms: Minimum duration for a segment to be
                reported as audible. Segments shorter than this are ignored.
                Default is 100ms.
            merge_gap_ms: If two audible segments are separated by a gap
                shorter than this, they are merged into one segment.
                Default is 500ms.
            window_ms: Size of the analysis window in milliseconds.
                Smaller windows give more precise timing but slower analysis.
                Default is 50ms.
        """
        # Settings are stored as plain attributes; the analysis methods read
        # them directly, so they can be adjusted between analyze() calls.
        self.silence_threshold_db = silence_threshold_db
        self.min_segment_duration_ms = min_segment_duration_ms
        self.merge_gap_ms = merge_gap_ms
        self.window_ms = window_ms
79
+
80
def analyze(self, audio_path: str) -> AnalysisResult:
    """
    Analyze an audio file and report its audible content.

    Loads the file, scans it window-by-window for amplitude above the
    configured silence threshold, groups audible windows into segments,
    and summarizes the result with statistics and a selection
    recommendation.

    Args:
        audio_path: Path to the audio file; any format readable by
            pydub/ffmpeg (FLAC, WAV, MP3, ...) is accepted.

    Returns:
        AnalysisResult with the detected segments, duration/percentage
        statistics, and a recommended instrumental selection.

    Raises:
        FileNotFoundError: If the audio file doesn't exist.
        Exception: If the audio file cannot be loaded.
    """
    source = Path(audio_path)
    if not source.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    logger.info(f"Analyzing audio file: {audio_path}")

    audio = AudioSegment.from_file(audio_path)
    total_duration_ms = len(audio)
    total_duration_seconds = total_duration_ms / 1000.0

    logger.debug(f"Audio duration: {total_duration_seconds:.2f}s, "
                 f"channels: {audio.channels}, "
                 f"sample_rate: {audio.frame_rate}")

    # Downmix to mono so amplitude measurements are channel-independent.
    if audio.channels > 1:
        audio = audio.set_channels(1)

    # Window scan -> contiguous runs -> merged/filtered segments.
    windows = self._find_audible_windows(audio)
    raw_runs = self._windows_to_segments(windows, audio)
    segments = self._merge_and_filter_segments(raw_runs)

    # Summary statistics (same ms-based arithmetic as the per-segment data).
    total_audible_ms = sum(seg.duration_seconds * 1000 for seg in segments)
    total_audible_seconds = total_audible_ms / 1000.0
    if total_duration_seconds > 0:
        audible_percentage = total_audible_seconds / total_duration_seconds * 100
    else:
        audible_percentage = 0.0

    has_audible_content = len(segments) > 0

    recommended_selection = self._get_recommendation(
        has_audible_content,
        segments,
        audible_percentage
    )

    logger.info(
        f"Analysis complete: {len(segments)} segments, "
        f"{audible_percentage:.1f}% audible, "
        f"recommendation: {recommended_selection.value}"
    )

    return AnalysisResult(
        has_audible_content=has_audible_content,
        total_duration_seconds=total_duration_seconds,
        audible_segments=segments,
        recommended_selection=recommended_selection,
        silence_threshold_db=self.silence_threshold_db,
        total_audible_duration_seconds=total_audible_seconds,
        audible_percentage=audible_percentage,
    )
166
+
167
def get_amplitude_envelope(
    self,
    audio_path: str,
    window_ms: int = 100,
    normalize: bool = True,
) -> List[float]:
    """
    Get the amplitude envelope for waveform visualization.

    Returns a list of amplitude values suitable for rendering a waveform
    display; each value is the RMS amplitude of one window of audio.

    Args:
        audio_path: Path to the audio file
        window_ms: Size of each window in milliseconds. Smaller values
            give more detail but larger data. Default is 100ms.
        normalize: If True, normalize amplitudes to 0.0-1.0 range.
            Default is True.

    Returns:
        List of amplitude values (floats). If normalize=True, values
        are in the range [0.0, 1.0]. Otherwise, values are in dBFS.

    Raises:
        FileNotFoundError: If the audio file doesn't exist
    """
    path = Path(audio_path)
    if not path.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    audio = AudioSegment.from_file(audio_path)

    # Convert to mono so channel count doesn't skew RMS values
    if audio.channels > 1:
        audio = audio.set_channels(1)

    amplitudes = []
    duration_ms = len(audio)

    for start_ms in range(0, duration_ms, window_ms):
        end_ms = min(start_ms + window_ms, duration_ms)
        window = audio[start_ms:end_ms]

        # RMS level in dBFS; rms == 0 would blow up log10, so clamp
        # silent windows to an "effectively silent" floor.
        if window.rms > 0:
            db = 20 * math.log10(window.rms / window.max_possible_amplitude)
        else:
            db = -100.0

        amplitudes.append(db)

    if normalize and amplitudes:
        # Map [silence_threshold_db, 0 dBFS] onto [0.0, 1.0], clamped.
        min_db = self.silence_threshold_db
        max_db = 0.0
        span = max_db - min_db
        # BUGFIX: when the analyzer is configured with a threshold of
        # 0 dBFS (or above), the original divided by zero (or inverted
        # the mapping). Treat the degenerate span as a hard cutoff.
        if span <= 0:
            amplitudes = [1.0 if db >= max_db else 0.0 for db in amplitudes]
        else:
            amplitudes = [
                max(0.0, min(1.0, (db - min_db) / span))
                for db in amplitudes
            ]

    return amplitudes
227
+
228
+ def _find_audible_windows(
229
+ self,
230
+ audio: AudioSegment
231
+ ) -> List[Tuple[int, float, float]]:
232
+ """
233
+ Find windows with amplitude above the silence threshold.
234
+
235
+ Returns a list of tuples: (start_ms, avg_db, peak_db)
236
+ """
237
+ audible_windows = []
238
+ duration_ms = len(audio)
239
+
240
+ for start_ms in range(0, duration_ms, self.window_ms):
241
+ end_ms = min(start_ms + self.window_ms, duration_ms)
242
+ window = audio[start_ms:end_ms]
243
+
244
+ # Calculate RMS amplitude in dB
245
+ if window.rms > 0:
246
+ avg_db = 20 * math.log10(window.rms / window.max_possible_amplitude)
247
+ # Peak is approximated as max sample value
248
+ peak_db = window.dBFS if hasattr(window, 'dBFS') else avg_db
249
+ else:
250
+ avg_db = -100.0
251
+ peak_db = -100.0
252
+
253
+ if avg_db > self.silence_threshold_db:
254
+ audible_windows.append((start_ms, avg_db, peak_db))
255
+
256
+ return audible_windows
257
+
258
def _windows_to_segments(
    self,
    audible_windows: List[Tuple[int, float, float]],
    audio: AudioSegment
) -> List[AudibleSegment]:
    """
    Group contiguous audible windows into AudibleSegment runs.

    Windows separated by at most one window-length are treated as one
    continuous run. The audio argument is accepted for interface
    compatibility but is not consulted here.
    """
    if not audible_windows:
        return []

    segments = []
    run_start, first_db, first_peak = audible_windows[0]
    run_dbs = [first_db]
    run_peaks = [first_peak]
    run_end = run_start + self.window_ms

    for start_ms, avg_db, peak_db in audible_windows[1:]:
        if start_ms - run_end <= self.window_ms:
            # Window continues the current run.
            run_dbs.append(avg_db)
            run_peaks.append(peak_db)
        else:
            # Gap too large: close the current run and open a new one.
            segments.append(self._create_segment(
                run_start, run_end, run_dbs, run_peaks
            ))
            run_start = start_ms
            run_dbs = [avg_db]
            run_peaks = [peak_db]
        run_end = start_ms + self.window_ms

    # Close the final run.
    segments.append(self._create_segment(
        run_start, run_end, run_dbs, run_peaks
    ))

    return segments
303
+
304
def _create_segment(
    self,
    start_ms: int,
    end_ms: int,
    dbs: List[float],
    peaks: List[float]
) -> AudibleSegment:
    """Build an AudibleSegment from accumulated per-window statistics."""
    # Empty stat lists fall back to the "effectively silent" floor.
    mean_db = sum(dbs) / len(dbs) if dbs else -100.0
    peak_db = max(peaks) if peaks else -100.0
    return AudibleSegment(
        start_seconds=start_ms / 1000.0,
        end_seconds=end_ms / 1000.0,
        duration_seconds=(end_ms - start_ms) / 1000.0,
        avg_amplitude_db=mean_db,
        peak_amplitude_db=peak_db,
    )
319
+
320
def _merge_and_filter_segments(
    self,
    segments: List[AudibleSegment]
) -> List[AudibleSegment]:
    """
    Coalesce segments separated by gaps <= merge_gap_ms, then drop any
    result shorter than min_segment_duration_ms.
    """
    if not segments:
        return []

    ordered = sorted(segments, key=lambda s: s.start_seconds)

    merged = []
    current = ordered[0]
    for candidate in ordered[1:]:
        gap_ms = (candidate.start_seconds - current.end_seconds) * 1000
        if gap_ms <= self.merge_gap_ms:
            # Fuse candidate into current, duration-weighting the average
            # amplitude and keeping the louder peak.
            span = current.duration_seconds + candidate.duration_seconds
            if span > 0:
                avg_db = (
                    current.avg_amplitude_db * current.duration_seconds +
                    candidate.avg_amplitude_db * candidate.duration_seconds
                ) / span
            else:
                avg_db = -100.0
            current = AudibleSegment(
                start_seconds=current.start_seconds,
                end_seconds=candidate.end_seconds,
                duration_seconds=candidate.end_seconds - current.start_seconds,
                avg_amplitude_db=avg_db,
                peak_amplitude_db=max(
                    current.peak_amplitude_db,
                    candidate.peak_amplitude_db
                ),
            )
        else:
            merged.append(current)
            current = candidate
    merged.append(current)

    # Discard anything shorter than the configured minimum.
    min_len_seconds = self.min_segment_duration_ms / 1000.0
    return [seg for seg in merged if seg.duration_seconds >= min_len_seconds]
379
+
380
def _get_recommendation(
    self,
    has_audible_content: bool,
    segments: List[AudibleSegment],
    audible_percentage: float
) -> RecommendedSelection:
    """
    Pick the recommended instrumental selection.

    CLEAN when there is no audible content, or only minimal quiet
    content; REVIEW_NEEDED when more than 20% of the track is audible or
    any detected segment is loud.
    """
    needs_review = has_audible_content and (
        audible_percentage > 20.0
        or any(seg.is_loud for seg in segments)
    )
    if needs_review:
        return RecommendedSelection.REVIEW_NEEDED
    return RecommendedSelection.CLEAN