monkeyplug-enhanced 2.3.0__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: monkeyplug-enhanced
3
- Version: 2.3.0
3
+ Version: 2.3.1
4
4
  Summary: Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing.
5
5
  Project-URL: Homepage, https://github.com/ljbred08/monkeyplug
6
6
  Project-URL: Issues, https://github.com/ljbred08/monkeyplug/issues
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
14
14
  Requires-Python: >=3.10
15
15
  Requires-Dist: aiohttp>=3.9.0
16
- Requires-Dist: duckduckgo-search>=6.0.0
16
+ Requires-Dist: ddgs>=6.0.0
17
17
  Requires-Dist: groq>=0.1.0
18
18
  Requires-Dist: mmguero==2.0.3
19
19
  Requires-Dist: mutagen==1.47.0
@@ -22,7 +22,7 @@ Requires-Dist: requests==2.32.5
22
22
  Requires-Dist: shazamio>=0.8.0
23
23
  Requires-Dist: sherpa-onnx>=1.10.0
24
24
  Requires-Dist: soundfile>=0.12.0
25
- Requires-Dist: spotify-scraper>=0.1.0
25
+ Requires-Dist: spotifyscraper>=0.1.0
26
26
  Requires-Dist: tqdm>=4.65.0
27
27
  Description-Content-Type: text/markdown
28
28
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "monkeyplug-enhanced"
7
- version = "2.3.0"
7
+ version = "2.3.1"
8
8
  authors = [
9
9
  { name="Seth Grover", email="mero.mero.guero@gmail.com" },
10
10
  { name="Lincoln Brown", email="link@brown.fm" },
@@ -29,8 +29,8 @@ dependencies = [
29
29
  "tqdm>=4.65.0",
30
30
  "shazamio>=0.8.0",
31
31
  "aiohttp>=3.9.0",
32
- "duckduckgo-search>=6.0.0",
33
- "spotify-scraper>=0.1.0",
32
+ "ddgs>=6.0.0",
33
+ "spotifyscraper>=0.1.0",
34
34
  ]
35
35
 
36
36
  [project.urls]
@@ -123,13 +123,17 @@ UNIFY_ALBUM_PROMPT_DEFAULT = (
123
123
  "titles, and current album names, determine the correct unified album name and "
124
124
  "assign track numbers to each song. Consider the existing album name guesses and "
125
125
  "song titles to infer the real album. Return track numbers in the order the songs "
126
- "should appear on the album."
126
+ "should appear on the album. "
127
+ "CRITICAL: You MUST return a track entry for EVERY input file - do not skip any files. "
128
+ "The tracks array must contain exactly the same number of entries as the input."
127
129
  )
128
130
 
129
131
# Default rename prompt: asks for 'XX - Song Name' style names, forbids the
# characters < > : " / \ | ? * and asks for the name without its extension.
# The sentences are joined with single spaces into one prompt string.
UNIFY_ALBUM_RENAME_PROMPT_DEFAULT = " ".join(
    (
        "You are a music file naming expert. Suggest clean, consistent filenames for each track.",
        "Use format: 'XX - Song Name' where XX is the track number with leading zero if needed.",
        "Keep only essential information, remove extra words like 'feat', 'explicit', etc.",
        "IMPORTANT: Never use these invalid characters in filenames: < > : \" / \\ | ? *",
        "Replace punctuation marks like ? with nothing or - (hyphen).",
        "Return the suggested filename WITHOUT the file extension.",
    )
)
135
139
 
@@ -139,6 +143,7 @@ UNIFY_ALBUM_SCHEMA = {
139
143
  "unified_album": {"type": "string", "description": "The unified album name"},
140
144
  "tracks": {
141
145
  "type": "array",
146
+ "description": "MUST contain one entry for EACH input file - no files may be omitted",
142
147
  "items": {
143
148
  "type": "object",
144
149
  "properties": {
@@ -419,6 +424,131 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
419
424
  if debug:
420
425
  mmguero.eprint(f"AI unification response: {content}")
421
426
 
427
+ # Validate that we got results for all files
428
+ returned_tracks = parsed.get('tracks', [])
429
+ if len(returned_tracks) < len(metadata_list):
430
+ missing = len(metadata_list) - len(returned_tracks)
431
+ mmguero.eprint(f"WARNING: AI only returned {len(returned_tracks)} of {len(metadata_list)} track assignments.")
432
+ mmguero.eprint(f"This is likely due to Groq's output token limit. {missing} files were not processed.")
433
+ if spotify_tracks:
434
+ mmguero.eprint("Try running again without --use-spotify, or process files in smaller batches.")
435
+ else:
436
+ mmguero.eprint("Try processing files in smaller batches (e.g., split into subdirectories).")
437
+
438
+ return parsed
439
+
440
+ except requests.exceptions.Timeout:
441
+ if attempt < max_retries - 1:
442
+ if debug:
443
+ mmguero.eprint(f"Request timed out, retrying in {retry_delay}s...")
444
+ time.sleep(retry_delay)
445
+ retry_delay *= 2
446
+ else:
447
+ raise Exception("Album unification request timed out")
448
+
449
+ except requests.exceptions.RequestException as e:
450
+ if attempt < max_retries - 1:
451
+ if debug:
452
+ mmguero.eprint(f"Request failed: {e}, retrying in {retry_delay}s...")
453
+ time.sleep(retry_delay)
454
+ retry_delay *= 2
455
+ else:
456
+ raise Exception(f"Album unification request failed: {e}")
457
+
458
+ raise Exception("Album unification failed after maximum retries")
459
+
460
+
461
+ def _call_groq_api_single_batch(metadata_list, system_prompt, groq_api_key, model, batch_num=1, total_batches=1, debug=False):
462
+ """Make a single API call to Groq for album unification.
463
+
464
+ Args:
465
+ metadata_list: List of file metadata dicts to send in this batch
466
+ system_prompt: System prompt for the AI
467
+ groq_api_key: Groq API key for authentication
468
+ model: AI model name
469
+ batch_num: Current batch number (for debug output)
470
+ total_batches: Total expected batches (for debug output)
471
+ debug: Enable debug output
472
+
473
+ Returns:
474
+ dict: Parsed JSON response with 'unified_album' and 'tracks'
475
+
476
+ Raises:
477
+ Exception: If API call fails after retries
478
+ """
479
+ import requests
480
+ import time
481
+
482
+ # Build input for AI
483
+ input_text = json.dumps(metadata_list, indent=2, ensure_ascii=False)
484
+
485
+ # API call with retry logic (more retries for transient 400 JSON validation errors)
486
+ max_retries = 5
487
+ retry_delay = 1
488
+
489
+ for attempt in range(max_retries):
490
+ try:
491
+ if debug:
492
+ batch_info = f" (batch {batch_num}" + (f"/{total_batches}" if isinstance(total_batches, int) and total_batches > 1 else "") + ")"
493
+ mmguero.eprint(f"Calling Groq API{batch_info} (attempt {attempt + 1}/{max_retries})...")
494
+ mmguero.eprint(f"Sending {len(metadata_list)} files to AI for unification")
495
+
496
+ response = requests.post(
497
+ "https://api.groq.com/openai/v1/chat/completions",
498
+ headers={
499
+ "Authorization": f"Bearer {groq_api_key}",
500
+ "Content-Type": "application/json",
501
+ },
502
+ json={
503
+ "model": model,
504
+ "messages": [
505
+ {"role": "system", "content": system_prompt},
506
+ {"role": "user", "content": input_text},
507
+ ],
508
+ "response_format": {
509
+ "type": "json_schema",
510
+ "json_schema": {
511
+ "name": "album_unification",
512
+ "strict": True,
513
+ "schema": UNIFY_ALBUM_SCHEMA,
514
+ }
515
+ }
516
+ },
517
+ timeout=120,
518
+ )
519
+
520
+ # Handle rate limiting
521
+ if response.status_code == 429:
522
+ if attempt < max_retries - 1:
523
+ if debug:
524
+ mmguero.eprint(f"Rate limited, retrying in {retry_delay}s...")
525
+ time.sleep(retry_delay)
526
+ retry_delay *= 2
527
+ continue
528
+ raise Exception("Album unification rate limit exceeded")
529
+
530
+ if response.status_code == 401:
531
+ raise Exception("Invalid Groq API key for album unification")
532
+
533
+ # Handle 400 errors (usually JSON validation failures from Groq - transient)
534
+ if response.status_code == 400:
535
+ if attempt < max_retries - 1:
536
+ if debug:
537
+ mmguero.eprint(f"JSON validation error (400), retrying in {retry_delay}s...")
538
+ time.sleep(retry_delay)
539
+ retry_delay *= 2
540
+ continue
541
+ raise Exception("Album unification failed due to JSON validation errors")
542
+
543
+ response.raise_for_status()
544
+
545
+ result = response.json()
546
+ content = result.get("choices", [{}])[0].get("message", {}).get("content", "{}")
547
+ parsed = json.loads(content)
548
+
549
+ if debug:
550
+ mmguero.eprint(f"AI returned {len(parsed.get('tracks', []))} track assignments")
551
+
422
552
  return parsed
423
553
 
424
554
  except requests.exceptions.Timeout:
@@ -442,6 +572,145 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
442
572
  raise Exception("Album unification failed after maximum retries")
443
573
 
444
574
 
575
+ def _unify_album_metadata_with_batching(file_paths, groq_api_key, model, prompt, rename_prompt=None, spotify_tracks=None, batch_size=10, batch_size_spotify=5, debug=False):
576
+ """Use Groq AI to unify album metadata with automatic batching for large file lists.
577
+
578
+ Implements automatic batching to handle Groq's output token limits.
579
+ If a partial response is received, automatically retries with remaining files.
580
+
581
+ Args:
582
+ file_paths: List of audio file paths
583
+ groq_api_key: Groq API key for authentication
584
+ model: AI model name (e.g., "openai/gpt-oss-120b")
585
+ prompt: System prompt for the AI
586
+ rename_prompt: Optional prompt for renaming (if provided, adds suggested_name to response)
587
+ spotify_tracks: Optional list of track names from Spotify for accurate ordering
588
+ debug: Enable debug output
589
+
590
+ Returns:
591
+ Dict with 'unified_album' (str) and 'tracks' (list of dicts with
592
+ 'filename', 'track_number', 'album_name', optionally 'suggested_name')
593
+
594
+ Raises:
595
+ ValueError: If API key is missing
596
+ Exception: If API call fails
597
+ """
598
+ if not groq_api_key:
599
+ raise ValueError("Groq API key required for album unification")
600
+
601
+ # Read metadata from all files
602
+ if debug:
603
+ mmguero.eprint("Reading metadata from files...")
604
+ metadata_list = _read_metadata_from_files(file_paths, debug=debug)
605
+
606
+ # Build system prompt (add rename and Spotify instructions if needed)
607
+ system_prompt = prompt
608
+ if rename_prompt:
609
+ system_prompt = f"{prompt}\n\n{rename_prompt}"
610
+
611
+ unified_album = None # Will be set from first batch and reused
612
+ all_tracks = [] # Accumulates results across batches
613
+ processed_files = set() # Tracks which files we've gotten results for
614
+
615
+ # Proactive batching: limit batch size to avoid overwhelming Groq
616
+ # With Spotify tracks, use smaller batches since the prompt is larger
617
+ max_batch_size = batch_size_spotify if spotify_tracks else batch_size
618
+
619
+ # Start with first batch
620
+ batch_metadata = metadata_list[:max_batch_size]
621
+ remaining_metadata = metadata_list[max_batch_size:]
622
+ batch_num = 0
623
+
624
+ while batch_metadata:
625
+ batch_num += 1
626
+
627
+ # Build system prompt for this batch
628
+ # ALWAYS pass full Spotify list - don't slice it!
629
+ batch_system_prompt = system_prompt
630
+ if spotify_tracks:
631
+ # Add FULL Spotify track listing every time
632
+ tracks_json = json.dumps(spotify_tracks, ensure_ascii=False)
633
+ batch_system_prompt = f"{system_prompt}\n\nOfficial track listing from Spotify: {tracks_json}"
634
+ if debug and batch_num == 1:
635
+ mmguero.eprint(f"Providing full Spotify track list ({len(spotify_tracks)} tracks) - AI will match by name")
636
+
637
+ # Call API with current batch
638
+ try:
639
+ parsed = _call_groq_api_single_batch(
640
+ batch_metadata, batch_system_prompt, groq_api_key, model,
641
+ batch_num, total_batches="?", debug=debug
642
+ )
643
+ except Exception as e:
644
+ # If this isn't the first batch, we have partial results - fail gracefully
645
+ if all_tracks:
646
+ mmguero.eprint(f"Batch {batch_num} failed after {len(all_tracks)} tracks were processed: {e}")
647
+ mmguero.eprint("Proceeding with partial results...")
648
+ break
649
+ raise
650
+
651
+ # On first successful call, capture unified_album name
652
+ if unified_album is None and parsed.get('unified_album'):
653
+ unified_album = parsed['unified_album']
654
+
655
+ # For subsequent batches, override the album name to match first batch
656
+ if unified_album and parsed.get('unified_album') != unified_album:
657
+ parsed['unified_album'] = unified_album
658
+
659
+ # Add returned tracks to our collection
660
+ returned_tracks = parsed.get('tracks', [])
661
+
662
+ # Guard against empty response to avoid infinite loop
663
+ if not returned_tracks:
664
+ mmguero.eprint(f"WARNING: Batch {batch_num} returned no tracks. Stopping to avoid infinite loop.")
665
+ break
666
+
667
+ for track in returned_tracks:
668
+ filename = track['filename']
669
+ if filename not in processed_files:
670
+ all_tracks.append(track)
671
+ processed_files.add(filename)
672
+
673
+ if debug:
674
+ mmguero.eprint(f"Batch {batch_num} complete: {len(returned_tracks)} tracks returned, {len(all_tracks)} total processed")
675
+
676
+ # Determine what's missing from this batch
677
+ returned_filenames = {t['filename'] for t in returned_tracks}
678
+ missing_metadata = [
679
+ m for m in batch_metadata
680
+ if m['filename'] not in returned_filenames and m['filename'] not in processed_files
681
+ ]
682
+
683
+ if not missing_metadata and not remaining_metadata:
684
+ # All files processed!
685
+ if debug:
686
+ mmguero.eprint(f"All {len(all_tracks)} files processed successfully!")
687
+ break
688
+
689
+ # Prepare next batch: combine missing files + next chunk from remaining
690
+ next_batch = missing_metadata if missing_metadata else []
691
+ if remaining_metadata:
692
+ take = min(max_batch_size - len(next_batch), len(remaining_metadata))
693
+ next_batch.extend(remaining_metadata[:take])
694
+ remaining_metadata = remaining_metadata[take:]
695
+
696
+ if not next_batch:
697
+ break
698
+
699
+ if debug:
700
+ if missing_metadata:
701
+ new_files = len(next_batch) - len(missing_metadata)
702
+ mmguero.eprint(f"Partial response: {len(missing_metadata)} files from this batch need retry. Next batch: {len(missing_metadata)} retries + {new_files} new files = {len(next_batch)} total")
703
+ else:
704
+ mmguero.eprint(f"Starting batch {batch_num + 1} with {len(next_batch)} files...")
705
+
706
+ batch_metadata = next_batch
707
+
708
+ return {
709
+ 'unified_album': unified_album or '',
710
+ 'tracks': all_tracks
711
+ }
712
+
713
+
445
714
  ###################################################################################################
446
715
  # Apply unified metadata to audio files
447
716
  def _apply_unified_metadata(file_paths, unified_result, debug=False):
@@ -523,6 +792,35 @@ def _apply_unified_metadata(file_paths, unified_result, debug=False):
523
792
 
524
793
  ###################################################################################################
525
794
  # Apply smart renaming to audio files
795
+ def _sanitize_filename(filename, debug=False):
796
+ r"""Sanitize filename for Windows compatibility.
797
+
798
+ Removes/replaces characters that are invalid on Windows:
799
+ < > : " / \ | ? *
800
+
801
+ Args:
802
+ filename: The filename to sanitize (without extension)
803
+ debug: Enable debug output
804
+
805
+ Returns:
806
+ str: Sanitized filename
807
+ """
808
+ # Windows invalid characters: < > : " / \ | ? *
809
+ invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
810
+ sanitized = filename
811
+
812
+ for char in invalid_chars:
813
+ sanitized = sanitized.replace(char, '')
814
+
815
+ # Also handle leading/trailing spaces and dots
816
+ sanitized = sanitized.strip('. ')
817
+
818
+ if debug and sanitized != filename:
819
+ mmguero.eprint(f"Sanitized filename: '{filename}' → '{sanitized}'")
820
+
821
+ return sanitized
822
+
823
+
526
824
  def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
527
825
  """Rename files based on AI-suggested names.
528
826
 
@@ -568,8 +866,9 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
568
866
  dirname = os.path.dirname(filepath)
569
867
  ext = os.path.splitext(filepath)[1]
570
868
 
571
- # Build new filename
572
- new_name = f"{suggested_name}{ext}"
869
+ # Build new filename (sanitize for Windows compatibility)
870
+ sanitized_name = _sanitize_filename(suggested_name, debug=debug)
871
+ new_name = f"{sanitized_name}{ext}"
573
872
  new_path = os.path.join(dirname, new_name)
574
873
 
575
874
  # Skip if same name
@@ -578,10 +877,11 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
578
877
  mmguero.eprint(f"Skipping rename (same name): {basename}")
579
878
  continue
580
879
 
581
- # Check if target already exists
880
+ # Check if target already exists - remove it so we can rename/overwrite
582
881
  if os.path.exists(new_path):
583
- mmguero.eprint(f"Cannot rename {basename} to {new_name}: target already exists")
584
- continue
882
+ if debug:
883
+ mmguero.eprint(f"Removing existing file {new_name} to allow rename")
884
+ os.remove(new_path)
585
885
 
586
886
  try:
587
887
  shutil.move(filepath, new_path)
@@ -610,7 +910,7 @@ def _search_spotify_album(album_name, debug=False):
610
910
  from ddgs import DDGS
611
911
  except ImportError:
612
912
  if debug:
613
- mmguero.eprint("duckduckgo-search not installed, skipping Spotify search")
913
+ mmguero.eprint("ddgs not installed, skipping Spotify search")
614
914
  return None
615
915
 
616
916
  query = f"site:spotify.com {album_name} album"
@@ -645,7 +945,7 @@ def _get_spotify_album_info(spotify_url, debug=False):
645
945
  from spotify_scraper import SpotifyClient
646
946
  except ImportError:
647
947
  if debug:
648
- mmguero.eprint("spotify-scraper not installed, skipping Spotify info")
948
+ mmguero.eprint("spotifyscraper not installed, skipping Spotify info")
649
949
  return None
650
950
 
651
951
  try:
@@ -790,6 +1090,8 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
790
1090
 
791
1091
  model = config.get("unify_album_model", "openai/gpt-oss-120b")
792
1092
  prompt = config.get("unify_album_prompt", UNIFY_ALBUM_PROMPT_DEFAULT)
1093
+ batch_size = config.get("unify_album_batch_size", 10)
1094
+ batch_size_spotify = config.get("unify_album_batch_size_with_spotify", 5)
793
1095
 
794
1096
  # Determine files to process
795
1097
  audio_extensions = ['.mp3', '.mp4', '.m4a', '.wav', '.flac', '.ogg', '.aac', '.wma']
@@ -833,8 +1135,9 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
833
1135
  mmguero.eprint(f"Found {len(file_paths)} audio files for album unification")
834
1136
 
835
1137
  # Call AI to unify album metadata (first pass - gets unified album name)
836
- unified_result = _unify_album_metadata(
837
- file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt, debug=debug
1138
+ unified_result = _unify_album_metadata_with_batching(
1139
+ file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt,
1140
+ batch_size=batch_size, batch_size_spotify=batch_size_spotify, debug=debug
838
1141
  )
839
1142
 
840
1143
  unified_album = unified_result.get('unified_album', '')
@@ -863,10 +1166,11 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
863
1166
 
864
1167
  # Second AI pass with Spotify track listing for accurate ordering
865
1168
  mmguero.eprint("Refining track order with Spotify data...")
866
- unified_result = _unify_album_metadata(
1169
+ unified_result = _unify_album_metadata_with_batching(
867
1170
  file_paths, groq_api_key, model, prompt,
868
1171
  rename_prompt=rename_prompt,
869
1172
  spotify_tracks=spotify_info.get('tracks', []),
1173
+ batch_size=batch_size, batch_size_spotify=batch_size_spotify,
870
1174
  debug=debug
871
1175
  )
872
1176
  else:
@@ -2180,6 +2484,65 @@ class Plugger(object):
2180
2484
 
2181
2485
  self.CreateCleanMuteList()
2182
2486
 
2487
+ # Fast-path: if no profanity detected and same format, just copy
2488
+ no_profanity = len(self.naughtyWordList) == 0
2489
+ same_format = (
2490
+ self.inputFileParts[1].lower().lstrip('.') == self.outputAudioFileFormat
2491
+ if hasattr(self, 'inputFileParts')
2492
+ else False
2493
+ )
2494
+
2495
+ if no_profanity and same_format:
2496
+ # Direct copy - no processing needed
2497
+ if self.debug:
2498
+ mmguero.eprint("No profanity detected and same format - using direct copy")
2499
+
2500
+ # Update progress bar description to reflect copy operation
2501
+ if progress:
2502
+ progress.set_description("Copying")
2503
+
2504
+ copy_start = time.time()
2505
+ shutil.copyfile(self.inputFileSpec, self.outputFileSpec)
2506
+ copy_time = time.time() - copy_start
2507
+
2508
+ # Still embed metadata and tag as processed
2509
+ if self.debug:
2510
+ mmguero.eprint("Embedding Shazam metadata into copied file...")
2511
+ self._embed_metadata(self.outputFileSpec)
2512
+ SetMonkeyplugTag(self.outputFileSpec, debug=self.debug)
2513
+
2514
+ # Complete progress
2515
+ if progress:
2516
+ if smooth_ticker:
2517
+ progress.n = progress.total
2518
+ progress.refresh()
2519
+ else:
2520
+ progress.update(1)
2521
+ progress.close()
2522
+
2523
+ # Record timing separately for copy (don't skew encode estimates)
2524
+ # Copy is much faster than encode, so we track it separately
2525
+ if step_timings is not None and file_duration > 0:
2526
+ step_timings['copy'] = (copy_time, file_duration)
2527
+ if timing_log is not None and file_duration > 0:
2528
+ for op, (wall_secs, audio_secs) in step_timings.items():
2529
+ update_timing_measurement(timing_log, op, wall_secs, audio_secs)
2530
+ save_timing_log(timing_log)
2531
+
2532
+ # Clean up progress references
2533
+ if hasattr(self, '_progress'):
2534
+ delattr(self, '_progress')
2535
+ for attr in ('_smooth_ticker', '_smooth_cumulative', '_smooth_extract_est',
2536
+ '_smooth_transcribe_est', '_will_transcribe',
2537
+ '_step_timings', '_timing_log', '_timing_file_duration'):
2538
+ if hasattr(self, attr):
2539
+ delattr(self, attr)
2540
+
2541
+ # Print profanity detection summary
2542
+ self._print_words_summary()
2543
+
2544
+ return self.outputFileSpec
2545
+
2183
2546
  # Update progress after CreateCleanMuteList (step-based mode only)
2184
2547
  if progress and not smooth_ticker:
2185
2548
  did_extraction = (
@@ -3323,8 +3686,42 @@ DEFAULT_CONFIG = {
3323
3686
  "ai_detect_prompt": AI_DETECT_PROMPT_DEFAULT,
3324
3687
  "unify_album_model": "openai/gpt-oss-120b",
3325
3688
  "unify_album_prompt": UNIFY_ALBUM_PROMPT_DEFAULT,
3689
+ "unify_album_batch_size": 10,
3690
+ "unify_album_batch_size_with_spotify": 5,
3326
3691
  }
3327
3692
 
3693
# Config keys restricted to a fixed set of options: for each key, the accepted
# values ("choices") and the value substituted for anything else ("default").
CONFIG_VALIDATION = {
    "show_words": {"choices": ["full", "clean", "none"], "default": "clean"},
    "detect_mode": {"choices": ["list", "ai", "both"], "default": "list"},
}


def validate_config_settings(config, debug=False):
    """
    Return a copy of config with invalid option values reset to defaults.

    Only keys listed in CONFIG_VALIDATION are checked, and only when they are
    present in config. A value outside the key's allowed choices triggers a
    warning and is replaced by that key's default. The input is not modified.

    Args:
        config: dict - Config settings to validate
        debug: bool - Enable debug output (accepted but not used here)

    Returns:
        dict: Validated config with invalid values replaced by defaults
    """
    result = dict(config)
    for name, rule in CONFIG_VALIDATION.items():
        # Absent keys are left absent; defaults are not injected here
        if name not in result:
            continue
        current = result[name]
        if current in rule["choices"]:
            continue
        fallback = rule["default"]
        mmguero.eprint(f"WARNING: CONFIG \"{name}\" SET TO INVALID VALUE \"{current}\". USING DEFAULT \"{fallback}\".")
        result[name] = fallback
    return result
3724
+
3328
3725
 
3329
3726
  def load_config_settings(debug=False):
3330
3727
  """
@@ -3354,7 +3751,8 @@ def load_config_settings(debug=False):
3354
3751
  if debug:
3355
3752
  mmguero.eprint(f"Loaded config from: {config_path}")
3356
3753
 
3357
- return config
3754
+ # Validate and fix any invalid config values
3755
+ return validate_config_settings(config, debug=debug)
3358
3756
  except (json.JSONDecodeError, IOError) as e:
3359
3757
  if debug:
3360
3758
  mmguero.eprint(f"Warning: Failed to load config from {config_path}: {e}")
@@ -3372,6 +3770,7 @@ def load_config_settings(debug=False):
3372
3770
  if debug:
3373
3771
  mmguero.eprint(f"Warning: Could not create default config: {e}")
3374
3772
 
3773
+ # Return a copy of DEFAULT_CONFIG (already validated)
3375
3774
  return dict(DEFAULT_CONFIG)
3376
3775
 
3377
3776