monkeyplug-enhanced 2.3.0__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/PKG-INFO +3 -3
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/pyproject.toml +3 -3
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/src/monkeyplug/monkeyplug.py +411 -12
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/.gitignore +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/LICENSE +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/README.md +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/src/monkeyplug/__init__.py +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/src/monkeyplug/data/profanity_list.json +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/src/monkeyplug/groq_config.py +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/src/monkeyplug/separation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: monkeyplug-enhanced
|
|
3
|
-
Version: 2.3.0
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing.
|
|
5
5
|
Project-URL: Homepage, https://github.com/ljbred08/monkeyplug
|
|
6
6
|
Project-URL: Issues, https://github.com/ljbred08/monkeyplug/issues
|
|
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
13
13
|
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
14
14
|
Requires-Python: >=3.10
|
|
15
15
|
Requires-Dist: aiohttp>=3.9.0
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: ddgs>=6.0.0
|
|
17
17
|
Requires-Dist: groq>=0.1.0
|
|
18
18
|
Requires-Dist: mmguero==2.0.3
|
|
19
19
|
Requires-Dist: mutagen==1.47.0
|
|
@@ -22,7 +22,7 @@ Requires-Dist: requests==2.32.5
|
|
|
22
22
|
Requires-Dist: shazamio>=0.8.0
|
|
23
23
|
Requires-Dist: sherpa-onnx>=1.10.0
|
|
24
24
|
Requires-Dist: soundfile>=0.12.0
|
|
25
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: spotifyscraper>=0.1.0
|
|
26
26
|
Requires-Dist: tqdm>=4.65.0
|
|
27
27
|
Description-Content-Type: text/markdown
|
|
28
28
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "monkeyplug-enhanced"
|
|
7
|
-
version = "2.3.0"
|
|
7
|
+
version = "2.3.1"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Seth Grover", email="mero.mero.guero@gmail.com" },
|
|
10
10
|
{ name="Lincoln Brown", email="link@brown.fm" },
|
|
@@ -29,8 +29,8 @@ dependencies = [
|
|
|
29
29
|
"tqdm>=4.65.0",
|
|
30
30
|
"shazamio>=0.8.0",
|
|
31
31
|
"aiohttp>=3.9.0",
|
|
32
|
-
"
|
|
33
|
-
"
|
|
32
|
+
"ddgs>=6.0.0",
|
|
33
|
+
"spotifyscraper>=0.1.0",
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
[project.urls]
|
|
@@ -123,13 +123,17 @@ UNIFY_ALBUM_PROMPT_DEFAULT = (
|
|
|
123
123
|
"titles, and current album names, determine the correct unified album name and "
|
|
124
124
|
"assign track numbers to each song. Consider the existing album name guesses and "
|
|
125
125
|
"song titles to infer the real album. Return track numbers in the order the songs "
|
|
126
|
-
"should appear on the album."
|
|
126
|
+
"should appear on the album. "
|
|
127
|
+
"CRITICAL: You MUST return a track entry for EVERY input file - do not skip any files. "
|
|
128
|
+
"The tracks array must contain exactly the same number of entries as the input."
|
|
127
129
|
)
|
|
128
130
|
|
|
129
131
|
UNIFY_ALBUM_RENAME_PROMPT_DEFAULT = (
|
|
130
132
|
"You are a music file naming expert. Suggest clean, consistent filenames for each track. "
|
|
131
133
|
"Use format: 'XX - Song Name' where XX is the track number with leading zero if needed. "
|
|
132
134
|
"Keep only essential information, remove extra words like 'feat', 'explicit', etc. "
|
|
135
|
+
"IMPORTANT: Never use these invalid characters in filenames: < > : \" / \\ | ? * "
|
|
136
|
+
"Replace punctuation marks like ? with nothing or - (hyphen). "
|
|
133
137
|
"Return the suggested filename WITHOUT the file extension."
|
|
134
138
|
)
|
|
135
139
|
|
|
@@ -139,6 +143,7 @@ UNIFY_ALBUM_SCHEMA = {
|
|
|
139
143
|
"unified_album": {"type": "string", "description": "The unified album name"},
|
|
140
144
|
"tracks": {
|
|
141
145
|
"type": "array",
|
|
146
|
+
"description": "MUST contain one entry for EACH input file - no files may be omitted",
|
|
142
147
|
"items": {
|
|
143
148
|
"type": "object",
|
|
144
149
|
"properties": {
|
|
@@ -419,6 +424,131 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
|
|
|
419
424
|
if debug:
|
|
420
425
|
mmguero.eprint(f"AI unification response: {content}")
|
|
421
426
|
|
|
427
|
+
# Validate that we got results for all files
|
|
428
|
+
returned_tracks = parsed.get('tracks', [])
|
|
429
|
+
if len(returned_tracks) < len(metadata_list):
|
|
430
|
+
missing = len(metadata_list) - len(returned_tracks)
|
|
431
|
+
mmguero.eprint(f"WARNING: AI only returned {len(returned_tracks)} of {len(metadata_list)} track assignments.")
|
|
432
|
+
mmguero.eprint(f"This is likely due to Groq's output token limit. {missing} files were not processed.")
|
|
433
|
+
if spotify_tracks:
|
|
434
|
+
mmguero.eprint("Try running again without --use-spotify, or process files in smaller batches.")
|
|
435
|
+
else:
|
|
436
|
+
mmguero.eprint("Try processing files in smaller batches (e.g., split into subdirectories).")
|
|
437
|
+
|
|
438
|
+
return parsed
|
|
439
|
+
|
|
440
|
+
except requests.exceptions.Timeout:
|
|
441
|
+
if attempt < max_retries - 1:
|
|
442
|
+
if debug:
|
|
443
|
+
mmguero.eprint(f"Request timed out, retrying in {retry_delay}s...")
|
|
444
|
+
time.sleep(retry_delay)
|
|
445
|
+
retry_delay *= 2
|
|
446
|
+
else:
|
|
447
|
+
raise Exception("Album unification request timed out")
|
|
448
|
+
|
|
449
|
+
except requests.exceptions.RequestException as e:
|
|
450
|
+
if attempt < max_retries - 1:
|
|
451
|
+
if debug:
|
|
452
|
+
mmguero.eprint(f"Request failed: {e}, retrying in {retry_delay}s...")
|
|
453
|
+
time.sleep(retry_delay)
|
|
454
|
+
retry_delay *= 2
|
|
455
|
+
else:
|
|
456
|
+
raise Exception(f"Album unification request failed: {e}")
|
|
457
|
+
|
|
458
|
+
raise Exception("Album unification failed after maximum retries")
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _call_groq_api_single_batch(metadata_list, system_prompt, groq_api_key, model, batch_num=1, total_batches=1, debug=False):
|
|
462
|
+
"""Make a single API call to Groq for album unification.
|
|
463
|
+
|
|
464
|
+
Args:
|
|
465
|
+
metadata_list: List of file metadata dicts to send in this batch
|
|
466
|
+
system_prompt: System prompt for the AI
|
|
467
|
+
groq_api_key: Groq API key for authentication
|
|
468
|
+
model: AI model name
|
|
469
|
+
batch_num: Current batch number (for debug output)
|
|
470
|
+
total_batches: Total expected batches (for debug output)
|
|
471
|
+
debug: Enable debug output
|
|
472
|
+
|
|
473
|
+
Returns:
|
|
474
|
+
dict: Parsed JSON response with 'unified_album' and 'tracks'
|
|
475
|
+
|
|
476
|
+
Raises:
|
|
477
|
+
Exception: If API call fails after retries
|
|
478
|
+
"""
|
|
479
|
+
import requests
|
|
480
|
+
import time
|
|
481
|
+
|
|
482
|
+
# Build input for AI
|
|
483
|
+
input_text = json.dumps(metadata_list, indent=2, ensure_ascii=False)
|
|
484
|
+
|
|
485
|
+
# API call with retry logic (more retries for transient 400 JSON validation errors)
|
|
486
|
+
max_retries = 5
|
|
487
|
+
retry_delay = 1
|
|
488
|
+
|
|
489
|
+
for attempt in range(max_retries):
|
|
490
|
+
try:
|
|
491
|
+
if debug:
|
|
492
|
+
batch_info = f" (batch {batch_num}" + (f"/{total_batches}" if isinstance(total_batches, int) and total_batches > 1 else "") + ")"
|
|
493
|
+
mmguero.eprint(f"Calling Groq API{batch_info} (attempt {attempt + 1}/{max_retries})...")
|
|
494
|
+
mmguero.eprint(f"Sending {len(metadata_list)} files to AI for unification")
|
|
495
|
+
|
|
496
|
+
response = requests.post(
|
|
497
|
+
"https://api.groq.com/openai/v1/chat/completions",
|
|
498
|
+
headers={
|
|
499
|
+
"Authorization": f"Bearer {groq_api_key}",
|
|
500
|
+
"Content-Type": "application/json",
|
|
501
|
+
},
|
|
502
|
+
json={
|
|
503
|
+
"model": model,
|
|
504
|
+
"messages": [
|
|
505
|
+
{"role": "system", "content": system_prompt},
|
|
506
|
+
{"role": "user", "content": input_text},
|
|
507
|
+
],
|
|
508
|
+
"response_format": {
|
|
509
|
+
"type": "json_schema",
|
|
510
|
+
"json_schema": {
|
|
511
|
+
"name": "album_unification",
|
|
512
|
+
"strict": True,
|
|
513
|
+
"schema": UNIFY_ALBUM_SCHEMA,
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
},
|
|
517
|
+
timeout=120,
|
|
518
|
+
)
|
|
519
|
+
|
|
520
|
+
# Handle rate limiting
|
|
521
|
+
if response.status_code == 429:
|
|
522
|
+
if attempt < max_retries - 1:
|
|
523
|
+
if debug:
|
|
524
|
+
mmguero.eprint(f"Rate limited, retrying in {retry_delay}s...")
|
|
525
|
+
time.sleep(retry_delay)
|
|
526
|
+
retry_delay *= 2
|
|
527
|
+
continue
|
|
528
|
+
raise Exception("Album unification rate limit exceeded")
|
|
529
|
+
|
|
530
|
+
if response.status_code == 401:
|
|
531
|
+
raise Exception("Invalid Groq API key for album unification")
|
|
532
|
+
|
|
533
|
+
# Handle 400 errors (usually JSON validation failures from Groq - transient)
|
|
534
|
+
if response.status_code == 400:
|
|
535
|
+
if attempt < max_retries - 1:
|
|
536
|
+
if debug:
|
|
537
|
+
mmguero.eprint(f"JSON validation error (400), retrying in {retry_delay}s...")
|
|
538
|
+
time.sleep(retry_delay)
|
|
539
|
+
retry_delay *= 2
|
|
540
|
+
continue
|
|
541
|
+
raise Exception("Album unification failed due to JSON validation errors")
|
|
542
|
+
|
|
543
|
+
response.raise_for_status()
|
|
544
|
+
|
|
545
|
+
result = response.json()
|
|
546
|
+
content = result.get("choices", [{}])[0].get("message", {}).get("content", "{}")
|
|
547
|
+
parsed = json.loads(content)
|
|
548
|
+
|
|
549
|
+
if debug:
|
|
550
|
+
mmguero.eprint(f"AI returned {len(parsed.get('tracks', []))} track assignments")
|
|
551
|
+
|
|
422
552
|
return parsed
|
|
423
553
|
|
|
424
554
|
except requests.exceptions.Timeout:
|
|
@@ -442,6 +572,145 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
|
|
|
442
572
|
raise Exception("Album unification failed after maximum retries")
|
|
443
573
|
|
|
444
574
|
|
|
575
|
+
def _unify_album_metadata_with_batching(file_paths, groq_api_key, model, prompt, rename_prompt=None, spotify_tracks=None, batch_size=10, batch_size_spotify=5, debug=False):
|
|
576
|
+
"""Use Groq AI to unify album metadata with automatic batching for large file lists.
|
|
577
|
+
|
|
578
|
+
Implements automatic batching to handle Groq's output token limits.
|
|
579
|
+
If a partial response is received, automatically retries with remaining files.
|
|
580
|
+
|
|
581
|
+
Args:
|
|
582
|
+
file_paths: List of audio file paths
|
|
583
|
+
groq_api_key: Groq API key for authentication
|
|
584
|
+
model: AI model name (e.g., "openai/gpt-oss-120b")
|
|
585
|
+
prompt: System prompt for the AI
|
|
586
|
+
rename_prompt: Optional prompt for renaming (if provided, adds suggested_name to response)
|
|
587
|
+
spotify_tracks: Optional list of track names from Spotify for accurate ordering
|
|
588
|
+
debug: Enable debug output
|
|
589
|
+
|
|
590
|
+
Returns:
|
|
591
|
+
Dict with 'unified_album' (str) and 'tracks' (list of dicts with
|
|
592
|
+
'filename', 'track_number', 'album_name', optionally 'suggested_name')
|
|
593
|
+
|
|
594
|
+
Raises:
|
|
595
|
+
ValueError: If API key is missing
|
|
596
|
+
Exception: If API call fails
|
|
597
|
+
"""
|
|
598
|
+
if not groq_api_key:
|
|
599
|
+
raise ValueError("Groq API key required for album unification")
|
|
600
|
+
|
|
601
|
+
# Read metadata from all files
|
|
602
|
+
if debug:
|
|
603
|
+
mmguero.eprint("Reading metadata from files...")
|
|
604
|
+
metadata_list = _read_metadata_from_files(file_paths, debug=debug)
|
|
605
|
+
|
|
606
|
+
# Build system prompt (add rename and Spotify instructions if needed)
|
|
607
|
+
system_prompt = prompt
|
|
608
|
+
if rename_prompt:
|
|
609
|
+
system_prompt = f"{prompt}\n\n{rename_prompt}"
|
|
610
|
+
|
|
611
|
+
unified_album = None # Will be set from first batch and reused
|
|
612
|
+
all_tracks = [] # Accumulates results across batches
|
|
613
|
+
processed_files = set() # Tracks which files we've gotten results for
|
|
614
|
+
|
|
615
|
+
# Proactive batching: limit batch size to avoid overwhelming Groq
|
|
616
|
+
# With Spotify tracks, use smaller batches since the prompt is larger
|
|
617
|
+
max_batch_size = batch_size_spotify if spotify_tracks else batch_size
|
|
618
|
+
|
|
619
|
+
# Start with first batch
|
|
620
|
+
batch_metadata = metadata_list[:max_batch_size]
|
|
621
|
+
remaining_metadata = metadata_list[max_batch_size:]
|
|
622
|
+
batch_num = 0
|
|
623
|
+
|
|
624
|
+
while batch_metadata:
|
|
625
|
+
batch_num += 1
|
|
626
|
+
|
|
627
|
+
# Build system prompt for this batch
|
|
628
|
+
# ALWAYS pass full Spotify list - don't slice it!
|
|
629
|
+
batch_system_prompt = system_prompt
|
|
630
|
+
if spotify_tracks:
|
|
631
|
+
# Add FULL Spotify track listing every time
|
|
632
|
+
tracks_json = json.dumps(spotify_tracks, ensure_ascii=False)
|
|
633
|
+
batch_system_prompt = f"{system_prompt}\n\nOfficial track listing from Spotify: {tracks_json}"
|
|
634
|
+
if debug and batch_num == 1:
|
|
635
|
+
mmguero.eprint(f"Providing full Spotify track list ({len(spotify_tracks)} tracks) - AI will match by name")
|
|
636
|
+
|
|
637
|
+
# Call API with current batch
|
|
638
|
+
try:
|
|
639
|
+
parsed = _call_groq_api_single_batch(
|
|
640
|
+
batch_metadata, batch_system_prompt, groq_api_key, model,
|
|
641
|
+
batch_num, total_batches="?", debug=debug
|
|
642
|
+
)
|
|
643
|
+
except Exception as e:
|
|
644
|
+
# If this isn't the first batch, we have partial results - fail gracefully
|
|
645
|
+
if all_tracks:
|
|
646
|
+
mmguero.eprint(f"Batch {batch_num} failed after {len(all_tracks)} tracks were processed: {e}")
|
|
647
|
+
mmguero.eprint("Proceeding with partial results...")
|
|
648
|
+
break
|
|
649
|
+
raise
|
|
650
|
+
|
|
651
|
+
# On first successful call, capture unified_album name
|
|
652
|
+
if unified_album is None and parsed.get('unified_album'):
|
|
653
|
+
unified_album = parsed['unified_album']
|
|
654
|
+
|
|
655
|
+
# For subsequent batches, override the album name to match first batch
|
|
656
|
+
if unified_album and parsed.get('unified_album') != unified_album:
|
|
657
|
+
parsed['unified_album'] = unified_album
|
|
658
|
+
|
|
659
|
+
# Add returned tracks to our collection
|
|
660
|
+
returned_tracks = parsed.get('tracks', [])
|
|
661
|
+
|
|
662
|
+
# Guard against empty response to avoid infinite loop
|
|
663
|
+
if not returned_tracks:
|
|
664
|
+
mmguero.eprint(f"WARNING: Batch {batch_num} returned no tracks. Stopping to avoid infinite loop.")
|
|
665
|
+
break
|
|
666
|
+
|
|
667
|
+
for track in returned_tracks:
|
|
668
|
+
filename = track['filename']
|
|
669
|
+
if filename not in processed_files:
|
|
670
|
+
all_tracks.append(track)
|
|
671
|
+
processed_files.add(filename)
|
|
672
|
+
|
|
673
|
+
if debug:
|
|
674
|
+
mmguero.eprint(f"Batch {batch_num} complete: {len(returned_tracks)} tracks returned, {len(all_tracks)} total processed")
|
|
675
|
+
|
|
676
|
+
# Determine what's missing from this batch
|
|
677
|
+
returned_filenames = {t['filename'] for t in returned_tracks}
|
|
678
|
+
missing_metadata = [
|
|
679
|
+
m for m in batch_metadata
|
|
680
|
+
if m['filename'] not in returned_filenames and m['filename'] not in processed_files
|
|
681
|
+
]
|
|
682
|
+
|
|
683
|
+
if not missing_metadata and not remaining_metadata:
|
|
684
|
+
# All files processed!
|
|
685
|
+
if debug:
|
|
686
|
+
mmguero.eprint(f"All {len(all_tracks)} files processed successfully!")
|
|
687
|
+
break
|
|
688
|
+
|
|
689
|
+
# Prepare next batch: combine missing files + next chunk from remaining
|
|
690
|
+
next_batch = missing_metadata if missing_metadata else []
|
|
691
|
+
if remaining_metadata:
|
|
692
|
+
take = min(max_batch_size - len(next_batch), len(remaining_metadata))
|
|
693
|
+
next_batch.extend(remaining_metadata[:take])
|
|
694
|
+
remaining_metadata = remaining_metadata[take:]
|
|
695
|
+
|
|
696
|
+
if not next_batch:
|
|
697
|
+
break
|
|
698
|
+
|
|
699
|
+
if debug:
|
|
700
|
+
if missing_metadata:
|
|
701
|
+
new_files = len(next_batch) - len(missing_metadata)
|
|
702
|
+
mmguero.eprint(f"Partial response: {len(missing_metadata)} files from this batch need retry. Next batch: {len(missing_metadata)} retries + {new_files} new files = {len(next_batch)} total")
|
|
703
|
+
else:
|
|
704
|
+
mmguero.eprint(f"Starting batch {batch_num + 1} with {len(next_batch)} files...")
|
|
705
|
+
|
|
706
|
+
batch_metadata = next_batch
|
|
707
|
+
|
|
708
|
+
return {
|
|
709
|
+
'unified_album': unified_album or '',
|
|
710
|
+
'tracks': all_tracks
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
|
|
445
714
|
###################################################################################################
|
|
446
715
|
# Apply unified metadata to audio files
|
|
447
716
|
def _apply_unified_metadata(file_paths, unified_result, debug=False):
|
|
@@ -523,6 +792,35 @@ def _apply_unified_metadata(file_paths, unified_result, debug=False):
|
|
|
523
792
|
|
|
524
793
|
###################################################################################################
|
|
525
794
|
# Apply smart renaming to audio files
|
|
795
|
+
def _sanitize_filename(filename, debug=False):
|
|
796
|
+
r"""Sanitize filename for Windows compatibility.
|
|
797
|
+
|
|
798
|
+
Removes/replaces characters that are invalid on Windows:
|
|
799
|
+
< > : " / \ | ? *
|
|
800
|
+
|
|
801
|
+
Args:
|
|
802
|
+
filename: The filename to sanitize (without extension)
|
|
803
|
+
debug: Enable debug output
|
|
804
|
+
|
|
805
|
+
Returns:
|
|
806
|
+
str: Sanitized filename
|
|
807
|
+
"""
|
|
808
|
+
# Windows invalid characters: < > : " / \ | ? *
|
|
809
|
+
invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
|
|
810
|
+
sanitized = filename
|
|
811
|
+
|
|
812
|
+
for char in invalid_chars:
|
|
813
|
+
sanitized = sanitized.replace(char, '')
|
|
814
|
+
|
|
815
|
+
# Also handle leading/trailing spaces and dots
|
|
816
|
+
sanitized = sanitized.strip('. ')
|
|
817
|
+
|
|
818
|
+
if debug and sanitized != filename:
|
|
819
|
+
mmguero.eprint(f"Sanitized filename: '{filename}' → '{sanitized}'")
|
|
820
|
+
|
|
821
|
+
return sanitized
|
|
822
|
+
|
|
823
|
+
|
|
526
824
|
def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
|
|
527
825
|
"""Rename files based on AI-suggested names.
|
|
528
826
|
|
|
@@ -568,8 +866,9 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
|
|
|
568
866
|
dirname = os.path.dirname(filepath)
|
|
569
867
|
ext = os.path.splitext(filepath)[1]
|
|
570
868
|
|
|
571
|
-
# Build new filename
|
|
572
|
-
|
|
869
|
+
# Build new filename (sanitize for Windows compatibility)
|
|
870
|
+
sanitized_name = _sanitize_filename(suggested_name, debug=debug)
|
|
871
|
+
new_name = f"{sanitized_name}{ext}"
|
|
573
872
|
new_path = os.path.join(dirname, new_name)
|
|
574
873
|
|
|
575
874
|
# Skip if same name
|
|
@@ -578,10 +877,11 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
|
|
|
578
877
|
mmguero.eprint(f"Skipping rename (same name): {basename}")
|
|
579
878
|
continue
|
|
580
879
|
|
|
581
|
-
# Check if target already exists
|
|
880
|
+
# Check if target already exists - remove it so we can rename/overwrite
|
|
582
881
|
if os.path.exists(new_path):
|
|
583
|
-
|
|
584
|
-
|
|
882
|
+
if debug:
|
|
883
|
+
mmguero.eprint(f"Removing existing file {new_name} to allow rename")
|
|
884
|
+
os.remove(new_path)
|
|
585
885
|
|
|
586
886
|
try:
|
|
587
887
|
shutil.move(filepath, new_path)
|
|
@@ -610,7 +910,7 @@ def _search_spotify_album(album_name, debug=False):
|
|
|
610
910
|
from ddgs import DDGS
|
|
611
911
|
except ImportError:
|
|
612
912
|
if debug:
|
|
613
|
-
mmguero.eprint("
|
|
913
|
+
mmguero.eprint("ddgs not installed, skipping Spotify search")
|
|
614
914
|
return None
|
|
615
915
|
|
|
616
916
|
query = f"site:spotify.com {album_name} album"
|
|
@@ -645,7 +945,7 @@ def _get_spotify_album_info(spotify_url, debug=False):
|
|
|
645
945
|
from spotify_scraper import SpotifyClient
|
|
646
946
|
except ImportError:
|
|
647
947
|
if debug:
|
|
648
|
-
mmguero.eprint("
|
|
948
|
+
mmguero.eprint("spotifyscraper not installed, skipping Spotify info")
|
|
649
949
|
return None
|
|
650
950
|
|
|
651
951
|
try:
|
|
@@ -790,6 +1090,8 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
790
1090
|
|
|
791
1091
|
model = config.get("unify_album_model", "openai/gpt-oss-120b")
|
|
792
1092
|
prompt = config.get("unify_album_prompt", UNIFY_ALBUM_PROMPT_DEFAULT)
|
|
1093
|
+
batch_size = config.get("unify_album_batch_size", 10)
|
|
1094
|
+
batch_size_spotify = config.get("unify_album_batch_size_with_spotify", 5)
|
|
793
1095
|
|
|
794
1096
|
# Determine files to process
|
|
795
1097
|
audio_extensions = ['.mp3', '.mp4', '.m4a', '.wav', '.flac', '.ogg', '.aac', '.wma']
|
|
@@ -833,8 +1135,9 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
833
1135
|
mmguero.eprint(f"Found {len(file_paths)} audio files for album unification")
|
|
834
1136
|
|
|
835
1137
|
# Call AI to unify album metadata (first pass - gets unified album name)
|
|
836
|
-
unified_result =
|
|
837
|
-
file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt,
|
|
1138
|
+
unified_result = _unify_album_metadata_with_batching(
|
|
1139
|
+
file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt,
|
|
1140
|
+
batch_size=batch_size, batch_size_spotify=batch_size_spotify, debug=debug
|
|
838
1141
|
)
|
|
839
1142
|
|
|
840
1143
|
unified_album = unified_result.get('unified_album', '')
|
|
@@ -863,10 +1166,11 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
863
1166
|
|
|
864
1167
|
# Second AI pass with Spotify track listing for accurate ordering
|
|
865
1168
|
mmguero.eprint("Refining track order with Spotify data...")
|
|
866
|
-
unified_result =
|
|
1169
|
+
unified_result = _unify_album_metadata_with_batching(
|
|
867
1170
|
file_paths, groq_api_key, model, prompt,
|
|
868
1171
|
rename_prompt=rename_prompt,
|
|
869
1172
|
spotify_tracks=spotify_info.get('tracks', []),
|
|
1173
|
+
batch_size=batch_size, batch_size_spotify=batch_size_spotify,
|
|
870
1174
|
debug=debug
|
|
871
1175
|
)
|
|
872
1176
|
else:
|
|
@@ -2180,6 +2484,65 @@ class Plugger(object):
|
|
|
2180
2484
|
|
|
2181
2485
|
self.CreateCleanMuteList()
|
|
2182
2486
|
|
|
2487
|
+
# Fast-path: if no profanity detected and same format, just copy
|
|
2488
|
+
no_profanity = len(self.naughtyWordList) == 0
|
|
2489
|
+
same_format = (
|
|
2490
|
+
self.inputFileParts[1].lower().lstrip('.') == self.outputAudioFileFormat
|
|
2491
|
+
if hasattr(self, 'inputFileParts')
|
|
2492
|
+
else False
|
|
2493
|
+
)
|
|
2494
|
+
|
|
2495
|
+
if no_profanity and same_format:
|
|
2496
|
+
# Direct copy - no processing needed
|
|
2497
|
+
if self.debug:
|
|
2498
|
+
mmguero.eprint("No profanity detected and same format - using direct copy")
|
|
2499
|
+
|
|
2500
|
+
# Update progress bar description to reflect copy operation
|
|
2501
|
+
if progress:
|
|
2502
|
+
progress.set_description("Copying")
|
|
2503
|
+
|
|
2504
|
+
copy_start = time.time()
|
|
2505
|
+
shutil.copyfile(self.inputFileSpec, self.outputFileSpec)
|
|
2506
|
+
copy_time = time.time() - copy_start
|
|
2507
|
+
|
|
2508
|
+
# Still embed metadata and tag as processed
|
|
2509
|
+
if self.debug:
|
|
2510
|
+
mmguero.eprint("Embedding Shazam metadata into copied file...")
|
|
2511
|
+
self._embed_metadata(self.outputFileSpec)
|
|
2512
|
+
SetMonkeyplugTag(self.outputFileSpec, debug=self.debug)
|
|
2513
|
+
|
|
2514
|
+
# Complete progress
|
|
2515
|
+
if progress:
|
|
2516
|
+
if smooth_ticker:
|
|
2517
|
+
progress.n = progress.total
|
|
2518
|
+
progress.refresh()
|
|
2519
|
+
else:
|
|
2520
|
+
progress.update(1)
|
|
2521
|
+
progress.close()
|
|
2522
|
+
|
|
2523
|
+
# Record timing separately for copy (don't skew encode estimates)
|
|
2524
|
+
# Copy is much faster than encode, so we track it separately
|
|
2525
|
+
if step_timings is not None and file_duration > 0:
|
|
2526
|
+
step_timings['copy'] = (copy_time, file_duration)
|
|
2527
|
+
if timing_log is not None and file_duration > 0:
|
|
2528
|
+
for op, (wall_secs, audio_secs) in step_timings.items():
|
|
2529
|
+
update_timing_measurement(timing_log, op, wall_secs, audio_secs)
|
|
2530
|
+
save_timing_log(timing_log)
|
|
2531
|
+
|
|
2532
|
+
# Clean up progress references
|
|
2533
|
+
if hasattr(self, '_progress'):
|
|
2534
|
+
delattr(self, '_progress')
|
|
2535
|
+
for attr in ('_smooth_ticker', '_smooth_cumulative', '_smooth_extract_est',
|
|
2536
|
+
'_smooth_transcribe_est', '_will_transcribe',
|
|
2537
|
+
'_step_timings', '_timing_log', '_timing_file_duration'):
|
|
2538
|
+
if hasattr(self, attr):
|
|
2539
|
+
delattr(self, attr)
|
|
2540
|
+
|
|
2541
|
+
# Print profanity detection summary
|
|
2542
|
+
self._print_words_summary()
|
|
2543
|
+
|
|
2544
|
+
return self.outputFileSpec
|
|
2545
|
+
|
|
2183
2546
|
# Update progress after CreateCleanMuteList (step-based mode only)
|
|
2184
2547
|
if progress and not smooth_ticker:
|
|
2185
2548
|
did_extraction = (
|
|
@@ -3323,8 +3686,42 @@ DEFAULT_CONFIG = {
|
|
|
3323
3686
|
"ai_detect_prompt": AI_DETECT_PROMPT_DEFAULT,
|
|
3324
3687
|
"unify_album_model": "openai/gpt-oss-120b",
|
|
3325
3688
|
"unify_album_prompt": UNIFY_ALBUM_PROMPT_DEFAULT,
|
|
3689
|
+
"unify_album_batch_size": 10,
|
|
3690
|
+
"unify_album_batch_size_with_spotify": 5,
|
|
3326
3691
|
}
|
|
3327
3692
|
|
|
3693
|
+
# Validation rules for config values with defined options
|
|
3694
|
+
CONFIG_VALIDATION = {
|
|
3695
|
+
"show_words": {"choices": ["full", "clean", "none"], "default": "clean"},
|
|
3696
|
+
"detect_mode": {"choices": ["list", "ai", "both"], "default": "list"},
|
|
3697
|
+
}
|
|
3698
|
+
|
|
3699
|
+
|
|
3700
|
+
def validate_config_settings(config, debug=False):
    """
    Return a copy of the config with invalid choice-type settings reset.

    Every key listed in CONFIG_VALIDATION that is present in the config is
    checked against its allowed choices. A value outside the choices triggers
    a warning and is replaced by that key's default. Keys that are absent, or
    that have no validation rule, are passed through untouched. The input
    dict itself is never modified.

    Args:
        config: dict - Config settings to validate
        debug: bool - Enable debug output (accepted but not used here)

    Returns:
        dict: Validated config with invalid values replaced by defaults
    """
    result = dict(config)
    for setting, spec in CONFIG_VALIDATION.items():
        # Nothing to validate when the user did not set this key.
        if setting not in result:
            continue

        current = result[setting]
        allowed = spec["choices"]
        if current in allowed:
            continue

        fallback = spec["default"]
        mmguero.eprint(f"WARNING: CONFIG \"{setting}\" SET TO INVALID VALUE \"{current}\". USING DEFAULT \"{fallback}\".")
        result[setting] = fallback
    return result
|
|
3724
|
+
|
|
3328
3725
|
|
|
3329
3726
|
def load_config_settings(debug=False):
|
|
3330
3727
|
"""
|
|
@@ -3354,7 +3751,8 @@ def load_config_settings(debug=False):
|
|
|
3354
3751
|
if debug:
|
|
3355
3752
|
mmguero.eprint(f"Loaded config from: {config_path}")
|
|
3356
3753
|
|
|
3357
|
-
|
|
3754
|
+
# Validate and fix any invalid config values
|
|
3755
|
+
return validate_config_settings(config, debug=debug)
|
|
3358
3756
|
except (json.JSONDecodeError, IOError) as e:
|
|
3359
3757
|
if debug:
|
|
3360
3758
|
mmguero.eprint(f"Warning: Failed to load config from {config_path}: {e}")
|
|
@@ -3372,6 +3770,7 @@ def load_config_settings(debug=False):
|
|
|
3372
3770
|
if debug:
|
|
3373
3771
|
mmguero.eprint(f"Warning: Could not create default config: {e}")
|
|
3374
3772
|
|
|
3773
|
+
# Return a copy of DEFAULT_CONFIG (already validated)
|
|
3375
3774
|
return dict(DEFAULT_CONFIG)
|
|
3376
3775
|
|
|
3377
3776
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.1}/src/monkeyplug/data/profanity_list.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|