PyPI - monkeyplug-enhanced - Versions diffs - 2.3.0__tar.gz → 2.3.2__tar.gz - Mend

monkeyplug-enhanced 2.3.0.tar.gz → 2.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) [hide / show]

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/PKG-INFO RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: monkeyplug-enhanced
-Version: 2.3.0
+Version: 2.3.2
 Summary: Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing.
 Project-URL: Homepage, https://github.com/ljbred08/monkeyplug
 Project-URL: Issues, https://github.com/ljbred08/monkeyplug/issues
 Project-URL: Repository, https://github.com/ljbred08/monkeyplug.git
-Author-email: Seth Grover <mero.mero.guero@gmail.com>, Lincoln Brown <link@brown.fm>
+Author-email: Lincoln Brown <link@brown.fm>, Seth Grover <mero.mero.guero@gmail.com>
 License-File: LICENSE
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Operating System :: OS Independent
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
 Requires-Python: >=3.10
 Requires-Dist: aiohttp>=3.9.0
-Requires-Dist: duckduckgo-search>=6.0.0
+Requires-Dist: ddgs>=6.0.0
 Requires-Dist: groq>=0.1.0
 Requires-Dist: mmguero==2.0.3
 Requires-Dist: mutagen==1.47.0
@@ -22,7 +22,7 @@ Requires-Dist: requests==2.32.5
 Requires-Dist: shazamio>=0.8.0
 Requires-Dist: sherpa-onnx>=1.10.0
 Requires-Dist: soundfile>=0.12.0
-Requires-Dist: spotify-scraper>=0.1.0
+Requires-Dist: spotifyscraper>=0.1.0
 Requires-Dist: tqdm>=4.65.0
 Description-Content-Type: text/markdown

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/pyproject.toml RENAMED Viewed

@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
 [project]
 name = "monkeyplug-enhanced"
-version = "2.3.0"
+version = "2.3.2"
 authors = [
-  { name="Seth Grover", email="mero.mero.guero@gmail.com" },
   { name="Lincoln Brown", email="link@brown.fm" },
+  { name="Seth Grover", email="mero.mero.guero@gmail.com" },
 ]
 description = "Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing."
 readme = "README.md"
@@ -29,8 +29,8 @@ dependencies = [
     "tqdm>=4.65.0",
     "shazamio>=0.8.0",
     "aiohttp>=3.9.0",
-    "duckduckgo-search>=6.0.0",
-    "spotify-scraper>=0.1.0",
+    "ddgs>=6.0.0",
+    "spotifyscraper>=0.1.0",
 ]
 [project.urls]

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/monkeyplug.py RENAMED Viewed

@@ -123,13 +123,17 @@ UNIFY_ALBUM_PROMPT_DEFAULT = (
     "titles, and current album names, determine the correct unified album name and "
     "assign track numbers to each song. Consider the existing album name guesses and "
     "song titles to infer the real album. Return track numbers in the order the songs "
-    "should appear on the album."
+    "should appear on the album. "
+    "CRITICAL: You MUST return a track entry for EVERY input file - do not skip any files. "
+    "The tracks array must contain exactly the same number of entries as the input."
 )
 UNIFY_ALBUM_RENAME_PROMPT_DEFAULT = (
     "You are a music file naming expert. Suggest clean, consistent filenames for each track. "
     "Use format: 'XX - Song Name' where XX is the track number with leading zero if needed. "
     "Keep only essential information, remove extra words like 'feat', 'explicit', etc. "
+    "IMPORTANT: Never use these invalid characters in filenames: < > : \" / \\ | ? * "
+    "Replace punctuation marks like ? with nothing or - (hyphen). "
     "Return the suggested filename WITHOUT the file extension."
 )
@@ -139,6 +143,7 @@ UNIFY_ALBUM_SCHEMA = {
         "unified_album": {"type": "string", "description": "The unified album name"},
         "tracks": {
             "type": "array",
+            "description": "MUST contain one entry for EACH input file - no files may be omitted",
             "items": {
                 "type": "object",
                 "properties": {
@@ -419,9 +424,177 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
             if debug:
                 mmguero.eprint(f"AI unification response: {content}")
+            # Validate that we got results for all files
+            returned_tracks = parsed.get('tracks', [])
+            if len(returned_tracks) < len(metadata_list):
+                missing = len(metadata_list) - len(returned_tracks)
+                mmguero.eprint(f"WARNING: AI only returned {len(returned_tracks)} of {len(metadata_list)} track assignments.")
+                mmguero.eprint(f"This is likely due to Groq's output token limit. {missing} files were not processed.")
+                if spotify_tracks:
+                    mmguero.eprint("Try running again without --use-spotify, or process files in smaller batches.")
+                else:
+                    mmguero.eprint("Try processing files in smaller batches (e.g., split into subdirectories).")
+            return parsed
+        except requests.exceptions.Timeout:
+            if attempt < max_retries - 1:
+                if debug:
+                    mmguero.eprint(f"Request timed out, retrying in {retry_delay}s...")
+                time.sleep(retry_delay)
+                retry_delay *= 2
+            else:
+                raise Exception("Album unification request timed out")
+        except requests.exceptions.RequestException as e:
+            if attempt < max_retries - 1:
+                if debug:
+                    mmguero.eprint(f"Request failed: {e}, retrying in {retry_delay}s...")
+                time.sleep(retry_delay)
+                retry_delay *= 2
+            else:
+                raise Exception(f"Album unification request failed: {e}")
+    raise Exception("Album unification failed after maximum retries")
+def _call_groq_api_single_batch(metadata_list, system_prompt, groq_api_key, model, batch_num=1, total_batches=1, debug=False, progress_bar=None, batch_start_position=0.0, batch_slice_size=1.0, timing_log=None, operation_name='unify_batch_groq'):
+    """Make a single API call to Groq for album unification.
+    Args:
+        metadata_list: List of file metadata dicts to send in this batch
+        system_prompt: System prompt for the AI
+        groq_api_key: Groq API key for authentication
+        model: AI model name
+        batch_num: Current batch number (for debug output)
+        total_batches: Total expected batches (for debug output)
+        debug: Enable debug output
+        progress_bar: Optional tqdm progress bar for progress tracking
+        batch_start_position: Starting position (0.0 to 1.0) for this batch in overall progress
+        batch_slice_size: Size of this batch's slice (0.0 to 1.0) of overall progress
+        timing_log: Timing log dict for estimation
+        operation_name: Name of operation for timing tracking ('unify_batch_groq' or 'unify_batch_spotify')
+    Returns:
+        dict: Parsed JSON response with 'unified_album' and 'tracks'
+    Raises:
+        Exception: If API call fails after retries
+    """
+    import requests
+    import time
+    # Build input for AI
+    input_text = json.dumps(metadata_list, indent=2, ensure_ascii=False)
+    # Estimate tokens for this batch
+    batch_tokens = _estimate_batch_tokens(metadata_list, system_prompt)
+    # Estimate duration based on historical data
+    batch_estimated = estimate_step_duration_tokens(timing_log, operation_name, batch_tokens) or batch_tokens * 0.1
+    # API call with retry logic (more retries for transient 400 JSON validation errors)
+    max_retries = 5
+    retry_delay = 1
+    smooth_ticker = None
+    for attempt in range(max_retries):
+        try:
+            if debug:
+                batch_info = f" (batch {batch_num}" + (f"/{total_batches}" if isinstance(total_batches, int) and total_batches > 1 else "") + ")"
+                mmguero.eprint(f"Calling Groq API{batch_info} (attempt {attempt + 1}/{max_retries})...")
+                mmguero.eprint(f"Sending {len(metadata_list)} files to AI for unification")
+            # Start smooth progress ticker for this batch
+            if progress_bar:
+                # Reset to batch start position on retry
+                if attempt > 0:
+                    progress_bar.n = batch_start_position * progress_bar.total
+                    progress_bar.refresh()
+                if smooth_ticker is None:
+                    smooth_ticker = _SmoothProgressTicker(progress_bar)
+                smooth_ticker.start(
+                    cumulative=batch_start_position * progress_bar.total,
+                    step_estimated_seconds=batch_estimated
+                )
+            api_start = time.time()
+            response = requests.post(
+                "https://api.groq.com/openai/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {groq_api_key}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": model,
+                    "messages": [
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": input_text},
+                    ],
+                    "response_format": {
+                        "type": "json_schema",
+                        "json_schema": {
+                            "name": "album_unification",
+                            "strict": True,
+                            "schema": UNIFY_ALBUM_SCHEMA,
+                        }
+                    }
+                },
+                timeout=120,
+            )
+            api_elapsed = time.time() - api_start
+            # Stop ticker and get actual time
+            if smooth_ticker:
+                actual_time = smooth_ticker.stop()
+                # Snap to actual batch end position
+                progress_bar.n = (batch_start_position + batch_slice_size) * progress_bar.total
+                progress_bar.refresh()
+                # Record timing
+                if timing_log is not None:
+                    update_timing_measurement_tokens(timing_log, operation_name, actual_time, batch_tokens)
+                    save_timing_log(timing_log)
+            # Handle rate limiting
+            if response.status_code == 429:
+                if attempt < max_retries - 1:
+                    if debug:
+                        mmguero.eprint(f"Rate limited, retrying in {retry_delay}s...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                    continue
+                raise Exception("Album unification rate limit exceeded")
+            if response.status_code == 401:
+                raise Exception("Invalid Groq API key for album unification")
+            # Handle 400 errors (usually JSON validation failures from Groq - transient)
+            if response.status_code == 400:
+                if attempt < max_retries - 1:
+                    if debug:
+                        mmguero.eprint(f"JSON validation error (400), retrying in {retry_delay}s...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                    continue
+                raise Exception("Album unification failed due to JSON validation errors")
+            response.raise_for_status()
+            result = response.json()
+            content = result.get("choices", [{}])[0].get("message", {}).get("content", "{}")
+            parsed = json.loads(content)
+            if debug:
+                mmguero.eprint(f"AI returned {len(parsed.get('tracks', []))} track assignments")
             return parsed
         except requests.exceptions.Timeout:
+            if smooth_ticker:
+                smooth_ticker.stop()
             if attempt < max_retries - 1:
                 if debug:
                     mmguero.eprint(f"Request timed out, retrying in {retry_delay}s...")
@@ -431,6 +604,8 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
                 raise Exception("Album unification request timed out")
         except requests.exceptions.RequestException as e:
+            if smooth_ticker:
+                smooth_ticker.stop()
             if attempt < max_retries - 1:
                 if debug:
                     mmguero.eprint(f"Request failed: {e}, retrying in {retry_delay}s...")
@@ -442,6 +617,206 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
     raise Exception("Album unification failed after maximum retries")
+def _unify_album_metadata_with_batching(file_paths, groq_api_key, model, prompt, rename_prompt=None, spotify_tracks=None, batch_size=10, batch_size_spotify=5, debug=False, verbose=False):
+    """Use Groq AI to unify album metadata with automatic batching for large file lists.
+    Implements automatic batching to handle Groq's output token limits.
+    If a partial response is received, automatically retries with remaining files.
+    Args:
+        file_paths: List of audio file paths
+        groq_api_key: Groq API key for authentication
+        model: AI model name (e.g., "openai/gpt-oss-120b")
+        prompt: System prompt for the AI
+        rename_prompt: Optional prompt for renaming (if provided, adds suggested_name to response)
+        spotify_tracks: Optional list of track names from Spotify for accurate ordering
+        batch_size: Default batch size (without Spotify)
+        batch_size_spotify: Batch size when using Spotify (smaller due to larger prompts)
+        debug: Enable debug output
+        verbose: Disable progress bar if True
+    Returns:
+        Dict with 'unified_album' (str) and 'tracks' (list of dicts with
+        'filename', 'track_number', 'album_name', optionally 'suggested_name')
+    Raises:
+        ValueError: If API key is missing
+        Exception: If API call fails
+    """
+    if not groq_api_key:
+        raise ValueError("Groq API key required for album unification")
+    # Read metadata from all files
+    if debug:
+        mmguero.eprint("Reading metadata from files...")
+    metadata_list = _read_metadata_from_files(file_paths, debug=debug)
+    # Build system prompt (add rename and Spotify instructions if needed)
+    system_prompt = prompt
+    if rename_prompt:
+        system_prompt = f"{prompt}\n\n{rename_prompt}"
+    # Determine operation name for timing
+    operation_name = 'unify_batch_spotify' if spotify_tracks else 'unify_batch_groq'
+    # Proactive batching: limit batch size to avoid overwhelming Groq
+    # With Spotify tracks, use smaller batches since the prompt is larger
+    max_batch_size = batch_size_spotify if spotify_tracks else batch_size
+    # Calculate expected batch count
+    expected_batches = (len(metadata_list) + max_batch_size - 1) // max_batch_size
+    # Guard against empty metadata list
+    if expected_batches == 0:
+        return {'unified_album': '', 'tracks': []}
+    # Estimate total tokens for all batches
+    total_tokens = 0
+    for i in range(expected_batches):
+        batch_metadata = metadata_list[i * max_batch_size : (i + 1) * max_batch_size]
+        batch_system_prompt = system_prompt
+        if spotify_tracks:
+            tracks_json = json.dumps(spotify_tracks, ensure_ascii=False)
+            batch_system_prompt = f"{system_prompt}\n\nOfficial track listing from Spotify: {tracks_json}"
+        total_tokens += _estimate_batch_tokens(batch_metadata, batch_system_prompt)
+    # Load timing log and estimate total duration
+    timing_log = load_timing_log()
+    total_estimated = estimate_step_duration_tokens(timing_log, operation_name, total_tokens) or total_tokens * 0.1
+    # Create progress bar
+    progress = None
+    if not verbose:
+        progress = tqdm(
+            total=total_estimated,
+            desc=f"Unifying Album ({expected_batches} batches)",
+            unit="s",
+            disable=False,
+            bar_format='{l_bar}{bar}| {n:.0f}/{total:.0f}s [{elapsed}<{remaining}]',
+        )
+    unified_album = None  # Will be set from first batch and reused
+    all_tracks = []       # Accumulates results across batches
+    processed_files = set()  # Tracks which files we've gotten results for
+    # Start with first batch
+    batch_metadata = metadata_list[:max_batch_size]
+    remaining_metadata = metadata_list[max_batch_size:]
+    batch_num = 0
+    actual_batch_num = 0  # Track actual batch attempts (including retries)
+    while batch_metadata:
+        batch_num += 1
+        actual_batch_num += 1
+        # Build system prompt for this batch
+        # ALWAYS pass full Spotify list - don't slice it!
+        batch_system_prompt = system_prompt
+        if spotify_tracks:
+            # Add FULL Spotify track listing every time
+            tracks_json = json.dumps(spotify_tracks, ensure_ascii=False)
+            batch_system_prompt = f"{system_prompt}\n\nOfficial track listing from Spotify: {tracks_json}"
+            if debug and batch_num == 1:
+                mmguero.eprint(f"Providing full Spotify track list ({len(spotify_tracks)} tracks) - AI will match by name")
+        # Calculate batch slice size and start position
+        batch_slice_size = 1.0 / expected_batches
+        # Clamp batch number for progress calculation to handle retries
+        progress_batch_num = min(actual_batch_num - 1, expected_batches - 1)
+        batch_start_position = progress_batch_num * batch_slice_size
+        # Update progress bar description
+        if progress:
+            display_batch_num = min(actual_batch_num, expected_batches)
+            progress.set_description(f"Processing Batch {display_batch_num}/{expected_batches}")
+        # Call API with current batch
+        try:
+            parsed = _call_groq_api_single_batch(
+                batch_metadata, batch_system_prompt, groq_api_key, model,
+                actual_batch_num, expected_batches, debug=debug,
+                progress_bar=progress, batch_start_position=batch_start_position,
+                batch_slice_size=batch_slice_size, timing_log=timing_log,
+                operation_name=operation_name
+            )
+        except Exception as e:
+            # If this isn't the first batch, we have partial results - fail gracefully
+            if all_tracks:
+                mmguero.eprint(f"Batch {actual_batch_num} failed after {len(all_tracks)} tracks were processed: {e}")
+                mmguero.eprint("Proceeding with partial results...")
+                break
+            # Close progress bar before raising
+            if progress:
+                progress.close()
+            raise
+        # On first successful call, capture unified_album name
+        if unified_album is None and parsed.get('unified_album'):
+            unified_album = parsed['unified_album']
+        # For subsequent batches, override the album name to match first batch
+        if unified_album and parsed.get('unified_album') != unified_album:
+            parsed['unified_album'] = unified_album
+        # Add returned tracks to our collection
+        returned_tracks = parsed.get('tracks', [])
+        # Guard against empty response to avoid infinite loop
+        if not returned_tracks:
+            mmguero.eprint(f"WARNING: Batch {actual_batch_num} returned no tracks. Stopping to avoid infinite loop.")
+            break
+        for track in returned_tracks:
+            filename = track['filename']
+            if filename not in processed_files:
+                all_tracks.append(track)
+                processed_files.add(filename)
+        if debug:
+            mmguero.eprint(f"Batch {actual_batch_num} complete: {len(returned_tracks)} tracks returned, {len(all_tracks)} total processed")
+        # Determine what's missing from this batch
+        returned_filenames = {t['filename'] for t in returned_tracks}
+        missing_metadata = [
+            m for m in batch_metadata
+            if m['filename'] not in returned_filenames and m['filename'] not in processed_files
+        ]
+        if not missing_metadata and not remaining_metadata:
+            # All files processed!
+            if debug:
+                mmguero.eprint(f"All {len(all_tracks)} files processed successfully!")
+            break
+        # Prepare next batch: combine missing files + next chunk from remaining
+        next_batch = missing_metadata if missing_metadata else []
+        if remaining_metadata:
+            take = min(max_batch_size - len(next_batch), len(remaining_metadata))
+            next_batch.extend(remaining_metadata[:take])
+            remaining_metadata = remaining_metadata[take:]
+        if not next_batch:
+            break
+        if debug:
+            if missing_metadata:
+                new_files = len(next_batch) - len(missing_metadata)
+                mmguero.eprint(f"Partial response: {len(missing_metadata)} files from this batch need retry. Next batch: {len(missing_metadata)} retries + {new_files} new files = {len(next_batch)} total")
+            else:
+                mmguero.eprint(f"Starting batch {actual_batch_num + 1} with {len(next_batch)} files...")
+        batch_metadata = next_batch
+    # Close progress bar
+    if progress:
+        progress.close()
+    return {
+        'unified_album': unified_album or '',
+        'tracks': all_tracks
+    }
 ###################################################################################################
 # Apply unified metadata to audio files
 def _apply_unified_metadata(file_paths, unified_result, debug=False):
@@ -523,6 +898,35 @@ def _apply_unified_metadata(file_paths, unified_result, debug=False):
 ###################################################################################################
 # Apply smart renaming to audio files
+def _sanitize_filename(filename, debug=False):
+    r"""Sanitize filename for Windows compatibility.
+    Removes/replaces characters that are invalid on Windows:
+    < > : " / \ | ? *
+    Args:
+        filename: The filename to sanitize (without extension)
+        debug: Enable debug output
+    Returns:
+        str: Sanitized filename
+    """
+    # Windows invalid characters: < > : " / \ | ? *
+    invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
+    sanitized = filename
+    for char in invalid_chars:
+        sanitized = sanitized.replace(char, '')
+    # Also handle leading/trailing spaces and dots
+    sanitized = sanitized.strip('. ')
+    if debug and sanitized != filename:
+        mmguero.eprint(f"Sanitized filename: '{filename}' → '{sanitized}'")
+    return sanitized
 def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
     """Rename files based on AI-suggested names.
@@ -568,8 +972,9 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
         dirname = os.path.dirname(filepath)
         ext = os.path.splitext(filepath)[1]
-        # Build new filename
-        new_name = f"{suggested_name}{ext}"
+        # Build new filename (sanitize for Windows compatibility)
+        sanitized_name = _sanitize_filename(suggested_name, debug=debug)
+        new_name = f"{sanitized_name}{ext}"
         new_path = os.path.join(dirname, new_name)
         # Skip if same name
@@ -578,10 +983,11 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
                 mmguero.eprint(f"Skipping rename (same name): {basename}")
             continue
-        # Check if target already exists
+        # Check if target already exists - remove it so we can rename/overwrite
         if os.path.exists(new_path):
-            mmguero.eprint(f"Cannot rename {basename} to {new_name}: target already exists")
-            continue
+            if debug:
+                mmguero.eprint(f"Removing existing file {new_name} to allow rename")
+            os.remove(new_path)
         try:
             shutil.move(filepath, new_path)
@@ -610,7 +1016,7 @@ def _search_spotify_album(album_name, debug=False):
         from ddgs import DDGS
     except ImportError:
         if debug:
-            mmguero.eprint("duckduckgo-search not installed, skipping Spotify search")
+            mmguero.eprint("ddgs not installed, skipping Spotify search")
         return None
     query = f"site:spotify.com {album_name} album"
@@ -645,7 +1051,7 @@ def _get_spotify_album_info(spotify_url, debug=False):
         from spotify_scraper import SpotifyClient
     except ImportError:
         if debug:
-            mmguero.eprint("spotify-scraper not installed, skipping Spotify info")
+            mmguero.eprint("spotifyscraper not installed, skipping Spotify info")
         return None
     try:
@@ -761,7 +1167,7 @@ def _apply_cover_art_to_files(file_paths, image_data, debug=False):
 ###################################################################################################
 # Run album unification process
-def _run_album_unification(input_path, output_path, config, rename_prompt=None, use_spotify=None, debug=False):
+def _run_album_unification(input_path, output_path, config, rename_prompt=None, use_spotify=None, debug=False, verbose=False):
     """Run the album unification process on a folder of files.
     Args:
@@ -771,6 +1177,7 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
         rename_prompt: Optional prompt for smart renaming (None = no renaming)
         use_spotify: Spotify URL if provided, True to search for album, None/False to disable
         debug: Enable debug output
+        verbose: Disable progress bar if True
     Returns:
         str: Status message
@@ -790,6 +1197,8 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
     model = config.get("unify_album_model", "openai/gpt-oss-120b")
     prompt = config.get("unify_album_prompt", UNIFY_ALBUM_PROMPT_DEFAULT)
+    batch_size = config.get("unify_album_batch_size", 10)
+    batch_size_spotify = config.get("unify_album_batch_size_with_spotify", 5)
     # Determine files to process
     audio_extensions = ['.mp3', '.mp4', '.m4a', '.wav', '.flac', '.ogg', '.aac', '.wma']
@@ -833,8 +1242,9 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
     mmguero.eprint(f"Found {len(file_paths)} audio files for album unification")
     # Call AI to unify album metadata (first pass - gets unified album name)
-    unified_result = _unify_album_metadata(
-        file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt, debug=debug
+    unified_result = _unify_album_metadata_with_batching(
+        file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt,
+        batch_size=batch_size, batch_size_spotify=batch_size_spotify, debug=debug, verbose=verbose
     )
     unified_album = unified_result.get('unified_album', '')
@@ -863,11 +1273,12 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
             # Second AI pass with Spotify track listing for accurate ordering
             mmguero.eprint("Refining track order with Spotify data...")
-            unified_result = _unify_album_metadata(
+            unified_result = _unify_album_metadata_with_batching(
                 file_paths, groq_api_key, model, prompt,
                 rename_prompt=rename_prompt,
                 spotify_tracks=spotify_info.get('tracks', []),
-                debug=debug
+                batch_size=batch_size, batch_size_spotify=batch_size_spotify,
+                debug=debug, verbose=verbose
             )
         else:
             mmguero.eprint("Could not fetch Spotify info, using AI results only")
@@ -2180,6 +2591,65 @@ class Plugger(object):
             self.CreateCleanMuteList()
+            # Fast-path: if no profanity detected and same format, just copy
+            no_profanity = len(self.naughtyWordList) == 0
+            same_format = (
+                self.inputFileParts[1].lower().lstrip('.') == self.outputAudioFileFormat
+                if hasattr(self, 'inputFileParts')
+                else False
+            )
+            if no_profanity and same_format:
+                # Direct copy - no processing needed
+                if self.debug:
+                    mmguero.eprint("No profanity detected and same format - using direct copy")
+                # Update progress bar description to reflect copy operation
+                if progress:
+                    progress.set_description("Copying")
+                copy_start = time.time()
+                shutil.copyfile(self.inputFileSpec, self.outputFileSpec)
+                copy_time = time.time() - copy_start
+                # Still embed metadata and tag as processed
+                if self.debug:
+                    mmguero.eprint("Embedding Shazam metadata into copied file...")
+                self._embed_metadata(self.outputFileSpec)
+                SetMonkeyplugTag(self.outputFileSpec, debug=self.debug)
+                # Complete progress
+                if progress:
+                    if smooth_ticker:
+                        progress.n = progress.total
+                        progress.refresh()
+                    else:
+                        progress.update(1)
+                    progress.close()
+                # Record timing separately for copy (don't skew encode estimates)
+                # Copy is much faster than encode, so we track it separately
+                if step_timings is not None and file_duration > 0:
+                    step_timings['copy'] = (copy_time, file_duration)
+                if timing_log is not None and file_duration > 0:
+                    for op, (wall_secs, audio_secs) in step_timings.items():
+                        update_timing_measurement(timing_log, op, wall_secs, audio_secs)
+                    save_timing_log(timing_log)
+                # Clean up progress references
+                if hasattr(self, '_progress'):
+                    delattr(self, '_progress')
+                for attr in ('_smooth_ticker', '_smooth_cumulative', '_smooth_extract_est',
+                              '_smooth_transcribe_est', '_will_transcribe',
+                              '_step_timings', '_timing_log', '_timing_file_duration'):
+                    if hasattr(self, attr):
+                        delattr(self, attr)
+                # Print profanity detection summary
+                self._print_words_summary()
+                return self.outputFileSpec
             # Update progress after CreateCleanMuteList (step-based mode only)
             if progress and not smooth_ticker:
                 did_extraction = (
@@ -3323,9 +3793,43 @@ DEFAULT_CONFIG = {
     "ai_detect_prompt": AI_DETECT_PROMPT_DEFAULT,
     "unify_album_model": "openai/gpt-oss-120b",
     "unify_album_prompt": UNIFY_ALBUM_PROMPT_DEFAULT,
+    "unify_album_batch_size": 10,
+    "unify_album_batch_size_with_spotify": 5,
+}
+# Validation rules for config values with defined options
+CONFIG_VALIDATION = {
+    "show_words": {"choices": ["full", "clean", "none"], "default": "clean"},
+    "detect_mode": {"choices": ["list", "ai", "both"], "default": "list"},
 }
+def validate_config_settings(config, debug=False):
+    """
+    Validate config settings and fix invalid values.
+    For each config key that has defined choices, validates the value.
+    If invalid, prints a warning and uses the default value.
+    Args:
+        config: dict - Config settings to validate
+        debug: bool - Enable debug output
+    Returns:
+        dict: Validated config with invalid values replaced by defaults
+    """
+    validated = dict(config)
+    for key, rules in CONFIG_VALIDATION.items():
+        if key in validated:
+            value = validated[key]
+            choices = rules["choices"]
+            if value not in choices:
+                default = rules["default"]
+                mmguero.eprint(f"WARNING: CONFIG \"{key}\" SET TO INVALID VALUE \"{value}\". USING DEFAULT \"{default}\".")
+                validated[key] = default
+    return validated
 def load_config_settings(debug=False):
     """
     Load settings from JSON config file.
@@ -3354,7 +3858,8 @@ def load_config_settings(debug=False):
                 if debug:
                     mmguero.eprint(f"Loaded config from: {config_path}")
-                return config
+                # Validate and fix any invalid config values
+                return validate_config_settings(config, debug=debug)
             except (json.JSONDecodeError, IOError) as e:
                 if debug:
                     mmguero.eprint(f"Warning: Failed to load config from {config_path}: {e}")
@@ -3372,6 +3877,7 @@ def load_config_settings(debug=False):
         if debug:
             mmguero.eprint(f"Warning: Could not create default config: {e}")
+    # Return a copy of DEFAULT_CONFIG (already validated)
     return dict(DEFAULT_CONFIG)
@@ -3438,6 +3944,80 @@ def update_timing_measurement(timing_log, operation, wall_seconds, audio_seconds
     entry['run_count'] += 1
+def estimate_step_duration_tokens(timing_log, operation, input_tokens):
+    """Estimate wall-clock seconds for an operation based on token-based historical data.
+    Args:
+        timing_log: Timing log dict
+        operation: Operation name (e.g., 'unify_batch_groq')
+        input_tokens: Estimated input tokens
+    Returns:
+        float or None: Estimated seconds, or None if no data available.
+    """
+    entry = timing_log.get(operation)
+    if not entry or entry.get('run_count', 0) == 0:
+        return None
+    total_tokens = entry.get('total_input_tokens', 0)
+    if total_tokens <= 0:
+        return None
+    rate = entry['total_wall_seconds'] / total_tokens
+    return rate * input_tokens
+def update_timing_measurement_tokens(timing_log, operation, wall_seconds, input_tokens):
+    """Add a new token-based timing measurement to the running averages.
+    Args:
+        timing_log: Timing log dict
+        operation: Operation name
+        wall_seconds: Actual wall-clock seconds elapsed
+        input_tokens: Actual input tokens processed
+    """
+    if operation not in timing_log:
+        timing_log[operation] = {
+            'total_input_tokens': 0,
+            'total_wall_seconds': 0.0,
+            'run_count': 0,
+        }
+    entry = timing_log[operation]
+    entry['total_input_tokens'] += input_tokens
+    entry['total_wall_seconds'] += wall_seconds
+    entry['run_count'] += 1
+def _estimate_input_tokens(text):
+    """Estimate input token count using character approximation.
+    Args:
+        text: String to estimate tokens for
+    Returns:
+        int: Estimated token count (approximately characters / 4)
+    """
+    return len(text) // 4
+def _estimate_batch_tokens(metadata_list, system_prompt):
+    """Estimate total input tokens for a batch request.
+    Args:
+        metadata_list: List of metadata dicts
+        system_prompt: System prompt string
+    Returns:
+        int: Estimated input tokens
+    """
+    # Count tokens in metadata
+    metadata_json = json.dumps(metadata_list, indent=2, ensure_ascii=False)
+    metadata_tokens = _estimate_input_tokens(metadata_json)
+    # Count tokens in system prompt
+    prompt_tokens = _estimate_input_tokens(system_prompt)
+    return metadata_tokens + prompt_tokens
 ###################################################################################################
 # RunMonkeyPlug
 def RunMonkeyPlug():
@@ -3985,7 +4565,8 @@ def RunMonkeyPlug():
                 config,
                 rename_prompt=args.autoRename,
                 use_spotify=args.useSpotify,
-                debug=args.debug
+                debug=args.debug,
+                verbose=args.debug
             )
             print(result)
         except Exception as e:
@@ -4282,7 +4863,8 @@ def RunMonkeyPlug():
                     config,
                     rename_prompt=args.autoRename,
                     use_spotify=args.useSpotify,
-                    debug=args.debug
+                    debug=args.debug,
+                    verbose=args.debug
                 )
                 mmguero.eprint(result)
             except Exception as e:
@@ -4592,7 +5174,8 @@ def RunMonkeyPlug():
                 config,
                 rename_prompt=args.autoRename,
                 use_spotify=args.useSpotify,
-                debug=args.debug
+                debug=args.debug,
+                verbose=args.debug
             )
             mmguero.eprint(result)
         except Exception as e:

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/.gitignore RENAMED Viewed

File without changes

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/LICENSE RENAMED Viewed

File without changes

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/README.md RENAMED Viewed

File without changes

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/__init__.py RENAMED Viewed

File without changes

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/data/profanity_list.json RENAMED Viewed

File without changes

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/groq_config.py RENAMED Viewed

File without changes

{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/separation.py RENAMED Viewed

File without changes

monkeyplug-enhanced 2.3.0__tar.gz → 2.3.2__tar.gz

monkeyplug-enhanced 2.3.0tar.gz → 2.3.2tar.gz