monkeyplug-enhanced 2.3.0__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/PKG-INFO +4 -4
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/pyproject.toml +4 -4
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/monkeyplug.py +600 -17
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/.gitignore +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/LICENSE +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/README.md +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/__init__.py +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/data/profanity_list.json +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/groq_config.py +0 -0
- {monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/separation.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: monkeyplug-enhanced
|
|
3
|
-
Version: 2.3.0
|
|
3
|
+
Version: 2.3.2
|
|
4
4
|
Summary: Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing.
|
|
5
5
|
Project-URL: Homepage, https://github.com/ljbred08/monkeyplug
|
|
6
6
|
Project-URL: Issues, https://github.com/ljbred08/monkeyplug/issues
|
|
7
7
|
Project-URL: Repository, https://github.com/ljbred08/monkeyplug.git
|
|
8
|
-
Author-email: Seth Grover <mero.mero.guero@gmail.com>, Lincoln Brown <link@brown.fm>
|
|
8
|
+
Author-email: Lincoln Brown <link@brown.fm>, Seth Grover <mero.mero.guero@gmail.com>
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Classifier: License :: OSI Approved :: BSD License
|
|
11
11
|
Classifier: Operating System :: OS Independent
|
|
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
13
13
|
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
14
14
|
Requires-Python: >=3.10
|
|
15
15
|
Requires-Dist: aiohttp>=3.9.0
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: ddgs>=6.0.0
|
|
17
17
|
Requires-Dist: groq>=0.1.0
|
|
18
18
|
Requires-Dist: mmguero==2.0.3
|
|
19
19
|
Requires-Dist: mutagen==1.47.0
|
|
@@ -22,7 +22,7 @@ Requires-Dist: requests==2.32.5
|
|
|
22
22
|
Requires-Dist: shazamio>=0.8.0
|
|
23
23
|
Requires-Dist: sherpa-onnx>=1.10.0
|
|
24
24
|
Requires-Dist: soundfile>=0.12.0
|
|
25
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: spotifyscraper>=0.1.0
|
|
26
26
|
Requires-Dist: tqdm>=4.65.0
|
|
27
27
|
Description-Content-Type: text/markdown
|
|
28
28
|
|
|
@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "monkeyplug-enhanced"
|
|
7
|
-
version = "2.3.0"
|
|
7
|
+
version = "2.3.2"
|
|
8
8
|
authors = [
|
|
9
|
-
{ name="Seth Grover", email="mero.mero.guero@gmail.com" },
|
|
10
9
|
{ name="Lincoln Brown", email="link@brown.fm" },
|
|
10
|
+
{ name="Seth Grover", email="mero.mero.guero@gmail.com" },
|
|
11
11
|
]
|
|
12
12
|
description = "Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing."
|
|
13
13
|
readme = "README.md"
|
|
@@ -29,8 +29,8 @@ dependencies = [
|
|
|
29
29
|
"tqdm>=4.65.0",
|
|
30
30
|
"shazamio>=0.8.0",
|
|
31
31
|
"aiohttp>=3.9.0",
|
|
32
|
-
"
|
|
33
|
-
"
|
|
32
|
+
"ddgs>=6.0.0",
|
|
33
|
+
"spotifyscraper>=0.1.0",
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
[project.urls]
|
|
@@ -123,13 +123,17 @@ UNIFY_ALBUM_PROMPT_DEFAULT = (
|
|
|
123
123
|
"titles, and current album names, determine the correct unified album name and "
|
|
124
124
|
"assign track numbers to each song. Consider the existing album name guesses and "
|
|
125
125
|
"song titles to infer the real album. Return track numbers in the order the songs "
|
|
126
|
-
"should appear on the album."
|
|
126
|
+
"should appear on the album. "
|
|
127
|
+
"CRITICAL: You MUST return a track entry for EVERY input file - do not skip any files. "
|
|
128
|
+
"The tracks array must contain exactly the same number of entries as the input."
|
|
127
129
|
)
|
|
128
130
|
|
|
129
131
|
UNIFY_ALBUM_RENAME_PROMPT_DEFAULT = (
|
|
130
132
|
"You are a music file naming expert. Suggest clean, consistent filenames for each track. "
|
|
131
133
|
"Use format: 'XX - Song Name' where XX is the track number with leading zero if needed. "
|
|
132
134
|
"Keep only essential information, remove extra words like 'feat', 'explicit', etc. "
|
|
135
|
+
"IMPORTANT: Never use these invalid characters in filenames: < > : \" / \\ | ? * "
|
|
136
|
+
"Replace punctuation marks like ? with nothing or - (hyphen). "
|
|
133
137
|
"Return the suggested filename WITHOUT the file extension."
|
|
134
138
|
)
|
|
135
139
|
|
|
@@ -139,6 +143,7 @@ UNIFY_ALBUM_SCHEMA = {
|
|
|
139
143
|
"unified_album": {"type": "string", "description": "The unified album name"},
|
|
140
144
|
"tracks": {
|
|
141
145
|
"type": "array",
|
|
146
|
+
"description": "MUST contain one entry for EACH input file - no files may be omitted",
|
|
142
147
|
"items": {
|
|
143
148
|
"type": "object",
|
|
144
149
|
"properties": {
|
|
@@ -419,9 +424,177 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
|
|
|
419
424
|
if debug:
|
|
420
425
|
mmguero.eprint(f"AI unification response: {content}")
|
|
421
426
|
|
|
427
|
+
# Validate that we got results for all files
|
|
428
|
+
returned_tracks = parsed.get('tracks', [])
|
|
429
|
+
if len(returned_tracks) < len(metadata_list):
|
|
430
|
+
missing = len(metadata_list) - len(returned_tracks)
|
|
431
|
+
mmguero.eprint(f"WARNING: AI only returned {len(returned_tracks)} of {len(metadata_list)} track assignments.")
|
|
432
|
+
mmguero.eprint(f"This is likely due to Groq's output token limit. {missing} files were not processed.")
|
|
433
|
+
if spotify_tracks:
|
|
434
|
+
mmguero.eprint("Try running again without --use-spotify, or process files in smaller batches.")
|
|
435
|
+
else:
|
|
436
|
+
mmguero.eprint("Try processing files in smaller batches (e.g., split into subdirectories).")
|
|
437
|
+
|
|
438
|
+
return parsed
|
|
439
|
+
|
|
440
|
+
except requests.exceptions.Timeout:
|
|
441
|
+
if attempt < max_retries - 1:
|
|
442
|
+
if debug:
|
|
443
|
+
mmguero.eprint(f"Request timed out, retrying in {retry_delay}s...")
|
|
444
|
+
time.sleep(retry_delay)
|
|
445
|
+
retry_delay *= 2
|
|
446
|
+
else:
|
|
447
|
+
raise Exception("Album unification request timed out")
|
|
448
|
+
|
|
449
|
+
except requests.exceptions.RequestException as e:
|
|
450
|
+
if attempt < max_retries - 1:
|
|
451
|
+
if debug:
|
|
452
|
+
mmguero.eprint(f"Request failed: {e}, retrying in {retry_delay}s...")
|
|
453
|
+
time.sleep(retry_delay)
|
|
454
|
+
retry_delay *= 2
|
|
455
|
+
else:
|
|
456
|
+
raise Exception(f"Album unification request failed: {e}")
|
|
457
|
+
|
|
458
|
+
raise Exception("Album unification failed after maximum retries")
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _call_groq_api_single_batch(metadata_list, system_prompt, groq_api_key, model, batch_num=1, total_batches=1, debug=False, progress_bar=None, batch_start_position=0.0, batch_slice_size=1.0, timing_log=None, operation_name='unify_batch_groq'):
|
|
462
|
+
"""Make a single API call to Groq for album unification.
|
|
463
|
+
|
|
464
|
+
Args:
|
|
465
|
+
metadata_list: List of file metadata dicts to send in this batch
|
|
466
|
+
system_prompt: System prompt for the AI
|
|
467
|
+
groq_api_key: Groq API key for authentication
|
|
468
|
+
model: AI model name
|
|
469
|
+
batch_num: Current batch number (for debug output)
|
|
470
|
+
total_batches: Total expected batches (for debug output)
|
|
471
|
+
debug: Enable debug output
|
|
472
|
+
progress_bar: Optional tqdm progress bar for progress tracking
|
|
473
|
+
batch_start_position: Starting position (0.0 to 1.0) for this batch in overall progress
|
|
474
|
+
batch_slice_size: Size of this batch's slice (0.0 to 1.0) of overall progress
|
|
475
|
+
timing_log: Timing log dict for estimation
|
|
476
|
+
operation_name: Name of operation for timing tracking ('unify_batch_groq' or 'unify_batch_spotify')
|
|
477
|
+
|
|
478
|
+
Returns:
|
|
479
|
+
dict: Parsed JSON response with 'unified_album' and 'tracks'
|
|
480
|
+
|
|
481
|
+
Raises:
|
|
482
|
+
Exception: If API call fails after retries
|
|
483
|
+
"""
|
|
484
|
+
import requests
|
|
485
|
+
import time
|
|
486
|
+
|
|
487
|
+
# Build input for AI
|
|
488
|
+
input_text = json.dumps(metadata_list, indent=2, ensure_ascii=False)
|
|
489
|
+
|
|
490
|
+
# Estimate tokens for this batch
|
|
491
|
+
batch_tokens = _estimate_batch_tokens(metadata_list, system_prompt)
|
|
492
|
+
|
|
493
|
+
# Estimate duration based on historical data
|
|
494
|
+
batch_estimated = estimate_step_duration_tokens(timing_log, operation_name, batch_tokens) or batch_tokens * 0.1
|
|
495
|
+
|
|
496
|
+
# API call with retry logic (more retries for transient 400 JSON validation errors)
|
|
497
|
+
max_retries = 5
|
|
498
|
+
retry_delay = 1
|
|
499
|
+
smooth_ticker = None
|
|
500
|
+
|
|
501
|
+
for attempt in range(max_retries):
|
|
502
|
+
try:
|
|
503
|
+
if debug:
|
|
504
|
+
batch_info = f" (batch {batch_num}" + (f"/{total_batches}" if isinstance(total_batches, int) and total_batches > 1 else "") + ")"
|
|
505
|
+
mmguero.eprint(f"Calling Groq API{batch_info} (attempt {attempt + 1}/{max_retries})...")
|
|
506
|
+
mmguero.eprint(f"Sending {len(metadata_list)} files to AI for unification")
|
|
507
|
+
|
|
508
|
+
# Start smooth progress ticker for this batch
|
|
509
|
+
if progress_bar:
|
|
510
|
+
# Reset to batch start position on retry
|
|
511
|
+
if attempt > 0:
|
|
512
|
+
progress_bar.n = batch_start_position * progress_bar.total
|
|
513
|
+
progress_bar.refresh()
|
|
514
|
+
|
|
515
|
+
if smooth_ticker is None:
|
|
516
|
+
smooth_ticker = _SmoothProgressTicker(progress_bar)
|
|
517
|
+
|
|
518
|
+
smooth_ticker.start(
|
|
519
|
+
cumulative=batch_start_position * progress_bar.total,
|
|
520
|
+
step_estimated_seconds=batch_estimated
|
|
521
|
+
)
|
|
522
|
+
|
|
523
|
+
api_start = time.time()
|
|
524
|
+
response = requests.post(
|
|
525
|
+
"https://api.groq.com/openai/v1/chat/completions",
|
|
526
|
+
headers={
|
|
527
|
+
"Authorization": f"Bearer {groq_api_key}",
|
|
528
|
+
"Content-Type": "application/json",
|
|
529
|
+
},
|
|
530
|
+
json={
|
|
531
|
+
"model": model,
|
|
532
|
+
"messages": [
|
|
533
|
+
{"role": "system", "content": system_prompt},
|
|
534
|
+
{"role": "user", "content": input_text},
|
|
535
|
+
],
|
|
536
|
+
"response_format": {
|
|
537
|
+
"type": "json_schema",
|
|
538
|
+
"json_schema": {
|
|
539
|
+
"name": "album_unification",
|
|
540
|
+
"strict": True,
|
|
541
|
+
"schema": UNIFY_ALBUM_SCHEMA,
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
},
|
|
545
|
+
timeout=120,
|
|
546
|
+
)
|
|
547
|
+
api_elapsed = time.time() - api_start
|
|
548
|
+
|
|
549
|
+
# Stop ticker and get actual time
|
|
550
|
+
if smooth_ticker:
|
|
551
|
+
actual_time = smooth_ticker.stop()
|
|
552
|
+
# Snap to actual batch end position
|
|
553
|
+
progress_bar.n = (batch_start_position + batch_slice_size) * progress_bar.total
|
|
554
|
+
progress_bar.refresh()
|
|
555
|
+
|
|
556
|
+
# Record timing
|
|
557
|
+
if timing_log is not None:
|
|
558
|
+
update_timing_measurement_tokens(timing_log, operation_name, actual_time, batch_tokens)
|
|
559
|
+
save_timing_log(timing_log)
|
|
560
|
+
|
|
561
|
+
# Handle rate limiting
|
|
562
|
+
if response.status_code == 429:
|
|
563
|
+
if attempt < max_retries - 1:
|
|
564
|
+
if debug:
|
|
565
|
+
mmguero.eprint(f"Rate limited, retrying in {retry_delay}s...")
|
|
566
|
+
time.sleep(retry_delay)
|
|
567
|
+
retry_delay *= 2
|
|
568
|
+
continue
|
|
569
|
+
raise Exception("Album unification rate limit exceeded")
|
|
570
|
+
|
|
571
|
+
if response.status_code == 401:
|
|
572
|
+
raise Exception("Invalid Groq API key for album unification")
|
|
573
|
+
|
|
574
|
+
# Handle 400 errors (usually JSON validation failures from Groq - transient)
|
|
575
|
+
if response.status_code == 400:
|
|
576
|
+
if attempt < max_retries - 1:
|
|
577
|
+
if debug:
|
|
578
|
+
mmguero.eprint(f"JSON validation error (400), retrying in {retry_delay}s...")
|
|
579
|
+
time.sleep(retry_delay)
|
|
580
|
+
retry_delay *= 2
|
|
581
|
+
continue
|
|
582
|
+
raise Exception("Album unification failed due to JSON validation errors")
|
|
583
|
+
|
|
584
|
+
response.raise_for_status()
|
|
585
|
+
|
|
586
|
+
result = response.json()
|
|
587
|
+
content = result.get("choices", [{}])[0].get("message", {}).get("content", "{}")
|
|
588
|
+
parsed = json.loads(content)
|
|
589
|
+
|
|
590
|
+
if debug:
|
|
591
|
+
mmguero.eprint(f"AI returned {len(parsed.get('tracks', []))} track assignments")
|
|
592
|
+
|
|
422
593
|
return parsed
|
|
423
594
|
|
|
424
595
|
except requests.exceptions.Timeout:
|
|
596
|
+
if smooth_ticker:
|
|
597
|
+
smooth_ticker.stop()
|
|
425
598
|
if attempt < max_retries - 1:
|
|
426
599
|
if debug:
|
|
427
600
|
mmguero.eprint(f"Request timed out, retrying in {retry_delay}s...")
|
|
@@ -431,6 +604,8 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
|
|
|
431
604
|
raise Exception("Album unification request timed out")
|
|
432
605
|
|
|
433
606
|
except requests.exceptions.RequestException as e:
|
|
607
|
+
if smooth_ticker:
|
|
608
|
+
smooth_ticker.stop()
|
|
434
609
|
if attempt < max_retries - 1:
|
|
435
610
|
if debug:
|
|
436
611
|
mmguero.eprint(f"Request failed: {e}, retrying in {retry_delay}s...")
|
|
@@ -442,6 +617,206 @@ def _unify_album_metadata(file_paths, groq_api_key, model, prompt, rename_prompt
|
|
|
442
617
|
raise Exception("Album unification failed after maximum retries")
|
|
443
618
|
|
|
444
619
|
|
|
620
|
+
def _unify_album_metadata_with_batching(file_paths, groq_api_key, model, prompt, rename_prompt=None, spotify_tracks=None, batch_size=10, batch_size_spotify=5, debug=False, verbose=False):
|
|
621
|
+
"""Use Groq AI to unify album metadata with automatic batching for large file lists.
|
|
622
|
+
|
|
623
|
+
Implements automatic batching to handle Groq's output token limits.
|
|
624
|
+
If a partial response is received, automatically retries with remaining files.
|
|
625
|
+
|
|
626
|
+
Args:
|
|
627
|
+
file_paths: List of audio file paths
|
|
628
|
+
groq_api_key: Groq API key for authentication
|
|
629
|
+
model: AI model name (e.g., "openai/gpt-oss-120b")
|
|
630
|
+
prompt: System prompt for the AI
|
|
631
|
+
rename_prompt: Optional prompt for renaming (if provided, adds suggested_name to response)
|
|
632
|
+
spotify_tracks: Optional list of track names from Spotify for accurate ordering
|
|
633
|
+
batch_size: Default batch size (without Spotify)
|
|
634
|
+
batch_size_spotify: Batch size when using Spotify (smaller due to larger prompts)
|
|
635
|
+
debug: Enable debug output
|
|
636
|
+
verbose: Disable progress bar if True
|
|
637
|
+
|
|
638
|
+
Returns:
|
|
639
|
+
Dict with 'unified_album' (str) and 'tracks' (list of dicts with
|
|
640
|
+
'filename', 'track_number', 'album_name', optionally 'suggested_name')
|
|
641
|
+
|
|
642
|
+
Raises:
|
|
643
|
+
ValueError: If API key is missing
|
|
644
|
+
Exception: If API call fails
|
|
645
|
+
"""
|
|
646
|
+
if not groq_api_key:
|
|
647
|
+
raise ValueError("Groq API key required for album unification")
|
|
648
|
+
|
|
649
|
+
# Read metadata from all files
|
|
650
|
+
if debug:
|
|
651
|
+
mmguero.eprint("Reading metadata from files...")
|
|
652
|
+
metadata_list = _read_metadata_from_files(file_paths, debug=debug)
|
|
653
|
+
|
|
654
|
+
# Build system prompt (add rename and Spotify instructions if needed)
|
|
655
|
+
system_prompt = prompt
|
|
656
|
+
if rename_prompt:
|
|
657
|
+
system_prompt = f"{prompt}\n\n{rename_prompt}"
|
|
658
|
+
|
|
659
|
+
# Determine operation name for timing
|
|
660
|
+
operation_name = 'unify_batch_spotify' if spotify_tracks else 'unify_batch_groq'
|
|
661
|
+
|
|
662
|
+
# Proactive batching: limit batch size to avoid overwhelming Groq
|
|
663
|
+
# With Spotify tracks, use smaller batches since the prompt is larger
|
|
664
|
+
max_batch_size = batch_size_spotify if spotify_tracks else batch_size
|
|
665
|
+
|
|
666
|
+
# Calculate expected batch count
|
|
667
|
+
expected_batches = (len(metadata_list) + max_batch_size - 1) // max_batch_size
|
|
668
|
+
|
|
669
|
+
# Guard against empty metadata list
|
|
670
|
+
if expected_batches == 0:
|
|
671
|
+
return {'unified_album': '', 'tracks': []}
|
|
672
|
+
|
|
673
|
+
# Estimate total tokens for all batches
|
|
674
|
+
total_tokens = 0
|
|
675
|
+
for i in range(expected_batches):
|
|
676
|
+
batch_metadata = metadata_list[i * max_batch_size : (i + 1) * max_batch_size]
|
|
677
|
+
batch_system_prompt = system_prompt
|
|
678
|
+
if spotify_tracks:
|
|
679
|
+
tracks_json = json.dumps(spotify_tracks, ensure_ascii=False)
|
|
680
|
+
batch_system_prompt = f"{system_prompt}\n\nOfficial track listing from Spotify: {tracks_json}"
|
|
681
|
+
total_tokens += _estimate_batch_tokens(batch_metadata, batch_system_prompt)
|
|
682
|
+
|
|
683
|
+
# Load timing log and estimate total duration
|
|
684
|
+
timing_log = load_timing_log()
|
|
685
|
+
total_estimated = estimate_step_duration_tokens(timing_log, operation_name, total_tokens) or total_tokens * 0.1
|
|
686
|
+
|
|
687
|
+
# Create progress bar
|
|
688
|
+
progress = None
|
|
689
|
+
if not verbose:
|
|
690
|
+
progress = tqdm(
|
|
691
|
+
total=total_estimated,
|
|
692
|
+
desc=f"Unifying Album ({expected_batches} batches)",
|
|
693
|
+
unit="s",
|
|
694
|
+
disable=False,
|
|
695
|
+
bar_format='{l_bar}{bar}| {n:.0f}/{total:.0f}s [{elapsed}<{remaining}]',
|
|
696
|
+
)
|
|
697
|
+
|
|
698
|
+
unified_album = None # Will be set from first batch and reused
|
|
699
|
+
all_tracks = [] # Accumulates results across batches
|
|
700
|
+
processed_files = set() # Tracks which files we've gotten results for
|
|
701
|
+
|
|
702
|
+
# Start with first batch
|
|
703
|
+
batch_metadata = metadata_list[:max_batch_size]
|
|
704
|
+
remaining_metadata = metadata_list[max_batch_size:]
|
|
705
|
+
batch_num = 0
|
|
706
|
+
actual_batch_num = 0 # Track actual batch attempts (including retries)
|
|
707
|
+
|
|
708
|
+
while batch_metadata:
|
|
709
|
+
batch_num += 1
|
|
710
|
+
actual_batch_num += 1
|
|
711
|
+
|
|
712
|
+
# Build system prompt for this batch
|
|
713
|
+
# ALWAYS pass full Spotify list - don't slice it!
|
|
714
|
+
batch_system_prompt = system_prompt
|
|
715
|
+
if spotify_tracks:
|
|
716
|
+
# Add FULL Spotify track listing every time
|
|
717
|
+
tracks_json = json.dumps(spotify_tracks, ensure_ascii=False)
|
|
718
|
+
batch_system_prompt = f"{system_prompt}\n\nOfficial track listing from Spotify: {tracks_json}"
|
|
719
|
+
if debug and batch_num == 1:
|
|
720
|
+
mmguero.eprint(f"Providing full Spotify track list ({len(spotify_tracks)} tracks) - AI will match by name")
|
|
721
|
+
|
|
722
|
+
# Calculate batch slice size and start position
|
|
723
|
+
batch_slice_size = 1.0 / expected_batches
|
|
724
|
+
# Clamp batch number for progress calculation to handle retries
|
|
725
|
+
progress_batch_num = min(actual_batch_num - 1, expected_batches - 1)
|
|
726
|
+
batch_start_position = progress_batch_num * batch_slice_size
|
|
727
|
+
|
|
728
|
+
# Update progress bar description
|
|
729
|
+
if progress:
|
|
730
|
+
display_batch_num = min(actual_batch_num, expected_batches)
|
|
731
|
+
progress.set_description(f"Processing Batch {display_batch_num}/{expected_batches}")
|
|
732
|
+
|
|
733
|
+
# Call API with current batch
|
|
734
|
+
try:
|
|
735
|
+
parsed = _call_groq_api_single_batch(
|
|
736
|
+
batch_metadata, batch_system_prompt, groq_api_key, model,
|
|
737
|
+
actual_batch_num, expected_batches, debug=debug,
|
|
738
|
+
progress_bar=progress, batch_start_position=batch_start_position,
|
|
739
|
+
batch_slice_size=batch_slice_size, timing_log=timing_log,
|
|
740
|
+
operation_name=operation_name
|
|
741
|
+
)
|
|
742
|
+
except Exception as e:
|
|
743
|
+
# If this isn't the first batch, we have partial results - fail gracefully
|
|
744
|
+
if all_tracks:
|
|
745
|
+
mmguero.eprint(f"Batch {actual_batch_num} failed after {len(all_tracks)} tracks were processed: {e}")
|
|
746
|
+
mmguero.eprint("Proceeding with partial results...")
|
|
747
|
+
break
|
|
748
|
+
# Close progress bar before raising
|
|
749
|
+
if progress:
|
|
750
|
+
progress.close()
|
|
751
|
+
raise
|
|
752
|
+
|
|
753
|
+
# On first successful call, capture unified_album name
|
|
754
|
+
if unified_album is None and parsed.get('unified_album'):
|
|
755
|
+
unified_album = parsed['unified_album']
|
|
756
|
+
|
|
757
|
+
# For subsequent batches, override the album name to match first batch
|
|
758
|
+
if unified_album and parsed.get('unified_album') != unified_album:
|
|
759
|
+
parsed['unified_album'] = unified_album
|
|
760
|
+
|
|
761
|
+
# Add returned tracks to our collection
|
|
762
|
+
returned_tracks = parsed.get('tracks', [])
|
|
763
|
+
|
|
764
|
+
# Guard against empty response to avoid infinite loop
|
|
765
|
+
if not returned_tracks:
|
|
766
|
+
mmguero.eprint(f"WARNING: Batch {actual_batch_num} returned no tracks. Stopping to avoid infinite loop.")
|
|
767
|
+
break
|
|
768
|
+
|
|
769
|
+
for track in returned_tracks:
|
|
770
|
+
filename = track['filename']
|
|
771
|
+
if filename not in processed_files:
|
|
772
|
+
all_tracks.append(track)
|
|
773
|
+
processed_files.add(filename)
|
|
774
|
+
|
|
775
|
+
if debug:
|
|
776
|
+
mmguero.eprint(f"Batch {actual_batch_num} complete: {len(returned_tracks)} tracks returned, {len(all_tracks)} total processed")
|
|
777
|
+
|
|
778
|
+
# Determine what's missing from this batch
|
|
779
|
+
returned_filenames = {t['filename'] for t in returned_tracks}
|
|
780
|
+
missing_metadata = [
|
|
781
|
+
m for m in batch_metadata
|
|
782
|
+
if m['filename'] not in returned_filenames and m['filename'] not in processed_files
|
|
783
|
+
]
|
|
784
|
+
|
|
785
|
+
if not missing_metadata and not remaining_metadata:
|
|
786
|
+
# All files processed!
|
|
787
|
+
if debug:
|
|
788
|
+
mmguero.eprint(f"All {len(all_tracks)} files processed successfully!")
|
|
789
|
+
break
|
|
790
|
+
|
|
791
|
+
# Prepare next batch: combine missing files + next chunk from remaining
|
|
792
|
+
next_batch = missing_metadata if missing_metadata else []
|
|
793
|
+
if remaining_metadata:
|
|
794
|
+
take = min(max_batch_size - len(next_batch), len(remaining_metadata))
|
|
795
|
+
next_batch.extend(remaining_metadata[:take])
|
|
796
|
+
remaining_metadata = remaining_metadata[take:]
|
|
797
|
+
|
|
798
|
+
if not next_batch:
|
|
799
|
+
break
|
|
800
|
+
|
|
801
|
+
if debug:
|
|
802
|
+
if missing_metadata:
|
|
803
|
+
new_files = len(next_batch) - len(missing_metadata)
|
|
804
|
+
mmguero.eprint(f"Partial response: {len(missing_metadata)} files from this batch need retry. Next batch: {len(missing_metadata)} retries + {new_files} new files = {len(next_batch)} total")
|
|
805
|
+
else:
|
|
806
|
+
mmguero.eprint(f"Starting batch {actual_batch_num + 1} with {len(next_batch)} files...")
|
|
807
|
+
|
|
808
|
+
batch_metadata = next_batch
|
|
809
|
+
|
|
810
|
+
# Close progress bar
|
|
811
|
+
if progress:
|
|
812
|
+
progress.close()
|
|
813
|
+
|
|
814
|
+
return {
|
|
815
|
+
'unified_album': unified_album or '',
|
|
816
|
+
'tracks': all_tracks
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
|
|
445
820
|
###################################################################################################
|
|
446
821
|
# Apply unified metadata to audio files
|
|
447
822
|
def _apply_unified_metadata(file_paths, unified_result, debug=False):
|
|
@@ -523,6 +898,35 @@ def _apply_unified_metadata(file_paths, unified_result, debug=False):
|
|
|
523
898
|
|
|
524
899
|
###################################################################################################
|
|
525
900
|
# Apply smart renaming to audio files
|
|
901
|
+
def _sanitize_filename(filename, debug=False):
|
|
902
|
+
r"""Sanitize filename for Windows compatibility.
|
|
903
|
+
|
|
904
|
+
Removes/replaces characters that are invalid on Windows:
|
|
905
|
+
< > : " / \ | ? *
|
|
906
|
+
|
|
907
|
+
Args:
|
|
908
|
+
filename: The filename to sanitize (without extension)
|
|
909
|
+
debug: Enable debug output
|
|
910
|
+
|
|
911
|
+
Returns:
|
|
912
|
+
str: Sanitized filename
|
|
913
|
+
"""
|
|
914
|
+
# Windows invalid characters: < > : " / \ | ? *
|
|
915
|
+
invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
|
|
916
|
+
sanitized = filename
|
|
917
|
+
|
|
918
|
+
for char in invalid_chars:
|
|
919
|
+
sanitized = sanitized.replace(char, '')
|
|
920
|
+
|
|
921
|
+
# Also handle leading/trailing spaces and dots
|
|
922
|
+
sanitized = sanitized.strip('. ')
|
|
923
|
+
|
|
924
|
+
if debug and sanitized != filename:
|
|
925
|
+
mmguero.eprint(f"Sanitized filename: '{filename}' → '{sanitized}'")
|
|
926
|
+
|
|
927
|
+
return sanitized
|
|
928
|
+
|
|
929
|
+
|
|
526
930
|
def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
|
|
527
931
|
"""Rename files based on AI-suggested names.
|
|
528
932
|
|
|
@@ -568,8 +972,9 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
|
|
|
568
972
|
dirname = os.path.dirname(filepath)
|
|
569
973
|
ext = os.path.splitext(filepath)[1]
|
|
570
974
|
|
|
571
|
-
# Build new filename
|
|
572
|
-
|
|
975
|
+
# Build new filename (sanitize for Windows compatibility)
|
|
976
|
+
sanitized_name = _sanitize_filename(suggested_name, debug=debug)
|
|
977
|
+
new_name = f"{sanitized_name}{ext}"
|
|
573
978
|
new_path = os.path.join(dirname, new_name)
|
|
574
979
|
|
|
575
980
|
# Skip if same name
|
|
@@ -578,10 +983,11 @@ def _apply_renames(file_paths, unified_result, rename_prompt, debug=False):
|
|
|
578
983
|
mmguero.eprint(f"Skipping rename (same name): {basename}")
|
|
579
984
|
continue
|
|
580
985
|
|
|
581
|
-
# Check if target already exists
|
|
986
|
+
# Check if target already exists - remove it so we can rename/overwrite
|
|
582
987
|
if os.path.exists(new_path):
|
|
583
|
-
|
|
584
|
-
|
|
988
|
+
if debug:
|
|
989
|
+
mmguero.eprint(f"Removing existing file {new_name} to allow rename")
|
|
990
|
+
os.remove(new_path)
|
|
585
991
|
|
|
586
992
|
try:
|
|
587
993
|
shutil.move(filepath, new_path)
|
|
@@ -610,7 +1016,7 @@ def _search_spotify_album(album_name, debug=False):
|
|
|
610
1016
|
from ddgs import DDGS
|
|
611
1017
|
except ImportError:
|
|
612
1018
|
if debug:
|
|
613
|
-
mmguero.eprint("
|
|
1019
|
+
mmguero.eprint("ddgs not installed, skipping Spotify search")
|
|
614
1020
|
return None
|
|
615
1021
|
|
|
616
1022
|
query = f"site:spotify.com {album_name} album"
|
|
@@ -645,7 +1051,7 @@ def _get_spotify_album_info(spotify_url, debug=False):
|
|
|
645
1051
|
from spotify_scraper import SpotifyClient
|
|
646
1052
|
except ImportError:
|
|
647
1053
|
if debug:
|
|
648
|
-
mmguero.eprint("
|
|
1054
|
+
mmguero.eprint("spotifyscraper not installed, skipping Spotify info")
|
|
649
1055
|
return None
|
|
650
1056
|
|
|
651
1057
|
try:
|
|
@@ -761,7 +1167,7 @@ def _apply_cover_art_to_files(file_paths, image_data, debug=False):
|
|
|
761
1167
|
|
|
762
1168
|
###################################################################################################
|
|
763
1169
|
# Run album unification process
|
|
764
|
-
def _run_album_unification(input_path, output_path, config, rename_prompt=None, use_spotify=None, debug=False):
|
|
1170
|
+
def _run_album_unification(input_path, output_path, config, rename_prompt=None, use_spotify=None, debug=False, verbose=False):
|
|
765
1171
|
"""Run the album unification process on a folder of files.
|
|
766
1172
|
|
|
767
1173
|
Args:
|
|
@@ -771,6 +1177,7 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
771
1177
|
rename_prompt: Optional prompt for smart renaming (None = no renaming)
|
|
772
1178
|
use_spotify: Spotify URL if provided, True to search for album, None/False to disable
|
|
773
1179
|
debug: Enable debug output
|
|
1180
|
+
verbose: Disable progress bar if True
|
|
774
1181
|
|
|
775
1182
|
Returns:
|
|
776
1183
|
str: Status message
|
|
@@ -790,6 +1197,8 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
790
1197
|
|
|
791
1198
|
model = config.get("unify_album_model", "openai/gpt-oss-120b")
|
|
792
1199
|
prompt = config.get("unify_album_prompt", UNIFY_ALBUM_PROMPT_DEFAULT)
|
|
1200
|
+
batch_size = config.get("unify_album_batch_size", 10)
|
|
1201
|
+
batch_size_spotify = config.get("unify_album_batch_size_with_spotify", 5)
|
|
793
1202
|
|
|
794
1203
|
# Determine files to process
|
|
795
1204
|
audio_extensions = ['.mp3', '.mp4', '.m4a', '.wav', '.flac', '.ogg', '.aac', '.wma']
|
|
@@ -833,8 +1242,9 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
833
1242
|
mmguero.eprint(f"Found {len(file_paths)} audio files for album unification")
|
|
834
1243
|
|
|
835
1244
|
# Call AI to unify album metadata (first pass - gets unified album name)
|
|
836
|
-
unified_result = _unify_album_metadata(
|
|
837
|
-
file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt,
|
|
1245
|
+
unified_result = _unify_album_metadata_with_batching(
|
|
1246
|
+
file_paths, groq_api_key, model, prompt, rename_prompt=rename_prompt,
|
|
1247
|
+
batch_size=batch_size, batch_size_spotify=batch_size_spotify, debug=debug, verbose=verbose
|
|
838
1248
|
)
|
|
839
1249
|
|
|
840
1250
|
unified_album = unified_result.get('unified_album', '')
|
|
@@ -863,11 +1273,12 @@ def _run_album_unification(input_path, output_path, config, rename_prompt=None,
|
|
|
863
1273
|
|
|
864
1274
|
# Second AI pass with Spotify track listing for accurate ordering
|
|
865
1275
|
mmguero.eprint("Refining track order with Spotify data...")
|
|
866
|
-
unified_result = _unify_album_metadata(
|
|
1276
|
+
unified_result = _unify_album_metadata_with_batching(
|
|
867
1277
|
file_paths, groq_api_key, model, prompt,
|
|
868
1278
|
rename_prompt=rename_prompt,
|
|
869
1279
|
spotify_tracks=spotify_info.get('tracks', []),
|
|
870
|
-
|
|
1280
|
+
batch_size=batch_size, batch_size_spotify=batch_size_spotify,
|
|
1281
|
+
debug=debug, verbose=verbose
|
|
871
1282
|
)
|
|
872
1283
|
else:
|
|
873
1284
|
mmguero.eprint("Could not fetch Spotify info, using AI results only")
|
|
@@ -2180,6 +2591,65 @@ class Plugger(object):
|
|
|
2180
2591
|
|
|
2181
2592
|
self.CreateCleanMuteList()
|
|
2182
2593
|
|
|
2594
|
+
# Fast-path: if no profanity detected and same format, just copy
|
|
2595
|
+
no_profanity = len(self.naughtyWordList) == 0
|
|
2596
|
+
same_format = (
|
|
2597
|
+
self.inputFileParts[1].lower().lstrip('.') == self.outputAudioFileFormat
|
|
2598
|
+
if hasattr(self, 'inputFileParts')
|
|
2599
|
+
else False
|
|
2600
|
+
)
|
|
2601
|
+
|
|
2602
|
+
if no_profanity and same_format:
|
|
2603
|
+
# Direct copy - no processing needed
|
|
2604
|
+
if self.debug:
|
|
2605
|
+
mmguero.eprint("No profanity detected and same format - using direct copy")
|
|
2606
|
+
|
|
2607
|
+
# Update progress bar description to reflect copy operation
|
|
2608
|
+
if progress:
|
|
2609
|
+
progress.set_description("Copying")
|
|
2610
|
+
|
|
2611
|
+
copy_start = time.time()
|
|
2612
|
+
shutil.copyfile(self.inputFileSpec, self.outputFileSpec)
|
|
2613
|
+
copy_time = time.time() - copy_start
|
|
2614
|
+
|
|
2615
|
+
# Still embed metadata and tag as processed
|
|
2616
|
+
if self.debug:
|
|
2617
|
+
mmguero.eprint("Embedding Shazam metadata into copied file...")
|
|
2618
|
+
self._embed_metadata(self.outputFileSpec)
|
|
2619
|
+
SetMonkeyplugTag(self.outputFileSpec, debug=self.debug)
|
|
2620
|
+
|
|
2621
|
+
# Complete progress
|
|
2622
|
+
if progress:
|
|
2623
|
+
if smooth_ticker:
|
|
2624
|
+
progress.n = progress.total
|
|
2625
|
+
progress.refresh()
|
|
2626
|
+
else:
|
|
2627
|
+
progress.update(1)
|
|
2628
|
+
progress.close()
|
|
2629
|
+
|
|
2630
|
+
# Record timing separately for copy (don't skew encode estimates)
|
|
2631
|
+
# Copy is much faster than encode, so we track it separately
|
|
2632
|
+
if step_timings is not None and file_duration > 0:
|
|
2633
|
+
step_timings['copy'] = (copy_time, file_duration)
|
|
2634
|
+
if timing_log is not None and file_duration > 0:
|
|
2635
|
+
for op, (wall_secs, audio_secs) in step_timings.items():
|
|
2636
|
+
update_timing_measurement(timing_log, op, wall_secs, audio_secs)
|
|
2637
|
+
save_timing_log(timing_log)
|
|
2638
|
+
|
|
2639
|
+
# Clean up progress references
|
|
2640
|
+
if hasattr(self, '_progress'):
|
|
2641
|
+
delattr(self, '_progress')
|
|
2642
|
+
for attr in ('_smooth_ticker', '_smooth_cumulative', '_smooth_extract_est',
|
|
2643
|
+
'_smooth_transcribe_est', '_will_transcribe',
|
|
2644
|
+
'_step_timings', '_timing_log', '_timing_file_duration'):
|
|
2645
|
+
if hasattr(self, attr):
|
|
2646
|
+
delattr(self, attr)
|
|
2647
|
+
|
|
2648
|
+
# Print profanity detection summary
|
|
2649
|
+
self._print_words_summary()
|
|
2650
|
+
|
|
2651
|
+
return self.outputFileSpec
|
|
2652
|
+
|
|
2183
2653
|
# Update progress after CreateCleanMuteList (step-based mode only)
|
|
2184
2654
|
if progress and not smooth_ticker:
|
|
2185
2655
|
did_extraction = (
|
|
@@ -3323,9 +3793,43 @@ DEFAULT_CONFIG = {
|
|
|
3323
3793
|
"ai_detect_prompt": AI_DETECT_PROMPT_DEFAULT,
|
|
3324
3794
|
"unify_album_model": "openai/gpt-oss-120b",
|
|
3325
3795
|
"unify_album_prompt": UNIFY_ALBUM_PROMPT_DEFAULT,
|
|
3796
|
+
"unify_album_batch_size": 10,
|
|
3797
|
+
"unify_album_batch_size_with_spotify": 5,
|
|
3798
|
+
}
|
|
3799
|
+
|
|
3800
|
+
# Validation rules for config values with defined options
|
|
3801
|
+
CONFIG_VALIDATION = {
|
|
3802
|
+
"show_words": {"choices": ["full", "clean", "none"], "default": "clean"},
|
|
3803
|
+
"detect_mode": {"choices": ["list", "ai", "both"], "default": "list"},
|
|
3326
3804
|
}
|
|
3327
3805
|
|
|
3328
3806
|
|
|
3807
|
+
def validate_config_settings(config, debug=False):
    """
    Return a copy of ``config`` with out-of-range option values reset.

    Every key listed in CONFIG_VALIDATION that is also present in the config
    is checked against that key's allowed choices. A value outside the
    choices triggers a warning on stderr and is replaced by the key's
    default. Keys absent from the config are left absent, and the input
    dict itself is never modified.

    Args:
        config: dict - Config settings to validate
        debug: bool - Accepted for signature parity with the other config
            helpers; not consulted by the validation itself

    Returns:
        dict: Shallow copy of the config with invalid values replaced by defaults
    """
    result = dict(config)
    for key, spec in CONFIG_VALIDATION.items():
        # Only settings the user actually supplied are validated.
        if key not in result:
            continue
        value = result[key]
        if value in spec["choices"]:
            continue
        default = spec["default"]
        mmguero.eprint(f"WARNING: CONFIG \"{key}\" SET TO INVALID VALUE \"{value}\". USING DEFAULT \"{default}\".")
        result[key] = default
    return result
|
|
3831
|
+
|
|
3832
|
+
|
|
3329
3833
|
def load_config_settings(debug=False):
|
|
3330
3834
|
"""
|
|
3331
3835
|
Load settings from JSON config file.
|
|
@@ -3354,7 +3858,8 @@ def load_config_settings(debug=False):
|
|
|
3354
3858
|
if debug:
|
|
3355
3859
|
mmguero.eprint(f"Loaded config from: {config_path}")
|
|
3356
3860
|
|
|
3357
|
-
|
|
3861
|
+
# Validate and fix any invalid config values
|
|
3862
|
+
return validate_config_settings(config, debug=debug)
|
|
3358
3863
|
except (json.JSONDecodeError, IOError) as e:
|
|
3359
3864
|
if debug:
|
|
3360
3865
|
mmguero.eprint(f"Warning: Failed to load config from {config_path}: {e}")
|
|
@@ -3372,6 +3877,7 @@ def load_config_settings(debug=False):
|
|
|
3372
3877
|
if debug:
|
|
3373
3878
|
mmguero.eprint(f"Warning: Could not create default config: {e}")
|
|
3374
3879
|
|
|
3880
|
+
# Return a copy of DEFAULT_CONFIG (already validated)
|
|
3375
3881
|
return dict(DEFAULT_CONFIG)
|
|
3376
3882
|
|
|
3377
3883
|
|
|
@@ -3438,6 +3944,80 @@ def update_timing_measurement(timing_log, operation, wall_seconds, audio_seconds
|
|
|
3438
3944
|
entry['run_count'] += 1
|
|
3439
3945
|
|
|
3440
3946
|
|
|
3947
|
+
def estimate_step_duration_tokens(timing_log, operation, input_tokens):
    """Estimate wall-clock seconds for an operation based on token-based historical data.

    The estimate is the historical seconds-per-token rate recorded for
    ``operation`` multiplied by ``input_tokens``.

    Args:
        timing_log: Timing log dict mapping operation names to entries that
            carry 'total_input_tokens', 'total_wall_seconds' and 'run_count'.
            May be None or empty.
        operation: Operation name (e.g., 'unify_batch_groq')
        input_tokens: Estimated input tokens

    Returns:
        float or None: Estimated seconds, or None if no usable data is
        available (no log, no entry, zero runs, no recorded tokens, or no
        recorded wall time).
    """
    if not timing_log:
        return None
    entry = timing_log.get(operation)
    if not entry or entry.get('run_count', 0) == 0:
        return None
    total_tokens = entry.get('total_input_tokens', 0)
    if total_tokens <= 0:
        return None
    # Read the wall time as defensively as the other fields: an entry that
    # lacks it has no usable rate, so report "no data" rather than raising.
    total_wall_seconds = entry.get('total_wall_seconds')
    if total_wall_seconds is None:
        return None
    rate = total_wall_seconds / total_tokens
    return rate * input_tokens
|
|
3966
|
+
|
|
3967
|
+
|
|
3968
|
+
def update_timing_measurement_tokens(timing_log, operation, wall_seconds, input_tokens):
    """Fold one token-based timing sample into the running totals.

    The entry for ``operation`` is created with zeroed totals on first use,
    then updated in place; nothing is returned.

    Args:
        timing_log: Timing log dict (mutated in place)
        operation: Operation name
        wall_seconds: Actual wall-clock seconds elapsed
        input_tokens: Actual input tokens processed
    """
    record = timing_log.setdefault(operation, {
        'total_input_tokens': 0,
        'total_wall_seconds': 0.0,
        'run_count': 0,
    })
    record['total_input_tokens'] += input_tokens
    record['total_wall_seconds'] += wall_seconds
    record['run_count'] += 1
|
|
3987
|
+
|
|
3988
|
+
|
|
3989
|
+
def _estimate_input_tokens(text):
|
|
3990
|
+
"""Estimate input token count using character approximation.
|
|
3991
|
+
|
|
3992
|
+
Args:
|
|
3993
|
+
text: String to estimate tokens for
|
|
3994
|
+
|
|
3995
|
+
Returns:
|
|
3996
|
+
int: Estimated token count (approximately characters / 4)
|
|
3997
|
+
"""
|
|
3998
|
+
return len(text) // 4
|
|
3999
|
+
|
|
4000
|
+
|
|
4001
|
+
def _estimate_batch_tokens(metadata_list, system_prompt):
    """Approximate the input tokens of one batch request.

    The metadata is serialized to indented, non-ASCII-escaped JSON, and its
    estimate is added to the estimate for the system prompt.

    Args:
        metadata_list: List of metadata dicts
        system_prompt: System prompt string

    Returns:
        int: Estimated input tokens (metadata estimate + prompt estimate)
    """
    serialized_metadata = json.dumps(metadata_list, indent=2, ensure_ascii=False)
    # Each part is estimated on its own and then summed, so the per-part
    # floor division is kept rather than dividing the combined length once.
    return sum(
        _estimate_input_tokens(part)
        for part in (serialized_metadata, system_prompt)
    )
|
|
4019
|
+
|
|
4020
|
+
|
|
3441
4021
|
###################################################################################################
|
|
3442
4022
|
# RunMonkeyPlug
|
|
3443
4023
|
def RunMonkeyPlug():
|
|
@@ -3985,7 +4565,8 @@ def RunMonkeyPlug():
|
|
|
3985
4565
|
config,
|
|
3986
4566
|
rename_prompt=args.autoRename,
|
|
3987
4567
|
use_spotify=args.useSpotify,
|
|
3988
|
-
debug=args.debug
|
|
4568
|
+
debug=args.debug,
|
|
4569
|
+
verbose=args.debug
|
|
3989
4570
|
)
|
|
3990
4571
|
print(result)
|
|
3991
4572
|
except Exception as e:
|
|
@@ -4282,7 +4863,8 @@ def RunMonkeyPlug():
|
|
|
4282
4863
|
config,
|
|
4283
4864
|
rename_prompt=args.autoRename,
|
|
4284
4865
|
use_spotify=args.useSpotify,
|
|
4285
|
-
debug=args.debug
|
|
4866
|
+
debug=args.debug,
|
|
4867
|
+
verbose=args.debug
|
|
4286
4868
|
)
|
|
4287
4869
|
mmguero.eprint(result)
|
|
4288
4870
|
except Exception as e:
|
|
@@ -4592,7 +5174,8 @@ def RunMonkeyPlug():
|
|
|
4592
5174
|
config,
|
|
4593
5175
|
rename_prompt=args.autoRename,
|
|
4594
5176
|
use_spotify=args.useSpotify,
|
|
4595
|
-
debug=args.debug
|
|
5177
|
+
debug=args.debug,
|
|
5178
|
+
verbose=args.debug
|
|
4596
5179
|
)
|
|
4597
5180
|
mmguero.eprint(result)
|
|
4598
5181
|
except Exception as e:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{monkeyplug_enhanced-2.3.0 → monkeyplug_enhanced-2.3.2}/src/monkeyplug/data/profanity_list.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|