monkeyplug-enhanced 2.2.4__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: monkeyplug-enhanced
3
- Version: 2.2.4
3
+ Version: 2.3.0
4
4
  Summary: Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing.
5
5
  Project-URL: Homepage, https://github.com/ljbred08/monkeyplug
6
6
  Project-URL: Issues, https://github.com/ljbred08/monkeyplug/issues
@@ -11,14 +11,18 @@ Classifier: License :: OSI Approved :: BSD License
11
11
  Classifier: Operating System :: OS Independent
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
14
- Requires-Python: >=3.6
14
+ Requires-Python: >=3.10
15
+ Requires-Dist: aiohttp>=3.9.0
16
+ Requires-Dist: duckduckgo-search>=6.0.0
15
17
  Requires-Dist: groq>=0.1.0
16
18
  Requires-Dist: mmguero==2.0.3
17
19
  Requires-Dist: mutagen==1.47.0
18
20
  Requires-Dist: numpy>=1.24.0
19
21
  Requires-Dist: requests==2.32.5
22
+ Requires-Dist: shazamio>=0.8.0
20
23
  Requires-Dist: sherpa-onnx>=1.10.0
21
24
  Requires-Dist: soundfile>=0.12.0
25
+ Requires-Dist: spotify-scraper>=0.1.0
22
26
  Requires-Dist: tqdm>=4.65.0
23
27
  Description-Content-Type: text/markdown
24
28
 
@@ -34,9 +38,12 @@ The CLI command is still `monkeyplug` — only the package name changed to avoid
34
38
 
35
39
  - **Groq API** integration (fast, default mode)
36
40
  - **AI instrumental generation** via sherpa-onnx source separation
41
+ - **AI profanity detection** via Groq LLM with structured outputs
37
42
  - **Wildcard/batch processing** with automatic vocal detection
43
+ - **Progress bar** for non-verbose mode
38
44
  - **Transcript save/reuse** for faster reprocessing
39
45
  - **Config file** support with sensible defaults
46
+ - **Automatic metadata tagging** via ShazamIO (title, artist, genre, cover art)
40
47
 
41
48
  ## How It Works
42
49
 
@@ -62,7 +69,7 @@ pip install 'git+https://github.com/ljbred08/monkeyplug'
62
69
  ### Prerequisites
63
70
 
64
71
  - **FFmpeg** — install via your OS package manager or from [ffmpeg.org](https://www.ffmpeg.org/download.html)
65
- - **Python 3.6+**
72
+ - **Python 3.10+**
66
73
  - **Groq API key** (for default mode) — see [Groq API Setup](#groq-api-setup)
67
74
  - Optional: [Whisper](https://github.com/openai/whisper) or [Vosk](https://github.com/alphacep/vosk-api) for offline recognition
68
75
 
@@ -94,6 +101,7 @@ echo 'gsk_...' > .groq_key
94
101
 
95
102
  ```bash
96
103
  # Basic usage — mutes profanity using Groq API and built-in word list
104
+ # Shows progress bar automatically in non-verbose mode
97
105
  monkeyplug -i song.mp3 -o song_clean.mp3
98
106
 
99
107
  # Verbose output to see what's happening
@@ -229,14 +237,130 @@ monkeyplug -i song.mp3 -o song_clean_strict.mp3 --input-transcript song_clean_tr
229
237
 
230
238
  ```bash
231
239
  # Use a custom text file (one word per line, or word|replacement)
232
- monkeyplug -i podcast.mp3 -o podcast_clean.mp3 -w custom_swears.txt
240
+ monkeyplug -i podcast.mp3 -o podcast_clean.mp3 --swears custom_swears.txt
233
241
 
234
242
  # Use a custom JSON file (array of strings)
235
- monkeyplug -i podcast.mp3 -o podcast_clean.mp3 -w custom_swears.json
243
+ monkeyplug -i podcast.mp3 -o podcast_clean.mp3 --swears custom_swears.json
236
244
 
237
245
  # Custom words are merged with the built-in profanity list
238
246
  ```
239
247
 
248
+ ## Automatic Metadata Tagging
249
+
250
+ monkeyplug automatically fetches song metadata from Shazam and embeds it into the output file:
251
+
252
+ - **Title, Artist, Genre** - Text tags embedded in the audio file
253
+ - **Cover Art** - Album artwork downloaded and embedded (MP3 only)
254
+
255
+ ```bash
256
+ # Metadata is enabled by default
257
+ monkeyplug -i song.mp3 -o song_clean.mp3
258
+
259
+ # Disable metadata fetching
260
+ monkeyplug -i song.mp3 -o song_clean.mp3 --disable-metadata
261
+ ```
262
+
263
+ **What happens:**
264
+ 1. The input file is analyzed by Shazam to identify the song
265
+ 2. Metadata (title, artist, genre, cover art URL) is retrieved
266
+ 3. Cover art is downloaded and embedded as an ID3 APIC (attached picture) frame
267
+ 4. Text tags are added to the output file
268
+
269
+ **Notes:**
270
+ - Requires internet connection for Shazam recognition
271
+ - Cover art embedding is supported for MP3 files
272
+ - If recognition fails, the file is still processed (no error)
273
+ - Metadata can be viewed in any music player or with `ffprobe`
274
+
275
+ ## Show Profanity Output
276
+
277
+ Control what's printed about detected profanity in normal (non-verbose) mode:
278
+
279
+ ```bash
280
+ # Show count only (default)
281
+ monkeyplug -i song.mp3 -o song_clean.mp3 -w clean
282
+
283
+ # Show full list with timestamps
284
+ monkeyplug -i song.mp3 -o song_clean.mp3 -w full
285
+
286
+ # Silent mode (no profanity output)
287
+ monkeyplug -i song.mp3 -o song_clean.mp3 -w none
288
+ ```
289
+
290
+ ## AI Profanity Detection
291
+
292
+ Use Groq's LLM for context-aware profanity detection instead of (or in addition to) the static word list:
293
+
294
+ ```bash
295
+ # AI-only detection (replaces static list)
296
+ monkeyplug -i song.mp3 -o song_clean.mp3 --detect ai
297
+
298
+ # Both list + AI (word flagged if either catches it)
299
+ monkeyplug -i song.mp3 -o song_clean.mp3 --detect both
300
+
301
+ # Default: static list only
302
+ monkeyplug -i song.mp3 -o song_clean.mp3 --detect list
303
+ ```
304
+
305
+ Requires a Groq API key (same setup as Groq STT mode). Works with all speech recognition modes (Groq, Whisper, Vosk).
306
+
307
+ Configurable via `~/.cache/monkeyplug/config.json`:
308
+ ```json
309
+ {
310
+ "detect_mode": "list",
311
+ "ai_detect_model": "openai/gpt-oss-20b",
312
+ "ai_detect_prompt": "You are a profanity detection assistant..."
313
+ }
314
+ ```
315
+
316
+ ## Album Metadata Unification
317
+
318
+ Unify album names and cover art, and assign track numbers, across a folder of songs using AI:
319
+
320
+ ```bash
321
+ # Basic AI unification
322
+ monkeyplug --unify-album
323
+
324
+ # With Spotify integration (recommended for best results)
325
+ monkeyplug --unify-album --use-spotify
326
+
327
+ # With direct Spotify URL (skip search)
328
+ monkeyplug --unify-album --use-spotify "https://open.spotify.com/album/1kCHru7uhxBUdzkm4gzRQc"
329
+
330
+ # Combine with normal processing
331
+ monkeyplug -i "album/*.mp3" -o "album/*_clean.mp3" --unify-album
332
+
333
+ # Full workflow with Spotify and smart renaming
334
+ monkeyplug -i "album/*.mp3" -o "album/*_clean.mp3" --unify-album --use-spotify --auto-rename
335
+ ```
336
+
337
+ The AI analyzes all songs together to determine the correct album name and track order. With `--use-spotify`, it fetches official cover art and track listings from Spotify for accurate results.
338
+
339
+ **Two modes:**
340
+
341
+ 1. **Combined with processing**: Runs after normal audio processing completes
342
+ 2. **Standalone**: Processes existing files without audio processing (requires Groq API key)
343
+
344
+ **Spotify Integration (--use-spotify):**
345
+ - Provide a direct Spotify URL to skip the search step
346
+ - Or let it search automatically for the album
347
+ - Downloads official cover art (640x640)
348
+ - Gets official track listing for accurate ordering
349
+ - Applies consistent cover art to all tracks
350
+
351
+ **Configurable via `~/.cache/monkeyplug/config.json`:**
352
+ ```json
353
+ {
354
+ "unify_album_model": "openai/gpt-oss-120b",
355
+ "unify_album_prompt": "You are a music metadata expert..."
356
+ }
357
+ ```
358
+
359
+ **Requirements:**
360
+ - Groq API key (same setup as other AI features)
361
+ - Files must have existing metadata (title, album)
362
+ - MP3 files get full support (album + track number + cover art via ID3 tags)
363
+
240
364
  ## Config File
241
365
 
242
366
  monkeyplug looks for a JSON config file in this order (first found wins):
@@ -252,7 +376,11 @@ If neither exists, a default config is auto-created at `~/.cache/monkeyplug/conf
252
376
  "pad_milliseconds_pre": 10,
253
377
  "pad_milliseconds_post": 10,
254
378
  "separation_padding": 1.0,
255
- "beep_hertz": 1000
379
+ "beep_hertz": 1000,
380
+ "show_words": "clean",
381
+ "detect_mode": "list",
382
+ "ai_detect_model": "openai/gpt-oss-20b",
383
+ "ai_detect_prompt": "You are a profanity detection assistant..."
256
384
  }
257
385
  ```
258
386
 
@@ -295,7 +423,9 @@ Censorship Modes:
295
423
  --instrumental-auto-candidates <int> Top candidates for AUTO matching (default: 5)
296
424
 
297
425
  Profanity:
298
- -w, --swears <file> Custom profanity list (text or JSON)
426
+ --swears <file> Custom profanity list (text or JSON)
427
+ --detect <list|ai|both> Profanity detection method (default: list)
428
+ -w, --show-words <clean|full|none> Show detected profanity (default: clean)
299
429
  --pad-milliseconds <int> Padding around profanity (default: 10)
300
430
  --pad-milliseconds-pre <int> Padding before profanity (default: 10)
301
431
  --pad-milliseconds-post <int> Padding after profanity (default: 10)
@@ -327,6 +457,8 @@ Audio Output:
327
457
 
328
458
  Other:
329
459
  --force Process file even if already tagged
460
+ --disable-metadata Disable automatic metadata fetching via ShazamIO
461
+ --unify-album Unify album metadata across all files in the folder using AI
330
462
  --clean-cache Delete all cached data (models, config) and exit
331
463
 
332
464
  Groq Options:
@@ -10,9 +10,12 @@ The CLI command is still `monkeyplug` — only the package name changed to avoid
10
10
 
11
11
  - **Groq API** integration (fast, default mode)
12
12
  - **AI instrumental generation** via sherpa-onnx source separation
13
+ - **AI profanity detection** via Groq LLM with structured outputs
13
14
  - **Wildcard/batch processing** with automatic vocal detection
15
+ - **Progress bar** for non-verbose mode
14
16
  - **Transcript save/reuse** for faster reprocessing
15
17
  - **Config file** support with sensible defaults
18
+ - **Automatic metadata tagging** via ShazamIO (title, artist, genre, cover art)
16
19
 
17
20
  ## How It Works
18
21
 
@@ -38,7 +41,7 @@ pip install 'git+https://github.com/ljbred08/monkeyplug'
38
41
  ### Prerequisites
39
42
 
40
43
  - **FFmpeg** — install via your OS package manager or from [ffmpeg.org](https://www.ffmpeg.org/download.html)
41
- - **Python 3.6+**
44
+ - **Python 3.10+**
42
45
  - **Groq API key** (for default mode) — see [Groq API Setup](#groq-api-setup)
43
46
  - Optional: [Whisper](https://github.com/openai/whisper) or [Vosk](https://github.com/alphacep/vosk-api) for offline recognition
44
47
 
@@ -70,6 +73,7 @@ echo 'gsk_...' > .groq_key
70
73
 
71
74
  ```bash
72
75
  # Basic usage — mutes profanity using Groq API and built-in word list
76
+ # Shows progress bar automatically in non-verbose mode
73
77
  monkeyplug -i song.mp3 -o song_clean.mp3
74
78
 
75
79
  # Verbose output to see what's happening
@@ -205,14 +209,130 @@ monkeyplug -i song.mp3 -o song_clean_strict.mp3 --input-transcript song_clean_tr
205
209
 
206
210
  ```bash
207
211
  # Use a custom text file (one word per line, or word|replacement)
208
- monkeyplug -i podcast.mp3 -o podcast_clean.mp3 -w custom_swears.txt
212
+ monkeyplug -i podcast.mp3 -o podcast_clean.mp3 --swears custom_swears.txt
209
213
 
210
214
  # Use a custom JSON file (array of strings)
211
- monkeyplug -i podcast.mp3 -o podcast_clean.mp3 -w custom_swears.json
215
+ monkeyplug -i podcast.mp3 -o podcast_clean.mp3 --swears custom_swears.json
212
216
 
213
217
  # Custom words are merged with the built-in profanity list
214
218
  ```
215
219
 
220
+ ## Automatic Metadata Tagging
221
+
222
+ monkeyplug automatically fetches song metadata from Shazam and embeds it into the output file:
223
+
224
+ - **Title, Artist, Genre** - Text tags embedded in the audio file
225
+ - **Cover Art** - Album artwork downloaded and embedded (MP3 only)
226
+
227
+ ```bash
228
+ # Metadata is enabled by default
229
+ monkeyplug -i song.mp3 -o song_clean.mp3
230
+
231
+ # Disable metadata fetching
232
+ monkeyplug -i song.mp3 -o song_clean.mp3 --disable-metadata
233
+ ```
234
+
235
+ **What happens:**
236
+ 1. The input file is analyzed by Shazam to identify the song
237
+ 2. Metadata (title, artist, genre, cover art URL) is retrieved
238
+ 3. Cover art is downloaded and embedded as an ID3 APIC (attached picture) frame
239
+ 4. Text tags are added to the output file
240
+
241
+ **Notes:**
242
+ - Requires internet connection for Shazam recognition
243
+ - Cover art embedding is supported for MP3 files
244
+ - If recognition fails, the file is still processed (no error)
245
+ - Metadata can be viewed in any music player or with `ffprobe`
246
+
247
+ ## Show Profanity Output
248
+
249
+ Control what's printed about detected profanity in normal (non-verbose) mode:
250
+
251
+ ```bash
252
+ # Show count only (default)
253
+ monkeyplug -i song.mp3 -o song_clean.mp3 -w clean
254
+
255
+ # Show full list with timestamps
256
+ monkeyplug -i song.mp3 -o song_clean.mp3 -w full
257
+
258
+ # Silent mode (no profanity output)
259
+ monkeyplug -i song.mp3 -o song_clean.mp3 -w none
260
+ ```
261
+
262
+ ## AI Profanity Detection
263
+
264
+ Use Groq's LLM for context-aware profanity detection instead of (or in addition to) the static word list:
265
+
266
+ ```bash
267
+ # AI-only detection (replaces static list)
268
+ monkeyplug -i song.mp3 -o song_clean.mp3 --detect ai
269
+
270
+ # Both list + AI (word flagged if either catches it)
271
+ monkeyplug -i song.mp3 -o song_clean.mp3 --detect both
272
+
273
+ # Default: static list only
274
+ monkeyplug -i song.mp3 -o song_clean.mp3 --detect list
275
+ ```
276
+
277
+ Requires a Groq API key (same setup as Groq STT mode). Works with all speech recognition modes (Groq, Whisper, Vosk).
278
+
279
+ Configurable via `~/.cache/monkeyplug/config.json`:
280
+ ```json
281
+ {
282
+ "detect_mode": "list",
283
+ "ai_detect_model": "openai/gpt-oss-20b",
284
+ "ai_detect_prompt": "You are a profanity detection assistant..."
285
+ }
286
+ ```
287
+
288
+ ## Album Metadata Unification
289
+
290
+ Unify album names and cover art, and assign track numbers, across a folder of songs using AI:
291
+
292
+ ```bash
293
+ # Basic AI unification
294
+ monkeyplug --unify-album
295
+
296
+ # With Spotify integration (recommended for best results)
297
+ monkeyplug --unify-album --use-spotify
298
+
299
+ # With direct Spotify URL (skip search)
300
+ monkeyplug --unify-album --use-spotify "https://open.spotify.com/album/1kCHru7uhxBUdzkm4gzRQc"
301
+
302
+ # Combine with normal processing
303
+ monkeyplug -i "album/*.mp3" -o "album/*_clean.mp3" --unify-album
304
+
305
+ # Full workflow with Spotify and smart renaming
306
+ monkeyplug -i "album/*.mp3" -o "album/*_clean.mp3" --unify-album --use-spotify --auto-rename
307
+ ```
308
+
309
+ The AI analyzes all songs together to determine the correct album name and track order. With `--use-spotify`, it fetches official cover art and track listings from Spotify for accurate results.
310
+
311
+ **Two modes:**
312
+
313
+ 1. **Combined with processing**: Runs after normal audio processing completes
314
+ 2. **Standalone**: Processes existing files without audio processing (requires Groq API key)
315
+
316
+ **Spotify Integration (--use-spotify):**
317
+ - Provide a direct Spotify URL to skip the search step
318
+ - Or let it search automatically for the album
319
+ - Downloads official cover art (640x640)
320
+ - Gets official track listing for accurate ordering
321
+ - Applies consistent cover art to all tracks
322
+
323
+ **Configurable via `~/.cache/monkeyplug/config.json`:**
324
+ ```json
325
+ {
326
+ "unify_album_model": "openai/gpt-oss-120b",
327
+ "unify_album_prompt": "You are a music metadata expert..."
328
+ }
329
+ ```
330
+
331
+ **Requirements:**
332
+ - Groq API key (same setup as other AI features)
333
+ - Files must have existing metadata (title, album)
334
+ - MP3 files get full support (album + track number + cover art via ID3 tags)
335
+
216
336
  ## Config File
217
337
 
218
338
  monkeyplug looks for a JSON config file in this order (first found wins):
@@ -228,7 +348,11 @@ If neither exists, a default config is auto-created at `~/.cache/monkeyplug/conf
228
348
  "pad_milliseconds_pre": 10,
229
349
  "pad_milliseconds_post": 10,
230
350
  "separation_padding": 1.0,
231
- "beep_hertz": 1000
351
+ "beep_hertz": 1000,
352
+ "show_words": "clean",
353
+ "detect_mode": "list",
354
+ "ai_detect_model": "openai/gpt-oss-20b",
355
+ "ai_detect_prompt": "You are a profanity detection assistant..."
232
356
  }
233
357
  ```
234
358
 
@@ -271,7 +395,9 @@ Censorship Modes:
271
395
  --instrumental-auto-candidates <int> Top candidates for AUTO matching (default: 5)
272
396
 
273
397
  Profanity:
274
- -w, --swears <file> Custom profanity list (text or JSON)
398
+ --swears <file> Custom profanity list (text or JSON)
399
+ --detect <list|ai|both> Profanity detection method (default: list)
400
+ -w, --show-words <clean|full|none> Show detected profanity (default: clean)
275
401
  --pad-milliseconds <int> Padding around profanity (default: 10)
276
402
  --pad-milliseconds-pre <int> Padding before profanity (default: 10)
277
403
  --pad-milliseconds-post <int> Padding after profanity (default: 10)
@@ -303,6 +429,8 @@ Audio Output:
303
429
 
304
430
  Other:
305
431
  --force Process file even if already tagged
432
+ --disable-metadata Disable automatic metadata fetching via ShazamIO
433
+ --unify-album Unify album metadata across all files in the folder using AI
306
434
  --clean-cache Delete all cached data (models, config) and exit
307
435
 
308
436
  Groq Options:
@@ -4,14 +4,14 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "monkeyplug-enhanced"
7
- version = "2.2.4"
7
+ version = "2.3.0"
8
8
  authors = [
9
9
  { name="Seth Grover", email="mero.mero.guero@gmail.com" },
10
10
  { name="Lincoln Brown", email="link@brown.fm" },
11
11
  ]
12
12
  description = "Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing."
13
13
  readme = "README.md"
14
- requires-python = ">=3.6"
14
+ requires-python = ">=3.10"
15
15
  classifiers = [
16
16
  "License :: OSI Approved :: BSD License",
17
17
  "Operating System :: OS Independent",
@@ -27,6 +27,10 @@ dependencies = [
27
27
  "numpy>=1.24.0",
28
28
  "soundfile>=0.12.0",
29
29
  "tqdm>=4.65.0",
30
+ "shazamio>=0.8.0",
31
+ "aiohttp>=3.9.0",
32
+ "duckduckgo-search>=6.0.0",
33
+ "spotify-scraper>=0.1.0",
30
34
  ]
31
35
 
32
36
  [project.urls]