monkeyplug-enhanced 2.2.3__tar.gz → 2.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: monkeyplug-enhanced
3
- Version: 2.2.3
3
+ Version: 2.2.5
4
4
  Summary: Enhanced fork of monkeyplug — censors profanity in audio files using speech recognition with Groq API, AI instrumental generation, and batch processing.
5
5
  Project-URL: Homepage, https://github.com/ljbred08/monkeyplug
6
6
  Project-URL: Issues, https://github.com/ljbred08/monkeyplug/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "monkeyplug-enhanced"
7
- version = "2.2.3"
7
+ version = "2.2.5"
8
8
  authors = [
9
9
  { name="Seth Grover", email="mero.mero.guero@gmail.com" },
10
10
  { name="Lincoln Brown", email="link@brown.fm" },
@@ -16,6 +16,8 @@ import requests
16
16
  import shutil
17
17
  import string
18
18
  import sys
19
+ import threading
20
+ import time
19
21
  import wave
20
22
  from tqdm import tqdm
21
23
 
@@ -85,6 +87,36 @@ DEFAULT_WHISPER_MODEL_DIR = os.getenv(
85
87
  DEFAULT_WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL_NAME", "small.en")
86
88
  DEFAULT_TORCH_THREADS = 0
87
89
 
90
# Default system prompt for AI-based profanity detection. It is the default
# value of the `aiDetectPrompt` constructor argument and of the
# "ai_detect_prompt" config key, and is sent as the "system" message; the
# "user" message is the numbered, timestamped transcript.
AI_DETECT_PROMPT_DEFAULT = (
    "You are a profanity detection assistant for audio content. "
    "Given a numbered transcript with timestamps, identify all words that are profane, vulgar, or offensive. "
    "Return each word's index number, the word itself, and its timestamps. "
    "Consider context — some words are profane in one context but not another."
)

# JSON schema sent as the `response_format.json_schema.schema` of the chat
# request (with "strict": True). Every property is listed in "required" and
# "additionalProperties" is False at both object levels.
AI_DETECT_SCHEMA = {
    "type": "object",
    "properties": {
        # Free-text explanation; only printed in debug mode by the caller.
        "reasoning": {"type": "string", "description": "Brief explanation of detection decisions"},
        # One entry per flagged word. Only "index" is used to flag words in
        # the transcript; "word", "start" and "end" are echoed in debug output.
        "profane_words": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "index": {"type": "integer", "description": "Word index in transcript"},
                    "word": {"type": "string"},
                    "start": {"type": "number"},
                    "end": {"type": "number"}
                },
                "required": ["index", "word", "start", "end"],
                "additionalProperties": False
            }
        }
    },
    "required": ["reasoning", "profane_words"],
    "additionalProperties": False
}
119
+
88
120
  ###################################################################################################
89
121
  # Determine script_path and script_name in a way that works both as module and direct execution
90
122
  try:
@@ -221,6 +253,63 @@ def GetCodecs(local_filename, debug=False):
221
253
  return result
222
254
 
223
255
 
256
+ ###################################################################################################
257
class _SmoothProgressTicker:
    """Background thread that smoothly advances a tqdm bar based on elapsed time.

    Used when historical timing data allows estimating step durations.
    The bar advances linearly within each step's estimated range, clamped
    so it never overshoots the estimate or the bar's total.  ``stop()`` halts
    the ticker and reports how long the step really took; the caller feeds
    that into the ``cumulative`` position of the next ``start()``.

    The ``bar`` object only needs ``n`` and ``total`` attributes and a
    ``refresh()`` method.
    """

    # Seconds between two bar refreshes.
    _TICK_INTERVAL = 0.25

    def __init__(self, bar):
        self._bar = bar
        self._cumulative = 0.0  # Position where current step begins
        self._step_estimate = 0.0  # Estimated seconds for current step
        # time.monotonic() when the step started; None until the first start().
        # A monotonic clock is used so that system clock adjustments cannot
        # produce negative or inflated step durations.
        self._step_start = None
        self._stop_event = threading.Event()
        self._thread = None

    def start(self, cumulative, step_estimated_seconds):
        """Begin ticking for a new step.

        Args:
            cumulative: Bar position at which this step begins.
            step_estimated_seconds: Estimated duration of the step; ``None``
                or negative values are treated as 0 so the bar never moves
                backwards.
        """
        self.stop()  # Stop any previous tick
        self._cumulative = cumulative
        self._step_estimate = max(step_estimated_seconds or 0.0, 0.0)
        self._step_start = time.monotonic()
        # Each step gets its own event: if a previous thread outlived the
        # join timeout in stop(), its (already set) event keeps it stopped
        # instead of being re-armed by a shared event being cleared.
        self._stop_event = threading.Event()
        self._thread = threading.Thread(
            target=self._tick, args=(self._stop_event,), daemon=True
        )
        self._thread.start()

    def _tick(self, stop_event=None):
        """Thread body: move the bar every ``_TICK_INTERVAL`` seconds until stopped."""
        if stop_event is None:
            stop_event = self._stop_event
        while not stop_event.is_set():
            try:
                elapsed = time.monotonic() - self._step_start
                position = self._cumulative + min(elapsed, self._step_estimate)
                # Never exceed the bar's total
                if self._bar.total is not None:
                    position = min(position, self._bar.total)
                self._bar.n = position
                self._bar.refresh()
            except (TypeError, ValueError, AttributeError):
                break  # Bar was closed externally
            stop_event.wait(self._TICK_INTERVAL)

    def stop(self):
        """Stop the ticker and return actual elapsed seconds for this step.

        Returns 0.0 if no step was ever started.  The step start time is not
        reset, so a repeated call reports the time since the last start().
        """
        self._stop_event.set()
        thread, self._thread = self._thread, None
        if thread is not None and thread.is_alive():
            thread.join(timeout=1.0)
        if self._step_start is None:
            return 0.0
        return time.monotonic() - self._step_start

    def adjust_total(self, delta):
        """Adjust the bar's total by delta (e.g., remove an unused step estimate).

        The total is never lowered below the bar's current position.
        """
        if self._bar.total is not None:
            self._bar.total = max(self._bar.total + delta, self._bar.n)
311
+
312
+
224
313
  #################################################################################
225
314
  class Plugger(object):
226
315
  debug = False
@@ -284,20 +373,30 @@ class Plugger(object):
284
373
  force=False,
285
374
  dbug=False,
286
375
  instrumentalFileSpec=None,
376
+ showWords="clean",
377
+ detectMode="list",
378
+ groqApiKey=None,
379
+ aiDetectModel="openai/gpt-oss-20b",
380
+ aiDetectPrompt=AI_DETECT_PROMPT_DEFAULT,
287
381
  ):
288
- self.padSecPre = padMsecPre / 1000.0
289
- self.padSecPost = padMsecPost / 1000.0
382
+ self.debug = dbug
383
+ self.outputJson = outputJson
384
+ self.inputTranscript = inputTranscript
385
+ self.saveTranscript = saveTranscript
386
+ self.forceDespiteTag = force
290
387
  self.beep = beep
291
388
  self.beepHertz = beepHertz
292
389
  self.beepMixNormalize = beepMixNormalize
293
390
  self.beepAudioWeight = beepAudioWeight
294
391
  self.beepSineWeight = beepSineWeight
295
392
  self.beepDropTransition = beepDropTransition
296
- self.forceDespiteTag = force
297
- self.debug = dbug
298
- self.outputJson = outputJson
299
- self.inputTranscript = inputTranscript
300
- self.saveTranscript = saveTranscript
393
+ self.padSecPre = padMsecPre / 1000.0
394
+ self.padSecPost = padMsecPost / 1000.0
395
+ self.showWords = showWords
396
+ self.detectMode = detectMode
397
+ self.groqApiKey = groqApiKey
398
+ self.aiDetectModel = aiDetectModel
399
+ self.aiDetectPrompt = aiDetectPrompt
301
400
 
302
401
  # determine input file name, or download and save file
303
402
  if (iFileSpec is not None) and os.path.isfile(iFileSpec):
@@ -670,12 +769,42 @@ class Plugger(object):
670
769
 
671
770
  ######## CreateCleanMuteList #################################################
672
771
  def CreateCleanMuteList(self):
673
- # Try to load existing transcript first, otherwise perform speech recognition
772
+ smooth = hasattr(self, '_smooth_ticker') and self._smooth_ticker is not None
773
+ cumulative = getattr(self, '_smooth_cumulative', 0.0)
774
+ will_transcribe = getattr(self, '_will_transcribe', False)
775
+
776
+ # Start ticker for transcribe step (if applicable)
777
+ if smooth and will_transcribe:
778
+ est = getattr(self, '_smooth_transcribe_est', 0)
779
+ if hasattr(self, '_progress') and self._progress:
780
+ self._progress.set_description("Transcribing")
781
+ self._smooth_ticker.start(cumulative, est)
782
+
783
+ transcribe_start = time.time() if will_transcribe else 0
674
784
  if not self.LoadTranscriptFromFile():
675
785
  self.RecognizeSpeech()
676
786
 
787
+ if will_transcribe:
788
+ actual_transcribe = time.time() - transcribe_start
789
+ if smooth:
790
+ self._smooth_ticker.stop()
791
+ cumulative += actual_transcribe
792
+ self._smooth_cumulative = cumulative
793
+ if hasattr(self, '_step_timings') and self._step_timings is not None:
794
+ self._step_timings['transcribe'] = (actual_transcribe, getattr(self, '_timing_file_duration', 0))
795
+
677
796
  self.naughtyWordList = [word for word in self.wordList if word["scrub"] is True]
678
797
 
798
+ # AI-based profanity detection (replaces or supplements list)
799
+ if self.detectMode in ("ai", "both"):
800
+ if self.detectMode == "ai":
801
+ # Reset list-based scrub flags — AI decides everything
802
+ for word in self.wordList:
803
+ word["scrub"] = False
804
+ self._ai_detect_profanity()
805
+ # Rebuild naughtyWordList with AI results
806
+ self.naughtyWordList = [word for word in self.wordList if word["scrub"] is True]
807
+
679
808
  # Handle auto-generation mode
680
809
  if hasattr(self, 'autoGenerateMode') and self.autoGenerateMode and len(self.naughtyWordList) > 0:
681
810
  # Create merged profanity segments
@@ -684,30 +813,58 @@ class Plugger(object):
684
813
  # Extract, separate, and get instrumental file
685
814
  if self.instrumentalSegments:
686
815
  try:
687
- # Update progress bar to show extraction starting
816
+ # Update progress bar for extraction step
688
817
  if hasattr(self, '_progress') and self._progress and not self.debug:
689
- self._progress.update(1)
690
- self._progress.total = 3
691
- self._progress.set_description("Extracting instrumental")
818
+ if smooth:
819
+ extract_est = getattr(self, '_smooth_extract_est', 0)
820
+ self._progress.set_description("Extracting instrumental")
821
+ self._smooth_ticker.start(cumulative, extract_est)
822
+ else:
823
+ self._progress.update(1)
824
+ self._progress.total = 3
825
+ self._progress.set_description("Extracting instrumental")
692
826
 
827
+ extract_start = time.time()
693
828
  self.instrumentalFileSpec = self._create_combined_profanity_file()
694
829
 
695
- # Update progress after extraction completes
696
- if hasattr(self, '_progress') and self._progress and not self.debug:
830
+ actual_extract = time.time() - extract_start
831
+ if smooth:
832
+ self._smooth_ticker.stop()
833
+ cumulative += actual_extract
834
+ self._smooth_cumulative = cumulative
835
+ if hasattr(self, '_step_timings') and self._step_timings is not None:
836
+ self._step_timings['extract'] = (actual_extract, getattr(self, '_timing_file_duration', 0))
837
+
838
+ # Update progress after extraction completes (step-based mode)
839
+ if not smooth and hasattr(self, '_progress') and self._progress and not self.debug:
697
840
  self._progress.update(1)
841
+
698
842
  if self.instrumentalFileSpec:
699
843
  self.instrumentalMode = True
700
844
  self._build_instrumental_filters()
701
845
  return [] # Return empty list for muteTimeList
702
846
  except Exception as e:
703
847
  # Fallback to mute if generation fails
848
+ if smooth:
849
+ self._smooth_ticker.stop()
704
850
  if self.debug:
705
851
  mmguero.eprint(f"Generation failed: {e}, falling back to mute mode")
706
852
  self.instrumentalMode = False
707
853
  return self._create_mute_beep_list()
708
854
  else:
855
+ # No instrumental segments — remove extract estimate from smooth bar
856
+ if smooth and hasattr(self, '_progress') and self._progress:
857
+ extract_est = getattr(self, '_smooth_extract_est', 0)
858
+ self._smooth_ticker.adjust_total(-extract_est)
709
859
  return []
710
860
 
861
+ else:
862
+ # No profanity found in auto mode — remove extract estimate if applicable
863
+ if smooth and hasattr(self, 'autoGenerateMode') and self.autoGenerateMode:
864
+ extract_est = getattr(self, '_smooth_extract_est', 0)
865
+ if extract_est > 0 and hasattr(self, '_progress') and self._progress:
866
+ self._smooth_ticker.adjust_total(-extract_est)
867
+
711
868
  # Handle traditional instrumental file mode or mute/beep mode
712
869
  if self.instrumentalMode:
713
870
  return self._create_instrumental_splice_list()
@@ -800,6 +957,140 @@ class Plugger(object):
800
957
 
801
958
  return self.muteTimeList
802
959
 
960
def _fmt_time(self, seconds):
    """Format seconds as M:SS.mmm

    The value is rounded to whole milliseconds *before* it is split into
    minutes and seconds, so 59.9996 becomes "1:00.000" rather than
    "0:60.000".  Negative values are rendered with a leading "-".
    """
    total_ms = round(abs(seconds) * 1000)
    mins, ms = divmod(total_ms, 60000)
    # Only show a sign when something non-zero remains after rounding.
    sign = "-" if seconds < 0 and total_ms else ""
    return f"{sign}{mins}:{ms / 1000:06.3f}"
965
+
966
def _print_words_summary(self):
    """Report the detected profanity on stderr according to ``self.showWords``.

    "none" prints nothing, "clean" prints only the number of flagged words,
    and "full" additionally lists every flagged word with its time range.
    Any other value prints nothing once profanity has been found.
    """
    mode = self.showWords
    if mode == "none":
        return

    flagged = self.naughtyWordList
    if not flagged:
        mmguero.eprint("No profanity detected")
        return

    total = len(flagged)
    noun = "word" if total == 1 else "words"

    if mode == "full":
        mmguero.eprint("Profanity detected:")
        for entry in flagged:
            began = self._fmt_time(entry.get('start', 0))
            ended = self._fmt_time(entry.get('end', 0))
            mmguero.eprint(f' - "{entry["word"]}" ({began} - {ended})')

    # Both reporting modes end with the same count line.
    if mode in ("clean", "full"):
        mmguero.eprint(f"{total} {noun} detected")
987
+
988
def _ai_detect_profanity(self):
    """Use Groq chat API with structured outputs to detect profanity.

    Sends ``self.wordList`` to the chat-completions endpoint as a numbered,
    timestamped transcript and sets ``word["scrub"] = True`` on every entry
    whose index the model returns.  Words are only ever flagged here, never
    un-flagged.  Does nothing when the word list is empty.

    Rate limiting (429), timeouts and other request errors are retried up to
    three times with a doubling delay.

    Raises:
        ValueError: If no Groq API key is configured.
        Exception: If the key is rejected (401), if the request is still
            rate limited / timing out / failing on the last attempt, or if
            the response does not have the expected JSON shape.
    """
    import time as _time

    if not self.groqApiKey:
        raise ValueError("Groq API key required for AI detection")
    if not self.wordList:
        return

    # Build numbered transcript text
    transcript_text = "\n".join(
        f"[{i}] ({w.get('start', 0):.2f}-{w.get('end', 0):.2f}) {w.get('word', '')}"
        for i, w in enumerate(self.wordList)
    )

    # Get model and prompt (set from config via constructor)
    model = self.aiDetectModel
    prompt = self.aiDetectPrompt

    # API call with retry logic
    max_retries = 3
    retry_delay = 1  # seconds; doubled after every failed attempt

    for attempt in range(max_retries):
        can_retry = attempt < max_retries - 1
        try:
            response = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.groqApiKey}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model,
                    "messages": [
                        {"role": "system", "content": prompt},
                        {"role": "user", "content": transcript_text},
                    ],
                    "response_format": {
                        "type": "json_schema",
                        "json_schema": {
                            "name": "profanity_detection",
                            "strict": True,
                            "schema": AI_DETECT_SCHEMA,
                        }
                    }
                },
                timeout=120,
            )

            if response.status_code == 429:
                if can_retry:
                    if self.debug:
                        mmguero.eprint(f"AI detection rate limited, retrying in {retry_delay}s...")
                    _time.sleep(retry_delay)
                    retry_delay *= 2
                    continue
                raise Exception("AI detection rate limit exceeded")

            if response.status_code == 401:
                raise Exception("Invalid Groq API key for AI detection")

            response.raise_for_status()
            result = response.json()

        except requests.exceptions.Timeout as e:
            if not can_retry:
                raise Exception("AI detection request timed out") from e
            if self.debug:
                mmguero.eprint(f"AI detection timed out, retrying ({attempt + 1}/{max_retries})...")
            _time.sleep(retry_delay)
            retry_delay *= 2
            continue

        except requests.exceptions.RequestException as e:
            if not can_retry:
                raise Exception(f"AI detection failed after {max_retries} retries: {e}") from e
            if self.debug:
                mmguero.eprint(f"AI detection request failed: {e}, retrying ({attempt + 1}/{max_retries})...")
            _time.sleep(retry_delay)
            retry_delay *= 2
            continue

        # Absent or null pieces fall back to an empty result (nothing flagged);
        # anything of the wrong shape is reported as one clear error instead
        # of a bare IndexError / TypeError / JSONDecodeError.
        try:
            choices = result.get("choices") or [{}]
            content = (choices[0].get("message") or {}).get("content") or "{}"
            parsed = json.loads(content)
            profane_words = parsed.get("profane_words") or []
            flagged_indexes = [item.get("index", -1) for item in profane_words]
        except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
            raise Exception(f"AI detection returned an unexpected response: {e}") from e

        for idx in flagged_indexes:
            # Ignore anything that is not a valid position in the transcript.
            if isinstance(idx, int) and not isinstance(idx, bool) and 0 <= idx < len(self.wordList):
                self.wordList[idx]["scrub"] = True

        if self.debug:
            mmguero.eprint(f"AI detection raw response: {content}")
            reasoning = parsed.get("reasoning", "")
            if reasoning:
                mmguero.eprint(f"AI reasoning: {reasoning}")
            mmguero.eprint(f"AI detection found {len(profane_words)} profane words")
            for item in profane_words:
                idx = item.get("index", -1)
                word = item.get("word", "?")
                mmguero.eprint(f" [{idx}] \"{word}\" ({item.get('start', 0):.2f}-{item.get('end', 0):.2f})")

        return
1093
+
803
1094
  def _build_instrumental_filters(self):
804
1095
  """Build FFmpeg filter complex for instrumental splicing
805
1096
 
@@ -912,28 +1203,84 @@ class Plugger(object):
912
1203
  if (self.forceDespiteTag is True) or (GetMonkeyplugTagged(self.inputFileSpec, debug=self.debug) is False):
913
1204
  # Initialize progress (only when not in debug mode)
914
1205
  progress = None
1206
+ smooth_ticker = None
1207
+ step_timings = None
1208
+ timing_log = None
1209
+ file_duration = 0.0
1210
+
915
1211
  if not self.debug:
916
- # Determine first action
917
- if not self.inputTranscript:
918
- initial_desc = "Transcribing"
919
- else:
920
- initial_desc = "Processing"
921
-
922
- progress = tqdm(
923
- total=1, # Will be updated based on actual steps
924
- desc=initial_desc,
925
- unit="step",
926
- disable=False,
927
- bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]',
1212
+ # Load timing log and file duration for progress estimation
1213
+ timing_log = load_timing_log()
1214
+ file_duration = self._get_file_duration(self.inputFileSpec)
1215
+ step_timings = {}
1216
+
1217
+ # Determine which steps will run
1218
+ will_transcribe = not self.inputTranscript
1219
+ will_extract = hasattr(self, 'autoGenerateMode') and self.autoGenerateMode
1220
+ # encode always runs
1221
+
1222
+ # Check if we have estimates for all needed steps
1223
+ est_transcribe = estimate_step_duration(timing_log, 'transcribe', file_duration) if will_transcribe else None
1224
+ est_extract = estimate_step_duration(timing_log, 'extract', file_duration) if will_extract else None
1225
+ est_encode = estimate_step_duration(timing_log, 'encode', file_duration)
1226
+
1227
+ can_smooth = (
1228
+ file_duration > 0
1229
+ and est_encode is not None
1230
+ and (not will_transcribe or est_transcribe is not None)
1231
+ and (not will_extract or est_extract is not None)
928
1232
  )
929
1233
 
1234
+ if can_smooth:
1235
+ # Smooth mode: single bar with total in seconds
1236
+ est_transcribe_val = est_transcribe or 0
1237
+ est_extract_val = est_extract or 0
1238
+ total_est = est_transcribe_val + est_extract_val + est_encode
1239
+
1240
+ initial_desc = "Transcribing" if will_transcribe else "Processing"
1241
+ progress = tqdm(
1242
+ total=total_est,
1243
+ desc=initial_desc,
1244
+ unit="s",
1245
+ disable=False,
1246
+ bar_format='{l_bar}{bar}| {n:.0f}/{total:.0f}s [{elapsed}<{remaining}]',
1247
+ )
1248
+
1249
+ smooth_ticker = _SmoothProgressTicker(progress)
1250
+ # Ticker will be started inside CreateCleanMuteList for each step
1251
+
1252
+ # Pass context to CreateCleanMuteList
1253
+ self._smooth_ticker = smooth_ticker
1254
+ self._smooth_cumulative = 0.0
1255
+ self._smooth_transcribe_est = est_transcribe_val
1256
+ self._smooth_extract_est = est_extract_val
1257
+ self._step_timings = {}
1258
+ self._timing_log = timing_log
1259
+ self._timing_file_duration = file_duration
1260
+ self._will_transcribe = will_transcribe
1261
+ else:
1262
+ # Fallback: step-based bar (existing behavior)
1263
+ initial_desc = "Transcribing" if not self.inputTranscript else "Processing"
1264
+ progress = tqdm(
1265
+ total=1,
1266
+ desc=initial_desc,
1267
+ unit="step",
1268
+ disable=False,
1269
+ bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]',
1270
+ )
1271
+
1272
+ # Always pass timing context (even in step-based mode, for data collection)
1273
+ self._step_timings = step_timings
1274
+ self._timing_file_duration = file_duration
1275
+ self._will_transcribe = not self.inputTranscript
1276
+
930
1277
  # Store progress reference for use in CreateCleanMuteList
931
1278
  self._progress = progress
932
1279
 
933
1280
  self.CreateCleanMuteList()
934
1281
 
935
- # Update progress after CreateCleanMuteList
936
- if progress:
1282
+ # Update progress after CreateCleanMuteList (step-based mode only)
1283
+ if progress and not smooth_ticker:
937
1284
  did_extraction = (
938
1285
  hasattr(self, 'autoGenerateMode') and
939
1286
  self.autoGenerateMode and
@@ -942,23 +1289,21 @@ class Plugger(object):
942
1289
  )
943
1290
 
944
1291
  if not self.inputTranscript and not did_extraction:
945
- # Transcription done inside CreateCleanMuteList, no extraction
946
1292
  progress.update(1)
947
1293
  progress.total = 2
948
1294
  progress.set_description("Encoding")
949
1295
  elif not self.inputTranscript and did_extraction:
950
- # Both transcription and extraction handled inside CreateCleanMuteList
951
- # Just set description to encoding
952
1296
  progress.set_description("Encoding")
953
1297
  elif self.inputTranscript and did_extraction:
954
- # Extraction handled inside CreateCleanMuteList (no transcription update needed)
955
1298
  progress.total = 2
956
1299
  progress.set_description("Encoding")
957
1300
  else:
958
- # No transcription, no extraction - just encoding
959
1301
  progress.total = 1
960
1302
  progress.set_description("Encoding")
961
1303
 
1304
+ # Get cumulative position after CreateCleanMuteList (smooth mode)
1305
+ cumulative = getattr(self, '_smooth_cumulative', 0.0) if smooth_ticker else 0
1306
+
962
1307
  # Handle instrumental mode differently
963
1308
  if self.instrumentalMode:
964
1309
  # Use instrumental splicing
@@ -1030,6 +1375,15 @@ class Plugger(object):
1030
1375
  ffmpegCmd.extend(self.aParams)
1031
1376
  ffmpegCmd.append(self.outputFileSpec)
1032
1377
 
1378
+ # Start encode step with timing
1379
+ if progress and smooth_ticker:
1380
+ est_encode = estimate_step_duration(timing_log, 'encode', file_duration) or 0
1381
+ progress.set_description("Encoding")
1382
+ smooth_ticker.start(cumulative, est_encode)
1383
+ elif progress:
1384
+ progress.set_description("Encoding")
1385
+ encode_start = time.time()
1386
+
1033
1387
  ffmpegResult, ffmpegOutput = mmguero.run_process(ffmpegCmd, stdout=True, stderr=True, debug=self.debug)
1034
1388
  if (ffmpegResult != 0) or (not os.path.isfile(self.outputFileSpec)):
1035
1389
  mmguero.eprint(' '.join(mmguero.flatten(ffmpegCmd)))
@@ -1037,21 +1391,47 @@ class Plugger(object):
1037
1391
  mmguero.eprint(ffmpegOutput)
1038
1392
  raise ValueError(f"Could not process {self.inputFileSpec}")
1039
1393
 
1394
+ # Record encode timing and finalize
1395
+ actual_encode = time.time() - encode_start
1396
+ if smooth_ticker:
1397
+ smooth_ticker.stop()
1398
+ if step_timings is not None:
1399
+ step_timings['encode'] = (actual_encode, file_duration)
1400
+
1040
1401
  SetMonkeyplugTag(self.outputFileSpec, debug=self.debug)
1041
1402
 
1042
- # Complete progress
1403
+ # Complete progress and save timing data
1043
1404
  if progress:
1044
- progress.update(1)
1405
+ if smooth_ticker:
1406
+ # Snap bar to total
1407
+ progress.n = progress.total
1408
+ progress.refresh()
1409
+ else:
1410
+ progress.update(1)
1045
1411
  progress.close()
1046
1412
 
1413
+ # Update timing log with actual measurements (only on success)
1414
+ if timing_log is not None and file_duration > 0:
1415
+ for op, (wall_secs, audio_secs) in step_timings.items():
1416
+ update_timing_measurement(timing_log, op, wall_secs, audio_secs)
1417
+ save_timing_log(timing_log)
1418
+
1047
1419
  else:
1048
1420
  shutil.copyfile(self.inputFileSpec, self.outputFileSpec)
1049
1421
  if progress:
1050
1422
  progress.close()
1051
1423
 
1052
- # Clean up progress reference
1424
+ # Clean up progress references
1053
1425
  if hasattr(self, '_progress'):
1054
1426
  delattr(self, '_progress')
1427
+ for attr in ('_smooth_ticker', '_smooth_cumulative', '_smooth_extract_est',
1428
+ '_smooth_transcribe_est', '_will_transcribe',
1429
+ '_step_timings', '_timing_log', '_timing_file_duration'):
1430
+ if hasattr(self, attr):
1431
+ delattr(self, attr)
1432
+
1433
+ # Print profanity detection summary (after progress bar is closed)
1434
+ self._print_words_summary()
1055
1435
 
1056
1436
  return self.outputFileSpec
1057
1437
 
@@ -1093,6 +1473,11 @@ class VoskPlugger(Plugger):
1093
1473
  beepDropTransition=BEEP_DROPOUT_TRANSITION_DEFAULT,
1094
1474
  force=False,
1095
1475
  dbug=False,
1476
+ showWords="clean",
1477
+ detectMode="list",
1478
+ groqApiKey=None,
1479
+ aiDetectModel="openai/gpt-oss-20b",
1480
+ aiDetectPrompt=AI_DETECT_PROMPT_DEFAULT,
1096
1481
  ):
1097
1482
  self.wavReadFramesChunk = wChunk
1098
1483
  self.modelPath = None
@@ -1140,6 +1525,11 @@ class VoskPlugger(Plugger):
1140
1525
  beepDropTransition=beepDropTransition,
1141
1526
  force=force,
1142
1527
  dbug=dbug,
1528
+ showWords=showWords,
1529
+ detectMode=detectMode,
1530
+ groqApiKey=groqApiKey,
1531
+ aiDetectModel=aiDetectModel,
1532
+ aiDetectPrompt=aiDetectPrompt,
1143
1533
  )
1144
1534
 
1145
1535
  self.tmpWavFileSpec = self.inputFileParts[0] + ".wav"
@@ -1276,6 +1666,11 @@ class WhisperPlugger(Plugger):
1276
1666
  beepDropTransition=BEEP_DROPOUT_TRANSITION_DEFAULT,
1277
1667
  force=False,
1278
1668
  dbug=False,
1669
+ showWords="clean",
1670
+ detectMode="list",
1671
+ groqApiKey=None,
1672
+ aiDetectModel="openai/gpt-oss-20b",
1673
+ aiDetectPrompt=AI_DETECT_PROMPT_DEFAULT,
1279
1674
  ):
1280
1675
  self.whisper = None
1281
1676
  self.model = None
@@ -1320,6 +1715,11 @@ class WhisperPlugger(Plugger):
1320
1715
  beepDropTransition=beepDropTransition,
1321
1716
  force=force,
1322
1717
  dbug=dbug,
1718
+ showWords=showWords,
1719
+ detectMode=detectMode,
1720
+ groqApiKey=groqApiKey,
1721
+ aiDetectModel=aiDetectModel,
1722
+ aiDetectPrompt=aiDetectPrompt,
1323
1723
  )
1324
1724
 
1325
1725
  if self.debug:
@@ -1405,6 +1805,11 @@ class GroqPlugger(Plugger):
1405
1805
  verbose_level="",
1406
1806
  auto_generate=False,
1407
1807
  separation_padding=1.0,
1808
+ showWords="clean",
1809
+ detectMode="list",
1810
+ groqApiKey=None,
1811
+ aiDetectModel="openai/gpt-oss-20b",
1812
+ aiDetectPrompt=AI_DETECT_PROMPT_DEFAULT,
1408
1813
  ):
1409
1814
  # Import groq_config - handle both relative and absolute imports
1410
1815
  try:
@@ -1448,6 +1853,11 @@ class GroqPlugger(Plugger):
1448
1853
  force=force,
1449
1854
  dbug=dbug,
1450
1855
  instrumentalFileSpec=instrumentalFileSpec,
1856
+ showWords=showWords,
1857
+ detectMode=detectMode,
1858
+ groqApiKey=groqApiKey,
1859
+ aiDetectModel=aiDetectModel,
1860
+ aiDetectPrompt=aiDetectPrompt,
1451
1861
  )
1452
1862
 
1453
1863
  # Initialize auto-separation mode
@@ -1970,6 +2380,7 @@ def expand_and_detect_vocals(input_pattern, output_pattern, args, skip_detection
1970
2380
  # Config file loading
1971
2381
  MONKEYPLUG_CACHE_DIR = os.path.join(os.path.expanduser('~'), '.cache', 'monkeyplug')
1972
2382
  MONKEYPLUG_CONFIG_PATH = os.path.join(MONKEYPLUG_CACHE_DIR, 'config.json')
2383
+ MONKEYPLUG_TIMING_LOG_PATH = os.path.join(MONKEYPLUG_CACHE_DIR, 'timing_log.json')
1973
2384
 
1974
2385
  DEFAULT_CONFIG = {
1975
2386
  "pad_milliseconds": 10,
@@ -1977,6 +2388,10 @@ DEFAULT_CONFIG = {
1977
2388
  "pad_milliseconds_post": 10,
1978
2389
  "separation_padding": 1.0,
1979
2390
  "beep_hertz": BEEP_HERTZ_DEFAULT,
2391
+ "show_words": "clean",
2392
+ "detect_mode": "list",
2393
+ "ai_detect_model": "openai/gpt-oss-20b",
2394
+ "ai_detect_prompt": AI_DETECT_PROMPT_DEFAULT,
1980
2395
  }
1981
2396
 
1982
2397
 
@@ -2029,6 +2444,69 @@ def load_config_settings(debug=False):
2029
2444
  return dict(DEFAULT_CONFIG)
2030
2445
 
2031
2446
 
2447
+ ###################################################################################################
2448
+ # Timing log for progress estimation
2449
def load_timing_log():
    """Read the historical timing data used for progress bar estimation.

    Returns:
        dict: The parsed content of the timing log file when it exists and
        holds a JSON object; otherwise an empty dict (missing file,
        unreadable file, invalid JSON, or a non-object top-level value).
    """
    log_path = MONKEYPLUG_TIMING_LOG_PATH
    if os.path.isfile(log_path):
        try:
            with open(log_path, 'r') as handle:
                loaded = json.load(handle)
        except (json.JSONDecodeError, IOError, ValueError):
            loaded = None
        if isinstance(loaded, dict):
            return loaded
    return {}
2465
+
2466
+
2467
def save_timing_log(timing_log):
    """Save timing log atomically to disk.

    The log is written to a uniquely named temporary file in the target
    directory and then moved over the real file with ``os.replace``, so
    concurrent runs never share a temporary file and readers never see a
    half-written log.

    This is best-effort: filesystem errors and values that cannot be
    serialised as JSON are ignored, and the temporary file is removed if the
    save does not complete.
    """
    import tempfile

    target_dir = os.path.dirname(MONKEYPLUG_TIMING_LOG_PATH)
    tmp_path = None
    try:
        os.makedirs(target_dir, exist_ok=True)
        fd, tmp_path = tempfile.mkstemp(prefix='timing_log.', suffix='.tmp', dir=target_dir)
        with os.fdopen(fd, 'w') as f:
            json.dump(timing_log, f, indent=2)
            f.write('\n')
        os.replace(tmp_path, MONKEYPLUG_TIMING_LOG_PATH)
        tmp_path = None  # moved into place; nothing left to clean up
    except (OSError, TypeError, ValueError):
        pass  # Best-effort
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
2478
+
2479
+
2480
def estimate_step_duration(timing_log, operation, audio_seconds):
    """Estimate wall-clock seconds for an operation based on historical data.

    The estimate is the historical ratio of wall-clock seconds to audio
    seconds for ``operation`` multiplied by ``audio_seconds``.

    Args:
        timing_log: Mapping of operation name to an entry holding
            'total_audio_seconds', 'total_wall_seconds' and 'run_count'.
        operation: Name of the operation to look up.
        audio_seconds: Duration of the audio about to be processed.

    Returns:
        float or None: Estimated seconds, or None if no usable data is
        available (no entry, no recorded runs, or an entry whose totals are
        missing, non-numeric, non-finite or not positive).
    """
    import math

    entry = timing_log.get(operation) if isinstance(timing_log, dict) else None
    if not isinstance(entry, dict) or not entry.get('run_count'):
        return None
    # The log is a user-editable file, so treat malformed totals as "no data"
    # rather than letting a KeyError/TypeError abort the run.
    try:
        total_audio = float(entry.get('total_audio_seconds', 0))
        total_wall = float(entry['total_wall_seconds'])
    except (KeyError, TypeError, ValueError):
        return None
    if not (math.isfinite(total_audio) and math.isfinite(total_wall)):
        return None
    if total_audio <= 0 or total_wall < 0:
        return None
    rate = total_wall / total_audio
    return rate * audio_seconds
2494
+
2495
+
2496
def update_timing_measurement(timing_log, operation, wall_seconds, audio_seconds):
    """Add a new timing measurement to the running totals for ``operation``.

    ``timing_log`` is updated in place.  A missing entry is created; an
    existing entry that is not a dict, or whose counters are missing or
    non-numeric (e.g. after the log file was edited by hand), is repaired
    by treating the bad counters as zero instead of raising.

    Args:
        timing_log: Mapping of operation name to its running totals.
        operation: Name of the operation that was measured.
        wall_seconds: Wall-clock seconds the operation took.
        audio_seconds: Duration of the audio that was processed.
    """
    entry = timing_log.get(operation)
    if not isinstance(entry, dict):
        entry = {}
        timing_log[operation] = entry

    def _current(key, default):
        # bool is an int subclass but is never a valid counter value here.
        value = entry.get(key, default)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return default
        return value

    entry['total_audio_seconds'] = _current('total_audio_seconds', 0.0) + audio_seconds
    entry['total_wall_seconds'] = _current('total_wall_seconds', 0.0) + wall_seconds
    entry['run_count'] = _current('run_count', 0) + 1
2508
+
2509
+
2032
2510
  ###################################################################################################
2033
2511
  # RunMonkeyPlug
2034
2512
  def RunMonkeyPlug():
@@ -2099,6 +2577,22 @@ def RunMonkeyPlug():
2099
2577
  )
2100
2578
  parser.add_argument(
2101
2579
  "-w",
2580
+ "--show-words",
2581
+ dest="showWords",
2582
+ type=str,
2583
+ choices=["full", "clean", "none"],
2584
+ default=config.get("show_words", "clean"),
2585
+ help="Show detected profanity: full (list with timestamps), clean (count only), none (default: clean)",
2586
+ )
2587
+ parser.add_argument(
2588
+ "--detect",
2589
+ dest="detectMode",
2590
+ type=str,
2591
+ choices=["list", "ai", "both"],
2592
+ default=config.get("detect_mode", "list"),
2593
+ help="Profanity detection method: list (static list), ai (Groq LLM), both (default: list)",
2594
+ )
2595
+ parser.add_argument(
2102
2596
  "--swears",
2103
2597
  help=f"text file containing profanity (default: \"{SWEARS_FILENAME_DEFAULT}\")",
2104
2598
  default=os.path.join(script_path, SWEARS_FILENAME_DEFAULT),
@@ -2425,6 +2919,26 @@ def RunMonkeyPlug():
2425
2919
  else:
2426
2920
  sys.tracebacklimit = 0
2427
2921
 
2922
+ # Load Groq API key for AI detection (needed for all modes if --detect ai|both)
2923
+ if args.detectMode in ("ai", "both"):
2924
+ try:
2925
+ from monkeyplug.groq_config import load_groq_api_key
2926
+ except ImportError:
2927
+ from .groq_config import load_groq_api_key
2928
+ if not args.groqApiKey:
2929
+ args.groqApiKey = load_groq_api_key(None, debug=args.debug)
2930
+ if not args.groqApiKey:
2931
+ mmguero.eprint("Groq API key required for --detect ai or --detect both")
2932
+ mmguero.eprint("Provide via --groq-api-key, GROQ_API_KEY env var, ~/.groq/config.json, or ./.groq_key")
2933
+ sys.exit(1)
2934
+ elif args.speechRecMode == SPEECH_REC_MODE_GROQ and not args.groqApiKey:
2935
+ # Load key for Groq STT mode too (existing behavior)
2936
+ try:
2937
+ from monkeyplug.groq_config import load_groq_api_key
2938
+ except ImportError:
2939
+ from .groq_config import load_groq_api_key
2940
+ args.groqApiKey = load_groq_api_key(None, debug=args.debug)
2941
+
2428
2942
  # Set default output pattern if not specified: <input>_clean.<ext>
2429
2943
  if not args.output:
2430
2944
  input_base, input_ext = os.path.splitext(args.input)
@@ -2682,6 +3196,11 @@ def RunMonkeyPlug():
2682
3196
  verbose_level=args_copy.verbose_level if hasattr(args_copy, 'verbose_level') else "",
2683
3197
  auto_generate=file_auto_generate,
2684
3198
  separation_padding=args_copy.separationPadding,
3199
+ showWords=args_copy.showWords,
3200
+ detectMode=args_copy.detectMode,
3201
+ groqApiKey=args_copy.groqApiKey,
3202
+ aiDetectModel=config.get("ai_detect_model", "openai/gpt-oss-20b"),
3203
+ aiDetectPrompt=config.get("ai_detect_prompt", AI_DETECT_PROMPT_DEFAULT),
2685
3204
  )
2686
3205
 
2687
3206
  print(plug.EncodeCleanAudio())
@@ -2888,6 +3407,11 @@ def RunMonkeyPlug():
2888
3407
  beepDropTransition=args.beepDropTransition,
2889
3408
  force=args.forceDespiteTag,
2890
3409
  dbug=args.debug,
3410
+ showWords=args.showWords,
3411
+ detectMode=args.detectMode,
3412
+ groqApiKey=args.groqApiKey,
3413
+ aiDetectModel=config.get("ai_detect_model", "openai/gpt-oss-20b"),
3414
+ aiDetectPrompt=config.get("ai_detect_prompt", AI_DETECT_PROMPT_DEFAULT),
2891
3415
  )
2892
3416
 
2893
3417
  elif args.speechRecMode == SPEECH_REC_MODE_WHISPER:
@@ -2919,6 +3443,11 @@ def RunMonkeyPlug():
2919
3443
  beepDropTransition=args.beepDropTransition,
2920
3444
  force=args.forceDespiteTag,
2921
3445
  dbug=args.debug,
3446
+ showWords=args.showWords,
3447
+ detectMode=args.detectMode,
3448
+ groqApiKey=args.groqApiKey,
3449
+ aiDetectModel=config.get("ai_detect_model", "openai/gpt-oss-20b"),
3450
+ aiDetectPrompt=config.get("ai_detect_prompt", AI_DETECT_PROMPT_DEFAULT),
2922
3451
  )
2923
3452
 
2924
3453
  elif args.speechRecMode == SPEECH_REC_MODE_GROQ:
@@ -2952,6 +3481,11 @@ def RunMonkeyPlug():
2952
3481
  verbose_level=args.verbose_level if hasattr(args, 'verbose_level') else "",
2953
3482
  auto_generate=auto_generate,
2954
3483
  separation_padding=args.separationPadding,
3484
+ showWords=args.showWords,
3485
+ detectMode=args.detectMode,
3486
+ groqApiKey=args.groqApiKey,
3487
+ aiDetectModel=config.get("ai_detect_model", "openai/gpt-oss-20b"),
3488
+ aiDetectPrompt=config.get("ai_detect_prompt", AI_DETECT_PROMPT_DEFAULT),
2955
3489
  )
2956
3490
  else:
2957
3491
  raise ValueError(f"Unsupported speech recognition engine {args.speechRecMode}")