GameSentenceMiner 2.16.7__py3-none-any.whl → 2.16.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -363,9 +363,12 @@ class ConfigApp:
363
363
  self.vad_trim_beginning_value = tk.BooleanVar(value=self.settings.vad.trim_beginning)
364
364
  self.vad_beginning_offset_value = tk.StringVar(value=str(self.settings.vad.beginning_offset))
365
365
  self.add_audio_on_no_results_value = tk.BooleanVar(value=self.settings.vad.add_audio_on_no_results)
366
+ self.use_tts_as_fallback_value = tk.BooleanVar(value=self.settings.vad.use_tts_as_fallback)
367
+ self.tts_url_value = tk.StringVar(value=self.settings.vad.tts_url)
366
368
  self.language_value = tk.StringVar(value=self.settings.vad.language)
367
369
  self.cut_and_splice_segments_value = tk.BooleanVar(value=self.settings.vad.cut_and_splice_segments)
368
370
  self.splice_padding_value = tk.StringVar(value=str(self.settings.vad.splice_padding) if self.settings.vad.splice_padding else "")
371
+ self.use_vad_filter_for_whisper_value = tk.BooleanVar(value=self.settings.vad.use_vad_filter_for_whisper)
369
372
 
370
373
  # Advanced Settings
371
374
  self.audio_player_path_value = tk.StringVar(value=self.settings.advanced.audio_player_path)
@@ -396,6 +399,8 @@ class ConfigApp:
396
399
  self.overlay_websocket_port_value = tk.StringVar(value=str(self.settings.overlay.websocket_port))
397
400
  self.overlay_websocket_send_value = tk.BooleanVar(value=self.settings.overlay.monitor_to_capture)
398
401
  self.overlay_engine_value = tk.StringVar(value=self.settings.overlay.engine)
402
+ self.periodic_value = tk.BooleanVar(value=self.settings.overlay.periodic)
403
+ self.periodic_interval_value = tk.StringVar(value=str(self.settings.overlay.periodic_interval))
399
404
 
400
405
  # Master Config Settings
401
406
  self.switch_to_default_if_not_found_value = tk.BooleanVar(value=self.master_config.switch_to_default_if_not_found)
@@ -594,10 +599,13 @@ class ConfigApp:
594
599
  trim_beginning=self.vad_trim_beginning_value.get(),
595
600
  beginning_offset=float(self.vad_beginning_offset_value.get()),
596
601
  add_audio_on_no_results=self.add_audio_on_no_results_value.get(),
602
+ use_tts_as_fallback=self.use_tts_as_fallback_value.get(),
603
+ tts_url=self.tts_url_value.get(),
597
604
  language=self.language_value.get(),
598
605
  cut_and_splice_segments=self.cut_and_splice_segments_value.get(),
599
606
  splice_padding=float(self.splice_padding_value.get()) if self.splice_padding_value.get() else 0.0,
600
607
  use_cpu_for_inference=self.use_cpu_for_inference_value.get(),
608
+ use_vad_filter_for_whisper=self.use_vad_filter_for_whisper_value.get(),
601
609
  ),
602
610
  advanced=Advanced(
603
611
  audio_player_path=self.audio_player_path_value.get(),
@@ -628,7 +636,9 @@ class ConfigApp:
628
636
  overlay=Overlay(
629
637
  websocket_port=int(self.overlay_websocket_port_value.get()),
630
638
  monitor_to_capture=self.overlay_monitor.current() if self.monitors else 0,
631
- engine=OverlayEngine(self.overlay_engine_value.get()).value if self.overlay_engine_value.get() else OverlayEngine.LENS.value
639
+ engine=OverlayEngine(self.overlay_engine_value.get()).value if self.overlay_engine_value.get() else OverlayEngine.LENS.value,
640
+ periodic=self.periodic_value.get(),
641
+ periodic_interval=self.periodic_interval_value.get(),
632
642
  )
633
643
  # wip=WIP(
634
644
  # overlay_websocket_port=int(self.overlay_websocket_port_value.get()),
@@ -1109,6 +1119,17 @@ class ConfigApp:
1109
1119
  row=self.current_row, column=1, sticky='W', pady=2)
1110
1120
  self.current_row += 1
1111
1121
 
1122
+ # TODO ADD LOCALIZATION
1123
+ tts_fallback_i18n = vad_i18n.get('use_tts_as_fallback', {})
1124
+ HoverInfoLabelWidget(vad_frame, text=tts_fallback_i18n.get('label', 'Use TTS as Fallback.'), tooltip=tts_fallback_i18n.get('tooltip', 'Use TTS if no audio is detected'), row=self.current_row, column=0)
1125
+ ttk.Checkbutton(vad_frame, variable=self.use_tts_as_fallback_value, bootstyle="round-toggle").grid(row=self.current_row, column=1, sticky='W', pady=2)
1126
+ self.current_row += 1
1127
+
1128
+ tts_url_i18n = vad_i18n.get('tts_url', {})
1129
+ HoverInfoLabelWidget(vad_frame, text=tts_url_i18n.get('label', 'TTS URL'), tooltip=tts_url_i18n.get('tooltip', 'The URL for the TTS service'), row=self.current_row, column=0)
1130
+ ttk.Entry(vad_frame, textvariable=self.tts_url_value).grid(row=self.current_row, column=1, sticky='EW', pady=2)
1131
+ self.current_row += 1
1132
+
1112
1133
  end_offset_i18n = vad_i18n.get('audio_end_offset', {})
1113
1134
  HoverInfoLabelWidget(vad_frame, text=end_offset_i18n.get('label', '...'),
1114
1135
  tooltip=end_offset_i18n.get('tooltip', '...'), foreground="dark orange",
@@ -1150,9 +1171,22 @@ class ConfigApp:
1150
1171
  ttk.Checkbutton(vad_frame, variable=self.use_cpu_for_inference_value, bootstyle="round-toggle").grid(row=self.current_row, column=1, sticky='W', pady=2)
1151
1172
  self.current_row += 1
1152
1173
 
1174
+ # TODO Add Localization
1175
+ use_vad_filter_for_whisper_i18n = vad_i18n.get('use_vad_filter_for_whisper', {})
1176
+ HoverInfoLabelWidget(vad_frame, text=use_vad_filter_for_whisper_i18n.get('label', 'Use VAD Filter for Whisper'), tooltip=use_vad_filter_for_whisper_i18n.get('tooltip', 'Uses Silero to Filter out Non-Voiced Segments before Transcribing with Whisper.'), row=self.current_row, column=0)
1177
+ ttk.Checkbutton(vad_frame, variable=self.use_vad_filter_for_whisper_value, bootstyle="round-toggle").grid(row=self.current_row, column=1, sticky='W', pady=2)
1178
+ self.current_row += 1
1179
+
1153
1180
  # Add Reset Button
1154
1181
  self.add_reset_button(vad_frame, "vad", self.current_row, column=0, recreate_tab=self.create_vad_tab)
1155
1182
 
1183
+ for col in range(3):
1184
+ vad_frame.grid_columnconfigure(col, weight=0)
1185
+ for row in range(self.current_row):
1186
+ vad_frame.grid_rowconfigure(row, minsize=30)
1187
+
1188
+ return vad_frame
1189
+
1156
1190
  @new_tab
1157
1191
  def create_paths_tab(self):
1158
1192
  if self.paths_tab is None:
@@ -1175,7 +1209,7 @@ class ConfigApp:
1175
1209
  ttk.Button(paths_frame, text=browse_text, command=lambda: self.browse_folder(folder_watch_entry),
1176
1210
  bootstyle="outline").grid(row=self.current_row, column=2, padx=5, pady=2)
1177
1211
  self.current_row += 1
1178
-
1212
+
1179
1213
  # Combine "Copy temp files to output folder" and "Output folder" on one row
1180
1214
  copy_to_output_i18n = paths_i18n.get('copy_temp_files_to_output_folder', {})
1181
1215
  combined_i18n = paths_i18n.get('output_folder', {})
@@ -2050,7 +2084,7 @@ class ConfigApp:
2050
2084
  entry = ttk.Entry(ai_frame, textvariable=self.open_ai_url_value)
2051
2085
  entry.grid(row=self.current_row, column=1, sticky='EW', pady=2)
2052
2086
  self.current_row += 1
2053
-
2087
+
2054
2088
  entry.bind("<FocusOut>", lambda e, row=self.current_row: self.update_models_element(ai_frame, row))
2055
2089
  entry.bind("<Return>", lambda e, row=self.current_row: self.update_models_element(ai_frame, row))
2056
2090
 
@@ -2245,6 +2279,21 @@ class ConfigApp:
2245
2279
  textvariable=self.overlay_engine_value)
2246
2280
  self.overlay_engine.grid(row=self.current_row, column=1, sticky='EW', pady=2)
2247
2281
  self.current_row += 1
2282
+
2283
+ # Periodic Settings
2284
+ periodic_i18n = overlay_i18n.get('periodic', {})
2285
+ HoverInfoLabelWidget(overlay_frame, text=periodic_i18n.get('label', 'Periodic:'),
2286
+ tooltip=periodic_i18n.get('tooltip', 'Enable periodic Scanning.'),
2287
+ row=self.current_row, column=0)
2288
+ ttk.Checkbutton(overlay_frame, variable=self.periodic_value, bootstyle="round-toggle").grid(
2289
+ row=self.current_row, column=1, sticky='W', pady=2)
2290
+ self.current_row += 1
2291
+ periodic_interval_i18n = overlay_i18n.get('periodic_interval', {})
2292
+ HoverInfoLabelWidget(overlay_frame, text=periodic_interval_i18n.get('label', 'Periodic Interval:'),
2293
+ tooltip=periodic_interval_i18n.get('tooltip', 'Interval for periodic scanning.'),
2294
+ row=self.current_row, column=0)
2295
+ ttk.Entry(overlay_frame, textvariable=self.periodic_interval_value).grid(row=self.current_row, column=1, sticky='EW', pady=2)
2296
+ self.current_row += 1
2248
2297
 
2249
2298
  if self.monitors:
2250
2299
  # Ensure the index is valid
@@ -2285,7 +2334,7 @@ class ConfigApp:
2285
2334
  # self.controller_hotkey_entry.grid(row=self.current_row, column=1, sticky='EW', pady=2)
2286
2335
 
2287
2336
  # listen_for_input_button = ttk.Button(wip_frame, text="Listen for Input", command=lambda: self.listen_for_controller_input())
2288
- # listen_for_input_button.grid(row=self.current_row, column=2, sticky='EW', pady=2)
2337
+ # listen_for_input_button.grid(row=self.current_row, column=2, sticky='EW', pady=2, padx=5)
2289
2338
  # self.current_row += 1
2290
2339
 
2291
2340
  except Exception as e:
@@ -2398,6 +2447,7 @@ class ConfigApp:
2398
2447
  default_path = get_default_anki_media_collection_path()
2399
2448
  if default_path != self.anki_media_collection_value.get():
2400
2449
  self.anki_media_collection_value.set(default_path)
2450
+
2401
2451
  self.save_settings()
2402
2452
 
2403
2453
 
GameSentenceMiner/gsm.py CHANGED
@@ -1,3 +1,4 @@
1
+ import tempfile
1
2
  import time
2
3
  import asyncio
3
4
  import subprocess
@@ -6,6 +7,11 @@ import sys
6
7
  import os
7
8
  import warnings
8
9
 
10
+ import requests
11
+
12
+ from GameSentenceMiner.util.get_overlay_coords import OverlayThread
13
+ from GameSentenceMiner.util.gsm_utils import remove_html_and_cloze_tags
14
+
9
15
  os.environ.pop('TCL_LIBRARY', None)
10
16
 
11
17
 
@@ -21,6 +27,7 @@ def handle_error_in_initialization(e):
21
27
  logger.info("Exiting due to initialization error.")
22
28
  sys.exit(1)
23
29
 
30
+
24
31
  try:
25
32
  import os.path
26
33
  import signal
@@ -48,15 +55,18 @@ try:
48
55
 
49
56
  start_time = time.time()
50
57
  from GameSentenceMiner.util.downloader.download_tools import download_obs_if_needed, download_ffmpeg_if_needed
51
- logger.debug(f"[Import] download_tools (download_obs_if_needed, download_ffmpeg_if_needed): {time.time() - start_time:.3f}s")
58
+ logger.debug(
59
+ f"[Import] download_tools (download_obs_if_needed, download_ffmpeg_if_needed): {time.time() - start_time:.3f}s")
52
60
 
53
61
  start_time = time.time()
54
62
  from GameSentenceMiner.util.communication.send import send_restart_signal
55
- logger.debug(f"[Import] send_restart_signal: {time.time() - start_time:.3f}s")
63
+ logger.debug(
64
+ f"[Import] send_restart_signal: {time.time() - start_time:.3f}s")
56
65
 
57
66
  start_time = time.time()
58
67
  from GameSentenceMiner.util.gsm_utils import wait_for_stable_file, make_unique_file_name, run_new_thread
59
- logger.debug(f"[Import] gsm_utils (wait_for_stable_file, make_unique_file_name, run_new_thread): {time.time() - start_time:.3f}s")
68
+ logger.debug(
69
+ f"[Import] gsm_utils (wait_for_stable_file, make_unique_file_name, run_new_thread): {time.time() - start_time:.3f}s")
60
70
 
61
71
  start_time = time.time()
62
72
  from GameSentenceMiner import anki
@@ -68,7 +78,8 @@ try:
68
78
 
69
79
  start_time = time.time()
70
80
  from GameSentenceMiner.util import configuration, notification, ffmpeg
71
- logger.debug(f"[Import] util (configuration, notification, ffmpeg): {time.time() - start_time:.3f}s")
81
+ logger.debug(
82
+ f"[Import] util (configuration, notification, ffmpeg): {time.time() - start_time:.3f}s")
72
83
 
73
84
  start_time = time.time()
74
85
  from GameSentenceMiner import gametext
@@ -84,19 +95,23 @@ try:
84
95
 
85
96
  start_time = time.time()
86
97
  from GameSentenceMiner.util.communication.websocket import connect_websocket, register_websocket_message_handler, FunctionName
87
- logger.debug(f"[Import] websocket (connect_websocket, register_websocket_message_handler, FunctionName): {time.time() - start_time:.3f}s")
98
+ logger.debug(
99
+ f"[Import] websocket (connect_websocket, register_websocket_message_handler, FunctionName): {time.time() - start_time:.3f}s")
88
100
 
89
101
  start_time = time.time()
90
102
  from GameSentenceMiner.util.ffmpeg import get_audio_and_trim, get_video_timings, get_ffmpeg_path
91
- logger.debug(f"[Import] util.ffmpeg (get_audio_and_trim, get_video_timings, get_ffmpeg_path): {time.time() - start_time:.3f}s")
103
+ logger.debug(
104
+ f"[Import] util.ffmpeg (get_audio_and_trim, get_video_timings, get_ffmpeg_path): {time.time() - start_time:.3f}s")
92
105
 
93
106
  start_time = time.time()
94
107
  from GameSentenceMiner.obs import check_obs_folder_is_correct
95
- logger.debug(f"[Import] obs.check_obs_folder_is_correct: {time.time() - start_time:.3f}s")
108
+ logger.debug(
109
+ f"[Import] obs.check_obs_folder_is_correct: {time.time() - start_time:.3f}s")
96
110
 
97
111
  start_time = time.time()
98
112
  from GameSentenceMiner.util.text_log import GameLine, get_text_event, get_mined_line, get_all_lines, game_log
99
- logger.debug(f"[Import] util.text_log (GameLine, get_text_event, get_mined_line, get_all_lines, game_log): {time.time() - start_time:.3f}s")
113
+ logger.debug(
114
+ f"[Import] util.text_log (GameLine, get_text_event, get_mined_line, get_all_lines, game_log): {time.time() - start_time:.3f}s")
100
115
 
101
116
  start_time = time.time()
102
117
  from GameSentenceMiner.util import *
@@ -104,15 +119,18 @@ try:
104
119
 
105
120
  start_time = time.time()
106
121
  from GameSentenceMiner.web import texthooking_page
107
- logger.debug(f"[Import] web.texthooking_page: {time.time() - start_time:.3f}s")
122
+ logger.debug(
123
+ f"[Import] web.texthooking_page: {time.time() - start_time:.3f}s")
108
124
 
109
125
  start_time = time.time()
110
126
  from GameSentenceMiner.web.service import handle_texthooker_button, set_get_audio_from_video_callback
111
- logger.debug(f"[Import] web.service (handle_texthooker_button, set_get_audio_from_video_callback): {time.time() - start_time:.3f}s")
127
+ logger.debug(
128
+ f"[Import] web.service (handle_texthooker_button, set_get_audio_from_video_callback): {time.time() - start_time:.3f}s")
112
129
 
113
130
  start_time = time.time()
114
131
  from GameSentenceMiner.web.texthooking_page import run_text_hooker_page
115
- logger.debug(f"[Import] web.texthooking_page.run_text_hooker_page: {time.time() - start_time:.3f}s")
132
+ logger.debug(
133
+ f"[Import] web.texthooking_page.run_text_hooker_page: {time.time() - start_time:.3f}s")
116
134
  except Exception as e:
117
135
  from GameSentenceMiner.util.configuration import logger, is_linux, is_windows
118
136
  handle_error_in_initialization(e)
@@ -172,8 +190,9 @@ class VideoToAudioHandler(FileSystemEventHandler):
172
190
  if get_config().features.backfill_audio:
173
191
  last_note = anki.get_cards_by_sentence(
174
192
  gametext.current_line_after_regex)
175
-
176
- note, last_note = anki.get_initial_card_info(last_note, selected_lines)
193
+
194
+ note, last_note = anki.get_initial_card_info(
195
+ last_note, selected_lines)
177
196
  tango = last_note.get_field(
178
197
  get_config().anki.word_field) if last_note else ''
179
198
 
@@ -184,12 +203,15 @@ class VideoToAudioHandler(FileSystemEventHandler):
184
203
  start_line = selected_lines[0]
185
204
  mined_line = get_mined_line(last_note, selected_lines)
186
205
  line_cutoff = selected_lines[-1].get_next_time()
206
+ full_text = remove_html_and_cloze_tags(note['fields'][get_config().anki.sentence_field])
187
207
  else:
188
208
  mined_line = get_text_event(last_note)
189
209
  if mined_line:
190
210
  start_line = mined_line
191
211
  if mined_line.next:
192
212
  line_cutoff = mined_line.next.time
213
+ full_text = mined_line.text
214
+
193
215
  gsm_state.last_mined_line = mined_line
194
216
 
195
217
  if os.path.exists(video_path) and os.access(video_path, os.R_OK):
@@ -213,7 +235,8 @@ class VideoToAudioHandler(FileSystemEventHandler):
213
235
  line_cutoff,
214
236
  video_path,
215
237
  anki_card_creation_time,
216
- mined_line=mined_line)
238
+ mined_line=mined_line,
239
+ full_text=full_text)
217
240
  else:
218
241
  final_audio_output = ""
219
242
  vad_result = VADResult(True, 0, 0, '')
@@ -269,11 +292,13 @@ class VideoToAudioHandler(FileSystemEventHandler):
269
292
  f"Error removing video file {video_path}: {e}", exc_info=True)
270
293
 
271
294
  @staticmethod
272
- def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False, mined_line=None):
295
+ def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False, mined_line=None, full_text=''):
273
296
  trimmed_audio, start_time, end_time = get_audio_and_trim(
274
297
  video_path, game_line, next_line_time, anki_card_creation_time)
275
298
  if temporary:
276
299
  return ffmpeg.convert_audio_to_wav_lossless(trimmed_audio)
300
+ if not get_config().vad.do_vad_postprocessing:
301
+ return trimmed_audio, VADResult(True, start_time, end_time, "No VAD"), trimmed_audio, start_time, end_time
277
302
  vad_trimmed_audio = make_unique_file_name(
278
303
  f"{os.path.abspath(configuration.get_temporary_directory())}/{obs.get_current_game(sanitize=True)}.{get_config().audio.extension}")
279
304
  final_audio_output = make_unique_file_name(os.path.join(get_temporary_directory(),
@@ -283,6 +308,25 @@ class VideoToAudioHandler(FileSystemEventHandler):
283
308
  trimmed_audio, vad_trimmed_audio, game_line)
284
309
  if timing_only:
285
310
  return vad_result
311
+
312
+ if not vad_result.success:
313
+ if get_config().vad.add_audio_on_no_results:
314
+ logger.info("No voice activity detected, using full audio.")
315
+ vad_result.output_audio = trimmed_audio
316
+ elif get_config().vad.use_tts_as_fallback:
317
+ logger.info(
318
+ "No voice activity detected, using TTS as fallback.")
319
+ text_to_tts = full_text if full_text else game_line.text
320
+ url = get_config().vad.tts_url.replace("$s", text_to_tts)
321
+ tts_resp = requests.get(url)
322
+ if not tts_resp.ok:
323
+ logger.error(
324
+ f"Error fetching TTS audio from {url}. Is it running?: {tts_resp.status_code} {tts_resp.text}")
325
+ with tempfile.NamedTemporaryFile(dir=get_temporary_directory(), delete=False, suffix=".opus") as tmpfile:
326
+ tmpfile.write(tts_resp.content)
327
+ vad_result.output_audio = tmpfile.name
328
+ else:
329
+ logger.info(vad_result.trim_successful_string())
286
330
  if vad_result.output_audio:
287
331
  vad_trimmed_audio = vad_result.output_audio
288
332
  if get_config().audio.ffmpeg_reencode_options_to_use and os.path.exists(vad_trimmed_audio):
@@ -404,12 +448,13 @@ def open_multimine(icon, item):
404
448
 
405
449
 
406
450
  def exit_program(passed_icon, item):
407
- """Exit the application."""
408
- if not passed_icon:
409
- passed_icon = icon
410
- logger.info("Exiting...")
411
- passed_icon.stop()
412
- cleanup()
451
+ """Exit the application."""
452
+ if not passed_icon:
453
+ passed_icon = icon
454
+ logger.info("Exiting...")
455
+ passed_icon.stop()
456
+ cleanup()
457
+
413
458
 
414
459
  class GSMTray(threading.Thread):
415
460
  def __init__(self):
@@ -421,12 +466,11 @@ class GSMTray(threading.Thread):
421
466
  def run(self):
422
467
  self.run_tray()
423
468
 
424
-
425
469
  def run_tray(self):
426
470
  self.profile_menu = Menu(
427
471
  *[MenuItem(("Active: " if profile == get_master_config().current_profile else "") + profile, self.switch_profile) for
428
- profile in
429
- get_master_config().get_all_profile_names()]
472
+ profile in
473
+ get_master_config().get_all_profile_names()]
430
474
  )
431
475
 
432
476
  menu = Menu(
@@ -447,8 +491,8 @@ class GSMTray(threading.Thread):
447
491
  # Recreate the menu with the updated button text
448
492
  profile_menu = Menu(
449
493
  *[MenuItem(("Active: " if profile == get_master_config().current_profile else "") + profile, self.switch_profile) for
450
- profile in
451
- get_master_config().get_all_profile_names()]
494
+ profile in
495
+ get_master_config().get_all_profile_names()]
452
496
  )
453
497
 
454
498
  menu = Menu(
@@ -486,6 +530,7 @@ class GSMTray(threading.Thread):
486
530
  if self.icon:
487
531
  self.icon.stop()
488
532
 
533
+
489
534
  gsm_tray = GSMTray()
490
535
 
491
536
 
@@ -540,13 +585,13 @@ def cleanup():
540
585
  obs.disconnect_from_obs()
541
586
  if get_config().obs.close_obs:
542
587
  close_obs()
543
-
588
+
544
589
  if texthooking_page.websocket_server_threads:
545
590
  for thread in texthooking_page.websocket_server_threads:
546
591
  if thread and isinstance(thread, threading.Thread) and thread.is_alive():
547
592
  thread.stop_server()
548
593
  thread.join()
549
-
594
+
550
595
  proc: Popen
551
596
  for proc in procs_to_close:
552
597
  try:
@@ -568,7 +613,8 @@ def cleanup():
568
613
  if os.path.exists(video):
569
614
  os.remove(video)
570
615
  except Exception as e:
571
- logger.error(f"Error removing temporary video file {video}: {e}")
616
+ logger.error(
617
+ f"Error removing temporary video file {video}: {e}")
572
618
 
573
619
  settings_window.window.destroy()
574
620
  # time.sleep(5)
@@ -668,6 +714,9 @@ def async_loop():
668
714
  await register_scene_switcher_callback()
669
715
  await check_obs_folder_is_correct()
670
716
  vad_processor.init()
717
+ OverlayThread().start()
718
+
719
+ # Keep loop alive
671
720
  # if is_beangate:
672
721
  # await run_test_code()
673
722
 
@@ -713,8 +762,8 @@ async def run_test_code():
713
762
  if boxes:
714
763
  await texthooking_page.send_word_coordinates_to_overlay(boxes)
715
764
  await asyncio.sleep(2)
716
-
717
-
765
+
766
+
718
767
  async def check_if_script_is_running():
719
768
  """Check if the script is already running and kill it if so."""
720
769
  if os.path.exists(os.path.join(get_app_directory(), "current_pid.txt")):
@@ -722,14 +771,15 @@ async def check_if_script_is_running():
722
771
  pid = int(f.read().strip())
723
772
  if psutil.pid_exists(pid) and 'python' in psutil.Process(pid).name().lower():
724
773
  logger.info(f"Script is already running with PID: {pid}")
725
- psutil.Process(pid).terminate() # Attempt to terminate the existing process
774
+ # Attempt to terminate the existing process
775
+ psutil.Process(pid).terminate()
726
776
  logger.info("Sent SIGTERM to the existing process.")
727
777
  notification.send_error_notification(
728
778
  "Script was already running. Terminating the existing process.")
729
779
  return True
730
780
  return False
731
-
732
-
781
+
782
+
733
783
  async def log_current_pid():
734
784
  """Log the current process ID."""
735
785
  current_pid = os.getpid()
@@ -748,17 +798,17 @@ async def async_main(reloading=False):
748
798
  initialize_async()
749
799
  observer = Observer()
750
800
  observer.schedule(VideoToAudioHandler(),
751
- get_config().paths.folder_to_watch, recursive=False)
801
+ get_config().paths.folder_to_watch, recursive=False)
752
802
  observer.start()
753
803
  if is_windows():
754
804
  register_hotkeys()
755
-
805
+
756
806
  run_new_thread(initialize_text_monitor)
757
807
  run_new_thread(run_text_hooker_page)
758
808
  run_new_thread(async_loop).join()
759
-
809
+
760
810
  logger.info("Initialization complete. Happy Mining! がんばれ!")
761
-
811
+
762
812
  # await check_if_script_is_running()
763
813
  # await log_current_pid()
764
814
 
@@ -797,10 +847,9 @@ def main():
797
847
  handle_error_in_initialization(e)
798
848
 
799
849
 
800
-
801
850
  if __name__ == "__main__":
802
851
  logger.info("Starting GSM")
803
852
  try:
804
853
  asyncio.run(async_main())
805
854
  except Exception as e:
806
- handle_error_in_initialization(e)
855
+ handle_error_in_initialization(e)
@@ -268,6 +268,14 @@
268
268
  "use_cpu_for_inference": {
269
269
  "label": "Force CPU:",
270
270
  "tooltip": "Even if CUDA is installed, use CPU for Whisper"
271
+ },
272
+ "use_tts_as_fallback": {
273
+ "label": "Use TTS as Fallback:",
274
+ "tooltip": "Use Text-to-Speech as a fallback when no audio is found."
275
+ },
276
+ "tts_url": {
277
+ "label": "TTS URL:",
278
+ "tooltip": "URL for the Text-to-Speech service. Use $s as a placeholder for the text."
271
279
  }
272
280
  },
273
281
  "features": {
@@ -576,6 +584,14 @@
576
584
  "overlay_engine": {
577
585
  "label": "Overlay Engine:",
578
586
  "tooltip": "Select the OCR engine for the overlay. If you use lens, and are on windows, it will use OneOCR to optimize the scan."
587
+ },
588
+ "periodic": {
589
+ "label": "Periodic Capture:",
590
+ "tooltip": "Enable periodic capture of the screen for Overlay. Note, you still need text flowing into GSM for mining to work."
591
+ },
592
+ "periodic_interval": {
593
+ "label": "Capture Interval (Seconds):",
594
+ "tooltip": "Interval in seconds for periodic screen capture."
579
595
  }
580
596
  },
581
597
  "wip": {
@@ -267,6 +267,14 @@
267
267
  "use_cpu_for_inference": {
268
268
  "label": "CPU強制使用:",
269
269
  "tooltip": "CUDAがインストールされていてもWhisperでCPUを使用します"
270
+ },
271
+ "use_tts_as_fallback": {
272
+ "label": "TTSをフォールバックとして使用:",
273
+ "tooltip": "音声が見つからない場合にテキスト読み上げをフォールバックとして使用します。"
274
+ },
275
+ "tts_url": {
276
+ "label": "TTS URL:",
277
+ "tooltip": "テキスト読み上げサービスのURL。テキストのプレースホルダーとして$sを使用します。"
270
278
  }
271
279
  },
272
280
  "features": {
@@ -575,6 +583,14 @@
575
583
  "overlay_engine": {
576
584
  "label": "オーバーレイエンジン:",
577
585
  "tooltip": "オーバーレイのOCRエンジンを選択します。Lensを使用していてWindowsの場合、スキャンを最適化するためにOneOCRを使用します。"
586
+ },
587
+ "periodic": {
588
+ "label": "定期キャプチャ:",
589
+ "tooltip": "OCR処理のために画面を定期的にキャプチャするかどうか。"
590
+ },
591
+ "periodic_interval": {
592
+ "label": "キャプチャ間隔(秒):",
593
+ "tooltip": "定期的な画面キャプチャの間隔(秒単位)。"
578
594
  }
579
595
  },
580
596
  "wip": {
@@ -268,6 +268,14 @@
268
268
  "use_cpu_for_inference": {
269
269
  "label": "强制使用 CPU:",
270
270
  "tooltip": "即使已安装 CUDA,也强制使用 CPU 运行 Whisper"
271
+ },
272
+ "use_tts_as_fallback": {
273
+ "label": "使用 TTS 作为后备:",
274
+ "tooltip": "在未找到音频时使用文本转语音作为后备。"
275
+ },
276
+ "tts_url": {
277
+ "label": "TTS URL:",
278
+ "tooltip": "文本转语音服务的 URL。使用 $s 作为文本的占位符。"
271
279
  }
272
280
  },
273
281
  "features": {
@@ -564,6 +572,14 @@
564
572
  "overlay_engine": {
565
573
  "label": "覆盖层引擎:",
566
574
  "tooltip": "为覆盖层选择 OCR 引擎。如果您使用的是 lens,并且在 windows 上,它将使用 OneOCR 来优化扫描。"
575
+ },
576
+ "periodic": {
577
+ "label": "定期捕获:",
578
+ "tooltip": "启用定期屏幕捕获以进行 OCR 处理。"
579
+ },
580
+ "periodic_interval": {
581
+ "label": "捕获间隔(秒):",
582
+ "tooltip": "定期屏幕捕获的时间间隔(秒)。"
567
583
  }
568
584
  },
569
585
  "wip": {
@@ -558,9 +558,12 @@ class VAD:
558
558
  trim_beginning: bool = False
559
559
  beginning_offset: float = -0.25
560
560
  add_audio_on_no_results: bool = False
561
+ use_tts_as_fallback: bool = False
562
+ tts_url: str = 'http://127.0.0.1:5050/?term=$s'
561
563
  cut_and_splice_segments: bool = False
562
564
  splice_padding: float = 0.1
563
565
  use_cpu_for_inference: bool = False
566
+ use_vad_filter_for_whisper: bool = True
564
567
 
565
568
  def is_silero(self):
566
569
  return self.selected_vad_model == SILERO or self.backup_vad_model == SILERO
@@ -641,6 +644,8 @@ class Overlay:
641
644
  websocket_port: int = 55499
642
645
  engine: str = OverlayEngine.LENS.value
643
646
  monitor_to_capture: int = 0
647
+ periodic: bool = False
648
+ periodic_interval: float = 1.0
644
649
 
645
650
  def __post_init__(self):
646
651
  if self.monitor_to_capture == -1: