GameSentenceMiner 2.15.4__tar.gz → 2.15.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/anki.py +24 -25
  2. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/gsm.py +2 -2
  3. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/obs.py +8 -47
  4. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/owocr_helper.py +79 -14
  5. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/ocr.py +15 -3
  6. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/run.py +82 -60
  7. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/vad.py +9 -3
  8. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/PKG-INFO +3 -2
  9. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/requires.txt +1 -0
  10. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/PKG-INFO +3 -2
  11. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/README.md +1 -1
  12. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/pyproject.toml +3 -2
  13. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/__init__.py +0 -0
  14. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ai/__init__.py +0 -0
  15. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ai/ai_prompting.py +0 -0
  16. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/__init__.py +0 -0
  17. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon.png +0 -0
  18. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon128.png +0 -0
  19. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon256.png +0 -0
  20. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon32.png +0 -0
  21. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon512.png +0 -0
  22. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon64.png +0 -0
  23. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/pickaxe.png +0 -0
  24. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/config_gui.py +0 -0
  25. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/gametext.py +0 -0
  26. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/locales/en_us.json +0 -0
  27. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/locales/ja_jp.json +0 -0
  28. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/locales/zh_cn.json +0 -0
  29. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/__init__.py +0 -0
  30. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/gsm_ocr_config.py +0 -0
  31. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/ocrconfig.py +0 -0
  32. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/owocr_area_selector.py +0 -0
  33. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/ss_picker.py +0 -0
  34. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/__init__.py +0 -0
  35. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/__main__.py +0 -0
  36. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/config.py +0 -0
  37. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/lens_betterproto.py +0 -0
  38. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +0 -0
  39. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/__init__.py +0 -0
  40. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/audio_offset_selector.py +0 -0
  41. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/furigana_filter_preview.py +0 -0
  42. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/ss_selector.py +0 -0
  43. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/window_transparency.py +0 -0
  44. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/__init__.py +0 -0
  45. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/communication/__init__.py +0 -0
  46. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/communication/send.py +0 -0
  47. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/communication/websocket.py +0 -0
  48. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/configuration.py +0 -0
  49. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/db.py +0 -0
  50. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/Untitled_json.py +0 -0
  51. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/__init__.py +0 -0
  52. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/download_tools.py +0 -0
  53. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/oneocr_dl.py +0 -0
  54. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/electron_config.py +0 -0
  55. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/ffmpeg.py +0 -0
  56. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/get_overlay_coords.py +0 -0
  57. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/gsm_utils.py +0 -0
  58. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/model.py +0 -0
  59. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/notification.py +0 -0
  60. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/text_log.py +0 -0
  61. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/win10toast/__init__.py +0 -0
  62. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/win10toast/__main__.py +0 -0
  63. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/__init__.py +0 -0
  64. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/service.py +0 -0
  65. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/__init__.py +0 -0
  66. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  67. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  68. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/favicon.ico +0 -0
  69. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/favicon.svg +0 -0
  70. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/site.webmanifest +0 -0
  71. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/style.css +0 -0
  72. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  73. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  74. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/__init__.py +0 -0
  75. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/index.html +0 -0
  76. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/text_replacements.html +0 -0
  77. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/utility.html +0 -0
  78. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/texthooking_page.py +0 -0
  79. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/wip/__init___.py +0 -0
  80. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/SOURCES.txt +0 -0
  81. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/dependency_links.txt +0 -0
  82. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/entry_points.txt +0 -0
  83. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/top_level.txt +0 -0
  84. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/LICENSE +0 -0
  85. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/setup.cfg +0 -0
@@ -280,31 +280,30 @@ def get_initial_card_info(last_note: AnkiCard, selected_lines):
280
280
 
281
281
  if get_config().overlay.websocket_port and texthooking_page.overlay_server_thread.has_clients():
282
282
  sentence_in_anki = last_note.get_field(get_config().anki.sentence_field).replace("\n", "").replace("\r", "").strip()
283
- if lines_match(game_line.text, remove_html_and_cloze_tags(sentence_in_anki)):
284
- logger.info("Found matching line in Anki, Preserving HTML and fix spacing!")
285
- if "<b>" in sentence_in_anki:
286
- text_inside_bold = re.findall(r'<b>(.*?)</b>', sentence_in_anki)
287
- logger.info(text_inside_bold)
288
- if text_inside_bold:
289
- text = text_inside_bold[0].replace(" ", "").replace('\n', '').strip()
290
- note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_bold[0], f"<b>{text}</b>")
291
- logger.info(f"Preserved bold Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
292
- if "<i>" in sentence_in_anki:
293
- text_inside_italic = re.findall(r'<i>(.*?)</i>', sentence_in_anki)
294
- if text_inside_italic:
295
- text = text_inside_italic[0].replace(" ", "").replace('\n', '').strip()
296
- note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_italic[0], f"<i>{text}</i>")
297
- logger.info(f"Preserved italic Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
298
- if "<u>" in sentence_in_anki:
299
- text_inside_underline = re.findall(r'<u>(.*?)</u>', sentence_in_anki)
300
- if text_inside_underline:
301
- text = text_inside_underline[0].replace(" ", "").replace('\n', '').strip()
302
- note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_underline[0], f"<u>{text}</u>")
303
- logger.info(f"Preserved underline Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
304
-
305
- if get_config().anki.sentence_field not in note['fields']:
306
- logger.info("No HTML tags found to preserve, just fixing spacing")
307
- note['fields'][get_config().anki.sentence_field] = game_line.text
283
+ logger.info("Found matching line in Anki, Preserving HTML and fix spacing!")
284
+ if "<b>" in sentence_in_anki:
285
+ text_inside_bold = re.findall(r'<b>(.*?)</b>', sentence_in_anki)
286
+ logger.info(text_inside_bold)
287
+ if text_inside_bold:
288
+ text = text_inside_bold[0].replace(" ", "").replace('\n', '').strip()
289
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_bold[0], f"<b>{text}</b>")
290
+ logger.info(f"Preserved bold Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
291
+ if "<i>" in sentence_in_anki:
292
+ text_inside_italic = re.findall(r'<i>(.*?)</i>', sentence_in_anki)
293
+ if text_inside_italic:
294
+ text = text_inside_italic[0].replace(" ", "").replace('\n', '').strip()
295
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_italic[0], f"<i>{text}</i>")
296
+ logger.info(f"Preserved italic Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
297
+ if "<u>" in sentence_in_anki:
298
+ text_inside_underline = re.findall(r'<u>(.*?)</u>', sentence_in_anki)
299
+ if text_inside_underline:
300
+ text = text_inside_underline[0].replace(" ", "").replace('\n', '').strip()
301
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_underline[0], f"<u>{text}</u>")
302
+ logger.info(f"Preserved underline Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
303
+
304
+ if get_config().anki.sentence_field not in note['fields']:
305
+ logger.info("No HTML tags found to preserve, just fixing spacing")
306
+ note['fields'][get_config().anki.sentence_field] = game_line.text
308
307
  if selected_lines:
309
308
  try:
310
309
  sentence_in_anki = last_note.get_field(get_config().anki.sentence_field)
@@ -663,10 +663,10 @@ def initialize_text_monitor():
663
663
 
664
664
  def async_loop():
665
665
  async def loop():
666
- await obs.connect_to_obs()
666
+ logger.info("Post-Initialization started.")
667
+ await obs.connect_to_obs(connections=3, check_output=True)
667
668
  await register_scene_switcher_callback()
668
669
  await check_obs_folder_is_correct()
669
- logger.info("Post-Initialization started.")
670
670
  vad_processor.init()
671
671
  # if is_beangate:
672
672
  # await run_test_code()
@@ -101,7 +101,7 @@ class OBSConnectionPool:
101
101
 
102
102
 
103
103
  class OBSConnectionManager(threading.Thread):
104
- def __init__(self, check_output=True):
104
+ def __init__(self, check_output=False):
105
105
  super().__init__()
106
106
  self.daemon = True
107
107
  self.running = True
@@ -261,7 +261,7 @@ def get_obs_websocket_config_values():
261
261
  full_config.save()
262
262
  reload_config()
263
263
 
264
- async def connect_to_obs(retry=5, check_output=True):
264
+ async def connect_to_obs(retry=5, connections=2, check_output=False):
265
265
  global connection_pool, obs_connection_manager, event_client, connecting
266
266
  if is_windows():
267
267
  get_obs_websocket_config_values()
@@ -275,7 +275,7 @@ async def connect_to_obs(retry=5, check_output=True):
275
275
  'password': get_config().obs.password,
276
276
  'timeout': 3,
277
277
  }
278
- connection_pool = OBSConnectionPool(size=3, **pool_kwargs)
278
+ connection_pool = OBSConnectionPool(size=connections, **pool_kwargs)
279
279
  connection_pool.connect_all()
280
280
 
281
281
  with connection_pool.get_client() as client:
@@ -306,46 +306,8 @@ async def connect_to_obs(retry=5, check_output=True):
306
306
  retry -= 1
307
307
  connecting = False
308
308
 
309
- def connect_to_obs_sync(retry=2, check_output=True):
310
- global connection_pool, obs_connection_manager, event_client
311
- if is_windows():
312
- get_obs_websocket_config_values()
313
-
314
- while True:
315
- try:
316
- pool_kwargs = {
317
- 'host': get_config().obs.host,
318
- 'port': get_config().obs.port,
319
- 'password': get_config().obs.password,
320
- 'timeout': 3,
321
- }
322
- connection_pool = OBSConnectionPool(size=5, **pool_kwargs)
323
- connection_pool.connect_all()
324
-
325
- with connection_pool.get_client() as client:
326
- client.get_version() # Test one connection to confirm it works
327
-
328
- event_client = obs.EventClient(
329
- host=get_config().obs.host,
330
- port=get_config().obs.port,
331
- password=get_config().obs.password,
332
- timeout=1,
333
- )
334
- if not obs_connection_manager:
335
- obs_connection_manager = OBSConnectionManager(check_output=check_output)
336
- obs_connection_manager.start()
337
- update_current_game()
338
- logger.info("Connected to OBS WebSocket.")
339
- break # Exit the loop once connected
340
- except Exception as e:
341
- if retry <= 0:
342
- gsm_status.obs_connected = False
343
- logger.error(f"Failed to connect to OBS WebSocket: {e}")
344
- connection_pool = None
345
- event_client = None
346
- break
347
- time.sleep(1)
348
- retry -= 1
309
+ def connect_to_obs_sync(retry=2, connections=2, check_output=False):
310
+ asyncio.run(connect_to_obs(retry=retry, connections=connections, check_output=check_output))
349
311
 
350
312
 
351
313
  def disconnect_from_obs():
@@ -419,14 +381,13 @@ def stop_replay_buffer():
419
381
  logger.warning(f"Error stopping replay buffer: {e}")
420
382
 
421
383
  def save_replay_buffer():
422
- status = get_replay_buffer_status()
423
- if status:
384
+ try:
424
385
  with connection_pool.get_client() as client:
425
386
  response = client.save_replay_buffer()
426
387
  if response and response.ok:
427
388
  logger.info("Replay buffer saved. If your log stops here, make sure your obs output path matches \"Path To Watch\" in GSM settings.")
428
- else:
429
- raise Exception("Replay Buffer is not active, could not save Replay Buffer!")
389
+ except Exception as e:
390
+ raise Exception(f"Error saving replay buffer: {e}")
430
391
 
431
392
  def get_current_scene():
432
393
  try:
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ from copy import copy
2
3
  import io
3
4
  import json
4
5
  import logging
@@ -376,18 +377,19 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
376
377
  return
377
378
  previous_orig_text = orig_text_string
378
379
  previous_ocr1_result = previous_text
379
- if crop_coords and get_ocr_optimize_second_scan():
380
- x1, y1, x2, y2 = crop_coords
381
- x1 = max(0, min(x1, img.width))
382
- y1 = max(0, min(y1, img.height))
383
- x2 = max(x1, min(x2, img.width))
384
- y2 = max(y1, min(y2, img.height))
385
- previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
386
- try:
387
- previous_img_local = previous_img_local.crop((x1, y1, x2, y2))
388
- except ValueError:
389
- logger.warning("Error cropping image, using original image")
390
- second_ocr_queue.put((previous_text, stable_time, previous_img_local, filtering, pre_crop_image))
380
+ ocr2_image = get_ocr2_image(crop_coords, og_image=previous_img_local, ocr2_engine=get_ocr_ocr2())
381
+ # if crop_coords and get_ocr_optimize_second_scan():
382
+ # x1, y1, x2, y2 = crop_coords
383
+ # x1 = max(0, min(x1, img.width))
384
+ # y1 = max(0, min(y1, img.height))
385
+ # x2 = max(x1, min(x2, img.width))
386
+ # y2 = max(y1, min(y2, img.height))
387
+ # previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
388
+ # try:
389
+ # previous_img_local = previous_img_local.crop((x1, y1, x2, y2))
390
+ # except ValueError:
391
+ # logger.warning("Error cropping image, using original image")
392
+ second_ocr_queue.put((previous_text, stable_time, ocr2_image, filtering, pre_crop_image))
391
393
  # threading.Thread(target=do_second_ocr, args=(previous_text, stable_time, previous_img_local, filtering), daemon=True).start()
392
394
  previous_img = None
393
395
  previous_text = None
@@ -412,6 +414,69 @@ done = False
412
414
  # Create a queue for tasks
413
415
  second_ocr_queue = queue.Queue()
414
416
 
417
+ def get_ocr2_image(crop_coords, og_image, ocr2_engine=None):
418
+ """
419
+ Returns the image to use for the second OCR pass, cropping and scaling as needed.
420
+ Logic is unchanged, but code is refactored for clarity and maintainability.
421
+ """
422
+ def return_original_image():
423
+ logger.info("Returning original image for OCR2 (no cropping or optimization).")
424
+ if not crop_coords or not get_ocr_optimize_second_scan():
425
+ return og_image
426
+ x1, y1, x2, y2 = crop_coords
427
+ x1 = max(0, min(x1, og_image.width))
428
+ y1 = max(0, min(y1, og_image.height))
429
+ x2 = max(x1, min(x2, og_image.width))
430
+ y2 = max(y1, min(y2, og_image.height))
431
+ og_image.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
432
+ return og_image.crop((x1, y1, x2, y2))
433
+
434
+ LOCAL_OCR_ENGINES = ['easyocr', 'oneocr', 'rapidocr', 'mangaocr', 'winrtocr']
435
+ local_ocr = ocr2_engine in LOCAL_OCR_ENGINES
436
+ ocr_config_local = copy(ocr_config)
437
+
438
+ # Non-local OCR: just crop the original image if needed
439
+ if not local_ocr:
440
+ return return_original_image()
441
+
442
+ # Local OCR: get fresh screenshot and apply config/cropping
443
+ obs_width = getattr(run.obs_screenshot_thread, 'width', None)
444
+ obs_height = getattr(run.obs_screenshot_thread, 'height', None)
445
+ if not obs_width or not obs_height:
446
+ return return_original_image()
447
+ logger.debug(f"Getting OCR2 image with OBS dimensions: {obs_width}x{obs_height}")
448
+
449
+ img = obs.get_screenshot_PIL(compression=100, img_format="jpg")
450
+ ocr_config_local.scale_to_custom_size(img.width, img.height)
451
+
452
+ # If no crop or optimization, just apply config and return
453
+ if not crop_coords or not get_ocr_optimize_second_scan():
454
+ img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=True)
455
+ return img
456
+
457
+ # Calculate scaling ratios
458
+ width_ratio = img.width / obs_width if obs_width else 1
459
+ height_ratio = img.height / obs_height if obs_height else 1
460
+ logger.debug(f"Cropping OCR2 image with crop coordinates: {crop_coords} and ratios: {width_ratio}, {height_ratio}")
461
+
462
+ # Scale crop_coords
463
+ x1 = int(crop_coords[0] * width_ratio)
464
+ y1 = int(crop_coords[1] * height_ratio)
465
+ x2 = int(crop_coords[2] * width_ratio)
466
+ y2 = int(crop_coords[3] * height_ratio)
467
+ logger.debug(f"Scaled crop coordinates: {(x1, y1, x2, y2)}")
468
+
469
+ # Clamp coordinates to image bounds
470
+ x1 = max(0, min(x1, img.width))
471
+ y1 = max(0, min(y1, img.height))
472
+ x2 = max(x1, min(x2, img.width))
473
+ y2 = max(y1, min(y2, img.height))
474
+
475
+ img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=False)
476
+
477
+
478
+ return img.crop((x1, y1, x2, y2))
479
+
415
480
  def process_task_queue():
416
481
  while True:
417
482
  try:
@@ -456,7 +521,7 @@ def run_oneocr(ocr_config: OCRConfig, rectangles, config_check_thread):
456
521
  gsm_ocr_config=ocr_config,
457
522
  screen_capture_areas=screen_areas,
458
523
  furigana_filter_sensitivity=furigana_filter_sensitivity,
459
- screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None,
524
+ screen_capture_combo=manual_ocr_hotkey.upper() if manual_ocr_hotkey and manual else None,
460
525
  config_check_thread=config_check_thread)
461
526
  except Exception as e:
462
527
  logger.exception(f"Error running OneOCR: {e}")
@@ -530,7 +595,7 @@ def set_force_stable_hotkey():
530
595
 
531
596
  if __name__ == "__main__":
532
597
  try:
533
- global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline, obs_ocr
598
+ global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline, obs_ocr, manual
534
599
  import sys
535
600
 
536
601
  import argparse
@@ -1243,7 +1243,7 @@ class OCRSpace:
1243
1243
  class GeminiOCR:
1244
1244
  name = 'gemini'
1245
1245
  readable_name = 'Gemini'
1246
- key = 'm'
1246
+ key = ';'
1247
1247
  available = False
1248
1248
 
1249
1249
  def __init__(self, config={'api_key': None}, lang='ja'):
@@ -1433,10 +1433,14 @@ class localLLMOCR:
1433
1433
  self.keep_warm = config.get('keep_warm', True)
1434
1434
  self.custom_prompt = config.get('prompt', None)
1435
1435
  self.available = True
1436
+ if not self.check_url_for_connectivity(self.api_url):
1437
+ self.available = False
1438
+ logger.warning(f'Local LLM OCR API URL not reachable: {self.api_url}')
1439
+ return
1436
1440
  self.client = openai.OpenAI(
1437
1441
  base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
1438
1442
  api_key=self.api_key,
1439
- timeout=3
1443
+ timeout=1
1440
1444
  )
1441
1445
  if self.client.models.retrieve(self.model):
1442
1446
  self.model = self.model
@@ -1446,7 +1450,15 @@ class localLLMOCR:
1446
1450
  self.keep_llm_hot_thread.start()
1447
1451
  except Exception as e:
1448
1452
  logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
1449
-
1453
+
1454
+ def check_url_for_connectivity(self, url):
1455
+ import requests
1456
+ try:
1457
+ response = requests.get(url, timeout=0.5)
1458
+ return response.status_code == 200
1459
+ except Exception:
1460
+ return False
1461
+
1450
1462
  def keep_llm_warm(self):
1451
1463
  def ocr_blank_black_image():
1452
1464
  if self.last_ocr_time and (time.time() - self.last_ocr_time) < 5:
@@ -42,6 +42,7 @@ import socketserver
42
42
  import cv2
43
43
  import numpy as np
44
44
 
45
+ from collections import deque
45
46
  from datetime import datetime, timedelta
46
47
  from PIL import Image, ImageDraw
47
48
  from loguru import logger
@@ -337,6 +338,7 @@ class TextFiltering:
337
338
  self.thai_regex = re.compile(r'[\u0E00-\u0E7F]')
338
339
  self.latin_extended_regex = re.compile(
339
340
  r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
341
+ self.last_few_results = {}
340
342
  try:
341
343
  from transformers import pipeline, AutoTokenizer
342
344
  import torch
@@ -361,7 +363,7 @@ class TextFiltering:
361
363
  import langid
362
364
  self.classify = langid.classify
363
365
 
364
- def __call__(self, text, last_result):
366
+ def __call__(self, text, last_result, engine=None, is_second_ocr=False):
365
367
  lang = get_ocr_language()
366
368
  if self.initial_lang != lang:
367
369
  from pysbd import Segmenter
@@ -402,11 +404,24 @@ class TextFiltering:
402
404
 
403
405
  try:
404
406
  if isinstance(last_result, list):
405
- last_text = last_result
407
+ last_text = last_result.copy()
406
408
  elif last_result and last_result[1] == engine_index:
407
409
  last_text = last_result[0]
408
410
  else:
409
411
  last_text = []
412
+
413
+ if engine and not is_second_ocr:
414
+ if self.last_few_results and self.last_few_results.get(engine):
415
+ for sublist in self.last_few_results.get(engine, []):
416
+ if sublist:
417
+ for item in sublist:
418
+ if item and item not in last_text:
419
+ last_text.append(item)
420
+ self.last_few_results[engine].append(orig_text_filtered)
421
+ else:
422
+ self.last_few_results[engine] = deque(maxlen=3)
423
+ self.last_few_results[engine].append(orig_text_filtered)
424
+
410
425
  except Exception as e:
411
426
  logger.error(f"Error processing last_result {last_result}: {e}")
412
427
  last_text = []
@@ -981,7 +996,7 @@ def quick_text_detection(pil_image, threshold_ratio=0.01):
981
996
 
982
997
  # Use OBS for Screenshot Source (i.e. Linux)
983
998
  class OBSScreenshotThread(threading.Thread):
984
- def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
999
+ def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1, is_manual_ocr=False):
985
1000
  super().__init__(daemon=True)
986
1001
  self.ocr_config = ocr_config
987
1002
  self.interval = interval
@@ -992,6 +1007,7 @@ class OBSScreenshotThread(threading.Thread):
992
1007
  self.width = width
993
1008
  self.height = height
994
1009
  self.use_periodic_queue = not screen_capture_on_combo
1010
+ self.is_manual_ocr = is_manual_ocr
995
1011
 
996
1012
  def write_result(self, result):
997
1013
  if self.use_periodic_queue:
@@ -1003,62 +1019,26 @@ class OBSScreenshotThread(threading.Thread):
1003
1019
  def connect_obs(self):
1004
1020
  import GameSentenceMiner.obs as obs
1005
1021
  obs.connect_to_obs_sync(check_output=False)
1006
-
1007
- def scale_down_width_height(self, width, height):
1008
- if width == 0 or height == 0:
1009
- return self.width, self.height
1010
- # return width, height
1011
- aspect_ratio = width / height
1012
- logger.info(
1013
- f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
1014
- if aspect_ratio > 2.66:
1015
- # Ultra-wide (32:9) - use 1920x540
1016
- logger.info("Using ultra-wide aspect ratio scaling (32:9).")
1017
- return 1920, 540
1018
- elif aspect_ratio > 2.33:
1019
- # 21:9 - use 1920x800
1020
- logger.info("Using ultra-wide aspect ratio scaling (21:9).")
1021
- return 1920, 800
1022
- elif aspect_ratio > 1.77:
1023
- # 16:9 - use 1280x720
1024
- logger.info("Using standard aspect ratio scaling (16:9).")
1025
- return 1280, 720
1026
- elif aspect_ratio > 1.6:
1027
- # 16:10 - use 1280x800
1028
- logger.info("Using standard aspect ratio scaling (16:10).")
1029
- return 1280, 800
1030
- elif aspect_ratio > 1.33:
1031
- # 4:3 - use 960x720
1032
- logger.info("Using standard aspect ratio scaling (4:3).")
1033
- return 960, 720
1034
- elif aspect_ratio > 1.25:
1035
- # 5:4 - use 900x720
1036
- logger.info("Using standard aspect ratio scaling (5:4).")
1037
- return 900, 720
1038
- elif aspect_ratio > 1.5:
1039
- # 3:2 - use 1080x720
1040
- logger.info("Using standard aspect ratio scaling (3:2).")
1041
- return 1080, 720
1042
- else:
1043
- # Default fallback - use original resolution
1044
- logger.info(
1045
- "Using default aspect ratio scaling (original resolution).")
1046
- return width, height
1047
1022
 
1048
1023
  def init_config(self, source=None, scene=None):
1049
1024
  import GameSentenceMiner.obs as obs
1050
1025
  obs.update_current_game()
1051
1026
  self.current_source = source if source else obs.get_active_source()
1052
- logger.info(f"Current OBS source: {self.current_source}")
1027
+ logger.debug(f"Current OBS source: {self.current_source}")
1053
1028
  self.source_width = self.current_source.get(
1054
1029
  "sceneItemTransform").get("sourceWidth") or self.width
1055
1030
  self.source_height = self.current_source.get(
1056
1031
  "sceneItemTransform").get("sourceHeight") or self.height
1057
- if self.source_width and self.source_height:
1058
- self.width, self.height = self.scale_down_width_height(
1032
+ if self.source_width and self.source_height and not self.is_manual_ocr and not get_ocr_two_pass_ocr():
1033
+ self.width, self.height = scale_down_width_height(
1059
1034
  self.source_width, self.source_height)
1060
1035
  logger.info(
1061
- f"Using OBS source dimensions: {self.width}x{self.height}")
1036
+ f"Using OBS source dimensions: {self.source_width}x{self.source_height}")
1037
+ else:
1038
+ self.width = self.source_width or 1280
1039
+ self.height = self.source_height or 720
1040
+ logger.info(
1041
+ f"Using source dimensions: {self.width}x{self.height}")
1062
1042
  self.current_source_name = self.current_source.get(
1063
1043
  "sourceName") or None
1064
1044
  self.current_scene = scene if scene else obs.get_current_game()
@@ -1105,7 +1085,7 @@ class OBSScreenshotThread(threading.Thread):
1105
1085
  self.write_result(1)
1106
1086
  continue
1107
1087
  img = obs.get_screenshot_PIL(source_name=self.current_source_name,
1108
- width=self.width, height=self.height, img_format='jpg', compression=80)
1088
+ width=self.width, height=self.height, img_format='jpg', compression=100)
1109
1089
 
1110
1090
  img = apply_ocr_config_to_image(img, self.ocr_config)
1111
1091
 
@@ -1120,6 +1100,39 @@ class OBSScreenshotThread(threading.Thread):
1120
1100
  f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
1121
1101
  time.sleep(.5)
1122
1102
  continue
1103
+
1104
def scale_down_width_height(width, height):
    """Scale an OBS source's dimensions down to a standard capture resolution.

    Picks a target resolution bucket based on the source's aspect ratio so
    screenshots sent to OCR stay small and consistent.

    :param width: Source width in pixels.
    :param height: Source height in pixels.
    :return: Tuple ``(width, height)`` of the scaled-down dimensions; the
        original dimensions are returned unchanged when either input is 0 or
        no aspect-ratio bucket matches.
    """
    if width == 0 or height == 0:
        # Avoid ZeroDivisionError and pass degenerate dimensions through.
        return width, height
    aspect_ratio = width / height
    logger.info(
        f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
    # Thresholds MUST be checked in strictly descending order; previously the
    # 3:2 (> 1.5) branch sat below the 4:3 (> 1.33) branch, making it
    # unreachable, so 3:2 sources were wrongly scaled to 960x720.
    if aspect_ratio > 2.66:
        logger.info("Using ultra-wide aspect ratio scaling (32:9).")
        return 1920, 540
    elif aspect_ratio > 2.33:
        logger.info("Using ultra-wide aspect ratio scaling (21:9).")
        return 1920, 800
    elif aspect_ratio > 1.77:
        logger.info("Using standard aspect ratio scaling (16:9).")
        return 1280, 720
    elif aspect_ratio > 1.6:
        logger.info("Using standard aspect ratio scaling (16:10).")
        return 1280, 800
    elif aspect_ratio > 1.5:
        logger.info("Using standard aspect ratio scaling (3:2).")
        return 1080, 720
    elif aspect_ratio > 1.33:
        logger.info("Using standard aspect ratio scaling (4:3).")
        return 960, 720
    elif aspect_ratio > 1.25:
        logger.info("Using standard aspect ratio scaling (5:4).")
        return 900, 720
    else:
        logger.info(
            "Using default aspect ratio scaling (original resolution).")
        return width, height
1123
1136
 
1124
1137
 
1125
1138
  def apply_ocr_config_to_image(img, ocr_config, is_secondary=False):
@@ -1317,8 +1330,10 @@ def do_configured_ocr_replacements(text: str) -> str:
1317
1330
  return do_text_replacements(text, OCR_REPLACEMENTS_FILE)
1318
1331
 
1319
1332
 
1320
- def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering=None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
1333
+ def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering: TextFiltering = None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
1321
1334
  global engine_index
1335
+ # TODO Replace this at a later date
1336
+ is_second_ocr = bool(engine)
1322
1337
  if auto_pause_handler:
1323
1338
  auto_pause_handler.stop()
1324
1339
  if engine:
@@ -1328,9 +1343,10 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1328
1343
  break
1329
1344
  else:
1330
1345
  engine_instance = engine_instances[engine_index]
1346
+ engine = engine_instance.name
1331
1347
 
1332
1348
  engine_color = config.get_general('engine_color')
1333
-
1349
+
1334
1350
  start_time = time.time()
1335
1351
  result = engine_instance(img_or_path, furigana_filter_sensitivity)
1336
1352
  res, text, crop_coords = (*result, None)[:3]
@@ -1362,7 +1378,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1362
1378
  if res:
1363
1379
  text = do_configured_ocr_replacements(text)
1364
1380
  if filtering:
1365
- text, orig_text = filtering(text, last_result)
1381
+ text, orig_text = filtering(text, last_result, engine=engine, is_second_ocr=is_second_ocr)
1366
1382
  if get_ocr_language() == "ja" or get_ocr_language() == "zh":
1367
1383
  text = post_process(text, keep_blank_lines=get_ocr_keep_newline())
1368
1384
  if notify and config.get_general('notifications'):
@@ -1382,7 +1398,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1382
1398
  pyperclipfix.copy(text)
1383
1399
  elif write_to == "callback":
1384
1400
  txt_callback(text, orig_text, ocr_start_time,
1385
- img_or_path, bool(engine), filtering, crop_coords)
1401
+ img_or_path, is_second_ocr, filtering, crop_coords)
1386
1402
  elif write_to:
1387
1403
  with Path(write_to).open('a', encoding='utf-8') as f:
1388
1404
  f.write(text + '\n')
@@ -1404,7 +1420,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1404
1420
  This function checks if the detected text area falls entirely within secondary rectangles (menu areas).
1405
1421
 
1406
1422
  :param text: The recognized text from OCR.
1407
- :param crop_coords: Tuple containing (x, y, width, height) of the detected text area relative to the cropped image.
1423
+ :param crop_coords: Tuple containing (x, y, x2, y2) of the detected text area relative to the cropped image.
1408
1424
  :return: True if the text is all menu items (within secondary rectangles), False otherwise.
1409
1425
  """
1410
1426
  if not text or not crop_coords:
@@ -1412,7 +1428,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1412
1428
 
1413
1429
  original_width = obs_screenshot_thread.width
1414
1430
  original_height = obs_screenshot_thread.height
1415
- crop_x, crop_y, crop_w, crop_h = crop_coords
1431
+ crop_x, crop_y, crop_x2, crop_y2 = crop_coords
1416
1432
 
1417
1433
  ocr_config = get_scene_ocr_config()
1418
1434
 
@@ -1430,14 +1446,14 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1430
1446
  return False
1431
1447
 
1432
1448
  if not primary_rectangles:
1433
- if crop_x < 0 or crop_y < 0 or crop_x + crop_w > original_width or crop_y + crop_h > original_height:
1449
+ if crop_x < 0 or crop_y < 0 or crop_x2 > original_width or crop_y2 > original_height:
1434
1450
  return False
1435
1451
  for menu_rect in menu_rectangles:
1436
1452
  rect_left, rect_top, rect_width, rect_height = menu_rect.coordinates
1437
1453
  rect_right = rect_left + rect_width
1438
1454
  rect_bottom = rect_top + rect_height
1439
1455
  if (crop_x >= rect_left and crop_y >= rect_top and
1440
- crop_x + crop_w <= rect_right and crop_y + crop_h <= rect_bottom):
1456
+ crop_x2 <= rect_right and crop_y2 <= rect_bottom):
1441
1457
  return True
1442
1458
  return False
1443
1459
 
@@ -1445,19 +1461,25 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1445
1461
 
1446
1462
  if len(primary_rectangles) == 1:
1447
1463
  primary_rect = primary_rectangles[0]
1448
- primary_left, primary_top = primary_rect.coordinates[0], primary_rect.coordinates[1]
1464
+ primary_left, primary_top, primary_width, primary_height = primary_rect.coordinates
1449
1465
  original_x = crop_x + primary_left
1450
1466
  original_y = crop_y + primary_top
1467
+ original_x2 = crop_x2 + primary_left
1468
+ original_y2 = crop_y2 + primary_top
1451
1469
  else:
1452
1470
  current_y_offset = 0
1453
1471
  original_x = None
1454
1472
  original_y = None
1473
+ original_x2 = None
1474
+ original_y2 = None
1455
1475
  for i, primary_rect in enumerate(primary_rectangles):
1456
1476
  primary_left, primary_top, primary_width, primary_height = primary_rect.coordinates
1457
1477
  section_height = primary_height
1458
1478
  if crop_y >= current_y_offset and crop_y < current_y_offset + section_height:
1459
1479
  original_x = crop_x + primary_left
1460
1480
  original_y = (crop_y - current_y_offset) + primary_top
1481
+ original_x2 = crop_x2 + primary_left
1482
+ original_y2 = crop_y2 + primary_top
1461
1483
  break
1462
1484
  current_y_offset += section_height + 50
1463
1485
  if original_x is None or original_y is None:
@@ -1471,7 +1493,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1471
1493
  rect_right = rect_left + rect_width
1472
1494
  rect_bottom = rect_top + rect_height
1473
1495
  if (original_x >= rect_left and original_y >= rect_top and
1474
- original_x <= rect_right and original_y <= rect_bottom):
1496
+ original_x2 <= rect_right and original_y2 <= rect_bottom):
1475
1497
  return True
1476
1498
 
1477
1499
  return False
@@ -1712,7 +1734,7 @@ def run(read_from=None,
1712
1734
  last_result = ([], engine_index)
1713
1735
  screenshot_event = threading.Event()
1714
1736
  obs_screenshot_thread = OBSScreenshotThread(
1715
- gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs)
1737
+ gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs, is_manual_ocr=bool(screen_capture_on_combo))
1716
1738
  obs_screenshot_thread.start()
1717
1739
  filtering = TextFiltering()
1718
1740
  read_from_readable.append('obs')
@@ -139,7 +139,7 @@ class VADProcessor(ABC):
139
139
  self.extract_audio_and_combine_segments(input_audio, voice_activity, output_audio, padding=get_config().vad.splice_padding)
140
140
  else:
141
141
  ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio, trim_beginning=get_config().vad.trim_beginning, fade_in_duration=0.05, fade_out_duration=0)
142
- return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, self.vad_system_name, voice_activity, output_audio)
142
+ return VADResult(True, max(0, start_time + get_config().vad.beginning_offset), max(0, end_time + get_config().audio.end_offset), self.vad_system_name, voice_activity, output_audio)
143
143
 
144
144
  class SileroVADProcessor(VADProcessor):
145
145
  def __init__(self):
@@ -165,10 +165,12 @@ class WhisperVADProcessor(VADProcessor):
165
165
 
166
166
  def load_whisper_model(self):
167
167
  import stable_whisper as whisper
168
+ import torch
168
169
  if not self.vad_model:
170
+ self.device = "cpu" if get_config().vad.use_cpu_for_inference else "cuda" if torch.cuda.is_available() else "cpu"
169
171
  with warnings.catch_warnings():
170
172
  warnings.simplefilter("ignore")
171
- self.vad_model = whisper.load_model(get_config().vad.whisper_model, device="cpu" if get_config().vad.use_cpu_for_inference else None)
173
+ self.vad_model = whisper.load_faster_whisper(get_config().vad.whisper_model, device=self.device)
172
174
  logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
173
175
  return self.vad_model
174
176
 
@@ -187,7 +189,7 @@ class WhisperVADProcessor(VADProcessor):
187
189
  temperature=0.0)
188
190
  voice_activity = []
189
191
 
190
- logger.debug(result.to_dict())
192
+ logger.debug(json.dumps(result.to_dict(), indent=2))
191
193
 
192
194
  # Process the segments to extract tokens, timestamps, and confidence
193
195
  for i, segment in enumerate(result.segments):
@@ -198,6 +200,10 @@ class WhisperVADProcessor(VADProcessor):
198
200
  else:
199
201
  logger.info(
200
202
  "Unknown single character segment, not skipping, but logging, please report if this is a mistake: " + segment.text)
203
+
204
+ if segment.no_speech_prob and segment.no_speech_prob > 0.8:
205
+ logger.debug(f"Skipping segment with high no_speech_prob: {segment.no_speech_prob} for segment {segment.text} at {segment.start}-{segment.end}")
206
+ continue
201
207
 
202
208
 
203
209
  logger.debug(segment.to_dict())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.15.4
3
+ Version: 2.15.6
4
4
  Summary: A tool for mining sentences from games. Update: Overlay?
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -47,11 +47,12 @@ Requires-Dist: betterproto==2.0.0b7
47
47
  Requires-Dist: obsws-python~=1.7.2
48
48
  Requires-Dist: numpy==2.2.6
49
49
  Requires-Dist: regex
50
+ Requires-Dist: faster-whisper~=1.2.0
50
51
  Dynamic: license-file
51
52
 
52
53
  # GSM - An Immersion toolkit for Games.
53
54
 
54
- ### English | [日本語](../docs/ja/README.md) | [简体中文](../docs/zh/README.md).
55
+ ### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
55
56
 
56
57
  An application designed to assist with language learning through games.
57
58
 
@@ -28,6 +28,7 @@ betterproto==2.0.0b7
28
28
  obsws-python~=1.7.2
29
29
  numpy==2.2.6
30
30
  regex
31
+ faster-whisper~=1.2.0
31
32
 
32
33
  [:sys_platform != "win32"]
33
34
  openai-whisper
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.15.4
3
+ Version: 2.15.6
4
4
  Summary: A tool for mining sentences from games. Update: Overlay?
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -47,11 +47,12 @@ Requires-Dist: betterproto==2.0.0b7
47
47
  Requires-Dist: obsws-python~=1.7.2
48
48
  Requires-Dist: numpy==2.2.6
49
49
  Requires-Dist: regex
50
+ Requires-Dist: faster-whisper~=1.2.0
50
51
  Dynamic: license-file
51
52
 
52
53
  # GSM - An Immersion toolkit for Games.
53
54
 
54
- ### English | [日本語](../docs/ja/README.md) | [简体中文](../docs/zh/README.md).
55
+ ### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
55
56
 
56
57
  An application designed to assist with language learning through games.
57
58
 
@@ -1,6 +1,6 @@
1
1
  # GSM - An Immersion toolkit for Games.
2
2
 
3
- ### English | [日本語](../docs/ja/README.md) | [简体中文](../docs/zh/README.md).
3
+ ### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
4
4
 
5
5
  An application designed to assist with language learning through games.
6
6
 
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "GameSentenceMiner"
10
- version = "2.15.4"
10
+ version = "2.15.6"
11
11
  description = "A tool for mining sentences from games. Update: Overlay?"
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.10"
@@ -56,7 +56,8 @@ dependencies = [
56
56
  "betterproto==2.0.0b7",
57
57
  "obsws-python~=1.7.2",
58
58
  "numpy==2.2.6",
59
- "regex"
59
+ "regex",
60
+ "faster-whisper~=1.2.0"
60
61
  ]
61
62
 
62
63
  # This creates a command-line script named `gamesentenceminer` that will