GameSentenceMiner 2.13.12.tar.gz → 2.13.14.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/config_gui.py +1 -1
  2. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ocr/gsm_ocr_config.py +8 -0
  3. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ocr/owocr_helper.py +19 -6
  4. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/ocr.py +194 -41
  5. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/run.py +40 -27
  6. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/text_log.py +16 -7
  7. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/wip/get_overlay_coords.py +27 -12
  8. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner.egg-info/PKG-INFO +1 -1
  9. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/PKG-INFO +1 -1
  10. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/pyproject.toml +1 -1
  11. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/__init__.py +0 -0
  12. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ai/__init__.py +0 -0
  13. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ai/ai_prompting.py +0 -0
  14. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/anki.py +0 -0
  15. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/__init__.py +0 -0
  16. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/icon.png +0 -0
  17. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/icon128.png +0 -0
  18. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/icon256.png +0 -0
  19. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/icon32.png +0 -0
  20. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/icon512.png +0 -0
  21. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/icon64.png +0 -0
  22. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/assets/pickaxe.png +0 -0
  23. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/gametext.py +0 -0
  24. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/gsm.py +0 -0
  25. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/locales/en_us.json +0 -0
  26. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/locales/ja_jp.json +0 -0
  27. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/locales/zh_cn.json +0 -0
  28. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/obs.py +0 -0
  29. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ocr/__init__.py +0 -0
  30. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ocr/ocrconfig.py +0 -0
  31. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ocr/owocr_area_selector.py +0 -0
  32. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/ocr/ss_picker.py +0 -0
  33. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/__init__.py +0 -0
  34. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/__main__.py +0 -0
  35. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/config.py +0 -0
  36. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/lens_betterproto.py +0 -0
  37. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +0 -0
  38. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/__init__.py +0 -0
  39. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/audio_offset_selector.py +0 -0
  40. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/communication/__init__.py +0 -0
  41. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/communication/send.py +0 -0
  42. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/communication/websocket.py +0 -0
  43. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/configuration.py +0 -0
  44. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/downloader/Untitled_json.py +0 -0
  45. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/downloader/__init__.py +0 -0
  46. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/downloader/download_tools.py +0 -0
  47. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/downloader/oneocr_dl.py +0 -0
  48. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/electron_config.py +0 -0
  49. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/ffmpeg.py +0 -0
  50. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/gsm_utils.py +0 -0
  51. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/model.py +0 -0
  52. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/notification.py +0 -0
  53. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/package.py +0 -0
  54. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/ss_selector.py +0 -0
  55. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/util/window_transparency.py +0 -0
  56. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/vad.py +0 -0
  57. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/__init__.py +0 -0
  58. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/service.py +0 -0
  59. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/__init__.py +0 -0
  60. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  61. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  62. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/favicon.ico +0 -0
  63. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/favicon.svg +0 -0
  64. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/site.webmanifest +0 -0
  65. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/style.css +0 -0
  66. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  67. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  68. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/templates/__init__.py +0 -0
  69. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/templates/index.html +0 -0
  70. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/templates/text_replacements.html +0 -0
  71. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/templates/utility.html +0 -0
  72. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner/web/texthooking_page.py +0 -0
  73. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner.egg-info/SOURCES.txt +0 -0
  74. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner.egg-info/dependency_links.txt +0 -0
  75. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner.egg-info/entry_points.txt +0 -0
  76. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner.egg-info/requires.txt +0 -0
  77. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/GameSentenceMiner.egg-info/top_level.txt +0 -0
  78. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/LICENSE +0 -0
  79. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.14}/setup.cfg +0 -0
GameSentenceMiner/config_gui.py
@@ -1873,7 +1873,7 @@ class ConfigApp:
         gemini_model_i18n = ai_i18n.get('gemini_model', {})
         HoverInfoLabelWidget(ai_frame, text=gemini_model_i18n.get('label', '...'), tooltip=gemini_model_i18n.get('tooltip', '...'),
                              row=self.current_row, column=0)
-        ttk.Combobox(ai_frame, textvariable=self.gemini_model_value, values=['gemma-3n-e4b-it', 'gemini-2.5-flash-lite', 'gemini-2.5-flash','gemini-2.0-flash', 'gemini-2.0-flash-lite'], state="readonly").grid(row=self.current_row, column=1, sticky='EW', pady=2)
+        ttk.Combobox(ai_frame, textvariable=self.gemini_model_value, values=['gemini-2.5-flash-lite', 'gemini-2.5-flash', 'gemma-3-27b-it', 'gemini-2.0-flash', 'gemini-2.0-flash-lite'], state="readonly").grid(row=self.current_row, column=1, sticky='EW', pady=2)
         self.current_row += 1
 
         gemini_key_i18n = ai_i18n.get('gemini_api_key', {})
GameSentenceMiner/ocr/gsm_ocr_config.py
@@ -9,6 +9,7 @@ from dataclasses_json import dataclass_json
 from typing import List, Optional, Union
 
 from GameSentenceMiner.util.configuration import logger, get_app_directory
+from GameSentenceMiner.util.electron_config import get_ocr_use_window_for_config
 from GameSentenceMiner.util.gsm_utils import sanitize_filename
 
 
@@ -92,6 +93,13 @@ class OCRConfig:
                 floor(rectangle.coordinates[2] * width),
                 floor(rectangle.coordinates[3] * height),
             ]
+
+def has_config_changed(current_config: OCRConfig) -> bool:
+    new_config = get_scene_ocr_config(use_window_as_config=get_ocr_use_window_for_config(), window=current_config.window)
+    if new_config.rectangles != current_config.rectangles:
+        logger.info("OCR config has changed.")
+        return True
+    return False
 
 
 def get_window(title):
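`has_config_changed` re-reads the scene's OCR config from disk and reports whether the capture rectangles differ from the copy already in memory; the helper threads below poll it and reload when it returns True. A minimal sketch of that poll-and-reload pattern (the `load_config` and `on_reload` callables are hypothetical stand-ins for the package's loader and callbacks):

```python
import time

def watch_rectangles(current, load_config, on_reload, interval=0.25):
    """Poll the on-disk OCR config; hand a fresh copy to on_reload when
    the rectangle list no longer matches the cached one."""
    while True:
        new = load_config()
        if new.rectangles != current.rectangles:  # same equality test as the diff
            current = new
            on_reload(current)
        time.sleep(interval)
```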
GameSentenceMiner/ocr/owocr_helper.py
@@ -22,7 +22,7 @@ from GameSentenceMiner.util.electron_config import *
 from GameSentenceMiner.ocr.ss_picker import ScreenCropper
 from GameSentenceMiner.owocr.owocr.run import TextFiltering
 from GameSentenceMiner.util.configuration import get_config, get_app_directory, get_temporary_directory
-from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, set_dpi_awareness, get_window, get_ocr_config_path
+from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, has_config_changed, set_dpi_awareness, get_window, get_ocr_config_path
 from GameSentenceMiner.owocr.owocr import screen_coordinate_picker, run
 from GameSentenceMiner.util.gsm_utils import sanitize_filename, do_text_replacements, OCR_REPLACEMENTS_FILE
 import threading
@@ -252,7 +252,8 @@ class ConfigChangeCheckThread(threading.Thread):
     def __init__(self):
         super().__init__(daemon=True)
         self.last_changes = None
-        self.callbacks = []
+        self.config_callbacks = []
+        self.area_callbacks = []
 
     def run(self):
         global ocr_config
@@ -265,20 +266,32 @@ class ConfigChangeCheckThread(threading.Thread):
                 # Only run this block after a change has occurred and then the section is stable (no change)
                 if self.last_changes is not None and not section_changed:
                     logger.info(f"Detected config changes: {self.last_changes}")
-                    for cb in self.callbacks:
+                    for cb in self.config_callbacks:
                         cb(self.last_changes)
                     if hasattr(run, 'handle_config_change'):
                         run.handle_config_change()
                     if any(c in self.last_changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
                         reset_callback_vars()
                     self.last_changes = None
+                ocr_config_changed = has_config_changed(ocr_config)
+                if ocr_config_changed:
+                    logger.info("OCR config has changed, reloading...")
+                    ocr_config = get_ocr_config(use_window_for_config=True, window=obs.get_current_game())
+                    for cb in self.area_callbacks:
+                        cb(ocr_config)
+                    if hasattr(run, 'handle_area_config_changes'):
+                        run.handle_area_config_changes(ocr_config)
+                    reset_callback_vars()
             except Exception as e:
                 logger.debug(f"ConfigChangeCheckThread error: {e}")
             time.sleep(0.25)  # Lowered to 0.25s for more responsiveness
 
-    def add_callback(self, callback):
-        self.callbacks.append(callback)
-
+    def add_config_callback(self, callback):
+        self.config_callbacks.append(callback)
+
+    def add_area_callback(self, callback):
+        self.area_callbacks.append(callback)
+
 
 def reset_callback_vars():
     global previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, previous_text_list, last_ocr2_result
     previous_text = None
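Note how the thread debounces settings changes: `last_changes` is recorded when a change is seen, but the `config_callbacks` only fire on a later poll where the section is stable, so rapid edits collapse into one notification; the new rectangle check, by contrast, runs on every pass. A reduced sketch of the debounce alone (the `read_settings` callable is a hypothetical stand-in):

```python
import time

def poll_with_debounce(read_settings, on_stable_change, interval=0.25):
    """Announce a change only once the value has stopped moving."""
    last = read_settings()
    pending = None  # change seen but not yet announced
    while True:
        current = read_settings()
        if current != last:
            pending = current          # keep collapsing rapid edits
        elif pending is not None:
            on_stable_change(pending)  # one notification per burst
            pending = None
        last = current
        time.sleep(interval)
```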
GameSentenceMiner/owocr/owocr/ocr.py
@@ -17,7 +17,12 @@ from PIL import Image
 from loguru import logger
 import requests
 
-from GameSentenceMiner.util.electron_config import get_ocr_language, get_furigana_filter_sensitivity
+
+try:
+    from GameSentenceMiner.util.electron_config import get_ocr_language, get_furigana_filter_sensitivity
+    from GameSentenceMiner.util.configuration import CommonLanguages
+except ImportError:
+    pass
 
 # from GameSentenceMiner.util.configuration import get_temporary_directory
 
@@ -363,20 +368,21 @@ class GoogleLens:
                 if furigana_filter_sensitivity:
                     for word in line['words']:
                         if 'geometry' not in word:
-                            res += word['plain_text']
+                            res += word['plain_text'] + word['text_separator']
                             continue
                         word_width = word['geometry']['bounding_box']['width'] * img.width
                         word_height = word['geometry']['bounding_box']['height'] * img.height
                         if word_width > furigana_filter_sensitivity and word_height > furigana_filter_sensitivity:
-                            res += word['plain_text']
+                            res += word['plain_text'] + word['text_separator']
                         else:
                             skipped.extend([word['plain_text'] for word in line['words']])
                             continue
                 else:
                     for word in line['words']:
-                        res += word['plain_text']
+                        res += word['plain_text'] + word['text_separator']
+
                 previous_line = paragraph
-            res += '\n'
+                res += '\n'
     # logger.info(
     #     f"Skipped {len(skipped)} chars due to furigana filter sensitivity: {furigana_filter_sensitivity}")
     # widths = []
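The Lens change appends each word's `text_separator` (the delimiter Lens reports between tokens) instead of concatenating bare `plain_text` values, which preserves spacing in languages that use it. A toy illustration with fabricated word dicts in the same shape:

```python
# Fabricated sample mirroring the Lens word structure used above.
words = [
    {'plain_text': 'Hello', 'text_separator': ' '},
    {'plain_text': 'world', 'text_separator': ''},
]

joined_bare = ''.join(w['plain_text'] for w in words)                       # 'Helloworld'
joined_sep = ''.join(w['plain_text'] + w['text_separator'] for w in words)  # 'Hello world'
```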
@@ -894,7 +900,7 @@ class OneOCR:
         self.regex = re.compile(
             r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
 
-    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
+    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False, multiple_crop_coords=False):
         lang = get_ocr_language()
         furigana_filter_sensitivity = get_furigana_filter_sensitivity()
         if lang != self.initial_lang:
@@ -910,6 +916,7 @@ class OneOCR:
         if not img:
             return (False, 'Invalid image provided')
         crop_coords = None
+        crop_coords_list = []
         if sys.platform == 'win32':
             try:
                 ocr_resp = self.model.recognize_pil(img)
@@ -985,6 +992,12 @@ class OneOCR:
                         }
                         boxes.append(box)
                     res = ocr_resp['text']
+                elif multiple_crop_coords:
+                    for line in filtered_lines:
+                        crop_coords_list.append(
+                            (line['bounding_rect']['x1'] - 5, line['bounding_rect']['y1'] - 5,
+                             line['bounding_rect']['x3'] + 5, line['bounding_rect']['y3'] + 5))
+                    res = ocr_resp['text']
                 else:
                     res = ocr_resp['text']
@@ -1004,6 +1017,8 @@ class OneOCR:
             res = res.json()['text']
         if return_coords:
             x = (True, res, filtered_lines)
+        elif multiple_crop_coords:
+            x = (True, res, crop_coords_list)
         else:
             x = (True, res, crop_coords)
         if is_path:
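`OneOCR.__call__` now returns `(success, text, payload)` with a payload whose shape depends on the flags: per-line dicts for `return_coords=True`, a list of padded `(x1, y1, x2, y2)` boxes (5 px margin per side) for the new `multiple_crop_coords=True`, and a single crop tuple otherwise. A caller-side sketch, assuming `ocr` is a constructed `OneOCR` instance and `img` a PIL image:

```python
success, text, boxes = ocr(img, multiple_crop_coords=True)
if success:
    for x1, y1, x2, y2 in boxes:         # one padded box per detected line
        line_img = img.crop((x1, y1, x2, y2))

success, text, line_dicts = ocr(img, return_coords=True)  # per-line geometry dicts
success, text, crop = ocr(img)                            # single crop tuple or None
```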
@@ -1367,70 +1382,208 @@ class GroqOCR:
     def _preprocess(self, img):
         return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')
 
+
+# OpenAI-Compatible Endpoint OCR using LM Studio
+class localLLMOCR:
+    name = 'local_llm_ocr'
+    readable_name = 'Local LLM OCR'
+    key = 'a'
+    available = False
+    last_ocr_time = time.time() - 5
+
+    def __init__(self, config={}, lang='ja'):
+        self.keep_llm_hot_thread = None
+        try:
+            import openai
+        except ImportError:
+            logger.warning('openai module not available, Local LLM OCR will not work!')
+            return
+        import openai, threading
+        try:
+            self.api_url = config.get('api_url', 'http://localhost:1234/v1/chat/completions')
+            self.model = config.get('model', 'qwen2.5-vl-3b-instruct')
+            self.api_key = config.get('api_key', 'lm-studio')
+            self.keep_warm = config.get('keep_warm', True)
+            self.custom_prompt = config.get('prompt', None)
+            self.available = True
+            self.client = openai.OpenAI(
+                base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
+                api_key=self.api_key
+            )
+            logger.info('Local LLM OCR (OpenAI-compatible) ready')
+            self.keep_llm_hot_thread = threading.Thread(target=self.keep_llm_warm, daemon=True)
+            self.keep_llm_hot_thread.start()
+        except Exception as e:
+            logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
+
+    def keep_llm_warm(self):
+        def ocr_blank_black_image():
+            if self.last_ocr_time and (time.time() - self.last_ocr_time) < 5:
+                return
+            import numpy as np
+            from PIL import Image
+            # Create a blank black image
+            blank_image = Image.fromarray(np.zeros((100, 100, 3), dtype=np.uint8))
+            logger.info('Keeping local LLM OCR warm with a blank black image')
+            self(blank_image)
+
+        while True:
+            ocr_blank_black_image()
+            time.sleep(5)
+
+    def __call__(self, img, furigana_filter_sensitivity=0):
+        import base64
+        try:
+            img, is_path = input_to_pil_image(img)
+            img_bytes = pil_image_to_bytes(img)
+            img_base64 = base64.b64encode(img_bytes).decode('utf-8')
+            if self.custom_prompt and self.custom_prompt.strip() != "":
+                prompt = self.custom_prompt.strip()
+            else:
+                prompt = f"""
+                Extract all {CommonLanguages.from_code(get_ocr_language())} Text from Image. Ignore all Furigana. Do not return any commentary, just the text in the image. If there is no text in the image, return "" (Empty String).
+                """
+
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
+                        ],
+                    }
+                ],
+                max_tokens=512,
+                temperature=0.1
+            )
+            self.last_ocr_time = time.time()
+            if response.choices and response.choices[0].message.content:
+                text_output = response.choices[0].message.content.strip()
+                return (True, text_output)
+            else:
+                return (True, "")
+        except Exception as e:
+            return (False, f'Local LLM OCR request failed: {e}')
+
 # class QWENOCR:
 #     name = 'qwenv2'
 #     readable_name = 'Qwen2-VL'
 #     key = 'q'
+
+#     # Class-level attributes for model and processor to ensure they are loaded only once
+#     model = None
+#     processor = None
+#     device = None
 #     available = False
 
-#     def __init__(self, config={}, lang='ja'):
-#         try:
-#             import torch
-#             import transformers
-#             from transformers import AutoModelForImageTextToText, AutoProcessor
-#             self.model = AutoModelForImageTextToText.from_pretrained(
-#                 "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"
-#             )
-#             self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
-#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
-#             print(self.device)
-#             self.available = True
-#             logger.info('Qwen2.5-VL ready')
-#         except Exception as e:
-#             logger.warning(f'Qwen2-VL not available: {e}')
+#     @classmethod
+#     def initialize(cls):
+#         import torch
+#         from transformers import AutoModelForImageTextToText, AutoProcessor
+#         """
+#         Class method to initialize the model. Call this once at the start of your application.
+#         This prevents reloading the model on every instantiation.
+#         """
+#         if cls.model is not None:
+#             logger.info('Qwen2-VL is already initialized.')
+#             return
 
-#     def __call__(self, img, furigana_filter_sensitivity=0):
-#         if not self.available:
-#             return (False, 'Qwen2-VL is not available.')
 #         try:
-#             img, is_path = input_to_pil_image(img)
-
-#             # img.show()
+#             if not torch.cuda.is_available():
+#                 logger.warning("CUDA not available, Qwen2-VL will run on CPU, which will be very slow.")
+#                 # You might want to prevent initialization on CPU entirely
+#                 # raise RuntimeError("CUDA is required for efficient Qwen2-VL operation.")
+
+#             cls.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+#             cls.model = AutoModelForImageTextToText.from_pretrained(
+#                 "Qwen/Qwen2-VL-2B-Instruct",
+#                 torch_dtype="auto",  # Uses bfloat16/float16 if available, which is faster
+#                 device_map=cls.device
+#             )
+#             # For PyTorch 2.0+, torch.compile can significantly speed up inference after a warm-up call
+#             # cls.model = torch.compile(cls.model)
+
+#             cls.processor = AutoProcessor.from_pretrained(
+#                 "Qwen/Qwen2-VL-2B-Instruct",
+#                 use_fast=True
+#             )
+
+#             cls.available = True
+
 #             conversation = [
 #                 {
 #                     "role": "user",
 #                     "content": [
 #                         {"type": "image"},
-#                         {"type": "text", "text": "Analyze the image. Extract text *only* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana (Small characters above the kanji). Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."},
+#                         {"type": "text", "text": "Extract all the text from this image, ignore all furigana."},
 #                     ],
 #                 }
 #             ]
-#             text_prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
+
+#             # The same prompt is applied to all images in the batch
+#             cls.text_prompt = cls.processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+#             logger.info(f'Qwen2.5-VL ready on device: {cls.device}')
+#         except Exception as e:
+#             logger.warning(f'Qwen2-VL not available: {e}')
+#             cls.available = False
+
+#     def __init__(self, config={}, lang='ja'):
+#         # The __init__ is now very lightweight. It just checks if initialization has happened.
+#         if not self.available:
+#             raise RuntimeError("QWENOCR has not been initialized. Call QWENOCR.initialize() first.")
+
+#     def __call__(self, images):
+#         """
+#         Processes a single image or a list of images.
+#         :param images: A single image (path or PIL.Image) or a list of images.
+#         :return: A tuple (success, list_of_results)
+#         """
+#         if not self.available:
+#             return (False, ['Qwen2-VL is not available.'])
+
+#         try:
+#             # Standardize input to be a list
+#             if not isinstance(images, list):
+#                 images = [images]
+
+#             pil_images = [input_to_pil_image(img)[0] for img in images]
+
+#             # The processor handles batching of images and text prompts
 #             inputs = self.processor(
-#                 text=[text_prompt], images=[img], padding=True, return_tensors="pt"
-#             )
-#             inputs = inputs.to(self.device)
-#             output_ids = self.model.generate(**inputs, max_new_tokens=128)
+#                 text=[self.text_prompt] * len(pil_images),
+#                 images=pil_images,
+#                 padding=True,
+#                 return_tensors="pt"
+#             ).to(self.device)
+
+#             output_ids = self.model.generate(**inputs, max_new_tokens=32)
+
+#             # The decoding logic needs to be slightly adjusted for batching
+#             input_ids_len = [len(x) for x in inputs.input_ids]
 #             generated_ids = [
-#                 output_ids[len(input_ids):]
-#                 for input_ids, output_ids in zip(inputs.input_ids, output_ids)
+#                 output_ids[i][input_ids_len[i]:] for i in range(len(input_ids_len))
 #             ]
+
 #             output_text = self.processor.batch_decode(
 #                 generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
 #             )
-#             return (True, output_text[0] if output_text else "")
+
+#             return (True, output_text)
 #         except Exception as e:
-#             return (False, f'Qwen2-VL inference failed: {e}')
-
-#     def _preprocess(self, img):
-#         return base64.b64encode(pil_image_to_bytes(img, png_compression=6)).decode('utf-8')
+#             return (False, [f'Qwen2-VL inference failed: {e}'])
 
 
+# QWENOCR.initialize()
 # qwenocr = QWENOCR()
 
+# localOCR = localLLMOCR(config={'api_url': 'http://localhost:1234/v1/chat/completions', 'model': 'qwen2.5-vl-3b-instruct'})
+
 # for i in range(10):
 #     start_time = time.time()
-#     res, text = qwenocr(Image.open(r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\test_furigana.png"), furigana_filter_sensitivity=0) # Example usage
+#     res, text = localOCR(Image.open(r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\test_furigana.png")) # Example usage
 #     end_time = time.time()
 
 #     print(f"Time taken: {end_time - start_time:.2f} seconds")
GameSentenceMiner/owocr/owocr/run.py
@@ -800,8 +800,11 @@ class ScreenshotThread(threading.Thread):
 
 def set_last_image(image):
     global last_image
-    if image == last_image:
-        return
+    try:
+        if image == last_image:
+            return
+    except Exception:
+        pass
     try:
         if last_image is not None and hasattr(last_image, "close"):
             last_image.close()
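Wrapping the equality check likely guards against Pillow raising during comparison: `Image.__eq__` reads pixel data via `tobytes()`, which fails once an image has been closed, and `set_last_image` itself closes the previous frame. A small reproduction of that failure mode (behavior observed with recent Pillow versions; treat the specifics as an assumption):

```python
from PIL import Image

a = Image.new('RGB', (4, 4))
b = Image.new('RGB', (4, 4))
b.close()

try:
    _ = (a == b)  # __eq__ compares pixel data, touching the closed image
except ValueError as e:
    print('comparison raised:', e)
```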
@@ -890,43 +893,44 @@ class OBSScreenshotThread(threading.Thread):
             logger.info(
                 "Using default aspect ratio scaling (original resolution).")
             return width, height
+
+    def init_config(self, source=None, scene=None):
+        import GameSentenceMiner.obs as obs
+        obs.update_current_game()
+        self.current_source = source if source else obs.get_active_source()
+        logger.info(f"Current OBS source: {self.current_source}")
+        self.source_width = self.current_source.get(
+            "sceneItemTransform").get("sourceWidth") or self.width
+        self.source_height = self.current_source.get(
+            "sceneItemTransform").get("sourceHeight") or self.height
+        if self.source_width and self.source_height:
+            self.width, self.height = self.scale_down_width_height(
+                self.source_width, self.source_height)
+            logger.info(
+                f"Using OBS source dimensions: {self.width}x{self.height}")
+        self.current_source_name = self.current_source.get(
+            "sourceName") or None
+        self.current_scene = scene if scene else obs.get_current_game()
+        self.ocr_config = get_scene_ocr_config()
+        if not self.ocr_config:
+            logger.error("No OCR config found for the current scene.")
+            return
+        self.ocr_config.scale_to_custom_size(self.width, self.height)
 
     def run(self):
         global last_image
         from PIL import Image
         import GameSentenceMiner.obs as obs
 
-        def init_config(source=None, scene=None):
-            obs.update_current_game()
-            self.current_source = source if source else obs.get_active_source()
-            logger.info(f"Current OBS source: {self.current_source}")
-            self.source_width = self.current_source.get(
-                "sceneItemTransform").get("sourceWidth") or self.width
-            self.source_height = self.current_source.get(
-                "sceneItemTransform").get("sourceHeight") or self.height
-            if self.source_width and self.source_height:
-                self.width, self.height = self.scale_down_width_height(
-                    self.source_width, self.source_height)
-                logger.info(
-                    f"Using OBS source dimensions: {self.width}x{self.height}")
-            self.current_source_name = self.current_source.get(
-                "sourceName") or None
-            self.current_scene = scene if scene else obs.get_current_game()
-            self.ocr_config = get_scene_ocr_config()
-            if not self.ocr_config:
-                logger.error("No OCR config found for the current scene.")
-                return
-            self.ocr_config.scale_to_custom_size(self.width, self.height)
-
         # Register a scene switch callback in obsws
         def on_scene_switch(scene):
             logger.info(f"Scene switched to: {scene}. Loading new OCR config.")
-            init_config(scene=scene)
+            self.init_config(scene=scene)
 
         asyncio.run(obs.register_scene_change_callback(on_scene_switch))
 
         self.connect_obs()
-        init_config()
+        self.init_config()
         start = time.time()
         while not terminated:
             if not screenshot_event.wait(timeout=0.1):
@@ -1396,6 +1400,7 @@ def run(read_from=None,
     global notifier
     global websocket_server_thread
     global screenshot_thread
+    global obs_screenshot_thread
     global image_queue
     global ocr_1
     global ocr_2
@@ -1555,7 +1560,15 @@ def run(read_from=None,
         if any(c in changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
             last_result = ([], engine_index)
             engine_change_handler_name(get_ocr_ocr1())
-    config_check_thread.add_callback(handle_config_changes)
+
+    def handle_area_config_changes(changes):
+        if screenshot_thread:
+            screenshot_thread.ocr_config = get_scene_ocr_config()
+        if obs_screenshot_thread:
+            obs_screenshot_thread.init_config()
+
+    config_check_thread.add_config_callback(handle_config_changes)
+    config_check_thread.add_area_callback(handle_area_config_changes)
 
     while not terminated:
         ocr_start_time = datetime.now()
GameSentenceMiner/util/text_log.py
@@ -4,9 +4,12 @@ from datetime import datetime
 from difflib import SequenceMatcher
 from typing import Optional
 
+import rapidfuzz
+
 from GameSentenceMiner.util.gsm_utils import remove_html_and_cloze_tags
 from GameSentenceMiner.util.configuration import logger, get_config, gsm_state
 from GameSentenceMiner.util.model import AnkiCard
+import re
 
 initial_time = datetime.now()
@@ -107,20 +110,26 @@ class GameText:
 
 game_log = GameText()
 
-
-def similar(a, b):
-    return SequenceMatcher(None, a, b).ratio()
+def strip_whitespace_and_punctuation(text: str) -> str:
+    """
+    Strips whitespace and punctuation from the given text.
+    """
+    # Remove all whitespace and specified punctuation using regex
+    # Includes Japanese and common punctuation
+    return re.sub(r'[\s 、。「」【】《》., ]', '', text).strip()
 
 
 def lines_match(texthooker_sentence, anki_sentence):
-    texthooker_sentence = texthooker_sentence.replace("\n", "").replace("\r", "").replace(' ', '').strip()
-    anki_sentence = anki_sentence.replace("\n", "").replace("\r", "").replace(' ', '').strip()
-    similarity = similar(texthooker_sentence, anki_sentence)
+    # Replace newlines, spaces, other whitespace characters, AND japanese punctuation
+    texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
+    anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
+    similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
+    logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
     if texthooker_sentence in anki_sentence:
         logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
     elif anki_sentence in texthooker_sentence:
         logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
-    return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence and similarity > 0.8)
+    return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence and similarity > 80)
 
 
 def get_text_event(last_note) -> GameLine:
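The threshold moves from `0.8` to `80` because the similarity scale changed along with the library: `SequenceMatcher.ratio()` returns a float in [0, 1], while `rapidfuzz.fuzz.ratio` returns a percentage in [0, 100]. A quick comparison of the two scales (assuming `rapidfuzz` is installed):

```python
from difflib import SequenceMatcher
import rapidfuzz

a, b = "こんにちは世界", "こんにちは世界です"
print(SequenceMatcher(None, a, b).ratio())  # 0.875, on a 0..1 scale
print(rapidfuzz.fuzz.ratio(a, b))           # 87.5, on a 0..100 scale
```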
GameSentenceMiner/wip/get_overlay_coords.py
@@ -315,7 +315,7 @@ async def get_full_screenshot() -> Image.Image | None:
     else:
         monitors = [monitors[0]]
     monitor = monitors[get_config().wip.monitor_to_capture]
-    img = get_screenshot_PIL(compression=90, img_format='jpg')
+    img = get_screenshot_PIL(compression=90, img_format='jpg', width=monitor['width'] // 2, height=monitor['height'] // 2)
     # Put the image over a transparent background without stretching
     new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
     # Calculate coordinates to center img horizontally and vertically
@@ -356,34 +356,49 @@ async def get_full_screenshot() -> Image.Image | None:
         return None
 
 async def do_work(sentence_to_check=None):
+    from math import floor
     # connect_to_obs_sync(5)
     logger.info("in find_box")
     # await asyncio.sleep(.5)
     logger.info("after_initial_sleep")
+    start_time = time.time()
     full_screenshot_image, monitor_width, monitor_height = await get_full_screenshot()
 
-    oneocr_results = oneocr(full_screenshot_image)
-    crop_coords = oneocr_results[2]
-    logger.info("Cropping full screenshot with coordinates: %s", crop_coords)
-    cropped_image = full_screenshot_image.crop(crop_coords)
+    oneocr_results = oneocr(full_screenshot_image, multiple_crop_coords=True)
+    crop_coords_list = oneocr_results[2]
+    logger.info("Cropping full screenshot with coordinates: %s", crop_coords_list)
+    cropped_images = []
+    img = Image.new("RGBA", (monitor_width, monitor_height), (0, 0, 0, 0))
+    for crop_coords in crop_coords_list:
+        cropped_image = full_screenshot_image.crop(crop_coords)
+        cropped_images.append(cropped_image)
+        # Paste the cropped image onto the transparent background
+        img.paste(cropped_image, (floor(crop_coords[0]), floor(crop_coords[1])))
+
+    # img.show()
+
     # Convert 1/4
     if os.path.exists("C:\\Users\\Beangate\\GSM\\temp"):
-        cropped_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+        img.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+    logger.info(f"Time taken to get cropped image for lens: {time.time() - start_time:.2f} seconds")
+
     # full_screenshot_image.show()
-    if cropped_image:
+    if img:
+        start_time = time.time()
         logger.info("Full screenshot captured successfully. Now performing OCR...")
         # ocr_results = oneocr(full_screenshot_image, return_coords=True)
-        google_ocr_results = lens(cropped_image, return_coords=True)[2]
+        google_ocr_results = lens(img, return_coords=True)[2]
 
         ret = extract_text_with_pixel_boxes(
             api_response=google_ocr_results,
             original_width=monitor_width,
             original_height=monitor_height,
-            crop_x=crop_coords[0],
-            crop_y=crop_coords[1],
-            crop_width=crop_coords[2] - crop_coords[0],
-            crop_height=crop_coords[3] - crop_coords[1]
+            crop_x=0,
+            crop_y=0,
+            crop_width=img.width,
+            crop_height=img.height
         )
+        logger.info(f"Time taken for Lens OCR: {time.time() - start_time:.2f} seconds")
 
         # boxes_of_text = google_ocr_results[2]
         # logger.info(f"Boxes of text found: {boxes_of_text}")
GameSentenceMiner.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.13.12
+Version: 2.13.14
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.13.12
+Version: 2.13.14
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "GameSentenceMiner"
-version = "2.13.12"
+version = "2.13.14"
 description = "A tool for mining sentences from games. Update: Overlay?"
 readme = "README.md"
 requires-python = ">=3.10"