GameSentenceMiner 2.13.12.tar.gz → 2.13.13.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/config_gui.py +1 -1
  2. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/gsm_ocr_config.py +8 -0
  3. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/owocr_helper.py +19 -6
  4. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/ocr.py +189 -37
  5. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/run.py +40 -27
  6. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/text_log.py +16 -7
  7. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/wip/get_overlay_coords.py +27 -12
  8. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/PKG-INFO +1 -1
  9. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/PKG-INFO +1 -1
  10. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/pyproject.toml +1 -1
  11. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/__init__.py +0 -0
  12. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ai/__init__.py +0 -0
  13. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ai/ai_prompting.py +0 -0
  14. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/anki.py +0 -0
  15. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/__init__.py +0 -0
  16. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon.png +0 -0
  17. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon128.png +0 -0
  18. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon256.png +0 -0
  19. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon32.png +0 -0
  20. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon512.png +0 -0
  21. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon64.png +0 -0
  22. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/pickaxe.png +0 -0
  23. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/gametext.py +0 -0
  24. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/gsm.py +0 -0
  25. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/locales/en_us.json +0 -0
  26. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/locales/ja_jp.json +0 -0
  27. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/locales/zh_cn.json +0 -0
  28. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/obs.py +0 -0
  29. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/__init__.py +0 -0
  30. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/ocrconfig.py +0 -0
  31. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/owocr_area_selector.py +0 -0
  32. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/ss_picker.py +0 -0
  33. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/__init__.py +0 -0
  34. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/__main__.py +0 -0
  35. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/config.py +0 -0
  36. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/lens_betterproto.py +0 -0
  37. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +0 -0
  38. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/__init__.py +0 -0
  39. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/audio_offset_selector.py +0 -0
  40. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/communication/__init__.py +0 -0
  41. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/communication/send.py +0 -0
  42. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/communication/websocket.py +0 -0
  43. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/configuration.py +0 -0
  44. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/Untitled_json.py +0 -0
  45. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/__init__.py +0 -0
  46. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/download_tools.py +0 -0
  47. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/oneocr_dl.py +0 -0
  48. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/electron_config.py +0 -0
  49. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/ffmpeg.py +0 -0
  50. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/gsm_utils.py +0 -0
  51. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/model.py +0 -0
  52. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/notification.py +0 -0
  53. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/package.py +0 -0
  54. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/ss_selector.py +0 -0
  55. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/window_transparency.py +0 -0
  56. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/vad.py +0 -0
  57. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/__init__.py +0 -0
  58. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/service.py +0 -0
  59. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/__init__.py +0 -0
  60. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  61. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  62. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/favicon.ico +0 -0
  63. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/favicon.svg +0 -0
  64. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/site.webmanifest +0 -0
  65. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/style.css +0 -0
  66. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  67. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  68. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/__init__.py +0 -0
  69. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/index.html +0 -0
  70. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/text_replacements.html +0 -0
  71. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/utility.html +0 -0
  72. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/texthooking_page.py +0 -0
  73. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/SOURCES.txt +0 -0
  74. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/dependency_links.txt +0 -0
  75. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/entry_points.txt +0 -0
  76. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/requires.txt +0 -0
  77. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/top_level.txt +0 -0
  78. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/LICENSE +0 -0
  79. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/setup.cfg +0 -0
GameSentenceMiner/config_gui.py:
@@ -1873,7 +1873,7 @@ class ConfigApp:
         gemini_model_i18n = ai_i18n.get('gemini_model', {})
         HoverInfoLabelWidget(ai_frame, text=gemini_model_i18n.get('label', '...'), tooltip=gemini_model_i18n.get('tooltip', '...'),
                              row=self.current_row, column=0)
-        ttk.Combobox(ai_frame, textvariable=self.gemini_model_value, values=['gemma-3n-e4b-it', 'gemini-2.5-flash-lite', 'gemini-2.5-flash','gemini-2.0-flash', 'gemini-2.0-flash-lite'], state="readonly").grid(row=self.current_row, column=1, sticky='EW', pady=2)
+        ttk.Combobox(ai_frame, textvariable=self.gemini_model_value, values=['gemini-2.5-flash-lite', 'gemini-2.5-flash', 'gemma-3-27b-it', 'gemini-2.0-flash', 'gemini-2.0-flash-lite'], state="readonly").grid(row=self.current_row, column=1, sticky='EW', pady=2)
         self.current_row += 1

         gemini_key_i18n = ai_i18n.get('gemini_api_key', {})
GameSentenceMiner/ocr/gsm_ocr_config.py:
@@ -9,6 +9,7 @@ from dataclasses_json import dataclass_json
 from typing import List, Optional, Union

 from GameSentenceMiner.util.configuration import logger, get_app_directory
+from GameSentenceMiner.util.electron_config import get_ocr_use_window_for_config
 from GameSentenceMiner.util.gsm_utils import sanitize_filename


@@ -92,6 +93,13 @@ class OCRConfig:
                 floor(rectangle.coordinates[2] * width),
                 floor(rectangle.coordinates[3] * height),
             ]
+
+def has_config_changed(current_config: OCRConfig) -> bool:
+    new_config = get_scene_ocr_config(use_window_as_config=get_ocr_use_window_for_config(), window=current_config.window)
+    if new_config.rectangles != current_config.rectangles:
+        logger.info("OCR config has changed.")
+        return True
+    return False


 def get_window(title):
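The new module-level helper is meant to be polled: a caller keeps its current OCRConfig and asks whether the scene's stored config now differs. A minimal sketch of that pattern, assuming the get_scene_ocr_config signature shown above (the interval is illustrative; the real consumer is the ConfigChangeCheckThread further down in this diff):

    import time

    # ocr_config was loaded earlier via get_scene_ocr_config(...)
    while True:
        if has_config_changed(ocr_config):
            # Reload so the new rectangles take effect
            ocr_config = get_scene_ocr_config(
                use_window_as_config=get_ocr_use_window_for_config(),
                window=ocr_config.window)
        time.sleep(0.25)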
GameSentenceMiner/ocr/owocr_helper.py:
@@ -22,7 +22,7 @@ from GameSentenceMiner.util.electron_config import *
 from GameSentenceMiner.ocr.ss_picker import ScreenCropper
 from GameSentenceMiner.owocr.owocr.run import TextFiltering
 from GameSentenceMiner.util.configuration import get_config, get_app_directory, get_temporary_directory
-from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, set_dpi_awareness, get_window, get_ocr_config_path
+from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, has_config_changed, set_dpi_awareness, get_window, get_ocr_config_path
 from GameSentenceMiner.owocr.owocr import screen_coordinate_picker, run
 from GameSentenceMiner.util.gsm_utils import sanitize_filename, do_text_replacements, OCR_REPLACEMENTS_FILE
 import threading
@@ -252,7 +252,8 @@ class ConfigChangeCheckThread(threading.Thread):
     def __init__(self):
         super().__init__(daemon=True)
         self.last_changes = None
-        self.callbacks = []
+        self.config_callbacks = []
+        self.area_callbacks = []

     def run(self):
         global ocr_config
@@ -265,20 +266,32 @@
                 # Only run this block after a change has occurred and then the section is stable (no change)
                 if self.last_changes is not None and not section_changed:
                     logger.info(f"Detected config changes: {self.last_changes}")
-                    for cb in self.callbacks:
+                    for cb in self.config_callbacks:
                         cb(self.last_changes)
                     if hasattr(run, 'handle_config_change'):
                         run.handle_config_change()
                     if any(c in self.last_changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
                         reset_callback_vars()
                     self.last_changes = None
+                ocr_config_changed = has_config_changed(ocr_config)
+                if ocr_config_changed:
+                    logger.info("OCR config has changed, reloading...")
+                    ocr_config = get_ocr_config(use_window_for_config=True, window=obs.get_current_game())
+                    for cb in self.area_callbacks:
+                        cb(ocr_config)
+                    if hasattr(run, 'handle_area_config_changes'):
+                        run.handle_area_config_changes(ocr_config)
+                    reset_callback_vars()
             except Exception as e:
                 logger.debug(f"ConfigChangeCheckThread error: {e}")
             time.sleep(0.25)  # Lowered to 0.25s for more responsiveness

-    def add_callback(self, callback):
-        self.callbacks.append(callback)
-
+    def add_config_callback(self, callback):
+        self.config_callbacks.append(callback)
+
+    def add_area_callback(self, callback):
+        self.area_callbacks.append(callback)
+

 def reset_callback_vars():
     global previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, previous_text_list, last_ocr2_result
     previous_text = None
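With the split into config_callbacks and area_callbacks, option changes and OCR-area changes are now dispatched separately: config callbacks receive the changed-keys payload, while area callbacks receive the freshly reloaded OCRConfig. A hedged registration sketch (the handler bodies are illustrative, not from the package):

    thread = ConfigChangeCheckThread()
    thread.add_config_callback(lambda changes: logger.info(f"options changed: {changes}"))
    thread.add_area_callback(lambda cfg: logger.info(f"areas reloaded: {len(cfg.rectangles)} rect(s)"))
    thread.start()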
GameSentenceMiner/owocr/owocr/ocr.py:
@@ -17,7 +17,12 @@ from PIL import Image
 from loguru import logger
 import requests

-from GameSentenceMiner.util.electron_config import get_ocr_language, get_furigana_filter_sensitivity
+
+try:
+    from GameSentenceMiner.util.electron_config import get_ocr_language, get_furigana_filter_sensitivity
+    from GameSentenceMiner.util.configuration import CommonLanguages
+except ImportError:
+    pass

 # from GameSentenceMiner.util.configuration import get_temporary_directory

@@ -894,7 +899,7 @@ class OneOCR:
         self.regex = re.compile(
             r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')

-    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
+    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False, multiple_crop_coords=False):
         lang = get_ocr_language()
         furigana_filter_sensitivity = get_furigana_filter_sensitivity()
         if lang != self.initial_lang:
@@ -910,6 +915,7 @@ class OneOCR:
         if not img:
             return (False, 'Invalid image provided')
         crop_coords = None
+        crop_coords_list = []
         if sys.platform == 'win32':
             try:
                 ocr_resp = self.model.recognize_pil(img)
@@ -985,6 +991,12 @@ class OneOCR:
                         }
                         boxes.append(box)
                     res = ocr_resp['text']
+                elif multiple_crop_coords:
+                    for line in filtered_lines:
+                        crop_coords_list.append(
+                            (line['bounding_rect']['x1'] - 5, line['bounding_rect']['y1'] - 5,
+                             line['bounding_rect']['x3'] + 5, line['bounding_rect']['y3'] + 5))
+                    res = ocr_resp['text']
                 else:
                     res = ocr_resp['text']

@@ -1004,6 +1016,8 @@ class OneOCR:
             res = res.json()['text']
         if return_coords:
             x = (True, res, filtered_lines)
+        elif multiple_crop_coords:
+            x = (True, res, crop_coords_list)
         else:
             x = (True, res, crop_coords)
         if is_path:
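OneOCR.__call__ now has three possible shapes for the third tuple element: per-line dictionaries when return_coords=True, a list of padded (x1, y1, x2, y2) boxes when multiple_crop_coords=True, and the single crop box otherwise. A sketch of consuming the new list form, assuming an already-constructed engine instance named one_ocr (hypothetical name):

    success, text, boxes = one_ocr(img, multiple_crop_coords=True)
    if success:
        # Each box is a line's bounding rect expanded by 5 px, per the diff above
        line_images = [img.crop(box) for box in boxes]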
@@ -1367,70 +1381,208 @@ class GroqOCR:
     def _preprocess(self, img):
         return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')

+
+# OpenAI-Compatible Endpoint OCR using LM Studio
+class localLLMOCR:
+    name= 'local_llm_ocr'
+    readable_name = 'Local LLM OCR'
+    key = 'a'
+    available = False
+    last_ocr_time = time.time() - 5
+
+    def __init__(self, config={}, lang='ja'):
+        self.keep_llm_hot_thread = None
+        try:
+            import openai
+        except ImportError:
+            logger.warning('openai module not available, Local LLM OCR will not work!')
+            return
+        import openai, threading
+        try:
+            self.api_url = config.get('api_url', 'http://localhost:1234/v1/chat/completions')
+            self.model = config.get('model', 'qwen2.5-vl-3b-instruct')
+            self.api_key = config.get('api_key', 'lm-studio')
+            self.keep_warm = config.get('keep_warm', True)
+            self.custom_prompt = config.get('prompt', None)
+            self.available = True
+            self.client = openai.OpenAI(
+                base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
+                api_key=self.api_key
+            )
+            logger.info('Local LLM OCR (OpenAI-compatible) ready')
+            self.keep_llm_hot_thread = threading.Thread(target=self.keep_llm_warm, daemon=True)
+            self.keep_llm_hot_thread.start()
+        except Exception as e:
+            logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
+
+    def keep_llm_warm(self):
+        def ocr_blank_black_image():
+            if self.last_ocr_time and (time.time() - self.last_ocr_time) < 5:
+                return
+            import numpy as np
+            from PIL import Image
+            # Create a blank black image
+            blank_image = Image.fromarray(np.zeros((100, 100, 3), dtype=np.uint8))
+            logger.info('Keeping local LLM OCR warm with a blank black image')
+            self(blank_image)
+
+        while True:
+            ocr_blank_black_image()
+            time.sleep(5)
+
+    def __call__(self, img, furigana_filter_sensitivity=0):
+        import base64
+        try:
+            img, is_path = input_to_pil_image(img)
+            img_bytes = pil_image_to_bytes(img)
+            img_base64 = base64.b64encode(img_bytes).decode('utf-8')
+            if self.custom_prompt and self.custom_prompt.strip() != "":
+                prompt = self.custom_prompt.strip()
+            else:
+                prompt = f"""
+                Extract all {CommonLanguages.from_code(get_ocr_language())} Text from Image. Ignore all Furigana. Do not return any commentary, just the text in the image. If there is no text in the image, return "" (Empty String).
+                """
+
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
+                        ],
+                    }
+                ],
+                max_tokens=512,
+                temperature=0.1
+            )
+            self.last_ocr_time = time.time()
+            if response.choices and response.choices[0].message.content:
+                text_output = response.choices[0].message.content.strip()
+                return (True, text_output)
+            else:
+                return (True, "")
+        except Exception as e:
+            return (False, f'Local LLM OCR request failed: {e}')
+
 # class QWENOCR:
 #     name = 'qwenv2'
 #     readable_name = 'Qwen2-VL'
 #     key = 'q'
+
+#     # Class-level attributes for model and processor to ensure they are loaded only once
+#     model = None
+#     processor = None
+#     device = None
 #     available = False

-#     def __init__(self, config={}, lang='ja'):
-#         try:
-#             import torch
-#             import transformers
-#             from transformers import AutoModelForImageTextToText, AutoProcessor
-#             self.model = AutoModelForImageTextToText.from_pretrained(
-#                 "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"
-#             )
-#             self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
-#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
-#             print(self.device)
-#             self.available = True
-#             logger.info('Qwen2.5-VL ready')
-#         except Exception as e:
-#             logger.warning(f'Qwen2-VL not available: {e}')
+#     @classmethod
+#     def initialize(cls):
+#         import torch
+#         from transformers import AutoModelForImageTextToText, AutoProcessor
+#         """
+#         Class method to initialize the model. Call this once at the start of your application.
+#         This prevents reloading the model on every instantiation.
+#         """
+#         if cls.model is not None:
+#             logger.info('Qwen2-VL is already initialized.')
+#             return

-#     def __call__(self, img, furigana_filter_sensitivity=0):
-#         if not self.available:
-#             return (False, 'Qwen2-VL is not available.')
 #         try:
-#             img, is_path = input_to_pil_image(img)
-
-#             # img.show()
+#             if not torch.cuda.is_available():
+#                 logger.warning("CUDA not available, Qwen2-VL will run on CPU, which will be very slow.")
+#                 # You might want to prevent initialization on CPU entirely
+#                 # raise RuntimeError("CUDA is required for efficient Qwen2-VL operation.")
+
+#             cls.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+#             cls.model = AutoModelForImageTextToText.from_pretrained(
+#                 "Qwen/Qwen2-VL-2B-Instruct",
+#                 torch_dtype="auto",  # Uses bfloat16/float16 if available, which is faster
+#                 device_map=cls.device
+#             )
+#             # For PyTorch 2.0+, torch.compile can significantly speed up inference after a warm-up call
+#             # cls.model = torch.compile(cls.model)
+
+#             cls.processor = AutoProcessor.from_pretrained(
+#                 "Qwen/Qwen2-VL-2B-Instruct",
+#                 use_fast=True
+#             )
+
+#             cls.available = True
+
 #             conversation = [
 #                 {
 #                     "role": "user",
 #                     "content": [
 #                         {"type": "image"},
-#                         {"type": "text", "text": "Analyze the image. Extract text *only* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana (Small characters above the kanji). Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."},
+#                         {"type": "text", "text": "Extract all the text from this image, ignore all furigana."},
 #                     ],
 #                 }
 #             ]
-#             text_prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
+
+#             # The same prompt is applied to all images in the batch
+#             cls.text_prompt = cls.processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+#             logger.info(f'Qwen2.5-VL ready on device: {cls.device}')
+#         except Exception as e:
+#             logger.warning(f'Qwen2-VL not available: {e}')
+#             cls.available = False
+
+#     def __init__(self, config={}, lang='ja'):
+#         # The __init__ is now very lightweight. It just checks if initialization has happened.
+#         if not self.available:
+#             raise RuntimeError("QWENOCR has not been initialized. Call QWENOCR.initialize() first.")
+
+#     def __call__(self, images):
+#         """
+#         Processes a single image or a list of images.
+#         :param images: A single image (path or PIL.Image) or a list of images.
+#         :return: A tuple (success, list_of_results)
+#         """
+#         if not self.available:
+#             return (False, ['Qwen2-VL is not available.'])
+
+#         try:
+#             # Standardize input to be a list
+#             if not isinstance(images, list):
+#                 images = [images]
+
+#             pil_images = [input_to_pil_image(img)[0] for img in images]
+
+#             # The processor handles batching of images and text prompts
 #             inputs = self.processor(
-#                 text=[text_prompt], images=[img], padding=True, return_tensors="pt"
-#             )
-#             inputs = inputs.to(self.device)
-#             output_ids = self.model.generate(**inputs, max_new_tokens=128)
+#                 text=[self.text_prompt] * len(pil_images),
+#                 images=pil_images,
+#                 padding=True,
+#                 return_tensors="pt"
+#             ).to(self.device)
+
+#             output_ids = self.model.generate(**inputs, max_new_tokens=32)
+
+#             # The decoding logic needs to be slightly adjusted for batching
+#             input_ids_len = [len(x) for x in inputs.input_ids]
 #             generated_ids = [
-#                 output_ids[len(input_ids):]
-#                 for input_ids, output_ids in zip(inputs.input_ids, output_ids)
+#                 output_ids[i][input_ids_len[i]:] for i in range(len(input_ids_len))
 #             ]
+
 #             output_text = self.processor.batch_decode(
 #                 generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
 #             )
-#             return (True, output_text[0] if output_text else "")
+
+#             return (True, output_text)
 #         except Exception as e:
-#             return (False, f'Qwen2-VL inference failed: {e}')
-
-#     def _preprocess(self, img):
-#         return base64.b64encode(pil_image_to_bytes(img, png_compression=6)).decode('utf-8')
+#             return (False, [f'Qwen2-VL inference failed: {e}'])


+# QWENOCR.initialize()
 # qwenocr = QWENOCR()

+# localOCR = localLLMOCR(config={'api_url': 'http://localhost:1234/v1/chat/completions', 'model': 'qwen2.5-vl-3b-instruct'})
+
 # for i in range(10):
 #     start_time = time.time()
-#     res, text = qwenocr(Image.open(r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\test_furigana.png"), furigana_filter_sensitivity=0)  # Example usage
+#     res, text = localOCR(Image.open(r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\test_furigana.png"))  # Example usage
 #     end_time = time.time()

 #     print(f"Time taken: {end_time - start_time:.2f} seconds")
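The new localLLMOCR engine talks to any OpenAI-compatible chat-completions endpoint (LM Studio's defaults are baked in), and its daemon thread re-submits a 100x100 black image whenever no real request has arrived in the last 5 seconds, keeping the local model loaded. A usage sketch along the lines of the commented-out test harness above (model name and URL are whatever your local server exposes):

    from PIL import Image

    engine = localLLMOCR(config={
        'api_url': 'http://localhost:1234/v1/chat/completions',  # LM Studio default
        'model': 'qwen2.5-vl-3b-instruct',
        'api_key': 'lm-studio',
    })
    ok, text = engine(Image.open('screenshot.png'))
    if ok:
        print(text)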
GameSentenceMiner/owocr/owocr/run.py:
@@ -800,8 +800,11 @@ class ScreenshotThread(threading.Thread):

 def set_last_image(image):
     global last_image
-    if image == last_image:
-        return
+    try:
+        if image == last_image:
+            return
+    except Exception:
+        pass
     try:
         if last_image is not None and hasattr(last_image, "close"):
             last_image.close()
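The guard around the equality check is presumably needed because comparing PIL images triggers a pixel-level comparison, which can raise once last_image has been closed (Pillow replaces a closed image's internal handle with an object that raises ValueError on access). A small reproduction sketch, assuming Pillow:

    from PIL import Image

    a = Image.new('RGB', (2, 2))
    b = Image.new('RGB', (2, 2))
    b.close()
    try:
        _ = (a == b)  # can raise ValueError: operation on closed image
    except ValueError:
        pass  # mirrors the fallback in set_last_image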
@@ -890,43 +893,44 @@ class OBSScreenshotThread(threading.Thread):
             logger.info(
                 "Using default aspect ratio scaling (original resolution).")
             return width, height
+
+    def init_config(self, source=None, scene=None):
+        import GameSentenceMiner.obs as obs
+        obs.update_current_game()
+        self.current_source = source if source else obs.get_active_source()
+        logger.info(f"Current OBS source: {self.current_source}")
+        self.source_width = self.current_source.get(
+            "sceneItemTransform").get("sourceWidth") or self.width
+        self.source_height = self.current_source.get(
+            "sceneItemTransform").get("sourceHeight") or self.height
+        if self.source_width and self.source_height:
+            self.width, self.height = self.scale_down_width_height(
+                self.source_width, self.source_height)
+            logger.info(
+                f"Using OBS source dimensions: {self.width}x{self.height}")
+        self.current_source_name = self.current_source.get(
+            "sourceName") or None
+        self.current_scene = scene if scene else obs.get_current_game()
+        self.ocr_config = get_scene_ocr_config()
+        if not self.ocr_config:
+            logger.error("No OCR config found for the current scene.")
+            return
+        self.ocr_config.scale_to_custom_size(self.width, self.height)

     def run(self):
         global last_image
         from PIL import Image
         import GameSentenceMiner.obs as obs

-        def init_config(source=None, scene=None):
-            obs.update_current_game()
-            self.current_source = source if source else obs.get_active_source()
-            logger.info(f"Current OBS source: {self.current_source}")
-            self.source_width = self.current_source.get(
-                "sceneItemTransform").get("sourceWidth") or self.width
-            self.source_height = self.current_source.get(
-                "sceneItemTransform").get("sourceHeight") or self.height
-            if self.source_width and self.source_height:
-                self.width, self.height = self.scale_down_width_height(
-                    self.source_width, self.source_height)
-                logger.info(
-                    f"Using OBS source dimensions: {self.width}x{self.height}")
-            self.current_source_name = self.current_source.get(
-                "sourceName") or None
-            self.current_scene = scene if scene else obs.get_current_game()
-            self.ocr_config = get_scene_ocr_config()
-            if not self.ocr_config:
-                logger.error("No OCR config found for the current scene.")
-                return
-            self.ocr_config.scale_to_custom_size(self.width, self.height)
-
         # Register a scene switch callback in obsws
         def on_scene_switch(scene):
             logger.info(f"Scene switched to: {scene}. Loading new OCR config.")
-            init_config(scene=scene)
+            self.init_config(scene=scene)

         asyncio.run(obs.register_scene_change_callback(on_scene_switch))

         self.connect_obs()
-        init_config()
+        self.init_config()
         start = time.time()
         while not terminated:
             if not screenshot_event.wait(timeout=0.1):
@@ -1396,6 +1400,7 @@ def run(read_from=None,
     global notifier
     global websocket_server_thread
     global screenshot_thread
+    global obs_screenshot_thread
     global image_queue
     global ocr_1
     global ocr_2
@@ -1555,7 +1560,15 @@
         if any(c in changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
             last_result = ([], engine_index)
             engine_change_handler_name(get_ocr_ocr1())
-    config_check_thread.add_callback(handle_config_changes)
+
+    def handle_area_config_changes(changes):
+        if screenshot_thread:
+            screenshot_thread.ocr_config = get_scene_ocr_config()
+        if obs_screenshot_thread:
+            obs_screenshot_thread.init_config()
+
+    config_check_thread.add_config_callback(handle_config_changes)
+    config_check_thread.add_area_callback(handle_area_config_changes)

     while not terminated:
         ocr_start_time = datetime.now()
GameSentenceMiner/util/text_log.py:
@@ -4,9 +4,12 @@ from datetime import datetime
 from difflib import SequenceMatcher
 from typing import Optional

+import rapidfuzz
+
 from GameSentenceMiner.util.gsm_utils import remove_html_and_cloze_tags
 from GameSentenceMiner.util.configuration import logger, get_config, gsm_state
 from GameSentenceMiner.util.model import AnkiCard
+import re

 initial_time = datetime.now()

@@ -107,20 +110,26 @@ class GameText:

 game_log = GameText()

-
-def similar(a, b):
-    return SequenceMatcher(None, a, b).ratio()
+def strip_whitespace_and_punctuation(text: str) -> str:
+    """
+    Strips whitespace and punctuation from the given text.
+    """
+    # Remove all whitespace and specified punctuation using regex
+    # Includes Japanese and common punctuation
+    return re.sub(r'[\s 、。「」【】《》., ]', '', text).strip()


 def lines_match(texthooker_sentence, anki_sentence):
-    texthooker_sentence = texthooker_sentence.replace("\n", "").replace("\r", "").replace(' ', '').strip()
-    anki_sentence = anki_sentence.replace("\n", "").replace("\r", "").replace(' ', '').strip()
-    similarity = similar(texthooker_sentence, anki_sentence)
+    # Replace newlines, spaces, other whitespace characters, AND japanese punctuation
+    texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
+    anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
+    similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
+    logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
     if texthooker_sentence in anki_sentence:
         logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
     elif anki_sentence in texthooker_sentence:
         logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
-    return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence and similarity > 0.8)
+    return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence and similarity > 80)


 def get_text_event(last_note) -> GameLine:
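Note the threshold change from 0.8 to 80: difflib's SequenceMatcher.ratio() is scaled 0.0-1.0, while rapidfuzz.fuzz.ratio returns 0-100, so the comparison had to be rescaled along with the library swap. A quick illustration of the two scales:

    from difflib import SequenceMatcher
    import rapidfuzz

    a, b = 'こんにちは世界', 'こんにちは世界!'
    print(SequenceMatcher(None, a, b).ratio())  # ~0.93 on a 0-1 scale
    print(rapidfuzz.fuzz.ratio(a, b))           # ~93.3 on a 0-100 scale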
GameSentenceMiner/wip/get_overlay_coords.py:
@@ -315,7 +315,7 @@ async def get_full_screenshot() -> Image.Image | None:
     else:
         monitors = [monitors[0]]
     monitor = monitors[get_config().wip.monitor_to_capture]
-    img = get_screenshot_PIL(compression=90, img_format='jpg')
+    img = get_screenshot_PIL(compression=90, img_format='jpg', width=monitor['width'] // 2, height=monitor['height'] // 2)
     # Put the image over a transparent background without stretching
     new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
     # Calculate coordinates to center img horizontally and vertically
@@ -356,34 +356,49 @@ async def get_full_screenshot() -> Image.Image | None:
     return None

 async def do_work(sentence_to_check=None):
+    from math import floor
     # connect_to_obs_sync(5)
     logger.info("in find_box")
     # await asyncio.sleep(.5)
     logger.info("after_initial_sleep")
+    start_time = time.time()
     full_screenshot_image, monitor_width, monitor_height = await get_full_screenshot()

-    oneocr_results = oneocr(full_screenshot_image)
-    crop_coords = oneocr_results[2]
-    logger.info("Cropping full screenshot with coordinates: %s", crop_coords)
-    cropped_image = full_screenshot_image.crop(crop_coords)
+    oneocr_results = oneocr(full_screenshot_image, multiple_crop_coords=True)
+    crop_coords_list = oneocr_results[2]
+    logger.info("Cropping full screenshot with coordinates: %s", crop_coords_list)
+    cropped_images = []
+    img = Image.new("RGBA", (monitor_width, monitor_height), (0, 0, 0, 0))
+    for crop_coords in crop_coords_list:
+        cropped_image = full_screenshot_image.crop(crop_coords)
+        cropped_images.append(cropped_image)
+        # Paste the cropped image onto the transparent background
+        img.paste(cropped_image, (floor(crop_coords[0]), floor(crop_coords[1])))
+
+    # img.show()
+
     # Convert 1/4
     if os.path.exists("C:\\Users\\Beangate\\GSM\\temp"):
-        cropped_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+        img.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+    logger.info(f"Time taken to get cropped image for lens: {time.time() - start_time:.2f} seconds")
+
     # full_screenshot_image.show()
-    if cropped_image:
+    if img:
+        start_time = time.time()
         logger.info("Full screenshot captured successfully. Now performing OCR...")
         # ocr_results = oneocr(full_screenshot_image, return_coords=True)
-        google_ocr_results = lens(cropped_image, return_coords=True)[2]
+        google_ocr_results = lens(img, return_coords=True)[2]

         ret = extract_text_with_pixel_boxes(
             api_response=google_ocr_results,
             original_width=monitor_width,
             original_height=monitor_height,
-            crop_x=crop_coords[0],
-            crop_y=crop_coords[1],
-            crop_width=crop_coords[2] - crop_coords[0],
-            crop_height=crop_coords[3] - crop_coords[1]
+            crop_x=0,
+            crop_y=0,
+            crop_width=img.width,
+            crop_height=img.height
         )
+        logger.info(f"Time taken for Lens OCR: {time.time() - start_time:.2f} seconds")

         # boxes_of_text = google_ocr_results[2]
         # logger.info(f"Boxes of text found: {boxes_of_text}")
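The net effect of this rewrite is that Google Lens no longer sees a single crop: every OneOCR line is pasted back at its original position on a monitor-sized transparent canvas, so Lens returns coordinates already in screen space and the crop offsets passed to extract_text_with_pixel_boxes collapse to zero. A self-contained sketch of the compositing step, assuming Pillow (dimensions and boxes are illustrative):

    from math import floor
    from PIL import Image

    screenshot = Image.new('RGB', (1920, 1080), 'white')    # stand-in for the capture
    canvas = Image.new('RGBA', (1920, 1080), (0, 0, 0, 0))  # transparent background
    for box in [(100.0, 200.0, 400.0, 240.0)]:              # per-line crop coords
        canvas.paste(screenshot.crop(box), (floor(box[0]), floor(box[1])))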
GameSentenceMiner.egg-info/PKG-INFO:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.13.12
+Version: 2.13.13
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
PKG-INFO:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.13.12
+Version: 2.13.13
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
pyproject.toml:
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "GameSentenceMiner"
-version = "2.13.12"
+version = "2.13.13"
 description = "A tool for mining sentences from games. Update: Overlay?"
 readme = "README.md"
 requires-python = ">=3.10"