GameSentenceMiner 2.14.3__py3-none-any.whl → 2.14.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
@@ -276,8 +276,10 @@ class GoogleLens:
     available = False

     def __init__(self, lang='ja'):
+        import regex
         self.regex = get_regex(lang)
         self.initial_lang = lang
+        self.punctuation_regex = regex.compile(r'[\p{P}\p{S}]')
         if 'betterproto' not in sys.modules:
             logger.warning('betterproto not available, Google Lens will not work!')
         else:
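
Note: the \p{...} classes in the new pattern come from the third-party regex package (hence the local import); the stdlib re module does not support Unicode property classes. A minimal sketch of what the compiled pattern matches:

    import regex  # third-party package; stdlib `re` lacks \p{...} classes

    # \p{P} matches any Unicode punctuation, \p{S} any Unicode symbol
    punctuation_regex = regex.compile(r'[\p{P}\p{S}]')

    print(punctuation_regex.findall('こんにちは、世界!'))  # ['、', '!']
    print(punctuation_regex.findall('こんにちは'))          # [] -> falsy: no punctuation
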
@@ -375,6 +377,8 @@ class GoogleLens:
                 for line in paragraph['lines']:
                     if furigana_filter_sensitivity:
                         for word in line['words']:
+                            if not self.punctuation_regex.findall(word):
+                                continue
                             if 'geometry' not in word:
                                 res += word['plain_text'] + word['text_separator']
                                 continue
@@ -383,7 +387,7 @@ class GoogleLens:
                             if word_width > furigana_filter_sensitivity and word_height > furigana_filter_sensitivity:
                                 res += word['plain_text'] + word['text_separator']
                             else:
-                                skipped.extend([word['plain_text'] for word in line['words']])
+                                skipped.extend(word['plain_text'])
                                 continue
                     else:
                         for word in line['words']:
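
Note: the replacement line relies on list.extend iterating its argument, so passing a string appends it one character at a time, which is consistent with the per-character accounting in the logging hunk below. A quick illustration:

    skipped = []
    skipped.extend('ふりがな')  # a str is iterated character by character
    print(skipped)              # ['ふ', 'り', 'が', 'な']
    print(f"Skipped {len(skipped)} chars: {''.join(skipped)}")
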
@@ -439,6 +443,7 @@ class GoogleLens:

        if skipped:
            logger.info(f"Skipped {len(skipped)} chars due to furigana filter sensitivity: {furigana_filter_sensitivity}")
+           logger.debug(f"Skipped chars: {''.join(skipped)}")

        # img.close()
        return x
@@ -934,10 +939,10 @@ class OneOCR:
        if sys.platform == 'win32':
            try:
                ocr_resp = self.model.recognize_pil(img)
-               if os.path.exists(os.path.expanduser("~/GSM/temp")):
-                   with open(os.path.join(os.path.expanduser("~/GSM/temp"), 'oneocr_response.json'), 'w',
-                             encoding='utf-8') as f:
-                       json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
+               # if os.path.exists(os.path.expanduser("~/GSM/temp")):
+               #     with open(os.path.join(os.path.expanduser("~/GSM/temp"), 'oneocr_response.json'), 'w',
+               #               encoding='utf-8') as f:
+               #         json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
                # print(json.dumps(ocr_resp))
                filtered_lines = [line for line in ocr_resp['lines'] if self.regex.search(line['text'])]
                x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
@@ -1401,6 +1406,11 @@ class localLLMOCR:

    def __init__(self, config={}, lang='ja'):
        self.keep_llm_hot_thread = None
+       # All three config values are required: url, model, api_key
+       if not config or not (config.get('url') and config.get('model') and config.get('api_key')):
+           logger.warning('Local LLM OCR requires url, model, and api_key in config, Local LLM OCR will not work!')
+           return
+
        try:
            import openai
        except ImportError:
@@ -1408,16 +1418,20 @@ class localLLMOCR:
            return
        import openai, threading
        try:
-           self.api_url = config.get('api_url', 'http://localhost:1234/v1/chat/completions')
+           self.api_url = config.get('url', 'http://localhost:1234/v1/chat/completions')
            self.model = config.get('model', 'qwen2.5-vl-3b-instruct')
            self.api_key = config.get('api_key', 'lm-studio')
            self.keep_warm = config.get('keep_warm', True)
            self.custom_prompt = config.get('prompt', None)
            self.available = True
+           if any(x in self.api_url for x in ['localhost', '127.0.0.1']):
+               if not self.check_connection(self.api_url):
+                   logger.warning('Local LLM OCR API is not reachable')
+                   return
            self.client = openai.OpenAI(
-               base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
-               api_key=self.api_key
-           )
+               base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
+               api_key=self.api_key
+           )
            if self.client.models.retrieve(self.model):
                self.model = self.model
                logger.info(f'Local LLM OCR (OpenAI-compatible) ready with model {self.model}')
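
Note: the base_url derivation works because the OpenAI client expects the API root rather than the chat-completions endpoint. With the default value above:

    api_url = 'http://localhost:1234/v1/chat/completions'
    print(api_url.replace('/v1/chat/completions', '/v1'))  # http://localhost:1234/v1
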
@@ -1426,6 +1440,25 @@ class localLLMOCR:
            self.keep_llm_hot_thread.start()
        except Exception as e:
            logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
+
+   def check_connection(self, url, port=None):
+       # simple connectivity check with mega low timeout
+       import http.client
+       conn = http.client.HTTPConnection(url, port or 1234, timeout=0.1)
+       try:
+           conn.request("GET", "/v1/models")
+           response = conn.getresponse()
+           if response.status == 200:
+               logger.info('Local LLM OCR API is reachable')
+               return True
+           else:
+               logger.warning('Local LLM OCR API is not reachable')
+               return False
+       except Exception as e:
+           logger.warning(f'Error connecting to Local LLM OCR API: {e}')
+           return False
+       finally:
+           conn.close()

    def keep_llm_warm(self):
        def ocr_blank_black_image():
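
Note: http.client.HTTPConnection takes a bare host name, while the caller above passes self.api_url, which defaults to a full URL. A self-contained sketch of the same probe that parses the URL first (the urlparse step is this sketch's assumption, not part of the package):

    import http.client
    from urllib.parse import urlparse

    def is_reachable(api_url: str, timeout: float = 0.1) -> bool:
        # HTTPConnection wants a host, not a URL; extract host and port first
        parsed = urlparse(api_url)
        conn = http.client.HTTPConnection(parsed.hostname or api_url, parsed.port or 1234, timeout=timeout)
        try:
            conn.request("GET", "/v1/models")
            return conn.getresponse().status == 200
        except Exception:
            return False
        finally:
            conn.close()

    print(is_reachable('http://localhost:1234/v1/chat/completions'))
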
@@ -41,7 +41,7 @@ import websockets
 import socketserver
 import queue

-from datetime import datetime
+from datetime import datetime, timedelta
 from PIL import Image, ImageDraw, UnidentifiedImageError
 from loguru import logger
 from desktop_notifier import DesktopNotifierSync
@@ -798,24 +798,47 @@ class ScreenshotThread(threading.Thread):
            self.windows_window_tracker_instance.join()


+import cv2
+import numpy as np
+
+def apply_adaptive_threshold_filter(img):
+    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    inverted = cv2.bitwise_not(gray)
+    blur = cv2.GaussianBlur(inverted, (3, 3), 0)
+    thresh = cv2.adaptiveThreshold(
+        blur, 255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        11, 2
+    )
+    result = cv2.bitwise_not(thresh)
+
+    return Image.fromarray(result)
+
+
 def set_last_image(image):
     global last_image
+    if image is None:
+        last_image = None
     try:
         if image == last_image:
             return
     except Exception:
-        pass
+        last_image = None
+        return
     try:
         if last_image is not None and hasattr(last_image, "close"):
             last_image.close()
     except Exception:
         pass
-    last_image = image
+    # last_image = image
+    last_image = apply_adaptive_threshold_filter(image)


 def are_images_identical(img1, img2):
     if None in (img1, img2):
-        return img1 == img2
+        return False

     try:
         img1 = np.array(img1)
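
Note: cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2) binarizes each pixel against a Gaussian-weighted mean of its 11x11 neighbourhood minus a constant of 2, which copes with uneven backgrounds better than a single global threshold. A usage sketch (file names are hypothetical):

    from PIL import Image

    frame = Image.open('frame.png').convert('RGB')      # hypothetical input frame
    binarized = apply_adaptive_threshold_filter(frame)  # helper added in this diff
    binarized.save('frame_thresholded.png')             # single-channel black/white output
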
@@ -829,6 +852,128 @@ def are_images_identical(img1, img2):
     return (img1.shape == img2.shape) and np.array_equal(img1, img2)


+import cv2
+import numpy as np
+from skimage.metrics import structural_similarity as ssim
+from typing import Union
+
+ImageType = Union[np.ndarray, Image.Image]
+
+def _prepare_image(image: ImageType) -> np.ndarray:
+    """
+    Standardizes an image (PIL or NumPy) into an OpenCV-compatible NumPy array (BGR).
+    """
+    # If the image is a PIL Image, convert it to a NumPy array
+    if isinstance(image, Image.Image):
+        # Convert PIL Image (which is RGB) to a NumPy array, then convert RGB to BGR for OpenCV
+        prepared_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # If it's already a NumPy array, assume it's in a compatible format (like BGR)
+    elif isinstance(image, np.ndarray):
+        prepared_image = image
+    else:
+        raise TypeError(f"Unsupported image type: {type(image)}. Must be a PIL Image or NumPy array.")
+
+    return prepared_image
+
+i = 1
+
+def calculate_ssim_score(imageA: ImageType, imageB: ImageType) -> float:
+    global i
+    """
+    Calculates the structural similarity index (SSIM) between two images.
+
+    Args:
+        imageA: The first image as a NumPy array.
+        imageB: The second image as a NumPy array.
+
+    Returns:
+        The SSIM score between the two images (between -1 and 1).
+    """
+
+    if isinstance(imageA, Image.Image):
+        imageA = apply_adaptive_threshold_filter(imageA)
+
+    # Save Images to temp for debugging on a random 1/20 chance
+    # if np.random.rand() < 0.05:
+    # if i < 600:
+    #     # Save as image_000
+    #     imageA.save(os.path.join(get_temporary_directory(), f'frame_{i:03d}.png'), 'PNG')
+    #     i += 1
+    #     imageB.save(os.path.join(get_temporary_directory(), f'ssim_imageB_{i:03d}.png'), 'PNG')
+
+    imageA = _prepare_image(imageA)
+    imageB = _prepare_image(imageB)
+
+    # Images must have the same dimensions
+    if imageA.shape != imageB.shape:
+        raise ValueError("Input images must have the same dimensions.")
+
+    # Convert images to grayscale for a more robust SSIM comparison
+    # This is less sensitive to minor color changes and lighting.
+    # grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
+    # grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
+
+    # Calculate the SSIM. The `score` is the main value.
+    # The `win_size` parameter must be an odd number and less than the image dimensions.
+    # We choose a value that is likely to be safe for a variety of image sizes.
+    win_size = min(3, imageA.shape[0] // 2, imageA.shape[1] // 2)
+    if win_size % 2 == 0:
+        win_size -= 1  # ensure it's odd
+
+    score, _ = ssim(imageA, imageB, full=True, win_size=win_size)
+
+    return score
+
+
+def are_images_similar(imageA: Image.Image, imageB: Image.Image, threshold: float = 0.98) -> bool:
+    """
+    Compares two images and returns True if their similarity score is above a threshold.
+
+    Args:
+        imageA: The first image as a NumPy array.
+        imageB: The second image as a NumPy array.
+        threshold: The minimum SSIM score to be considered "similar".
+                   Defaults to 0.98 (very high similarity). Your original `90` would
+                   be equivalent to a threshold of `0.90` here.
+
+    Returns:
+        True if the images are similar, False otherwise.
+    """
+    if None in (imageA, imageB):
+        logger.info("One of the images is None, cannot compare.")
+        return False
+    try:
+        score = calculate_ssim_score(imageA, imageB)
+    except Exception as e:
+        logger.info(e)
+        return False
+    return score > threshold
+
+
+def quick_text_detection(pil_image, threshold_ratio=0.01):
+    """
+    Quick check if image likely contains text using edge detection.
+
+    Args:
+        pil_image (PIL.Image): Input image
+        threshold_ratio (float): Minimum ratio of edge pixels to consider text present
+
+    Returns:
+        bool: True if text is likely present
+    """
+    # Convert to grayscale
+    gray = np.array(pil_image.convert('L'))
+
+    # Apply Canny edge detection
+    edges = cv2.Canny(gray, 50, 150)
+
+    # Calculate ratio of edge pixels
+    edge_ratio = np.sum(edges > 0) / edges.size
+
+    return edge_ratio > threshold_ratio
+
+
 # Use OBS for Screenshot Source (i.e. Linux)
 class OBSScreenshotThread(threading.Thread):
     def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
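
Note: a standalone sketch of the SSIM comparison these helpers wrap (synthetic frames; scores near 1.0 mean near-identical images). Also note that calculate_ssim_score above caps win_size at 3, so its comparison is more local than scikit-image's default of 7:

    import numpy as np
    from skimage.metrics import structural_similarity as ssim

    rng = np.random.default_rng(0)
    a = rng.integers(0, 256, (64, 64), dtype=np.uint8)  # synthetic grayscale frame
    b = a.copy()
    b[:8, :8] = 0                                       # perturb one corner
    score, _ = ssim(a, b, full=True, win_size=7)        # win_size must be odd
    print(f"SSIM: {score:.3f}  similar: {score > 0.98}")
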
@@ -848,6 +993,7 @@ class OBSScreenshotThread(threading.Thread):
                periodic_screenshot_queue.put(result)
            else:
                image_queue.put((result, True))
+           screenshot_event.clear()

    def connect_obs(self):
        import GameSentenceMiner.obs as obs
@@ -911,7 +1057,7 @@ class OBSScreenshotThread(threading.Thread):
        self.current_source_name = self.current_source.get(
            "sourceName") or None
        self.current_scene = scene if scene else obs.get_current_game()
-       self.ocr_config = get_scene_ocr_config()
+       self.ocr_config = get_scene_ocr_config(refresh=True)
        if not self.ocr_config:
            logger.error("No OCR config found for the current scene.")
            return
@@ -931,7 +1077,6 @@ class OBSScreenshotThread(threading.Thread):

        self.connect_obs()
        self.init_config()
-       start = time.time()
        while not terminated:
            if not screenshot_event.wait(timeout=0.1):
                continue
@@ -952,33 +1097,21 @@ class OBSScreenshotThread(threading.Thread):
                if not self.current_source_name:
                    logger.error(
                        "No active source found in the current scene.")
-                   time.sleep(1)
+                   self.write_result(1)
                    continue
-               # start_time = time.time()
                img = obs.get_screenshot_PIL(source_name=self.current_source_name,
                                             width=self.width, height=self.height, img_format='jpg', compression=80)
-               # logger.info(f"OBS screenshot taken in {time.time() - start_time:.2f} seconds.")

                img = apply_ocr_config_to_image(img, self.ocr_config)

                if img is not None:
-                   if not img.getbbox():
-                       logger.info("OBS Not Capturing anything, sleeping.")
-                       time.sleep(1)
-                       continue
-
-                   if last_image and are_images_identical(img, last_image):
-                       logger.debug(
-                           "Captured screenshot is identical to the last one, sleeping.")
-                       time.sleep(max(.5, get_ocr_scan_rate()))
-                   else:
-                       self.write_result(img)
-                       screenshot_event.clear()
+                   self.write_result(img)
                else:
                    logger.error("Failed to get screenshot data from OBS.")

            except Exception as e:
-               logger.error(
+               print(e)
+               logger.info(
                    f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
                continue

@@ -1569,7 +1702,9 @@ def run(read_from=None,

    config_check_thread.add_config_callback(handle_config_changes)
    config_check_thread.add_area_callback(handle_area_config_changes)
-
+   previous_text = "Placeholder"
+   sleep_time_to_add = 0
+   last_result_time = time.time()
    while not terminated:
        ocr_start_time = datetime.now()
        start_time = time.time()
@@ -1582,15 +1717,22 @@ def run(read_from=None,
                notify = True
        except queue.Empty:
            pass
-
+
+       if get_ocr_scan_rate() < .5:
+           adjusted_scan_rate = min(get_ocr_scan_rate() + sleep_time_to_add, .5)
+       else:
+           adjusted_scan_rate = get_ocr_scan_rate()
+
        if (not img) and process_screenshots:
-           if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) > get_ocr_scan_rate():
+           if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) > adjusted_scan_rate:
                screenshot_event.set()
                img = periodic_screenshot_queue.get()
                filter_img = True
                notify = False
                last_screenshot_time = time.time()
                ocr_start_time = datetime.now()
+               if adjusted_scan_rate > get_ocr_scan_rate():
+                   ocr_start_time = ocr_start_time - timedelta(seconds=adjusted_scan_rate - get_ocr_scan_rate())

        if img == 0:
            on_window_closed(False)
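
Note: a toy model of the new back-off (names mirror the diff): each idle pass can add 5 ms of extra delay, and the effective rate is capped at 0.5 s, so a 100 ms scan rate degrades to half-second polling after about 80 quiet passes:

    scan_rate = 0.1            # stand-in for get_ocr_scan_rate()
    sleep_time_to_add = 0.0
    for _ in range(80):        # 80 idle passes, +5 ms each
        sleep_time_to_add += .005
    adjusted_scan_rate = min(scan_rate + sleep_time_to_add, .5) if scan_rate < .5 else scan_rate
    print(adjusted_scan_rate)  # 0.5
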
@@ -1598,10 +1740,51 @@ def run(read_from=None,
            break
        elif img:
            if filter_img:
-               res, _ = process_and_write_results(img, write_to, last_result, filtering, notify,
+               ocr_config = get_scene_ocr_config()
+               # Check if the image is completely empty (all white or all black)
+               try:
+                   extrema = img.getextrema()
+                   # For RGB or RGBA images, extrema is a tuple of (min, max) for each channel
+                   if isinstance(extrema[0], tuple):
+                       is_empty = all(e[0] == e[1] for e in extrema)
+                   else:
+                       is_empty = extrema[0] == extrema[1]
+                   if is_empty:
+                       logger.info("Image is totally empty (all pixels the same), sleeping.")
+                       sleep_time_to_add = .5
+                       continue
+               except Exception as e:
+                   logger.debug(f"Could not determine if image is empty: {e}")
+
+               # Compare images, but only if it's one box, multiple boxes skews results way too much and produces false positives
+               if ocr_config and len(ocr_config.rectangles) < 2:
+                   if are_images_similar(img, last_image):
+                       logger.info("Captured screenshot is similar to the last one, sleeping.")
+                       if time.time() - last_result_time > 10:
+                           sleep_time_to_add += .005
+                       continue
+               else:
+                   if are_images_identical(img, last_image):
+                       logger.info("Captured screenshot is identical to the last one, sleeping.")
+                       if time.time() - last_result_time > 10:
+                           sleep_time_to_add += .005
+                       continue
+
+               res, text = process_and_write_results(img, write_to, last_result, filtering, notify,
                                                      ocr_start_time=ocr_start_time, furigana_filter_sensitivity=get_ocr_furigana_filter_sensitivity())
+               if not text and not previous_text and time.time() - last_result_time > 10:
+                   sleep_time_to_add += .005
+                   logger.info(f"No text detected again, sleeping.")
+               else:
+                   sleep_time_to_add = 0
+
+               # If image was stabilized, and now there is no text, reset sleep time
+               if not previous_text and not res:
+                   sleep_time_to_add = 0
+               previous_text = text
                if res:
                    last_result = (res, engine_index)
+                   last_result_time = time.time()
                else:
                    process_and_write_results(
                        img, write_to, None, notify=notify, ocr_start_time=ocr_start_time, engine=ocr2)
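
Note: the emptiness check relies on PIL's Image.getextrema(), which returns a (min, max) pair per band; equal extremes in every band mean a flat, single-colour frame. A standalone version:

    from PIL import Image

    img = Image.new('RGB', (64, 64), 'white')  # all-white test frame
    extrema = img.getextrema()                 # ((255, 255), (255, 255), (255, 255))
    if isinstance(extrema[0], tuple):          # multi-band image (RGB/RGBA)
        is_empty = all(lo == hi for lo, hi in extrema)
    else:                                      # single-band image ('L')
        is_empty = extrema[0] == extrema[1]
    print(is_empty)                            # True
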
@@ -488,6 +488,10 @@ class Screenshot:
            self.screenshot_timing_setting = 'middle'
        if not self.screenshot_timing_setting and not self.use_beginning_of_line_as_screenshot and not self.use_new_screenshot_logic:
            self.screenshot_timing_setting = 'end'
+       if self.width and self.height == 0:
+           self.height = -1
+       if self.width == 0 and self.height:
+           self.width = -1


 @dataclass_json
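
Note: the 0 → -1 normalization matches ffmpeg's scale-filter convention, where -1 means "derive this dimension from the aspect ratio" and 0 is not a valid target size; that these values feed a scale filter is this note's assumption, not stated in the diff:

    # Assuming width/height end up in an ffmpeg scale filter:
    width, height = 1280, 0
    if width and height == 0:
        height = -1
    if width == 0 and height:
        width = -1
    print(f"scale={width}:{height}")  # scale=1280:-1 keeps the aspect ratio
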
@@ -508,6 +512,8 @@ class Audio:
    def __post_init__(self):
        self.ffmpeg_reencode_options_to_use = self.ffmpeg_reencode_options.replace(
            "{format}", self.extension).replace("{encoder}", supported_formats.get(self.extension, ''))
+       if not self.anki_media_collection:
+           self.anki_media_collection = get_default_anki_media_collection_path()
        if self.anki_media_collection:
            self.anki_media_collection = os.path.normpath(
                self.anki_media_collection)
@@ -58,7 +58,7 @@ class OCRConfig:
    useWindowForConfig: bool = False
    lastWindowSelected: str = ""
    keep_newline: bool = False
-   useObsAsSource: bool = False
+   useObsAsOCRSource: bool = True

    def has_changed(self, other: 'OCRConfig') -> bool:
        return self.to_dict() != other.to_dict()
@@ -229,7 +229,7 @@ def get_ocr_keep_newline():
    return electron_store.data.OCR.keep_newline

 def get_ocr_use_obs_as_source():
-   return electron_store.data.OCR.useObsAsSource
+   return electron_store.data.OCR.useObsAsOCRSource

 def get_furigana_filter_sensitivity() -> int:
    return electron_store.data.OCR.furigana_filter_sensitivity