GameSentenceMiner 2.14.3__py3-none-any.whl → 2.14.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
@@ -276,8 +276,10 @@ class GoogleLens:
     available = False

     def __init__(self, lang='ja'):
+        import regex
         self.regex = get_regex(lang)
         self.initial_lang = lang
+        self.punctuation_regex = regex.compile(r'[\p{P}\p{S}]')
         if 'betterproto' not in sys.modules:
             logger.warning('betterproto not available, Google Lens will not work!')
         else:
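
Note: the \p{...} classes in the new pattern come from the third-party regex package (hence the local import); the stdlib re module does not support Unicode property classes. A minimal sketch of what the compiled pattern matches:

    import regex  # third-party package; stdlib `re` lacks \p{...} classes

    # \p{P} matches any Unicode punctuation, \p{S} any Unicode symbol
    punctuation_regex = regex.compile(r'[\p{P}\p{S}]')

    print(punctuation_regex.findall('こんにちは、世界!'))  # ['、', '!']
    print(punctuation_regex.findall('こんにちは'))          # [] -> falsy: no punctuation
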
@@ -375,6 +377,8 @@ class GoogleLens:
                 for line in paragraph['lines']:
                     if furigana_filter_sensitivity:
                         for word in line['words']:
+                            if not self.punctuation_regex.findall(word):
+                                continue
                             if 'geometry' not in word:
                                 res += word['plain_text'] + word['text_separator']
                                 continue
@@ -383,7 +387,7 @@ class GoogleLens:
                             if word_width > furigana_filter_sensitivity and word_height > furigana_filter_sensitivity:
                                 res += word['plain_text'] + word['text_separator']
                             else:
-                                skipped.extend([word['plain_text'] for word in line['words']])
+                                skipped.extend(word['plain_text'])
                                 continue
                     else:
                         for word in line['words']:
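
Note: the replacement line relies on list.extend iterating its argument, so passing a string appends it one character at a time, which is consistent with the per-character accounting in the logging hunk below. A quick illustration:

    skipped = []
    skipped.extend('ふりがな')  # a str is iterated character by character
    print(skipped)              # ['ふ', 'り', 'が', 'な']
    print(f"Skipped {len(skipped)} chars: {''.join(skipped)}")
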
@@ -439,6 +443,7 @@ class GoogleLens:

        if skipped:
            logger.info(f"Skipped {len(skipped)} chars due to furigana filter sensitivity: {furigana_filter_sensitivity}")
+           logger.debug(f"Skipped chars: {''.join(skipped)}")

        # img.close()
        return x
@@ -934,10 +939,10 @@ class OneOCR:
        if sys.platform == 'win32':
            try:
                ocr_resp = self.model.recognize_pil(img)
-               if os.path.exists(os.path.expanduser("~/GSM/temp")):
-                   with open(os.path.join(os.path.expanduser("~/GSM/temp"), 'oneocr_response.json'), 'w',
-                             encoding='utf-8') as f:
-                       json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
+               # if os.path.exists(os.path.expanduser("~/GSM/temp")):
+               #     with open(os.path.join(os.path.expanduser("~/GSM/temp"), 'oneocr_response.json'), 'w',
+               #               encoding='utf-8') as f:
+               #         json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
                # print(json.dumps(ocr_resp))
                filtered_lines = [line for line in ocr_resp['lines'] if self.regex.search(line['text'])]
                x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
@@ -1401,6 +1406,11 @@ class localLLMOCR:

    def __init__(self, config={}, lang='ja'):
        self.keep_llm_hot_thread = None
+       # All three config values are required: url, model, api_key
+       if not config or not (config.get('url') and config.get('model') and config.get('api_key')):
+           logger.warning('Local LLM OCR requires url, model, and api_key in config, Local LLM OCR will not work!')
+           return
+
        try:
            import openai
        except ImportError:
@@ -1408,16 +1418,20 @@ class localLLMOCR:
            return
        import openai, threading
        try:
-           self.api_url = config.get('api_url', 'http://localhost:1234/v1/chat/completions')
+           self.api_url = config.get('url', 'http://localhost:1234/v1/chat/completions')
            self.model = config.get('model', 'qwen2.5-vl-3b-instruct')
            self.api_key = config.get('api_key', 'lm-studio')
            self.keep_warm = config.get('keep_warm', True)
            self.custom_prompt = config.get('prompt', None)
            self.available = True
+           if any(x in self.api_url for x in ['localhost', '127.0.0.1']):
+               if not self.check_connection(self.api_url):
+                   logger.warning('Local LLM OCR API is not reachable')
+                   return
            self.client = openai.OpenAI(
-               base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
-               api_key=self.api_key
-           )
+               base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
+               api_key=self.api_key
+           )
            if self.client.models.retrieve(self.model):
                self.model = self.model
                logger.info(f'Local LLM OCR (OpenAI-compatible) ready with model {self.model}')
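
Note: the base_url derivation works because the OpenAI client expects the API root rather than the chat-completions endpoint. With the default value above:

    api_url = 'http://localhost:1234/v1/chat/completions'
    print(api_url.replace('/v1/chat/completions', '/v1'))  # http://localhost:1234/v1
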
@@ -1426,6 +1440,25 @@ class localLLMOCR:
            self.keep_llm_hot_thread.start()
        except Exception as e:
            logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
+
+   def check_connection(self, url, port=None):
+       # simple connectivity check with mega low timeout
+       import http.client
+       conn = http.client.HTTPConnection(url, port or 1234, timeout=0.1)
+       try:
+           conn.request("GET", "/v1/models")
+           response = conn.getresponse()
+           if response.status == 200:
+               logger.info('Local LLM OCR API is reachable')
+               return True
+           else:
+               logger.warning('Local LLM OCR API is not reachable')
+               return False
+       except Exception as e:
+           logger.warning(f'Error connecting to Local LLM OCR API: {e}')
+           return False
+       finally:
+           conn.close()

    def keep_llm_warm(self):
        def ocr_blank_black_image():
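
Note: http.client.HTTPConnection takes a bare host name, while the caller above passes self.api_url, which defaults to a full URL. A self-contained sketch of the same probe that parses the URL first (the urlparse step is this sketch's assumption, not part of the package):

    import http.client
    from urllib.parse import urlparse

    def is_reachable(api_url: str, timeout: float = 0.1) -> bool:
        # HTTPConnection wants a host, not a URL; extract host and port first
        parsed = urlparse(api_url)
        conn = http.client.HTTPConnection(parsed.hostname or api_url, parsed.port or 1234, timeout=timeout)
        try:
            conn.request("GET", "/v1/models")
            return conn.getresponse().status == 200
        except Exception:
            return False
        finally:
            conn.close()

    print(is_reachable('http://localhost:1234/v1/chat/completions'))
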
@@ -41,7 +41,7 @@ import websockets
 import socketserver
 import queue

-from datetime import datetime
+from datetime import datetime, timedelta
 from PIL import Image, ImageDraw, UnidentifiedImageError
 from loguru import logger
 from desktop_notifier import DesktopNotifierSync
@@ -798,24 +798,47 @@ class ScreenshotThread(threading.Thread):
            self.windows_window_tracker_instance.join()


+import cv2
+import numpy as np
+
+def apply_adaptive_threshold_filter(img):
+    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    inverted = cv2.bitwise_not(gray)
+    blur = cv2.GaussianBlur(inverted, (3, 3), 0)
+    thresh = cv2.adaptiveThreshold(
+        blur, 255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        11, 2
+    )
+    result = cv2.bitwise_not(thresh)
+
+    return Image.fromarray(result)
+
+
 def set_last_image(image):
     global last_image
+    if image is None:
+        last_image = None
     try:
         if image == last_image:
             return
     except Exception:
-        pass
+        last_image = None
+        return
     try:
         if last_image is not None and hasattr(last_image, "close"):
             last_image.close()
     except Exception:
         pass
-    last_image = image
+    # last_image = image
+    last_image = apply_adaptive_threshold_filter(image)


 def are_images_identical(img1, img2):
     if None in (img1, img2):
-        return img1 == img2
+        return False

     try:
         img1 = np.array(img1)
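
Note: cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2) binarizes each pixel against a Gaussian-weighted mean of its 11x11 neighbourhood minus a constant of 2, which copes with uneven backgrounds better than a single global threshold. A usage sketch (file names are hypothetical):

    from PIL import Image

    frame = Image.open('frame.png').convert('RGB')      # hypothetical input frame
    binarized = apply_adaptive_threshold_filter(frame)  # helper added in this diff
    binarized.save('frame_thresholded.png')             # single-channel black/white output
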
@@ -829,6 +852,128 @@ def are_images_identical(img1, img2):
     return (img1.shape == img2.shape) and np.array_equal(img1, img2)


+import cv2
+import numpy as np
+from skimage.metrics import structural_similarity as ssim
+from typing import Union
+
+ImageType = Union[np.ndarray, Image.Image]
+
+def _prepare_image(image: ImageType) -> np.ndarray:
+    """
+    Standardizes an image (PIL or NumPy) into an OpenCV-compatible NumPy array (BGR).
+    """
+    # If the image is a PIL Image, convert it to a NumPy array
+    if isinstance(image, Image.Image):
+        # Convert PIL Image (which is RGB) to a NumPy array, then convert RGB to BGR for OpenCV
+        prepared_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # If it's already a NumPy array, assume it's in a compatible format (like BGR)
+    elif isinstance(image, np.ndarray):
+        prepared_image = image
+    else:
+        raise TypeError(f"Unsupported image type: {type(image)}. Must be a PIL Image or NumPy array.")
+
+    return prepared_image
+
+i = 1
+
+def calculate_ssim_score(imageA: ImageType, imageB: ImageType) -> float:
+    global i
+    """
+    Calculates the structural similarity index (SSIM) between two images.
+
+    Args:
+        imageA: The first image as a NumPy array.
+        imageB: The second image as a NumPy array.
+
+    Returns:
+        The SSIM score between the two images (between -1 and 1).
+    """
+
+    if isinstance(imageA, Image.Image):
+        imageA = apply_adaptive_threshold_filter(imageA)
+
+    # Save Images to temp for debugging on a random 1/20 chance
+    # if np.random.rand() < 0.05:
+    # if i < 600:
+    #     # Save as image_000
+    #     imageA.save(os.path.join(get_temporary_directory(), f'frame_{i:03d}.png'), 'PNG')
+    #     i += 1
+    #     imageB.save(os.path.join(get_temporary_directory(), f'ssim_imageB_{i:03d}.png'), 'PNG')
+
+    imageA = _prepare_image(imageA)
+    imageB = _prepare_image(imageB)
+
+    # Images must have the same dimensions
+    if imageA.shape != imageB.shape:
+        raise ValueError("Input images must have the same dimensions.")
+
+    # Convert images to grayscale for a more robust SSIM comparison
+    # This is less sensitive to minor color changes and lighting.
+    # grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
+    # grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
+
+    # Calculate the SSIM. The `score` is the main value.
+    # The `win_size` parameter must be an odd number and less than the image dimensions.
+    # We choose a value that is likely to be safe for a variety of image sizes.
+    win_size = min(3, imageA.shape[0] // 2, imageA.shape[1] // 2)
+    if win_size % 2 == 0:
+        win_size -= 1  # ensure it's odd
+
+    score, _ = ssim(imageA, imageB, full=True, win_size=win_size)
+
+    return score
+
+
+def are_images_similar(imageA: Image.Image, imageB: Image.Image, threshold: float = 0.98) -> bool:
+    """
+    Compares two images and returns True if their similarity score is above a threshold.
+
+    Args:
+        imageA: The first image as a NumPy array.
+        imageB: The second image as a NumPy array.
+        threshold: The minimum SSIM score to be considered "similar".
+                   Defaults to 0.98 (very high similarity). Your original `90` would
+                   be equivalent to a threshold of `0.90` here.
+
+    Returns:
+        True if the images are similar, False otherwise.
+    """
+    if None in (imageA, imageB):
+        logger.info("One of the images is None, cannot compare.")
+        return False
+    try:
+        score = calculate_ssim_score(imageA, imageB)
+    except Exception as e:
+        logger.info(e)
+        return False
+    return score > threshold
+
+
+def quick_text_detection(pil_image, threshold_ratio=0.01):
+    """
+    Quick check if image likely contains text using edge detection.
+
+    Args:
+        pil_image (PIL.Image): Input image
+        threshold_ratio (float): Minimum ratio of edge pixels to consider text present
+
+    Returns:
+        bool: True if text is likely present
+    """
+    # Convert to grayscale
+    gray = np.array(pil_image.convert('L'))
+
+    # Apply Canny edge detection
+    edges = cv2.Canny(gray, 50, 150)
+
+    # Calculate ratio of edge pixels
+    edge_ratio = np.sum(edges > 0) / edges.size
+
+    return edge_ratio > threshold_ratio
+
+
 # Use OBS for Screenshot Source (i.e. Linux)
 class OBSScreenshotThread(threading.Thread):
     def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
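
Note: a standalone sketch of the SSIM comparison these helpers wrap (synthetic frames; scores near 1.0 mean near-identical images). Also note that calculate_ssim_score above caps win_size at 3, so its comparison is more local than scikit-image's default of 7:

    import numpy as np
    from skimage.metrics import structural_similarity as ssim

    rng = np.random.default_rng(0)
    a = rng.integers(0, 256, (64, 64), dtype=np.uint8)  # synthetic grayscale frame
    b = a.copy()
    b[:8, :8] = 0                                       # perturb one corner
    score, _ = ssim(a, b, full=True, win_size=7)        # win_size must be odd
    print(f"SSIM: {score:.3f}  similar: {score > 0.98}")
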
@@ -848,6 +993,7 @@ class OBSScreenshotThread(threading.Thread):
                periodic_screenshot_queue.put(result)
            else:
                image_queue.put((result, True))
+           screenshot_event.clear()

    def connect_obs(self):
        import GameSentenceMiner.obs as obs
@@ -911,7 +1057,7 @@ class OBSScreenshotThread(threading.Thread):
        self.current_source_name = self.current_source.get(
            "sourceName") or None
        self.current_scene = scene if scene else obs.get_current_game()
-       self.ocr_config = get_scene_ocr_config()
+       self.ocr_config = get_scene_ocr_config(refresh=True)
        if not self.ocr_config:
            logger.error("No OCR config found for the current scene.")
            return
@@ -931,7 +1077,6 @@ class OBSScreenshotThread(threading.Thread):

        self.connect_obs()
        self.init_config()
-       start = time.time()
        while not terminated:
            if not screenshot_event.wait(timeout=0.1):
                continue
@@ -952,33 +1097,21 @@ class OBSScreenshotThread(threading.Thread):
                if not self.current_source_name:
                    logger.error(
                        "No active source found in the current scene.")
-                   time.sleep(1)
+                   self.write_result(1)
                    continue
-               # start_time = time.time()
                img = obs.get_screenshot_PIL(source_name=self.current_source_name,
                                             width=self.width, height=self.height, img_format='jpg', compression=80)
-               # logger.info(f"OBS screenshot taken in {time.time() - start_time:.2f} seconds.")

                img = apply_ocr_config_to_image(img, self.ocr_config)

                if img is not None:
-                   if not img.getbbox():
-                       logger.info("OBS Not Capturing anything, sleeping.")
-                       time.sleep(1)
-                       continue
-
-                   if last_image and are_images_identical(img, last_image):
-                       logger.debug(
-                           "Captured screenshot is identical to the last one, sleeping.")
-                       time.sleep(max(.5, get_ocr_scan_rate()))
-                   else:
-                       self.write_result(img)
-                       screenshot_event.clear()
+                   self.write_result(img)
                else:
                    logger.error("Failed to get screenshot data from OBS.")

            except Exception as e:
-               logger.error(
+               print(e)
+               logger.info(
                    f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
                continue

@@ -1569,7 +1702,9 @@ def run(read_from=None,

    config_check_thread.add_config_callback(handle_config_changes)
    config_check_thread.add_area_callback(handle_area_config_changes)
-
+   previous_text = "Placeholder"
+   sleep_time_to_add = 0
+   last_result_time = time.time()
    while not terminated:
        ocr_start_time = datetime.now()
        start_time = time.time()
@@ -1582,15 +1717,22 @@ def run(read_from=None,
                notify = True
        except queue.Empty:
            pass
-
+
+       if get_ocr_scan_rate() < .5:
+           adjusted_scan_rate = min(get_ocr_scan_rate() + sleep_time_to_add, .5)
+       else:
+           adjusted_scan_rate = get_ocr_scan_rate()
+
        if (not img) and process_screenshots:
-           if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) > get_ocr_scan_rate():
+           if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) > adjusted_scan_rate:
                screenshot_event.set()
                img = periodic_screenshot_queue.get()
                filter_img = True
                notify = False
                last_screenshot_time = time.time()
                ocr_start_time = datetime.now()
+               if adjusted_scan_rate > get_ocr_scan_rate():
+                   ocr_start_time = ocr_start_time - timedelta(seconds=adjusted_scan_rate - get_ocr_scan_rate())

        if img == 0:
            on_window_closed(False)
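
Note: a toy model of the new back-off (names mirror the diff): each idle pass can add 5 ms of extra delay, and the effective rate is capped at 0.5 s, so a 100 ms scan rate degrades to half-second polling after about 80 quiet passes:

    scan_rate = 0.1            # stand-in for get_ocr_scan_rate()
    sleep_time_to_add = 0.0
    for _ in range(80):        # 80 idle passes, +5 ms each
        sleep_time_to_add += .005
    adjusted_scan_rate = min(scan_rate + sleep_time_to_add, .5) if scan_rate < .5 else scan_rate
    print(adjusted_scan_rate)  # 0.5
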
@@ -1598,10 +1740,51 @@ def run(read_from=None,
            break
        elif img:
            if filter_img:
-               res, _ = process_and_write_results(img, write_to, last_result, filtering, notify,
+               ocr_config = get_scene_ocr_config()
+               # Check if the image is completely empty (all white or all black)
+               try:
+                   extrema = img.getextrema()
+                   # For RGB or RGBA images, extrema is a tuple of (min, max) for each channel
+                   if isinstance(extrema[0], tuple):
+                       is_empty = all(e[0] == e[1] for e in extrema)
+                   else:
+                       is_empty = extrema[0] == extrema[1]
+                   if is_empty:
+                       logger.info("Image is totally empty (all pixels the same), sleeping.")
+                       sleep_time_to_add = .5
+                       continue
+               except Exception as e:
+                   logger.debug(f"Could not determine if image is empty: {e}")
+
+               # Compare images, but only if it's one box, multiple boxes skews results way too much and produces false positives
+               if ocr_config and len(ocr_config.rectangles) < 2:
+                   if are_images_similar(img, last_image):
+                       logger.info("Captured screenshot is similar to the last one, sleeping.")
+                       if time.time() - last_result_time > 10:
+                           sleep_time_to_add += .005
+                       continue
+               else:
+                   if are_images_identical(img, last_image):
+                       logger.info("Captured screenshot is identical to the last one, sleeping.")
+                       if time.time() - last_result_time > 10:
+                           sleep_time_to_add += .005
+                       continue
+
+               res, text = process_and_write_results(img, write_to, last_result, filtering, notify,
                                                      ocr_start_time=ocr_start_time, furigana_filter_sensitivity=get_ocr_furigana_filter_sensitivity())
+               if not text and not previous_text and time.time() - last_result_time > 10:
+                   sleep_time_to_add += .005
+                   logger.info(f"No text detected again, sleeping.")
+               else:
+                   sleep_time_to_add = 0
+
+               # If image was stabilized, and now there is no text, reset sleep time
+               if not previous_text and not res:
+                   sleep_time_to_add = 0
+               previous_text = text
                if res:
                    last_result = (res, engine_index)
+                   last_result_time = time.time()
                else:
                    process_and_write_results(
                        img, write_to, None, notify=notify, ocr_start_time=ocr_start_time, engine=ocr2)
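
Note: the emptiness check relies on PIL's Image.getextrema(), which returns a (min, max) pair per band; equal extremes in every band mean a flat, single-colour frame. A standalone version:

    from PIL import Image

    img = Image.new('RGB', (64, 64), 'white')  # all-white test frame
    extrema = img.getextrema()                 # ((255, 255), (255, 255), (255, 255))
    if isinstance(extrema[0], tuple):          # multi-band image (RGB/RGBA)
        is_empty = all(lo == hi for lo, hi in extrema)
    else:                                      # single-band image ('L')
        is_empty = extrema[0] == extrema[1]
    print(is_empty)                            # True
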
@@ -488,6 +488,10 @@ class Screenshot:
            self.screenshot_timing_setting = 'middle'
        if not self.screenshot_timing_setting and not self.use_beginning_of_line_as_screenshot and not self.use_new_screenshot_logic:
            self.screenshot_timing_setting = 'end'
+       if self.width and self.height == 0:
+           self.height = -1
+       if self.width == 0 and self.height:
+           self.width = -1


 @dataclass_json
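
Note: the 0 → -1 normalization matches ffmpeg's scale-filter convention, where -1 means "derive this dimension from the aspect ratio" and 0 is not a valid target size; that these values feed a scale filter is this note's assumption, not stated in the diff:

    # Assuming width/height end up in an ffmpeg scale filter:
    width, height = 1280, 0
    if width and height == 0:
        height = -1
    if width == 0 and height:
        width = -1
    print(f"scale={width}:{height}")  # scale=1280:-1 keeps the aspect ratio
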
@@ -508,6 +512,8 @@ class Audio:
    def __post_init__(self):
        self.ffmpeg_reencode_options_to_use = self.ffmpeg_reencode_options.replace(
            "{format}", self.extension).replace("{encoder}", supported_formats.get(self.extension, ''))
+       if not self.anki_media_collection:
+           self.anki_media_collection = get_default_anki_media_collection_path()
        if self.anki_media_collection:
            self.anki_media_collection = os.path.normpath(
                self.anki_media_collection)
@@ -58,7 +58,7 @@ class OCRConfig:
    useWindowForConfig: bool = False
    lastWindowSelected: str = ""
    keep_newline: bool = False
-   useObsAsSource: bool = False
+   useObsAsOCRSource: bool = True

    def has_changed(self, other: 'OCRConfig') -> bool:
        return self.to_dict() != other.to_dict()
@@ -229,7 +229,7 @@ def get_ocr_keep_newline():
    return electron_store.data.OCR.keep_newline

 def get_ocr_use_obs_as_source():
-   return electron_store.data.OCR.useObsAsSource
+   return electron_store.data.OCR.useObsAsOCRSource

 def get_furigana_filter_sensitivity() -> int:
    return electron_store.data.OCR.furigana_filter_sensitivity