GameSentenceMiner 2.13.12.tar.gz → 2.13.13.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/config_gui.py +1 -1
  2. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/gsm_ocr_config.py +8 -0
  3. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/owocr_helper.py +19 -6
  4. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/ocr.py +189 -37
  5. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/run.py +40 -27
  6. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/text_log.py +16 -7
  7. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/wip/get_overlay_coords.py +27 -12
  8. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/PKG-INFO +1 -1
  9. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/PKG-INFO +1 -1
  10. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/pyproject.toml +1 -1
  11. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/__init__.py +0 -0
  12. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ai/__init__.py +0 -0
  13. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ai/ai_prompting.py +0 -0
  14. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/anki.py +0 -0
  15. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/__init__.py +0 -0
  16. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon.png +0 -0
  17. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon128.png +0 -0
  18. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon256.png +0 -0
  19. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon32.png +0 -0
  20. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon512.png +0 -0
  21. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/icon64.png +0 -0
  22. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/assets/pickaxe.png +0 -0
  23. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/gametext.py +0 -0
  24. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/gsm.py +0 -0
  25. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/locales/en_us.json +0 -0
  26. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/locales/ja_jp.json +0 -0
  27. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/locales/zh_cn.json +0 -0
  28. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/obs.py +0 -0
  29. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/__init__.py +0 -0
  30. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/ocrconfig.py +0 -0
  31. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/owocr_area_selector.py +0 -0
  32. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/ocr/ss_picker.py +0 -0
  33. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/__init__.py +0 -0
  34. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/__main__.py +0 -0
  35. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/config.py +0 -0
  36. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/lens_betterproto.py +0 -0
  37. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +0 -0
  38. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/__init__.py +0 -0
  39. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/audio_offset_selector.py +0 -0
  40. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/communication/__init__.py +0 -0
  41. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/communication/send.py +0 -0
  42. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/communication/websocket.py +0 -0
  43. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/configuration.py +0 -0
  44. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/Untitled_json.py +0 -0
  45. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/__init__.py +0 -0
  46. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/download_tools.py +0 -0
  47. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/downloader/oneocr_dl.py +0 -0
  48. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/electron_config.py +0 -0
  49. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/ffmpeg.py +0 -0
  50. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/gsm_utils.py +0 -0
  51. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/model.py +0 -0
  52. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/notification.py +0 -0
  53. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/package.py +0 -0
  54. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/ss_selector.py +0 -0
  55. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/util/window_transparency.py +0 -0
  56. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/vad.py +0 -0
  57. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/__init__.py +0 -0
  58. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/service.py +0 -0
  59. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/__init__.py +0 -0
  60. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  61. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  62. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/favicon.ico +0 -0
  63. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/favicon.svg +0 -0
  64. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/site.webmanifest +0 -0
  65. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/style.css +0 -0
  66. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  67. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  68. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/__init__.py +0 -0
  69. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/index.html +0 -0
  70. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/text_replacements.html +0 -0
  71. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/templates/utility.html +0 -0
  72. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner/web/texthooking_page.py +0 -0
  73. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/SOURCES.txt +0 -0
  74. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/dependency_links.txt +0 -0
  75. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/entry_points.txt +0 -0
  76. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/requires.txt +0 -0
  77. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/GameSentenceMiner.egg-info/top_level.txt +0 -0
  78. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/LICENSE +0 -0
  79. {gamesentenceminer-2.13.12 → gamesentenceminer-2.13.13}/setup.cfg +0 -0
GameSentenceMiner/config_gui.py:
@@ -1873,7 +1873,7 @@ class ConfigApp:
         gemini_model_i18n = ai_i18n.get('gemini_model', {})
         HoverInfoLabelWidget(ai_frame, text=gemini_model_i18n.get('label', '...'), tooltip=gemini_model_i18n.get('tooltip', '...'),
                              row=self.current_row, column=0)
-        ttk.Combobox(ai_frame, textvariable=self.gemini_model_value, values=['gemma-3n-e4b-it', 'gemini-2.5-flash-lite', 'gemini-2.5-flash','gemini-2.0-flash', 'gemini-2.0-flash-lite'], state="readonly").grid(row=self.current_row, column=1, sticky='EW', pady=2)
+        ttk.Combobox(ai_frame, textvariable=self.gemini_model_value, values=['gemini-2.5-flash-lite', 'gemini-2.5-flash', 'gemma-3-27b-it', 'gemini-2.0-flash', 'gemini-2.0-flash-lite'], state="readonly").grid(row=self.current_row, column=1, sticky='EW', pady=2)
         self.current_row += 1

         gemini_key_i18n = ai_i18n.get('gemini_api_key', {})
GameSentenceMiner/ocr/gsm_ocr_config.py:
@@ -9,6 +9,7 @@ from dataclasses_json import dataclass_json
 from typing import List, Optional, Union

 from GameSentenceMiner.util.configuration import logger, get_app_directory
+from GameSentenceMiner.util.electron_config import get_ocr_use_window_for_config
 from GameSentenceMiner.util.gsm_utils import sanitize_filename


@@ -92,6 +93,13 @@ class OCRConfig:
                 floor(rectangle.coordinates[2] * width),
                 floor(rectangle.coordinates[3] * height),
             ]
+
+def has_config_changed(current_config: OCRConfig) -> bool:
+    new_config = get_scene_ocr_config(use_window_as_config=get_ocr_use_window_for_config(), window=current_config.window)
+    if new_config.rectangles != current_config.rectangles:
+        logger.info("OCR config has changed.")
+        return True
+    return False


 def get_window(title):
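The new module-level helper is meant to be polled: a caller keeps its current OCRConfig and asks whether the scene's stored config now differs. A minimal sketch of that pattern, assuming the get_scene_ocr_config signature shown above (the interval is illustrative; the real consumer is the ConfigChangeCheckThread further down in this diff):

    import time

    # ocr_config was loaded earlier via get_scene_ocr_config(...)
    while True:
        if has_config_changed(ocr_config):
            # Reload so the new rectangles take effect
            ocr_config = get_scene_ocr_config(
                use_window_as_config=get_ocr_use_window_for_config(),
                window=ocr_config.window)
        time.sleep(0.25)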
GameSentenceMiner/ocr/owocr_helper.py:
@@ -22,7 +22,7 @@ from GameSentenceMiner.util.electron_config import *
 from GameSentenceMiner.ocr.ss_picker import ScreenCropper
 from GameSentenceMiner.owocr.owocr.run import TextFiltering
 from GameSentenceMiner.util.configuration import get_config, get_app_directory, get_temporary_directory
-from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, set_dpi_awareness, get_window, get_ocr_config_path
+from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, has_config_changed, set_dpi_awareness, get_window, get_ocr_config_path
 from GameSentenceMiner.owocr.owocr import screen_coordinate_picker, run
 from GameSentenceMiner.util.gsm_utils import sanitize_filename, do_text_replacements, OCR_REPLACEMENTS_FILE
 import threading
@@ -252,7 +252,8 @@ class ConfigChangeCheckThread(threading.Thread):
     def __init__(self):
         super().__init__(daemon=True)
         self.last_changes = None
-        self.callbacks = []
+        self.config_callbacks = []
+        self.area_callbacks = []

     def run(self):
         global ocr_config
@@ -265,20 +266,32 @@
                 # Only run this block after a change has occurred and then the section is stable (no change)
                 if self.last_changes is not None and not section_changed:
                     logger.info(f"Detected config changes: {self.last_changes}")
-                    for cb in self.callbacks:
+                    for cb in self.config_callbacks:
                         cb(self.last_changes)
                     if hasattr(run, 'handle_config_change'):
                         run.handle_config_change()
                     if any(c in self.last_changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
                         reset_callback_vars()
                     self.last_changes = None
+                ocr_config_changed = has_config_changed(ocr_config)
+                if ocr_config_changed:
+                    logger.info("OCR config has changed, reloading...")
+                    ocr_config = get_ocr_config(use_window_for_config=True, window=obs.get_current_game())
+                    for cb in self.area_callbacks:
+                        cb(ocr_config)
+                    if hasattr(run, 'handle_area_config_changes'):
+                        run.handle_area_config_changes(ocr_config)
+                    reset_callback_vars()
             except Exception as e:
                 logger.debug(f"ConfigChangeCheckThread error: {e}")
             time.sleep(0.25)  # Lowered to 0.25s for more responsiveness

-    def add_callback(self, callback):
-        self.callbacks.append(callback)
-
+    def add_config_callback(self, callback):
+        self.config_callbacks.append(callback)
+
+    def add_area_callback(self, callback):
+        self.area_callbacks.append(callback)
+

 def reset_callback_vars():
     global previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, previous_text_list, last_ocr2_result
     previous_text = None
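With the split into config_callbacks and area_callbacks, option changes and OCR-area changes are now dispatched separately: config callbacks receive the changed-keys payload, while area callbacks receive the freshly reloaded OCRConfig. A hedged registration sketch (the handler bodies are illustrative, not from the package):

    thread = ConfigChangeCheckThread()
    thread.add_config_callback(lambda changes: logger.info(f"options changed: {changes}"))
    thread.add_area_callback(lambda cfg: logger.info(f"areas reloaded: {len(cfg.rectangles)} rect(s)"))
    thread.start()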
GameSentenceMiner/owocr/owocr/ocr.py:
@@ -17,7 +17,12 @@ from PIL import Image
 from loguru import logger
 import requests

-from GameSentenceMiner.util.electron_config import get_ocr_language, get_furigana_filter_sensitivity
+
+try:
+    from GameSentenceMiner.util.electron_config import get_ocr_language, get_furigana_filter_sensitivity
+    from GameSentenceMiner.util.configuration import CommonLanguages
+except ImportError:
+    pass

 # from GameSentenceMiner.util.configuration import get_temporary_directory

@@ -894,7 +899,7 @@ class OneOCR:
         self.regex = re.compile(
             r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')

-    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
+    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False, multiple_crop_coords=False):
         lang = get_ocr_language()
         furigana_filter_sensitivity = get_furigana_filter_sensitivity()
         if lang != self.initial_lang:
@@ -910,6 +915,7 @@ class OneOCR:
         if not img:
             return (False, 'Invalid image provided')
         crop_coords = None
+        crop_coords_list = []
         if sys.platform == 'win32':
             try:
                 ocr_resp = self.model.recognize_pil(img)
@@ -985,6 +991,12 @@ class OneOCR:
                         }
                         boxes.append(box)
                     res = ocr_resp['text']
+                elif multiple_crop_coords:
+                    for line in filtered_lines:
+                        crop_coords_list.append(
+                            (line['bounding_rect']['x1'] - 5, line['bounding_rect']['y1'] - 5,
+                             line['bounding_rect']['x3'] + 5, line['bounding_rect']['y3'] + 5))
+                    res = ocr_resp['text']
                 else:
                     res = ocr_resp['text']

@@ -1004,6 +1016,8 @@ class OneOCR:
             res = res.json()['text']
         if return_coords:
             x = (True, res, filtered_lines)
+        elif multiple_crop_coords:
+            x = (True, res, crop_coords_list)
         else:
             x = (True, res, crop_coords)
         if is_path:
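OneOCR.__call__ now has three possible shapes for the third tuple element: per-line dictionaries when return_coords=True, a list of padded (x1, y1, x2, y2) boxes when multiple_crop_coords=True, and the single crop box otherwise. A sketch of consuming the new list form, assuming an already-constructed engine instance named one_ocr (hypothetical name):

    success, text, boxes = one_ocr(img, multiple_crop_coords=True)
    if success:
        # Each box is a line's bounding rect expanded by 5 px, per the diff above
        line_images = [img.crop(box) for box in boxes]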
@@ -1367,70 +1381,208 @@ class GroqOCR:
     def _preprocess(self, img):
         return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')

+
+# OpenAI-Compatible Endpoint OCR using LM Studio
+class localLLMOCR:
+    name= 'local_llm_ocr'
+    readable_name = 'Local LLM OCR'
+    key = 'a'
+    available = False
+    last_ocr_time = time.time() - 5
+
+    def __init__(self, config={}, lang='ja'):
+        self.keep_llm_hot_thread = None
+        try:
+            import openai
+        except ImportError:
+            logger.warning('openai module not available, Local LLM OCR will not work!')
+            return
+        import openai, threading
+        try:
+            self.api_url = config.get('api_url', 'http://localhost:1234/v1/chat/completions')
+            self.model = config.get('model', 'qwen2.5-vl-3b-instruct')
+            self.api_key = config.get('api_key', 'lm-studio')
+            self.keep_warm = config.get('keep_warm', True)
+            self.custom_prompt = config.get('prompt', None)
+            self.available = True
+            self.client = openai.OpenAI(
+                base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
+                api_key=self.api_key
+            )
+            logger.info('Local LLM OCR (OpenAI-compatible) ready')
+            self.keep_llm_hot_thread = threading.Thread(target=self.keep_llm_warm, daemon=True)
+            self.keep_llm_hot_thread.start()
+        except Exception as e:
+            logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
+
+    def keep_llm_warm(self):
+        def ocr_blank_black_image():
+            if self.last_ocr_time and (time.time() - self.last_ocr_time) < 5:
+                return
+            import numpy as np
+            from PIL import Image
+            # Create a blank black image
+            blank_image = Image.fromarray(np.zeros((100, 100, 3), dtype=np.uint8))
+            logger.info('Keeping local LLM OCR warm with a blank black image')
+            self(blank_image)
+
+        while True:
+            ocr_blank_black_image()
+            time.sleep(5)
+
+    def __call__(self, img, furigana_filter_sensitivity=0):
+        import base64
+        try:
+            img, is_path = input_to_pil_image(img)
+            img_bytes = pil_image_to_bytes(img)
+            img_base64 = base64.b64encode(img_bytes).decode('utf-8')
+            if self.custom_prompt and self.custom_prompt.strip() != "":
+                prompt = self.custom_prompt.strip()
+            else:
+                prompt = f"""
+                Extract all {CommonLanguages.from_code(get_ocr_language())} Text from Image. Ignore all Furigana. Do not return any commentary, just the text in the image. If there is no text in the image, return "" (Empty String).
+                """
+
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
+                        ],
+                    }
+                ],
+                max_tokens=512,
+                temperature=0.1
+            )
+            self.last_ocr_time = time.time()
+            if response.choices and response.choices[0].message.content:
+                text_output = response.choices[0].message.content.strip()
+                return (True, text_output)
+            else:
+                return (True, "")
+        except Exception as e:
+            return (False, f'Local LLM OCR request failed: {e}')
+
 # class QWENOCR:
 #     name = 'qwenv2'
 #     readable_name = 'Qwen2-VL'
 #     key = 'q'
+
+#     # Class-level attributes for model and processor to ensure they are loaded only once
+#     model = None
+#     processor = None
+#     device = None
 #     available = False

-#     def __init__(self, config={}, lang='ja'):
-#         try:
-#             import torch
-#             import transformers
-#             from transformers import AutoModelForImageTextToText, AutoProcessor
-#             self.model = AutoModelForImageTextToText.from_pretrained(
-#                 "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"
-#             )
-#             self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
-#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
-#             print(self.device)
-#             self.available = True
-#             logger.info('Qwen2.5-VL ready')
-#         except Exception as e:
-#             logger.warning(f'Qwen2-VL not available: {e}')
+#     @classmethod
+#     def initialize(cls):
+#         import torch
+#         from transformers import AutoModelForImageTextToText, AutoProcessor
+#         """
+#         Class method to initialize the model. Call this once at the start of your application.
+#         This prevents reloading the model on every instantiation.
+#         """
+#         if cls.model is not None:
+#             logger.info('Qwen2-VL is already initialized.')
+#             return

-#     def __call__(self, img, furigana_filter_sensitivity=0):
-#         if not self.available:
-#             return (False, 'Qwen2-VL is not available.')
 #         try:
-#             img, is_path = input_to_pil_image(img)
-
-#             # img.show()
+#             if not torch.cuda.is_available():
+#                 logger.warning("CUDA not available, Qwen2-VL will run on CPU, which will be very slow.")
+#                 # You might want to prevent initialization on CPU entirely
+#                 # raise RuntimeError("CUDA is required for efficient Qwen2-VL operation.")
+
+#             cls.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+#             cls.model = AutoModelForImageTextToText.from_pretrained(
+#                 "Qwen/Qwen2-VL-2B-Instruct",
+#                 torch_dtype="auto",  # Uses bfloat16/float16 if available, which is faster
+#                 device_map=cls.device
+#             )
+#             # For PyTorch 2.0+, torch.compile can significantly speed up inference after a warm-up call
+#             # cls.model = torch.compile(cls.model)
+
+#             cls.processor = AutoProcessor.from_pretrained(
+#                 "Qwen/Qwen2-VL-2B-Instruct",
+#                 use_fast=True
+#             )
+
+#             cls.available = True
+
 #             conversation = [
 #                 {
 #                     "role": "user",
 #                     "content": [
 #                         {"type": "image"},
-#                         {"type": "text", "text": "Analyze the image. Extract text *only* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana (Small characters above the kanji). Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."},
+#                         {"type": "text", "text": "Extract all the text from this image, ignore all furigana."},
 #                     ],
 #                 }
 #             ]
-#             text_prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
+
+#             # The same prompt is applied to all images in the batch
+#             cls.text_prompt = cls.processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+#             logger.info(f'Qwen2.5-VL ready on device: {cls.device}')
+#         except Exception as e:
+#             logger.warning(f'Qwen2-VL not available: {e}')
+#             cls.available = False
+
+#     def __init__(self, config={}, lang='ja'):
+#         # The __init__ is now very lightweight. It just checks if initialization has happened.
+#         if not self.available:
+#             raise RuntimeError("QWENOCR has not been initialized. Call QWENOCR.initialize() first.")
+
+#     def __call__(self, images):
+#         """
+#         Processes a single image or a list of images.
+#         :param images: A single image (path or PIL.Image) or a list of images.
+#         :return: A tuple (success, list_of_results)
+#         """
+#         if not self.available:
+#             return (False, ['Qwen2-VL is not available.'])
+
+#         try:
+#             # Standardize input to be a list
+#             if not isinstance(images, list):
+#                 images = [images]
+
+#             pil_images = [input_to_pil_image(img)[0] for img in images]
+
+#             # The processor handles batching of images and text prompts
 #             inputs = self.processor(
-#                 text=[text_prompt], images=[img], padding=True, return_tensors="pt"
-#             )
-#             inputs = inputs.to(self.device)
-#             output_ids = self.model.generate(**inputs, max_new_tokens=128)
+#                 text=[self.text_prompt] * len(pil_images),
+#                 images=pil_images,
+#                 padding=True,
+#                 return_tensors="pt"
+#             ).to(self.device)
+
+#             output_ids = self.model.generate(**inputs, max_new_tokens=32)
+
+#             # The decoding logic needs to be slightly adjusted for batching
+#             input_ids_len = [len(x) for x in inputs.input_ids]
 #             generated_ids = [
-#                 output_ids[len(input_ids):]
-#                 for input_ids, output_ids in zip(inputs.input_ids, output_ids)
+#                 output_ids[i][input_ids_len[i]:] for i in range(len(input_ids_len))
 #             ]
+
 #             output_text = self.processor.batch_decode(
 #                 generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
 #             )
-#             return (True, output_text[0] if output_text else "")
+
+#             return (True, output_text)
 #         except Exception as e:
-#             return (False, f'Qwen2-VL inference failed: {e}')
-
-#     def _preprocess(self, img):
-#         return base64.b64encode(pil_image_to_bytes(img, png_compression=6)).decode('utf-8')
+#             return (False, [f'Qwen2-VL inference failed: {e}'])


+# QWENOCR.initialize()
 # qwenocr = QWENOCR()

+# localOCR = localLLMOCR(config={'api_url': 'http://localhost:1234/v1/chat/completions', 'model': 'qwen2.5-vl-3b-instruct'})
+
 # for i in range(10):
 #     start_time = time.time()
-#     res, text = qwenocr(Image.open(r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\test_furigana.png"), furigana_filter_sensitivity=0)  # Example usage
+#     res, text = localOCR(Image.open(r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\test_furigana.png"))  # Example usage
 #     end_time = time.time()

 #     print(f"Time taken: {end_time - start_time:.2f} seconds")
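The new localLLMOCR engine talks to any OpenAI-compatible chat-completions endpoint (LM Studio's defaults are baked in), and its daemon thread re-submits a 100x100 black image whenever no real request has arrived in the last 5 seconds, keeping the local model loaded. A usage sketch along the lines of the commented-out test harness above (model name and URL are whatever your local server exposes):

    from PIL import Image

    engine = localLLMOCR(config={
        'api_url': 'http://localhost:1234/v1/chat/completions',  # LM Studio default
        'model': 'qwen2.5-vl-3b-instruct',
        'api_key': 'lm-studio',
    })
    ok, text = engine(Image.open('screenshot.png'))
    if ok:
        print(text)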
GameSentenceMiner/owocr/owocr/run.py:
@@ -800,8 +800,11 @@ class ScreenshotThread(threading.Thread):

 def set_last_image(image):
     global last_image
-    if image == last_image:
-        return
+    try:
+        if image == last_image:
+            return
+    except Exception:
+        pass
     try:
         if last_image is not None and hasattr(last_image, "close"):
             last_image.close()
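The guard around the equality check is presumably needed because comparing PIL images triggers a pixel-level comparison, which can raise once last_image has been closed (Pillow replaces a closed image's internal handle with an object that raises ValueError on access). A small reproduction sketch, assuming Pillow:

    from PIL import Image

    a = Image.new('RGB', (2, 2))
    b = Image.new('RGB', (2, 2))
    b.close()
    try:
        _ = (a == b)  # can raise ValueError: operation on closed image
    except ValueError:
        pass  # mirrors the fallback in set_last_image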
@@ -890,43 +893,44 @@ class OBSScreenshotThread(threading.Thread):
             logger.info(
                 "Using default aspect ratio scaling (original resolution).")
             return width, height
+
+    def init_config(self, source=None, scene=None):
+        import GameSentenceMiner.obs as obs
+        obs.update_current_game()
+        self.current_source = source if source else obs.get_active_source()
+        logger.info(f"Current OBS source: {self.current_source}")
+        self.source_width = self.current_source.get(
+            "sceneItemTransform").get("sourceWidth") or self.width
+        self.source_height = self.current_source.get(
+            "sceneItemTransform").get("sourceHeight") or self.height
+        if self.source_width and self.source_height:
+            self.width, self.height = self.scale_down_width_height(
+                self.source_width, self.source_height)
+            logger.info(
+                f"Using OBS source dimensions: {self.width}x{self.height}")
+        self.current_source_name = self.current_source.get(
+            "sourceName") or None
+        self.current_scene = scene if scene else obs.get_current_game()
+        self.ocr_config = get_scene_ocr_config()
+        if not self.ocr_config:
+            logger.error("No OCR config found for the current scene.")
+            return
+        self.ocr_config.scale_to_custom_size(self.width, self.height)

     def run(self):
         global last_image
         from PIL import Image
         import GameSentenceMiner.obs as obs

-        def init_config(source=None, scene=None):
-            obs.update_current_game()
-            self.current_source = source if source else obs.get_active_source()
-            logger.info(f"Current OBS source: {self.current_source}")
-            self.source_width = self.current_source.get(
-                "sceneItemTransform").get("sourceWidth") or self.width
-            self.source_height = self.current_source.get(
-                "sceneItemTransform").get("sourceHeight") or self.height
-            if self.source_width and self.source_height:
-                self.width, self.height = self.scale_down_width_height(
-                    self.source_width, self.source_height)
-                logger.info(
-                    f"Using OBS source dimensions: {self.width}x{self.height}")
-            self.current_source_name = self.current_source.get(
-                "sourceName") or None
-            self.current_scene = scene if scene else obs.get_current_game()
-            self.ocr_config = get_scene_ocr_config()
-            if not self.ocr_config:
-                logger.error("No OCR config found for the current scene.")
-                return
-            self.ocr_config.scale_to_custom_size(self.width, self.height)
-
         # Register a scene switch callback in obsws
         def on_scene_switch(scene):
             logger.info(f"Scene switched to: {scene}. Loading new OCR config.")
-            init_config(scene=scene)
+            self.init_config(scene=scene)

         asyncio.run(obs.register_scene_change_callback(on_scene_switch))

         self.connect_obs()
-        init_config()
+        self.init_config()
         start = time.time()
         while not terminated:
             if not screenshot_event.wait(timeout=0.1):
@@ -1396,6 +1400,7 @@ def run(read_from=None,
     global notifier
     global websocket_server_thread
     global screenshot_thread
+    global obs_screenshot_thread
     global image_queue
     global ocr_1
     global ocr_2
@@ -1555,7 +1560,15 @@
         if any(c in changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
             last_result = ([], engine_index)
             engine_change_handler_name(get_ocr_ocr1())
-    config_check_thread.add_callback(handle_config_changes)
+
+    def handle_area_config_changes(changes):
+        if screenshot_thread:
+            screenshot_thread.ocr_config = get_scene_ocr_config()
+        if obs_screenshot_thread:
+            obs_screenshot_thread.init_config()
+
+    config_check_thread.add_config_callback(handle_config_changes)
+    config_check_thread.add_area_callback(handle_area_config_changes)

     while not terminated:
         ocr_start_time = datetime.now()
GameSentenceMiner/util/text_log.py:
@@ -4,9 +4,12 @@ from datetime import datetime
 from difflib import SequenceMatcher
 from typing import Optional

+import rapidfuzz
+
 from GameSentenceMiner.util.gsm_utils import remove_html_and_cloze_tags
 from GameSentenceMiner.util.configuration import logger, get_config, gsm_state
 from GameSentenceMiner.util.model import AnkiCard
+import re

 initial_time = datetime.now()

@@ -107,20 +110,26 @@ class GameText:

 game_log = GameText()

-
-def similar(a, b):
-    return SequenceMatcher(None, a, b).ratio()
+def strip_whitespace_and_punctuation(text: str) -> str:
+    """
+    Strips whitespace and punctuation from the given text.
+    """
+    # Remove all whitespace and specified punctuation using regex
+    # Includes Japanese and common punctuation
+    return re.sub(r'[\s 、。「」【】《》., ]', '', text).strip()


 def lines_match(texthooker_sentence, anki_sentence):
-    texthooker_sentence = texthooker_sentence.replace("\n", "").replace("\r", "").replace(' ', '').strip()
-    anki_sentence = anki_sentence.replace("\n", "").replace("\r", "").replace(' ', '').strip()
-    similarity = similar(texthooker_sentence, anki_sentence)
+    # Replace newlines, spaces, other whitespace characters, AND japanese punctuation
+    texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
+    anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
+    similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
+    logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
     if texthooker_sentence in anki_sentence:
         logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
     elif anki_sentence in texthooker_sentence:
         logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
-    return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence and similarity > 0.8)
+    return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence and similarity > 80)


 def get_text_event(last_note) -> GameLine:
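Note the threshold change from 0.8 to 80: difflib's SequenceMatcher.ratio() is scaled 0.0-1.0, while rapidfuzz.fuzz.ratio returns 0-100, so the comparison had to be rescaled along with the library swap. A quick illustration of the two scales:

    from difflib import SequenceMatcher
    import rapidfuzz

    a, b = 'こんにちは世界', 'こんにちは世界!'
    print(SequenceMatcher(None, a, b).ratio())  # ~0.93 on a 0-1 scale
    print(rapidfuzz.fuzz.ratio(a, b))           # ~93.3 on a 0-100 scale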
GameSentenceMiner/wip/get_overlay_coords.py:
@@ -315,7 +315,7 @@ async def get_full_screenshot() -> Image.Image | None:
     else:
         monitors = [monitors[0]]
     monitor = monitors[get_config().wip.monitor_to_capture]
-    img = get_screenshot_PIL(compression=90, img_format='jpg')
+    img = get_screenshot_PIL(compression=90, img_format='jpg', width=monitor['width'] // 2, height=monitor['height'] // 2)
     # Put the image over a transparent background without stretching
     new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
     # Calculate coordinates to center img horizontally and vertically
@@ -356,34 +356,49 @@ async def get_full_screenshot() -> Image.Image | None:
     return None

 async def do_work(sentence_to_check=None):
+    from math import floor
     # connect_to_obs_sync(5)
     logger.info("in find_box")
     # await asyncio.sleep(.5)
     logger.info("after_initial_sleep")
+    start_time = time.time()
     full_screenshot_image, monitor_width, monitor_height = await get_full_screenshot()

-    oneocr_results = oneocr(full_screenshot_image)
-    crop_coords = oneocr_results[2]
-    logger.info("Cropping full screenshot with coordinates: %s", crop_coords)
-    cropped_image = full_screenshot_image.crop(crop_coords)
+    oneocr_results = oneocr(full_screenshot_image, multiple_crop_coords=True)
+    crop_coords_list = oneocr_results[2]
+    logger.info("Cropping full screenshot with coordinates: %s", crop_coords_list)
+    cropped_images = []
+    img = Image.new("RGBA", (monitor_width, monitor_height), (0, 0, 0, 0))
+    for crop_coords in crop_coords_list:
+        cropped_image = full_screenshot_image.crop(crop_coords)
+        cropped_images.append(cropped_image)
+        # Paste the cropped image onto the transparent background
+        img.paste(cropped_image, (floor(crop_coords[0]), floor(crop_coords[1])))
+
+    # img.show()
+
     # Convert 1/4
     if os.path.exists("C:\\Users\\Beangate\\GSM\\temp"):
-        cropped_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+        img.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+    logger.info(f"Time taken to get cropped image for lens: {time.time() - start_time:.2f} seconds")
+
     # full_screenshot_image.show()
-    if cropped_image:
+    if img:
+        start_time = time.time()
         logger.info("Full screenshot captured successfully. Now performing OCR...")
         # ocr_results = oneocr(full_screenshot_image, return_coords=True)
-        google_ocr_results = lens(cropped_image, return_coords=True)[2]
+        google_ocr_results = lens(img, return_coords=True)[2]

         ret = extract_text_with_pixel_boxes(
             api_response=google_ocr_results,
             original_width=monitor_width,
             original_height=monitor_height,
-            crop_x=crop_coords[0],
-            crop_y=crop_coords[1],
-            crop_width=crop_coords[2] - crop_coords[0],
-            crop_height=crop_coords[3] - crop_coords[1]
+            crop_x=0,
+            crop_y=0,
+            crop_width=img.width,
+            crop_height=img.height
         )
+        logger.info(f"Time taken for Lens OCR: {time.time() - start_time:.2f} seconds")

         # boxes_of_text = google_ocr_results[2]
         # logger.info(f"Boxes of text found: {boxes_of_text}")
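The net effect of this rewrite is that Google Lens no longer sees a single crop: every OneOCR line is pasted back at its original position on a monitor-sized transparent canvas, so Lens returns coordinates already in screen space and the crop offsets passed to extract_text_with_pixel_boxes collapse to zero. A self-contained sketch of the compositing step, assuming Pillow (dimensions and boxes are illustrative):

    from math import floor
    from PIL import Image

    screenshot = Image.new('RGB', (1920, 1080), 'white')    # stand-in for the capture
    canvas = Image.new('RGBA', (1920, 1080), (0, 0, 0, 0))  # transparent background
    for box in [(100.0, 200.0, 400.0, 240.0)]:              # per-line crop coords
        canvas.paste(screenshot.crop(box), (floor(box[0]), floor(box[1])))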
GameSentenceMiner.egg-info/PKG-INFO:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.13.12
+Version: 2.13.13
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
PKG-INFO:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.13.12
+Version: 2.13.13
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
pyproject.toml:
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "GameSentenceMiner"
-version = "2.13.12"
+version = "2.13.13"
 description = "A tool for mining sentences from games. Update: Overlay?"
 readme = "README.md"
 requires-python = ">=3.10"