PyPI - GameSentenceMiner - Versions diffs - 2.12.3__py3-none-any.whl → 2.12.4__py3-none-any.whl - Mend

GameSentenceMiner 2.12.3-py3-none-any.whl → 2.12.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

GameSentenceMiner/obs.py CHANGED Viewed

@@ -385,12 +385,17 @@ def get_screenshot_base64(compression=75, width=None, height=None):
         return None
-def get_screenshot_PIL(compression=75, img_format='png', width=None, height=None, retry=3):
+def get_screenshot_PIL(source_name=None, compression=75, img_format='png', width=None, height=None, retry=3):
     import io
     import base64
     from PIL import Image
+    if not source_name:
+        source_name = get_active_source().get('sourceName', None)
+    if not source_name:
+        logger.error("No active source found in the current scene.")
+        return None
     while True:
-        response = client.get_source_screenshot(name=get_current_game(), img_format=img_format, quality=compression, width=width, height=height)
+        response = client.get_source_screenshot(name=source_name, img_format=img_format, quality=compression, width=width, height=height)
         try:
             response.image_data = response.image_data.split(',', 1)[-1]  # Remove data:image/png;base64, prefix if present
         except AttributeError:
@@ -463,26 +468,42 @@ if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
     # main()
     connect_to_obs_sync()
-    i = 100
+    # i = 100
+    # for i in range(1, 100):
+    #     print(f"Getting screenshot {i}")
+    #     start = time.time()
+    # # get_screenshot(compression=95)
+    # # get_screenshot_base64(compression=95, width=1280, height=720)
+    #     img = get_screenshot_PIL(compression=i, img_format='jpg', width=1280, height=720)
+    #     end = time.time()
+    #     print(f"Time taken to get screenshot with compression {i}: {end - start} seconds")
     # for i in range(1, 100):
-    print(f"Getting screenshot {i}")
-    start = time.time()
-    # get_screenshot(compression=95)
-    # get_screenshot_base64(compression=95, width=1280, height=720)
-    img = get_screenshot_PIL(compression=i, img_format='png')
-    end = time.time()
-    print(f"Time taken to get screenshot with compression {i}: {end - start} seconds")
-    img.show()
+    #     print(f"Getting screenshot {i}")
+    #     start = time.time()
+    # # get_screenshot(compression=95)
+    # # get_screenshot_base64(compression=95, width=1280, height=720)
+    #     img = get_screenshot_PIL(compression=i, img_format='jpg', width=2560, height=1440)
+    #     end = time.time()
+    #     print(f"Time taken to get screenshot full sized jpg with compression {i}: {end - start} seconds")
+    # png_img = get_screenshot_PIL(compression=75, img_format='png', width=1280, height=720)
+    # jpg_img = get_screenshot_PIL(compression=100, img_format='jpg', width=2560, height=1440)
+    # png_img.show()
+    # jpg_img.show()
-    start = time.time()
-    with mss() as sct:
-        monitor = sct.monitors[1]
-        sct_img = sct.grab(monitor)
-        img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
-        img.show()
-    end = time.time()
-    print(f"Time taken to get screenshot with mss: {end - start} seconds")
+    # start = time.time()
+    # with mss() as sct:
+    #     monitor = sct.monitors[1]
+    #     sct_img = sct.grab(monitor)
+    #     img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
+    #     img.show()
+    # end = time.time()
+    # print(f"Time taken to get screenshot with mss: {end - start} seconds")
     # print(get_screenshot_base64(compression=75, width=1280, height=720))

GameSentenceMiner/owocr/owocr/ocr.py CHANGED Viewed

@@ -436,7 +436,7 @@ class GoogleLens:
         #             res += '\n'
         if return_coords:
-            x = (True, res, lines)
+            x = (True, res, response_dict)
         else:
             x = (True, res)
@@ -887,7 +887,28 @@ class OneOCR:
             except:
                 logger.warning('Error reading URL from config, OneOCR will not work!')
-    def __call__(self, img, furigana_filter_sensitivity=0, sentence_to_check=None, return_coords=False):
+    def get_regex(self, lang):
+        if lang == "ja":
+            self.regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
+        elif lang == "zh":
+            self.regex = re.compile(r'[\u4E00-\u9FFF]')
+        elif lang == "ko":
+            self.regex = re.compile(r'[\uAC00-\uD7AF]')
+        elif lang == "ar":
+            self.regex = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
+        elif lang == "ru":
+            self.regex = re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
+        elif lang == "el":
+            self.regex = re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
+        elif lang == "he":
+            self.regex = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
+        elif lang == "th":
+            self.regex = re.compile(r'[\u0E00-\u0E7F]')
+        else:
+            self.regex = re.compile(
+            r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
+    def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
         lang = get_ocr_language()
         if lang != self.initial_lang:
             self.initial_lang = lang
@@ -911,6 +932,10 @@ class OneOCR:
                         json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
                 # print(json.dumps(ocr_resp))
                 filtered_lines = [line for line in ocr_resp['lines'] if self.regex.search(line['text'])]
+                x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
+                y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
+                if x_coords and y_coords:
+                    crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
                 # logger.info(filtered_lines)
                 res = ''
                 skipped = []
@@ -964,30 +989,6 @@ class OneOCR:
                     #         else:
                     #             continue
                     #     res += '\n'
-                elif sentence_to_check:
-                    lines_to_build_area = []
-                    widths = []
-                    heights = []
-                    for line in ocr_resp['lines']:
-                        print(line['text'])
-                        if sentence_to_check in line['text'] or line['text'] in sentence_to_check or rapidfuzz.fuzz.partial_ratio(sentence_to_check, line['text']) > 50:
-                            lines_to_build_area.append(line)
-                            res += line['text']
-                            for word in line['words']:
-                                widths.append(word['bounding_rect']['x2'] - word['bounding_rect']['x1'])
-                                heights.append(word['bounding_rect']['y3'] - word['bounding_rect']['y1'])
-                    x_coords = [line['bounding_rect'][f'x{i}'] for line in lines_to_build_area for i in
-                                range(1, 5)]
-                    y_coords = [line['bounding_rect'][f'y{i}'] for line in lines_to_build_area for i in
-                                range(1, 5)]
-                    if widths:
-                        avg_width = sum(widths) / len(widths)
-                    if heights:
-                        avg_height = sum(heights) / len(heights)
-                    if x_coords and y_coords:
-                        crop_coords = (
-                            min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
                 elif return_coords:
                     for line in filtered_lines:
                         for word in line['words']:
@@ -998,10 +999,6 @@ class OneOCR:
                             boxes.append(box)
                     res = ocr_resp['text']
                 else:
-                    x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
-                    y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
-                    if x_coords and y_coords:
-                        crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
                     res = ocr_resp['text']
             except RuntimeError as e:
@@ -1019,7 +1016,7 @@ class OneOCR:
             res = res.json()['text']
         if return_coords:
-            x = (True, res, boxes)
+            x = (True, res, filtered_lines)
         else:
             x = (True, res, crop_coords)
         if is_path:

GameSentenceMiner/owocr/owocr/run.py CHANGED Viewed

@@ -44,7 +44,6 @@ import queue
 from datetime import datetime
 from PIL import Image, ImageDraw, UnidentifiedImageError
 from loguru import logger
-from pynput import keyboard
 from desktop_notifier import DesktopNotifierSync
 import psutil
@@ -384,6 +383,7 @@ class TextFiltering:
                 block_filtered = self.latin_extended_regex.findall(block)
             else:
                 block_filtered = self.latin_extended_regex.findall(block)
             if block_filtered:
                 orig_text_filtered.append(''.join(block_filtered))
             else:
@@ -547,39 +547,6 @@ class ScreenshotThread(threading.Thread):
             else:
                 raise ValueError('Window capture is only currently supported on Windows and macOS')
-    def __del__(self):
-        if self.macos_window_tracker_instance:
-            self.macos_window_tracker_instance.join()
-        elif self.windows_window_tracker_instance:
-            self.windows_window_tracker_instance.join()
-    def setup_persistent_windows_window_tracker(self):
-        global window_open
-        window_open = False
-        def setup_tracker():
-            global window_open
-            self.window_handle, window_title = self.get_windows_window_handle(self.screen_capture_window)
-            if not self.window_handle:
-                # print(f"Window '{screen_capture_window}' not found.")
-                return
-            set_dpi_awareness()
-            window_open = True
-            self.windows_window_tracker_instance = threading.Thread(target=self.windows_window_tracker)
-            self.windows_window_tracker_instance.start()
-            logger.opt(ansi=True).info(f'Selected window: {window_title}')
-        while not terminated:
-            if not window_open:
-                try:
-                    setup_tracker()
-                except ValueError as e:
-                    logger.error(f"Error setting up persistent windows window tracker: {e}")
-                    break
-            time.sleep(5)
     def get_windows_window_handle(self, window_title):
         def callback(hwnd, window_title_part):
             window_title = win32gui.GetWindowText(hwnd)
@@ -602,7 +569,7 @@ class ScreenshotThread(threading.Thread):
     def windows_window_tracker(self):
         found = True
-        while not terminated or window_open:
+        while not terminated:
             found = win32gui.IsWindow(self.window_handle)
             if not found:
                 break
@@ -872,18 +839,9 @@ class OBSScreenshotThread(threading.Thread):
             image_queue.put((result, True))
     def connect_obs(self):
-        try:
-            import obsws_python as obs
-            self.obs_client = obs.ReqClient(
-                host=get_config().obs.host,
-                port=get_config().obs.port,
-                password=get_config().obs.password,
-                timeout=10
-            )
-            logger.info("Connected to OBS WebSocket.")
-        except Exception as e:
-            logger.error(f"Failed to connect to OBS: {e}")
-            self.obs_client = None
+        import GameSentenceMiner.obs as obs
+        obs.connect_to_obs_sync()
     def run(self):
         global last_image
@@ -895,7 +853,7 @@ class OBSScreenshotThread(threading.Thread):
         def init_config(source=None, scene=None):
             obs.update_current_game()
             self.current_source = source if source else obs.get_active_source()
-            self.current_source_name = self.current_source.get('sourceName') if isinstance(self.current_source, dict) else None
+            self.current_source_name = self.current_source.get("sourceName") or None
             self.current_scene = scene if scene else obs.get_current_game()
             self.ocr_config = get_scene_ocr_config()
             self.ocr_config.scale_to_custom_size(self.width, self.height)
@@ -927,20 +885,20 @@ class OBSScreenshotThread(threading.Thread):
             if not self.ocr_config:
                 time.sleep(1)
                 continue
+            if not self.current_source_name:
+                obs.update_current_game()
+                self.current_source = obs.get_active_source()
+                self.current_source_name = self.current_source.get("sourceName") or None
             try:
-                response = self.obs_client.get_source_screenshot(
-                    name=self.current_source_name,
-                    img_format='png',
-                    quality=75,
-                    width=self.width,
-                    height=self.height,
-                )
+                if not self.current_source_name:
+                    logger.error("No active source found in the current scene.")
+                    time.sleep(1)
+                    continue
+                img = obs.get_screenshot_PIL(source_name=self.current_source_name, width=self.width, height=self.height, img_format='jpg', compression=90)
-                if response.image_data:
-                    image_data = base64.b64decode(response.image_data.split(",")[1])
-                    img = Image.open(io.BytesIO(image_data)).convert("RGBA")
+                if img is not None:
                     if not img.getbbox():
                         logger.info("OBS Not Capturing anything, sleeping.")
                         time.sleep(1)
@@ -1118,11 +1076,10 @@ def signal_handler(sig, frame):
 def on_window_closed(alive):
-    global terminated, window_open
+    global terminated
     if not (alive or terminated):
         logger.info('Window closed or error occurred, terminated!')
-        window_open = False
-        # terminated = True
+        terminated = True
 def on_screenshot_combo():
@@ -1464,8 +1421,12 @@ def run(read_from=None,
         read_from_readable.append(f'directory {read_from_path}')
     if len(key_combos) > 0:
-        key_combo_listener = keyboard.GlobalHotKeys(key_combos)
-        key_combo_listener.start()
+        try:
+            from pynput import keyboard
+            key_combo_listener = keyboard.GlobalHotKeys(key_combos)
+            key_combo_listener.start()
+        except ImportError:
+            pass
     if write_to in ('clipboard', 'websocket', 'callback'):
         write_to_readable = write_to

GameSentenceMiner/util/downloader/oneocr_dl.py CHANGED Viewed

@@ -239,11 +239,12 @@ class Downloader:
 # Example usage:
 if __name__ == "__main__":
     downloader = Downloader()
-    if downloader.download_and_extract():
-        print("SnippingTool files are ready.")
-        print("Press Ctrl+C or X on window to exit.")
-        input()
-    else:
-        print("Failed to download and extract SnippingTool files. You may need to follow instructions at https://github.com/AuroraWright/oneocr")
-        print("Press Ctrl+C or X on window to exit.")
-        input()
+    downloader.download_and_extract()
+    # if downloader.download_and_extract():
+    #     print("SnippingTool files are ready.")
+    #     print("Press Ctrl+C or X on window to exit.")
+    #     # input()
+    # else:
+    #     # print("Failed to download and extract SnippingTool files. You may need to follow instructions at https://github.com/AuroraWright/oneocr")
+    #     print("Press Ctrl+C or X on window to exit.")
+    #     input()

GameSentenceMiner/wip/get_overlay_coords.py CHANGED Viewed

@@ -1,88 +1,285 @@
 import asyncio
 import io
 import base64
+import math
 from PIL import Image
 from GameSentenceMiner.util.configuration import get_config
+from typing import Dict, Any, List, Tuple
+from GameSentenceMiner.util.electron_config import get_ocr_language
 if get_config().wip.overlay_websocket_send:
-    from GameSentenceMiner.owocr.owocr.ocr import GoogleLens, OneOCR
+    from GameSentenceMiner.owocr.owocr.ocr import GoogleLens, OneOCR, get_regex
 from GameSentenceMiner.obs import *
-# OBS WebSocket settings
-OBS_HOST = 'localhost'
-OBS_PORT = 7274
-OBS_PASSWORD = 'your_obs_websocket_password' # Set your OBS WebSocket password here, if any
-WINDOW_NAME = "Nier:Automata"
-WIDTH = 2560
-HEIGHT = 1440
 if get_config().wip.overlay_websocket_send:
     oneocr = OneOCR()
     lens = GoogleLens()
-def correct_ocr_text(detected_text: str, reference_text: str) -> str:
+def _convert_box_to_pixels_v2(
+    bbox_data: Dict[str, float],
+    original_width: int,
+    original_height: int,
+    crop_x: int,
+    crop_y: int,
+    crop_width: int,
+    crop_height: int
+) -> Dict[str, float]:
+    """
+    Simplified conversion: scales normalized bbox to pixel coordinates, ignores rotation.
+    Args:
+        bbox_data: A dictionary with normalized 'center_x', 'center_y', 'width', 'height'.
+        original_width: The width of the original, full-size image in pixels.
+        original_height: The height of the original, full-size image in pixels.
+    Returns:
+        A dictionary of the four corner points with absolute pixel coordinates.
+    """
+    cx, cy = bbox_data['center_x'], bbox_data['center_y']
+    w, h = bbox_data['width'], bbox_data['height']
+    # Scale normalized coordinates to pixel coordinates
+    box_width_px = w * crop_width
+    box_height_px = h * crop_height
+    center_x_px = cx * crop_width + crop_x
+    center_y_px = cy * crop_height + crop_y
+    # Calculate corners (no rotation)
+    x1 = center_x_px - box_width_px / 2
+    y1 = center_y_px - box_height_px / 2
+    x2 = center_x_px + box_width_px / 2
+    y2 = center_y_px - box_height_px / 2
+    x3 = center_x_px + box_width_px / 2
+    y3 = center_y_px + box_height_px / 2
+    x4 = center_x_px - box_width_px / 2
+    y4 = center_y_px + box_height_px / 2
+    return {
+        "x1": x1,
+        "y1": y1,
+        "x2": x2,
+        "y2": y2,
+        "x3": x3,
+        "y3": y3,
+        "x4": x4,
+        "y4": y4,
+    }
+def _convert_box_to_pixels(
+    bbox_data: Dict[str, float],
+    original_width: int,
+    original_height: int,
+    crop_x: int,
+    crop_y: int,
+    crop_width: int,
+    crop_height: int
+) -> Dict[str, Dict[str, float]]:
+    """
+    Converts a normalized bounding box to an absolute pixel-based quad.
+    Args:
+        bbox_data: A dictionary with normalized 'center_x', 'center_y', etc.
+        original_width: The width of the original, full-size image in pixels.
+        original_height: The height of the original, full-size image in pixels.
+    Returns:
+        A dictionary of the four corner points with absolute pixel coordinates.
     """
-    Correct OCR text by comparing character-by-character with reference text.
-    When mismatches are found, look for subsequent matches and correct previous mismatches.
+    # Normalized coordinates from the input
+    cx, cy = bbox_data['center_x'], bbox_data['center_y']
+    w, h = bbox_data['width'], bbox_data['height']
+    angle_rad = bbox_data.get('rotation_z', 0.0)
+    # Calculate un-rotated corner points (still normalized) relative to the center
+    half_w, half_h = w / 2, h / 2
+    corners = [
+        (-half_w, -half_h),  # Top-left
+        ( half_w, -half_h),  # Top-right
+        ( half_w,  half_h),  # Bottom-right
+        (-half_w,  half_h),  # Bottom-left
+    ]
+    # Rotate each corner and translate it to its absolute normalized position
+    cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)
+    pixel_corners = []
+    for x_norm, y_norm in corners:
+        # 2D rotation
+        x_rot_norm = x_norm * cos_a - y_norm * sin_a
+        y_rot_norm = x_norm * sin_a + y_norm * cos_a
+        # Translate to absolute normalized position
+        abs_x_norm = cx + x_rot_norm
+        abs_y_norm = cy + y_rot_norm
+        # Scale up to pixel coordinates
+        pixel_corners.append((
+            abs_x_norm * crop_width + crop_x,
+            abs_y_norm * crop_height + crop_y
+        ))
+    # Return as x1, y1, x2, y2, x3, y3, x4, y4
+    return {
+        "x1": pixel_corners[0][0],
+        "y1": pixel_corners[0][1],
+        "x2": pixel_corners[1][0],
+        "y2": pixel_corners[1][1],
+        "x3": pixel_corners[2][0],
+        "y3": pixel_corners[2][1],
+        "x4": pixel_corners[3][0],
+        "y4": pixel_corners[3][1],
+    }
+def extract_text_with_pixel_boxes(
+    api_response: Dict[str, Any],
+    original_width: int,
+    original_height: int,
+    crop_x: int,
+    crop_y: int,
+    crop_width: int,
+    crop_height: int
+) -> List[Dict[str, Any]]:
     """
-    if not detected_text or not reference_text:
-        return detected_text
+    Extracts sentences and words and converts their normalized bounding boxes
+    to absolute pixel coordinates based on original image dimensions.
+    Args:
+        api_response: The dictionary parsed from the source JSON.
+        original_width: The width of the original, full-size image.
+        original_height: The height of the original, full-size image.
+    Returns:
+        A list of sentence objects with text and bounding boxes in pixel coordinates.
+    """
+    results = []
+    regex = get_regex(get_ocr_language())
+    try:
+        paragraphs = api_response["objects_response"]["text"]["text_layout"]["paragraphs"]
+    except KeyError:
+        return [] # Return empty list if the structure is not found
+    for para in paragraphs:
+        for line in para.get("lines", []):
+            line_text_parts = []
+            word_list = []
+            for word in line.get("words", []):
+                if not regex.search(word.get("plain_text", "")):
+                    continue
+                word_text = word.get("plain_text", "")
+                line_text_parts.append(word_text)
+                # Convert word's bounding box to pixel coordinates
+                word_box = _convert_box_to_pixels_v2(
+                    word["geometry"]["bounding_box"],
+                    original_width,
+                    original_height,
+                    crop_x=crop_x,
+                    crop_y=crop_y,
+                    crop_width=crop_width,
+                    crop_height=crop_height
+                )
+                word_list.append({
+                    "text": word_text,
+                    "bounding_rect": word_box
+                })
+            if not line_text_parts:
+                continue
+            # Assemble the sentence object
+            full_sentence_text = "".join(line_text_parts)
+            # Convert the full line's bounding box to pixel coordinates
+            line_box = _convert_box_to_pixels_v2(
+                line["geometry"]["bounding_box"],
+                original_width,
+                original_height,
+                crop_x=crop_x,
+                crop_y=crop_y,
+                crop_width=crop_width,
+                crop_height=crop_height
+            )
+            results.append({
+                "text": full_sentence_text,
+                "bounding_rect": line_box,
+                "words": word_list
+            })
+    return results
+# def correct_ocr_text(detected_text: str, reference_text: str) -> str:
+#     """
+#     Correct OCR text by comparing character-by-character with reference text.
+#     When mismatches are found, look for subsequent matches and correct previous mismatches.
+#     """
+#     if not detected_text or not reference_text:
+#         return detected_text
-    detected_chars = list(detected_text)
-    reference_chars = list(reference_text)
+#     detected_chars = list(detected_text)
+#     reference_chars = list(reference_text)
-    # Track positions where mismatches occurred
-    mismatched_positions = []
+#     # Track positions where mismatches occurred
+#     mismatched_positions = []
-    min_length = min(len(detected_chars), len(reference_chars))
+#     min_length = min(len(detected_chars), len(reference_chars))
-    for i in range(min_length):
-        if detected_chars[i] != reference_chars[i]:
-            mismatched_positions.append(i)
-            logger.info(f"Mismatch at position {i}: detected '{detected_chars[i]}' vs reference '{reference_chars[i]}'")
-        else:
-            # We found a match - if we have previous mismatches, correct the most recent one
-            if mismatched_positions:
-                # Correct the most recent mismatch (simple 1-for-1 strategy)
-                last_mismatch_pos = mismatched_positions.pop()
-                old_char = detected_chars[last_mismatch_pos]
-                detected_chars[last_mismatch_pos] = reference_chars[last_mismatch_pos]
-                logger.info(f"Corrected position {last_mismatch_pos}: '{old_char}' -> '{reference_chars[last_mismatch_pos]}'")
+#     start_of_reference = 0
+#     for char in detected_chars:
+#         if char == reference_chars[start_of_reference]:
+#             start_of_reference += 1
+#     for i in range(min_length):
+#         if detected_chars[i] != reference_chars[i]:
+#             mismatched_positions.append(i)
+#             logger.info(f"Mismatch at position {i}: detected '{detected_chars[i]}' vs reference '{reference_chars[i]}'")
+#         else:
+#             # We found a match - if we have previous mismatches, correct the most recent one
+#             if mismatched_positions:
+#                 # Correct the most recent mismatch (simple 1-for-1 strategy)
+#                 last_mismatch_pos = mismatched_positions.pop()
+#                 old_char = detected_chars[last_mismatch_pos]
+#                 detected_chars[last_mismatch_pos] = reference_chars[last_mismatch_pos]
+#                 logger.info(f"Corrected position {last_mismatch_pos}: '{old_char}' -> '{reference_chars[last_mismatch_pos]}'")
-    corrected_text = ''.join(detected_chars)
-    return corrected_text
+#     corrected_text = ''.join(detected_chars)
+#     return corrected_text
-def redistribute_corrected_text(original_boxes: list, original_text: str, corrected_text: str) -> list:
-    """
-    Redistribute corrected text back to the original text boxes while maintaining their positions.
-    """
-    if original_text == corrected_text:
-        return original_boxes
+# def redistribute_corrected_text(original_boxes: list, original_text: str, corrected_text: str) -> list:
+#     """
+#     Redistribute corrected text back to the original text boxes while maintaining their positions.
+#     """
+#     if original_text == corrected_text:
+#         return original_boxes
-    corrected_boxes = []
-    text_position = 0
+#     corrected_boxes = []
+#     text_position = 0
-    for box in original_boxes:
-        original_word = box['text']
-        word_length = len(original_word)
+#     for box in original_boxes:
+#         original_word = box['text']
+#         word_length = len(original_word)
-        # Extract the corrected portion for this box
-        if text_position + word_length <= len(corrected_text):
-            corrected_word = corrected_text[text_position:text_position + word_length]
-        else:
-            # Handle case where corrected text is shorter
-            corrected_word = corrected_text[text_position:] if text_position < len(corrected_text) else ""
+#         # Extract the corrected portion for this box
+#         if text_position + word_length <= len(corrected_text):
+#             corrected_word = corrected_text[text_position:text_position + word_length]
+#         else:
+#             # Handle case where corrected text is shorter
+#             corrected_word = corrected_text[text_position:] if text_position < len(corrected_text) else ""
-        # Create a new box with corrected text but same coordinates
-        corrected_box = box.copy()
-        corrected_box['text'] = corrected_word
-        corrected_boxes.append(corrected_box)
+#         # Create a new box with corrected text but same coordinates
+#         corrected_box = box.copy()
+#         corrected_box['text'] = corrected_word
+#         corrected_boxes.append(corrected_box)
-        text_position += word_length
+#         text_position += word_length
-        logger.info(f"Redistributed: '{original_word}' -> '{corrected_word}'")
+#         logger.info(f"Redistributed: '{original_word}' -> '{corrected_word}'")
-    return corrected_boxes
+#     return corrected_boxes
 async def get_full_screenshot() -> Image.Image | None:
     # logger.info(f"Attempting to connect to OBS WebSocket at ws://{OBS_HOST}:{OBS_PORT}")
@@ -117,14 +314,36 @@ async def get_full_screenshot() -> Image.Image | None:
             else:
                 monitors = [monitors[0]]
             monitor = monitors[get_config().wip.monitor_to_capture]
-            sct_img = sct.grab(monitor)
-            img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
-            # img.show()
-            return img
+            img = get_screenshot_PIL(compression=100, img_format='jpg')
+            # Put the image over a transparent background without stretching
+            new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
+            # Calculate coordinates to center img horizontally and vertically
+            left = 0
+            top = 0
+            if img.width < monitor['width'] and img.height < monitor['height']:
+                # scale image to fit monitor
+                img = img.resize((monitor['width'], monitor['height']), Image.Resampling.BILINEAR)
+            if img.width < monitor['width']:
+                left = (monitor['width'] - img.width) // 2
+            if img.height < monitor['height']:
+                top = (monitor['height'] - img.height) // 2
+            print(f"Image size: {img.size}, Monitor size: {monitor['width']}x{monitor['height']}")
+            new_img.paste(img, (left, top))
+            # new_img.show()
+            return new_img, monitor['width'], monitor['height']
+        #     sct_img = sct.grab(monitor)
+        #     img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
+        #     # img.show()
+        #     return img
             # update_current_game()
             # image_data = get_screenshot_base64(compression=75, width=1280, height=720)
             # image_data = base64.b64decode(image_data)
+        img = get_screenshot_PIL(img_format='jpg')
         # img = Image.open(io.BytesIO(image_data)).convert("RGBA").resize((WIDTH, HEIGHT), Image.Resampling.LANCZOS)
         # img.show()
         logger.info(f"Screenshot captured in {time.time() - start_time:.2f} seconds.")
@@ -140,56 +359,77 @@ async def do_work(sentence_to_check=None):
     logger.info("in find_box")
     # await asyncio.sleep(.5)
     logger.info("after_initial_sleep")
-    full_screenshot_image = await get_full_screenshot()
+    full_screenshot_image, monitor_width, monitor_height = await get_full_screenshot()
+    oneocr_results = oneocr(full_screenshot_image)
+    crop_coords = oneocr_results[2]
+    logger.info("Cropping full screenshot with coordinates: %s", crop_coords)
+    cropped_image = full_screenshot_image.crop(crop_coords)
+    # Convert 1/4
     if os.path.exists("C:\\Users\\Beangate\\GSM\\temp"):
-        full_screenshot_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
+        cropped_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
     # full_screenshot_image.show()
-    if full_screenshot_image:
+    if cropped_image:
         logger.info("Full screenshot captured successfully. Now performing local OCR...")
-        ocr_results = oneocr(full_screenshot_image, return_coords=True)
+        # ocr_results = oneocr(full_screenshot_image, return_coords=True)
+        google_ocr_results = lens(cropped_image, return_coords=True)[2]
-        boxes_of_text = ocr_results[2]
+        ret = extract_text_with_pixel_boxes(
+            api_response=google_ocr_results,
+            original_width=monitor_width,
+            original_height=monitor_height,
+            crop_x=crop_coords[0],
+            crop_y=crop_coords[1],
+            crop_width=crop_coords[2] - crop_coords[0],
+            crop_height=crop_coords[3] - crop_coords[1]
+        )
+        # boxes_of_text = google_ocr_results[2]
         # logger.info(f"Boxes of text found: {boxes_of_text}")
         words = []
+        # logger.info(json.dumps(ret, indent=4, ensure_ascii=False))
+        return ret, 48
         # If we have a reference sentence, perform character-by-character correction
-        if sentence_to_check:
-            # Concatenate all OCR text to form the detected sentence
-            detected_sentence = ''.join([box['text'] for box in boxes_of_text])
-            logger.info(f"Original detected sentence: '{detected_sentence}'")
-            logger.info(f"Reference sentence: '{sentence_to_check}'")
+        # if sentence_to_check:
+        #     # Concatenate all OCR text to form the detected sentence
+        #     detected_sentence = ''.join([box['text'] for box in boxes_of_text])
+        #     logger.info(f"Original detected sentence: '{detected_sentence}'")
+        #     logger.info(f"Reference sentence: '{sentence_to_check}'")
-            # Perform character-by-character comparison and correction
-            corrected_sentence = correct_ocr_text(detected_sentence, sentence_to_check)
-            logger.info(f"Corrected sentence: '{corrected_sentence}'")
+        #     # Perform character-by-character comparison and correction
+        #     corrected_sentence = correct_ocr_text(detected_sentence, sentence_to_check)
+        #     logger.info(f"Corrected sentence: '{corrected_sentence}'")
-            # Redistribute corrected text back to boxes while maintaining positions
-            corrected_boxes = redistribute_corrected_text(boxes_of_text, detected_sentence, corrected_sentence)
-        else:
-            corrected_boxes = boxes_of_text
+        #     # Redistribute corrected text back to boxes while maintaining positions
+        #     corrected_boxes = redistribute_corrected_text(boxes_of_text, detected_sentence, corrected_sentence)
+        # else:
+        #     corrected_boxes = boxes_of_text
-        sentence_position = 0
-        for box in corrected_boxes:
-            word = box['text']
-            # logger.info(f"Box: {box}")
-            x1, y1 = box['bounding_rect']['x1'], box['bounding_rect']['y1']
-            x2, y2 = box['bounding_rect']['x3'], box['bounding_rect']['y3']
-            words.append({
-                "x1": x1,
-                "y1": y1,
-                "x2": x2,
-                "y2": y2,
-                "word": box['text']
-            })
+        # sentence_position = 0
+        # for box in corrected_boxes:
+        #     word = box['text']
+        #     # logger.info(f"Box: {box}")
+        #     x1, y1 = box['bounding_rect']['x1'], box['bounding_rect']['y1']
+        #     x2, y2 = box['bounding_rect']['x3'], box['bounding_rect']['y3']
+        #     words.append({
+        #         "x1": x1,
+        #         "y1": y1,
+        #         "x2": x2,
+        #         "y2": y2,
+        #         "word": box['text']
+        #     })
-        # logger.info(f"Returning words: {words}")
+        # # logger.info(f"Returning words: {words}")
-        ret = [
-            {
-                "words": words,
-            }
-        ]
+        # ret = [
+        #     {
+        #         "words": words,
+        #     }
+        # ]
         # cropped_sections = []
         # for box in boxes_of_text:
         #     # Ensure crop coordinates are within image bounds
@@ -247,8 +487,30 @@ async def find_box_for_sentence(sentence_to_check):
         return [], 48
 async def main():
+    import mss as mss
     connect_to_obs_sync(5)
-    await find_box_for_sentence("はじめから")
+    start_time = time.time()
+    with mss.mss() as sct:
+        monitors = sct.monitors
+        if len(monitors) > 1:
+            monitors = monitors[1:]
+        else:
+            monitors = [monitors[0]]
+        monitor = monitors[get_config().wip.monitor_to_capture]
+        img = get_screenshot_PIL(img_format='jpg')
+        img.show()
+        # Put the image over a transparent background without stretching
+        # Create a transparent image with the same size as the monitor
+        new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
+        # Calculate coordinates to center img horizontally and vertically
+        left = (monitor['width'] - img.width) // 2
+        top = (monitor['height'] - img.height) // 2
+        print(f"Image size: {img.size}, Monitor size: {monitor['width']}x{monitor['height']}")
+        print(f"Left: {left}, Top: {top}, Width: {monitor['width']}, Height: {monitor['height']}")
+        new_img.paste(img, (left, top))
+        new_img.show()
+        return new_img
 if __name__ == '__main__':
     try:

{gamesentenceminer-2.12.3.dist-info → gamesentenceminer-2.12.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.12.3
+Version: 2.12.4
 Summary: A tool for mining sentences from games. Update: Overlay?
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
@@ -150,6 +150,8 @@ If you encounter issues, please ask for help in my [Discord](https://discord.gg/
 * [Renji's Texthooker](https://github.com/Renji-XD/texthooker-ui)
+* https://github.com/Saplling/transparent-texthooker-overlay
 ## Donations
 If you've found this or any of my other projects helpful, please consider supporting my work through [GitHub Sponsors](https://github.com/sponsors/bpwhelan), [Ko-fi](https://ko-fi.com/beangate), or [Patreon](https://www.patreon.com/GameSentenceMiner).

{gamesentenceminer-2.12.3.dist-info → gamesentenceminer-2.12.4.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ GameSentenceMiner/anki.py,sha256=FUwcWO0-arzfQjejQmDKP7pNNakhboo8InQ4s_jv6AY,190
 GameSentenceMiner/config_gui.py,sha256=GBcPWWoki8dMigWqORcG9memBwKp-BNFbhXhjfFLV0c,104414
 GameSentenceMiner/gametext.py,sha256=fIm28ZvRzKvnVHj86TmSYR2QQifo_Lk6cx4UptIltLs,7844
 GameSentenceMiner/gsm.py,sha256=GGF0owRrrYJgdfXx-INwfuKbaoY-G5gLllE-sNrwYnI,25341
-GameSentenceMiner/obs.py,sha256=lRJFFOB9oHsE_uCRmxl4xwSpkqtjWVzebyqHXmynS1E,17755
+GameSentenceMiner/obs.py,sha256=bMVWAPQ6QLf4celLiOsL9BUO8pTdMn9lpT9fQCNfm7Q,18718
 GameSentenceMiner/vad.py,sha256=zo9JpuEOCXczPXM-dq8lbr-zM-MPpfJ8aajggR3mKk4,18710
 GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/ai/ai_prompting.py,sha256=iHkEx2pQJ-tEyejOgYy4G0DcZc8qvBugVL6-CQpPSME,26089
@@ -25,8 +25,8 @@ GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9
 GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
 GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
 GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
-GameSentenceMiner/owocr/owocr/ocr.py,sha256=6ArGr0xd-Fhkw9uPn4MH3urxbLBwZ-UmxfwoKUUgxio,63459
-GameSentenceMiner/owocr/owocr/run.py,sha256=nkDpXICJCTKgJTS4MYRnaz-GYqAS-GskcSg1ZkGIRuE,67285
+GameSentenceMiner/owocr/owocr/ocr.py,sha256=Zii5r15ZlHFJWSbmXpva6QJVGkU3j2wT5Q0izazLyCQ,63021
+GameSentenceMiner/owocr/owocr/run.py,sha256=GJAAqifaERxDnxcqPBTsEnxn-rJsUBgDC1s2F26N6KM,65724
 GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
 GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/util/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
@@ -46,7 +46,7 @@ GameSentenceMiner/util/communication/websocket.py,sha256=TbphRGmxVrgEupS7tNdifsm
 GameSentenceMiner/util/downloader/Untitled_json.py,sha256=RUUl2bbbCpUDUUS0fP0tdvf5FngZ7ILdA_J5TFYAXUQ,15272
 GameSentenceMiner/util/downloader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/util/downloader/download_tools.py,sha256=zR-aEHiFVkyo-9oPoSx6nQ2K-_J8WBHLZyLoOhypsW4,8458
-GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=EJbKISaZ9p2x9P4x0rpMM5nAInTTc9b7arraGBcd-SA,10381
+GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=l3s9Z-x1b57GX048o5h-MVv0UTZo4H-Q-zb-JREkMLI,10439
 GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/web/service.py,sha256=S7bYf2kSk08u-8R9Qpv7piM-pxfFjYZUvU825xupmuI,5279
 GameSentenceMiner/web/texthooking_page.py,sha256=2ZS89CAI17xVkx64rGmHHbF96eKR8gPWiR_WAoDJ0Mw,17399
@@ -63,10 +63,10 @@ GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 GameSentenceMiner/web/templates/index.html,sha256=Gv3CJvNnhAzIVV_QxhNq4OD-pXDt1vKCu9k6WdHSXuA,215343
 GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
 GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
-GameSentenceMiner/wip/get_overlay_coords.py,sha256=hE-XxbhzvHDZoU9hLLyIFtfpHDO_QXHU0DbR-aJGPuI,10153
-gamesentenceminer-2.12.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-gamesentenceminer-2.12.3.dist-info/METADATA,sha256=vy4RJLP3o-9ojyVqkSw6KD8XMUNIPclIoZp4c4mR1b0,6999
-gamesentenceminer-2.12.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-gamesentenceminer-2.12.3.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
-gamesentenceminer-2.12.3.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
-gamesentenceminer-2.12.3.dist-info/RECORD,,
+GameSentenceMiner/wip/get_overlay_coords.py,sha256=yivn8C26BBRK4cjE7yPv1XfvbyqWC0itLL9Vay8aY-c,19780
+gamesentenceminer-2.12.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+gamesentenceminer-2.12.4.dist-info/METADATA,sha256=xidAx_PVQT2GCaZoAkfYeMAJHqAppcpiPhmZ5Lhz1X4,7061
+gamesentenceminer-2.12.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+gamesentenceminer-2.12.4.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
+gamesentenceminer-2.12.4.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
+gamesentenceminer-2.12.4.dist-info/RECORD,,

{gamesentenceminer-2.12.3.dist-info → gamesentenceminer-2.12.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{gamesentenceminer-2.12.3.dist-info → gamesentenceminer-2.12.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{gamesentenceminer-2.12.3.dist-info → gamesentenceminer-2.12.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{gamesentenceminer-2.12.3.dist-info → gamesentenceminer-2.12.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

GameSentenceMiner 2.12.3__py3-none-any.whl → 2.12.4__py3-none-any.whl

GameSentenceMiner 2.12.3py3-none-any.whl → 2.12.4py3-none-any.whl