GameSentenceMiner 2.15.4__tar.gz → 2.15.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/anki.py +24 -25
  2. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/gsm.py +2 -2
  3. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/obs.py +8 -47
  4. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/owocr_helper.py +79 -14
  5. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/ocr.py +15 -3
  6. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/run.py +82 -60
  7. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/vad.py +9 -3
  8. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/PKG-INFO +3 -2
  9. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/requires.txt +1 -0
  10. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/PKG-INFO +3 -2
  11. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/README.md +1 -1
  12. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/pyproject.toml +3 -2
  13. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/__init__.py +0 -0
  14. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ai/__init__.py +0 -0
  15. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ai/ai_prompting.py +0 -0
  16. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/__init__.py +0 -0
  17. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon.png +0 -0
  18. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon128.png +0 -0
  19. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon256.png +0 -0
  20. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon32.png +0 -0
  21. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon512.png +0 -0
  22. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/icon64.png +0 -0
  23. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/assets/pickaxe.png +0 -0
  24. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/config_gui.py +0 -0
  25. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/gametext.py +0 -0
  26. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/locales/en_us.json +0 -0
  27. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/locales/ja_jp.json +0 -0
  28. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/locales/zh_cn.json +0 -0
  29. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/__init__.py +0 -0
  30. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/gsm_ocr_config.py +0 -0
  31. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/ocrconfig.py +0 -0
  32. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/owocr_area_selector.py +0 -0
  33. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/ocr/ss_picker.py +0 -0
  34. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/__init__.py +0 -0
  35. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/__main__.py +0 -0
  36. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/config.py +0 -0
  37. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/lens_betterproto.py +0 -0
  38. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +0 -0
  39. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/__init__.py +0 -0
  40. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/audio_offset_selector.py +0 -0
  41. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/furigana_filter_preview.py +0 -0
  42. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/ss_selector.py +0 -0
  43. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/tools/window_transparency.py +0 -0
  44. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/__init__.py +0 -0
  45. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/communication/__init__.py +0 -0
  46. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/communication/send.py +0 -0
  47. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/communication/websocket.py +0 -0
  48. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/configuration.py +0 -0
  49. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/db.py +0 -0
  50. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/Untitled_json.py +0 -0
  51. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/__init__.py +0 -0
  52. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/download_tools.py +0 -0
  53. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/downloader/oneocr_dl.py +0 -0
  54. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/electron_config.py +0 -0
  55. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/ffmpeg.py +0 -0
  56. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/get_overlay_coords.py +0 -0
  57. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/gsm_utils.py +0 -0
  58. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/model.py +0 -0
  59. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/notification.py +0 -0
  60. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/text_log.py +0 -0
  61. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/win10toast/__init__.py +0 -0
  62. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/util/win10toast/__main__.py +0 -0
  63. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/__init__.py +0 -0
  64. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/service.py +0 -0
  65. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/__init__.py +0 -0
  66. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  67. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  68. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/favicon.ico +0 -0
  69. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/favicon.svg +0 -0
  70. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/site.webmanifest +0 -0
  71. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/style.css +0 -0
  72. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  73. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  74. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/__init__.py +0 -0
  75. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/index.html +0 -0
  76. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/text_replacements.html +0 -0
  77. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/templates/utility.html +0 -0
  78. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/web/texthooking_page.py +0 -0
  79. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner/wip/__init___.py +0 -0
  80. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/SOURCES.txt +0 -0
  81. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/dependency_links.txt +0 -0
  82. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/entry_points.txt +0 -0
  83. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/GameSentenceMiner.egg-info/top_level.txt +0 -0
  84. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/LICENSE +0 -0
  85. {gamesentenceminer-2.15.4 → gamesentenceminer-2.15.6}/setup.cfg +0 -0
@@ -280,31 +280,30 @@ def get_initial_card_info(last_note: AnkiCard, selected_lines):
280
280
 
281
281
  if get_config().overlay.websocket_port and texthooking_page.overlay_server_thread.has_clients():
282
282
  sentence_in_anki = last_note.get_field(get_config().anki.sentence_field).replace("\n", "").replace("\r", "").strip()
283
- if lines_match(game_line.text, remove_html_and_cloze_tags(sentence_in_anki)):
284
- logger.info("Found matching line in Anki, Preserving HTML and fix spacing!")
285
- if "<b>" in sentence_in_anki:
286
- text_inside_bold = re.findall(r'<b>(.*?)</b>', sentence_in_anki)
287
- logger.info(text_inside_bold)
288
- if text_inside_bold:
289
- text = text_inside_bold[0].replace(" ", "").replace('\n', '').strip()
290
- note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_bold[0], f"<b>{text}</b>")
291
- logger.info(f"Preserved bold Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
292
- if "<i>" in sentence_in_anki:
293
- text_inside_italic = re.findall(r'<i>(.*?)</i>', sentence_in_anki)
294
- if text_inside_italic:
295
- text = text_inside_italic[0].replace(" ", "").replace('\n', '').strip()
296
- note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_italic[0], f"<i>{text}</i>")
297
- logger.info(f"Preserved italic Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
298
- if "<u>" in sentence_in_anki:
299
- text_inside_underline = re.findall(r'<u>(.*?)</u>', sentence_in_anki)
300
- if text_inside_underline:
301
- text = text_inside_underline[0].replace(" ", "").replace('\n', '').strip()
302
- note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_underline[0], f"<u>{text}</u>")
303
- logger.info(f"Preserved underline Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
304
-
305
- if get_config().anki.sentence_field not in note['fields']:
306
- logger.info("No HTML tags found to preserve, just fixing spacing")
307
- note['fields'][get_config().anki.sentence_field] = game_line.text
283
+ logger.info("Found matching line in Anki, Preserving HTML and fix spacing!")
284
+ if "<b>" in sentence_in_anki:
285
+ text_inside_bold = re.findall(r'<b>(.*?)</b>', sentence_in_anki)
286
+ logger.info(text_inside_bold)
287
+ if text_inside_bold:
288
+ text = text_inside_bold[0].replace(" ", "").replace('\n', '').strip()
289
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_bold[0], f"<b>{text}</b>")
290
+ logger.info(f"Preserved bold Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
291
+ if "<i>" in sentence_in_anki:
292
+ text_inside_italic = re.findall(r'<i>(.*?)</i>', sentence_in_anki)
293
+ if text_inside_italic:
294
+ text = text_inside_italic[0].replace(" ", "").replace('\n', '').strip()
295
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_italic[0], f"<i>{text}</i>")
296
+ logger.info(f"Preserved italic Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
297
+ if "<u>" in sentence_in_anki:
298
+ text_inside_underline = re.findall(r'<u>(.*?)</u>', sentence_in_anki)
299
+ if text_inside_underline:
300
+ text = text_inside_underline[0].replace(" ", "").replace('\n', '').strip()
301
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_underline[0], f"<u>{text}</u>")
302
+ logger.info(f"Preserved underline Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
303
+
304
+ if get_config().anki.sentence_field not in note['fields']:
305
+ logger.info("No HTML tags found to preserve, just fixing spacing")
306
+ note['fields'][get_config().anki.sentence_field] = game_line.text
308
307
  if selected_lines:
309
308
  try:
310
309
  sentence_in_anki = last_note.get_field(get_config().anki.sentence_field)
@@ -663,10 +663,10 @@ def initialize_text_monitor():
663
663
 
664
664
  def async_loop():
665
665
  async def loop():
666
- await obs.connect_to_obs()
666
+ logger.info("Post-Initialization started.")
667
+ await obs.connect_to_obs(connections=3, check_output=True)
667
668
  await register_scene_switcher_callback()
668
669
  await check_obs_folder_is_correct()
669
- logger.info("Post-Initialization started.")
670
670
  vad_processor.init()
671
671
  # if is_beangate:
672
672
  # await run_test_code()
@@ -101,7 +101,7 @@ class OBSConnectionPool:
101
101
 
102
102
 
103
103
  class OBSConnectionManager(threading.Thread):
104
- def __init__(self, check_output=True):
104
+ def __init__(self, check_output=False):
105
105
  super().__init__()
106
106
  self.daemon = True
107
107
  self.running = True
@@ -261,7 +261,7 @@ def get_obs_websocket_config_values():
261
261
  full_config.save()
262
262
  reload_config()
263
263
 
264
- async def connect_to_obs(retry=5, check_output=True):
264
+ async def connect_to_obs(retry=5, connections=2, check_output=False):
265
265
  global connection_pool, obs_connection_manager, event_client, connecting
266
266
  if is_windows():
267
267
  get_obs_websocket_config_values()
@@ -275,7 +275,7 @@ async def connect_to_obs(retry=5, check_output=True):
275
275
  'password': get_config().obs.password,
276
276
  'timeout': 3,
277
277
  }
278
- connection_pool = OBSConnectionPool(size=3, **pool_kwargs)
278
+ connection_pool = OBSConnectionPool(size=connections, **pool_kwargs)
279
279
  connection_pool.connect_all()
280
280
 
281
281
  with connection_pool.get_client() as client:
@@ -306,46 +306,8 @@ async def connect_to_obs(retry=5, check_output=True):
306
306
  retry -= 1
307
307
  connecting = False
308
308
 
309
- def connect_to_obs_sync(retry=2, check_output=True):
310
- global connection_pool, obs_connection_manager, event_client
311
- if is_windows():
312
- get_obs_websocket_config_values()
313
-
314
- while True:
315
- try:
316
- pool_kwargs = {
317
- 'host': get_config().obs.host,
318
- 'port': get_config().obs.port,
319
- 'password': get_config().obs.password,
320
- 'timeout': 3,
321
- }
322
- connection_pool = OBSConnectionPool(size=5, **pool_kwargs)
323
- connection_pool.connect_all()
324
-
325
- with connection_pool.get_client() as client:
326
- client.get_version() # Test one connection to confirm it works
327
-
328
- event_client = obs.EventClient(
329
- host=get_config().obs.host,
330
- port=get_config().obs.port,
331
- password=get_config().obs.password,
332
- timeout=1,
333
- )
334
- if not obs_connection_manager:
335
- obs_connection_manager = OBSConnectionManager(check_output=check_output)
336
- obs_connection_manager.start()
337
- update_current_game()
338
- logger.info("Connected to OBS WebSocket.")
339
- break # Exit the loop once connected
340
- except Exception as e:
341
- if retry <= 0:
342
- gsm_status.obs_connected = False
343
- logger.error(f"Failed to connect to OBS WebSocket: {e}")
344
- connection_pool = None
345
- event_client = None
346
- break
347
- time.sleep(1)
348
- retry -= 1
309
+ def connect_to_obs_sync(retry=2, connections=2, check_output=False):
310
+ asyncio.run(connect_to_obs(retry=retry, connections=connections, check_output=check_output))
349
311
 
350
312
 
351
313
  def disconnect_from_obs():
@@ -419,14 +381,13 @@ def stop_replay_buffer():
419
381
  logger.warning(f"Error stopping replay buffer: {e}")
420
382
 
421
383
  def save_replay_buffer():
422
- status = get_replay_buffer_status()
423
- if status:
384
+ try:
424
385
  with connection_pool.get_client() as client:
425
386
  response = client.save_replay_buffer()
426
387
  if response and response.ok:
427
388
  logger.info("Replay buffer saved. If your log stops here, make sure your obs output path matches \"Path To Watch\" in GSM settings.")
428
- else:
429
- raise Exception("Replay Buffer is not active, could not save Replay Buffer!")
389
+ except Exception as e:
390
+ raise Exception(f"Error saving replay buffer: {e}")
430
391
 
431
392
  def get_current_scene():
432
393
  try:
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ from copy import copy
2
3
  import io
3
4
  import json
4
5
  import logging
@@ -376,18 +377,19 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
376
377
  return
377
378
  previous_orig_text = orig_text_string
378
379
  previous_ocr1_result = previous_text
379
- if crop_coords and get_ocr_optimize_second_scan():
380
- x1, y1, x2, y2 = crop_coords
381
- x1 = max(0, min(x1, img.width))
382
- y1 = max(0, min(y1, img.height))
383
- x2 = max(x1, min(x2, img.width))
384
- y2 = max(y1, min(y2, img.height))
385
- previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
386
- try:
387
- previous_img_local = previous_img_local.crop((x1, y1, x2, y2))
388
- except ValueError:
389
- logger.warning("Error cropping image, using original image")
390
- second_ocr_queue.put((previous_text, stable_time, previous_img_local, filtering, pre_crop_image))
380
+ ocr2_image = get_ocr2_image(crop_coords, og_image=previous_img_local, ocr2_engine=get_ocr_ocr2())
381
+ # if crop_coords and get_ocr_optimize_second_scan():
382
+ # x1, y1, x2, y2 = crop_coords
383
+ # x1 = max(0, min(x1, img.width))
384
+ # y1 = max(0, min(y1, img.height))
385
+ # x2 = max(x1, min(x2, img.width))
386
+ # y2 = max(y1, min(y2, img.height))
387
+ # previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
388
+ # try:
389
+ # previous_img_local = previous_img_local.crop((x1, y1, x2, y2))
390
+ # except ValueError:
391
+ # logger.warning("Error cropping image, using original image")
392
+ second_ocr_queue.put((previous_text, stable_time, ocr2_image, filtering, pre_crop_image))
391
393
  # threading.Thread(target=do_second_ocr, args=(previous_text, stable_time, previous_img_local, filtering), daemon=True).start()
392
394
  previous_img = None
393
395
  previous_text = None
@@ -412,6 +414,69 @@ done = False
412
414
  # Create a queue for tasks
413
415
  second_ocr_queue = queue.Queue()
414
416
 
417
+ def get_ocr2_image(crop_coords, og_image, ocr2_engine=None):
418
+ """
419
+ Returns the image to use for the second OCR pass, cropping and scaling as needed.
420
+ Logic is unchanged, but code is refactored for clarity and maintainability.
421
+ """
422
+ def return_original_image():
423
+ logger.info("Returning original image for OCR2 (no cropping or optimization).")
424
+ if not crop_coords or not get_ocr_optimize_second_scan():
425
+ return og_image
426
+ x1, y1, x2, y2 = crop_coords
427
+ x1 = max(0, min(x1, og_image.width))
428
+ y1 = max(0, min(y1, og_image.height))
429
+ x2 = max(x1, min(x2, og_image.width))
430
+ y2 = max(y1, min(y2, og_image.height))
431
+ og_image.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
432
+ return og_image.crop((x1, y1, x2, y2))
433
+
434
+ LOCAL_OCR_ENGINES = ['easyocr', 'oneocr', 'rapidocr', 'mangaocr', 'winrtocr']
435
+ local_ocr = ocr2_engine in LOCAL_OCR_ENGINES
436
+ ocr_config_local = copy(ocr_config)
437
+
438
+ # Non-local OCR: just crop the original image if needed
439
+ if not local_ocr:
440
+ return return_original_image()
441
+
442
+ # Local OCR: get fresh screenshot and apply config/cropping
443
+ obs_width = getattr(run.obs_screenshot_thread, 'width', None)
444
+ obs_height = getattr(run.obs_screenshot_thread, 'height', None)
445
+ if not obs_width or not obs_height:
446
+ return return_original_image()
447
+ logger.debug(f"Getting OCR2 image with OBS dimensions: {obs_width}x{obs_height}")
448
+
449
+ img = obs.get_screenshot_PIL(compression=100, img_format="jpg")
450
+ ocr_config_local.scale_to_custom_size(img.width, img.height)
451
+
452
+ # If no crop or optimization, just apply config and return
453
+ if not crop_coords or not get_ocr_optimize_second_scan():
454
+ img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=True)
455
+ return img
456
+
457
+ # Calculate scaling ratios
458
+ width_ratio = img.width / obs_width if obs_width else 1
459
+ height_ratio = img.height / obs_height if obs_height else 1
460
+ logger.debug(f"Cropping OCR2 image with crop coordinates: {crop_coords} and ratios: {width_ratio}, {height_ratio}")
461
+
462
+ # Scale crop_coords
463
+ x1 = int(crop_coords[0] * width_ratio)
464
+ y1 = int(crop_coords[1] * height_ratio)
465
+ x2 = int(crop_coords[2] * width_ratio)
466
+ y2 = int(crop_coords[3] * height_ratio)
467
+ logger.debug(f"Scaled crop coordinates: {(x1, y1, x2, y2)}")
468
+
469
+ # Clamp coordinates to image bounds
470
+ x1 = max(0, min(x1, img.width))
471
+ y1 = max(0, min(y1, img.height))
472
+ x2 = max(x1, min(x2, img.width))
473
+ y2 = max(y1, min(y2, img.height))
474
+
475
+ img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=False)
476
+
477
+
478
+ return img.crop((x1, y1, x2, y2))
479
+
415
480
  def process_task_queue():
416
481
  while True:
417
482
  try:
@@ -456,7 +521,7 @@ def run_oneocr(ocr_config: OCRConfig, rectangles, config_check_thread):
456
521
  gsm_ocr_config=ocr_config,
457
522
  screen_capture_areas=screen_areas,
458
523
  furigana_filter_sensitivity=furigana_filter_sensitivity,
459
- screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None,
524
+ screen_capture_combo=manual_ocr_hotkey.upper() if manual_ocr_hotkey and manual else None,
460
525
  config_check_thread=config_check_thread)
461
526
  except Exception as e:
462
527
  logger.exception(f"Error running OneOCR: {e}")
@@ -530,7 +595,7 @@ def set_force_stable_hotkey():
530
595
 
531
596
  if __name__ == "__main__":
532
597
  try:
533
- global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline, obs_ocr
598
+ global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline, obs_ocr, manual
534
599
  import sys
535
600
 
536
601
  import argparse
@@ -1243,7 +1243,7 @@ class OCRSpace:
1243
1243
  class GeminiOCR:
1244
1244
  name = 'gemini'
1245
1245
  readable_name = 'Gemini'
1246
- key = 'm'
1246
+ key = ';'
1247
1247
  available = False
1248
1248
 
1249
1249
  def __init__(self, config={'api_key': None}, lang='ja'):
@@ -1433,10 +1433,14 @@ class localLLMOCR:
1433
1433
  self.keep_warm = config.get('keep_warm', True)
1434
1434
  self.custom_prompt = config.get('prompt', None)
1435
1435
  self.available = True
1436
+ if not self.check_url_for_connectivity(self.api_url):
1437
+ self.available = False
1438
+ logger.warning(f'Local LLM OCR API URL not reachable: {self.api_url}')
1439
+ return
1436
1440
  self.client = openai.OpenAI(
1437
1441
  base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
1438
1442
  api_key=self.api_key,
1439
- timeout=3
1443
+ timeout=1
1440
1444
  )
1441
1445
  if self.client.models.retrieve(self.model):
1442
1446
  self.model = self.model
@@ -1446,7 +1450,15 @@ class localLLMOCR:
1446
1450
  self.keep_llm_hot_thread.start()
1447
1451
  except Exception as e:
1448
1452
  logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
1449
-
1453
+
1454
+ def check_url_for_connectivity(self, url):
1455
+ import requests
1456
+ try:
1457
+ response = requests.get(url, timeout=0.5)
1458
+ return response.status_code == 200
1459
+ except Exception:
1460
+ return False
1461
+
1450
1462
  def keep_llm_warm(self):
1451
1463
  def ocr_blank_black_image():
1452
1464
  if self.last_ocr_time and (time.time() - self.last_ocr_time) < 5:
@@ -42,6 +42,7 @@ import socketserver
42
42
  import cv2
43
43
  import numpy as np
44
44
 
45
+ from collections import deque
45
46
  from datetime import datetime, timedelta
46
47
  from PIL import Image, ImageDraw
47
48
  from loguru import logger
@@ -337,6 +338,7 @@ class TextFiltering:
337
338
  self.thai_regex = re.compile(r'[\u0E00-\u0E7F]')
338
339
  self.latin_extended_regex = re.compile(
339
340
  r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
341
+ self.last_few_results = {}
340
342
  try:
341
343
  from transformers import pipeline, AutoTokenizer
342
344
  import torch
@@ -361,7 +363,7 @@ class TextFiltering:
361
363
  import langid
362
364
  self.classify = langid.classify
363
365
 
364
- def __call__(self, text, last_result):
366
+ def __call__(self, text, last_result, engine=None, is_second_ocr=False):
365
367
  lang = get_ocr_language()
366
368
  if self.initial_lang != lang:
367
369
  from pysbd import Segmenter
@@ -402,11 +404,24 @@ class TextFiltering:
402
404
 
403
405
  try:
404
406
  if isinstance(last_result, list):
405
- last_text = last_result
407
+ last_text = last_result.copy()
406
408
  elif last_result and last_result[1] == engine_index:
407
409
  last_text = last_result[0]
408
410
  else:
409
411
  last_text = []
412
+
413
+ if engine and not is_second_ocr:
414
+ if self.last_few_results and self.last_few_results.get(engine):
415
+ for sublist in self.last_few_results.get(engine, []):
416
+ if sublist:
417
+ for item in sublist:
418
+ if item and item not in last_text:
419
+ last_text.append(item)
420
+ self.last_few_results[engine].append(orig_text_filtered)
421
+ else:
422
+ self.last_few_results[engine] = deque(maxlen=3)
423
+ self.last_few_results[engine].append(orig_text_filtered)
424
+
410
425
  except Exception as e:
411
426
  logger.error(f"Error processing last_result {last_result}: {e}")
412
427
  last_text = []
@@ -981,7 +996,7 @@ def quick_text_detection(pil_image, threshold_ratio=0.01):
981
996
 
982
997
  # Use OBS for Screenshot Source (i.e. Linux)
983
998
  class OBSScreenshotThread(threading.Thread):
984
- def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
999
+ def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1, is_manual_ocr=False):
985
1000
  super().__init__(daemon=True)
986
1001
  self.ocr_config = ocr_config
987
1002
  self.interval = interval
@@ -992,6 +1007,7 @@ class OBSScreenshotThread(threading.Thread):
992
1007
  self.width = width
993
1008
  self.height = height
994
1009
  self.use_periodic_queue = not screen_capture_on_combo
1010
+ self.is_manual_ocr = is_manual_ocr
995
1011
 
996
1012
  def write_result(self, result):
997
1013
  if self.use_periodic_queue:
@@ -1003,62 +1019,26 @@ class OBSScreenshotThread(threading.Thread):
1003
1019
  def connect_obs(self):
1004
1020
  import GameSentenceMiner.obs as obs
1005
1021
  obs.connect_to_obs_sync(check_output=False)
1006
-
1007
- def scale_down_width_height(self, width, height):
1008
- if width == 0 or height == 0:
1009
- return self.width, self.height
1010
- # return width, height
1011
- aspect_ratio = width / height
1012
- logger.info(
1013
- f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
1014
- if aspect_ratio > 2.66:
1015
- # Ultra-wide (32:9) - use 1920x540
1016
- logger.info("Using ultra-wide aspect ratio scaling (32:9).")
1017
- return 1920, 540
1018
- elif aspect_ratio > 2.33:
1019
- # 21:9 - use 1920x800
1020
- logger.info("Using ultra-wide aspect ratio scaling (21:9).")
1021
- return 1920, 800
1022
- elif aspect_ratio > 1.77:
1023
- # 16:9 - use 1280x720
1024
- logger.info("Using standard aspect ratio scaling (16:9).")
1025
- return 1280, 720
1026
- elif aspect_ratio > 1.6:
1027
- # 16:10 - use 1280x800
1028
- logger.info("Using standard aspect ratio scaling (16:10).")
1029
- return 1280, 800
1030
- elif aspect_ratio > 1.33:
1031
- # 4:3 - use 960x720
1032
- logger.info("Using standard aspect ratio scaling (4:3).")
1033
- return 960, 720
1034
- elif aspect_ratio > 1.25:
1035
- # 5:4 - use 900x720
1036
- logger.info("Using standard aspect ratio scaling (5:4).")
1037
- return 900, 720
1038
- elif aspect_ratio > 1.5:
1039
- # 3:2 - use 1080x720
1040
- logger.info("Using standard aspect ratio scaling (3:2).")
1041
- return 1080, 720
1042
- else:
1043
- # Default fallback - use original resolution
1044
- logger.info(
1045
- "Using default aspect ratio scaling (original resolution).")
1046
- return width, height
1047
1022
 
1048
1023
  def init_config(self, source=None, scene=None):
1049
1024
  import GameSentenceMiner.obs as obs
1050
1025
  obs.update_current_game()
1051
1026
  self.current_source = source if source else obs.get_active_source()
1052
- logger.info(f"Current OBS source: {self.current_source}")
1027
+ logger.debug(f"Current OBS source: {self.current_source}")
1053
1028
  self.source_width = self.current_source.get(
1054
1029
  "sceneItemTransform").get("sourceWidth") or self.width
1055
1030
  self.source_height = self.current_source.get(
1056
1031
  "sceneItemTransform").get("sourceHeight") or self.height
1057
- if self.source_width and self.source_height:
1058
- self.width, self.height = self.scale_down_width_height(
1032
+ if self.source_width and self.source_height and not self.is_manual_ocr and not get_ocr_two_pass_ocr():
1033
+ self.width, self.height = scale_down_width_height(
1059
1034
  self.source_width, self.source_height)
1060
1035
  logger.info(
1061
- f"Using OBS source dimensions: {self.width}x{self.height}")
1036
+ f"Using OBS source dimensions: {self.source_width}x{self.source_height}")
1037
+ else:
1038
+ self.width = self.source_width or 1280
1039
+ self.height = self.source_height or 720
1040
+ logger.info(
1041
+ f"Using source dimensions: {self.width}x{self.height}")
1062
1042
  self.current_source_name = self.current_source.get(
1063
1043
  "sourceName") or None
1064
1044
  self.current_scene = scene if scene else obs.get_current_game()
@@ -1105,7 +1085,7 @@ class OBSScreenshotThread(threading.Thread):
1105
1085
  self.write_result(1)
1106
1086
  continue
1107
1087
  img = obs.get_screenshot_PIL(source_name=self.current_source_name,
1108
- width=self.width, height=self.height, img_format='jpg', compression=80)
1088
+ width=self.width, height=self.height, img_format='jpg', compression=100)
1109
1089
 
1110
1090
  img = apply_ocr_config_to_image(img, self.ocr_config)
1111
1091
 
@@ -1120,6 +1100,39 @@ class OBSScreenshotThread(threading.Thread):
1120
1100
  f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
1121
1101
  time.sleep(.5)
1122
1102
  continue
1103
+
1104
def scale_down_width_height(width, height):
    """Scale an OBS source's dimensions down to a standard capture resolution.

    Picks a target resolution bucket based on the source's aspect ratio so
    screenshots sent to OCR stay small and consistent.

    :param width: Source width in pixels.
    :param height: Source height in pixels.
    :return: Tuple ``(width, height)`` of the scaled-down dimensions; the
        original dimensions are returned unchanged when either input is 0 or
        no aspect-ratio bucket matches.
    """
    if width == 0 or height == 0:
        # Avoid ZeroDivisionError and pass degenerate dimensions through.
        return width, height
    aspect_ratio = width / height
    logger.info(
        f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
    # Thresholds MUST be checked in strictly descending order; previously the
    # 3:2 (> 1.5) branch sat below the 4:3 (> 1.33) branch, making it
    # unreachable, so 3:2 sources were wrongly scaled to 960x720.
    if aspect_ratio > 2.66:
        logger.info("Using ultra-wide aspect ratio scaling (32:9).")
        return 1920, 540
    elif aspect_ratio > 2.33:
        logger.info("Using ultra-wide aspect ratio scaling (21:9).")
        return 1920, 800
    elif aspect_ratio > 1.77:
        logger.info("Using standard aspect ratio scaling (16:9).")
        return 1280, 720
    elif aspect_ratio > 1.6:
        logger.info("Using standard aspect ratio scaling (16:10).")
        return 1280, 800
    elif aspect_ratio > 1.5:
        logger.info("Using standard aspect ratio scaling (3:2).")
        return 1080, 720
    elif aspect_ratio > 1.33:
        logger.info("Using standard aspect ratio scaling (4:3).")
        return 960, 720
    elif aspect_ratio > 1.25:
        logger.info("Using standard aspect ratio scaling (5:4).")
        return 900, 720
    else:
        logger.info(
            "Using default aspect ratio scaling (original resolution).")
        return width, height
1123
1136
 
1124
1137
 
1125
1138
  def apply_ocr_config_to_image(img, ocr_config, is_secondary=False):
@@ -1317,8 +1330,10 @@ def do_configured_ocr_replacements(text: str) -> str:
1317
1330
  return do_text_replacements(text, OCR_REPLACEMENTS_FILE)
1318
1331
 
1319
1332
 
1320
- def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering=None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
1333
+ def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering: TextFiltering = None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
1321
1334
  global engine_index
1335
+ # TODO Replace this at a later date
1336
+ is_second_ocr = bool(engine)
1322
1337
  if auto_pause_handler:
1323
1338
  auto_pause_handler.stop()
1324
1339
  if engine:
@@ -1328,9 +1343,10 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1328
1343
  break
1329
1344
  else:
1330
1345
  engine_instance = engine_instances[engine_index]
1346
+ engine = engine_instance.name
1331
1347
 
1332
1348
  engine_color = config.get_general('engine_color')
1333
-
1349
+
1334
1350
  start_time = time.time()
1335
1351
  result = engine_instance(img_or_path, furigana_filter_sensitivity)
1336
1352
  res, text, crop_coords = (*result, None)[:3]
@@ -1362,7 +1378,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1362
1378
  if res:
1363
1379
  text = do_configured_ocr_replacements(text)
1364
1380
  if filtering:
1365
- text, orig_text = filtering(text, last_result)
1381
+ text, orig_text = filtering(text, last_result, engine=engine, is_second_ocr=is_second_ocr)
1366
1382
  if get_ocr_language() == "ja" or get_ocr_language() == "zh":
1367
1383
  text = post_process(text, keep_blank_lines=get_ocr_keep_newline())
1368
1384
  if notify and config.get_general('notifications'):
@@ -1382,7 +1398,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1382
1398
  pyperclipfix.copy(text)
1383
1399
  elif write_to == "callback":
1384
1400
  txt_callback(text, orig_text, ocr_start_time,
1385
- img_or_path, bool(engine), filtering, crop_coords)
1401
+ img_or_path, is_second_ocr, filtering, crop_coords)
1386
1402
  elif write_to:
1387
1403
  with Path(write_to).open('a', encoding='utf-8') as f:
1388
1404
  f.write(text + '\n')
@@ -1404,7 +1420,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1404
1420
  This function checks if the detected text area falls entirely within secondary rectangles (menu areas).
1405
1421
 
1406
1422
  :param text: The recognized text from OCR.
1407
- :param crop_coords: Tuple containing (x, y, width, height) of the detected text area relative to the cropped image.
1423
+ :param crop_coords: Tuple containing (x, y, x2, y2) of the detected text area relative to the cropped image.
1408
1424
  :return: True if the text is all menu items (within secondary rectangles), False otherwise.
1409
1425
  """
1410
1426
  if not text or not crop_coords:
@@ -1412,7 +1428,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1412
1428
 
1413
1429
  original_width = obs_screenshot_thread.width
1414
1430
  original_height = obs_screenshot_thread.height
1415
- crop_x, crop_y, crop_w, crop_h = crop_coords
1431
+ crop_x, crop_y, crop_x2, crop_y2 = crop_coords
1416
1432
 
1417
1433
  ocr_config = get_scene_ocr_config()
1418
1434
 
@@ -1430,14 +1446,14 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1430
1446
  return False
1431
1447
 
1432
1448
  if not primary_rectangles:
1433
- if crop_x < 0 or crop_y < 0 or crop_x + crop_w > original_width or crop_y + crop_h > original_height:
1449
+ if crop_x < 0 or crop_y < 0 or crop_x2 > original_width or crop_y2 > original_height:
1434
1450
  return False
1435
1451
  for menu_rect in menu_rectangles:
1436
1452
  rect_left, rect_top, rect_width, rect_height = menu_rect.coordinates
1437
1453
  rect_right = rect_left + rect_width
1438
1454
  rect_bottom = rect_top + rect_height
1439
1455
  if (crop_x >= rect_left and crop_y >= rect_top and
1440
- crop_x + crop_w <= rect_right and crop_y + crop_h <= rect_bottom):
1456
+ crop_x2 <= rect_right and crop_y2 <= rect_bottom):
1441
1457
  return True
1442
1458
  return False
1443
1459
 
@@ -1445,19 +1461,25 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1445
1461
 
1446
1462
  if len(primary_rectangles) == 1:
1447
1463
  primary_rect = primary_rectangles[0]
1448
- primary_left, primary_top = primary_rect.coordinates[0], primary_rect.coordinates[1]
1464
+ primary_left, primary_top, primary_width, primary_height = primary_rect.coordinates
1449
1465
  original_x = crop_x + primary_left
1450
1466
  original_y = crop_y + primary_top
1467
+ original_x2 = crop_x2 + primary_left
1468
+ original_y2 = crop_y2 + primary_top
1451
1469
  else:
1452
1470
  current_y_offset = 0
1453
1471
  original_x = None
1454
1472
  original_y = None
1473
+ original_x2 = None
1474
+ original_y2 = None
1455
1475
  for i, primary_rect in enumerate(primary_rectangles):
1456
1476
  primary_left, primary_top, primary_width, primary_height = primary_rect.coordinates
1457
1477
  section_height = primary_height
1458
1478
  if crop_y >= current_y_offset and crop_y < current_y_offset + section_height:
1459
1479
  original_x = crop_x + primary_left
1460
1480
  original_y = (crop_y - current_y_offset) + primary_top
1481
+ original_x2 = crop_x2 + primary_left
1482
+ original_y2 = crop_y2 + primary_top
1461
1483
  break
1462
1484
  current_y_offset += section_height + 50
1463
1485
  if original_x is None or original_y is None:
@@ -1471,7 +1493,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
1471
1493
  rect_right = rect_left + rect_width
1472
1494
  rect_bottom = rect_top + rect_height
1473
1495
  if (original_x >= rect_left and original_y >= rect_top and
1474
- original_x <= rect_right and original_y <= rect_bottom):
1496
+ original_x2 <= rect_right and original_y2 <= rect_bottom):
1475
1497
  return True
1476
1498
 
1477
1499
  return False
@@ -1712,7 +1734,7 @@ def run(read_from=None,
1712
1734
  last_result = ([], engine_index)
1713
1735
  screenshot_event = threading.Event()
1714
1736
  obs_screenshot_thread = OBSScreenshotThread(
1715
- gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs)
1737
+ gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs, is_manual_ocr=bool(screen_capture_on_combo))
1716
1738
  obs_screenshot_thread.start()
1717
1739
  filtering = TextFiltering()
1718
1740
  read_from_readable.append('obs')
@@ -139,7 +139,7 @@ class VADProcessor(ABC):
139
139
  self.extract_audio_and_combine_segments(input_audio, voice_activity, output_audio, padding=get_config().vad.splice_padding)
140
140
  else:
141
141
  ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio, trim_beginning=get_config().vad.trim_beginning, fade_in_duration=0.05, fade_out_duration=0)
142
- return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, self.vad_system_name, voice_activity, output_audio)
142
+ return VADResult(True, max(0, start_time + get_config().vad.beginning_offset), max(0, end_time + get_config().audio.end_offset), self.vad_system_name, voice_activity, output_audio)
143
143
 
144
144
  class SileroVADProcessor(VADProcessor):
145
145
  def __init__(self):
@@ -165,10 +165,12 @@ class WhisperVADProcessor(VADProcessor):
165
165
 
166
166
  def load_whisper_model(self):
167
167
  import stable_whisper as whisper
168
+ import torch
168
169
  if not self.vad_model:
170
+ self.device = "cpu" if get_config().vad.use_cpu_for_inference else "cuda" if torch.cuda.is_available() else "cpu"
169
171
  with warnings.catch_warnings():
170
172
  warnings.simplefilter("ignore")
171
- self.vad_model = whisper.load_model(get_config().vad.whisper_model, device="cpu" if get_config().vad.use_cpu_for_inference else None)
173
+ self.vad_model = whisper.load_faster_whisper(get_config().vad.whisper_model, device=self.device)
172
174
  logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
173
175
  return self.vad_model
174
176
 
@@ -187,7 +189,7 @@ class WhisperVADProcessor(VADProcessor):
187
189
  temperature=0.0)
188
190
  voice_activity = []
189
191
 
190
- logger.debug(result.to_dict())
192
+ logger.debug(json.dumps(result.to_dict(), indent=2))
191
193
 
192
194
  # Process the segments to extract tokens, timestamps, and confidence
193
195
  for i, segment in enumerate(result.segments):
@@ -198,6 +200,10 @@ class WhisperVADProcessor(VADProcessor):
198
200
  else:
199
201
  logger.info(
200
202
  "Unknown single character segment, not skipping, but logging, please report if this is a mistake: " + segment.text)
203
+
204
+ if segment.no_speech_prob and segment.no_speech_prob > 0.8:
205
+ logger.debug(f"Skipping segment with high no_speech_prob: {segment.no_speech_prob} for segment {segment.text} at {segment.start}-{segment.end}")
206
+ continue
201
207
 
202
208
 
203
209
  logger.debug(segment.to_dict())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.15.4
3
+ Version: 2.15.6
4
4
  Summary: A tool for mining sentences from games. Update: Overlay?
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -47,11 +47,12 @@ Requires-Dist: betterproto==2.0.0b7
47
47
  Requires-Dist: obsws-python~=1.7.2
48
48
  Requires-Dist: numpy==2.2.6
49
49
  Requires-Dist: regex
50
+ Requires-Dist: faster-whisper~=1.2.0
50
51
  Dynamic: license-file
51
52
 
52
53
  # GSM - An Immersion toolkit for Games.
53
54
 
54
- ### English | [日本語](../docs/ja/README.md) | [简体中文](../docs/zh/README.md).
55
+ ### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
55
56
 
56
57
  An application designed to assist with language learning through games.
57
58
 
@@ -28,6 +28,7 @@ betterproto==2.0.0b7
28
28
  obsws-python~=1.7.2
29
29
  numpy==2.2.6
30
30
  regex
31
+ faster-whisper~=1.2.0
31
32
 
32
33
  [:sys_platform != "win32"]
33
34
  openai-whisper
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.15.4
3
+ Version: 2.15.6
4
4
  Summary: A tool for mining sentences from games. Update: Overlay?
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -47,11 +47,12 @@ Requires-Dist: betterproto==2.0.0b7
47
47
  Requires-Dist: obsws-python~=1.7.2
48
48
  Requires-Dist: numpy==2.2.6
49
49
  Requires-Dist: regex
50
+ Requires-Dist: faster-whisper~=1.2.0
50
51
  Dynamic: license-file
51
52
 
52
53
  # GSM - An Immersion toolkit for Games.
53
54
 
54
- ### English | [日本語](../docs/ja/README.md) | [简体中文](../docs/zh/README.md).
55
+ ### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
55
56
 
56
57
  An application designed to assist with language learning through games.
57
58
 
@@ -1,6 +1,6 @@
1
1
  # GSM - An Immersion toolkit for Games.
2
2
 
3
- ### English | [日本語](../docs/ja/README.md) | [简体中文](../docs/zh/README.md).
3
+ ### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
4
4
 
5
5
  An application designed to assist with language learning through games.
6
6
 
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "GameSentenceMiner"
10
- version = "2.15.4"
10
+ version = "2.15.6"
11
11
  description = "A tool for mining sentences from games. Update: Overlay?"
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.10"
@@ -56,7 +56,8 @@ dependencies = [
56
56
  "betterproto==2.0.0b7",
57
57
  "obsws-python~=1.7.2",
58
58
  "numpy==2.2.6",
59
- "regex"
59
+ "regex",
60
+ "faster-whisper~=1.2.0"
60
61
  ]
61
62
 
62
63
  # This creates a command-line script named `gamesentenceminer` that will