GameSentenceMiner 2.7.16__py3-none-any.whl → 2.8.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (35)
  1. GameSentenceMiner/anki.py +7 -8
  2. GameSentenceMiner/config_gui.py +19 -3
  3. GameSentenceMiner/configuration.py +8 -1
  4. GameSentenceMiner/ffmpeg.py +1 -3
  5. GameSentenceMiner/gametext.py +16 -155
  6. GameSentenceMiner/gsm.py +28 -29
  7. GameSentenceMiner/obs.py +0 -3
  8. GameSentenceMiner/ocr/ocrconfig.py +0 -1
  9. GameSentenceMiner/ocr/oneocr_dl.py +243 -0
  10. GameSentenceMiner/ocr/owocr_area_selector.py +0 -1
  11. GameSentenceMiner/ocr/owocr_helper.py +25 -26
  12. GameSentenceMiner/owocr/owocr/run.py +1 -1
  13. GameSentenceMiner/text_log.py +186 -0
  14. GameSentenceMiner/util.py +52 -3
  15. GameSentenceMiner/web/__init__.py +0 -0
  16. GameSentenceMiner/web/static/__init__.py +0 -0
  17. GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  18. GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  19. GameSentenceMiner/web/static/favicon.ico +0 -0
  20. GameSentenceMiner/web/static/favicon.svg +3 -0
  21. GameSentenceMiner/web/static/site.webmanifest +21 -0
  22. GameSentenceMiner/web/static/style.css +292 -0
  23. GameSentenceMiner/web/static/text_replacements.html +238 -0
  24. GameSentenceMiner/web/static/utility.html +313 -0
  25. GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  26. GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  27. GameSentenceMiner/web/texthooking_page.py +234 -0
  28. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/METADATA +2 -1
  29. gamesentenceminer-2.8.0.dist-info/RECORD +58 -0
  30. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/WHEEL +1 -1
  31. GameSentenceMiner/utility_gui.py +0 -204
  32. gamesentenceminer-2.7.16.dist-info/RECORD +0 -44
  33. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/entry_points.txt +0 -0
  34. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/licenses/LICENSE +0 -0
  35. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/top_level.txt +0 -0
GameSentenceMiner/ocr/oneocr_dl.py ADDED
@@ -0,0 +1,243 @@
+ import os
+ import zipfile
+ import shutil
+ from os.path import expanduser
+
+ import requests
+ import re
+ import tempfile
+
+ # Placeholder functions/constants for removed proprietary ones
+ # In a real application, you would replace these with appropriate logic
+ # or standard library equivalents.
+
+ def checkdir(d):
+     """Checks if a directory exists and contains the expected files."""
+     flist = ["oneocr.dll", "oneocr.onemodel", "onnxruntime.dll"]
+     return os.path.isdir(d) and all((os.path.isfile(os.path.join(d, _)) for _ in flist))
+
+ def selectdir():
+     """Attempts to find the SnippingTool directory, prioritizing cache."""
+     cachedir = "cache/SnippingTool"
+     packageFamilyName = "Microsoft.ScreenSketch_8wekyb3d8bbwe"
+
+     if checkdir(cachedir):
+         return cachedir
+     # This part needs NativeUtils.GetPackagePathByPackageFamily, which is proprietary.
+     # We'll skip this part for simplification as requested.
+     # path = NativeUtils.GetPackagePathByPackageFamily(packageFamilyName)
+     # if not path:
+     #     return None
+     # path = os.path.join(path, "SnippingTool")
+     # if not checkdir(path):
+     #     return None
+     # return path
+     return None # Return None if not found in cache
+
+ def getproxy():
+     """Placeholder for proxy retrieval."""
+     # Replace with actual proxy retrieval logic or return None
+     return None
+
+ def stringfyerror(e):
+     """Placeholder for error stringification."""
+     return str(e)
+
+ def dynamiclink(path):
+     """Placeholder for dynamic link resolution."""
+     # This would likely map a resource path to a local file path.
+     # For simplification, we'll just use the provided path string.
+     return path # Assuming path is a URL here based on usage
+
+ # Simplified download logic extracted from the question class
+ class Downloader:
+     def __init__(self):
+         self.oneocr_dir = expanduser("~/.config/oneocr")
+         self.packageFamilyName = "Microsoft.ScreenSketch_8wekyb3d8bbwe"
+         self.flist = ["oneocr.dll", "oneocr.onemodel", "onnxruntime.dll"]
+
+     def download_and_extract(self):
+         """
+         Main function to attempt download and extraction.
+         Tries official source first, then a fallback URL.
+         """
+         if checkdir(self.oneocr_dir):
+             print("Files already exist in cache.")
+             return True
+
+         try:
+             print("Attempting to download from official source...")
+             self.downloadofficial()
+             print("Download and extraction from official source successful.")
+             return True
+         except Exception as e:
+             print(f"Download from official source failed: {stringfyerror(e)}")
+             print("Attempting to download from fallback URL...")
+             try:
+                 fallback_url = dynamiclink("/Resource/SnippingTool") # Assuming this resolves to a URL
+                 self.downloadx(fallback_url)
+                 print("Download and extraction from fallback URL successful.")
+                 return True
+             except Exception as e_fallback:
+                 print(f"Download from fallback URL failed: {stringfyerror(e_fallback)}")
+                 print("All download attempts failed.")
+                 return False
+
+
+     def downloadofficial(self):
+         """Downloads the latest SnippingTool MSIX bundle from a store API."""
+         headers = {
+             "accept": "*/*",
+             # Changed accept-language to prioritize US English
+             "accept-language": "en-US,en;q=0.9",
+             "cache-control": "no-cache",
+             "origin": "https://store.rg-adguard.net",
+             "pragma": "no-cache",
+             "priority": "u=1, i",
+             "referer": "https://store.rg-adguard.net/",
+             "sec-ch-ua": '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
+             "sec-ch-ua-mobile": "?0",
+             "sec-ch-ua-platform": '"Windows"',
+             "sec-fetch-dest": "empty",
+             "sec-fetch-mode": "cors",
+             "sec-fetch-site": "same-origin",
+             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
+         }
+
+         data = dict(type="PackageFamilyName", url=self.packageFamilyName)
+
+         response = requests.post(
+             "https://store.rg-adguard.net/api/GetFiles",
+             headers=headers,
+             data=data,
+             proxies=getproxy(),
+         )
+         response.raise_for_status() # Raise an exception for bad status codes
+
+         saves = []
+         for link, package in re.findall('<a href="(.*?)".*?>(.*?)</a>', response.text):
+             if not package.startswith("Microsoft.ScreenSketch"):
+                 continue
+             if not package.endswith(".msixbundle"):
+                 continue
+             version = re.search(r"\d+\.\d+\.\d+\.\d+", package)
+             if not version:
+                 continue
+             version = tuple(int(_) for _ in version.group().split("."))
+             saves.append((version, link, package))
+
+         if not saves:
+             raise Exception("Could not find suitable download link from official source.")
+
+         saves.sort(key=lambda _: _[0])
+         url = saves[-1][1]
+         package_name = saves[-1][2]
+
+         print(f"Downloading {package_name} from {url}")
+         req = requests.get(url, stream=True, proxies=getproxy())
+         req.raise_for_status()
+
+         total_size_in_bytes = int(req.headers.get('content-length', 0))
+         block_size = 1024 * 32 # 32 Kibibytes
+         temp_msixbundle_path = os.path.join(tempfile.gettempdir(), package_name)
+
+         with open(temp_msixbundle_path, "wb") as ff:
+             downloaded_size = 0
+             for chunk in req.iter_content(chunk_size=block_size):
+                 ff.write(chunk)
+                 downloaded_size += len(chunk)
+                 # Basic progress reporting (can be removed)
+                 if total_size_in_bytes:
+                     progress = (downloaded_size / total_size_in_bytes) * 100
+                     print(f"Downloaded {downloaded_size}/{total_size_in_bytes} bytes ({progress:.2f}%)", end='\r')
+         print("\nDownload complete. Extracting...")
+
+         namemsix = None
+         with zipfile.ZipFile(temp_msixbundle_path) as ff:
+             for name in ff.namelist():
+                 if name.startswith("SnippingTool") and name.endswith("_x64.msix"):
+                     namemsix = name
+                     break
+             if not namemsix:
+                 raise Exception("Could not find MSIX file within MSIXBUNDLE.")
+             temp_msix_path = os.path.join(tempfile.gettempdir(), namemsix)
+             ff.extract(namemsix, tempfile.gettempdir())
+
+         print(f"Extracted {namemsix}. Extracting components...")
+         if os.path.exists(self.oneocr_dir):
+             shutil.rmtree(self.oneocr_dir)
+         os.makedirs(self.oneocr_dir, exist_ok=True)
+
+         with zipfile.ZipFile(temp_msix_path) as ff:
+             collect = []
+             for name in ff.namelist():
+                 # Extract only the files within the "SnippingTool/" directory
+                 if name.startswith("SnippingTool/") and any(name.endswith(f) for f in self.flist):
+                     # Construct target path relative to cachedir
+                     target_path = os.path.join(self.oneocr_dir, os.path.relpath(name, "SnippingTool/"))
+                     # Ensure parent directories exist
+                     os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                     # Extract the file
+                     with ff.open(name) as source, open(target_path, "wb") as target:
+                         shutil.copyfileobj(source, target)
+                     collect.append(name)
+             if not collect:
+                 raise Exception("Could not find required files within MSIX.")
+
+
+         if not checkdir(self.oneocr_dir):
+             raise Exception("Extraction failed: Required files not found in cache directory.")
+
+         # Clean up temporary files
+         os.remove(temp_msixbundle_path)
+         os.remove(temp_msix_path)
+
+
+     def downloadx(self, url: str):
+         """Downloads a zip file from a URL and extracts it."""
+         print(f"Downloading from fallback URL: {url}")
+         # Added accept-language to the fallback download as well for consistency
+         headers = {
+             "accept-language": "en-US,en;q=0.9",
+             # Add other relevant headers if necessary for the fallback URL
+             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
+             "accept": "*/*",
+         }
+         req = requests.get(url, verify=False, proxies=getproxy(), stream=True, headers=headers)
+         req.raise_for_status()
+
+         total_size_in_bytes = int(req.headers.get('content-length', 0))
+         block_size = 1024 * 32 # 32 Kibibytes
+         temp_zip_path = os.path.join(tempfile.gettempdir(), url.split("/")[-1])
+
+         with open(temp_zip_path, "wb") as ff:
+             downloaded_size = 0
+             for chunk in req.iter_content(chunk_size=block_size):
+                 ff.write(chunk)
+                 downloaded_size += len(chunk)
+                 # Basic progress reporting (can be removed)
+                 if total_size_in_bytes:
+                     progress = (downloaded_size / total_size_in_bytes) * 100
+                     print(f"Downloaded {downloaded_size}/{total_size_in_bytes} bytes ({progress:.2f}%)", end='\r')
+         print("\nDownload complete. Extracting...")
+
+         if os.path.exists(self.oneocr_dir):
+             shutil.rmtree(self.oneocr_dir)
+         os.makedirs(self.oneocr_dir, exist_ok=True)
+
+         with zipfile.ZipFile(temp_zip_path) as zipf:
+             zipf.extractall(self.oneocr_dir)
+
+         if not checkdir(self.oneocr_dir):
+             raise Exception("Extraction failed: Required files not found in cache directory.")
+
+         # Clean up temporary files
+         os.remove(temp_zip_path)
+
+ # Example usage:
+ if __name__ == "__main__":
+     downloader = Downloader()
+     if downloader.download_and_extract():
+         print("SnippingTool files are ready.")
+     else:
+         print("Failed to obtain SnippingTool files.")
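The getproxy, dynamiclink, and stringfyerror helpers above are explicit placeholders. Below is a minimal sketch of one way to back getproxy() with the standard library, assuming environment-configured proxies are wanted; the shipped module simply returns None:

import urllib.request

def getproxy():
    """Return a requests-style proxies dict from the environment, or None."""
    # urllib.request.getproxies() reads HTTP_PROXY/HTTPS_PROXY (and, on Windows,
    # the registry), producing the mapping that requests' proxies= parameter accepts.
    proxies = urllib.request.getproxies()
    return proxies or None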
@@ -7,7 +7,6 @@ import mss
  from PIL import Image, ImageTk, ImageDraw

  from GameSentenceMiner import obs # Import your actual obs module
- from GameSentenceMiner.ocr.owocr_helper import get_ocr_config
  from GameSentenceMiner.util import sanitize_filename # Import your actual util module

  try:
GameSentenceMiner/ocr/owocr_helper.py CHANGED
@@ -1,9 +1,9 @@
  import asyncio
- import difflib
  import json
  import logging
  import os
  import queue
+ import re
  import threading
  import time
  from datetime import datetime
@@ -14,17 +14,14 @@ from tkinter import messagebox
  import mss
  import websockets
  from rapidfuzz import fuzz
- from PIL import Image, ImageDraw

  from GameSentenceMiner import obs, util
- from GameSentenceMiner.configuration import get_config, get_app_directory
+ from GameSentenceMiner.configuration import get_config, get_app_directory, get_temporary_directory
  from GameSentenceMiner.electron_config import get_ocr_scan_rate, get_requires_open_window
  from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, Rectangle
  from GameSentenceMiner.owocr.owocr import screen_coordinate_picker, run
  from GameSentenceMiner.owocr.owocr.run import TextFiltering
-
- from dataclasses import dataclass
- from typing import List, Optional
+ from GameSentenceMiner.util import do_text_replacements, OCR_REPLACEMENTS_FILE

  CONFIG_FILE = Path("ocr_config.json")
  DEFAULT_IMAGE_PATH = r"C:\Users\Beangate\Pictures\msedge_acbl8GL7Ax.jpg" # CHANGE THIS
@@ -80,6 +77,7 @@ def get_ocr_config() -> OCRConfig:
      """Loads and updates screen capture areas from the corresponding JSON file."""
      app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
      ocr_config_dir = app_dir / "ocr_config"
+     os.makedirs(ocr_config_dir, exist_ok=True)
      obs.connect_to_obs()
      scene = util.sanitize_filename(obs.get_current_scene())
      config_path = ocr_config_dir / f"{scene}.json"
@@ -217,63 +215,62 @@ def do_second_ocr(ocr1_text, rectangle_index, time, img):
          if fuzz.ratio(previous_ocr2_text, text) >= 80:
              logger.info("Seems like the same text from previous ocr2 result, not sending")
              return
-         img.save(os.path.join(get_app_directory(), "temp", "last_successful_ocr.png"))
+         img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
          last_ocr2_results[rectangle_index] = text
-         if get_config().advanced.ocr_sends_to_clipboard:
-             import pyperclip
-             pyperclip.copy(text)
-         websocket_server_thread.send_text(text, time)
+         send_result(text, time)
      except json.JSONDecodeError:
          print("Invalid JSON received.")
      except Exception as e:
          logger.exception(e)
          print(f"Error processing message: {e}")

+ def send_result(text, time):
+     if text:
+         text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
+         if get_config().advanced.ocr_sends_to_clipboard:
+             import pyperclip
+             pyperclip.copy(text)
+         websocket_server_thread.send_text(text, time)
+

  last_oneocr_results_to_check = {} # Store last OCR result for each rectangle
  last_oneocr_times = {} # Store last OCR time for each rectangle
  text_stable_start_times = {} # Store the start time when text becomes stable for each rectangle
+ previous_imgs = {}
  orig_text_results = {} # Store original text results for each rectangle
  TEXT_APPEARENCE_DELAY = get_ocr_scan_rate() * 1000 + 500 # Adjust as needed

  def text_callback(text, orig_text, rectangle_index, time, img=None):
      global twopassocr, ocr2, last_oneocr_results_to_check, last_oneocr_times, text_stable_start_times, orig_text_results
      orig_text_string = ''.join([item for item in orig_text if item is not None]) if orig_text else ""
+     # logger.debug(orig_text_string)

      current_time = time if time else datetime.now()

-     previous_text = last_oneocr_results_to_check.get(rectangle_index, "").strip()
+     previous_text = last_oneocr_results_to_check.pop(rectangle_index, "").strip()
      previous_orig_text = orig_text_results.get(rectangle_index, "").strip()

      # print(previous_orig_text)
      # if orig_text:
      # print(orig_text_string)
-
+     if not twopassocr:
+         img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
+         send_result(text, time)
      if not text:
          if previous_text:
              if rectangle_index in text_stable_start_times:
-                 stable_time = text_stable_start_times[rectangle_index]
+                 stable_time = text_stable_start_times.pop(rectangle_index)
+                 previous_img = previous_imgs.pop(rectangle_index)
                  previous_result = last_ocr1_results[rectangle_index]
                  if previous_result and fuzz.ratio(previous_result, previous_text) >= 80:
                      logger.info("Seems like the same text, not " + "doing second OCR" if twopassocr else "sending")
-                     del last_oneocr_results_to_check[rectangle_index]
                      return
                  if previous_orig_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 80:
                      logger.info("Seems like Text we already sent, not doing anything.")
-                     del last_oneocr_results_to_check[rectangle_index]
                      return
                  orig_text_results[rectangle_index] = orig_text_string
-                 if twopassocr:
-                     do_second_ocr(previous_text, rectangle_index, time, img)
-                 else:
-                     if get_config().advanced.ocr_sends_to_clipboard:
-                         import pyperclip
-                         pyperclip.copy(text)
-                     websocket_server_thread.send_text(previous_text, stable_time)
-                     img.save(os.path.join(get_app_directory(), "temp", "last_successful_ocr.png"))
+                 do_second_ocr(previous_text, rectangle_index, stable_time, previous_img)
                  last_ocr1_results[rectangle_index] = previous_text
-                 del text_stable_start_times[rectangle_index]
-                 del last_oneocr_results_to_check[rectangle_index]
              return
          return

@@ -281,6 +278,7 @@ def text_callback(text, orig_text, rectangle_index, time, img=None):
          last_oneocr_results_to_check[rectangle_index] = text
          last_oneocr_times[rectangle_index] = current_time
          text_stable_start_times[rectangle_index] = current_time
+         previous_imgs[rectangle_index] = img
          return

      stable = text_stable_start_times.get(rectangle_index)
@@ -294,6 +292,7 @@ def text_callback(text, orig_text, rectangle_index, time, img=None):
      else:
          last_oneocr_results_to_check[rectangle_index] = text
          last_oneocr_times[rectangle_index] = current_time
+         previous_imgs[rectangle_index] = img

  done = False

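The text_callback changes above hinge on a per-rectangle near-duplicate check: a result is dropped when rapidfuzz scores it at 80 or more against the previous result for the same screen area, and the captured image is now kept in previous_imgs so the second OCR pass can re-read it once the text stops changing. A standalone sketch of just that duplicate check (the names here are illustrative, not GSM's):

from rapidfuzz import fuzz

last_results = {}  # rectangle_index -> last accepted text

def is_duplicate(rectangle_index: int, text: str) -> bool:
    # fuzz.ratio returns 0-100; >= 80 is treated as "same text" in the hunks above.
    previous = last_results.get(rectangle_index, "")
    if previous and fuzz.ratio(previous, text) >= 80:
        return True
    last_results[rectangle_index] = text
    return False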
@@ -1,4 +1,4 @@
- import datetime
+ from datetime import datetime
  import sys
  import signal
  import time
GameSentenceMiner/text_log.py ADDED
@@ -0,0 +1,186 @@
+ import uuid
+ from dataclasses import dataclass
+ from datetime import datetime
+ from difflib import SequenceMatcher
+ from typing import Optional
+
+ from GameSentenceMiner.configuration import logger, get_config
+ from GameSentenceMiner.model import AnkiCard
+ from GameSentenceMiner.util import remove_html_and_cloze_tags
+
+ initial_time = datetime.now()
+
+
+ @dataclass
+ class GameLine:
+     id: str
+     text: str
+     time: datetime
+     prev: 'GameLine | None'
+     next: 'GameLine | None'
+     index: int = 0
+
+     def get_previous_time(self):
+         if self.prev:
+             return self.prev.time
+         return initial_time
+
+     def get_next_time(self):
+         if self.next:
+             return self.next.time
+         return 0
+
+     def __str__(self):
+         return str({"text": self.text, "time": self.time})
+
+
+ @dataclass
+ class GameText:
+     values: list[GameLine]
+     values_dict: dict[str, GameLine]
+     game_line_index = 0
+
+     def __init__(self):
+         self.values = []
+         self.values_dict = {}
+
+     def __getitem__(self, key):
+         return self.values[key]
+
+     def get_by_id(self, line_id: str) -> Optional[GameLine]:
+         if not self.values_dict:
+             return None
+         return self.values_dict.get(line_id)
+
+     def get_time(self, line_text: str, occurrence: int = -1) -> datetime:
+         matches = [line for line in self.values if line.text == line_text]
+         if matches:
+             return matches[occurrence].time # Default to latest
+         return initial_time
+
+     def get_event(self, line_text: str, occurrence: int = -1) -> GameLine | None:
+         matches = [line for line in self.values if line.text == line_text]
+         if matches:
+             return matches[occurrence]
+         return None
+
+     def add_line(self, line_text, line_time=None):
+         if not line_text:
+             return
+         line_id = str(uuid.uuid1())
+         new_line = GameLine(
+             id=line_id, # Time-based UUID as an integer
+             text=line_text,
+             time=line_time if line_time else datetime.now(),
+             prev=self.values[-1] if self.values else None,
+             next=None,
+             index=self.game_line_index
+         )
+         self.values_dict[line_id] = new_line
+         logger.debug(f"Adding line: {new_line}")
+         self.game_line_index += 1
+         if self.values:
+             self.values[-1].next = new_line
+         self.values.append(new_line)
+         # self.remove_old_events(datetime.now() - timedelta(minutes=10))
+
+     def has_line(self, line_text) -> bool:
+         for game_line in self.values:
+             if game_line.text == line_text:
+                 return True
+         return False
+
+
+ text_log = GameText()
+
+
+ def similar(a, b):
+     return SequenceMatcher(None, a, b).ratio()
+
+
+ def one_contains_the_other(a, b):
+     return a in b or b in a
+
+
+ def lines_match(a, b):
+     similarity = similar(a, b)
+     logger.debug(f"Comparing: {a} with {b} - Similarity: {similarity}, Or One contains the other: {one_contains_the_other(a, b)}")
+     return similar(a, b) >= 0.60 or one_contains_the_other(a, b)
+
+
+ def get_text_event(last_note) -> GameLine:
+     lines = text_log.values
+
+     if not lines:
+         raise Exception("No lines in history. Text is required from either clipboard or websocket for GSM to work. Please check your setup/config.")
+
+     if not last_note:
+         return lines[-1]
+
+     sentence = last_note.get_field(get_config().anki.sentence_field)
+     if not sentence:
+         return lines[-1]
+
+     for line in reversed(lines):
+         if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
+             return line
+
+     logger.debug("Couldn't find a match in history, using last event")
+     return lines[-1]
+
+
+ def get_line_and_future_lines(last_note):
+     if not last_note:
+         return []
+
+     sentence = last_note.get_field(get_config().anki.sentence_field)
+     found_lines = []
+     if sentence:
+         found = False
+         for line in text_log.values:
+             if found:
+                 found_lines.append(line.text)
+             if lines_match(line.text, remove_html_and_cloze_tags(sentence)): # 80% similarity threshold
+                 found = True
+                 found_lines.append(line.text)
+     return found_lines
+
+
+ def get_mined_line(last_note: AnkiCard, lines):
+     if not last_note:
+         return lines[-1]
+     if not lines:
+         lines = get_all_lines()
+
+     sentence = last_note.get_field(get_config().anki.sentence_field)
+     for line in lines:
+         if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
+             return line
+     return lines[-1]
+
+
+ def get_time_of_line(line):
+     return text_log.get_time(line)
+
+
+ def get_all_lines():
+     return text_log.values
+
+
+ def get_text_log() -> GameText:
+     return text_log
+
+ def add_line(current_line_after_regex, line_time):
+     text_log.add_line(current_line_after_regex, line_time)
+
+ def get_line_by_id(line_id: str) -> Optional[GameLine]:
+     """
+     Retrieve a GameLine by its unique ID.
+
+     Args:
+         line_id (str): The unique identifier of the GameLine.
+
+     Returns:
+         Optional[GameLine]: The GameLine object if found, otherwise None.
+     """
+     return text_log.get_by_id(line_id)
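The new text_log module keeps lines in a doubly linked list (GameLine.prev/next) plus a UUID map for direct lookup. A hedged usage sketch, assuming a working GameSentenceMiner install on Windows (the module pulls in GSM's configuration, model, and util at import time, so it is not standalone):

from GameSentenceMiner import text_log

# line_time is required by the module-level wrapper; None falls back to datetime.now().
text_log.add_line("一行目のテキスト", None)
text_log.add_line("二行目のテキスト", None)

latest = text_log.get_all_lines()[-1]             # most recent GameLine
print(latest.text, latest.time)
print(latest.prev.text if latest.prev else None)  # linked-list traversal
assert text_log.get_line_by_id(latest.id) is latest  # UUID lookup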
GameSentenceMiner/util.py CHANGED
@@ -1,10 +1,9 @@
- import importlib
+ import json
  import os
  import random
  import re
  import string
  import subprocess
- import sys
  import threading
  import time
  from datetime import datetime
@@ -206,4 +205,54 @@ def import_vad_models():
          from GameSentenceMiner.vad import whisper_helper
      if get_config().vad.is_vosk():
          from GameSentenceMiner.vad import vosk_helper
-     return silero_trim, whisper_helper, vosk_helper
+     return silero_trim, whisper_helper, vosk_helper
+
+
+ def isascii(s: str):
+     try:
+         return s.isascii()
+     except:
+         try:
+             s.encode("ascii")
+             return True
+         except:
+             return False
+
+ def do_text_replacements(text, replacements_json):
+     if not text:
+         return text
+
+     replacements = {}
+     if os.path.exists(replacements_json):
+         with open(replacements_json, 'r', encoding='utf-8') as f:
+             replacements.update(json.load(f))
+
+     if replacements.get("enabled", False):
+         orig_text = text
+         filters = replacements.get("args", {}).get("replacements", {})
+         for fil, replacement in filters.items():
+             if not fil:
+                 continue
+             if fil.startswith("re:"):
+                 pattern = fil[3:]
+                 try:
+                     text = re.sub(pattern, replacement, text)
+                 except Exception:
+                     logger.error(f"Invalid regex pattern: {pattern}")
+                 continue
+             if isascii(fil):
+                 text = re.sub(r"\b{}\b".format(re.escape(fil)), replacement, text)
+             else:
+                 text = text.replace(fil, replacement)
+         if text != orig_text:
+             logger.info(f"Text replaced: '{orig_text}' -> '{text}' using replacements.")
+     return text
+
+
+ TEXT_REPLACEMENTS_FILE = os.path.join(os.getenv('APPDATA'), 'GameSentenceMiner', 'config', 'text_replacements.json')
+ OCR_REPLACEMENTS_FILE = os.path.join(os.getenv('APPDATA'), 'GameSentenceMiner', 'config', 'ocr_replacements.json')
+ os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)
+
+ if not os.path.exists(TEXT_REPLACEMENTS_FILE):
+     #TODO : fetch raw json from github
+     pass
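The new do_text_replacements() reads a small JSON config whose shape is fixed by the code above: a top-level "enabled" flag, then "args" -> "replacements", where keys prefixed with "re:" are treated as regex patterns, plain ASCII keys are matched on word boundaries, and anything else is replaced as a literal substring. A hedged sketch of such a file; the sample entries are invented:

import json

sample = {
    "enabled": True,
    "args": {
        "replacements": {
            "re:\\s+": " ",              # "re:" prefix -> regex pattern
            "GSM": "GameSentenceMiner",  # ASCII key -> word-boundary replacement
            "…": "...",                  # non-ASCII key -> plain substring replacement
        }
    },
}

# Saved as text_replacements.json or ocr_replacements.json under
# %APPDATA%\GameSentenceMiner\config\ (see the constants added above).
print(json.dumps(sample, ensure_ascii=False, indent=2))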