GameSentenceMiner 2.7.17__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/anki.py +7 -8
- GameSentenceMiner/config_gui.py +19 -3
- GameSentenceMiner/configuration.py +8 -1
- GameSentenceMiner/ffmpeg.py +1 -3
- GameSentenceMiner/gametext.py +16 -155
- GameSentenceMiner/gsm.py +28 -29
- GameSentenceMiner/obs.py +0 -3
- GameSentenceMiner/ocr/ocrconfig.py +0 -1
- GameSentenceMiner/ocr/oneocr_dl.py +243 -0
- GameSentenceMiner/ocr/owocr_area_selector.py +0 -1
- GameSentenceMiner/ocr/owocr_helper.py +25 -26
- GameSentenceMiner/text_log.py +186 -0
- GameSentenceMiner/util.py +52 -3
- GameSentenceMiner/web/__init__.py +0 -0
- GameSentenceMiner/web/static/__init__.py +0 -0
- GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
- GameSentenceMiner/web/static/favicon-96x96.png +0 -0
- GameSentenceMiner/web/static/favicon.ico +0 -0
- GameSentenceMiner/web/static/favicon.svg +3 -0
- GameSentenceMiner/web/static/site.webmanifest +21 -0
- GameSentenceMiner/web/static/style.css +292 -0
- GameSentenceMiner/web/static/text_replacements.html +238 -0
- GameSentenceMiner/web/static/utility.html +313 -0
- GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
- GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
- GameSentenceMiner/web/texthooking_page.py +234 -0
- {gamesentenceminer-2.7.17.dist-info → gamesentenceminer-2.8.0.dist-info}/METADATA +2 -1
- gamesentenceminer-2.8.0.dist-info/RECORD +58 -0
- {gamesentenceminer-2.7.17.dist-info → gamesentenceminer-2.8.0.dist-info}/WHEEL +1 -1
- GameSentenceMiner/utility_gui.py +0 -204
- gamesentenceminer-2.7.17.dist-info/RECORD +0 -44
- {gamesentenceminer-2.7.17.dist-info → gamesentenceminer-2.8.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.7.17.dist-info → gamesentenceminer-2.8.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.7.17.dist-info → gamesentenceminer-2.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,243 @@
|
|
1
|
+
import os
|
2
|
+
import zipfile
|
3
|
+
import shutil
|
4
|
+
from os.path import expanduser
|
5
|
+
|
6
|
+
import requests
|
7
|
+
import re
|
8
|
+
import tempfile
|
9
|
+
|
10
|
+
# Placeholder functions/constants for removed proprietary ones
|
11
|
+
# In a real application, you would replace these with appropriate logic
|
12
|
+
# or standard library equivalents.
|
13
|
+
|
14
|
+
def checkdir(d):
    """Return True when *d* is a directory holding every required OneOCR file."""
    required = ("oneocr.dll", "oneocr.onemodel", "onnxruntime.dll")
    if not os.path.isdir(d):
        return False
    return all(os.path.isfile(os.path.join(d, name)) for name in required)
|
18
|
+
|
19
|
+
def selectdir():
    """Locate the SnippingTool directory, consulting only the local cache.

    The original implementation also resolved the installed package path via
    the proprietary NativeUtils.GetPackagePathByPackageFamily helper for the
    "Microsoft.ScreenSketch_8wekyb3d8bbwe" package family; that lookup was
    deliberately removed, so only the cache location is checked here.

    Returns the cache directory path when it contains the expected files,
    otherwise None.
    """
    cachedir = "cache/SnippingTool"
    packageFamilyName = "Microsoft.ScreenSketch_8wekyb3d8bbwe"  # kept for reference

    if checkdir(cachedir):
        return cachedir
    # Proprietary package-path lookup intentionally omitted (see docstring).
    return None
|
36
|
+
|
37
|
+
def getproxy():
    """Return the proxy mapping to pass to requests, or None when unset.

    Stub: always reports that no proxy is configured.
    """
    return None
|
41
|
+
|
42
|
+
def stringfyerror(e):
    """Render exception *e* as a human-readable message string (stub)."""
    message = str(e)
    return message
|
45
|
+
|
46
|
+
def dynamiclink(path):
    """Resolve a resource *path* to a download link (stub: identity).

    The real implementation would map a resource path to a mirror URL;
    here the caller-supplied value is assumed to already be usable.
    """
    return path
|
51
|
+
|
52
|
+
# Simplified download logic extracted from the question class
|
53
|
+
class Downloader:
    """Downloads and extracts the OneOCR runtime files shipped inside the
    Windows Snipping Tool (ScreenSketch) package.

    Files land in ``~/.config/oneocr``. The official Microsoft Store mirror
    (store.rg-adguard.net) is tried first; an arbitrary fallback zip URL is
    used when that fails (see download_and_extract).
    """

    # Connect/read timeout (seconds) for every HTTP request; the original
    # code had no timeout and could hang forever on a dead mirror.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        # Target directory for the extracted OneOCR files.
        self.oneocr_dir = expanduser("~/.config/oneocr")
        self.packageFamilyName = "Microsoft.ScreenSketch_8wekyb3d8bbwe"
        # The three files that must be present for the install to be valid.
        self.flist = ["oneocr.dll", "oneocr.onemodel", "onnxruntime.dll"]

    def download_and_extract(self):
        """Ensure the OneOCR files exist locally.

        Returns True on success (including when the files are already
        cached), False when every download attempt failed.
        """
        if checkdir(self.oneocr_dir):
            print("Files already exist in cache.")
            return True

        try:
            print("Attempting to download from official source...")
            self.downloadofficial()
            print("Download and extraction from official source successful.")
            return True
        except Exception as e:
            print(f"Download from official source failed: {stringfyerror(e)}")
            print("Attempting to download from fallback URL...")
            try:
                fallback_url = dynamiclink("/Resource/SnippingTool")  # Assuming this resolves to a URL
                self.downloadx(fallback_url)
                print("Download and extraction from fallback URL successful.")
                return True
            except Exception as e_fallback:
                print(f"Download from fallback URL failed: {stringfyerror(e_fallback)}")
                print("All download attempts failed.")
                return False

    def _stream_to_file(self, req, dest_path):
        """Stream an open requests response to *dest_path* with progress output."""
        total_size_in_bytes = int(req.headers.get('content-length', 0))
        block_size = 1024 * 32  # 32 Kibibytes
        downloaded_size = 0
        with open(dest_path, "wb") as ff:
            for chunk in req.iter_content(chunk_size=block_size):
                ff.write(chunk)
                downloaded_size += len(chunk)
                # Basic progress reporting (can be removed)
                if total_size_in_bytes:
                    progress = (downloaded_size / total_size_in_bytes) * 100
                    print(f"Downloaded {downloaded_size}/{total_size_in_bytes} bytes ({progress:.2f}%)", end='\r')
        print("\nDownload complete. Extracting...")

    def _reset_target_dir(self):
        """Recreate self.oneocr_dir empty so stale files never survive."""
        if os.path.exists(self.oneocr_dir):
            shutil.rmtree(self.oneocr_dir)
        os.makedirs(self.oneocr_dir, exist_ok=True)

    def downloadofficial(self):
        """Download the newest SnippingTool msixbundle via store.rg-adguard.net
        and extract the OneOCR files from the x64 msix inside it.

        Raises on network errors, on a missing download link, on a missing
        x64 msix inside the bundle, or when extraction leaves the target
        directory incomplete. Temporary files are removed even on failure.
        """
        headers = {
            "accept": "*/*",
            # Changed accept-language to prioritize US English
            "accept-language": "en-US,en;q=0.9",
            "cache-control": "no-cache",
            "origin": "https://store.rg-adguard.net",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://store.rg-adguard.net/",
            "sec-ch-ua": '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
        }

        data = dict(type="PackageFamilyName", url=self.packageFamilyName)

        response = requests.post(
            "https://store.rg-adguard.net/api/GetFiles",
            headers=headers,
            data=data,
            proxies=getproxy(),
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()  # Raise an exception for bad status codes

        # Collect every ScreenSketch msixbundle link with a parseable version.
        saves = []
        for link, package in re.findall('<a href="(.*?)".*?>(.*?)</a>', response.text):
            if not package.startswith("Microsoft.ScreenSketch"):
                continue
            if not package.endswith(".msixbundle"):
                continue
            version = re.search(r"\d+\.\d+\.\d+\.\d+", package)
            if not version:
                continue
            version_tuple = tuple(int(part) for part in version.group().split("."))
            saves.append((version_tuple, link, package))

        if not saves:
            raise Exception("Could not find suitable download link from official source.")

        # Pick the highest version.
        saves.sort(key=lambda entry: entry[0])
        _, url, package_name = saves[-1]

        print(f"Downloading {package_name} from {url}")
        temp_msixbundle_path = os.path.join(tempfile.gettempdir(), package_name)
        temp_msix_path = None
        try:
            with requests.get(url, stream=True, proxies=getproxy(),
                              timeout=self.REQUEST_TIMEOUT) as req:
                req.raise_for_status()
                self._stream_to_file(req, temp_msixbundle_path)

            # The bundle contains per-architecture msix files; we need x64.
            with zipfile.ZipFile(temp_msixbundle_path) as bundle:
                namemsix = next(
                    (name for name in bundle.namelist()
                     if name.startswith("SnippingTool") and name.endswith("_x64.msix")),
                    None,
                )
                if not namemsix:
                    raise Exception("Could not find MSIX file within MSIXBUNDLE.")
                temp_msix_path = os.path.join(tempfile.gettempdir(), namemsix)
                bundle.extract(namemsix, tempfile.gettempdir())

            print(f"Extracted {namemsix}. Extracting components...")
            self._reset_target_dir()

            with zipfile.ZipFile(temp_msix_path) as msix:
                collect = []
                for name in msix.namelist():
                    # Extract only the required files within the "SnippingTool/" directory
                    if name.startswith("SnippingTool/") and any(name.endswith(f) for f in self.flist):
                        target_path = os.path.join(self.oneocr_dir, os.path.relpath(name, "SnippingTool/"))
                        os.makedirs(os.path.dirname(target_path), exist_ok=True)
                        with msix.open(name) as source, open(target_path, "wb") as target:
                            shutil.copyfileobj(source, target)
                        collect.append(name)
                if not collect:
                    raise Exception("Could not find required files within MSIX.")

            if not checkdir(self.oneocr_dir):
                raise Exception("Extraction failed: Required files not found in cache directory.")
        finally:
            # Clean up temporary files even when download/extraction fails
            # part-way (the original leaked them on failure).
            for temp_path in (temp_msixbundle_path, temp_msix_path):
                if temp_path and os.path.exists(temp_path):
                    os.remove(temp_path)

    def downloadx(self, url: str):
        """Download a zip file from *url* and extract it into self.oneocr_dir.

        Raises on network errors or when the extracted tree lacks the
        required files. The temporary zip is removed even on failure.
        """
        print(f"Downloading from fallback URL: {url}")
        # Added accept-language to the fallback download as well for consistency
        headers = {
            "accept-language": "en-US,en;q=0.9",
            # Add other relevant headers if necessary for the fallback URL
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
            "accept": "*/*",
        }
        temp_zip_path = os.path.join(tempfile.gettempdir(), url.split("/")[-1])
        try:
            # SECURITY: verify=False disables TLS certificate validation and
            # allows a MITM to substitute the downloaded binaries; kept for
            # mirror compatibility but should be revisited.
            with requests.get(url, verify=False, proxies=getproxy(), stream=True,
                              headers=headers, timeout=self.REQUEST_TIMEOUT) as req:
                req.raise_for_status()
                self._stream_to_file(req, temp_zip_path)

            self._reset_target_dir()
            # NOTE(review): extractall trusts archive member names (zip-slip
            # risk) — acceptable only because the source is semi-trusted.
            with zipfile.ZipFile(temp_zip_path) as zipf:
                zipf.extractall(self.oneocr_dir)

            if not checkdir(self.oneocr_dir):
                raise Exception("Extraction failed: Required files not found in cache directory.")
        finally:
            # Clean up the temporary zip even on failure.
            if os.path.exists(temp_zip_path):
                os.remove(temp_zip_path)
|
236
|
+
|
237
|
+
# Example usage:
|
238
|
+
if __name__ == "__main__":
    # Example usage: fetch the OneOCR files and report the outcome.
    downloader = Downloader()
    ready = downloader.download_and_extract()
    if ready:
        print("SnippingTool files are ready.")
    else:
        print("Failed to obtain SnippingTool files.")
|
@@ -7,7 +7,6 @@ import mss
|
|
7
7
|
from PIL import Image, ImageTk, ImageDraw
|
8
8
|
|
9
9
|
from GameSentenceMiner import obs # Import your actual obs module
|
10
|
-
from GameSentenceMiner.ocr.owocr_helper import get_ocr_config
|
11
10
|
from GameSentenceMiner.util import sanitize_filename # Import your actual util module
|
12
11
|
|
13
12
|
try:
|
@@ -1,9 +1,9 @@
|
|
1
1
|
import asyncio
|
2
|
-
import difflib
|
3
2
|
import json
|
4
3
|
import logging
|
5
4
|
import os
|
6
5
|
import queue
|
6
|
+
import re
|
7
7
|
import threading
|
8
8
|
import time
|
9
9
|
from datetime import datetime
|
@@ -14,17 +14,14 @@ from tkinter import messagebox
|
|
14
14
|
import mss
|
15
15
|
import websockets
|
16
16
|
from rapidfuzz import fuzz
|
17
|
-
from PIL import Image, ImageDraw
|
18
17
|
|
19
18
|
from GameSentenceMiner import obs, util
|
20
|
-
from GameSentenceMiner.configuration import get_config, get_app_directory
|
19
|
+
from GameSentenceMiner.configuration import get_config, get_app_directory, get_temporary_directory
|
21
20
|
from GameSentenceMiner.electron_config import get_ocr_scan_rate, get_requires_open_window
|
22
21
|
from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, Rectangle
|
23
22
|
from GameSentenceMiner.owocr.owocr import screen_coordinate_picker, run
|
24
23
|
from GameSentenceMiner.owocr.owocr.run import TextFiltering
|
25
|
-
|
26
|
-
from dataclasses import dataclass
|
27
|
-
from typing import List, Optional
|
24
|
+
from GameSentenceMiner.util import do_text_replacements, OCR_REPLACEMENTS_FILE
|
28
25
|
|
29
26
|
CONFIG_FILE = Path("ocr_config.json")
|
30
27
|
DEFAULT_IMAGE_PATH = r"C:\Users\Beangate\Pictures\msedge_acbl8GL7Ax.jpg" # CHANGE THIS
|
@@ -80,6 +77,7 @@ def get_ocr_config() -> OCRConfig:
|
|
80
77
|
"""Loads and updates screen capture areas from the corresponding JSON file."""
|
81
78
|
app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
|
82
79
|
ocr_config_dir = app_dir / "ocr_config"
|
80
|
+
os.makedirs(ocr_config_dir, exist_ok=True)
|
83
81
|
obs.connect_to_obs()
|
84
82
|
scene = util.sanitize_filename(obs.get_current_scene())
|
85
83
|
config_path = ocr_config_dir / f"{scene}.json"
|
@@ -217,63 +215,62 @@ def do_second_ocr(ocr1_text, rectangle_index, time, img):
|
|
217
215
|
if fuzz.ratio(previous_ocr2_text, text) >= 80:
|
218
216
|
logger.info("Seems like the same text from previous ocr2 result, not sending")
|
219
217
|
return
|
220
|
-
img.save(os.path.join(
|
218
|
+
img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
|
221
219
|
last_ocr2_results[rectangle_index] = text
|
222
|
-
|
223
|
-
import pyperclip
|
224
|
-
pyperclip.copy(text)
|
225
|
-
websocket_server_thread.send_text(text, time)
|
220
|
+
send_result(text, time)
|
226
221
|
except json.JSONDecodeError:
|
227
222
|
print("Invalid JSON received.")
|
228
223
|
except Exception as e:
|
229
224
|
logger.exception(e)
|
230
225
|
print(f"Error processing message: {e}")
|
231
226
|
|
227
|
+
def send_result(text, time):
|
228
|
+
if text:
|
229
|
+
text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
|
230
|
+
if get_config().advanced.ocr_sends_to_clipboard:
|
231
|
+
import pyperclip
|
232
|
+
pyperclip.copy(text)
|
233
|
+
websocket_server_thread.send_text(text, time)
|
234
|
+
|
232
235
|
|
233
236
|
last_oneocr_results_to_check = {} # Store last OCR result for each rectangle
|
234
237
|
last_oneocr_times = {} # Store last OCR time for each rectangle
|
235
238
|
text_stable_start_times = {} # Store the start time when text becomes stable for each rectangle
|
239
|
+
previous_imgs = {}
|
236
240
|
orig_text_results = {} # Store original text results for each rectangle
|
237
241
|
TEXT_APPEARENCE_DELAY = get_ocr_scan_rate() * 1000 + 500 # Adjust as needed
|
238
242
|
|
239
243
|
def text_callback(text, orig_text, rectangle_index, time, img=None):
|
240
244
|
global twopassocr, ocr2, last_oneocr_results_to_check, last_oneocr_times, text_stable_start_times, orig_text_results
|
241
245
|
orig_text_string = ''.join([item for item in orig_text if item is not None]) if orig_text else ""
|
246
|
+
# logger.debug(orig_text_string)
|
242
247
|
|
243
248
|
current_time = time if time else datetime.now()
|
244
249
|
|
245
|
-
previous_text = last_oneocr_results_to_check.
|
250
|
+
previous_text = last_oneocr_results_to_check.pop(rectangle_index, "").strip()
|
246
251
|
previous_orig_text = orig_text_results.get(rectangle_index, "").strip()
|
247
252
|
|
248
253
|
# print(previous_orig_text)
|
249
254
|
# if orig_text:
|
250
255
|
# print(orig_text_string)
|
251
|
-
|
256
|
+
if not twopassocr:
|
257
|
+
img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
|
258
|
+
send_result(text, time)
|
252
259
|
if not text:
|
253
260
|
if previous_text:
|
254
261
|
if rectangle_index in text_stable_start_times:
|
255
|
-
stable_time = text_stable_start_times
|
262
|
+
stable_time = text_stable_start_times.pop(rectangle_index)
|
263
|
+
previous_img = previous_imgs.pop(rectangle_index)
|
256
264
|
previous_result = last_ocr1_results[rectangle_index]
|
257
265
|
if previous_result and fuzz.ratio(previous_result, previous_text) >= 80:
|
258
266
|
logger.info("Seems like the same text, not " + "doing second OCR" if twopassocr else "sending")
|
259
|
-
del last_oneocr_results_to_check[rectangle_index]
|
260
267
|
return
|
261
268
|
if previous_orig_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 80:
|
262
269
|
logger.info("Seems like Text we already sent, not doing anything.")
|
263
|
-
del last_oneocr_results_to_check[rectangle_index]
|
264
270
|
return
|
265
271
|
orig_text_results[rectangle_index] = orig_text_string
|
266
|
-
|
267
|
-
do_second_ocr(previous_text, rectangle_index, time, img)
|
268
|
-
else:
|
269
|
-
if get_config().advanced.ocr_sends_to_clipboard:
|
270
|
-
import pyperclip
|
271
|
-
pyperclip.copy(text)
|
272
|
-
websocket_server_thread.send_text(previous_text, stable_time)
|
273
|
-
img.save(os.path.join(get_app_directory(), "temp", "last_successful_ocr.png"))
|
272
|
+
do_second_ocr(previous_text, rectangle_index, stable_time, previous_img)
|
274
273
|
last_ocr1_results[rectangle_index] = previous_text
|
275
|
-
del text_stable_start_times[rectangle_index]
|
276
|
-
del last_oneocr_results_to_check[rectangle_index]
|
277
274
|
return
|
278
275
|
return
|
279
276
|
|
@@ -281,6 +278,7 @@ def text_callback(text, orig_text, rectangle_index, time, img=None):
|
|
281
278
|
last_oneocr_results_to_check[rectangle_index] = text
|
282
279
|
last_oneocr_times[rectangle_index] = current_time
|
283
280
|
text_stable_start_times[rectangle_index] = current_time
|
281
|
+
previous_imgs[rectangle_index] = img
|
284
282
|
return
|
285
283
|
|
286
284
|
stable = text_stable_start_times.get(rectangle_index)
|
@@ -294,6 +292,7 @@ def text_callback(text, orig_text, rectangle_index, time, img=None):
|
|
294
292
|
else:
|
295
293
|
last_oneocr_results_to_check[rectangle_index] = text
|
296
294
|
last_oneocr_times[rectangle_index] = current_time
|
295
|
+
previous_imgs[rectangle_index] = img
|
297
296
|
|
298
297
|
done = False
|
299
298
|
|
@@ -0,0 +1,186 @@
|
|
1
|
+
import uuid
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from datetime import datetime
|
4
|
+
from difflib import SequenceMatcher
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
from GameSentenceMiner.configuration import logger, get_config
|
8
|
+
from GameSentenceMiner.model import AnkiCard
|
9
|
+
from GameSentenceMiner.util import remove_html_and_cloze_tags
|
10
|
+
|
11
|
+
initial_time = datetime.now()
|
12
|
+
|
13
|
+
|
14
|
+
@dataclass
class GameLine:
    """One captured line of game text, doubly linked to its neighbours."""
    id: str                  # time-based UUID string assigned by GameText
    text: str                # the captured line itself
    time: datetime           # when the line was captured
    prev: 'GameLine | None'  # previously captured line, if any
    next: 'GameLine | None'  # next captured line, if any
    index: int = 0           # position in capture order

    def get_previous_time(self):
        """Capture time of the preceding line, or the module start time."""
        return self.prev.time if self.prev else initial_time

    def get_next_time(self):
        """Capture time of the following line, or 0 when this is the newest.

        NOTE(review): returns the int 0 rather than a datetime when there is
        no next line — callers must tolerate the mixed return type.
        """
        return self.next.time if self.next else 0

    def __str__(self):
        return str({"text": self.text, "time": self.time})
|
35
|
+
|
36
|
+
|
37
|
+
@dataclass
class GameText:
    """Chronological log of captured game lines with id-based lookup."""
    values: list[GameLine]            # lines in capture order
    values_dict: dict[str, GameLine]  # id -> line
    game_line_index = 0               # monotonically increasing insertion counter

    def __init__(self):
        self.values = []
        self.values_dict = {}

    def __getitem__(self, key):
        return self.values[key]

    def get_by_id(self, line_id: str) -> Optional[GameLine]:
        """Return the line with *line_id*, or None when unknown."""
        if not self.values_dict:
            return None
        return self.values_dict.get(line_id)

    def get_time(self, line_text: str, occurrence: int = -1) -> datetime:
        """Capture time of the *occurrence*-th exact match of *line_text*
        (latest match by default); module start time when nothing matches."""
        event = self.get_event(line_text, occurrence)
        return event.time if event else initial_time

    def get_event(self, line_text: str, occurrence: int = -1) -> GameLine | None:
        """The *occurrence*-th line whose text equals *line_text*, or None."""
        matches = [line for line in self.values if line.text == line_text]
        return matches[occurrence] if matches else None

    def add_line(self, line_text, line_time=None):
        """Append *line_text* as a new GameLine linked to the previous tail.

        Empty/falsy text is ignored. The line gets a time-based UUID and the
        next insertion index; *line_time* defaults to now.
        """
        if not line_text:
            return
        new_id = str(uuid.uuid1())  # time-based UUID
        tail = self.values[-1] if self.values else None
        new_line = GameLine(
            id=new_id,
            text=line_text,
            time=line_time if line_time else datetime.now(),
            prev=tail,
            next=None,
            index=self.game_line_index,
        )
        self.values_dict[new_id] = new_line
        logger.debug(f"Adding line: {new_line}")
        self.game_line_index += 1
        if tail is not None:
            tail.next = new_line
        self.values.append(new_line)
        # self.remove_old_events(datetime.now() - timedelta(minutes=10))

    def has_line(self, line_text) -> bool:
        """True when any logged line's text equals *line_text* exactly."""
        return any(line.text == line_text for line in self.values)
|
92
|
+
|
93
|
+
|
94
|
+
text_log = GameText()
|
95
|
+
|
96
|
+
|
97
|
+
def similar(a, b):
    """Similarity ratio of two strings in [0, 1] via difflib."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
|
99
|
+
|
100
|
+
|
101
|
+
def one_contains_the_other(a, b):
    """True when either string is a substring of the other."""
    return (a in b) or (b in a)
|
103
|
+
|
104
|
+
|
105
|
+
def lines_match(a, b):
    """Heuristic match between two text lines.

    Lines match when their difflib similarity ratio is at least 0.60 or
    one line contains the other verbatim.
    """
    # Compute both predicates once; the original recomputed similar(a, b)
    # and one_contains_the_other(a, b) a second time in the return statement.
    similarity = similar(a, b)
    contained = one_contains_the_other(a, b)
    logger.debug(f"Comparing: {a} with {b} - Similarity: {similarity}, Or One contains the other: {contained}")
    return similarity >= 0.60 or contained
|
109
|
+
|
110
|
+
|
111
|
+
def get_text_event(last_note) -> GameLine:
    """Pick the logged line most likely corresponding to *last_note*.

    Scans the log newest-first for a fuzzy match against the note's sentence
    field. Falls back to the newest line when there is no note, no sentence,
    or no match. Raises when the log is empty.
    """
    lines = text_log.values

    if not lines:
        raise Exception("No lines in history. Text is required from either clipboard or websocket for GSM to work. Please check your setup/config.")

    if not last_note:
        return lines[-1]

    sentence = last_note.get_field(get_config().anki.sentence_field)
    if not sentence:
        return lines[-1]

    # Hoisted out of the loop: the original stripped the same sentence on
    # every iteration.
    stripped_sentence = remove_html_and_cloze_tags(sentence)
    for line in reversed(lines):
        if lines_match(line.text, stripped_sentence):
            return line

    logger.debug("Couldn't find a match in history, using last event")
    return lines[-1]
|
130
|
+
|
131
|
+
|
132
|
+
def get_line_and_future_lines(last_note):
    """Return the texts of the note's matched line and every later line.

    Returns an empty list when there is no note or no sentence matches.
    """
    if not last_note:
        return []

    sentence = last_note.get_field(get_config().anki.sentence_field)
    found_lines = []
    if sentence:
        # Hoisted out of the loop: the original stripped the same sentence
        # on every iteration.
        stripped_sentence = remove_html_and_cloze_tags(sentence)
        found = False
        for line in text_log.values:
            if found:
                found_lines.append(line.text)
            if lines_match(line.text, stripped_sentence):  # fuzzy threshold lives in lines_match
                found = True
                found_lines.append(line.text)
    return found_lines
|
147
|
+
|
148
|
+
|
149
|
+
def get_mined_line(last_note: AnkiCard, lines):
    """Find the logged line *last_note* was mined from; default to the newest.

    Fix: *lines* is populated from the global log before any indexing, so an
    empty *lines* combined with a missing note no longer raises IndexError
    (the original checked ``last_note`` first and indexed the empty list).
    """
    if not lines:
        lines = get_all_lines()
    if not last_note:
        return lines[-1]

    sentence = last_note.get_field(get_config().anki.sentence_field)
    stripped_sentence = remove_html_and_cloze_tags(sentence)
    for line in lines:
        if lines_match(line.text, stripped_sentence):
            return line
    return lines[-1]
|
160
|
+
|
161
|
+
|
162
|
+
def get_time_of_line(line):
    """Capture time of the latest logged line whose text equals *line*."""
    return text_log.get_time(line)
|
164
|
+
|
165
|
+
|
166
|
+
def get_all_lines():
    """All logged GameLine objects in capture order."""
    return text_log.values
|
168
|
+
|
169
|
+
|
170
|
+
def get_text_log() -> GameText:
    """The module-wide GameText singleton."""
    return text_log
|
172
|
+
|
173
|
+
def add_line(current_line_after_regex, line_time):
    """Append a regex-processed line to the module-wide text log."""
    text_log.add_line(current_line_after_regex, line_time)
|
175
|
+
|
176
|
+
def get_line_by_id(line_id: str) -> Optional[GameLine]:
    """Look up a GameLine in the module-wide log by its unique ID.

    Args:
        line_id (str): The unique identifier of the GameLine.

    Returns:
        Optional[GameLine]: The matching line, or None when not found.
    """
    return text_log.get_by_id(line_id)
|
GameSentenceMiner/util.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1
|
-
import
|
1
|
+
import json
|
2
2
|
import os
|
3
3
|
import random
|
4
4
|
import re
|
5
5
|
import string
|
6
6
|
import subprocess
|
7
|
-
import sys
|
8
7
|
import threading
|
9
8
|
import time
|
10
9
|
from datetime import datetime
|
@@ -206,4 +205,54 @@ def import_vad_models():
|
|
206
205
|
from GameSentenceMiner.vad import whisper_helper
|
207
206
|
if get_config().vad.is_vosk():
|
208
207
|
from GameSentenceMiner.vad import vosk_helper
|
209
|
-
return silero_trim, whisper_helper, vosk_helper
|
208
|
+
return silero_trim, whisper_helper, vosk_helper
|
209
|
+
|
210
|
+
|
211
|
+
def isascii(s: str) -> bool:
    """Return True when *s* contains only ASCII characters.

    ``str.isascii`` exists on Python >= 3.7; the encode fallback keeps this
    working for string-like objects that lack the method. The original used
    two bare ``except:`` clauses, which also swallowed KeyboardInterrupt and
    SystemExit; they are narrowed here to the exceptions actually expected.
    """
    try:
        return s.isascii()
    except AttributeError:
        try:
            s.encode("ascii")
            return True
        except (UnicodeEncodeError, AttributeError):
            return False
|
220
|
+
|
221
|
+
def do_text_replacements(text, replacements_json):
    """Apply user-configured replacements from *replacements_json* to *text*.

    The JSON file has the shape
    ``{"enabled": bool, "args": {"replacements": {pattern: replacement}}}``.
    Patterns prefixed ``re:`` are treated as regexes; plain ASCII patterns
    are replaced on word boundaries; all other patterns are replaced
    verbatim. Returns *text* unchanged when it is empty, when the file is
    missing, or when replacements are disabled.
    """
    if not text:
        return text

    config = {}
    if os.path.exists(replacements_json):
        with open(replacements_json, 'r', encoding='utf-8') as handle:
            config.update(json.load(handle))

    if config.get("enabled", False):
        original_text = text
        rules = config.get("args", {}).get("replacements", {})
        for raw_pattern, replacement in rules.items():
            if not raw_pattern:
                continue
            if raw_pattern.startswith("re:"):
                regex = raw_pattern[3:]
                try:
                    text = re.sub(regex, replacement, text)
                except Exception:
                    logger.error(f"Invalid regex pattern: {regex}")
                continue
            if isascii(raw_pattern):
                # Whole-word replacement for ASCII patterns.
                text = re.sub(r"\b{}\b".format(re.escape(raw_pattern)), replacement, text)
            else:
                text = text.replace(raw_pattern, replacement)
        if text != original_text:
            logger.info(f"Text replaced: '{original_text}' -> '{text}' using replacements.")
    return text
|
250
|
+
|
251
|
+
|
252
|
+
# Per-user replacement config files live under %APPDATA%\GameSentenceMiner\config.
# NOTE(review): os.getenv('APPDATA') is None on non-Windows platforms, which
# makes os.path.join raise TypeError at import time — confirm this module is
# only loaded on Windows.
TEXT_REPLACEMENTS_FILE = os.path.join(os.getenv('APPDATA'), 'GameSentenceMiner', 'config', 'text_replacements.json')
OCR_REPLACEMENTS_FILE = os.path.join(os.getenv('APPDATA'), 'GameSentenceMiner', 'config', 'ocr_replacements.json')
# Ensure the shared config directory exists before any reader/writer touches it.
os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)

if not os.path.exists(TEXT_REPLACEMENTS_FILE):
    #TODO : fetch raw json from github
    pass
File without changes
|
File without changes
|
Binary file
|
Binary file
|
Binary file
|