GameSentenceMiner 2.7.16__py3-none-any.whl → 2.8.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (35)
  1. GameSentenceMiner/anki.py +7 -8
  2. GameSentenceMiner/config_gui.py +19 -3
  3. GameSentenceMiner/configuration.py +8 -1
  4. GameSentenceMiner/ffmpeg.py +1 -3
  5. GameSentenceMiner/gametext.py +16 -155
  6. GameSentenceMiner/gsm.py +28 -29
  7. GameSentenceMiner/obs.py +0 -3
  8. GameSentenceMiner/ocr/ocrconfig.py +0 -1
  9. GameSentenceMiner/ocr/oneocr_dl.py +243 -0
  10. GameSentenceMiner/ocr/owocr_area_selector.py +0 -1
  11. GameSentenceMiner/ocr/owocr_helper.py +25 -26
  12. GameSentenceMiner/owocr/owocr/run.py +1 -1
  13. GameSentenceMiner/text_log.py +186 -0
  14. GameSentenceMiner/util.py +52 -3
  15. GameSentenceMiner/web/__init__.py +0 -0
  16. GameSentenceMiner/web/static/__init__.py +0 -0
  17. GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  18. GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  19. GameSentenceMiner/web/static/favicon.ico +0 -0
  20. GameSentenceMiner/web/static/favicon.svg +3 -0
  21. GameSentenceMiner/web/static/site.webmanifest +21 -0
  22. GameSentenceMiner/web/static/style.css +292 -0
  23. GameSentenceMiner/web/static/text_replacements.html +238 -0
  24. GameSentenceMiner/web/static/utility.html +313 -0
  25. GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  26. GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  27. GameSentenceMiner/web/texthooking_page.py +234 -0
  28. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/METADATA +2 -1
  29. gamesentenceminer-2.8.0.dist-info/RECORD +58 -0
  30. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/WHEEL +1 -1
  31. GameSentenceMiner/utility_gui.py +0 -204
  32. gamesentenceminer-2.7.16.dist-info/RECORD +0 -44
  33. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/entry_points.txt +0 -0
  34. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/licenses/LICENSE +0 -0
  35. {gamesentenceminer-2.7.16.dist-info → gamesentenceminer-2.8.0.dist-info}/top_level.txt +0 -0
GameSentenceMiner/ocr/oneocr_dl.py ADDED
@@ -0,0 +1,243 @@
+ import os
+ import zipfile
+ import shutil
+ from os.path import expanduser
+
+ import requests
+ import re
+ import tempfile
+
+ # Placeholder functions/constants for removed proprietary ones
+ # In a real application, you would replace these with appropriate logic
+ # or standard library equivalents.
+
+ def checkdir(d):
+     """Checks if a directory exists and contains the expected files."""
+     flist = ["oneocr.dll", "oneocr.onemodel", "onnxruntime.dll"]
+     return os.path.isdir(d) and all((os.path.isfile(os.path.join(d, _)) for _ in flist))
+
+ def selectdir():
+     """Attempts to find the SnippingTool directory, prioritizing cache."""
+     cachedir = "cache/SnippingTool"
+     packageFamilyName = "Microsoft.ScreenSketch_8wekyb3d8bbwe"
+
+     if checkdir(cachedir):
+         return cachedir
+     # This part needs NativeUtils.GetPackagePathByPackageFamily, which is proprietary.
+     # We'll skip this part for simplification as requested.
+     # path = NativeUtils.GetPackagePathByPackageFamily(packageFamilyName)
+     # if not path:
+     #     return None
+     # path = os.path.join(path, "SnippingTool")
+     # if not checkdir(path):
+     #     return None
+     # return path
+     return None # Return None if not found in cache
+
+ def getproxy():
+     """Placeholder for proxy retrieval."""
+     # Replace with actual proxy retrieval logic or return None
+     return None
+
+ def stringfyerror(e):
+     """Placeholder for error stringification."""
+     return str(e)
+
+ def dynamiclink(path):
+     """Placeholder for dynamic link resolution."""
+     # This would likely map a resource path to a local file path.
+     # For simplification, we'll just use the provided path string.
+     return path # Assuming path is a URL here based on usage
+
+ # Simplified download logic extracted from the question class
+ class Downloader:
+     def __init__(self):
+         self.oneocr_dir = expanduser("~/.config/oneocr")
+         self.packageFamilyName = "Microsoft.ScreenSketch_8wekyb3d8bbwe"
+         self.flist = ["oneocr.dll", "oneocr.onemodel", "onnxruntime.dll"]
+
+     def download_and_extract(self):
+         """
+         Main function to attempt download and extraction.
+         Tries official source first, then a fallback URL.
+         """
+         if checkdir(self.oneocr_dir):
+             print("Files already exist in cache.")
+             return True
+
+         try:
+             print("Attempting to download from official source...")
+             self.downloadofficial()
+             print("Download and extraction from official source successful.")
+             return True
+         except Exception as e:
+             print(f"Download from official source failed: {stringfyerror(e)}")
+             print("Attempting to download from fallback URL...")
+             try:
+                 fallback_url = dynamiclink("/Resource/SnippingTool") # Assuming this resolves to a URL
+                 self.downloadx(fallback_url)
+                 print("Download and extraction from fallback URL successful.")
+                 return True
+             except Exception as e_fallback:
+                 print(f"Download from fallback URL failed: {stringfyerror(e_fallback)}")
+                 print("All download attempts failed.")
+                 return False
+
+
+     def downloadofficial(self):
+         """Downloads the latest SnippingTool MSIX bundle from a store API."""
+         headers = {
+             "accept": "*/*",
+             # Changed accept-language to prioritize US English
+             "accept-language": "en-US,en;q=0.9",
+             "cache-control": "no-cache",
+             "origin": "https://store.rg-adguard.net",
+             "pragma": "no-cache",
+             "priority": "u=1, i",
+             "referer": "https://store.rg-adguard.net/",
+             "sec-ch-ua": '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
+             "sec-ch-ua-mobile": "?0",
+             "sec-ch-ua-platform": '"Windows"',
+             "sec-fetch-dest": "empty",
+             "sec-fetch-mode": "cors",
+             "sec-fetch-site": "same-origin",
+             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
+         }
+
+         data = dict(type="PackageFamilyName", url=self.packageFamilyName)
+
+         response = requests.post(
+             "https://store.rg-adguard.net/api/GetFiles",
+             headers=headers,
+             data=data,
+             proxies=getproxy(),
+         )
+         response.raise_for_status() # Raise an exception for bad status codes
+
+         saves = []
+         for link, package in re.findall('<a href="(.*?)".*?>(.*?)</a>', response.text):
+             if not package.startswith("Microsoft.ScreenSketch"):
+                 continue
+             if not package.endswith(".msixbundle"):
+                 continue
+             version = re.search(r"\d+\.\d+\.\d+\.\d+", package)
+             if not version:
+                 continue
+             version = tuple(int(_) for _ in version.group().split("."))
+             saves.append((version, link, package))
+
+         if not saves:
+             raise Exception("Could not find suitable download link from official source.")
+
+         saves.sort(key=lambda _: _[0])
+         url = saves[-1][1]
+         package_name = saves[-1][2]
+
+         print(f"Downloading {package_name} from {url}")
+         req = requests.get(url, stream=True, proxies=getproxy())
+         req.raise_for_status()
+
+         total_size_in_bytes = int(req.headers.get('content-length', 0))
+         block_size = 1024 * 32 # 32 Kibibytes
+         temp_msixbundle_path = os.path.join(tempfile.gettempdir(), package_name)
+
+         with open(temp_msixbundle_path, "wb") as ff:
+             downloaded_size = 0
+             for chunk in req.iter_content(chunk_size=block_size):
+                 ff.write(chunk)
+                 downloaded_size += len(chunk)
+                 # Basic progress reporting (can be removed)
+                 if total_size_in_bytes:
+                     progress = (downloaded_size / total_size_in_bytes) * 100
+                     print(f"Downloaded {downloaded_size}/{total_size_in_bytes} bytes ({progress:.2f}%)", end='\r')
+         print("\nDownload complete. Extracting...")
+
+         namemsix = None
+         with zipfile.ZipFile(temp_msixbundle_path) as ff:
+             for name in ff.namelist():
+                 if name.startswith("SnippingTool") and name.endswith("_x64.msix"):
+                     namemsix = name
+                     break
+             if not namemsix:
+                 raise Exception("Could not find MSIX file within MSIXBUNDLE.")
+             temp_msix_path = os.path.join(tempfile.gettempdir(), namemsix)
+             ff.extract(namemsix, tempfile.gettempdir())
+
+         print(f"Extracted {namemsix}. Extracting components...")
+         if os.path.exists(self.oneocr_dir):
+             shutil.rmtree(self.oneocr_dir)
+         os.makedirs(self.oneocr_dir, exist_ok=True)
+
+         with zipfile.ZipFile(temp_msix_path) as ff:
+             collect = []
+             for name in ff.namelist():
+                 # Extract only the files within the "SnippingTool/" directory
+                 if name.startswith("SnippingTool/") and any(name.endswith(f) for f in self.flist):
+                     # Construct target path relative to cachedir
+                     target_path = os.path.join(self.oneocr_dir, os.path.relpath(name, "SnippingTool/"))
+                     # Ensure parent directories exist
+                     os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                     # Extract the file
+                     with ff.open(name) as source, open(target_path, "wb") as target:
+                         shutil.copyfileobj(source, target)
+                     collect.append(name)
+             if not collect:
+                 raise Exception("Could not find required files within MSIX.")
+
+
+         if not checkdir(self.oneocr_dir):
+             raise Exception("Extraction failed: Required files not found in cache directory.")
+
+         # Clean up temporary files
+         os.remove(temp_msixbundle_path)
+         os.remove(temp_msix_path)
+
+
+     def downloadx(self, url: str):
+         """Downloads a zip file from a URL and extracts it."""
+         print(f"Downloading from fallback URL: {url}")
+         # Added accept-language to the fallback download as well for consistency
+         headers = {
+             "accept-language": "en-US,en;q=0.9",
+             # Add other relevant headers if necessary for the fallback URL
+             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
+             "accept": "*/*",
+         }
+         req = requests.get(url, verify=False, proxies=getproxy(), stream=True, headers=headers)
+         req.raise_for_status()
+
+         total_size_in_bytes = int(req.headers.get('content-length', 0))
+         block_size = 1024 * 32 # 32 Kibibytes
+         temp_zip_path = os.path.join(tempfile.gettempdir(), url.split("/")[-1])
+
+         with open(temp_zip_path, "wb") as ff:
+             downloaded_size = 0
+             for chunk in req.iter_content(chunk_size=block_size):
+                 ff.write(chunk)
+                 downloaded_size += len(chunk)
+                 # Basic progress reporting (can be removed)
+                 if total_size_in_bytes:
+                     progress = (downloaded_size / total_size_in_bytes) * 100
+                     print(f"Downloaded {downloaded_size}/{total_size_in_bytes} bytes ({progress:.2f}%)", end='\r')
+         print("\nDownload complete. Extracting...")
+
+         if os.path.exists(self.oneocr_dir):
+             shutil.rmtree(self.oneocr_dir)
+         os.makedirs(self.oneocr_dir, exist_ok=True)
+
+         with zipfile.ZipFile(temp_zip_path) as zipf:
+             zipf.extractall(self.oneocr_dir)
+
+         if not checkdir(self.oneocr_dir):
+             raise Exception("Extraction failed: Required files not found in cache directory.")
+
+         # Clean up temporary files
+         os.remove(temp_zip_path)
+
+ # Example usage:
+ if __name__ == "__main__":
+     downloader = Downloader()
+     if downloader.download_and_extract():
+         print("SnippingTool files are ready.")
+     else:
+         print("Failed to obtain SnippingTool files.")
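The getproxy, dynamiclink, and stringfyerror helpers above are explicit placeholders. Below is a minimal sketch of one way to back getproxy() with the standard library, assuming environment-configured proxies are wanted; the shipped module simply returns None:

import urllib.request

def getproxy():
    """Return a requests-style proxies dict from the environment, or None."""
    # urllib.request.getproxies() reads HTTP_PROXY/HTTPS_PROXY (and, on Windows,
    # the registry), producing the mapping that requests' proxies= parameter accepts.
    proxies = urllib.request.getproxies()
    return proxies or None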
@@ -7,7 +7,6 @@ import mss
  from PIL import Image, ImageTk, ImageDraw

  from GameSentenceMiner import obs # Import your actual obs module
- from GameSentenceMiner.ocr.owocr_helper import get_ocr_config
  from GameSentenceMiner.util import sanitize_filename # Import your actual util module

  try:
GameSentenceMiner/ocr/owocr_helper.py CHANGED
@@ -1,9 +1,9 @@
  import asyncio
- import difflib
  import json
  import logging
  import os
  import queue
+ import re
  import threading
  import time
  from datetime import datetime
@@ -14,17 +14,14 @@ from tkinter import messagebox
  import mss
  import websockets
  from rapidfuzz import fuzz
- from PIL import Image, ImageDraw

  from GameSentenceMiner import obs, util
- from GameSentenceMiner.configuration import get_config, get_app_directory
+ from GameSentenceMiner.configuration import get_config, get_app_directory, get_temporary_directory
  from GameSentenceMiner.electron_config import get_ocr_scan_rate, get_requires_open_window
  from GameSentenceMiner.ocr.gsm_ocr_config import OCRConfig, Rectangle
  from GameSentenceMiner.owocr.owocr import screen_coordinate_picker, run
  from GameSentenceMiner.owocr.owocr.run import TextFiltering
-
- from dataclasses import dataclass
- from typing import List, Optional
+ from GameSentenceMiner.util import do_text_replacements, OCR_REPLACEMENTS_FILE

  CONFIG_FILE = Path("ocr_config.json")
  DEFAULT_IMAGE_PATH = r"C:\Users\Beangate\Pictures\msedge_acbl8GL7Ax.jpg" # CHANGE THIS
@@ -80,6 +77,7 @@ def get_ocr_config() -> OCRConfig:
      """Loads and updates screen capture areas from the corresponding JSON file."""
      app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
      ocr_config_dir = app_dir / "ocr_config"
+     os.makedirs(ocr_config_dir, exist_ok=True)
      obs.connect_to_obs()
      scene = util.sanitize_filename(obs.get_current_scene())
      config_path = ocr_config_dir / f"{scene}.json"
@@ -217,63 +215,62 @@ def do_second_ocr(ocr1_text, rectangle_index, time, img):
          if fuzz.ratio(previous_ocr2_text, text) >= 80:
              logger.info("Seems like the same text from previous ocr2 result, not sending")
              return
-         img.save(os.path.join(get_app_directory(), "temp", "last_successful_ocr.png"))
+         img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
          last_ocr2_results[rectangle_index] = text
-         if get_config().advanced.ocr_sends_to_clipboard:
-             import pyperclip
-             pyperclip.copy(text)
-         websocket_server_thread.send_text(text, time)
+         send_result(text, time)
      except json.JSONDecodeError:
          print("Invalid JSON received.")
      except Exception as e:
          logger.exception(e)
          print(f"Error processing message: {e}")

+ def send_result(text, time):
+     if text:
+         text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
+         if get_config().advanced.ocr_sends_to_clipboard:
+             import pyperclip
+             pyperclip.copy(text)
+         websocket_server_thread.send_text(text, time)
+

  last_oneocr_results_to_check = {} # Store last OCR result for each rectangle
  last_oneocr_times = {} # Store last OCR time for each rectangle
  text_stable_start_times = {} # Store the start time when text becomes stable for each rectangle
+ previous_imgs = {}
  orig_text_results = {} # Store original text results for each rectangle
  TEXT_APPEARENCE_DELAY = get_ocr_scan_rate() * 1000 + 500 # Adjust as needed

  def text_callback(text, orig_text, rectangle_index, time, img=None):
      global twopassocr, ocr2, last_oneocr_results_to_check, last_oneocr_times, text_stable_start_times, orig_text_results
      orig_text_string = ''.join([item for item in orig_text if item is not None]) if orig_text else ""
+     # logger.debug(orig_text_string)

      current_time = time if time else datetime.now()

-     previous_text = last_oneocr_results_to_check.get(rectangle_index, "").strip()
+     previous_text = last_oneocr_results_to_check.pop(rectangle_index, "").strip()
      previous_orig_text = orig_text_results.get(rectangle_index, "").strip()

      # print(previous_orig_text)
      # if orig_text:
      # print(orig_text_string)
-
+     if not twopassocr:
+         img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
+         send_result(text, time)
      if not text:
          if previous_text:
              if rectangle_index in text_stable_start_times:
-                 stable_time = text_stable_start_times[rectangle_index]
+                 stable_time = text_stable_start_times.pop(rectangle_index)
+                 previous_img = previous_imgs.pop(rectangle_index)
                  previous_result = last_ocr1_results[rectangle_index]
                  if previous_result and fuzz.ratio(previous_result, previous_text) >= 80:
                      logger.info("Seems like the same text, not " + "doing second OCR" if twopassocr else "sending")
-                     del last_oneocr_results_to_check[rectangle_index]
                      return
                  if previous_orig_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 80:
                      logger.info("Seems like Text we already sent, not doing anything.")
-                     del last_oneocr_results_to_check[rectangle_index]
                      return
                  orig_text_results[rectangle_index] = orig_text_string
-                 if twopassocr:
-                     do_second_ocr(previous_text, rectangle_index, time, img)
-                 else:
-                     if get_config().advanced.ocr_sends_to_clipboard:
-                         import pyperclip
-                         pyperclip.copy(text)
-                     websocket_server_thread.send_text(previous_text, stable_time)
-                     img.save(os.path.join(get_app_directory(), "temp", "last_successful_ocr.png"))
+                 do_second_ocr(previous_text, rectangle_index, stable_time, previous_img)
                  last_ocr1_results[rectangle_index] = previous_text
-                 del text_stable_start_times[rectangle_index]
-                 del last_oneocr_results_to_check[rectangle_index]
              return
          return

@@ -281,6 +278,7 @@ def text_callback(text, orig_text, rectangle_index, time, img=None):
          last_oneocr_results_to_check[rectangle_index] = text
          last_oneocr_times[rectangle_index] = current_time
          text_stable_start_times[rectangle_index] = current_time
+         previous_imgs[rectangle_index] = img
          return

      stable = text_stable_start_times.get(rectangle_index)
@@ -294,6 +292,7 @@ def text_callback(text, orig_text, rectangle_index, time, img=None):
      else:
          last_oneocr_results_to_check[rectangle_index] = text
          last_oneocr_times[rectangle_index] = current_time
+         previous_imgs[rectangle_index] = img

  done = False

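The text_callback changes above hinge on a per-rectangle near-duplicate check: a result is dropped when rapidfuzz scores it at 80 or more against the previous result for the same screen area, and the captured image is now kept in previous_imgs so the second OCR pass can re-read it once the text stops changing. A standalone sketch of just that duplicate check (the names here are illustrative, not GSM's):

from rapidfuzz import fuzz

last_results = {}  # rectangle_index -> last accepted text

def is_duplicate(rectangle_index: int, text: str) -> bool:
    # fuzz.ratio returns 0-100; >= 80 is treated as "same text" in the hunks above.
    previous = last_results.get(rectangle_index, "")
    if previous and fuzz.ratio(previous, text) >= 80:
        return True
    last_results[rectangle_index] = text
    return False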
@@ -1,4 +1,4 @@
- import datetime
+ from datetime import datetime
  import sys
  import signal
  import time
GameSentenceMiner/text_log.py ADDED
@@ -0,0 +1,186 @@
+ import uuid
+ from dataclasses import dataclass
+ from datetime import datetime
+ from difflib import SequenceMatcher
+ from typing import Optional
+
+ from GameSentenceMiner.configuration import logger, get_config
+ from GameSentenceMiner.model import AnkiCard
+ from GameSentenceMiner.util import remove_html_and_cloze_tags
+
+ initial_time = datetime.now()
+
+
+ @dataclass
+ class GameLine:
+     id: str
+     text: str
+     time: datetime
+     prev: 'GameLine | None'
+     next: 'GameLine | None'
+     index: int = 0
+
+     def get_previous_time(self):
+         if self.prev:
+             return self.prev.time
+         return initial_time
+
+     def get_next_time(self):
+         if self.next:
+             return self.next.time
+         return 0
+
+     def __str__(self):
+         return str({"text": self.text, "time": self.time})
+
+
+ @dataclass
+ class GameText:
+     values: list[GameLine]
+     values_dict: dict[str, GameLine]
+     game_line_index = 0
+
+     def __init__(self):
+         self.values = []
+         self.values_dict = {}
+
+     def __getitem__(self, key):
+         return self.values[key]
+
+     def get_by_id(self, line_id: str) -> Optional[GameLine]:
+         if not self.values_dict:
+             return None
+         return self.values_dict.get(line_id)
+
+     def get_time(self, line_text: str, occurrence: int = -1) -> datetime:
+         matches = [line for line in self.values if line.text == line_text]
+         if matches:
+             return matches[occurrence].time # Default to latest
+         return initial_time
+
+     def get_event(self, line_text: str, occurrence: int = -1) -> GameLine | None:
+         matches = [line for line in self.values if line.text == line_text]
+         if matches:
+             return matches[occurrence]
+         return None
+
+     def add_line(self, line_text, line_time=None):
+         if not line_text:
+             return
+         line_id = str(uuid.uuid1())
+         new_line = GameLine(
+             id=line_id, # Time-based UUID as an integer
+             text=line_text,
+             time=line_time if line_time else datetime.now(),
+             prev=self.values[-1] if self.values else None,
+             next=None,
+             index=self.game_line_index
+         )
+         self.values_dict[line_id] = new_line
+         logger.debug(f"Adding line: {new_line}")
+         self.game_line_index += 1
+         if self.values:
+             self.values[-1].next = new_line
+         self.values.append(new_line)
+         # self.remove_old_events(datetime.now() - timedelta(minutes=10))
+
+     def has_line(self, line_text) -> bool:
+         for game_line in self.values:
+             if game_line.text == line_text:
+                 return True
+         return False
+
+
+ text_log = GameText()
+
+
+ def similar(a, b):
+     return SequenceMatcher(None, a, b).ratio()
+
+
+ def one_contains_the_other(a, b):
+     return a in b or b in a
+
+
+ def lines_match(a, b):
+     similarity = similar(a, b)
+     logger.debug(f"Comparing: {a} with {b} - Similarity: {similarity}, Or One contains the other: {one_contains_the_other(a, b)}")
+     return similar(a, b) >= 0.60 or one_contains_the_other(a, b)
+
+
+ def get_text_event(last_note) -> GameLine:
+     lines = text_log.values
+
+     if not lines:
+         raise Exception("No lines in history. Text is required from either clipboard or websocket for GSM to work. Please check your setup/config.")
+
+     if not last_note:
+         return lines[-1]
+
+     sentence = last_note.get_field(get_config().anki.sentence_field)
+     if not sentence:
+         return lines[-1]
+
+     for line in reversed(lines):
+         if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
+             return line
+
+     logger.debug("Couldn't find a match in history, using last event")
+     return lines[-1]
+
+
+ def get_line_and_future_lines(last_note):
+     if not last_note:
+         return []
+
+     sentence = last_note.get_field(get_config().anki.sentence_field)
+     found_lines = []
+     if sentence:
+         found = False
+         for line in text_log.values:
+             if found:
+                 found_lines.append(line.text)
+             if lines_match(line.text, remove_html_and_cloze_tags(sentence)): # 80% similarity threshold
+                 found = True
+                 found_lines.append(line.text)
+     return found_lines
+
+
+ def get_mined_line(last_note: AnkiCard, lines):
+     if not last_note:
+         return lines[-1]
+     if not lines:
+         lines = get_all_lines()
+
+     sentence = last_note.get_field(get_config().anki.sentence_field)
+     for line in lines:
+         if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
+             return line
+     return lines[-1]
+
+
+ def get_time_of_line(line):
+     return text_log.get_time(line)
+
+
+ def get_all_lines():
+     return text_log.values
+
+
+ def get_text_log() -> GameText:
+     return text_log
+
+ def add_line(current_line_after_regex, line_time):
+     text_log.add_line(current_line_after_regex, line_time)
+
+ def get_line_by_id(line_id: str) -> Optional[GameLine]:
+     """
+     Retrieve a GameLine by its unique ID.
+
+     Args:
+         line_id (str): The unique identifier of the GameLine.
+
+     Returns:
+         Optional[GameLine]: The GameLine object if found, otherwise None.
+     """
+     return text_log.get_by_id(line_id)
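The new text_log module keeps lines in a doubly linked list (GameLine.prev/next) plus a UUID map for direct lookup. A hedged usage sketch, assuming a working GameSentenceMiner install on Windows (the module pulls in GSM's configuration, model, and util at import time, so it is not standalone):

from GameSentenceMiner import text_log

# line_time is required by the module-level wrapper; None falls back to datetime.now().
text_log.add_line("一行目のテキスト", None)
text_log.add_line("二行目のテキスト", None)

latest = text_log.get_all_lines()[-1]             # most recent GameLine
print(latest.text, latest.time)
print(latest.prev.text if latest.prev else None)  # linked-list traversal
assert text_log.get_line_by_id(latest.id) is latest  # UUID lookup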
GameSentenceMiner/util.py CHANGED
@@ -1,10 +1,9 @@
- import importlib
+ import json
  import os
  import random
  import re
  import string
  import subprocess
- import sys
  import threading
  import time
  from datetime import datetime
@@ -206,4 +205,54 @@ def import_vad_models():
          from GameSentenceMiner.vad import whisper_helper
      if get_config().vad.is_vosk():
          from GameSentenceMiner.vad import vosk_helper
-     return silero_trim, whisper_helper, vosk_helper
+     return silero_trim, whisper_helper, vosk_helper
+
+
+ def isascii(s: str):
+     try:
+         return s.isascii()
+     except:
+         try:
+             s.encode("ascii")
+             return True
+         except:
+             return False
+
+ def do_text_replacements(text, replacements_json):
+     if not text:
+         return text
+
+     replacements = {}
+     if os.path.exists(replacements_json):
+         with open(replacements_json, 'r', encoding='utf-8') as f:
+             replacements.update(json.load(f))
+
+     if replacements.get("enabled", False):
+         orig_text = text
+         filters = replacements.get("args", {}).get("replacements", {})
+         for fil, replacement in filters.items():
+             if not fil:
+                 continue
+             if fil.startswith("re:"):
+                 pattern = fil[3:]
+                 try:
+                     text = re.sub(pattern, replacement, text)
+                 except Exception:
+                     logger.error(f"Invalid regex pattern: {pattern}")
+                 continue
+             if isascii(fil):
+                 text = re.sub(r"\b{}\b".format(re.escape(fil)), replacement, text)
+             else:
+                 text = text.replace(fil, replacement)
+         if text != orig_text:
+             logger.info(f"Text replaced: '{orig_text}' -> '{text}' using replacements.")
+     return text
+
+
+ TEXT_REPLACEMENTS_FILE = os.path.join(os.getenv('APPDATA'), 'GameSentenceMiner', 'config', 'text_replacements.json')
+ OCR_REPLACEMENTS_FILE = os.path.join(os.getenv('APPDATA'), 'GameSentenceMiner', 'config', 'ocr_replacements.json')
+ os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)
+
+ if not os.path.exists(TEXT_REPLACEMENTS_FILE):
+     #TODO : fetch raw json from github
+     pass
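The new do_text_replacements() reads a small JSON config whose shape is fixed by the code above: a top-level "enabled" flag, then "args" -> "replacements", where keys prefixed with "re:" are treated as regex patterns, plain ASCII keys are matched on word boundaries, and anything else is replaced as a literal substring. A hedged sketch of such a file; the sample entries are invented:

import json

sample = {
    "enabled": True,
    "args": {
        "replacements": {
            "re:\\s+": " ",              # "re:" prefix -> regex pattern
            "GSM": "GameSentenceMiner",  # ASCII key -> word-boundary replacement
            "…": "...",                  # non-ASCII key -> plain substring replacement
        }
    },
}

# Saved as text_replacements.json or ocr_replacements.json under
# %APPDATA%\GameSentenceMiner\config\ (see the constants added above).
print(json.dumps(sample, ensure_ascii=False, indent=2))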