GameSentenceMiner 2.14.9__py3-none-any.whl → 2.14.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. GameSentenceMiner/ai/__init__.py +0 -0
  2. GameSentenceMiner/ai/ai_prompting.py +473 -0
  3. GameSentenceMiner/ocr/__init__.py +0 -0
  4. GameSentenceMiner/ocr/gsm_ocr_config.py +174 -0
  5. GameSentenceMiner/ocr/ocrconfig.py +129 -0
  6. GameSentenceMiner/ocr/owocr_area_selector.py +629 -0
  7. GameSentenceMiner/ocr/owocr_helper.py +638 -0
  8. GameSentenceMiner/ocr/ss_picker.py +140 -0
  9. GameSentenceMiner/owocr/owocr/__init__.py +1 -0
  10. GameSentenceMiner/owocr/owocr/__main__.py +9 -0
  11. GameSentenceMiner/owocr/owocr/config.py +148 -0
  12. GameSentenceMiner/owocr/owocr/lens_betterproto.py +1238 -0
  13. GameSentenceMiner/owocr/owocr/ocr.py +1690 -0
  14. GameSentenceMiner/owocr/owocr/run.py +1818 -0
  15. GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +109 -0
  16. GameSentenceMiner/tools/__init__.py +0 -0
  17. GameSentenceMiner/tools/audio_offset_selector.py +215 -0
  18. GameSentenceMiner/tools/ss_selector.py +135 -0
  19. GameSentenceMiner/tools/window_transparency.py +214 -0
  20. GameSentenceMiner/util/__init__.py +0 -0
  21. GameSentenceMiner/util/communication/__init__.py +22 -0
  22. GameSentenceMiner/util/communication/send.py +7 -0
  23. GameSentenceMiner/util/communication/websocket.py +94 -0
  24. GameSentenceMiner/util/configuration.py +1199 -0
  25. GameSentenceMiner/util/db.py +408 -0
  26. GameSentenceMiner/util/downloader/Untitled_json.py +472 -0
  27. GameSentenceMiner/util/downloader/__init__.py +0 -0
  28. GameSentenceMiner/util/downloader/download_tools.py +194 -0
  29. GameSentenceMiner/util/downloader/oneocr_dl.py +250 -0
  30. GameSentenceMiner/util/electron_config.py +259 -0
  31. GameSentenceMiner/util/ffmpeg.py +571 -0
  32. GameSentenceMiner/util/get_overlay_coords.py +366 -0
  33. GameSentenceMiner/util/gsm_utils.py +323 -0
  34. GameSentenceMiner/util/model.py +206 -0
  35. GameSentenceMiner/util/notification.py +157 -0
  36. GameSentenceMiner/util/text_log.py +214 -0
  37. GameSentenceMiner/util/win10toast/__init__.py +154 -0
  38. GameSentenceMiner/util/win10toast/__main__.py +22 -0
  39. GameSentenceMiner/web/__init__.py +0 -0
  40. GameSentenceMiner/web/service.py +132 -0
  41. GameSentenceMiner/web/static/__init__.py +0 -0
  42. GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  43. GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  44. GameSentenceMiner/web/static/favicon.ico +0 -0
  45. GameSentenceMiner/web/static/favicon.svg +3 -0
  46. GameSentenceMiner/web/static/site.webmanifest +21 -0
  47. GameSentenceMiner/web/static/style.css +292 -0
  48. GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  49. GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  50. GameSentenceMiner/web/templates/__init__.py +0 -0
  51. GameSentenceMiner/web/templates/index.html +50 -0
  52. GameSentenceMiner/web/templates/text_replacements.html +238 -0
  53. GameSentenceMiner/web/templates/utility.html +483 -0
  54. GameSentenceMiner/web/texthooking_page.py +584 -0
  55. GameSentenceMiner/wip/__init___.py +0 -0
  56. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/METADATA +1 -1
  57. gamesentenceminer-2.14.10.dist-info/RECORD +79 -0
  58. gamesentenceminer-2.14.9.dist-info/RECORD +0 -24
  59. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/WHEEL +0 -0
  60. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/entry_points.txt +0 -0
  61. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/licenses/LICENSE +0 -0
  62. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,366 @@
1
+ import asyncio
2
+ import io
3
+ import base64
4
+ import math
5
+ import os
6
+ import time
7
+ from PIL import Image
8
+ from typing import Dict, Any, List, Tuple
9
+
10
+ # Local application imports
11
+ from GameSentenceMiner.ocr.gsm_ocr_config import set_dpi_awareness
12
+ from GameSentenceMiner.util.configuration import get_config, is_windows
13
+ from GameSentenceMiner.util.electron_config import get_ocr_language
14
+ from GameSentenceMiner.obs import get_screenshot_PIL, logger
15
+ from GameSentenceMiner.web.texthooking_page import send_word_coordinates_to_overlay
16
+
17
+ # Conditionally import OCR engines
18
+ try:
19
+ if os.path.exists(os.path.expanduser('~/.config/oneocr/oneocr.dll')):
20
+ from GameSentenceMiner.owocr.owocr.ocr import OneOCR
21
+ else:
22
+ OneOCR = None
23
+ from GameSentenceMiner.owocr.owocr.ocr import GoogleLens, get_regex
24
+ except ImportError as import_err:
25
+ GoogleLens, OneOCR, get_regex = None, None, None
26
+ except Exception as e:
27
+ GoogleLens, OneOCR, get_regex = None, None, None
28
+ logger.error(f"Error importing OCR engines: {e}", exc_info=True)
29
+
30
+ # Conditionally import screenshot library
31
+ try:
32
+ import mss
33
+ except ImportError:
34
+ mss = None
35
+
36
class OverlayProcessor:
    """
    Handles the entire overlay process from screen capture to text extraction.

    This class encapsulates the logic for taking screenshots, identifying text
    regions, performing OCR, and processing the results into a structured format
    with pixel coordinates.
    """

    def __init__(self):
        """Initializes the OCR engines and configuration.

        Any failure during initialization is logged and leaves the processor
        disabled (``lens``/``oneocr``/``regex`` set to ``None``, ``ready`` False).
        """
        self.config = get_config()
        self.oneocr = None
        self.lens = None
        self.regex = None
        self.ocr_language = None
        self.ready = False

        try:
            if self.config.overlay.websocket_port and all([GoogleLens, get_regex]):
                logger.info("Initializing OCR engines...")
                language = get_ocr_language()
                if OneOCR:
                    self.oneocr = OneOCR(lang=language)
                self.lens = GoogleLens(lang=language)
                self.ocr_language = language
                self.regex = get_regex(self.ocr_language)
                logger.info("OCR engines initialized.")
                self.ready = True
            else:
                logger.warning("OCR dependencies not found or websocket port not configured. OCR functionality will be disabled.")

            # NOTE(review): `is_windows` is imported from the configuration
            # module; if it is a function, testing the bare object is always
            # truthy, so call it when it is callable.
            on_windows = is_windows() if callable(is_windows) else bool(is_windows)
            if on_windows:
                set_dpi_awareness()

            if not mss:
                logger.warning("MSS library not found. Screenshot functionality may be limited.")
        except Exception as e:
            logger.error(f"Error initializing OCR engines for overlay, try installing owocr in OCR tab of GSM: {e}", exc_info=True)
            self.oneocr = None
            self.lens = None
            self.regex = None
            # The engines were just torn down, so the processor must not
            # keep advertising itself as ready.
            self.ready = False

    async def find_box_and_send_to_overlay(self, sentence_to_check: str = None):
        """
        Sends the detected text boxes to the overlay via WebSocket.
        """
        boxes = await self.find_box_for_sentence(sentence_to_check)
        logger.info(f"Sending {len(boxes)} boxes to overlay.")
        await send_word_coordinates_to_overlay(boxes)

    async def find_box_for_sentence(self, sentence_to_check: str = None) -> List[Dict[str, Any]]:
        """
        Public method to perform OCR and find text boxes for a given sentence.

        This is a wrapper around the main work-horse method, providing
        error handling: any exception is logged and an empty list is returned.
        """
        try:
            return await self._do_work(sentence_to_check)
        except Exception as e:
            logger.error(f"Error during OCR processing: {e}", exc_info=True)
            return []

    def _get_full_screenshot(self) -> "Tuple[Image.Image | None, int, int]":
        """Captures a screenshot of the configured monitor.

        Returns:
            ``(image, monitor_width, monitor_height)``, or ``(None, 0, 0)``
            when the configured monitor index is out of range.

        Raises:
            RuntimeError: if the ``mss`` library could not be imported.
        """
        if not mss:
            raise RuntimeError("MSS screenshot library is not installed.")

        with mss.mss() as sct:
            monitors = sct.monitors
            # Index 0 is the 'all monitors' virtual screen, so we skip it.
            monitor_list = monitors[1:] if len(monitors) > 1 else [monitors[0]]

            monitor_index = self.config.overlay.monitor_to_capture
            if monitor_index >= len(monitor_list):
                logger.error(f"Monitor index {monitor_index} is out of bounds. Found {len(monitor_list)} monitors.")
                return None, 0, 0

            monitor = monitor_list[monitor_index]
            sct_img = sct.grab(monitor)
            img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')

            return img, monitor['width'], monitor['height']

    def _create_composite_image(
        self,
        full_screenshot: "Image.Image",
        crop_coords_list: List[Tuple[int, int, int, int]],
        monitor_width: int,
        monitor_height: int
    ) -> "Image.Image":
        """
        Creates a new image by pasting cropped text regions onto a transparent background.
        This isolates text for more accurate secondary OCR.

        Returns the original screenshot unchanged when there are no crop
        regions or when cropping raises ``ValueError``.
        """
        if not crop_coords_list:
            return full_screenshot

        # Create a transparent canvas
        composite_img = Image.new("RGBA", (monitor_width, monitor_height), (0, 0, 0, 0))

        for crop_coords in crop_coords_list:
            # Ensure crop coordinates are within image bounds
            x1, y1, x2, y2 = crop_coords
            x1 = max(0, min(x1, full_screenshot.width))
            y1 = max(0, min(y1, full_screenshot.height))
            x2 = max(x1, min(x2, full_screenshot.width))
            y2 = max(y1, min(y2, full_screenshot.height))

            # Skip if the coordinates result in an invalid box
            if x1 >= x2 or y1 >= y2:
                continue
            try:
                cropped_image = full_screenshot.crop((x1, y1, x2, y2))
            except ValueError:
                logger.warning("Error cropping image, using original image")
                return full_screenshot
            # Paste the cropped image onto the canvas at its original location
            paste_x = math.floor(x1)
            paste_y = math.floor(y1)
            composite_img.paste(cropped_image, (paste_x, paste_y))

        return composite_img

    async def _do_work(self, sentence_to_check: str = None) -> List[Dict[str, Any]]:
        """The main OCR workflow.

        Takes a screenshot, optionally narrows it to OneOCR-detected text
        regions, runs Google Lens on the result and converts the Lens
        response to pixel boxes. When Lens yields nothing usable, the OneOCR
        results are returned instead (an empty list if OneOCR is unavailable).
        """
        if not self.lens:
            logger.error("OCR engines are not initialized. Cannot perform OCR for Overlay.")
            return []

        # 1. Get screenshot
        full_screenshot, monitor_width, monitor_height = self._get_full_screenshot()
        if not full_screenshot:
            logger.warning("Failed to get a screenshot.")
            return []

        # Fallback value for the early returns below; it must exist even when
        # OneOCR is not installed, otherwise those returns raise
        # UnboundLocalError.
        oneocr_results = []
        if self.oneocr:
            # 2. Use OneOCR to find general text areas (fast)
            _, _, oneocr_results, crop_coords_list = self.oneocr(
                full_screenshot,
                return_coords=True,
                multiple_crop_coords=True,
                return_one_box=False,
                furigana_filter_sensitivity=None  # Disable furigana filtering
            )

            # 3. Create a composite image with only the detected text regions
            composite_image = self._create_composite_image(
                full_screenshot,
                crop_coords_list,
                monitor_width,
                monitor_height
            )
        else:
            composite_image = full_screenshot

        # 4. Use Google Lens on the cleaner composite image for higher accuracy
        res = self.lens(
            composite_image,
            return_coords=True,
            furigana_filter_sensitivity=None  # Disable furigana filtering
        )

        # Check for an empty result before measuring it, so a None/empty
        # response cannot fail inside len().
        if not res or len(res) != 3:
            return oneocr_results

        _, _, coords = res

        if not coords:
            return oneocr_results

        # 5. Process the high-accuracy results into the desired format
        extracted_data = self._extract_text_with_pixel_boxes(
            api_response=coords,
            original_width=monitor_width,
            original_height=monitor_height,
            crop_x=0,
            crop_y=0,
            crop_width=composite_image.width,
            crop_height=composite_image.height
        )

        return extracted_data

    def _extract_text_with_pixel_boxes(
        self,
        api_response: Dict[str, Any],
        original_width: int,
        original_height: int,
        crop_x: int,
        crop_y: int,
        crop_width: int,
        crop_height: int
    ) -> List[Dict[str, Any]]:
        """
        Parses Google Lens API response and converts normalized coordinates
        to absolute pixel coordinates.

        Returns one dict per non-empty line with the keys ``text``,
        ``bounding_rect`` and ``words`` (each word has ``text`` and
        ``bounding_rect``). ``original_width`` / ``original_height`` are
        accepted but not used by the conversion.
        """
        results = []
        try:
            paragraphs = api_response["objects_response"]["text"]["text_layout"]["paragraphs"]
        except (KeyError, TypeError):
            return []  # Return empty if the expected structure isn't present

        for para in paragraphs:
            for line in para.get("lines", []):
                line_text_parts = []
                word_list = []

                for word in line.get("words", []):
                    word_text = word.get("plain_text", "")
                    line_text_parts.append(word_text)

                    word_box = self._convert_box_to_pixels_v2(
                        word["geometry"]["bounding_box"],
                        crop_x, crop_y, crop_width, crop_height
                    )

                    word_list.append({
                        "text": word_text,
                        "bounding_rect": word_box
                    })

                # A line without words contributes nothing to the overlay.
                if not line_text_parts:
                    continue

                full_line_text = "".join(line_text_parts)
                line_box = self._convert_box_to_pixels_v2(
                    line["geometry"]["bounding_box"],
                    crop_x, crop_y, crop_width, crop_height
                )

                results.append({
                    "text": full_line_text,
                    "bounding_rect": line_box,
                    "words": word_list
                })
        return results

    def _convert_box_to_pixels_v2(
        self,
        bbox_data: Dict[str, float],
        crop_x: int,
        crop_y: int,
        crop_width: int,
        crop_height: int
    ) -> Dict[str, float]:
        """
        Simplified conversion: scales normalized bbox to pixel coordinates within
        the cropped region, then offsets by the crop position. Ignores rotation.

        Returns the four corners as ``x1,y1`` (top-left), ``x2,y2`` (top-right),
        ``x3,y3`` (bottom-right) and ``x4,y4`` (bottom-left).
        """
        cx, cy = bbox_data['center_x'], bbox_data['center_y']
        w, h = bbox_data['width'], bbox_data['height']

        # Scale normalized coordinates to pixel coordinates relative to the crop area
        box_width_px = w * crop_width
        box_height_px = h * crop_height

        # Calculate center within the cropped area and then add the crop offset
        center_x_px = (cx * crop_width) + crop_x
        center_y_px = (cy * crop_height) + crop_y

        # Calculate corners (unrotated)
        half_w_px, half_h_px = box_width_px / 2, box_height_px / 2
        return {
            "x1": center_x_px - half_w_px, "y1": center_y_px - half_h_px,
            "x2": center_x_px + half_w_px, "y2": center_y_px - half_h_px,
            "x3": center_x_px + half_w_px, "y3": center_y_px + half_h_px,
            "x4": center_x_px - half_w_px, "y4": center_y_px + half_h_px,
        }
304
+
305
async def main_test_screenshot():
    """
    Manual smoke test: grab a screenshot, show it, then show it again
    pasted onto a transparent canvas of the monitor's size.
    """
    overlay = OverlayProcessor()

    # Reuse the processor's own capture routine
    screenshot, mon_w, mon_h = overlay._get_full_screenshot()
    if not screenshot:
        logger.error("Could not get screenshot for test.")
        return

    screenshot.show()

    # Transparent canvas matching the monitor dimensions
    canvas = Image.new("RGBA", (mon_w, mon_h), (0, 0, 0, 0))

    # Offsets that center the capture on the canvas (zero when it is full-screen)
    left = (mon_w - screenshot.width) // 2
    top = (mon_h - screenshot.height) // 2

    print(f"Image size: {screenshot.size}, Monitor size: {mon_w}x{mon_h}")
    print(f"Pasting at: Left={left}, Top={top}")

    canvas.paste(screenshot, (left, top))
    canvas.show()
332
+
333
async def main_run_ocr():
    """
    Main function to demonstrate running the full OCR process.

    Prints the first recognized line (and its first word) together with
    their bounding boxes, or a notice when nothing was recognized.
    """
    processor = OverlayProcessor()
    # find_box_for_sentence returns a plain list of line dicts; unpacking it
    # as a 2-tuple raises ValueError (e.g. on the empty list it returns on
    # failure), so bind the list directly.
    results = await processor.find_box_for_sentence()
    if not results:
        print("OCR process did not find any text.")
        return

    print("OCR process completed successfully.")
    # Find first result with some text
    for result in results:
        if result.get("text"):
            print(f"Found line: '{result['text']}'")
            print(f" - Line BBox: {result['bounding_rect']}")
            if result.get("words"):
                print(f" - First word: '{result['words'][0]['text']}' BBox: {result['words'][0]['bounding_rect']}")
            break
353
+
354
+
355
# Manual entry point: runs the OCR demo when this module is executed directly.
if __name__ == '__main__':
    try:
        # To run the screenshot test:
        # asyncio.run(main_test_screenshot())

        # To run the full OCR process:
        asyncio.run(main_run_ocr())

    except KeyboardInterrupt:
        # Ctrl+C is treated as a normal way to stop the script.
        logger.info("Script terminated by user.")
    except Exception as e:
        # Log anything else with its traceback.
        logger.error(f"An error occurred in the main execution block: {e}", exc_info=True)
@@ -0,0 +1,323 @@
1
+ import json
2
+ import os
3
+ import random
4
+ import re
5
+ import socket
6
+ import string
7
+ import subprocess
8
+ import threading
9
+ import time
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+
13
+ import requests
14
+ from rapidfuzz import process
15
+
16
+ from GameSentenceMiner.util.configuration import logger, get_config, get_app_directory
17
+
18
# Directory that find_script_for_game() scans for agent ".js" scripts.
# NOTE(review): this is a hard-coded absolute Windows path; confirm whether it
# should come from configuration instead.
SCRIPTS_DIR = r"E:\Japanese Stuff\agent-v0.1.4-win32-x64\data\scripts"
19
+
20
+ def run_new_thread(func):
21
+ thread = threading.Thread(target=func, daemon=True)
22
+ thread.start()
23
+ return thread
24
+
25
+ def make_unique_file_name(path):
26
+ path = Path(path)
27
+ current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
28
+ return str(path.parent / f"{path.stem}_{current_time}{path.suffix}")
29
+
30
+ def make_unique(text):
31
+ """
32
+ Generate a unique string by appending a timestamp to the input text.
33
+ This is useful for creating unique filenames or identifiers.
34
+ """
35
+ current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
36
+ return f"{text}_{current_time}"
37
+
38
+ def sanitize_filename(filename):
39
+ return re.sub(r'[ <>:"/\\|?*\x00-\x1F]', '', filename)
40
+
41
+
42
+ def get_random_digit_string():
43
+ return ''.join(random.choice(string.digits) for i in range(9))
44
+
45
+
46
+ def timedelta_to_ffmpeg_friendly_format(td_obj):
47
+ total_seconds = td_obj.total_seconds()
48
+ hours, remainder = divmod(total_seconds, 3600)
49
+ minutes, seconds = divmod(remainder, 60)
50
+ return "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
51
+
52
+
53
+ def get_file_modification_time(file_path):
54
+ mod_time_epoch = os.path.getmtime(file_path)
55
+ mod_time = datetime.fromtimestamp(mod_time_epoch)
56
+ return mod_time
57
+
58
+
59
+ def get_process_id_by_title(game_title):
60
+ powershell_command = f"Get-Process | Where-Object {{$_.MainWindowTitle -like '*{game_title}*'}} | Select-Object -First 1 -ExpandProperty Id"
61
+ process_id = subprocess.check_output(["powershell", "-Command", powershell_command], text=True).strip()
62
+ logger.info(f"Process ID for {game_title}: {process_id}")
63
+ return process_id
64
+
65
+
66
+ def get_script_files(directory):
67
+ script_files = []
68
+ for root, dirs, files in os.walk(directory):
69
+ for file in files:
70
+ if file.endswith(".js"): # Assuming the scripts are .js files
71
+ script_files.append(os.path.join(root, file))
72
+ return script_files
73
+
74
+
75
+ def filter_steam_scripts(scripts):
76
+ return [script for script in scripts if "PC_Steam" in os.path.basename(script)]
77
+
78
+
79
+ def extract_game_name(script_path):
80
+ # Remove directory and file extension to get the name part
81
+ script_name = os.path.basename(script_path)
82
+ game_name = script_name.replace("PC_Steam_", "").replace(".js", "")
83
+ return game_name.replace("_", " ").replace(".", " ")
84
+
85
+
86
+ def find_most_similar_script(game_title, steam_scripts):
87
+ # Create a list of game names from the script paths
88
+ game_names = [extract_game_name(script) for script in steam_scripts]
89
+
90
+ # Use rapidfuzz to find the closest match
91
+ best_match = process.extractOne(game_title, game_names)
92
+
93
+ if best_match:
94
+ matched_game_name, confidence_score, index = best_match
95
+ return steam_scripts[index], matched_game_name, confidence_score
96
+ return None, None, None
97
+
98
+
99
+ def find_script_for_game(game_title):
100
+ script_files = get_script_files(SCRIPTS_DIR)
101
+
102
+ steam_scripts = filter_steam_scripts(script_files)
103
+
104
+ best_script, matched_game_name, confidence = find_most_similar_script(game_title, steam_scripts)
105
+
106
+
107
+ if best_script:
108
+ logger.info(f"Found Script: {best_script}")
109
+ return best_script
110
+ else:
111
+ logger.warning("No similar script found.")
112
+
113
+
114
+ def run_agent_and_hook(pname, agent_script):
115
+ command = f'agent --script=\"{agent_script}\" --pname={pname}'
116
+ logger.info("Running and Hooking Agent!")
117
+ try:
118
+ dos_process = subprocess.Popen(command, shell=True)
119
+ dos_process.wait() # Wait for the process to complete
120
+ logger.info("Agent script finished or closed.")
121
+ except Exception as e:
122
+ logger.error(f"Error occurred while running agent script: {e}")
123
+
124
+ keep_running = False
125
+
126
+
127
+ # def run_command(command, shell=False, input=None, capture_output=False, timeout=None, check=False, **kwargs):
128
+ # # Use shell=True if the OS is Linux, otherwise shell=False
129
+ # if is_linux():
130
+ # return subprocess.run(command, shell=True, input=input, capture_output=capture_output, timeout=timeout,
131
+ # check=check, **kwargs)
132
+ # else:
133
+ # return subprocess.run(command, shell=shell, input=input, capture_output=capture_output, timeout=timeout,
134
+ # check=check, **kwargs)
135
+ def remove_html_and_cloze_tags(text):
136
+ text = re.sub(r'<.*?>', '', re.sub(r'{{c\d+::(.*?)(::.*?)?}}', r'\1', text))
137
+ return text
138
+
139
+
140
+ def combine_dialogue(dialogue_lines, new_lines=None):
141
+ if not dialogue_lines: # Handle empty input
142
+ return []
143
+
144
+ if new_lines is None:
145
+ new_lines = []
146
+
147
+ if len(dialogue_lines) == 1 and '「' not in dialogue_lines[0]:
148
+ new_lines.append(dialogue_lines[0])
149
+ return new_lines
150
+
151
+ character_name = dialogue_lines[0].split("「")[0]
152
+ text = character_name + "「"
153
+
154
+ for i, line in enumerate(dialogue_lines):
155
+ if not line.startswith(character_name + "「"):
156
+ text = text + "」" + get_config().advanced.multi_line_line_break
157
+ new_lines.append(text)
158
+ new_lines.extend(combine_dialogue(dialogue_lines[i:]))
159
+ break
160
+ else:
161
+ text += (get_config().advanced.multi_line_line_break if i > 0 else "") + line.split("「")[1].rstrip("」") + ""
162
+ else:
163
+ text = text + "」"
164
+ new_lines.append(text)
165
+
166
+ return new_lines
167
+
168
+ def wait_for_stable_file(file_path, timeout=10, check_interval=0.5):
169
+ elapsed_time = 0
170
+ last_size = -1
171
+
172
+ logger.info(f"Waiting for file '{file_path}' to stabilize or become accessible...")
173
+
174
+ while elapsed_time < timeout:
175
+ try:
176
+ current_size = os.path.getsize(file_path)
177
+ if current_size == last_size:
178
+ try:
179
+ with open(file_path, 'rb'):
180
+ return True
181
+ except IOError:
182
+ pass
183
+ last_size = current_size
184
+ except FileNotFoundError:
185
+ last_size = -1
186
+ except Exception as e:
187
+ logger.warning(f"Error checking file {file_path}, will retry: {e}")
188
+ last_size = -1
189
+
190
+ time.sleep(check_interval)
191
+ elapsed_time += check_interval
192
+
193
+ logger.warning(f"File '{file_path}' did not stabilize or become accessible within {timeout} seconds. Continuing...")
194
+ return False
195
+
196
+ def isascii(s: str):
197
+ try:
198
+ return s.isascii()
199
+ except:
200
+ try:
201
+ s.encode("ascii")
202
+ return True
203
+ except:
204
+ return False
205
+
206
+ def do_text_replacements(text, replacements_json):
207
+ if not text:
208
+ return text
209
+
210
+ replacements = {}
211
+ if os.path.exists(replacements_json):
212
+ with open(replacements_json, 'r', encoding='utf-8') as f:
213
+ replacements.update(json.load(f))
214
+
215
+ if replacements.get("enabled", False):
216
+ orig_text = text
217
+ filters = replacements.get("args", {}).get("replacements", {})
218
+ for fil, replacement in filters.items():
219
+ if not fil:
220
+ continue
221
+ if fil.startswith("re:"):
222
+ pattern = fil[3:]
223
+ try:
224
+ text = re.sub(pattern, replacement, text)
225
+ except Exception:
226
+ logger.error(f"Invalid regex pattern: {pattern}")
227
+ continue
228
+ if isascii(fil):
229
+ text = re.sub(r"\b{}\b".format(re.escape(fil)), replacement, text)
230
+ else:
231
+ text = text.replace(fil, replacement)
232
+ if text != orig_text:
233
+ logger.info(f"Text replaced: '{orig_text}' -> '{text}' using replacements.")
234
+ return text
235
+
236
+
237
+ def open_audio_in_external(fileabspath, shell=False):
238
+ logger.info(f"Opening audio in external program...")
239
+ try:
240
+ if shell:
241
+ subprocess.Popen(f' "{get_config().audio.external_tool}" "{fileabspath}" ', shell=True)
242
+ else:
243
+ subprocess.Popen([get_config().audio.external_tool, fileabspath])
244
+ except Exception as e:
245
+ logger.error(f"Failed to open audio in external program: {e}")
246
+ return False
247
+
248
+ def is_connected():
249
+ try:
250
+ # Attempt to connect to a well-known host
251
+ socket.create_connection(("www.google.com", 80), timeout=2)
252
+ return True
253
+ except OSError:
254
+ return False
255
+
256
+
257
# Paths of the user's replacement rule files inside the app's "config" directory.
TEXT_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'text_replacements.json')
OCR_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'ocr_replacements.json')
# Import-time side effect: make sure the config directory exists.
os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)
260
+
261
+ # if not os.path.exists(OCR_REPLACEMENTS_FILE):
262
+ # url = "https://raw.githubusercontent.com/bpwhelan/GameSentenceMiner/refs/heads/main/electron-src/assets/ocr_replacements.json"
263
+ # try:
264
+ # with urllib.request.urlopen(url) as response:
265
+ # data = response.read().decode('utf-8')
266
+ # with open(OCR_REPLACEMENTS_FILE, 'w', encoding='utf-8') as f:
267
+ # f.write(data)
268
+ # except Exception as e:
269
+ # logger.error(f"Failed to fetch JSON from {url}: {e}")
270
+
271
+
272
+ # Remove GitHub replacements from local OCR replacements file, these replacements are not needed
273
def remove_github_replacements_from_local_ocr():
    """
    Delete from the local OCR replacements file every key that also appears
    in the ``ocr_replacements.json`` published in the project's GitHub repo.

    Best-effort: network, JSON and file errors are logged at debug level and
    the function returns without raising. The local file is rewritten only
    when at least one key was removed.
    """
    github_url = "https://raw.githubusercontent.com/bpwhelan/GameSentenceMiner/main/electron-src/assets/ocr_replacements.json"

    github_replacements = {}
    try:
        # This runs while the module is being imported, so bound the wait:
        # without a timeout an unresponsive network stalls the import
        # indefinitely. A timeout raises a RequestException, handled below.
        response = requests.get(github_url, timeout=10)
        response.raise_for_status()
        github_data = response.json()
        github_replacements = github_data.get('args', {}).get('replacements', {})
        logger.debug(f"Successfully fetched {len(github_replacements)} replacements from GitHub.")
    except requests.exceptions.RequestException as e:
        logger.debug(f"Failed to fetch GitHub replacements from {github_url}: {e}")
        return
    except json.JSONDecodeError as e:
        logger.debug(f"Error decoding JSON from GitHub response: {e}")
        return

    if not os.path.exists(OCR_REPLACEMENTS_FILE):
        logger.debug(f"Local file {OCR_REPLACEMENTS_FILE} does not exist. No replacements to remove.")
        return

    try:
        with open(OCR_REPLACEMENTS_FILE, 'r', encoding='utf-8') as f:
            local_ocr_data = json.load(f)

        local_replacements = local_ocr_data.get('args', {}).get('replacements', {})
        original_count = len(local_replacements)
        logger.debug(f"Loaded {original_count} replacements from local file.")

        removed_count = 0
        for key_to_remove in github_replacements:
            if key_to_remove in local_replacements:
                del local_replacements[key_to_remove]
                removed_count += 1

        if removed_count > 0:
            # removed_count > 0 implies 'args' -> 'replacements' exists locally.
            local_ocr_data['args']['replacements'] = local_replacements
            with open(OCR_REPLACEMENTS_FILE, 'w', encoding='utf-8') as f:
                json.dump(local_ocr_data, f, ensure_ascii=False, indent=4)
            logger.debug(f"Successfully removed {removed_count} replacements from {OCR_REPLACEMENTS_FILE}.")
            logger.debug(f"Remaining replacements in local file: {len(local_replacements)}")
        else:
            logger.debug("No matching replacements from GitHub found in your local file to remove.")

    except json.JSONDecodeError as e:
        logger.debug(f"Error decoding JSON from {OCR_REPLACEMENTS_FILE}: {e}. Please ensure it's valid JSON.")
    except Exception as e:
        logger.debug(f"An unexpected error occurred while processing {OCR_REPLACEMENTS_FILE}: {e}")
321
+
322
+
323
+ remove_github_replacements_from_local_ocr()