GameSentenceMiner 2.14.9__py3-none-any.whl → 2.14.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. GameSentenceMiner/ai/__init__.py +0 -0
  2. GameSentenceMiner/ai/ai_prompting.py +473 -0
  3. GameSentenceMiner/ocr/__init__.py +0 -0
  4. GameSentenceMiner/ocr/gsm_ocr_config.py +174 -0
  5. GameSentenceMiner/ocr/ocrconfig.py +129 -0
  6. GameSentenceMiner/ocr/owocr_area_selector.py +629 -0
  7. GameSentenceMiner/ocr/owocr_helper.py +638 -0
  8. GameSentenceMiner/ocr/ss_picker.py +140 -0
  9. GameSentenceMiner/owocr/owocr/__init__.py +1 -0
  10. GameSentenceMiner/owocr/owocr/__main__.py +9 -0
  11. GameSentenceMiner/owocr/owocr/config.py +148 -0
  12. GameSentenceMiner/owocr/owocr/lens_betterproto.py +1238 -0
  13. GameSentenceMiner/owocr/owocr/ocr.py +1690 -0
  14. GameSentenceMiner/owocr/owocr/run.py +1818 -0
  15. GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +109 -0
  16. GameSentenceMiner/tools/__init__.py +0 -0
  17. GameSentenceMiner/tools/audio_offset_selector.py +215 -0
  18. GameSentenceMiner/tools/ss_selector.py +135 -0
  19. GameSentenceMiner/tools/window_transparency.py +214 -0
  20. GameSentenceMiner/util/__init__.py +0 -0
  21. GameSentenceMiner/util/communication/__init__.py +22 -0
  22. GameSentenceMiner/util/communication/send.py +7 -0
  23. GameSentenceMiner/util/communication/websocket.py +94 -0
  24. GameSentenceMiner/util/configuration.py +1199 -0
  25. GameSentenceMiner/util/db.py +408 -0
  26. GameSentenceMiner/util/downloader/Untitled_json.py +472 -0
  27. GameSentenceMiner/util/downloader/__init__.py +0 -0
  28. GameSentenceMiner/util/downloader/download_tools.py +194 -0
  29. GameSentenceMiner/util/downloader/oneocr_dl.py +250 -0
  30. GameSentenceMiner/util/electron_config.py +259 -0
  31. GameSentenceMiner/util/ffmpeg.py +571 -0
  32. GameSentenceMiner/util/get_overlay_coords.py +366 -0
  33. GameSentenceMiner/util/gsm_utils.py +323 -0
  34. GameSentenceMiner/util/model.py +206 -0
  35. GameSentenceMiner/util/notification.py +157 -0
  36. GameSentenceMiner/util/text_log.py +214 -0
  37. GameSentenceMiner/util/win10toast/__init__.py +154 -0
  38. GameSentenceMiner/util/win10toast/__main__.py +22 -0
  39. GameSentenceMiner/web/__init__.py +0 -0
  40. GameSentenceMiner/web/service.py +132 -0
  41. GameSentenceMiner/web/static/__init__.py +0 -0
  42. GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  43. GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  44. GameSentenceMiner/web/static/favicon.ico +0 -0
  45. GameSentenceMiner/web/static/favicon.svg +3 -0
  46. GameSentenceMiner/web/static/site.webmanifest +21 -0
  47. GameSentenceMiner/web/static/style.css +292 -0
  48. GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  49. GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  50. GameSentenceMiner/web/templates/__init__.py +0 -0
  51. GameSentenceMiner/web/templates/index.html +50 -0
  52. GameSentenceMiner/web/templates/text_replacements.html +238 -0
  53. GameSentenceMiner/web/templates/utility.html +483 -0
  54. GameSentenceMiner/web/texthooking_page.py +584 -0
  55. GameSentenceMiner/wip/__init___.py +0 -0
  56. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/METADATA +1 -1
  57. gamesentenceminer-2.14.10.dist-info/RECORD +79 -0
  58. gamesentenceminer-2.14.9.dist-info/RECORD +0 -24
  59. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/WHEEL +0 -0
  60. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/entry_points.txt +0 -0
  61. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/licenses/LICENSE +0 -0
  62. {gamesentenceminer-2.14.9.dist-info → gamesentenceminer-2.14.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1818 @@
1
+ from ...ocr.gsm_ocr_config import set_dpi_awareness, get_scene_ocr_config_path, OCRConfig, get_scene_ocr_config
2
+ from ...util.electron_config import *
3
+
4
+ try:
5
+ import win32gui
6
+ import win32ui
7
+ import win32api
8
+ import win32con
9
+ import win32process
10
+ import win32clipboard
11
+ import pywintypes
12
+ import ctypes
13
+ except ImportError:
14
+ pass
15
+
16
+ try:
17
+ import objc
18
+ import platform
19
+ from AppKit import NSData, NSImage, NSBitmapImageRep, NSDeviceRGBColorSpace, NSGraphicsContext, NSZeroPoint, NSZeroRect, NSCompositingOperationCopy
20
+ from Quartz import CGWindowListCreateImageFromArray, kCGWindowImageBoundsIgnoreFraming, CGRectMake, CGRectNull, CGMainDisplayID, CGWindowListCopyWindowInfo, \
21
+ CGWindowListCreateDescriptionFromArray, kCGWindowListOptionOnScreenOnly, kCGWindowListExcludeDesktopElements, kCGWindowName, kCGNullWindowID, \
22
+ CGImageGetWidth, CGImageGetHeight, CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow
23
+ from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionBest
24
+ except ImportError:
25
+ pass
26
+
27
+ import signal
28
+ import threading
29
+ from pathlib import Path
30
+ import queue
31
+ import io
32
+ import re
33
+ import logging
34
+ import inspect
35
+ import time
36
+
37
+ import pyperclipfix
38
+ import mss
39
+ import asyncio
40
+ import websockets
41
+ import socketserver
42
+ import queue
43
+
44
+ from datetime import datetime, timedelta
45
+ from PIL import Image, ImageDraw, UnidentifiedImageError
46
+ from loguru import logger
47
+ from desktop_notifier import DesktopNotifierSync
48
+ import psutil
49
+
50
+ import inspect
51
+ from .ocr import *
52
+ try:
53
+ from .secret import *
54
+ except ImportError:
55
+ pass
56
+ from .config import Config
57
+ from .screen_coordinate_picker import get_screen_selection
58
+ from GameSentenceMiner.util.configuration import get_temporary_directory, get_config
59
+
60
+ config = None
61
+ last_image = None
62
+
63
+
64
class ClipboardThread(threading.Thread):
    """Daemon thread that watches the system clipboard for new images and
    feeds them into the module-level ``image_queue`` for OCR.

    Three platform strategies:
      * Windows: a hidden message-only window registered as a clipboard
        format listener (event driven via ``WM_CLIPBOARDUPDATE``).
      * macOS: polls ``NSPasteboard.changeCount()``.
      * other: polls ``PIL.ImageGrab.grabclipboard()``.
    """

    def __init__(self):
        super().__init__(daemon=True)
        # When True, images are queued even if the clipboard text is the
        # '*ocr_ignore*' sentinel that GSM itself writes.
        self.ignore_flag = config.get_general('ignore_flag')
        self.delay_secs = config.get_general('delay_secs')
        self.last_update = time.time()

    def are_images_identical(self, img1, img2):
        """Pixel-exact comparison. NOTE: if exactly one argument is None this
        returns ``img1 == img2`` (False); if both are None it returns True."""
        if None in (img1, img2):
            return img1 == img2

        img1 = np.array(img1)
        img2 = np.array(img2)

        return (img1.shape == img2.shape) and (img1 == img2).all()

    def normalize_macos_clipboard(self, img):
        """Re-render raw macOS pasteboard TIFF bytes into a fresh RGBA bitmap
        and return TIFF bytes, normalizing color space / layout differences."""
        ns_data = NSData.dataWithBytes_length_(img, len(img))
        ns_image = NSImage.alloc().initWithData_(ns_data)

        new_image = NSBitmapImageRep.alloc().initWithBitmapDataPlanes_pixelsWide_pixelsHigh_bitsPerSample_samplesPerPixel_hasAlpha_isPlanar_colorSpaceName_bytesPerRow_bitsPerPixel_(
            None,  # Set to None to create a new bitmap
            int(ns_image.size().width),
            int(ns_image.size().height),
            8,  # Bits per sample
            4,  # Samples per pixel (R, G, B, A)
            True,  # Has alpha
            False,  # Is not planar
            NSDeviceRGBColorSpace,
            0,  # Automatically compute bytes per row
            32  # Bits per pixel (8 bits per sample * 4 samples per pixel)
        )

        context = NSGraphicsContext.graphicsContextWithBitmapImageRep_(
            new_image)
        NSGraphicsContext.setCurrentContext_(context)

        ns_image.drawAtPoint_fromRect_operation_fraction_(
            NSZeroPoint,
            NSZeroRect,
            NSCompositingOperationCopy,
            1.0
        )

        return bytes(new_image.TIFFRepresentation())

    def process_message(self, hwnd: int, msg: int, wparam: int, lparam: int):
        """Windows WndProc. On WM_CLIPBOARDUPDATE (rate-limited to one event
        per second, and only while not paused) pull any bitmap off the
        clipboard and queue it unless the '*ocr_ignore*' sentinel is set."""
        WM_CLIPBOARDUPDATE = 0x031D
        timestamp = time.time()
        if msg == WM_CLIPBOARDUPDATE and timestamp - self.last_update > 1 and not paused:
            self.last_update = timestamp
            # OpenClipboard can fail transiently while another process holds
            # the clipboard; spin with a short sleep until it succeeds.
            while True:
                try:
                    win32clipboard.OpenClipboard()
                    break
                except pywintypes.error:
                    pass
                time.sleep(0.1)
            try:
                if win32clipboard.IsClipboardFormatAvailable(win32con.CF_BITMAP) and win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
                    clipboard_text = ''
                    if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT):
                        clipboard_text = win32clipboard.GetClipboardData(
                            win32clipboard.CF_UNICODETEXT)
                    if self.ignore_flag or clipboard_text != '*ocr_ignore*':
                        img = win32clipboard.GetClipboardData(
                            win32clipboard.CF_DIB)
                        image_queue.put((img, False))
                win32clipboard.CloseClipboard()
            except pywintypes.error:
                pass
        return 0

    def create_window(self):
        """Register a window class whose WndProc is process_message and create
        a zero-sized window to receive clipboard notifications."""
        className = 'ClipboardHook'
        wc = win32gui.WNDCLASS()
        wc.lpfnWndProc = self.process_message
        wc.lpszClassName = className
        wc.hInstance = win32api.GetModuleHandle(None)
        class_atom = win32gui.RegisterClass(wc)
        return win32gui.CreateWindow(class_atom, className, 0, 0, 0, 0, 0, 0, 0, wc.hInstance, None)

    def run(self):
        if sys.platform == 'win32':
            # Event-driven path: blocks forever inside PumpMessages.
            hwnd = self.create_window()
            self.thread_id = win32api.GetCurrentThreadId()
            ctypes.windll.user32.AddClipboardFormatListener(hwnd)
            win32gui.PumpMessages()
        else:
            is_macos = sys.platform == 'darwin'
            if is_macos:
                from AppKit import NSPasteboard, NSPasteboardTypeTIFF, NSPasteboardTypeString
                pasteboard = NSPasteboard.generalPasteboard()
                count = pasteboard.changeCount()
            else:
                from PIL import ImageGrab
            # process_clipboard stays False for the first iteration (and after
            # a pause) so pre-existing clipboard content is not re-queued.
            process_clipboard = False
            img = None

            while not terminated:
                if paused:
                    sleep_time = 0.5
                    process_clipboard = False
                else:
                    sleep_time = self.delay_secs
                    if is_macos:
                        with objc.autorelease_pool():
                            old_count = count
                            count = pasteboard.changeCount()
                            if process_clipboard and count != old_count:
                                # Pasteboard types can be momentarily empty
                                # right after a change; wait them out.
                                while len(pasteboard.types()) == 0:
                                    time.sleep(0.1)
                                if NSPasteboardTypeTIFF in pasteboard.types():
                                    clipboard_text = ''
                                    if NSPasteboardTypeString in pasteboard.types():
                                        clipboard_text = pasteboard.stringForType_(
                                            NSPasteboardTypeString)
                                    if self.ignore_flag or clipboard_text != '*ocr_ignore*':
                                        img = self.normalize_macos_clipboard(
                                            pasteboard.dataForType_(NSPasteboardTypeTIFF))
                                        image_queue.put((img, False))
                    else:
                        old_img = img
                        try:
                            img = ImageGrab.grabclipboard()
                        except Exception:
                            pass
                        else:
                            if (process_clipboard and isinstance(img, Image.Image) and
                                    (self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and
                                    (not self.are_images_identical(img, old_img))):
                                image_queue.put((img, False))

                    process_clipboard = True

                if not terminated:
                    time.sleep(sleep_time)
201
+
202
+
203
class DirectoryWatcher(threading.Thread):
    """Daemon thread that polls a directory and queues newly created or
    modified image files into the module-level ``image_queue`` for OCR.

    Existing files found at startup are recorded first so only genuinely new
    content is queued.
    """

    def __init__(self, path):
        """
        :param path: a ``pathlib.Path`` to the directory to watch.
        """
        super().__init__(daemon=True)
        self.path = path
        self.delay_secs = config.get_general('delay_secs')
        self.last_update = time.time()
        self.allowed_extensions = (
            '.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')

    def get_path_key(self, path):
        # Key on (path, mtime) so an overwritten file counts as new content.
        return path, path.lstat().st_mtime

    def run(self):
        old_paths = set()
        for path in self.path.iterdir():
            if path.suffix.lower() in self.allowed_extensions:
                # Bug fix: original called bare get_path_key(path), which is
                # not a module-level name and raised NameError on start.
                old_paths.add(self.get_path_key(path))

        while not terminated:
            # Poll slowly while paused; otherwise use the configured rate.
            if paused:
                sleep_time = 0.5
            else:
                sleep_time = self.delay_secs
                for path in self.path.iterdir():
                    if path.suffix.lower() in self.allowed_extensions:
                        path_key = self.get_path_key(path)
                        if path_key not in old_paths:
                            old_paths.add(path_key)

                            if not paused:
                                image_queue.put((path, False))

            if not terminated:
                time.sleep(sleep_time)
237
+
238
+
239
class WebsocketServerThread(threading.Thread):
    """Daemon thread running a websocket server on its own asyncio loop.

    Incoming messages are queued for OCR when ``read`` is True and the global
    ``paused`` flag is clear; each message is acknowledged with the string
    'True' or 'False'. Outgoing text can be broadcast from any thread via
    :meth:`send_text`.
    """

    def __init__(self, read):
        super().__init__(daemon=True)
        self._loop = None
        # Whether received messages should be fed into image_queue.
        self.read = read
        self.clients = set()
        # Set once the event loop exists, so cross-thread callers can wait.
        self._event = threading.Event()

    @property
    def loop(self):
        # Blocks until run() has created the loop (see _event.set() below).
        self._event.wait()
        return self._loop

    async def send_text_coroutine(self, text):
        # Broadcast to every connected client.
        for client in self.clients:
            await client.send(text)

    async def server_handler(self, websocket):
        self.clients.add(websocket)
        try:
            async for message in websocket:
                if self.read and not paused:
                    image_queue.put((message, False))
                    try:
                        await websocket.send('True')
                    except websockets.exceptions.ConnectionClosedOK:
                        pass
                else:
                    # Reading disabled or paused: reject the payload.
                    try:
                        await websocket.send('False')
                    except websockets.exceptions.ConnectionClosedOK:
                        pass
        except websockets.exceptions.ConnectionClosedError:
            pass
        finally:
            self.clients.remove(websocket)

    def send_text(self, text):
        """Thread-safe broadcast; returns a concurrent.futures.Future."""
        return asyncio.run_coroutine_threadsafe(self.send_text_coroutine(text), self.loop)

    def stop_server(self):
        # NOTE(review): _stop_event only exists after run() has started;
        # calling this earlier raises AttributeError — confirm callers wait.
        self.loop.call_soon_threadsafe(self._stop_event.set)

    def run(self):
        async def main():
            self._loop = asyncio.get_running_loop()
            self._stop_event = stop_event = asyncio.Event()
            # Unblock any thread waiting on the .loop property.
            self._event.set()
            self.server = start_server = websockets.serve(
                self.server_handler, '0.0.0.0', config.get_general('websocket_port'), max_size=1000000000)
            async with start_server:
                await stop_event.wait()
        asyncio.run(main())
292
+
293
+
294
class RequestHandler(socketserver.BaseRequestHandler):
    """TCP handler for length-prefixed image uploads.

    Protocol: a 4-byte big-endian unsigned length, followed by that many
    bytes of image data. The payload is queued for OCR unless the global
    ``paused`` flag is set; the reply is the ASCII string 'True' or 'False'.
    """

    def handle(self):
        conn = self.request
        conn.settimeout(3)
        # NOTE(review): assumes the 4-byte header arrives in one recv —
        # a short read would under-count the size; confirm against senders.
        data = conn.recv(4)
        # Explicit byteorder: int.from_bytes() has no default before
        # Python 3.11 (where it defaults to 'big'), so the original call
        # raised TypeError on older interpreters.
        img_size = int.from_bytes(data, 'big')
        img = bytearray()
        try:
            while len(img) < img_size:
                data = conn.recv(4096)
                if not data:
                    break  # peer closed early; keep what we have
                img.extend(data)
        except TimeoutError:
            pass

        if not paused:
            image_queue.put((img, False))
            conn.sendall(b'True')
        else:
            conn.sendall(b'False')
315
+
316
+
317
class TextFiltering:
    """Filters OCR output down to the configured language's script and drops
    segments already present in the previous OCR result.

    Uses a transformers language-detection pipeline when available
    (``accurate_filtering = True``); otherwise falls back to ``langid``.
    """

    # True once the transformers pipeline has been loaded successfully.
    accurate_filtering = False

    def __init__(self, lang='ja'):
        from pysbd import Segmenter
        self.initial_lang = get_ocr_language() or lang
        self.segmenter = Segmenter(language=get_ocr_language(), clean=True)
        # Per-script character classes for the cheap regex-based filtering.
        # (Fix: the original compiled chinese_common_regex and english_regex
        # twice; the duplicates are removed.)
        self.kana_kanji_regex = re.compile(
            r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
        self.chinese_common_regex = re.compile(r'[\u4E00-\u9FFF]')
        self.english_regex = re.compile(r'[a-zA-Z0-9.,!?;:"\'()\[\]{}]')
        self.korean_regex = re.compile(r'[\uAC00-\uD7AF]')
        self.arabic_regex = re.compile(
            r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
        self.russian_regex = re.compile(
            r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
        self.greek_regex = re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
        self.hebrew_regex = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
        self.thai_regex = re.compile(r'[\u0E00-\u0E7F]')
        self.latin_extended_regex = re.compile(
            r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
        try:
            from transformers import pipeline, AutoTokenizer
            import torch
            logging.getLogger('transformers').setLevel(logging.ERROR)

            model_ckpt = 'papluca/xlm-roberta-base-language-detection'
            tokenizer = AutoTokenizer.from_pretrained(
                model_ckpt,
                use_fast=False
            )

            # Pick the best available device: CUDA > Apple MPS > CPU.
            if torch.cuda.is_available():
                device = 0
            elif torch.backends.mps.is_available():
                device = 'mps'
            else:
                device = -1
            self.pipe = pipeline(
                'text-classification', model=model_ckpt, tokenizer=tokenizer, device=device)
            self.accurate_filtering = True
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit during model download.
            import langid
            self.classify = langid.classify

    def __call__(self, text, last_result):
        """Segment *text*, keep only blocks in the active OCR language that
        were not in *last_result*, and return (joined_text, filtered_blocks)."""
        lang = get_ocr_language()
        # Rebuild the segmenter if the configured language changed at runtime.
        if self.initial_lang != lang:
            from pysbd import Segmenter
            self.segmenter = Segmenter(language=get_ocr_language(), clean=True)
            self.initial_lang = get_ocr_language()

        orig_text = self.segmenter.segment(text)
        orig_text_filtered = []
        for block in orig_text:
            if "BLANK_LINE" in block:
                block_filtered = ["\n"]
            elif lang == "ja":
                block_filtered = self.kana_kanji_regex.findall(block)
            elif lang == "zh":
                block_filtered = self.chinese_common_regex.findall(block)
            elif lang == "ko":
                block_filtered = self.korean_regex.findall(block)
            elif lang == "ar":
                block_filtered = self.arabic_regex.findall(block)
            elif lang == "ru":
                block_filtered = self.russian_regex.findall(block)
            elif lang == "el":
                block_filtered = self.greek_regex.findall(block)
            elif lang == "he":
                block_filtered = self.hebrew_regex.findall(block)
            elif lang == "th":
                block_filtered = self.thai_regex.findall(block)
            else:
                # Latin-script languages (en, fr, de, es, it, pt, nl, sv, da,
                # no, fi, ...) and any unrecognized language use extended Latin.
                block_filtered = self.latin_extended_regex.findall(block)

            if block_filtered:
                orig_text_filtered.append(''.join(block_filtered))
            else:
                orig_text_filtered.append(None)

        try:
            if isinstance(last_result, list):
                last_text = last_result
            elif last_result and last_result[1] == engine_index:
                last_text = last_result[0]
            else:
                last_text = []
        except Exception as e:
            logger.error(f"Error processing last_result {last_result}: {e}")
            last_text = []

        # Keep only blocks whose filtered form was not in the previous result.
        new_blocks = []
        for idx, block in enumerate(orig_text):
            if orig_text_filtered[idx] and (orig_text_filtered[idx] not in last_text):
                new_blocks.append(
                    str(block).strip().replace("BLANK_LINE", "\n"))

        final_blocks = []
        if self.accurate_filtering:
            detection_results = self.pipe(new_blocks, top_k=3, truncation=True)
            for idx, block in enumerate(new_blocks):
                for result in detection_results[idx]:
                    if result['label'] == lang:
                        final_blocks.append(block)
                        break
        else:
            for block in new_blocks:
                # This only filters out NON JA/ZH from text when lang is JA/ZH
                if lang not in ["ja", "zh"] or self.classify(block)[0] in ['ja', 'zh'] or block == "\n":
                    final_blocks.append(block)

        text = '\n'.join(final_blocks)
        return text, orig_text_filtered
436
+
437
+
438
class ScreenshotThread(threading.Thread):
    """Daemon thread that captures screenshots for OCR.

    Capture modes (``screencapture_mode``):
      0 — interactive picker, 1 — whole monitor ``screen_N``,
      2 — a specific window (Windows/macOS only), 3 — explicit
      ``left,top,width,height`` coordinates.
    Results go to ``periodic_screenshot_queue`` or ``image_queue`` depending
    on ``screen_capture_on_combo``.
    """

    def __init__(self, screen_capture_area, screen_capture_window, ocr_config, screen_capture_on_combo):
        super().__init__(daemon=True)
        self.macos_window_tracker_instance = None
        self.windows_window_tracker_instance = None
        self.screencapture_window_active = True
        self.screencapture_window_visible = True
        self.custom_left = None
        self.screen_capture_window = screen_capture_window
        self.areas = []
        # Combo-triggered captures go to image_queue; periodic ones to
        # periodic_screenshot_queue (see write_result).
        self.use_periodic_queue = not screen_capture_on_combo
        self.ocr_config = ocr_config
        # Decide the capture mode from the area string.
        if screen_capture_area == '':
            self.screencapture_mode = 0
        elif screen_capture_area.startswith('screen_'):
            parts = screen_capture_area.split('_')
            if len(parts) != 2 or not parts[1].isdigit():
                raise ValueError('Invalid screen_capture_area')
            screen_capture_monitor = int(parts[1])
            self.screencapture_mode = 1
        elif len(screen_capture_area.split(',')) == 4:
            self.screencapture_mode = 3
        else:
            # Any other non-empty string is treated as a window title.
            self.screencapture_mode = 2
            self.screen_capture_window = screen_capture_area
        if self.screen_capture_window:
            self.screencapture_mode = 2

        if self.screencapture_mode != 2:
            sct = mss.mss()

            if self.screencapture_mode == 1:
                mon = sct.monitors
                if len(mon) <= screen_capture_monitor:
                    raise ValueError(
                        'Invalid monitor number in screen_capture_area')
                coord_left = mon[screen_capture_monitor]['left']
                coord_top = mon[screen_capture_monitor]['top']
                coord_width = mon[screen_capture_monitor]['width']
                coord_height = mon[screen_capture_monitor]['height']
            elif self.screencapture_mode == 3:
                coord_left, coord_top, coord_width, coord_height = [
                    int(c.strip()) for c in screen_capture_area.split(',')]
            else:
                # Mode 0: ask the user to draw a rectangle.
                logger.opt(ansi=True).info(
                    'Launching screen coordinate picker')
                screen_selection = get_screen_selection()
                if not screen_selection:
                    raise ValueError(
                        'Picker window was closed or an error occurred')
                screen_capture_monitor = screen_selection['monitor']
                x, y, coord_width, coord_height = screen_selection['coordinates']
                if coord_width > 0 and coord_height > 0:
                    coord_top = screen_capture_monitor['top'] + y
                    coord_left = screen_capture_monitor['left'] + x
                else:
                    # Empty drag: fall back to the whole monitor.
                    logger.opt(ansi=True).info(
                        'Selection is empty, selecting whole screen')
                    coord_left = screen_capture_monitor['left']
                    coord_top = screen_capture_monitor['top']
                    coord_width = screen_capture_monitor['width']
                    coord_height = screen_capture_monitor['height']

            self.sct_params = {'top': coord_top, 'left': coord_left,
                               'width': coord_width, 'height': coord_height}
            logger.opt(ansi=True).info(
                f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
        else:
            if len(screen_capture_area.split(',')) == 4:
                self.areas.append(([int(c.strip())
                                   for c in screen_capture_area.split(',')]))

        # Top-to-bottom, left-to-right scan order.
        self.areas.sort(key=lambda rect: (rect[1], rect[0]))

        if self.screencapture_mode == 2 or self.screen_capture_window:
            area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
            if sys.platform == 'darwin':
                # ScreenCaptureKit needs macOS 14+; otherwise (or when forced
                # by config) use the deprecated CGWindowList API.
                if config.get_general('screen_capture_old_macos_api') or int(platform.mac_ver()[0].split('.')[0]) < 14:
                    self.old_macos_screenshot_api = True
                else:
                    self.old_macos_screenshot_api = False
                    self.screencapturekit_queue = queue.Queue()
                CGMainDisplayID()
                window_list = CGWindowListCopyWindowInfo(
                    kCGWindowListExcludeDesktopElements, kCGNullWindowID)
                window_titles = []
                window_ids = []
                window_index = None
                # Skip terminal windows so the console running GSM is never
                # selected as the capture target.
                for i, window in enumerate(window_list):
                    window_title = window.get(kCGWindowName, '')
                    if psutil.Process(window['kCGWindowOwnerPID']).name() not in ('Terminal', 'iTerm2'):
                        window_titles.append(window_title)
                        window_ids.append(window['kCGWindowNumber'])

                # Exact title match first, then substring match.
                if screen_capture_window in window_titles:
                    window_index = window_titles.index(screen_capture_window)
                else:
                    for t in window_titles:
                        if screen_capture_window in t:
                            window_index = window_titles.index(t)
                            break

                # NOTE(review): `if not window_index` also rejects a valid
                # match at index 0 — confirm whether `is None` was intended.
                if not window_index:
                    raise ValueError(area_invalid_error)

                self.window_id = window_ids[window_index]
                window_title = window_titles[window_index]

                if get_ocr_requires_open_window():
                    self.macos_window_tracker_instance = threading.Thread(
                        target=self.macos_window_tracker)
                    self.macos_window_tracker_instance.start()
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
            elif sys.platform == 'win32':
                self.window_handle, window_title = self.get_windows_window_handle(
                    screen_capture_window)

                if not self.window_handle:
                    raise ValueError(area_invalid_error)

                set_dpi_awareness()

                self.windows_window_tracker_instance = threading.Thread(
                    target=self.windows_window_tracker)
                self.windows_window_tracker_instance.start()
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
            else:
                raise ValueError(
                    'Window capture is only currently supported on Windows and macOS')

    def get_windows_window_handle(self, window_title):
        """Return (hwnd, title) for an exact title match, else the first
        substring match whose process is not a terminal; (None, None) if none."""
        def callback(hwnd, window_title_part):
            window_title = win32gui.GetWindowText(hwnd)
            if window_title_part in window_title:
                handles.append((hwnd, window_title))
            return True

        handle = win32gui.FindWindow(None, window_title)
        if handle:
            return (handle, window_title)

        handles = []
        win32gui.EnumWindows(callback, window_title)
        for handle in handles:
            _, pid = win32process.GetWindowThreadProcessId(handle[0])
            if psutil.Process(pid).name().lower() not in ('cmd.exe', 'powershell.exe', 'windowsterminal.exe'):
                return handle

        return (None, None)

    def windows_window_tracker(self):
        """Poll the target window every 200 ms, updating the active/visible
        flags; calls on_window_closed(False) once the window disappears."""
        found = True
        while not terminated:
            found = win32gui.IsWindow(self.window_handle)
            if not found:
                break
            if get_ocr_requires_open_window():
                self.screencapture_window_active = self.window_handle == win32gui.GetForegroundWindow()
            else:
                self.screencapture_window_visible = not win32gui.IsIconic(
                    self.window_handle)
            time.sleep(0.2)
        if not found:
            on_window_closed(False)

    def capture_macos_window_screenshot(self, window_id):
        """Asynchronously capture one frame of the window via ScreenCaptureKit;
        the CGImage (or None on failure) is delivered on screencapturekit_queue."""
        def shareable_content_completion_handler(shareable_content, error):
            if error:
                self.screencapturekit_queue.put(None)
                return

            target_window = None
            for window in shareable_content.windows():
                if window.windowID() == window_id:
                    target_window = window
                    break

            if not target_window:
                self.screencapturekit_queue.put(None)
                return

            with objc.autorelease_pool():
                content_filter = SCContentFilter.alloc(
                ).initWithDesktopIndependentWindow_(target_window)

                frame = content_filter.contentRect()
                scale = content_filter.pointPixelScale()
                # Capture at native pixel density (points * scale).
                width = frame.size.width * scale
                height = frame.size.height * scale
                configuration = SCStreamConfiguration.alloc().init()
                configuration.setSourceRect_(CGRectMake(
                    0, 0, frame.size.width, frame.size.height))
                configuration.setWidth_(width)
                configuration.setHeight_(height)
                configuration.setShowsCursor_(False)
                configuration.setCaptureResolution_(SCCaptureResolutionBest)
                configuration.setIgnoreGlobalClipSingleWindow_(True)

                SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_(
                    content_filter, configuration, capture_image_completion_handler
                )

        def capture_image_completion_handler(image, error):
            if error:
                self.screencapturekit_queue.put(None)
                return

            self.screencapturekit_queue.put(image)

        SCShareableContent.getShareableContentWithCompletionHandler_(
            shareable_content_completion_handler
        )

    def macos_window_tracker(self):
        """Poll the macOS window list every 200 ms to track whether the target
        window still exists and is frontmost; on close, on_window_closed(False)."""
        found = True
        while found and not terminated:
            found = False
            is_active = False
            with objc.autorelease_pool():
                window_list = CGWindowListCopyWindowInfo(
                    kCGWindowListOptionOnScreenOnly, kCGNullWindowID)
                for i, window in enumerate(window_list):
                    # A 'Fullscreen Backdrop' right after our window means the
                    # window is the active fullscreen app.
                    if found and window.get(kCGWindowName, '') == 'Fullscreen Backdrop':
                        is_active = True
                        break
                    if self.window_id == window['kCGWindowNumber']:
                        found = True
                        if i == 0 or window_list[i-1].get(kCGWindowName, '') in ('Dock', 'Color Enforcer Window'):
                            is_active = True
                            break
            if not found:
                # Off-screen (e.g. minimized) windows are not in the on-screen
                # list; check the full description list before declaring it gone.
                window_list = CGWindowListCreateDescriptionFromArray(
                    [self.window_id])
                if len(window_list) > 0:
                    found = True
            if found:
                self.screencapture_window_active = is_active
            time.sleep(0.2)
        if not found:
            on_window_closed(False)

    def write_result(self, result):
        # Periodic captures feed the periodic queue; combo-triggered captures
        # go to the shared OCR image queue.
        if self.use_periodic_queue:
            periodic_screenshot_queue.put(result)
        else:
            image_queue.put((result, True))

    def run(self):
        if self.screencapture_mode != 2:
            sct = mss.mss()
        start = time.time()
        while not terminated:
            # At most once a second, pick up external OCR-config changes.
            if time.time() - start > 1:
                start = time.time()
                section_changed = has_ocr_config_changed()
                if section_changed:
                    reload_electron_config()

            # Wait for a capture request; re-check terminated every 100 ms.
            if not screenshot_event.wait(timeout=0.1):
                continue
            if self.screencapture_mode == 2 or self.screen_capture_window:
                if sys.platform == 'darwin':
                    with objc.autorelease_pool():
                        if self.old_macos_screenshot_api:
                            cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id],
                                                                        kCGWindowImageBoundsIgnoreFraming)
                        else:
                            self.capture_macos_window_screenshot(
                                self.window_id)
                            try:
                                cg_image = self.screencapturekit_queue.get(
                                    timeout=0.5)
                            except queue.Empty:
                                cg_image = None
                        if not cg_image:
                            # NOTE(review): returning here ends the whole
                            # thread on a single failed capture — confirm.
                            return 0
                        width = CGImageGetWidth(cg_image)
                        height = CGImageGetHeight(cg_image)
                        raw_data = CGDataProviderCopyData(
                            CGImageGetDataProvider(cg_image))
                        bpr = CGImageGetBytesPerRow(cg_image)
                        img = Image.frombuffer(
                            'RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
                else:
                    # Windows: render the window into an offscreen bitmap via
                    # PrintWindow (flag 2 = PW_RENDERFULLCONTENT).
                    try:
                        coord_left, coord_top, right, bottom = win32gui.GetWindowRect(
                            self.window_handle)
                        coord_width = right - coord_left
                        coord_height = bottom - coord_top

                        hwnd_dc = win32gui.GetWindowDC(self.window_handle)
                        mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
                        save_dc = mfc_dc.CreateCompatibleDC()

                        save_bitmap = win32ui.CreateBitmap()
                        save_bitmap.CreateCompatibleBitmap(
                            mfc_dc, coord_width, coord_height)
                        save_dc.SelectObject(save_bitmap)

                        result = ctypes.windll.user32.PrintWindow(
                            self.window_handle, save_dc.GetSafeHdc(), 2)

                        bmpinfo = save_bitmap.GetInfo()
                        bmpstr = save_bitmap.GetBitmapBits(True)
                    except pywintypes.error:
                        return 0
                    img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0,
                                           1)
                    # Best-effort GDI cleanup; failures here are non-fatal.
                    try:
                        win32gui.DeleteObject(save_bitmap.GetHandle())
                    except:
                        pass
                    try:
                        save_dc.DeleteDC()
                    except:
                        pass
                    try:
                        mfc_dc.DeleteDC()
                    except:
                        pass
                    try:
                        win32gui.ReleaseDC(self.window_handle, hwnd_dc)
                    except:
                        pass
            else:
                sct_img = sct.grab(self.sct_params)
                img = Image.frombytes(
                    'RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')

            # A completely black/empty capture has no bounding box.
            if not img.getbbox():
                logger.info(
                    "Screen Capture Didn't get Capturing anything, sleeping.")
                time.sleep(1)
                continue

            import random  # Ensure this is imported at the top of the file if not already
            # Debug sampling: save before/after-crop images for ~1 in 20 frames.
            rand_int = random.randint(1, 20)

            if rand_int == 1:  # Executes only once out of 20 times
                img.save(os.path.join(get_temporary_directory(),
                         'before_crop.png'), 'PNG')

            img = apply_ocr_config_to_image(img, self.ocr_config)

            if rand_int == 1:
                img.save(os.path.join(
                    get_temporary_directory(), 'after_crop.png'), 'PNG')

            # Skip OCR when the frame is pixel-identical to the previous one.
            if last_image and are_images_identical(img, last_image):
                logger.debug(
                    "Captured screenshot is identical to the last one, sleeping.")
                time.sleep(max(.5, get_ocr_scan_rate()))
            else:
                self.write_result(img)
            screenshot_event.clear()

        if self.macos_window_tracker_instance:
            self.macos_window_tracker_instance.join()
        elif self.windows_window_tracker_instance:
            self.windows_window_tracker_instance.join()
799
+
800
+
801
+ import cv2
802
+ import numpy as np
803
+
804
def apply_adaptive_threshold_filter(img):
    """Binarize a PIL image with an adaptive (Gaussian) threshold.

    The image is converted to grayscale, inverted, lightly blurred, and
    adaptively thresholded, then inverted back, which tends to leave text
    as dark strokes on a light background. Returns a new PIL image.
    """
    bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    negative = cv2.bitwise_not(grayscale)
    smoothed = cv2.GaussianBlur(negative, (3, 3), 0)
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return Image.fromarray(cv2.bitwise_not(binary))
818
+
819
+
820
def set_last_image(image):
    """Remember *image* as the most recent capture.

    If the new image compares equal to the stored one, nothing changes.
    Otherwise the previously stored image is closed (best effort) before
    being replaced. Any failure while comparing resets the cache to None.
    """
    global last_image
    if image is None:
        last_image = None
    try:
        unchanged = (image == last_image)
    except Exception:
        last_image = None
        return
    if unchanged:
        return
    try:
        previous = last_image
        if previous is not None and hasattr(previous, "close"):
            previous.close()
    except Exception:
        # Closing is best effort; a failure must not block the update.
        pass
    last_image = image
837
+
838
+
839
def are_images_identical(img1, img2):
    """Return True when both images decode to byte-for-byte equal pixel arrays.

    Args:
        img1: First image (PIL Image or NumPy array), or None.
        img2: Second image (PIL Image or NumPy array), or None.

    Returns:
        False if either argument is None, if either cannot be converted to
        a NumPy array, or if the arrays differ in shape or content;
        True otherwise.
    """
    # BUGFIX: the old `None in (img1, img2)` test performs elementwise `==`
    # against NumPy arrays and raises "truth value is ambiguous", even though
    # this module's ImageType explicitly admits ndarrays. Identity checks
    # are safe for every input type.
    if img1 is None or img2 is None:
        return False

    try:
        arr_a = np.array(img1)
        arr_b = np.array(img2)
    except Exception:
        logger.warning(
            "Failed to convert images to numpy arrays for comparison.")
        # If conversion to numpy array fails, consider them not identical
        return False

    return (arr_a.shape == arr_b.shape) and np.array_equal(arr_a, arr_b)
853
+
854
+
855
+ import cv2
856
+ import numpy as np
857
+ from skimage.metrics import structural_similarity as ssim
858
+ from typing import Union
859
+
860
+ ImageType = Union[np.ndarray, Image.Image]
861
+
862
def _prepare_image(image: ImageType) -> np.ndarray:
    """Normalize a PIL or NumPy image into an OpenCV-compatible BGR array.

    Raises:
        TypeError: If *image* is neither a PIL Image nor a NumPy array.
    """
    if isinstance(image, Image.Image):
        # PIL images are RGB; OpenCV expects BGR channel order.
        return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    if isinstance(image, np.ndarray):
        # Assume ndarray input is already in a compatible format (like BGR).
        return image
    raise TypeError(f"Unsupported image type: {type(image)}. Must be a PIL Image or NumPy array.")
877
+
878
+ i = 1
879
+
880
def calculate_ssim_score(imageA: ImageType, imageB: ImageType) -> float:
    """Calculate the structural similarity index (SSIM) between two images.

    PIL input for *imageA* is first binarized with the adaptive-threshold
    filter; both images are then normalized to OpenCV arrays.

    Args:
        imageA: The first image (PIL Image or NumPy array).
        imageB: The second image (PIL Image or NumPy array).

    Returns:
        The SSIM score between the two images (between -1 and 1).

    Raises:
        ValueError: If the two images do not share the same dimensions.
    """
    # BUGFIX: the original placed this docstring after a `global i`
    # statement, so it was a discarded string expression rather than the
    # function's docstring; the unused `global i` and commented-out debug
    # dumps have been removed.
    if isinstance(imageA, Image.Image):
        imageA = apply_adaptive_threshold_filter(imageA)

    imageA = _prepare_image(imageA)
    imageB = _prepare_image(imageB)

    # Images must have the same dimensions
    if imageA.shape != imageB.shape:
        raise ValueError("Input images must have the same dimensions.")

    # `win_size` must be odd and smaller than both image dimensions; cap it
    # at 3 and shrink for very small images.
    # NOTE(review): for degenerate inputs (a dimension < 2) this can reach
    # zero or below, which ssim() itself rejects — callers catch that.
    win_size = min(3, imageA.shape[0] // 2, imageA.shape[1] // 2)
    if win_size % 2 == 0:
        win_size -= 1  # ensure it's odd

    score, _ = ssim(imageA, imageB, full=True, win_size=win_size)

    return score
926
+
927
+
928
+
929
def are_images_similar(imageA: Image.Image, imageB: Image.Image, threshold: float = 0.98) -> bool:
    """Return True when the SSIM score of the two images exceeds *threshold*.

    Args:
        imageA: The first image.
        imageB: The second image.
        threshold: The minimum SSIM score to be considered "similar".
            Defaults to 0.98 (very high similarity).

    Returns:
        True if the images are similar, False otherwise (including when
        either image is None or the SSIM computation fails).
    """
    if None in (imageA, imageB):
        logger.info("One of the images is None, cannot compare.")
        return False
    try:
        similarity = calculate_ssim_score(imageA, imageB)
    except Exception as err:
        # Comparison failures (e.g. mismatched sizes) count as "not similar".
        logger.info(err)
        return False
    return similarity > threshold
952
+
953
+
954
def quick_text_detection(pil_image, threshold_ratio=0.01):
    """Heuristically decide whether an image likely contains text.

    Runs Canny edge detection on the grayscale image and reports True when
    the fraction of edge pixels exceeds *threshold_ratio*.

    Args:
        pil_image (PIL.Image): Input image.
        threshold_ratio (float): Minimum ratio of edge pixels to consider
            text present.

    Returns:
        bool: True if text is likely present.
    """
    grayscale = np.array(pil_image.convert('L'))
    edge_map = cv2.Canny(grayscale, 50, 150)
    # Fraction of pixels that Canny marked as edges.
    edge_fraction = np.sum(edge_map > 0) / edge_map.size
    return edge_fraction > threshold_ratio
975
+
976
+
977
+ # Use OBS for Screenshot Source (i.e. Linux)
978
class OBSScreenshotThread(threading.Thread):
    """Daemon thread that sources OCR screenshots from OBS (i.e. on Linux).

    Waits on the global screenshot event, fetches a frame of the active OBS
    source via the websocket API, applies the scene's OCR config, and feeds
    the result to either the periodic queue or the on-demand image queue.
    """

    def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
        super().__init__(daemon=True)
        self.ocr_config = ocr_config
        self.interval = interval
        self.websocket = None
        self.current_source = None
        self.current_source_name = None
        self.current_scene = None
        self.width = width
        self.height = height
        # Without a screenshot hotkey combo, frames flow through the
        # periodic queue instead of the on-demand image queue.
        self.use_periodic_queue = not screen_capture_on_combo

    def write_result(self, result):
        """Dispatch a captured frame to the appropriate consumer queue."""
        if self.use_periodic_queue:
            periodic_screenshot_queue.put(result)
        else:
            image_queue.put((result, True))
            screenshot_event.clear()

    def connect_obs(self):
        """Open a synchronous connection to OBS via its websocket API."""
        import GameSentenceMiner.obs as obs
        obs.connect_to_obs_sync()

    def scale_down_width_height(self, width, height):
        """Pick a reduced capture resolution matching the source's aspect ratio.

        Falls back to the thread's configured dimensions when the source
        reports a zero dimension. Bands are checked widest-first so each
        one is reachable.
        """
        if width == 0 or height == 0:
            return self.width, self.height
        aspect_ratio = width / height
        logger.info(
            f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
        if aspect_ratio > 2.66:
            # Ultra-wide (32:9) - use 1920x540
            logger.info("Using ultra-wide aspect ratio scaling (32:9).")
            return 1920, 540
        elif aspect_ratio > 2.33:
            # 21:9 - use 1920x800
            logger.info("Using ultra-wide aspect ratio scaling (21:9).")
            return 1920, 800
        elif aspect_ratio > 1.77:
            # 16:9 - use 1280x720
            logger.info("Using standard aspect ratio scaling (16:9).")
            return 1280, 720
        elif aspect_ratio > 1.6:
            # 16:10 - use 1280x800
            logger.info("Using standard aspect ratio scaling (16:10).")
            return 1280, 800
        elif aspect_ratio > 1.5:
            # 3:2 - use 1080x720
            # BUGFIX: this branch previously sat below the 4:3 check and was
            # unreachable, because any ratio > 1.5 also exceeds 1.33.
            logger.info("Using standard aspect ratio scaling (3:2).")
            return 1080, 720
        elif aspect_ratio > 1.33:
            # 4:3 - use 960x720
            logger.info("Using standard aspect ratio scaling (4:3).")
            return 960, 720
        elif aspect_ratio > 1.25:
            # 5:4 - use 900x720
            logger.info("Using standard aspect ratio scaling (5:4).")
            return 900, 720
        else:
            # Default fallback - use original resolution
            logger.info(
                "Using default aspect ratio scaling (original resolution).")
            return width, height

    def init_config(self, source=None, scene=None):
        """Resolve the active OBS source/scene and load the matching OCR config."""
        import GameSentenceMiner.obs as obs
        obs.update_current_game()
        self.current_source = source if source else obs.get_active_source()
        logger.info(f"Current OBS source: {self.current_source}")
        self.source_width = self.current_source.get(
            "sceneItemTransform").get("sourceWidth") or self.width
        self.source_height = self.current_source.get(
            "sceneItemTransform").get("sourceHeight") or self.height
        if self.source_width and self.source_height:
            self.width, self.height = self.scale_down_width_height(
                self.source_width, self.source_height)
            logger.info(
                f"Using OBS source dimensions: {self.width}x{self.height}")
        self.current_source_name = self.current_source.get(
            "sourceName") or None
        self.current_scene = scene if scene else obs.get_current_game()
        self.ocr_config = get_scene_ocr_config(refresh=True)
        if not self.ocr_config:
            logger.error("No OCR config found for the current scene.")
            return
        # Scale the config's rectangles to the chosen capture resolution.
        self.ocr_config.scale_to_custom_size(self.width, self.height)

    def run(self):
        global last_image
        from PIL import Image
        import GameSentenceMiner.obs as obs

        # Register a scene switch callback in obsws
        def on_scene_switch(scene):
            logger.info(f"Scene switched to: {scene}. Loading new OCR config.")
            self.init_config(scene=scene)

        asyncio.run(obs.register_scene_change_callback(on_scene_switch))

        self.connect_obs()
        self.init_config()
        while not terminated:
            # Only capture when a screenshot has actually been requested.
            if not screenshot_event.wait(timeout=0.1):
                continue

            if not self.ocr_config:
                logger.info(
                    "No OCR config found for the current scene. Waiting for scene switch.")
                time.sleep(1)
                continue

            if not self.current_source_name:
                # Scene may have changed; re-resolve the active source.
                obs.update_current_game()
                self.current_source = obs.get_active_source()
                self.current_source_name = self.current_source.get(
                    "sourceName") or None

            try:
                if not self.current_source_name:
                    logger.error(
                        "No active source found in the current scene.")
                    self.write_result(1)
                    continue
                img = obs.get_screenshot_PIL(source_name=self.current_source_name,
                                             width=self.width, height=self.height, img_format='jpg', compression=80)

                img = apply_ocr_config_to_image(img, self.ocr_config)

                if img is not None:
                    self.write_result(img)
                else:
                    logger.error("Failed to get screenshot data from OBS.")

            except Exception as e:
                print(e)
                logger.info(
                    f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
                time.sleep(.5)
                continue
1118
+
1119
+
1120
def apply_ocr_config_to_image(img, ocr_config, is_secondary=False):
    """Mask excluded regions and crop the configured OCR rectangles out of *img*.

    Excluded rectangles are painted black in place. The remaining
    rectangles (filtered by *is_secondary*) are cropped top-to-bottom and,
    if there is more than one, stacked vertically with a 50-pixel gap
    between sections. With no matching rectangles the (masked) input image
    is returned unchanged.
    """
    # Black out every excluded region directly on the input image.
    for rect in ocr_config.rectangles:
        if rect.is_excluded:
            left, top, width, height = rect.coordinates
            drawer = ImageDraw.Draw(img)
            drawer.rectangle((left, top, left + width, top + height), fill=(0, 0, 0, 0))

    active = [r for r in ocr_config.rectangles
              if not r.is_excluded and r.is_secondary == is_secondary]

    # Sort top to bottom
    active.sort(key=lambda r: r.coordinates[1])

    sections = []
    for rect in active:
        x, y, w, h = rect.coordinates
        # Clamp the crop box to the image bounds.
        box = (max(0, x), max(0, y), min(img.width, x + w), min(img.height, y + h))
        sections.append(img.crop(box))

    if len(sections) > 1:
        # Width is the max width of all sections; height is the sum of all
        # sections plus 50-pixel gaps between them.
        total_width = max(section.width for section in sections)
        total_height = sum(section.height for section in sections) + (len(sections) - 1) * 50
        stacked = Image.new("RGBA", (total_width, total_height))
        offset = 0
        for section in sections:
            stacked.paste(section, (0, offset))
            offset += section.height + 50
        return stacked
    if sections:
        return sections[0]
    return img
1159
+
1160
+
1161
class AutopauseTimer:
    """Countdown that auto-pauses OCR after *timeout* idle seconds.

    start() launches a background one-second-tick countdown; stop() cancels
    it. If the countdown expires without being stopped, the global pause
    handler fires (unless the program is already paused or terminated).
    """

    def __init__(self, timeout):
        self.stop_event = threading.Event()
        self.timeout = timeout
        self.timer_thread = None

    def __del__(self):
        self.stop()

    def start(self):
        """Restart the countdown from the full timeout."""
        self.stop()
        self.stop_event.clear()
        self.timer_thread = threading.Thread(target=self._countdown)
        self.timer_thread.start()

    def stop(self):
        """Cancel a running countdown, blocking until its thread exits."""
        thread = self.timer_thread
        if thread and thread.is_alive() and not self.stop_event.is_set():
            self.stop_event.set()
            thread.join()

    def _countdown(self):
        """Tick once per second; trigger a pause when the time runs out."""
        remaining = self.timeout
        while remaining > 0 and not self.stop_event.is_set() and not terminated:
            time.sleep(1)
            remaining -= 1
        if not self.stop_event.is_set():
            self.stop_event.set()
            if not (paused or terminated):
                pause_handler(True)
1190
+
1191
+
1192
def pause_handler(is_combo=True):
    """Toggle the global paused flag, announcing the *new* state.

    When invoked from a hotkey combo, a desktop notification is sent as
    well as the log message.
    """
    global paused
    if paused:
        message = 'Unpaused!'
    else:
        message = 'Paused!'

    # Any pending auto-pause countdown is obsolete once the state flips.
    if auto_pause_handler:
        auto_pause_handler.stop()
    if is_combo:
        notifier.send(title='owocr', message=message)
    logger.info(message)
    paused = not paused
1202
+
1203
+
1204
def engine_change_handler(user_input='s', is_combo=True):
    """Switch the active OCR engine.

    's' cycles to the next engine (wrapping around); any other non-empty
    input is treated as an engine key to jump to directly. A notification
    and log line are emitted only when the engine actually changed.
    """
    global engine_index
    previous_index = engine_index

    key = user_input.lower()
    if key == 's':
        # Advance with wrap-around.
        engine_index = (engine_index + 1) % len(engine_keys)
    elif key != '' and key in engine_keys:
        engine_index = engine_keys.index(key)

    if engine_index != previous_index:
        new_engine_name = engine_instances[engine_index].readable_name
        if is_combo:
            notifier.send(
                title='owocr', message=f'Switched to {new_engine_name}')
        engine_color = config.get_general('engine_color')
        logger.opt(ansi=True).info(
            f'Switched to <{engine_color}>{new_engine_name}</{engine_color}>!')
1223
+
1224
+
1225
def engine_change_handler_name(engine):
    """Switch to the first OCR engine whose name appears in *engine*.

    Emits a notification and a log line only when the selection changed.
    """
    global engine_index
    previous_index = engine_index

    for idx, instance in enumerate(engine_instances):
        if instance.name.lower() in engine.lower():
            engine_index = idx
            break

    if engine_index == previous_index:
        return
    new_engine_name = engine_instances[engine_index].readable_name
    notifier.send(title='owocr', message=f'Switched to {new_engine_name}')
    engine_color = config.get_general('engine_color')
    logger.opt(ansi=True).info(
        f'Switched to <{engine_color}>{new_engine_name}</{engine_color}>!')
1240
+
1241
+
1242
def user_input_thread_run():
    """Blocking console loop mapping single keypresses to OCR controls.

    't'/'q' terminate, 'p' toggles pause, anything else is forwarded to the
    engine-switch handler. On Windows it polls msvcrt (falling back to
    stdin); elsewhere it puts the terminal in cbreak mode and reads stdin
    one character at a time, restoring terminal settings on exit.
    """
    def _terminate_handler():
        # Flip the module-wide shutdown flag watched by all worker loops.
        global terminated
        logger.info('Terminated!')
        terminated = True
    import sys

    if sys.platform == 'win32':
        import msvcrt
        while not terminated:
            user_input = None
            if msvcrt.kbhit():  # Check if a key is pressed
                user_input_bytes = msvcrt.getch()
                try:
                    user_input = user_input_bytes.decode()
                except UnicodeDecodeError:
                    # Non-decodable key (e.g. arrow-key prefix); ignore it.
                    pass
            if not user_input:  # If no input from msvcrt, check stdin
                import sys
                user_input = sys.stdin.read(1)

            if user_input.lower() in 'tq':
                _terminate_handler()
            elif user_input.lower() == 'p':
                pause_handler(False)
            else:
                engine_change_handler(user_input, False)
    else:
        import tty
        import termios
        fd = sys.stdin.fileno()
        # Remember the terminal state so it can be restored on exit.
        old_settings = termios.tcgetattr(fd)
        try:
            # cbreak mode: deliver characters immediately, without Enter.
            tty.setcbreak(sys.stdin.fileno())
            while not terminated:
                user_input = sys.stdin.read(1)
                if user_input.lower() in 'tq':
                    _terminate_handler()
                elif user_input.lower() == 'p':
                    pause_handler(False)
                else:
                    engine_change_handler(user_input, False)
        finally:
            # Always restore the original terminal settings.
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
1286
+
1287
+
1288
def signal_handler(sig, frame):
    """SIGINT handler: log and flag every worker loop to shut down."""
    global terminated
    logger.info('Terminated!')
    terminated = True
1292
+
1293
+
1294
def on_window_closed(alive):
    """Window-tracker callback: terminate once the target window has died."""
    global terminated
    if alive or terminated:
        return
    logger.info('Window closed or error occurred, terminated!')
    terminated = True
1299
+
1300
+
1301
def on_screenshot_combo():
    """Hotkey callback: request a screenshot unless OCR is paused."""
    if paused:
        return
    screenshot_event.set()
1304
+
1305
+
1306
def on_window_minimized(minimized):
    """Window-tracker callback: record whether the capture window is visible."""
    global screencapture_window_visible
    screencapture_window_visible = not minimized
1309
+
1310
+
1311
def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering=None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
    """Run one OCR pass on *img_or_path* and route the recognized text.

    Selects an engine (by name via *engine*, otherwise the globally
    selected one), retries with the primary engine when the secondary one
    fails, optionally filters/post-processes the text, and writes the
    result to a websocket, the clipboard, a callback, or a text file
    depending on *write_to*. Returns (orig_text, text) from the last run.
    """
    global engine_index
    # A successful scan restarts the auto-pause countdown below; stop any
    # countdown that is currently running.
    if auto_pause_handler:
        auto_pause_handler.stop()
    if engine:
        # NOTE(review): if no instance name matches, engine_instance stays
        # unbound and the call below raises NameError — confirm callers
        # always pass a known engine name.
        for i, instance in enumerate(engine_instances):
            if instance.name.lower() in engine.lower():
                engine_instance = instance
                break
    else:
        engine_instance = engine_instances[engine_index]

    engine_color = config.get_general('engine_color')

    start_time = time.time()
    result = engine_instance(img_or_path, furigana_filter_sensitivity)
    # Engines return 2- or 3-tuples; pad with None so crop_coords is optional.
    res, text, crop_coords = (*result, None)[:3]

    # Secondary engine failed: fall back to the primary engine (ocr_1).
    if not res and ocr_2 == engine:
        logger.opt(ansi=True).info(
            f"<{engine_color}>{engine_instance.readable_name}</{engine_color}> failed with message: {text}, trying <{engine_color}>{ocr_1}</{engine_color}>")
        for i, instance in enumerate(engine_instances):
            if instance.name.lower() in ocr_1.lower():
                engine_instance = instance
                if last_result:
                    # Previous-result context belongs to the failed engine.
                    last_result = []
                break
        start_time = time.time()
        result = engine_instance(img_or_path, furigana_filter_sensitivity)
        res, text, crop_coords = (*result, None)[:3]

    end_time = time.time()

    orig_text = []
    # print(filtering)
    #
    #
    # print(lang)

    # print(last_result)
    # print(engine_index)

    if res:
        if filtering:
            # Filtering also yields the unfiltered text for the callback.
            text, orig_text = filtering(text, last_result)
        if get_ocr_language() == "ja" or get_ocr_language() == "zh":
            text = post_process(text, keep_blank_lines=get_ocr_keep_newline())
        logger.opt(ansi=True).info(
            f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
        if notify and config.get_general('notifications'):
            notifier.send(title='owocr', message='Text recognized: ' + text)

        # Route the text to the configured sink.
        if write_to == 'websocket':
            websocket_server_thread.send_text(text)
        elif write_to == 'clipboard':
            pyperclipfix.copy(text)
        elif write_to == "callback":
            txt_callback(text, orig_text, ocr_start_time,
                         img_or_path, bool(engine), filtering, crop_coords)
        elif write_to:
            # Any other non-empty value is treated as a file path to append to.
            with Path(write_to).open('a', encoding='utf-8') as f:
                f.write(text + '\n')

        if auto_pause_handler and not paused:
            auto_pause_handler.start()
    else:
        logger.opt(ansi=True).info(
            f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {text}')

    # print(orig_text)
    # print(text)

    return orig_text, text
1384
+
1385
+
1386
def get_path_key(path):
    """Key a file by (path, mtime) so a modified file is treated as new."""
    return (path, path.lstat().st_mtime)
1388
+
1389
+
1390
def init_config(parse_args=True):
    """(Re)build the module-wide owocr Config, optionally parsing CLI args."""
    global config
    config = Config(parse_args)
1393
+
1394
+
1395
+ def run(read_from=None,
1396
+ read_from_secondary=None,
1397
+ write_to=None,
1398
+ engine=None,
1399
+ pause_at_startup=None,
1400
+ ignore_flag=None,
1401
+ delete_images=None,
1402
+ notifications=None,
1403
+ auto_pause=0,
1404
+ combo_pause=None,
1405
+ combo_engine_switch=None,
1406
+ screen_capture_area=None,
1407
+ screen_capture_areas=None,
1408
+ screen_capture_exclusions=None,
1409
+ screen_capture_window=None,
1410
+ screen_capture_delay_secs=None,
1411
+ screen_capture_combo=None,
1412
+ stop_running_flag=None,
1413
+ screen_capture_event_bus=None,
1414
+ text_callback=None,
1415
+ monitor_index=None,
1416
+ ocr1=None,
1417
+ ocr2=None,
1418
+ gsm_ocr_config=None,
1419
+ furigana_filter_sensitivity=None,
1420
+ config_check_thread=None
1421
+ ):
1422
+ """
1423
+ Japanese OCR client
1424
+
1425
+ Runs OCR in the background.
1426
+ It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
1427
+ Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
1428
+
1429
+ :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
1430
+ :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
1431
+ :param delay_secs: How often to check for new images, in seconds.
1432
+ :param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
1433
+ :param pause_at_startup: Pause at startup.
1434
+ :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
1435
+ :param delete_images: Delete image files after processing when reading from a directory.
1436
+ :param notifications: Show an operating system notification with the detected text.
1437
+ :param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
1438
+ :param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
1439
+ :param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
1440
+ :param screen_capture_area: Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).
1441
+ :param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture.
1442
+ :param screen_capture_only_active_windows: When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.
1443
+ :param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
1444
+ """
1445
+
1446
+ if read_from is None:
1447
+ read_from = config.get_general('read_from')
1448
+
1449
+ if read_from_secondary is None:
1450
+ read_from_secondary = config.get_general('read_from_secondary')
1451
+
1452
+ if screen_capture_area is None:
1453
+ screen_capture_area = config.get_general('screen_capture_area')
1454
+
1455
+ # if screen_capture_only_active_windows is None:
1456
+ # screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
1457
+
1458
+ if screen_capture_exclusions is None:
1459
+ screen_capture_exclusions = config.get_general(
1460
+ 'screen_capture_exclusions')
1461
+
1462
+ if screen_capture_window is None:
1463
+ screen_capture_window = config.get_general('screen_capture_window')
1464
+
1465
+ if screen_capture_delay_secs is None:
1466
+ screen_capture_delay_secs = config.get_general(
1467
+ 'screen_capture_delay_secs')
1468
+
1469
+ if screen_capture_combo is None:
1470
+ screen_capture_combo = config.get_general('screen_capture_combo')
1471
+
1472
+ if stop_running_flag is None:
1473
+ stop_running_flag = config.get_general('stop_running_flag')
1474
+
1475
+ if screen_capture_event_bus is None:
1476
+ screen_capture_event_bus = config.get_general(
1477
+ 'screen_capture_event_bus')
1478
+
1479
+ if text_callback is None:
1480
+ text_callback = config.get_general('text_callback')
1481
+
1482
+ if write_to is None:
1483
+ write_to = config.get_general('write_to')
1484
+
1485
+ logger.configure(
1486
+ handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
1487
+
1488
+ if config.has_config:
1489
+ logger.info('Parsed config file')
1490
+ else:
1491
+ logger.warning('No config file, defaults will be used.')
1492
+ if config.downloaded_config:
1493
+ logger.info(
1494
+ f'A default config file has been downloaded to {config.config_path}')
1495
+
1496
+ global engine_instances
1497
+ global engine_keys
1498
+ engine_instances = []
1499
+ config_engines = []
1500
+ engine_keys = []
1501
+ default_engine = ''
1502
+
1503
+ if len(config.get_general('engines')) > 0:
1504
+ for config_engine in config.get_general('engines').split(','):
1505
+ config_engines.append(config_engine.strip().lower())
1506
+
1507
+ for _, engine_class in sorted(inspect.getmembers(sys.modules[__name__],
1508
+ lambda x: hasattr(x, '__module__') and x.__module__ and (
1509
+ __package__ + '.ocr' in x.__module__ or __package__ + '.secret' in x.__module__) and inspect.isclass(
1510
+ x))):
1511
+ if len(config_engines) == 0 or engine_class.name in config_engines:
1512
+ if config.get_engine(engine_class.name) == None:
1513
+ engine_instance = engine_class()
1514
+ else:
1515
+ engine_instance = engine_class(config.get_engine(
1516
+ engine_class.name), lang=get_ocr_language())
1517
+
1518
+ if engine_instance.available:
1519
+ engine_instances.append(engine_instance)
1520
+ engine_keys.append(engine_class.key)
1521
+ if engine == engine_class.name:
1522
+ default_engine = engine_class.key
1523
+
1524
+ if len(engine_keys) == 0:
1525
+ msg = 'No engines available!'
1526
+ raise NotImplementedError(msg)
1527
+
1528
+ global engine_index
1529
+ global terminated
1530
+ global paused
1531
+ global just_unpaused
1532
+ global first_pressed
1533
+ global auto_pause_handler
1534
+ global notifier
1535
+ global websocket_server_thread
1536
+ global screenshot_thread
1537
+ global obs_screenshot_thread
1538
+ global image_queue
1539
+ global ocr_1
1540
+ global ocr_2
1541
+ ocr_1 = ocr1
1542
+ ocr_2 = ocr2
1543
+ custom_left = None
1544
+ terminated = False
1545
+ paused = pause_at_startup
1546
+ just_unpaused = True
1547
+ first_pressed = None
1548
+ auto_pause_handler = None
1549
+ engine_index = engine_keys.index(
1550
+ default_engine) if default_engine != '' else 0
1551
+ engine_color = config.get_general('engine_color')
1552
+ prefix_to_use = ""
1553
+ delay_secs = config.get_general('delay_secs')
1554
+
1555
+ non_path_inputs = ('screencapture', 'clipboard',
1556
+ 'websocket', 'unixsocket', 'obs')
1557
+ read_from_path = None
1558
+ read_from_readable = []
1559
+ terminated = False
1560
+ paused = config.get_general('pause_at_startup')
1561
+ auto_pause = config.get_general('auto_pause')
1562
+ clipboard_thread = None
1563
+ websocket_server_thread = None
1564
+ screenshot_thread = None
1565
+ directory_watcher_thread = None
1566
+ unix_socket_server = None
1567
+ key_combo_listener = None
1568
+ filtering = None
1569
+ auto_pause_handler = None
1570
+ engine_index = engine_keys.index(
1571
+ default_engine) if default_engine != '' else 0
1572
+ engine_color = config.get_general('engine_color')
1573
+ combo_pause = config.get_general('combo_pause')
1574
+ combo_engine_switch = config.get_general('combo_engine_switch')
1575
+ screen_capture_on_combo = False
1576
+ notifier = DesktopNotifierSync()
1577
+ image_queue = queue.Queue()
1578
+ key_combos = {}
1579
+
1580
+ if combo_pause != '':
1581
+ key_combos[combo_pause] = pause_handler
1582
+ if combo_engine_switch:
1583
+ if combo_pause:
1584
+ key_combos[combo_engine_switch] = engine_change_handler
1585
+ else:
1586
+ raise ValueError('combo_pause must also be specified')
1587
+
1588
+ if 'websocket' in (read_from, read_from_secondary) or write_to == 'websocket':
1589
+ websocket_server_thread = WebsocketServerThread(
1590
+ 'websocket' in (read_from, read_from_secondary))
1591
+ websocket_server_thread.start()
1592
+
1593
+ if write_to == "callback" and text_callback:
1594
+ global txt_callback
1595
+ txt_callback = text_callback
1596
+
1597
+ if any(x in ('screencapture', 'obs') for x in (read_from, read_from_secondary)):
1598
+ global screenshot_event
1599
+ global take_screenshot
1600
+ if screen_capture_combo != '':
1601
+ screen_capture_on_combo = True
1602
+ key_combos[screen_capture_combo] = on_screenshot_combo
1603
+ else:
1604
+ global periodic_screenshot_queue
1605
+ periodic_screenshot_queue = queue.Queue()
1606
+
1607
+ if 'screencapture' in (read_from, read_from_secondary):
1608
+ last_screenshot_time = 0
1609
+ last_result = ([], engine_index)
1610
+
1611
+ screenshot_event = threading.Event()
1612
+ screenshot_thread = ScreenshotThread(screen_capture_area, screen_capture_window,
1613
+ gsm_ocr_config, screen_capture_on_combo)
1614
+ screenshot_thread.start()
1615
+ filtering = TextFiltering()
1616
+ read_from_readable.append('screen capture')
1617
+ if 'obs' in (read_from, read_from_secondary):
1618
+ last_screenshot_time = 0
1619
+ last_result = ([], engine_index)
1620
+ screenshot_event = threading.Event()
1621
+ obs_screenshot_thread = OBSScreenshotThread(
1622
+ gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs)
1623
+ obs_screenshot_thread.start()
1624
+ filtering = TextFiltering()
1625
+ read_from_readable.append('obs')
1626
+ if 'websocket' in (read_from, read_from_secondary):
1627
+ read_from_readable.append('websocket')
1628
+ if 'unixsocket' in (read_from, read_from_secondary):
1629
+ if sys.platform == 'win32':
1630
+ raise ValueError(
1631
+ '"unixsocket" is not currently supported on Windows')
1632
+ socket_path = Path('/tmp/owocr.sock')
1633
+ if socket_path.exists():
1634
+ socket_path.unlink()
1635
+ unix_socket_server = socketserver.ThreadingUnixStreamServer(
1636
+ str(socket_path), RequestHandler)
1637
+ unix_socket_server_thread = threading.Thread(
1638
+ target=unix_socket_server.serve_forever, daemon=True)
1639
+ unix_socket_server_thread.start()
1640
+ read_from_readable.append('unix socket')
1641
+ if 'clipboard' in (read_from, read_from_secondary):
1642
+ clipboard_thread = ClipboardThread()
1643
+ clipboard_thread.start()
1644
+ read_from_readable.append('clipboard')
1645
+ if any(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
1646
+ if all(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
1647
+ raise ValueError(
1648
+ "read_from and read_from_secondary can't both be directory paths")
1649
+ delete_images = config.get_general('delete_images')
1650
+ read_from_path = Path(read_from) if read_from not in non_path_inputs else Path(
1651
+ read_from_secondary)
1652
+ if not read_from_path.is_dir():
1653
+ raise ValueError(
1654
+ 'read_from and read_from_secondary must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
1655
+ directory_watcher_thread = DirectoryWatcher(read_from_path)
1656
+ directory_watcher_thread.start()
1657
+ read_from_readable.append(f'directory {read_from_path}')
1658
+
1659
+ if len(key_combos) > 0:
1660
+ try:
1661
+ from pynput import keyboard
1662
+ key_combo_listener = keyboard.GlobalHotKeys(key_combos)
1663
+ key_combo_listener.start()
1664
+ except ImportError:
1665
+ pass
1666
+
1667
+ if write_to in ('clipboard', 'websocket', 'callback'):
1668
+ write_to_readable = write_to
1669
+ else:
1670
+ if Path(write_to).suffix.lower() != '.txt':
1671
+ raise ValueError(
1672
+ 'write_to must be either "websocket", "clipboard" or a path to a text file')
1673
+ write_to_readable = f'file {write_to}'
1674
+
1675
+ process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (
1676
+ read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
1677
+ process_screenshots = any(x in ('screencapture', 'obs') for x in (
1678
+ read_from, read_from_secondary)) and not screen_capture_on_combo
1679
+ if threading.current_thread() == threading.main_thread():
1680
+ signal.signal(signal.SIGINT, signal_handler)
1681
+ if (not process_screenshots) and auto_pause != 0:
1682
+ auto_pause_handler = AutopauseTimer(auto_pause)
1683
+ user_input_thread = threading.Thread(
1684
+ target=user_input_thread_run, daemon=True)
1685
+ user_input_thread.start()
1686
+ logger.opt(ansi=True).info(
1687
+ f"Reading from {' and '.join(read_from_readable)}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
1688
+ if screen_capture_combo:
1689
+ logger.opt(ansi=True).info(
1690
+ f'Manual OCR Running... Press <{engine_color}>{screen_capture_combo.replace("<", "").replace(">", "")}</{engine_color}> to run OCR')
1691
+
1692
+ def handle_config_changes(changes):
1693
+ nonlocal last_result
1694
+ if any(c in changes for c in ('ocr1', 'ocr2', 'language', 'furigana_filter_sensitivity')):
1695
+ last_result = ([], engine_index)
1696
+ engine_change_handler_name(get_ocr_ocr1())
1697
+
1698
def handle_area_config_changes(changes):
    """Area-config callback: push the updated capture-area settings
    to whichever screenshot source is currently running.

    NOTE(review): `changes` is accepted for callback-signature
    compatibility but is not inspected here.
    """
    if screenshot_thread:
        # Direct screen capture keeps a live ocr_config attribute.
        screenshot_thread.ocr_config = get_scene_ocr_config()
    if obs_screenshot_thread:
        # The OBS capture thread re-reads its config itself.
        obs_screenshot_thread.init_config()
1703
+
1704
+ config_check_thread.add_config_callback(handle_config_changes)
1705
+ config_check_thread.add_area_callback(handle_area_config_changes)
1706
+ previous_text = "Placeholder"
1707
+ sleep_time_to_add = 0
1708
+ last_result_time = time.time()
1709
+ while not terminated:
1710
+ ocr_start_time = datetime.now()
1711
+ start_time = time.time()
1712
+ img = None
1713
+ filter_img = False
1714
+
1715
+ if process_queue:
1716
+ try:
1717
+ img, filter_img = image_queue.get(timeout=0.1)
1718
+ notify = True
1719
+ except queue.Empty:
1720
+ pass
1721
+
1722
+ if get_ocr_scan_rate() < .5:
1723
+ adjusted_scan_rate = min(get_ocr_scan_rate() + sleep_time_to_add, .5)
1724
+ else:
1725
+ adjusted_scan_rate = get_ocr_scan_rate()
1726
+
1727
+ if (not img) and process_screenshots:
1728
+ if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) > adjusted_scan_rate:
1729
+ screenshot_event.set()
1730
+ img = periodic_screenshot_queue.get()
1731
+ filter_img = True
1732
+ notify = False
1733
+ last_screenshot_time = time.time()
1734
+ ocr_start_time = datetime.now()
1735
+ if adjusted_scan_rate > get_ocr_scan_rate():
1736
+ ocr_start_time = ocr_start_time - timedelta(seconds=adjusted_scan_rate - get_ocr_scan_rate())
1737
+
1738
+ if img == 0:
1739
+ on_window_closed(False)
1740
+ terminated = True
1741
+ break
1742
+ elif img:
1743
+ if filter_img:
1744
+ ocr_config = get_scene_ocr_config()
1745
+ # Check if the image is completely empty (all white or all black)
1746
+ try:
1747
+ extrema = img.getextrema()
1748
+ # For RGB or RGBA images, extrema is a tuple of (min, max) for each channel
1749
+ if isinstance(extrema[0], tuple):
1750
+ is_empty = all(e[0] == e[1] for e in extrema)
1751
+ else:
1752
+ is_empty = extrema[0] == extrema[1]
1753
+ if is_empty:
1754
+ logger.info("Image is totally empty (all pixels the same), sleeping.")
1755
+ sleep_time_to_add = .5
1756
+ continue
1757
+ except Exception as e:
1758
+ logger.debug(f"Could not determine if image is empty: {e}")
1759
+
1760
+ # Compare images, but only if it's one box, multiple boxes skews results way too much and produces false positives
1761
+ # if ocr_config and len(ocr_config.rectangles) < 2:
1762
+ # if are_images_similar(img, last_image):
1763
+ # logger.info("Captured screenshot is similar to the last one, sleeping.")
1764
+ # if time.time() - last_result_time > 10:
1765
+ # sleep_time_to_add += .005
1766
+ # continue
1767
+ # else:
1768
+ if are_images_identical(img, last_image):
1769
+ logger.info("Captured screenshot is identical to the last one, sleeping.")
1770
+ if time.time() - last_result_time > 10:
1771
+ sleep_time_to_add += .005
1772
+ continue
1773
+
1774
+ res, text = process_and_write_results(img, write_to, last_result, filtering, notify,
1775
+ ocr_start_time=ocr_start_time, furigana_filter_sensitivity=get_ocr_furigana_filter_sensitivity())
1776
+ if not text and not previous_text and time.time() - last_result_time > 10:
1777
+ sleep_time_to_add += .005
1778
+ logger.info(f"No text detected again, sleeping.")
1779
+ else:
1780
+ sleep_time_to_add = 0
1781
+
1782
+ # If image was stabilized, and now there is no text, reset sleep time
1783
+ if not previous_text and not res:
1784
+ sleep_time_to_add = 0
1785
+ previous_text = text
1786
+ if res:
1787
+ last_result = (res, engine_index)
1788
+ last_result_time = time.time()
1789
+ else:
1790
+ process_and_write_results(
1791
+ img, write_to, None, notify=notify, ocr_start_time=ocr_start_time, engine=ocr2)
1792
+ if isinstance(img, Path):
1793
+ if delete_images:
1794
+ Path.unlink(img)
1795
+
1796
+ elapsed_time = time.time() - start_time
1797
+ if (not terminated) and elapsed_time < 0.1:
1798
+ time.sleep(0.1 - elapsed_time)
1799
+
1800
+ if websocket_server_thread:
1801
+ websocket_server_thread.stop_server()
1802
+ websocket_server_thread.join()
1803
+ if clipboard_thread:
1804
+ if sys.platform == 'win32':
1805
+ win32api.PostThreadMessage(
1806
+ clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
1807
+ clipboard_thread.join()
1808
+ if directory_watcher_thread:
1809
+ directory_watcher_thread.join()
1810
+ if unix_socket_server:
1811
+ unix_socket_server.shutdown()
1812
+ unix_socket_server.join()
1813
+ if screenshot_thread:
1814
+ screenshot_thread.join()
1815
+ if key_combo_listener:
1816
+ key_combo_listener.stop()
1817
+ if config_check_thread:
1818
+ config_check_thread.join()