GameSentenceMiner 2.6.5__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1096 @@
1
+ import sys
2
+ import signal
3
+ import time
4
+ import threading
5
+ from pathlib import Path
6
+
7
+ import fire
8
+ import numpy as np
9
+ import pyperclipfix
10
+ import mss
11
+ import asyncio
12
+ import websockets
13
+ import socketserver
14
+ import queue
15
+ import io
16
+ import re
17
+
18
+ from PIL import Image, ImageDraw
19
+ from PIL import UnidentifiedImageError
20
+ from loguru import logger
21
+ from pynput import keyboard
22
+ from desktop_notifier import DesktopNotifierSync
23
+ import psutil
24
+
25
+ import inspect
26
+ from .ocr import *
27
+ from .config import Config
28
+ from .screen_coordinate_picker import get_screen_selection
29
+ from ...configuration import get_temporary_directory
30
+
31
+ try:
32
+ import win32gui
33
+ import win32ui
34
+ import win32api
35
+ import win32con
36
+ import win32process
37
+ import win32clipboard
38
+ import pywintypes
39
+ import ctypes
40
+ except ImportError:
41
+ pass
42
+
43
+ try:
44
+ import objc
45
+ import platform
46
+ from AppKit import NSData, NSImage, NSBitmapImageRep, NSDeviceRGBColorSpace, NSGraphicsContext, NSZeroPoint, NSZeroRect, NSCompositingOperationCopy
47
+ from Quartz import CGWindowListCreateImageFromArray, kCGWindowImageBoundsIgnoreFraming, CGRectMake, CGRectNull, CGMainDisplayID, CGWindowListCopyWindowInfo, \
48
+ CGWindowListCreateDescriptionFromArray, kCGWindowListOptionOnScreenOnly, kCGWindowListExcludeDesktopElements, kCGWindowName, kCGNullWindowID, \
49
+ CGImageGetWidth, CGImageGetHeight, CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow
50
+ from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionBest
51
+ except ImportError:
52
+ pass
53
+
54
+
55
+ config = None
56
+
57
+
58
class WindowsClipboardThread(threading.Thread):
    """Daemon thread that listens for Windows clipboard-update messages.

    The thread creates a zero-sized window, registers it as a clipboard
    format listener and pumps its messages. Bitmap updates are signalled
    through the module-level ``clipboard_event``.
    """

    def __init__(self):
        super().__init__(daemon=True)
        # Time of the last accepted update; used to rate-limit notifications.
        self.last_update = time.time()

    def process_message(self, hwnd: int, msg: int, wparam: int, lparam: int):
        """Window procedure: flag a bitmap clipboard update, then return 0.

        An update is only accepted when more than one second has passed
        since the previous accepted one and the program is not paused.
        """
        WM_CLIPBOARDUPDATE = 0x031D
        now = time.time()
        is_clipboard_update = msg == WM_CLIPBOARDUPDATE
        if is_clipboard_update and now - self.last_update > 1 and not paused:
            bitmap_available = win32clipboard.IsClipboardFormatAvailable(win32con.CF_BITMAP)
            if bitmap_available:
                clipboard_event.set()
                self.last_update = now
        return 0

    def create_window(self):
        """Register the window class and return the handle of a new window."""
        class_name = 'ClipboardHook'
        window_class = win32gui.WNDCLASS()
        window_class.lpfnWndProc = self.process_message
        window_class.lpszClassName = class_name
        window_class.hInstance = win32api.GetModuleHandle(None)
        atom = win32gui.RegisterClass(window_class)
        return win32gui.CreateWindow(atom, class_name, 0, 0, 0, 0, 0, 0, 0, window_class.hInstance, None)

    def run(self):
        """Create the listener window and pump messages on this thread."""
        window_handle = self.create_window()
        self.thread_id = win32api.GetCurrentThreadId()
        ctypes.windll.user32.AddClipboardFormatListener(window_handle)
        win32gui.PumpMessages()
86
+
87
+
88
class WebsocketServerThread(threading.Thread):
    """Daemon thread running the websocket server on its own asyncio loop.

    :param read: when truthy, messages received from clients are put on the
        module-level ``websocket_queue`` (unless the program is paused).
    """

    def __init__(self, read):
        super().__init__(daemon=True)
        self._loop = None
        self.read = read
        self.clients = set()
        # Set once the event loop exists, so other threads can wait for it.
        self._event = threading.Event()

    @property
    def loop(self):
        """The server's event loop; blocks until the server thread created it."""
        self._event.wait()
        return self._loop

    async def send_text_coroutine(self, text):
        """Send ``text`` to every currently connected client."""
        # Iterate over a snapshot: connection handlers add and remove clients
        # while this coroutine is suspended in ``send``, and mutating a set
        # during iteration raises RuntimeError.
        for client in tuple(self.clients):
            try:
                await client.send(text)
            except websockets.exceptions.ConnectionClosed:
                # A client that went away must not stop delivery to the
                # others; its handler removes it from ``self.clients``.
                continue

    async def server_handler(self, websocket):
        """Per-connection handler: queue incoming messages and acknowledge them."""
        self.clients.add(websocket)
        try:
            async for message in websocket:
                accepted = self.read and not paused
                if accepted:
                    websocket_queue.put(message)
                try:
                    await websocket.send('True' if accepted else 'False')
                except websockets.exceptions.ConnectionClosedOK:
                    pass
        except websockets.exceptions.ConnectionClosedError:
            pass
        finally:
            self.clients.discard(websocket)

    def send_text(self, text):
        """Schedule a broadcast of ``text`` from any thread; returns the future."""
        return asyncio.run_coroutine_threadsafe(self.send_text_coroutine(text), self.loop)

    def stop_server(self):
        """Ask the server loop to shut down (thread-safe)."""
        self.loop.call_soon_threadsafe(self._stop_event.set)

    def run(self):
        """Start the server and keep it alive until ``stop_server`` is called."""
        async def main():
            self._loop = asyncio.get_running_loop()
            self._stop_event = stop_event = asyncio.Event()
            # Publish the loop only after the stop event exists, so that
            # ``stop_server`` never sees a half-initialised thread.
            self._event.set()
            self.server = start_server = websockets.serve(self.server_handler, '0.0.0.0', config.get_general('websocket_port'), max_size=1000000000)
            async with start_server:
                await stop_event.wait()
        asyncio.run(main())
140
+
141
+
142
class RequestHandler(socketserver.BaseRequestHandler):
    """Handles one Unix-socket request carrying a length-prefixed image."""

    def handle(self):
        """Read a 4-byte size header followed by the image bytes.

        The received bytes are put on the module-level ``unixsocket_queue``
        and ``b'True'`` is sent back, unless the program is paused, in which
        case ``b'False'`` is sent and the data is dropped.
        """
        conn = self.request
        conn.settimeout(3)
        header = conn.recv(4)
        # Explicit byte order: the ``byteorder`` argument only became optional
        # (defaulting to 'big') in Python 3.11.
        img_size = int.from_bytes(header, 'big')
        img = bytearray()
        try:
            while len(img) < img_size:
                data = conn.recv(4096)
                if not data:
                    # Peer closed the connection before sending everything.
                    break
                img.extend(data)
        except TimeoutError:
            # Keep whatever arrived before the 3 second timeout.
            pass

        if not paused:
            unixsocket_queue.put(img)
            conn.sendall(b'True')
        else:
            conn.sendall(b'False')
163
+
164
+
165
class MacOSWindowTracker(threading.Thread):
    """Daemon thread polling a macOS window for activation and closure.

    Reports changes through the module-level ``on_window_activated`` and
    ``on_window_closed`` callbacks.
    """

    def __init__(self, window_id):
        super().__init__(daemon=True)
        self.stop = False
        self.window_id = window_id
        self.window_active = False

    def run(self):
        """Poll every 0.2 s until the window disappears or ``stop`` is set."""
        window_present = True
        while window_present and not self.stop:
            window_present = False
            active_now = False
            with objc.autorelease_pool():
                on_screen = CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID)
                for position, info in enumerate(on_screen):
                    # NOTE(review): presumably a 'Fullscreen Backdrop' entry
                    # listed after our window means it is shown fullscreen - confirm.
                    if window_present and info.get(kCGWindowName, '') == 'Fullscreen Backdrop':
                        active_now = True
                        break
                    if self.window_id != info['kCGWindowNumber']:
                        continue
                    window_present = True
                    # The window counts as active when it is first in the list
                    # or directly preceded by one of these named windows.
                    if position == 0 or on_screen[position - 1].get(kCGWindowName, '') in ('Dock', 'Color Enforcer Window'):
                        active_now = True
                        break
                if not window_present:
                    # Not on screen: check whether the window still exists at all.
                    descriptions = CGWindowListCreateDescriptionFromArray([self.window_id])
                    if len(descriptions) > 0:
                        window_present = True
            if window_present and self.window_active != active_now:
                on_window_activated(active_now)
                self.window_active = active_now
            time.sleep(0.2)
        if not window_present:
            on_window_closed(False)
198
+
199
+
200
class WindowsWindowTracker(threading.Thread):
    """Daemon thread polling a Windows window handle for state changes.

    Depending on ``only_active`` it reports either foreground changes
    (``on_window_activated``) or minimise/restore changes
    (``on_window_minimized``); a vanished window is reported through
    ``on_window_closed``.
    """

    def __init__(self, window_handle, only_active):
        super().__init__(daemon=True)
        self.stop = False
        self.window_handle = window_handle
        self.only_active = only_active
        self.window_active = False
        self.window_minimized = False

    def run(self):
        """Poll every 0.2 s until the window is gone or ``stop`` is set."""
        window_exists = True
        while not self.stop:
            window_exists = win32gui.IsWindow(self.window_handle)
            if not window_exists:
                break

            if self.only_active:
                in_foreground = self.window_handle == win32gui.GetForegroundWindow()
                if in_foreground != self.window_active:
                    on_window_activated(in_foreground)
                    self.window_active = in_foreground
            else:
                iconic = win32gui.IsIconic(self.window_handle)
                if iconic != self.window_minimized:
                    on_window_minimized(iconic)
                    self.window_minimized = iconic

            time.sleep(0.2)

        if not window_exists:
            on_window_closed(False)
228
+
229
+
230
def capture_macos_window_screenshot(window_id):
    """Request a ScreenCaptureKit screenshot of the window with ``window_id``.

    The call itself returns nothing; the outcome is delivered through the
    module-level ``screencapturekit_queue``: the captured image on success,
    or ``None`` when an error is reported or the window is not found.
    """
    def capture_image_completion_handler(image, error):
        # Final step: hand the image (or None on error) to the consumer.
        screencapturekit_queue.put(None if error else image)

    def shareable_content_completion_handler(shareable_content, error):
        if error:
            screencapturekit_queue.put(None)
            return

        # First shareable window whose ID matches, if any.
        target_window = next(
            (candidate for candidate in shareable_content.windows() if candidate.windowID() == window_id),
            None,
        )
        if not target_window:
            screencapturekit_queue.put(None)
            return

        with objc.autorelease_pool():
            content_filter = SCContentFilter.alloc().initWithDesktopIndependentWindow_(target_window)

            rect = content_filter.contentRect()
            pixel_scale = content_filter.pointPixelScale()
            rect_width = rect.size.width
            rect_height = rect.size.height

            # Output size is the filter's content rect multiplied by its
            # point-to-pixel scale.
            stream_config = SCStreamConfiguration.alloc().init()
            stream_config.setSourceRect_(CGRectMake(0, 0, rect_width, rect_height))
            stream_config.setWidth_(rect_width * pixel_scale)
            stream_config.setHeight_(rect_height * pixel_scale)
            stream_config.setShowsCursor_(False)
            stream_config.setCaptureResolution_(SCCaptureResolutionBest)
            stream_config.setIgnoreGlobalClipSingleWindow_(True)

            SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_(
                content_filter, stream_config, capture_image_completion_handler
            )

    SCShareableContent.getShareableContentWithCompletionHandler_(
        shareable_content_completion_handler
    )
275
+
276
+
277
def get_windows_window_handle(window_title):
    """Find a window by title on Windows.

    An exact title match is preferred. Otherwise the first enumerated window
    whose title contains ``window_title`` and whose owning process is not a
    terminal (cmd, PowerShell, Windows Terminal) is used.

    :param window_title: full title or a part of the title to look for.
    :return: ``(handle, title)`` of the match, or ``(None, None)``.
    """
    exact_handle = win32gui.FindWindow(None, window_title)
    if exact_handle:
        return (exact_handle, window_title)

    candidates = []

    def callback(hwnd, window_title_part):
        title = win32gui.GetWindowText(hwnd)
        if window_title_part in title:
            candidates.append((hwnd, title))
        # Returning True keeps the enumeration going.
        return True

    win32gui.EnumWindows(callback, window_title)

    terminal_processes = ('cmd.exe', 'powershell.exe', 'windowsterminal.exe')
    for candidate in candidates:
        _, pid = win32process.GetWindowThreadProcessId(candidate[0])
        try:
            process_name = psutil.Process(pid).name().lower()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process exited since enumeration or cannot be inspected;
            # skip this window instead of aborting the whole lookup.
            continue
        if process_name not in terminal_processes:
            return candidate

    return (None, None)
296
+
297
+
298
class TextFiltering:
    """Keeps only new, Japanese sentences from recognised text.

    Language detection uses a ``transformers`` text-classification pipeline
    when it can be set up (``accurate_filtering`` is then True) and falls
    back to ``langid`` otherwise.
    """

    accurate_filtering = False

    def __init__(self):
        from pysbd import Segmenter
        self.segmenter = Segmenter(language='ja', clean=True)
        self.kana_kanji_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
        try:
            from transformers import pipeline, AutoTokenizer
            import torch

            model_ckpt = 'papluca/xlm-roberta-base-language-detection'
            tokenizer = AutoTokenizer.from_pretrained(
                model_ckpt,
                use_fast = False
            )

            if torch.cuda.is_available():
                device = 0
            elif torch.backends.mps.is_available():
                device = 'mps'
            else:
                device = -1
            self.pipe = pipeline('text-classification', model=model_ckpt, tokenizer=tokenizer, device=device)
            self.accurate_filtering = True
        except Exception:
            # Any failure setting up the pipeline (missing packages, model
            # loading problems) selects the langid fallback. ``Exception``
            # rather than a bare except, so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            import langid
            self.classify = langid.classify

    def __call__(self, text, last_result):
        """Filter ``text`` against the previous result.

        :param text: recognised text.
        :param last_result: ``(keys, engine_index)`` pair from the previous
            call; its keys are only used when its engine index equals the
            module-level ``engine_index``.
        :return: ``(filtered_text, keys)`` where ``filtered_text`` joins the
            kept sentences with newlines and ``keys`` holds, per sentence,
            its kana/kanji characters joined together (``None`` when the
            sentence contains none).
        """
        orig_text = self.segmenter.segment(text)

        orig_text_filtered = []
        for block in orig_text:
            kana_kanji = self.kana_kanji_regex.findall(block)
            orig_text_filtered.append(''.join(kana_kanji) if kana_kanji else None)

        if last_result[1] == engine_index:
            last_text = last_result[0]
        else:
            last_text = []

        # Keep sentences that contain kana/kanji and were not seen last time.
        new_blocks = [
            block
            for block, key in zip(orig_text, orig_text_filtered)
            if key and key not in last_text
        ]

        final_blocks = []
        if not new_blocks:
            # Nothing to classify; do not call the classifier on empty input.
            pass
        elif self.accurate_filtering:
            detection_results = self.pipe(new_blocks, top_k=3, truncation=True)
            for block, candidates in zip(new_blocks, detection_results):
                # Accept the sentence if Japanese is among the top 3 labels.
                if any(result['label'] == 'ja' for result in candidates):
                    final_blocks.append(block)
        else:
            final_blocks = [block for block in new_blocks if self.classify(block)[0] == 'ja']

        text = '\n'.join(final_blocks)
        return text, orig_text_filtered
363
+
364
+
365
class AutopauseTimer:
    """Pauses the program when no restart happens within ``timeout`` seconds.

    ``start`` (re)arms the countdown on a background thread and ``stop``
    cancels it. When the countdown expires and the program is not already
    paused, the module-level ``pause_handler(True)`` is called.
    """

    def __init__(self, timeout):
        self.stop_event = threading.Event()
        self.timeout = timeout
        self.timer_thread = None

    def start(self):
        """Cancel any running countdown and start a new one."""
        self.stop()
        self.stop_event.clear()
        self.timer_thread = threading.Thread(target=self._countdown)
        self.timer_thread.start()

    def stop(self):
        """Cancel the running countdown, if any, and wait for its thread."""
        if not self.stop_event.is_set() and self.timer_thread and self.timer_thread.is_alive():
            self.stop_event.set()
            self.timer_thread.join()

    def _countdown(self):
        # Wait on the event instead of sleeping in one-second steps, so that
        # stop() returns immediately rather than blocking for up to a second.
        if self.stop_event.wait(self.timeout):
            # Cancelled by stop() before the timeout expired.
            return
        # Mark the timer as finished *before* pausing: pause_handler calls
        # stop(), which must not try to join this very thread.
        self.stop_event.set()
        if not paused:
            pause_handler(True)
391
+
392
+
393
def pause_handler(is_combo=True):
    """Toggle the global ``paused`` flag.

    Stops the auto-pause timer if one exists, logs the new state and, when
    ``is_combo`` is truthy, also shows a desktop notification. Unpausing
    additionally sets the global ``just_unpaused`` flag.
    """
    global paused
    global just_unpaused

    was_paused = paused
    message = 'Unpaused!' if was_paused else 'Paused!'
    if was_paused:
        just_unpaused = True

    if auto_pause_handler:
        auto_pause_handler.stop()

    if is_combo:
        notifier.send(title='owocr', message=message)
    logger.info(message)

    paused = not paused
409
+
410
+
411
def engine_change_handler(user_input='s', is_combo=True):
    """Switch the active OCR engine.

    ``'s'`` (case-insensitive) cycles to the next engine, wrapping around
    after the last one; any other non-empty input selects the engine whose
    key equals the lower-cased input. When the engine actually changes, the
    switch is logged and, if ``is_combo`` is truthy, a notification is sent.
    """
    global engine_index
    previous_index = engine_index
    key = user_input.lower()

    if key == 's':
        at_last_engine = engine_index == len(engine_keys) - 1
        engine_index = 0 if at_last_engine else engine_index + 1
    elif key != '' and key in engine_keys:
        engine_index = engine_keys.index(key)

    if engine_index == previous_index:
        return

    new_engine_name = engine_instances[engine_index].readable_name
    if is_combo:
        notifier.send(title='owocr', message=f'Switched to {new_engine_name}')
    engine_color = config.get_general('engine_color')
    logger.opt(ansi=True).info(f'Switched to <{engine_color}>{new_engine_name}</{engine_color}>!')
429
+
430
+
431
def user_input_thread_run():
    """Read single key presses from the console until termination.

    ``t``/``q`` terminate the program, ``p`` toggles pause, and any other key
    is passed to ``engine_change_handler``. Uses ``msvcrt`` on Windows and a
    cbreak-mode terminal elsewhere; the terminal settings are restored on exit.
    """
    def _terminate_handler():
        global terminated
        logger.info('Terminated!')
        terminated = True

    def _dispatch(key):
        lowered = key.lower()
        # Substring test against 'tq': this also matches the empty string,
        # which is what read(1) returns at end of input.
        if lowered in 'tq':
            _terminate_handler()
        elif lowered == 'p':
            pause_handler(False)
        else:
            engine_change_handler(key, False)

    if sys.platform == 'win32':
        import msvcrt
        while not terminated:
            raw_key = msvcrt.getch()
            try:
                _dispatch(raw_key.decode())
            except UnicodeDecodeError:
                # Bytes that do not decode are ignored.
                pass
        return

    import tty, termios
    fd = sys.stdin.fileno()
    saved_settings = termios.tcgetattr(fd)
    try:
        tty.setcbreak(sys.stdin.fileno())
        while not terminated:
            _dispatch(sys.stdin.read(1))
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, saved_settings)
467
+
468
+
469
def on_screenshot_combo():
    """Hotkey callback: request a screenshot unless the program is paused."""
    if paused:
        return
    screenshot_event.set()
472
+
473
def on_screenshot_bus():
    """Event-bus callback: request a screenshot unless the program is paused."""
    if paused:
        return
    screenshot_event.set()
476
+
477
+
478
def signal_handler(sig, frame):
    """Signal callback: log the termination and set the global ``terminated`` flag."""
    global terminated
    logger.info('Terminated!')
    terminated = True
482
+
483
+
484
def on_window_closed(alive):
    """Terminate the program when the tracked window is reported as gone.

    Does nothing when ``alive`` is truthy or termination is already flagged.
    """
    global terminated
    if alive or terminated:
        return
    logger.info('Window closed or error occurred, terminated!')
    terminated = True
489
+
490
+
491
def on_window_activated(active):
    """Record in ``screencapture_window_active`` whether the tracked window is active."""
    global screencapture_window_active
    screencapture_window_active = active
494
+
495
+
496
def on_window_minimized(minimized):
    """Record window visibility: ``screencapture_window_visible`` is the negation of ``minimized``."""
    global screencapture_window_visible
    screencapture_window_visible = not minimized
499
+
500
+
501
def normalize_macos_clipboard(img):
    """Redraw clipboard image data into a fresh RGBA bitmap.

    :param img: raw image bytes as read from the clipboard.
    :return: the TIFF representation of the redrawn 8-bit RGBA bitmap.
    """
    source_image = NSImage.alloc().initWithData_(NSData.dataWithBytes_length_(img, len(img)))
    pixels_wide = int(source_image.size().width)
    pixels_high = int(source_image.size().height)

    bitmap = NSBitmapImageRep.alloc().initWithBitmapDataPlanes_pixelsWide_pixelsHigh_bitsPerSample_samplesPerPixel_hasAlpha_isPlanar_colorSpaceName_bytesPerRow_bitsPerPixel_(
        None,  # no existing planes: a new bitmap is created
        pixels_wide,
        pixels_high,
        8,  # bits per sample
        4,  # samples per pixel (R, G, B, A)
        True,  # has alpha
        False,  # not planar
        NSDeviceRGBColorSpace,
        0,  # bytes per row computed automatically
        32  # bits per pixel (8 bits per sample * 4 samples per pixel)
    )

    # Make the new bitmap the current drawing target, then draw the source
    # image into it.
    drawing_context = NSGraphicsContext.graphicsContextWithBitmapImageRep_(bitmap)
    NSGraphicsContext.setCurrentContext_(drawing_context)

    source_image.drawAtPoint_fromRect_operation_fraction_(
        NSZeroPoint,
        NSZeroRect,
        NSCompositingOperationCopy,
        1.0
    )

    return bitmap.TIFFRepresentation()
529
+
530
+
531
def are_images_identical(img1, img2):
    """Return True when both images have the same shape and pixel values.

    ``None`` is only identical to ``None``. Other inputs are converted with
    ``np.array`` before comparison.
    """
    # Identity checks instead of ``None in (...)`` / ``==``: equality on numpy
    # arrays is element-wise, so it either raises ValueError or returns an
    # array rather than a boolean.
    if img1 is None or img2 is None:
        return img1 is None and img2 is None

    img1 = np.array(img1)
    img2 = np.array(img2)

    return bool(img1.shape == img2.shape and (img1 == img2).all())
539
+
540
+
541
def process_and_write_results(img_or_path, write_to, notifications, last_result, filtering, engine=None, rectangle=None):
    """Run OCR on one image and deliver the recognised text.

    :param img_or_path: image (or path) passed unchanged to the engine.
    :param write_to: ``'websocket'``, ``'clipboard'``, ``'callback'`` or a
        path of a text file to append to; a falsy value writes nowhere.
    :param notifications: when truthy, show a notification with the text.
    :param last_result: previous ``(keys, engine_index)`` pair handed to
        ``filtering``; may be ``None`` when no filtering is used.
    :param filtering: optional callable ``(text, last_result)`` returning
        ``(text, keys)``.
    :param engine: optional engine name; the first instance whose name is
        contained in it (case-insensitive) is used for this call.
    :param rectangle: passed through to the text callback.
    :return: ``(orig_text, text)`` - the keys returned by ``filtering`` (an
        empty list without filtering or on error) and the final text or the
        engine's error message.
    """
    if auto_pause_handler:
        auto_pause_handler.stop()

    # Default to the currently selected engine. Previously an ``engine`` name
    # matching no instance left ``engine_instance`` unbound.
    engine_instance = engine_instances[engine_index]
    if engine:
        requested = engine.lower()
        for i, instance in enumerate(engine_instances):
            if instance.name.lower() in requested:
                engine_instance = instance
                if last_result is not None:
                    # Tag the previous result with the engine used for this call.
                    last_result = (last_result[0], i)
                break
        else:
            logger.warning(f'Engine "{engine}" is not available, using the current engine instead')

    t0 = time.time()
    res, text = engine_instance(img_or_path)
    t1 = time.time()

    orig_text = []
    engine_color = config.get_general('engine_color')
    if res:
        if filtering:
            text, orig_text = filtering(text, last_result)
        text = post_process(text)
        logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
        if notifications:
            notifier.send(title='owocr', message='Text recognized: ' + text)

        if write_to == 'websocket':
            websocket_server_thread.send_text(text)
        elif write_to == 'clipboard':
            pyperclipfix.copy(text)
        elif write_to == "callback":
            txt_callback(text, rectangle)
        elif write_to:
            with Path(write_to).open('a', encoding='utf-8') as f:
                f.write(text + '\n')

        # Restart the auto-pause countdown after a successful recognition.
        if auto_pause_handler and not paused:
            auto_pause_handler.start()
    else:
        logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')

    return orig_text, text
584
+
585
+
586
def get_path_key(path):
    """Return ``(path, mtime)``, with the modification time taken from ``lstat``."""
    modified_at = path.lstat().st_mtime
    return (path, modified_at)
588
+
589
+
590
def init_config(parse_args=True):
    """Create the module-level ``config`` object, forwarding ``parse_args`` to ``Config``."""
    global config
    config = Config(parse_args)
593
+
594
+
595
+ def run(read_from=None,
596
+ write_to=None,
597
+ engine=None,
598
+ pause_at_startup=None,
599
+ ignore_flag=None,
600
+ delete_images=None,
601
+ notifications=None,
602
+ auto_pause=None,
603
+ combo_pause=None,
604
+ combo_engine_switch=None,
605
+ screen_capture_area=None,
606
+ screen_capture_exclusions=None,
607
+ screen_capture_window=None,
608
+ screen_capture_delay_secs=None,
609
+ screen_capture_only_active_windows=None,
610
+ screen_capture_combo=None,
611
+ stop_running_flag=None,
612
+ screen_capture_event_bus=None,
613
+ rectangle=None,
614
+ text_callback=None,
615
+ ):
616
+ """
617
+ Japanese OCR client
618
+
619
+ Runs OCR in the background.
620
+ It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
621
+ Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
622
+
623
+ :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
624
+ :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
625
+ :param delay_secs: How often to check for new images, in seconds.
626
+ :param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
627
+ :param pause_at_startup: Pause at startup.
628
+ :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
629
+ :param delete_images: Delete image files after processing when reading from a directory.
630
+ :param notifications: Show an operating system notification with the detected text.
631
+ :param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
632
+ :param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
633
+ :param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
634
+ :param screen_capture_area: Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).
635
+ :param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture.
636
+ :param screen_capture_only_active_windows: When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.
637
+ :param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
638
+ """
639
+
640
+ logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
641
+
642
+ if config.has_config:
643
+ logger.info('Parsed config file')
644
+ else:
645
+ logger.warning('No config file, defaults will be used.')
646
+ if config.downloaded_config:
647
+ logger.info(f'A default config file has been downloaded to {config.config_path}')
648
+
649
+ global engine_instances
650
+ global engine_keys
651
+ engine_instances = []
652
+ config_engines = []
653
+ engine_keys = []
654
+ default_engine = ''
655
+
656
+ if len(config.get_general('engines')) > 0:
657
+ for config_engine in config.get_general('engines').split(','):
658
+ config_engines.append(config_engine.strip().lower())
659
+
660
+ for _,engine_class in sorted(inspect.getmembers(sys.modules[__name__], lambda x: hasattr(x, '__module__') and x.__module__ and __package__ + '.ocr' in x.__module__ and inspect.isclass(x))):
661
+ if len(config_engines) == 0 or engine_class.name in config_engines:
662
+ if config.get_engine(engine_class.name) == None:
663
+ engine_instance = engine_class()
664
+ else:
665
+ engine_instance = engine_class(config.get_engine(engine_class.name))
666
+
667
+ if engine_instance.available:
668
+ engine_instances.append(engine_instance)
669
+ engine_keys.append(engine_class.key)
670
+ if engine == engine_class.name:
671
+ default_engine = engine_class.key
672
+
673
+ if len(engine_keys) == 0:
674
+ msg = 'No engines available!'
675
+ raise NotImplementedError(msg)
676
+
677
+ global engine_index
678
+ global terminated
679
+ global paused
680
+ global just_unpaused
681
+ global first_pressed
682
+ global notifier
683
+ global auto_pause_handler
684
+ terminated = False
685
+ paused = pause_at_startup
686
+ just_unpaused = True
687
+ first_pressed = None
688
+ auto_pause_handler = None
689
+ engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
690
+ engine_color = config.get_general('engine_color')
691
+ prefix_to_use = ""
692
+ delay_secs = config.get_general('delay_secs')
693
+ screen_capture_on_combo = False
694
+ notifier = DesktopNotifierSync()
695
+ key_combos = {}
696
+
697
+ if read_from != 'screencapture' and auto_pause != 0:
698
+ auto_pause_handler = AutopauseTimer(auto_pause)
699
+
700
+ if combo_pause:
701
+ key_combos[combo_pause] = pause_handler
702
+ if combo_engine_switch:
703
+ if combo_pause:
704
+ key_combos[combo_engine_switch] = engine_change_handler
705
+ else:
706
+ raise ValueError('combo_pause must also be specified')
707
+
708
+ if read_from == 'websocket' or write_to == 'websocket':
709
+ global websocket_server_thread
710
+ websocket_server_thread = WebsocketServerThread(read_from == 'websocket')
711
+ websocket_server_thread.start()
712
+
713
+ if write_to == "callback" and text_callback:
714
+ global txt_callback
715
+ txt_callback = text_callback
716
+
717
+ if read_from == 'websocket':
718
+ global websocket_queue
719
+ websocket_queue = queue.Queue()
720
+ read_from_readable = 'websocket'
721
+ elif read_from == 'unixsocket':
722
+ if sys.platform == 'win32':
723
+ raise ValueError('"unixsocket" is not currently supported on Windows')
724
+
725
+ global unixsocket_queue
726
+ unixsocket_queue = queue.Queue()
727
+ socket_path = Path('/tmp/owocr.sock')
728
+ if socket_path.exists():
729
+ socket_path.unlink()
730
+ unix_socket_server = socketserver.ThreadingUnixStreamServer(str(socket_path), RequestHandler)
731
+ unix_socket_server_thread = threading.Thread(target=unix_socket_server.serve_forever, daemon=True)
732
+ unix_socket_server_thread.start()
733
+ read_from_readable = 'unix socket'
734
+ elif read_from == 'clipboard':
735
+ macos_clipboard_polling = False
736
+ windows_clipboard_polling = False
737
+ img = None
738
+
739
+ if sys.platform == 'darwin':
740
+ from AppKit import NSPasteboard, NSPasteboardTypeTIFF, NSPasteboardTypeString
741
+ pasteboard = NSPasteboard.generalPasteboard()
742
+ count = pasteboard.changeCount()
743
+ macos_clipboard_polling = True
744
+ elif sys.platform == 'win32':
745
+ global clipboard_event
746
+ clipboard_event = threading.Event()
747
+ windows_clipboard_thread = WindowsClipboardThread()
748
+ windows_clipboard_thread.start()
749
+ windows_clipboard_polling = True
750
+ else:
751
+ from PIL import ImageGrab
752
+
753
+ read_from_readable = 'clipboard'
754
+ elif read_from == 'screencapture':
755
+ if screen_capture_combo:
756
+ screen_capture_on_combo = True
757
+ global screenshot_event
758
+ screenshot_event = threading.Event()
759
+ key_combos[screen_capture_combo] = on_screenshot_combo
760
+ if screen_capture_event_bus:
761
+ screen_capture_on_combo = True
762
+ screenshot_event = threading.Event()
763
+ screen_capture_event_bus = on_screenshot_bus
764
+ if type(screen_capture_area) == tuple:
765
+ screen_capture_area = ','.join(map(str, screen_capture_area))
766
+ global screencapture_window_active
767
+ global screencapture_window_visible
768
+ screencapture_mode = None
769
+ screencapture_window_active = True
770
+ screencapture_window_visible = True
771
+ last_result = ([], engine_index)
772
+ if screen_capture_area == '':
773
+ screencapture_mode = 0
774
+ elif screen_capture_area.startswith('screen_'):
775
+ parts = screen_capture_area.split('_')
776
+ if len(parts) != 2 or not parts[1].isdigit():
777
+ raise ValueError('Invalid screen_capture_area')
778
+ screen_capture_monitor = int(parts[1])
779
+ screencapture_mode = 1
780
+ elif len(screen_capture_area.split(',')) == 4:
781
+ screencapture_mode = 3
782
+ else:
783
+ screencapture_mode = 2
784
+ screen_capture_window = screen_capture_area
785
+
786
+ if screencapture_mode != 2:
787
+ sct = mss.mss()
788
+
789
+ if screencapture_mode == 1:
790
+ mon = sct.monitors
791
+ if len(mon) <= screen_capture_monitor:
792
+ raise ValueError('Invalid monitor number in screen_capture_area')
793
+ coord_left = mon[screen_capture_monitor]['left']
794
+ coord_top = mon[screen_capture_monitor]['top']
795
+ coord_width = mon[screen_capture_monitor]['width']
796
+ coord_height = mon[screen_capture_monitor]['height']
797
+ elif screencapture_mode == 3:
798
+ coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')]
799
+ else:
800
+ logger.opt(ansi=True).info('Launching screen coordinate picker')
801
+ screen_selection = get_screen_selection()
802
+ if not screen_selection:
803
+ raise ValueError('Picker window was closed or an error occurred')
804
+ screen_capture_monitor = screen_selection['monitor']
805
+ x, y, coord_width, coord_height = screen_selection['coordinates']
806
+ if coord_width > 0 and coord_height > 0:
807
+ coord_top = screen_capture_monitor['top'] + y
808
+ coord_left = screen_capture_monitor['left'] + x
809
+ else:
810
+ logger.opt(ansi=True).info('Selection is empty, selecting whole screen')
811
+ coord_left = screen_capture_monitor['left']
812
+ coord_top = screen_capture_monitor['top']
813
+ coord_width = screen_capture_monitor['width']
814
+ coord_height = screen_capture_monitor['height']
815
+
816
+ sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
817
+ logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
818
+ if screencapture_mode == 2 or screen_capture_window:
819
+ area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
820
+ if sys.platform == 'darwin':
821
+ if int(platform.mac_ver()[0].split('.')[0]) < 14:
822
+ old_macos_screenshot_api = True
823
+ else:
824
+ global screencapturekit_queue
825
+ screencapturekit_queue = queue.Queue()
826
+ CGMainDisplayID()
827
+ old_macos_screenshot_api = False
828
+
829
+ window_list = CGWindowListCopyWindowInfo(kCGWindowListExcludeDesktopElements, kCGNullWindowID)
830
+ window_titles = []
831
+ window_ids = []
832
+ window_index = None
833
+ for i, window in enumerate(window_list):
834
+ window_title = window.get(kCGWindowName, '')
835
+ if psutil.Process(window['kCGWindowOwnerPID']).name() not in ('Terminal', 'iTerm2'):
836
+ window_titles.append(window_title)
837
+ window_ids.append(window['kCGWindowNumber'])
838
+
839
+ if screen_capture_area in window_titles:
840
+ window_index = window_titles.index(screen_capture_window)
841
+ else:
842
+ for t in window_titles:
843
+ if screen_capture_area in t:
844
+ window_index = window_titles.index(t)
845
+ break
846
+
847
+ if not window_index:
848
+ raise ValueError(area_invalid_error)
849
+
850
+ window_id = window_ids[window_index]
851
+ window_title = window_titles[window_index]
852
+
853
+ if screen_capture_only_active_windows:
854
+ screencapture_window_active = False
855
+ macos_window_tracker = MacOSWindowTracker(window_id)
856
+ macos_window_tracker.start()
857
+ logger.opt(ansi=True).info(f'Selected window: {window_title}')
858
+ elif sys.platform == 'win32':
859
+ window_handle, window_title = get_windows_window_handle(screen_capture_window)
860
+
861
+ if not window_handle:
862
+ raise ValueError(area_invalid_error)
863
+
864
+ ctypes.windll.shcore.SetProcessDpiAwareness(1)
865
+
866
+ if screen_capture_only_active_windows:
867
+ screencapture_window_active = False
868
+ windows_window_tracker = WindowsWindowTracker(window_handle, screen_capture_only_active_windows)
869
+ windows_window_tracker.start()
870
+ logger.opt(ansi=True).info(f'Selected window: {window_title}')
871
+ else:
872
+ raise ValueError('Window capture is only currently supported on Windows and macOS')
873
+
874
+ filtering = TextFiltering()
875
+ read_from_readable = 'screen capture'
876
+ else:
877
+ read_from = Path(read_from)
878
+ if not read_from.is_dir():
879
+ raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
880
+
881
+ allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')
882
+ old_paths = set()
883
+ for path in read_from.iterdir():
884
+ if path.suffix.lower() in allowed_extensions:
885
+ old_paths.add(get_path_key(path))
886
+
887
+ read_from_readable = f'directory {read_from}'
888
+
889
+ if len(key_combos) > 0:
890
+ key_combo_listener = keyboard.GlobalHotKeys(key_combos)
891
+ key_combo_listener.start()
892
+
893
+ if write_to in ('clipboard', 'websocket', 'callback'):
894
+ write_to_readable = write_to
895
+ else:
896
+ if Path(write_to).suffix.lower() != '.txt':
897
+ raise ValueError('write_to must be either "websocket", "clipboard" or a path to a text file')
898
+ write_to_readable = f'file {write_to}'
899
+
900
+ # signal.signal(signal.SIGINT, signal_handler)
901
+ user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
902
+ user_input_thread.start()
903
+ logger.opt(ansi=True).info(f"Reading from {read_from_readable}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
904
+
905
+ while not terminated and not stop_running_flag:
906
+ if read_from == 'websocket':
907
+ while True:
908
+ try:
909
+ item = websocket_queue.get(timeout=delay_secs)
910
+ except queue.Empty:
911
+ break
912
+ else:
913
+ if not paused:
914
+ img = Image.open(io.BytesIO(item))
915
+ process_and_write_results(img, write_to, notifications, None, None)
916
+ elif read_from == 'unixsocket':
917
+ while True:
918
+ try:
919
+ item = unixsocket_queue.get(timeout=delay_secs)
920
+ except queue.Empty:
921
+ break
922
+ else:
923
+ if not paused:
924
+ img = Image.open(io.BytesIO(item))
925
+ process_and_write_results(img, write_to, notifications, None, None)
926
+ elif read_from == 'clipboard':
927
+ process_clipboard = False
928
+ if windows_clipboard_polling:
929
+ if clipboard_event.wait(delay_secs):
930
+ clipboard_event.clear()
931
+ while True:
932
+ try:
933
+ win32clipboard.OpenClipboard()
934
+ break
935
+ except pywintypes.error:
936
+ pass
937
+ time.sleep(0.1)
938
+ if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
939
+ clipboard_text = ''
940
+ if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT):
941
+ clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
942
+ if ignore_flag or clipboard_text != '*ocr_ignore*':
943
+ img = Image.open(io.BytesIO(win32clipboard.GetClipboardData(win32clipboard.CF_DIB)))
944
+ process_clipboard = True
945
+ win32clipboard.CloseClipboard()
946
+ elif macos_clipboard_polling:
947
+ if not paused:
948
+ with objc.autorelease_pool():
949
+ old_count = count
950
+ count = pasteboard.changeCount()
951
+ if not just_unpaused and count != old_count and NSPasteboardTypeTIFF in pasteboard.types():
952
+ clipboard_text = ''
953
+ if NSPasteboardTypeString in pasteboard.types():
954
+ clipboard_text = pasteboard.stringForType_(NSPasteboardTypeString)
955
+ if ignore_flag or clipboard_text != '*ocr_ignore*':
956
+ img = normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
957
+ img = Image.open(io.BytesIO(img))
958
+ process_clipboard = True
959
+ else:
960
+ if not paused:
961
+ old_img = img
962
+ try:
963
+ img = ImageGrab.grabclipboard()
964
+ except Exception:
965
+ pass
966
+ else:
967
+ if ((not just_unpaused) and isinstance(img, Image.Image) and \
968
+ (ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
969
+ (not are_images_identical(img, old_img))):
970
+ process_clipboard = True
971
+
972
+ if process_clipboard:
973
+ process_and_write_results(img, write_to, notifications, None, None)
974
+
975
+ just_unpaused = False
976
+
977
+ if not windows_clipboard_polling:
978
+ time.sleep(delay_secs)
979
+ elif read_from == 'screencapture':
980
+ if screen_capture_on_combo:
981
+ take_screenshot = screenshot_event.wait(delay_secs)
982
+ if take_screenshot:
983
+ screenshot_event.clear()
984
+ else:
985
+ take_screenshot = screencapture_window_active and not paused
986
+
987
+ if take_screenshot and screencapture_window_visible:
988
+ if screencapture_mode == 2:
989
+ if sys.platform == 'darwin':
990
+ with objc.autorelease_pool():
991
+ if old_macos_screenshot_api:
992
+ cg_image = CGWindowListCreateImageFromArray(CGRectNull, [window_id], kCGWindowImageBoundsIgnoreFraming)
993
+ else:
994
+ capture_macos_window_screenshot(window_id)
995
+ try:
996
+ cg_image = screencapturekit_queue.get(timeout=0.5)
997
+ except queue.Empty:
998
+ cg_image = None
999
+ if not cg_image:
1000
+ on_window_closed(False)
1001
+ break
1002
+ width = CGImageGetWidth(cg_image)
1003
+ height = CGImageGetHeight(cg_image)
1004
+ raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
1005
+ bpr = CGImageGetBytesPerRow(cg_image)
1006
+ img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
1007
+ else:
1008
+ try:
1009
+ coord_left, coord_top, right, bottom = win32gui.GetWindowRect(window_handle)
1010
+ coord_width = right - coord_left
1011
+ coord_height = bottom - coord_top
1012
+
1013
+ hwnd_dc = win32gui.GetWindowDC(window_handle)
1014
+ mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
1015
+ save_dc = mfc_dc.CreateCompatibleDC()
1016
+
1017
+ save_bitmap = win32ui.CreateBitmap()
1018
+ save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
1019
+ save_dc.SelectObject(save_bitmap)
1020
+
1021
+ result = ctypes.windll.user32.PrintWindow(window_handle, save_dc.GetSafeHdc(), 2)
1022
+
1023
+ bmpinfo = save_bitmap.GetInfo()
1024
+ bmpstr = save_bitmap.GetBitmapBits(True)
1025
+ except pywintypes.error:
1026
+ on_window_closed(False)
1027
+ break
1028
+ img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
1029
+
1030
+ win32gui.DeleteObject(save_bitmap.GetHandle())
1031
+ save_dc.DeleteDC()
1032
+ mfc_dc.DeleteDC()
1033
+ win32gui.ReleaseDC(window_handle, hwnd_dc)
1034
+ else:
1035
+ sct_img = sct.grab(sct_params)
1036
+ img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
1037
+ # img.save(os.path.join(get_temporary_directory(), 'screencapture_before.png'), 'png')
1038
+ if screen_capture_exclusions:
1039
+ img = img.convert("RGBA")
1040
+ draw = ImageDraw.Draw(img)
1041
+ for exclusion in screen_capture_exclusions:
1042
+ left, top, right, bottom = exclusion
1043
+ draw.rectangle((left, top, right, bottom), fill=(0, 0, 0, 0))
1044
+ # draw.rectangle((left, top, right, bottom), fill=(0, 0, 0))
1045
+ # img.save(os.path.join(get_temporary_directory(), 'screencapture.png'), 'png')
1046
+ res, _ = process_and_write_results(img, write_to, notifications, last_result, filtering, rectangle=rectangle)
1047
+ if res:
1048
+ last_result = (res, engine_index)
1049
+ delay = screen_capture_delay_secs
1050
+ else:
1051
+ delay = delay_secs
1052
+
1053
+ if not screen_capture_on_combo and delay:
1054
+ time.sleep(delay)
1055
+ else:
1056
+ for path in read_from.iterdir():
1057
+ if path.suffix.lower() in allowed_extensions:
1058
+ path_key = get_path_key(path)
1059
+ if path_key not in old_paths:
1060
+ old_paths.add(path_key)
1061
+
1062
+ if not paused:
1063
+ try:
1064
+ img = Image.open(path)
1065
+ img.load()
1066
+ except (UnidentifiedImageError, OSError) as e:
1067
+ logger.warning(f'Error while reading file {path}: {e}')
1068
+ else:
1069
+ process_and_write_results(img, write_to, notifications, None, None)
1070
+ img.close()
1071
+ if delete_images:
1072
+ Path.unlink(path)
1073
+
1074
+ time.sleep(delay_secs)
1075
+
1076
+ if read_from == 'websocket' or write_to == 'websocket':
1077
+ websocket_server_thread.stop_server()
1078
+ websocket_server_thread.join()
1079
+ if read_from == 'clipboard' and windows_clipboard_polling:
1080
+ win32api.PostThreadMessage(windows_clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
1081
+ windows_clipboard_thread.join()
1082
+ elif read_from == 'screencapture' and screencapture_mode == 2:
1083
+ if sys.platform == 'darwin':
1084
+ if screen_capture_only_active_windows:
1085
+ macos_window_tracker.stop = True
1086
+ macos_window_tracker.join()
1087
+ else:
1088
+ windows_window_tracker.stop = True
1089
+ windows_window_tracker.join()
1090
+ elif read_from == 'unixsocket':
1091
+ unix_socket_server.shutdown()
1092
+ unix_socket_server_thread.join()
1093
+ if len(key_combos) > 0:
1094
+ key_combo_listener.stop()
1095
+ if auto_pause_handler:
1096
+ auto_pause_handler.stop()