GameSentenceMiner 2.10.11__py3-none-any.whl → 2.10.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,6 +80,7 @@ class ConfigApp:
80
80
  def __init__(self, root):
81
81
  self.window = root
82
82
  self.on_exit = None
83
+ self.window.tk.call('tk', 'scaling', 1.5) # Set DPI scaling factor
83
84
  # self.window = ttk.Window(themename='darkly')
84
85
  self.window.title('GameSentenceMiner Configuration')
85
86
  self.window.protocol("WM_DELETE_WINDOW", self.hide)
@@ -44,13 +44,13 @@ class WindowGeometry:
44
44
  class OCRConfig:
45
45
  scene: str
46
46
  rectangles: List[Rectangle]
47
- pre_scale_rectangles: List[Rectangle] = None
47
+ pre_scale_rectangles: Optional[List[Rectangle]] = None
48
48
  coordinate_system: str = None
49
49
  window_geometry: Optional[WindowGeometry] = None
50
50
  window: Optional[str] = None
51
51
  language: str = "ja"
52
52
 
53
- def __post_init__(self):
53
+ def scale_coords(self):
54
54
  self.pre_scale_rectangles = deepcopy(self.rectangles)
55
55
  if self.coordinate_system and self.coordinate_system == "percentage" and self.window:
56
56
  import pygetwindow as gw
@@ -178,17 +178,28 @@ class WebsocketServerThread(threading.Thread):
178
178
  asyncio.run(main())
179
179
 
180
180
 
181
def compare_ocr_results(prev_text, new_text, threshold=90):
    """Return True when two OCR results are similar enough to count as the same text.

    Each argument may be a string or a list of text fragments; for lists,
    ``None`` entries are dropped and the remaining fragments are concatenated
    before comparison.

    Args:
        prev_text: Previously seen OCR text (str, list of str/None, or None).
        new_text: Newly produced OCR text (str, list of str/None, or None).
        threshold: Minimum ``fuzz.ratio`` score for the texts to be
            considered similar.

    Returns:
        bool: False when either side is empty after normalization, otherwise
        whether ``fuzz.ratio(prev_text, new_text) >= threshold``.
    """
    def _flatten(value):
        # List inputs are joined into one string; anything else is passed through.
        if isinstance(value, list):
            return ''.join(item for item in value if item is not None)
        return value

    prev_text = _flatten(prev_text)
    new_text = _flatten(new_text)

    # Guard AFTER flattening: a list such as [None] or [""] is truthy but
    # collapses to "", and an empty side must never be reported as a match.
    if not prev_text or not new_text:
        return False

    return fuzz.ratio(prev_text, new_text) >= threshold
190
+
181
191
  all_cords = None
182
192
  rectangles = None
183
- last_ocr2_result = ""
193
+ last_ocr2_result = []
184
194
 
185
195
  def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False):
186
196
  global twopassocr, ocr2, last_ocr2_result
187
197
  try:
188
198
  orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
189
199
  engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity if not ignore_furigana_filter else 0)
190
- if fuzz.ratio(last_ocr2_result, orig_text) >= 90:
191
- logger.info("Seems like the same text from previous ocr2 result, not sending")
200
+
201
+ if compare_ocr_results(last_ocr2_result, orig_text):
202
+ logger.info("Detected similar text from previous OCR2 result, not sending")
192
203
  return
193
204
  save_result_image(img)
194
205
  last_ocr2_result = orig_text
@@ -242,7 +253,7 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
242
253
  line_start_time = time if time else datetime.now()
243
254
 
244
255
  if manual or not twopassocr:
245
- if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
256
+ if compare_ocr_results(previous_orig_text, orig_text_string):
246
257
  logger.info("Seems like Text we already sent, not doing anything.")
247
258
  return
248
259
  save_result_image(img)
@@ -260,13 +271,13 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
260
271
  if previous_text and text_stable_start_time:
261
272
  stable_time = text_stable_start_time
262
273
  previous_img_local = previous_img
263
- if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
274
+ if compare_ocr_results(previous_orig_text, orig_text_string):
264
275
  logger.info("Seems like Text we already sent, not doing anything.")
265
276
  previous_text = None
266
277
  return
267
278
  previous_orig_text = orig_text_string
268
279
  previous_ocr1_result = previous_text
269
- if crop_coords:
280
+ if crop_coords and optimize_second_scan:
270
281
  previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
271
282
  previous_img_local = previous_img_local.crop(crop_coords)
272
283
  second_ocr_queue.put((previous_text, stable_time, previous_img_local, filtering))
@@ -389,70 +400,88 @@ def set_force_stable_hotkey():
389
400
  print("Press Ctrl+Shift+F to toggle force stable mode.")
390
401
 
391
402
  if __name__ == "__main__":
392
- global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window
393
- import sys
394
-
395
- import argparse
396
-
397
- parser = argparse.ArgumentParser(description="OCR Configuration")
398
- parser.add_argument("--language", type=str, default="ja", help="Language for OCR (default: ja)")
399
- parser.add_argument("--ocr1", type=str, default="oneocr", help="Primary OCR engine (default: oneocr)")
400
- parser.add_argument("--ocr2", type=str, default="glens", help="Secondary OCR engine (default: glens)")
401
- parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1, help="Enable two-pass OCR (default: 1)")
402
- parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
403
- parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
404
- parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
405
- parser.add_argument("--window", type=str, help="Specify the window name for OCR")
406
- parser.add_argument("--furigana_filter_sensitivity", type=float, default=0, help="Furigana Filter Sensitivity for OCR (default: 0)")
407
- parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
408
- parser.add_argument("--area_select_ocr_hotkey", type=str, default="ctrl+shift+o", help="Hotkey for area selection OCR (default: ctrl+shift+o)")
409
-
410
- args = parser.parse_args()
411
-
412
- language = args.language
413
- ocr1 = args.ocr1
414
- ocr2 = args.ocr2 if args.ocr2 else None
415
- twopassocr = bool(args.twopassocr)
416
- manual = args.manual
417
- ss_clipboard = args.clipboard
418
- window_name = args.window
419
- furigana_filter_sensitivity = args.furigana_filter_sensitivity
420
- ss_hotkey = args.area_select_ocr_hotkey.lower()
421
- manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift", "<shift>").replace("alt", "<alt>") if args.manual_ocr_hotkey else None
422
- clipboard_output = args.clipboard_output
423
-
424
- logger.info(f"Received arguments: {vars(args)}")
425
- # set_force_stable_hotkey()
426
- ocr_config: OCRConfig = get_ocr_config(window=window_name)
427
- if ocr_config:
428
- if ocr_config.window:
429
- start_time = time.time()
430
- while time.time() - start_time < 30:
431
- window = get_window(ocr_config.window)
432
- if window or manual:
433
- break
434
- logger.info(f"Window: {ocr_config.window} Could not be found, retrying in 1 second...")
435
- time.sleep(1)
436
- else:
437
- logger.error(f"Window '{ocr_config.window}' not found within 30 seconds.")
438
- sys.exit(1)
439
- logger.info(f"Starting OCR with configuration: Window: {ocr_config.window}, Rectangles: {ocr_config.rectangles}, Engine 1: {ocr1}, Engine 2: {ocr2}, Two-pass OCR: {twopassocr}")
440
- set_dpi_awareness()
441
- if manual or ocr_config:
442
- rectangles = ocr_config.rectangles if ocr_config and ocr_config.rectangles else []
443
- oneocr_threads = []
444
- ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config,rectangles ), daemon=True)
445
- ocr_thread.start()
446
- if not manual:
447
- worker_thread = threading.Thread(target=process_task_queue, daemon=True)
448
- worker_thread.start()
449
- websocket_server_thread = WebsocketServerThread(read=True)
450
- websocket_server_thread.start()
451
- add_ss_hotkey(ss_hotkey)
452
- try:
453
- while not done:
454
- time.sleep(1)
455
- except KeyboardInterrupt as e:
456
- pass
457
- else:
458
- print("Failed to load OCR configuration. Please check the logs.")
403
+ try:
404
+ global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan
405
+ import sys
406
+
407
+ import argparse
408
+
409
+ parser = argparse.ArgumentParser(description="OCR Configuration")
410
+ parser.add_argument("--language", type=str, default="ja", help="Language for OCR (default: ja)")
411
+ parser.add_argument("--ocr1", type=str, default="oneocr", help="Primary OCR engine (default: oneocr)")
412
+ parser.add_argument("--ocr2", type=str, default="glens", help="Secondary OCR engine (default: glens)")
413
+ parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1,
414
+ help="Enable two-pass OCR (default: 1)")
415
+ parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
416
+ parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
417
+ parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
418
+ parser.add_argument("--window", type=str, help="Specify the window name for OCR")
419
+ parser.add_argument("--furigana_filter_sensitivity", type=float, default=0,
420
+ help="Furigana Filter Sensitivity for OCR (default: 0)")
421
+ parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
422
+ parser.add_argument("--area_select_ocr_hotkey", type=str, default="ctrl+shift+o",
423
+ help="Hotkey for area selection OCR (default: ctrl+shift+o)")
424
+ parser.add_argument("--optimize_second_scan", action="store_true",
425
+ help="Optimize second scan by cropping based on first scan results")
426
+
427
+ args = parser.parse_args()
428
+
429
+ language = args.language
430
+ ocr1 = args.ocr1
431
+ ocr2 = args.ocr2 if args.ocr2 else None
432
+ twopassocr = bool(args.twopassocr)
433
+ manual = args.manual
434
+ ss_clipboard = args.clipboard
435
+ window_name = args.window
436
+ furigana_filter_sensitivity = args.furigana_filter_sensitivity
437
+ ss_hotkey = args.area_select_ocr_hotkey.lower()
438
+ manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift",
439
+ "<shift>").replace(
440
+ "alt", "<alt>") if args.manual_ocr_hotkey else None
441
+ clipboard_output = args.clipboard_output
442
+ optimize_second_scan = args.optimize_second_scan
443
+
444
+ window = None
445
+ logger.info(f"Received arguments: {vars(args)}")
446
+ # set_force_stable_hotkey()
447
+ ocr_config: OCRConfig = get_ocr_config(window=window_name)
448
+ if ocr_config:
449
+ if ocr_config.window:
450
+ start_time = time.time()
451
+ while time.time() - start_time < 30:
452
+ window = get_window(ocr_config.window)
453
+ if window or manual:
454
+ if window:
455
+ ocr_config.scale_coords()
456
+ break
457
+ logger.info(f"Window: {ocr_config.window} Could not be found, retrying in 1 second...")
458
+ time.sleep(1)
459
+ else:
460
+ logger.error(f"Window '{ocr_config.window}' not found within 30 seconds.")
461
+ sys.exit(1)
462
+ logger.info(
463
+ f"Starting OCR with configuration: Window: {ocr_config.window}, Rectangles: {ocr_config.rectangles}, Engine 1: {ocr1}, Engine 2: {ocr2}, Two-pass OCR: {twopassocr}")
464
+ set_dpi_awareness()
465
+ if manual or ocr_config:
466
+ rectangles = ocr_config.rectangles if ocr_config and ocr_config.rectangles else []
467
+ oneocr_threads = []
468
+ ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config, rectangles), daemon=True)
469
+ ocr_thread.start()
470
+ if not manual:
471
+ worker_thread = threading.Thread(target=process_task_queue, daemon=True)
472
+ worker_thread.start()
473
+ websocket_server_thread = WebsocketServerThread(read=True)
474
+ websocket_server_thread.start()
475
+ add_ss_hotkey(ss_hotkey)
476
+ try:
477
+ while not done:
478
+ time.sleep(1)
479
+ except KeyboardInterrupt as e:
480
+ pass
481
+ else:
482
+ print("Failed to load OCR configuration. Please check the logs.")
483
+ except Exception as e:
484
+ logger.info(e, exc_info=True)
485
+ logger.debug(e, exc_info=True)
486
+ logger.info("Closing in 5 seconds...")
487
+ time.sleep(5)
@@ -1,6 +1,7 @@
1
1
  import re
2
2
  import os
3
3
  import io
4
+ import time
4
5
  from pathlib import Path
5
6
  import sys
6
7
  import platform
@@ -17,7 +18,7 @@ from google.generativeai import GenerationConfig
17
18
  from loguru import logger
18
19
  import requests
19
20
 
20
- from GameSentenceMiner.util.configuration import get_app_directory, get_temporary_directory
21
+ # from GameSentenceMiner.util.configuration import get_temporary_directory
21
22
 
22
23
  try:
23
24
  from manga_ocr import MangaOcr as MOCR
@@ -78,7 +79,6 @@ try:
78
79
  from GameSentenceMiner.owocr.owocr.lens_betterproto import *
79
80
  import random
80
81
  except ImportError:
81
- print('Google Lens not available, please install betterproto package!')
82
82
  pass
83
83
 
84
84
  try:
@@ -101,11 +101,13 @@ def post_process(text):
101
101
 
102
102
 
103
103
  def input_to_pil_image(img):
104
+ is_path = False
104
105
  if isinstance(img, Image.Image):
105
106
  pil_image = img
106
107
  elif isinstance(img, (bytes, bytearray)):
107
108
  pil_image = Image.open(io.BytesIO(img))
108
109
  elif isinstance(img, Path):
110
+ is_path = True
109
111
  try:
110
112
  pil_image = Image.open(img)
111
113
  pil_image.load()
@@ -113,7 +115,7 @@ def input_to_pil_image(img):
113
115
  return None
114
116
  else:
115
117
  raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
116
- return pil_image
118
+ return pil_image, is_path
117
119
 
118
120
 
119
121
  def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
@@ -164,7 +166,7 @@ class MangaOcr:
164
166
  key = 'm'
165
167
  available = False
166
168
 
167
- def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
169
+ def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}, lang='ja'):
168
170
  if 'manga_ocr' not in sys.modules:
169
171
  logger.warning('manga-ocr not available, Manga OCR will not work!')
170
172
  else:
@@ -178,7 +180,7 @@ class MangaOcr:
178
180
  logger.info('Manga OCR ready')
179
181
 
180
182
  def __call__(self, img, furigana_filter_sensitivity=0):
181
- img = input_to_pil_image(img)
183
+ img, is_path = input_to_pil_image(img)
182
184
  if not img:
183
185
  return (False, 'Invalid image provided')
184
186
 
@@ -193,7 +195,7 @@ class GoogleVision:
193
195
  key = 'g'
194
196
  available = False
195
197
 
196
- def __init__(self):
198
+ def __init__(self, lang='ja'):
197
199
  if 'google.cloud' not in sys.modules:
198
200
  logger.warning('google-cloud-vision not available, Google Vision will not work!')
199
201
  else:
@@ -208,7 +210,7 @@ class GoogleVision:
208
210
  logger.warning('Error parsing Google credentials, Google Vision will not work!')
209
211
 
210
212
  def __call__(self, img, furigana_filter_sensitivity=0):
211
- img = input_to_pil_image(img)
213
+ img, is_path = input_to_pil_image(img)
212
214
  if not img:
213
215
  return (False, 'Invalid image provided')
214
216
 
@@ -236,7 +238,7 @@ class GoogleLens:
236
238
  key = 'l'
237
239
  available = False
238
240
 
239
- def __init__(self):
241
+ def __init__(self, lang='ja'):
240
242
  self.kana_kanji_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
241
243
  if 'betterproto' not in sys.modules:
242
244
  logger.warning('betterproto not available, Google Lens will not work!')
@@ -245,7 +247,7 @@ class GoogleLens:
245
247
  logger.info('Google Lens ready')
246
248
 
247
249
  def __call__(self, img, furigana_filter_sensitivity=0):
248
- img = input_to_pil_image(img)
250
+ img, is_path = input_to_pil_image(img)
249
251
  if not img:
250
252
  return (False, 'Invalid image provided')
251
253
 
@@ -370,9 +372,7 @@ class GoogleLens:
370
372
  aspect_ratio = img.width / img.height
371
373
  new_w = int(sqrt(3000000 * aspect_ratio))
372
374
  new_h = int(new_w / aspect_ratio)
373
- img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
374
- # img.close()
375
- img = img_resized
375
+ img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
376
376
 
377
377
  return (pil_image_to_bytes(img), img.width, img.height)
378
378
 
@@ -382,7 +382,7 @@ class GoogleLensWeb:
382
382
  key = 'k'
383
383
  available = False
384
384
 
385
- def __init__(self):
385
+ def __init__(self, lang='ja'):
386
386
  if 'pyjson5' not in sys.modules:
387
387
  logger.warning('pyjson5 not available, Google Lens (web) will not work!')
388
388
  else:
@@ -391,7 +391,7 @@ class GoogleLensWeb:
391
391
  logger.info('Google Lens (web) ready')
392
392
 
393
393
  def __call__(self, img, furigana_filter_sensitivity=0):
394
- img = input_to_pil_image(img)
394
+ img, is_path = input_to_pil_image(img)
395
395
  if not img:
396
396
  return (False, 'Invalid image provided')
397
397
 
@@ -466,9 +466,7 @@ class GoogleLensWeb:
466
466
  aspect_ratio = img.width / img.height
467
467
  new_w = int(sqrt(3000000 * aspect_ratio))
468
468
  new_h = int(new_w / aspect_ratio)
469
- img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
470
- # img.close()
471
- img = img_resized
469
+ img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
472
470
 
473
471
  return pil_image_to_bytes(img)
474
472
 
@@ -478,13 +476,13 @@ class Bing:
478
476
  key = 'b'
479
477
  available = False
480
478
 
481
- def __init__(self):
479
+ def __init__(self, lang='ja'):
482
480
  self.requests_session = requests.Session()
483
481
  self.available = True
484
482
  logger.info('Bing ready')
485
483
 
486
484
  def __call__(self, img, furigana_filter_sensitivity=0):
487
- img = input_to_pil_image(img)
485
+ img, is_path = input_to_pil_image(img)
488
486
  if not img:
489
487
  return (False, 'Invalid image provided')
490
488
 
@@ -545,7 +543,7 @@ class Bing:
545
543
  'imageInfo': {'imageInsightsToken': image_insights_token, 'source': 'Url'},
546
544
  'knowledgeRequest': {'invokedSkills': ['OCR'], 'index': 1}
547
545
  }
548
- files = {
546
+ files = {
549
547
  'knowledgeRequest': (None, json.dumps(api_data_json), 'application/json')
550
548
  }
551
549
 
@@ -578,7 +576,7 @@ class Bing:
578
576
  for region in regions:
579
577
  for line in region.get('lines', []):
580
578
  res += line['text'] + '\n'
581
-
579
+
582
580
  x = (True, res)
583
581
 
584
582
  # img.close()
@@ -593,9 +591,7 @@ class Bing:
593
591
  resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
594
592
  new_w = int(img.width * resize_factor)
595
593
  new_h = int(img.height * resize_factor)
596
- img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
597
- # img.close()
598
- img = img_resized
594
+ img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
599
595
 
600
596
  img_bytes, _ = limit_image_size(img, max_byte_size)
601
597
 
@@ -610,7 +606,7 @@ class AppleVision:
610
606
  key = 'a'
611
607
  available = False
612
608
 
613
- def __init__(self):
609
+ def __init__(self, lang='ja'):
614
610
  if sys.platform != 'darwin':
615
611
  logger.warning('Apple Vision is not supported on non-macOS platforms!')
616
612
  elif int(platform.mac_ver()[0].split('.')[0]) < 13:
@@ -620,7 +616,7 @@ class AppleVision:
620
616
  logger.info('Apple Vision ready')
621
617
 
622
618
  def __call__(self, img, furigana_filter_sensitivity=0):
623
- img = input_to_pil_image(img)
619
+ img, is_path = input_to_pil_image(img)
624
620
  if not img:
625
621
  return (False, 'Invalid image provided')
626
622
 
@@ -658,7 +654,7 @@ class AppleLiveText:
658
654
  key = 'd'
659
655
  available = False
660
656
 
661
- def __init__(self):
657
+ def __init__(self, lang='ja'):
662
658
  if sys.platform != 'darwin':
663
659
  logger.warning('Apple Live Text is not supported on non-macOS platforms!')
664
660
  elif int(platform.mac_ver()[0].split('.')[0]) < 13:
@@ -699,7 +695,7 @@ class AppleLiveText:
699
695
  logger.info('Apple Live Text ready')
700
696
 
701
697
  def __call__(self, img, furigana_filter_sensitivity=0):
702
- img = input_to_pil_image(img)
698
+ img, is_path = input_to_pil_image(img)
703
699
  if not img:
704
700
  return (False, 'Invalid image provided')
705
701
 
@@ -738,7 +734,7 @@ class WinRTOCR:
738
734
  key = 'w'
739
735
  available = False
740
736
 
741
- def __init__(self, config={}):
737
+ def __init__(self, config={}, lang='ja'):
742
738
  if sys.platform == 'win32':
743
739
  if int(platform.release()) < 10:
744
740
  logger.warning('WinRT OCR is not supported on Windows older than 10!')
@@ -756,7 +752,7 @@ class WinRTOCR:
756
752
  logger.warning('Error reading URL from config, WinRT OCR will not work!')
757
753
 
758
754
  def __call__(self, img, furigana_filter_sensitivity=0):
759
- img = input_to_pil_image(img)
755
+ img, is_path = input_to_pil_image(img)
760
756
  if not img:
761
757
  return (False, 'Invalid image provided')
762
758
 
@@ -791,7 +787,26 @@ class OneOCR:
791
787
  key = 'z'
792
788
  available = False
793
789
 
794
- def __init__(self, config={}):
790
+ def __init__(self, config={}, lang='ja'):
791
+ if lang == "ja":
792
+ self.regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
793
+ elif lang == "zh":
794
+ self.regex = re.compile(r'[\u4E00-\u9FFF]')
795
+ elif lang == "ko":
796
+ self.regex = re.compile(r'[\uAC00-\uD7AF]')
797
+ elif lang == "ar":
798
+ self.regex = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
799
+ elif lang == "ru":
800
+ self.regex = re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
801
+ elif lang == "el":
802
+ self.regex = re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
803
+ elif lang == "he":
804
+ self.regex = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
805
+ elif lang == "th":
806
+ self.regex = re.compile(r'[\u0E00-\u0E7F]')
807
+ else:
808
+ self.regex = re.compile(
809
+ r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
795
810
  if sys.platform == 'win32':
796
811
  if int(platform.release()) < 10:
797
812
  logger.warning('OneOCR is not supported on Windows older than 10!')
@@ -814,14 +829,13 @@ class OneOCR:
814
829
  logger.warning('Error reading URL from config, OneOCR will not work!')
815
830
 
816
831
  def __call__(self, img, furigana_filter_sensitivity=0):
817
- img = input_to_pil_image(img)
832
+ img, is_path = input_to_pil_image(img)
818
833
  if img.width < 51 or img.height < 51:
819
834
  new_width = max(img.width, 51)
820
835
  new_height = max(img.height, 51)
821
836
  new_img = Image.new("RGBA", (new_width, new_height), (0, 0, 0, 0))
822
837
  new_img.paste(img, ((new_width - img.width) // 2, (new_height - img.height) // 2))
823
838
  img = new_img
824
-
825
839
  if not img:
826
840
  return (False, 'Invalid image provided')
827
841
  crop_coords = None
@@ -829,8 +843,9 @@ class OneOCR:
829
843
  try:
830
844
  ocr_resp = self.model.recognize_pil(img)
831
845
  # print(json.dumps(ocr_resp))
832
- x_coords = [line['bounding_rect'][f'x{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
833
- y_coords = [line['bounding_rect'][f'y{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
846
+ filtered_lines = [line for line in ocr_resp['lines'] if self.regex.search(line['text'])]
847
+ x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
848
+ y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
834
849
  if x_coords and y_coords:
835
850
  crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
836
851
 
@@ -903,7 +918,6 @@ class OneOCR:
903
918
  if res.status_code != 200:
904
919
  return (False, 'Unknown error!')
905
920
 
906
-
907
921
  res = res.json()['text']
908
922
 
909
923
  x = (True, res, crop_coords)
@@ -920,7 +934,7 @@ class AzureImageAnalysis:
920
934
  key = 'v'
921
935
  available = False
922
936
 
923
- def __init__(self, config={}):
937
+ def __init__(self, config={}, lang='ja'):
924
938
  if 'azure.ai.vision.imageanalysis' not in sys.modules:
925
939
  logger.warning('azure-ai-vision-imageanalysis not available, Azure Image Analysis will not work!')
926
940
  else:
@@ -933,7 +947,7 @@ class AzureImageAnalysis:
933
947
  logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
934
948
 
935
949
  def __call__(self, img, furigana_filter_sensitivity=0):
936
- img = input_to_pil_image(img)
950
+ img, is_path = input_to_pil_image(img)
937
951
  if not img:
938
952
  return (False, 'Invalid image provided')
939
953
 
@@ -962,9 +976,7 @@ class AzureImageAnalysis:
962
976
  resize_factor = max(50 / img.width, 50 / img.height)
963
977
  new_w = int(img.width * resize_factor)
964
978
  new_h = int(img.height * resize_factor)
965
- img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
966
- # img.close()
967
- img = img_resized
979
+ img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
968
980
 
969
981
  return pil_image_to_bytes(img)
970
982
 
@@ -974,7 +986,7 @@ class EasyOCR:
974
986
  key = 'e'
975
987
  available = False
976
988
 
977
- def __init__(self, config={'gpu': True}):
989
+ def __init__(self, config={'gpu': True}, lang='ja'):
978
990
  if 'easyocr' not in sys.modules:
979
991
  logger.warning('easyocr not available, EasyOCR will not work!')
980
992
  else:
@@ -985,7 +997,7 @@ class EasyOCR:
985
997
  logger.info('EasyOCR ready')
986
998
 
987
999
  def __call__(self, img, furigana_filter_sensitivity=0):
988
- img = input_to_pil_image(img)
1000
+ img, is_path = input_to_pil_image(img)
989
1001
  if not img:
990
1002
  return (False, 'Invalid image provided')
991
1003
 
@@ -1008,7 +1020,7 @@ class RapidOCR:
1008
1020
  key = 'r'
1009
1021
  available = False
1010
1022
 
1011
- def __init__(self):
1023
+ def __init__(self, lang='ja'):
1012
1024
  if 'rapidocr_onnxruntime' not in sys.modules:
1013
1025
  logger.warning('rapidocr_onnxruntime not available, RapidOCR will not work!')
1014
1026
  else:
@@ -1031,7 +1043,7 @@ class RapidOCR:
1031
1043
  logger.info('RapidOCR ready')
1032
1044
 
1033
1045
  def __call__(self, img, furigana_filter_sensitivity=0):
1034
- img = input_to_pil_image(img)
1046
+ img, is_path = input_to_pil_image(img)
1035
1047
  if not img:
1036
1048
  return (False, 'Invalid image provided')
1037
1049
 
@@ -1055,7 +1067,7 @@ class OCRSpace:
1055
1067
  key = 'o'
1056
1068
  available = False
1057
1069
 
1058
- def __init__(self, config={}):
1070
+ def __init__(self, config={}, lang='ja'):
1059
1071
  try:
1060
1072
  self.api_key = config['api_key']
1061
1073
  self.max_byte_size = config.get('file_size_limit', 1000000)
@@ -1065,7 +1077,7 @@ class OCRSpace:
1065
1077
  logger.warning('Error reading API key from config, OCRSpace will not work!')
1066
1078
 
1067
1079
  def __call__(self, img, furigana_filter_sensitivity=0):
1068
- img = input_to_pil_image(img)
1080
+ img, is_path = input_to_pil_image(img)
1069
1081
  if not img:
1070
1082
  return (False, 'Invalid image provided')
1071
1083
 
@@ -1102,7 +1114,7 @@ class OCRSpace:
1102
1114
  # img.close()
1103
1115
  return x
1104
1116
 
1105
- def _preprocess(self, img):
1117
+ def _preprocess(self, img):
1106
1118
  return limit_image_size(img, self.max_byte_size)
1107
1119
 
1108
1120
 
@@ -1112,7 +1124,7 @@ class GeminiOCR:
1112
1124
  key = 'm'
1113
1125
  available = False
1114
1126
 
1115
- def __init__(self, config={'api_key': None}):
1127
+ def __init__(self, config={'api_key': None}, lang='ja'):
1116
1128
  # if "google-generativeai" not in sys.modules:
1117
1129
  # logger.warning('google-generativeai not available, GeminiOCR will not work!')
1118
1130
  # else:
@@ -1139,7 +1151,7 @@ class GeminiOCR:
1139
1151
  return (False, 'GeminiOCR is not available due to missing API key or configuration error.')
1140
1152
 
1141
1153
  try:
1142
- img = input_to_pil_image(img)
1154
+ img, is_path = input_to_pil_image(img)
1143
1155
  import google.generativeai as genai
1144
1156
  img_bytes = self._preprocess(img)
1145
1157
  if not img_bytes:
@@ -1181,7 +1193,7 @@ class GroqOCR:
1181
1193
  key = 'j'
1182
1194
  available = False
1183
1195
 
1184
- def __init__(self, config={'api_key': None}):
1196
+ def __init__(self, config={'api_key': None}, lang='ja'):
1185
1197
  try:
1186
1198
  import groq
1187
1199
  self.api_key = config['api_key']
@@ -1201,7 +1213,7 @@ class GroqOCR:
1201
1213
  return (False, 'GroqOCR is not available due to missing API key or configuration error.')
1202
1214
 
1203
1215
  try:
1204
- img = input_to_pil_image(img)
1216
+ img, is_path = input_to_pil_image(img)
1205
1217
 
1206
1218
  img_base64 = self._preprocess(img)
1207
1219
  if not img_base64:
@@ -1247,13 +1259,75 @@ class GroqOCR:
1247
1259
  def _preprocess(self, img):
1248
1260
  return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')
1249
1261
 
1262
+ # class QWENOCR:
1263
+ # name = 'qwenvl'
1264
+ # readable_name = 'Qwen2-VL'
1265
+ # key = 'q'
1266
+ # available = False
1267
+ #
1268
+ # def __init__(self, config={}, lang='ja'):
1269
+ # try:
1270
+ # import torch
1271
+ # from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
1272
+ # self.model = Qwen2VLForConditionalGeneration.from_pretrained(
1273
+ # "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"
1274
+ # )
1275
+ # self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
1276
+ # self.device = "cuda" if torch.cuda.is_available() else "cpu"
1277
+ # print(self.device)
1278
+ # self.available = True
1279
+ # logger.info('Qwen2-VL ready')
1280
+ # except Exception as e:
1281
+ # logger.warning(f'Qwen2-VL not available: {e}')
1282
+ #
1283
+ # def __call__(self, img, furigana_filter_sensitivity=0):
1284
+ # if not self.available:
1285
+ # return (False, 'Qwen2-VL is not available.')
1286
+ # try:
1287
+ # img = input_to_pil_image(img)
1288
+ # conversation = [
1289
+ # {
1290
+ # "role": "user",
1291
+ # "content": [
1292
+ # {"type": "image"},
1293
+ # {"type": "text", "text": "Analyze the image. Extract text *only* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana. Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."},
1294
+ # ],
1295
+ # }
1296
+ # ]
1297
+ # text_prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
1298
+ # inputs = self.processor(
1299
+ # text=[text_prompt], images=[img], padding=True, return_tensors="pt"
1300
+ # )
1301
+ # inputs = inputs.to(self.device)
1302
+ # output_ids = self.model.generate(**inputs, max_new_tokens=128)
1303
+ # generated_ids = [
1304
+ # output_ids[len(input_ids):]
1305
+ # for input_ids, output_ids in zip(inputs.input_ids, output_ids)
1306
+ # ]
1307
+ # output_text = self.processor.batch_decode(
1308
+ # generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
1309
+ # )
1310
+ # return (True, output_text[0] if output_text else "")
1311
+ # except Exception as e:
1312
+ # return (False, f'Qwen2-VL inference failed: {e}')
1313
+
1314
+
1315
+ # qwenocr = QWENOCR()
1316
+ #
1317
+ # for i in range(10):
1318
+ # start_time = time.time()
1319
+ # res, text = qwenocr(Image.open('test_furigana.png'), furigana_filter_sensitivity=0) # Example usage
1320
+ # end_time = time.time()
1321
+ #
1322
+ # print(f"Time taken: {end_time - start_time:.2f} seconds")
1323
+ # print(text)
1250
1324
  # class LocalOCR:
1251
1325
  # name = 'local_ocr'
1252
1326
  # readable_name = 'Local OCR'
1253
1327
  # key = '-'
1254
1328
  # available = False
1255
1329
  #
1256
- # def __init__(self):
1330
+ # def __init__(self, lang='ja'):
1257
1331
  # self.requests_session = requests.Session()
1258
1332
  # self.available = True
1259
1333
  # # logger.info('Local OCR ready') # Uncomment if you have a logger defined
@@ -373,7 +373,7 @@ class TextFiltering:
373
373
  "fi"]: # Many European languages use extended Latin
374
374
  block_filtered = self.latin_extended_regex.findall(block)
375
375
  else:
376
- block_filtered = self.english_regex.findall(block)
376
+ block_filtered = self.latin_extended_regex.findall(block)
377
377
 
378
378
  if block_filtered:
379
379
  orig_text_filtered.append(''.join(block_filtered))
@@ -414,8 +414,9 @@ class TextFiltering:
414
414
  return text, orig_text_filtered
415
415
 
416
416
 
417
- class ScreenshotClass:
418
- def __init__(self, screen_capture_area, screen_capture_window, screen_capture_exclusions, screen_capture_only_active_windows, screen_capture_areas):
417
+ class ScreenshotThread(threading.Thread):
418
+ def __init__(self, screen_capture_area, screen_capture_window, screen_capture_exclusions, screen_capture_only_active_windows, screen_capture_areas, screen_capture_on_combo):
419
+ super().__init__(daemon=True)
419
420
  self.macos_window_tracker_instance = None
420
421
  self.windows_window_tracker_instance = None
421
422
  self.screencapture_window_active = True
@@ -424,6 +425,7 @@ class ScreenshotClass:
424
425
  self.screen_capture_exclusions = screen_capture_exclusions
425
426
  self.screen_capture_window = screen_capture_window
426
427
  self.areas = []
428
+ self.use_periodic_queue = not screen_capture_on_combo
427
429
  if screen_capture_area == '':
428
430
  self.screencapture_mode = 0
429
431
  elif screen_capture_area.startswith('screen_'):
@@ -441,10 +443,10 @@ class ScreenshotClass:
441
443
  self.screencapture_mode = 2
442
444
 
443
445
  if self.screencapture_mode != 2:
444
- self.sct = mss.mss()
446
+ sct = mss.mss()
445
447
 
446
448
  if self.screencapture_mode == 1:
447
- mon = self.sct.monitors
449
+ mon = sct.monitors
448
450
  if len(mon) <= screen_capture_monitor:
449
451
  raise ValueError('Invalid monitor number in screen_capture_area')
450
452
  coord_left = mon[screen_capture_monitor]['left']
@@ -535,12 +537,6 @@ class ScreenshotClass:
535
537
  else:
536
538
  raise ValueError('Window capture is only currently supported on Windows and macOS')
537
539
 
538
- def __del__(self):
539
- if self.macos_window_tracker_instance:
540
- self.macos_window_tracker_instance.join()
541
- elif self.windows_window_tracker_instance:
542
- self.windows_window_tracker_instance.join()
543
-
544
540
  def get_windows_window_handle(self, window_title):
545
541
  def callback(hwnd, window_title_part):
546
542
  window_title = win32gui.GetWindowText(hwnd)
@@ -647,114 +643,121 @@ class ScreenshotClass:
647
643
  if not found:
648
644
  on_window_closed(False)
649
645
 
650
- def __call__(self):
651
- if self.screencapture_mode == 2 or self.screen_capture_window:
652
- if sys.platform == 'darwin':
653
- with objc.autorelease_pool():
654
- if self.old_macos_screenshot_api:
655
- cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming)
656
- else:
657
- self.capture_macos_window_screenshot(self.window_id)
658
- try:
659
- cg_image = self.screencapturekit_queue.get(timeout=0.5)
660
- except queue.Empty:
661
- cg_image = None
662
- if not cg_image:
646
+ def write_result(self, result):
647
+ if self.use_periodic_queue:
648
+ periodic_screenshot_queue.put(result)
649
+ else:
650
+ image_queue.put((result, True))
651
+
652
+ def run(self):
653
+ if self.screencapture_mode != 2:
654
+ sct = mss.mss()
655
+ while not terminated:
656
+ if not screenshot_event.wait(timeout=0.1):
657
+ continue
658
+ if self.screencapture_mode == 2 or self.screen_capture_window:
659
+ if sys.platform == 'darwin':
660
+ with objc.autorelease_pool():
661
+ if self.old_macos_screenshot_api:
662
+ cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id],
663
+ kCGWindowImageBoundsIgnoreFraming)
664
+ else:
665
+ self.capture_macos_window_screenshot(self.window_id)
666
+ try:
667
+ cg_image = self.screencapturekit_queue.get(timeout=0.5)
668
+ except queue.Empty:
669
+ cg_image = None
670
+ if not cg_image:
671
+ return 0
672
+ width = CGImageGetWidth(cg_image)
673
+ height = CGImageGetHeight(cg_image)
674
+ raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
675
+ bpr = CGImageGetBytesPerRow(cg_image)
676
+ img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
677
+ else:
678
+ try:
679
+ coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle)
680
+ coord_width = right - coord_left
681
+ coord_height = bottom - coord_top
682
+
683
+ hwnd_dc = win32gui.GetWindowDC(self.window_handle)
684
+ mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
685
+ save_dc = mfc_dc.CreateCompatibleDC()
686
+
687
+ save_bitmap = win32ui.CreateBitmap()
688
+ save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
689
+ save_dc.SelectObject(save_bitmap)
690
+
691
+ result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
692
+
693
+ bmpinfo = save_bitmap.GetInfo()
694
+ bmpstr = save_bitmap.GetBitmapBits(True)
695
+ except pywintypes.error:
663
696
  return 0
664
- width = CGImageGetWidth(cg_image)
665
- height = CGImageGetHeight(cg_image)
666
- raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
667
- bpr = CGImageGetBytesPerRow(cg_image)
668
- img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
697
+ img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0,
698
+ 1)
699
+ try:
700
+ win32gui.DeleteObject(save_bitmap.GetHandle())
701
+ except:
702
+ pass
703
+ try:
704
+ save_dc.DeleteDC()
705
+ except:
706
+ pass
707
+ try:
708
+ mfc_dc.DeleteDC()
709
+ except:
710
+ pass
711
+ try:
712
+ win32gui.ReleaseDC(self.window_handle, hwnd_dc)
713
+ except:
714
+ pass
669
715
  else:
670
- try:
671
- coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle)
672
- coord_width = right - coord_left
673
- coord_height = bottom - coord_top
716
+ sct_img = sct.grab(self.sct_params)
717
+ img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
674
718
 
675
- hwnd_dc = win32gui.GetWindowDC(self.window_handle)
676
- mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
677
- save_dc = mfc_dc.CreateCompatibleDC()
719
+ import random # Ensure this is imported at the top of the file if not already
720
+ rand_int = random.randint(1, 20) # Executes only once out of 10 times
678
721
 
679
- save_bitmap = win32ui.CreateBitmap()
680
- save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
681
- save_dc.SelectObject(save_bitmap)
722
+ if rand_int == 1: # Executes only once out of 10 times
723
+ img.save(os.path.join(get_temporary_directory(), 'before_crop.png'), 'PNG')
682
724
 
683
- result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
725
+ if self.screen_capture_exclusions:
726
+ img = img.convert("RGBA")
727
+ draw = ImageDraw.Draw(img)
728
+ for exclusion in self.screen_capture_exclusions:
729
+ left, top, width, height = exclusion
730
+ draw.rectangle((left, top, left + width, top + height), fill=(0, 0, 0, 0))
684
731
 
685
- bmpinfo = save_bitmap.GetInfo()
686
- bmpstr = save_bitmap.GetBitmapBits(True)
687
- except pywintypes.error:
688
- return 0
689
- img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
690
- try:
691
- win32gui.DeleteObject(save_bitmap.GetHandle())
692
- except:
693
- pass
694
- try:
695
- save_dc.DeleteDC()
696
- except:
697
- pass
698
- try:
699
- mfc_dc.DeleteDC()
700
- except:
701
- pass
702
- try:
703
- win32gui.ReleaseDC(self.window_handle, hwnd_dc)
704
- except:
705
- pass
706
- else:
707
- sct_img = self.sct.grab(self.sct_params)
708
- img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
709
-
710
- import random # Ensure this is imported at the top of the file if not already
711
- rand_int = random.randint(1, 20) # Executes only once out of 10 times
712
-
713
- if rand_int == 1: # Executes only once out of 10 times
714
- img.save(os.path.join(get_temporary_directory(), 'before_crop.png'), 'PNG')
715
-
716
- if self.screen_capture_exclusions:
717
- img = img.convert("RGBA")
718
- draw = ImageDraw.Draw(img)
719
- for exclusion in self.screen_capture_exclusions:
720
- left, top, width, height = exclusion
721
- draw.rectangle((left, top, left + width, top + height), fill=(0, 0, 0, 0))
722
-
723
- cropped_sections = []
724
- start = time.time()
725
- for area in self.areas:
726
- cropped_sections.append(img.crop((area[0], area[1], area[0] + area[2], area[1] + area[3])))
727
-
728
- # if len(cropped_sections) > 1:
729
- # combined_width = sum(section.width for section in cropped_sections)
730
- # combined_height = max(section.height for section in cropped_sections)
731
- # combined_img = Image.new("RGBA", (combined_width, combined_height))
732
- #
733
- # x_offset = 0
734
- # for section in cropped_sections:
735
- # combined_img.paste(section, (x_offset, 0))
736
- # x_offset += section.width
737
- #
738
- # img = combined_img
739
- if len(cropped_sections) > 1:
740
- combined_width = max(section.width for section in cropped_sections)
741
- combined_height = sum(section.height for section in cropped_sections) + (len(cropped_sections) - 1) * 10 # Add space for gaps
742
- combined_img = Image.new("RGBA", (combined_width, combined_height))
743
-
744
- y_offset = 0
745
- for section in cropped_sections:
746
- combined_img.paste(section, (0, y_offset))
747
- y_offset += section.height + 50 # Add gap between sections
748
-
749
- img = combined_img
750
- elif cropped_sections:
751
- img = cropped_sections[0]
752
-
753
- if rand_int == 1:
754
- img.save(os.path.join(get_temporary_directory(), 'after_crop.png'), 'PNG')
755
-
756
- return img
732
+ cropped_sections = []
733
+ for area in self.areas:
734
+ cropped_sections.append(img.crop((area[0], area[1], area[0] + area[2], area[1] + area[3])))
735
+
736
+ if len(cropped_sections) > 1:
737
+ combined_width = max(section.width for section in cropped_sections)
738
+ combined_height = sum(section.height for section in cropped_sections) + (
739
+ len(cropped_sections) - 1) * 10 # Add space for gaps
740
+ combined_img = Image.new("RGBA", (combined_width, combined_height))
741
+
742
+ y_offset = 0
743
+ for section in cropped_sections:
744
+ combined_img.paste(section, (0, y_offset))
745
+ y_offset += section.height + 50 # Add gap between sections
757
746
 
747
+ img = combined_img
748
+ elif cropped_sections:
749
+ img = cropped_sections[0]
750
+
751
+ if rand_int == 1:
752
+ img.save(os.path.join(get_temporary_directory(), 'after_crop.png'), 'PNG')
753
+
754
+ self.write_result(img)
755
+ screenshot_event.clear()
756
+
757
+ if self.macos_window_tracker_instance:
758
+ self.macos_window_tracker_instance.join()
759
+ elif self.windows_window_tracker_instance:
760
+ self.windows_window_tracker_instance.join()
758
761
 
759
762
  class AutopauseTimer:
760
763
  def __init__(self, timeout):
@@ -823,21 +826,28 @@ def user_input_thread_run():
823
826
  global terminated
824
827
  logger.info('Terminated!')
825
828
  terminated = True
829
+ import sys
826
830
 
827
831
  if sys.platform == 'win32':
828
832
  import msvcrt
829
833
  while not terminated:
830
- user_input_bytes = msvcrt.getch()
831
- try:
832
- user_input = user_input_bytes.decode()
834
+ user_input = None
835
+ if msvcrt.kbhit(): # Check if a key is pressed
836
+ user_input_bytes = msvcrt.getch()
837
+ try:
838
+ user_input = user_input_bytes.decode()
839
+ except UnicodeDecodeError:
840
+ pass
841
+ if not user_input: # If no input from msvcrt, check stdin
842
+ import sys
843
+ user_input = sys.stdin.read(1)
844
+
833
845
  if user_input.lower() in 'tq':
834
846
  _terminate_handler()
835
847
  elif user_input.lower() == 'p':
836
848
  pause_handler(False)
837
849
  else:
838
850
  engine_change_handler(user_input, False)
839
- except UnicodeDecodeError:
840
- pass
841
851
  else:
842
852
  import tty, termios
843
853
  fd = sys.stdin.fileno()
@@ -871,8 +881,7 @@ def on_window_closed(alive):
871
881
 
872
882
  def on_screenshot_combo():
873
883
  if not paused:
874
- img = take_screenshot()
875
- image_queue.put((img, True))
884
+ screenshot_event.set()
876
885
 
877
886
 
878
887
  def on_window_minimized(minimized):
@@ -1084,7 +1093,7 @@ def run(read_from=None,
1084
1093
  if config.get_engine(engine_class.name) == None:
1085
1094
  engine_instance = engine_class()
1086
1095
  else:
1087
- engine_instance = engine_class(config.get_engine(engine_class.name))
1096
+ engine_instance = engine_class(config.get_engine(engine_class.name), lang=lang)
1088
1097
 
1089
1098
  if engine_instance.available:
1090
1099
  engine_instances.append(engine_instance)
@@ -1104,6 +1113,7 @@ def run(read_from=None,
1104
1113
  global auto_pause_handler
1105
1114
  global notifier
1106
1115
  global websocket_server_thread
1116
+ global screenshot_thread
1107
1117
  global image_queue
1108
1118
  global ocr_1
1109
1119
  global ocr_2
@@ -1128,6 +1138,7 @@ def run(read_from=None,
1128
1138
  auto_pause = config.get_general('auto_pause')
1129
1139
  clipboard_thread = None
1130
1140
  websocket_server_thread = None
1141
+ screenshot_thread = None
1131
1142
  directory_watcher_thread = None
1132
1143
  unix_socket_server = None
1133
1144
  key_combo_listener = None
@@ -1160,13 +1171,18 @@ def run(read_from=None,
1160
1171
 
1161
1172
  if 'screencapture' in (read_from, read_from_secondary):
1162
1173
  global take_screenshot
1174
+ global screenshot_event
1163
1175
  last_screenshot_time = 0
1164
1176
  last_result = ([], engine_index)
1165
1177
  if screen_capture_combo != '':
1166
1178
  screen_capture_on_combo = True
1167
1179
  key_combos[screen_capture_combo] = on_screenshot_combo
1168
- take_screenshot = ScreenshotClass(screen_capture_area, screen_capture_window, screen_capture_exclusions, screen_capture_only_active_windows, screen_capture_areas)
1169
- # global_take_screenshot = ScreenshotClass(screen_capture_area, screen_capture_window, screen_capture_exclusions, screen_capture_only_active_windows, rectangle)
1180
+ else:
1181
+ global periodic_screenshot_queue
1182
+ periodic_screenshot_queue = queue.Queue()
1183
+ screenshot_event = threading.Event()
1184
+ screenshot_thread = ScreenshotThread(screen_capture_area, screen_capture_window, screen_capture_exclusions, screen_capture_only_active_windows, screen_capture_areas, screen_capture_on_combo)
1185
+ screenshot_thread.start()
1170
1186
  filtering = TextFiltering()
1171
1187
  read_from_readable.append('screen capture')
1172
1188
  if 'websocket' in (read_from, read_from_secondary):
@@ -1233,8 +1249,9 @@ def run(read_from=None,
1233
1249
  pass
1234
1250
 
1235
1251
  if (not img) and process_screenshots:
1236
- if (not paused) and take_screenshot.screencapture_window_active and take_screenshot.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
1237
- img = take_screenshot()
1252
+ if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
1253
+ screenshot_event.set()
1254
+ img = periodic_screenshot_queue.get()
1238
1255
  filter_img = True
1239
1256
  notify = False
1240
1257
  last_screenshot_time = time.time()
@@ -1270,5 +1287,7 @@ def run(read_from=None,
1270
1287
  if unix_socket_server:
1271
1288
  unix_socket_server.shutdown()
1272
1289
  unix_socket_server_thread.join()
1290
+ if screenshot_thread:
1291
+ screenshot_thread.join()
1273
1292
  if key_combo_listener:
1274
1293
  key_combo_listener.stop()
@@ -172,7 +172,7 @@ class Screenshot:
172
172
  class Audio:
173
173
  enabled: bool = True
174
174
  extension: str = 'opus'
175
- beginning_offset: float = 0.0
175
+ beginning_offset: float = -0.5
176
176
  end_offset: float = 0.5
177
177
  pre_vad_end_offset: float = 0.0
178
178
  ffmpeg_reencode_options: str = '-c:a libopus -f opus -af \"afade=t=in:d=0.10\"' if is_windows() else ''
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.10.11
3
+ Version: 2.10.13
4
4
  Summary: A tool for mining sentences from games. Update: Full UI Re-design
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  GameSentenceMiner/anki.py,sha256=kWw3PV_Jj5-lHcttCB3lRXejHlaAbiJ2Ag_NAGX-RI8,16632
3
- GameSentenceMiner/config_gui.py,sha256=h-vDxpFCC347iK_mDJAjwKm7Qubeu-NWaxvd9SvzqzY,90942
3
+ GameSentenceMiner/config_gui.py,sha256=Xa_a-sdQzht3kzR-Z9gkLy4qnaPyP1bdVadYTHp5lUQ,91018
4
4
  GameSentenceMiner/gametext.py,sha256=6VkjmBeiuZfPk8T6PHFdIAElBH2Y_oLVYvmcafqN7RM,6747
5
5
  GameSentenceMiner/gsm.py,sha256=p4DVa_Jx1EOsgUxAAdC7st7VXLKWnP2BLDGT78ToO8w,24864
6
6
  GameSentenceMiner/obs.py,sha256=ZV9Vk39hrsJLT-AlIxa3qgncKxXaL3Myl33vVJEDEoA,14670
@@ -16,21 +16,21 @@ GameSentenceMiner/assets/icon512.png,sha256=HxUj2GHjyQsk8NV433256UxU9phPhtjCY-YB
16
16
  GameSentenceMiner/assets/icon64.png,sha256=N8xgdZXvhqVQP9QUK3wX5iqxX9LxHljD7c-Bmgim6tM,9301
17
17
  GameSentenceMiner/assets/pickaxe.png,sha256=VfIGyXyIZdzEnVcc4PmG3wszPMO1W4KCT7Q_nFK6eSE,1403829
18
18
  GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=0hZmNIvZmlAEcy_NaTukG_ALUORULUT7sQ8q5VlDJU4,4047
19
+ GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=jtTzAWtMAx8GuA1XIJ_BmyNn3aYaO3u_c5Q7m5D4gS8,4056
20
20
  GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
21
21
  GameSentenceMiner/ocr/owocr_area_selector.py,sha256=boAqarX17jvFscu-7s6C9rqesjQ54s-kfuW0bjCru-M,19834
22
- GameSentenceMiner/ocr/owocr_helper.py,sha256=wkrobbrBugzzRBnUO9zBnxIwMEHWVTwxfutDn2HY17c,20205
22
+ GameSentenceMiner/ocr/owocr_helper.py,sha256=VDcuBfyZ1B7TN6yImJVuNxqWY7pr95R2cRM9jgD5Rk8,21670
23
23
  GameSentenceMiner/ocr/ss_picker.py,sha256=0IhxUdaKruFpZyBL-8SpxWg7bPrlGpy3lhTcMMZ5rwo,5224
24
24
  GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9lKnRCj6oZgR0,49
25
25
  GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
26
26
  GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
27
27
  GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
28
- GameSentenceMiner/owocr/owocr/ocr.py,sha256=fWrbKomSrdkSdlEiMGTKb6-F7wCgfaZZNBUo2gCqmuA,52247
29
- GameSentenceMiner/owocr/owocr/run.py,sha256=mZIGDm3fGYrYbSNuFOk7Sbslfgi36YN0YqfC1xYh_eY,55286
28
+ GameSentenceMiner/owocr/owocr/ocr.py,sha256=siEqZLXhvFX-l311a19nCs-a0PxY9iwpaOoSV5lzVj4,56562
29
+ GameSentenceMiner/owocr/owocr/run.py,sha256=goOZSO3a7z8GxjYAcWjHsPxdzM60Nt3vxjcUzy1fnZg,56242
30
30
  GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
31
31
  GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  GameSentenceMiner/util/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
33
- GameSentenceMiner/util/configuration.py,sha256=wuuM39xhXahswx7EhhWXURDQ_KIPbo4RhmQ_wPEbezo,28816
33
+ GameSentenceMiner/util/configuration.py,sha256=8PZk4IhtWFimfRy7biREcfG1NGkFNzKzFjlOjxNEFd0,28817
34
34
  GameSentenceMiner/util/electron_config.py,sha256=3VmIrcXhC-wIMMc4uqV85NrNenRl4ZUbnQfSjWEwuig,9852
35
35
  GameSentenceMiner/util/ffmpeg.py,sha256=t0tflxq170n8PZKkdw8fTZIUQfXD0p_qARa9JTdhBTc,21530
36
36
  GameSentenceMiner/util/gsm_utils.py,sha256=_279Fu9CU6FEh4cP6h40TWOt_BrqmPgytfumi8y53Ew,11491
@@ -62,9 +62,9 @@ GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
62
62
  GameSentenceMiner/web/templates/index.html,sha256=n0J-dV8eksj8JXUuaCTIh0fIxIjfgm2EvxGBdQ6gWoM,214113
63
63
  GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
64
64
  GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
65
- gamesentenceminer-2.10.11.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
66
- gamesentenceminer-2.10.11.dist-info/METADATA,sha256=pEHEHL90MhO8afUJ3yQTLDjdGvcYz5slrezzJ6biWfk,7355
67
- gamesentenceminer-2.10.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- gamesentenceminer-2.10.11.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
69
- gamesentenceminer-2.10.11.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
70
- gamesentenceminer-2.10.11.dist-info/RECORD,,
65
+ gamesentenceminer-2.10.13.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
66
+ gamesentenceminer-2.10.13.dist-info/METADATA,sha256=Uyvm-m_g2V68e571HG8RBqOfbZJmWqAfXlTOM4HuEYg,7355
67
+ gamesentenceminer-2.10.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ gamesentenceminer-2.10.13.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
69
+ gamesentenceminer-2.10.13.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
70
+ gamesentenceminer-2.10.13.dist-info/RECORD,,