GameSentenceMiner 2.12.3__py3-none-any.whl → 2.12.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
GameSentenceMiner/obs.py CHANGED
@@ -385,12 +385,17 @@ def get_screenshot_base64(compression=75, width=None, height=None):
385
385
  return None
386
386
 
387
387
 
388
- def get_screenshot_PIL(compression=75, img_format='png', width=None, height=None, retry=3):
388
+ def get_screenshot_PIL(source_name=None, compression=75, img_format='png', width=None, height=None, retry=3):
389
389
  import io
390
390
  import base64
391
391
  from PIL import Image
392
+ if not source_name:
393
+ source_name = get_active_source().get('sourceName', None)
394
+ if not source_name:
395
+ logger.error("No active source found in the current scene.")
396
+ return None
392
397
  while True:
393
- response = client.get_source_screenshot(name=get_current_game(), img_format=img_format, quality=compression, width=width, height=height)
398
+ response = client.get_source_screenshot(name=source_name, img_format=img_format, quality=compression, width=width, height=height)
394
399
  try:
395
400
  response.image_data = response.image_data.split(',', 1)[-1] # Remove data:image/png;base64, prefix if present
396
401
  except AttributeError:
@@ -463,26 +468,42 @@ if __name__ == '__main__':
463
468
  logging.basicConfig(level=logging.INFO)
464
469
  # main()
465
470
  connect_to_obs_sync()
466
- i = 100
471
+ # i = 100
472
+ # for i in range(1, 100):
473
+ # print(f"Getting screenshot {i}")
474
+ # start = time.time()
475
+ # # get_screenshot(compression=95)
476
+ # # get_screenshot_base64(compression=95, width=1280, height=720)
477
+
478
+ # img = get_screenshot_PIL(compression=i, img_format='jpg', width=1280, height=720)
479
+ # end = time.time()
480
+ # print(f"Time taken to get screenshot with compression {i}: {end - start} seconds")
481
+
467
482
  # for i in range(1, 100):
468
- print(f"Getting screenshot {i}")
469
- start = time.time()
470
- # get_screenshot(compression=95)
471
- # get_screenshot_base64(compression=95, width=1280, height=720)
472
- img = get_screenshot_PIL(compression=i, img_format='png')
473
- end = time.time()
474
- print(f"Time taken to get screenshot with compression {i}: {end - start} seconds")
475
- img.show()
483
+ # print(f"Getting screenshot {i}")
484
+ # start = time.time()
485
+ # # get_screenshot(compression=95)
486
+ # # get_screenshot_base64(compression=95, width=1280, height=720)
476
487
 
488
+ # img = get_screenshot_PIL(compression=i, img_format='jpg', width=2560, height=1440)
489
+ # end = time.time()
490
+ # print(f"Time taken to get screenshot full sized jpg with compression {i}: {end - start} seconds")
491
+
492
+ # png_img = get_screenshot_PIL(compression=75, img_format='png', width=1280, height=720)
493
+
494
+ # jpg_img = get_screenshot_PIL(compression=100, img_format='jpg', width=2560, height=1440)
495
+
496
+ # png_img.show()
497
+ # jpg_img.show()
477
498
 
478
- start = time.time()
479
- with mss() as sct:
480
- monitor = sct.monitors[1]
481
- sct_img = sct.grab(monitor)
482
- img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
483
- img.show()
484
- end = time.time()
485
- print(f"Time taken to get screenshot with mss: {end - start} seconds")
499
+ # start = time.time()
500
+ # with mss() as sct:
501
+ # monitor = sct.monitors[1]
502
+ # sct_img = sct.grab(monitor)
503
+ # img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
504
+ # img.show()
505
+ # end = time.time()
506
+ # print(f"Time taken to get screenshot with mss: {end - start} seconds")
486
507
 
487
508
 
488
509
  # print(get_screenshot_base64(compression=75, width=1280, height=720))
@@ -436,7 +436,7 @@ class GoogleLens:
436
436
  # res += '\n'
437
437
 
438
438
  if return_coords:
439
- x = (True, res, lines)
439
+ x = (True, res, response_dict)
440
440
  else:
441
441
  x = (True, res)
442
442
 
@@ -887,7 +887,28 @@ class OneOCR:
887
887
  except:
888
888
  logger.warning('Error reading URL from config, OneOCR will not work!')
889
889
 
890
- def __call__(self, img, furigana_filter_sensitivity=0, sentence_to_check=None, return_coords=False):
890
+ def get_regex(self, lang):
891
+ if lang == "ja":
892
+ self.regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
893
+ elif lang == "zh":
894
+ self.regex = re.compile(r'[\u4E00-\u9FFF]')
895
+ elif lang == "ko":
896
+ self.regex = re.compile(r'[\uAC00-\uD7AF]')
897
+ elif lang == "ar":
898
+ self.regex = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
899
+ elif lang == "ru":
900
+ self.regex = re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
901
+ elif lang == "el":
902
+ self.regex = re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
903
+ elif lang == "he":
904
+ self.regex = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
905
+ elif lang == "th":
906
+ self.regex = re.compile(r'[\u0E00-\u0E7F]')
907
+ else:
908
+ self.regex = re.compile(
909
+ r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
910
+
911
+ def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
891
912
  lang = get_ocr_language()
892
913
  if lang != self.initial_lang:
893
914
  self.initial_lang = lang
@@ -911,6 +932,10 @@ class OneOCR:
911
932
  json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
912
933
  # print(json.dumps(ocr_resp))
913
934
  filtered_lines = [line for line in ocr_resp['lines'] if self.regex.search(line['text'])]
935
+ x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
936
+ y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
937
+ if x_coords and y_coords:
938
+ crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
914
939
  # logger.info(filtered_lines)
915
940
  res = ''
916
941
  skipped = []
@@ -964,30 +989,6 @@ class OneOCR:
964
989
  # else:
965
990
  # continue
966
991
  # res += '\n'
967
- elif sentence_to_check:
968
- lines_to_build_area = []
969
- widths = []
970
- heights = []
971
- for line in ocr_resp['lines']:
972
- print(line['text'])
973
- if sentence_to_check in line['text'] or line['text'] in sentence_to_check or rapidfuzz.fuzz.partial_ratio(sentence_to_check, line['text']) > 50:
974
- lines_to_build_area.append(line)
975
- res += line['text']
976
- for word in line['words']:
977
- widths.append(word['bounding_rect']['x2'] - word['bounding_rect']['x1'])
978
- heights.append(word['bounding_rect']['y3'] - word['bounding_rect']['y1'])
979
-
980
- x_coords = [line['bounding_rect'][f'x{i}'] for line in lines_to_build_area for i in
981
- range(1, 5)]
982
- y_coords = [line['bounding_rect'][f'y{i}'] for line in lines_to_build_area for i in
983
- range(1, 5)]
984
- if widths:
985
- avg_width = sum(widths) / len(widths)
986
- if heights:
987
- avg_height = sum(heights) / len(heights)
988
- if x_coords and y_coords:
989
- crop_coords = (
990
- min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
991
992
  elif return_coords:
992
993
  for line in filtered_lines:
993
994
  for word in line['words']:
@@ -998,10 +999,6 @@ class OneOCR:
998
999
  boxes.append(box)
999
1000
  res = ocr_resp['text']
1000
1001
  else:
1001
- x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
1002
- y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
1003
- if x_coords and y_coords:
1004
- crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
1005
1002
  res = ocr_resp['text']
1006
1003
 
1007
1004
  except RuntimeError as e:
@@ -1019,7 +1016,7 @@ class OneOCR:
1019
1016
 
1020
1017
  res = res.json()['text']
1021
1018
  if return_coords:
1022
- x = (True, res, boxes)
1019
+ x = (True, res, filtered_lines)
1023
1020
  else:
1024
1021
  x = (True, res, crop_coords)
1025
1022
  if is_path:
@@ -44,7 +44,6 @@ import queue
44
44
  from datetime import datetime
45
45
  from PIL import Image, ImageDraw, UnidentifiedImageError
46
46
  from loguru import logger
47
- from pynput import keyboard
48
47
  from desktop_notifier import DesktopNotifierSync
49
48
  import psutil
50
49
 
@@ -384,6 +383,7 @@ class TextFiltering:
384
383
  block_filtered = self.latin_extended_regex.findall(block)
385
384
  else:
386
385
  block_filtered = self.latin_extended_regex.findall(block)
386
+
387
387
  if block_filtered:
388
388
  orig_text_filtered.append(''.join(block_filtered))
389
389
  else:
@@ -547,39 +547,6 @@ class ScreenshotThread(threading.Thread):
547
547
  else:
548
548
  raise ValueError('Window capture is only currently supported on Windows and macOS')
549
549
 
550
- def __del__(self):
551
- if self.macos_window_tracker_instance:
552
- self.macos_window_tracker_instance.join()
553
- elif self.windows_window_tracker_instance:
554
- self.windows_window_tracker_instance.join()
555
-
556
- def setup_persistent_windows_window_tracker(self):
557
- global window_open
558
- window_open = False
559
- def setup_tracker():
560
- global window_open
561
- self.window_handle, window_title = self.get_windows_window_handle(self.screen_capture_window)
562
-
563
- if not self.window_handle:
564
- # print(f"Window '{screen_capture_window}' not found.")
565
- return
566
-
567
- set_dpi_awareness()
568
- window_open = True
569
- self.windows_window_tracker_instance = threading.Thread(target=self.windows_window_tracker)
570
- self.windows_window_tracker_instance.start()
571
- logger.opt(ansi=True).info(f'Selected window: {window_title}')
572
-
573
- while not terminated:
574
- if not window_open:
575
- try:
576
- setup_tracker()
577
- except ValueError as e:
578
- logger.error(f"Error setting up persistent windows window tracker: {e}")
579
- break
580
- time.sleep(5)
581
-
582
-
583
550
  def get_windows_window_handle(self, window_title):
584
551
  def callback(hwnd, window_title_part):
585
552
  window_title = win32gui.GetWindowText(hwnd)
@@ -602,7 +569,7 @@ class ScreenshotThread(threading.Thread):
602
569
 
603
570
  def windows_window_tracker(self):
604
571
  found = True
605
- while not terminated or window_open:
572
+ while not terminated:
606
573
  found = win32gui.IsWindow(self.window_handle)
607
574
  if not found:
608
575
  break
@@ -872,18 +839,9 @@ class OBSScreenshotThread(threading.Thread):
872
839
  image_queue.put((result, True))
873
840
 
874
841
  def connect_obs(self):
875
- try:
876
- import obsws_python as obs
877
- self.obs_client = obs.ReqClient(
878
- host=get_config().obs.host,
879
- port=get_config().obs.port,
880
- password=get_config().obs.password,
881
- timeout=10
882
- )
883
- logger.info("Connected to OBS WebSocket.")
884
- except Exception as e:
885
- logger.error(f"Failed to connect to OBS: {e}")
886
- self.obs_client = None
842
+ import GameSentenceMiner.obs as obs
843
+ obs.connect_to_obs_sync()
844
+
887
845
 
888
846
  def run(self):
889
847
  global last_image
@@ -895,7 +853,7 @@ class OBSScreenshotThread(threading.Thread):
895
853
  def init_config(source=None, scene=None):
896
854
  obs.update_current_game()
897
855
  self.current_source = source if source else obs.get_active_source()
898
- self.current_source_name = self.current_source.get('sourceName') if isinstance(self.current_source, dict) else None
856
+ self.current_source_name = self.current_source.get("sourceName") or None
899
857
  self.current_scene = scene if scene else obs.get_current_game()
900
858
  self.ocr_config = get_scene_ocr_config()
901
859
  self.ocr_config.scale_to_custom_size(self.width, self.height)
@@ -927,20 +885,20 @@ class OBSScreenshotThread(threading.Thread):
927
885
  if not self.ocr_config:
928
886
  time.sleep(1)
929
887
  continue
888
+
889
+ if not self.current_source_name:
890
+ obs.update_current_game()
891
+ self.current_source = obs.get_active_source()
892
+ self.current_source_name = self.current_source.get("sourceName") or None
930
893
 
931
894
  try:
932
- response = self.obs_client.get_source_screenshot(
933
- name=self.current_source_name,
934
- img_format='png',
935
- quality=75,
936
- width=self.width,
937
- height=self.height,
938
- )
895
+ if not self.current_source_name:
896
+ logger.error("No active source found in the current scene.")
897
+ time.sleep(1)
898
+ continue
899
+ img = obs.get_screenshot_PIL(source_name=self.current_source_name, width=self.width, height=self.height, img_format='jpg', compression=90)
939
900
 
940
- if response.image_data:
941
- image_data = base64.b64decode(response.image_data.split(",")[1])
942
- img = Image.open(io.BytesIO(image_data)).convert("RGBA")
943
-
901
+ if img is not None:
944
902
  if not img.getbbox():
945
903
  logger.info("OBS Not Capturing anything, sleeping.")
946
904
  time.sleep(1)
@@ -1118,11 +1076,10 @@ def signal_handler(sig, frame):
1118
1076
 
1119
1077
 
1120
1078
  def on_window_closed(alive):
1121
- global terminated, window_open
1079
+ global terminated
1122
1080
  if not (alive or terminated):
1123
1081
  logger.info('Window closed or error occurred, terminated!')
1124
- window_open = False
1125
- # terminated = True
1082
+ terminated = True
1126
1083
 
1127
1084
 
1128
1085
  def on_screenshot_combo():
@@ -1464,8 +1421,12 @@ def run(read_from=None,
1464
1421
  read_from_readable.append(f'directory {read_from_path}')
1465
1422
 
1466
1423
  if len(key_combos) > 0:
1467
- key_combo_listener = keyboard.GlobalHotKeys(key_combos)
1468
- key_combo_listener.start()
1424
+ try:
1425
+ from pynput import keyboard
1426
+ key_combo_listener = keyboard.GlobalHotKeys(key_combos)
1427
+ key_combo_listener.start()
1428
+ except ImportError:
1429
+ pass
1469
1430
 
1470
1431
  if write_to in ('clipboard', 'websocket', 'callback'):
1471
1432
  write_to_readable = write_to
@@ -239,11 +239,12 @@ class Downloader:
239
239
  # Example usage:
240
240
  if __name__ == "__main__":
241
241
  downloader = Downloader()
242
- if downloader.download_and_extract():
243
- print("SnippingTool files are ready.")
244
- print("Press Ctrl+C or X on window to exit.")
245
- input()
246
- else:
247
- print("Failed to download and extract SnippingTool files. You may need to follow instructions at https://github.com/AuroraWright/oneocr")
248
- print("Press Ctrl+C or X on window to exit.")
249
- input()
242
+ downloader.download_and_extract()
243
+ # if downloader.download_and_extract():
244
+ # print("SnippingTool files are ready.")
245
+ # print("Press Ctrl+C or X on window to exit.")
246
+ # # input()
247
+ # else:
248
+ # # print("Failed to download and extract SnippingTool files. You may need to follow instructions at https://github.com/AuroraWright/oneocr")
249
+ # print("Press Ctrl+C or X on window to exit.")
250
+ # input()
@@ -1,88 +1,285 @@
1
1
  import asyncio
2
2
  import io
3
3
  import base64
4
+ import math
4
5
  from PIL import Image
5
6
  from GameSentenceMiner.util.configuration import get_config
7
+ from typing import Dict, Any, List, Tuple
8
+
9
+ from GameSentenceMiner.util.electron_config import get_ocr_language
6
10
 
7
11
  if get_config().wip.overlay_websocket_send:
8
- from GameSentenceMiner.owocr.owocr.ocr import GoogleLens, OneOCR
12
+ from GameSentenceMiner.owocr.owocr.ocr import GoogleLens, OneOCR, get_regex
9
13
  from GameSentenceMiner.obs import *
10
14
 
11
- # OBS WebSocket settings
12
- OBS_HOST = 'localhost'
13
- OBS_PORT = 7274
14
- OBS_PASSWORD = 'your_obs_websocket_password' # Set your OBS WebSocket password here, if any
15
-
16
- WINDOW_NAME = "Nier:Automata"
17
- WIDTH = 2560
18
- HEIGHT = 1440
19
15
  if get_config().wip.overlay_websocket_send:
20
16
  oneocr = OneOCR()
21
17
  lens = GoogleLens()
22
18
 
23
- def correct_ocr_text(detected_text: str, reference_text: str) -> str:
19
+
20
+ def _convert_box_to_pixels_v2(
21
+ bbox_data: Dict[str, float],
22
+ original_width: int,
23
+ original_height: int,
24
+ crop_x: int,
25
+ crop_y: int,
26
+ crop_width: int,
27
+ crop_height: int
28
+ ) -> Dict[str, float]:
29
+ """
30
+ Simplified conversion: scales normalized bbox to pixel coordinates, ignores rotation.
31
+
32
+ Args:
33
+ bbox_data: A dictionary with normalized 'center_x', 'center_y', 'width', 'height'.
34
+ original_width: The width of the original, full-size image in pixels.
35
+ original_height: The height of the original, full-size image in pixels.
36
+
37
+ Returns:
38
+ A dictionary of the four corner points with absolute pixel coordinates.
39
+ """
40
+ cx, cy = bbox_data['center_x'], bbox_data['center_y']
41
+ w, h = bbox_data['width'], bbox_data['height']
42
+
43
+ # Scale normalized coordinates to pixel coordinates
44
+ box_width_px = w * crop_width
45
+ box_height_px = h * crop_height
46
+ center_x_px = cx * crop_width + crop_x
47
+ center_y_px = cy * crop_height + crop_y
48
+
49
+ # Calculate corners (no rotation)
50
+ x1 = center_x_px - box_width_px / 2
51
+ y1 = center_y_px - box_height_px / 2
52
+ x2 = center_x_px + box_width_px / 2
53
+ y2 = center_y_px - box_height_px / 2
54
+ x3 = center_x_px + box_width_px / 2
55
+ y3 = center_y_px + box_height_px / 2
56
+ x4 = center_x_px - box_width_px / 2
57
+ y4 = center_y_px + box_height_px / 2
58
+
59
+ return {
60
+ "x1": x1,
61
+ "y1": y1,
62
+ "x2": x2,
63
+ "y2": y2,
64
+ "x3": x3,
65
+ "y3": y3,
66
+ "x4": x4,
67
+ "y4": y4,
68
+ }
69
+
70
+ def _convert_box_to_pixels(
71
+ bbox_data: Dict[str, float],
72
+ original_width: int,
73
+ original_height: int,
74
+ crop_x: int,
75
+ crop_y: int,
76
+ crop_width: int,
77
+ crop_height: int
78
+ ) -> Dict[str, Dict[str, float]]:
79
+ """
80
+ Converts a normalized bounding box to an absolute pixel-based quad.
81
+
82
+ Args:
83
+ bbox_data: A dictionary with normalized 'center_x', 'center_y', etc.
84
+ original_width: The width of the original, full-size image in pixels.
85
+ original_height: The height of the original, full-size image in pixels.
86
+
87
+ Returns:
88
+ A dictionary of the four corner points with absolute pixel coordinates.
24
89
  """
25
- Correct OCR text by comparing character-by-character with reference text.
26
- When mismatches are found, look for subsequent matches and correct previous mismatches.
90
+ # Normalized coordinates from the input
91
+ cx, cy = bbox_data['center_x'], bbox_data['center_y']
92
+ w, h = bbox_data['width'], bbox_data['height']
93
+ angle_rad = bbox_data.get('rotation_z', 0.0)
94
+
95
+ # Calculate un-rotated corner points (still normalized) relative to the center
96
+ half_w, half_h = w / 2, h / 2
97
+ corners = [
98
+ (-half_w, -half_h), # Top-left
99
+ ( half_w, -half_h), # Top-right
100
+ ( half_w, half_h), # Bottom-right
101
+ (-half_w, half_h), # Bottom-left
102
+ ]
103
+
104
+ # Rotate each corner and translate it to its absolute normalized position
105
+ cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)
106
+ pixel_corners = []
107
+ for x_norm, y_norm in corners:
108
+ # 2D rotation
109
+ x_rot_norm = x_norm * cos_a - y_norm * sin_a
110
+ y_rot_norm = x_norm * sin_a + y_norm * cos_a
111
+
112
+ # Translate to absolute normalized position
113
+ abs_x_norm = cx + x_rot_norm
114
+ abs_y_norm = cy + y_rot_norm
115
+
116
+ # Scale up to pixel coordinates
117
+ pixel_corners.append((
118
+ abs_x_norm * crop_width + crop_x,
119
+ abs_y_norm * crop_height + crop_y
120
+ ))
121
+
122
+ # Return as x1, y1, x2, y2, x3, y3, x4, y4
123
+ return {
124
+ "x1": pixel_corners[0][0],
125
+ "y1": pixel_corners[0][1],
126
+ "x2": pixel_corners[1][0],
127
+ "y2": pixel_corners[1][1],
128
+ "x3": pixel_corners[2][0],
129
+ "y3": pixel_corners[2][1],
130
+ "x4": pixel_corners[3][0],
131
+ "y4": pixel_corners[3][1],
132
+ }
133
+
134
+ def extract_text_with_pixel_boxes(
135
+ api_response: Dict[str, Any],
136
+ original_width: int,
137
+ original_height: int,
138
+ crop_x: int,
139
+ crop_y: int,
140
+ crop_width: int,
141
+ crop_height: int
142
+ ) -> List[Dict[str, Any]]:
27
143
  """
28
- if not detected_text or not reference_text:
29
- return detected_text
144
+ Extracts sentences and words and converts their normalized bounding boxes
145
+ to absolute pixel coordinates based on original image dimensions.
146
+
147
+ Args:
148
+ api_response: The dictionary parsed from the source JSON.
149
+ original_width: The width of the original, full-size image.
150
+ original_height: The height of the original, full-size image.
151
+
152
+ Returns:
153
+ A list of sentence objects with text and bounding boxes in pixel coordinates.
154
+ """
155
+ results = []
156
+ regex = get_regex(get_ocr_language())
157
+
158
+ try:
159
+ paragraphs = api_response["objects_response"]["text"]["text_layout"]["paragraphs"]
160
+ except KeyError:
161
+ return [] # Return empty list if the structure is not found
162
+
163
+ for para in paragraphs:
164
+ for line in para.get("lines", []):
165
+ line_text_parts = []
166
+ word_list = []
167
+
168
+
169
+ for word in line.get("words", []):
170
+ if not regex.search(word.get("plain_text", "")):
171
+ continue
172
+ word_text = word.get("plain_text", "")
173
+ line_text_parts.append(word_text)
174
+
175
+ # Convert word's bounding box to pixel coordinates
176
+ word_box = _convert_box_to_pixels_v2(
177
+ word["geometry"]["bounding_box"],
178
+ original_width,
179
+ original_height,
180
+ crop_x=crop_x,
181
+ crop_y=crop_y,
182
+ crop_width=crop_width,
183
+ crop_height=crop_height
184
+ )
185
+
186
+ word_list.append({
187
+ "text": word_text,
188
+ "bounding_rect": word_box
189
+ })
190
+
191
+ if not line_text_parts:
192
+ continue
193
+
194
+ # Assemble the sentence object
195
+ full_sentence_text = "".join(line_text_parts)
196
+ # Convert the full line's bounding box to pixel coordinates
197
+ line_box = _convert_box_to_pixels_v2(
198
+ line["geometry"]["bounding_box"],
199
+ original_width,
200
+ original_height,
201
+ crop_x=crop_x,
202
+ crop_y=crop_y,
203
+ crop_width=crop_width,
204
+ crop_height=crop_height
205
+ )
206
+
207
+ results.append({
208
+ "text": full_sentence_text,
209
+ "bounding_rect": line_box,
210
+ "words": word_list
211
+ })
212
+
213
+ return results
214
+
215
+ # def correct_ocr_text(detected_text: str, reference_text: str) -> str:
216
+ # """
217
+ # Correct OCR text by comparing character-by-character with reference text.
218
+ # When mismatches are found, look for subsequent matches and correct previous mismatches.
219
+ # """
220
+ # if not detected_text or not reference_text:
221
+ # return detected_text
30
222
 
31
- detected_chars = list(detected_text)
32
- reference_chars = list(reference_text)
223
+ # detected_chars = list(detected_text)
224
+ # reference_chars = list(reference_text)
33
225
 
34
- # Track positions where mismatches occurred
35
- mismatched_positions = []
226
+ # # Track positions where mismatches occurred
227
+ # mismatched_positions = []
36
228
 
37
- min_length = min(len(detected_chars), len(reference_chars))
229
+ # min_length = min(len(detected_chars), len(reference_chars))
38
230
 
39
- for i in range(min_length):
40
- if detected_chars[i] != reference_chars[i]:
41
- mismatched_positions.append(i)
42
- logger.info(f"Mismatch at position {i}: detected '{detected_chars[i]}' vs reference '{reference_chars[i]}'")
43
- else:
44
- # We found a match - if we have previous mismatches, correct the most recent one
45
- if mismatched_positions:
46
- # Correct the most recent mismatch (simple 1-for-1 strategy)
47
- last_mismatch_pos = mismatched_positions.pop()
48
- old_char = detected_chars[last_mismatch_pos]
49
- detected_chars[last_mismatch_pos] = reference_chars[last_mismatch_pos]
50
- logger.info(f"Corrected position {last_mismatch_pos}: '{old_char}' -> '{reference_chars[last_mismatch_pos]}'")
231
+ # start_of_reference = 0
232
+ # for char in detected_chars:
233
+ # if char == reference_chars[start_of_reference]:
234
+ # start_of_reference += 1
235
+
236
+ # for i in range(min_length):
237
+ # if detected_chars[i] != reference_chars[i]:
238
+ # mismatched_positions.append(i)
239
+ # logger.info(f"Mismatch at position {i}: detected '{detected_chars[i]}' vs reference '{reference_chars[i]}'")
240
+ # else:
241
+ # # We found a match - if we have previous mismatches, correct the most recent one
242
+ # if mismatched_positions:
243
+ # # Correct the most recent mismatch (simple 1-for-1 strategy)
244
+ # last_mismatch_pos = mismatched_positions.pop()
245
+ # old_char = detected_chars[last_mismatch_pos]
246
+ # detected_chars[last_mismatch_pos] = reference_chars[last_mismatch_pos]
247
+ # logger.info(f"Corrected position {last_mismatch_pos}: '{old_char}' -> '{reference_chars[last_mismatch_pos]}'")
51
248
 
52
- corrected_text = ''.join(detected_chars)
53
- return corrected_text
249
+ # corrected_text = ''.join(detected_chars)
250
+ # return corrected_text
54
251
 
55
- def redistribute_corrected_text(original_boxes: list, original_text: str, corrected_text: str) -> list:
56
- """
57
- Redistribute corrected text back to the original text boxes while maintaining their positions.
58
- """
59
- if original_text == corrected_text:
60
- return original_boxes
252
+ # def redistribute_corrected_text(original_boxes: list, original_text: str, corrected_text: str) -> list:
253
+ # """
254
+ # Redistribute corrected text back to the original text boxes while maintaining their positions.
255
+ # """
256
+ # if original_text == corrected_text:
257
+ # return original_boxes
61
258
 
62
- corrected_boxes = []
63
- text_position = 0
259
+ # corrected_boxes = []
260
+ # text_position = 0
64
261
 
65
- for box in original_boxes:
66
- original_word = box['text']
67
- word_length = len(original_word)
262
+ # for box in original_boxes:
263
+ # original_word = box['text']
264
+ # word_length = len(original_word)
68
265
 
69
- # Extract the corrected portion for this box
70
- if text_position + word_length <= len(corrected_text):
71
- corrected_word = corrected_text[text_position:text_position + word_length]
72
- else:
73
- # Handle case where corrected text is shorter
74
- corrected_word = corrected_text[text_position:] if text_position < len(corrected_text) else ""
266
+ # # Extract the corrected portion for this box
267
+ # if text_position + word_length <= len(corrected_text):
268
+ # corrected_word = corrected_text[text_position:text_position + word_length]
269
+ # else:
270
+ # # Handle case where corrected text is shorter
271
+ # corrected_word = corrected_text[text_position:] if text_position < len(corrected_text) else ""
75
272
 
76
- # Create a new box with corrected text but same coordinates
77
- corrected_box = box.copy()
78
- corrected_box['text'] = corrected_word
79
- corrected_boxes.append(corrected_box)
273
+ # # Create a new box with corrected text but same coordinates
274
+ # corrected_box = box.copy()
275
+ # corrected_box['text'] = corrected_word
276
+ # corrected_boxes.append(corrected_box)
80
277
 
81
- text_position += word_length
278
+ # text_position += word_length
82
279
 
83
- logger.info(f"Redistributed: '{original_word}' -> '{corrected_word}'")
280
+ # logger.info(f"Redistributed: '{original_word}' -> '{corrected_word}'")
84
281
 
85
- return corrected_boxes
282
+ # return corrected_boxes
86
283
 
87
284
  async def get_full_screenshot() -> Image.Image | None:
88
285
  # logger.info(f"Attempting to connect to OBS WebSocket at ws://{OBS_HOST}:{OBS_PORT}")
@@ -117,14 +314,36 @@ async def get_full_screenshot() -> Image.Image | None:
117
314
  else:
118
315
  monitors = [monitors[0]]
119
316
  monitor = monitors[get_config().wip.monitor_to_capture]
120
- sct_img = sct.grab(monitor)
121
- img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
122
- # img.show()
123
- return img
317
+ img = get_screenshot_PIL(compression=100, img_format='jpg')
318
+ # Put the image over a transparent background without stretching
319
+ new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
320
+ # Calculate coordinates to center img horizontally and vertically
321
+ left = 0
322
+ top = 0
323
+ if img.width < monitor['width'] and img.height < monitor['height']:
324
+ # scale image to fit monitor
325
+ img = img.resize((monitor['width'], monitor['height']), Image.Resampling.BILINEAR)
326
+ if img.width < monitor['width']:
327
+ left = (monitor['width'] - img.width) // 2
328
+ if img.height < monitor['height']:
329
+ top = (monitor['height'] - img.height) // 2
330
+
331
+ print(f"Image size: {img.size}, Monitor size: {monitor['width']}x{monitor['height']}")
332
+ new_img.paste(img, (left, top))
333
+
334
+ # new_img.show()
335
+
336
+ return new_img, monitor['width'], monitor['height']
337
+ # sct_img = sct.grab(monitor)
338
+ # img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
339
+
340
+ # # img.show()
341
+ # return img
124
342
  # update_current_game()
125
343
 
126
344
  # image_data = get_screenshot_base64(compression=75, width=1280, height=720)
127
345
  # image_data = base64.b64decode(image_data)
346
+ img = get_screenshot_PIL(img_format='jpg')
128
347
  # img = Image.open(io.BytesIO(image_data)).convert("RGBA").resize((WIDTH, HEIGHT), Image.Resampling.LANCZOS)
129
348
  # img.show()
130
349
  logger.info(f"Screenshot captured in {time.time() - start_time:.2f} seconds.")
@@ -140,56 +359,77 @@ async def do_work(sentence_to_check=None):
140
359
  logger.info("in find_box")
141
360
  # await asyncio.sleep(.5)
142
361
  logger.info("after_initial_sleep")
143
- full_screenshot_image = await get_full_screenshot()
362
+ full_screenshot_image, monitor_width, monitor_height = await get_full_screenshot()
363
+
364
+ oneocr_results = oneocr(full_screenshot_image)
365
+ crop_coords = oneocr_results[2]
366
+ logger.info("Cropping full screenshot with coordinates: %s", crop_coords)
367
+ cropped_image = full_screenshot_image.crop(crop_coords)
368
+ # Convert 1/4
144
369
  if os.path.exists("C:\\Users\\Beangate\\GSM\\temp"):
145
- full_screenshot_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
370
+ cropped_image.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
146
371
  # full_screenshot_image.show()
147
- if full_screenshot_image:
372
+ if cropped_image:
148
373
  logger.info("Full screenshot captured successfully. Now performing local OCR...")
149
- ocr_results = oneocr(full_screenshot_image, return_coords=True)
374
+ # ocr_results = oneocr(full_screenshot_image, return_coords=True)
375
+ google_ocr_results = lens(cropped_image, return_coords=True)[2]
150
376
 
151
- boxes_of_text = ocr_results[2]
377
+ ret = extract_text_with_pixel_boxes(
378
+ api_response=google_ocr_results,
379
+ original_width=monitor_width,
380
+ original_height=monitor_height,
381
+ crop_x=crop_coords[0],
382
+ crop_y=crop_coords[1],
383
+ crop_width=crop_coords[2] - crop_coords[0],
384
+ crop_height=crop_coords[3] - crop_coords[1]
385
+ )
386
+
387
+ # boxes_of_text = google_ocr_results[2]
152
388
  # logger.info(f"Boxes of text found: {boxes_of_text}")
153
389
 
154
390
  words = []
155
391
 
392
+ # logger.info(json.dumps(ret, indent=4, ensure_ascii=False))
393
+
394
+ return ret, 48
395
+
156
396
  # If we have a reference sentence, perform character-by-character correction
157
- if sentence_to_check:
158
- # Concatenate all OCR text to form the detected sentence
159
- detected_sentence = ''.join([box['text'] for box in boxes_of_text])
160
- logger.info(f"Original detected sentence: '{detected_sentence}'")
161
- logger.info(f"Reference sentence: '{sentence_to_check}'")
397
+ # if sentence_to_check:
398
+ # # Concatenate all OCR text to form the detected sentence
399
+ # detected_sentence = ''.join([box['text'] for box in boxes_of_text])
400
+ # logger.info(f"Original detected sentence: '{detected_sentence}'")
401
+ # logger.info(f"Reference sentence: '{sentence_to_check}'")
162
402
 
163
- # Perform character-by-character comparison and correction
164
- corrected_sentence = correct_ocr_text(detected_sentence, sentence_to_check)
165
- logger.info(f"Corrected sentence: '{corrected_sentence}'")
403
+ # # Perform character-by-character comparison and correction
404
+ # corrected_sentence = correct_ocr_text(detected_sentence, sentence_to_check)
405
+ # logger.info(f"Corrected sentence: '{corrected_sentence}'")
166
406
 
167
- # Redistribute corrected text back to boxes while maintaining positions
168
- corrected_boxes = redistribute_corrected_text(boxes_of_text, detected_sentence, corrected_sentence)
169
- else:
170
- corrected_boxes = boxes_of_text
407
+ # # Redistribute corrected text back to boxes while maintaining positions
408
+ # corrected_boxes = redistribute_corrected_text(boxes_of_text, detected_sentence, corrected_sentence)
409
+ # else:
410
+ # corrected_boxes = boxes_of_text
171
411
 
172
- sentence_position = 0
173
- for box in corrected_boxes:
174
- word = box['text']
175
- # logger.info(f"Box: {box}")
176
- x1, y1 = box['bounding_rect']['x1'], box['bounding_rect']['y1']
177
- x2, y2 = box['bounding_rect']['x3'], box['bounding_rect']['y3']
178
- words.append({
179
- "x1": x1,
180
- "y1": y1,
181
- "x2": x2,
182
- "y2": y2,
183
- "word": box['text']
184
- })
412
+ # sentence_position = 0
413
+ # for box in corrected_boxes:
414
+ # word = box['text']
415
+ # # logger.info(f"Box: {box}")
416
+ # x1, y1 = box['bounding_rect']['x1'], box['bounding_rect']['y1']
417
+ # x2, y2 = box['bounding_rect']['x3'], box['bounding_rect']['y3']
418
+ # words.append({
419
+ # "x1": x1,
420
+ # "y1": y1,
421
+ # "x2": x2,
422
+ # "y2": y2,
423
+ # "word": box['text']
424
+ # })
185
425
 
186
- # logger.info(f"Returning words: {words}")
426
+ # # logger.info(f"Returning words: {words}")
187
427
 
188
- ret = [
189
- {
190
- "words": words,
191
- }
192
- ]
428
+ # ret = [
429
+ # {
430
+ # "words": words,
431
+ # }
432
+ # ]
193
433
  # cropped_sections = []
194
434
  # for box in boxes_of_text:
195
435
  # # Ensure crop coordinates are within image bounds
@@ -247,8 +487,30 @@ async def find_box_for_sentence(sentence_to_check):
247
487
  return [], 48
248
488
 
249
489
  async def main():
490
+ import mss as mss
250
491
  connect_to_obs_sync(5)
251
- await find_box_for_sentence("はじめから")
492
+ start_time = time.time()
493
+ with mss.mss() as sct:
494
+ monitors = sct.monitors
495
+ if len(monitors) > 1:
496
+ monitors = monitors[1:]
497
+ else:
498
+ monitors = [monitors[0]]
499
+ monitor = monitors[get_config().wip.monitor_to_capture]
500
+ img = get_screenshot_PIL(img_format='jpg')
501
+ img.show()
502
+ # Put the image over a transparent background without stretching
503
+ # Create a transparent image with the same size as the monitor
504
+ new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
505
+ # Calculate coordinates to center img horizontally and vertically
506
+ left = (monitor['width'] - img.width) // 2
507
+ top = (monitor['height'] - img.height) // 2
508
+ print(f"Image size: {img.size}, Monitor size: {monitor['width']}x{monitor['height']}")
509
+ print(f"Left: {left}, Top: {top}, Width: {monitor['width']}, Height: {monitor['height']}")
510
+ new_img.paste(img, (left, top))
511
+ new_img.show()
512
+
513
+ return new_img
252
514
 
253
515
  if __name__ == '__main__':
254
516
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.12.3
3
+ Version: 2.12.4
4
4
  Summary: A tool for mining sentences from games. Update: Overlay?
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -150,6 +150,8 @@ If you encounter issues, please ask for help in my [Discord](https://discord.gg/
150
150
 
151
151
  * [Renji's Texthooker](https://github.com/Renji-XD/texthooker-ui)
152
152
 
153
+ * https://github.com/Saplling/transparent-texthooker-overlay
154
+
153
155
  ## Donations
154
156
 
155
157
  If you've found this or any of my other projects helpful, please consider supporting my work through [GitHub Sponsors](https://github.com/sponsors/bpwhelan), [Ko-fi](https://ko-fi.com/beangate), or [Patreon](https://www.patreon.com/GameSentenceMiner).
@@ -3,7 +3,7 @@ GameSentenceMiner/anki.py,sha256=FUwcWO0-arzfQjejQmDKP7pNNakhboo8InQ4s_jv6AY,190
3
3
  GameSentenceMiner/config_gui.py,sha256=GBcPWWoki8dMigWqORcG9memBwKp-BNFbhXhjfFLV0c,104414
4
4
  GameSentenceMiner/gametext.py,sha256=fIm28ZvRzKvnVHj86TmSYR2QQifo_Lk6cx4UptIltLs,7844
5
5
  GameSentenceMiner/gsm.py,sha256=GGF0owRrrYJgdfXx-INwfuKbaoY-G5gLllE-sNrwYnI,25341
6
- GameSentenceMiner/obs.py,sha256=lRJFFOB9oHsE_uCRmxl4xwSpkqtjWVzebyqHXmynS1E,17755
6
+ GameSentenceMiner/obs.py,sha256=bMVWAPQ6QLf4celLiOsL9BUO8pTdMn9lpT9fQCNfm7Q,18718
7
7
  GameSentenceMiner/vad.py,sha256=zo9JpuEOCXczPXM-dq8lbr-zM-MPpfJ8aajggR3mKk4,18710
8
8
  GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  GameSentenceMiner/ai/ai_prompting.py,sha256=iHkEx2pQJ-tEyejOgYy4G0DcZc8qvBugVL6-CQpPSME,26089
@@ -25,8 +25,8 @@ GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9
25
25
  GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
26
26
  GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
27
27
  GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
28
- GameSentenceMiner/owocr/owocr/ocr.py,sha256=6ArGr0xd-Fhkw9uPn4MH3urxbLBwZ-UmxfwoKUUgxio,63459
29
- GameSentenceMiner/owocr/owocr/run.py,sha256=nkDpXICJCTKgJTS4MYRnaz-GYqAS-GskcSg1ZkGIRuE,67285
28
+ GameSentenceMiner/owocr/owocr/ocr.py,sha256=Zii5r15ZlHFJWSbmXpva6QJVGkU3j2wT5Q0izazLyCQ,63021
29
+ GameSentenceMiner/owocr/owocr/run.py,sha256=GJAAqifaERxDnxcqPBTsEnxn-rJsUBgDC1s2F26N6KM,65724
30
30
  GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
31
31
  GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  GameSentenceMiner/util/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
@@ -46,7 +46,7 @@ GameSentenceMiner/util/communication/websocket.py,sha256=TbphRGmxVrgEupS7tNdifsm
46
46
  GameSentenceMiner/util/downloader/Untitled_json.py,sha256=RUUl2bbbCpUDUUS0fP0tdvf5FngZ7ILdA_J5TFYAXUQ,15272
47
47
  GameSentenceMiner/util/downloader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  GameSentenceMiner/util/downloader/download_tools.py,sha256=zR-aEHiFVkyo-9oPoSx6nQ2K-_J8WBHLZyLoOhypsW4,8458
49
- GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=EJbKISaZ9p2x9P4x0rpMM5nAInTTc9b7arraGBcd-SA,10381
49
+ GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=l3s9Z-x1b57GX048o5h-MVv0UTZo4H-Q-zb-JREkMLI,10439
50
50
  GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  GameSentenceMiner/web/service.py,sha256=S7bYf2kSk08u-8R9Qpv7piM-pxfFjYZUvU825xupmuI,5279
52
52
  GameSentenceMiner/web/texthooking_page.py,sha256=2ZS89CAI17xVkx64rGmHHbF96eKR8gPWiR_WAoDJ0Mw,17399
@@ -63,10 +63,10 @@ GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
63
63
  GameSentenceMiner/web/templates/index.html,sha256=Gv3CJvNnhAzIVV_QxhNq4OD-pXDt1vKCu9k6WdHSXuA,215343
64
64
  GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
65
65
  GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
66
- GameSentenceMiner/wip/get_overlay_coords.py,sha256=hE-XxbhzvHDZoU9hLLyIFtfpHDO_QXHU0DbR-aJGPuI,10153
67
- gamesentenceminer-2.12.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
68
- gamesentenceminer-2.12.3.dist-info/METADATA,sha256=vy4RJLP3o-9ojyVqkSw6KD8XMUNIPclIoZp4c4mR1b0,6999
69
- gamesentenceminer-2.12.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
- gamesentenceminer-2.12.3.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
71
- gamesentenceminer-2.12.3.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
72
- gamesentenceminer-2.12.3.dist-info/RECORD,,
66
+ GameSentenceMiner/wip/get_overlay_coords.py,sha256=yivn8C26BBRK4cjE7yPv1XfvbyqWC0itLL9Vay8aY-c,19780
67
+ gamesentenceminer-2.12.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
68
+ gamesentenceminer-2.12.4.dist-info/METADATA,sha256=xidAx_PVQT2GCaZoAkfYeMAJHqAppcpiPhmZ5Lhz1X4,7061
69
+ gamesentenceminer-2.12.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
+ gamesentenceminer-2.12.4.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
71
+ gamesentenceminer-2.12.4.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
72
+ gamesentenceminer-2.12.4.dist-info/RECORD,,