GameSentenceMiner 2.13.15__py3-none-any.whl → 2.14.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. GameSentenceMiner/ai/ai_prompting.py +77 -132
  2. GameSentenceMiner/anki.py +48 -6
  3. GameSentenceMiner/config_gui.py +196 -30
  4. GameSentenceMiner/gametext.py +8 -19
  5. GameSentenceMiner/gsm.py +5 -4
  6. GameSentenceMiner/locales/en_us.json +21 -11
  7. GameSentenceMiner/locales/ja_jp.json +21 -11
  8. GameSentenceMiner/locales/zh_cn.json +9 -11
  9. GameSentenceMiner/owocr/owocr/ocr.py +20 -23
  10. GameSentenceMiner/tools/__init__.py +0 -0
  11. GameSentenceMiner/util/configuration.py +241 -105
  12. GameSentenceMiner/util/db.py +408 -0
  13. GameSentenceMiner/util/ffmpeg.py +2 -10
  14. GameSentenceMiner/util/get_overlay_coords.py +324 -0
  15. GameSentenceMiner/util/model.py +8 -2
  16. GameSentenceMiner/util/text_log.py +1 -1
  17. GameSentenceMiner/web/texthooking_page.py +1 -1
  18. GameSentenceMiner/wip/__init___.py +0 -0
  19. {gamesentenceminer-2.13.15.dist-info → gamesentenceminer-2.14.0rc1.dist-info}/METADATA +5 -1
  20. {gamesentenceminer-2.13.15.dist-info → gamesentenceminer-2.14.0rc1.dist-info}/RECORD +27 -25
  21. GameSentenceMiner/util/package.py +0 -37
  22. GameSentenceMiner/wip/get_overlay_coords.py +0 -535
  23. /GameSentenceMiner/{util → tools}/audio_offset_selector.py +0 -0
  24. /GameSentenceMiner/{util → tools}/ss_selector.py +0 -0
  25. /GameSentenceMiner/{util → tools}/window_transparency.py +0 -0
  26. {gamesentenceminer-2.13.15.dist-info → gamesentenceminer-2.14.0rc1.dist-info}/WHEEL +0 -0
  27. {gamesentenceminer-2.13.15.dist-info → gamesentenceminer-2.14.0rc1.dist-info}/entry_points.txt +0 -0
  28. {gamesentenceminer-2.13.15.dist-info → gamesentenceminer-2.14.0rc1.dist-info}/licenses/LICENSE +0 -0
  29. {gamesentenceminer-2.13.15.dist-info → gamesentenceminer-2.14.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,535 +0,0 @@
1
- import asyncio
2
- import io
3
- import base64
4
- import math
5
- from PIL import Image
6
- from GameSentenceMiner.util.configuration import get_config
7
- from typing import Dict, Any, List, Tuple
8
-
9
- from GameSentenceMiner.util.electron_config import get_ocr_language
10
-
11
- if get_config().wip.overlay_websocket_send:
12
- try:
13
- from GameSentenceMiner.owocr.owocr.ocr import GoogleLens, OneOCR, get_regex
14
- oneocr = OneOCR()
15
- lens = GoogleLens()
16
- except ImportError:
17
- pass
18
- from GameSentenceMiner.obs import *
19
-
20
-
21
def _convert_box_to_pixels_v2(
    bbox_data: Dict[str, float],
    original_width: int,
    original_height: int,
    crop_x: int,
    crop_y: int,
    crop_width: int,
    crop_height: int
) -> Dict[str, float]:
    """
    Convert a normalized bounding box into four axis-aligned pixel corner points.

    Any rotation stored in ``bbox_data`` is ignored.

    Args:
        bbox_data: A dictionary with normalized 'center_x', 'center_y', 'width', 'height'.
        original_width: Width of the original, full-size image in pixels.
            Not used by the calculation.
        original_height: Height of the original, full-size image in pixels.
            Not used by the calculation.
        crop_x: Horizontal pixel offset added to every x coordinate.
        crop_y: Vertical pixel offset added to every y coordinate.
        crop_width: Pixel width the normalized x values are scaled by.
        crop_height: Pixel height the normalized y values are scaled by.

    Returns:
        A flat dictionary with keys 'x1'..'y4' holding the top-left, top-right,
        bottom-right and bottom-left corners, in that order.

    Raises:
        KeyError: If ``bbox_data`` lacks one of the four required keys.
    """
    # Scale the normalized center into pixel space and shift by the crop origin.
    center_x_px = bbox_data['center_x'] * crop_width + crop_x
    center_y_px = bbox_data['center_y'] * crop_height + crop_y

    # Half extents in pixels; computed once instead of once per corner.
    half_width_px = bbox_data['width'] * crop_width / 2
    half_height_px = bbox_data['height'] * crop_height / 2

    left = center_x_px - half_width_px
    right = center_x_px + half_width_px
    top = center_y_px - half_height_px
    bottom = center_y_px + half_height_px

    return {
        "x1": left,
        "y1": top,
        "x2": right,
        "y2": top,
        "x3": right,
        "y3": bottom,
        "x4": left,
        "y4": bottom,
    }
70
-
71
def _convert_box_to_pixels(
    bbox_data: Dict[str, float],
    original_width: int,
    original_height: int,
    crop_x: int,
    crop_y: int,
    crop_width: int,
    crop_height: int
) -> Dict[str, float]:
    """
    Convert a normalized, optionally rotated bounding box to an absolute pixel quad.

    The box is scaled to pixels first and rotated afterwards. Rotating the
    normalized offsets and then scaling x and y by different factors would
    skew the rectangle whenever ``crop_width != crop_height``.

    Args:
        bbox_data: A dictionary with normalized 'center_x', 'center_y', 'width',
            'height' and an optional 'rotation_z' (treated as radians; defaults to 0.0).
        original_width: Width of the original, full-size image in pixels.
            Not used by the calculation.
        original_height: Height of the original, full-size image in pixels.
            Not used by the calculation.
        crop_x: Horizontal pixel offset added to every x coordinate.
        crop_y: Vertical pixel offset added to every y coordinate.
        crop_width: Pixel width the normalized x values are scaled by.
        crop_height: Pixel height the normalized y values are scaled by.

    Returns:
        A flat dictionary with keys 'x1'..'y4'. Before rotation the corners are
        top-left, top-right, bottom-right and bottom-left, in that order.

    Raises:
        KeyError: If ``bbox_data`` lacks 'center_x', 'center_y', 'width' or 'height'.
    """
    angle_rad = bbox_data.get('rotation_z', 0.0)
    cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)

    # Box center and half extents in pixel space.
    center_x_px = bbox_data['center_x'] * crop_width + crop_x
    center_y_px = bbox_data['center_y'] * crop_height + crop_y
    half_w_px = bbox_data['width'] * crop_width / 2
    half_h_px = bbox_data['height'] * crop_height / 2

    # Un-rotated corner offsets relative to the center.
    offsets = (
        (-half_w_px, -half_h_px),  # Top-left
        (half_w_px, -half_h_px),   # Top-right
        (half_w_px, half_h_px),    # Bottom-right
        (-half_w_px, half_h_px),   # Bottom-left
    )

    quad: Dict[str, float] = {}
    for index, (dx, dy) in enumerate(offsets, start=1):
        # Standard 2D rotation about the box center.
        quad[f"x{index}"] = center_x_px + dx * cos_a - dy * sin_a
        quad[f"y{index}"] = center_y_px + dx * sin_a + dy * cos_a
    return quad
134
-
135
def extract_text_with_pixel_boxes(
    api_response: Dict[str, Any],
    original_width: int,
    original_height: int,
    crop_x: int,
    crop_y: int,
    crop_width: int,
    crop_height: int
) -> List[Dict[str, Any]]:
    """
    Extract lines and words from an OCR response with pixel-space bounding boxes.

    Args:
        api_response: The dictionary parsed from the source JSON. Paragraphs are
            read from ``objects_response.text.text_layout.paragraphs``.
        original_width: The width of the original, full-size image.
        original_height: The height of the original, full-size image.
        crop_x: Horizontal pixel offset passed to the box conversion.
        crop_y: Vertical pixel offset passed to the box conversion.
        crop_width: Pixel width the normalized boxes are scaled by.
        crop_height: Pixel height the normalized boxes are scaled by.

    Returns:
        One entry per non-empty line: ``{"text", "bounding_rect", "words"}``,
        where ``words`` is a list of ``{"text", "bounding_rect"}`` dictionaries.
        An empty list is returned when the paragraph structure is missing.

    Raises:
        KeyError: If a word or a non-empty line has no ``geometry.bounding_box``.
    """
    try:
        paragraphs = api_response["objects_response"]["text"]["text_layout"]["paragraphs"]
    except (KeyError, TypeError):
        # Missing keys, or a None / non-dict level in the response.
        return []

    def to_pixel_box(element: Dict[str, Any]) -> Dict[str, float]:
        # Shared conversion for both word-level and line-level geometry.
        return _convert_box_to_pixels_v2(
            element["geometry"]["bounding_box"],
            original_width,
            original_height,
            crop_x=crop_x,
            crop_y=crop_y,
            crop_width=crop_width,
            crop_height=crop_height
        )

    results = []
    for para in paragraphs or []:
        for line in para.get("lines", []):
            word_list = [
                {
                    "text": word.get("plain_text", ""),
                    "bounding_rect": to_pixel_box(word),
                }
                for word in line.get("words", [])
            ]

            # Lines without any words produce no sentence entry.
            if not word_list:
                continue

            results.append({
                "text": "".join(entry["text"] for entry in word_list),
                "bounding_rect": to_pixel_box(line),
                "words": word_list
            })

    return results
215
-
216
- # def correct_ocr_text(detected_text: str, reference_text: str) -> str:
217
- # """
218
- # Correct OCR text by comparing character-by-character with reference text.
219
- # When mismatches are found, look for subsequent matches and correct previous mismatches.
220
- # """
221
- # if not detected_text or not reference_text:
222
- # return detected_text
223
-
224
- # detected_chars = list(detected_text)
225
- # reference_chars = list(reference_text)
226
-
227
- # # Track positions where mismatches occurred
228
- # mismatched_positions = []
229
-
230
- # min_length = min(len(detected_chars), len(reference_chars))
231
-
232
- # start_of_reference = 0
233
- # for char in detected_chars:
234
- # if char == reference_chars[start_of_reference]:
235
- # start_of_reference += 1
236
-
237
- # for i in range(min_length):
238
- # if detected_chars[i] != reference_chars[i]:
239
- # mismatched_positions.append(i)
240
- # logger.info(f"Mismatch at position {i}: detected '{detected_chars[i]}' vs reference '{reference_chars[i]}'")
241
- # else:
242
- # # We found a match - if we have previous mismatches, correct the most recent one
243
- # if mismatched_positions:
244
- # # Correct the most recent mismatch (simple 1-for-1 strategy)
245
- # last_mismatch_pos = mismatched_positions.pop()
246
- # old_char = detected_chars[last_mismatch_pos]
247
- # detected_chars[last_mismatch_pos] = reference_chars[last_mismatch_pos]
248
- # logger.info(f"Corrected position {last_mismatch_pos}: '{old_char}' -> '{reference_chars[last_mismatch_pos]}'")
249
-
250
- # corrected_text = ''.join(detected_chars)
251
- # return corrected_text
252
-
253
- # def redistribute_corrected_text(original_boxes: list, original_text: str, corrected_text: str) -> list:
254
- # """
255
- # Redistribute corrected text back to the original text boxes while maintaining their positions.
256
- # """
257
- # if original_text == corrected_text:
258
- # return original_boxes
259
-
260
- # corrected_boxes = []
261
- # text_position = 0
262
-
263
- # for box in original_boxes:
264
- # original_word = box['text']
265
- # word_length = len(original_word)
266
-
267
- # # Extract the corrected portion for this box
268
- # if text_position + word_length <= len(corrected_text):
269
- # corrected_word = corrected_text[text_position:text_position + word_length]
270
- # else:
271
- # # Handle case where corrected text is shorter
272
- # corrected_word = corrected_text[text_position:] if text_position < len(corrected_text) else ""
273
-
274
- # # Create a new box with corrected text but same coordinates
275
- # corrected_box = box.copy()
276
- # corrected_box['text'] = corrected_word
277
- # corrected_boxes.append(corrected_box)
278
-
279
- # text_position += word_length
280
-
281
- # logger.info(f"Redistributed: '{original_word}' -> '{corrected_word}'")
282
-
283
- # return corrected_boxes
284
-
285
async def get_full_screenshot() -> Tuple[Image.Image, int, int] | None:
    """
    Grab a screenshot and place it on a transparent, monitor-sized canvas.

    The target monitor is chosen with ``get_config().wip.monitor_to_capture``
    from the monitor list reported by ``mss``. The image itself comes from
    ``get_screenshot_PIL``, requested at half the monitor resolution.

    Returns:
        ``(canvas, monitor_width, monitor_height)`` on success, or ``None`` when
        any step raises. Callers must check for ``None`` before unpacking.
    """
    logger.info("Getting Screenshot from OBS")
    try:
        import mss as mss
        start_time = time.time()

        # mss is only needed to look up the monitor geometry.
        with mss.mss() as sct:
            monitors = sct.monitors
            if len(monitors) > 1:
                # Skip the first entry when more than one is reported
                # (presumably the combined virtual screen; see the mss docs).
                monitors = monitors[1:]
            else:
                monitors = [monitors[0]]
            monitor = monitors[get_config().wip.monitor_to_capture]

        monitor_width, monitor_height = monitor['width'], monitor['height']
        img = get_screenshot_PIL(
            compression=90,
            img_format='jpg',
            width=monitor_width // 2,
            height=monitor_height // 2,
        )

        # Transparent canvas with the monitor's dimensions.
        new_img = Image.new("RGBA", (monitor_width, monitor_height), (0, 0, 0, 0))

        if img.width < monitor_width and img.height < monitor_height:
            # Image is smaller in both dimensions: scale it up to the monitor size.
            img = img.resize((monitor_width, monitor_height), Image.Resampling.BILINEAR)

        # Center along any axis where the image is still smaller than the monitor.
        left = (monitor_width - img.width) // 2 if img.width < monitor_width else 0
        top = (monitor_height - img.height) // 2 if img.height < monitor_height else 0

        logger.info(f"Image size: {img.size}, Monitor size: {monitor_width}x{monitor_height}")
        new_img.paste(img, (left, top))
        logger.info(f"Screenshot captured in {time.time() - start_time:.2f} seconds.")

        return new_img, monitor_width, monitor_height

    except Exception as e:
        logger.info(f"An unexpected error occurred during screenshot capture: {e}")
        return None
357
-
358
async def do_work(sentence_to_check=None):
    """
    Run the two-stage OCR pipeline and return text boxes in pixel coordinates.

    Steps:
        1. Capture a monitor-sized screenshot via ``get_full_screenshot``.
        2. Run ``oneocr`` on it and read crop coordinates from index 2 of its result.
        3. Paste only those crops onto a transparent canvas.
        4. Run ``lens`` on the canvas and convert index 2 of its result with
           ``extract_text_with_pixel_boxes``.

    Args:
        sentence_to_check: Reference sentence. Currently unused; the correction
            logic that consumed it is disabled.

    Returns:
        ``(boxes, 48)``, where ``boxes`` is the list produced by
        ``extract_text_with_pixel_boxes``. ``([], 48)`` is returned when no
        screenshot could be captured. The meaning of 48 is not visible here.
    """
    from math import floor

    logger.info("in find_box")
    logger.info("after_initial_sleep")
    start_time = time.time()

    screenshot = await get_full_screenshot()
    if screenshot is None:
        # get_full_screenshot returns None on failure; unpacking it would raise.
        logger.info("Failed to get full screenshot for OCR.")
        return [], 48
    full_screenshot_image, monitor_width, monitor_height = screenshot

    oneocr_results = oneocr(full_screenshot_image, multiple_crop_coords=True)
    crop_coords_list = oneocr_results[2]
    logger.info("Cropping full screenshot with coordinates: %s", crop_coords_list)

    # Rebuild a canvas that contains only the regions oneocr reported.
    img = Image.new("RGBA", (monitor_width, monitor_height), (0, 0, 0, 0))
    for crop_coords in crop_coords_list:
        cropped_image = full_screenshot_image.crop(crop_coords)
        img.paste(cropped_image, (floor(crop_coords[0]), floor(crop_coords[1])))

    # NOTE(review): developer-specific debug dump; only triggers if this exact path exists.
    if os.path.exists("C:\\Users\\Beangate\\GSM\\temp"):
        img.save("C:\\Users\\Beangate\\GSM\\temp\\full_screenshot.png")
    logger.info(f"Time taken to get cropped image for lens: {time.time() - start_time:.2f} seconds")

    start_time = time.time()
    logger.info("Full screenshot captured successfully. Now performing OCR...")
    google_ocr_results = lens(img, return_coords=True)[2]

    ret = extract_text_with_pixel_boxes(
        api_response=google_ocr_results,
        original_width=monitor_width,
        original_height=monitor_height,
        crop_x=0,
        crop_y=0,
        crop_width=img.width,
        crop_height=img.height
    )
    logger.info(f"Time taken for Lens OCR: {time.time() - start_time:.2f} seconds")

    return ret, 48
497
-
498
async def find_box_for_sentence(sentence_to_check):
    """
    Run ``do_work`` for a sentence and always return a ``(boxes, 48)`` pair.

    Args:
        sentence_to_check: Forwarded to ``do_work`` as ``sentence_to_check``.

    Returns:
        The result of ``do_work``, or ``([], 48)`` when it raises or returns
        ``None``.
    """
    try:
        result = await do_work(sentence_to_check=sentence_to_check)
    except Exception as e:
        logger.info(f"Error in find_box_for_sentence: {e}", exc_info=True)
        return [], 48

    # Normalize a missing result so callers can always unpack two values.
    if result is None:
        return [], 48
    return result
504
-
505
async def main():
    """
    Manual debug entry point: show a screenshot centered on a monitor-sized canvas.

    Connects to OBS, fetches a screenshot with ``get_screenshot_PIL``, displays
    it, pastes it centered onto a transparent canvas sized like the configured
    monitor, and displays that canvas too.

    Returns:
        The composed canvas image.
    """
    import mss as mss
    connect_to_obs_sync(5)

    # mss is only needed to look up the monitor geometry.
    with mss.mss() as sct:
        monitors = sct.monitors
        if len(monitors) > 1:
            monitors = monitors[1:]
        else:
            monitors = [monitors[0]]
        monitor = monitors[get_config().wip.monitor_to_capture]

    img = get_screenshot_PIL(img_format='jpg')
    img.show()

    # Transparent canvas the size of the monitor; the image is pasted without stretching.
    new_img = Image.new("RGBA", (monitor['width'], monitor['height']), (0, 0, 0, 0))
    left = (monitor['width'] - img.width) // 2
    top = (monitor['height'] - img.height) // 2
    print(f"Image size: {img.size}, Monitor size: {monitor['width']}x{monitor['height']}")
    print(f"Left: {left}, Top: {top}, Width: {monitor['width']}, Height: {monitor['height']}")
    new_img.paste(img, (left, top))
    new_img.show()

    return new_img


if __name__ == '__main__':
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Script terminated by user.")
File without changes