GameSentenceMiner 2.15.3__py3-none-any.whl → 2.15.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/config_gui.py +53 -26
- GameSentenceMiner/gsm.py +2 -2
- GameSentenceMiner/locales/en_us.json +4 -0
- GameSentenceMiner/locales/ja_jp.json +4 -0
- GameSentenceMiner/locales/zh_cn.json +4 -0
- GameSentenceMiner/obs.py +234 -226
- GameSentenceMiner/ocr/owocr_helper.py +80 -15
- GameSentenceMiner/owocr/owocr/ocr.py +15 -3
- GameSentenceMiner/owocr/owocr/run.py +82 -60
- GameSentenceMiner/util/configuration.py +1 -0
- GameSentenceMiner/util/get_overlay_coords.py +0 -1
- GameSentenceMiner/vad.py +7 -3
- {gamesentenceminer-2.15.3.dist-info → gamesentenceminer-2.15.5.dist-info}/METADATA +3 -2
- {gamesentenceminer-2.15.3.dist-info → gamesentenceminer-2.15.5.dist-info}/RECORD +18 -18
- {gamesentenceminer-2.15.3.dist-info → gamesentenceminer-2.15.5.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.15.3.dist-info → gamesentenceminer-2.15.5.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.15.3.dist-info → gamesentenceminer-2.15.5.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.15.3.dist-info → gamesentenceminer-2.15.5.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
+
from copy import copy
|
2
3
|
import io
|
3
4
|
import json
|
4
5
|
import logging
|
@@ -376,18 +377,19 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
|
|
376
377
|
return
|
377
378
|
previous_orig_text = orig_text_string
|
378
379
|
previous_ocr1_result = previous_text
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
380
|
+
ocr2_image = get_ocr2_image(crop_coords, og_image=previous_img_local, ocr2_engine=get_ocr_ocr2())
|
381
|
+
# if crop_coords and get_ocr_optimize_second_scan():
|
382
|
+
# x1, y1, x2, y2 = crop_coords
|
383
|
+
# x1 = max(0, min(x1, img.width))
|
384
|
+
# y1 = max(0, min(y1, img.height))
|
385
|
+
# x2 = max(x1, min(x2, img.width))
|
386
|
+
# y2 = max(y1, min(y2, img.height))
|
387
|
+
# previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
|
388
|
+
# try:
|
389
|
+
# previous_img_local = previous_img_local.crop((x1, y1, x2, y2))
|
390
|
+
# except ValueError:
|
391
|
+
# logger.warning("Error cropping image, using original image")
|
392
|
+
second_ocr_queue.put((previous_text, stable_time, ocr2_image, filtering, pre_crop_image))
|
391
393
|
# threading.Thread(target=do_second_ocr, args=(previous_text, stable_time, previous_img_local, filtering), daemon=True).start()
|
392
394
|
previous_img = None
|
393
395
|
previous_text = None
|
@@ -412,6 +414,69 @@ done = False
|
|
412
414
|
# Create a queue for tasks
|
413
415
|
second_ocr_queue = queue.Queue()
|
414
416
|
|
417
|
+
def get_ocr2_image(crop_coords, og_image, ocr2_engine=None):
|
418
|
+
"""
|
419
|
+
Returns the image to use for the second OCR pass, cropping and scaling as needed.
|
420
|
+
Logic is unchanged, but code is refactored for clarity and maintainability.
|
421
|
+
"""
|
422
|
+
def return_original_image():
|
423
|
+
logger.info("Returning original image for OCR2 (no cropping or optimization).")
|
424
|
+
if not crop_coords or not get_ocr_optimize_second_scan():
|
425
|
+
return og_image
|
426
|
+
x1, y1, x2, y2 = crop_coords
|
427
|
+
x1 = max(0, min(x1, og_image.width))
|
428
|
+
y1 = max(0, min(y1, og_image.height))
|
429
|
+
x2 = max(x1, min(x2, og_image.width))
|
430
|
+
y2 = max(y1, min(y2, og_image.height))
|
431
|
+
og_image.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
|
432
|
+
return og_image.crop((x1, y1, x2, y2))
|
433
|
+
|
434
|
+
LOCAL_OCR_ENGINES = ['easyocr', 'oneocr', 'rapidocr', 'mangaocr', 'winrtocr']
|
435
|
+
local_ocr = ocr2_engine in LOCAL_OCR_ENGINES
|
436
|
+
ocr_config_local = copy(ocr_config)
|
437
|
+
|
438
|
+
# Non-local OCR: just crop the original image if needed
|
439
|
+
if not local_ocr:
|
440
|
+
return return_original_image()
|
441
|
+
|
442
|
+
# Local OCR: get fresh screenshot and apply config/cropping
|
443
|
+
obs_width = getattr(run.obs_screenshot_thread, 'width', None)
|
444
|
+
obs_height = getattr(run.obs_screenshot_thread, 'height', None)
|
445
|
+
if not obs_width or not obs_height:
|
446
|
+
return return_original_image()
|
447
|
+
logger.debug(f"Getting OCR2 image with OBS dimensions: {obs_width}x{obs_height}")
|
448
|
+
|
449
|
+
img = obs.get_screenshot_PIL(compression=100, img_format="jpg")
|
450
|
+
ocr_config_local.scale_to_custom_size(img.width, img.height)
|
451
|
+
|
452
|
+
# If no crop or optimization, just apply config and return
|
453
|
+
if not crop_coords or not get_ocr_optimize_second_scan():
|
454
|
+
img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=True)
|
455
|
+
return img
|
456
|
+
|
457
|
+
# Calculate scaling ratios
|
458
|
+
width_ratio = img.width / obs_width if obs_width else 1
|
459
|
+
height_ratio = img.height / obs_height if obs_height else 1
|
460
|
+
logger.debug(f"Cropping OCR2 image with crop coordinates: {crop_coords} and ratios: {width_ratio}, {height_ratio}")
|
461
|
+
|
462
|
+
# Scale crop_coords
|
463
|
+
x1 = int(crop_coords[0] * width_ratio)
|
464
|
+
y1 = int(crop_coords[1] * height_ratio)
|
465
|
+
x2 = int(crop_coords[2] * width_ratio)
|
466
|
+
y2 = int(crop_coords[3] * height_ratio)
|
467
|
+
logger.debug(f"Scaled crop coordinates: {(x1, y1, x2, y2)}")
|
468
|
+
|
469
|
+
# Clamp coordinates to image bounds
|
470
|
+
x1 = max(0, min(x1, img.width))
|
471
|
+
y1 = max(0, min(y1, img.height))
|
472
|
+
x2 = max(x1, min(x2, img.width))
|
473
|
+
y2 = max(y1, min(y2, img.height))
|
474
|
+
|
475
|
+
img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=False)
|
476
|
+
|
477
|
+
|
478
|
+
return img.crop((x1, y1, x2, y2))
|
479
|
+
|
415
480
|
def process_task_queue():
|
416
481
|
while True:
|
417
482
|
try:
|
@@ -456,7 +521,7 @@ def run_oneocr(ocr_config: OCRConfig, rectangles, config_check_thread):
|
|
456
521
|
gsm_ocr_config=ocr_config,
|
457
522
|
screen_capture_areas=screen_areas,
|
458
523
|
furigana_filter_sensitivity=furigana_filter_sensitivity,
|
459
|
-
screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None,
|
524
|
+
screen_capture_combo=manual_ocr_hotkey.upper() if manual_ocr_hotkey and manual else None,
|
460
525
|
config_check_thread=config_check_thread)
|
461
526
|
except Exception as e:
|
462
527
|
logger.exception(f"Error running OneOCR: {e}")
|
@@ -530,7 +595,7 @@ def set_force_stable_hotkey():
|
|
530
595
|
|
531
596
|
if __name__ == "__main__":
|
532
597
|
try:
|
533
|
-
global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline, obs_ocr
|
598
|
+
global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline, obs_ocr, manual
|
534
599
|
import sys
|
535
600
|
|
536
601
|
import argparse
|
@@ -577,7 +642,7 @@ if __name__ == "__main__":
|
|
577
642
|
keep_newline = args.keep_newline
|
578
643
|
obs_ocr = args.obs_ocr
|
579
644
|
|
580
|
-
obs.connect_to_obs_sync(
|
645
|
+
obs.connect_to_obs_sync(check_output=False)
|
581
646
|
|
582
647
|
# Start config change checker thread
|
583
648
|
config_check_thread = ConfigChangeCheckThread()
|
@@ -1243,7 +1243,7 @@ class OCRSpace:
|
|
1243
1243
|
class GeminiOCR:
|
1244
1244
|
name = 'gemini'
|
1245
1245
|
readable_name = 'Gemini'
|
1246
|
-
key = '
|
1246
|
+
key = ';'
|
1247
1247
|
available = False
|
1248
1248
|
|
1249
1249
|
def __init__(self, config={'api_key': None}, lang='ja'):
|
@@ -1433,10 +1433,14 @@ class localLLMOCR:
|
|
1433
1433
|
self.keep_warm = config.get('keep_warm', True)
|
1434
1434
|
self.custom_prompt = config.get('prompt', None)
|
1435
1435
|
self.available = True
|
1436
|
+
if not self.check_url_for_connectivity(self.api_url):
|
1437
|
+
self.available = False
|
1438
|
+
logger.warning(f'Local LLM OCR API URL not reachable: {self.api_url}')
|
1439
|
+
return
|
1436
1440
|
self.client = openai.OpenAI(
|
1437
1441
|
base_url=self.api_url.replace('/v1/chat/completions', '/v1'),
|
1438
1442
|
api_key=self.api_key,
|
1439
|
-
timeout=
|
1443
|
+
timeout=1
|
1440
1444
|
)
|
1441
1445
|
if self.client.models.retrieve(self.model):
|
1442
1446
|
self.model = self.model
|
@@ -1446,7 +1450,15 @@ class localLLMOCR:
|
|
1446
1450
|
self.keep_llm_hot_thread.start()
|
1447
1451
|
except Exception as e:
|
1448
1452
|
logger.warning(f'Error initializing Local LLM OCR, Local LLM OCR will not work!')
|
1449
|
-
|
1453
|
+
|
1454
|
+
def check_url_for_connectivity(self, url):
|
1455
|
+
import requests
|
1456
|
+
try:
|
1457
|
+
response = requests.get(url, timeout=0.5)
|
1458
|
+
return response.status_code == 200
|
1459
|
+
except Exception:
|
1460
|
+
return False
|
1461
|
+
|
1450
1462
|
def keep_llm_warm(self):
|
1451
1463
|
def ocr_blank_black_image():
|
1452
1464
|
if self.last_ocr_time and (time.time() - self.last_ocr_time) < 5:
|
@@ -42,6 +42,7 @@ import socketserver
|
|
42
42
|
import cv2
|
43
43
|
import numpy as np
|
44
44
|
|
45
|
+
from collections import deque
|
45
46
|
from datetime import datetime, timedelta
|
46
47
|
from PIL import Image, ImageDraw
|
47
48
|
from loguru import logger
|
@@ -337,6 +338,7 @@ class TextFiltering:
|
|
337
338
|
self.thai_regex = re.compile(r'[\u0E00-\u0E7F]')
|
338
339
|
self.latin_extended_regex = re.compile(
|
339
340
|
r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
|
341
|
+
self.last_few_results = {}
|
340
342
|
try:
|
341
343
|
from transformers import pipeline, AutoTokenizer
|
342
344
|
import torch
|
@@ -361,7 +363,7 @@ class TextFiltering:
|
|
361
363
|
import langid
|
362
364
|
self.classify = langid.classify
|
363
365
|
|
364
|
-
def __call__(self, text, last_result):
|
366
|
+
def __call__(self, text, last_result, engine=None, is_second_ocr=False):
|
365
367
|
lang = get_ocr_language()
|
366
368
|
if self.initial_lang != lang:
|
367
369
|
from pysbd import Segmenter
|
@@ -402,11 +404,24 @@ class TextFiltering:
|
|
402
404
|
|
403
405
|
try:
|
404
406
|
if isinstance(last_result, list):
|
405
|
-
last_text = last_result
|
407
|
+
last_text = last_result.copy()
|
406
408
|
elif last_result and last_result[1] == engine_index:
|
407
409
|
last_text = last_result[0]
|
408
410
|
else:
|
409
411
|
last_text = []
|
412
|
+
|
413
|
+
if engine and not is_second_ocr:
|
414
|
+
if self.last_few_results and self.last_few_results.get(engine):
|
415
|
+
for sublist in self.last_few_results.get(engine, []):
|
416
|
+
if sublist:
|
417
|
+
for item in sublist:
|
418
|
+
if item and item not in last_text:
|
419
|
+
last_text.append(item)
|
420
|
+
self.last_few_results[engine].append(orig_text_filtered)
|
421
|
+
else:
|
422
|
+
self.last_few_results[engine] = deque(maxlen=3)
|
423
|
+
self.last_few_results[engine].append(orig_text_filtered)
|
424
|
+
|
410
425
|
except Exception as e:
|
411
426
|
logger.error(f"Error processing last_result {last_result}: {e}")
|
412
427
|
last_text = []
|
@@ -981,7 +996,7 @@ def quick_text_detection(pil_image, threshold_ratio=0.01):
|
|
981
996
|
|
982
997
|
# Use OBS for Screenshot Source (i.e. Linux)
|
983
998
|
class OBSScreenshotThread(threading.Thread):
|
984
|
-
def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
|
999
|
+
def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1, is_manual_ocr=False):
|
985
1000
|
super().__init__(daemon=True)
|
986
1001
|
self.ocr_config = ocr_config
|
987
1002
|
self.interval = interval
|
@@ -992,6 +1007,7 @@ class OBSScreenshotThread(threading.Thread):
|
|
992
1007
|
self.width = width
|
993
1008
|
self.height = height
|
994
1009
|
self.use_periodic_queue = not screen_capture_on_combo
|
1010
|
+
self.is_manual_ocr = is_manual_ocr
|
995
1011
|
|
996
1012
|
def write_result(self, result):
|
997
1013
|
if self.use_periodic_queue:
|
@@ -1003,62 +1019,26 @@ class OBSScreenshotThread(threading.Thread):
|
|
1003
1019
|
def connect_obs(self):
|
1004
1020
|
import GameSentenceMiner.obs as obs
|
1005
1021
|
obs.connect_to_obs_sync(check_output=False)
|
1006
|
-
|
1007
|
-
def scale_down_width_height(self, width, height):
|
1008
|
-
if width == 0 or height == 0:
|
1009
|
-
return self.width, self.height
|
1010
|
-
# return width, height
|
1011
|
-
aspect_ratio = width / height
|
1012
|
-
logger.info(
|
1013
|
-
f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
|
1014
|
-
if aspect_ratio > 2.66:
|
1015
|
-
# Ultra-wide (32:9) - use 1920x540
|
1016
|
-
logger.info("Using ultra-wide aspect ratio scaling (32:9).")
|
1017
|
-
return 1920, 540
|
1018
|
-
elif aspect_ratio > 2.33:
|
1019
|
-
# 21:9 - use 1920x800
|
1020
|
-
logger.info("Using ultra-wide aspect ratio scaling (21:9).")
|
1021
|
-
return 1920, 800
|
1022
|
-
elif aspect_ratio > 1.77:
|
1023
|
-
# 16:9 - use 1280x720
|
1024
|
-
logger.info("Using standard aspect ratio scaling (16:9).")
|
1025
|
-
return 1280, 720
|
1026
|
-
elif aspect_ratio > 1.6:
|
1027
|
-
# 16:10 - use 1280x800
|
1028
|
-
logger.info("Using standard aspect ratio scaling (16:10).")
|
1029
|
-
return 1280, 800
|
1030
|
-
elif aspect_ratio > 1.33:
|
1031
|
-
# 4:3 - use 960x720
|
1032
|
-
logger.info("Using standard aspect ratio scaling (4:3).")
|
1033
|
-
return 960, 720
|
1034
|
-
elif aspect_ratio > 1.25:
|
1035
|
-
# 5:4 - use 900x720
|
1036
|
-
logger.info("Using standard aspect ratio scaling (5:4).")
|
1037
|
-
return 900, 720
|
1038
|
-
elif aspect_ratio > 1.5:
|
1039
|
-
# 3:2 - use 1080x720
|
1040
|
-
logger.info("Using standard aspect ratio scaling (3:2).")
|
1041
|
-
return 1080, 720
|
1042
|
-
else:
|
1043
|
-
# Default fallback - use original resolution
|
1044
|
-
logger.info(
|
1045
|
-
"Using default aspect ratio scaling (original resolution).")
|
1046
|
-
return width, height
|
1047
1022
|
|
1048
1023
|
def init_config(self, source=None, scene=None):
|
1049
1024
|
import GameSentenceMiner.obs as obs
|
1050
1025
|
obs.update_current_game()
|
1051
1026
|
self.current_source = source if source else obs.get_active_source()
|
1052
|
-
logger.
|
1027
|
+
logger.debug(f"Current OBS source: {self.current_source}")
|
1053
1028
|
self.source_width = self.current_source.get(
|
1054
1029
|
"sceneItemTransform").get("sourceWidth") or self.width
|
1055
1030
|
self.source_height = self.current_source.get(
|
1056
1031
|
"sceneItemTransform").get("sourceHeight") or self.height
|
1057
|
-
if self.source_width and self.source_height:
|
1058
|
-
self.width, self.height =
|
1032
|
+
if self.source_width and self.source_height and not self.is_manual_ocr and not get_ocr_two_pass_ocr():
|
1033
|
+
self.width, self.height = scale_down_width_height(
|
1059
1034
|
self.source_width, self.source_height)
|
1060
1035
|
logger.info(
|
1061
|
-
f"Using OBS source dimensions: {self.
|
1036
|
+
f"Using OBS source dimensions: {self.source_width}x{self.source_height}")
|
1037
|
+
else:
|
1038
|
+
self.width = self.source_width or 1280
|
1039
|
+
self.height = self.source_height or 720
|
1040
|
+
logger.info(
|
1041
|
+
f"Using source dimensions: {self.width}x{self.height}")
|
1062
1042
|
self.current_source_name = self.current_source.get(
|
1063
1043
|
"sourceName") or None
|
1064
1044
|
self.current_scene = scene if scene else obs.get_current_game()
|
@@ -1105,7 +1085,7 @@ class OBSScreenshotThread(threading.Thread):
|
|
1105
1085
|
self.write_result(1)
|
1106
1086
|
continue
|
1107
1087
|
img = obs.get_screenshot_PIL(source_name=self.current_source_name,
|
1108
|
-
width=self.width, height=self.height, img_format='jpg', compression=
|
1088
|
+
width=self.width, height=self.height, img_format='jpg', compression=100)
|
1109
1089
|
|
1110
1090
|
img = apply_ocr_config_to_image(img, self.ocr_config)
|
1111
1091
|
|
@@ -1120,6 +1100,39 @@ class OBSScreenshotThread(threading.Thread):
|
|
1120
1100
|
f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
|
1121
1101
|
time.sleep(.5)
|
1122
1102
|
continue
|
1103
|
+
|
1104
|
+
def scale_down_width_height(width, height):
|
1105
|
+
if width == 0 or height == 0:
|
1106
|
+
return width, height
|
1107
|
+
# return width, height
|
1108
|
+
aspect_ratio = width / height
|
1109
|
+
logger.info(
|
1110
|
+
f"Scaling down OBS source dimensions: {width}x{height} (Aspect Ratio: {aspect_ratio})")
|
1111
|
+
if aspect_ratio > 2.66:
|
1112
|
+
logger.info("Using ultra-wide aspect ratio scaling (32:9).")
|
1113
|
+
return 1920, 540
|
1114
|
+
elif aspect_ratio > 2.33:
|
1115
|
+
logger.info("Using ultra-wide aspect ratio scaling (21:9).")
|
1116
|
+
return 1920, 800
|
1117
|
+
elif aspect_ratio > 1.77:
|
1118
|
+
logger.info("Using standard aspect ratio scaling (16:9).")
|
1119
|
+
return 1280, 720
|
1120
|
+
elif aspect_ratio > 1.6:
|
1121
|
+
logger.info("Using standard aspect ratio scaling (16:10).")
|
1122
|
+
return 1280, 800
|
1123
|
+
elif aspect_ratio > 1.33:
|
1124
|
+
logger.info("Using standard aspect ratio scaling (4:3).")
|
1125
|
+
return 960, 720
|
1126
|
+
elif aspect_ratio > 1.25:
|
1127
|
+
logger.info("Using standard aspect ratio scaling (5:4).")
|
1128
|
+
return 900, 720
|
1129
|
+
elif aspect_ratio > 1.5:
|
1130
|
+
logger.info("Using standard aspect ratio scaling (3:2).")
|
1131
|
+
return 1080, 720
|
1132
|
+
else:
|
1133
|
+
logger.info(
|
1134
|
+
"Using default aspect ratio scaling (original resolution).")
|
1135
|
+
return width, height
|
1123
1136
|
|
1124
1137
|
|
1125
1138
|
def apply_ocr_config_to_image(img, ocr_config, is_secondary=False):
|
@@ -1317,8 +1330,10 @@ def do_configured_ocr_replacements(text: str) -> str:
|
|
1317
1330
|
return do_text_replacements(text, OCR_REPLACEMENTS_FILE)
|
1318
1331
|
|
1319
1332
|
|
1320
|
-
def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering=None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
|
1333
|
+
def process_and_write_results(img_or_path, write_to=None, last_result=None, filtering: TextFiltering = None, notify=None, engine=None, ocr_start_time=None, furigana_filter_sensitivity=0):
|
1321
1334
|
global engine_index
|
1335
|
+
# TODO Replace this at a later date
|
1336
|
+
is_second_ocr = bool(engine)
|
1322
1337
|
if auto_pause_handler:
|
1323
1338
|
auto_pause_handler.stop()
|
1324
1339
|
if engine:
|
@@ -1328,9 +1343,10 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
1328
1343
|
break
|
1329
1344
|
else:
|
1330
1345
|
engine_instance = engine_instances[engine_index]
|
1346
|
+
engine = engine_instance.name
|
1331
1347
|
|
1332
1348
|
engine_color = config.get_general('engine_color')
|
1333
|
-
|
1349
|
+
|
1334
1350
|
start_time = time.time()
|
1335
1351
|
result = engine_instance(img_or_path, furigana_filter_sensitivity)
|
1336
1352
|
res, text, crop_coords = (*result, None)[:3]
|
@@ -1362,7 +1378,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
1362
1378
|
if res:
|
1363
1379
|
text = do_configured_ocr_replacements(text)
|
1364
1380
|
if filtering:
|
1365
|
-
text, orig_text = filtering(text, last_result)
|
1381
|
+
text, orig_text = filtering(text, last_result, engine=engine, is_second_ocr=is_second_ocr)
|
1366
1382
|
if get_ocr_language() == "ja" or get_ocr_language() == "zh":
|
1367
1383
|
text = post_process(text, keep_blank_lines=get_ocr_keep_newline())
|
1368
1384
|
if notify and config.get_general('notifications'):
|
@@ -1382,7 +1398,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
1382
1398
|
pyperclipfix.copy(text)
|
1383
1399
|
elif write_to == "callback":
|
1384
1400
|
txt_callback(text, orig_text, ocr_start_time,
|
1385
|
-
img_or_path,
|
1401
|
+
img_or_path, is_second_ocr, filtering, crop_coords)
|
1386
1402
|
elif write_to:
|
1387
1403
|
with Path(write_to).open('a', encoding='utf-8') as f:
|
1388
1404
|
f.write(text + '\n')
|
@@ -1404,7 +1420,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
|
|
1404
1420
|
This function checks if the detected text area falls entirely within secondary rectangles (menu areas).
|
1405
1421
|
|
1406
1422
|
:param text: The recognized text from OCR.
|
1407
|
-
:param crop_coords: Tuple containing (x, y,
|
1423
|
+
:param crop_coords: Tuple containing (x, y, x2, y2) of the detected text area relative to the cropped image.
|
1408
1424
|
:return: True if the text is all menu items (within secondary rectangles), False otherwise.
|
1409
1425
|
"""
|
1410
1426
|
if not text or not crop_coords:
|
@@ -1412,7 +1428,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
|
|
1412
1428
|
|
1413
1429
|
original_width = obs_screenshot_thread.width
|
1414
1430
|
original_height = obs_screenshot_thread.height
|
1415
|
-
crop_x, crop_y,
|
1431
|
+
crop_x, crop_y, crop_x2, crop_y2 = crop_coords
|
1416
1432
|
|
1417
1433
|
ocr_config = get_scene_ocr_config()
|
1418
1434
|
|
@@ -1430,14 +1446,14 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
|
|
1430
1446
|
return False
|
1431
1447
|
|
1432
1448
|
if not primary_rectangles:
|
1433
|
-
if crop_x < 0 or crop_y < 0 or
|
1449
|
+
if crop_x < 0 or crop_y < 0 or crop_x2 > original_width or crop_y2 > original_height:
|
1434
1450
|
return False
|
1435
1451
|
for menu_rect in menu_rectangles:
|
1436
1452
|
rect_left, rect_top, rect_width, rect_height = menu_rect.coordinates
|
1437
1453
|
rect_right = rect_left + rect_width
|
1438
1454
|
rect_bottom = rect_top + rect_height
|
1439
1455
|
if (crop_x >= rect_left and crop_y >= rect_top and
|
1440
|
-
|
1456
|
+
crop_x2 <= rect_right and crop_y2 <= rect_bottom):
|
1441
1457
|
return True
|
1442
1458
|
return False
|
1443
1459
|
|
@@ -1445,19 +1461,25 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
|
|
1445
1461
|
|
1446
1462
|
if len(primary_rectangles) == 1:
|
1447
1463
|
primary_rect = primary_rectangles[0]
|
1448
|
-
primary_left, primary_top = primary_rect.coordinates
|
1464
|
+
primary_left, primary_top, primary_width, primary_height = primary_rect.coordinates
|
1449
1465
|
original_x = crop_x + primary_left
|
1450
1466
|
original_y = crop_y + primary_top
|
1467
|
+
original_x2 = crop_x2 + primary_left
|
1468
|
+
original_y2 = crop_y2 + primary_top
|
1451
1469
|
else:
|
1452
1470
|
current_y_offset = 0
|
1453
1471
|
original_x = None
|
1454
1472
|
original_y = None
|
1473
|
+
original_x2 = None
|
1474
|
+
original_y2 = None
|
1455
1475
|
for i, primary_rect in enumerate(primary_rectangles):
|
1456
1476
|
primary_left, primary_top, primary_width, primary_height = primary_rect.coordinates
|
1457
1477
|
section_height = primary_height
|
1458
1478
|
if crop_y >= current_y_offset and crop_y < current_y_offset + section_height:
|
1459
1479
|
original_x = crop_x + primary_left
|
1460
1480
|
original_y = (crop_y - current_y_offset) + primary_top
|
1481
|
+
original_x2 = crop_x2 + primary_left
|
1482
|
+
original_y2 = crop_y2 + primary_top
|
1461
1483
|
break
|
1462
1484
|
current_y_offset += section_height + 50
|
1463
1485
|
if original_x is None or original_y is None:
|
@@ -1471,7 +1493,7 @@ def check_text_is_all_menu(text: str, crop_coords: tuple) -> bool:
|
|
1471
1493
|
rect_right = rect_left + rect_width
|
1472
1494
|
rect_bottom = rect_top + rect_height
|
1473
1495
|
if (original_x >= rect_left and original_y >= rect_top and
|
1474
|
-
|
1496
|
+
original_x2 <= rect_right and original_y2 <= rect_bottom):
|
1475
1497
|
return True
|
1476
1498
|
|
1477
1499
|
return False
|
@@ -1712,7 +1734,7 @@ def run(read_from=None,
|
|
1712
1734
|
last_result = ([], engine_index)
|
1713
1735
|
screenshot_event = threading.Event()
|
1714
1736
|
obs_screenshot_thread = OBSScreenshotThread(
|
1715
|
-
gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs)
|
1737
|
+
gsm_ocr_config, screen_capture_on_combo, interval=screen_capture_delay_secs, is_manual_ocr=bool(screen_capture_on_combo))
|
1716
1738
|
obs_screenshot_thread.start()
|
1717
1739
|
filtering = TextFiltering()
|
1718
1740
|
read_from_readable.append('obs')
|
@@ -559,6 +559,7 @@ class VAD:
|
|
559
559
|
add_audio_on_no_results: bool = False
|
560
560
|
cut_and_splice_segments: bool = False
|
561
561
|
splice_padding: float = 0.1
|
562
|
+
use_cpu_for_inference: bool = False
|
562
563
|
|
563
564
|
def is_silero(self):
|
564
565
|
return self.selected_vad_model == SILERO or self.backup_vad_model == SILERO
|
GameSentenceMiner/vad.py
CHANGED
@@ -139,7 +139,7 @@ class VADProcessor(ABC):
|
|
139
139
|
self.extract_audio_and_combine_segments(input_audio, voice_activity, output_audio, padding=get_config().vad.splice_padding)
|
140
140
|
else:
|
141
141
|
ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio, trim_beginning=get_config().vad.trim_beginning, fade_in_duration=0.05, fade_out_duration=0)
|
142
|
-
return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, self.vad_system_name, voice_activity, output_audio)
|
142
|
+
return VADResult(True, max(0, start_time + get_config().vad.beginning_offset), max(0, end_time + get_config().audio.end_offset), self.vad_system_name, voice_activity, output_audio)
|
143
143
|
|
144
144
|
class SileroVADProcessor(VADProcessor):
|
145
145
|
def __init__(self):
|
@@ -165,9 +165,12 @@ class WhisperVADProcessor(VADProcessor):
|
|
165
165
|
|
166
166
|
def load_whisper_model(self):
|
167
167
|
import stable_whisper as whisper
|
168
|
+
import torch
|
168
169
|
if not self.vad_model:
|
170
|
+
self.device = "cpu" if get_config().vad.use_cpu_for_inference else "cuda" if torch.cuda.is_available() else "cpu"
|
169
171
|
with warnings.catch_warnings():
|
170
|
-
|
172
|
+
warnings.simplefilter("ignore")
|
173
|
+
self.vad_model = whisper.load_faster_whisper(get_config().vad.whisper_model, device=self.device)
|
171
174
|
logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
|
172
175
|
return self.vad_model
|
173
176
|
|
@@ -181,11 +184,12 @@ class WhisperVADProcessor(VADProcessor):
|
|
181
184
|
|
182
185
|
# Transcribe the audio using Whisper
|
183
186
|
with warnings.catch_warnings():
|
187
|
+
warnings.simplefilter("ignore")
|
184
188
|
result: WhisperResult = self.vad_model.transcribe(temp_wav, vad=True, language=get_config().vad.language,
|
185
189
|
temperature=0.0)
|
186
190
|
voice_activity = []
|
187
191
|
|
188
|
-
logger.debug(result.to_dict())
|
192
|
+
logger.debug(json.dumps(result.to_dict(), indent=2))
|
189
193
|
|
190
194
|
# Process the segments to extract tokens, timestamps, and confidence
|
191
195
|
for i, segment in enumerate(result.segments):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: GameSentenceMiner
|
3
|
-
Version: 2.15.
|
3
|
+
Version: 2.15.5
|
4
4
|
Summary: A tool for mining sentences from games. Update: Overlay?
|
5
5
|
Author-email: Beangate <bpwhelan95@gmail.com>
|
6
6
|
License: MIT License
|
@@ -47,11 +47,12 @@ Requires-Dist: betterproto==2.0.0b7
|
|
47
47
|
Requires-Dist: obsws-python~=1.7.2
|
48
48
|
Requires-Dist: numpy==2.2.6
|
49
49
|
Requires-Dist: regex
|
50
|
+
Requires-Dist: faster-whisper~=1.2.0
|
50
51
|
Dynamic: license-file
|
51
52
|
|
52
53
|
# GSM - An Immersion toolkit for Games.
|
53
54
|
|
54
|
-
### English | [日本語](
|
55
|
+
### English | [日本語](docs/ja/README.md) | [简体中文](docs/zh/README.md).
|
55
56
|
|
56
57
|
An application designed to assist with language learning through games.
|
57
58
|
|
@@ -1,10 +1,10 @@
|
|
1
1
|
GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
GameSentenceMiner/anki.py,sha256=4Tq6OGjfN-5tYorYRWiih7FZjSKMG6amrLv6DFKkFQc,25344
|
3
|
-
GameSentenceMiner/config_gui.py,sha256=
|
3
|
+
GameSentenceMiner/config_gui.py,sha256=i79PrY2pP8_VKvIL7uoDv5cgHvCCQBIe0mS_YnX2AVg,140792
|
4
4
|
GameSentenceMiner/gametext.py,sha256=fgBgLchezpauWELE9Y5G3kVCLfAneD0X4lJFoI3FYbs,10351
|
5
|
-
GameSentenceMiner/gsm.py,sha256=
|
6
|
-
GameSentenceMiner/obs.py,sha256=
|
7
|
-
GameSentenceMiner/vad.py,sha256=
|
5
|
+
GameSentenceMiner/gsm.py,sha256=t2GAhMwVEHUzCdqM4tIgAzBUvNmt_Gec515iePacD6k,31945
|
6
|
+
GameSentenceMiner/obs.py,sha256=EyAYhaLvMjoeC-3j7fuvkqZN5logFFanPfb8Wn1C6m0,27296
|
7
|
+
GameSentenceMiner/vad.py,sha256=WbXLwiGhHOMterYSAdXPdW209h1mF9WsIdTiWER2vOE,19542
|
8
8
|
GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
GameSentenceMiner/ai/ai_prompting.py,sha256=41xdBzE88Jlt12A0D-T_cMfLO5j6MSxfniOptpwNZm0,24068
|
10
10
|
GameSentenceMiner/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -15,21 +15,21 @@ GameSentenceMiner/assets/icon32.png,sha256=Kww0hU_qke9_22wBuO_Nq0Dv2SfnOLwMhCyGg
|
|
15
15
|
GameSentenceMiner/assets/icon512.png,sha256=HxUj2GHjyQsk8NV433256UxU9phPhtjCY-YB_7W4sqs,192487
|
16
16
|
GameSentenceMiner/assets/icon64.png,sha256=N8xgdZXvhqVQP9QUK3wX5iqxX9LxHljD7c-Bmgim6tM,9301
|
17
17
|
GameSentenceMiner/assets/pickaxe.png,sha256=VfIGyXyIZdzEnVcc4PmG3wszPMO1W4KCT7Q_nFK6eSE,1403829
|
18
|
-
GameSentenceMiner/locales/en_us.json,sha256=
|
19
|
-
GameSentenceMiner/locales/ja_jp.json,sha256
|
20
|
-
GameSentenceMiner/locales/zh_cn.json,sha256=
|
18
|
+
GameSentenceMiner/locales/en_us.json,sha256=4lCV34FnDOe0c02qHlHnfujQedmqHSL-feN3lYCCCfs,26744
|
19
|
+
GameSentenceMiner/locales/ja_jp.json,sha256=LNLo2qIugMcDGiPbSo018zVAU8K_HG8Q4zvIcsHUzTA,28517
|
20
|
+
GameSentenceMiner/locales/zh_cn.json,sha256=lZYB3HAcxhVCSVWcnvuepuCvn6_Y2mvd0-SKJEYx_ko,24829
|
21
21
|
GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=DfcR3bHTu26JJerLzqfW_KpdgUBSrRV4hqSy_LYclps,5967
|
23
23
|
GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
|
24
24
|
GameSentenceMiner/ocr/owocr_area_selector.py,sha256=Rm1_nuZotJhfOfoJ_3mesh9udtOBjYqKhnAvSief6fo,29181
|
25
|
-
GameSentenceMiner/ocr/owocr_helper.py,sha256=
|
25
|
+
GameSentenceMiner/ocr/owocr_helper.py,sha256=v1xIfKUwKZNEUUvFNzzalTmIl_Qj_ygFEX4AJszcAVA,31290
|
26
26
|
GameSentenceMiner/ocr/ss_picker.py,sha256=0IhxUdaKruFpZyBL-8SpxWg7bPrlGpy3lhTcMMZ5rwo,5224
|
27
27
|
GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9lKnRCj6oZgR0,49
|
28
28
|
GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
|
29
29
|
GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
|
30
30
|
GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
|
31
|
-
GameSentenceMiner/owocr/owocr/ocr.py,sha256=
|
32
|
-
GameSentenceMiner/owocr/owocr/run.py,sha256=
|
31
|
+
GameSentenceMiner/owocr/owocr/ocr.py,sha256=vRTMKLzi6GDBFZWCyf0tYi6es3cP1cvQOBjZqaZmnBg,70482
|
32
|
+
GameSentenceMiner/owocr/owocr/run.py,sha256=ctzAhX8nwTYy7E-tqiYFHK51XfY8Y_ze7rBHbDkVPqI,81240
|
33
33
|
GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
|
34
34
|
GameSentenceMiner/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
35
35
|
GameSentenceMiner/tools/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
|
@@ -37,11 +37,11 @@ GameSentenceMiner/tools/furigana_filter_preview.py,sha256=BXv7FChPEJW_VeG5XYt6su
|
|
37
37
|
GameSentenceMiner/tools/ss_selector.py,sha256=cbjMxiKOCuOfbRvLR_PCRlykBrGtm1LXd6u5czPqkmc,4793
|
38
38
|
GameSentenceMiner/tools/window_transparency.py,sha256=GtbxbmZg0-UYPXhfHff-7IKZyY2DKe4B9GdyovfmpeM,8166
|
39
39
|
GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
-
GameSentenceMiner/util/configuration.py,sha256=
|
40
|
+
GameSentenceMiner/util/configuration.py,sha256=cpgusOu4lItCBsZyB8QoAJaXjDovIXNlqyKzh2GMYzw,40241
|
41
41
|
GameSentenceMiner/util/db.py,sha256=2bO0rD4i8A1hhsRBER-wgZy9IK17ibRbI8DHxdKvYsI,16598
|
42
42
|
GameSentenceMiner/util/electron_config.py,sha256=KfeJToeFFVw0IR5MKa-gBzpzaGrU-lyJbR9z-sDEHYU,8767
|
43
43
|
GameSentenceMiner/util/ffmpeg.py,sha256=jA-cFtCmdCWrUSPpdtFSLr-GSoqs4qNUzW20v4HPHf0,28715
|
44
|
-
GameSentenceMiner/util/get_overlay_coords.py,sha256=
|
44
|
+
GameSentenceMiner/util/get_overlay_coords.py,sha256=P5tI7H0cnveGs33aQdvJGy9DV6aIAGh8K8Al1XjNPzw,15114
|
45
45
|
GameSentenceMiner/util/gsm_utils.py,sha256=Piwv88Q9av2LBeN7M6QDi0Mp0_R2lNbkcI6ekK5hd2o,11851
|
46
46
|
GameSentenceMiner/util/model.py,sha256=R-_RYTYLSDNgBoVTPuPBcIHeOznIqi_vBzQ7VQ20WYk,6727
|
47
47
|
GameSentenceMiner/util/notification.py,sha256=YBhf_mSo_i3cjBz-pmeTPx3wchKiG9BK2VBdZSa2prQ,4597
|
@@ -72,9 +72,9 @@ GameSentenceMiner/web/templates/index.html,sha256=LqXZx7-NE42pXSpHNZ3To680rD-vt9
|
|
72
72
|
GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
|
73
73
|
GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
|
74
74
|
GameSentenceMiner/wip/__init___.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
75
|
-
gamesentenceminer-2.15.
|
76
|
-
gamesentenceminer-2.15.
|
77
|
-
gamesentenceminer-2.15.
|
78
|
-
gamesentenceminer-2.15.
|
79
|
-
gamesentenceminer-2.15.
|
80
|
-
gamesentenceminer-2.15.
|
75
|
+
gamesentenceminer-2.15.5.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
76
|
+
gamesentenceminer-2.15.5.dist-info/METADATA,sha256=yHcF-77fNZT7lLRKXwrMLPN7-zhrW0bLtm5OJ2-ORTA,7348
|
77
|
+
gamesentenceminer-2.15.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
78
|
+
gamesentenceminer-2.15.5.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
|
79
|
+
gamesentenceminer-2.15.5.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
|
80
|
+
gamesentenceminer-2.15.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|