GameSentenceMiner-2.14.4-py3-none-any.whl → GameSentenceMiner-2.14.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/ai/ai_prompting.py +23 -22
- GameSentenceMiner/anki.py +8 -8
- GameSentenceMiner/config_gui.py +111 -49
- GameSentenceMiner/locales/en_us.json +1 -1
- GameSentenceMiner/obs.py +42 -5
- GameSentenceMiner/ocr/gsm_ocr_config.py +8 -2
- GameSentenceMiner/owocr/owocr/ocr.py +41 -9
- GameSentenceMiner/owocr/owocr/run.py +209 -26
- GameSentenceMiner/util/configuration.py +6 -0
- GameSentenceMiner/util/electron_config.py +2 -2
- GameSentenceMiner/web/templates/index.html +19 -19
- GameSentenceMiner/web/texthooking_page.py +30 -0
- {gamesentenceminer-2.14.4.dist-info → gamesentenceminer-2.14.5.dist-info}/METADATA +9 -4
- {gamesentenceminer-2.14.4.dist-info → gamesentenceminer-2.14.5.dist-info}/RECORD +18 -18
- {gamesentenceminer-2.14.4.dist-info → gamesentenceminer-2.14.5.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.14.4.dist-info → gamesentenceminer-2.14.5.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.14.4.dist-info → gamesentenceminer-2.14.5.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.14.4.dist-info → gamesentenceminer-2.14.5.dist-info}/top_level.txt +0 -0
GameSentenceMiner/owocr/owocr/run.py

@@ -41,7 +41,7 @@ import websockets
 import socketserver
 import queue
 
-from datetime import datetime
+from datetime import datetime, timedelta
 from PIL import Image, ImageDraw, UnidentifiedImageError
 from loguru import logger
 from desktop_notifier import DesktopNotifierSync
@@ -798,24 +798,47 @@ class ScreenshotThread(threading.Thread):
         self.windows_window_tracker_instance.join()
 
 
+import cv2
+import numpy as np
+
+def apply_adaptive_threshold_filter(img):
+    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    inverted = cv2.bitwise_not(gray)
+    blur = cv2.GaussianBlur(inverted, (3, 3), 0)
+    thresh = cv2.adaptiveThreshold(
+        blur, 255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        11, 2
+    )
+    result = cv2.bitwise_not(thresh)
+
+    return Image.fromarray(result)
+
+
 def set_last_image(image):
     global last_image
+    if image is None:
+        last_image = None
     try:
         if image == last_image:
             return
     except Exception:
-
+        last_image = None
+        return
     try:
         if last_image is not None and hasattr(last_image, "close"):
             last_image.close()
     except Exception:
         pass
-    last_image = image
+    # last_image = image
+    last_image = apply_adaptive_threshold_filter(image)
 
 
 def are_images_identical(img1, img2):
     if None in (img1, img2):
-        return
+        return False
 
     try:
         img1 = np.array(img1)
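The filter above means `set_last_image` now caches a binarized copy of each frame instead of the raw screenshot, so later frame comparisons look at thresholded text rather than full-color pixels. A minimal, self-contained sketch of the same OpenCV chain (collapsing the diff's RGB→BGR→GRAY round-trip into a single grayscale conversion, and drawing a synthetic frame so it runs without any input file):

```python
import cv2
import numpy as np
from PIL import Image, ImageDraw

frame = Image.new("RGB", (320, 120), "black")
ImageDraw.Draw(frame).text((10, 50), "sample subtitle text", fill="white")

gray = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2GRAY)
inverted = cv2.bitwise_not(gray)              # text becomes dark on light
blur = cv2.GaussianBlur(inverted, (3, 3), 0)  # damp single-pixel noise
binary = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 2)  # blockSize=11, C=2
result = Image.fromarray(cv2.bitwise_not(binary))
print(result.mode, result.size)  # "L" (320, 120): one-channel binarized frame
```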
@@ -829,6 +852,128 @@ def are_images_identical(img1, img2):
     return (img1.shape == img2.shape) and np.array_equal(img1, img2)
 
 
+import cv2
+import numpy as np
+from skimage.metrics import structural_similarity as ssim
+from typing import Union
+
+ImageType = Union[np.ndarray, Image.Image]
+
+def _prepare_image(image: ImageType) -> np.ndarray:
+    """
+    Standardizes an image (PIL or NumPy) into an OpenCV-compatible NumPy array (BGR).
+    """
+    # If the image is a PIL Image, convert it to a NumPy array
+    if isinstance(image, Image.Image):
+        # Convert PIL Image (which is RGB) to a NumPy array, then convert RGB to BGR for OpenCV
+        prepared_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # If it's already a NumPy array, assume it's in a compatible format (like BGR)
+    elif isinstance(image, np.ndarray):
+        prepared_image = image
+    else:
+        raise TypeError(f"Unsupported image type: {type(image)}. Must be a PIL Image or NumPy array.")
+
+    return prepared_image
+
+i = 1
+
+def calculate_ssim_score(imageA: ImageType, imageB: ImageType) -> float:
+    global i
+    """
+    Calculates the structural similarity index (SSIM) between two images.
+
+    Args:
+        imageA: The first image as a NumPy array.
+        imageB: The second image as a NumPy array.
+
+    Returns:
+        The SSIM score between the two images (between -1 and 1).
+    """
+
+    if isinstance(imageA, Image.Image):
+        imageA = apply_adaptive_threshold_filter(imageA)
+
+    # Save Images to temp for debugging on a random 1/20 chance
+    # if np.random.rand() < 0.05:
+    # if i < 600:
+    #     # Save as image_000
+    #     imageA.save(os.path.join(get_temporary_directory(), f'frame_{i:03d}.png'), 'PNG')
+    #     i += 1
+    #     imageB.save(os.path.join(get_temporary_directory(), f'ssim_imageB_{i:03d}.png'), 'PNG')
+
+    imageA = _prepare_image(imageA)
+    imageB = _prepare_image(imageB)
+
+    # Images must have the same dimensions
+    if imageA.shape != imageB.shape:
+        raise ValueError("Input images must have the same dimensions.")
+
+    # Convert images to grayscale for a more robust SSIM comparison
+    # This is less sensitive to minor color changes and lighting.
+    # grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
+    # grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
+
+    # Calculate the SSIM. The `score` is the main value.
+    # The `win_size` parameter must be an odd number and less than the image dimensions.
+    # We choose a value that is likely to be safe for a variety of image sizes.
+    win_size = min(3, imageA.shape[0] // 2, imageA.shape[1] // 2)
+    if win_size % 2 == 0:
+        win_size -= 1  # ensure it's odd
+
+    score, _ = ssim(imageA, imageB, full=True, win_size=win_size)
+
+    return score
+
+
+
+def are_images_similar(imageA: Image.Image, imageB: Image.Image, threshold: float = 0.98) -> bool:
+    """
+    Compares two images and returns True if their similarity score is above a threshold.
+
+    Args:
+        imageA: The first image as a NumPy array.
+        imageB: The second image as a NumPy array.
+        threshold: The minimum SSIM score to be considered "similar".
+                   Defaults to 0.98 (very high similarity). Your original `90` would
+                   be equivalent to a threshold of `0.90` here.
+
+    Returns:
+        True if the images are similar, False otherwise.
+    """
+    if None in (imageA, imageB):
+        logger.info("One of the images is None, cannot compare.")
+        return False
+    try:
+        score = calculate_ssim_score(imageA, imageB)
+    except Exception as e:
+        logger.info(e)
+        return False
+    return score > threshold
+
+
+def quick_text_detection(pil_image, threshold_ratio=0.01):
+    """
+    Quick check if image likely contains text using edge detection.
+
+    Args:
+        pil_image (PIL.Image): Input image
+        threshold_ratio (float): Minimum ratio of edge pixels to consider text present
+
+    Returns:
+        bool: True if text is likely present
+    """
+    # Convert to grayscale
+    gray = np.array(pil_image.convert('L'))
+
+    # Apply Canny edge detection
+    edges = cv2.Canny(gray, 50, 150)
+
+    # Calculate ratio of edge pixels
+    edge_ratio = np.sum(edges > 0) / edges.size
+
+    return edge_ratio > threshold_ratio
+
+
 # Use OBS for Screenshot Source (i.e. Linux)
 class OBSScreenshotThread(threading.Thread):
     def __init__(self, ocr_config, screen_capture_on_combo, width=1280, height=720, interval=1):
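The similarity gate added here is built on scikit-image's structural_similarity (SSIM), which scores an image pair between -1 and 1, where 1 means structurally identical; `are_images_similar` treats anything above 0.98 as a duplicate frame. A hedged, minimal demo on synthetic grayscale arrays; note `win_size` must be odd and no larger than the smaller image dimension, which is what the clamping above guards against:

```python
import numpy as np
from skimage.metrics import structural_similarity as ssim

rng = np.random.default_rng(0)
frame_a = rng.integers(0, 256, size=(120, 320), dtype=np.uint8)
frame_b = frame_a.copy()
frame_b[:10, :10] ^= 0xFF          # flip a small corner of the image

# For uint8 inputs skimage infers data_range from the dtype (0..255).
score, _ = ssim(frame_a, frame_b, full=True, win_size=7)
print(f"SSIM: {score:.4f}")        # near 1.0: the frames are almost identical
```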
@@ -848,6 +993,7 @@ class OBSScreenshotThread(threading.Thread):
             periodic_screenshot_queue.put(result)
         else:
             image_queue.put((result, True))
+        screenshot_event.clear()
 
     def connect_obs(self):
         import GameSentenceMiner.obs as obs
@@ -911,7 +1057,7 @@ class OBSScreenshotThread(threading.Thread):
         self.current_source_name = self.current_source.get(
             "sourceName") or None
         self.current_scene = scene if scene else obs.get_current_game()
-        self.ocr_config = get_scene_ocr_config()
+        self.ocr_config = get_scene_ocr_config(refresh=True)
         if not self.ocr_config:
             logger.error("No OCR config found for the current scene.")
             return
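`init_config` now passes `refresh=True`, forcing a fresh read of the scene's OCR config instead of serving a cached copy. The real signature of `get_scene_ocr_config` isn't shown beyond this keyword, so the following is only a toy sketch of the cache-plus-refresh pattern it implies, with a hypothetical stub in place of GSM's per-scene config loading:

```python
# Toy stand-in for GSM's per-scene config loading; the real
# get_scene_ocr_config() reads scene config from disk, not this stub.
_cache: dict = {}

def load_scene_config(scene: str) -> dict:
    return {"scene": scene, "rectangles": []}   # hypothetical payload

def get_scene_ocr_config(scene: str = "game", refresh: bool = False) -> dict:
    if refresh or scene not in _cache:
        _cache[scene] = load_scene_config(scene)  # bypass any stale entry
    return _cache[scene]

get_scene_ocr_config()                        # first call populates the cache
config = get_scene_ocr_config(refresh=True)   # re-reads even when cached
print(config)
```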
@@ -931,7 +1077,6 @@ class OBSScreenshotThread(threading.Thread):
 
         self.connect_obs()
         self.init_config()
-        start = time.time()
         while not terminated:
             if not screenshot_event.wait(timeout=0.1):
                 continue
@@ -952,33 +1097,21 @@ class OBSScreenshotThread(threading.Thread):
                 if not self.current_source_name:
                     logger.error(
                         "No active source found in the current scene.")
-
+                    self.write_result(1)
                     continue
-                # start_time = time.time()
                 img = obs.get_screenshot_PIL(source_name=self.current_source_name,
                                              width=self.width, height=self.height, img_format='jpg', compression=80)
-                # logger.info(f"OBS screenshot taken in {time.time() - start_time:.2f} seconds.")
 
                 img = apply_ocr_config_to_image(img, self.ocr_config)
 
                 if img is not None:
-
-                    logger.info("OBS Not Capturing anything, sleeping.")
-                    time.sleep(1)
-                    continue
-
-                    if last_image and are_images_identical(img, last_image):
-                        logger.debug(
-                            "Captured screenshot is identical to the last one, sleeping.")
-                        time.sleep(max(.5, get_ocr_scan_rate()))
-                    else:
-                        self.write_result(img)
-                        screenshot_event.clear()
+                    self.write_result(img)
                 else:
                     logger.error("Failed to get screenshot data from OBS.")
 
             except Exception as e:
-
+                print(e)
+                logger.info(
                     f"An unexpected error occurred during OBS Capture : {e}", exc_info=True)
                 continue
 
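With this hunk the OBS screenshot thread stops deduplicating frames itself: it captures one frame per request, queues it, and clears `screenshot_event` (see the `write_result` change above), leaving the identical/similar checks to the consuming loop. A toy sketch of that event-plus-queue handshake; the names are illustrative rather than GSM's actual API, and the sketch clears the event before queueing to keep the handshake race-free:

```python
import queue
import threading
import time

request = threading.Event()
frames: queue.Queue = queue.Queue()

def producer():
    while True:
        if not request.wait(timeout=0.1):   # mirrors the thread's poll loop
            continue
        request.clear()                     # accept the request first...
        frames.put(time.time())             # ...then deliver exactly one frame

threading.Thread(target=producer, daemon=True).start()
for _ in range(3):
    request.set()                             # consumer asks for one frame
    print("frame captured at", frames.get())  # blocks until it arrives
```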
@@ -1569,7 +1702,9 @@ def run(read_from=None,
 
     config_check_thread.add_config_callback(handle_config_changes)
     config_check_thread.add_area_callback(handle_area_config_changes)
-
+    previous_text = "Placeholder"
+    sleep_time_to_add = 0
+    last_result_time = time.time()
     while not terminated:
         ocr_start_time = datetime.now()
         start_time = time.time()
@@ -1582,15 +1717,22 @@ def run(read_from=None,
                 notify = True
         except queue.Empty:
             pass
-
+
+        if get_ocr_scan_rate() < .5:
+            adjusted_scan_rate = min(get_ocr_scan_rate() + sleep_time_to_add, .5)
+        else:
+            adjusted_scan_rate = get_ocr_scan_rate()
+
         if (not img) and process_screenshots:
-            if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) >
+            if (not paused) and (not screenshot_thread or (screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible)) and (time.time() - last_screenshot_time) > adjusted_scan_rate:
                 screenshot_event.set()
                 img = periodic_screenshot_queue.get()
                 filter_img = True
                 notify = False
                 last_screenshot_time = time.time()
                 ocr_start_time = datetime.now()
+                if adjusted_scan_rate > get_ocr_scan_rate():
+                    ocr_start_time = ocr_start_time - timedelta(seconds=adjusted_scan_rate - get_ocr_scan_rate())
 
         if img == 0:
             on_window_closed(False)
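`adjusted_scan_rate` implements a backoff: while nothing changes on screen, `sleep_time_to_add` grows (by 5 ms per idle pass once things have been quiet for 10 s, per the next hunk) and stretches the effective scan interval, but only up to a 0.5 s ceiling, and only when the configured rate is already below that cap. A small sketch of the arithmetic:

```python
def adjusted_scan_rate(base_rate: float, sleep_time_to_add: float) -> float:
    # Backoff only applies when the configured rate is under the 0.5 s cap.
    if base_rate < 0.5:
        return min(base_rate + sleep_time_to_add, 0.5)
    return base_rate

base = 0.1                        # e.g. a 100 ms configured scan rate
for idle_passes in (0, 20, 100):  # each idle pass adds 5 ms
    print(idle_passes, round(adjusted_scan_rate(base, idle_passes * 0.005), 3))
# 0 -> 0.1, 20 -> 0.2, 100 -> 0.5 (saturates at the half-second ceiling)
```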
@@ -1598,10 +1740,51 @@ def run(read_from=None,
             break
         elif img:
             if filter_img:
-
+                ocr_config = get_scene_ocr_config()
+                # Check if the image is completely empty (all white or all black)
+                try:
+                    extrema = img.getextrema()
+                    # For RGB or RGBA images, extrema is a tuple of (min, max) for each channel
+                    if isinstance(extrema[0], tuple):
+                        is_empty = all(e[0] == e[1] for e in extrema)
+                    else:
+                        is_empty = extrema[0] == extrema[1]
+                    if is_empty:
+                        logger.info("Image is totally empty (all pixels the same), sleeping.")
+                        sleep_time_to_add = .5
+                        continue
+                except Exception as e:
+                    logger.debug(f"Could not determine if image is empty: {e}")
+
+                # Compare images, but only if it's one box, multiple boxes skews results way too much and produces false positives
+                if ocr_config and len(ocr_config.rectangles) < 2:
+                    if are_images_similar(img, last_image):
+                        logger.info("Captured screenshot is similar to the last one, sleeping.")
+                        if time.time() - last_result_time > 10:
+                            sleep_time_to_add += .005
+                        continue
+                else:
+                    if are_images_identical(img, last_image):
+                        logger.info("Captured screenshot is identical to the last one, sleeping.")
+                        if time.time() - last_result_time > 10:
+                            sleep_time_to_add += .005
+                        continue
+
+                res, text = process_and_write_results(img, write_to, last_result, filtering, notify,
                                                       ocr_start_time=ocr_start_time, furigana_filter_sensitivity=get_ocr_furigana_filter_sensitivity())
+                if not text and not previous_text and time.time() - last_result_time > 10:
+                    sleep_time_to_add += .005
+                    logger.info(f"No text detected again, sleeping.")
+                else:
+                    sleep_time_to_add = 0
+
+                # If image was stabilized, and now there is no text, reset sleep time
+                if not previous_text and not res:
+                    sleep_time_to_add = 0
+                previous_text = text
                 if res:
                     last_result = (res, engine_index)
+                    last_result_time = time.time()
             else:
                 process_and_write_results(
                     img, write_to, None, notify=notify, ocr_start_time=ocr_start_time, engine=ocr2)
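The empty-frame check leans on PIL's `Image.getextrema()`, which returns a (min, max) pair per channel; when min equals max in every channel, all pixels are identical and OCR can be skipped. A minimal sketch of the same test:

```python
from PIL import Image

def is_flat(img: Image.Image) -> bool:
    extrema = img.getextrema()
    if isinstance(extrema[0], tuple):       # RGB/RGBA: one pair per channel
        return all(lo == hi for lo, hi in extrema)
    return extrema[0] == extrema[1]         # grayscale ("L"): a single pair

print(is_flat(Image.new("RGB", (4, 4), "white")))   # True - blank frame
print(is_flat(Image.new("L", (4, 4), 128)))         # True - uniform gray
```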
GameSentenceMiner/util/configuration.py

@@ -488,6 +488,10 @@ class Screenshot:
             self.screenshot_timing_setting = 'middle'
         if not self.screenshot_timing_setting and not self.use_beginning_of_line_as_screenshot and not self.use_new_screenshot_logic:
             self.screenshot_timing_setting = 'end'
+        if self.width and self.height == 0:
+            self.height = -1
+        if self.width == 0 and self.height:
+            self.width = -1
 
 
 @dataclass_json
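Normalizing a zero width or height to -1 when the other dimension is set matches the ffmpeg scale-filter convention, where -1 means "derive this dimension from the aspect ratio"; the consumer isn't shown in this diff, so the ffmpeg destination is an assumption. A sketch mirroring the `__post_init__` change:

```python
def normalize_dims(width: int, height: int) -> tuple:
    # Mirrors the __post_init__ change: 0 becomes the -1 sentinel when the
    # other dimension is fixed, so the scaler keeps the aspect ratio.
    if width and height == 0:
        height = -1
    if width == 0 and height:
        width = -1
    return width, height

w, h = normalize_dims(1280, 0)
print(f"scale={w}:{h}")  # scale=1280:-1 -> 1280 px wide, height follows aspect
```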
@@ -508,6 +512,8 @@ class Audio:
     def __post_init__(self):
         self.ffmpeg_reencode_options_to_use = self.ffmpeg_reencode_options.replace(
             "{format}", self.extension).replace("{encoder}", supported_formats.get(self.extension, ''))
+        if not self.anki_media_collection:
+            self.anki_media_collection = get_default_anki_media_collection_path()
         if self.anki_media_collection:
             self.anki_media_collection = os.path.normpath(
                 self.anki_media_collection)
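A blank `anki_media_collection` now falls back to `get_default_anki_media_collection_path()`. That helper's body isn't in this diff; as a rough guide only, Anki's media folder conventionally lives under the per-OS Anki2 data directory, which a guess might resolve like this (profile name and path logic are assumptions, not GSM's actual helper):

```python
import os
import sys

def guess_anki_media_collection(profile: str = "User 1") -> str:
    # Guess only: Anki's standard data dirs; the real helper may differ.
    if sys.platform == "win32":
        base = os.path.join(os.environ.get("APPDATA", ""), "Anki2")
    elif sys.platform == "darwin":
        base = os.path.expanduser("~/Library/Application Support/Anki2")
    else:
        base = os.path.expanduser("~/.local/share/Anki2")
    return os.path.normpath(os.path.join(base, profile, "collection.media"))

print(guess_anki_media_collection())
```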
GameSentenceMiner/util/electron_config.py

@@ -58,7 +58,7 @@ class OCRConfig:
     useWindowForConfig: bool = False
     lastWindowSelected: str = ""
     keep_newline: bool = False
-
+    useObsAsOCRSource: bool = True
 
     def has_changed(self, other: 'OCRConfig') -> bool:
         return self.to_dict() != other.to_dict()
@@ -229,7 +229,7 @@ def get_ocr_keep_newline():
     return electron_store.data.OCR.keep_newline
 
 def get_ocr_use_obs_as_source():
-    return electron_store.data.OCR.
+    return electron_store.data.OCR.useObsAsOCRSource
 
 def get_furigana_filter_sensitivity() -> int:
     return electron_store.data.OCR.furigana_filter_sensitivity