GameSentenceMiner 2.19.3__py3-none-any.whl → 2.19.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
GameSentenceMiner/gsm.py CHANGED
@@ -32,7 +32,7 @@ try:
 
     import keyboard
    import ttkbootstrap as ttk
-    from PIL import Image, ImageDraw
+    from PIL import Image
    from pystray import Icon, Menu, MenuItem
    from watchdog.events import FileSystemEventHandler
    from watchdog.observers import Observer
GameSentenceMiner/obs.py CHANGED
@@ -64,7 +64,8 @@ class OBSConnectionPool:
            except Exception as e:
                if str(e) == self.last_error_shown[i]:
                    continue
-                logger.error(f"Failed to create client {i} in pool: {e}")
+                if self.connected_once:
+                    logger.error(f"Failed to create client {i} in pool during initial connection: {e}")
                self.last_error_shown[i] = str(e)
        return True
 
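Note: the obs.py change above quiets connection-pool logging in two ways: an exception whose message matches the last one shown for that client slot is skipped, and the error is only logged once the pool has connected successfully at least once (the `connected_once` flag). A minimal sketch of that gating pattern, assuming a hypothetical `PoolErrorGate` wrapper (the real code keeps this state directly on `OBSConnectionPool`):

import logging

logger = logging.getLogger("obs_pool")

class PoolErrorGate:
    def __init__(self, size: int):
        self.connected_once = False           # flipped True after the first successful connect
        self.last_error_shown = [None] * size # last error message logged per client slot

    def report(self, i: int, exc: Exception) -> None:
        if str(exc) == self.last_error_shown[i]:
            return  # same failure as last time for this slot: stay quiet
        if self.connected_once:
            logger.error(f"Failed to create client {i} in pool: {exc}")
        self.last_error_shown[i] = str(exc)

The net effect is that startup failures (before anything has ever connected) never reach the error log, and a flapping connection produces one log line per distinct error message rather than one per retry.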
GameSentenceMiner/ocr/owocr_helper.py CHANGED
@@ -344,15 +344,75 @@ def reset_callback_vars():
 
 # def get_ocr_ocr2(self):
 #     return self.ocr2
+last_meiki_crop_coords = None
+last_meiki_crop_time = None
+last_meiki_success = None
 
-def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering=None, crop_coords=None):
-    global twopassocr, ocr2, previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, previous_text_list, last_sent_result
+
+def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering=None, crop_coords=None, meiki_boxes=None):
+    global twopassocr, ocr2, previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, previous_text_list, last_sent_result, last_meiki_crop_coords, last_meiki_success, last_meiki_crop_time
    orig_text_string = ''.join([item for item in orig_text if item is not None]) if orig_text else ""
    if came_from_ss:
        save_result_image(img)
        asyncio.run(send_result(text, time))
        return
-
+
+    if meiki_boxes:
+        # If we don't have a previous meiki crop coords, store this one and wait for the next run
+        try:
+            if last_meiki_crop_coords is None:
+                last_meiki_crop_coords = crop_coords
+                last_meiki_crop_time = time
+                previous_img = img
+                return
+
+            # Ensure both coords exist
+            if not crop_coords or not last_meiki_crop_coords:
+                last_meiki_crop_coords = crop_coords
+                last_meiki_crop_time = time
+                return
+
+            # Compare coordinates within tolerance (pixels)
+            tol = 5
+            try:
+                close = all(abs(int(crop_coords[i]) - int(last_meiki_crop_coords[i])) <= tol for i in range(4))
+            except Exception:
+                # Fallback: if values not int-convertible, set not close
+                close = False
+
+            if close:
+                if all(last_meiki_success and abs(int(crop_coords[i]) - int(last_meiki_success[i])) <= tol for i in range(4)):
+                    # Reset last_meiki_crop_coords and time so we require another matching pair for a future queue
+                    last_meiki_crop_coords = None
+                    last_meiki_crop_time = None
+                    return
+                # Stable crop: queue second OCR immediately
+                try:
+                    stable_time = last_meiki_crop_time
+                    previous_img_local = previous_img
+                    pre_crop_image = previous_img_local
+                    ocr2_image = get_ocr2_image(crop_coords, og_image=previous_img_local, ocr2_engine=get_ocr_ocr2())
+                    # Use the earlier timestamp for when the stable crop started if available
+                    # ocr2_image.show()
+                    second_ocr_queue.put((text, stable_time, ocr2_image, filtering, pre_crop_image))
+                    run.set_last_image(img)
+                    last_meiki_success = crop_coords
+                except Exception as e:
+                    logger.info(f"Failed to queue second OCR task: {e}", exc_info=True)
+                # Reset last_meiki_crop_coords and time so we require another matching pair for a future queue
+                last_meiki_crop_coords = None
+                last_meiki_crop_time = None
+                return
+            else:
+                # Not stable: replace last and wait for the next run
+                last_meiki_crop_coords = crop_coords
+                last_meiki_success = None
+                previous_img = img
+                return
+        except Exception as e:
+            logger.debug(f"Error handling meiki crop coords stability check: {e}")
+            last_meiki_crop_coords = crop_coords
+
    if not text:
        run.set_last_image(img)
 
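Note: the hunk above gives Meiki detection results their own stabilization path. The first crop rectangle is only remembered; a second OCR pass is queued only when the next detection reports a crop whose four coordinates all agree within a 5-pixel tolerance, and a stable crop that also matches the last successfully queued region (`last_meiki_success`) is skipped so the same on-screen line is not re-sent. A self-contained, slightly simplified sketch of that debounce under those assumptions; `CropStabilizer` and `on_crop` are hypothetical names, and the real code keeps this state in module-level globals inside `text_callback`:

class CropStabilizer:
    def __init__(self, tol: int = 5):
        self.tol = tol            # max per-coordinate drift, in pixels
        self.pending = None       # crop seen on the previous pass, awaiting confirmation
        self.last_success = None  # crop that was last handed to the second OCR

    def _close(self, a, b) -> bool:
        return a is not None and b is not None and all(
            abs(int(a[i]) - int(b[i])) <= self.tol for i in range(4)
        )

    def on_crop(self, crop):
        """'queue' -> stable across two passes, run the second OCR;
        'skip' -> stable but identical to the last OCR'd region;
        'wait' -> first sighting, or the region is still moving."""
        if crop is None or not self._close(crop, self.pending):
            if self.pending is not None:
                self.last_success = None  # region drifted: forget the last success
            self.pending = crop           # remember and wait for confirmation
            return 'wait'
        self.pending = None               # confirmed; demand a fresh pair next time
        if self._close(crop, self.last_success):
            return 'skip'                 # same region we already OCR'd
        self.last_success = crop
        return 'queue'

Requiring two consecutive agreeing crops trades one detection interval of latency for far fewer redundant second-pass OCR calls while text is still scrolling in.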
GameSentenceMiner/owocr/owocr/ocr.py CHANGED
@@ -1539,6 +1539,579 @@ class localLLMOCR:
            return (True, "")
        except Exception as e:
            return (False, f'Local LLM OCR request failed: {e}')
+
+import os
+import onnxruntime as ort
+import numpy as np
+import cv2
+from huggingface_hub import hf_hub_download
+from PIL import Image
+import requests
+from io import BytesIO
+
+# --- HELPER FUNCTION FOR VISUALIZATION (Optional but useful) ---
+def draw_detections(image: np.ndarray, detections: list, model_name: str) -> np.ndarray:
+    """
+    Draws bounding boxes from the detection results onto an image.
+
+    Args:
+        image (np.ndarray): The original image (in BGR format).
+        detections (list): A list of detection dictionaries, e.g., [{"box": [x1, y1, x2, y2], "score": 0.95}, ...].
+        model_name (str): The name of the model ('tiny' or 'small') to determine box color.
+
+    Returns:
+        np.ndarray: The image with bounding boxes drawn on it.
+    """
+    output_image = image.copy()
+    color = (0, 255, 0) if model_name == "small" else (0, 0, 255)  # Green for small, Blue for tiny
+
+    for detection in detections:
+        box = detection['box']
+        score = detection['score']
+
+        # Ensure coordinates are integers for drawing
+        x_min, y_min, x_max, y_max = map(int, box)
+
+        # Draw the rectangle
+        cv2.rectangle(output_image, (x_min, y_min), (x_max, y_max), color, 2)
+
+        # Optionally, add the score text
+        label = f"{score:.2f}"
+        cv2.putText(output_image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+
+    return output_image
+
+
+class MeikiTextDetector:
+    """
+    A class to perform text detection using the meiki.text.detect.v0 models.
+
+    This class handles downloading the ONNX models from the Hugging Face Hub,
+    loading them into an ONNX Runtime session, and providing a simple interface
+    for inference.
+    """
+    name = 'meiki_text_detector'
+    readable_name = 'Meiki Text Detector'
+    available = False
+    key = ']'
+
+    def __init__(self, model_name: str = 'tiny'):
+        """
+        Initializes the detector by downloading and loading the specified ONNX model.
+
+        Args:
+            model_name (str): The model to use, either "tiny" or "small".
+                              Defaults to "small".
+        """
+        if model_name not in ['tiny', 'small']:
+            raise ValueError("model_name must be either 'tiny' or 'small'")
+
+        ort.preload_dlls(cuda=True, directory=None)
+
+        self.model_name = model_name
+        self.session = None
+
+        # --- Model-specific parameters ---
+        if self.model_name == "tiny":
+            self.model_size = 320
+            self.is_color = False
+            self.onnx_filename = "meiki.text.detect.tiny.v0.onnx"
+        else:  # "small"
+            self.model_size = 640
+            self.is_color = True
+            self.onnx_filename = "meiki.text.detect.small.v0.onnx"
+
+        try:
+            print(f"Initializing MeikiTextDetector with '{self.model_name}' model...")
+            MODEL_REPO = "rtr46/meiki.text.detect.v0"
+
+            # Download the model file from the Hub and get its local path
+            model_path = hf_hub_download(repo_id=MODEL_REPO, filename=self.onnx_filename)
+
+            # Load the ONNX model into an inference session
+            # providers = ['CUDAExecutionProvider']
+            providers = ['CPUExecutionProvider']
+            self.session = ort.InferenceSession(model_path, providers=providers)
+
+            self.available = True
+            print("Model loaded successfully. MeikiTextDetector is ready.")
+
+        except Exception as e:
+            print(f"Error initializing MeikiTextDetector: {e}")
+            self.available = False
+
+    def _resize_and_pad(self, image: np.ndarray):
+        """
+        Resizes and pads an image to the model's expected square size,
+        preserving the aspect ratio.
+        """
+        if self.is_color:
+            h, w, _ = image.shape
+        else:
+            h, w = image.shape
+
+        size = self.model_size
+        ratio = min(size / w, size / h)
+        new_w, new_h = int(w * ratio), int(h * ratio)
+
+        resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+
+        if self.is_color:
+            padded_image = np.zeros((size, size, 3), dtype=np.uint8)
+        else:
+            padded_image = np.zeros((size, size), dtype=np.uint8)
+
+        pad_w, pad_h = (size - new_w) // 2, (size - new_h) // 2
+        padded_image[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized_image
+
+        return padded_image, ratio, pad_w, pad_h
+
+    def __call__(self, img, confidence_threshold: float = 0.4):
+        """
+        Performs text detection on an input image.
+
+        Args:
+            img: The input image. Can be a file path, URL, PIL Image, or a NumPy array (BGR format).
+            confidence_threshold (float): The threshold to filter out low-confidence detections.
+
+        Returns:
+            A list of dictionaries, where each dictionary represents a detected
+            text box and contains 'box' (a list of [x_min, y_min, x_max, y_max])
+            and 'score' (a float). Returns an empty list if no boxes are found.
+        """
+        if confidence_threshold is None:
+            confidence_threshold = 0.4
+        if not self.available:
+            raise RuntimeError("MeikiTextDetector is not available due to an initialization error.")
+
+        # --- Input Handling ---
+        if isinstance(img, str):
+            if img.startswith('http'):
+                response = requests.get(img)
+                pil_image = Image.open(BytesIO(response.content)).convert("RGB")
+            else:
+                pil_image = Image.open(img).convert("RGB")
+            # Convert PIL (RGB) to OpenCV (BGR) format
+            input_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+        elif isinstance(img, Image.Image):
+            # Convert PIL (RGB) to OpenCV (BGR) format
+            input_image = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+        elif isinstance(img, np.ndarray):
+            input_image = img
+        else:
+            raise TypeError("Unsupported input type for 'img'. Use a file path, URL, PIL Image, or NumPy array.")
+
+
+        # --- Preprocessing ---
+        if self.is_color:
+            image_for_model = input_image
+        else:
+            image_for_model = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)
+
+        padded_image, ratio, pad_w, pad_h = self._resize_and_pad(image_for_model)
+        img_normalized = padded_image.astype(np.float32) / 255.0
+
+        if self.is_color:
+            img_transposed = np.transpose(img_normalized, (2, 0, 1))
+            input_tensor = np.expand_dims(img_transposed, axis=0)
+        else:
+            input_tensor = np.expand_dims(np.expand_dims(img_normalized, axis=0), axis=0)
+
+        # --- Inference ---
+        sizes_tensor = np.array([[self.model_size, self.model_size]], dtype=np.int64)
+        input_names = [inp.name for inp in self.session.get_inputs()]
+        inputs = {input_names[0]: input_tensor, input_names[1]: sizes_tensor}
+
+        outputs = self.session.run(None, inputs)
+
+        # print(outputs)
+
+        # --- Post-processing ---
+        if self.model_name == "tiny":
+            boxes = outputs[0]
+            scores = [1.0] * len(boxes)  # Tiny model doesn't output scores
+        else:  # "small"
+            _, boxes, scores = outputs
+            boxes, scores = boxes[0], scores[0]
+
+        detections = []
+        for box, score in zip(boxes, scores):
+            if score < confidence_threshold:
+                continue
+
+            x_min, y_min, x_max, y_max = box
+
+            # Rescale box coordinates to the original image size
+            final_x_min = (x_min - pad_w) / ratio
+            final_y_min = (y_min - pad_h) / ratio
+            final_x_max = (x_max - pad_w) / ratio
+            final_y_max = (y_max - pad_h) / ratio
+
+            detections.append({
+                "box": [final_x_min, final_y_min, final_x_max, final_y_max],
+                "score": float(score)
+            })
+
+        # print(f"Processed with '{self.model_name}' model. Found {len(detections)} boxes with confidence > {confidence_threshold}.")
+
+        # Compute crop_coords as padded min/max of all detected boxes
+        if detections:
+            x_mins = [b['box'][0] for b in detections]
+            y_mins = [b['box'][1] for b in detections]
+            x_maxs = [b['box'][2] for b in detections]
+            y_maxs = [b['box'][3] for b in detections]
+
+            pad = 5
+            crop_xmin = min(x_mins) - pad
+            crop_ymin = min(y_mins) - pad
+            crop_xmax = max(x_maxs) + pad
+            crop_ymax = max(y_maxs) + pad
+
+            # Clamp to image bounds
+            h, w = input_image.shape[:2]
+            crop_xmin = max(0, int(floor(crop_xmin)))
+            crop_ymin = max(0, int(floor(crop_ymin)))
+            crop_xmax = min(w, int(floor(crop_xmax)))
+            crop_ymax = min(h, int(floor(crop_ymax)))
+
+            crop_coords = [crop_xmin, crop_ymin, crop_xmax, crop_ymax]
+        else:
+            crop_coords = None
+
+        resp = {
+            "boxes": detections,
+            "provider": 'meiki',
+            "crop_coords": crop_coords
+        }
+
+        return True, resp
+
+
+# --- EXAMPLE USAGE ---
+if __name__ == '__main__':
+    import datetime
+    # You can choose 'tiny' or 'small' here
+    meiki = MeikiTextDetector(model_name='small')
+    # Example: run a short warm-up then measure average over N runs
+    image_path = r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\lotsofsmalltext.png"
+    video_path = r"C:\Users\Beangate\GSM\GameSentenceMiner\GameSentenceMiner\owocr\owocr\tanetsumi_CdACfZkwMY.mp4"
+    # Warm-up run (helps with any one-time setup cost)
+    try:
+        _ = meiki(image_path, confidence_threshold=0.4)
+    except Exception as e:
+        print(f"Error running MeikiTextDetector on warm-up: {e}")
+        raise
+
+    # runs = 500
+    times = []
+    detections_list = []
+    # for i in range(runs):
+    #     start_time = datetime.datetime.now()
+    #     res, resp_dict = meiki(image_path, confidence_threshold=0.4)
+    #     detections = resp_dict['boxes']
+    #     dections_list.append(detections)
+    #     end_time = datetime.datetime.now()
+    #     times.append((end_time - start_time).total_seconds())
+
+    # Process video frame by frame with cv2 (sample at ~10 FPS)
+    cap = cv2.VideoCapture(video_path)
+    try:
+        src_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+    except Exception:
+        src_fps = 30.0
+
+    target_fps = 10
+    sample_interval = max(1, int(round(src_fps / target_fps)))
+    runs = 0
+    last_detections = []
+    pil_img = None
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Only process sampled frames
+        if runs % sample_interval == 0:
+            # Convert to PIL image
+            try:
+                pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+            except Exception:
+                runs += 1
+                continue
+
+            # Run Meiki detector on the full frame (or you can crop before passing)
+            start_t = time.time()
+            try:
+                ok, resp = meiki(pil_img, confidence_threshold=0.4)
+                if ok:
+                    detections = resp.get('boxes', [])
+                else:
+                    detections = []
+            except Exception as e:
+                # on error, record empty detections but keep going
+                detections = []
+            end_t = time.time()
+
+            times.append(end_t - start_t)
+            detections_list.append(detections)
+            last_detections = detections
+
+        runs += 1
+
+    cap.release()
+
+    # Make sure 'detections' variable exists for later visualization
+    detections = last_detections
+
+    avg_time = sum(times) / len(times) if times else 0.0
+
+    print(f"Average processing/inference time over {runs} runs: {avg_time:.4f} seconds")
+
+    # --- Stability / similarity analysis across detection runs ---
+    # We consider two boxes the same if their IoU >= iou_threshold.
+    def iou(boxA, boxB):
+        # boxes are [x_min, y_min, x_max, y_max]
+        xA = max(boxA[0], boxB[0])
+        yA = max(boxA[1], boxB[1])
+        xB = min(boxA[2], boxB[2])
+        yB = min(boxA[3], boxB[3])
+
+        interW = max(0.0, xB - xA)
+        interH = max(0.0, yB - yA)
+        interArea = interW * interH
+
+        boxAArea = max(0.0, boxA[2] - boxA[0]) * max(0.0, boxA[3] - boxA[1])
+        boxBArea = max(0.0, boxB[2] - boxB[0]) * max(0.0, boxB[3] - boxB[1])
+
+        union = boxAArea + boxBArea - interArea
+        if union <= 0:
+            return 0.0
+        return interArea / union
+
+    def match_counts(ref_boxes, other_boxes, iou_threshold=0.5):
+        # Greedy matching by IoU
+        if not ref_boxes or not other_boxes:
+            return 0, []
+        ref_idx = list(range(len(ref_boxes)))
+        oth_idx = list(range(len(other_boxes)))
+        matches = []
+        # compute all IoUs
+        iou_matrix = []
+        for i, rb in enumerate(ref_boxes):
+            row = []
+            for j, ob in enumerate(other_boxes):
+                row.append(iou(rb, ob))
+            iou_matrix.append(row)
+
+        iou_matrix = np.array(iou_matrix)
+        while True:
+            if iou_matrix.size == 0:
+                break
+            # find best remaining pair
+            idx = np.unravel_index(np.argmax(iou_matrix), iou_matrix.shape)
+            best_i, best_j = idx[0], idx[1]
+            best_val = iou_matrix[best_i, best_j]
+            if best_val < iou_threshold:
+                break
+            matches.append((ref_idx[best_i], oth_idx[best_j], float(best_val)))
+            # remove matched row and column
+            iou_matrix = np.delete(iou_matrix, best_i, axis=0)
+            iou_matrix = np.delete(iou_matrix, best_j, axis=1)
+            del ref_idx[best_i]
+            del oth_idx[best_j]
+
+        return len(matches), matches
+
+    # canonical reference: first run (if any)
+    stability_scores = []
+    avg_ious = []
+    if len(detections_list) == 0:
+        stability_avg = 0.0
+    else:
+        ref = detections_list[0]
+        # extract boxes list-of-lists
+        print(ref)
+        ref_boxes = [d['box'] for d in ref]
+        for run_idx, run in enumerate(detections_list):
+            other_boxes = [d['box'] for d in run]
+            matched_count, matches = match_counts(ref_boxes, other_boxes, iou_threshold=0.5)
+            denom = max(len(ref_boxes), len(other_boxes), 1)
+            score = matched_count / denom
+            stability_scores.append(score)
+            if matches:
+                avg_ious.append(sum(m for (_, _, m) in matches) / len(matches))
+
+    stability_avg = float(np.mean(stability_scores)) if stability_scores else 0.0
+    stability_std = float(np.std(stability_scores)) if stability_scores else 0.0
+    median_stability = float(np.median(stability_scores)) if stability_scores else 0.0
+    avg_iou_over_matches = float(np.mean(avg_ious)) if avg_ious else 0.0
+
+    # Heuristic for recommended pixel offset to treat boxes as identical
+    # Use median box dimension across all detections and suggest a small fraction
+    all_widths = []
+    all_heights = []
+    for run in detections_list:
+        for d in run:
+            b = d['box']
+            w = abs(b[2] - b[0])
+            h = abs(b[3] - b[1])
+            all_widths.append(w)
+            all_heights.append(h)
+
+    if all_widths and all_heights:
+        med_w = float(np.median(all_widths))
+        med_h = float(np.median(all_heights))
+        # pixel suggestion: 5px absolute, and also ~5% of median min dimension
+        suggestion_px = max(5.0, min(med_w, med_h) * 0.05)
+        suggestion_px_rounded = int(round(suggestion_px))
+    else:
+        med_w = med_h = 0.0
+        suggestion_px_rounded = 5
+
+    # Additional check: if we expand each box by suggestion_px_rounded (on all sides),
+    # would that cause every run to fully match the reference (i.e., every box in
+    # each run matches some reference box and vice-versa using the same IoU threshold)?
+    def expand_box(box, px, img_w=None, img_h=None):
+        # box: [x_min, y_min, x_max, y_max]
+        x0, y0, x1, y1 = box
+        x0 -= px
+        y0 -= px
+        x1 += px
+        y1 += px
+        if img_w is not None and img_h is not None:
+            x0 = max(0, x0)
+            y0 = max(0, y0)
+            x1 = min(img_w, x1)
+            y1 = min(img_h, y1)
+        return [x0, y0, x1, y1]
+
+    def all_boxes_match_after_expansion(ref_boxes, other_boxes, px_expand, iou_threshold=0.5):
+        # Expand both sets and perform greedy matching. True if both sets are fully matched.
+        if not ref_boxes and not other_boxes:
+            return True
+        if not ref_boxes or not other_boxes:
+            return False
+
+        # Expand boxes
+        ref_exp = [expand_box(b, px_expand) for b in ref_boxes]
+        oth_exp = [expand_box(b, px_expand) for b in other_boxes]
+
+        # compute IoU matrix
+        mat = np.zeros((len(ref_exp), len(oth_exp)), dtype=float)
+        for i, rb in enumerate(ref_exp):
+            for j, ob in enumerate(oth_exp):
+                mat[i, j] = iou(rb, ob)
+
+        # greedy match
+        ref_idx = list(range(len(ref_exp)))
+        oth_idx = list(range(len(oth_exp)))
+        matches = 0
+        m = mat.copy()
+        while m.size:
+            idx = np.unravel_index(np.argmax(m), m.shape)
+            best_i, best_j = idx[0], idx[1]
+            best_val = m[best_i, best_j]
+            if best_val < iou_threshold:
+                break
+            matches += 1
+            m = np.delete(m, best_i, axis=0)
+            m = np.delete(m, best_j, axis=1)
+            del ref_idx[best_i]
+            del oth_idx[best_j]
+
+        # Fully matched if matches equals both lengths
+        return (matches == len(ref_exp)) and (matches == len(oth_exp))
+
+    would_treat_all_same = False
+    per_run_expanded_match = []
+    try:
+        if len(detections_list) == 0:
+            would_treat_all_same = False
+        else:
+            ref = detections_list[0]
+            ref_boxes = [d['box'] for d in ref]
+            for run in detections_list:
+                other_boxes = [d['box'] for d in run]
+                matched = all_boxes_match_after_expansion(ref_boxes, other_boxes, suggestion_px_rounded, iou_threshold=0.5)
+                per_run_expanded_match.append(bool(matched))
+            would_treat_all_same = all(per_run_expanded_match) if per_run_expanded_match else False
+    except Exception:
+        would_treat_all_same = False
+
+    # Print results
+    print(f"Average processing time over {runs} runs: {avg_time:.4f} seconds")
+    print("--- Stability summary (reference = first run) ---")
+    if len(detections_list) == 0:
+        print("No detections recorded.")
+    else:
+        print(f"Per-run similarity ratios vs first run: {[round(s,3) for s in stability_scores]}")
+        print(f"Stability average: {stability_avg:.4f}, std: {stability_std:.4f}, median: {median_stability:.4f}")
+        print(f"Average IoU (matched boxes): {avg_iou_over_matches:.4f}")
+        print(f"Median box size (w x h): {med_w:.1f} x {med_h:.1f} px")
+        print(f"Recommended pixel-offset heuristic to treat boxes as identical: {suggestion_px_rounded} px (~5% of median box min-dim).")
+        print(f"Per-run fully-matched after expanding by {suggestion_px_rounded}px: {per_run_expanded_match}")
+        print(f"Would the recommendation treat all runs as identical? {would_treat_all_same}")
+        print("Also consider fixed offsets like 5px or 10px depending on image DPI and scaling.")
+
+
+    # Draw and save the last-run detections for inspection
+    if pil_img:
+        image_path = os.path.join(os.getcwd(), "last_frame_for_detections.png")
+        pil_img.save(image_path)
+        try:
+            src_img = cv2.imread(image_path)
+            if src_img is not None:
+                res_img = draw_detections(image=src_img, detections=detections, model_name=meiki.model_name)
+                out_path = Path(image_path).with_name(f"detection_result_{meiki.model_name}.png")
+                cv2.imwrite(str(out_path), res_img)
+                print(f"Saved detection visualization to: {out_path}")
+            else:
+                print(f"Could not read image for visualization: {image_path}")
+        except Exception as e:
+            print(f"Error drawing/saving detections: {e}")
+
+    # print(f"Average processing time over {runs} runs: {avg_time:.4f} seconds")
+
+    # if detector.available:
+    #     # Example image URL
+    #     # image_url = "https://huggingface.co/rtr46/meiki.text.detect.v0/resolve/main/test_images/manga.jpg"
+    #     # image_url = "https://huggingface.co/rtr46/meiki.text.detect.v0/resolve/main/test_images/sign.jpg"
+
+    #     print(f"\nProcessing image from URL: {image_url}")
+
+    #     # The __call__ method handles the URL directly
+    #     detections = detector(image_url, confidence_threshold=0.4)
+
+    #     # Print the results
+    #     print("\nDetections:")
+    #     for det in detections:
+    #         # Formatting the box coordinates to 2 decimal places for cleaner printing
+    #         formatted_box = [f"{coord:.2f}" for coord in det['box']]
+    #         print(f"  - Box: {formatted_box}, Score: {det['score']:.4f}")
+
+    #     # --- Visualization ---
+    #     print("\nVisualizing results... Check for a window named 'Detection Result'.")
+    #     # Load image again for drawing
+    #     response = requests.get(image_url)
+    #     pil_img = Image.open(BytesIO(response.content)).convert("RGB")
+    #     original_image_np = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
+
+    #     # Use the helper function to draw the detections
+    #     result_image = draw_detections(original_image_np, detections, detector.model_name)
+
+    #     # Save or display the image
+    #     output_path = "detection_result.jpg"
+    #     cv2.imwrite(output_path, result_image)
+    #     print(f"Result saved to {output_path}")
+
+    #     # To display in a window (press any key to close)
+    #     # cv2.imshow("Detection Result", result_image)
+    #     # cv2.waitKey(0)
+    #     # cv2.destroyAllWindows()
+    # else:
+    #     print("\nDetector could not be initialized. Please check the error messages above.")
+
 
 # class QWENOCR:
 #     name = 'qwenv2'
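Note: MeikiTextDetector letterboxes each frame before inference: `_resize_and_pad` scales by `min(size/w, size/h)`, centers the result in a square `model_size` canvas, and `__call__` inverts the same pad-then-scale to map predicted boxes back to frame pixels. A worked example of that round trip, with values chosen purely for illustration:

# Round-trip of the letterbox mapping used by MeikiTextDetector:
# a 1280x720 frame into a 640x640 "small" model input, then a box mapped back.
def letterbox_params(w, h, size):
    ratio = min(size / w, size / h)           # uniform scale that fits both axes
    new_w, new_h = int(w * ratio), int(h * ratio)
    pad_w, pad_h = (size - new_w) // 2, (size - new_h) // 2
    return ratio, pad_w, pad_h

ratio, pad_w, pad_h = letterbox_params(1280, 720, 640)  # ratio=0.5, pad_w=0, pad_h=140

# A box predicted in model space (pixels of the 640x640 input)...
x_min, y_min, x_max, y_max = 100.0, 200.0, 300.0, 260.0
# ...maps back to frame space by undoing the pad, then the scale,
# exactly as in MeikiTextDetector.__call__:
box_in_frame = [(x_min - pad_w) / ratio, (y_min - pad_h) / ratio,
                (x_max - pad_w) / ratio, (y_max - pad_h) / ratio]
print(box_in_frame)  # [200.0, 120.0, 600.0, 240.0]

Because a single uniform ratio is used for both axes, aspect ratio is preserved and the inverse mapping needs only the one scale factor plus the two padding offsets.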
GameSentenceMiner/owocr/owocr/run.py CHANGED
@@ -1392,6 +1392,13 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
    # print(engine_index)
 
    if res:
+        if 'provider' in text:
+            if write_to == 'callback':
+                logger.opt(ansi=True).info(f"{len(text['boxes'])} text boxes recognized using Meiki:")
+                txt_callback('', '', ocr_start_time,
+                             img_or_path, is_second_ocr, filtering, text.get('crop_coords', None), meiki_boxes=text.get('boxes', []))
+            return str(text), str(text)
+
        if isinstance(text, list):
            for i, line in enumerate(text):
                text[i] = do_configured_ocr_replacements(line)
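Note: with the run.py change above, a detection-only engine returns a dict (`provider`, `boxes`, `crop_coords`) instead of recognized text, and `process_and_write_results` forwards it to the callback rather than the replacement/filter pipeline. A sketch of that dispatch under those assumptions; `dispatch_ocr_result` and `handle_text_result` are hypothetical stand-ins, and the sketch narrows the membership test to dicts, since `'provider' in text` would also be true for a plain string containing that substring:

# A detection-only result looks like:
#   {'provider': 'meiki', 'boxes': [...], 'crop_coords': [x0, y0, x1, y1]}
def dispatch_ocr_result(res, text, write_to, txt_callback, handle_text_result,
                        ocr_start_time=None, img_or_path=None,
                        is_second_ocr=False, filtering=None):
    if not res:
        return None
    if isinstance(text, dict) and 'provider' in text:  # detection-only provider
        if write_to == 'callback':
            txt_callback('', '', ocr_start_time, img_or_path, is_second_ocr,
                         filtering, text.get('crop_coords'),
                         meiki_boxes=text.get('boxes', []))
        return str(text), str(text)
    return handle_text_result(text)  # normal recognized-text path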
gamesentenceminer-2.19.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.19.3
+Version: 2.19.4
 Summary: A tool for mining sentences from games. Update: Dependencies, replay buffer based line searching, and bug fixes.
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License
@@ -12,42 +12,43 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: requests>=2.32.3
-Requires-Dist: watchdog>=5.0.2
-Requires-Dist: DateTime>=5.5
-Requires-Dist: pyperclip>=1.9.0
-Requires-Dist: soundfile>=0.12.1
-Requires-Dist: toml>=0.10.2
-Requires-Dist: psutil>=7.1.0
-Requires-Dist: rapidfuzz>=3.9.7
-Requires-Dist: plyer>=2.1.0
-Requires-Dist: keyboard>=0.13.5
-Requires-Dist: websockets>=15.0.1
-Requires-Dist: ttkbootstrap>=1.10.1
-Requires-Dist: dataclasses_json>=0.6.7
+Requires-Dist: requests~=2.32.5
+Requires-Dist: watchdog~=6.0.0
+Requires-Dist: DateTime~=5.5
+Requires-Dist: pyperclip~=1.9.0
+Requires-Dist: soundfile~=0.13.1
+Requires-Dist: toml~=0.10.2
+Requires-Dist: psutil~=7.1.0
+Requires-Dist: rapidfuzz~=3.10.0
+Requires-Dist: plyer~=2.1.0
+Requires-Dist: keyboard~=0.13.5
+Requires-Dist: websockets~=15.0.1
+Requires-Dist: ttkbootstrap~=1.10.1
+Requires-Dist: dataclasses_json~=0.6.7
 Requires-Dist: betterproto==2.0.0b7
-Requires-Dist: obsws-python>=1.7.2
+Requires-Dist: obsws-python~=1.8.0
 Requires-Dist: numpy==2.2.6
-Requires-Dist: faster-whisper>=1.2.0
-Requires-Dist: silero-vad>=6.0.0
-Requires-Dist: regex>=2025.9.18
-Requires-Dist: opencv-python>=4.12.0.88
-Requires-Dist: scikit-image>=0.25.2
-Requires-Dist: openai>=1.108.0
-Requires-Dist: owocr>=1.9.1
-Requires-Dist: oneocr>=1.0.10
-Requires-Dist: google-genai>=1.38.0
-Requires-Dist: sounddevice>=0.5.2
-Requires-Dist: matplotlib>=3.10.6
-Requires-Dist: groq>=0.31.1
-Requires-Dist: flask>=3.1.2
-Requires-Dist: pystray>=0.19.5
-Requires-Dist: pygetwindow>=0.0.9; sys_platform == "win32"
-Requires-Dist: pywin32>=311; sys_platform == "win32"
-Requires-Dist: win10toast>=0.9; sys_platform == "win32"
-Requires-Dist: stable-ts>=2.19.1
-Requires-Dist: torchcodec>=0.7.0
+Requires-Dist: faster-whisper~=1.2.0
+Requires-Dist: silero-vad~=6.0.0
+Requires-Dist: regex~=2025.10.23
+Requires-Dist: opencv-python~=4.12.0.88
+Requires-Dist: scikit-image~=0.25.2
+Requires-Dist: owocr==1.9.1
+Requires-Dist: oneocr==1.0.10
+Requires-Dist: google-genai~=1.46.0
+Requires-Dist: sounddevice~=0.5.2
+Requires-Dist: matplotlib~=3.10.6
+Requires-Dist: groq~=0.33.0
+Requires-Dist: flask~=3.1.2
+Requires-Dist: pystray~=0.19.5
+Requires-Dist: pygetwindow==0.0.9; sys_platform == "win32"
+Requires-Dist: pywin32==311; sys_platform == "win32"
+Requires-Dist: win10toast==0.9; sys_platform == "win32"
+Requires-Dist: stable-ts~=2.19.1
+Requires-Dist: torchcodec~=0.7.0
 Requires-Dist: torchaudio==2.8.0
+Requires-Dist: pillow~=12.0.0
+Requires-Dist: openai>=2.6.0
 Dynamic: license-file
 
 # GSM - An Immersion toolkit for Games.
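Note: the metadata changes above move most pins from open-ended `>=` floors to `~=` compatible-release ranges (and to exact `==` pins for owocr, oneocr, and the Windows-only packages), so pip may upgrade only the final version component given in each pin. What `~=2.32.5` admits, checked with the `packaging` library, which implements the same specifier semantics pip applies to Requires-Dist:

from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=2.32.5")  # equivalent to: >=2.32.5, ==2.32.*
print(spec.contains("2.32.5"))   # True
print(spec.contains("2.32.99"))  # True  (patch upgrades allowed)
print(spec.contains("2.33.0"))   # False (minor bump excluded)
print(spec.contains("2.32.4"))   # False (below the floor)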
gamesentenceminer-2.19.3.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
 GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/anki.py,sha256=jySFPzDYz0vItb12kwZ-rm9WmtxO8Kr41wK1JdwRnU4,29638
 GameSentenceMiner/gametext.py,sha256=4PPm7QSWDmvsyooVjFANkd1Vnoy5ixbGRMHfYfhwGs0,13320
-GameSentenceMiner/gsm.py,sha256=TpKJ2j2N_NgjT38p35nVVy5-Lvn4w49Spo4-a-6nfAc,34580
-GameSentenceMiner/obs.py,sha256=vhTFqGxHWEz9g-081gain6iI2poJM_D7v5vI8Kl7rqk,37918
+GameSentenceMiner/gsm.py,sha256=Do-1RRdsC9jqu1artpYN4mQIYT2XGxPPDeYbLKtHMdM,34569
+GameSentenceMiner/obs.py,sha256=MdT3zQJqTe72MAxawxBafK-4a9UoRWdFDJn_V2TsIp4,37988
 GameSentenceMiner/vad.py,sha256=iMSsoUZ7-aNoWKzDKfOHdB3Zk5U2hV7x5hqTny6rj08,21501
 GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/ai/ai_prompting.py,sha256=mq9Odv_FpohXagU-OoSZbLWttdrEl1M1NiqnodeUpD8,29126
@@ -21,14 +21,14 @@ GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
 GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=Ov04c-nKzh3sADxO-5JyZWVe4DlrHM9edM9tc7-97Jo,5970
 GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
 GameSentenceMiner/ocr/owocr_area_selector.py,sha256=4MjItlaZ78Smxa3uxMxbjU0n2z_IBTG-iBpDB9COSL8,29270
-GameSentenceMiner/ocr/owocr_helper.py,sha256=GL16SZa48LwmVQ7xKYRtEgbj8V-7JwOfAHUw-CPwfp4,32370
+GameSentenceMiner/ocr/owocr_helper.py,sha256=MZFKA252lQE1M39tUTtccX3vLaPRJrWfBzWvfxNq3B8,35310
 GameSentenceMiner/ocr/ss_picker.py,sha256=0IhxUdaKruFpZyBL-8SpxWg7bPrlGpy3lhTcMMZ5rwo,5224
 GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9lKnRCj6oZgR0,49
 GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
 GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
 GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
-GameSentenceMiner/owocr/owocr/ocr.py,sha256=XR6tbcj8ctDXn8NlpXrRZIel60zj2h3R0NKWBtEE5M4,72273
-GameSentenceMiner/owocr/owocr/run.py,sha256=z3EaF_a5m9T_ZrELYoaAzHPqzTO0cd7MQCndcnWXq_4,82035
+GameSentenceMiner/owocr/owocr/ocr.py,sha256=yVrLr8nNgvLRB-pPvkyhw07zkAiWrCf85SvgfQBquEk,95309
+GameSentenceMiner/owocr/owocr/run.py,sha256=y90fHSbbjH4BeMlxH_xjKU3uJzfJgdKo6nUqwNcdUJs,82455
 GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
 GameSentenceMiner/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 GameSentenceMiner/tools/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
@@ -135,9 +135,9 @@ GameSentenceMiner/web/templates/components/kanji_grid/thousand_character_classic
 GameSentenceMiner/web/templates/components/kanji_grid/wanikani_levels.json,sha256=8wjnnaYQqmho6t5tMxrIAc03512A2tYhQh5dfsQnfAM,11372
 GameSentenceMiner/web/templates/components/kanji_grid/words_hk_frequency_list.json,sha256=wRkqZNPzz6DT9OTPHpXwfqW96Qb96stCQNNgOL-ZdKk,17535
 GameSentenceMiner/wip/__init___.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gamesentenceminer-2.19.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-gamesentenceminer-2.19.3.dist-info/METADATA,sha256=5zf9Ize9DvAe4tUJt1S3P4kb0ghhgnX4wTxrTvZid-E,8121
-gamesentenceminer-2.19.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-gamesentenceminer-2.19.3.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
-gamesentenceminer-2.19.3.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
-gamesentenceminer-2.19.3.dist-info/RECORD,,
+gamesentenceminer-2.19.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+gamesentenceminer-2.19.4.dist-info/METADATA,sha256=W32ddWeKMJrfVVFIC0YQfVQY26BQ_ulWZiooBYOD3NQ,8151
+gamesentenceminer-2.19.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+gamesentenceminer-2.19.4.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
+gamesentenceminer-2.19.4.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
+gamesentenceminer-2.19.4.dist-info/RECORD,,