rbx-proofreader 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
proofreader/core/config.py CHANGED
@@ -6,6 +6,7 @@ from pathlib import Path
 # Resolves to the 'proofreader' root directory
 BASE_DIR = Path(__file__).resolve().parent.parent.parent
 BASE_URL = "https://github.com/lucacrose/proofreader"
+ VERSION_TAG = "v1.2.0"

 # --- ASSETS & MODELS ---
 ASSETS_PATH = BASE_DIR / "assets"
@@ -16,6 +17,7 @@ THUMBNAILS_DIR = ASSETS_PATH / "thumbnails"
 TRAIN_THUMBNAILS_DIR = ASSETS_PATH / "train_data"
 CLASS_MAP_PATH = ASSETS_PATH / "class_mapping.json"
 CLIP_BEST_PATH = ASSETS_PATH / "weights" / "clip.pt"
+ CLIP_VIT_BASE_PATCH32_PATH = ASSETS_PATH / "clip-vit-base-patch32"

 # --- TRAINING & EMULATOR ---
 TRAIN_DIR = BASE_DIR / "proofreader" / "train"
@@ -79,7 +81,7 @@ AUGMENTER_CONFIG = {
         "height_max": 1600, # Maximum height in pixels (after aspect ratio calculation)
         "total_images": 1024, # Total number of images to generate
         "max_workers": 16, # Maximum number of parallel workers for generation
-         "train_split_fraction": 0.8, # Fraction of images used for training vs validation
+         "train_split_fraction": 0.9, # Fraction of images used for training vs validation
         "empty_trade_chance": 0.09, # Chance a trade has no items or robux (negative sample)
    }
}
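
For context, the new `VERSION_TAG` constant is what `main.py` interpolates into `BASE_URL` to pin asset downloads to the matching GitHub release (see the `_ensure_assets` hunk below). A minimal illustration of the URLs this produces:

```python
# Illustration only: how the pinned release URLs are composed from the config values above.
BASE_URL = "https://github.com/lucacrose/proofreader"
VERSION_TAG = "v1.2.0"

for asset in ("item_database.json", "yolo.pt", "clip.pt", "class_mapping.json"):
    # e.g. https://github.com/lucacrose/proofreader/releases/download/v1.2.0/yolo.pt
    print(f"{BASE_URL}/releases/download/{VERSION_TAG}/{asset}")
```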

proofreader/core/detector.py CHANGED
@@ -1,6 +1,10 @@
 from typing import List
 from ultralytics.models import YOLO
 from .schema import Box
+ from typing import Union
+ import numpy as np
+
+ ImageSource = Union[str, np.ndarray]

 class TradeDetector:
     def __init__(self, model_path: str):
@@ -11,11 +15,13 @@ class TradeDetector:
             1: "item_thumb",
             2: "item_name",
             3: "robux_line",
-             4: "robux_value"
+             4: "robux_value",
+             5: "outgoing_header",
+             6: "incoming_header"
         }

-     def detect(self, image_source: str, conf_threshold: float) -> List[Box]:
-         results = self.model.predict(image_source, conf=conf_threshold, verbose=False)[0]
+     def detect(self, image: ImageSource, conf_threshold: float) -> List[Box]:
+         results = self.model.predict(image, verbose=False, conf=conf_threshold)[0]

         detected_boxes = []
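
`detect` now accepts the new `ImageSource` union, so callers can pass either a file path or an already-decoded frame. A usage sketch (the weights path and threshold below are placeholder values, not part of the package):

```python
import cv2
from proofreader.core.detector import TradeDetector

detector = TradeDetector("assets/weights/yolo.pt")  # hypothetical local path

# Path input: Ultralytics reads and decodes the file itself.
boxes = detector.detect("trade_screenshot.png", conf_threshold=0.5)

# Array input: decode once with OpenCV and reuse the frame downstream,
# which is what TradeEngine.process_image now does (see main.py below).
frame = cv2.imread("trade_screenshot.png")
boxes = detector.detect(frame, conf_threshold=0.5)
```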

proofreader/core/matcher.py CHANGED
@@ -6,14 +6,18 @@ import json
 import cv2
 from PIL import Image
 from torchvision import transforms
- from transformers import CLIPVisionModelWithProjection
+ from transformers import CLIPVisionModelWithProjection, logging as hf_logging
 from typing import List
 from .schema import TradeLayout, ResolvedItem

+ hf_logging.disable_progress_bar()
+ hf_logging.set_verbosity_error()
+
 class CLIPItemEmbedder(nn.Module):
-     def __init__(self, num_classes, model_id="openai/clip-vit-base-patch32"):
+     def __init__(self, num_classes, model_path):
         super().__init__()
-         self.vision_encoder = CLIPVisionModelWithProjection.from_pretrained(model_id)
+
+         self.vision_encoder = CLIPVisionModelWithProjection.from_pretrained(model_path, token=False, low_cpu_mem_usage=True)
         self.item_prototypes = nn.Embedding(num_classes, 512)
         self.logit_scale = nn.Parameter(torch.ones([]) * 2.659)

@@ -22,7 +26,7 @@ class CLIPItemEmbedder(nn.Module):
         return F.normalize(outputs.image_embeds, p=2, dim=-1)

 class VisualMatcher:
-     def __init__(self, weights_path: str, mapping_path: str, item_db: List[dict], device: str = "cuda"):
+     def __init__(self, model_path: str, weights_path: str, mapping_path: str, item_db: List[dict], device: str = "cuda"):
         self.device = device

         with open(mapping_path, "r") as f:
@@ -33,7 +37,7 @@ class VisualMatcher:
         self.name_to_id = {str(i["name"]).lower().strip(): i["id"] for i in item_db}

         num_classes = len(self.class_to_idx)
-         self.model = CLIPItemEmbedder(num_classes).to(self.device)
+         self.model = CLIPItemEmbedder(num_classes, model_path=model_path).to(self.device)
         self.model.load_state_dict(torch.load(weights_path, map_location=self.device))
         self.model.eval()
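
The embedder now loads CLIP from a local directory instead of fetching `openai/clip-vit-base-patch32` from the Hugging Face Hub on every cold start. A sketch of the cache-then-load pattern, assuming a directory produced by `save_pretrained` (the package itself caches via `TradeEngine._download_clip_locally`, shown below):

```python
from pathlib import Path
from transformers import CLIPVisionModelWithProjection

local_dir = Path("assets/clip-vit-base-patch32")  # i.e. CLIP_VIT_BASE_PATCH32_PATH

if not local_dir.exists():
    # One-time download; afterwards the model loads fully offline.
    hub_model = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
    hub_model.save_pretrained(local_dir)

# token=False skips any HF auth token lookup; low_cpu_mem_usage avoids holding a
# second full copy of the weights in RAM while loading.
encoder = CLIPVisionModelWithProjection.from_pretrained(
    local_dir, token=False, low_cpu_mem_usage=True
)
```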

proofreader/core/resolver.py CHANGED
@@ -5,71 +5,84 @@ class SpatialResolver:
     def __init__(self):
         pass

-     def get_center(self, box: Box):
-         x1, y1, x2, y2 = box.coords
-         return (x1 + x2) / 2, (y1 + y2) / 2
-
-     def is_contained(self, child: Box, parent: Box, margin: int = 20) -> bool:
+     def get_iou(self, box1: Box, box2: Box) -> float:
+         b1x1, b1y1, b1x2, b1y2 = box1.coords
+         b2x1, b2y1, b2x2, b2y2 = box2.coords
+
+         ix1, iy1 = max(b1x1, b2x1), max(b1y1, b2y1)
+         ix2, iy2 = min(b1x2, b2x2), min(b1y2, b2y2)
+
+         inter_area = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+         area1 = (b1x2 - b1x1) * (b1y2 - b1y1)
+         area2 = (b2x2 - b2x1) * (b2y2 - b2y1)
+         union_area = area1 + area2 - inter_area
+
+         return inter_area / union_area if union_area > 0 else 0
+
+     def get_ioa(self, child: Box, parent: Box) -> float:
         cx1, cy1, cx2, cy2 = child.coords
         px1, py1, px2, py2 = parent.coords

-         return (cx1 >= px1 - margin and
-                 cy1 >= py1 - margin and
-                 cx2 <= px2 + margin and
-                 cy2 <= py2 + margin)
+         ix1, iy1 = max(cx1, px1), max(cy1, py1)
+         ix2, iy2 = min(cx2, px2), min(cy2, py2)
+
+         inter_area = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+         child_area = (cx2 - cx1) * (cy2 - cy1)
+         return inter_area / child_area if child_area > 0 else 0

     def resolve(self, all_boxes: List[Box]) -> TradeLayout:
         layout = TradeLayout()

-         cards = [b for b in all_boxes if b.label == "item_card"]
-         robux_lines = [b for b in all_boxes if b.label == "robux_line"]
+         raw_cards = sorted([b for b in all_boxes if b.label == "item_card"],
+                            key=lambda x: x.confidence, reverse=True)
+
+         unique_cards = []
+         for card in raw_cards:
+             if any(self.get_iou(card, accepted) > 0.5 for accepted in unique_cards):
+                 continue
+             unique_cards.append(card)

+         robux_lines = [b for b in all_boxes if b.label == "robux_line"]
         names = [b for b in all_boxes if b.label == "item_name"]
         thumbs = [b for b in all_boxes if b.label == "item_thumb"]
         values = [b for b in all_boxes if b.label == "robux_value"]
+         header_received = next((b for b in all_boxes if b.label == "incoming_header"), None)

-         parents = cards + robux_lines
-         if not parents:
-             return layout
-
-         y_centers = sorted([self.get_center(p)[1] for p in parents])
-
-         if len(y_centers) > 1:
-             max_gap = -1
-             gap_index = 0
-
-             for i in range(len(y_centers) - 1):
-                 gap = y_centers[i + 1] - y_centers[i]
-                 if gap > max_gap:
-                     max_gap = gap
-                     gap_index = i + 1
-
-             first_bottom_parent = next(p for p in parents if self.get_center(p)[1] == y_centers[gap_index])
-             split_y = first_bottom_parent.coords[1] - 10
+         if header_received:
+             split_y = header_received.coords[1]
         else:
-             split_y = y_centers[0] + 100
+             parents = sorted(unique_cards + robux_lines, key=lambda b: (b.coords[1] + b.coords[3])/2)
+             if len(parents) > 1:
+                 y_centers = [(b.coords[1] + b.coords[3])/2 for b in parents]
+                 max_gap = 0
+                 split_y = y_centers[0] + 50
+                 for i in range(len(y_centers) - 1):
+                     gap = y_centers[i+1] - y_centers[i]
+                     if gap > max_gap:
+                         max_gap = gap
+                         split_y = (y_centers[i] + y_centers[i+1]) / 2
+             else:
+                 split_y = 500
+
+         unique_cards.sort(key=lambda b: b.coords[1])

-         for card in cards:
+         for card in unique_cards:
             item = ResolvedItem(container_box=card)
-             item.name_box = next((n for n in names if self.is_contained(n, card)), None)
-             item.thumb_box = next((t for t in thumbs if self.is_contained(t, card)), None)

-             if self.get_center(card)[1] < split_y:
+             item.name_box = next((n for n in names if self.get_ioa(n, card) > 0.7), None)
+             item.thumb_box = next((t for t in thumbs if self.get_ioa(t, card) > 0.7), None)
+
+             if (card.coords[1] + card.coords[3]) / 2 < split_y:
                 layout.outgoing.items.append(item)
             else:
                 layout.incoming.items.append(item)
-
+
         for line in robux_lines:
-             val_box = next((v for v in values if self.is_contained(v, line)), None)
-
+             val_box = next((v for v in values if self.get_ioa(v, line) > 0.5), None)
             if val_box:
-                 robux_obj = robux_obj = ResolvedRobux(
-                     container_box=line,
-                     value_box=val_box
-                 )
-
-                 if self.get_center(line)[1] < split_y:
-                     layout.outgoing.robux = robux_obj
+                 robux_obj = ResolvedRobux(container_box=line, value_box=val_box)
+                 if (line.coords[1] + line.coords[3]) / 2 < split_y:
+                     layout.outgoing.robux = robux_obj
             else:
                 layout.incoming.robux = robux_obj
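
The margin-based containment test is gone: IoU (intersection over union) now deduplicates overlapping `item_card` detections, and IoA (intersection over the child's area) assigns names and thumbnails to cards. A standalone sketch of the same geometry with concrete corner boxes:

```python
def iou(a, b):
    # a, b: (x1, y1, x2, y2) corner boxes
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
    return inter / union if union > 0 else 0

def ioa(child, parent):
    ix1, iy1 = max(child[0], parent[0]), max(child[1], parent[1])
    ix2, iy2 = min(child[2], parent[2]), min(child[3], parent[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area = (child[2] - child[0]) * (child[3] - child[1])
    return inter / area if area > 0 else 0

card_a = (100, 100, 200, 200)
card_b = (110, 110, 210, 210)   # near-duplicate detection of the same card
print(iou(card_a, card_b))      # ~0.68 > 0.5, so the lower-confidence box is dropped

name = (120, 170, 190, 195)     # name text sitting inside card_a
print(ioa(name, card_a))        # 1.0 > 0.7, so it attaches to card_a
```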

proofreader/main.py CHANGED
@@ -1,17 +1,22 @@
- import os
+ import io
 import cv2
 import torch
 import json
 import requests
+ from typing import Union
+ from pathlib import Path
+ import numpy as np
 from tqdm import tqdm
 from transformers import CLIPProcessor, CLIPModel
 from .core.detector import TradeDetector
 from .core.resolver import SpatialResolver
 from .core.ocr import OCRReader
 from .core.matcher import VisualMatcher
- from .core.config import DB_PATH, MODEL_PATH, DEVICE, CLASS_MAP_PATH, CLIP_BEST_PATH, BASE_URL, CERTAIN_VISUAL_CONF
+ from .core.config import DB_PATH, MODEL_PATH, DEVICE, CLASS_MAP_PATH, CLIP_BEST_PATH, BASE_URL, CERTAIN_VISUAL_CONF, VERSION_TAG, CLIP_VIT_BASE_PATCH32_PATH
 from .core.schema import ResolvedItem

+ ImageInput = Union[str, Path, np.ndarray, bytes, io.BytesIO]
+
 class TradeEngine:
     def __init__(self):
         self._ensure_assets()
@@ -27,9 +32,6 @@ class TradeEngine:

         self.device = DEVICE

-         self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)
-         self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", use_fast=True)
-
         with open(DB_PATH, "r") as f:
             item_db = json.load(f)

@@ -39,24 +41,40 @@ class TradeEngine:

         self.matcher = VisualMatcher(
             item_db=item_db,
+             model_path=CLIP_VIT_BASE_PATCH32_PATH,
             weights_path=CLIP_BEST_PATH,
             mapping_path=CLASS_MAP_PATH,
             device=self.device
-         )
+         )
+
+     def _download_clip_locally(self):
+         model_name = "openai/clip-vit-base-patch32"
+         model = CLIPModel.from_pretrained(model_name)
+         processor = CLIPProcessor.from_pretrained(model_name)
+
+         model_path = str(CLIP_VIT_BASE_PATCH32_PATH.resolve().as_posix())
+
+         model.save_pretrained(model_path)
+         processor.save_pretrained(model_path)
+         print(f"✅ CLIP saved locally at {CLIP_VIT_BASE_PATCH32_PATH}")

     def _ensure_assets(self):
         assets = {
-             DB_PATH: f"{BASE_URL}/releases/download/v1.1.0/item_database.json",
-             MODEL_PATH: f"{BASE_URL}/releases/download/v1.1.0/yolo.pt",
-             CLIP_BEST_PATH: f"{BASE_URL}/releases/download/v1.1.0/clip.pt",
-             CLASS_MAP_PATH: f"{BASE_URL}/releases/download/v1.1.0/class_mapping.json"
+             DB_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/item_database.json",
+             MODEL_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/yolo.pt",
+             CLIP_BEST_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/clip.pt",
+             CLASS_MAP_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/class_mapping.json"
         }

         for path, url in assets.items():
             if not path.exists():
-                 print(f"📦 {path.name} missing. Downloading from latest release...")
+                 print(f"📦 {path.name} missing. Downloading from {VERSION_TAG} release...")
                 self._download_file(url, path)

+         if not CLIP_VIT_BASE_PATCH32_PATH.exists():
+             print(f"📦 Local CLIP assets missing. Downloading to {CLIP_VIT_BASE_PATCH32_PATH}...")
+             self._download_clip_locally()
+
     def _download_file(self, url, dest_path):
         response = requests.get(url, stream=True)
         total_size = int(response.headers.get('content-length', 0))
@@ -110,15 +128,34 @@ class TradeEngine:
                 item.id = ocr_id_direct
                 item.name = self.matcher.id_to_name.get(str(ocr_id_direct))

-     def process_image(self, image_path: str, conf_threshold: float) -> dict:
-         if not os.path.exists(image_path):
-             raise FileNotFoundError(f"Image not found: {image_path}")
+     def _load_image(self, image: ImageInput) -> np.ndarray:
+         if isinstance(image, np.ndarray):
+             return image
+
+         if isinstance(image, io.BytesIO):
+             image = image.getvalue()
+
+         if isinstance(image, bytes):
+             nparr = np.frombuffer(image, np.uint8)
+             img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+             if img is None:
+                 raise ValueError("Failed to decode image from bytes")
+             return img
+
+         image_str = str(image)
+         img = cv2.imread(image_str)
+         if img is None:
+             raise ValueError(f"Failed to load image: {image_str}")
+
+         return img
+
+     def process_image(self, image: ImageInput, conf_threshold: float) -> dict:
+         image_np = self._load_image(image)

-         boxes = self.detector.detect(image_path, conf_threshold)
+         boxes = self.detector.detect(image_np, conf_threshold)
         layout = self.resolver.resolve(boxes)
-         image = cv2.imread(image_path)

-         self.matcher.match_item_visuals(image, layout)
+         self.matcher.match_item_visuals(image_np, layout)

         for side in [layout.outgoing, layout.incoming]:
             for item in side.items:
@@ -128,7 +165,7 @@ class TradeEngine:
                 item._finalized = True

         self.reader.process_layout(
-             image,
+             image_np,
             layout,
             skip_if=lambda item: getattr(item, "_finalized", False)
         )
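
With `_load_image` in place, `process_image` accepts a path, raw bytes, a `BytesIO`, or a decoded array; everything is normalized to one BGR `np.ndarray` that is decoded once and shared by the detector, matcher, and OCR stages. A usage sketch (the threshold is an arbitrary example value):

```python
import cv2
from proofreader import TradeEngine  # assuming the package root re-exports TradeEngine

engine = TradeEngine()

# 1) Path on disk
data = engine.process_image("proof.png", conf_threshold=0.5)

# 2) Raw bytes, e.g. straight from an HTTP upload, with no temp file
with open("proof.png", "rb") as f:
    data = engine.process_image(f.read(), conf_threshold=0.5)

# 3) An already-decoded BGR frame (the fastest path per the README benchmarks)
frame = cv2.imread("proof.png")
data = engine.process_image(frame, conf_threshold=0.5)

print(data["incoming"]["robux_value"])
```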
proofreader/train/emulator/generator.py CHANGED
@@ -25,6 +25,104 @@ from proofreader.core.config import (

 GENERATOR_CONFIG = AUGMENTER_CONFIG["generator"]

+ def clean_and_save_labels(page, width, height):
+     label_data = []
+
+     chat_bar = page.query_selector("#chat-main")
+     chat_box = chat_bar.bounding_box() if chat_bar and chat_bar.is_visible() else None
+
+     def get_intersection_area(boxA, boxB):
+         xA = max(boxA['x'], boxB['x'])
+         yA = max(boxA['y'], boxB['y'])
+         xB = min(boxA['x'] + boxA['width'], boxB['x'] + boxB['width'])
+         yB = min(boxA['y'] + boxA['height'], boxB['y'] + boxB['height'])
+         interWidth = max(0, xB - xA)
+         interHeight = max(0, yB - yA)
+         return interWidth * interHeight
+
+     def get_valid_yolo_data(box, class_id, pad=0, visibility_threshold=0.5):
+         if not box:
+             return None
+
+         x1_raw, y1_raw = box['x'] - pad, box['y'] - pad
+         x2_raw, y2_raw = box['x'] + box['width'] + pad, box['y'] + box['height'] + pad
+
+         padded_w = x2_raw - x1_raw
+         padded_h = y2_raw - y1_raw
+         original_area = max(1, padded_w * padded_h)
+
+         x1, y1 = max(0, x1_raw), max(0, y1_raw)
+         x2, y2 = min(width, x2_raw), min(height, y2_raw)
+
+         nw, nh = x2 - x1, y2 - y1
+         if nw <= 2 or nh <= 2:
+             return None
+
+         canvas_visible_area = nw * nh
+
+         overlap_with_chat = 0
+         if chat_box:
+             current_box = {'x': x1, 'y': y1, 'width': nw, 'height': nh}
+             overlap_with_chat = get_intersection_area(current_box, chat_box)
+
+         actual_visible_area = canvas_visible_area - overlap_with_chat
+
+         visibility_ratio = actual_visible_area / original_area
+
+         if visibility_ratio < visibility_threshold:
+             return None
+
+         return [
+             class_id,
+             (x1 + nw/2) / width,
+             (y1 + nh/2) / height,
+             nw / width,
+             nh / height
+         ]
+
+     items = page.query_selector_all("div[trade-item-card]")
+     for item in items:
+         if not item.is_visible(): continue
+
+         card_box = item.bounding_box()
+
+         card_res = get_valid_yolo_data(card_box, 0, pad=4, visibility_threshold=0.4)
+
+         if card_res:
+             label_data.append(card_res)
+
+         thumb = item.query_selector(".item-card-thumb-container")
+         name = item.query_selector(".item-card-name")
+
+         t_box = thumb.bounding_box() if thumb and thumb.is_visible() else None
+         n_box = name.bounding_box() if name and name.is_visible() else None
+
+         t_res = get_valid_yolo_data(t_box, 1, pad=4, visibility_threshold=0.5)
+         n_res = get_valid_yolo_data(n_box, 2, pad=4, visibility_threshold=0.5)
+
+         if t_res: label_data.append(t_res)
+         if n_res: label_data.append(n_res)
+
+     for section in page.query_selector_all(".robux-line"):
+         if section.is_visible() and "Robux Offered" in section.inner_text():
+             res_3 = get_valid_yolo_data(section.bounding_box(), 3, pad=6, visibility_threshold=0.4)
+             if res_3:
+                 label_data.append(res_3)
+             val_el = section.query_selector(".robux-line-value")
+             if val_el and val_el.is_visible():
+                 res_4 = get_valid_yolo_data(val_el.bounding_box(), 4, pad=4, visibility_threshold=0.5)
+                 if res_4: label_data.append(res_4)
+
+     for header in page.query_selector_all("h3.trade-list-detail-offer-header"):
+         if not header.is_visible(): continue
+         text = header.inner_text().lower()
+         cid = 5 if "gave" in text else 6 if "received" in text else None
+         if cid:
+             res_h = get_valid_yolo_data(header.bounding_box(), cid, pad=4, visibility_threshold=0.4)
+             if res_h: label_data.append(res_h)
+
+     return label_data
+
 def process_batch(batch_ids, db, backgrounds_count, progress_counter):
     try:
         with sync_playwright() as p:
@@ -95,50 +193,7 @@ def generate_single_image(page, task_id, db, backgrounds_count, augmenter_js):
     }
     """)

-     def get_padded_yolo(element, class_id, pad_px=2):
-         box = element.bounding_box()
-         if not box: return None
-         x1, y1 = max(0, box['x'] - pad_px), max(0, box['y'] - pad_px)
-         x2, y2 = min(width, box['x'] + box['width'] + pad_px), min(height, box['y'] + box['height'] + pad_px)
-         nw, nh = x2 - x1, y2 - y1
-         return [class_id, (x1 + nw/2)/width, (y1 + nh/2)/height, nw/width, nh/height]
-
-     def is_fully_visible(box, width, height, pad=4):
-         return (box['x'] - pad >= 0 and
-                 box['y'] - pad >= 0 and
-                 (box['x'] + box['width'] + pad) <= width and
-                 (box['y'] + box['height'] + pad) <= height)
-
-     label_data = []
-
-     items = page.query_selector_all("div[trade-item-card]")
-     for item in items:
-         box = item.bounding_box()
-         if box and is_fully_visible(box, width, height):
-             card_box = get_padded_yolo(item, 0, pad_px=4)
-             if card_box: label_data.append(card_box)
-
-             thumb = item.query_selector(".item-card-thumb-container")
-             if thumb:
-                 thumb_box = get_padded_yolo(thumb, 1, pad_px=4)
-                 if thumb_box: label_data.append(thumb_box)
-
-             name = item.query_selector(".item-card-name")
-             if name:
-                 name_box = get_padded_yolo(name, 2, pad_px=4)
-                 if name_box: label_data.append(name_box)
-
-     robux_sections = page.query_selector_all(".robux-line:not(.total-value)")
-     for section in robux_sections:
-         box = section.bounding_box()
-         if box and is_fully_visible(box, width, height, 8) and section.is_visible():
-             line_box = get_padded_yolo(section, 3, pad_px=8)
-             if line_box: label_data.append(line_box)
-
-             value_element = section.query_selector(".robux-line-value")
-             if value_element:
-                 value_box = get_padded_yolo(value_element, 4, pad_px=4)
-                 if value_box: label_data.append(value_box)
+     label_data = clean_and_save_labels(page, width, height)

     img_buffer = page.screenshot(type="jpeg", quality=100)
     nparr = np.frombuffer(img_buffer, np.uint8)
@@ -195,7 +250,7 @@ def generate_single_image(page, task_id, db, backgrounds_count, augmenter_js):
         for label in label_data:
             f.write(f"{label[0]} {label[1]:.6f} {label[2]:.6f} {label[3]:.6f} {label[4]:.6f}\n")

- def run_mass_generation(total_images=65536, max_workers=24):
+ def run_mass_generation(total_images=16384, max_workers=24):
     with open(DB_PATH, "r") as f:
         db = json.load(f)
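
The rewritten labeler drops the all-or-nothing `is_fully_visible` check in favor of a visibility ratio: the padded box is clipped to the canvas, any area hidden behind the chat panel is subtracted, and the label survives only if enough of the element remains. A standalone sketch of that arithmetic:

```python
def visibility_ratio(box, canvas_w, canvas_h, chat_box=None, pad=4):
    # box / chat_box: dicts with x, y, width, height (Playwright bounding_box format)
    x1, y1 = box["x"] - pad, box["y"] - pad
    x2, y2 = box["x"] + box["width"] + pad, box["y"] + box["height"] + pad
    original_area = max(1, (x2 - x1) * (y2 - y1))

    # Clip the padded box to the canvas.
    cx1, cy1 = max(0, x1), max(0, y1)
    cx2, cy2 = min(canvas_w, x2), min(canvas_h, y2)
    visible = max(0, cx2 - cx1) * max(0, cy2 - cy1)

    # Subtract whatever the chat panel covers.
    if chat_box:
        ox1, oy1 = max(cx1, chat_box["x"]), max(cy1, chat_box["y"])
        ox2 = min(cx2, chat_box["x"] + chat_box["width"])
        oy2 = min(cy2, chat_box["y"] + chat_box["height"])
        visible -= max(0, ox2 - ox1) * max(0, oy2 - oy1)

    return visible / original_area

# A card half-buried under the chat panel is rejected at threshold 0.5:
card = {"x": 900, "y": 400, "width": 200, "height": 100}
chat = {"x": 990, "y": 0, "width": 930, "height": 1080}
print(visibility_ratio(card, 1920, 1080, chat))  # ≈ 0.45 < 0.5 → label dropped
```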

proofreader/train/yolo_trainer.py CHANGED
@@ -2,7 +2,7 @@ from ultralytics import YOLO
 from ..core.config import TRAINING_CONFIG, DATA_YAML_PATH

 def train_yolo(device):
-     model = YOLO("yolo11n.pt")
+     model = YOLO("yolo26n.pt")

     model.train(
         data = DATA_YAML_PATH,
@@ -10,7 +10,7 @@ def train_yolo(device):
         imgsz = TRAINING_CONFIG["img_size"],
         device = device,
         plots = True,
-         multi_scale = True,
+         multi_scale = 0.5,

         batch = TRAINING_CONFIG["batch_size"],
         patience = TRAINING_CONFIG["patience"],
@@ -32,9 +32,9 @@ def finish_training(file_path, device):

     model.train(
         data = DATA_YAML_PATH,
-         epochs = 32,
-         close_mosaic = 32,
-         patience = 20,
+         epochs = 28,
+         close_mosaic = 28,
+         patience = 6,
         imgsz = TRAINING_CONFIG["img_size"],
         batch = TRAINING_CONFIG["batch_size"],
         device = device

{rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/METADATA RENAMED
@@ -1,10 +1,12 @@
 Metadata-Version: 2.4
 Name: rbx-proofreader
- Version: 1.1.1
+ Version: 1.2.0
 Summary: Visual trade detection and OCR engine
+ Author: Luca Rose
 License: MIT
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -32,9 +34,9 @@ A high-speed vision pipeline for reading Roblox trade screenshots.
 [![License](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
 [![Build Status](https://github.com/lucacrose/proofreader/actions/workflows/build.yml/badge.svg)](https://github.com/lucacrose/proofreader/actions)
 [![GPU](https://img.shields.io/badge/GPU-CUDA-blueviolet)](https://developer.nvidia.com/cuda-zone)
- [![YOLOv11](https://img.shields.io/badge/model-YOLOv11-blueviolet)](https://github.com/ultralytics/ultralytics)
+ [![YOLO26](https://img.shields.io/badge/model-YOLO26-blueviolet)](https://github.com/ultralytics/ultralytics)

- Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLOv11** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and extensions.
+ Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLO26** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and extensions.

 ## Why Proofreader?

@@ -47,20 +49,20 @@ Roblox trade screenshots are commonly used as proof in marketplaces, moderation

 ## ⚡ Performance

- Tested on an **RTX 5070** using $n=500$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).
+ Tested on an **RTX 5070** using $n=1300$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).

 | Metric                  | Result (E2E)                 |
 |:------------------------|:-----------------------------|
- | Exact Match Accuracy    | 97.2% (95% CI: 95.4–98.5%)   |
- | Median latency          | 36.8 ms                      |
- | 95th percentile latency | 73.4 ms                      |
+ | Exact Match Accuracy    | 98.4% (95% CI: 97.5–99.0%)   |
+ | Median latency          | 28.0 ms                      |
+ | 95th percentile latency | 47.4 ms                      |

 > [!NOTE]
- > End-to-End **(E2E)** latency includes image loading, YOLO detection, spatial organization, CLIP similarity matching, and OCR fallback.
+ > Latencies above are reported End-to-End (**E2E**), including image loading, YOLO detection, spatial organization, CLIP matching, and OCR fallback. If passing images directly as NumPy arrays, median latency is 20.5 ms (35.0 ms P95).

 ## ✨ Key Features

- - **Sub-40ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.
+ - **Sub-30ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.

 - **Multi-modal decision engine:** Weighs visual embeddings against OCR text to resolve identities across 2,500+ distinct item classes.

@@ -77,7 +79,7 @@ pip install rbx-proofreader
 ```

 > [!IMPORTANT]
- > **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-40ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.
+ > **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-30ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.

 ### Usage

@@ -97,7 +99,7 @@ print(f"Robux In: {data['incoming']['robux_value']}")
 ## 🧩 How it Works
 The model handles the inconsistencies of user-generated screenshots (varied crops, UI themes, and extensions) through a multi-stage process:

- 1. **Detection:** YOLOv11 localizes item cards, thumbnails, and robux containers.
+ 1. **Detection:** YOLO26 localizes item cards, thumbnails, and robux containers.

 2. **Spatial Organization:** Assigns child elements (names/values) to parents and determines trade side.

@@ -146,7 +148,7 @@ python scripts/train_models.py

 ## 🛠️ Tech Stack

- - **Vision:** YOLOv11 (Detection), CLIP (Embeddings), OpenCV (Processing)
+ - **Vision:** YOLO26 (Detection), CLIP (Embeddings), OpenCV (Processing)
 - **OCR:** EasyOCR
 - **Logic:** RapidFuzz (Fuzzy String Matching)
 - **Core:** Python 3.12, PyTorch, NumPy

rbx_proofreader-1.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+ proofreader/__init__.py,sha256=YVsRxmHmC2nvCrxvNmZX230B1s5k36RFM51kElXSxB4,285
+ proofreader/main.py,sha256=fR1wXfGtxDUWjMRi5bMElXR8BkXnbGNJ0QXg71mJwso,6491
+ proofreader/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ proofreader/core/config.py,sha256=xNp4K9zcukkW7eBDwr2XG6SkOCBwc6-q1iK3UNgEFSE,5996
+ proofreader/core/detector.py,sha256=usY7ORLF1utfdq8tRYdrojshFxfagsdNI3Ek3M4AoFY,1131
+ proofreader/core/matcher.py,sha256=av5cB4JDjO4euoMxH5dcjnJCMpBytiFZQXOWNZvhFhw,3745
+ proofreader/core/ocr.py,sha256=FFhIS1TVrqSXUPGOll5RNbHX18q7de4xFUP1ewrnhSc,3652
+ proofreader/core/resolver.py,sha256=gFFIwah1n9y2y1ojnUF_AOB52hf-4NbKKXUyxNxWyao,3593
+ proofreader/core/schema.py,sha256=ga_7cYCBO13yFvLAtyAgDw7CFEb9c8Ui85SJDu2pcsA,2512
+ proofreader/train/clip_trainer.py,sha256=6hiVrJ6WX6m13E3FE8kouIxXjQo3GPrU_8X266oeXqs,6416
+ proofreader/train/yolo_trainer.py,sha256=ppVoQPpueMH4jfXCpyngo2ts9UEqS9I2WyecBIQe0Ac,984
+ proofreader/train/emulator/generator.py,sha256=vmqDcgrht43m8T0mRfXeT3GbYTQlEbOKaEX6oz_9viw,11488
+ rbx_proofreader-1.2.0.dist-info/licenses/LICENSE,sha256=eHSaONn9P_ZcYiY9QCi_XzVARIoQu7l2AI5BtFGA_BY,1069
+ rbx_proofreader-1.2.0.dist-info/METADATA,sha256=UXY6DSMfY8zo9WK6F0uFHnVzaGlmI8WADn78lk41-DU,6730
+ rbx_proofreader-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ rbx_proofreader-1.2.0.dist-info/top_level.txt,sha256=U3s8IVdLtGeGD3JgMmCHUgAsFhZXSSamp3vIojAFTxU,12
+ rbx_proofreader-1.2.0.dist-info/RECORD,,

rbx_proofreader-1.1.1.dist-info/RECORD DELETED
@@ -1,17 +0,0 @@
- proofreader/__init__.py,sha256=YVsRxmHmC2nvCrxvNmZX230B1s5k36RFM51kElXSxB4,285
- proofreader/main.py,sha256=01G_-ppevuNNafi-QCc6UB_Y2NuIW6sDoZwvjjdm1B0,5220
- proofreader/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- proofreader/core/config.py,sha256=8L6MTBn3Z3Xa0bjPYt5q-OI-mm0-wMqeDSS0beAQ1fk,5906
- proofreader/core/detector.py,sha256=em2Kx0v96Zofi4kK5ipWlqMX9czq9YobHuEGuZkAQEc,987
- proofreader/core/matcher.py,sha256=4URgBb6EgBaCNFpafjnQrIot9KeIwoYUUNraaC9nlIk,3603
- proofreader/core/ocr.py,sha256=FFhIS1TVrqSXUPGOll5RNbHX18q7de4xFUP1ewrnhSc,3652
- proofreader/core/resolver.py,sha256=DTbf5qyQaJrBbw1QWQQJ_BZf_dg003p_xH8RMpI6sn8,2685
- proofreader/core/schema.py,sha256=ga_7cYCBO13yFvLAtyAgDw7CFEb9c8Ui85SJDu2pcsA,2512
- proofreader/train/clip_trainer.py,sha256=6hiVrJ6WX6m13E3FE8kouIxXjQo3GPrU_8X266oeXqs,6416
- proofreader/train/yolo_trainer.py,sha256=nOHPrYmBuefsUyiGEYqboNU6i3pykBXE0U4HYwNaqg8,986
- proofreader/train/emulator/generator.py,sha256=_l7qFLSoQxPYUKLDrqVIS-0sUs5FkjBK7ENWmZ-q2ls,9681
- rbx_proofreader-1.1.1.dist-info/licenses/LICENSE,sha256=eHSaONn9P_ZcYiY9QCi_XzVARIoQu7l2AI5BtFGA_BY,1069
- rbx_proofreader-1.1.1.dist-info/METADATA,sha256=CNi-FAGJvwoEL6LPmpL8m39canCcXsg_idlPTQFVeFA,6568
- rbx_proofreader-1.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- rbx_proofreader-1.1.1.dist-info/top_level.txt,sha256=U3s8IVdLtGeGD3JgMmCHUgAsFhZXSSamp3vIojAFTxU,12
- rbx_proofreader-1.1.1.dist-info/RECORD,,