rbx-proofreader 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proofreader/core/config.py +3 -1
- proofreader/core/detector.py +9 -3
- proofreader/core/matcher.py +9 -5
- proofreader/core/resolver.py +57 -44
- proofreader/main.py +55 -18
- proofreader/train/emulator/generator.py +100 -45
- proofreader/train/yolo_trainer.py +5 -5
- {rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/METADATA +14 -12
- rbx_proofreader-1.2.0.dist-info/RECORD +17 -0
- rbx_proofreader-1.1.1.dist-info/RECORD +0 -17
- {rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/WHEEL +0 -0
- {rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/top_level.txt +0 -0
proofreader/core/config.py
CHANGED
@@ -6,6 +6,7 @@ from pathlib import Path
 # Resolves to the 'proofreader' root directory
 BASE_DIR = Path(__file__).resolve().parent.parent.parent
 BASE_URL = "https://github.com/lucacrose/proofreader"
+VERSION_TAG = "v1.2.0"
 
 # --- ASSETS & MODELS ---
 ASSETS_PATH = BASE_DIR / "assets"
@@ -16,6 +17,7 @@ THUMBNAILS_DIR = ASSETS_PATH / "thumbnails"
 TRAIN_THUMBNAILS_DIR = ASSETS_PATH / "train_data"
 CLASS_MAP_PATH = ASSETS_PATH / "class_mapping.json"
 CLIP_BEST_PATH = ASSETS_PATH / "weights" / "clip.pt"
+CLIP_VIT_BASE_PATCH32_PATH = ASSETS_PATH / "clip-vit-base-patch32"
 
 # --- TRAINING & EMULATOR ---
 TRAIN_DIR = BASE_DIR / "proofreader" / "train"
@@ -79,7 +81,7 @@ AUGMENTER_CONFIG = {
         "height_max": 1600,          # Maximum height in pixels (after aspect ratio calculation)
         "total_images": 1024,        # Total number of images to generate
         "max_workers": 16,           # Maximum number of parallel workers for generation
-        "train_split_fraction": 0.
+        "train_split_fraction": 0.9, # Fraction of images used for training vs validation
         "empty_trade_chance": 0.09,  # Chance a trade has no items or robux (negative sample)
     }
 }
proofreader/core/detector.py
CHANGED
@@ -1,6 +1,10 @@
 from typing import List
 from ultralytics.models import YOLO
 from .schema import Box
+from typing import Union
+import numpy as np
+
+ImageSource = Union[str, np.ndarray]
 
 class TradeDetector:
     def __init__(self, model_path: str):
@@ -11,11 +15,13 @@ class TradeDetector:
             1: "item_thumb",
             2: "item_name",
             3: "robux_line",
-            4: "robux_value"
+            4: "robux_value",
+            5: "outgoing_header",
+            6: "incoming_header"
         }
 
-    def detect(self,
-        results = self.model.predict(
+    def detect(self, image: ImageSource, conf_threshold: float) -> List[Box]:
+        results = self.model.predict(image, verbose=False, conf=conf_threshold)[0]
 
         detected_boxes = []
 
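The detector's public API now accepts either an on-disk path or an in-memory frame (`ImageSource = Union[str, np.ndarray]`) and takes the confidence threshold explicitly. A minimal usage sketch; the weights path and threshold below are illustrative, not values shipped by the package:

```python
import cv2
from proofreader.core.detector import TradeDetector

# Illustrative weights path; in practice TradeEngine downloads the YOLO weights on first run.
detector = TradeDetector("assets/weights/yolo.pt")

# A file path works as before...
boxes = detector.detect("trade_screenshot.png", conf_threshold=0.5)

# ...and so does a decoded NumPy frame, which skips the extra disk read.
frame = cv2.imread("trade_screenshot.png")
boxes = detector.detect(frame, conf_threshold=0.5)
```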
proofreader/core/matcher.py
CHANGED
@@ -6,14 +6,18 @@ import json
 import cv2
 from PIL import Image
 from torchvision import transforms
-from transformers import CLIPVisionModelWithProjection
+from transformers import CLIPVisionModelWithProjection, logging as hf_logging
 from typing import List
 from .schema import TradeLayout, ResolvedItem
 
+hf_logging.disable_progress_bar()
+hf_logging.set_verbosity_error()
+
 class CLIPItemEmbedder(nn.Module):
-    def __init__(self, num_classes,
+    def __init__(self, num_classes, model_path):
         super().__init__()
-
+
+        self.vision_encoder = CLIPVisionModelWithProjection.from_pretrained(model_path, token=False, low_cpu_mem_usage=True)
         self.item_prototypes = nn.Embedding(num_classes, 512)
         self.logit_scale = nn.Parameter(torch.ones([]) * 2.659)
 
@@ -22,7 +26,7 @@ class CLIPItemEmbedder(nn.Module):
         return F.normalize(outputs.image_embeds, p=2, dim=-1)
 
 class VisualMatcher:
-    def __init__(self, weights_path: str, mapping_path: str, item_db: List[dict], device: str = "cuda"):
+    def __init__(self, model_path: str, weights_path: str, mapping_path: str, item_db: List[dict], device: str = "cuda"):
         self.device = device
 
         with open(mapping_path, "r") as f:
@@ -33,7 +37,7 @@ class VisualMatcher:
         self.name_to_id = {str(i["name"]).lower().strip(): i["id"] for i in item_db}
 
         num_classes = len(self.class_to_idx)
-        self.model = CLIPItemEmbedder(num_classes).to(self.device)
+        self.model = CLIPItemEmbedder(num_classes, model_path=model_path).to(self.device)
         self.model.load_state_dict(torch.load(weights_path, map_location=self.device))
         self.model.eval()
 
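`VisualMatcher` now loads the CLIP vision encoder from a local snapshot directory instead of pulling `openai/clip-vit-base-patch32` from the Hub at construction time, with Hugging Face progress bars and warnings silenced. A construction sketch, assuming the assets have already been downloaded by `TradeEngine._ensure_assets`:

```python
import json
from proofreader.core.matcher import VisualMatcher
from proofreader.core.config import (
    CLIP_VIT_BASE_PATCH32_PATH, CLIP_BEST_PATH, CLASS_MAP_PATH, DB_PATH, DEVICE,
)

with open(DB_PATH, "r") as f:
    item_db = json.load(f)

matcher = VisualMatcher(
    model_path=CLIP_VIT_BASE_PATCH32_PATH,  # local clip-vit-base-patch32 snapshot
    weights_path=CLIP_BEST_PATH,             # fine-tuned embedder weights (clip.pt)
    mapping_path=CLASS_MAP_PATH,
    item_db=item_db,
    device=DEVICE,
)
```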
proofreader/core/resolver.py
CHANGED
@@ -5,71 +5,84 @@ class SpatialResolver:
     def __init__(self):
         pass
 
-    def
-
-
-
-
+    def get_iou(self, box1: Box, box2: Box) -> float:
+        b1x1, b1y1, b1x2, b1y2 = box1.coords
+        b2x1, b2y1, b2x2, b2y2 = box2.coords
+
+        ix1, iy1 = max(b1x1, b2x1), max(b1y1, b2y1)
+        ix2, iy2 = min(b1x2, b2x2), min(b1y2, b2y2)
+
+        inter_area = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+        area1 = (b1x2 - b1x1) * (b1y2 - b1y1)
+        area2 = (b2x2 - b2x1) * (b2y2 - b2y1)
+        union_area = area1 + area2 - inter_area
+
+        return inter_area / union_area if union_area > 0 else 0
+
+    def get_ioa(self, child: Box, parent: Box) -> float:
         cx1, cy1, cx2, cy2 = child.coords
         px1, py1, px2, py2 = parent.coords
 
-
-
-
-
+        ix1, iy1 = max(cx1, px1), max(cy1, py1)
+        ix2, iy2 = min(cx2, px2), min(cy2, py2)
+
+        inter_area = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+        child_area = (cx2 - cx1) * (cy2 - cy1)
+        return inter_area / child_area if child_area > 0 else 0
 
     def resolve(self, all_boxes: List[Box]) -> TradeLayout:
         layout = TradeLayout()
 
-
-
+        raw_cards = sorted([b for b in all_boxes if b.label == "item_card"],
+                           key=lambda x: x.confidence, reverse=True)
+
+        unique_cards = []
+        for card in raw_cards:
+            if any(self.get_iou(card, accepted) > 0.5 for accepted in unique_cards):
+                continue
+            unique_cards.append(card)
 
+        robux_lines = [b for b in all_boxes if b.label == "robux_line"]
         names = [b for b in all_boxes if b.label == "item_name"]
         thumbs = [b for b in all_boxes if b.label == "item_thumb"]
         values = [b for b in all_boxes if b.label == "robux_value"]
+        header_received = next((b for b in all_boxes if b.label == "received_header"), None)
 
-
-
-            return layout
-
-        y_centers = sorted([self.get_center(p)[1] for p in parents])
-
-        if len(y_centers) > 1:
-            max_gap = -1
-            gap_index = 0
-
-            for i in range(len(y_centers) - 1):
-                gap = y_centers[i + 1] - y_centers[i]
-                if gap > max_gap:
-                    max_gap = gap
-                    gap_index = i + 1
-
-            first_bottom_parent = next(p for p in parents if self.get_center(p)[1] == y_centers[gap_index])
-            split_y = first_bottom_parent.coords[1] - 10
+        if header_received:
+            split_y = header_received.coords[1]
         else:
-
+            parents = sorted(unique_cards + robux_lines, key=lambda b: (b.coords[1] + b.coords[3])/2)
+            if len(parents) > 1:
+                y_centers = [(b.coords[1] + b.coords[3])/2 for b in parents]
+                max_gap = 0
+                split_y = y_centers[0] + 50
+                for i in range(len(y_centers) - 1):
+                    gap = y_centers[i+1] - y_centers[i]
+                    if gap > max_gap:
+                        max_gap = gap
+                        split_y = (y_centers[i] + y_centers[i+1]) / 2
+            else:
+                split_y = 500
+
+        unique_cards.sort(key=lambda b: b.coords[1])
 
-        for card in
+        for card in unique_cards:
             item = ResolvedItem(container_box=card)
-            item.name_box = next((n for n in names if self.is_contained(n, card)), None)
-            item.thumb_box = next((t for t in thumbs if self.is_contained(t, card)), None)
 
-            if self.
+            item.name_box = next((n for n in names if self.get_ioa(n, card) > 0.7), None)
+            item.thumb_box = next((t for t in thumbs if self.get_ioa(t, card) > 0.7), None)
+
+            if (card.coords[1] + card.coords[3]) / 2 < split_y:
                 layout.outgoing.items.append(item)
             else:
                 layout.incoming.items.append(item)
-
+
         for line in robux_lines:
-            val_box = next((v for v in values if self.
-
+            val_box = next((v for v in values if self.get_ioa(v, line) > 0.5), None)
             if val_box:
-                robux_obj =
-
-
-                )
-
-                if self.get_center(line)[1] < split_y:
-                    layout.outgoing.robux = robux_obj
+                robux_obj = ResolvedRobux(container_box=line, value_box=val_box)
+                if (line.coords[1] + line.coords[3]) / 2 < split_y:
+                    layout.outgoing.robux = robux_obj
                 else:
                     layout.incoming.robux = robux_obj
 
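The rewritten resolver deduplicates overlapping `item_card` detections by IoU, attaches names and thumbnails by intersection-over-child-area (`get_ioa`) rather than strict containment, and splits outgoing from incoming at the `received_header` when one is detected, otherwise at the midpoint of the largest vertical gap between parent-box centers. A standalone sketch of that fallback heuristic (the helper name and example values are illustrative, not part of the package):

```python
# Sketch of the fallback split used in SpatialResolver.resolve when no
# "received_header" box is detected: the outgoing/incoming boundary becomes
# the midpoint of the largest vertical gap between parent-box centers.
def largest_gap_split(y_centers: list[float], default: float = 500.0) -> float:
    if len(y_centers) < 2:
        return default
    y_centers = sorted(y_centers)
    max_gap = 0.0
    split_y = y_centers[0] + 50  # mirrors the resolver's initial guess
    for a, b in zip(y_centers, y_centers[1:]):
        if b - a > max_gap:
            max_gap = b - a
            split_y = (a + b) / 2
    return split_y

# Example: two cards near the top (outgoing) and one robux line near the bottom (incoming).
print(largest_gap_split([120, 180, 640]))  # -> 410.0
```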
proofreader/main.py
CHANGED
@@ -1,17 +1,22 @@
-import
+import io
 import cv2
 import torch
 import json
 import requests
+from typing import Union
+from pathlib import Path
+import numpy as np
 from tqdm import tqdm
 from transformers import CLIPProcessor, CLIPModel
 from .core.detector import TradeDetector
 from .core.resolver import SpatialResolver
 from .core.ocr import OCRReader
 from .core.matcher import VisualMatcher
-from .core.config import DB_PATH, MODEL_PATH, DEVICE, CLASS_MAP_PATH, CLIP_BEST_PATH, BASE_URL, CERTAIN_VISUAL_CONF
+from .core.config import DB_PATH, MODEL_PATH, DEVICE, CLASS_MAP_PATH, CLIP_BEST_PATH, BASE_URL, CERTAIN_VISUAL_CONF, VERSION_TAG, CLIP_VIT_BASE_PATCH32_PATH
 from .core.schema import ResolvedItem
 
+ImageInput = Union[str, Path, np.ndarray, bytes, io.BytesIO]
+
 class TradeEngine:
     def __init__(self):
         self._ensure_assets()
@@ -27,9 +32,6 @@ class TradeEngine:
 
         self.device = DEVICE
 
-        self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)
-        self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", use_fast=True)
-
         with open(DB_PATH, "r") as f:
             item_db = json.load(f)
 
@@ -39,24 +41,40 @@ class TradeEngine:
 
         self.matcher = VisualMatcher(
             item_db=item_db,
+            model_path=CLIP_VIT_BASE_PATCH32_PATH,
             weights_path=CLIP_BEST_PATH,
             mapping_path=CLASS_MAP_PATH,
             device=self.device
-        )
+        )
+
+    def _download_clip_locally(self):
+        model_name = "openai/clip-vit-base-patch32"
+        model = CLIPModel.from_pretrained(model_name)
+        processor = CLIPProcessor.from_pretrained(model_name)
+
+        model_path = str(CLIP_VIT_BASE_PATCH32_PATH.resolve().as_posix())
+
+        model.save_pretrained(model_path)
+        processor.save_pretrained(model_path)
+        print(f"✅ CLIP saved locally at {CLIP_VIT_BASE_PATCH32_PATH}")
 
     def _ensure_assets(self):
         assets = {
-            DB_PATH: f"{BASE_URL}/releases/download/
-            MODEL_PATH: f"{BASE_URL}/releases/download/
-            CLIP_BEST_PATH: f"{BASE_URL}/releases/download/
-            CLASS_MAP_PATH: f"{BASE_URL}/releases/download/
+            DB_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/item_database.json",
+            MODEL_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/yolo.pt",
+            CLIP_BEST_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/clip.pt",
+            CLASS_MAP_PATH: f"{BASE_URL}/releases/download/{VERSION_TAG}/class_mapping.json"
         }
 
         for path, url in assets.items():
             if not path.exists():
-                print(f"📦 {path.name} missing. Downloading from
+                print(f"📦 {path.name} missing. Downloading from {VERSION_TAG} release...")
                 self._download_file(url, path)
 
+        if not CLIP_VIT_BASE_PATCH32_PATH.exists():
+            print(f"📦 Local CLIP assets missing. Downloading to {CLIP_VIT_BASE_PATCH32_PATH}...")
+            self._download_clip_locally()
+
     def _download_file(self, url, dest_path):
         response = requests.get(url, stream=True)
         total_size = int(response.headers.get('content-length', 0))
@@ -110,15 +128,34 @@ class TradeEngine:
                 item.id = ocr_id_direct
                 item.name = self.matcher.id_to_name.get(str(ocr_id_direct))
 
-    def
-        if
-
+    def _load_image(self, image: ImageInput) -> np.ndarray:
+        if isinstance(image, np.ndarray):
+            return image
+
+        if isinstance(image, io.BytesIO):
+            image = image.getvalue()
+
+        if isinstance(image, bytes):
+            nparr = np.frombuffer(image, np.uint8)
+            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+            if img is None:
+                raise ValueError("Failed to decode image from bytes")
+            return img
+
+        image_str = str(image)
+        img = cv2.imread(image_str)
+        if img is None:
+            raise ValueError(f"Failed to load image: {image_str}")
+
+        return img
+
+    def process_image(self, image: ImageInput, conf_threshold: float) -> dict:
+        image_np = self._load_image(image)
 
-        boxes = self.detector.detect(
+        boxes = self.detector.detect(image_np, conf_threshold)
         layout = self.resolver.resolve(boxes)
-        image = cv2.imread(image_path)
 
-        self.matcher.match_item_visuals(
+        self.matcher.match_item_visuals(image_np, layout)
 
         for side in [layout.outgoing, layout.incoming]:
             for item in side.items:
@@ -128,7 +165,7 @@ class TradeEngine:
                 item._finalized = True
 
         self.reader.process_layout(
-
+            image_np,
             layout,
             skip_if=lambda item: getattr(item, "_finalized", False)
         )
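`process_image` now accepts any `ImageInput` (path string, `Path`, NumPy array, raw `bytes`, or `BytesIO`) and decodes it via `_load_image`, so callers can avoid a disk round-trip. A usage sketch; the import path assumes `TradeEngine` is used from `proofreader.main`, and the threshold value is illustrative:

```python
from pathlib import Path
import cv2
from proofreader.main import TradeEngine

engine = TradeEngine()  # downloads YOLO/CLIP assets on first run

result = engine.process_image("trade.png", conf_threshold=0.5)                     # path string
result = engine.process_image(Path("trade.png"), conf_threshold=0.5)               # pathlib.Path
result = engine.process_image(cv2.imread("trade.png"), conf_threshold=0.5)         # np.ndarray (BGR)
result = engine.process_image(Path("trade.png").read_bytes(), conf_threshold=0.5)  # raw bytes
```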
proofreader/train/emulator/generator.py
CHANGED

@@ -25,6 +25,104 @@ from proofreader.core.config import (
 
 GENERATOR_CONFIG = AUGMENTER_CONFIG["generator"]
 
+def clean_and_save_labels(page, width, height):
+    label_data = []
+
+    chat_bar = page.query_selector("#chat-main")
+    chat_box = chat_bar.bounding_box() if chat_bar and chat_bar.is_visible() else None
+
+    def get_intersection_area(boxA, boxB):
+        xA = max(boxA['x'], boxB['x'])
+        yA = max(boxA['y'], boxB['y'])
+        xB = min(boxA['x'] + boxA['width'], boxB['x'] + boxB['width'])
+        yB = min(boxA['y'] + boxA['height'], boxB['y'] + boxB['height'])
+        interWidth = max(0, xB - xA)
+        interHeight = max(0, yB - yA)
+        return interWidth * interHeight
+
+    def get_valid_yolo_data(box, class_id, pad=0, visibility_threshold=0.5):
+        if not box:
+            return None
+
+        x1_raw, y1_raw = box['x'] - pad, box['y'] - pad
+        x2_raw, y2_raw = box['x'] + box['width'] + pad, box['y'] + box['height'] + pad
+
+        padded_w = x2_raw - x1_raw
+        padded_h = y2_raw - y1_raw
+        original_area = max(1, padded_w * padded_h)
+
+        x1, y1 = max(0, x1_raw), max(0, y1_raw)
+        x2, y2 = min(width, x2_raw), min(height, y2_raw)
+
+        nw, nh = x2 - x1, y2 - y1
+        if nw <= 2 or nh <= 2:
+            return None
+
+        canvas_visible_area = nw * nh
+
+        overlap_with_chat = 0
+        if chat_box:
+            current_box = {'x': x1, 'y': y1, 'width': nw, 'height': nh}
+            overlap_with_chat = get_intersection_area(current_box, chat_box)
+
+        actual_visible_area = canvas_visible_area - overlap_with_chat
+
+        visibility_ratio = actual_visible_area / original_area
+
+        if visibility_ratio < visibility_threshold:
+            return None
+
+        return [
+            class_id,
+            (x1 + nw/2) / width,
+            (y1 + nh/2) / height,
+            nw / width,
+            nh / height
+        ]
+
+    items = page.query_selector_all("div[trade-item-card]")
+    for item in items:
+        if not item.is_visible(): continue
+
+        card_box = item.bounding_box()
+
+        card_res = get_valid_yolo_data(card_box, 0, pad=4, visibility_threshold=0.4)
+
+        if card_res:
+            label_data.append(card_res)
+
+        thumb = item.query_selector(".item-card-thumb-container")
+        name = item.query_selector(".item-card-name")
+
+        t_box = thumb.bounding_box() if thumb and thumb.is_visible() else None
+        n_box = name.bounding_box() if name and name.is_visible() else None
+
+        t_res = get_valid_yolo_data(t_box, 1, pad=4, visibility_threshold=0.5)
+        n_res = get_valid_yolo_data(n_box, 2, pad=4, visibility_threshold=0.5)
+
+        if t_res: label_data.append(t_res)
+        if n_res: label_data.append(n_res)
+
+    for section in page.query_selector_all(".robux-line"):
+        if section.is_visible() and "Robux Offered" in section.inner_text():
+            res_3 = get_valid_yolo_data(section.bounding_box(), 3, pad=6, visibility_threshold=0.4)
+            if res_3:
+                label_data.append(res_3)
+            val_el = section.query_selector(".robux-line-value")
+            if val_el and val_el.is_visible():
+                res_4 = get_valid_yolo_data(val_el.bounding_box(), 4, pad=4, visibility_threshold=0.5)
+                if res_4: label_data.append(res_4)
+
+    for header in page.query_selector_all("h3.trade-list-detail-offer-header"):
+        if not header.is_visible(): continue
+        text = header.inner_text().lower()
+        cid = 5 if "gave" in text else 6 if "received" in text else None
+        if cid:
+            res_h = get_valid_yolo_data(header.bounding_box(), cid, pad=4, visibility_threshold=0.4)
+            if res_h: label_data.append(res_h)
+
+    return label_data
+
 def process_batch(batch_ids, db, backgrounds_count, progress_counter):
     try:
         with sync_playwright() as p:
@@ -95,50 +193,7 @@ def generate_single_image(page, task_id, db, backgrounds_count, augmenter_js):
     }
     """)
 
-
-        box = element.bounding_box()
-        if not box: return None
-        x1, y1 = max(0, box['x'] - pad_px), max(0, box['y'] - pad_px)
-        x2, y2 = min(width, box['x'] + box['width'] + pad_px), min(height, box['y'] + box['height'] + pad_px)
-        nw, nh = x2 - x1, y2 - y1
-        return [class_id, (x1 + nw/2)/width, (y1 + nh/2)/height, nw/width, nh/height]
-
-    def is_fully_visible(box, width, height, pad=4):
-        return (box['x'] - pad >= 0 and
-                box['y'] - pad >= 0 and
-                (box['x'] + box['width'] + pad) <= width and
-                (box['y'] + box['height'] + pad) <= height)
-
-    label_data = []
-
-    items = page.query_selector_all("div[trade-item-card]")
-    for item in items:
-        box = item.bounding_box()
-        if box and is_fully_visible(box, width, height):
-            card_box = get_padded_yolo(item, 0, pad_px=4)
-            if card_box: label_data.append(card_box)
-
-            thumb = item.query_selector(".item-card-thumb-container")
-            if thumb:
-                thumb_box = get_padded_yolo(thumb, 1, pad_px=4)
-                if thumb_box: label_data.append(thumb_box)
-
-            name = item.query_selector(".item-card-name")
-            if name:
-                name_box = get_padded_yolo(name, 2, pad_px=4)
-                if name_box: label_data.append(name_box)
-
-    robux_sections = page.query_selector_all(".robux-line:not(.total-value)")
-    for section in robux_sections:
-        box = section.bounding_box()
-        if box and is_fully_visible(box, width, height, 8) and section.is_visible():
-            line_box = get_padded_yolo(section, 3, pad_px=8)
-            if line_box: label_data.append(line_box)
-
-            value_element = section.query_selector(".robux-line-value")
-            if value_element:
-                value_box = get_padded_yolo(value_element, 4, pad_px=4)
-                if value_box: label_data.append(value_box)
+    label_data = clean_and_save_labels(page, width, height)
 
     img_buffer = page.screenshot(type="jpeg", quality=100)
     nparr = np.frombuffer(img_buffer, np.uint8)
@@ -195,7 +250,7 @@ def generate_single_image(page, task_id, db, backgrounds_count, augmenter_js):
         for label in label_data:
             f.write(f"{label[0]} {label[1]:.6f} {label[2]:.6f} {label[3]:.6f} {label[4]:.6f}\n")
 
-def run_mass_generation(total_images=
+def run_mass_generation(total_images=16384, max_workers=24):
     with open(DB_PATH, "r") as f:
         db = json.load(f)
 
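The old inline label helpers are replaced by `clean_and_save_labels`, which pads each element's box, clips it to the page, subtracts any overlap with the chat panel, and drops labels whose visible fraction falls below a threshold; surviving boxes are written in normalized YOLO format (`class cx cy w h`). A small worked sketch of that final normalization step, with illustrative numbers (the helper name is not part of the package):

```python
# Sketch of the YOLO label row the generator writes: class id plus box center and
# size, each normalized by the page dimensions.
def to_yolo_row(class_id, x1, y1, x2, y2, width, height):
    nw, nh = x2 - x1, y2 - y1
    return [class_id, (x1 + nw / 2) / width, (y1 + nh / 2) / height, nw / width, nh / height]

row = to_yolo_row(0, 100, 200, 300, 260, width=1280, height=720)
print(" ".join(f"{v:.6f}" if i else str(v) for i, v in enumerate(row)))
# -> "0 0.156250 0.319444 0.156250 0.083333"
```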
proofreader/train/yolo_trainer.py
CHANGED

@@ -2,7 +2,7 @@ from ultralytics import YOLO
 from ..core.config import TRAINING_CONFIG, DATA_YAML_PATH
 
 def train_yolo(device):
-    model = YOLO("
+    model = YOLO("yolo26n.pt")
 
     model.train(
         data = DATA_YAML_PATH,
@@ -10,7 +10,7 @@ def train_yolo(device):
         imgsz = TRAINING_CONFIG["img_size"],
         device = device,
         plots = True,
-        multi_scale =
+        multi_scale = 0.5,
 
         batch = TRAINING_CONFIG["batch_size"],
         patience = TRAINING_CONFIG["patience"],
@@ -32,9 +32,9 @@ def finish_training(file_path, device):
 
     model.train(
         data = DATA_YAML_PATH,
-        epochs =
-        close_mosaic =
-        patience =
+        epochs = 28,
+        close_mosaic = 28,
+        patience = 6,
         imgsz = TRAINING_CONFIG["img_size"],
         batch = TRAINING_CONFIG["batch_size"],
         device = device
{rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/METADATA
CHANGED

@@ -1,10 +1,12 @@
 Metadata-Version: 2.4
 Name: rbx-proofreader
-Version: 1.
+Version: 1.2.0
 Summary: Visual trade detection and OCR engine
+Author: Luca Rose
 License: MIT
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -32,9 +34,9 @@ A high-speed vision pipeline for reading Roblox trade screenshots.
 [](LICENSE)
 [](https://github.com/lucacrose/proofreader/actions)
 [](https://developer.nvidia.com/cuda-zone)
-[](https://github.com/ultralytics/ultralytics)
 
-Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **
+Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLO26** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and extensions.
 
 ## Why Proofreader?
 
@@ -47,20 +49,20 @@ Roblox trade screenshots are commonly used as proof in marketplaces, moderation
 
 ## ⚡ Performance
 
-Tested on an **RTX 5070** using $n=
+Tested on an **RTX 5070** using $n=1300$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).
 
 | Metric                   | Result (E2E)                 |
 |:------------------------|:----------------------------|
-| Exact Match Accuracy     |
-| Median latency           |
-| 95th percentile latency  |
+| Exact Match Accuracy     | 98.4% (95% CI: 97.5–99.0%)   |
+| Median latency           | 28.0 ms                      |
+| 95th percentile latency  | 47.4 ms                      |
 
 > [!NOTE]
-> End-to-End **
+> Latencies above are reported End-to-End (**E2E**), including image loading, YOLO detection, spatial organization, CLIP matching, and OCR fallback. If passing images directly as NumPy arrays, median latency is 20.5 ms (35.0 ms P95).
 
 ## ✨ Key Features
 
-- **Sub-
+- **Sub-30ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.
 
 - **Multi-modal decision engine:** Weighs visual embeddings against OCR text to resolve identities across 2,500+ distinct item classes.
 
@@ -77,7 +79,7 @@ pip install rbx-proofreader
 ```
 
 > [!IMPORTANT]
-> **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-
+> **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-30ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.
 
 ### Usage
 
@@ -97,7 +99,7 @@ print(f"Robux In: {data['incoming']['robux_value']}")
 ## 🧩 How it Works
 The model handles the inconsistencies of user-generated screenshots (varied crops, UI themes, and extensions) through a multi-stage process:
 
-1. **Detection:**
+1. **Detection:** YOLO26 localizes item cards, thumbnails, and robux containers.
 
 2. **Spatial Organization:** Assigns child elements (names/values) to parents and determines trade side.
 
@@ -146,7 +148,7 @@ python scripts/train_models.py
 
 ## 🛠️ Tech Stack
 
-- **Vision:**
+- **Vision:** YOLO26 (Detection), CLIP (Embeddings), OpenCV (Processing)
 - **OCR:** EasyOCR
 - **Logic:** RapidFuzz (Fuzzy String Matching)
 - **Core:** Python 3.12, PyTorch, NumPy
rbx_proofreader-1.2.0.dist-info/RECORD
ADDED

@@ -0,0 +1,17 @@
+proofreader/__init__.py,sha256=YVsRxmHmC2nvCrxvNmZX230B1s5k36RFM51kElXSxB4,285
+proofreader/main.py,sha256=fR1wXfGtxDUWjMRi5bMElXR8BkXnbGNJ0QXg71mJwso,6491
+proofreader/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+proofreader/core/config.py,sha256=xNp4K9zcukkW7eBDwr2XG6SkOCBwc6-q1iK3UNgEFSE,5996
+proofreader/core/detector.py,sha256=usY7ORLF1utfdq8tRYdrojshFxfagsdNI3Ek3M4AoFY,1131
+proofreader/core/matcher.py,sha256=av5cB4JDjO4euoMxH5dcjnJCMpBytiFZQXOWNZvhFhw,3745
+proofreader/core/ocr.py,sha256=FFhIS1TVrqSXUPGOll5RNbHX18q7de4xFUP1ewrnhSc,3652
+proofreader/core/resolver.py,sha256=gFFIwah1n9y2y1ojnUF_AOB52hf-4NbKKXUyxNxWyao,3593
+proofreader/core/schema.py,sha256=ga_7cYCBO13yFvLAtyAgDw7CFEb9c8Ui85SJDu2pcsA,2512
+proofreader/train/clip_trainer.py,sha256=6hiVrJ6WX6m13E3FE8kouIxXjQo3GPrU_8X266oeXqs,6416
+proofreader/train/yolo_trainer.py,sha256=ppVoQPpueMH4jfXCpyngo2ts9UEqS9I2WyecBIQe0Ac,984
+proofreader/train/emulator/generator.py,sha256=vmqDcgrht43m8T0mRfXeT3GbYTQlEbOKaEX6oz_9viw,11488
+rbx_proofreader-1.2.0.dist-info/licenses/LICENSE,sha256=eHSaONn9P_ZcYiY9QCi_XzVARIoQu7l2AI5BtFGA_BY,1069
+rbx_proofreader-1.2.0.dist-info/METADATA,sha256=UXY6DSMfY8zo9WK6F0uFHnVzaGlmI8WADn78lk41-DU,6730
+rbx_proofreader-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+rbx_proofreader-1.2.0.dist-info/top_level.txt,sha256=U3s8IVdLtGeGD3JgMmCHUgAsFhZXSSamp3vIojAFTxU,12
+rbx_proofreader-1.2.0.dist-info/RECORD,,
rbx_proofreader-1.1.1.dist-info/RECORD
DELETED

@@ -1,17 +0,0 @@
-proofreader/__init__.py,sha256=YVsRxmHmC2nvCrxvNmZX230B1s5k36RFM51kElXSxB4,285
-proofreader/main.py,sha256=01G_-ppevuNNafi-QCc6UB_Y2NuIW6sDoZwvjjdm1B0,5220
-proofreader/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-proofreader/core/config.py,sha256=8L6MTBn3Z3Xa0bjPYt5q-OI-mm0-wMqeDSS0beAQ1fk,5906
-proofreader/core/detector.py,sha256=em2Kx0v96Zofi4kK5ipWlqMX9czq9YobHuEGuZkAQEc,987
-proofreader/core/matcher.py,sha256=4URgBb6EgBaCNFpafjnQrIot9KeIwoYUUNraaC9nlIk,3603
-proofreader/core/ocr.py,sha256=FFhIS1TVrqSXUPGOll5RNbHX18q7de4xFUP1ewrnhSc,3652
-proofreader/core/resolver.py,sha256=DTbf5qyQaJrBbw1QWQQJ_BZf_dg003p_xH8RMpI6sn8,2685
-proofreader/core/schema.py,sha256=ga_7cYCBO13yFvLAtyAgDw7CFEb9c8Ui85SJDu2pcsA,2512
-proofreader/train/clip_trainer.py,sha256=6hiVrJ6WX6m13E3FE8kouIxXjQo3GPrU_8X266oeXqs,6416
-proofreader/train/yolo_trainer.py,sha256=nOHPrYmBuefsUyiGEYqboNU6i3pykBXE0U4HYwNaqg8,986
-proofreader/train/emulator/generator.py,sha256=_l7qFLSoQxPYUKLDrqVIS-0sUs5FkjBK7ENWmZ-q2ls,9681
-rbx_proofreader-1.1.1.dist-info/licenses/LICENSE,sha256=eHSaONn9P_ZcYiY9QCi_XzVARIoQu7l2AI5BtFGA_BY,1069
-rbx_proofreader-1.1.1.dist-info/METADATA,sha256=CNi-FAGJvwoEL6LPmpL8m39canCcXsg_idlPTQFVeFA,6568
-rbx_proofreader-1.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-rbx_proofreader-1.1.1.dist-info/top_level.txt,sha256=U3s8IVdLtGeGD3JgMmCHUgAsFhZXSSamp3vIojAFTxU,12
-rbx_proofreader-1.1.1.dist-info/RECORD,,
{rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/WHEEL
File without changes

{rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/licenses/LICENSE
File without changes

{rbx_proofreader-1.1.1.dist-info → rbx_proofreader-1.2.0.dist-info}/top_level.txt
File without changes