rbx-proofreader 1.0.1__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. rbx_proofreader-1.1.1/PKG-INFO +160 -0
  2. rbx_proofreader-1.1.1/README.md +136 -0
  3. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/pyproject.toml +10 -2
  4. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/proofreader/core/config.py +10 -6
  5. rbx_proofreader-1.1.1/src/proofreader/core/matcher.py +89 -0
  6. rbx_proofreader-1.1.1/src/proofreader/core/ocr.py +92 -0
  7. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/proofreader/core/schema.py +8 -0
  8. rbx_proofreader-1.1.1/src/proofreader/main.py +140 -0
  9. rbx_proofreader-1.1.1/src/proofreader/train/clip_trainer.py +173 -0
  10. rbx_proofreader-1.1.1/src/proofreader/train/emulator/generator.py +234 -0
  11. rbx_proofreader-1.0.1/src/proofreader/train/train.py → rbx_proofreader-1.1.1/src/proofreader/train/yolo_trainer.py +5 -8
  12. rbx_proofreader-1.1.1/src/rbx_proofreader.egg-info/PKG-INFO +160 -0
  13. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/rbx_proofreader.egg-info/SOURCES.txt +2 -2
  14. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/rbx_proofreader.egg-info/requires.txt +3 -1
  15. rbx_proofreader-1.0.1/PKG-INFO +0 -128
  16. rbx_proofreader-1.0.1/README.md +0 -107
  17. rbx_proofreader-1.0.1/src/proofreader/core/matcher.py +0 -67
  18. rbx_proofreader-1.0.1/src/proofreader/core/ocr.py +0 -79
  19. rbx_proofreader-1.0.1/src/proofreader/main.py +0 -89
  20. rbx_proofreader-1.0.1/src/proofreader/train/builder.py +0 -94
  21. rbx_proofreader-1.0.1/src/proofreader/train/emulator/generator.py +0 -186
  22. rbx_proofreader-1.0.1/src/rbx_proofreader.egg-info/PKG-INFO +0 -128
  23. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/LICENSE +0 -0
  24. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/setup.cfg +0 -0
  25. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/proofreader/__init__.py +0 -0
  26. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/proofreader/core/__init__.py +0 -0
  27. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/proofreader/core/detector.py +0 -0
  28. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/proofreader/core/resolver.py +0 -0
  29. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/rbx_proofreader.egg-info/dependency_links.txt +0 -0
  30. {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.1}/src/rbx_proofreader.egg-info/top_level.txt +0 -0
@@ -0,0 +1,160 @@
+ Metadata-Version: 2.4
+ Name: rbx-proofreader
+ Version: 1.1.1
+ Summary: Visual trade detection and OCR engine
+ License: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: easyocr>=1.7.0
+ Requires-Dist: numpy>=1.24.0
+ Requires-Dist: opencv-python>=4.8.0
+ Requires-Dist: Pillow>=10.0.0
+ Requires-Dist: rapidfuzz>=3.0.0
+ Requires-Dist: requests>=2.31.0
+ Requires-Dist: torch>=2.0.0
+ Requires-Dist: tqdm>=4.66.0
+ Requires-Dist: transformers>=4.30.0
+ Requires-Dist: ultralytics>=8.0.0
+ Provides-Extra: train
+ Requires-Dist: playwright>=1.40.0; extra == "train"
+ Dynamic: license-file
+
+ # Proofreader 🔍
+
+ A high-speed vision pipeline for reading Roblox trade screenshots.
+
+ [![PyPI](https://img.shields.io/pypi/v/rbx-proofreader?color=blue&label=PyPI)](https://pypi.org/project/rbx-proofreader/)
+ [![Downloads](https://static.pepy.tech/badge/rbx-proofreader)](https://pepy.tech/project/rbx-proofreader)
+ [![Python](https://img.shields.io/pypi/pyversions/rbx-proofreader?logo=python&logoColor=white&color=blue)](https://pypi.org/project/rbx-proofreader/)
+ [![License](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
+ [![Build Status](https://github.com/lucacrose/proofreader/actions/workflows/build.yml/badge.svg)](https://github.com/lucacrose/proofreader/actions)
+ [![GPU](https://img.shields.io/badge/GPU-CUDA-blueviolet)](https://developer.nvidia.com/cuda-zone)
+ [![YOLOv11](https://img.shields.io/badge/model-YOLOv11-blueviolet)](https://github.com/ultralytics/ultralytics)
+
+ Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLOv11** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and extensions.
+
+ ## Why Proofreader?
+
+ Roblox trade screenshots are commonly used as proof in marketplaces, moderation workflows, and value analysis, yet verifying them is manual and error-prone. Proofreader automates this process by converting screenshots into structured, verifiable data in milliseconds.
+
+
+ ## Example
+
+ ![Example](https://github.com/lucacrose/proofreader/raw/main/docs/assets/example.png)
+
+ ## ⚡ Performance
+
+ Tested on an **RTX 5070** using $n=500$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).
+
+ | Metric | Result (E2E) |
+ |:------------------------|:----------------------------|
+ | Exact Match Accuracy | 97.2% (95% CI: 95.4–98.5%) |
+ | Median latency | 36.8 ms |
+ | 95th percentile latency | 73.4 ms |
+
+ > [!NOTE]
+ > End-to-End **(E2E)** latency includes image loading, YOLO detection, spatial organization, CLIP similarity matching, and OCR fallback.
+
+ ## ✨ Key Features
+
+ - **Sub-40ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.
+
+ - **Multi-modal decision engine:** Weighs visual embeddings against OCR text to resolve identities across 2,500+ distinct item classes.
+
+ - **Fuzzy Logic Recovery:** Built-in string distance matching corrects OCR typos and text obscurations against a local asset database.
+
+ - **Theme & Scale Agnostic:** Robust performance across various UI themes (Dark/Light), resolutions, and custom display scales.
+
+ ## 💻 Quick Start
+
+ ### Installation
+
+ ```bash
+ pip install rbx-proofreader
+ ```
+
+ > [!IMPORTANT]
+ > **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-40ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.
+
+ ### Usage
+
+ ```py
+ import proofreader
+
+ # Extract metadata from a screenshot
+ data = proofreader.get_trade_data("trade_proof.png")
+
+ print(f"Items Out: {data['outgoing']['item_count']}")
+ print(f"Robux In: {data['incoming']['robux_value']}")
+ ```
+
+ > [!TIP]
+ > **First Run:** On your first execution, Proofreader will automatically download the model weights and item database (~360MB). Subsequent runs will use the local cache for maximum speed.
+
+ ## 🧩 How it Works
+ The pipeline handles the inconsistencies of user-generated screenshots (varied crops, UI themes, and extensions) through a multi-stage process:
+
+ 1. **Detection:** YOLOv11 localizes item cards, thumbnails, and robux containers.
+
+ 2. **Spatial Organization:** Assigns child elements (names/values) to parents and determines trade side.
+
+ 3. **Identification:** CLIP performs similarity matching. High-confidence results become Resolved Items immediately.
+
+ 4. **Heuristic Judge:** Low-confidence visual matches trigger OCR and fuzzy-logic reconciliation.
+
+ ![Diagram](https://github.com/lucacrose/proofreader/raw/main/docs/assets/flow_diagram.png)
+
+ ## 📊 Data Schema
+ The `get_trade_data()` function returns a structured dictionary containing `incoming` and `outgoing` trade sides.
+
+ | Key | Type | Description |
+ | :--- | :--- | :--- |
+ | `item_count` | `int` | Number of distinct item boxes detected. |
+ | `robux_value` | `int` | Total Robux parsed from the trade. |
+ | `items` | `list` | List of `ResolvedItem` objects containing `id` and `name`. |
+
+ **ResolvedItem Schema:**
+
+ | Property | Type | Description |
+ | :--- | :--- | :--- |
+ | `id` | `int` | The official Roblox Asset ID. |
+ | `name` | `str` | Canonical item name from the database. |
+
+ ## 🏗️ Development & Training
+ To set up a custom training environment for the YOLO and CLIP models:
+
+ ```bash
+ # 1. Clone and Install
+ git clone https://github.com/lucacrose/proofreader.git
+ cd proofreader
+ pip install -e ".[train]"
+
+ # 2. Initialize Database
+ python scripts/setup_items.py
+
+ # 3. Training
+ # Place backgrounds in src/proofreader/train/emulator/backgrounds
+ # Place HTML templates in src/proofreader/train/emulator/templates
+ python scripts/train_models.py
+ ```
+
+ > [!CAUTION]
+ > **GPU Required:** Training is not recommended on a CPU. Final models save to `runs/train/weights/best.pt`. Rename to `yolo.pt` and move to `src/assets/weights`.
+
+ ## 🛠️ Tech Stack
+
+ - **Vision:** YOLOv11 (Detection), CLIP (Embeddings), OpenCV (Processing)
+ - **OCR:** EasyOCR
+ - **Logic:** RapidFuzz (Fuzzy String Matching)
+ - **Core:** Python 3.12, PyTorch, NumPy
+
+ ## 🤝 Contributing
+
+ Contributions are welcome! Please open an issue or submit a pull request.
+
+ ## 📜 License
+
+ This project is licensed under the MIT License.
@@ -0,0 +1,136 @@
+ # Proofreader 🔍
+
+ A high-speed vision pipeline for reading Roblox trade screenshots.
+
+ [![PyPI](https://img.shields.io/pypi/v/rbx-proofreader?color=blue&label=PyPI)](https://pypi.org/project/rbx-proofreader/)
+ [![Downloads](https://static.pepy.tech/badge/rbx-proofreader)](https://pepy.tech/project/rbx-proofreader)
+ [![Python](https://img.shields.io/pypi/pyversions/rbx-proofreader?logo=python&logoColor=white&color=blue)](https://pypi.org/project/rbx-proofreader/)
+ [![License](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
+ [![Build Status](https://github.com/lucacrose/proofreader/actions/workflows/build.yml/badge.svg)](https://github.com/lucacrose/proofreader/actions)
+ [![GPU](https://img.shields.io/badge/GPU-CUDA-blueviolet)](https://developer.nvidia.com/cuda-zone)
+ [![YOLOv11](https://img.shields.io/badge/model-YOLOv11-blueviolet)](https://github.com/ultralytics/ultralytics)
+
+ Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLOv11** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and extensions.
+
+ ## Why Proofreader?
+
+ Roblox trade screenshots are commonly used as proof in marketplaces, moderation workflows, and value analysis, yet verifying them is manual and error-prone. Proofreader automates this process by converting screenshots into structured, verifiable data in milliseconds.
+
+
+ ## Example
+
+ ![Example](https://github.com/lucacrose/proofreader/raw/main/docs/assets/example.png)
+
+ ## ⚡ Performance
+
+ Tested on an **RTX 5070** using $n=500$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).
+
+ | Metric | Result (E2E) |
+ |:------------------------|:----------------------------|
+ | Exact Match Accuracy | 97.2% (95% CI: 95.4–98.5%) |
+ | Median latency | 36.8 ms |
+ | 95th percentile latency | 73.4 ms |
+
+ > [!NOTE]
+ > End-to-End **(E2E)** latency includes image loading, YOLO detection, spatial organization, CLIP similarity matching, and OCR fallback.
+
+ ## ✨ Key Features
+
+ - **Sub-40ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.
+
+ - **Multi-modal decision engine:** Weighs visual embeddings against OCR text to resolve identities across 2,500+ distinct item classes.
+
+ - **Fuzzy Logic Recovery:** Built-in string distance matching corrects OCR typos and text obscurations against a local asset database.
+
+ - **Theme & Scale Agnostic:** Robust performance across various UI themes (Dark/Light), resolutions, and custom display scales.
+
+ ## 💻 Quick Start
+
+ ### Installation
+
+ ```bash
+ pip install rbx-proofreader
+ ```
+
+ > [!IMPORTANT]
+ > **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-40ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.
+
+ ### Usage
+
+ ```py
+ import proofreader
+
+ # Extract metadata from a screenshot
+ data = proofreader.get_trade_data("trade_proof.png")
+
+ print(f"Items Out: {data['outgoing']['item_count']}")
+ print(f"Robux In: {data['incoming']['robux_value']}")
+ ```
+
+ > [!TIP]
+ > **First Run:** On your first execution, Proofreader will automatically download the model weights and item database (~360MB). Subsequent runs will use the local cache for maximum speed.
+
+ ## 🧩 How it Works
+ The pipeline handles the inconsistencies of user-generated screenshots (varied crops, UI themes, and extensions) through a multi-stage process:
+
+ 1. **Detection:** YOLOv11 localizes item cards, thumbnails, and robux containers.
+
+ 2. **Spatial Organization:** Assigns child elements (names/values) to parents and determines trade side.
+
+ 3. **Identification:** CLIP performs similarity matching. High-confidence results become Resolved Items immediately.
+
+ 4. **Heuristic Judge:** Low-confidence visual matches trigger OCR and fuzzy-logic reconciliation.
+
+ ![Diagram](https://github.com/lucacrose/proofreader/raw/main/docs/assets/flow_diagram.png)
+
+ ## 📊 Data Schema
+ The `get_trade_data()` function returns a structured dictionary containing `incoming` and `outgoing` trade sides.
+
+ | Key | Type | Description |
+ | :--- | :--- | :--- |
+ | `item_count` | `int` | Number of distinct item boxes detected. |
+ | `robux_value` | `int` | Total Robux parsed from the trade. |
+ | `items` | `list` | List of `ResolvedItem` objects containing `id` and `name`. |
+
+ **ResolvedItem Schema:**
+
+ | Property | Type | Description |
+ | :--- | :--- | :--- |
+ | `id` | `int` | The official Roblox Asset ID. |
+ | `name` | `str` | Canonical item name from the database. |
+
+ ## 🏗️ Development & Training
+ To set up a custom training environment for the YOLO and CLIP models:
+
+ ```bash
+ # 1. Clone and Install
+ git clone https://github.com/lucacrose/proofreader.git
+ cd proofreader
+ pip install -e ".[train]"
+
+ # 2. Initialize Database
+ python scripts/setup_items.py
+
+ # 3. Training
+ # Place backgrounds in src/proofreader/train/emulator/backgrounds
+ # Place HTML templates in src/proofreader/train/emulator/templates
+ python scripts/train_models.py
+ ```
+
+ > [!CAUTION]
+ > **GPU Required:** Training is not recommended on a CPU. Final models save to `runs/train/weights/best.pt`. Rename to `yolo.pt` and move to `src/assets/weights`.
+
+ ## 🛠️ Tech Stack
+
+ - **Vision:** YOLOv11 (Detection), CLIP (Embeddings), OpenCV (Processing)
+ - **OCR:** EasyOCR
+ - **Logic:** RapidFuzz (Fuzzy String Matching)
+ - **Core:** Python 3.12, PyTorch, NumPy
+
+ ## 🤝 Contributing
+
+ Contributions are welcome! Please open an issue or submit a pull request.
+
+ ## 📜 License
+
+ This project is licensed under the MIT License.
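To make the two schema tables above concrete, here is a sketch of the dictionary `get_trade_data()` would return for a two-items-for-Robux trade. The keys follow the README's schema tables; every literal value is invented for illustration, and `items` would actually hold `ResolvedItem` objects rather than the plain dicts shown here:

```py
# Hypothetical output of proofreader.get_trade_data("trade_proof.png").
# Keys follow the Data Schema tables; all values are made up.
{
    "outgoing": {
        "item_count": 2,
        "robux_value": 0,
        "items": [
            # ResolvedItem fields (id, name) shown inline for readability
            {"id": 123456, "name": "Some Limited Hat"},
            {"id": 234567, "name": "Another Limited"},
        ],
    },
    "incoming": {
        "item_count": 0,
        "robux_value": 25000,
        "items": [],
    },
}
```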
@@ -4,17 +4,22 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "rbx-proofreader"
- version = "1.0.1"
+ version = "1.1.1"
  description = "Visual trade detection and OCR engine"
  readme = "README.md"
  requires-python = ">=3.12"
  license = {text = "MIT"}
+
+ classifiers = [
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.12",
+ ]
+
  dependencies = [
      "easyocr>=1.7.0",
      "numpy>=1.24.0",
      "opencv-python>=4.8.0",
      "Pillow>=10.0.0",
-     "playwright>=1.40.0",
      "rapidfuzz>=3.0.0",
      "requests>=2.31.0",
      "torch>=2.0.0",
@@ -23,6 +28,9 @@ dependencies = [
      "ultralytics>=8.0.0"
  ]

+ [project.optional-dependencies]
+ train = ["playwright>=1.40.0"]
+
  [tool.setuptools]
  package-dir = {"" = "src"}

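The net effect of the packaging change: `playwright`, presumably needed only for rendering the HTML-based screenshot emulator under `train/`, leaves the runtime dependency set and moves behind the new `train` extra. Installation therefore splits into:

```bash
# Runtime install, now without playwright
pip install rbx-proofreader

# Training / dataset-generation install, opting into the new extra
pip install "rbx-proofreader[train]"
```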
@@ -5,13 +5,17 @@ from pathlib import Path
  # --- BASE PATHS ---
  # Resolves to the 'proofreader' root directory
  BASE_DIR = Path(__file__).resolve().parent.parent.parent
+ BASE_URL = "https://github.com/lucacrose/proofreader"

  # --- ASSETS & MODELS ---
  ASSETS_PATH = BASE_DIR / "assets"
  MODEL_PATH = ASSETS_PATH / "weights" / "yolo.pt"
- DB_PATH = ASSETS_PATH / "db.json"
- CACHE_PATH = ASSETS_PATH / "embedding_bank.pt"
+ DB_PATH = ASSETS_PATH / "item_database.json"
+ CACHE_PATH = ASSETS_PATH / "item_embeddings_bank.pt"
  THUMBNAILS_DIR = ASSETS_PATH / "thumbnails"
+ TRAIN_THUMBNAILS_DIR = ASSETS_PATH / "train_data"
+ CLASS_MAP_PATH = ASSETS_PATH / "class_mapping.json"
+ CLIP_BEST_PATH = ASSETS_PATH / "weights" / "clip.pt"

  # --- TRAINING & EMULATOR ---
  TRAIN_DIR = BASE_DIR / "proofreader" / "train"
@@ -26,11 +30,11 @@ DEFAULT_TEMPLATE = TEMPLATES_DIR / "trade_ui.html"

  # --- HYPERPARAMETERS (Training Settings) ---
  TRAINING_CONFIG = {
-     "epochs": 100,  # Number of times the model sees the whole dataset
+     "epochs": 240,  # Number of times the model sees the whole dataset
      "batch_size": 16,  # Number of images processed at once
      "img_size": 640,  # Standard YOLO resolution
-     "patience": 10,  # Stop early if no improvement for 10 epochs
-     "close_mosaic_epochs": 10  # Disable mosaic augmentation for the last N epochs
+     "patience": 20,  # Stop early if no improvement for 20 epochs
+     "close_mosaic_epochs": 32  # Disable mosaic augmentation for the last N epochs
  }

  # --- AUGMENTER PROBABILITIES AND GENERATOR SETTINGS ---
@@ -82,7 +86,7 @@ AUGMENTER_CONFIG = {

  # Robustness Thresholds
  FUZZY_MATCH_CONFIDENCE_THRESHOLD = 60.0
- VISUAL_MATCH_THRESHOLD = 0.88
+ CERTAIN_VISUAL_CONF = 0.995

  # --- HARDWARE SETTINGS ---
  # Automatically detects if a GPU is available for faster training
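The threshold rename above pairs with the new `skip_if` hook in `ocr.py` (shown later in this diff): the much stricter 0.995 gate reads like the README's OCR fast-path. The glue code is not part of this diff, so the following is only a sketch of how the gate plausibly gets wired, with `matcher`, `ocr`, `image`, and `layout` assumed to be in scope:

```py
from proofreader.core.config import CERTAIN_VISUAL_CONF

# Assumed wiring: match visually first, then OCR only the crops whose
# CLIP confidence falls below the near-certain gate (0.995).
matcher.match_item_visuals(image, layout)  # fills item.visual_conf
ocr.process_layout(
    image,
    layout,
    skip_if=lambda item: item.visual_conf >= CERTAIN_VISUAL_CONF,
)
```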
@@ -0,0 +1,89 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import numpy as np
+ import json
+ import cv2
+ from PIL import Image
+ from torchvision import transforms
+ from transformers import CLIPVisionModelWithProjection
+ from typing import List
+ from .schema import TradeLayout, ResolvedItem
+
+ class CLIPItemEmbedder(nn.Module):
+     def __init__(self, num_classes, model_id="openai/clip-vit-base-patch32"):
+         super().__init__()
+         self.vision_encoder = CLIPVisionModelWithProjection.from_pretrained(model_id)
+         self.item_prototypes = nn.Embedding(num_classes, 512)
+         self.logit_scale = nn.Parameter(torch.ones([]) * 2.659)
+
+     def forward(self, pixel_values):
+         outputs = self.vision_encoder(pixel_values=pixel_values)
+         return F.normalize(outputs.image_embeds, p=2, dim=-1)
+
+ class VisualMatcher:
+     def __init__(self, weights_path: str, mapping_path: str, item_db: List[dict], device: str = "cuda"):
+         self.device = device
+
+         with open(mapping_path, "r") as f:
+             self.class_to_idx = json.load(f)
+         self.idx_to_class = {v: k for k, v in self.class_to_idx.items()}
+
+         self.id_to_name = {str(i["id"]): i["name"] for i in item_db}
+         self.name_to_id = {str(i["name"]).lower().strip(): i["id"] for i in item_db}
+
+         num_classes = len(self.class_to_idx)
+         self.model = CLIPItemEmbedder(num_classes).to(self.device)
+         self.model.load_state_dict(torch.load(weights_path, map_location=self.device))
+         self.model.eval()
+
+         with torch.inference_mode():
+             self.bank_tensor = F.normalize(self.model.item_prototypes.weight, p=2, dim=-1)
+
+         self.preprocess = transforms.Compose([
+             transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BICUBIC),
+             transforms.ToTensor(),
+             transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
+                                  (0.26862954, 0.26130258, 0.27577711)),
+         ])
+
+     def match_item_visuals(self, image: np.ndarray, layout: TradeLayout):
+         items_to_process: List[ResolvedItem] = []
+         crops = []
+
+         for side in (layout.outgoing.items, layout.incoming.items):
+             for item in side:
+                 if item.thumb_box:
+                     x1, y1, x2, y2 = item.thumb_box.coords
+                     crop = image[y1:y2, x1:x2]
+                     if crop.size > 0:
+                         pil_img = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
+                         processed_crop = self.preprocess(pil_img)
+                         crops.append(processed_crop)
+                         items_to_process.append(item)
+
+         if not crops:
+             return
+
+         batch_tensor = torch.stack(crops).to(self.device)
+
+         with torch.inference_mode():
+             query_features = self.model(batch_tensor)
+
+         logits = query_features @ self.bank_tensor.t() * self.model.logit_scale.exp()
+         topk_scores, topk_indices = logits.topk(k=5, dim=1)
+
+         probs = F.softmax(topk_scores.float(), dim=1)
+
+         best_idx_in_topk = probs.argmax(dim=1)
+         best_indices = topk_indices[torch.arange(len(topk_indices)), best_idx_in_topk]
+         best_probs = probs[torch.arange(len(probs)), best_idx_in_topk]
+
+
+         for i, item in enumerate(items_to_process):
+             visual_idx = best_indices[i].item()
+
+             visual_match_id_str = self.idx_to_class[visual_idx]
+
+             item.visual_id = int(visual_match_id_str)
+             item.visual_conf = float(best_probs[i].item())
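A design note on the matcher above: `visual_conf` is a softmax over only the top-5 logits, so it measures how decisively the best prototype beats its nearest rivals rather than its absolute probability over all ~2,500 classes (which is why a very strict 0.995 gate is workable). A self-contained sketch of that computation, with random tensors standing in for real CLIP embeddings and the learned prototype bank:

```py
# Standalone illustration of the top-5 softmax confidence trick in matcher.py;
# random tensors replace real embeddings, shapes and scale mirror the code above.
import torch
import torch.nn.functional as F

torch.manual_seed(0)
queries = F.normalize(torch.randn(3, 512), dim=-1)   # 3 thumbnail embeddings
bank = F.normalize(torch.randn(2500, 512), dim=-1)   # ~2,500 item prototypes
logit_scale = torch.tensor(2.659).exp()              # matches the init above

logits = queries @ bank.t() * logit_scale
topk_scores, topk_indices = logits.topk(k=5, dim=1)  # sorted descending
probs = F.softmax(topk_scores, dim=1)                # mass over the top 5 only

rows = torch.arange(len(probs))
best = probs.argmax(dim=1)        # always column 0, since topk is sorted
print(topk_indices[rows, best])   # winning class index per crop -> visual_id
print(probs[rows, best])          # winner's softmax mass        -> visual_conf
```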
@@ -0,0 +1,92 @@
+ import cv2
+ import easyocr
+ import numpy as np
+ import re
+ from rapidfuzz import process, utils
+ from .schema import TradeLayout
+ from proofreader.core.config import FUZZY_MATCH_CONFIDENCE_THRESHOLD, OCR_LANGUAGES, OCR_USE_GPU
+
+ class OCRReader:
+     def __init__(self, item_list, languages=OCR_LANGUAGES, gpu=OCR_USE_GPU):
+         self.reader = easyocr.Reader(languages, gpu=gpu)
+         self.item_names = [item["name"] for item in item_list]
+
+     def _fuzzy_match_name(self, raw_text: str, threshold: float = FUZZY_MATCH_CONFIDENCE_THRESHOLD) -> str:
+         if not raw_text or len(raw_text) < 2:
+             return raw_text
+
+         match = process.extractOne(
+             raw_text,
+             self.item_names,
+             processor=utils.default_process
+         )
+
+         if match and match[1] >= threshold:
+             return match[0]
+
+         return raw_text
+
+     def _clean_robux_text(self, raw_text: str) -> int:
+         cleaned = raw_text.upper().strip()
+         substitutions = {
+             ',': '', '.': '', ' ': '',
+             'S': '5', 'O': '0', 'I': '1',
+             'L': '1', 'B': '8', 'G': '6'
+         }
+         for char, sub in substitutions.items():
+             cleaned = cleaned.replace(char, sub)
+
+         digits = re.findall(r'\d+', cleaned)
+         return int("".join(digits)) if digits else 0
+
+     def process_layout(self, image: np.ndarray, layout: TradeLayout, skip_if=None):
+         all_items = layout.outgoing.items + layout.incoming.items
+         crops = []
+         target_refs = []
+         STD_H = 64
+
+         for item in all_items:
+             if skip_if and skip_if(item):
+                 continue
+
+             if item.name_box:
+                 x1, y1, x2, y2 = item.name_box.coords
+                 crop = image[max(0, y1-2):y2+2, max(0, x1-2):x2+2]
+                 if crop.size > 0:
+                     gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
+                     h, w = gray.shape
+                     new_w = int(w * (STD_H / h))
+                     resized = cv2.resize(gray, (new_w, STD_H), interpolation=cv2.INTER_LINEAR)
+                     crops.append(resized)
+                     target_refs.append({'type': 'item', 'obj': item})
+
+         for side in [layout.outgoing, layout.incoming]:
+             if side.robux and side.robux.value_box:
+                 x1, y1, x2, y2 = side.robux.value_box.coords
+                 crop = image[max(0, y1-2):y2+2, max(0, x1-2):x2+2]
+                 if crop.size > 0:
+                     gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
+                     h, w = gray.shape
+                     new_w = int(w * (STD_H / h))
+                     resized = cv2.resize(gray, (new_w, STD_H), interpolation=cv2.INTER_LINEAR)
+                     crops.append(resized)
+                     target_refs.append({'type': 'robux', 'obj': side.robux})
+
+         if not crops:
+             return
+
+         max_w = max(c.shape[1] for c in crops)
+         padded_crops = [cv2.copyMakeBorder(c, 0, 0, 0, max_w - c.shape[1], cv2.BORDER_CONSTANT, value=0) for c in crops]
+
+         batch_results = self.reader.readtext_batched(padded_crops, batch_size=len(padded_crops))
+
+         for i, res in enumerate(batch_results):
+             raw_text = " ".join([text_info[1] for text_info in res]).strip()
+             conf = np.mean([text_info[2] for text_info in res]) if res else 0.0
+
+             target = target_refs[i]
+             if target['type'] == 'item':
+                 target['obj'].text_name = raw_text
+                 target['obj'].text_conf = float(conf)
+             else:
+                 target['obj'].value = self._clean_robux_text(raw_text)
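One subtlety in `_clean_robux_text`: the character substitutions run before digit extraction, so confusable glyphs (S/5, O/0, I/1, and so on) are repaired even in the middle of a number. Note also that `_fuzzy_match_name` is defined but never called inside `process_layout`, so the fuzzy pass presumably happens downstream. A pure-function copy of the cleaning logic, lifted out so it can be run without instantiating an EasyOCR reader:

```py
import re

def clean_robux_text(raw_text: str) -> int:
    """Same logic as OCRReader._clean_robux_text above, extracted for illustration."""
    cleaned = raw_text.upper().strip()
    for char, sub in {',': '', '.': '', ' ': '',
                      'S': '5', 'O': '0', 'I': '1',
                      'L': '1', 'B': '8', 'G': '6'}.items():
        cleaned = cleaned.replace(char, sub)
    digits = re.findall(r'\d+', cleaned)
    return int("".join(digits)) if digits else 0

assert clean_robux_text("1,2S0") == 1250       # comma dropped, S read as 5
assert clean_robux_text("R$ 25.O00") == 25000  # separator dropped, O read as 0
assert clean_robux_text("---") == 0            # no digits -> safe default
```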
@@ -15,6 +15,14 @@ class ResolvedItem:
      thumb_box: Optional[Box] = None
      name_box: Optional[Box] = None

+     visual_id: int = -1
+     visual_conf: float = 0
+
+     text_name: str = ""
+     text_conf: float = 0
+
+     _finalized: bool = False
+
  @dataclass
  class ResolvedRobux:
      value: int = 0
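These new fields carry both modalities through the pipeline: `match_item_visuals` fills `visual_id`/`visual_conf`, `process_layout` fills `text_name`/`text_conf`, and reconciliation happens in `resolver.py`, which is unchanged in this diff (+0 -0). The rule below is therefore only an assumed sketch of the README's "Heuristic Judge", not the package's actual logic:

```py
# Hedged sketch of how the new ResolvedItem fields might be reconciled; the
# real rule lives in resolver.py, which this diff does not show.
def judge(item, matcher, certain_visual_conf=0.995):
    if item.visual_conf >= certain_visual_conf:
        return item.visual_id                  # fast path: near-certain CLIP match
    if item.text_conf > item.visual_conf:      # assumed tie-break in OCR's favor
        key = item.text_name.lower().strip()
        # name_to_id is a real lookup built in VisualMatcher.__init__ above
        return matcher.name_to_id.get(key, item.visual_id)
    return item.visual_id
```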