rbx-proofreader 1.0.1__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rbx_proofreader-1.1.0/PKG-INFO +160 -0
- rbx_proofreader-1.1.0/README.md +136 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/pyproject.toml +10 -2
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/proofreader/core/config.py +10 -6
- rbx_proofreader-1.1.0/src/proofreader/core/matcher.py +89 -0
- rbx_proofreader-1.1.0/src/proofreader/core/ocr.py +92 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/proofreader/core/schema.py +8 -0
- rbx_proofreader-1.1.0/src/proofreader/main.py +140 -0
- rbx_proofreader-1.1.0/src/proofreader/train/clip_trainer.py +173 -0
- rbx_proofreader-1.1.0/src/proofreader/train/emulator/generator.py +234 -0
- rbx_proofreader-1.0.1/src/proofreader/train/train.py → rbx_proofreader-1.1.0/src/proofreader/train/yolo_trainer.py +5 -8
- rbx_proofreader-1.1.0/src/rbx_proofreader.egg-info/PKG-INFO +160 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/rbx_proofreader.egg-info/SOURCES.txt +2 -2
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/rbx_proofreader.egg-info/requires.txt +3 -1
- rbx_proofreader-1.0.1/PKG-INFO +0 -128
- rbx_proofreader-1.0.1/README.md +0 -107
- rbx_proofreader-1.0.1/src/proofreader/core/matcher.py +0 -67
- rbx_proofreader-1.0.1/src/proofreader/core/ocr.py +0 -79
- rbx_proofreader-1.0.1/src/proofreader/main.py +0 -89
- rbx_proofreader-1.0.1/src/proofreader/train/builder.py +0 -94
- rbx_proofreader-1.0.1/src/proofreader/train/emulator/generator.py +0 -186
- rbx_proofreader-1.0.1/src/rbx_proofreader.egg-info/PKG-INFO +0 -128
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/LICENSE +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/setup.cfg +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/proofreader/__init__.py +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/proofreader/core/__init__.py +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/proofreader/core/detector.py +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/proofreader/core/resolver.py +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/rbx_proofreader.egg-info/dependency_links.txt +0 -0
- {rbx_proofreader-1.0.1 → rbx_proofreader-1.1.0}/src/rbx_proofreader.egg-info/top_level.txt +0 -0

--- /dev/null
+++ rbx_proofreader-1.1.0/PKG-INFO
@@ -0,0 +1,160 @@
+Metadata-Version: 2.4
+Name: rbx-proofreader
+Version: 1.1.0
+Summary: Visual trade detection and OCR engine
+License: MIT
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: easyocr>=1.7.0
+Requires-Dist: numpy>=1.24.0
+Requires-Dist: opencv-python>=4.8.0
+Requires-Dist: Pillow>=10.0.0
+Requires-Dist: rapidfuzz>=3.0.0
+Requires-Dist: requests>=2.31.0
+Requires-Dist: torch>=2.0.0
+Requires-Dist: tqdm>=4.66.0
+Requires-Dist: transformers>=4.30.0
+Requires-Dist: ultralytics>=8.0.0
+Provides-Extra: train
+Requires-Dist: playwright>=1.40.0; extra == "train"
+Dynamic: license-file
+
+# Proofreader 🔍
+
+A high-speed vision pipeline for reading Roblox trade screenshots.
+
+[](https://pypi.org/project/rbx-proofreader/)
+[](https://pepy.tech/project/rbx-proofreader)
+[](https://pypi.org/project/rbx-proofreader/)
+[](LICENSE)
+[](https://github.com/lucacrose/proofreader/actions)
+[](https://developer.nvidia.com/cuda-zone)
+[](https://github.com/ultralytics/ultralytics)
+
+Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLOv11** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and browser extensions.
+
+## Why Proofreader?
+
+Roblox trade screenshots are commonly used as proof in marketplaces, moderation workflows, and value analysis, yet verifying them is manual and error-prone. Proofreader automates this process by converting screenshots into structured, verifiable data in milliseconds.
+
+
+## Example
+
+
+
+## ⚡ Performance
+
+Tested on an **RTX 5070** using $n=500$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).
+
+| Metric                  | Result (E2E)               |
+|:------------------------|:---------------------------|
+| Exact match accuracy    | 97.2% (95% CI: 95.4–98.5%) |
+| Median latency          | 36.8 ms                    |
+| 95th percentile latency | 73.4 ms                    |
+
+> [!NOTE]
+> End-to-End **(E2E)** latency includes image loading, YOLO detection, spatial organization, CLIP similarity matching, and OCR fallback.
+
+## ✨ Key Features
+
+- **Sub-40ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.
+
+- **Multi-Modal Decision Engine:** Weighs visual embeddings against OCR text to resolve identities across 2,500+ distinct item classes.
+
+- **Fuzzy Logic Recovery:** Built-in string-distance matching corrects OCR typos and partially obscured text against a local asset database.
+
+- **Theme & Scale Agnostic:** Robust performance across various UI themes (Dark/Light), resolutions, and custom display scales.
+
+## 💻 Quick Start
+
+### Installation
+
+```bash
+pip install rbx-proofreader
+```
+
+> [!IMPORTANT]
+> **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-40ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.
+
+### Usage
+
+```py
+import proofreader
+
+# Extract metadata from a screenshot
+data = proofreader.get_trade_data("trade_proof.png")
+
+print(f"Items Out: {data['outgoing']['item_count']}")
+print(f"Robux In: {data['incoming']['robux_value']}")
+```
+
+> [!TIP]
+> **First Run:** On your first execution, Proofreader will automatically download the model weights and item database (~360MB). Subsequent runs will use the local cache for maximum speed.
+
+## 🧩 How It Works
+The pipeline handles the inconsistencies of user-generated screenshots (varied crops, UI themes, and browser extensions) through a multi-stage process:
+
+1. **Detection:** YOLOv11 localizes item cards, thumbnails, and Robux containers.
+
+2. **Spatial Organization:** Assigns child elements (names/values) to their parent cards and determines the trade side.
+
+3. **Identification:** CLIP performs similarity matching. High-confidence results become Resolved Items immediately.
+
+4. **Heuristic Judge:** Low-confidence visual matches trigger OCR and fuzzy-logic reconciliation.
+
+
+
+## 📊 Data Schema
+The `get_trade_data()` function returns a structured dictionary containing `incoming` and `outgoing` trade sides.
+
+| Key | Type | Description |
+| :--- | :--- | :--- |
+| `item_count` | `int` | Number of distinct item boxes detected. |
+| `robux_value` | `int` | Total Robux parsed from the trade. |
+| `items` | `list` | List of `ResolvedItem` objects containing `id` and `name`. |
+
+**ResolvedItem Schema:**
+
+| Property | Type | Description |
+| :--- | :--- | :--- |
+| `id` | `int` | The official Roblox Asset ID. |
+| `name` | `str` | Canonical item name from the database. |
+
+## 🏗️ Development & Training
+To set up a custom training environment for the YOLO and CLIP models:
+
+```bash
+# 1. Clone and install
+git clone https://github.com/lucacrose/proofreader.git
+cd proofreader
+pip install -e ".[train]"
+
+# 2. Initialize the item database
+python scripts/setup_items.py
+
+# 3. Train
+# Place backgrounds in src/proofreader/train/emulator/backgrounds
+# Place HTML templates in src/proofreader/train/emulator/templates
+python scripts/train_models.py
+```
+
+> [!CAUTION]
+> **GPU Required:** Training is not recommended on a CPU. The final model is saved to `runs/train/weights/best.pt`; rename it to `yolo.pt` and move it to `src/assets/weights`.
+
+## 🛠️ Tech Stack
+
+- **Vision:** YOLOv11 (Detection), CLIP (Embeddings), OpenCV (Processing)
+- **OCR:** EasyOCR
+- **Logic:** RapidFuzz (Fuzzy String Matching)
+- **Core:** Python 3.12, PyTorch, NumPy
+
+## 🤝 Contributing
+
+Contributions are welcome! Please open an issue or submit a pull request.
+
+## 📜 License
+
+This project is licensed under the MIT License.
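
A minimal consumer sketch to complement the README's usage snippet above, assuming the nesting documented in the Data Schema tables (dict access for the two trade sides, attribute access on `ResolvedItem`):

```py
import proofreader

data = proofreader.get_trade_data("trade_proof.png")

# Each side is keyed exactly as the Data Schema table documents.
for side in ("outgoing", "incoming"):
    trade_side = data[side]
    print(f"{side}: {trade_side['item_count']} items, "
          f"{trade_side['robux_value']} Robux")
    # `items` holds ResolvedItem objects exposing `id` and `name`.
    for item in trade_side["items"]:
        print(f"  {item.name} (asset id {item.id})")
```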

--- /dev/null
+++ rbx_proofreader-1.1.0/README.md
@@ -0,0 +1,136 @@
+# Proofreader 🔍
+
+A high-speed vision pipeline for reading Roblox trade screenshots.
+
+[](https://pypi.org/project/rbx-proofreader/)
+[](https://pepy.tech/project/rbx-proofreader)
+[](https://pypi.org/project/rbx-proofreader/)
+[](LICENSE)
+[](https://github.com/lucacrose/proofreader/actions)
+[](https://developer.nvidia.com/cuda-zone)
+[](https://github.com/ultralytics/ultralytics)
+
+Proofreader transforms unstructured screenshots of Roblox trades ("proofs", hence "proofreader") into structured Python dictionaries. By combining **YOLOv11** for object detection, **CLIP** for visual similarity, and **EasyOCR**, it achieves high accuracy across diverse UI themes, resolutions, and browser extensions.
+
+## Why Proofreader?
+
+Roblox trade screenshots are commonly used as proof in marketplaces, moderation workflows, and value analysis, yet verifying them is manual and error-prone. Proofreader automates this process by converting screenshots into structured, verifiable data in milliseconds.
+
+
+## Example
+
+
+
+## ⚡ Performance
+
+Tested on an **RTX 5070** using $n=500$ real-world "worst-case" user screenshots (compressed, cropped, and varied UI).
+
+| Metric                  | Result (E2E)               |
+|:------------------------|:---------------------------|
+| Exact match accuracy    | 97.2% (95% CI: 95.4–98.5%) |
+| Median latency          | 36.8 ms                    |
+| 95th percentile latency | 73.4 ms                    |
+
+> [!NOTE]
+> End-to-End **(E2E)** latency includes image loading, YOLO detection, spatial organization, CLIP similarity matching, and OCR fallback.
+
+## ✨ Key Features
+
+- **Sub-40ms Latency:** Optimized with "Fast-Path" logic that skips OCR for high-confidence visual matches, ensuring near-instant processing.
+
+- **Multi-Modal Decision Engine:** Weighs visual embeddings against OCR text to resolve identities across 2,500+ distinct item classes.
+
+- **Fuzzy Logic Recovery:** Built-in string-distance matching corrects OCR typos and partially obscured text against a local asset database.
+
+- **Theme & Scale Agnostic:** Robust performance across various UI themes (Dark/Light), resolutions, and custom display scales.
+
+## 💻 Quick Start
+
+### Installation
+
+```bash
+pip install rbx-proofreader
+```
+
+> [!IMPORTANT]
+> **Hardware Acceleration:** Proofreader automatically detects NVIDIA GPUs. For sub-40ms performance, ensure you have the CUDA-enabled version of PyTorch installed. If a CPU-only environment is detected on a GPU-capable machine, the engine will provide the exact `pip` command to fix your environment.
+
+### Usage
+
+```py
+import proofreader
+
+# Extract metadata from a screenshot
+data = proofreader.get_trade_data("trade_proof.png")
+
+print(f"Items Out: {data['outgoing']['item_count']}")
+print(f"Robux In: {data['incoming']['robux_value']}")
+```
+
+> [!TIP]
+> **First Run:** On your first execution, Proofreader will automatically download the model weights and item database (~360MB). Subsequent runs will use the local cache for maximum speed.
+
+## 🧩 How It Works
+The pipeline handles the inconsistencies of user-generated screenshots (varied crops, UI themes, and browser extensions) through a multi-stage process:
+
+1. **Detection:** YOLOv11 localizes item cards, thumbnails, and Robux containers.
+
+2. **Spatial Organization:** Assigns child elements (names/values) to their parent cards and determines the trade side.
+
+3. **Identification:** CLIP performs similarity matching. High-confidence results become Resolved Items immediately.
+
+4. **Heuristic Judge:** Low-confidence visual matches trigger OCR and fuzzy-logic reconciliation.
+
+
+
+## 📊 Data Schema
+The `get_trade_data()` function returns a structured dictionary containing `incoming` and `outgoing` trade sides.
+
+| Key | Type | Description |
+| :--- | :--- | :--- |
+| `item_count` | `int` | Number of distinct item boxes detected. |
+| `robux_value` | `int` | Total Robux parsed from the trade. |
+| `items` | `list` | List of `ResolvedItem` objects containing `id` and `name`. |
+
+**ResolvedItem Schema:**
+
+| Property | Type | Description |
+| :--- | :--- | :--- |
+| `id` | `int` | The official Roblox Asset ID. |
+| `name` | `str` | Canonical item name from the database. |
+
+## 🏗️ Development & Training
+To set up a custom training environment for the YOLO and CLIP models:
+
+```bash
+# 1. Clone and install
+git clone https://github.com/lucacrose/proofreader.git
+cd proofreader
+pip install -e ".[train]"
+
+# 2. Initialize the item database
+python scripts/setup_items.py
+
+# 3. Train
+# Place backgrounds in src/proofreader/train/emulator/backgrounds
+# Place HTML templates in src/proofreader/train/emulator/templates
+python scripts/train_models.py
+```
+
+> [!CAUTION]
+> **GPU Required:** Training is not recommended on a CPU. The final model is saved to `runs/train/weights/best.pt`; rename it to `yolo.pt` and move it to `src/assets/weights`.
+
+## 🛠️ Tech Stack
+
+- **Vision:** YOLOv11 (Detection), CLIP (Embeddings), OpenCV (Processing)
+- **OCR:** EasyOCR
+- **Logic:** RapidFuzz (Fuzzy String Matching)
+- **Core:** Python 3.12, PyTorch, NumPy
+
+## 🤝 Contributing
+
+Contributions are welcome! Please open an issue or submit a pull request.
+
+## 📜 License
+
+This project is licensed under the MIT License.
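
The "Fuzzy Logic Recovery" feature above is backed by RapidFuzz; `ocr.py` later in this diff calls `process.extractOne` with `utils.default_process` and a 60.0 cutoff. A self-contained sketch of that recovery step, with a toy three-item stand-in for the local asset database:

```py
from rapidfuzz import process, utils

# Toy stand-in for the package's local asset database.
item_names = ["Dominus Empyreus", "Sparkle Time Fedora", "Valkyrie Helm"]

# Simulated OCR output: a typo ("Tirne") and a clipped final character.
raw = "Sparkle Tirne Fedor"

# Same call shape as OCRReader._fuzzy_match_name in this release.
match = process.extractOne(raw, item_names, processor=utils.default_process)
print(match)
# ('Sparkle Time Fedora', <score around 90>, 1); comfortably above the
# FUZZY_MATCH_CONFIDENCE_THRESHOLD of 60.0, so the name is corrected.
```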
--- rbx_proofreader-1.0.1/pyproject.toml
+++ rbx_proofreader-1.1.0/pyproject.toml
@@ -4,17 +4,22 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "rbx-proofreader"
-version = "1.0.1"
+version = "1.1.0"
 description = "Visual trade detection and OCR engine"
 readme = "README.md"
 requires-python = ">=3.12"
 license = {text = "MIT"}
+
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+]
+
 dependencies = [
     "easyocr>=1.7.0",
     "numpy>=1.24.0",
     "opencv-python>=4.8.0",
     "Pillow>=10.0.0",
-    "playwright>=1.40.0",
     "rapidfuzz>=3.0.0",
     "requests>=2.31.0",
     "torch>=2.0.0",
@@ -23,6 +28,9 @@ dependencies = [
     "ultralytics>=8.0.0"
 ]
 
+[project.optional-dependencies]
+train = ["playwright>=1.40.0"]
+
 [tool.setuptools]
 package-dir = {"" = "src"}
 
--- rbx_proofreader-1.0.1/src/proofreader/core/config.py
+++ rbx_proofreader-1.1.0/src/proofreader/core/config.py
@@ -5,13 +5,17 @@ from pathlib import Path
 # --- BASE PATHS ---
 # Resolves to the 'proofreader' root directory
 BASE_DIR = Path(__file__).resolve().parent.parent.parent
+BASE_URL = "https://github.com/lucacrose/proofreader/releases/latest/download"
 
 # --- ASSETS & MODELS ---
 ASSETS_PATH = BASE_DIR / "assets"
 MODEL_PATH = ASSETS_PATH / "weights" / "yolo.pt"
-DB_PATH = ASSETS_PATH / "
-CACHE_PATH = ASSETS_PATH / "
+DB_PATH = ASSETS_PATH / "item_database.json"
+CACHE_PATH = ASSETS_PATH / "item_embeddings_bank.pt"
 THUMBNAILS_DIR = ASSETS_PATH / "thumbnails"
+TRAIN_THUMBNAILS_DIR = ASSETS_PATH / "train_data"
+CLASS_MAP_PATH = ASSETS_PATH / "class_mapping.json"
+CLIP_BEST_PATH = ASSETS_PATH / "weights" / "clip.pt"
 
 # --- TRAINING & EMULATOR ---
 TRAIN_DIR = BASE_DIR / "proofreader" / "train"
@@ -26,11 +30,11 @@ DEFAULT_TEMPLATE = TEMPLATES_DIR / "trade_ui.html"
 
 # --- HYPERPARAMETERS (Training Settings) ---
 TRAINING_CONFIG = {
-    "epochs":
+    "epochs": 240,            # Number of times the model sees the whole dataset
     "batch_size": 16,         # Number of images processed at once
     "img_size": 640,          # Standard YOLO resolution
-    "patience":
-    "close_mosaic_epochs":
+    "patience": 20,           # Stop early if no improvement for 20 epochs
+    "close_mosaic_epochs": 32 # Disable mosaic augmentation for the last N epochs
 }
 
 # --- AUGMENTER PROBABILITIES AND GENERATOR SETTINGS ---
@@ -82,7 +86,7 @@ AUGMENTER_CONFIG = {
 
 # Robustness Thresholds
 FUZZY_MATCH_CONFIDENCE_THRESHOLD = 60.0
-
+CERTAIN_VISUAL_CONF = 0.995
 
 # --- HARDWARE SETTINGS ---
 # Automatically detects if a GPU is available for faster training
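
The new `CERTAIN_VISUAL_CONF = 0.995` threshold underpins the README's "Fast-Path" claim, and `ocr.py` later in this diff gains a matching `skip_if` hook on `process_layout`. The wiring lives in `main.py`, which is not shown here; the predicate below is a plausible sketch, with a stub dataclass standing in for `ResolvedItem`:

```py
from dataclasses import dataclass

CERTAIN_VISUAL_CONF = 0.995  # value added to config.py in this release

@dataclass
class Item:
    """Stub carrying only the field the predicate reads; ResolvedItem has more."""
    visual_conf: float = 0.0

def fast_path(item) -> bool:
    """True when the CLIP match is confident enough to skip OCR entirely."""
    return item.visual_conf >= CERTAIN_VISUAL_CONF

print(fast_path(Item(visual_conf=0.999)))  # True  -> OCR skipped
print(fast_path(Item(visual_conf=0.42)))   # False -> falls through to OCR
```

Passed as `skip_if=fast_path`, this matches the shape `process_layout` accepts.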
--- /dev/null
+++ rbx_proofreader-1.1.0/src/proofreader/core/matcher.py
@@ -0,0 +1,89 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import json
+import cv2
+from PIL import Image
+from torchvision import transforms
+from transformers import CLIPVisionModelWithProjection
+from typing import List
+from .schema import TradeLayout, ResolvedItem
+
+class CLIPItemEmbedder(nn.Module):
+    def __init__(self, num_classes, model_id="openai/clip-vit-base-patch32"):
+        super().__init__()
+        self.vision_encoder = CLIPVisionModelWithProjection.from_pretrained(model_id)
+        self.item_prototypes = nn.Embedding(num_classes, 512)
+        self.logit_scale = nn.Parameter(torch.ones([]) * 2.659)
+
+    def forward(self, pixel_values):
+        outputs = self.vision_encoder(pixel_values=pixel_values)
+        return F.normalize(outputs.image_embeds, p=2, dim=-1)
+
+class VisualMatcher:
+    def __init__(self, weights_path: str, mapping_path: str, item_db: List[dict], device: str = "cuda"):
+        self.device = device
+
+        with open(mapping_path, "r") as f:
+            self.class_to_idx = json.load(f)
+        self.idx_to_class = {v: k for k, v in self.class_to_idx.items()}
+
+        self.id_to_name = {str(i["id"]): i["name"] for i in item_db}
+        self.name_to_id = {str(i["name"]).lower().strip(): i["id"] for i in item_db}
+
+        num_classes = len(self.class_to_idx)
+        self.model = CLIPItemEmbedder(num_classes).to(self.device)
+        self.model.load_state_dict(torch.load(weights_path, map_location=self.device))
+        self.model.eval()
+
+        with torch.inference_mode():
+            self.bank_tensor = F.normalize(self.model.item_prototypes.weight, p=2, dim=-1)
+
+        self.preprocess = transforms.Compose([
+            transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BICUBIC),
+            transforms.ToTensor(),
+            transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
+                                 (0.26862954, 0.26130258, 0.27577711)),
+        ])
+
+    def match_item_visuals(self, image: np.ndarray, layout: TradeLayout):
+        items_to_process: List[ResolvedItem] = []
+        crops = []
+
+        for side in (layout.outgoing.items, layout.incoming.items):
+            for item in side:
+                if item.thumb_box:
+                    x1, y1, x2, y2 = item.thumb_box.coords
+                    crop = image[y1:y2, x1:x2]
+                    if crop.size > 0:
+                        pil_img = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
+                        processed_crop = self.preprocess(pil_img)
+                        crops.append(processed_crop)
+                        items_to_process.append(item)
+
+        if not crops:
+            return
+
+        batch_tensor = torch.stack(crops).to(self.device)
+
+        with torch.inference_mode():
+            query_features = self.model(batch_tensor)
+
+            logits = query_features @ self.bank_tensor.t() * self.model.logit_scale.exp()
+            topk_scores, topk_indices = logits.topk(k=5, dim=1)
+
+            probs = F.softmax(topk_scores.float(), dim=1)
+
+            best_idx_in_topk = probs.argmax(dim=1)
+            best_indices = topk_indices[torch.arange(len(topk_indices)), best_idx_in_topk]
+            best_probs = probs[torch.arange(len(probs)), best_idx_in_topk]
+
+
+        for i, item in enumerate(items_to_process):
+            visual_idx = best_indices[i].item()
+
+            visual_match_id_str = self.idx_to_class[visual_idx]
+
+            item.visual_id = int(visual_match_id_str)
+            item.visual_conf = float(best_probs[i].item())
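
A sketch of standing `VisualMatcher` up on its own, using the asset paths added to `config.py` above. It assumes `item_database.json` holds a JSON list of `{"id": ..., "name": ...}` records, which is consistent with how both `matcher.py` and `ocr.py` index it:

```py
import json
import torch

from proofreader.core.config import CLIP_BEST_PATH, CLASS_MAP_PATH, DB_PATH
from proofreader.core.matcher import VisualMatcher

with open(DB_PATH, "r") as f:
    item_db = json.load(f)  # assumed: list of {"id": ..., "name": ...}

matcher = VisualMatcher(
    weights_path=str(CLIP_BEST_PATH),
    mapping_path=str(CLASS_MAP_PATH),
    item_db=item_db,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# match_item_visuals() mutates the TradeLayout in place, stamping every
# item that has a thumb_box with visual_id / visual_conf:
#   matcher.match_item_visuals(image_bgr, layout)
```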
--- /dev/null
+++ rbx_proofreader-1.1.0/src/proofreader/core/ocr.py
@@ -0,0 +1,92 @@
+import cv2
+import easyocr
+import numpy as np
+import re
+from rapidfuzz import process, utils
+from .schema import TradeLayout
+from proofreader.core.config import FUZZY_MATCH_CONFIDENCE_THRESHOLD, OCR_LANGUAGES, OCR_USE_GPU
+
+class OCRReader:
+    def __init__(self, item_list, languages=OCR_LANGUAGES, gpu=OCR_USE_GPU):
+        self.reader = easyocr.Reader(languages, gpu=gpu)
+        self.item_names = [item["name"] for item in item_list]
+
+    def _fuzzy_match_name(self, raw_text: str, threshold: float = FUZZY_MATCH_CONFIDENCE_THRESHOLD) -> str:
+        if not raw_text or len(raw_text) < 2:
+            return raw_text
+
+        match = process.extractOne(
+            raw_text,
+            self.item_names,
+            processor=utils.default_process
+        )
+
+        if match and match[1] >= threshold:
+            return match[0]
+
+        return raw_text
+
+    def _clean_robux_text(self, raw_text: str) -> int:
+        cleaned = raw_text.upper().strip()
+        substitutions = {
+            ',': '', '.': '', ' ': '',
+            'S': '5', 'O': '0', 'I': '1',
+            'L': '1', 'B': '8', 'G': '6'
+        }
+        for char, sub in substitutions.items():
+            cleaned = cleaned.replace(char, sub)
+
+        digits = re.findall(r'\d+', cleaned)
+        return int("".join(digits)) if digits else 0
+
+    def process_layout(self, image: np.ndarray, layout: TradeLayout, skip_if=None):
+        all_items = layout.outgoing.items + layout.incoming.items
+        crops = []
+        target_refs = []
+        STD_H = 64
+
+        for item in all_items:
+            if skip_if and skip_if(item):
+                continue
+
+            if item.name_box:
+                x1, y1, x2, y2 = item.name_box.coords
+                crop = image[max(0, y1-2):y2+2, max(0, x1-2):x2+2]
+                if crop.size > 0:
+                    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
+                    h, w = gray.shape
+                    new_w = int(w * (STD_H / h))
+                    resized = cv2.resize(gray, (new_w, STD_H), interpolation=cv2.INTER_LINEAR)
+                    crops.append(resized)
+                    target_refs.append({'type': 'item', 'obj': item})
+
+        for side in [layout.outgoing, layout.incoming]:
+            if side.robux and side.robux.value_box:
+                x1, y1, x2, y2 = side.robux.value_box.coords
+                crop = image[max(0, y1-2):y2+2, max(0, x1-2):x2+2]
+                if crop.size > 0:
+                    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
+                    h, w = gray.shape
+                    new_w = int(w * (STD_H / h))
+                    resized = cv2.resize(gray, (new_w, STD_H), interpolation=cv2.INTER_LINEAR)
+                    crops.append(resized)
+                    target_refs.append({'type': 'robux', 'obj': side.robux})
+
+        if not crops:
+            return
+
+        max_w = max(c.shape[1] for c in crops)
+        padded_crops = [cv2.copyMakeBorder(c, 0, 0, 0, max_w - c.shape[1], cv2.BORDER_CONSTANT, value=0) for c in crops]
+
+        batch_results = self.reader.readtext_batched(padded_crops, batch_size=len(padded_crops))
+
+        for i, res in enumerate(batch_results):
+            raw_text = " ".join([text_info[1] for text_info in res]).strip()
+            conf = np.mean([text_info[2] for text_info in res]) if res else 0.0
+
+            target = target_refs[i]
+            if target['type'] == 'item':
+                target['obj'].text_name = raw_text
+                target['obj'].text_conf = float(conf)
+            else:
+                target['obj'].value = self._clean_robux_text(raw_text)
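
`_clean_robux_text` is the digit-recovery step of the OCR pass: separators are stripped and glyphs OCR commonly confuses with digits are mapped back before parsing. A standalone restatement of the same substitution table, to make the corrections concrete:

```py
import re

def clean_robux_text(raw_text: str) -> int:
    # Same table as OCRReader._clean_robux_text above.
    cleaned = raw_text.upper().strip()
    for char, sub in {',': '', '.': '', ' ': '', 'S': '5', 'O': '0',
                      'I': '1', 'L': '1', 'B': '8', 'G': '6'}.items():
        cleaned = cleaned.replace(char, sub)
    digits = re.findall(r'\d+', cleaned)
    return int("".join(digits)) if digits else 0

print(clean_robux_text("1,SOO"))   # 1500 (comma dropped, S->5, O->0)
print(clean_robux_text("12 O5I"))  # 12051 (space dropped, O->0, I->1)
print(clean_robux_text("n/a"))     # 0 (no digits survive)
```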
--- rbx_proofreader-1.0.1/src/proofreader/core/schema.py
+++ rbx_proofreader-1.1.0/src/proofreader/core/schema.py
@@ -15,6 +15,14 @@ class ResolvedItem:
     thumb_box: Optional[Box] = None
     name_box: Optional[Box] = None
 
+    visual_id: int = -1
+    visual_conf: float = 0
+
+    text_name: str = ""
+    text_conf: float = 0
+
+    _finalized: bool = False
+
 @dataclass
 class ResolvedRobux:
     value: int = 0