diagram2code 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diagram2code-0.1.2/PKG-INFO +215 -0
- diagram2code-0.1.2/README.md +198 -0
- diagram2code-0.1.2/pyproject.toml +40 -0
- diagram2code-0.1.2/setup.cfg +4 -0
- diagram2code-0.1.2/src/diagram2code/__init__.py +1 -0
- diagram2code-0.1.2/src/diagram2code/cli.py +191 -0
- diagram2code-0.1.2/src/diagram2code/export_graph.py +20 -0
- diagram2code-0.1.2/src/diagram2code/export_matplotlib.py +81 -0
- diagram2code-0.1.2/src/diagram2code/export_program.py +97 -0
- diagram2code-0.1.2/src/diagram2code/labels.py +50 -0
- diagram2code-0.1.2/src/diagram2code/schema.py +10 -0
- diagram2code-0.1.2/src/diagram2code/vision/detect_arrows.py +143 -0
- diagram2code-0.1.2/src/diagram2code/vision/detect_shapes.py +99 -0
- diagram2code-0.1.2/src/diagram2code/vision/extract_labels.py +82 -0
- diagram2code-0.1.2/src/diagram2code/vision/preprocess.py +52 -0
- diagram2code-0.1.2/src/diagram2code.egg-info/PKG-INFO +215 -0
- diagram2code-0.1.2/src/diagram2code.egg-info/SOURCES.txt +36 -0
- diagram2code-0.1.2/src/diagram2code.egg-info/dependency_links.txt +1 -0
- diagram2code-0.1.2/src/diagram2code.egg-info/entry_points.txt +2 -0
- diagram2code-0.1.2/src/diagram2code.egg-info/requires.txt +10 -0
- diagram2code-0.1.2/src/diagram2code.egg-info/top_level.txt +1 -0
- diagram2code-0.1.2/tests/test_cli_export_bundle_scripts.py +27 -0
- diagram2code-0.1.2/tests/test_cli_export_flag.py +32 -0
- diagram2code-0.1.2/tests/test_cli_labels_flag.py +13 -0
- diagram2code-0.1.2/tests/test_cli_smoke.py +12 -0
- diagram2code-0.1.2/tests/test_detect_arrows.py +15 -0
- diagram2code-0.1.2/tests/test_detect_shapes.py +15 -0
- diagram2code-0.1.2/tests/test_end_to_end_branching.py +25 -0
- diagram2code-0.1.2/tests/test_end_to_end_branching_arrows.py +26 -0
- diagram2code-0.1.2/tests/test_export_graph.py +15 -0
- diagram2code-0.1.2/tests/test_export_matplotlib.py +25 -0
- diagram2code-0.1.2/tests/test_export_program.py +26 -0
- diagram2code-0.1.2/tests/test_export_program_branching.py +50 -0
- diagram2code-0.1.2/tests/test_export_program_context.py +27 -0
- diagram2code-0.1.2/tests/test_extract_labels.py +29 -0
- diagram2code-0.1.2/tests/test_labels.py +11 -0
- diagram2code-0.1.2/tests/test_preprocess.py +12 -0
- diagram2code-0.1.2/tests/test_toposort.py +13 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diagram2code
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Convert simple diagram images into runnable code (matplotlib/graphviz).
|
|
5
|
+
Author: Kazi Samiul Islam
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: <3.14,>=3.11
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: opencv-python>=4.8
|
|
10
|
+
Requires-Dist: numpy>=1.26
|
|
11
|
+
Requires-Dist: matplotlib>=3.8
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
15
|
+
Provides-Extra: ocr
|
|
16
|
+
Requires-Dist: pytesseract>=0.3.10; extra == "ocr"
|
|
17
|
+
|
|
18
|
+
# diagram2code
|
|
19
|
+
|
|
20
|
+
Convert simple flowchart-style diagrams into runnable Python programs.
|
|
21
|
+
|
|
22
|
+
`diagram2code` takes a diagram image (rectangular steps + arrows), detects the flow, and generates:
|
|
23
|
+
|
|
24
|
+
- a graph representation (`graph.json`)
|
|
25
|
+
- a runnable Python program (`generated_program.py`)
|
|
26
|
+
- optional debug visualizations (`debug_nodes.png`, `debug_arrows.png`)
|
|
27
|
+
- an optional exportable bundle (`--export`)
|
|
28
|
+
|
|
29
|
+
> This project is designed for **learning, prototyping, and experimentation**, not for production-grade diagram parsing.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Table of Contents
|
|
34
|
+
|
|
35
|
+
1. [Installation](#installation)
|
|
36
|
+
2. [Quick Start](#quick-start)
|
|
37
|
+
3. [Using Labels](#using-labels)
|
|
38
|
+
4. [Export Bundle](#export-bundle)
|
|
39
|
+
5. [Generated Files](#generated-files)
|
|
40
|
+
6. [Examples](#examples)
|
|
41
|
+
7. [Limitations](#limitations)
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
Clone the repo and install in editable mode:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
git clone https://github.com/Nimil785477/diagram2code.git
|
|
51
|
+
cd diagram2code
|
|
52
|
+
|
|
53
|
+
python -m venv .venv
|
|
54
|
+
```
|
|
55
|
+
Activate the environment
|
|
56
|
+
```
|
|
57
|
+
# Linux / macOS
|
|
58
|
+
source .venv/bin/activate
|
|
59
|
+
|
|
60
|
+
# Windows (PowerShell)
|
|
61
|
+
.\.venv\Scripts\Activate.ps1
|
|
62
|
+
```
|
|
63
|
+
Install:
|
|
64
|
+
```
|
|
65
|
+
pip install -e .
|
|
66
|
+
```
|
|
67
|
+
### Basic (no OCR)
|
|
68
|
+
```bash
|
|
69
|
+
pip install diagram2code
|
|
70
|
+
```
|
|
71
|
+
With OCR support (optional):
|
|
72
|
+
```bash
|
|
73
|
+
pip install diagram2code[ocr]
|
|
74
|
+
```
|
|
75
|
+
You must also install Tesseract OCR on your system:
|
|
76
|
+
- Windows: https://github.com/UB-Mannheim/tesseract/wiki
|
|
77
|
+
- macOS:
|
|
78
|
+
```bash
|
|
79
|
+
brew install tesseract
|
|
80
|
+
```
|
|
81
|
+
- Ubuntu/Debian:
|
|
82
|
+
```bash
|
|
83
|
+
sudo apt install tesseract-ocr
|
|
84
|
+
```
|
|
85
|
+
Then run:
|
|
86
|
+
```bash
|
|
87
|
+
diagram2code image.png --extract-labels
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
If Tesseract or `pytesseract` is missing, `--extract-labels` prints a warning and the run continues without labels.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## 3️⃣ (Optional but recommended) Add a runtime hint
|
|
95
|
+
|
|
96
|
+
You already handle this well, but one tiny UX improvement:
|
|
97
|
+
|
|
98
|
+
In `cli.py`, after `--extract-labels` failure, you could optionally print:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
safe_print("Hint: install OCR support with `pip install diagram2code[ocr]` and install Tesseract.")
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Quick Start
|
|
105
|
+
Run diagram2code on a simple diagram:
|
|
106
|
+
```
|
|
107
|
+
python -m diagram2code.cli examples/simple/diagram.png --out outputs
|
|
108
|
+
```
|
|
109
|
+
This writes the output files into `outputs/` (see [Generated Files](#generated-files)).
|
|
110
|
+
|
|
111
|
+
## Using Labels
|
|
112
|
+
You can provide custom labels for nodes using a JSON file
|
|
113
|
+
|
|
114
|
+
Example labels.json
|
|
115
|
+
```
|
|
116
|
+
{
|
|
117
|
+
"0": "Step_1_Load_Data",
|
|
118
|
+
"1": "Step_2_Train_Model"
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
Run with labels
|
|
122
|
+
```
|
|
123
|
+
python -m diagram2code.cli diagram.png --out outputs --labels labels.json
|
|
124
|
+
```
|
|
125
|
+
The exported program will then use labeled function names (sanitized into valid Python identifiers).
|
|
126
|
+
|
|
127
|
+
## Export Bundle
|
|
128
|
+
The **--export** fag creates a self-contained runnable bundle(easy to share).
|
|
129
|
+
```
|
|
130
|
+
python -m diagram2code.cli diagram.png --out outputs --export export_bundle
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
When using --export, the following files are copied:
|
|
134
|
+
```
|
|
135
|
+
export_bundle/
|
|
136
|
+
├── generated_program.py
|
|
137
|
+
├── graph.json
|
|
138
|
+
├── labels.json (if provided)
|
|
139
|
+
├── debug_nodes.png (if exists)
|
|
140
|
+
├── debug_arrows.png (if exists)
|
|
141
|
+
├── render_graph.py (if exists)
|
|
142
|
+
├── run.ps1
|
|
143
|
+
├── run.sh
|
|
144
|
+
└── README_EXPORT.md
|
|
145
|
+
```
|
|
146
|
+
Running the exported bundle
|
|
147
|
+
|
|
148
|
+
Windows (PowerShell):
|
|
149
|
+
```
|
|
150
|
+
cd export_bundle
|
|
151
|
+
.\run.ps1
|
|
152
|
+
```
|
|
153
|
+
Linux/macOS:
|
|
154
|
+
```
|
|
155
|
+
cd export_bundle
|
|
156
|
+
bash run.sh
|
|
157
|
+
```
|
|
158
|
+
or directly:
|
|
159
|
+
```
|
|
160
|
+
python generated_program.py
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Generated Files
|
|
164
|
+
After a normal run **(--out outputs)**:
|
|
165
|
+
| File | Description |
|
|
166
|
+
| ---------------------- | ------------------------------------ |
|
|
167
|
+
| `preprocessed.png` | Binary image used for detection |
|
|
168
|
+
| `debug_nodes.png` | Detected rectangles overlay |
|
|
169
|
+
| `debug_arrows.png` | Detected arrows overlay (if enabled) |
|
|
170
|
+
| `graph.json` | Graph structure (nodes + edges) |
|
|
171
|
+
| `render_graph.py` | Script to visualize the graph |
|
|
172
|
+
| `generated_program.py` | Generated executable Python program |
|
|
173
|
+
|
|
174
|
+
## Examples
|
|
175
|
+
Simple linear flow
|
|
176
|
+
```
|
|
177
|
+
[ A ] → [ B ] → [ C ]
|
|
178
|
+
```
|
|
179
|
+
Branching flow
|
|
180
|
+
```
|
|
181
|
+
→ [ B ]
|
|
182
|
+
[ A ]
|
|
183
|
+
→ [ C ]
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### OCR (Optional)
|
|
187
|
+
`diagram2code` can extract text labels using Tesseract OCR.
|
|
188
|
+
|
|
189
|
+
Requirements:
|
|
190
|
+
- System: `tesseract-ocr`
|
|
191
|
+
- Python: `pytesseract`
|
|
192
|
+
|
|
193
|
+
If OCR is unavailable, the pipeline still works and labels default to empty.
|
|
194
|
+
|
|
195
|
+
## Limitations
|
|
196
|
+
- Only rectangular nodes are supported
|
|
197
|
+
- Arrow detection is heuristic-based
|
|
198
|
+
- Complex curves, diagonals, or overlapping arrows may fail
|
|
199
|
+
- No text extraction from inside shapes
|
|
200
|
+
- Not intended for UML, BPMN, or hand-drawn diagrams
|
|
201
|
+
|
|
202
|
+
## Demo
|
|
203
|
+
|
|
204
|
+
Convert a simple diagram image into runnable Python code:
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
diagram2code tests/fixtures/simple.png --out demo_outputs --extract-labels
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# diagram2code
|
|
2
|
+
|
|
3
|
+
Convert simple flowchart-style diagrams into runnable Python programs.
|
|
4
|
+
|
|
5
|
+
`diagram2code` takes a diagram image (rectangular steps + arrows), detects the flow, and generates:
|
|
6
|
+
|
|
7
|
+
- a graph representation (`graph.json`)
|
|
8
|
+
- a runnable Python program (`generated_program.py`)
|
|
9
|
+
- optional debug visualizations (`debug_nodes.png`, `debug_arrows.png`)
|
|
10
|
+
- an optional exportable bundle (`--export`)
|
|
11
|
+
|
|
12
|
+
> This project is designed for **learning, prototyping, and experimentation**, not for production-grade diagram parsing.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Table of Contents
|
|
17
|
+
|
|
18
|
+
1. [Installation](#installation)
|
|
19
|
+
2. [Quick Start](#quick-start)
|
|
20
|
+
3. [Using Labels](#using-labels)
|
|
21
|
+
4. [Export Bundle](#export-bundle)
|
|
22
|
+
5. [Generated Files](#generated-files)
|
|
23
|
+
6. [Examples](#examples)
|
|
24
|
+
7. [Limitations](#limitations)
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
Clone the repo and install in editable mode:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
git clone https://github.com/Nimil785477/diagram2code.git
|
|
34
|
+
cd diagram2code
|
|
35
|
+
|
|
36
|
+
python -m venv .venv
|
|
37
|
+
```
|
|
38
|
+
Activate the environment
|
|
39
|
+
```
|
|
40
|
+
# Linux / macOS
|
|
41
|
+
source .venv/bin/activate
|
|
42
|
+
|
|
43
|
+
# Windows (PowerShell)
|
|
44
|
+
.\.venv\Scripts\Activate.ps1
|
|
45
|
+
```
|
|
46
|
+
Install:
|
|
47
|
+
```
|
|
48
|
+
pip install -e .
|
|
49
|
+
```
|
|
50
|
+
### Basic (no OCR)
|
|
51
|
+
```bash
|
|
52
|
+
pip install diagram2code
|
|
53
|
+
```
|
|
54
|
+
With OCR support (optional):
|
|
55
|
+
```bash
|
|
56
|
+
pip install diagram2code[ocr]
|
|
57
|
+
```
|
|
58
|
+
You must also install Tesseract OCR on your system:
|
|
59
|
+
- Windows: https://github.com/UB-Mannheim/tesseract/wiki
|
|
60
|
+
- macOS:
|
|
61
|
+
```bash
|
|
62
|
+
brew install tesseract
|
|
63
|
+
```
|
|
64
|
+
- Ubuntu/Debian:
|
|
65
|
+
```bash
|
|
66
|
+
sudo apt install tesseract-ocr
|
|
67
|
+
```
|
|
68
|
+
Then run:
|
|
69
|
+
```bash
|
|
70
|
+
diagram2code image.png --extract-labels
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
If Tesseract or `pytesseract` is missing, `--extract-labels` prints a warning and the run continues without labels.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## 3️⃣ (Optional but recommended) Add a runtime hint
|
|
78
|
+
|
|
79
|
+
You already handle this well, but one tiny UX improvement:
|
|
80
|
+
|
|
81
|
+
In `cli.py`, after `--extract-labels` failure, you could optionally print:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
safe_print("Hint: install OCR support with `pip install diagram2code[ocr]` and install Tesseract.")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Quick Start
|
|
88
|
+
Run diagram2code on a simple diagram:
|
|
89
|
+
```
|
|
90
|
+
python -m diagram2code.cli examples/simple/diagram.png --out outputs
|
|
91
|
+
```
|
|
92
|
+
This writes the output files into `outputs/` (see [Generated Files](#generated-files)).
|
|
93
|
+
|
|
94
|
+
## Using Labels
|
|
95
|
+
You can provide custom labels for nodes using a JSON file
|
|
96
|
+
|
|
97
|
+
Example labels.json
|
|
98
|
+
```
|
|
99
|
+
{
|
|
100
|
+
"0": "Step_1_Load_Data",
|
|
101
|
+
"1": "Step_2_Train_Model"
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
Run with labels
|
|
105
|
+
```
|
|
106
|
+
python -m diagram2code.cli diagram.png --out outputs --labels labels.json
|
|
107
|
+
```
|
|
108
|
+
The exported program will then use labeled function names (sanitized into valid Python identifiers).
|
|
109
|
+
|
|
110
|
+
## Export Bundle
|
|
111
|
+
The **--export** flag creates a self-contained runnable bundle (easy to share).
|
|
112
|
+
```
|
|
113
|
+
python -m diagram2code.cli diagram.png --out outputs --export export_bundle
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
When using --export, the following files are copied:
|
|
117
|
+
```
|
|
118
|
+
export_bundle/
|
|
119
|
+
├── generated_program.py
|
|
120
|
+
├── graph.json
|
|
121
|
+
├── labels.json (if provided)
|
|
122
|
+
├── debug_nodes.png (if exists)
|
|
123
|
+
├── debug_arrows.png (if exists)
|
|
124
|
+
├── render_graph.py (if exists)
|
|
125
|
+
├── run.ps1
|
|
126
|
+
├── run.sh
|
|
127
|
+
└── README_EXPORT.md
|
|
128
|
+
```
|
|
129
|
+
Running the exported bundle
|
|
130
|
+
|
|
131
|
+
Windows (PowerShell):
|
|
132
|
+
```
|
|
133
|
+
cd export_bundle
|
|
134
|
+
.\run.ps1
|
|
135
|
+
```
|
|
136
|
+
Linux/macOS:
|
|
137
|
+
```
|
|
138
|
+
cd export_bundle
|
|
139
|
+
bash run.sh
|
|
140
|
+
```
|
|
141
|
+
or directly:
|
|
142
|
+
```
|
|
143
|
+
python generated_program.py
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Generated Files
|
|
147
|
+
After a normal run **(--out outputs)**:
|
|
148
|
+
| File | Description |
|
|
149
|
+
| ---------------------- | ------------------------------------ |
|
|
150
|
+
| `preprocessed.png` | Binary image used for detection |
|
|
151
|
+
| `debug_nodes.png` | Detected rectangles overlay |
|
|
152
|
+
| `debug_arrows.png` | Detected arrows overlay (if enabled) |
|
|
153
|
+
| `graph.json` | Graph structure (nodes + edges) |
|
|
154
|
+
| `render_graph.py` | Script to visualize the graph |
|
|
155
|
+
| `generated_program.py` | Generated executable Python program |
|
|
156
|
+
|
|
157
|
+
## Examples
|
|
158
|
+
Simple linear flow
|
|
159
|
+
```
|
|
160
|
+
[ A ] → [ B ] → [ C ]
|
|
161
|
+
```
|
|
162
|
+
Branching flow
|
|
163
|
+
```
|
|
164
|
+
→ [ B ]
|
|
165
|
+
[ A ]
|
|
166
|
+
→ [ C ]
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### OCR (Optional)
|
|
170
|
+
`diagram2code` can extract text labels using Tesseract OCR.
|
|
171
|
+
|
|
172
|
+
Requirements:
|
|
173
|
+
- System: `tesseract-ocr`
|
|
174
|
+
- Python: `pytesseract`
|
|
175
|
+
|
|
176
|
+
If OCR is unavailable, the pipeline still works and labels default to empty.
|
|
177
|
+
|
|
178
|
+
## Limitations
|
|
179
|
+
- Only rectangular nodes are supported
|
|
180
|
+
- Arrow detection is heuristic-based
|
|
181
|
+
- Complex curves, diagonals, or overlapping arrows may fail
|
|
182
|
+
- No text extraction from inside shapes
|
|
183
|
+
- Not intended for UML, BPMN, or hand-drawn diagrams
|
|
184
|
+
|
|
185
|
+
## Demo
|
|
186
|
+
|
|
187
|
+
Convert a simple diagram image into runnable Python code:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
diagram2code tests/fixtures/simple.png --out demo_outputs --extract-labels
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "diagram2code"
|
|
7
|
+
version = "0.1.2"
|
|
8
|
+
description = "Convert simple diagram images into runnable code (matplotlib/graphviz)."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
authors = [{name = "Kazi Samiul Islam"}]
|
|
12
|
+
dependencies = ["opencv-python>=4.8", "numpy>=1.26", "matplotlib>=3.8"]
|
|
13
|
+
requires-python = ">=3.11,<3.14"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=7.0",
|
|
19
|
+
"ruff>=0.5.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
ocr = [
|
|
23
|
+
"pytesseract>=0.3.10",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
diagram2code = "diagram2code.cli:main"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools]
|
|
30
|
+
package-dir = {"" = "src"}
|
|
31
|
+
|
|
32
|
+
[tool.pytest.ini_options]
|
|
33
|
+
testpaths = ["tests"]
|
|
34
|
+
|
|
35
|
+
[tool.ruff]
|
|
36
|
+
line-length = 100
|
|
37
|
+
target-version = "py310"
|
|
38
|
+
|
|
39
|
+
[tool.ruff.lint]
|
|
40
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Keep in sync with `version` in pyproject.toml (this release is 0.1.2).
__version__ = "0.1.2"
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import cv2
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def safe_print(msg: str) -> None:
    """Print *msg*, degrading gracefully on consoles that cannot encode it.

    Some Windows CI/console encodings raise ``UnicodeEncodeError`` for
    characters outside their code page. In that case the message is printed
    again with every unencodable character replaced by ``?``.

    Args:
        msg: Text to write to standard output.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        import sys

        # Round-trip through the *console's* encoding: a UTF-8 round-trip
        # would return the same string and fail in exactly the same way.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(msg.encode(encoding, errors="replace").decode(encoding))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def main(argv=None) -> int:
    """Run the diagram2code command-line pipeline.

    Parses the command line, then (when an input image is given) preprocesses
    the image, detects nodes and arrows, writes ``graph.json``,
    ``render_graph.py`` and ``generated_program.py`` into the output
    directory, and optionally copies everything into an export bundle.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success (including ``--version`` and the no-input help case),
        ``1`` if OpenCV could not read the input image.
    """
    parser = argparse.ArgumentParser(
        prog="diagram2code",
        description="Convert simple diagram images into runnable code.",
    )
    parser.add_argument("input", nargs="?", help="Path to input image")
    parser.add_argument("--out", default="outputs", help="Output directory (default: outputs)")
    parser.add_argument("--version", action="store_true", help="Print version")

    # labels:
    parser.add_argument("--labels", default=None, help="Path to labels JSON (optional)")
    parser.add_argument(
        "--extract-labels",
        action="store_true",
        help="Extract labels via OCR and write labels.json into --out (optional; requires pytesseract + tesseract).",
    )

    # export:
    parser.add_argument("--export", type=str, default=None, help="Export runnable bundle to directory")

    args = parser.parse_args(argv)

    if args.version:
        try:
            from importlib.metadata import version

            safe_print(f"diagram2code {version('diagram2code')}")
        except Exception:
            # Package metadata is unavailable (e.g. running from a source tree).
            safe_print("diagram2code (unknown version)")
        return 0

    if not args.input:
        parser.print_help()
        return 0

    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Pipeline imports are deferred so --version / --help stay lightweight.
    from diagram2code.vision.preprocess import preprocess_image
    from diagram2code.vision.detect_shapes import detect_rectangles, draw_nodes_on_image
    from diagram2code.vision.detect_arrows import detect_arrow_edges
    from diagram2code.export_graph import save_graph_json
    from diagram2code.export_matplotlib import generate_from_graph_json as gen_render_script
    from diagram2code.export_program import generate_from_graph_json as gen_program
    from diagram2code.labels import load_labels

    # Step 1: Preprocess
    result = preprocess_image(args.input, out_dir)
    safe_print(f"Wrote: {result.output_path}")

    # Step 2: Detect nodes
    nodes = detect_rectangles(result.image_bin)

    bgr = cv2.imread(str(args.input))
    if bgr is None:
        safe_print(f"Error: Could not read image: {args.input}")
        return 1

    debug_nodes = draw_nodes_on_image(bgr, nodes)
    debug_nodes_path = out_dir / "debug_nodes.png"
    cv2.imwrite(str(debug_nodes_path), debug_nodes)
    safe_print(f"Detected nodes: {len(nodes)}")
    safe_print(f"Wrote: {debug_nodes_path}")

    # Step 3: Detect arrows (edges)
    edges = detect_arrow_edges(result.image_bin, nodes, debug_path=out_dir / "debug_arrows.png")
    safe_print(f"Wrote: {out_dir / 'debug_arrows.png'}")

    # Step 4: graph.json
    graph_path = save_graph_json(nodes, edges, out_dir / "graph.json")
    safe_print(f"Wrote: {graph_path}")

    # Step 5: render_graph.py
    script_path = gen_render_script(out_dir / "graph.json", out_dir / "render_graph.py")
    safe_print(f"Wrote: {script_path}")

    # Step 6: labels (either load from --labels, or OCR extract to
    # out_dir/labels.json, or empty)
    labels_dict = {}

    if args.labels:
        labels_dict = load_labels(args.labels)

    elif args.extract_labels:
        # OCR is optional: only run when user asks for it
        try:
            from diagram2code.vision.extract_labels import extract_node_labels

            labels_dict = extract_node_labels(bgr, nodes) or {}
        except Exception as e:
            # IMPORTANT: do NOT fail the whole CLI; just continue without labels
            safe_print(f"Warning: OCR label extraction failed/unavailable: {type(e).__name__}: {e}")
            labels_dict = {}

        labels_out = out_dir / "labels.json"
        labels_out.write_text(
            json.dumps({str(k): v for k, v in labels_dict.items()}, indent=2),
            encoding="utf-8",
        )
        safe_print(f"Wrote: {labels_out}")

    # Step 7: generated_program.py
    program_path = gen_program(out_dir / "graph.json", out_dir / "generated_program.py", labels=labels_dict)
    safe_print(f"Wrote: {program_path}")

    # Step 8: optional export bundle
    export_dir = Path(args.export) if args.export else None
    if export_dir:
        export_dir.mkdir(parents=True, exist_ok=True)

        import shutil

        def _copy(src: Path, dst: Path) -> None:
            # shutil.copy2 raises SameFileError when source and destination
            # are the same file (e.g. --export pointing at the --out
            # directory); the file is already in place, so skip it.
            if src.resolve() != dst.resolve():
                shutil.copy2(src, dst)

        # Copy required artifacts
        _copy(out_dir / "graph.json", export_dir / "graph.json")
        _copy(out_dir / "generated_program.py", export_dir / "generated_program.py")

        # Optional artifacts if they exist
        for name in [
            "labels.json",
            "debug_nodes.png",
            "debug_arrows.png",
            "preprocessed.png",
            "render_graph.py",
            "render_graph.png",
        ]:
            p = out_dir / name
            if p.exists():
                _copy(p, export_dir / name)

        # A labels file supplied via --labels is never written into out_dir,
        # so bundle it explicitly; this also replaces any stale labels.json
        # left in out_dir by an earlier --extract-labels run.
        if args.labels:
            provided_labels = Path(args.labels)
            if provided_labels.is_file():
                _copy(provided_labels, export_dir / "labels.json")

        # Run scripts
        (export_dir / "run.ps1").write_text("python .\\generated_program.py\n", encoding="utf-8")
        (export_dir / "run.sh").write_text(
            "#!/usr/bin/env bash\nset -e\npython3 generated_program.py\n",
            encoding="utf-8",
        )

        # README
        (export_dir / "README_EXPORT.md").write_text(
            "# diagram2code export\n\n"
            "This folder contains an exported runnable bundle.\n\n"
            "## Run\n\n"
            "### Windows (PowerShell)\n"
            "```powershell\n"
            ".\\run.ps1\n"
            "```\n\n"
            "### macOS / Linux\n"
            "```bash\n"
            "bash run.sh\n"
            "```\n",
            encoding="utf-8",
        )

        safe_print(f"Exported bundle to: {export_dir}")

    return 0
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
if __name__ == "__main__":
|
|
191
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Tuple
|
|
6
|
+
|
|
7
|
+
from diagram2code.schema import Node
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def save_graph_json(nodes: List[Node], edges: List[Tuple[int, int]], out_path: str | Path) -> Path:
    """Write the detected graph to a JSON file and return the file's path.

    The file holds two lists: ``nodes`` (each with ``id`` and ``bbox``) and
    ``edges`` (each with ``from`` and ``to``). Missing parent directories of
    *out_path* are created.

    Args:
        nodes: Objects exposing ``id`` and an iterable ``bbox``.
        edges: ``(from, to)`` pairs.
        out_path: Destination file.

    Returns:
        *out_path* as a ``Path``.
    """
    destination = Path(out_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    node_records = []
    for node in nodes:
        node_records.append({"id": node.id, "bbox": list(node.bbox)})

    edge_records = []
    for source, target in edges:
        edge_records.append({"from": source, "to": target})

    payload = {"nodes": node_records, "edges": edge_records}
    destination.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return destination
|