image-to-code 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +21 -24
- package/image_to_code/__init__.py +1 -0
- package/image_to_code/__pycache__/__init__.cpython-312.pyc +0 -0
- package/image_to_code/__pycache__/analyze.cpython-312.pyc +0 -0
- package/image_to_code/__pycache__/colors.cpython-312.pyc +0 -0
- package/image_to_code/__pycache__/layout.cpython-312.pyc +0 -0
- package/image_to_code/__pycache__/ocr.cpython-312.pyc +0 -0
- package/image_to_code/__pycache__/utils.cpython-312.pyc +0 -0
- package/image_to_code/analyze.py +224 -0
- package/image_to_code/colors.py +252 -0
- package/image_to_code/layout.py +150 -0
- package/image_to_code/ocr.py +448 -0
- package/image_to_code/utils.py +39 -0
- package/package.json +4 -2
- package/requirements.txt +2 -0
package/bin/cli.js
CHANGED
|
@@ -1,46 +1,41 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* image-to-code — npm wrapper.
|
|
4
|
-
*
|
|
4
|
+
* Bundles Python source. Installs pip deps on first run, then delegates.
|
|
5
5
|
*/
|
|
6
6
|
const { execSync, spawn } = require("child_process");
|
|
7
7
|
const path = require("path");
|
|
8
8
|
|
|
9
|
+
const MODULE_DIR = path.resolve(__dirname, "..");
|
|
9
10
|
const PYTHON_MODULE = "image_to_code";
|
|
10
|
-
const REQUIRED_DEPS = ["Pillow>=10.0.0", "pytesseract>=0.3.10"];
|
|
11
11
|
|
|
12
12
|
/**
 * Locate a usable Python 3 interpreter on PATH.
 *
 * Candidates are tried in order (`python`, then `python3`). A candidate is
 * accepted only if it starts within the timeout AND reports a major version
 * of 3 or newer. A bare `--version` probe would also accept a legacy
 * Python 2 `python`, which cannot run the bundled sources.
 *
 * @returns {string|null} The command name to invoke, or `null` when no
 *   suitable interpreter was found.
 */
function checkPython() {
  // Exit status 0 only on Python 3+. The whole snippet is double-quoted in the
  // command below, so `>=` is never seen by the shell as a redirection.
  const probe = "import sys; raise SystemExit(0 if sys.version_info[0] >= 3 else 1)";
  for (const cmd of ["python", "python3"]) {
    try {
      execSync(`${cmd} -c "${probe}"`, { stdio: "pipe", timeout: 10000 });
      return cmd;
    } catch {
      // Missing, too old, or timed out: try the next candidate.
    }
  }
  return null;
}
|
|
25
23
|
|
|
26
|
-
function
|
|
24
|
+
/**
 * Make sure the Python packages the analyzer imports are installed.
 *
 * First probes for `PIL` and `pytesseract` with a quick import; when that
 * succeeds nothing else happens. Otherwise the packages are installed with
 * `pip`, streaming pip's output to the user's terminal.
 *
 * @param {string} python - Interpreter command as returned by checkPython().
 * @throws {Error} Propagates the execSync error when `pip install` fails or
 *   exceeds the 120 s timeout.
 */
function ensurePipDeps(python) {
  const devNull = process.platform === "win32" ? "nul" : "/dev/null";
  try {
    execSync(`${python} -c "import PIL; import pytesseract" 2>${devNull}`, {
      stdio: "pipe",
      timeout: 10000,
    });
    return; // deps already installed
  } catch {
    // Import failed (or the probe timed out): fall through and install.
  }

  console.log("→ Installing Python dependencies (Pillow, pytesseract)...");
  // The requirement specifiers MUST be quoted: execSync runs through a shell,
  // where an unquoted `Pillow>=10.0.0` means "run with `Pillow`, redirect
  // stdout to a file called `=10.0.0`", silently dropping the version bound.
  // Double quotes are understood by both POSIX shells and cmd.exe.
  execSync(
    `${python} -m pip install "Pillow>=10.0.0" "pytesseract>=0.3.10" --quiet`,
    { stdio: "inherit", timeout: 120000 }
  );
}
|
|
45
40
|
|
|
46
41
|
function main() {
|
|
@@ -52,13 +47,15 @@ function main() {
|
|
|
52
47
|
process.exit(1);
|
|
53
48
|
}
|
|
54
49
|
|
|
55
|
-
|
|
56
|
-
installPackage(python);
|
|
57
|
-
}
|
|
50
|
+
ensurePipDeps(python);
|
|
58
51
|
|
|
59
52
|
const args = process.argv.slice(2);
|
|
60
53
|
const child = spawn(python, ["-m", PYTHON_MODULE + ".analyze", ...args], {
|
|
61
54
|
stdio: "inherit",
|
|
55
|
+
env: {
|
|
56
|
+
...process.env,
|
|
57
|
+
PYTHONPATH: MODULE_DIR + (process.env.PYTHONPATH ? path.delimiter + process.env.PYTHONPATH : ""),
|
|
58
|
+
},
|
|
62
59
|
});
|
|
63
60
|
child.on("exit", (code) => process.exit(code));
|
|
64
61
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# NOTE(review): the enclosing npm release is labelled 0.1.1 while this reads
# "1.0.0" - confirm which version string is authoritative.
__version__ = "1.0.0"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Main orchestrator: runs color, layout, and OCR analysis, produces combined JSON/CSS report."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
import os
|
|
7
|
+
import tempfile
|
|
8
|
+
from PIL import Image, ImageGrab
|
|
9
|
+
|
|
10
|
+
from .colors import extract_colors
|
|
11
|
+
from .layout import detect_layout
|
|
12
|
+
from .ocr import extract_text
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _clipboard_to_temp_png():
    """Save the clipboard image to a temp PNG and return its path; exit(1) on failure."""
    try:
        clip_img = ImageGrab.grabclipboard()
        # grabclipboard() may also hand back a list of file paths when files
        # (rather than pixel data) were copied; that is not an image either.
        if clip_img is None or isinstance(clip_img, list):
            print("Error: No image found in clipboard", file=sys.stderr)
            sys.exit(1)
        temp_dir = os.path.join(tempfile.gettempdir(), "image-to-code")
        os.makedirs(temp_dir, exist_ok=True)
        save_path = os.path.join(temp_dir, f"clipboard_{os.urandom(4).hex()}.png")
        clip_img.save(save_path)
    except Exception as e:
        print(f"Error reading clipboard: {e}", file=sys.stderr)
        sys.exit(1)
    # Diagnostics go to stderr so stdout stays machine-readable in JSON mode.
    print(f"\n[Clipboard image saved to: {save_path}]", file=sys.stderr)
    return save_path


def _detect_buttons(ocr_boxes, image_height):
    """Return OCR boxes that look like buttons: big, confident, below the top quarter."""
    search_top = image_height * 0.25 if image_height else 0
    buttons = []
    for box in ocr_boxes:
        big_enough = box.get("w", 0) > 30 and box.get("h", 0) > 20
        if big_enough and box.get("conf", 0) > 80 and box.get("y", 0) > search_top:
            buttons.append({
                "text": box.get("text", ""),
                "x": box.get("x", 0),
                "y": box.get("y", 0),
                "w": box.get("w", 0),
                "h": box.get("h", 0),
                "zone": box.get("zone", ""),
                "conf": box.get("conf", 0),
            })
    return buttons


def _build_result(resolved_path, colors, layout, ocr_result, buttons):
    """Combine the three analysis dicts into the final report structure."""
    w = colors.get("imageWidth", 0)
    h = colors.get("imageHeight", 0)
    layout_type = layout.get("layoutType", "unknown")
    surface_list = colors.get("surfaces", [])
    gradient_info = colors.get("gradient")
    background = colors.get("background", "#FFFFFF")
    text_color = colors.get("text", "#1F2937")
    # The "button"/"border" keys are always present but may be None, so a
    # .get() default would never kick in; fall back with `or` instead.
    accent = colors.get("button") or "#4F46E5"
    border = colors.get("border") or "#E5E7EB"

    return {
        "imageType": "photo" if colors.get("isPhoto", False) else "ui",
        "image": {
            "path": resolved_path,
            "width": w,
            "height": h,
            "aspect": round(w / h, 3) if h else 0,
        },
        "colors": {
            "background": background,
            "text": text_color,
            "accent": accent,
            "border": border,
            "palette": colors.get("palette", []),
            "surfaces": surface_list,
            "button": colors.get("button"),
            "textSecondary": colors.get("textSecondary"),
            "contrastRatio": colors.get("contrastRatio", 0),
            "harmony": colors.get("harmony", ""),
            "gradient": gradient_info,
        },
        "layout": {
            "type": layout_type,
            "sections": layout.get("sections", []),
            "columns": layout.get("columns", []),
            "components": layout.get("components", []),
        },
        "text": {
            "words": ocr_result.get("words", 0),
            "boxes": ocr_result.get("boxes", []),
            "buttons": buttons,
            "fullText": ocr_result.get("rawText", ""),
            "byZone": ocr_result.get("byZone", {}),
        },
        "css": {
            "customProperties": {
                "--bg": background,
                "--surface": surface_list[0] if surface_list else background,
                "--text": text_color,
                "--primary": accent,
                "--border": border,
                "--radius": "8px" if "mobile" in layout_type else "6px",
            },
            "surfaces": surface_list,
            "harmony": colors.get("harmony", ""),
            "contrastRatio": colors.get("contrastRatio", 0),
            "gradient": gradient_info,
            "viewport": "width=device-width, initial-scale=1.0" if w <= 430 else "",
            "mediaQuery": "mobile" if w <= 430 else ("tablet" if w <= 768 else "desktop"),
        },
    }


def _print_report(result, colors, layout, ocr_result, min_confidence):
    """Print the human-readable analysis report to stdout."""
    image = result["image"]
    kind = "photo" if result["imageType"] == "photo" else "UI"
    surface_list = colors.get("surfaces", [])
    ocr_raw_text = ocr_result.get("rawText", "")
    ocr_by_zone = ocr_result.get("byZone", {})
    buttons = result["text"]["buttons"]
    gradient_info = colors.get("gradient")

    print()
    print("=" * 72)
    print(" IMAGE ANALYSIS REPORT")
    print("=" * 72)
    print()
    print(f"Image: {image['width']}x{image['height']} ({result['layout']['type']}, {kind})")
    print()
    print("--- Colors ---")
    print(f" Background: {colors.get('background', '')}")
    print(f" Surfaces: {', '.join(str(s) for s in surface_list)}")
    print(f" Text: {colors.get('text', '')} (contrast: {colors.get('contrastRatio', 0)}:1)")
    if colors.get("textSecondary"):
        print(f" Text(2nd): {colors['textSecondary']}")
    print(f" Button: {colors.get('button', '')}")
    print(f" Border: {colors.get('border', '')}")
    print(f" Harmony: {colors.get('harmony', '')}")
    print(f" Palette: {len(colors.get('palette', []))} unique colors")
    print()
    print("--- Layout Components ---")
    for c in layout.get("components", []):
        print(f" {c.get('type', ''):16} y={c.get('y_pct', 0):2}% h={c.get('h_pct', 0):2}% color={c.get('color', '')}")
    print()
    print(f"--- OCR Text ({ocr_result.get('words', 0)} words >= {min_confidence}%) ---")
    if ocr_raw_text:
        print(ocr_raw_text)
    else:
        print(f" [top] {ocr_by_zone.get('top', '')}")
        print(f" [middle] {ocr_by_zone.get('middle', '')}")
        print(f" [bottom] {ocr_by_zone.get('bottom', '')}")
    print()

    if buttons:
        print(f"--- UI Buttons ({len(buttons)}) ---")
        for b in buttons:
            print(f" [button] {b.get('text', '')} (z={b.get('zone', '')}, y={b.get('y', 0)}, c={b.get('conf', 0)}%)")
        print()

    print("--- CSS Recommendations ---")
    for key, val in result["css"]["customProperties"].items():
        print(f" {key}: {val}")
    if gradient_info:
        print(f" gradient: {gradient_info.get('type', '')} {' -> '.join(str(c) for c in gradient_info.get('colors', []))}")
    print()
    print("=" * 72)


def analyze_image(image_path=None, clipboard=False, ocr_language="tha+eng",
                  min_confidence=70, sample_count=2000, quantize_tolerance=15,
                  full=False, json_output=False):
    """Run the color, layout and OCR analyses on one image and print the result.

    Args:
        image_path: Path of the image file; ignored when ``clipboard`` is true.
        clipboard: Read the image from the system clipboard instead of a file.
            The image is written to a temporary PNG that is removed again
            before returning, including when the analysis fails.
        ocr_language: Language spec passed through to ``extract_text``.
        min_confidence: Minimum OCR confidence passed to ``extract_text``.
        sample_count: Color sample budget passed to ``extract_colors``.
        quantize_tolerance: Color bucket size passed to ``extract_colors``.
        full: After the text report, also print the complete JSON document.
        json_output: Print only the JSON document (no text report). Takes
            precedence over ``full``.

    Output goes to stdout; progress and diagnostics go to stderr. The process
    exits with status 1 when no usable image is available.
    """
    resolved_path = _clipboard_to_temp_png() if clipboard else image_path

    try:
        if not resolved_path or not os.path.exists(resolved_path):
            print("Error: Image path does not exist", file=sys.stderr)
            sys.exit(1)

        print("Analyzing image...", file=sys.stderr)

        colors = extract_colors(resolved_path, sample_count, quantize_tolerance)
        layout = detect_layout(resolved_path)
        ocr_result = extract_text(resolved_path, ocr_language, min_confidence)

        buttons = _detect_buttons(ocr_result.get("boxes", []), colors.get("imageHeight", 0))
        result = _build_result(resolved_path, colors, layout, ocr_result, buttons)
        result_json = json.dumps(result, indent=2, ensure_ascii=False)

        if json_output:
            # JSON-only mode: exactly one JSON document on stdout.
            print(result_json)
        else:
            _print_report(result, colors, layout, ocr_result, min_confidence)
            if full:
                print()
                print("=== Full JSON Output ===")
                print(result_json)
    finally:
        # Clipboard temp cleanup: runs on success, on sys.exit and on errors.
        if clipboard and resolved_path and os.path.exists(resolved_path):
            try:
                os.remove(resolved_path)
            except OSError:
                pass
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def main():
    """Command-line entry point: parse the arguments and run ``analyze_image``.

    Prints the usage text and exits with status 1 when neither an image path
    nor ``--clipboard`` was supplied.
    """
    # (flags, add_argument keyword arguments) for every supported option.
    option_specs = (
        (("image_path",), {"nargs": "?", "help": "Path to image file"}),
        (("--clipboard", "-c"), {"action": "store_true", "help": "Read image from clipboard"}),
        (("--lang", "-l"), {"default": "tha+eng", "help": "Tesseract language (default: tha+eng)"}),
        (("--min-confidence", "-m"), {"type": int, "default": 70, "help": "Minimum OCR confidence (default: 70)"}),
        (("--sample-count",), {"type": int, "default": 2000, "help": "Color sample count (default: 2000)"}),
        (("--quantize-tolerance",), {"type": int, "default": 15, "help": "Color quantize tolerance (default: 15)"}),
        (("--full", "-f"), {"action": "store_true", "help": "Show full JSON output"}),
        (("--json", "-j"), {"action": "store_true", "help": "Output JSON only"}),
    )

    cli = argparse.ArgumentParser(description="Image-to-Code: Extract structured data from images")
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    has_source = bool(opts.image_path) or opts.clipboard
    if not has_source:
        cli.print_help()
        sys.exit(1)

    analyze_image(
        image_path=opts.image_path,
        clipboard=opts.clipboard,
        ocr_language=opts.lang,
        min_confidence=opts.min_confidence,
        sample_count=opts.sample_count,
        quantize_tolerance=opts.quantize_tolerance,
        full=opts.full,
        json_output=opts.json,
    )
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
if __name__ == "__main__":
|
|
224
|
+
main()
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Color extraction: dominant colors, semantic roles, gradient, harmony."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from PIL import Image
|
|
5
|
+
from .utils import hex_to_rgb, rgb_to_hex, luminance, contrast_ratio, saturation
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _coarse_photo_check(img):
    """Classify photo vs. UI from a coarse ~40x40 grid; return (is_photo, distinct_colors)."""
    w, h = img.size
    coarse_colors = set()
    lum_vals = []
    c_step_x = max(1, w // 40)
    c_step_y = max(1, h // 40)
    for y in range(0, h, c_step_y):
        for x in range(0, w, c_step_x):
            px = img.getpixel((x, y))
            # Keep only the high nibble of each channel: 16 levels per channel.
            coarse_colors.add(f"#{px[0] & 0xF0:02X}{px[1] & 0xF0:02X}{px[2] & 0xF0:02X}")
            lum_vals.append(int(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]))

    # Share of consecutive sample pairs whose brightness jumps by more than 40.
    edge_count = total_pairs = 0
    for i in range(0, len(lum_vals) - 1, 2):
        if abs(lum_vals[i] - lum_vals[i + 1]) > 40:
            edge_count += 1
        total_pairs += 1
    edge_ratio = edge_count / total_pairs if total_pairs > 0 else 0

    sorted_lums = sorted(lum_vals)
    iqr = 0
    if len(sorted_lums) >= 4:
        iqr = sorted_lums[len(sorted_lums) * 3 // 4] - sorted_lums[len(sorted_lums) // 4]
    lum_range = sorted_lums[-1] - sorted_lums[0] if len(sorted_lums) >= 2 else 0

    distinct = len(coarse_colors)
    is_photo = (
        distinct > 50
        or (distinct >= 15 and iqr < 80)
        or (lum_range > 150 and edge_ratio < 0.3)
    )
    return is_photo, distinct


def _sample_palette(img, sample_count, quantize_tolerance):
    """Sample a grid of pixels, bucket them; return (palette, color_meta, total_samples)."""
    w, h = img.size
    step_x = max(1, int(w / math.sqrt(sample_count * w / h))) if w and h else 1
    step_y = max(1, int(h / math.sqrt(sample_count * h / w))) if w and h else 1

    color_counts = {}
    total_samples = 0
    for y in range(0, h, step_y):
        for x in range(0, w, step_x):
            px = img.getpixel((x, y))
            rq, gq, bq = (
                max(0, min(255, round(channel / quantize_tolerance) * quantize_tolerance))
                for channel in px[:3]
            )
            hex_c = f"#{rq:02X}{gq:02X}{bq:02X}"
            color_counts[hex_c] = color_counts.get(hex_c, 0) + 1
            total_samples += 1

    total = max(1, total_samples)
    palette = []
    color_meta = []
    # Most frequent bucket first; ties keep first-seen order (stable sort).
    for hex_c, cnt in sorted(color_counts.items(), key=lambda item: item[1], reverse=True):
        pct = round(cnt / total * 100, 1)
        palette.append({"hex": hex_c, "pct": pct, "count": cnt})
        r, g, b = hex_to_rgb(hex_c)
        color_meta.append(
            {"hex": hex_c, "pct": pct, "r": r, "g": g, "b": b,
             "lum": luminance(r, g, b), "sat": round(saturation(r, g, b), 1)}
        )
    return palette, color_meta, total_samples


def _detect_vertical_gradient(img, is_photo):
    """Return {"type", "colors"} when top/middle/bottom brightness differs enough, else None."""
    w, h = img.size

    def strip_lum(y_start, y_end, step=2):
        tl = cnt = 0
        for yy in range(y_start, min(y_end, h), step):
            for xx in range(0, w, 20):
                px = img.getpixel((xx, yy))
                tl += luminance(px[0], px[1], px[2])
                cnt += 1
        return tl / cnt if cnt else 0

    top_lum = strip_lum(0, min(50, h))
    mid_lum = strip_lum(max(0, h // 2 - 25), min(h, h // 2 + 25))
    bot_lum = strip_lum(max(0, h - 50), h)
    grad_range = max(abs(top_lum - bot_lum), abs(top_lum - mid_lum))
    if grad_range <= (30 if is_photo else 50):
        return None

    three_tone = abs(top_lum - mid_lum) > 15 and abs(mid_lum - bot_lum) > 15

    def strip_color(y_pos):
        r_sum = g_sum = b_sum = cnt = 0
        # Average over the middle third of the row.
        for xx in range(w // 3, w * 2 // 3, 10):
            px = img.getpixel((xx, y_pos))
            r_sum += px[0]
            g_sum += px[1]
            b_sum += px[2]
            cnt += 1
        return (r_sum // cnt, g_sum // cnt, b_sum // cnt) if cnt else None

    gradient_colors = []
    for yp in (5, h // 2, h - 5):
        # Clamp: on images of 5 rows or fewer, rows 5 and h - 5 do not exist.
        c = strip_color(min(max(yp, 0), h - 1))
        if c:
            gradient_colors.append(rgb_to_hex(*c))
    return {"type": "vertical-3tone" if three_tone else "vertical", "colors": gradient_colors}


def _classify_harmony(color_meta):
    """Name the color harmony from the hue spread of colors covering more than 0.5%."""
    hues = []
    for cm in color_meta:
        if cm["pct"] <= 0.5:
            continue
        r, g, b = cm["r"], cm["g"], cm["b"]
        mx = max(r, g, b)
        mn = min(r, g, b)
        if mx == mn:
            continue  # grey: no hue
        d = mx - mn
        if mx == r:
            hv = ((g - b) / d) % 6
        elif mx == g:
            hv = (b - r) / d + 2
        else:
            hv = (r - g) / d + 4
        hd = round(hv * 60)
        if hd < 0:
            hd += 360
        hues.append(hd)

    hue_range = 0
    if len(hues) > 1:
        sh = sorted(hues)
        # The occupied arc is the full circle minus the largest empty gap,
        # where the gap across the 360/0 seam counts as well.
        largest_gap = max(sh[i + 1] - sh[i] for i in range(len(sh) - 1))
        largest_gap = max(largest_gap, 360 - sh[-1] + sh[0])
        hue_range = 360 - largest_gap

    if hue_range <= 30:
        return "monochromatic"
    if hue_range <= 60:
        return "analogous"
    if 150 <= hue_range <= 210:
        return "complementary"
    return "neutral"


def extract_colors(image_path, sample_count=2000, quantize_tolerance=15):
    """Extract the palette and semantic color roles of an image.

    Args:
        image_path: Path of the image to analyze.
        sample_count: Approximate number of pixels to sample for the palette.
            Values below 1 are treated as 1.
        quantize_tolerance: Bucket size used to merge similar channel values.
            Values below 1 are treated as 1.

    Returns:
        A dict with the keys ``imageWidth``, ``imageHeight``, ``isPhoto``,
        ``distinctColors``, ``totalColors``, ``samples``, ``background``,
        ``surfaces``, ``text``, ``textSecondary``, ``button``, ``border``,
        ``contrastRatio``, ``harmony``, ``gradient`` and ``palette`` (at most
        20 entries). ``textSecondary``, ``button``, ``border`` and
        ``gradient`` are ``None`` when nothing suitable was found.
    """
    # Zero/negative values would otherwise divide by zero (or hit sqrt of a
    # negative number) in the sampling math.
    sample_count = max(1, sample_count)
    quantize_tolerance = max(1, quantize_tolerance)

    with Image.open(image_path) as source:
        img = source.convert("RGB")
    try:
        w, h = img.size
        is_photo, distinct_color_count = _coarse_photo_check(img)
        palette, color_meta, total_samples = _sample_palette(img, sample_count, quantize_tolerance)
        gradient = _detect_vertical_gradient(img, is_photo)
    finally:
        img.close()

    # The most frequent bucket is taken as the background.
    bg_color = palette[0]["hex"] if palette else "#FFFFFF"
    bg_lum = color_meta[0]["lum"] if color_meta else 255

    # Surfaces: sizeable colors close to the background in brightness.
    min_pct, max_lum_diff = (0.5, 50) if is_photo else (1, 40)
    surfaces = [
        cm for cm in color_meta
        if cm["hex"] != bg_color and cm["pct"] > min_pct and abs(cm["lum"] - bg_lum) < max_lum_diff
    ]
    surfaces.sort(key=lambda cm: cm["pct"], reverse=True)

    # Text: for UI images, the rare colors contrasting most with the
    # background; otherwise (and for photos) a dark/light default.
    text_primary = "#1F2937" if bg_lum > 128 else "#FFFFFF"
    text_secondary = None
    if not is_photo:
        text_candidates = [
            {"hex": cm["hex"], "contrast": contrast_ratio(bg_lum, cm["lum"])}
            for cm in color_meta
            if cm["hex"] != bg_color and cm["pct"] < 3
        ]
        text_candidates = [tc for tc in text_candidates if tc["contrast"] > 3]
        text_candidates.sort(key=lambda tc: tc["contrast"], reverse=True)
        if text_candidates:
            text_primary = text_candidates[0]["hex"]
            if len(text_candidates) > 1:
                text_secondary = text_candidates[1]["hex"]

    # Button: the most saturated small, not-too-dark color.
    button_candidates = [
        cm for cm in color_meta
        if cm["hex"] != bg_color and cm["sat"] > 20 and cm["lum"] > 30 and 0.1 < cm["pct"] < 5
    ]
    button_candidates.sort(key=lambda cm: cm["sat"], reverse=True)
    button_color = button_candidates[0]["hex"] if button_candidates else None

    # Border: a thin-coverage color that differs most in brightness from the background.
    border_candidates = []
    for cm in color_meta:
        lum_diff = abs(cm["lum"] - bg_lum)
        if (cm["hex"] != bg_color and contrast_ratio(bg_lum, cm["lum"]) > 1.3
                and 0.05 < cm["pct"] < 2 and lum_diff > 2):
            border_candidates.append({"hex": cm["hex"], "lum_diff": lum_diff})
    border_candidates.sort(key=lambda bc: bc["lum_diff"], reverse=True)
    border_color = border_candidates[0]["hex"] if border_candidates else None

    text_lum = luminance(*hex_to_rgb(text_primary))
    bg_lum_calc = luminance(*hex_to_rgb(bg_color))
    contrast_ratio_val = round(contrast_ratio(text_lum, bg_lum_calc), 1)

    return {
        "imageWidth": w,
        "imageHeight": h,
        "isPhoto": is_photo,
        "distinctColors": distinct_color_count,
        "totalColors": len(palette),
        "samples": total_samples,
        "background": bg_color,
        "surfaces": [s["hex"] for s in surfaces[:3]] or [bg_color],
        "text": text_primary,
        "textSecondary": text_secondary,
        "button": button_color,
        "border": border_color,
        "contrastRatio": contrast_ratio_val,
        "harmony": _classify_harmony(color_meta),
        "gradient": gradient,
        "palette": palette[:20],
    }
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Layout detection: horizontal sections, vertical columns, component labeling."""
|
|
2
|
+
|
|
3
|
+
from PIL import Image
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _dominant_color(img, x1, y1, x2, y2, step=8):
|
|
7
|
+
counts = {}
|
|
8
|
+
for y in range(y1, y2, step):
|
|
9
|
+
for x in range(x1, x2, step):
|
|
10
|
+
px = img.getpixel((x, y))
|
|
11
|
+
rq = round(px[0] / 20) * 20
|
|
12
|
+
gq = round(px[1] / 20) * 20
|
|
13
|
+
bq = round(px[2] / 20) * 20
|
|
14
|
+
rq = max(0, min(255, rq))
|
|
15
|
+
gq = max(0, min(255, gq))
|
|
16
|
+
bq = max(0, min(255, bq))
|
|
17
|
+
hex_c = f"#{rq:02X}{gq:02X}{bq:02X}"
|
|
18
|
+
counts[hex_c] = counts.get(hex_c, 0) + 1
|
|
19
|
+
if not counts:
|
|
20
|
+
return "#000000"
|
|
21
|
+
return max(counts, key=counts.get)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def detect_layout(image_path):
    """Detect horizontal sections, vertical columns and labeled components.

    The image is scanned in horizontal bands; a new section starts whenever
    the dominant band color changes. For non-photo images the same is done
    with vertical strips to find columns. Sections shorter than a minimum
    height are merged into the following taller section, and each merged
    section is labeled by its position and relative height.

    Args:
        image_path: Path of the image to analyze.

    Returns:
        A dict with ``imageWidth``, ``imageHeight``, ``isPhoto``,
        ``layoutType``, ``sections`` (``y``/``h``/``color`` dicts),
        ``columns`` (``x``/``w``/``color`` dicts) and ``components``.
    """
    with Image.open(image_path) as source:
        img = source.convert("RGB")
    try:
        w, h = img.size

        # --- Photo vs. UI heuristic on a coarse ~30x30 sample grid ---
        coarse_colors = set()
        lum_vals = []
        grid_y = max(1, h // 30)
        grid_x = max(1, w // 30)
        for y in range(0, h, grid_y):
            for x in range(0, w, grid_x):
                px = img.getpixel((x, y))
                coarse_colors.add(f"#{px[0] & 0xF0:02X}{px[1] & 0xF0:02X}{px[2] & 0xF0:02X}")
                lum_vals.append(int(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]))

        edge_count = total_pairs = 0
        for i in range(0, len(lum_vals) - 1, 2):
            if abs(lum_vals[i] - lum_vals[i + 1]) > 40:
                edge_count += 1
            total_pairs += 1
        edge_ratio = edge_count / total_pairs if total_pairs > 0 else 0

        sorted_lums = sorted(lum_vals)
        iqr = 0
        if len(sorted_lums) >= 4:
            iqr = sorted_lums[len(sorted_lums) * 3 // 4] - sorted_lums[len(sorted_lums) // 4]
        lum_range = sorted_lums[-1] - sorted_lums[0] if len(sorted_lums) >= 2 else 0

        is_photo = (
            len(coarse_colors) > 50
            or (len(coarse_colors) >= 15 and iqr < 80)
            or (lum_range > 150 and edge_ratio < 0.3)
        )

        # --- Horizontal sections: split wherever the band color changes ---
        scan_resolution = max(8, h // 60) if is_photo else 4
        sections = []
        prev_color = ""
        section_start = 0
        for y in range(0, h, scan_resolution):
            row_color = _dominant_color(img, 0, y, w, min(h, y + scan_resolution), 8)
            if row_color != prev_color and prev_color != "":
                sections.append({"y": section_start, "h": y - section_start, "color": prev_color})
                section_start = y
            prev_color = row_color
        if h - section_start > 2:
            sections.append({"y": section_start, "h": h - section_start, "color": prev_color})

        # --- Vertical columns (UI images only) ---
        columns = []
        if not is_photo:
            min_col_w = int(w * 0.08)
            x_step = max(1, w // 80)
            prev_col_color = ""
            col_start = 0
            for x in range(0, w, x_step):
                col_color = _dominant_color(img, x, 0, min(w, x + x_step), h, 10)
                if col_color != prev_col_color and prev_col_color != "":
                    col_w = x - col_start
                    if col_w >= min_col_w:
                        columns.append({"x": col_start, "w": col_w, "color": prev_col_color})
                    col_start = x
                prev_col_color = col_color
            # Same ">=" rule as for interior columns, so a final column that
            # is exactly the minimum width is not dropped.
            if w - col_start >= min_col_w:
                columns.append({"x": col_start, "w": w - col_start, "color": prev_col_color})

        # --- Merge slivers into the next section that is tall enough ---
        min_height = max(20, int(h * 0.03)) if is_photo else max(8, int(h * 0.02))
        merged_sections = []
        pending = None  # accumulated run of too-short sections
        for s in sections:
            if s["h"] < min_height:
                if pending is not None:
                    pending["h"] += s["h"]
                else:
                    pending = dict(s)
                continue
            if pending is not None:
                s["y"] = pending["y"]
                s["h"] += pending["h"]
                pending = None
            merged_sections.append(s)
        if pending is not None:
            # Trailing slivers with no taller section after them.
            merged_sections.append(pending)

        # --- Label each section by position and relative height ---
        components = []
        for s in merged_sections:
            rel_y = round(s["y"] / h * 100)
            rel_h = round(s["h"] / h * 100)

            if rel_y < 3:
                label = "hero-padding" if rel_h > 30 else "top-segment"
            elif rel_y + rel_h > 97:
                label = "bottom-segment"
            elif rel_h > 50:
                label = "large-segment"
            elif rel_h < 5:
                label = "thin-band"
            else:
                label = "mid-segment"

            components.append(
                {
                    "type": label,
                    "y_pct": rel_y,
                    "h_pct": rel_h,
                    "y_px": s["y"],
                    "h_px": s["h"],
                    "color": s["color"],
                }
            )
    finally:
        img.close()

    layout_type = "mobile" if w <= 430 else ("landscape/desktop" if w > h else "tablet/mobile")

    return {
        "imageWidth": w,
        "imageHeight": h,
        "isPhoto": is_photo,
        "layoutType": layout_type,
        "sections": merged_sections,
        "columns": columns,
        "components": components,
    }
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
"""OCR module: Tesseract-based text extraction with preprocessing, footer/branding scans, Thai merging."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import sys
|
|
6
|
+
import subprocess
|
|
7
|
+
import tempfile
|
|
8
|
+
from PIL import Image, ImageFilter, ImageOps
|
|
9
|
+
import pytesseract
|
|
10
|
+
|
|
11
|
+
from .utils import merge_thai_text
|
|
12
|
+
|
|
13
|
+
# Locate the tesseract executable: the first candidate that can be launched
# at all (its exit status is not inspected) is used.
_TESS_CANDIDATES = (
    "tesseract",
    r"C:\Program Files\Tesseract-OCR\tesseract.exe",
    r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
    "/usr/bin/tesseract",
    "/usr/local/bin/tesseract",
    "/opt/homebrew/bin/tesseract",
)

_TESS_CMD = None
for _candidate in _TESS_CANDIDATES:
    try:
        subprocess.run([_candidate, "--version"], capture_output=True, timeout=5)
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        continue
    _TESS_CMD = _candidate
    break

if _TESS_CMD:
    pytesseract.pytesseract.tesseract_cmd = _TESS_CMD

# Language data lives in a "tessdata" directory next to this package. The
# directory is created at import time and TESSDATA_PREFIX is pointed at it
# unconditionally, replacing any value already present in the environment.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_USER_TESSDATA = os.path.join(_SCRIPT_DIR, "..", "tessdata")
os.makedirs(_USER_TESSDATA, exist_ok=True)
os.environ["TESSDATA_PREFIX"] = _USER_TESSDATA
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _ensure_lang_data(language):
    """Download missing ``.traineddata`` files for a Tesseract language spec.

    Args:
        language: One or more language codes joined with ``+`` (for example
            ``"tha+eng"``). Empty parts are ignored.

    Files are fetched from the tesseract-ocr/tessdata GitHub repository into
    ``_USER_TESSDATA``. Failures only produce a warning on stderr; nothing is
    raised, so OCR may still fail later if the data is really missing.
    """
    import urllib.request

    for lang in language.split("+"):
        if not lang:
            continue  # e.g. a stray "+" at either end of the spec
        # The code ends up in both a file path and a URL; refuse anything that
        # could escape the tessdata directory.
        if not re.fullmatch(r"[A-Za-z0-9_-]+", lang):
            print(f"Warning: failed to download {lang}: invalid language code", file=sys.stderr)
            continue

        lang_file = os.path.join(_USER_TESSDATA, f"{lang}.traineddata")
        if os.path.exists(lang_file):
            continue

        url = f"https://github.com/tesseract-ocr/tessdata/raw/main/{lang}.traineddata"
        print(f"Downloading {lang} language data...", file=sys.stderr)
        # Download to a side file and rename on success: an interrupted
        # transfer must not leave a truncated file that looks installed.
        partial_file = lang_file + ".part"
        try:
            urllib.request.urlretrieve(url, partial_file)
            os.replace(partial_file, lang_file)
        except Exception as e:
            print(f"Warning: failed to download {lang}: {e}", file=sys.stderr)
            try:
                os.remove(partial_file)
            except OSError:
                pass
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _histogram_stretch(img):
    """Stretch the grayscale histogram of ``img`` to the full 0-255 range.

    Args:
        img: A PIL image in any mode; it is converted to 8-bit grayscale.

    Returns:
        A new ``"L"`` mode image in which the darkest input level maps to 0
        and the brightest to 255, with linear scaling in between. A uniform
        image maps to all zeros.
    """
    gray = img.convert("L")
    min_l, max_l = gray.getextrema()
    rng = max(1, max_l - min_l)  # avoid dividing by zero on uniform images
    # One table entry per possible gray level, applied by Pillow in native
    # code instead of looping over every pixel in Python.
    lut = [max(0, min(255, int((level - min_l) / rng * 255))) for level in range(256)]
    return gray.point(lut)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _adaptive_threshold(img):
    """Return a high-contrast grayscale copy of ``img``.

    Despite the name, the cut-offs are fixed: levels below 100 become 0,
    levels above 160 become 255, and the band in between is remapped with
    ``(level - 80) * 3`` clamped to 0..255.
    """

    def _remap(level):
        if level < 100:
            return 0
        if level > 160:
            return 255
        return max(0, min(255, (level - 80) * 3))

    source = img.convert("L")
    remapped = [_remap(level) for level in list(source.getdata())]
    output = Image.new("L", img.size)
    output.putdata(remapped)
    return output
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _classify_image(img):
|
|
85
|
+
"""Returns (is_photo, lum_vals) for photo vs UI classification."""
|
|
86
|
+
w, h = img.size
|
|
87
|
+
color_sample = set()
|
|
88
|
+
lum_vals = []
|
|
89
|
+
for y in range(0, h, max(1, h // 50)):
|
|
90
|
+
for x in range(0, w, max(1, w // 50)):
|
|
91
|
+
px = img.getpixel((x, y))
|
|
92
|
+
hex_c = f"#{px[0] & 0xF0:02X}{px[1] & 0xF0:02X}{px[2] & 0xF0:02X}"
|
|
93
|
+
color_sample.add(hex_c)
|
|
94
|
+
lum_vals.append(int(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]))
|
|
95
|
+
|
|
96
|
+
edge_count = total_pairs = 0
|
|
97
|
+
for i in range(0, len(lum_vals) - 1, 2):
|
|
98
|
+
if abs(lum_vals[i] - lum_vals[i + 1]) > 40:
|
|
99
|
+
edge_count += 1
|
|
100
|
+
total_pairs += 1
|
|
101
|
+
edge_ratio = edge_count / total_pairs if total_pairs > 0 else 0
|
|
102
|
+
|
|
103
|
+
sorted_lums = sorted(lum_vals)
|
|
104
|
+
iqr = 0
|
|
105
|
+
if len(sorted_lums) >= 4:
|
|
106
|
+
q1 = sorted_lums[len(sorted_lums) // 4]
|
|
107
|
+
q3 = sorted_lums[len(sorted_lums) * 3 // 4]
|
|
108
|
+
iqr = q3 - q1
|
|
109
|
+
lum_range = sorted_lums[-1] - sorted_lums[0] if len(sorted_lums) >= 2 else 0
|
|
110
|
+
|
|
111
|
+
is_photo = (
|
|
112
|
+
len(color_sample) > 50
|
|
113
|
+
or (len(color_sample) >= 15 and iqr < 80)
|
|
114
|
+
or (lum_range > 150 and edge_ratio < 0.3)
|
|
115
|
+
)
|
|
116
|
+
return is_photo, lum_vals, len(color_sample)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _tsv_to_boxes(tsv_text, min_confidence, h, w, y_offset=0):
|
|
120
|
+
"""Parse Tesseract TSV output into structured box list."""
|
|
121
|
+
boxes = []
|
|
122
|
+
lines = [l.strip() for l in tsv_text.split("\n") if l.strip()]
|
|
123
|
+
if len(lines) < 2:
|
|
124
|
+
return boxes
|
|
125
|
+
|
|
126
|
+
header = lines[0].split("\t")
|
|
127
|
+
col_map = {name: idx for idx, name in enumerate(header)}
|
|
128
|
+
|
|
129
|
+
for line in lines[1:]:
|
|
130
|
+
cols = line.split("\t")
|
|
131
|
+
if len(cols) < 12:
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
text = cols[col_map.get("text", -1)] if "text" in col_map else ""
|
|
135
|
+
conf_str = cols[col_map.get("conf", -1)] if "conf" in col_map else ""
|
|
136
|
+
conf = 0.0
|
|
137
|
+
try:
|
|
138
|
+
conf = float(conf_str)
|
|
139
|
+
except (ValueError, IndexError):
|
|
140
|
+
pass
|
|
141
|
+
|
|
142
|
+
if not text.strip() or conf < min_confidence:
|
|
143
|
+
continue
|
|
144
|
+
|
|
145
|
+
def _safe_int(idx_key, default=0):
|
|
146
|
+
try:
|
|
147
|
+
return int(cols[col_map[idx_key]])
|
|
148
|
+
except (ValueError, IndexError, KeyError):
|
|
149
|
+
return default
|
|
150
|
+
|
|
151
|
+
bw = _safe_int("width")
|
|
152
|
+
bh = _safe_int("height")
|
|
153
|
+
if bw < 8 and bh < 8:
|
|
154
|
+
continue
|
|
155
|
+
|
|
156
|
+
bx = _safe_int("left")
|
|
157
|
+
by = _safe_int("top") + y_offset
|
|
158
|
+
|
|
159
|
+
boxes.append(
|
|
160
|
+
{
|
|
161
|
+
"text": text.strip(),
|
|
162
|
+
"conf": round(conf, 1),
|
|
163
|
+
"x": bx,
|
|
164
|
+
"y": by,
|
|
165
|
+
"w": bw,
|
|
166
|
+
"h": bh,
|
|
167
|
+
"zone": "top" if by < h / 3 else ("middle" if by < h * 2 / 3 else "bottom"),
|
|
168
|
+
}
|
|
169
|
+
)
|
|
170
|
+
return boxes
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _dedup_boxes(boxes, new_boxes, img_h):
|
|
174
|
+
"""Deduplicate boxes: skip dups/substrings, extend longer versions."""
|
|
175
|
+
for nb in new_boxes:
|
|
176
|
+
word = nb["text"]
|
|
177
|
+
x, y = nb["x"], nb["y"]
|
|
178
|
+
|
|
179
|
+
dup = any(
|
|
180
|
+
b["text"] == word and abs(b["x"] - x) < 40 and abs(b["y"] - y) < 40 for b in boxes
|
|
181
|
+
)
|
|
182
|
+
subdup = False
|
|
183
|
+
if len(word) >= 3:
|
|
184
|
+
subdup = any(
|
|
185
|
+
word in b["text"] and abs(b["y"] - y) < 30 for b in boxes
|
|
186
|
+
)
|
|
187
|
+
extend = [
|
|
188
|
+
b
|
|
189
|
+
for b in boxes
|
|
190
|
+
if word.startswith(b["text"])
|
|
191
|
+
and abs(b["y"] - y) < 30
|
|
192
|
+
and len(word) > len(b["text"])
|
|
193
|
+
]
|
|
194
|
+
|
|
195
|
+
if extend:
|
|
196
|
+
for b in boxes:
|
|
197
|
+
if b in extend:
|
|
198
|
+
b["text"] = word
|
|
199
|
+
|
|
200
|
+
if not dup and not subdup and not extend:
|
|
201
|
+
nb["zone"] = "top" if y < img_h / 3 else ("middle" if y < img_h * 2 / 3 else "bottom")
|
|
202
|
+
boxes.append(nb)
|
|
203
|
+
return boxes
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _ocr_words(source, language, psm, min_confidence):
    """Run one Tesseract pass and return the words that pass the confidence cut.

    Args:
        source: a PIL image, or anything ``Image.open`` accepts (a file opened
            here is closed again before returning).
        language: Tesseract language spec passed as ``lang``.
        psm: page segmentation mode, passed as ``--psm``.
        min_confidence: words with a lower confidence are dropped.

    Returns:
        A list of ``(text, conf, left, top, width, height)`` tuples with the
        text stripped and the confidence rounded to one decimal.  The list is
        empty when opening the image or running Tesseract fails: individual
        passes are best effort, and other passes may still succeed.
    """
    options = {
        "lang": language,
        "config": f"--psm {psm}",
        "output_type": pytesseract.Output.DICT,
    }
    try:
        # assumes ``Image`` is the PIL.Image module (it is used as
        # ``Image.open`` / ``Image.new`` elsewhere in this file)
        if isinstance(source, Image.Image):
            tsv = pytesseract.image_to_data(source, **options)
        else:
            with Image.open(source) as opened:
                tsv = pytesseract.image_to_data(opened, **options)
    except Exception:
        return []

    def _int_at(column, index):
        values = tsv[column]
        return int(values[index]) if index < len(values) else 0

    words = []
    texts = tsv.get("text", [])
    for i, text in enumerate(texts):
        try:
            conf = float(tsv["conf"][i]) if i < len(tsv["conf"]) else -1
        except (ValueError, TypeError):
            conf = -1

        if not text or text.strip() == "" or conf < min_confidence:
            continue

        words.append(
            (
                text.strip(),
                round(conf, 1),
                _int_at("left", i),
                _int_at("top", i),
                _int_at("width", i),
                _int_at("height", i),
            )
        )
    return words


def _scan_bottom_strip(all_boxes, strips, y_offset, psm_modes, src, language,
                       min_confidence, img_w, img_h):
    """OCR cropped bottom strips and merge their words into ``all_boxes``.

    Words are skipped when both sides are under 8 px, when taller than 50 px,
    or when they start in the right-most 8% of the image.  ``y_offset`` maps
    strip coordinates back to full-image coordinates; ``src`` and the PSM mode
    are recorded on every box.  Returns the merged list.
    """
    for strip in strips:
        for psm in psm_modes:
            for text, conf, x, y, bw, bh in _ocr_words(strip, language, psm, min_confidence):
                if (bw < 8 and bh < 8) or bh > 50:
                    continue
                if x > img_w * 0.92:
                    continue
                nb = {
                    "text": text,
                    "conf": conf,
                    "x": x,
                    "y": y + y_offset,
                    "w": bw,
                    "h": bh,
                    "src": src,
                    "psm": psm,
                }
                all_boxes = _dedup_boxes(all_boxes, [nb], img_h)
    return all_boxes


def extract_text(image_path, language="tha+eng", min_confidence=70):
    """Extract text from an image using Tesseract OCR with preprocessing.

    Args:
        image_path: image to read; passed to ``Image.open``.
        language: Tesseract language spec; missing language data is fetched
            first via ``_ensure_lang_data``.
        min_confidence: minimum word confidence for a box to be kept.

    Returns:
        A dict with ``words`` (number of boxes), ``boxes`` (word boxes sorted
        by zone, then y, then x), ``rawText`` (the longest plain-text OCR
        result, passed through ``merge_thai_text``) and ``byZone`` (joined
        box text for ``top`` / ``middle`` / ``bottom``).

    Passes, in order: the original image (plus a histogram-stretched and a
    thresholded copy when the image is classified as a photo) under four PSM
    modes; the bottom 40 px as "footer"; the bottom 70 px as "branding"; a
    retry on the stretched image when fewer than 5 boxes were found; and a
    plain-text pass.

    Compared with the earlier implementation, temporary files are removed in
    a ``finally`` block (so they no longer leak when a pass raises), files
    opened for OCR are closed, and the full-image histogram stretch is
    computed once and reused instead of up to three times.
    """
    _ensure_lang_data(language)
    img = Image.open(image_path).convert("RGB")
    temp_paths = []
    stretched = []  # holds the path of the stretched full image once created

    def _save_temp(image, tag):
        path = os.path.join(
            tempfile.gettempdir(), f"img2code_{tag}_{os.urandom(4).hex()}.png"
        )
        # Registered before saving so a partially written file is removed too.
        temp_paths.append(path)
        try:
            image.save(path)
        finally:
            image.close()
        return path

    def _stretched_path():
        if not stretched:
            stretched.append(_save_temp(_histogram_stretch(img), "stretch"))
        return stretched[0]

    try:
        w, h = img.size
        is_photo, _, _ = _classify_image(img)

        sources = [image_path]
        if is_photo:
            sources.append(_stretched_path())
            sources.append(_save_temp(_adaptive_threshold(img), "pp2"))

        all_boxes = []
        psm_modes = [3, 6, 4, 11] if is_photo else [3, 11, 6, 4]

        for source in sources:
            for psm in psm_modes:
                for text, conf, bx, by, bw, bh in _ocr_words(source, language, psm, min_confidence):
                    if bw < 8 and bh < 8:
                        continue
                    nb = {"text": text, "conf": conf, "x": bx, "y": by, "w": bw, "h": bh}
                    all_boxes = _dedup_boxes(all_boxes, [nb], h)

        # Footer scan: crop bottom 40px for copyright
        if h > 40:
            footer_crop = img.crop((0, h - 40, w, h))
            footer_stretch = _histogram_stretch(footer_crop)
            try:
                all_boxes = _scan_bottom_strip(
                    all_boxes, [footer_crop, footer_stretch], h - 40, (11, 6),
                    "footer", language, min_confidence, w, h,
                )
            finally:
                footer_crop.close()
                footer_stretch.close()

        # Branding scan: crop bottom 70px for "MADE BY" text
        if h > 70:
            mb_crop = img.crop((0, h - 70, w, h))
            try:
                all_boxes = _scan_bottom_strip(
                    all_boxes, [mb_crop], h - 70, (8, 7, 13),
                    "branding", language, min_confidence, w, h,
                )
            finally:
                mb_crop.close()

        # Retry with preprocessing if word count is low.  Unlike the main
        # pass, this one applies no minimum-size filter.
        if len(all_boxes) < 5:
            retry_path = _stretched_path()
            for psm_r in (3, 6, 11):
                for text, conf, rx, ry, rw, rh in _ocr_words(retry_path, language, psm_r, min_confidence):
                    nb = {"text": text, "conf": conf, "x": rx, "y": ry, "w": rw, "h": rh}
                    all_boxes = _dedup_boxes(all_boxes, [nb], h)

        # Sort boxes by zone, then y, then x
        zone_order = {"top": 1, "middle": 2, "bottom": 3}
        all_boxes.sort(
            key=lambda b: (zone_order.get(b.get("zone", "middle"), 2), b["y"], b["x"])
        )

        # Plain-text pass for full text; each attempt is best effort.
        raw_candidates = {}
        try:
            raw_candidates["orig"] = pytesseract.image_to_string(
                img, lang=language, config="--psm 6"
            ).strip()
        except Exception:
            pass

        if is_photo or "tha" in language:
            pp_raw_path = _stretched_path()
            try:
                with Image.open(pp_raw_path) as pp_raw:
                    raw_candidates["pp"] = pytesseract.image_to_string(
                        pp_raw, lang=language, config="--psm 6"
                    ).strip()
            except Exception:
                pass

        raw_text = ""
        if raw_candidates:
            # On a tie the first candidate ("orig") wins.
            raw_text = max(raw_candidates.values(), key=len)
        raw_text_output = merge_thai_text(raw_text) if raw_text else ""
    finally:
        img.close()
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError:
                pass

    # Build byZone
    def _zone_text(zone_name):
        return merge_thai_text(
            " ".join(b["text"] for b in all_boxes if b.get("zone") == zone_name)
        )

    by_zone = {
        "top": _zone_text("top"),
        "middle": _zone_text("middle"),
        "bottom": _zone_text("bottom"),
    }

    return {
        "words": len(all_boxes),
        "boxes": all_boxes,
        "rawText": raw_text_output,
        "byZone": by_zone,
    }
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Shared utilities: hex/rgb conversion, luminance, contrast, Thai merging."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def hex_to_rgb(hex_str):
    """Convert a ``#RRGGBB`` string into an ``(r, g, b)`` tuple of ints.

    The first character is skipped without being checked, and each following
    pair of characters is parsed as a base-16 number.
    """
    red, green, blue = (int(hex_str[start:start + 2], 16) for start in (1, 3, 5))
    return red, green, blue
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def rgb_to_hex(r, g, b):
    """Format three integer channels as an upper-case ``#RRGGBB`` string.

    Each channel is zero-padded to at least two hex digits; values are not
    range-checked.
    """
    return "#{:02X}{:02X}{:02X}".format(r, g, b)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def luminance(r, g, b):
    """Return the weighted luminance ``0.299*r + 0.587*g + 0.114*b``.

    The result is on the same scale as the inputs (0..255 for 8-bit channels).
    """
    red_part = 0.299 * r
    green_part = 0.587 * g
    blue_part = 0.114 * b
    return red_part + green_part + blue_part
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def contrast_ratio(lum1, lum2):
    """Return ``(brighter + 0.05) / (darker + 0.05)`` for two luminances.

    The argument order does not matter.
    NOTE(review): the 0.05 offset looks like the WCAG formula, which expects
    luminance in 0..1 - confirm which scale callers pass in.
    """
    brighter = max(lum1, lum2)
    darker = min(lum1, lum2)
    return (brighter + 0.05) / (darker + 0.05)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def saturation(r, g, b):
    """Return the colour's saturation as a percentage (0..100).

    Computed as ``(max - min) / max * 100`` over the three channels; a colour
    whose largest channel is 0 yields 0.
    """
    strongest = max(r, g, b)
    if strongest != 0:
        weakest = min(r, g, b)
        return (strongest - weakest) / strongest * 100
    return 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def merge_thai_text(text):
    """Remove whitespace that sits between two Thai characters.

    Any whitespace run, including line breaks, whose neighbours on both sides
    are in the Thai block (U+0E00-U+0E7F) is deleted; whitespace next to
    non-Thai characters is kept.  Falsy input (``""`` or ``None``) is
    returned unchanged.
    """
    if text:
        thai_gap = re.compile(r"(?<=[\u0E00-\u0E7F])\s+(?=[\u0E00-\u0E7F])")
        return thai_gap.sub("", text)
    return text
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "image-to-code",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "Extract structured data (colors, layout, OCR text) from images. No AI vision required.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"image-to-code": "bin/cli.js"
|
|
@@ -24,7 +24,9 @@
|
|
|
24
24
|
"homepage": "https://github.com/phumitchreal/image-to-code#readme",
|
|
25
25
|
"files": [
|
|
26
26
|
"bin/",
|
|
27
|
+
"image_to_code/",
|
|
27
28
|
"package.json",
|
|
28
|
-
"README.md"
|
|
29
|
+
"README.md",
|
|
30
|
+
"requirements.txt"
|
|
29
31
|
]
|
|
30
32
|
}
|
package/requirements.txt
ADDED