image-to-code 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
@@ -1,46 +1,41 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
3
  * image-to-code — npm wrapper.
4
- * Auto-installs the Python package via pip on first run, then delegates.
4
+ * Bundles Python source. Installs pip deps on first run, then delegates.
5
5
  */
6
6
  const { execSync, spawn } = require("child_process");
7
7
  const path = require("path");
8
8
 
9
+ const MODULE_DIR = path.resolve(__dirname, "..");
9
10
  const PYTHON_MODULE = "image_to_code";
10
- const REQUIRED_DEPS = ["Pillow>=10.0.0", "pytesseract>=0.3.10"];
11
11
 
12
12
  function checkPython() {
13
- try {
14
- execSync("python --version", { stdio: "pipe", timeout: 10000 });
15
- return "python";
16
- } catch {
13
+ for (const cmd of ["python", "python3"]) {
17
14
  try {
18
- execSync("python3 --version", { stdio: "pipe", timeout: 10000 });
19
- return "python3";
15
+ execSync(`${cmd} --version`, { stdio: "pipe", timeout: 10000 });
16
+ return cmd;
20
17
  } catch {
21
- return null;
18
+ // try next
22
19
  }
23
20
  }
21
+ return null;
24
22
  }
25
23
 
26
/**
 * Make sure the pip packages imported by the bundled Python module exist.
 *
 * First tries to import PIL and pytesseract with the given interpreter; if
 * that succeeds nothing else happens. Otherwise Pillow and pytesseract are
 * installed with pip (output is shown to the user).
 *
 * @param {string} python - Interpreter command, as returned by checkPython().
 * @throws {Error} If the pip install command fails or times out.
 */
function ensurePipDeps(python) {
  const devNull = process.platform === "win32" ? "nul" : "/dev/null";
  try {
    execSync(`${python} -c "import PIL; import pytesseract" 2>${devNull}`, {
      stdio: "pipe",
      timeout: 10000,
    });
    return; // deps already installed
  } catch {
    // Import failed (or timed out): fall through and install.
  }

  console.log("→ Installing Python dependencies (Pillow, pytesseract)...");
  // The requirement specifiers must be quoted. Unquoted, the shell treats
  // ">=" as an output redirection: pip receives the bare package names (no
  // version floor) and stray files such as "=10.0.0" are created in the
  // current directory.
  execSync(
    `${python} -m pip install "Pillow>=10.0.0" "pytesseract>=0.3.10" --quiet`,
    { stdio: "inherit", timeout: 120000 }
  );
}
45
40
 
46
41
  function main() {
@@ -52,13 +47,15 @@ function main() {
52
47
  process.exit(1);
53
48
  }
54
49
 
55
- if (!checkPackage(python)) {
56
- installPackage(python);
57
- }
50
+ ensurePipDeps(python);
58
51
 
59
52
  const args = process.argv.slice(2);
60
53
  const child = spawn(python, ["-m", PYTHON_MODULE + ".analyze", ...args], {
61
54
  stdio: "inherit",
55
+ env: {
56
+ ...process.env,
57
+ PYTHONPATH: MODULE_DIR + (process.env.PYTHONPATH ? path.delimiter + process.env.PYTHONPATH : ""),
58
+ },
62
59
  });
63
60
  child.on("exit", (code) => process.exit(code));
64
61
  }
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
@@ -0,0 +1,224 @@
1
+ """Main orchestrator: runs color, layout, and OCR analysis, produces combined JSON/CSS report."""
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ import os
7
+ import tempfile
8
+ from PIL import Image, ImageGrab
9
+
10
+ from .colors import extract_colors
11
+ from .layout import detect_layout
12
+ from .ocr import extract_text
13
+
14
+
15
def analyze_image(image_path=None, clipboard=False, ocr_language="tha+eng",
                  min_confidence=70, sample_count=2000, quantize_tolerance=15,
                  full=False, json_output=False):
    """Run the color, layout, and OCR analyses on one image and print the result.

    Args:
        image_path: Path of the image to analyze. Ignored when ``clipboard``
            yields an image.
        clipboard: When true, grab the image from the clipboard, save it to a
            temporary PNG, analyze that file, and delete it afterwards.
        ocr_language: Language string forwarded to ``extract_text``.
        min_confidence: Minimum OCR confidence forwarded to ``extract_text``.
        sample_count: Forwarded to ``extract_colors``.
        quantize_tolerance: Forwarded to ``extract_colors``.
        full: When true (and ``json_output`` is false), append the full JSON
            after the human-readable report.
        json_output: When true, print only the JSON document on stdout.

    Returns:
        None. All results are printed.

    Exits the process with status 1 (``sys.exit``) when the clipboard holds no
    image, the clipboard cannot be read, or the image path does not exist.
    """
    resolved_path = image_path

    if clipboard:
        try:
            clip_img = ImageGrab.grabclipboard()
            if clip_img is None:
                print("Error: No image found in clipboard", file=sys.stderr)
                sys.exit(1)
            temp_dir = os.path.join(tempfile.gettempdir(), "image-to-code")
            os.makedirs(temp_dir, exist_ok=True)
            save_path = os.path.join(temp_dir, f"clipboard_{os.urandom(4).hex()}.png")
            clip_img.save(save_path)
            resolved_path = save_path
            # Status text goes to stderr so it cannot corrupt JSON on stdout.
            print(f"\n[Clipboard image saved to: {save_path}]", file=sys.stderr)
        except Exception as e:
            print(f"Error reading clipboard: {e}", file=sys.stderr)
            sys.exit(1)

    if not resolved_path or not os.path.exists(resolved_path):
        print("Error: Image path does not exist", file=sys.stderr)
        sys.exit(1)

    try:
        print("Analyzing image...", file=sys.stderr)

        colors = extract_colors(resolved_path, sample_count, quantize_tolerance)
        is_photo = colors.get("isPhoto", False)

        layout = detect_layout(resolved_path)
        layout_type = layout.get("layoutType", "unknown")

        ocr_result = extract_text(resolved_path, ocr_language, min_confidence)
        ocr_boxes = ocr_result.get("boxes", [])
        ocr_raw_text = ocr_result.get("rawText", "")
        ocr_by_zone = ocr_result.get("byZone", {})

        w = colors.get("imageWidth", 0)
        h = colors.get("imageHeight", 0)
        radius_val = "8px" if "mobile" in layout_type else "6px"
        vp_val = "width=device-width, initial-scale=1.0" if w <= 430 else ""
        mq_val = "mobile" if w <= 430 else ("tablet" if w <= 768 else "desktop")

        # Button detection: confident, reasonably large OCR boxes below the
        # top quarter of the image.
        buttons = []
        search_top = h * 0.25 if h else 0
        for box in ocr_boxes:
            if (box.get("w", 0) > 30 and box.get("h", 0) > 20
                    and box.get("conf", 0) > 80 and box.get("y", 0) > search_top):
                buttons.append({
                    "text": box.get("text", ""),
                    "x": box.get("x", 0),
                    "y": box.get("y", 0),
                    "w": box.get("w", 0),
                    "h": box.get("h", 0),
                    "zone": box.get("zone", ""),
                    "conf": box.get("conf", 0),
                })

        gradient_info = colors.get("gradient")
        palette_data = colors.get("palette", [])
        surface_list = colors.get("surfaces", [])
        border_color = colors.get("border")
        # The "button" key can be present with value None, in which case a
        # dict.get default would never apply; fall back with `or` instead.
        accent_color = colors.get("button") or "#4F46E5"

        result = {
            "imageType": "photo" if is_photo else "ui",
            "image": {
                "path": resolved_path,
                "width": w,
                "height": h,
                "aspect": round(w / h, 3) if h else 0,
            },
            "colors": {
                "background": colors.get("background", "#FFFFFF"),
                "text": colors.get("text", "#1F2937"),
                "accent": accent_color,
                "border": border_color if border_color else "#E5E7EB",
                "palette": palette_data,
                "surfaces": surface_list,
                "button": colors.get("button"),
                "textSecondary": colors.get("textSecondary"),
                "contrastRatio": colors.get("contrastRatio", 0),
                "harmony": colors.get("harmony", ""),
                "gradient": gradient_info,
            },
            "layout": {
                "type": layout_type,
                "sections": layout.get("sections", []),
                "columns": layout.get("columns", []),
                "components": layout.get("components", []),
            },
            "text": {
                "words": ocr_result.get("words", 0),
                "boxes": ocr_boxes,
                "buttons": buttons,
                "fullText": ocr_raw_text,
                "byZone": ocr_by_zone,
            },
            "css": {
                "customProperties": {
                    "--bg": colors.get("background", "#FFFFFF"),
                    "--surface": surface_list[0] if surface_list else colors.get("background", "#FFFFFF"),
                    "--text": colors.get("text", "#1F2937"),
                    "--primary": accent_color,
                    "--border": border_color if border_color else "#E5E7EB",
                    "--radius": radius_val,
                },
                "surfaces": surface_list,
                "harmony": colors.get("harmony", ""),
                "contrastRatio": colors.get("contrastRatio", 0),
                "gradient": gradient_info,
                "viewport": vp_val,
                "mediaQuery": mq_val,
            },
        }

        if json_output:
            # JSON-only mode: the document is the sole stdout output.
            print(json.dumps(result, indent=2, ensure_ascii=False))
            return

        print()
        print("=" * 72)
        print(" IMAGE ANALYSIS REPORT")
        print("=" * 72)
        print()
        print(f"Image: {w}x{h} ({layout_type}, {'photo' if is_photo else 'UI'})")
        print()
        print("--- Colors ---")
        print(f" Background: {colors.get('background', '')}")
        print(f" Surfaces: {', '.join(str(s) for s in surface_list)}")
        print(f" Text: {colors.get('text', '')} (contrast: {colors.get('contrastRatio', 0)}:1)")
        if colors.get("textSecondary"):
            print(f" Text(2nd): {colors['textSecondary']}")
        print(f" Button: {colors.get('button', '')}")
        print(f" Border: {colors.get('border', '')}")
        print(f" Harmony: {colors.get('harmony', '')}")
        print(f" Palette: {len(palette_data)} unique colors")
        print()
        print("--- Layout Components ---")
        for c in layout.get("components", []):
            print(f" {c.get('type', ''):16} y={c.get('y_pct', 0):2}% h={c.get('h_pct', 0):2}% color={c.get('color', '')}")
        print()
        print(f"--- OCR Text ({ocr_result.get('words', 0)} words >= {min_confidence}%) ---")
        if ocr_raw_text:
            print(ocr_raw_text)
        else:
            print(f" [top] {ocr_by_zone.get('top', '')}")
            print(f" [middle] {ocr_by_zone.get('middle', '')}")
            print(f" [bottom] {ocr_by_zone.get('bottom', '')}")
        print()

        if buttons:
            print(f"--- UI Buttons ({len(buttons)}) ---")
            for b in buttons:
                print(f" [button] {b.get('text', '')} (z={b.get('zone', '')}, y={b.get('y', 0)}, c={b.get('conf', 0)}%)")
            print()

        print("--- CSS Recommendations ---")
        for key, val in result["css"]["customProperties"].items():
            print(f" {key}: {val}")
        if gradient_info:
            g = gradient_info
            print(f" gradient: {g.get('type', '')} {' -> '.join(str(c) for c in g.get('colors', []))}")
        print()
        print("=" * 72)

        if full:
            # The full JSON is printed once, after the report.
            print()
            print("=== Full JSON Output ===")
            print(json.dumps(result, indent=2, ensure_ascii=False))
    finally:
        # Clipboard temp cleanup: runs even when an analysis step raises.
        if clipboard and os.path.exists(resolved_path):
            try:
                os.remove(resolved_path)
            except OSError:
                pass
193
+
194
+
195
def main():
    """Command-line entry point: parse arguments and run ``analyze_image``.

    Prints the usage text and exits with status 1 when neither an image path
    nor ``--clipboard`` is supplied.
    """
    cli = argparse.ArgumentParser(description="Image-to-Code: Extract structured data from images")

    # (flags, add_argument keyword settings), registered in declaration order.
    argument_table = (
        (("image_path",), {"nargs": "?", "help": "Path to image file"}),
        (("--clipboard", "-c"), {"action": "store_true", "help": "Read image from clipboard"}),
        (("--lang", "-l"), {"default": "tha+eng", "help": "Tesseract language (default: tha+eng)"}),
        (("--min-confidence", "-m"), {"type": int, "default": 70, "help": "Minimum OCR confidence (default: 70)"}),
        (("--sample-count",), {"type": int, "default": 2000, "help": "Color sample count (default: 2000)"}),
        (("--quantize-tolerance",), {"type": int, "default": 15, "help": "Color quantize tolerance (default: 15)"}),
        (("--full", "-f"), {"action": "store_true", "help": "Show full JSON output"}),
        (("--json", "-j"), {"action": "store_true", "help": "Output JSON only"}),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    # An image source is mandatory: either a path or the clipboard.
    if not (opts.image_path or opts.clipboard):
        cli.print_help()
        sys.exit(1)

    analyze_image(
        image_path=opts.image_path,
        clipboard=opts.clipboard,
        ocr_language=opts.lang,
        min_confidence=opts.min_confidence,
        sample_count=opts.sample_count,
        quantize_tolerance=opts.quantize_tolerance,
        full=opts.full,
        json_output=opts.json,
    )
221
+
222
+
223
+ if __name__ == "__main__":
224
+ main()
@@ -0,0 +1,252 @@
1
+ """Color extraction: dominant colors, semantic roles, gradient, harmony."""
2
+
3
+ import math
4
+ from PIL import Image
5
+ from .utils import hex_to_rgb, rgb_to_hex, luminance, contrast_ratio, saturation
6
+
7
+
8
def extract_colors(image_path, sample_count=2000, quantize_tolerance=15):
    """Sample an image and derive its palette and semantic color roles.

    Args:
        image_path: Path of the image file to open.
        sample_count: Approximate number of pixels to sample for the palette.
            Values below 1 are treated as 1.
        quantize_tolerance: Bucket size used to round each RGB channel before
            counting. A value of 0 is treated as 1 (no rounding).

    Returns:
        A dict with the keys ``imageWidth``, ``imageHeight``, ``isPhoto``,
        ``distinctColors``, ``totalColors``, ``samples``, ``background``,
        ``surfaces``, ``text``, ``textSecondary``, ``button``, ``border``,
        ``contrastRatio``, ``harmony``, ``gradient`` (a dict or None) and
        ``palette`` (at most 20 entries). ``textSecondary``, ``button`` and
        ``border`` are None when no candidate was found.
    """
    # Both values are used as divisors below; guard against zero/negative
    # input instead of failing with ZeroDivisionError / math domain error.
    sample_count = max(1, sample_count)
    if quantize_tolerance == 0:
        quantize_tolerance = 1

    # The context manager releases the source file as soon as the RGB copy
    # has been made.
    with Image.open(image_path) as source:
        img = source.convert("RGB")

    try:
        w, h = img.size

        # --- Photo vs UI classification from a coarse ~40x40 sample grid ---
        coarse_colors = set()
        lum_vals = []
        c_step_x = max(1, w // 40)
        c_step_y = max(1, h // 40)
        for y in range(0, h, c_step_y):
            for x in range(0, w, c_step_x):
                px = img.getpixel((x, y))
                coarse_hex = f"#{px[0] & 0xF0:02X}{px[1] & 0xF0:02X}{px[2] & 0xF0:02X}"
                coarse_colors.add(coarse_hex)
                lum_vals.append(int(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]))

        distinct_color_count = len(coarse_colors)

        # Fraction of consecutive sample pairs whose brightness differs sharply.
        edge_count = total_pairs = 0
        for i in range(0, len(lum_vals) - 1, 2):
            if abs(lum_vals[i] - lum_vals[i + 1]) > 40:
                edge_count += 1
            total_pairs += 1
        edge_ratio = edge_count / total_pairs if total_pairs > 0 else 0

        sorted_lums = sorted(lum_vals)
        iqr = 0
        if len(sorted_lums) >= 4:
            q1 = sorted_lums[len(sorted_lums) // 4]
            q3 = sorted_lums[len(sorted_lums) * 3 // 4]
            iqr = q3 - q1
        lum_range = sorted_lums[-1] - sorted_lums[0] if len(sorted_lums) >= 2 else 0

        is_photo = (
            distinct_color_count > 50
            or (distinct_color_count >= 15 and iqr < 80)
            or (lum_range > 150 and edge_ratio < 0.3)
        )

        # --- Quantized palette from roughly `sample_count` pixels ---
        color_counts = {}
        step_x = max(1, int(w / math.sqrt(sample_count * w / h))) if w and h else 1
        step_y = max(1, int(h / math.sqrt(sample_count * h / w))) if w and h else 1
        total_samples = 0
        for y in range(0, h, step_y):
            for x in range(0, w, step_x):
                px = img.getpixel((x, y))
                rq = round(px[0] / quantize_tolerance) * quantize_tolerance
                gq = round(px[1] / quantize_tolerance) * quantize_tolerance
                bq = round(px[2] / quantize_tolerance) * quantize_tolerance
                rq = max(0, min(255, rq))
                gq = max(0, min(255, gq))
                bq = max(0, min(255, bq))
                hex_c = f"#{rq:02X}{gq:02X}{bq:02X}"
                color_counts[hex_c] = color_counts.get(hex_c, 0) + 1
                total_samples += 1

        sorted_palette = sorted(color_counts.items(), key=lambda x: x[1], reverse=True)
        total = max(1, total_samples)
        palette = []
        color_meta = []
        for hex_c, cnt in sorted_palette:
            pct = round(cnt / total * 100, 1)
            palette.append({"hex": hex_c, "pct": pct, "count": cnt})
            r, g, b = hex_to_rgb(hex_c)
            lum = luminance(r, g, b)
            sat = saturation(r, g, b)
            color_meta.append(
                {"hex": hex_c, "pct": pct, "r": r, "g": g, "b": b, "lum": lum, "sat": round(sat, 1)}
            )

        # The most frequent quantized color is taken as the background.
        bg_color = palette[0]["hex"] if palette else "#FFFFFF"
        bg_meta = color_meta[0] if color_meta else None
        bg_lum = bg_meta["lum"] if bg_meta else 255

        surfaces = []
        text_primary = None
        text_secondary = None
        button_color = None
        border_color = None
        border_candidates = []

        if not is_photo:
            text_candidates = []
            button_candidates = []

            for cm in color_meta:
                is_bg = cm["hex"] == bg_color
                cr = contrast_ratio(bg_lum, cm["lum"])
                lum_diff = abs(cm["lum"] - bg_lum)

                if not is_bg and cm["pct"] > 1 and lum_diff < 40:
                    surfaces.append(cm)
                if not is_bg and cr > 1.3 and cm["pct"] < 2 and cm["pct"] > 0.05 and lum_diff > 2:
                    border_candidates.append({"hex": cm["hex"], "contrast": cr, "lum_diff": lum_diff})
                if not is_bg and cm["sat"] > 20 and cm["lum"] > 30 and cm["pct"] < 5 and cm["pct"] > 0.1:
                    button_candidates.append({"hex": cm["hex"], "sat": cm["sat"], "contrast": cr})
                if not is_bg and cr > 3 and cm["pct"] < 3:
                    text_candidates.append({"hex": cm["hex"], "contrast": cr, "lum": cm["lum"]})

            surfaces.sort(key=lambda x: x["pct"], reverse=True)

            if text_candidates:
                text_candidates.sort(key=lambda x: x["contrast"], reverse=True)
                text_primary = text_candidates[0]["hex"]
                if len(text_candidates) > 1:
                    text_secondary = text_candidates[1]["hex"]
            else:
                text_primary = "#1F2937" if bg_lum > 128 else "#FFFFFF"

            if button_candidates:
                button_candidates.sort(key=lambda x: x["sat"], reverse=True)
                button_color = button_candidates[0]["hex"]
            if border_candidates:
                border_candidates.sort(key=lambda x: x["lum_diff"], reverse=True)
                border_color = border_candidates[0]["hex"]
        else:
            for cm in color_meta:
                is_bg = cm["hex"] == bg_color
                lum_diff = abs(cm["lum"] - bg_lum)
                if not is_bg and cm["pct"] > 0.5 and lum_diff < 50:
                    surfaces.append(cm)
            surfaces.sort(key=lambda x: x["pct"], reverse=True)
            # Photos get a fixed text color chosen by background brightness.
            text_primary = "#1F2937" if bg_lum > 128 else "#FFFFFF"

            button_candidates = [
                cm
                for cm in color_meta
                if cm["sat"] > 20 and cm["lum"] > 30 and cm["pct"] > 0.1 and cm["pct"] < 5 and cm["hex"] != bg_color
            ]
            if button_candidates:
                button_candidates.sort(key=lambda x: x["sat"], reverse=True)
                button_color = button_candidates[0]["hex"]

            for cm in color_meta:
                is_bg = cm["hex"] == bg_color
                cr = contrast_ratio(bg_lum, cm["lum"])
                lum_diff = abs(cm["lum"] - bg_lum)
                if not is_bg and cr > 1.3 and cm["pct"] < 2 and cm["pct"] > 0.05 and lum_diff > 2:
                    border_candidates.append({"hex": cm["hex"], "contrast": cr, "lum_diff": lum_diff})
            if border_candidates:
                border_candidates.sort(key=lambda x: x["lum_diff"], reverse=True)
                border_color = border_candidates[0]["hex"]

        # --- Vertical gradient detection from top / middle / bottom strips ---
        has_gradient = False
        gradient_type = "none"
        gradient_colors = [bg_color]
        grad_threshold = 30 if is_photo else 50

        def _strip_lum(y_start, y_end, step=2):
            """Mean luminance over rows [y_start, y_end), sampling every 20th column."""
            tl = cnt = 0
            for yy in range(y_start, min(y_end, h), step):
                for xx in range(0, w, 20):
                    px = img.getpixel((xx, yy))
                    tl += luminance(px[0], px[1], px[2])
                    cnt += 1
            return tl / cnt if cnt else 0

        top_lum = _strip_lum(0, min(50, h))
        mid_lum = _strip_lum(max(0, h // 2 - 25), min(h, h // 2 + 25))
        bot_lum = _strip_lum(max(0, h - 50), h)
        grad_range = max(abs(top_lum - bot_lum), abs(top_lum - mid_lum))

        if grad_range > grad_threshold:
            has_gradient = True
            gradient_type = (
                "vertical-3tone"
                if (abs(top_lum - mid_lum) > 15 and abs(mid_lum - bot_lum) > 15)
                else "vertical"
            )

            def _strip_color(y_pos):
                """Average RGB of the middle third of row ``y_pos`` (every 10th column)."""
                r_sum = g_sum = b_sum = cnt = 0
                for xx in range(w // 3, w * 2 // 3, 10):
                    px = img.getpixel((xx, y_pos))
                    r_sum += px[0]
                    g_sum += px[1]
                    b_sum += px[2]
                    cnt += 1
                return (r_sum // cnt, g_sum // cnt, b_sum // cnt) if cnt else None

            gradient_colors = []
            for yp in (5, h // 2, h - 5):
                # Clamp into the image: on images with fewer than 6 rows the
                # fixed offsets would otherwise index past the last row.
                c = _strip_color(min(max(yp, 0), h - 1))
                if c:
                    gradient_colors.append(rgb_to_hex(*c))
    finally:
        img.close()

    # --- Text/background contrast ---
    tr, tg, tb = hex_to_rgb(text_primary)
    text_lum = luminance(tr, tg, tb)
    br, bg, bb = hex_to_rgb(bg_color)
    bg_lum_calc = luminance(br, bg, bb)
    contrast_ratio_val = round(contrast_ratio(text_lum, bg_lum_calc), 1)

    # --- Hue spread of the non-gray colors covering more than 0.5% ---
    hues = []
    for cm in color_meta:
        if cm["pct"] <= 0.5:
            continue
        r, g, b = cm["r"], cm["g"], cm["b"]
        mx = max(r, g, b)
        mn = min(r, g, b)
        if mx == mn:
            continue  # gray: no hue
        d = mx - mn
        if mx == r:
            hv = ((g - b) / d) % 6
        elif mx == g:
            hv = (b - r) / d + 2
        else:
            hv = (r - g) / d + 4
        hd = round(hv * 60)
        if hd < 0:
            hd += 360
        hues.append(hd)

    # Hue range = full circle minus the largest empty gap between hues.
    hue_range = 0
    if len(hues) > 1:
        sh = sorted(hues)
        mg = max(sh[i + 1] - sh[i] for i in range(len(sh) - 1))
        wg = 360 - sh[-1] + sh[0]
        if wg > mg:
            mg = wg
        hue_range = 360 - mg

    if hue_range <= 30:
        harmony = "monochromatic"
    elif hue_range <= 60:
        harmony = "analogous"
    elif 150 <= hue_range <= 210:
        harmony = "complementary"
    else:
        harmony = "neutral"

    surface_colors = [s["hex"] for s in surfaces[:3]] or [bg_color]

    return {
        "imageWidth": w,
        "imageHeight": h,
        "isPhoto": is_photo,
        "distinctColors": distinct_color_count,
        "totalColors": len(palette),
        "samples": total_samples,
        "background": bg_color,
        "surfaces": surface_colors,
        "text": text_primary,
        "textSecondary": text_secondary,
        "button": button_color,
        "border": border_color,
        "contrastRatio": contrast_ratio_val,
        "harmony": harmony,
        "gradient": {"type": gradient_type, "colors": gradient_colors} if has_gradient else None,
        "palette": palette[:20],
    }
@@ -0,0 +1,150 @@
1
+ """Layout detection: horizontal sections, vertical columns, component labeling."""
2
+
3
+ from PIL import Image
4
+
5
+
6
+ def _dominant_color(img, x1, y1, x2, y2, step=8):
7
+ counts = {}
8
+ for y in range(y1, y2, step):
9
+ for x in range(x1, x2, step):
10
+ px = img.getpixel((x, y))
11
+ rq = round(px[0] / 20) * 20
12
+ gq = round(px[1] / 20) * 20
13
+ bq = round(px[2] / 20) * 20
14
+ rq = max(0, min(255, rq))
15
+ gq = max(0, min(255, gq))
16
+ bq = max(0, min(255, bq))
17
+ hex_c = f"#{rq:02X}{gq:02X}{bq:02X}"
18
+ counts[hex_c] = counts.get(hex_c, 0) + 1
19
+ if not counts:
20
+ return "#000000"
21
+ return max(counts, key=counts.get)
22
+
23
+
24
def detect_layout(image_path):
    """Split an image into horizontal bands and vertical columns by dominant color.

    Returns a dict with ``imageWidth``, ``imageHeight``, ``isPhoto``,
    ``layoutType``, ``sections`` (merged horizontal bands), ``columns``
    (only computed for non-photo images) and ``components`` (one labeled
    entry per merged band, with pixel and percentage positions).
    """
    img = Image.open(image_path).convert("RGB")
    w, h = img.size

    # --- Photo vs UI classification from a coarse ~30x30 sample grid ---
    grid_dy = max(1, h // 30)
    grid_dx = max(1, w // 30)
    bucketed = set()
    brightness = []
    for gy in range(0, h, grid_dy):
        for gx in range(0, w, grid_dx):
            px = img.getpixel((gx, gy))
            bucketed.add(f"#{px[0] & 0xF0:02X}{px[1] & 0xF0:02X}{px[2] & 0xF0:02X}")
            brightness.append(int(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]))

    # Non-overlapping consecutive pairs: (0,1), (2,3), ...
    pair_starts = range(0, len(brightness) - 1, 2)
    sharp_pairs = sum(1 for i in pair_starts if abs(brightness[i] - brightness[i + 1]) > 40)
    edge_ratio = sharp_pairs / len(pair_starts) if len(pair_starts) > 0 else 0

    ordered = sorted(brightness)
    n_samples = len(ordered)
    iqr = ordered[n_samples * 3 // 4] - ordered[n_samples // 4] if n_samples >= 4 else 0
    lum_range = ordered[-1] - ordered[0] if n_samples >= 2 else 0

    is_photo = (
        len(bucketed) > 50
        or (len(bucketed) >= 15 and iqr < 80)
        or (lum_range > 150 and edge_ratio < 0.3)
    )

    # --- Horizontal bands: scan top to bottom, cut where the dominant color changes ---
    band_step = max(8, h // 60) if is_photo else 4
    sections = []
    band_color = ""
    band_top = 0
    for y in range(0, h, band_step):
        here = _dominant_color(img, 0, y, w, min(h, y + band_step), 8)
        if band_color != "" and here != band_color:
            sections.append({"y": band_top, "h": y - band_top, "color": band_color})
            band_top = y
        band_color = here

    if h - band_top > 2:
        sections.append({"y": band_top, "h": h - band_top, "color": band_color})

    # --- Vertical columns (UI images only) ---
    columns = []
    if not is_photo:
        min_col_w = int(w * 0.08)
        x_step = max(1, w // 80)
        run_color = ""
        run_left = 0
        for x in range(0, w, x_step):
            here = _dominant_color(img, x, 0, min(w, x + x_step), h, 10)
            if run_color != "" and here != run_color:
                run_width = x - run_left
                if run_width >= min_col_w:
                    columns.append({"x": run_left, "w": run_width, "color": run_color})
                # NOTE(review): the run start advances on every color change,
                # mirroring the band loop above — confirm against the packaged file.
                run_left = x
            run_color = here
        if w - run_left > min_col_w:
            columns.append({"x": run_left, "w": w - run_left, "color": run_color})

    # --- Merge bands thinner than min_height into the next tall band ---
    min_height = max(20, int(h * 0.03)) if is_photo else max(8, int(h * 0.02))
    merged_sections = []
    pending = None  # accumulated run of thin bands awaiting a tall one
    for band in sections:
        if band["h"] < min_height:
            if pending is None:
                pending = dict(band)
            else:
                pending["h"] += band["h"]
            continue
        if pending is not None:
            band["y"] = pending["y"]
            band["h"] += pending["h"]
            pending = None
        merged_sections.append(band)
    if pending is not None:
        merged_sections.append(pending)

    def _label_for(rel_y, rel_h):
        """Name a band from its vertical position and height (both in percent)."""
        if rel_y < 3:
            return "hero-padding" if rel_h > 30 else "top-segment"
        if rel_y + rel_h > 97:
            return "bottom-segment"
        if rel_h > 50:
            return "large-segment"
        if rel_h < 5:
            return "thin-band"
        return "mid-segment"

    components = []
    for band in merged_sections:
        rel_y = round(band["y"] / h * 100)
        rel_h = round(band["h"] / h * 100)
        components.append(
            {
                "type": _label_for(rel_y, rel_h),
                "y_pct": rel_y,
                "h_pct": rel_h,
                "y_px": band["y"],
                "h_px": band["h"],
                "color": band["color"],
            }
        )

    img.close()

    if w <= 430:
        layout_type = "mobile"
    elif w > h:
        layout_type = "landscape/desktop"
    else:
        layout_type = "tablet/mobile"

    return {
        "imageWidth": w,
        "imageHeight": h,
        "isPhoto": is_photo,
        "layoutType": layout_type,
        "sections": merged_sections,
        "columns": columns,
        "components": components,
    }
@@ -0,0 +1,448 @@
1
+ """OCR module: Tesseract-based text extraction with preprocessing, footer/branding scans, Thai merging."""
2
+
3
+ import os
4
+ import re
5
+ import sys
6
+ import subprocess
7
+ import tempfile
8
+ from PIL import Image, ImageFilter, ImageOps
9
+ import pytesseract
10
+
11
+ from .utils import merge_thai_text
12
+
13
# Auto-detect the tesseract binary: the first candidate that can be launched
# (PATH lookup first, then common install locations) is used.
_TESS_CMD = None
for _candidate in [
    "tesseract",
    r"C:\Program Files\Tesseract-OCR\tesseract.exe",
    r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
    "/usr/bin/tesseract",
    "/usr/local/bin/tesseract",
    "/opt/homebrew/bin/tesseract",
]:
    try:
        subprocess.run([_candidate, "--version"], capture_output=True, timeout=5)
        _TESS_CMD = _candidate
        break
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        continue

if _TESS_CMD:
    pytesseract.pytesseract.tesseract_cmd = _TESS_CMD

# Auto-configure tessdata: language files live in a "tessdata" directory next
# to the package and are downloaded on demand by _ensure_lang_data().
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_USER_TESSDATA = os.path.join(_SCRIPT_DIR, "..", "tessdata")
try:
    os.makedirs(_USER_TESSDATA, exist_ok=True)
except OSError:
    # The install location may be read-only (for example a system-wide
    # install). Fall back to a per-machine temp directory instead of failing
    # at import time.
    _USER_TESSDATA = os.path.join(tempfile.gettempdir(), "image-to-code", "tessdata")
    os.makedirs(_USER_TESSDATA, exist_ok=True)
os.environ["TESSDATA_PREFIX"] = _USER_TESSDATA
38
+
39
+
40
def _ensure_lang_data(language):
    """Download any missing ``<lang>.traineddata`` files into ``_USER_TESSDATA``.

    Args:
        language: Tesseract language string; several languages are joined
            with ``+`` (for example ``"tha+eng"``).

    Download failures are reported on stderr as warnings and never raised.
    """
    import urllib.request

    for lang in language.split("+"):
        if not lang:
            continue  # tolerate a stray "+" such as "tha+"

        lang_file = os.path.join(_USER_TESSDATA, f"{lang}.traineddata")
        if os.path.exists(lang_file):
            continue

        url = f"https://github.com/tesseract-ocr/tessdata/raw/main/{lang}.traineddata"
        print(f"Downloading {lang} language data...", file=sys.stderr)

        # Download to a side file and rename only on success, so an
        # interrupted transfer cannot leave a truncated .traineddata that
        # later runs would treat as already installed.
        partial_file = lang_file + ".part"
        try:
            urllib.request.urlretrieve(url, partial_file)
            os.replace(partial_file, lang_file)
        except Exception as e:
            print(f"Warning: failed to download {lang}: {e}", file=sys.stderr)
            try:
                os.remove(partial_file)
            except OSError:
                pass
52
+
53
+
54
def _histogram_stretch(img):
    """Return a grayscale copy of ``img`` with its brightness range stretched.

    The darkest value present is mapped to 0 and the brightest to 255, with a
    linear mapping in between. A flat image (single brightness) maps to 0.
    """
    gray = img.convert("L")
    min_l, max_l = gray.getextrema()
    rng = max(1, max_l - min_l)  # avoid dividing by zero on flat images
    # point() evaluates the mapping once per possible value (a 256-entry
    # lookup table) instead of materializing every pixel in a Python list.
    return gray.point(lambda p: max(0, min(255, int((p - min_l) / rng * 255))))
64
+
65
+
66
def _adaptive_threshold(img):
    """Return a high-contrast grayscale copy of ``img``.

    Despite the name, the thresholds are fixed: values below 100 become
    black, values above 160 become white, and values in between are mapped
    linearly via ``(p - 80) * 3``.
    """
    def _remap(p):
        if p < 100:
            return 0
        if p > 160:
            return 255
        return max(0, min(255, (p - 80) * 3))

    # point() applies the mapping through a 256-entry lookup table rather
    # than looping over every pixel in Python.
    return img.convert("L").point(_remap)
82
+
83
+
84
+ def _classify_image(img):
85
+ """Returns (is_photo, lum_vals) for photo vs UI classification."""
86
+ w, h = img.size
87
+ color_sample = set()
88
+ lum_vals = []
89
+ for y in range(0, h, max(1, h // 50)):
90
+ for x in range(0, w, max(1, w // 50)):
91
+ px = img.getpixel((x, y))
92
+ hex_c = f"#{px[0] & 0xF0:02X}{px[1] & 0xF0:02X}{px[2] & 0xF0:02X}"
93
+ color_sample.add(hex_c)
94
+ lum_vals.append(int(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]))
95
+
96
+ edge_count = total_pairs = 0
97
+ for i in range(0, len(lum_vals) - 1, 2):
98
+ if abs(lum_vals[i] - lum_vals[i + 1]) > 40:
99
+ edge_count += 1
100
+ total_pairs += 1
101
+ edge_ratio = edge_count / total_pairs if total_pairs > 0 else 0
102
+
103
+ sorted_lums = sorted(lum_vals)
104
+ iqr = 0
105
+ if len(sorted_lums) >= 4:
106
+ q1 = sorted_lums[len(sorted_lums) // 4]
107
+ q3 = sorted_lums[len(sorted_lums) * 3 // 4]
108
+ iqr = q3 - q1
109
+ lum_range = sorted_lums[-1] - sorted_lums[0] if len(sorted_lums) >= 2 else 0
110
+
111
+ is_photo = (
112
+ len(color_sample) > 50
113
+ or (len(color_sample) >= 15 and iqr < 80)
114
+ or (lum_range > 150 and edge_ratio < 0.3)
115
+ )
116
+ return is_photo, lum_vals, len(color_sample)
117
+
118
+
119
+ def _tsv_to_boxes(tsv_text, min_confidence, h, w, y_offset=0):
120
+ """Parse Tesseract TSV output into structured box list."""
121
+ boxes = []
122
+ lines = [l.strip() for l in tsv_text.split("\n") if l.strip()]
123
+ if len(lines) < 2:
124
+ return boxes
125
+
126
+ header = lines[0].split("\t")
127
+ col_map = {name: idx for idx, name in enumerate(header)}
128
+
129
+ for line in lines[1:]:
130
+ cols = line.split("\t")
131
+ if len(cols) < 12:
132
+ continue
133
+
134
+ text = cols[col_map.get("text", -1)] if "text" in col_map else ""
135
+ conf_str = cols[col_map.get("conf", -1)] if "conf" in col_map else ""
136
+ conf = 0.0
137
+ try:
138
+ conf = float(conf_str)
139
+ except (ValueError, IndexError):
140
+ pass
141
+
142
+ if not text.strip() or conf < min_confidence:
143
+ continue
144
+
145
+ def _safe_int(idx_key, default=0):
146
+ try:
147
+ return int(cols[col_map[idx_key]])
148
+ except (ValueError, IndexError, KeyError):
149
+ return default
150
+
151
+ bw = _safe_int("width")
152
+ bh = _safe_int("height")
153
+ if bw < 8 and bh < 8:
154
+ continue
155
+
156
+ bx = _safe_int("left")
157
+ by = _safe_int("top") + y_offset
158
+
159
+ boxes.append(
160
+ {
161
+ "text": text.strip(),
162
+ "conf": round(conf, 1),
163
+ "x": bx,
164
+ "y": by,
165
+ "w": bw,
166
+ "h": bh,
167
+ "zone": "top" if by < h / 3 else ("middle" if by < h * 2 / 3 else "bottom"),
168
+ }
169
+ )
170
+ return boxes
171
+
172
+
173
+ def _dedup_boxes(boxes, new_boxes, img_h):
174
+ """Deduplicate boxes: skip dups/substrings, extend longer versions."""
175
+ for nb in new_boxes:
176
+ word = nb["text"]
177
+ x, y = nb["x"], nb["y"]
178
+
179
+ dup = any(
180
+ b["text"] == word and abs(b["x"] - x) < 40 and abs(b["y"] - y) < 40 for b in boxes
181
+ )
182
+ subdup = False
183
+ if len(word) >= 3:
184
+ subdup = any(
185
+ word in b["text"] and abs(b["y"] - y) < 30 for b in boxes
186
+ )
187
+ extend = [
188
+ b
189
+ for b in boxes
190
+ if word.startswith(b["text"])
191
+ and abs(b["y"] - y) < 30
192
+ and len(word) > len(b["text"])
193
+ ]
194
+
195
+ if extend:
196
+ for b in boxes:
197
+ if b in extend:
198
+ b["text"] = word
199
+
200
+ if not dup and not subdup and not extend:
201
+ nb["zone"] = "top" if y < img_h / 3 else ("middle" if y < img_h * 2 / 3 else "bottom")
202
+ boxes.append(nb)
203
+ return boxes
204
+
205
+
206
def _remove_temp_quietly(path):
    """Delete ``path``; a file that is already gone or locked is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def _save_temp_png(image, tag):
    """Save ``image`` to a newly created temp PNG, close it, return the path."""
    fd, path = tempfile.mkstemp(prefix=f"img2code_{tag}_", suffix=".png")
    # Only the reserved name is needed; the image is written through the path.
    os.close(fd)
    try:
        image.save(path)
    except Exception:
        _remove_temp_quietly(path)
        raise
    finally:
        image.close()
    return path


def _iter_ocr_words(source, language, psm, min_confidence, from_file=False):
    """Run one Tesseract TSV pass and yield the confident, non-blank words.

    ``source`` is an image object, or (with ``from_file=True``) something
    ``Image.open`` accepts; in the latter case the image is opened only for
    the duration of the OCR call. Yields
    ``(text, conf, left, top, width, height)`` with ``text`` stripped and
    ``conf`` rounded to one decimal. A pass that raises yields nothing.
    """
    config = f"--psm {psm}"
    try:
        if from_file:
            with Image.open(source) as page:
                tsv = pytesseract.image_to_data(
                    page, lang=language, config=config, output_type=pytesseract.Output.DICT
                )
        else:
            tsv = pytesseract.image_to_data(
                source, lang=language, config=config, output_type=pytesseract.Output.DICT
            )
    except Exception:
        # Best effort: one failing segmentation mode must not abort the others.
        return

    def cell(key, index, default):
        # Columns are indexed defensively in case they differ in length.
        column = tsv[key]
        return column[index] if index < len(column) else default

    for i, text in enumerate(tsv.get("text", [])):
        try:
            conf = float(cell("conf", i, -1))
        except (ValueError, TypeError):
            conf = -1
        if not text or text.strip() == "" or conf < min_confidence:
            continue
        yield (
            text.strip(),
            round(conf, 1),
            int(cell("left", i, 0)),
            int(cell("top", i, 0)),
            int(cell("width", i, 0)),
            int(cell("height", i, 0)),
        )


def _scan_bottom_strip(all_boxes, strips, psm_modes, strip_h, src, img_w, img_h, language, min_confidence):
    """OCR crops of the bottom ``strip_h`` pixels and merge the hits into ``all_boxes``.

    Boxes smaller than 8 px on both sides, taller than 50 px, or starting in
    the right-most 8% of the image are skipped. ``y`` is shifted back into
    full-image coordinates and each box records its ``src`` and ``psm``.
    """
    for strip in strips:
        for psm in psm_modes:
            for text, conf, x, y, bw, bh in _iter_ocr_words(strip, language, psm, min_confidence):
                if (bw < 8 and bh < 8) or bh > 50:
                    continue
                if x > img_w * 0.92:
                    continue
                box = {
                    "text": text, "conf": conf, "x": x, "y": y + img_h - strip_h,
                    "w": bw, "h": bh, "src": src, "psm": psm,
                }
                all_boxes = _dedup_boxes(all_boxes, [box], img_h)
    return all_boxes


def _ocr_raw_text(img, is_photo, language):
    """Return the longest plain-text OCR reading of ``img`` ("" if every pass fails).

    The original image is always tried; a histogram-stretched copy is tried
    as well for photos or when ``language`` contains ``"tha"``. On equal
    length the reading of the original image wins.
    """
    candidates = []
    try:
        candidates.append(pytesseract.image_to_string(img, lang=language, config="--psm 6").strip())
    except Exception:
        pass

    if is_photo or "tha" in language:
        stretched_path = _save_temp_png(_histogram_stretch(img), "raw")
        try:
            with Image.open(stretched_path) as stretched:
                candidates.append(
                    pytesseract.image_to_string(stretched, lang=language, config="--psm 6").strip()
                )
        except Exception:
            pass
        finally:
            _remove_temp_quietly(stretched_path)

    return max(candidates, key=len) if candidates else ""


def extract_text(image_path, language="tha+eng", min_confidence=70):
    """Extract text from image using Tesseract OCR with preprocessing.

    Passes, in order (every hit is merged through ``_dedup_boxes``):

    1. The image itself and, when ``_classify_image`` flags it as a photo,
       a histogram-stretched and an adaptively thresholded copy, each under
       four page-segmentation modes. Boxes under 8 px on both sides are
       dropped.
    2. Footer scan: bottom 40 px (plain and stretched), PSM 11 and 6.
    3. Branding scan: bottom 70 px, PSM 8, 7 and 13.
    4. Retry on a stretched copy (PSM 3, 6, 11, no size filter) when fewer
       than five boxes were found.
    5. A plain-text pass producing ``rawText``.

    Args:
        image_path: Image to read; passed to ``Image.open``.
        language: Tesseract language string.
        min_confidence: Words with a lower Tesseract confidence are ignored.

    Returns:
        dict with ``words`` (box count), ``boxes`` (sorted by zone, y, x),
        ``rawText`` and ``byZone`` (``top``/``middle``/``bottom`` text, both
        passed through ``merge_thai_text``).

    Temporary files and image handles are released even if a pass raises.
    """
    _ensure_lang_data(language)
    # Close the file-backed image right away; only the RGB copy is kept.
    with Image.open(image_path) as source:
        img = source.convert("RGB")

    temp_paths = []
    try:
        w, h = img.size
        is_photo, _, _ = _classify_image(img)

        pass_paths = [image_path]
        if is_photo:
            for tag, enhance in (("pp1", _histogram_stretch), ("pp2", _adaptive_threshold)):
                temp_paths.append(_save_temp_png(enhance(img), tag))
            pass_paths.extend(temp_paths)

        all_boxes = []
        psm_modes = [3, 6, 4, 11] if is_photo else [3, 11, 6, 4]
        for path in pass_paths:
            for psm in psm_modes:
                for text, conf, x, y, bw, bh in _iter_ocr_words(
                    path, language, psm, min_confidence, from_file=True
                ):
                    if bw < 8 and bh < 8:
                        continue
                    box = {"text": text, "conf": conf, "x": x, "y": y, "w": bw, "h": bh}
                    all_boxes = _dedup_boxes(all_boxes, [box], h)

        # Footer scan: crop bottom 40px for copyright
        if h > 40:
            footer_crop = img.crop((0, h - 40, w, h))
            footer_strips = (footer_crop, _histogram_stretch(footer_crop))
            all_boxes = _scan_bottom_strip(
                all_boxes, footer_strips, (11, 6), 40, "footer", w, h, language, min_confidence
            )

        # Branding scan: crop bottom 70px for "MADE BY" text
        if h > 70:
            branding_strips = (img.crop((0, h - 70, w, h)),)
            all_boxes = _scan_bottom_strip(
                all_boxes, branding_strips, (8, 7, 13), 70, "branding", w, h, language, min_confidence
            )

        # Retry with preprocessing if word count is low
        if len(all_boxes) < 5:
            retry_path = _save_temp_png(_histogram_stretch(img), "retry")
            temp_paths.append(retry_path)
            for psm in (3, 6, 11):
                for text, conf, x, y, bw, bh in _iter_ocr_words(
                    retry_path, language, psm, min_confidence, from_file=True
                ):
                    box = {"text": text, "conf": conf, "x": x, "y": y, "w": bw, "h": bh}
                    all_boxes = _dedup_boxes(all_boxes, [box], h)

        # Sort boxes by zone, then y, then x
        zone_order = {"top": 1, "middle": 2, "bottom": 3}
        all_boxes.sort(key=lambda b: (zone_order.get(b.get("zone", "middle"), 2), b["y"], b["x"]))

        # Plain-text pass for full text
        raw_text = _ocr_raw_text(img, is_photo, language)
    finally:
        img.close()
        for path in temp_paths:
            _remove_temp_quietly(path)

    raw_text_output = merge_thai_text(raw_text) if raw_text else ""

    # Build byZone
    def _zone_text(zone_name):
        return merge_thai_text(
            " ".join(b["text"] for b in all_boxes if b.get("zone") == zone_name)
        )

    by_zone = {
        "top": _zone_text("top"),
        "middle": _zone_text("middle"),
        "bottom": _zone_text("bottom"),
    }

    return {
        "words": len(all_boxes),
        "boxes": all_boxes,
        "rawText": raw_text_output,
        "byZone": by_zone,
    }
@@ -0,0 +1,39 @@
1
+ """Shared utilities: hex/rgb conversion, luminance, contrast, Thai merging."""
2
+
3
+ import re
4
+
5
+
6
def hex_to_rgb(hex_str):
    """Convert a hex colour string to an ``(r, g, b)`` tuple of ints.

    Accepts ``#RRGGBB`` and ``RRGGBB`` (anything after the sixth digit, such
    as an alpha pair, is ignored) as well as the shorthand forms ``#RGB`` /
    ``#RGBA`` with or without the leading ``#``.

    Raises:
        ValueError: if fewer than three or exactly five hex digits are
            given, or a digit is not valid hexadecimal.
    """
    digits = hex_str[1:] if hex_str.startswith("#") else hex_str
    if len(digits) in (3, 4):
        # Shorthand: each digit stands for a doubled pair ("f80" -> "ff8800").
        digits = "".join(ch * 2 for ch in digits[:3])
    if len(digits) < 6:
        raise ValueError(f"invalid hex colour: {hex_str!r}")
    r = int(digits[0:2], 16)
    g = int(digits[2:4], 16)
    b = int(digits[4:6], 16)
    return r, g, b
11
+
12
+
13
def rgb_to_hex(r, g, b):
    """Format three integer channels as an upper-case ``#RRGGBB`` string.

    Each channel is rendered as at least two hexadecimal digits; values are
    not clamped or validated.
    """
    channels = (r, g, b)
    return "#" + "".join(format(channel, "02X") for channel in channels)
15
+
16
+
17
def luminance(r, g, b):
    """Return the weighted brightness ``0.299*r + 0.587*g + 0.114*b``.

    The result is on the same scale as the inputs (no normalisation is
    applied).
    """
    red_part = 0.299 * r
    green_part = 0.587 * g
    blue_part = 0.114 * b
    return red_part + green_part + blue_part
19
+
20
+
21
def contrast_ratio(lum1, lum2):
    """Return ``(brighter + 0.05) / (darker + 0.05)`` for two luminance values.

    The arguments may be given in either order; the larger one is always
    the numerator.
    """
    brighter = max(lum1, lum2)
    darker = min(lum1, lum2)
    numerator = brighter + 0.05
    denominator = darker + 0.05
    return numerator / denominator
25
+
26
+
27
def saturation(r, g, b):
    """Return the channel spread as a percentage of the largest channel.

    Computed as ``(max - min) / max * 100``; returns ``0`` when the largest
    channel is zero so the division is never attempted.
    """
    strongest = max(r, g, b)
    weakest = min(r, g, b)
    if strongest == 0:
        return 0
    spread = strongest - weakest
    return spread / strongest * 100
33
+
34
+
35
def merge_thai_text(text):
    """Remove whitespace that sits directly between two Thai-block characters.

    Any run of whitespace whose neighbours on both sides are in the range
    U+0E00-U+0E7F is deleted; whitespace next to any other character is
    kept. Falsy input (``None``, ``""``) is returned unchanged.
    """
    if text:
        gap_between_thai = re.compile(r"(?<=[\u0E00-\u0E7F])\s+(?=[\u0E00-\u0E7F])")
        return gap_between_thai.sub("", text)
    return text
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "image-to-code",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Extract structured data (colors, layout, OCR text) from images. No AI vision required.",
5
5
  "bin": {
6
6
  "image-to-code": "bin/cli.js"
@@ -24,7 +24,9 @@
24
24
  "homepage": "https://github.com/phumitchreal/image-to-code#readme",
25
25
  "files": [
26
26
  "bin/",
27
+ "image_to_code/",
27
28
  "package.json",
28
- "README.md"
29
+ "README.md",
30
+ "requirements.txt"
29
31
  ]
30
32
  }
@@ -0,0 +1,2 @@
1
+ Pillow>=10.0.0
2
+ pytesseract>=0.3.10