openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1078 @@
1
+ """
2
+ FormROIDetector - Enhanced Form Field Detection Library
3
+ =======================================================
4
+ Detects: text fields, checkboxes, radio buttons, date-range boxes,
5
+ tables, dropdowns, signature areas.
6
+
7
+ Output ROI format:
8
+ roi = [
9
+ [(x1, y1), (x2, y2), "field_type", "label"],
10
+ ...
11
+ ]
12
+
13
+ Field types
14
+ -----------
15
+ "text" – single-line text input
16
+ "textarea" – multi-line text area
17
+ "checkbox" – square tick box
18
+ "radio" – circular option button
19
+ "date" – date or date-range field
20
+ "table" – data table region
21
+ "dropdown" – select / combo box
22
+ "signature" – signature / initials box
23
+ """
24
+
25
+ import re
26
+ from dataclasses import dataclass
27
+
28
+ import cv2
29
+ import numpy as np
30
+
31
+ try:
32
+ import pytesseract
33
+
34
+ TESSERACT_AVAILABLE = True
35
+ except ImportError:
36
+ TESSERACT_AVAILABLE = False
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Internal data structures
41
+ # ---------------------------------------------------------------------------
42
+
43
+
44
@dataclass
class ROIRegion:
    """One detected form field: a pixel bounding box plus its metadata.

    ``(x1, y1)`` and ``(x2, y2)`` are the two corners of the box; width and
    height are derived as ``x2 - x1`` and ``y2 - y1``.
    """

    x1: int
    y1: int
    x2: int
    y2: int
    # One of: "text", "textarea", "checkbox", "radio", "date", "table",
    # "dropdown", "signature".
    field_type: str
    label: str = ""
    checked: bool | None = None  # only meaningful for checkbox / radio
    confidence: float = 1.0

    def to_tuple(self) -> list:
        """Return ``[(x1, y1), (x2, y2), field_type, label]`` (a list)."""
        top_left = (self.x1, self.y1)
        bottom_right = (self.x2, self.y2)
        return [top_left, bottom_right, self.field_type, self.label]

    @property
    def bbox(self):
        """The box as a flat ``(x1, y1, x2, y2)`` tuple."""
        return (self.x1, self.y1, self.x2, self.y2)

    @property
    def width(self):
        """Horizontal extent, ``x2 - x1``."""
        return self.x2 - self.x1

    @property
    def height(self):
        """Vertical extent, ``y2 - y1``."""
        return self.y2 - self.y1

    @property
    def area(self):
        """Width multiplied by height."""
        return (self.x2 - self.x1) * (self.y2 - self.y1)

    @property
    def aspect_ratio(self):
        """Width divided by height; the divisor is floored at 1."""
        divisor = self.height
        if divisor < 1:
            divisor = 1
        return self.width / divisor

    @property
    def center(self):
        """Integer midpoint ``(cx, cy)`` of the box (floor division)."""
        mid_x = (self.x1 + self.x2) // 2
        mid_y = (self.y1 + self.y2) // 2
        return (mid_x, mid_y)
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Helper – date-like pattern matcher
88
+ # ---------------------------------------------------------------------------
89
# Case-insensitive keywords / format hints; _classify_general returns "date"
# when this pattern is found in the OCR text read from inside a box.
# NOTE(review): the alternatives are not word-anchored, so short ones such as
# "to", "day" or "from" also match inside longer words ("photo", "today").
_DATE_PATTERNS = re.compile(
    r"(date|dob|d\.o\.b|birth|expir|valid|from|to|period|dd[/\-_]mm|"
    r"mm[/\-_]yy|yyyy|day|month|year)",
    re.IGNORECASE,
)

# Case-insensitive prompt words and arrow glyphs used by _classify_general as
# evidence that a wide, short box is a dropdown.
# NOTE(review): `v\b` matches any word that merely ends in "v", which also
# makes the following `\bv\b` alternative redundant — confirm the intent.
_DROPDOWN_PATTERNS = re.compile(
    r"(select|choose|pick|▼|v\b|\bv\b)",
    re.IGNORECASE,
)
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Core detector
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
+ class FormROIDetector:
107
+ """
108
+ Detect form fields in document / form images and return ROIs in the
109
+ standardised list-of-tuples format.
110
+
111
+ Parameters
112
+ ----------
113
+ min_area : minimum contour area to consider (pixels²)
114
+ enable_ocr : whether to use pytesseract for label extraction
115
+ morph_kernel : morphological kernel size used for contour cleanup
116
+ row_tolerance : pixel tolerance for grouping ROIs into the same row
117
+ circle_dp : HoughCircles dp parameter (radio-button detection)
118
+ debug : draw intermediate steps (returned in result dict)
119
+ """
120
+
121
+ # ------------------------------------------------------------------
122
+ # Construction
123
+ # ------------------------------------------------------------------
124
+ def __init__(
125
+ self,
126
+ min_area: int = 400,
127
+ enable_ocr: bool = True,
128
+ morph_kernel: tuple[int, int] = (3, 3),
129
+ row_tolerance: int = 18,
130
+ circle_dp: float = 1.2,
131
+ debug: bool = False,
132
+ ):
133
+ self.min_area = min_area
134
+ self.enable_ocr = enable_ocr and TESSERACT_AVAILABLE
135
+ self.morph_kernel = morph_kernel
136
+ self.row_tolerance = row_tolerance
137
+ self.circle_dp = circle_dp
138
+ self.debug = debug
139
+
140
+ # ==================================================================
141
+ # PUBLIC API
142
+ # ==================================================================
143
+
144
+ def process(self, image: np.ndarray) -> dict:
145
+ """
146
+ Full detection pipeline.
147
+
148
+ Returns
149
+ -------
150
+ {
151
+ "roi" : [[(x1,y1),(x2,y2), type, label], ...], ← canonical
152
+ "regions" : [ROIRegion, ...], ← rich objects
153
+ "rows" : [[ROIRegion, ...], ...],
154
+ "key_values" : [{"key": str, "value_bbox": tuple, "type": str}, ...],
155
+ "debug_image": np.ndarray | None,
156
+ }
157
+ """
158
+ regions: list[ROIRegion] = []
159
+
160
+ # 1. Table detection (before general contour search)
161
+ table_regions = self._detect_tables(image)
162
+ table_masks = self._build_table_mask(image, table_regions)
163
+ regions.extend(table_regions)
164
+
165
+ # 2. Checkbox detection (Hough squares / contour aspect)
166
+ cb_regions = self._detect_checkboxes(image, table_masks)
167
+ regions.extend(cb_regions)
168
+
169
+ # 3. Radio-button detection
170
+ radio_regions = self._detect_radio_buttons(image, table_masks)
171
+ regions.extend(radio_regions)
172
+
173
+ # 4. General text / textarea / date / dropdown / signature fields
174
+ general_regions = self._detect_general_fields(image, table_masks, regions)
175
+ regions.extend(general_regions)
176
+
177
+ # 5. De-duplicate / merge overlapping regions
178
+ regions = self._deduplicate(regions)
179
+
180
+ # 6. OCR labels
181
+ if self.enable_ocr:
182
+ regions = self._assign_labels(image, regions)
183
+
184
+ # 7. Checkbox / radio fill state
185
+ regions = self._detect_fill_state(image, regions)
186
+
187
+ # 8. Row grouping & key-value pairs
188
+ rows = self._group_rows(regions)
189
+ key_values = self._extract_key_values(rows)
190
+
191
+ # 9. Build canonical output
192
+ roi_list = [r.to_tuple() for r in regions]
193
+
194
+ debug_img = None
195
+ if self.debug:
196
+ debug_img = self.visualize(image, regions)
197
+
198
+ return {
199
+ "roi": roi_list,
200
+ "regions": regions,
201
+ "rows": rows,
202
+ "key_values": key_values,
203
+ "debug_image": debug_img,
204
+ }
205
+
206
+ # ------------------------------------------------------------------
207
+ # Convenience wrapper – returns only the ROI list
208
+ # ------------------------------------------------------------------
209
+ def detect(self, image: np.ndarray) -> list:
210
+ """
211
+ Shorthand that returns only the canonical ROI list.
212
+
213
+ roi = detector.detect(img)
214
+ # → [[(x1,y1),(x2,y2), type, label], ...]
215
+ """
216
+ return self.process(image)["roi"]
217
+
218
+ # ==================================================================
219
+ # DETECTION MODULES
220
+ # ==================================================================
221
+
222
+ # ------------------------------------------------------------------
223
+ # 1. Table detection
224
+ # ------------------------------------------------------------------
225
+ def _detect_tables(self, image: np.ndarray) -> list[ROIRegion]:
226
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
227
+ binary = cv2.adaptiveThreshold(
228
+ gray,
229
+ 255,
230
+ cv2.ADAPTIVE_THRESH_MEAN_C,
231
+ cv2.THRESH_BINARY_INV,
232
+ 15,
233
+ 10,
234
+ )
235
+
236
+ # Horizontal lines
237
+ h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
238
+ horizontal = cv2.morphologyEx(binary, cv2.MORPH_OPEN, h_kernel)
239
+
240
+ # Vertical lines
241
+ v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
242
+ vertical = cv2.morphologyEx(binary, cv2.MORPH_OPEN, v_kernel)
243
+
244
+ table_mask = cv2.add(horizontal, vertical)
245
+
246
+ # Dilate to merge nearby lines into table blocks
247
+ dilate_k = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
248
+ dilated = cv2.dilate(table_mask, dilate_k, iterations=3)
249
+
250
+ contours, _ = cv2.findContours(
251
+ dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
252
+ )
253
+
254
+ regions = []
255
+ for cnt in contours:
256
+ area = cv2.contourArea(cnt)
257
+ if area < self.min_area * 4:
258
+ continue
259
+ x, y, w, h = cv2.boundingRect(cnt)
260
+ # Must have multiple lines to be a table
261
+ h_lines = cv2.countNonZero(horizontal[y : y + h, x : x + w])
262
+ v_lines = cv2.countNonZero(vertical[y : y + h, x : x + w])
263
+ if h_lines > 0 and v_lines > 0:
264
+ regions.append(ROIRegion(x, y, x + w, y + h, "table"))
265
+
266
+ return regions
267
+
268
+ def _build_table_mask(
269
+ self, image: np.ndarray, table_regions: list[ROIRegion]
270
+ ) -> np.ndarray:
271
+ mask = np.zeros(image.shape[:2], dtype=np.uint8)
272
+ for r in table_regions:
273
+ cv2.rectangle(mask, (r.x1, r.y1), (r.x2, r.y2), 255, -1)
274
+ return mask
275
+
276
+ # ------------------------------------------------------------------
277
+ # 2. Checkbox detection
278
+ # ------------------------------------------------------------------
279
+ def _detect_checkboxes(
280
+ self, image: np.ndarray, table_mask: np.ndarray
281
+ ) -> list[ROIRegion]:
282
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
283
+ blurred = cv2.GaussianBlur(gray, (3, 3), 0)
284
+ _, binary = cv2.threshold(
285
+ blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
286
+ )
287
+
288
+ contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
289
+
290
+ regions = []
291
+ for cnt in contours:
292
+ area = cv2.contourArea(cnt)
293
+ if area < 50 or area > 8000:
294
+ continue
295
+
296
+ x, y, w, h = cv2.boundingRect(cnt)
297
+ ar = w / max(h, 1)
298
+
299
+ # Must be roughly square and small
300
+ if not (0.6 <= ar <= 1.6 and 8 <= w <= 80 and 8 <= h <= 80):
301
+ continue
302
+
303
+ # Solidity check – filled squares have high solidity
304
+ hull = cv2.convexHull(cnt)
305
+ hull_area = cv2.contourArea(hull)
306
+ if hull_area == 0:
307
+ continue
308
+ solidity = area / hull_area
309
+ if solidity < 0.65:
310
+ continue
311
+
312
+ # Must NOT be inside a table region already handled
313
+ cx, cy = x + w // 2, y + h // 2
314
+ if table_mask[cy, cx] > 0:
315
+ continue
316
+
317
+ # Approx polygon – checkbox ≈ 4 vertices
318
+ peri = cv2.arcLength(cnt, True)
319
+ approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
320
+ if not (3 <= len(approx) <= 8):
321
+ continue
322
+
323
+ regions.append(ROIRegion(x, y, x + w, y + h, "checkbox"))
324
+
325
+ return regions
326
+
327
+ # ------------------------------------------------------------------
328
+ # 3. Radio button detection
329
+ # ------------------------------------------------------------------
330
+ def _detect_radio_buttons(
331
+ self, image: np.ndarray, table_mask: np.ndarray
332
+ ) -> list[ROIRegion]:
333
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
334
+ blurred = cv2.GaussianBlur(gray, (5, 5), 1)
335
+
336
+ circles = cv2.HoughCircles(
337
+ blurred,
338
+ cv2.HOUGH_GRADIENT,
339
+ dp=self.circle_dp,
340
+ minDist=15,
341
+ param1=60,
342
+ param2=25,
343
+ minRadius=5,
344
+ maxRadius=25,
345
+ )
346
+
347
+ regions = []
348
+ if circles is not None:
349
+ circles = np.uint16(np.around(circles[0]))
350
+ for cx, cy, r in circles:
351
+ if table_mask[cy, cx] > 0:
352
+ continue
353
+ x1, y1 = int(cx - r), int(cy - r)
354
+ x2, y2 = int(cx + r), int(cy + r)
355
+ regions.append(ROIRegion(max(0, x1), max(0, y1), x2, y2, "radio"))
356
+
357
+ return regions
358
+
359
+ # ------------------------------------------------------------------
360
+ # 4. General field detection (text / textarea / date / dropdown / sig)
361
+ # ------------------------------------------------------------------
362
+ def _detect_general_fields(
363
+ self,
364
+ image: np.ndarray,
365
+ table_mask: np.ndarray,
366
+ existing: list[ROIRegion],
367
+ ) -> list[ROIRegion]:
368
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
369
+ binary = cv2.adaptiveThreshold(
370
+ gray,
371
+ 255,
372
+ cv2.ADAPTIVE_THRESH_MEAN_C,
373
+ cv2.THRESH_BINARY_INV,
374
+ 15,
375
+ 8,
376
+ )
377
+
378
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, self.morph_kernel)
379
+ morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
380
+
381
+ contours, _ = cv2.findContours(
382
+ morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
383
+ )
384
+
385
+ existing_bboxes = [(r.x1, r.y1, r.x2, r.y2) for r in existing]
386
+ regions = []
387
+
388
+ for cnt in contours:
389
+ area = cv2.contourArea(cnt)
390
+ if area < self.min_area:
391
+ continue
392
+
393
+ x, y, w, h = cv2.boundingRect(cnt)
394
+ cx, cy = x + w // 2, y + h // 2
395
+ x2, y2 = x + w, y + h
396
+
397
+ # Skip if centre is inside a table
398
+ if table_mask[cy, cx] > 0:
399
+ continue
400
+
401
+ # Skip if heavily overlapping an already-detected region
402
+ if self._overlaps_any(x, y, x2, y2, existing_bboxes, thresh=0.5):
403
+ continue
404
+
405
+ ar = w / max(h, 1)
406
+
407
+ # --- Classify field type by geometry ---
408
+ field_type = self._classify_general(image, x, y, x2, y2, ar, w, h)
409
+
410
+ regions.append(ROIRegion(x, y, x2, y2, field_type))
411
+
412
+ return regions
413
+
414
+ def _classify_general(
415
+ self,
416
+ image: np.ndarray,
417
+ x1: int,
418
+ y1: int,
419
+ x2: int,
420
+ y2: int,
421
+ ar: float,
422
+ w: int,
423
+ h: int,
424
+ ) -> str:
425
+ """Classify a rectangular region into a field type."""
426
+ label_text = ""
427
+ if self.enable_ocr:
428
+ # Peek at OCR content inside the box
429
+ crop = image[y1:y2, x1:x2]
430
+ label_text = self._ocr_text(crop)
431
+
432
+ # Dropdown: wide, short, with a dropdown arrow character
433
+ if ar > 3 and h < 60 and _DROPDOWN_PATTERNS.search(label_text):
434
+ return "dropdown"
435
+
436
+ # Date field: label contains date keywords or has slashes drawn inside
437
+ if _DATE_PATTERNS.search(label_text):
438
+ return "date"
439
+ # Date: look for separator lines inside (dd/mm/yyyy boxes)
440
+ if ar > 1.5 and h < 70 and self._has_internal_dividers(image, x1, y1, x2, y2):
441
+ return "date"
442
+
443
+ # Signature / large blank area: very wide, taller than a text line
444
+ if ar > 4 and h > 60:
445
+ return "signature"
446
+
447
+ # Textarea: roughly square or portrait, large area
448
+ if 0.3 <= ar <= 2.5 and h > 60 and w > 80:
449
+ return "textarea"
450
+
451
+ # Single-line text input: wide and short
452
+ if ar >= 2.5 and h < 70:
453
+ return "text"
454
+
455
+ # Fallback
456
+ return "text"
457
+
458
+ def _has_internal_dividers(
459
+ self,
460
+ image: np.ndarray,
461
+ x1: int,
462
+ y1: int,
463
+ x2: int,
464
+ y2: int,
465
+ ) -> bool:
466
+ """Check whether a box contains internal vertical dividers (date parts)."""
467
+ crop = image[y1:y2, x1:x2]
468
+ gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
469
+ _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
470
+ v_kernel = cv2.getStructuringElement(
471
+ cv2.MORPH_RECT, (1, max(1, crop.shape[0] // 2))
472
+ )
473
+ vertical = cv2.morphologyEx(binary, cv2.MORPH_OPEN, v_kernel)
474
+ return cv2.countNonZero(vertical) > 10
475
+
476
+ # ==================================================================
477
+ # OCR
478
+ # ==================================================================
479
+
480
+ def _ocr_text(self, crop: np.ndarray) -> str:
481
+ if not self.enable_ocr or crop.size == 0:
482
+ return ""
483
+ try:
484
+ text = pytesseract.image_to_string(crop, config="--psm 6 --oem 3")
485
+ return text.strip()
486
+ except Exception:
487
+ return ""
488
+
489
+ # ==================================================================
490
+ # LABEL ASSIGNMENT
491
+ # ==================================================================
492
+
493
+ def _assign_labels(
494
+ self, image: np.ndarray, regions: list[ROIRegion]
495
+ ) -> list[ROIRegion]:
496
+ """
497
+ For each region, look for OCR text immediately to the LEFT or ABOVE
498
+ the bounding box and assign it as the label.
499
+ """
500
+ h_img, w_img = image.shape[:2]
501
+
502
+ for region in regions:
503
+ if region.label:
504
+ continue
505
+
506
+ # Search window: same height as the field, to its left
507
+ search_x1 = max(0, region.x1 - 300)
508
+ search_x2 = region.x1
509
+ search_y1 = max(0, region.y1 - 5)
510
+ search_y2 = min(h_img, region.y2 + 5)
511
+
512
+ left_crop = image[search_y1:search_y2, search_x1:search_x2]
513
+ label = self._ocr_text(left_crop)
514
+
515
+ if not label:
516
+ # Try above
517
+ search_y1b = max(0, region.y1 - 40)
518
+ search_y2b = region.y1
519
+ above_crop = image[search_y1b:search_y2b, region.x1 : region.x2]
520
+ label = self._ocr_text(above_crop)
521
+
522
+ region.label = label.replace("\n", " ").strip()[:80]
523
+
524
+ return regions
525
+
526
+ # ==================================================================
527
+ # FILL STATE (checkbox / radio)
528
+ # ==================================================================
529
+
530
+ def _detect_fill_state(
531
+ self, image: np.ndarray, regions: list[ROIRegion]
532
+ ) -> list[ROIRegion]:
533
+ for region in regions:
534
+ if region.field_type not in ("checkbox", "radio"):
535
+ continue
536
+ x1, y1, x2, y2 = region.x1, region.y1, region.x2, region.y2
537
+ crop = image[y1:y2, x1:x2]
538
+ if crop.size == 0:
539
+ continue
540
+ gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
541
+ _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
542
+ filled_ratio = np.sum(thresh == 255) / thresh.size
543
+ region.checked = filled_ratio > 0.18
544
+
545
+ return regions
546
+
547
+ # ==================================================================
548
+ # DEDUPLICATION
549
+ # ==================================================================
550
+
551
+ def _deduplicate(self, regions: list[ROIRegion]) -> list[ROIRegion]:
552
+ """Remove regions that are nearly identical or heavily overlapping."""
553
+ if not regions:
554
+ return regions
555
+
556
+ # Sort by area descending (keep larger / more specific detections)
557
+ regions = sorted(regions, key=lambda r: r.area, reverse=True)
558
+ kept: list[ROIRegion] = []
559
+
560
+ for candidate in regions:
561
+ dominated = False
562
+ for existing in kept:
563
+ iou = self._iou(candidate, existing)
564
+ if iou > 0.45:
565
+ # Prefer more specific type
566
+ dominated = True
567
+ break
568
+ if not dominated:
569
+ kept.append(candidate)
570
+
571
+ return kept
572
+
573
+ @staticmethod
574
+ def _iou(a: ROIRegion, b: ROIRegion) -> float:
575
+ ix1 = max(a.x1, b.x1)
576
+ iy1 = max(a.y1, b.y1)
577
+ ix2 = min(a.x2, b.x2)
578
+ iy2 = min(a.y2, b.y2)
579
+ inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
580
+ if inter == 0:
581
+ return 0.0
582
+ union = a.area + b.area - inter
583
+ return inter / max(union, 1)
584
+
585
+ @staticmethod
586
+ def _overlaps_any(x1, y1, x2, y2, bboxes, thresh=0.5) -> bool:
587
+ area = max(1, (x2 - x1) * (y2 - y1))
588
+ for bx1, by1, bx2, by2 in bboxes:
589
+ ix1 = max(x1, bx1)
590
+ iy1 = max(y1, by1)
591
+ ix2 = min(x2, bx2)
592
+ iy2 = min(y2, by2)
593
+ inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
594
+ if inter / area >= thresh:
595
+ return True
596
+ return False
597
+
598
+ # ==================================================================
599
+ # ROW GROUPING
600
+ # ==================================================================
601
+
602
+ def _group_rows(self, regions: list[ROIRegion]) -> list[list[ROIRegion]]:
603
+ sorted_regions = sorted(regions, key=lambda r: (r.y1, r.x1))
604
+ rows: list[list[ROIRegion]] = []
605
+ current_row: list[ROIRegion] = []
606
+
607
+ for region in sorted_regions:
608
+ if not current_row:
609
+ current_row.append(region)
610
+ continue
611
+ prev_y = current_row[-1].y1
612
+ if abs(region.y1 - prev_y) < self.row_tolerance:
613
+ current_row.append(region)
614
+ else:
615
+ rows.append(sorted(current_row, key=lambda r: r.x1))
616
+ current_row = [region]
617
+
618
+ if current_row:
619
+ rows.append(sorted(current_row, key=lambda r: r.x1))
620
+
621
+ return rows
622
+
623
+ # ==================================================================
624
+ # KEY-VALUE EXTRACTION
625
+ # ==================================================================
626
+
627
+ def _extract_key_values(self, rows: list[list[ROIRegion]]) -> list[dict]:
628
+ key_values = []
629
+ for row in rows:
630
+ text_fields = [
631
+ r
632
+ for r in row
633
+ if r.field_type in ("text", "textarea", "date", "dropdown", "signature")
634
+ ]
635
+ input_fields = [r for r in row if r.field_type in ("checkbox", "radio")]
636
+
637
+ for tf in text_fields:
638
+ # Nearest input to the right
639
+ candidates = [b for b in input_fields if b.x1 > tf.x2]
640
+ if candidates:
641
+ nearest = min(candidates, key=lambda b: b.x1 - tf.x2)
642
+ key_values.append(
643
+ {
644
+ "key": tf.label or "?",
645
+ "value_bbox": nearest.bbox,
646
+ "type": nearest.field_type,
647
+ }
648
+ )
649
+
650
+ return key_values
651
+
652
+ # ==================================================================
653
+ # VISUALIZATION
654
+ # ==================================================================
655
+
656
+ # Color palette per field type
657
+ _TYPE_COLORS = {
658
+ "text": (34, 197, 94), # green
659
+ "textarea": (16, 185, 129), # teal
660
+ "checkbox": (59, 130, 246), # blue (unchecked)
661
+ "radio": (168, 85, 247), # purple
662
+ "date": (249, 115, 22), # orange
663
+ "table": (234, 179, 8), # yellow
664
+ "dropdown": (236, 72, 153), # pink
665
+ "signature": (239, 68, 68), # red
666
+ }
667
+ _CHECKED_COLOR = (22, 163, 74) # dark green when checked
668
+ _UNCHECKED_COLOR = (59, 130, 246) # blue when unchecked
669
+
670
+ def visualize(
671
+ self,
672
+ image: np.ndarray,
673
+ regions: list[ROIRegion] | None = None,
674
+ result: dict | None = None,
675
+ show_labels: bool = True,
676
+ show_type_legend: bool = True,
677
+ ) -> np.ndarray:
678
+ """
679
+ Draw all detected regions on a copy of *image* and return it.
680
+
681
+ Pass either *regions* directly or the full *result* dict from process().
682
+ """
683
+ if regions is None and result is not None:
684
+ regions = result.get("regions", [])
685
+ if regions is None:
686
+ regions = []
687
+
688
+ vis = image.copy()
689
+
690
+ for region in regions:
691
+ color = self._TYPE_COLORS.get(region.field_type, (200, 200, 200))
692
+
693
+ # Override checkbox / radio color by state
694
+ if (
695
+ region.field_type in ("checkbox", "radio")
696
+ and region.checked is not None
697
+ ):
698
+ color = self._CHECKED_COLOR if region.checked else self._UNCHECKED_COLOR
699
+
700
+ # Draw bounding rect
701
+ cv2.rectangle(vis, (region.x1, region.y1), (region.x2, region.y2), color, 2)
702
+
703
+ if show_labels:
704
+ tag = region.field_type.upper()
705
+ if region.label:
706
+ tag += f": {region.label[:25]}"
707
+ if region.checked is not None:
708
+ tag += " ✓" if region.checked else " ✗"
709
+
710
+ # Background pill for readability
711
+ (tw, th), _ = cv2.getTextSize(tag, cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1)
712
+ ty = max(region.y1 - 4, th + 4)
713
+ cv2.rectangle(
714
+ vis,
715
+ (region.x1, ty - th - 4),
716
+ (region.x1 + tw + 6, ty + 2),
717
+ color,
718
+ -1,
719
+ )
720
+ cv2.putText(
721
+ vis,
722
+ tag,
723
+ (region.x1 + 3, ty - 2),
724
+ cv2.FONT_HERSHEY_SIMPLEX,
725
+ 0.45,
726
+ (255, 255, 255),
727
+ 1,
728
+ cv2.LINE_AA,
729
+ )
730
+
731
+ # Legend
732
+ if show_type_legend:
733
+ lx, ly = 10, 10
734
+ for ft, color in self._TYPE_COLORS.items():
735
+ cv2.rectangle(vis, (lx, ly), (lx + 16, ly + 16), color, -1)
736
+ cv2.putText(
737
+ vis,
738
+ ft,
739
+ (lx + 22, ly + 12),
740
+ cv2.FONT_HERSHEY_SIMPLEX,
741
+ 0.45,
742
+ color,
743
+ 1,
744
+ cv2.LINE_AA,
745
+ )
746
+ ly += 22
747
+
748
+ return vis
749
+
750
+ # ─────────────────────────── NEW METHODS ───────────────────────────
751
+
752
+ def crop_roi(self, image: np.ndarray, region: "ROIRegion") -> np.ndarray:
753
+ """Crop a single ROI region out of the source image.
754
+ Useful for feeding individual fields into an OCR or classifier.
755
+
756
+ Args:
757
+ image: BGR numpy array (the original form image).
758
+ region: ROIRegion object from process()["regions"].
759
+ Returns:
760
+ BGR numpy array crop, or empty array if out of bounds.
761
+ """
762
+ h, w = image.shape[:2]
763
+ x1 = max(0, region.x1)
764
+ y1 = max(0, region.y1)
765
+ x2 = min(w, region.x2)
766
+ y2 = min(h, region.y2)
767
+ return image[y1:y2, x1:x2].copy()
768
+
769
+ def extract_field_values(self, image: np.ndarray, regions) -> dict:
770
+ """OCR every detected field and return {label: text} mapping.
771
+ Skips checkbox/radio (use region.checked instead) and empty labels.
772
+
773
+ Args:
774
+ image: BGR numpy array.
775
+ regions: List of ROIRegion from process()["regions"].
776
+ Returns:
777
+ dict: {field_label: ocr_text}
778
+ """
779
+ if not TESSERACT_AVAILABLE:
780
+ return {}
781
+ values = {}
782
+ for region in regions:
783
+ if region.field_type in ("checkbox", "radio"):
784
+ key = region.label or f"{region.field_type}_{region.x1}_{region.y1}"
785
+ values[key] = region.checked
786
+ continue
787
+ crop = self.crop_roi(image, region)
788
+ if crop.size == 0:
789
+ continue
790
+ text = self._ocr_text(crop)
791
+ key = region.label or f"{region.field_type}_{region.x1}_{region.y1}"
792
+ values[key] = text
793
+ return values
794
+
795
+ def filter_by_type(self, regions, field_type: str):
796
+ """Return only regions matching the given field_type.
797
+
798
+ Args:
799
+ regions: List of ROIRegion from process()["regions"].
800
+ field_type: One of 'text', 'textarea', 'checkbox', 'radio', 'date',
801
+ 'table', 'dropdown', 'signature'.
802
+ Returns:
803
+ List[ROIRegion]
804
+ """
805
+ return [r for r in regions if r.field_type == field_type]
806
+
807
+ def get_checked_fields(self, regions):
808
+ """Return only checkbox and radio regions that are checked.
809
+
810
+ Args:
811
+ regions: List of ROIRegion from process()["regions"].
812
+ Returns:
813
+ List[ROIRegion]
814
+ """
815
+ return [
816
+ r
817
+ for r in regions
818
+ if r.field_type in ("checkbox", "radio") and r.checked is True
819
+ ]
820
+
821
+ def export_to_json(self, result: dict, path: str = "form_rois.json"):
822
+ """Save the canonical ROI list from process() to a JSON file.
823
+
824
+ Args:
825
+ result: Dict returned by process().
826
+ path: Output file path.
827
+ Returns:
828
+ str: Absolute path of the written file.
829
+ """
830
+ import json
831
+ import os
832
+
833
+ roi_serialisable = []
834
+ for entry in result.get("roi", []):
835
+ (x1, y1), (x2, y2), ftype, label = entry
836
+ roi_serialisable.append(
837
+ {"x1": x1, "y1": y1, "x2": x2, "y2": y2, "type": ftype, "label": label}
838
+ )
839
+ with open(path, "w", encoding="utf-8") as f:
840
+ json.dump(roi_serialisable, f, indent=2)
841
+ return os.path.abspath(path)
842
+
843
+ def export_to_csv(self, result: dict, path: str = "form_rois.csv"):
844
+ """Save the ROI list from process() to a CSV file.
845
+ Columns: x1, y1, x2, y2, type, label, checked.
846
+
847
+ Args:
848
+ result: Dict returned by process().
849
+ path: Output file path.
850
+ Returns:
851
+ str: Absolute path of the written file.
852
+ """
853
+ import csv
854
+ import os
855
+
856
+ with open(path, "w", newline="", encoding="utf-8") as f:
857
+ writer = csv.DictWriter(
858
+ f, fieldnames=["x1", "y1", "x2", "y2", "type", "label", "checked"]
859
+ )
860
+ writer.writeheader()
861
+ for region in result.get("regions", []):
862
+ writer.writerow(
863
+ {
864
+ "x1": region.x1,
865
+ "y1": region.y1,
866
+ "x2": region.x2,
867
+ "y2": region.y2,
868
+ "type": region.field_type,
869
+ "label": region.label,
870
+ "checked": region.checked,
871
+ }
872
+ )
873
+ return os.path.abspath(path)
874
+
875
+ def get_field_count(self, regions) -> dict:
876
+ """Return count of each field type detected.
877
+
878
+ Args:
879
+ regions: List of ROIRegion from process()["regions"].
880
+ Returns:
881
+ dict: {'text': 4, 'checkbox': 6, ...}
882
+ """
883
+ counts: dict = {}
884
+ for r in regions:
885
+ counts[r.field_type] = counts.get(r.field_type, 0) + 1
886
+ return counts
887
+
888
+ def get_empty_fields(self, regions):
889
+ """Return checkbox/radio regions that are not checked.
890
+
891
+ Args:
892
+ regions: List of ROIRegion objects.
893
+ Returns:
894
+ List of ROIRegion where field_type is 'checkbox' or 'radio'
895
+ and checked is not True.
896
+ """
897
+ return [
898
+ r
899
+ for r in regions
900
+ if r.field_type in ("checkbox", "radio") and not r.checked
901
+ ]
902
+
903
+ def validate_required_fields(self, regions, required_labels) -> dict:
904
+ """Check which required labels have been filled (checked).
905
+
906
+ Args:
907
+ regions: List of ROIRegion objects.
908
+ required_labels: List of label strings that must be checked.
909
+ Returns:
910
+ dict with keys 'missing' and 'filled', each a list of labels.
911
+ """
912
+ checked_labels = {
913
+ r.label.lower()
914
+ for r in regions
915
+ if r.field_type in ("checkbox", "radio") and r.checked
916
+ }
917
+ missing = [lbl for lbl in required_labels if lbl.lower() not in checked_labels]
918
+ filled = [lbl for lbl in required_labels if lbl.lower() in checked_labels]
919
+ return {"missing": missing, "filled": filled}
920
+
921
+ def get_field_by_label(self, regions, label):
922
+ """Find the first region whose label matches (case-insensitive).
923
+
924
+ Args:
925
+ regions: List of ROIRegion objects.
926
+ label: Label string to search for.
927
+ Returns:
928
+ ROIRegion if found, None otherwise.
929
+ """
930
+ label_lower = label.lower()
931
+ for r in regions:
932
+ if r.label.lower() == label_lower:
933
+ return r
934
+ return None
935
+
936
+ def get_form_completion_score(self, regions) -> float:
937
+ """Return fraction of checkboxes/radios that are checked.
938
+
939
+ Args:
940
+ regions: List of ROIRegion objects.
941
+ Returns:
942
+ Float in [0.0, 1.0]. Returns 0.0 if no checkable fields exist.
943
+ """
944
+ checkable = [r for r in regions if r.field_type in ("checkbox", "radio")]
945
+ if not checkable:
946
+ return 0.0
947
+ filled = sum(1 for r in checkable if r.checked)
948
+ return filled / len(checkable)
949
+
950
+ def highlight_empty_fields(self, image, regions, color=(0, 0, 255), thickness=2):
951
+ """Draw rectangles around empty (unchecked) checkbox/radio fields.
952
+
953
+ Args:
954
+ image: BGR numpy array.
955
+ regions: List of ROIRegion objects.
956
+ color: BGR rectangle color. Defaults to red (0, 0, 255).
957
+ thickness: Rectangle border thickness in pixels.
958
+ Returns:
959
+ Annotated BGR numpy array (copy of input).
960
+ """
961
+ out = image.copy()
962
+ for r in self.get_empty_fields(regions):
963
+ cv2.rectangle(out, (r.x1, r.y1), (r.x2, r.y2), color, thickness)
964
+ return out
965
+
966
+ def extract_all_text(self, image, regions) -> dict:
967
+ """OCR each region and return a mapping of label → text.
968
+
969
+ Args:
970
+ image: BGR numpy array.
971
+ regions: List of ROIRegion objects.
972
+ Returns:
973
+ dict mapping each region's label to its OCR text string.
974
+ """
975
+ result = {}
976
+ for r in regions:
977
+ crop = image[r.y1 : r.y2, r.x1 : r.x2]
978
+ result[r.label] = self._ocr_text(crop)
979
+ return result
980
+
981
+
982
+ # Usages:
983
+
984
+ # import cv2
985
+ # from form_roi_detector import FormROIDetector
986
+
987
+ # # Load your form image
988
+ # image = cv2.imread("my_form.png")
989
+
990
+ # # Create detector (OCR optional — needs pytesseract)
991
+ # detector = FormROIDetector(enable_ocr=True)
992
+
993
+ # # detect() → canonical ROI list only
994
+ # roi = detector.detect(image)
995
+
996
+ # # Each entry: [(x1,y1), (x2,y2), field_type, label]
997
+ # for entry in roi:
998
+ # (x1, y1), (x2, y2), ftype, label = entry
999
+ # print(f"[{ftype}] '{label}' → ({x1},{y1})→({x2},{y2})")
1000
+
1001
+ # result = detector.process(image)
1002
+
1003
+ # roi = result["roi"] # canonical list ← same as detect()
1004
+ # regions = result["regions"] # list[ROIRegion] ← rich objects
1005
+ # rows = result["rows"] # grouped by Y position
1006
+ # key_values = result["key_values"] # [{key, value_bbox, type}, ...]
1007
+
1008
+
1009
+ # Advanced Usage:
1010
+ # image = cv2.imread("my_form.png")
1011
+ # detector = FormROIDetector(
1012
+ # min_area = 400, # px² — ignore tiny noise contours
1013
+ # enable_ocr = True, # False if pytesseract not installed
1014
+ # morph_kernel = (3, 3), # larger → merges nearby strokes
1015
+ # row_tolerance= 18, # px — Y-delta for same-row grouping
1016
+ # circle_dp = 1.2, # HoughCircles dp (radio detection)
1017
+ # debug = False, # True → result["debug_image"] set
1018
+ # )
1019
+ # result = detector.process(image)  # process() returns the dict used below; detect() returns only the ROI list
1020
+
1021
+ # regions = result["regions"]
1022
+
1023
+ # for r in regions:
1024
+ # print(r.field_type) # "text"|"checkbox"|"radio"|"date"…
1025
+ # print(r.label) # OCR text to the left / above
1026
+ # print(r.checked) # True/False/None (checkbox+radio only)
1027
+ # print(r.bbox) # (x1, y1, x2, y2)
1028
+ # print(r.to_tuple()) # canonical [(x1,y1),(x2,y2),type,label]
1029
+ # kv = result["key_values"]
1030
+ # # Links a text label field to the nearest checkbox/radio
1031
+ # # [{"key": "Allergic", "value_bbox": (740,980,1320,1078), "type": "checkbox"}, …]
1032
+
1033
+ # for pair in kv:
1034
+ # print(f"{pair['key']} → {pair['type']} @ {pair['value_bbox']}")
1035
+
1036
+ # detector = FormROIDetector(debug=True)
1037
+ # result = detector.process(image)
1038
+
1039
+ # debug_img = result["debug_image"] # annotated np.ndarray
1040
+ # cv2.imwrite("debug.png", debug_img)
1041
+
1042
+
1043
+ # ROI Output Format:
1044
+ # roi = [
1045
+ # [(90, 980), (650, 1120), "text", "Name" ],
1046
+ # [(740, 980), (1320, 1078), "checkbox", "Allergic" ],
1047
+ # [(90, 1140), (650, 1200), "date", "Date of Birth"],
1048
+ # [(740, 1140), (900, 1200), "radio", "Male" ],
1049
+ # [(920, 1140), (1080, 1200), "radio", "Female" ],
1050
+ # [(90, 1220), (1320, 1460), "textarea", "Comments" ],
1051
+ # [(90, 1480), (1320, 1760), "table", "" ],
1052
+ # [(90, 1780), (600, 1840), "dropdown", "Country" ],
1053
+ # [(90, 1860), (500, 1940), "signature", "Signature" ],
1054
+ # ]
1055
+
1056
+
1057
+ # Visualization
1058
+ # Option 1 — via process() result
1059
+ # image = cv2.imread("my_form.png")
1060
+ # detector = FormROIDetector(enable_ocr=True)
1061
+ # result = detector.process(image)
1062
+ # vis = detector.visualize(image, result=result)
1063
+ # cv2.imwrite("annotated.png", vis)
1064
+
1065
+ # # Option 2 — pass regions directly
1066
+ # vis = detector.visualize(image, regions=result["regions"])
1067
+
1068
+ # # Option 3 — show in a window (while developing)
1069
+ # cv2.imshow("Form Fields", vis)
1070
+ # cv2.waitKey(0)
1071
+ # cv2.destroyAllWindows()
1072
+
1073
+ # # Option 4 — Jupyter / Colab inline display
1074
+ # from IPython.display import display
1075
+ # import PIL.Image, io, numpy as np
1076
+
1077
+ # rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
1078
+ # display(PIL.Image.fromarray(rgb))