openvisionkit 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openvisionkit/__init__.py +1 -0
- openvisionkit/_version.py +24 -0
- openvisionkit/capture/draw_object.py +296 -0
- openvisionkit/capture/image_template.py +61 -0
- openvisionkit/capture/screen_capture.py +13 -0
- openvisionkit/capture/video_recorder.py +128 -0
- openvisionkit/capture/video_template.py +336 -0
- openvisionkit/lib/classifier.py +186 -0
- openvisionkit/lib/face_detector.py +587 -0
- openvisionkit/lib/face_mesh_detector.py +913 -0
- openvisionkit/lib/form_detector.py +465 -0
- openvisionkit/lib/form_roi_annotator.py +679 -0
- openvisionkit/lib/form_roi_detector.py +1078 -0
- openvisionkit/lib/fps_counter.py +38 -0
- openvisionkit/lib/hair_segmentation.py +298 -0
- openvisionkit/lib/hand_detector.py +1230 -0
- openvisionkit/lib/image_detector.py +1095 -0
- openvisionkit/lib/object_detector.py +401 -0
- openvisionkit/lib/pose_detector.py +919 -0
- openvisionkit/lib/selfie_segmentation.py +528 -0
- openvisionkit/lib/text_detector.py +1229 -0
- openvisionkit/utility/live_plot.py +141 -0
- openvisionkit/utility/vision_utilis.py +871 -0
- openvisionkit-0.4.0.dist-info/METADATA +1018 -0
- openvisionkit-0.4.0.dist-info/RECORD +26 -0
- openvisionkit-0.4.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1078 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FormROIDetector - Enhanced Form Field Detection Library
|
|
3
|
+
=======================================================
|
|
4
|
+
Detects: text fields, checkboxes, radio buttons, date-range boxes,
|
|
5
|
+
tables, dropdowns, signature areas.
|
|
6
|
+
|
|
7
|
+
Output ROI format:
|
|
8
|
+
roi = [
|
|
9
|
+
[(x1, y1), (x2, y2), "field_type", "label"],
|
|
10
|
+
...
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
Field types
|
|
14
|
+
-----------
|
|
15
|
+
"text" – single-line text input
|
|
16
|
+
"textarea" – multi-line text area
|
|
17
|
+
"checkbox" – square tick box
|
|
18
|
+
"radio" – circular option button
|
|
19
|
+
"date" – date or date-range field
|
|
20
|
+
"table" – data table region
|
|
21
|
+
"dropdown" – select / combo box
|
|
22
|
+
"signature" – signature / initials box
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import re
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
|
|
28
|
+
import cv2
|
|
29
|
+
import numpy as np
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
import pytesseract
|
|
33
|
+
|
|
34
|
+
TESSERACT_AVAILABLE = True
|
|
35
|
+
except ImportError:
|
|
36
|
+
TESSERACT_AVAILABLE = False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
# Internal data structures
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
|
|
45
|
+
class ROIRegion:
|
|
46
|
+
x1: int
|
|
47
|
+
y1: int
|
|
48
|
+
x2: int
|
|
49
|
+
y2: int
|
|
50
|
+
field_type: str # "text" | "textarea" | "checkbox" | "radio" |
|
|
51
|
+
# "date" | "table" | "dropdown" | "signature"
|
|
52
|
+
label: str = ""
|
|
53
|
+
checked: bool | None = None # checkbox / radio only
|
|
54
|
+
confidence: float = 1.0
|
|
55
|
+
|
|
56
|
+
# ------------------------------------------------------------------ #
|
|
57
|
+
def to_tuple(self) -> list:
|
|
58
|
+
"""Return the canonical output format requested by the user."""
|
|
59
|
+
return [(self.x1, self.y1), (self.x2, self.y2), self.field_type, self.label]
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def bbox(self):
|
|
63
|
+
return (self.x1, self.y1, self.x2, self.y2)
|
|
64
|
+
|
|
65
|
+
@property
|
|
66
|
+
def width(self):
|
|
67
|
+
return self.x2 - self.x1
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def height(self):
|
|
71
|
+
return self.y2 - self.y1
|
|
72
|
+
|
|
73
|
+
@property
|
|
74
|
+
def area(self):
|
|
75
|
+
return self.width * self.height
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
def aspect_ratio(self):
|
|
79
|
+
return self.width / max(self.height, 1)
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def center(self):
|
|
83
|
+
return ((self.x1 + self.x2) // 2, (self.y1 + self.y2) // 2)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Helper – date-like pattern matcher
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# Keywords that mark a field as date-like.  "from" / "to" are anchored on
# word boundaries so they no longer fire inside ordinary words such as
# "Total" or "Customer"; the remaining entries are deliberately prefix /
# substring matches ("expir" -> "expiry", "expiration").
_DATE_PATTERNS = re.compile(
    r"(date|dob|d\.o\.b|birth|expir|valid|\bfrom\b|\bto\b|period|dd[/\-_]mm|"
    r"mm[/\-_]yy|yyyy|day|month|year)",
    re.IGNORECASE,
)

# Hints that a box is a dropdown.  A lone "v" (presumably the OCR reading of
# the arrow glyph) must stand alone as a word; the former `v\b` alternative
# matched every word that merely ends in "v" (e.g. "Nov").
_DROPDOWN_PATTERNS = re.compile(
    r"(select|choose|pick|▼|\bv\b)",
    re.IGNORECASE,
)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# Core detector
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class FormROIDetector:
|
|
107
|
+
"""
|
|
108
|
+
Detect form fields in document / form images and return ROIs in the
|
|
109
|
+
standardised list-of-tuples format.
|
|
110
|
+
|
|
111
|
+
Parameters
|
|
112
|
+
----------
|
|
113
|
+
min_area : minimum contour area to consider (pixels²)
|
|
114
|
+
enable_ocr : whether to use pytesseract for label extraction
|
|
115
|
+
morph_kernel : morphological kernel size used for contour cleanup
|
|
116
|
+
row_tolerance : pixel tolerance for grouping ROIs into the same row
|
|
117
|
+
circle_dp : HoughCircles dp parameter (radio-button detection)
|
|
118
|
+
debug : draw intermediate steps (returned in result dict)
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
# ------------------------------------------------------------------
|
|
122
|
+
# Construction
|
|
123
|
+
# ------------------------------------------------------------------
|
|
124
|
+
def __init__(
|
|
125
|
+
self,
|
|
126
|
+
min_area: int = 400,
|
|
127
|
+
enable_ocr: bool = True,
|
|
128
|
+
morph_kernel: tuple[int, int] = (3, 3),
|
|
129
|
+
row_tolerance: int = 18,
|
|
130
|
+
circle_dp: float = 1.2,
|
|
131
|
+
debug: bool = False,
|
|
132
|
+
):
|
|
133
|
+
self.min_area = min_area
|
|
134
|
+
self.enable_ocr = enable_ocr and TESSERACT_AVAILABLE
|
|
135
|
+
self.morph_kernel = morph_kernel
|
|
136
|
+
self.row_tolerance = row_tolerance
|
|
137
|
+
self.circle_dp = circle_dp
|
|
138
|
+
self.debug = debug
|
|
139
|
+
|
|
140
|
+
# ==================================================================
|
|
141
|
+
# PUBLIC API
|
|
142
|
+
# ==================================================================
|
|
143
|
+
|
|
144
|
+
def process(self, image: np.ndarray) -> dict:
    """
    Full detection pipeline.

    Stages, in order: tables, checkboxes, radio buttons, general fields,
    de-duplication, OCR labels (only when OCR is enabled), fill state,
    row grouping and key/value pairing.

    Parameters
    ----------
    image : BGR image.  Single-channel and 4-channel (BGRA) arrays are
            converted to BGR up front, because every detection stage
            calls ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` and would
            otherwise fail on them.

    Returns
    -------
    {
        "roi"        : [[(x1,y1),(x2,y2), type, label], ...],  ← canonical
        "regions"    : [ROIRegion, ...],                        ← rich objects
        "rows"       : [[ROIRegion, ...], ...],
        "key_values" : [{"key": str, "value_bbox": tuple, "type": str}, ...],
        "debug_image": np.ndarray | None,
    }
    """
    # Normalise the channel layout once so the stages below can assume BGR.
    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.ndim == 3 and image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

    regions: list[ROIRegion] = []

    # 1. Table detection (before general contour search)
    table_regions = self._detect_tables(image)
    table_mask = self._build_table_mask(image, table_regions)
    regions.extend(table_regions)

    # 2. Checkbox detection (small, roughly square contours)
    regions.extend(self._detect_checkboxes(image, table_mask))

    # 3. Radio-button detection (Hough circles)
    regions.extend(self._detect_radio_buttons(image, table_mask))

    # 4. General text / textarea / date / dropdown / signature fields.
    #    The regions found so far are passed in so overlapping contours
    #    are skipped.
    regions.extend(self._detect_general_fields(image, table_mask, regions))

    # 5. De-duplicate / merge overlapping regions
    regions = self._deduplicate(regions)

    # 6. OCR labels
    if self.enable_ocr:
        regions = self._assign_labels(image, regions)

    # 7. Checkbox / radio fill state
    regions = self._detect_fill_state(image, regions)

    # 8. Row grouping & key-value pairs
    rows = self._group_rows(regions)
    key_values = self._extract_key_values(rows)

    # 9. Build canonical output
    return {
        "roi": [r.to_tuple() for r in regions],
        "regions": regions,
        "rows": rows,
        "key_values": key_values,
        "debug_image": self.visualize(image, regions) if self.debug else None,
    }
|
|
205
|
+
|
|
206
|
+
# ------------------------------------------------------------------
|
|
207
|
+
# Convenience wrapper – returns only the ROI list
|
|
208
|
+
# ------------------------------------------------------------------
|
|
209
|
+
def detect(self, image: np.ndarray) -> list:
    """
    Run process() and hand back just its "roi" entry.

        roi = detector.detect(img)
        # → [[(x1,y1),(x2,y2), type, label], ...]
    """
    result = self.process(image)
    return result["roi"]
|
|
217
|
+
|
|
218
|
+
# ==================================================================
|
|
219
|
+
# DETECTION MODULES
|
|
220
|
+
# ==================================================================
|
|
221
|
+
|
|
222
|
+
# ------------------------------------------------------------------
|
|
223
|
+
# 1. Table detection
|
|
224
|
+
# ------------------------------------------------------------------
|
|
225
|
+
def _detect_tables(self, image: np.ndarray) -> list[ROIRegion]:
    """Find table blocks from long horizontal and vertical strokes.

    The image is adaptively thresholded (inverted), strokes at least
    40 px long are isolated with morphological opening, merged by
    dilation, and every resulting blob that is large enough and contains
    both stroke directions is reported as a "table" region.
    """
    inverted = cv2.adaptiveThreshold(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        15,
        10,
    )

    def _strokes(kernel_size):
        # Opening with a thin, long kernel keeps only lines in that direction.
        element = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        return cv2.morphologyEx(inverted, cv2.MORPH_OPEN, element)

    horizontal = _strokes((40, 1))
    vertical = _strokes((1, 40))

    # Dilate the combined line mask so nearby lines fuse into one blob.
    grow = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
    blobs = cv2.dilate(cv2.add(horizontal, vertical), grow, iterations=3)

    contours, _ = cv2.findContours(
        blobs, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    tables = []
    for contour in contours:
        # Tables must be at least four times the generic minimum area.
        if cv2.contourArea(contour) < self.min_area * 4:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        window = (slice(y, y + h), slice(x, x + w))
        horizontal_px = cv2.countNonZero(horizontal[window])
        vertical_px = cv2.countNonZero(vertical[window])
        # Require line pixels in both directions inside the blob.
        if horizontal_px > 0 and vertical_px > 0:
            tables.append(ROIRegion(x, y, x + w, y + h, "table"))

    return tables
|
|
267
|
+
|
|
268
|
+
def _build_table_mask(
|
|
269
|
+
self, image: np.ndarray, table_regions: list[ROIRegion]
|
|
270
|
+
) -> np.ndarray:
|
|
271
|
+
mask = np.zeros(image.shape[:2], dtype=np.uint8)
|
|
272
|
+
for r in table_regions:
|
|
273
|
+
cv2.rectangle(mask, (r.x1, r.y1), (r.x2, r.y2), 255, -1)
|
|
274
|
+
return mask
|
|
275
|
+
|
|
276
|
+
# ------------------------------------------------------------------
|
|
277
|
+
# 2. Checkbox detection
|
|
278
|
+
# ------------------------------------------------------------------
|
|
279
|
+
def _detect_checkboxes(
    self, image: np.ndarray, table_mask: np.ndarray
) -> list[ROIRegion]:
    """Collect small, roughly square contours outside tables as checkboxes.

    A contour qualifies when it passes, in order: an area window, a
    size / aspect-ratio window, a solidity floor, the table-mask test on
    its bounding-box centre, and a polygon-vertex count window.
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (3, 3), 0
    )
    _, ink = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    contours, _ = cv2.findContours(ink, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for contour in contours:
        contour_area = cv2.contourArea(contour)
        if not 50 <= contour_area <= 8000:
            continue

        x, y, w, h = cv2.boundingRect(contour)
        aspect = w / max(h, 1)

        # Reject anything that is not roughly square and small.
        if aspect < 0.6 or aspect > 1.6:
            continue
        if w < 8 or w > 80 or h < 8 or h > 80:
            continue

        # Solidity = contour area / convex-hull area.
        hull_area = cv2.contourArea(cv2.convexHull(contour))
        if hull_area == 0:
            continue
        if contour_area / hull_area < 0.65:
            continue

        # Skip shapes whose centre lies inside an already-detected table.
        if table_mask[y + h // 2, x + w // 2] > 0:
            continue

        # Polygon approximation must have between 3 and 8 vertices.
        outline = cv2.approxPolyDP(
            contour, 0.04 * cv2.arcLength(contour, True), True
        )
        if len(outline) < 3 or len(outline) > 8:
            continue

        boxes.append(ROIRegion(x, y, x + w, y + h, "checkbox"))

    return boxes
|
|
326
|
+
|
|
327
|
+
# ------------------------------------------------------------------
|
|
328
|
+
# 3. Radio button detection
|
|
329
|
+
# ------------------------------------------------------------------
|
|
330
|
+
def _detect_radio_buttons(
    self, image: np.ndarray, table_mask: np.ndarray
) -> list[ROIRegion]:
    """Detect radio buttons as Hough circles outside table regions.

    Args:
        image: BGR image.
        table_mask: mask that is non-zero inside table regions.
    Returns:
        One "radio" ROIRegion per accepted circle; the box is the circle's
        bounding square clamped to the image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 1)

    circles = cv2.HoughCircles(
        blurred,
        cv2.HOUGH_GRADIENT,
        dp=self.circle_dp,
        minDist=15,
        param1=60,
        param2=25,
        minRadius=5,
        maxRadius=25,
    )
    if circles is None:
        return []

    h_img, w_img = image.shape[:2]
    regions = []
    # Convert to plain Python ints before doing arithmetic.  The previous
    # np.uint16 values made `cx - r` wrap around to ~65 000 whenever a
    # circle touched the left/top edge, so max(0, ...) never clamped.
    for cx, cy, r in np.around(circles[0]).astype(int).tolist():
        # Clamp only the mask lookup so a rounded centre that lands on the
        # image edge cannot index out of bounds.
        mask_x = min(max(cx, 0), w_img - 1)
        mask_y = min(max(cy, 0), h_img - 1)
        if table_mask[mask_y, mask_x] > 0:
            continue
        regions.append(
            ROIRegion(
                max(0, cx - r),
                max(0, cy - r),
                min(w_img, cx + r),
                min(h_img, cy + r),
                "radio",
            )
        )

    return regions
|
|
358
|
+
|
|
359
|
+
# ------------------------------------------------------------------
|
|
360
|
+
# 4. General field detection (text / textarea / date / dropdown / sig)
|
|
361
|
+
# ------------------------------------------------------------------
|
|
362
|
+
def _detect_general_fields(
    self,
    image: np.ndarray,
    table_mask: np.ndarray,
    existing: list[ROIRegion],
) -> list[ROIRegion]:
    """Find remaining field boxes and classify each by geometry / content.

    External contours of the closed, adaptively thresholded image are
    kept when they reach ``min_area``, their centre is outside every
    table, and they do not overlap an *existing* region by 50 % or more
    of their own area.
    """
    inverted = cv2.adaptiveThreshold(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        15,
        8,
    )
    closed = cv2.morphologyEx(
        inverted,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, self.morph_kernel),
    )
    contours, _ = cv2.findContours(
        closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    taken = [(r.x1, r.y1, r.x2, r.y2) for r in existing]
    found = []

    for contour in contours:
        if cv2.contourArea(contour) < self.min_area:
            continue

        left, top, width, height = cv2.boundingRect(contour)
        right, bottom = left + width, top + height

        # Centre inside a table -> already covered by the table region.
        if table_mask[top + height // 2, left + width // 2] > 0:
            continue

        # Mostly covered by something detected earlier -> skip.
        if self._overlaps_any(left, top, right, bottom, taken, thresh=0.5):
            continue

        aspect = width / max(height, 1)
        kind = self._classify_general(
            image, left, top, right, bottom, aspect, width, height
        )
        found.append(ROIRegion(left, top, right, bottom, kind))

    return found
|
|
413
|
+
|
|
414
|
+
def _classify_general(
    self,
    image: np.ndarray,
    x1: int,
    y1: int,
    x2: int,
    y2: int,
    ar: float,
    w: int,
    h: int,
) -> str:
    """Pick a field type for a box from its shape and (optionally) its text.

    Rules are tried in this order: dropdown, date (keyword), date
    (vertical dividers), signature, textarea; anything else is "text".
    """
    # With OCR enabled, read whatever is printed inside the box.
    inner_text = self._ocr_text(image[y1:y2, x1:x2]) if self.enable_ocr else ""

    # Dropdown: wide, short, and the text contains a dropdown hint.
    if ar > 3 and h < 60:
        if _DROPDOWN_PATTERNS.search(inner_text):
            return "dropdown"

    # Date by keyword.
    if _DATE_PATTERNS.search(inner_text):
        return "date"

    # Date by layout: vertical separator lines inside a wide, short box.
    if ar > 1.5 and h < 70:
        if self._has_internal_dividers(image, x1, y1, x2, y2):
            return "date"

    if h > 60:
        # Signature: very wide and taller than a text line.
        if ar > 4:
            return "signature"
        # Textarea: moderate aspect ratio and wide enough.
        if 0.3 <= ar <= 2.5 and w > 80:
            return "textarea"

    # Single-line text input is also the fallback.
    return "text"
|
|
457
|
+
|
|
458
|
+
def _has_internal_dividers(
    self,
    image: np.ndarray,
    x1: int,
    y1: int,
    x2: int,
    y2: int,
) -> bool:
    """Check whether a box contains internal vertical dividers (date parts).

    Vertical strokes at least half the box height are extracted, then only
    the columns away from the left/right edges are counted.  Without that
    margin the box's own side borders — which the caller's bounding box
    includes — would count as dividers and nearly every bordered box
    would qualify.

    Returns:
        True when more than 10 divider pixels remain in the interior;
        False for an empty crop.
    """
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        # Degenerate box: nothing to analyse (cvtColor would raise).
        return False

    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
    v_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (1, max(1, crop.shape[0] // 2))
    )
    vertical = cv2.morphologyEx(binary, cv2.MORPH_OPEN, v_kernel)

    # Drop the outer frame: at least 3 px, or 5 % of the width, per side.
    crop_w = crop.shape[1]
    margin = max(3, crop_w // 20)
    interior = vertical[:, margin : crop_w - margin]
    if interior.size == 0:
        return False
    return int(np.count_nonzero(interior)) > 10
|
|
475
|
+
|
|
476
|
+
# ==================================================================
|
|
477
|
+
# OCR
|
|
478
|
+
# ==================================================================
|
|
479
|
+
|
|
480
|
+
def _ocr_text(self, crop: np.ndarray) -> str:
|
|
481
|
+
if not self.enable_ocr or crop.size == 0:
|
|
482
|
+
return ""
|
|
483
|
+
try:
|
|
484
|
+
text = pytesseract.image_to_string(crop, config="--psm 6 --oem 3")
|
|
485
|
+
return text.strip()
|
|
486
|
+
except Exception:
|
|
487
|
+
return ""
|
|
488
|
+
|
|
489
|
+
# ==================================================================
|
|
490
|
+
# LABEL ASSIGNMENT
|
|
491
|
+
# ==================================================================
|
|
492
|
+
|
|
493
|
+
def _assign_labels(
|
|
494
|
+
self, image: np.ndarray, regions: list[ROIRegion]
|
|
495
|
+
) -> list[ROIRegion]:
|
|
496
|
+
"""
|
|
497
|
+
For each region, look for OCR text immediately to the LEFT or ABOVE
|
|
498
|
+
the bounding box and assign it as the label.
|
|
499
|
+
"""
|
|
500
|
+
h_img, w_img = image.shape[:2]
|
|
501
|
+
|
|
502
|
+
for region in regions:
|
|
503
|
+
if region.label:
|
|
504
|
+
continue
|
|
505
|
+
|
|
506
|
+
# Search window: same height as the field, to its left
|
|
507
|
+
search_x1 = max(0, region.x1 - 300)
|
|
508
|
+
search_x2 = region.x1
|
|
509
|
+
search_y1 = max(0, region.y1 - 5)
|
|
510
|
+
search_y2 = min(h_img, region.y2 + 5)
|
|
511
|
+
|
|
512
|
+
left_crop = image[search_y1:search_y2, search_x1:search_x2]
|
|
513
|
+
label = self._ocr_text(left_crop)
|
|
514
|
+
|
|
515
|
+
if not label:
|
|
516
|
+
# Try above
|
|
517
|
+
search_y1b = max(0, region.y1 - 40)
|
|
518
|
+
search_y2b = region.y1
|
|
519
|
+
above_crop = image[search_y1b:search_y2b, region.x1 : region.x2]
|
|
520
|
+
label = self._ocr_text(above_crop)
|
|
521
|
+
|
|
522
|
+
region.label = label.replace("\n", " ").strip()[:80]
|
|
523
|
+
|
|
524
|
+
return regions
|
|
525
|
+
|
|
526
|
+
# ==================================================================
|
|
527
|
+
# FILL STATE (checkbox / radio)
|
|
528
|
+
# ==================================================================
|
|
529
|
+
|
|
530
|
+
def _detect_fill_state(
    self, image: np.ndarray, regions: list[ROIRegion]
) -> list[ROIRegion]:
    """Set ``checked`` on every checkbox / radio region.

    A region counts as checked when more than 18 % of the pixels in the
    central part of its box are darker than gray level 150.  Other field
    types and empty crops are left untouched.  The list is modified in
    place and returned.
    """
    for region in regions:
        if region.field_type not in ("checkbox", "radio"):
            continue
        crop = image[region.y1 : region.y2, region.x1 : region.x2]
        if crop.size == 0:
            continue

        # Measure only the middle half of the box in each direction.  The
        # detected box includes the printed outline, and on small or
        # thick-bordered boxes that outline alone can exceed the
        # threshold, marking empty boxes as checked.
        crop_h, crop_w = crop.shape[:2]
        margin_y, margin_x = crop_h // 4, crop_w // 4
        inner = crop[margin_y : crop_h - margin_y, margin_x : crop_w - margin_x]

        gray = cv2.cvtColor(inner, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
        filled_ratio = np.count_nonzero(thresh == 255) / thresh.size

        # Store a plain Python bool: the numpy comparison result is a
        # numpy.bool_, which fails identity checks such as
        # `region.checked is True`.
        region.checked = bool(filled_ratio > 0.18)

    return regions
|
|
546
|
+
|
|
547
|
+
# ==================================================================
|
|
548
|
+
# DEDUPLICATION
|
|
549
|
+
# ==================================================================
|
|
550
|
+
|
|
551
|
+
def _deduplicate(self, regions: list[ROIRegion]) -> list[ROIRegion]:
|
|
552
|
+
"""Remove regions that are nearly identical or heavily overlapping."""
|
|
553
|
+
if not regions:
|
|
554
|
+
return regions
|
|
555
|
+
|
|
556
|
+
# Sort by area descending (keep larger / more specific detections)
|
|
557
|
+
regions = sorted(regions, key=lambda r: r.area, reverse=True)
|
|
558
|
+
kept: list[ROIRegion] = []
|
|
559
|
+
|
|
560
|
+
for candidate in regions:
|
|
561
|
+
dominated = False
|
|
562
|
+
for existing in kept:
|
|
563
|
+
iou = self._iou(candidate, existing)
|
|
564
|
+
if iou > 0.45:
|
|
565
|
+
# Prefer more specific type
|
|
566
|
+
dominated = True
|
|
567
|
+
break
|
|
568
|
+
if not dominated:
|
|
569
|
+
kept.append(candidate)
|
|
570
|
+
|
|
571
|
+
return kept
|
|
572
|
+
|
|
573
|
+
@staticmethod
|
|
574
|
+
def _iou(a: ROIRegion, b: ROIRegion) -> float:
|
|
575
|
+
ix1 = max(a.x1, b.x1)
|
|
576
|
+
iy1 = max(a.y1, b.y1)
|
|
577
|
+
ix2 = min(a.x2, b.x2)
|
|
578
|
+
iy2 = min(a.y2, b.y2)
|
|
579
|
+
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
|
580
|
+
if inter == 0:
|
|
581
|
+
return 0.0
|
|
582
|
+
union = a.area + b.area - inter
|
|
583
|
+
return inter / max(union, 1)
|
|
584
|
+
|
|
585
|
+
@staticmethod
|
|
586
|
+
def _overlaps_any(x1, y1, x2, y2, bboxes, thresh=0.5) -> bool:
|
|
587
|
+
area = max(1, (x2 - x1) * (y2 - y1))
|
|
588
|
+
for bx1, by1, bx2, by2 in bboxes:
|
|
589
|
+
ix1 = max(x1, bx1)
|
|
590
|
+
iy1 = max(y1, by1)
|
|
591
|
+
ix2 = min(x2, bx2)
|
|
592
|
+
iy2 = min(y2, by2)
|
|
593
|
+
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
|
594
|
+
if inter / area >= thresh:
|
|
595
|
+
return True
|
|
596
|
+
return False
|
|
597
|
+
|
|
598
|
+
# ==================================================================
|
|
599
|
+
# ROW GROUPING
|
|
600
|
+
# ==================================================================
|
|
601
|
+
|
|
602
|
+
def _group_rows(self, regions: list[ROIRegion]) -> list[list[ROIRegion]]:
|
|
603
|
+
sorted_regions = sorted(regions, key=lambda r: (r.y1, r.x1))
|
|
604
|
+
rows: list[list[ROIRegion]] = []
|
|
605
|
+
current_row: list[ROIRegion] = []
|
|
606
|
+
|
|
607
|
+
for region in sorted_regions:
|
|
608
|
+
if not current_row:
|
|
609
|
+
current_row.append(region)
|
|
610
|
+
continue
|
|
611
|
+
prev_y = current_row[-1].y1
|
|
612
|
+
if abs(region.y1 - prev_y) < self.row_tolerance:
|
|
613
|
+
current_row.append(region)
|
|
614
|
+
else:
|
|
615
|
+
rows.append(sorted(current_row, key=lambda r: r.x1))
|
|
616
|
+
current_row = [region]
|
|
617
|
+
|
|
618
|
+
if current_row:
|
|
619
|
+
rows.append(sorted(current_row, key=lambda r: r.x1))
|
|
620
|
+
|
|
621
|
+
return rows
|
|
622
|
+
|
|
623
|
+
# ==================================================================
|
|
624
|
+
# KEY-VALUE EXTRACTION
|
|
625
|
+
# ==================================================================
|
|
626
|
+
|
|
627
|
+
def _extract_key_values(self, rows: list[list[ROIRegion]]) -> list[dict]:
|
|
628
|
+
key_values = []
|
|
629
|
+
for row in rows:
|
|
630
|
+
text_fields = [
|
|
631
|
+
r
|
|
632
|
+
for r in row
|
|
633
|
+
if r.field_type in ("text", "textarea", "date", "dropdown", "signature")
|
|
634
|
+
]
|
|
635
|
+
input_fields = [r for r in row if r.field_type in ("checkbox", "radio")]
|
|
636
|
+
|
|
637
|
+
for tf in text_fields:
|
|
638
|
+
# Nearest input to the right
|
|
639
|
+
candidates = [b for b in input_fields if b.x1 > tf.x2]
|
|
640
|
+
if candidates:
|
|
641
|
+
nearest = min(candidates, key=lambda b: b.x1 - tf.x2)
|
|
642
|
+
key_values.append(
|
|
643
|
+
{
|
|
644
|
+
"key": tf.label or "?",
|
|
645
|
+
"value_bbox": nearest.bbox,
|
|
646
|
+
"type": nearest.field_type,
|
|
647
|
+
}
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
return key_values
|
|
651
|
+
|
|
652
|
+
# ==================================================================
|
|
653
|
+
# VISUALIZATION
|
|
654
|
+
# ==================================================================
|
|
655
|
+
|
|
656
|
+
# Color palette per field type
|
|
657
|
+
# Colours are (B, G, R) tuples because they are drawn with OpenCV onto BGR
# images.  The previous values were the same colours written in RGB order,
# so e.g. the "red" signature box was rendered blue.
_TYPE_COLORS = {
    "text": (94, 197, 34),  # green
    "textarea": (129, 185, 16),  # teal
    "checkbox": (246, 130, 59),  # blue  (unchecked)
    "radio": (247, 85, 168),  # purple
    "date": (22, 115, 249),  # orange
    "table": (8, 179, 234),  # yellow
    "dropdown": (153, 72, 236),  # pink
    "signature": (68, 68, 239),  # red
}
_CHECKED_COLOR = (74, 163, 22)  # dark green when checked
_UNCHECKED_COLOR = (246, 130, 59)  # blue when unchecked
|
|
669
|
+
|
|
670
|
+
def visualize(
    self,
    image: np.ndarray,
    regions: list[ROIRegion] | None = None,
    result: dict | None = None,
    show_labels: bool = True,
    show_type_legend: bool = True,
) -> np.ndarray:
    """
    Draw all detected regions on a copy of *image* and return it.

    Pass either *regions* directly or the full *result* dict from process().
    With neither, only the legend (if requested) is drawn.

    Parameters
    ----------
    image            : image to annotate; it is copied, not modified
    regions          : regions to draw (takes precedence over *result*)
    result           : process() output; its "regions" entry is used
    show_labels      : draw a "TYPE: label [x]" tag above each box
    show_type_legend : draw the colour legend in the top-left corner
    """
    if regions is None and result is not None:
        regions = result.get("regions", [])
    if regions is None:
        regions = []

    font, scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1
    vis = image.copy()

    for region in regions:
        color = self._TYPE_COLORS.get(region.field_type, (200, 200, 200))

        # Override checkbox / radio color by state
        if (
            region.field_type in ("checkbox", "radio")
            and region.checked is not None
        ):
            color = self._CHECKED_COLOR if region.checked else self._UNCHECKED_COLOR

        # Draw bounding rect
        cv2.rectangle(vis, (region.x1, region.y1), (region.x2, region.y2), color, 2)

        if not show_labels:
            continue

        tag = region.field_type.upper()
        if region.label:
            tag += f": {region.label[:25]}"
        if region.checked is not None:
            # ASCII markers: cv2.putText's Hershey fonts cannot draw
            # "✓" / "✗" and would print "?" instead.
            tag += " [x]" if region.checked else " [ ]"

        # Background pill for readability; kept inside the top image edge.
        (tw, th), _ = cv2.getTextSize(tag, font, scale, thickness)
        ty = max(region.y1 - 4, th + 4)
        cv2.rectangle(
            vis,
            (region.x1, ty - th - 4),
            (region.x1 + tw + 6, ty + 2),
            color,
            -1,
        )
        cv2.putText(
            vis,
            tag,
            (region.x1 + 3, ty - 2),
            font,
            scale,
            (255, 255, 255),
            thickness,
            cv2.LINE_AA,
        )

    # Legend: one colour swatch plus type name per row, 22 px apart.
    if show_type_legend:
        lx, ly = 10, 10
        for ft, color in self._TYPE_COLORS.items():
            cv2.rectangle(vis, (lx, ly), (lx + 16, ly + 16), color, -1)
            cv2.putText(
                vis,
                ft,
                (lx + 22, ly + 12),
                font,
                scale,
                color,
                thickness,
                cv2.LINE_AA,
            )
            ly += 22

    return vis
|
|
749
|
+
|
|
750
|
+
# ─────────────────────────── NEW METHODS ───────────────────────────
|
|
751
|
+
|
|
752
|
+
def crop_roi(self, image: np.ndarray, region: "ROIRegion") -> np.ndarray:
    """Return a copy of the part of *image* covered by *region*.

    The region's corners are clamped to the image bounds before slicing.

    Args:
        image:  numpy image array (the original form image).
        region: ROIRegion object from process()["regions"].
    Returns:
        Independent copy of the crop; empty when the clamped box has no
        area.
    """
    img_h, img_w = image.shape[:2]
    left, top = max(0, region.x1), max(0, region.y1)
    right, bottom = min(img_w, region.x2), min(img_h, region.y2)
    window = image[top:bottom, left:right]
    return window.copy()
|
|
768
|
+
|
|
769
|
+
def extract_field_values(self, image: np.ndarray, regions) -> dict:
    """Build a ``{key: value}`` mapping for the detected fields.

    Checkbox / radio regions map to their ``checked`` state; every other
    region maps to the OCR text read from its crop (regions with an empty
    crop are skipped).  The key is the region's label, or
    ``"<field_type>_<x1>_<y1>"`` when the label is empty.  Returns an
    empty dict when pytesseract is not available.

    NOTE(review): regions sharing the same label overwrite each other.

    Args:
        image:   BGR numpy array.
        regions: List of ROIRegion from process()["regions"].
    Returns:
        dict: {field_label: ocr_text or checked state}
    """
    if not TESSERACT_AVAILABLE:
        return {}

    collected = {}
    for field in regions:
        name = field.label or f"{field.field_type}_{field.x1}_{field.y1}"

        if field.field_type in ("checkbox", "radio"):
            collected[name] = field.checked
            continue

        patch = self.crop_roi(image, field)
        if patch.size != 0:
            collected[name] = self._ocr_text(patch)

    return collected
|
|
794
|
+
|
|
795
|
+
def filter_by_type(self, regions, field_type: str):
    """Select the regions of a single field type, keeping input order.

    Args:
        regions: List of ROIRegion from process()["regions"].
        field_type: One of 'text', 'textarea', 'checkbox', 'radio', 'date',
            'table', 'dropdown', 'signature'.
    Returns:
        List[ROIRegion] whose ``field_type`` equals the requested one.
    """
    matching = []
    for candidate in regions:
        if candidate.field_type == field_type:
            matching.append(candidate)
    return matching
|
|
806
|
+
|
|
807
|
+
def get_checked_fields(self, regions):
    """Pick out the checkbox and radio regions that are ticked.

    Only a ``checked`` value that is exactly ``True`` counts; other truthy
    values are not treated as checked.

    Args:
        regions: List of ROIRegion from process()["regions"].
    Returns:
        List[ROIRegion]
    """
    ticked = []
    for candidate in regions:
        if candidate.field_type not in ("checkbox", "radio"):
            continue
        if candidate.checked is True:
            ticked.append(candidate)
    return ticked
|
|
820
|
+
|
|
821
|
+
def export_to_json(self, result: dict, path: str = "form_rois.json"):
    """Save the canonical ROI list from process() to a JSON file.

    Each ``[(x1, y1), (x2, y2), field_type, label]`` entry of
    ``result["roi"]`` is written as an object with the keys
    ``x1, y1, x2, y2, type, label``. A missing ``"roi"`` key produces an
    empty JSON list.

    NumPy-style scalars (zero-dimensional values exposing ``.item()``,
    e.g. ``np.int32`` coordinates) are converted to native Python values;
    without this the ``json`` module rejects them with TypeError.

    Args:
        result: Dict returned by process().
        path: Output file path.
    Returns:
        str: Absolute path of the written file.
    Raises:
        TypeError: If an entry holds a value that is neither JSON-native
            nor a NumPy-style scalar.
    """
    import json
    import os

    def _to_builtin(value):
        # json.dump only calls this hook for objects it cannot encode itself.
        if getattr(value, "ndim", None) == 0 and callable(
            getattr(value, "item", None)
        ):
            return value.item()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    roi_serialisable = [
        {"x1": x1, "y1": y1, "x2": x2, "y2": y2, "type": ftype, "label": label}
        for (x1, y1), (x2, y2), ftype, label in result.get("roi", [])
    ]
    with open(path, "w", encoding="utf-8") as f:
        json.dump(roi_serialisable, f, indent=2, default=_to_builtin)
    return os.path.abspath(path)
|
|
842
|
+
|
|
843
|
+
def export_to_csv(self, result: dict, path: str = "form_rois.csv"):
    """Write the regions from a process() result to a CSV file.

    One header row is followed by one row per entry of
    ``result["regions"]``. A missing ``"regions"`` key yields a file that
    contains only the header.
    Columns: x1, y1, x2, y2, type, label, checked.

    Args:
        result: Dict returned by process().
        path: Output file path.
    Returns:
        str: Absolute path of the written file.
    """
    import csv
    import os

    columns = ["x1", "y1", "x2", "y2", "type", "label", "checked"]
    with open(path, "w", newline="", encoding="utf-8") as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        for item in result.get("regions", []):
            cells = (
                item.x1,
                item.y1,
                item.x2,
                item.y2,
                item.field_type,
                item.label,
                item.checked,
            )
            table.writerow(dict(zip(columns, cells)))
    return os.path.abspath(path)
|
|
874
|
+
|
|
875
|
+
def get_field_count(self, regions) -> dict:
    """Tally how many regions were detected for each field type.

    Args:
        regions: List of ROIRegion from process()["regions"].
    Returns:
        dict: {'text': 4, 'checkbox': 6, ...}; types appear in the order
        they are first encountered.
    """
    tally: dict = {}
    for region in regions:
        kind = region.field_type
        if kind in tally:
            tally[kind] += 1
        else:
            tally[kind] = 1
    return tally
|
|
887
|
+
|
|
888
|
+
def get_empty_fields(self, regions):
    """Pick out the checkbox/radio regions that are not ticked.

    Any falsy ``checked`` value (``False``, ``None``, ...) counts as empty.

    Args:
        regions: List of ROIRegion objects.
    Returns:
        List of ROIRegion where field_type is 'checkbox' or 'radio'
        and checked is falsy.
    """

    def _is_unticked(candidate):
        return candidate.field_type in ("checkbox", "radio") and not candidate.checked

    return list(filter(_is_unticked, regions))
|
|
902
|
+
|
|
903
|
+
def validate_required_fields(self, regions, required_labels) -> dict:
    """Check which required labels have been filled (checked).

    Only checkbox and radio regions with a truthy ``checked`` count as
    filled. Labels are compared case-insensitively, and the returned
    labels keep the spelling and order given in ``required_labels``.

    ``required_labels`` is traversed exactly once, so any iterable works,
    including a generator. Iterating it twice would leave 'filled' empty
    for a one-shot iterable.

    Args:
        regions: List of ROIRegion objects.
        required_labels: Iterable of label strings that must be checked.
    Returns:
        dict with keys 'missing' and 'filled', each a list of labels.
    """
    checked_labels = {
        r.label.lower()
        for r in regions
        if r.field_type in ("checkbox", "radio") and r.checked
    }
    missing = []
    filled = []
    # Single pass: each required label lands in exactly one of the two lists.
    for lbl in required_labels:
        if lbl.lower() in checked_labels:
            filled.append(lbl)
        else:
            missing.append(lbl)
    return {"missing": missing, "filled": filled}
|
|
920
|
+
|
|
921
|
+
def get_field_by_label(self, regions, label):
    """Look up a region by label, ignoring case.

    Args:
        regions: List of ROIRegion objects.
        label: Label string to search for.
    Returns:
        The first ROIRegion whose label matches, or None if there is none.
    """
    wanted = label.lower()
    hits = (candidate for candidate in regions if candidate.label.lower() == wanted)
    return next(hits, None)
|
|
935
|
+
|
|
936
|
+
def get_form_completion_score(self, regions) -> float:
    """Compute the share of checkbox/radio regions that are checked.

    Regions of any other field type are ignored.

    Args:
        regions: List of ROIRegion objects.
    Returns:
        Float in [0.0, 1.0]. Returns 0.0 if no checkable fields exist.
    """
    total = 0
    ticked = 0
    for region in regions:
        if region.field_type not in ("checkbox", "radio"):
            continue
        total += 1
        if region.checked:
            ticked += 1
    if total == 0:
        return 0.0
    return ticked / total
|
|
949
|
+
|
|
950
|
+
def highlight_empty_fields(self, image, regions, color=(0, 0, 255), thickness=2):
    """Outline every unchecked checkbox/radio field on a copy of the image.

    The fields to mark are those returned by ``get_empty_fields``; the
    input image itself is left untouched.

    Args:
        image: BGR numpy array.
        regions: List of ROIRegion objects.
        color: BGR rectangle color. Defaults to red (0, 0, 255).
        thickness: Rectangle border thickness in pixels.
    Returns:
        Annotated BGR numpy array (copy of input).
    """
    annotated = image.copy()
    for field in self.get_empty_fields(regions):
        top_left = (field.x1, field.y1)
        bottom_right = (field.x2, field.y2)
        cv2.rectangle(annotated, top_left, bottom_right, color, thickness)
    return annotated
|
|
965
|
+
|
|
966
|
+
def extract_all_text(self, image, regions) -> dict:
    """OCR each region and return a mapping of label → text.

    Crops are taken through ``crop_roi`` rather than by slicing the image
    with the raw box. Raw slicing treats negative coordinates as indices
    counted from the end of the axis and so returns the wrong pixels. A
    region whose crop is empty is mapped to ``""`` without invoking OCR.

    Regions that share a label (including the empty label) overwrite one
    another; the last one wins.

    Args:
        image: BGR numpy array.
        regions: List of ROIRegion objects.
    Returns:
        dict mapping each region's label to its OCR text string.
    """
    result = {}
    for r in regions:
        crop = self.crop_roi(image, r)
        # An empty crop has nothing to recognise; skip the OCR call.
        result[r.label] = self._ocr_text(crop) if crop.size else ""
    return result
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
# Usage:
|
|
983
|
+
|
|
984
|
+
# import cv2
|
|
985
|
+
# from form_roi_detector import FormROIDetector
|
|
986
|
+
|
|
987
|
+
# # Load your form image
|
|
988
|
+
# image = cv2.imread("my_form.png")
|
|
989
|
+
|
|
990
|
+
# # Create detector (OCR optional — needs pytesseract)
|
|
991
|
+
# detector = FormROIDetector(enable_ocr=True)
|
|
992
|
+
|
|
993
|
+
# # detect() → canonical ROI list only
|
|
994
|
+
# roi = detector.detect(image)
|
|
995
|
+
|
|
996
|
+
# # Each entry: [(x1,y1), (x2,y2), field_type, label]
|
|
997
|
+
# for entry in roi:
|
|
998
|
+
# (x1, y1), (x2, y2), ftype, label = entry
|
|
999
|
+
# print(f"[{ftype}] '{label}' → ({x1},{y1})→({x2},{y2})")
|
|
1000
|
+
|
|
1001
|
+
# result = detector.process(image)
|
|
1002
|
+
|
|
1003
|
+
# roi = result["roi"] # canonical list ← same as detect()
|
|
1004
|
+
# regions = result["regions"] # list[ROIRegion] ← rich objects
|
|
1005
|
+
# rows = result["rows"] # grouped by Y position
|
|
1006
|
+
# key_values = result["key_values"] # [{key, value_bbox, type}, ...]
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
# Advanced Usage:
|
|
1010
|
+
# image = cv2.imread("my_form.png")
|
|
1011
|
+
# detector = FormROIDetector(
|
|
1012
|
+
# min_area = 400, # px² — ignore tiny noise contours
|
|
1013
|
+
# enable_ocr = True, # False if pytesseract not installed
|
|
1014
|
+
# morph_kernel = (3, 3), # larger → merges nearby strokes
|
|
1015
|
+
# row_tolerance= 18, # px — Y-delta for same-row grouping
|
|
1016
|
+
# circle_dp = 1.2, # HoughCircles dp (radio detection)
|
|
1017
|
+
# debug = False, # True → result["debug_image"] set
|
|
1018
|
+
# )
|
|
1019
|
+
# result = detector.process(image)
|
|
1020
|
+
|
|
1021
|
+
# regions = result["regions"]
|
|
1022
|
+
|
|
1023
|
+
# for r in regions:
|
|
1024
|
+
# print(r.field_type) # "text"|"checkbox"|"radio"|"date"…
|
|
1025
|
+
# print(r.label) # OCR text to the left / above
|
|
1026
|
+
# print(r.checked) # True/False/None (checkbox+radio only)
|
|
1027
|
+
# print(r.bbox) # (x1, y1, x2, y2)
|
|
1028
|
+
# print(r.to_tuple()) # canonical [(x1,y1),(x2,y2),type,label]
|
|
1029
|
+
# kv = result["key_values"]
|
|
1030
|
+
# # Links a text label field to the nearest checkbox/radio
|
|
1031
|
+
# # [{"key": "Allergic", "value_bbox": (740,980,1320,1078), "type": "checkbox"}, …]
|
|
1032
|
+
|
|
1033
|
+
# for pair in kv:
|
|
1034
|
+
# print(f"{pair['key']} → {pair['type']} @ {pair['value_bbox']}")
|
|
1035
|
+
|
|
1036
|
+
# detector = FormROIDetector(debug=True)
|
|
1037
|
+
# result = detector.process(image)
|
|
1038
|
+
|
|
1039
|
+
# debug_img = result["debug_image"] # annotated np.ndarray
|
|
1040
|
+
# cv2.imwrite("debug.png", debug_img)
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
# ROI Output Format:
|
|
1044
|
+
# roi = [
|
|
1045
|
+
# [(90, 980), (650, 1120), "text", "Name" ],
|
|
1046
|
+
# [(740, 980), (1320, 1078), "checkbox", "Allergic" ],
|
|
1047
|
+
# [(90, 1140), (650, 1200), "date", "Date of Birth"],
|
|
1048
|
+
# [(740, 1140), (900, 1200), "radio", "Male" ],
|
|
1049
|
+
# [(920, 1140), (1080, 1200), "radio", "Female" ],
|
|
1050
|
+
# [(90, 1220), (1320, 1460), "textarea", "Comments" ],
|
|
1051
|
+
# [(90, 1480), (1320, 1760), "table", "" ],
|
|
1052
|
+
# [(90, 1780), (600, 1840), "dropdown", "Country" ],
|
|
1053
|
+
# [(90, 1860), (500, 1940), "signature", "Signature" ],
|
|
1054
|
+
# ]
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
# Visualization
|
|
1058
|
+
# Option 1 — via process() result
|
|
1059
|
+
# image = cv2.imread("my_form.png")
|
|
1060
|
+
# detector = FormROIDetector(enable_ocr=True)
|
|
1061
|
+
# result = detector.process(image)
|
|
1062
|
+
# vis = detector.visualize(image, result=result)
|
|
1063
|
+
# cv2.imwrite("annotated.png", vis)
|
|
1064
|
+
|
|
1065
|
+
# # Option 2 — pass regions directly
|
|
1066
|
+
# vis = detector.visualize(image, regions=result["regions"])
|
|
1067
|
+
|
|
1068
|
+
# # Option 3 — show in a window (while developing)
|
|
1069
|
+
# cv2.imshow("Form Fields", vis)
|
|
1070
|
+
# cv2.waitKey(0)
|
|
1071
|
+
# cv2.destroyAllWindows()
|
|
1072
|
+
|
|
1073
|
+
# # Option 4 — Jupyter / Colab inline display
|
|
1074
|
+
# from IPython.display import display
|
|
1075
|
+
# import PIL.Image, io, numpy as np
|
|
1076
|
+
|
|
1077
|
+
# rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
|
|
1078
|
+
# display(PIL.Image.fromarray(rgb))
|