openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,465 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+ try:
5
+ import pytesseract
6
+
7
+ TESSERACT_AVAILABLE = True
8
+ except ImportError:
9
+ TESSERACT_AVAILABLE = False
10
+
11
+
12
class FormROIDetector:
    """Detect form fields (regions of interest) in an image of a form.

    Contours found in a thresholded/edge image are classified by shape into
    ``"checkbox"``, ``"radio"``, ``"textbox"``, ``"daterange"`` or ``"text"``
    regions, and cells of ruled tables are added as ``"table_cell"`` regions.
    When OCR is enabled and ``pytesseract`` is importable, the first line of
    text found inside each region is attached to it. Regions can then be
    grouped into rows and text labels linked to the field on their right.

    Every region is a dict with at least the keys ``"bbox"``
    (``(x1, y1, x2, y2)``), ``"type"`` and ``"text"``.

    Example::

        image = cv2.imread(main_form_path)

        detector = FormROIDetector(min_area=1000, enable_ocr=True, debug=True)

        result = detector.process(image)
        print(result["roi"])

        # Draw the detected regions on a copy of the image.
        vis = detector.visualize(image, result)

        # Show in a window (while developing).
        cv2.imshow("Form Fields", vis)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    """

    # Region types that carry a checked/unchecked state.
    _SELECTION_TYPES = ("checkbox", "radio")

    # Region types that can be the "value" side of a key/value pair.
    _FIELD_TYPES = frozenset({"textbox", "checkbox", "radio", "daterange"})

    # A perfect square has circularity pi/4 (~0.785) and a digitised circle
    # roughly 0.9, so the radio/checkbox cut-off has to sit between the two.
    _RADIO_MIN_CIRCULARITY = 0.85

    # Drawing colours per region type, in OpenCV's BGR channel order.
    _TYPE_COLORS = {
        "text": (0, 255, 0),  # green
        "checkbox": (255, 0, 0),  # blue
        "radio": (255, 0, 0),  # blue
        "textbox": (0, 165, 255),  # orange
        "daterange": (255, 165, 0),  # azure (light blue)
        "table_cell": (255, 0, 255),  # magenta
    }
    _DEFAULT_COLOR = (128, 128, 128)  # gray

    def __init__(
        self,
        min_area=500,
        enable_ocr=True,
        morph_kernel_size=(5, 5),
        row_tolerance=20,
        debug=False,
    ):
        """Store the detection settings.

        Args:
            min_area: Contours whose area is below this value are ignored.
            enable_ocr: Request OCR of detected regions. OCR is only enabled
                when ``pytesseract`` could be imported.
            morph_kernel_size: ``(width, height)`` of the rectangular kernel
                used for the morphological closing in :meth:`detect_rois`.
            row_tolerance: Two consecutive regions belong to the same row when
                their top edges differ by less than this many pixels.
            debug: Stored on the instance; no method of this class reads it.
        """
        self.min_area = min_area
        self.enable_ocr = bool(enable_ocr and TESSERACT_AVAILABLE)
        self.kernel_size = morph_kernel_size
        self.row_tolerance = row_tolerance
        self.debug = debug

    # ---------------------------
    # Geometry helpers
    # ---------------------------
    def _overlap_ratio(self, bbox1, bbox2):
        """Return the overlap of two boxes relative to the smaller box.

        Used to filter duplicates: a box fully contained in another one
        yields 1.0 regardless of how large the containing box is.

        Args:
            bbox1: ``(x1, y1, x2, y2)`` of the first box.
            bbox2: ``(x1, y1, x2, y2)`` of the second box.

        Returns:
            A float between 0.0 and 1.0; 0.0 when the boxes do not intersect
            or when the smaller box has no area.
        """
        ax1, ay1, ax2, ay2 = bbox1
        bx1, by1, bx2, by2 = bbox2

        left, top = max(ax1, bx1), max(ay1, by1)
        right, bottom = min(ax2, bx2), min(ay2, by2)
        if right < left or bottom < top:
            return 0.0

        smaller = min((ax2 - ax1) * (ay2 - ay1), (bx2 - bx1) * (by2 - by1))
        if smaller <= 0:
            return 0.0
        return (right - left) * (bottom - top) / smaller

    def _drop_overlapping(self, rois, max_overlap=0.6):
        """Keep each region only if it overlaps no earlier kept region too much."""
        kept = []
        for roi in rois:
            if all(
                self._overlap_ratio(roi["bbox"], other["bbox"]) <= max_overlap
                for other in kept
            ):
                kept.append(roi)
        return kept

    @classmethod
    def _classify_shape(cls, w, h, is_rect, circularity):
        """Map the shape statistics of a contour to a region type.

        Args:
            w: Bounding-box width in pixels.
            h: Bounding-box height in pixels.
            is_rect: True when the contour approximates to four vertices.
            circularity: ``4 * pi * area / perimeter ** 2`` of the contour.

        Returns:
            One of ``"radio"``, ``"checkbox"``, ``"daterange"``,
            ``"textbox"`` or ``"text"``.
        """
        aspect_ratio = w / float(h)
        small_and_squarish = (
            0.75 <= aspect_ratio <= 1.35 and max(w, h) < 200 and min(w, h) > 10
        )

        # Circles never approximate to four vertices, so the radio test must
        # not depend on is_rect; squares sit at ~0.785 and stay checkboxes.
        if small_and_squarish and circularity > cls._RADIO_MIN_CIRCULARITY:
            return "radio"
        if small_and_squarish and is_rect:
            return "checkbox"
        if is_rect and aspect_ratio >= 2.5:
            # Very wide boxes are treated as date-range inputs.
            return "daterange" if aspect_ratio > 5.0 else "textbox"
        return "text"  # labels or irregular text regions

    # ---------------------------
    # Preprocessing
    # ---------------------------
    @staticmethod
    def _to_gray(image):
        """Return a single-channel version of ``image`` (2-D input is passed through)."""
        if image.ndim == 2:
            return image
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    @staticmethod
    def _binarize(gray):
        """Inverse adaptive mean threshold: dark strokes become 255."""
        return cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
        )

    @staticmethod
    def _line_masks(binary, length):
        """Return ``(horizontal, vertical)`` masks of strokes at least ``length`` px long."""
        h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (length, 1))
        v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, length))
        horizontal = cv2.morphologyEx(binary, cv2.MORPH_OPEN, h_kernel)
        vertical = cv2.morphologyEx(binary, cv2.MORPH_OPEN, v_kernel)
        return horizontal, vertical

    @staticmethod
    def _line_positions(mask, axis):
        """Return the sorted, unique positions of the lines in ``mask``.

        Args:
            mask: Binary image containing only horizontal or only vertical lines.
            axis: Index into the bounding rect: 0 for x (vertical lines),
                1 for y (horizontal lines).
        """
        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        positions = set()
        for contour in contours:
            rect = cv2.boundingRect(contour)
            # Bounding-box area instead of contourArea: the contour of a
            # 1-px-thick line is degenerate and has area 0, which would
            # discard thin grid lines.
            if rect[2] * rect[3] > 50:
                positions.add(rect[axis])
        return sorted(positions)

    def _preprocess(self, image):
        """Return a binary mask combining adaptive threshold and Canny edges."""
        gray = cv2.equalizeHist(self._to_gray(image))
        thresh = self._binarize(gray)
        edges = cv2.Canny(gray, 50, 150)
        return cv2.bitwise_or(thresh, edges)

    # ---------------------------
    # Table Cell Detection (extracts individual cells from grid lines)
    # ---------------------------
    def _detect_table_cells(self, image):
        """Detect ruled lines and return the cells between them.

        Every pair of neighbouring horizontal lines is combined with every
        pair of neighbouring vertical lines; rectangles larger than 20 px in
        both directions are returned.

        Returns:
            A list of ``{"bbox", "type": "table_cell", "text": ""}`` dicts,
            empty when fewer than two lines exist in either direction.
        """
        binary = self._binarize(self._to_gray(image))
        horizontal, vertical = self._line_masks(binary, 25)

        row_edges = self._line_positions(horizontal, 1)
        col_edges = self._line_positions(vertical, 0)
        if len(row_edges) < 2 or len(col_edges) < 2:
            return []

        cells = []
        for y1, y2 in zip(row_edges, row_edges[1:]):
            for x1, x2 in zip(col_edges, col_edges[1:]):
                if x2 - x1 > 20 and y2 - y1 > 20:
                    cells.append(
                        {"bbox": (x1, y1, x2, y2), "type": "table_cell", "text": ""}
                    )
        return cells

    # ---------------------------
    # ROI Detection
    # ---------------------------
    def detect_rois(self, image):
        """Find and classify candidate regions in ``image``.

        Returns:
            Region dicts sorted by ``(y1, x1)``. Contour-based regions also
            carry ``"circularity"``. A region is dropped when it overlaps an
            earlier (higher up / further left) region by more than 60 % of
            the smaller box.
        """
        mask = self._preprocess(image)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, self.kernel_size)
        closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        contours, _ = cv2.findContours(
            closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        rois = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if area < self.min_area:
                continue

            x, y, w, h = cv2.boundingRect(contour)
            peri = cv2.arcLength(contour, True)
            circularity = 4 * np.pi * area / (peri * peri) if peri > 0 else 0.0
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
            roi_type = self._classify_shape(w, h, len(approx) == 4, circularity)

            # Every region type is OCR'd; selection fields rarely contain
            # text, in which case the label simply stays empty.
            label = self._ocr(image[y : y + h, x : x + w]) if self.enable_ocr else ""

            rois.append(
                {
                    "bbox": (x, y, x + w, y + h),
                    "type": roi_type,
                    "text": label.strip(),
                    "circularity": circularity,
                }
            )

        for cell in self._detect_table_cells(image):
            if self.enable_ocr:
                x1, y1, x2, y2 = cell["bbox"]
                cell["text"] = self._ocr(image[y1:y2, x1:x2])
            rois.append(cell)

        # Sort top-to-bottom, left-to-right, then remove near-duplicates
        # (e.g. line contours vs table cells).
        rois.sort(key=lambda r: (r["bbox"][1], r["bbox"][0]))
        return self._drop_overlapping(rois)

    # ---------------------------
    # OCR
    # ---------------------------
    def _ocr(self, roi):
        """Return the first line of text recognised in ``roi``.

        OCR is best-effort: an empty crop or any failure inside
        ``pytesseract`` yields an empty string instead of an exception.
        """
        if roi is None or roi.size == 0:
            return ""
        try:
            text = pytesseract.image_to_string(roi, config="--psm 6")
        except Exception:
            # Deliberately broad: a failed OCR call must not abort detection.
            return ""
        lines = text.strip().splitlines()
        return lines[0].strip() if lines else ""

    # ---------------------------
    # Row Grouping
    # ---------------------------
    def group_rows(self, rois):
        """Group regions into rows by the vertical position of their top edge.

        The regions are first ordered by ``(y1, x1)`` so the result does not
        depend on the order of the input. A region joins the current row when
        its top edge is less than ``row_tolerance`` pixels away from the top
        edge of the previously visited region.

        Returns:
            A list of rows, each a list of regions sorted left to right.
        """
        ordered = sorted(rois, key=lambda r: (r["bbox"][1], r["bbox"][0]))

        rows = []
        current = []
        for roi in ordered:
            if current:
                gap = abs(roi["bbox"][1] - current[-1]["bbox"][1])
                if gap >= self.row_tolerance:
                    rows.append(sorted(current, key=lambda r: r["bbox"][0]))
                    current = []
            current.append(roi)

        if current:
            rows.append(sorted(current, key=lambda r: r["bbox"][0]))
        return rows

    # ---------------------------
    # Key-Value Extraction
    # ---------------------------
    def extract_key_values(self, rows):
        """Link each ``"text"`` region to the nearest field on its right.

        Only fields in the same row whose left edge lies more than 5 px to
        the right of the label's right edge are considered.

        Returns:
            One dict per linked label with the keys ``"key"`` (label text),
            ``"key_bbox"`` (label box, so the label can be identified even
            when several labels share the same text), ``"value_bbox"``,
            ``"value_type"``, ``"value_text"`` and ``"checked"`` (``None``
            for fields that are not checkboxes or radio buttons).
        """
        key_values = []
        for row in rows:
            labels = [r for r in row if r["type"] == "text"]
            fields = [r for r in row if r["type"] in self._FIELD_TYPES]

            for label in labels:
                label_right = label["bbox"][2]
                candidates = [f for f in fields if f["bbox"][0] > label_right + 5]
                if not candidates:
                    continue

                nearest = min(candidates, key=lambda f: f["bbox"][0] - label_right)
                is_selection = nearest["type"] in self._SELECTION_TYPES
                key_values.append(
                    {
                        "key": label["text"],
                        "key_bbox": label["bbox"],
                        "value_bbox": nearest["bbox"],
                        "value_type": nearest["type"],
                        "value_text": nearest.get("text", ""),
                        "checked": (
                            nearest.get("checked", False) if is_selection else None
                        ),
                    }
                )
        return key_values

    # ---------------------------
    # Selection State Detection (checkbox / radio)
    # ---------------------------
    def detect_selection_state(self, image, rois):
        """Set ``roi["checked"]`` on every checkbox and radio region.

        A region counts as checked when more than 20 % of the pixels inside
        its bounding box are darker than or equal to 150. The box outline
        lies inside the bounding box and is counted as well.

        Returns:
            The same ``rois`` list, modified in place.
        """
        for roi in rois:
            if roi["type"] not in self._SELECTION_TYPES:
                continue

            x1, y1, x2, y2 = roi["bbox"]
            crop = image[y1:y2, x1:x2]
            if crop.size == 0:
                # Box lies outside the image: nothing can be marked.
                roi["checked"] = False
                continue

            _, dark = cv2.threshold(
                self._to_gray(crop), 150, 255, cv2.THRESH_BINARY_INV
            )
            filled_ratio = np.count_nonzero(dark) / dark.size
            # Plain bool rather than numpy.bool_ so the result serialises cleanly.
            roi["checked"] = bool(filled_ratio > 0.2)

        return rois

    # ---------------------------
    # Table Mask (kept for backward compatibility + visualization)
    # ---------------------------
    def detect_table(self, image):
        """Return a mask of the horizontal and vertical strokes at least 40 px long."""
        binary = self._binarize(self._to_gray(image))
        horizontal, vertical = self._line_masks(binary, 40)
        return cv2.add(horizontal, vertical)

    # ---------------------------
    # Visualization
    # ---------------------------
    def visualize(
        self,
        image,
        result,
        show_labels=True,
        show_rows=True,
        show_links=True,
        show_table=False,
    ):
        """Draw a detection result on a copy of ``image``.

        Args:
            image: The image that was analysed.
            result: Dict as returned by :meth:`process`; the keys ``"rois"``,
                ``"rows"`` and ``"key_values"`` are read, each optional.
            show_labels: Write ``type:text`` above every region.
            show_rows: Outline the regions of each row in a random colour.
            show_links: Draw a line from each key to its linked value.
            show_table: Blend the table line mask into the output.

        Returns:
            A new BGR image; ``image`` itself is not modified.
        """
        vis = image.copy()
        if vis.ndim == 2:
            # Coloured overlays need three channels.
            vis = cv2.cvtColor(vis, cv2.COLOR_GRAY2BGR)

        rois = result.get("rois", [])
        rows = result.get("rows", [])
        key_values = result.get("key_values", [])

        # Regions, coloured by type.
        for roi in rois:
            x1, y1, x2, y2 = roi["bbox"]
            roi_type = roi["type"]
            color = self._TYPE_COLORS.get(roi_type, self._DEFAULT_COLOR)

            # Selection fields: red when checked, blue otherwise.
            if "checked" in roi:
                color = (0, 0, 255) if roi["checked"] else (255, 0, 0)

            cv2.rectangle(vis, (x1, y1), (x2, y2), color, 2)

            if show_labels:
                label = roi.get("text", "")
                label_text = f"{roi_type}:{label}" if label else roi_type
                cv2.putText(
                    vis,
                    label_text,
                    (x1, max(0, y1 - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color,
                    1,
                    cv2.LINE_AA,
                )

        # Rows: one random colour per row.
        if show_rows:
            for row in rows:
                color = tuple(np.random.randint(0, 255, 3).tolist())
                for roi in row:
                    x1, y1, x2, y2 = roi["bbox"]
                    cv2.rectangle(vis, (x1, y1), (x2, y2), color, 1)

        # Key -> value links.
        if show_links:
            for kv in key_values:
                key_bbox = kv.get("key_bbox")
                if key_bbox is None:
                    # Result without key boxes: fall back to the first region
                    # carrying the key's text.
                    match = next(
                        (r for r in rois if r.get("text") == kv["key"]), None
                    )
                    if match is None:
                        continue
                    key_bbox = match["bbox"]

                kx1, ky1, kx2, ky2 = key_bbox
                vx1, vy1, vx2, vy2 = kv["value_bbox"]
                key_center = ((kx1 + kx2) // 2, (ky1 + ky2) // 2)
                val_center = ((vx1 + vx2) // 2, (vy1 + vy2) // 2)
                cv2.line(vis, key_center, val_center, (0, 255, 255), 2)

        # Table structure (optional).
        if show_table:
            table_mask = self.detect_table(image)
            vis = cv2.addWeighted(
                vis, 0.8, cv2.cvtColor(table_mask, cv2.COLOR_GRAY2BGR), 0.5, 0
            )

        return vis

    # ---------------------------
    # Full Pipeline
    # ---------------------------
    def process(self, image):
        """Run detection, selection-state analysis, row grouping and linking.

        Returns:
            A dict with the keys ``"rois"`` (region dicts), ``"rows"``
            (regions grouped by row), ``"key_values"`` (label/field links),
            ``"roi"`` (compact ``[(x1, y1), (x2, y2), type, text]`` list) and
            ``"table_mask"`` (output of :meth:`detect_table`).
        """
        rois = self.detect_rois(image)
        rois = self.detect_selection_state(image, rois)

        rows = self.group_rows(rois)
        key_values = self.extract_key_values(rows)

        roi = [
            [
                (r["bbox"][0], r["bbox"][1]),
                (r["bbox"][2], r["bbox"][3]),
                r["type"],
                r.get("text", ""),
            ]
            for r in rois
        ]

        return {
            "rois": rois,
            "rows": rows,
            "key_values": key_values,
            "roi": roi,
            "table_mask": self.detect_table(image),  # kept for convenience
        }