openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,587 @@
1
+ from pathlib import Path
2
+
3
+ import cv2
4
+ import mediapipe as mp
5
+ from mediapipe.tasks import python
6
+ from mediapipe.tasks.python import vision
7
+
8
# Bundled model location: a "models" directory next to this module.
_MODEL_DIR = Path(__file__).parent / "models"
_DEFAULT_MODEL = str(_MODEL_DIR / "face_detector.tflite")


class FaceDetector:
    """
    Face detection helper built on MediaPipe's Face Detector task.

    Besides running detection, the class offers utilities that operate on the
    parsed detection dicts: drawing, filtering, sorting, cropping, privacy
    masking (blur / pixelate), zone checks and simple IoU-based tracking.

    Every detection dict produced by this class has the keys ``id``, ``score``,
    ``bbox`` (x, y, w, h), ``bbox_xyxy`` (x1, y1, x2, y2), ``center``,
    ``coordinates``, ``area``, ``normalized_keypoints``, ``bounding_boxes``,
    ``key_points`` and ``categories`` (see ``_parse_detections``).
    """

    def __init__(
        self,
        model_path: str = _DEFAULT_MODEL,
        max_faces=5,
        running_mode="IMAGE",  # IMAGE | VIDEO | LIVE_STREAM
        min_detection_confidence: float = 0.5,
        min_suppression_threshold: float = 0.3,
    ):
        """
        Create the underlying MediaPipe detector.

        Args:
            model_path: Path to the ``.tflite`` face detection model.
            max_faces: Maximum number of faces kept by ``detect_faces`` (largest
                first); ``None`` disables the limit.
            running_mode: Name of a ``vision.RunningMode`` member (case-insensitive)
                or the enum member itself.
            min_detection_confidence: Minimum score for a detection to be kept.
            min_suppression_threshold: Non-maximum-suppression threshold passed to
                MediaPipe.
        """
        # NOTE(review): MediaPipe's LIVE_STREAM mode normally needs a result
        # callback and detect_async(); this class wires neither -- confirm
        # before relying on LIVE_STREAM.
        if isinstance(running_mode, str):
            running_mode = getattr(vision.RunningMode, running_mode.upper())
        self.running_mode = running_mode
        self.base_options = python.BaseOptions(model_asset_path=model_path)
        self.max_faces = max_faces
        self.min_detection_confidence = min_detection_confidence
        self.min_suppression_threshold = min_suppression_threshold
        self.options = vision.FaceDetectorOptions(
            base_options=self.base_options,
            running_mode=self.running_mode,
            min_detection_confidence=self.min_detection_confidence,
            min_suppression_threshold=self.min_suppression_threshold,
        )
        self.detector = vision.FaceDetector.create_from_options(self.options)
        self.mp_drawing_utils = mp.tasks.vision.drawing_utils
        self.mp_drawing_styles = mp.tasks.vision.drawing_styles
        # Last timestamp handed to detect_for_video(); used to synthesize
        # increasing timestamps when the caller does not supply one.
        self._last_timestamp_ms = -1

    @staticmethod
    def _clip_box(bbox, frame_w, frame_h, pad_x=0, pad_y=0):
        """Expand an (x, y, w, h) box by the given padding and clip it to the frame.

        Returns:
            (x1, y1, x2, y2) corner coordinates clipped to [0, frame_w] x [0, frame_h].
        """
        x, y, w, h = bbox
        return (
            max(0, x - pad_x),
            max(0, y - pad_y),
            min(frame_w, x + w + pad_x),
            min(frame_h, y + h + pad_y),
        )

    def _to_mp_image(self, image):
        """
        Convert a BGR image (as used by OpenCV) to an mp.Image.

        Args:
            image: The input image in BGR format (as used by OpenCV).
        Returns:
            An mp.Image object in SRGB format suitable for MediaPipe processing.
        """
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

    def detect(self, image, timestamp_ms=None):
        """
        Detect faces in the input image using MediaPipe's Face Detector.

        Args:
            image: The input image in which to detect faces (BGR format).
            timestamp_ms: Frame timestamp in milliseconds for non-IMAGE modes.
                When omitted, the previous timestamp plus one is used so that
                consecutive calls never repeat a timestamp.
        Returns:
            The raw detection result from MediaPipe's Face Detector.
        """
        mp_image = self._to_mp_image(image)
        if self.running_mode == vision.RunningMode.IMAGE:
            return self.detector.detect(mp_image)

        if timestamp_ms is None:
            # getattr keeps this working for instances created without __init__.
            timestamp_ms = getattr(self, "_last_timestamp_ms", -1) + 1
        timestamp_ms = int(timestamp_ms)
        self._last_timestamp_ms = timestamp_ms
        return self.detector.detect_for_video(mp_image, timestamp_ms)

    def _parse_detections(self, result, shape):
        """
        Turn a raw MediaPipe detection result into a list of plain dicts.

        Args:
            result: The raw detection result from MediaPipe's Face Detector.
            shape: The shape of the input image (height, width, ...).
        Returns:
            A list with one dict per detected face. ``normalized_keypoints`` holds
            the keypoints scaled to pixel coordinates. ``bounding_boxes``,
            ``key_points`` and ``categories`` reference the same three lists in
            every dict; those lists hold the raw MediaPipe entries of all faces
            in the frame.
        """
        H, W = shape[:2]
        parsed = []
        if not result.detections:
            return parsed

        # Shared across all dicts of this frame (see docstring).
        bounding_boxes = []
        key_points = []
        categories = []

        for i, detection in enumerate(result.detections):
            # A detection looks like:
            #   Detection(
            #       bounding_box=BoundingBox(origin_x=180, origin_y=145, width=701, height=701),
            #       categories=[Category(index=0, score=0.95, ...)],
            #       keypoints=[NormalizedKeypoint(x=0.18, y=0.29, ...), ...],
            #   )
            score = detection.categories[0].score if detection.categories else 0
            bbox = detection.bounding_box
            bounding_boxes.append(bbox)
            key_points.append(detection.keypoints)
            categories.append(detection.categories)

            x = int(bbox.origin_x)
            y = int(bbox.origin_y)
            w = int(bbox.width)
            h = int(bbox.height)

            parsed.append(
                {
                    "id": i,
                    "score": score,
                    "bbox": (x, y, w, h),
                    "bbox_xyxy": (x, y, x + w, y + h),
                    "center": (x + w // 2, y + h // 2),
                    "coordinates": (x, y, w, h),
                    "area": w * h,
                    "normalized_keypoints": self._normalize_keypoints(
                        detection.keypoints, W, H
                    ),
                    "bounding_boxes": bounding_boxes,
                    "key_points": key_points,
                    "categories": categories,
                }
            )
        return parsed

    def detect_faces(
        self,
        image,
        timestamp_ms=None,
        to_draw_bounding_box=True,
        to_draw_landmarks=True,
    ):
        """
        Detect faces and optionally annotate the image.

        Args:
            image: The input image in which to detect faces (BGR format).
            timestamp_ms: Optional timestamp in milliseconds for non-IMAGE modes.
            to_draw_bounding_box: Whether to draw bounding boxes and score labels.
                When False nothing is drawn at all.
            to_draw_landmarks: Whether to also draw keypoints; only has an effect
                when ``to_draw_bounding_box`` is True.
        Returns:
            Tuple ``(image, detections)``: the image (annotated in place when
            drawing is enabled) and the list of detection dicts, filtered by
            confidence and limited to the ``max_faces`` largest faces.
        """
        detection_result = self.detect(image, timestamp_ms)
        detections = self._parse_detections(detection_result, image.shape)

        if self.options.min_detection_confidence is not None:
            detections = self.filter_by_confidence(
                detections, self.options.min_detection_confidence
            )

        if self.max_faces is not None:
            detections = self.sort_faces(detections)[: self.max_faces]

        if to_draw_bounding_box:
            image = self.draw_detections(image, detections, to_draw_landmarks)

        return image, detections

    def draw_detections(self, image, detections, draw_landmarks=True):
        """
        Draw bounding boxes, score labels and (optionally) keypoints on the image.

        The drawing happens directly on ``image``; no copy is made.

        Args:
            image: The input image on which to draw detections (BGR format).
            detections: A list of detection dicts.
            draw_landmarks: Whether to draw the keypoints of each face.
        Returns:
            The same image object, annotated.
        """
        # Larger label text for still images, smaller for video frames.
        font_scale = 2 if self.running_mode == vision.RunningMode.IMAGE else 0.8
        for det in detections:
            x, y, x2, y2 = det["bbox_xyxy"]
            cv2.rectangle(image, (x, y), (x2, y2), (255, 0, 255), 2)

            cv2.putText(
                image,
                f"{int(det['score'] * 100)}%",
                # Clamp so the label stays visible for faces at the top edge.
                (x, max(y - 10, 10)),
                cv2.FONT_HERSHEY_SIMPLEX,
                font_scale,
                (0, 255, 255),
                2,
            )

            if draw_landmarks:
                for kx, ky in det["normalized_keypoints"]:
                    cv2.circle(image, (kx, ky), 2, (0, 255, 0), -1)

        return image

    def filter_by_confidence(self, detections, threshold=0.5):
        """
        Keep only detections whose score is at least ``threshold``.

        Args:
            detections: A list of detection dicts.
            threshold: Minimum confidence score (inclusive).
        Returns:
            A new list with the detections that pass the threshold.
        """
        return [d for d in detections if d["score"] >= threshold]

    def get_largest_face(self, detections):
        """
        Get the detection with the largest bounding-box area.

        Args:
            detections: A list of detection dicts.
        Returns:
            The detection with the largest ``area``, or None if there are none.
        """
        if not detections:
            return None
        return max(detections, key=lambda d: d["area"])

    def crop_faces(self, image, detections, margin=0):
        """
        Crop detected faces from the image based on their bounding boxes.

        Args:
            image: The input image from which to crop faces (BGR format).
            detections: A list of detection dicts.
            margin: Extra pixels added on every side before clipping to the image.
        Returns:
            A list of cropped face images (slices of ``image``, not copies).
        """
        H, W = image.shape[:2]
        faces = []
        for det in detections:
            x1, y1, x2, y2 = self._clip_box(det["bbox"], W, H, margin, margin)
            faces.append(image[y1:y2, x1:x2])
        return faces

    def sort_faces(self, detections, by="area", descending=True):
        """
        Sort detections by one of their dict keys.

        Args:
            detections: A list of detection dicts.
            by: The key to sort by (default is "area").
            descending: Whether to sort in descending order (default is True).
        Returns:
            A new, sorted list of detections.
        """
        return sorted(detections, key=lambda x: x[by], reverse=descending)

    def get_iou(self, boxA, boxB):
        """
        Compute the Intersection over Union of two boxes (useful for tracking / NMS).

        Args:
            boxA: A tuple (x1, y1, x2, y2) representing the first bounding box.
            boxB: A tuple (x1, y1, x2, y2) representing the second bounding box.

        Returns:
            The IoU value between the two bounding boxes.
        """
        xA = max(boxA[0], boxB[0])
        yA = max(boxA[1], boxB[1])
        xB = min(boxA[2], boxB[2])
        yB = min(boxA[3], boxB[3])

        inter = max(0, xB - xA) * max(0, yB - yA)

        areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
        areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

        # The small epsilon avoids a division by zero for two empty boxes.
        return inter / float(areaA + areaB - inter + 1e-6)

    def _normalize_keypoints(self, keypoints, W, H):
        """
        Scale keypoints to pixel coordinates.

        Args:
            keypoints: Keypoints whose ``x`` / ``y`` attributes are multiplied by
                the image size (MediaPipe reports them as fractions of the image).
            W: The width of the image.
            H: The height of the image.
        Returns:
            A list of (x, y) integer tuples; empty when there are no keypoints.
        """
        if not keypoints:
            return []
        return [(int(k.x * W), int(k.y * H)) for k in keypoints]

    # ─────────────────────────── NEW METHODS ───────────────────────────

    def count_faces(self, detections):
        """Return total number of detected faces.

        Args:
            detections: List of detection dicts returned by detect_faces().
        Returns:
            int: number of detections.
        """
        return len(detections)

    def blur_faces(self, image, detections, blur_strength=(51, 51), margin=0):
        """Blur every detected face region on a copy of the image.
        Useful for privacy masking before saving or streaming.

        Args:
            image: BGR numpy array.
            detections: List of detection dicts from detect_faces().
            blur_strength: (kW, kH) kernel size for GaussianBlur. Values are
                coerced to positive odd integers (even values are bumped by one).
            margin: Extra pixels to expand each face crop before blurring.
        Returns:
            BGR numpy array with blurred faces.
        """
        out = image.copy()
        H, W = out.shape[:2]
        # Kernel sizes must be positive odd integers; `| 1` makes a value odd.
        kw, kh = (max(1, int(k)) | 1 for k in blur_strength)
        for det in detections:
            x1, y1, x2, y2 = self._clip_box(det["bbox"], W, H, margin, margin)
            roi = out[y1:y2, x1:x2]
            if roi.size == 0:
                continue
            out[y1:y2, x1:x2] = cv2.GaussianBlur(roi, (kw, kh), 0)
        return out

    def is_face_in_zone(self, detection, zone_rect):
        """Check whether the face center falls inside a rectangular zone.
        Useful for attendance systems, door-unlock triggers, restricted-area alerts.

        Args:
            detection: Single detection dict from detect_faces().
            zone_rect: (x, y, w, h) defining the zone rectangle in pixel coords.
        Returns:
            bool: True if face center is inside the zone (borders included).
        """
        cx, cy = detection["center"]
        zx, zy, zw, zh = zone_rect
        return (zx <= cx <= zx + zw) and (zy <= cy <= zy + zh)

    def get_face_screen_position(self, detection, image_width):
        """Classify horizontal position of a face as 'left', 'center', or 'right'.
        Divides the frame into three equal vertical bands.

        Args:
            detection: Single detection dict from detect_faces().
            image_width: Width of the source image in pixels.
        Returns:
            str: 'left' | 'center' | 'right'
        """
        cx = detection["center"][0]
        third = image_width / 3
        if cx < third:
            return "left"
        if cx < 2 * third:
            return "center"
        return "right"

    def track_faces(self, prev_detections, curr_detections, iou_threshold=0.3):
        """Associate current-frame faces to previous-frame faces via IoU.

        Matching is greedy in the order of ``curr_detections``: each current face
        takes the not-yet-used previous face with the highest IoU, provided that
        IoU is positive and at least ``iou_threshold``.

        Args:
            prev_detections: List of detection dicts from the previous frame.
            curr_detections: List of detection dicts from the current frame.
            iou_threshold: Minimum IoU to consider two faces the same person.
        Returns:
            List of dicts: [{
                'prev': detection_or_None,
                'curr': detection,
                'is_new': bool
            }]
        """
        matched = []
        used_prev = set()

        for curr in curr_detections:
            best_iou = 0.0
            best_prev = None
            for i, prev in enumerate(prev_detections):
                if i in used_prev:
                    continue
                iou = self.get_iou(curr["bbox_xyxy"], prev["bbox_xyxy"])
                if iou > best_iou:
                    best_iou = iou
                    best_prev = (i, prev)

            if best_prev is not None and best_iou >= iou_threshold:
                used_prev.add(best_prev[0])
                matched.append({"prev": best_prev[1], "curr": curr, "is_new": False})
            else:
                matched.append({"prev": None, "curr": curr, "is_new": True})

        return matched

    def draw_zone(
        self, image, zone_rect, color=(0, 255, 255), label="Zone", thickness=2
    ):
        """Draw a named rectangular zone on a copy of the image.

        Args:
            image: BGR numpy array.
            zone_rect: (x, y, w, h) zone coordinates.
            color: BGR color tuple.
            label: Text label drawn above the zone rectangle.
            thickness: Border thickness in pixels.
        Returns:
            Annotated BGR numpy array.
        """
        out = image.copy()
        x, y, w, h = zone_rect
        cv2.rectangle(out, (x, y), (x + w, y + h), color, thickness)
        cv2.putText(
            out,
            label,
            (x, max(0, y - 8)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            color,
            2,
            cv2.LINE_AA,
        )
        return out

    # ─────────────────────── PRIVACY & CROP UTILITIES ───────────────────────

    def pixelate_faces(self, image, detections, block_size=10, margin=0):
        """Pixelate every detected face region for privacy masking.

        Downscales the face ROI to a tiny tile grid then upscales back with
        nearest-neighbour interpolation, creating a mosaic / pixelation effect.

        Args:
            image: BGR numpy array.
            detections: List of detection dicts from detect_faces().
            block_size: Pixel block size; larger values = coarser mosaic.
                Values below 1 are treated as 1.
            margin: Extra pixels to expand each face region before pixelating.
        Returns:
            BGR numpy array with pixelated faces (copy of input).
        """
        out = image.copy()
        H, W = image.shape[:2]
        # Guard against zero / negative block sizes (division below).
        block_size = max(1, int(block_size))
        for det in detections:
            x1, y1, x2, y2 = self._clip_box(det["bbox"], W, H, margin, margin)
            if x2 <= x1 or y2 <= y1:
                continue
            roi = out[y1:y2, x1:x2]
            small_w = max(1, (x2 - x1) // block_size)
            small_h = max(1, (y2 - y1) // block_size)
            small = cv2.resize(roi, (small_w, small_h), interpolation=cv2.INTER_LINEAR)
            out[y1:y2, x1:x2] = cv2.resize(
                small, (x2 - x1, y2 - y1), interpolation=cv2.INTER_NEAREST
            )
        return out

    def is_frontal(self, detection, threshold=0.8):
        """Heuristic frontal-face check based on detection confidence.

        The check only compares the detection score with ``threshold``; it does
        not look at the keypoints or the head pose.

        Args:
            detection: Single detection dict from detect_faces().
            threshold: Minimum confidence score to consider the face frontal.
        Returns:
            bool: True if detection score >= threshold.
        """
        return detection["score"] >= threshold

    def get_padded_crop(self, image, detection, pad_ratio=0.2):
        """Crop a face with proportional padding on all sides.

        Adds padding relative to the face bounding-box dimensions, then clips
        to the image boundary.

        Args:
            image: BGR numpy array.
            detection: Single detection dict from detect_faces().
            pad_ratio: Fraction of face width/height to add as padding on each side.
        Returns:
            BGR numpy array crop (copy).
        """
        _, _, w, h = detection["bbox"]
        h_img, w_img = image.shape[:2]
        x1, y1, x2, y2 = self._clip_box(
            detection["bbox"], w_img, h_img, int(w * pad_ratio), int(h * pad_ratio)
        )
        return image[y1:y2, x1:x2].copy()

    # ─────────────────── TRACKING & BATCH UTILITIES ──────────────────────────

    def draw_face_ids(self, image, tracked_faces):
        """Overlay face IDs on a copy of the image.

        Draws a green bounding box and an "ID:<n>" label above each tracked face.

        Args:
            image: BGR numpy array.
            tracked_faces: List of dicts with keys ``bbox`` (x, y, w, h) and ``id``
                (a missing ``id`` is shown as 0).
        Returns:
            Annotated BGR numpy array (copy of input).
        """
        out = image.copy()
        for face in tracked_faces:
            x, y, w, h = face["bbox"]
            face_id = face.get("id", 0)
            cv2.rectangle(out, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(
                out,
                f"ID:{face_id}",
                (x, max(y - 10, 10)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 255, 0),
                2,
            )
        return out

    def get_attention_score(self, detections, img_w, img_h):
        """Estimate viewer attention as a 0–1 scalar.

        Combines two equally weighted signals per detection and returns the best
        score across all detected faces:

        * **Area ratio** — face area divided by the frame area (capped at 1).
        * **Centrality** — 1 minus the distance of the face centre from the frame
          centre, relative to the centre-to-corner distance.

        Args:
            detections: List of detection dicts from detect_faces().
            img_w: Frame width in pixels.
            img_h: Frame height in pixels.
        Returns:
            float in [0.0, 1.0]. 0.0 when no detections are present or the frame
            size is not positive.
        """
        # A non-positive frame size would otherwise divide by zero below.
        if not detections or img_w <= 0 or img_h <= 0:
            return 0.0
        img_area = img_w * img_h
        img_cx, img_cy = img_w / 2.0, img_h / 2.0
        max_dist = (img_cx**2 + img_cy**2) ** 0.5
        scores = []
        for det in detections:
            area_ratio = min(det["area"] / img_area, 1.0)
            cx, cy = det["center"]
            dist = ((cx - img_cx) ** 2 + (cy - img_cy) ** 2) ** 0.5
            centrality = 1.0 - min(dist / max_dist, 1.0)
            scores.append(0.5 * area_ratio + 0.5 * centrality)
        return float(max(scores))

    def batch_detect(self, images):
        """Run detect_faces on a list of frames and return only the detection lists.

        Drawing is disabled so the input frames are left untouched.

        Args:
            images: Iterable of BGR numpy arrays.
        Returns:
            List[List[dict]]: one detection list per input frame, in order.
        """
        return [
            self.detect_faces(
                img, to_draw_bounding_box=False, to_draw_landmarks=False
            )[1]
            for img in images
        ]

    def save_crops(self, image, detections, output_dir, prefix="face"):
        """Crop each detected face (with padding) and save to disk as PNG files.

        Files are named ``<prefix>_<index>.png`` (index = position in
        ``detections``) and written to *output_dir*, which is created if it does
        not exist. Empty crops and failed writes are skipped.

        Args:
            image: BGR numpy array.
            detections: List of detection dicts from detect_faces().
            output_dir: Destination directory path (created automatically).
            prefix: Filename prefix for saved crops.
        Returns:
            List[str]: paths (``output_dir`` joined with the file name) of the PNG
            files that were actually written, in order.
        """
        import os

        os.makedirs(output_dir, exist_ok=True)
        paths = []
        for i, det in enumerate(detections):
            crop = self.get_padded_crop(image, det, pad_ratio=0.1)
            if crop.size == 0:
                # Box lies completely outside the image; nothing to write.
                continue
            path = os.path.join(output_dir, f"{prefix}_{i}.png")
            # imwrite reports failure through its return value.
            if cv2.imwrite(path, crop):
                paths.append(path)
        return paths