openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,528 @@
1
+ from pathlib import Path
2
+
3
+ import cv2
4
+ import mediapipe as mp
5
+ import numpy as np
6
+ from mediapipe.tasks.python import vision
7
+ from mediapipe.tasks.python.core.base_options import BaseOptions
8
+ from mediapipe.tasks.python.vision.core.vision_task_running_mode import (
9
+ VisionTaskRunningMode,
10
+ )
11
+
12
# Directory named "models" that sits next to this module file.
_MODEL_DIR = Path(__file__).parent.joinpath("models")
# Default model path as a plain string; it is handed to
# BaseOptions(model_asset_path=...) when no model_path is supplied.
_DEFAULT_MODEL = str(_MODEL_DIR.joinpath("deeplab_v3.tflite"))
14
+
15
+
16
class SelfieSegmentation:
    """Foreground/background effects built on MediaPipe ``ImageSegmenter``.

    Public methods take a BGR image (OpenCV convention), run the segmenter
    through :meth:`process` and derive a mask from the result.

    Two mask flavours are used internally:

    * the raw category mask (``_get_mask``), holding whatever per-pixel
      values the model emits; compositing methods compare it against 0.5;
    * a normalised foreground weight map in [0, 1] (``_soft_foreground``),
      used wherever the mask is multiplied into pixel data or thresholded
      after smoothing, so the result does not depend on the numeric range
      of the category values.

    NOTE(review): every non-zero category is treated as foreground; confirm
    this against the label map of the model in use.

    The instance can be used as a context manager; leaving the ``with``
    block calls :meth:`close`.
    """

    # Cut-off applied to a [0, 1] foreground weight map to get a boolean mask.
    _FG_THRESHOLD = 0.5

    def __init__(
        self,
        model_path: str = _DEFAULT_MODEL,
        output_category_mask: bool = True,
        output_confidence_masks: bool = False,
        running_mode: VisionTaskRunningMode = VisionTaskRunningMode.IMAGE,
    ):
        """Create the underlying MediaPipe segmenter.

        Args:
            model_path: Path to the ``.tflite`` segmentation model.
            output_category_mask: Ask the segmenter for a category mask
                (required by every method except ``confidence_alpha_blend``).
            output_confidence_masks: Ask the segmenter for confidence masks
                (required by ``confidence_alpha_blend``).
            running_mode: MediaPipe running mode passed to the segmenter.
        """
        base_options = BaseOptions(model_asset_path=model_path)

        self.options = vision.ImageSegmenterOptions(
            base_options=base_options,
            output_category_mask=output_category_mask,
            output_confidence_masks=output_confidence_masks,
            running_mode=running_mode,
        )

        self.segmentor = vision.ImageSegmenter.create_from_options(self.options)

        # Previous frame's mask, used for temporal smoothing by the
        # optimize_virtual_background* methods.
        self.prev_mask = None

    # ───────────────────────── lifecycle ─────────────────────────

    def close(self) -> None:
        """Release the underlying MediaPipe segmenter."""
        self.segmentor.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    # ───────────────────────── core helpers ─────────────────────────

    def process(self, image: np.ndarray):
        """Run the segmenter on a BGR image and return the raw MediaPipe result.

        The image is converted BGR -> RGB here, so callers must pass BGR data
        and must not convert it themselves.

        NOTE(review): this always calls ``ImageSegmenter.segment``, which is
        presumably only valid for ``VisionTaskRunningMode.IMAGE`` — confirm
        before constructing the class with another running mode.
        """
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
        return self.segmentor.segment(mp_image)

    def _category_mask(self, result) -> np.ndarray:
        """Return the category mask of *result* with singleton axes removed.

        Raises:
            ValueError: If the result carries no category mask.
        """
        if result.category_mask is None:
            raise ValueError("Enable output_category_mask=True")
        return np.squeeze(result.category_mask.numpy_view())

    def _get_mask(self, result, smooth=True):
        """Return the raw category mask as float32, optionally Gaussian-blurred.

        The values are NOT normalised; they are whatever the model emits.
        """
        mask = self._category_mask(result).astype(np.float32)
        if smooth:
            mask = cv2.GaussianBlur(mask, (15, 15), 0)
        return mask

    def _soft_foreground(self, result, smooth=True) -> np.ndarray:
        """Return a float32 foreground weight map with values in [0, 1].

        Non-zero category pixels become 1.0 and the rest 0.0 before the
        optional blur, so the output range is independent of the numeric
        category values.
        """
        weights = (self._category_mask(result) > 0).astype(np.float32)
        if smooth:
            weights = cv2.GaussianBlur(weights, (15, 15), 0)
        return weights

    def _foreground_region(self, result) -> np.ndarray:
        """Return a boolean (H, W) mask: smoothed foreground weight above 0.5."""
        return self._soft_foreground(result) > self._FG_THRESHOLD

    def _expand_mask(self, mask):
        """Append a trailing axis so a 2-D mask broadcasts over colour channels."""
        return mask[..., None]

    def _composite(self, image: np.ndarray, bg: np.ndarray, alpha: np.ndarray) -> np.ndarray:
        """Blend *image* over *bg* (resized to match) using a 2-D *alpha* map."""
        bg = cv2.resize(bg, (image.shape[1], image.shape[0]))
        alpha = self._expand_mask(np.clip(alpha, 0.0, 1.0))
        blended = image.astype(np.float32) * alpha + bg.astype(np.float32) * (1.0 - alpha)
        # Clip guards against float round-off just outside the uint8 range.
        return np.clip(blended, 0, 255).astype(np.uint8)

    def _segment_downscaled(self, frame: np.ndarray, scale: float) -> np.ndarray:
        """Segment a downscaled copy of *frame*; return a full-size float32 mask in [0, 1]."""
        small = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
        mask = self._category_mask(self.process(small)).astype(np.float32)
        mask = cv2.resize(
            mask, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_LINEAR
        )
        return np.clip(mask, 0, 1)

    def _temporal_smooth(self, mask: np.ndarray) -> np.ndarray:
        """Mix *mask* with the previous frame's mask (80 % old, 20 % new).

        The stored mask is ignored when its shape differs from *mask*
        (for example after a resolution change), which restarts the smoothing
        instead of failing on a broadcast error.
        """
        previous = getattr(self, "prev_mask", None)
        if previous is not None and previous.shape == mask.shape:
            mask = 0.8 * previous + 0.2 * mask
        # bilateralFilter downstream needs float32 input.
        mask = mask.astype(np.float32, copy=False)
        self.prev_mask = mask.copy()
        return mask

    # ───────────────────────── compositing ─────────────────────────

    def remove_background2(self, image: np.ndarray) -> np.ndarray:
        """Black out the background using the smoothed raw category mask."""
        result = self.process(image)
        condition = self._expand_mask(self._get_mask(result) > 0.5)
        return np.where(condition, image, 0)

    def remove_background(self, image: np.ndarray) -> np.ndarray:
        """Black out every pixel whose raw category value is not above 0.5."""
        result = self.process(image)
        condition = self._expand_mask(self._category_mask(result) > 0.5)
        return np.where(condition, image, 0)

    def blur_background(self, image: np.ndarray, blur_strength=(55, 55)) -> np.ndarray:
        """Gaussian-blur the background while keeping foreground pixels intact.

        Args:
            image: BGR numpy array.
            blur_strength: Gaussian kernel size passed to ``cv2.GaussianBlur``.
        """
        result = self.process(image)
        blurred = cv2.GaussianBlur(image, blur_strength, 0)
        condition = self._expand_mask(self._category_mask(result) > 0.5)
        return np.where(condition, image, blurred)

    def replace_background(self, image: np.ndarray, background_path: str) -> np.ndarray:
        """Replace the background with an image loaded from *background_path*.

        Raises:
            FileNotFoundError: If the background image is missing or unreadable.
        """
        bg = cv2.imread(background_path)
        if bg is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                f"Could not read background image (missing or unreadable): {background_path}"
            )
        result = self.process(image)
        bg = cv2.resize(bg, (image.shape[1], image.shape[0]))
        condition = self._expand_mask(self._category_mask(result) > 0.5)
        return np.where(condition, image, bg)

    def color_background(self, image: np.ndarray, color=(0, 255, 0)) -> np.ndarray:
        """Fill the background with a solid *color* (default green)."""
        result = self.process(image)
        bg = np.full_like(image, color, dtype=np.uint8)
        condition = self._expand_mask(self._get_mask(result) > 0.5)
        return np.where(condition, image, bg)

    def extract_foreground(self, image: np.ndarray) -> np.ndarray:
        """Multiply the image by the soft foreground weights (background fades to black)."""
        result = self.process(image)
        # Weights are normalised to [0, 1] so the product stays within uint8 range.
        alpha = self._expand_mask(self._soft_foreground(result))
        return np.clip(image * alpha, 0, 255).astype(np.uint8)

    def alpha_blend(self, image: np.ndarray, bg: np.ndarray) -> np.ndarray:
        """Blend *image* over *bg* using the soft foreground weights as alpha."""
        result = self.process(image)
        return self._composite(image, bg, self._soft_foreground(result))

    def overlay_mask(self, image: np.ndarray) -> np.ndarray:
        """Debug visualization: overlay a JET heat-map of the foreground mask."""
        result = self.process(image)
        weights = self._soft_foreground(result, smooth=False)

        heatmap = (weights * 255).astype(np.uint8)
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        return cv2.addWeighted(image, 0.7, heatmap, 0.3, 0)

    def threshold_mask(self, image: np.ndarray, threshold=0.5) -> np.ndarray:
        """Return a uint8 mask (0/255) of raw category values above *threshold*."""
        result = self.process(image)
        mask = self._get_mask(result, smooth=False)
        return (mask > threshold).astype(np.uint8) * 255

    def fast_remove_background(self, image: np.ndarray) -> np.ndarray:
        """No smoothing → faster"""
        result = self.process(image)
        condition = self._expand_mask(self._category_mask(result) > 0.5)
        return np.where(condition, image, 0)

    def fast_process(self, frame, scale=0.5):
        """Segment a downscaled copy of *frame* and return the mask at full size.

        Args:
            frame: BGR numpy array.
            scale: Resize factor applied to both axes before segmentation.
        Returns:
            The raw category mask resized back to the frame's width and height.
        """
        small = cv2.resize(frame, None, fx=scale, fy=scale)
        mask = self._category_mask(self.process(small))
        # Upscale mask back
        return cv2.resize(mask, (frame.shape[1], frame.shape[0]))

    def optimize_virtual_background(self, frame, bg: np.ndarray):
        """Virtual background with temporal smoothing and edge refinement.

        Args:
            frame: Input video frame (BGR image).
            bg: Background image; it is resized to the frame size.
        Returns:
            The composited frame as uint8.
        """
        # 1-4. Downscale to 0.7, segment, upscale and clamp to [0, 1].
        mask = self._segment_downscaled(frame, 0.7)

        # 5. Temporal smoothing against the previous frame's mask.
        mask = self._temporal_smooth(mask)

        # 6. Edge refinement (less blur than a Gaussian, keeps edges).
        mask = cv2.bilateralFilter(mask, 9, 50, 50)

        # 7. Slight gain to firm up the mask, clamped back to [0, 1].
        mask = np.clip(mask * 1.2, 0, 1)

        # 8-10. Resize background, convert to float and blend.
        return self._composite(frame, bg, mask)

    def optimize_virtual_background_improved(self, frame, bg: np.ndarray):
        """
        Optimized virtual background that keeps ONLY ONE person (the largest foreground blob).
        Other people in the background are removed and replaced with the background image.
        Additionally, it includes temporal smoothing and edge refinement for better visual quality.

        Args:
            frame: Input video frame (BGR image).
            bg: Background image to replace the removed background (it is resized to the frame size).

        Returns:
            output: Frame with virtual background applied, keeping only the main person.
        """
        # 1-4. Downscale, segment, upscale, clamp, then binarise.
        mask = self._segment_downscaled(frame, 0.7)
        _, binary_mask = cv2.threshold(mask, 0.5, 1.0, cv2.THRESH_BINARY)

        # 5. Keep only the largest external contour (the main person).
        binary_uint8 = (binary_mask * 255).astype(np.uint8)
        contours, _ = cv2.findContours(
            binary_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        if contours:
            largest_contour = max(contours, key=cv2.contourArea)
            single_person_mask = np.zeros_like(binary_uint8)
            cv2.drawContours(
                single_person_mask, [largest_contour], -1, 255, thickness=cv2.FILLED
            )
            # Convert back to float [0, 1]
            single_person_mask = single_person_mask.astype(np.float32) / 255.0
        else:
            single_person_mask = binary_mask  # fallback if no contours found

        # 6. Temporal smoothing (using the single-person mask)
        single_person_mask = self._temporal_smooth(single_person_mask)

        # 7. Edge refinement with bilateral filter (keeps edges sharp)
        refined_mask = cv2.bilateralFilter(single_person_mask, 9, 50, 50)

        # 8. Slight gain for cleaner edges, clamped back to [0, 1].
        refined_mask = np.clip(refined_mask * 1.15, 0, 1)

        # 9-11. Resize background and blend foreground over it.
        return self._composite(frame, bg, refined_mask)

    def confidence_alpha_blend(
        self, image: np.ndarray, bg: np.ndarray, mask_index: int = 0
    ) -> np.ndarray:
        """Blend *image* over *bg* using one of the model's confidence masks as alpha.

        Args:
            image: BGR numpy array.
            bg: Background image; it is resized to the image size.
            mask_index: Which entry of ``result.confidence_masks`` to use.
                NOTE(review): which index holds the person/foreground
                probability depends on the model — confirm for the model in use.
        Raises:
            ValueError: If the segmenter was created without confidence masks.
            IndexError: If *mask_index* is out of range.
        """
        result = self.process(image)

        confidence_masks = result.confidence_masks
        if not confidence_masks:
            raise ValueError("Enable output_confidence_masks=True")

        fg_mask = np.squeeze(confidence_masks[mask_index].numpy_view()).astype(np.float32)

        # Smooth + normalize
        fg_mask = cv2.GaussianBlur(fg_mask, (11, 11), 0)
        fg_mask = np.clip(fg_mask, 0, 1)

        return self._composite(image, bg, fg_mask)

    def morphological_segmentation(self, image: np.ndarray) -> np.ndarray:
        """Black out the background after closing/opening the binary mask (5x5 kernel)."""
        result = self.process(image)
        binary = (self._category_mask(result) > 0.5).astype(np.uint8)
        # Close fills small holes; open then removes small specks.
        kernel = np.ones((5, 5), np.uint8)
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
        return np.where(self._expand_mask(binary), image, 0)

    # ─────────────────────────── NEW METHODS ───────────────────────────

    def get_foreground_mask(self, image: np.ndarray) -> np.ndarray:
        """Return a binary uint8 mask where 255 = foreground (person) pixels.
        Raw access without any compositing — useful when you want to apply your own logic.

        Args:
            image: BGR numpy array.
        Returns:
            Binary mask numpy array, shape (H, W), dtype uint8.
        """
        result = self.process(image)
        return (self._category_mask(result) > 0.5).astype(np.uint8) * 255

    def count_people(self, image: np.ndarray) -> int:
        """Estimate the number of distinct people by counting separate foreground blobs.
        Counts external contours of the segmentation mask larger than 1 % of the frame.

        Args:
            image: BGR numpy array.
        Returns:
            int: estimated person count (0 or more).
        """
        mask = self.get_foreground_mask(image)
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        min_blob_area = image.shape[0] * image.shape[1] * 0.01  # 1 % of frame
        return sum(1 for c in contours if cv2.contourArea(c) > min_blob_area)

    def measure_foreground_ratio(self, image: np.ndarray) -> float:
        """Return the fraction of the frame occupied by the foreground (person).
        Useful for presence detection or exposure compensation.

        Args:
            image: BGR numpy array.
        Returns:
            float: 0.0–1.0
        """
        mask = self.get_foreground_mask(image)
        return float(np.count_nonzero(mask)) / float(mask.size)

    def draw_foreground_contour(
        self, image: np.ndarray, color=(0, 255, 0), thickness=2
    ) -> np.ndarray:
        """Draw the outline of the detected person silhouette.

        Args:
            image: BGR numpy array.
            color: BGR contour color.
            thickness: Contour line thickness in pixels.
        Returns:
            Annotated BGR numpy array.
        """
        mask = self.get_foreground_mask(image)
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        out = image.copy()
        cv2.drawContours(out, contours, -1, color, thickness)
        return out

    def layered_background(self, image: np.ndarray, bg1, bg2) -> np.ndarray:
        """Composite the foreground over an equal mix of two backgrounds.

        Args:
            image: BGR numpy array.
            bg1: First background image; resized to the image size.
            bg2: Second background image; resized to the image size.
        """
        result = self.process(image)
        mask = self._category_mask(result).astype(np.float32)

        bg1 = cv2.resize(bg1, (image.shape[1], image.shape[0]))
        bg2 = cv2.resize(bg2, (image.shape[1], image.shape[0]))

        # Split mask into layers: "near" weights the image, "far" is shared
        # equally between the two backgrounds.
        near = np.clip(mask * 1.5, 0, 1)
        far = 1 - near

        near = self._expand_mask(near)
        far = self._expand_mask(far)
        return (image * near + bg1 * (far * 0.5) + bg2 * (far * 0.5)).astype(np.uint8)

    def blur_background2(self, image: np.ndarray) -> np.ndarray:
        """Blur the background with a fixed 55x55 Gaussian kernel.

        Uses a lower mask cut-off (0.1) than :meth:`blur_background`.
        """
        # process() performs the BGR → RGB conversion itself; converting here
        # as well would feed the model channel-swapped data.
        results = self.process(image)
        blurred_image = cv2.GaussianBlur(image, (55, 55), 0)
        mask = self._category_mask(results)
        if mask.shape != image.shape[:2]:
            mask = cv2.resize(mask, (image.shape[1], image.shape[0]))

        condition = np.expand_dims(mask, axis=-1) > 0.1
        return np.where(condition, image, blurred_image)

    # ─────────────────────── UTILITY METHODS ───────────────────────

    def is_person_present(
        self, image: np.ndarray, min_area_ratio: float = 0.01
    ) -> bool:
        """Return True if the foreground occupies more than min_area_ratio of the frame.

        Args:
            image: BGR numpy array.
            min_area_ratio: Minimum fraction of total pixels that must be foreground.
        Returns:
            bool: True if person is detected above the area threshold.
        """
        foreground = self._foreground_region(self.process(image))
        if foreground.size == 0:
            return False
        return bool(np.count_nonzero(foreground) / foreground.size > min_area_ratio)

    def get_person_center(self, image: np.ndarray) -> tuple:
        """Return the centroid (cx, cy) of the foreground person mask.

        Falls back to the image center when no foreground is detected.

        Args:
            image: BGR numpy array.
        Returns:
            tuple(int, int): Pixel coordinates (cx, cy).
        """
        binary = self._foreground_region(self.process(image)).astype(np.uint8)
        moments = cv2.moments(binary)
        if moments["m00"] == 0:
            return (image.shape[1] // 2, image.shape[0] // 2)
        return (
            int(moments["m10"] / moments["m00"]),
            int(moments["m01"] / moments["m00"]),
        )

    def get_foreground_bounds(self, image: np.ndarray) -> tuple:
        """Return the bounding rectangle of the foreground region.

        Args:
            image: BGR numpy array.
        Returns:
            tuple(int, int, int, int): (x, y, w, h) bounding box, or (0,0,0,0)
            when no foreground is detected.
        """
        binary = self._foreground_region(self.process(image)).astype(np.uint8)
        pts = cv2.findNonZero(binary)
        if pts is None:
            return (0, 0, 0, 0)
        return cv2.boundingRect(pts)

    def measure_foreground_height(self, image: np.ndarray) -> int:
        """Return the pixel height of the foreground bounding box.

        Args:
            image: BGR numpy array.
        Returns:
            int: Height in pixels (0 when no foreground detected).
        """
        return self.get_foreground_bounds(image)[3]

    def create_green_screen(self, image: np.ndarray) -> np.ndarray:
        """Replace the background with solid green, keeping the foreground person.

        Useful as input to chroma-key compositing pipelines.

        Args:
            image: BGR numpy array.
        Returns:
            BGR numpy array with green background.
        """
        fg = self._expand_mask(self._foreground_region(self.process(image)))
        bg = np.zeros_like(image)
        bg[:] = (0, 255, 0)
        return np.where(fg, image, bg)

    def extract_foreground_on_white(self, image: np.ndarray) -> np.ndarray:
        """Place the foreground person on a pure white background.

        Args:
            image: BGR numpy array.
        Returns:
            BGR numpy array with white background.
        """
        fg = self._expand_mask(self._foreground_region(self.process(image)))
        bg = np.full_like(image, 255)
        return np.where(fg, image, bg)

    def apply_bokeh_effect(
        self, image: np.ndarray, blur_radius: int = 25
    ) -> np.ndarray:
        """Apply a lens-blur (bokeh) effect to the background while keeping the
        foreground person sharp.

        Args:
            image: BGR numpy array.
            blur_radius: Gaussian kernel size (will be made odd if even;
                values below 1 are treated as 1).
        Returns:
            BGR numpy array with blurred background.
        """
        fg = self._expand_mask(self._foreground_region(self.process(image)))
        # GaussianBlur needs a positive, odd kernel size.
        kernel = max(1, int(blur_radius)) | 1
        blurred = cv2.GaussianBlur(image, (kernel, kernel), 0)
        return np.where(fg, image, blurred)

    def apply_edge_glow(
        self, image: np.ndarray, color=(0, 255, 0), thickness: int = 3
    ) -> np.ndarray:
        """Draw a colored outline around the smoothed person silhouette.

        Args:
            image: BGR numpy array.
            color: BGR color tuple for the glow contour.
            thickness: Contour line thickness in pixels.
        Returns:
            Annotated BGR numpy array.
        """
        binary = self._foreground_region(self.process(image)).astype(np.uint8)
        contours, _ = cv2.findContours(
            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        out = image.copy()
        cv2.drawContours(out, contours, -1, color, thickness)
        return out