openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,871 @@
1
+ import inspect
2
+ import math
3
+ import os
4
+ import textwrap
5
+ import time
6
+ from glob import glob
7
+
8
+ import cv2
9
+ import numpy as np
10
+
11
+
12
def rectangle_corners(
    img, bbox, length=30, t=5, rt=1, colorR=(255, 0, 255), colorC=(0, 255, 0)
):
    """
    Draw a bounding box whose four corners are emphasised with "L" shaped marks.

    Args:
        img: The image to draw on (drawn onto directly and also returned).
        bbox: A tuple (x, y, w, h) describing the bounding box.
        length: Length of each arm of a corner mark.
        t: Thickness of the corner marks.
        rt: Thickness of the rectangle outline. A falsy value (0) skips the outline.
        colorR: Color of the rectangle outline.
        colorC: Color of the corner marks.

    Returns:
        The image that was passed in, after drawing.
    """
    left, top, box_w, box_h = bbox
    right, bottom = left + box_w, top + box_h

    if rt:
        cv2.rectangle(img, bbox, colorR, rt)

    # Each corner is paired with the sign of the direction that points into the box.
    corners = (
        (left, top, 1, 1),
        (right, top, -1, 1),
        (left, bottom, 1, -1),
        (right, bottom, -1, -1),
    )
    for corner_x, corner_y, sign_x, sign_y in corners:
        corner = (corner_x, corner_y)
        cv2.line(img, corner, (corner_x + sign_x * length, corner_y), colorC, t)
        cv2.line(img, corner, (corner_x, corner_y + sign_y * length), colorC, t)

    return img
46
+
47
+
48
def detect_highlighted_text(
    img: np.ndarray,
    hsv_colors=None,  # seed HSV values
    h_tol=10,
    s_tol=80,
    v_tol=80,
    show_mask=False,
    show_combined_mask=False,
    show_image_with_mask=False,
):
    """
    Detect highlighted regions by thresholding the image around seed HSV colors.

    The image is blurred, converted from BGR to HSV, and one binary mask is
    built per seed color using the given tolerances. All masks are OR-ed into
    a combined mask, which is then used to cut the matching pixels out of the
    original image.

    Args:
        img: Input image in BGR format (as read by OpenCV).
        hsv_colors: List of seed (h, s, v) tuples to detect. Defaults to
            [(27, 167, 251)] when None.
        h_tol, s_tol, v_tol: Tolerances added to / subtracted from each seed
            channel. The resulting range is clamped to [0, 179] for hue and
            [0, 255] for saturation and value (hue ranges do not wrap around).
        show_mask: Show each individual color mask in an OpenCV window.
        show_combined_mask: Show the combined mask in an OpenCV window.
        show_image_with_mask: Show the masked image in an OpenCV window.

    Returns:
        A 3-tuple (image_with_mask, combined_mask, masks):
            image_with_mask: Original image with everything outside the
                combined mask blacked out.
            combined_mask: Binary mask combining all detected colors.
            masks: List with one binary mask per seed color.

    Usage:
        image = cv2.imread("doc.jpg")

        # Common highlighter HSV seeds (you can refine using click sampling)
        highlight_colors = [
            (30, 200, 250),   # yellow
            (60, 200, 250),   # green
            (150, 200, 250),  # pink
            (15, 200, 250),   # orange
        ]

        masked, mask, masks = detect_highlighted_text(image, highlight_colors)
    """
    if hsv_colors is None:
        hsv_colors = [(27, 167, 251)]

    img_blur = cv2.GaussianBlur(img, (5, 5), 0)
    hsv = cv2.cvtColor(img_blur, cv2.COLOR_BGR2HSV)
    combined_mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
    masks = []

    for i, (h, s, v) in enumerate(hsv_colors):
        lower = np.array([max(0, h - h_tol), max(0, s - s_tol), max(0, v - v_tol)])
        upper = np.array(
            [min(179, h + h_tol), min(255, s + s_tol), min(255, v + v_tol)]
        )

        mask = cv2.inRange(hsv, lower, upper)
        masks.append(mask)
        combined_mask = cv2.bitwise_or(combined_mask, mask)

        if show_mask:
            cv2.imshow(f"Mask {i}", mask)

    # Computed once; it is both displayed (optionally) and returned.
    img_with_mask = cv2.bitwise_and(img, img, mask=combined_mask)

    if show_combined_mask:
        cv2.imshow("Combined Mask", combined_mask)
    if show_image_with_mask:
        cv2.imshow("Image with Mask", img_with_mask)

    # Block until a key press only when at least one window was opened.
    if show_mask or show_combined_mask or show_image_with_mask:
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return img_with_mask, combined_mask, masks
133
+
134
+
135
def get_dominant_hsv_colors(image, k=4):
    """
    Estimate the dominant colors of an image by K-means clustering in HSV space.

    The image is blurred, converted from BGR to HSV, flattened to a list of
    pixels, and clustered into ``k`` groups. Centers are initialised randomly,
    so results may differ between runs.

    Args:
        image: Input image in BGR format (as read by OpenCV).
        k: Number of dominant colors to detect (default is 4).

    Returns:
        List of ``k`` HSV tuples (h, s, v), one per cluster center, with each
        channel truncated to an int.
    """
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    hsv_pixels = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV).reshape(-1, 3)
    samples = np.float32(hsv_pixels)

    # Stop after 100 iterations or once centers move by less than 0.2.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
    attempts = 10
    _, _, centers = cv2.kmeans(
        samples, k, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS
    )

    return [tuple(int(channel) for channel in center) for center in centers]
165
+
166
+
167
def refine_mask(mask):
    """
    Clean up a binary mask with morphological operations.

    Steps, in order: one opening pass with a 3x3 kernel, two closing passes
    with the same kernel, then one dilation with a 15x5 kernel to merge
    neighbouring regions.

    Args:
        mask: The input binary mask to be refined.

    Returns:
        The refined binary mask.
    """
    small_kernel = np.ones((3, 3), np.uint8)
    # NOTE(review): shape (15, 5) is 15 rows by 5 columns, i.e. taller than it
    # is wide. If the goal is to merge words on the same text line, a wide
    # kernel may have been intended - confirm before changing.
    merge_kernel = np.ones((15, 5), np.uint8)

    opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, small_kernel, iterations=1)
    closed = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, small_kernel, iterations=2)

    return cv2.dilate(closed, merge_kernel, iterations=1)
187
+
188
+
189
def detect_single_highlighted_text(image, hsv_colors=None):
    """
    Detect highlighted areas using a single HSV range.

    The image is converted from BGR to HSV and thresholded between the given
    lower bound and a fixed upper bound of (140, 255, 255).

    Args:
        image: Input image in BGR format (as read by OpenCV).
        hsv_colors: Sequence (h, s, v) used as the LOWER bound of the range.
            Defaults to [27, 167, 251] when None.

    Returns:
        image_with_mask: Image with everything outside the mask blacked out.
        mask: Binary mask of the detected areas.
    """
    if hsv_colors is None:
        hsv_colors = [27, 167, 251]
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    lower = np.array([hsv_colors[0], hsv_colors[1], hsv_colors[2]])
    # The upper bound is fixed; only the lower bound is configurable.
    upper = np.array([140, 255, 255])

    mask = cv2.inRange(hsv, lower, upper)
    img_with_mask = cv2.bitwise_and(image, image, mask=mask)
    return img_with_mask, mask
210
+
211
+
212
def find_contours(
    mask,
    min_area=500,
    debug=False,
    sort_countours=False,
    sorted_bounding_box=False,
    retrieval_type=cv2.RETR_EXTERNAL,
    approximation_method=cv2.CHAIN_APPROX_SIMPLE,
):
    """
    Find contours in a binary mask and keep only those that are large enough.

    The mask is first cleaned with ``refine_mask``. A contour is kept when its
    area is at least ``min_area`` and its bounding box is at least 20 pixels
    wide and 10 pixels tall.

    Args:
        mask: Binary image (mask) where contours are to be found.
        min_area: Minimum contour area to keep (default is 500).
        debug: If True, prints how many contours were found and kept.
        sort_countours: If True, sorts the returned contours by area in
            descending order (default is False). The parameter name keeps its
            original spelling.
        sorted_bounding_box: If True, sorts the returned boxes top-to-bottom,
            then left-to-right (default is False).
        retrieval_type: Contour retrieval mode (default is cv2.RETR_EXTERNAL).
        approximation_method: Contour approximation method
            (default is cv2.CHAIN_APPROX_SIMPLE).

    Returns:
        filtered_contours: List of contours that passed the filters.
        boxes: List of bounding box tuples (x, y, w, h) for those contours.
            The two lists are sorted independently, so once either sort flag
            is enabled ``boxes[i]`` may no longer belong to
            ``filtered_contours[i]``.
    """
    # 1. Clean noise before looking for contours
    cleaned = refine_mask(mask)

    # 2. Find contours
    contours, _ = cv2.findContours(cleaned, retrieval_type, approximation_method)

    # 3. Keep contours that are big enough in both area and bounding box
    filtered_contours = []
    boxes = []
    for contour in contours:
        if cv2.contourArea(contour) < min_area:
            continue

        x, y, w, h = cv2.boundingRect(contour)
        if w >= 20 and h >= 10:
            filtered_contours.append(contour)
            boxes.append((x, y, w, h))

    if debug:
        print(f"Total contours: {len(contours)}")
        print(f"Filtered contours: {len(filtered_contours)}")

    if sort_countours:
        filtered_contours.sort(key=cv2.contourArea, reverse=True)

    if sorted_bounding_box:
        # Top-to-bottom first, left-to-right as tie breaker
        boxes.sort(key=lambda box: (box[1], box[0]))

    return filtered_contours, boxes
284
+
285
+
286
def resize_with_padding(img, target_size, color=(0, 0, 0)):
    """
    Resize an image while keeping its aspect ratio, then pad it to the target size.

    Args:
        img: The input image to be resized.
        target_size: A tuple (width, height) with the desired output size.
        color: The color of the padding (default is black).

    Returns:
        The resized image, padded (as evenly as possible on both sides) so the
        result is exactly ``target_size``.
    """
    h, w = img.shape[:2]
    target_w, target_h = target_size

    scale = min(target_w / w, target_h / h)
    # Truncation can yield 0 for very elongated images (e.g. 1x1000 scaled to
    # fit 10x10), which cv2.resize rejects; keep at least one pixel per side.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))

    resized = cv2.resize(img, (new_w, new_h))

    pad_w = target_w - new_w
    pad_h = target_h - new_h

    # Any odd leftover pixel goes to the bottom / right side.
    top = pad_h // 2
    bottom = pad_h - top
    left = pad_w // 2
    right = pad_w - left

    return cv2.copyMakeBorder(
        resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
317
+
318
+
319
def highlight_image(img, selected=False, color=(0, 255, 0), thickness=3):
    """
    Draw a border around the whole image when it is marked as selected.

    Args:
        img: The input image to be highlighted.
        selected: Whether to draw the highlight border (default is False).
        color: The color of the highlight border (default is green).
        thickness: The thickness of the highlight border (default is 3).

    Returns:
        The image that was passed in. It is drawn onto directly when
        ``selected`` is True and left untouched otherwise.

    Usage:
        img = cv2.imread('input.jpg')
        highlighted_img = highlight_image(img, selected=True, color=(0, 255, 0), thickness=3)
        cv2.imshow('Highlighted Image', highlighted_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    """
    if selected:
        h, w = img.shape[:2]
        # The last valid pixel is (w - 1, h - 1). Anchoring the rectangle at
        # (w, h) pushes the right/bottom edges off the image, so they render
        # thinner than the top/left edges.
        cv2.rectangle(img, (0, 0), (w - 1, h - 1), color, thickness)
    return img
343
+
344
+
345
def zoom_image(img, scale=2.0):
    """
    Scale an image uniformly in both directions.

    Args:
        img: The input image to be zoomed.
        scale: The zoom factor applied to width and height
            (default is 2.0, i.e. 200%).

    Returns:
        The zoomed image.
    """
    # A (0, 0) output size tells cv2.resize to derive it from fx / fy.
    derive_from_factors = (0, 0)
    zoomed = cv2.resize(img, derive_from_factors, fx=scale, fy=scale)
    return zoomed
356
+
357
+
358
def put_text_think_corners(
    img, bounding_box, color=(255, 255, 255), length=20, thickness=2
):
    """
    Draw "L" shaped corner marks on the four corners of a bounding box.

    Despite its name, this function draws no text and no full rectangle; it
    only draws the eight short corner lines.

    Args:
        img: The input image to draw on (drawn onto directly and also returned).
        bounding_box: A tuple (x, y, w, h) with the bounding box coordinates.
        color: The color of the corner lines (default is white).
        length: The length of the corner lines (default is 20).
        thickness: The thickness of the corner lines (default is 2).

    Returns:
        The image that was passed in, after drawing.
    """
    left, top, box_w, box_h = bounding_box
    right, bottom = left + box_w, top + box_h

    # (corner x, corner y, horizontal arm end x, vertical arm end y), in the
    # order top-left, top-right, bottom-left, bottom-right.
    corner_specs = (
        (left, top, left + length, top + length),
        (right, top, right - length, top + length),
        (left, bottom, left + length, bottom - length),
        (right, bottom, right - length, bottom - length),
    )
    for corner_x, corner_y, arm_x, arm_y in corner_specs:
        cv2.line(img, (corner_x, corner_y), (arm_x, corner_y), color, thickness)
        cv2.line(img, (corner_x, corner_y), (corner_x, arm_y), color, thickness)

    return img
389
+
390
+
391
def overlay_transparent(bg, fg, pos=(0, 0)):
    """
    Alpha-blend a foreground image onto a background image in place.

    The foreground is clipped to the background bounds, so ``pos`` may be
    negative or place the foreground partly (or fully) outside the background.

    Args:
        bg: Background image; the blended region is written back into it.
        fg: Foreground image with a channel axis. A 4th channel is used as the
            alpha channel; otherwise the foreground is treated as opaque.
        pos: (x, y) of the foreground's top-left corner on the background.

    Returns:
        The background array that was passed in.
    """
    left, top = pos
    fg_h, fg_w = fg.shape[:2]
    bg_h, bg_w = bg.shape[:2]

    # Destination rectangle, clipped to the background.
    dst_x0, dst_y0 = max(left, 0), max(top, 0)
    dst_x1, dst_y1 = min(left + fg_w, bg_w), min(top + fg_h, bg_h)

    # Nothing to draw when the clipped rectangle is empty.
    if dst_x0 >= dst_x1 or dst_y0 >= dst_y1:
        return bg

    # Matching source rectangle inside the foreground.
    src_x0, src_y0 = max(-left, 0), max(-top, 0)
    src_x1 = src_x0 + (dst_x1 - dst_x0)
    src_y1 = src_y0 + (dst_y1 - dst_y0)
    patch = fg[src_y0:src_y1, src_x0:src_x1]

    has_alpha = patch.shape[2] == 4
    if has_alpha:
        alpha = patch[..., 3:4] / 255.0
        colour = patch[..., :3]
    else:
        # No alpha channel: treat every pixel as fully opaque.
        alpha = np.ones((patch.shape[0], patch.shape[1], 1), dtype=np.float32)
        colour = patch

    region = bg[dst_y0:dst_y1, dst_x0:dst_x1]
    blended = region * (1 - alpha) + colour * alpha
    bg[dst_y0:dst_y1, dst_x0:dst_x1] = blended.astype(bg.dtype)

    return bg
421
+
422
+
423
def get_valid_images(folder_path):
    """
    Collect the image files found directly inside a folder.

    Files are matched by extension (png, jpg, jpeg, bmp, tiff, webp) using
    glob patterns; sub-folders are not searched.

    Args:
        folder_path (str): The path to the folder containing images.

    Returns:
        list: Paths of the matching files (each is ``folder_path`` joined with
        the file name), grouped by extension in the order listed above.
    """
    extensions = ("png", "jpg", "jpeg", "bmp", "tiff", "webp")
    return [
        match
        for extension in extensions
        for match in glob(os.path.join(folder_path, "*." + extension))
    ]
440
+
441
+
442
def draw_rounded_rect(img, top_left, bottom_right, color, radius=20, thickness=-1):
    """
    Draw a rectangle with rounded corners onto an image.

    Args:
        img: The image to draw on (drawn onto directly).
        top_left: (x1, y1) of the rectangle's top-left corner.
        bottom_right: (x2, y2) of the rectangle's bottom-right corner.
        color: Drawing color.
        radius: Corner radius in pixels (default is 20).
        thickness: Negative values draw a filled shape (default). Zero or
            positive values draw only the outline; 0 is treated as 1.

    Returns:
        None.
    """
    x1, y1 = top_left
    x2, y2 = bottom_right

    if thickness < 0:
        # Filled: two overlapping rectangles form a plus shape, and four
        # filled discs round off the corners.
        cv2.rectangle(img, (x1 + radius, y1), (x2 - radius, y2), color, thickness)
        cv2.rectangle(img, (x1, y1 + radius), (x2, y2 - radius), color, thickness)

        cv2.circle(img, (x1 + radius, y1 + radius), radius, color, -1)
        cv2.circle(img, (x2 - radius, y1 + radius), radius, color, -1)
        cv2.circle(img, (x1 + radius, y2 - radius), radius, color, -1)
        cv2.circle(img, (x2 - radius, y2 - radius), radius, color, -1)
        return

    # Outline: previously this case drew only four filled discs. Draw the
    # four straight edges and join them with quarter-circle arcs instead.
    stroke = max(1, thickness)

    cv2.line(img, (x1 + radius, y1), (x2 - radius, y1), color, stroke)
    cv2.line(img, (x1 + radius, y2), (x2 - radius, y2), color, stroke)
    cv2.line(img, (x1, y1 + radius), (x1, y2 - radius), color, stroke)
    cv2.line(img, (x2, y1 + radius), (x2, y2 - radius), color, stroke)

    axes = (radius, radius)
    # (arc center, rotation angle); each arc then spans 0..90 degrees.
    arcs = (
        ((x1 + radius, y1 + radius), 180),  # top-left
        ((x2 - radius, y1 + radius), 270),  # top-right
        ((x2 - radius, y2 - radius), 0),  # bottom-right
        ((x1 + radius, y2 - radius), 90),  # bottom-left
    )
    for center, rotation in arcs:
        cv2.ellipse(img, center, axes, rotation, 0, 90, color, stroke)
454
+
455
+
456
def is_hovering(button, point):
    """
    Tell whether a point lies inside a button's rectangle (edges included).

    Args:
        button: Object exposing ``pos`` as (x, y) of its top-left corner,
            plus ``width`` and ``height``.
        point: (x, y) coordinates to test.

    Returns:
        True when the point is within the button's bounds, False otherwise.
    """
    point_x, point_y = point
    left, top = button.pos

    inside_horizontally = left <= point_x <= left + button.width
    return inside_horizontally and top <= point_y <= top + button.height
461
+
462
+
463
def create_centered_grid_buttons(
    frame,
    values,
    button_cls,
    button_size=(100, 100),
    gap=15,
    y_offset=0,
):
    """
    Create a grid of buttons centered on an OpenCV frame.

    Args:
        frame: Image whose ``shape`` gives the area to center the grid in.
        values: List of rows, each a list of button labels, for example
            [
                ['7', '8', '9', '*'],
                ['4', '5', '6', '-']
            ]
            Rows may have different lengths; the grid is centered using the
            widest row and shorter rows simply produce fewer buttons.
        button_cls: Callable invoked as
            ``button_cls(pos=(x, y), text=label, size=button_size)``.
        button_size: (width, height) of each button (default is (100, 100)).
        gap: Space in pixels between neighbouring buttons (default is 15).
        y_offset: Vertical shift applied to the whole grid (default is 0).

    Returns:
        List of created buttons in row-major order. Empty when ``values`` is
        empty.
    """
    # An empty grid has no first row to measure; nothing to build.
    if not values:
        return []

    frame_h, frame_w = frame.shape[:2]

    rows = len(values)
    # Use the widest row so ragged input neither crashes nor drops cells.
    cols = max(len(row_values) for row_values in values)

    btn_w, btn_h = button_size

    grid_w = cols * btn_w + (cols - 1) * gap
    grid_h = rows * btn_h + (rows - 1) * gap

    start_x = (frame_w - grid_w) // 2
    start_y = (frame_h - grid_h) // 2 + y_offset

    return [
        button_cls(
            pos=(start_x + col * (btn_w + gap), start_y + row * (btn_h + gap)),
            text=label,
            size=button_size,
        )
        for row, row_values in enumerate(values)
        for col, label in enumerate(row_values)
    ]
506
+
507
+
508
def put_text_rect(
    img,
    text,
    pos,
    scale=3,
    thickness=3,
    colorT=(255, 255, 255),
    colorR=(255, 0, 255),
    font=cv2.FONT_HERSHEY_PLAIN,
    offset=10,
    border=None,
    colorB=(0, 255, 0),
):
    """
    Draw text on top of a filled background rectangle for better visibility.

    Args:
        img: The input image to draw on (drawn onto directly and also returned).
        text: The text string to be displayed.
        pos: A tuple (x, y) with the bottom-left corner of the text.
        scale: The font scale factor (default is 3).
        thickness: The thickness of the text (default is 3).
        colorT: The color of the text (default is white).
        colorR: The color of the rectangle background (default is magenta).
        font: The font type (default is cv2.FONT_HERSHEY_PLAIN).
        offset: Padding between the text and the rectangle edge (default is 10).
        border: Thickness of an outline around the rectangle. A falsy value
            (None or 0, the default) draws no outline.
        colorB: The color of the outline (default is green).

    Returns:
        A tuple (img, box) where ``box`` is the rectangle as
        [left, top, right, bottom].
    """
    (text_w, text_h), _ = cv2.getTextSize(text, font, scale, thickness)

    # The text origin is its bottom-left corner, so the box grows upwards.
    left = pos[0] - offset
    bottom = pos[1] + offset
    right = pos[0] + text_w + offset
    top = pos[1] - text_h - offset

    bottom_left = (left, bottom)
    top_right = (right, top)

    cv2.rectangle(img, bottom_left, top_right, colorR, -1)
    if border:
        cv2.rectangle(img, bottom_left, top_right, colorB, border)
    cv2.putText(img, text, pos, font, scale, colorT, thickness)

    return img, [left, top, right, bottom]
547
+
548
+
549
def draw_wrapped_text(
    img, text, start_pos, font, scale, color, thickness, max_width=40
):
    """
    Draw text on an image, wrapping it onto several lines.

    Args:
        img: The image to draw on (drawn onto directly).
        text: The text to draw.
        start_pos: (x, y) origin of the first line.
        font: OpenCV font identifier.
        scale: Font scale; also scales the spacing between lines.
        color: Text color.
        thickness: Text thickness.
        max_width: Maximum number of CHARACTERS per line (default is 40).

    Returns:
        None.
    """
    lines = textwrap.wrap(text, width=max_width)
    origin_x, origin_y = start_pos

    for index, line in enumerate(lines):
        # Consecutive lines are spaced 30 pixels apart, scaled with the font.
        baseline = (origin_x, origin_y + index * int(30 * scale))
        cv2.putText(img, line, baseline, font, scale, color, thickness)
559
+
560
def load_image(image_input):
    """
    Return an image given either a file path or an already loaded array.

    Files are read with ``cv2.IMREAD_UNCHANGED``.

    Args:
        image_input (str or np.ndarray): The file path of the image, or a
            numpy array, which is returned as is.

    Returns:
        np.ndarray: The loaded image.

    Raises:
        ValueError: If the image path is invalid or the file is unreadable.
    """
    if not isinstance(image_input, np.ndarray):
        # Anything that is not an array is treated as a path.
        loaded = cv2.imread(image_input, cv2.IMREAD_UNCHANGED)
        if loaded is None:
            raise ValueError("Invalid image path or unreadable file")
        return loaded

    return image_input
582
+
583
+
584
def move_image(
    img, direction="left_to_right", speed=5, start_pos=(0, 0), window_size=(800, 600)
):
    """
    Animate an image moving across a black canvas until ESC is pressed.

    Args:
        img: The file path of the image or a numpy array to be moved.
        direction: The direction of movement: "left_to_right",
            "right_to_left", "top_to_bottom", "bottom_to_top", "diag_tl_br"
            or "diag_br_tl". Any other value leaves the image stationary.
        speed: The speed of movement in pixels per frame (default is 5).
        start_pos: A tuple (x, y) with the starting position of the image
            (default is (0, 0)).
        window_size: A tuple (width, height) with the size of the display
            canvas (default is (800, 600)).

    Returns:
        None

    Usage:
        move_image("path/to/image.png", direction="left_to_right", speed=5, start_pos=(0, 0), window_size=(800, 600))
    """
    img = load_image(img)

    # The canvas is 3-channel BGR. Images can arrive as grayscale or BGRA
    # (load_image reads files unchanged), which cannot be assigned into a
    # 3-channel slice, so normalise them first.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    h, w = img.shape[:2]
    canvas_w, canvas_h = window_size

    # Per-frame (dx, dy) step for every supported direction.
    steps = {
        "left_to_right": (speed, 0),
        "right_to_left": (-speed, 0),
        "top_to_bottom": (0, speed),
        "bottom_to_top": (0, -speed),
        "diag_tl_br": (speed, speed),
        "diag_br_tl": (-speed, -speed),
    }
    step_x, step_y = steps.get(direction, (0, 0))

    x, y = start_pos

    while True:
        canvas = np.zeros((canvas_h, canvas_w, 3), dtype=np.uint8)

        canvas_x = int(x)
        canvas_y = int(y)

        # Clip the image rectangle to the canvas so partially visible
        # positions (including negative coordinates) are drawn safely.
        x1, y1 = max(canvas_x, 0), max(canvas_y, 0)
        x2, y2 = min(canvas_x + w, canvas_w), min(canvas_y + h, canvas_h)

        img_x1, img_y1 = max(-canvas_x, 0), max(-canvas_y, 0)
        img_x2, img_y2 = img_x1 + (x2 - x1), img_y1 + (y2 - y1)

        if x1 < x2 and y1 < y2:
            canvas[y1:y2, x1:x2] = img[img_y1:img_y2, img_x1:img_x2]

        cv2.imshow("Moving Image", canvas)

        # Mask to the low byte: some platforms set extra high bits on the
        # key code, which would make a plain == 27 comparison miss ESC.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC to stop
            break

        x += step_x
        y += step_y

        time.sleep(0.01)

    cv2.destroyAllWindows()
649
+
650
+
651
def stack_images_grid(img_list, cols=2, scale=1.0, labels=None, bg_color=(0, 0, 0)):
    """
    Stack images in a grid.

    Every image is converted to 3-channel BGR and resized to the size of the
    first non-None image so the rows and columns line up.

    Args:
        img_list (list): list of images; None entries become black cells
        cols (int): number of columns
        scale (float): scaling factor applied to the finished grid
        labels (list): optional titles, drawn on the cell with the same index
        bg_color (tuple): background color for the unused cells that fill up
            the last row

    Returns:
        np.ndarray: stacked image, or None when ``img_list`` is empty or
        contains only None entries.
    """
    if not img_list:
        return None

    # The reference size comes from the first real image; a leading None
    # must not crash the function since None entries are otherwise allowed.
    reference = next((img for img in img_list if img is not None), None)
    if reference is None:
        return None
    h, w = reference.shape[:2]

    processed = []
    for img in img_list:
        if img is None:
            img = np.zeros((h, w, 3), dtype=np.uint8)

        # Bring everything to 3 channels, otherwise np.hstack fails when
        # grayscale / BGRA images are mixed with BGR ones.
        if img.ndim == 2 or img.shape[2] == 1:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

        img = cv2.resize(img, (w, h))
        processed.append(img)

    # Calculate rows needed and fill remaining slots with blank images
    total = len(processed)
    rows = math.ceil(total / cols)

    blank = np.full((h, w, 3), bg_color, dtype=np.uint8)
    # Each filler cell gets its own copy so a label drawn on one of them
    # does not show up on all of them.
    processed.extend(blank.copy() for _ in range(rows * cols - total))

    # Add labels if provided
    if labels:
        for cell, text in zip(processed, labels):
            cv2.putText(
                cell,
                text,
                (10, 25),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0, 255, 0),
                2,
            )

    # Stack
    grid = [np.hstack(processed[i * cols : (i + 1) * cols]) for i in range(rows)]
    stacked = np.vstack(grid)

    # Scale final output
    if scale != 1.0:
        stacked = cv2.resize(stacked, (0, 0), fx=scale, fy=scale)

    return stacked
715
+
716
+
717
def get_currect_path():
    """
    Describe where this module file lives on disk.

    Returns:
        tuple: ``(path, filename, full_path)`` where ``full_path`` is the real
        (symlink-resolved) path of this file, ``path`` its directory and
        ``filename`` its base name.
    """
    resolved = os.path.realpath(__file__)
    directory = os.path.dirname(resolved)
    base_name = os.path.basename(resolved)
    return directory, base_name, resolved
727
+
728
+
729
def get_calling_folder():
    """
    Locate the file that called this function.

    Returns:
        tuple: ``(folder, calling_file)`` where ``calling_file`` is the file
        name reported for the caller's stack frame and ``folder`` is the
        absolute path of the directory containing it.
    """
    # Index 0 is this function's own frame; index 1 is the direct caller.
    caller_file = inspect.stack()[1].filename
    caller_folder = os.path.dirname(os.path.abspath(caller_file))
    return caller_folder, caller_file
738
+
739
+
740
def find_project_root(start_path, marker="module"):
    """
    Walk up the directory tree until a directory containing ``marker`` is found.

    Args:
        start_path: The path from which to start searching.
        marker: Name of the folder or file whose presence identifies the
            project root (default is "module").

    Returns:
        The absolute path of the first directory, starting at ``start_path``
        and moving upwards, that contains ``marker``.

    Raises:
        RuntimeError: If the filesystem root is reached without a match.
    """
    candidate = os.path.abspath(start_path)

    while not os.path.exists(os.path.join(candidate, marker)):
        parent = os.path.dirname(candidate)
        # dirname() of the filesystem root is the root itself: nowhere left to go.
        if parent == candidate:
            raise RuntimeError("Project root not found")
        candidate = parent

    return candidate
759
+
760
+
761
def mouse_drag_overlay(event, x, y, flags, param):
    """
    Mouse callback that lets the user drag an overlay around with the left button.

    Args:
        event: The type of mouse event (e.g., cv2.EVENT_LBUTTONDOWN,
            cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONUP).
        x: The x-coordinate of the mouse event.
        y: The y-coordinate of the mouse event.
        flags: Flags passed by OpenCV (not used in this function).
        param: A dictionary with the keys:
            "drag_position": dict holding the overlay's current "x" and "y".
                This function also maintains "dragging", "offset_x" and
                "offset_y" inside it; they do not need to be pre-populated.
            "overlay_w", "overlay_h": overlay width and height.

    Returns:
        None. The overlay position is updated inside ``param["drag_position"]``.
    """
    drag_position = param["drag_position"]
    overlay_w = param["overlay_w"]
    overlay_h = param["overlay_h"]

    ox = drag_position["x"]
    oy = drag_position["y"]

    inside_overlay = ox <= x <= ox + overlay_w and oy <= y <= oy + overlay_h

    if event == cv2.EVENT_LBUTTONDOWN and inside_overlay:
        drag_position["dragging"] = True
        # Remember where inside the overlay it was grabbed so it does not
        # jump to the cursor when the drag starts.
        drag_position["offset_x"] = x - ox
        drag_position["offset_y"] = y - oy

    # .get(): the mouse usually moves before the first click, at which point
    # a dict holding only "x"/"y" has no "dragging" key yet.
    elif event == cv2.EVENT_MOUSEMOVE and drag_position.get("dragging", False):
        drag_position["x"] = x - drag_position.get("offset_x", 0)
        drag_position["y"] = y - drag_position.get("offset_y", 0)

    elif event == cv2.EVENT_LBUTTONUP:
        drag_position["dragging"] = False
795
+
796
+
797
def overlay_frame(
    background,
    overlay,
    position="top-left",
    padding=10,
    draggable=False,
    drag_position=None,
    draw_border=True,
):
    """
    Paste one image on top of another at a named or dragged position.

    The overlay replaces the background pixels (no blending) and is clamped so
    that it stays inside the background.

    Args:
        background: The background image; the overlay is written into it.
        overlay: The image to be pasted onto the background.
        position: Where to place the overlay (default is "top-left"). Options:
            "top-left", "top-right", "bottom-left", "bottom-right", "center".
            Unknown values fall back to "top-left".
        padding: Distance in pixels from the background edges (default is 10).
            Not applied for "center".
        draggable: If True and ``drag_position`` is given, the position is
            taken from ``drag_position`` instead of ``position``.
        drag_position: A dictionary with "x" and "y" keys holding the overlay
            position while dragging (default is None). The clamped position is
            written back into it.
        draw_border: Accepted but currently unused; no border is drawn.

    Returns:
        The background image that was passed in, with the overlay applied.
    """
    bg_h, bg_w = background.shape[:2]
    ov_h, ov_w = overlay.shape[:2]

    follow_drag = draggable and drag_position is not None

    if follow_drag:
        x, y = drag_position["x"], drag_position["y"]
    else:
        far_x = bg_w - ov_w - padding
        far_y = bg_h - ov_h - padding
        anchors = {
            "top-left": (padding, padding),
            "top-right": (far_x, padding),
            "bottom-left": (padding, far_y),
            "bottom-right": (far_x, far_y),
            "center": ((bg_w - ov_w) // 2, (bg_h - ov_h) // 2),
        }
        x, y = anchors.get(position, (padding, padding))

    # Keep the overlay inside the background.
    x = max(0, min(x, bg_w - ov_w))
    y = max(0, min(y, bg_h - ov_h))

    if follow_drag:
        # Feed the clamped position back so dragging stops at the edges.
        drag_position["x"] = x
        drag_position["y"] = y

    background[y : y + ov_h, x : x + ov_w] = overlay
    return background
853
+
854
+
855
def auto_layout(drawings, frame_shape, cols=4, padding=20):
    """
    Arrange drawings on a regular grid by assigning each one an origin.

    Every grid cell is as large as the widest and tallest drawing plus
    ``padding``. Drawings are placed row by row, ``cols`` per row.

    Args:
        drawings: Sequence of objects exposing ``size`` as (width, height).
            Each object's ``origin`` attribute is set to its (x, y) position.
        frame_shape: Shape of the target frame. Currently not used for the
            layout; kept so existing callers keep working.
        cols: Number of drawings per row (default is 4).
        padding: Space in pixels around each cell (default is 20).

    Returns:
        None. The drawings are updated in place.
    """
    # Nothing to lay out; max() below would fail on an empty sequence.
    if not drawings:
        return

    cell_w = max(drawing.size[0] for drawing in drawings) + padding
    cell_h = max(drawing.size[1] for drawing in drawings) + padding

    for idx, drawing in enumerate(drawings):
        row, col = divmod(idx, cols)
        drawing.origin = (padding + col * cell_w, padding + row * cell_h)