openvisionkit 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openvisionkit/__init__.py +1 -0
- openvisionkit/_version.py +24 -0
- openvisionkit/capture/draw_object.py +296 -0
- openvisionkit/capture/image_template.py +61 -0
- openvisionkit/capture/screen_capture.py +13 -0
- openvisionkit/capture/video_recorder.py +128 -0
- openvisionkit/capture/video_template.py +336 -0
- openvisionkit/lib/classifier.py +186 -0
- openvisionkit/lib/face_detector.py +587 -0
- openvisionkit/lib/face_mesh_detector.py +913 -0
- openvisionkit/lib/form_detector.py +465 -0
- openvisionkit/lib/form_roi_annotator.py +679 -0
- openvisionkit/lib/form_roi_detector.py +1078 -0
- openvisionkit/lib/fps_counter.py +38 -0
- openvisionkit/lib/hair_segmentation.py +298 -0
- openvisionkit/lib/hand_detector.py +1230 -0
- openvisionkit/lib/image_detector.py +1095 -0
- openvisionkit/lib/object_detector.py +401 -0
- openvisionkit/lib/pose_detector.py +919 -0
- openvisionkit/lib/selfie_segmentation.py +528 -0
- openvisionkit/lib/text_detector.py +1229 -0
- openvisionkit/utility/live_plot.py +141 -0
- openvisionkit/utility/vision_utilis.py +871 -0
- openvisionkit-0.4.0.dist-info/METADATA +1018 -0
- openvisionkit-0.4.0.dist-info/RECORD +26 -0
- openvisionkit-0.4.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,871 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import math
|
|
3
|
+
import os
|
|
4
|
+
import textwrap
|
|
5
|
+
import time
|
|
6
|
+
from glob import glob
|
|
7
|
+
|
|
8
|
+
import cv2
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def rectangle_corners(
    img, bbox, length=30, t=5, rt=1, colorR=(255, 0, 255), colorC=(0, 255, 0)
):
    """
    Draw a bounding box with emphasised corner brackets on an image.

    The drawing happens in place on ``img``.

    Args:
        img: The image on which to draw.
        bbox: A tuple (x, y, w, h) representing the bounding box.
        length: Length of each corner line.
        t: Thickness of the corner lines.
        rt: Thickness of the rectangle border. If 0, no border is drawn.
        colorR: Color of the rectangle border.
        colorC: Color of the corner lines.

    Returns:
        The same image object, with the rectangle and corners drawn on it.
    """
    left, top, box_w, box_h = bbox
    right, bottom = left + box_w, top + box_h

    if rt:
        cv2.rectangle(img, bbox, colorR, rt)

    # Each entry is a corner followed by the signed horizontal and vertical
    # reach of the two strokes that start there and point into the box.
    corner_strokes = (
        (left, top, length, length),
        (right, top, -length, length),
        (left, bottom, length, -length),
        (right, bottom, -length, -length),
    )
    for cx, cy, reach_x, reach_y in corner_strokes:
        anchor = (cx, cy)
        cv2.line(img, anchor, (cx + reach_x, cy), colorC, t)
        cv2.line(img, anchor, (cx, cy + reach_y), colorC, t)

    return img
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def detect_highlighted_text(
    img: np.ndarray,
    hsv_colors=None,  # seed HSV values
    h_tol=10,
    s_tol=80,
    v_tol=80,
    show_mask=False,
    show_combined_mask=False,
    show_image_with_mask=False,
):
    """
    Detect highlighted regions by thresholding around seed HSV colors.

    The image is blurred, converted to HSV, and one ``cv2.inRange`` mask is
    built per seed color. The per-color masks are OR-ed into a combined mask,
    which is then used to cut the highlighted pixels out of the original
    image.

    The hue window is clamped to [0, 179] and the saturation/value windows to
    [0, 255]; a window that reaches past either end is truncated, it does not
    wrap around.

    Args:
        img: Input image in BGR format (as read by OpenCV).
        hsv_colors: List of seed (h, s, v) tuples to detect. Defaults to
            ``[(27, 167, 251)]`` when None.
        h_tol, s_tol, v_tol: Tolerances added to / subtracted from each seed
            component to form the accepted range.
        show_mask: Show each individual color mask in its own window.
        show_combined_mask: Show the combined mask of all colors.
        show_image_with_mask: Show the original image restricted to the mask.

    Returns:
        A 3-tuple ``(image_with_mask, combined_mask, masks)``:
            image_with_mask: ``img`` with everything outside the combined
                mask set to zero.
            combined_mask: Binary mask combining all detected colors.
            masks: List of individual masks, one per seed color.

    Note:
        If any ``show_*`` flag is set, the call blocks in ``cv2.waitKey(0)``
        until a key is pressed and then closes all OpenCV windows.

    Usage:
        image = cv2.imread("doc.jpg")

        # Common highlighter HSV seeds (you can refine using click sampling)
        highlight_colors = [
            (30, 200, 250),   # yellow
            (60, 200, 250),   # green
            (150, 200, 250),  # pink
            (15, 200, 250),   # orange
        ]

        cut_out, mask, masks = detect_highlighted_text(image, highlight_colors)
    """
    if hsv_colors is None:
        hsv_colors = [(27, 167, 251)]

    img_blur = cv2.GaussianBlur(img, (5, 5), 0)
    hsv = cv2.cvtColor(img_blur, cv2.COLOR_BGR2HSV)
    combined_mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
    masks = []

    for i, (h, s, v) in enumerate(hsv_colors):
        lower = np.array([max(0, h - h_tol), max(0, s - s_tol), max(0, v - v_tol)])
        upper = np.array(
            [min(179, h + h_tol), min(255, s + s_tol), min(255, v + v_tol)]
        )

        mask = cv2.inRange(hsv, lower, upper)
        masks.append(mask)
        combined_mask = cv2.bitwise_or(combined_mask, mask)

        if show_mask:
            cv2.imshow(f"Mask {i}", mask)

    # Computed once; it is both the return value and the preview image.
    img_with_mask = cv2.bitwise_and(img, img, mask=combined_mask)

    if show_combined_mask:
        cv2.imshow("Combined Mask", combined_mask)
    if show_image_with_mask:
        cv2.imshow("Image with Mask", img_with_mask)

    if show_mask or show_combined_mask or show_image_with_mask:
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return img_with_mask, combined_mask, masks
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def get_dominant_hsv_colors(image, k=4):
    """
    Estimate the dominant colors of an image by K-means clustering in HSV.

    The image is blurred, converted to HSV, flattened to one row per pixel
    and clustered with ``cv2.kmeans`` using random initial centers, so the
    result can differ between calls.

    Args:
        image: Input image in BGR format (as read by OpenCV).
        k: Number of clusters / dominant colors to return (default is 4).

    Returns:
        List of ``k`` (h, s, v) tuples of ints, one per cluster center.
    """
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    samples = np.float32(cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV).reshape(-1, 3))

    # Stop after 100 iterations or once the centers move by less than 0.2.
    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
    attempts = 10

    _compactness, _labels, centers = cv2.kmeans(
        samples, k, None, stop_criteria, attempts, cv2.KMEANS_RANDOM_CENTERS
    )

    return [tuple(int(channel) for channel in center) for center in centers]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def refine_mask(mask):
    """
    Clean up a binary mask with morphological operations.

    Applies, in order: one opening pass and two closing passes with a 3x3
    kernel, then one dilation with a larger rectangular kernel intended to
    merge nearby regions.

    Args:
        mask: The input binary mask to be refined.

    Returns:
        The refined binary mask.
    """
    noise_kernel = np.ones((3, 3), np.uint8)
    # NOTE(review): np.ones((15, 5)) is 15 rows by 5 columns, i.e. taller
    # than wide. The stated goal is to merge words on the same text line,
    # which would normally call for a kernel wider than tall - confirm the
    # intended orientation before changing it.
    merge_kernel = np.ones((15, 5), np.uint8)

    opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, noise_kernel, iterations=1)
    closed = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, noise_kernel, iterations=2)

    # Merge words in same line
    return cv2.dilate(closed, merge_kernel, iterations=1)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def detect_single_highlighted_text(image, hsv_colors=None):
    """Detect highlighted text using a single HSV lower bound.

    The first three entries of ``hsv_colors`` are used as the *lower* bound
    of the accepted HSV range; the upper bound is fixed at (140, 255, 255).

    Args:
        image: Input image in BGR format (as read by OpenCV).
        hsv_colors: Sequence ``[h, s, v]`` giving the lower HSV bound.
            Defaults to ``[27, 167, 251]`` when None.

    Returns:
        image_with_mask: ``image`` with everything outside the mask zeroed.
        mask: Binary mask of the pixels that fall inside the HSV range.
    """
    if hsv_colors is None:
        hsv_colors = [27, 167, 251]
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # The leftover debug print of the first HSV row was removed; a library
    # helper should not write image data to stdout.
    lower = np.array([hsv_colors[0], hsv_colors[1], hsv_colors[2]])
    # NOTE(review): the upper bound is hard-coded rather than derived from
    # the seed, so the accepted hue span depends on the seed's hue - confirm
    # this is intended.
    upper = np.array([140, 255, 255])

    mask = cv2.inRange(hsv, lower, upper)
    img_with_mask = cv2.bitwise_and(image, image, mask=mask)
    return img_with_mask, mask
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def find_contours(
    mask,
    min_area=500,
    debug=False,
    sort_countours=False,
    sorted_bounding_box=False,
    retrieval_type=cv2.RETR_EXTERNAL,
    approximation_method=cv2.CHAIN_APPROX_SIMPLE,
):
    """
    Find contours in a binary mask and filter them by area and box size.

    The mask is first cleaned with ``refine_mask``. Contours whose area is
    below ``min_area``, or whose bounding box is narrower than 20 or shorter
    than 10, are discarded.

    The two returned lists are always index-aligned: ``boxes[i]`` is the
    bounding box of ``filtered_contours[i]``, whichever sort flags are set.

    Args:
        mask: Binary image (mask) where contours are to be found.
        min_area: Minimum contour area to keep (default is 500).
        debug: If True, prints how many contours were found and kept.
        sort_countours: If True, orders the results by contour area,
            largest first (default is False).
        sorted_bounding_box: If True, orders the results top-to-bottom, then
            left-to-right, by bounding-box position (default is False).
            When both sort flags are set this ordering is applied last and
            therefore wins; area order only breaks ties.
        retrieval_type: Contour retrieval mode (default is cv2.RETR_EXTERNAL).
        approximation_method: Contour approximation method
            (default is cv2.CHAIN_APPROX_SIMPLE).

    Returns:
        filtered_contours: List of contours that passed the filters.
        boxes: List of bounding box tuples (x, y, w, h), one per contour in
            ``filtered_contours`` and in the same order.
    """
    # 1. Clean noise (very important)
    cleaned = refine_mask(mask)

    # 2. Find contours
    contours, _ = cv2.findContours(cleaned, retrieval_type, approximation_method)

    # 3. Filter contours, keeping each contour together with its box so that
    #    later sorting cannot pull the two apart.
    kept = []
    for cnt in contours:
        if cv2.contourArea(cnt) < min_area:
            continue

        x, y, w, h = cv2.boundingRect(cnt)
        if w < 20 or h < 10:
            continue

        kept.append((cnt, (x, y, w, h)))

    if debug:
        print(f"Total contours: {len(contours)}")
        print(f"Filtered contours: {len(kept)}")

    if sort_countours:
        kept.sort(key=lambda pair: cv2.contourArea(pair[0]), reverse=True)

    if sorted_bounding_box:
        # Sort top-to-bottom, then left-to-right.
        kept.sort(key=lambda pair: (pair[1][1], pair[1][0]))

    filtered_contours = [cnt for cnt, _box in kept]
    boxes = [box for _cnt, box in kept]
    return filtered_contours, boxes
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def resize_with_padding(img, target_size, color=(0, 0, 0)):
    """
    Resize an image to fit a target size, preserving aspect ratio.

    The image is scaled by the largest factor that keeps it inside
    ``target_size`` and the remaining space is filled with a constant-color
    border, split as evenly as possible between opposite sides.

    Args:
        img: The input image to be resized.
        target_size: A tuple (width, height) representing the output size.
        color: The color of the padding (default is black).

    Returns:
        The resized image with padding to fit the target size.
    """
    h, w = img.shape[:2]
    target_w, target_h = target_size

    scale = min(target_w / w, target_h / h)
    # Clamp to at least one pixel: for a very elongated input the short side
    # can truncate to 0, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))

    resized = cv2.resize(img, (new_w, new_h))

    pad_w = target_w - new_w
    pad_h = target_h - new_h

    # Any odd leftover pixel goes to the bottom / right side.
    top = pad_h // 2
    bottom = pad_h - top
    left = pad_w // 2
    right = pad_w - left

    return cv2.copyMakeBorder(
        resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def highlight_image(img, selected=False, color=(0, 255, 0), thickness=3):
    """
    Highlight an image by drawing a border around its edge.

    The border is drawn in place on ``img``.

    Args:
        img: The input image to be highlighted.
        selected: Whether to apply the highlight (default is False).
        color: The color of the highlight border (default is green).
        thickness: The thickness of the highlight border (default is 3).

    Returns:
        The same image object; it carries the border if ``selected`` is True
        and is untouched otherwise.

    Usage:
        img = cv2.imread('input.jpg')
        highlighted_img = highlight_image(img, selected=True, color=(0, 255, 0), thickness=3)
        cv2.imshow('Highlighted Image', highlighted_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    """
    if selected:
        h, w = img.shape[:2]
        # The last valid pixel is (w - 1, h - 1). Using (w, h) put the right
        # and bottom edges one pixel outside the image, so they rendered
        # thinner than the top and left edges.
        cv2.rectangle(img, (0, 0), (w - 1, h - 1), color, thickness)
    return img
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def zoom_image(img, scale=2.0):
    """
    Return a copy of the image resized by a uniform scale factor.

    Args:
        img: The input image to be zoomed.
        scale: Factor applied to both width and height (default is 2.0,
            which means 200% zoom).

    Returns:
        The zoomed image.
    """
    # A (0, 0) output size tells cv2.resize to derive it from fx / fy.
    size_from_factors = (0, 0)
    return cv2.resize(img, size_from_factors, fx=scale, fy=scale)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def put_text_think_corners(
    img, bounding_box, color=(255, 255, 255), length=20, thickness=2
):
    """
    Draw corner brackets at the four corners of a bounding box.

    Despite its name, this function draws no text: it only draws two short
    strokes at each corner of ``bounding_box``, in place on ``img``.

    Args:
        img: The input image to draw on.
        bounding_box: A tuple (x, y, w, h) representing the bounding box.
        color: The color of the corner strokes (default is white).
        length: The length of the corner strokes (default is 20).
        thickness: The thickness of the corner strokes (default is 2).

    Returns:
        The same image object with the corner strokes drawn on it.
    """
    x, y, w, h = bounding_box

    # (corner_x, corner_y, horizontal direction, vertical direction), listed
    # as top-left, top-right, bottom-left, bottom-right. Directions point
    # into the box.
    corners = (
        (x, y, 1, 1),
        (x + w, y, -1, 1),
        (x, y + h, 1, -1),
        (x + w, y + h, -1, -1),
    )
    for cx, cy, dir_x, dir_y in corners:
        cv2.line(img, (cx, cy), (cx + dir_x * length, cy), color, thickness)
        cv2.line(img, (cx, cy), (cx, cy + dir_y * length), color, thickness)

    return img
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def overlay_transparent(bg, fg, pos=(0, 0)):
    """
    Alpha-blend a foreground image onto a background image, in place.

    The foreground is clipped to the background's bounds, so ``pos`` may be
    negative or place the foreground partly (or wholly) outside.

    Args:
        bg: Background image; it is modified in place.
        fg: Foreground image. A fourth channel, if present, is used as the
            alpha channel (0-255). Images without a fourth channel,
            including 2-D single-channel images, are treated as fully
            opaque.
        pos: (x, y) position in ``bg`` of the foreground's top-left corner.

    Returns:
        The same ``bg`` object, with the foreground blended in. It is
        returned unchanged when the foreground falls entirely outside.
    """
    x, y = pos
    fg_h, fg_w = fg.shape[:2]
    bg_h, bg_w = bg.shape[:2]

    # Destination rectangle, clipped to the background.
    x1, y1 = max(x, 0), max(y, 0)
    x2, y2 = min(x + fg_w, bg_w), min(y + fg_h, bg_h)

    if x1 >= x2 or y1 >= y2:
        return bg

    # Matching source rectangle inside the foreground.
    fx1, fy1 = max(-x, 0), max(-y, 0)
    fx2, fy2 = fx1 + (x2 - x1), fy1 + (y2 - y1)

    fg_crop = fg[fy1:fy2, fx1:fx2]

    # A 2-D foreground has no channel axis; add one so the channel checks
    # below do not raise IndexError and the values broadcast across bg.
    if fg_crop.ndim == 2:
        fg_crop = fg_crop[..., np.newaxis]

    if fg_crop.shape[2] == 4:
        alpha = fg_crop[..., 3:4] / 255.0
        fg_rgb = fg_crop[..., :3]
    else:
        # No alpha -> treat as fully opaque
        alpha = np.ones((fg_crop.shape[0], fg_crop.shape[1], 1), dtype=np.float32)
        fg_rgb = fg_crop

    bg_crop = bg[y1:y2, x1:x2]

    bg[y1:y2, x1:x2] = (bg_crop * (1 - alpha) + fg_rgb * alpha).astype(bg.dtype)

    return bg
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def get_valid_images(folder_path):
    """
    Collect the image files found directly inside a folder.

    Files are matched by extension (png, jpg, jpeg, bmp, tiff, webp) using
    glob patterns; sub-folders are not searched. Results are grouped by
    extension in that order.

    Args:
        folder_path (str): The path to the folder containing images.

    Returns:
        list: Paths of the matching files, each prefixed with
        ``folder_path``.
    """
    patterns = ("*.png", "*.jpg", "*.jpeg", "*.bmp", "*.tiff", "*.webp")
    return [
        match
        for pattern in patterns
        for match in glob(os.path.join(folder_path, pattern))
    ]
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def draw_rounded_rect(img, top_left, bottom_right, color, radius=20, thickness=-1):
    """
    Draw a rectangle with rounded corners on an image, in place.

    Args:
        img: The image to draw on.
        top_left: (x1, y1) of the rectangle's top-left corner.
        bottom_right: (x2, y2) of the rectangle's bottom-right corner.
        color: Drawing color.
        radius: Corner radius (default is 20).
        thickness: Negative values draw a filled shape (the default);
            positive values draw only the outline with that line thickness;
            0 draws nothing.

    Returns:
        None. ``img`` is modified in place.
    """
    x1, y1 = top_left
    x2, y2 = bottom_right

    if thickness < 0:
        # Filled: two overlapping rectangles form a plus shape and four
        # filled circles round off the corners.
        cv2.rectangle(img, (x1 + radius, y1), (x2 - radius, y2), color, thickness)
        cv2.rectangle(img, (x1, y1 + radius), (x2, y2 - radius), color, thickness)

        cv2.circle(img, (x1 + radius, y1 + radius), radius, color, -1)
        cv2.circle(img, (x2 - radius, y1 + radius), radius, color, -1)
        cv2.circle(img, (x1 + radius, y2 - radius), radius, color, -1)
        cv2.circle(img, (x2 - radius, y2 - radius), radius, color, -1)
    elif thickness > 0:
        # Outline: previously any non-negative thickness drew nothing.
        # Four straight edges, shortened by the radius at both ends.
        cv2.line(img, (x1 + radius, y1), (x2 - radius, y1), color, thickness)
        cv2.line(img, (x1 + radius, y2), (x2 - radius, y2), color, thickness)
        cv2.line(img, (x1, y1 + radius), (x1, y2 - radius), color, thickness)
        cv2.line(img, (x2, y1 + radius), (x2, y2 - radius), color, thickness)

        # Four quarter-circle arcs. Each is a 0-90 degree sweep rotated so
        # it faces its corner (OpenCV angles run clockwise, 0 = +x).
        arcs = (
            ((x1 + radius, y1 + radius), 180),  # top-left
            ((x2 - radius, y1 + radius), 270),  # top-right
            ((x2 - radius, y2 - radius), 0),    # bottom-right
            ((x1 + radius, y2 - radius), 90),   # bottom-left
        )
        for center, rotation in arcs:
            cv2.ellipse(
                img, center, (radius, radius), rotation, 0, 90, color, thickness
            )
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def is_hovering(button, point):
    """
    Tell whether a point lies on a button, edges included.

    Args:
        button: Object exposing ``pos`` as (x, y) of its top-left corner,
            plus ``width`` and ``height``.
        point: (x, y) coordinates to test.

    Returns:
        True if the point is inside the button's rectangle or on its edge,
        otherwise False.
    """
    px, py = point
    left, top = button.pos

    if not (left <= px <= left + button.width):
        return False
    return top <= py <= top + button.height
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def create_centered_grid_buttons(
    frame,
    values,
    button_cls,
    button_size=(100, 100),
    gap=15,
    y_offset=0,
):
    """
    Create a grid of buttons centered on an OpenCV frame.

    The number of columns is taken from the first row of ``values``. Rows
    longer than that are truncated to it; rows shorter than that simply
    produce fewer buttons.

    Args:
        frame: Image whose height and width are used for centering.
        values: Rows of button labels, for example:
            [
                ['7', '8', '9', '*'],
                ['4', '5', '6', '-']
            ]
        button_cls: Callable invoked as
            ``button_cls(pos=(x, y), text=label, size=button_size)``.
        button_size: (width, height) of each button (default is (100, 100)).
        gap: Space between neighbouring buttons (default is 15).
        y_offset: Vertical shift applied to the whole grid (default is 0).

    Returns:
        List of the created buttons in row-major order; empty when
        ``values`` is empty.
    """
    rows = len(values)
    if rows == 0:
        # Nothing to lay out; previously this raised IndexError.
        return []

    frame_h, frame_w = frame.shape[:2]
    cols = len(values[0])
    btn_w, btn_h = button_size

    grid_w = cols * btn_w + (cols - 1) * gap
    grid_h = rows * btn_h + (rows - 1) * gap

    start_x = (frame_w - grid_w) // 2
    start_y = (frame_h - grid_h) // 2 + y_offset

    buttons = []
    for row_idx, row_values in enumerate(values):
        y = start_y + row_idx * (btn_h + gap)
        # Slicing caps long rows at `cols` and tolerates short rows.
        for col_idx, label in enumerate(row_values[:cols]):
            x = start_x + col_idx * (btn_w + gap)
            buttons.append(button_cls(pos=(x, y), text=label, size=button_size))

    return buttons
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def put_text_rect(
    img,
    text,
    pos,
    scale=3,
    thickness=3,
    colorT=(255, 255, 255),
    colorR=(255, 0, 255),
    font=cv2.FONT_HERSHEY_PLAIN,
    offset=10,
    border=None,
    colorB=(0, 255, 0),
):
    """
    Draw text on top of a filled background rectangle, in place.

    The rectangle is the measured text size grown by ``offset`` on every
    side.

    Args:
        img: The input image on which to put the text.
        text: The text string to be displayed.
        pos: A tuple (x, y) representing the bottom-left corner of the text.
        scale: The font scale factor (default is 3).
        thickness: The thickness of the text (default is 3).
        colorT: The color of the text (default is white).
        colorR: The color of the rectangle background (default is magenta).
        font: The font type (default is cv2.FONT_HERSHEY_PLAIN).
        offset: Padding between the text and the rectangle edge
            (default is 10).
        border: Thickness of a border drawn around the rectangle; a falsy
            value (the default, None) draws no border.
        colorB: The color of the border (default is green).

    Returns:
        A tuple ``(img, [left, top, right, bottom])``: the same image object
        and the rectangle's bounds.
    """
    (text_w, text_h), _baseline = cv2.getTextSize(text, font, scale, thickness)

    origin_x, origin_y = pos[0], pos[1]
    x1 = origin_x - offset
    y1 = origin_y + offset           # bottom edge (below the text origin)
    x2 = origin_x + text_w + offset
    y2 = origin_y - text_h - offset  # top edge

    bottom_left, top_right = (x1, y1), (x2, y2)

    cv2.rectangle(img, bottom_left, top_right, colorR, -1)
    if border:
        cv2.rectangle(img, bottom_left, top_right, colorB, border)
    cv2.putText(img, text, pos, font, scale, colorT, thickness)

    return img, [x1, y2, x2, y1]
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def draw_wrapped_text(
    img, text, start_pos, font, scale, color, thickness, max_width=40
):
    """
    Draw text on an image, wrapped onto several lines.

    The text is split with ``textwrap.wrap`` and each resulting line is
    drawn ``int(30 * scale)`` below the previous one.

    Args:
        img: The image to draw on; it is modified in place.
        text: The text to draw.
        start_pos: (x, y) origin of the first line.
        font: OpenCV font passed to ``cv2.putText``.
        scale: Font scale; also scales the line spacing.
        color: Text color.
        thickness: Text stroke thickness.
        max_width: Maximum line length in characters (default is 40).

    Returns:
        None.
    """
    lines = textwrap.wrap(text, width=max_width)
    x, first_y = start_pos
    line_step = int(30 * scale)

    for index, line in enumerate(lines):
        line_origin = (x, first_y + index * line_step)
        cv2.putText(img, line, line_origin, font, scale, color, thickness)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def load_image(image_input):
    """
    Load an image from a path, or pass an already-loaded array through.

    Files are read with ``cv2.IMREAD_UNCHANGED``, so the result keeps the
    file's own channel count (it is not forced to 3-channel BGR).

    Args:
        image_input (str, os.PathLike or np.ndarray): The file path of the
            image, or an image array.

    Returns:
        np.ndarray: The loaded image. An array input is returned as-is
        (same object, no copy).

    Raises:
        TypeError: If the input is neither an array nor path-like.
        ValueError: If the image path is invalid or the file is unreadable.
    """
    if isinstance(image_input, np.ndarray):
        return image_input

    # Normalise path-like objects (e.g. pathlib.Path) to a plain path;
    # not every OpenCV build accepts them directly.
    path = os.fspath(image_input)

    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError("Invalid image path or unreadable file")

    return img
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def move_image(
    img, direction="left_to_right", speed=5, start_pos=(0, 0), window_size=(800, 600)
):
    """
    Animate an image moving across a black canvas in an OpenCV window.

    The loop runs until ESC is pressed; it does not stop by itself when the
    image leaves the window. An unrecognised ``direction`` leaves the image
    stationary.

    Args:
        img: The file path of the image or a numpy array to be moved.
        direction: The direction of movement: "left_to_right",
            "right_to_left", "top_to_bottom", "bottom_to_top", "diag_tl_br"
            or "diag_br_tl".
        speed: The speed of movement in pixels per frame (default is 5).
        start_pos: A tuple (x, y) with the starting position of the image
            (default is (0, 0)).
        window_size: A tuple (width, height) giving the size of the canvas
            (default is (800, 600)).

    Returns:
        None

    Usage:
        move_image("path/to/image.png", direction="left_to_right", speed=5, start_pos=(0, 0), window_size=(800, 600))
    """
    img = load_image(img)

    # load_image keeps the file's channel layout, but the canvas is
    # 3-channel. Convert grayscale / BGRA input so the slice assignment
    # below cannot fail on a shape mismatch.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    h, w = img.shape[:2]
    canvas_w, canvas_h = window_size

    # Per-frame unit step (dx, dy) for each supported direction.
    unit_steps = {
        "left_to_right": (1, 0),
        "right_to_left": (-1, 0),
        "top_to_bottom": (0, 1),
        "bottom_to_top": (0, -1),
        "diag_tl_br": (1, 1),
        "diag_br_tl": (-1, -1),
    }
    step_x, step_y = unit_steps.get(direction, (0, 0))

    x, y = start_pos

    try:
        while True:
            canvas = np.zeros((canvas_h, canvas_w, 3), dtype=np.uint8)

            canvas_x = int(x)
            canvas_y = int(y)

            # Clip the image rectangle to the canvas.
            x1, y1 = max(canvas_x, 0), max(canvas_y, 0)
            x2, y2 = min(canvas_x + w, canvas_w), min(canvas_y + h, canvas_h)

            img_x1, img_y1 = max(-canvas_x, 0), max(-canvas_y, 0)
            img_x2, img_y2 = img_x1 + (x2 - x1), img_y1 + (y2 - y1)

            if x1 < x2 and y1 < y2:
                canvas[y1:y2, x1:x2] = img[img_y1:img_y2, img_x1:img_x2]

            cv2.imshow("Moving Image", canvas)

            # Mask to the low byte: some backends set higher bits.
            if cv2.waitKey(1) & 0xFF == 27:  # ESC to stop
                break

            x += step_x * speed
            y += step_y * speed

            time.sleep(0.01)
    finally:
        # Close the window even if the loop exits through an exception.
        cv2.destroyAllWindows()
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def stack_images_grid(img_list, cols=2, scale=1.0, labels=None, bg_color=(0, 0, 0)):
    """
    Stack images into a single grid image.

    Every image is converted to 3-channel BGR and resized to the size of the
    first non-None entry. ``None`` entries become black tiles. Unused slots
    in the last row are filled with ``bg_color`` tiles.

    Args:
        img_list (list): list of images; entries may be None
        cols (int): number of columns (must be at least 1)
        scale (float): scaling factor applied to the final grid
        labels (list): optional titles, drawn on the images in order
        bg_color (tuple): background color for empty slots

    Returns:
        np.ndarray: stacked image, or None when ``img_list`` is empty or
        contains only None entries.

    Raises:
        ValueError: If ``cols`` is less than 1.
    """
    if not img_list:
        return None
    if cols < 1:
        raise ValueError("cols must be a positive integer")

    # Take the tile size from the first real image, not blindly from
    # img_list[0], which may be None.
    reference = next((item for item in img_list if item is not None), None)
    if reference is None:
        return None
    h, w = reference.shape[:2]

    processed = []
    for img in img_list:
        if img is None:
            img = np.zeros((h, w, 3), dtype=np.uint8)

        # Bring every tile to 3 channels so np.hstack sees equal shapes.
        if img.ndim == 2 or img.shape[2] == 1:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

        processed.append(cv2.resize(img, (w, h)))

    total = len(processed)
    rows = math.ceil(total / cols)

    # Add labels to the real images only, before the filler tiles exist, so
    # surplus labels can never be drawn onto a filler.
    if labels:
        for tile, text in zip(processed, labels):
            cv2.putText(
                tile,
                text,
                (10, 25),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0, 255, 0),
                2,
            )

    # Fill remaining slots with blank tiles (one array each, not shared).
    for _ in range(rows * cols - total):
        processed.append(np.full((h, w, 3), bg_color, dtype=np.uint8))

    # Stack
    grid = [np.hstack(processed[i * cols : (i + 1) * cols]) for i in range(rows)]
    stacked = np.vstack(grid)

    # Scale final output
    if scale != 1.0:
        stacked = cv2.resize(stacked, (0, 0), fx=scale, fy=scale)

    return stacked
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def get_currect_path():
    """
    Describe the location of this module's own source file.

    The path comes from ``__file__`` of the module that defines this
    function (not of the caller), with symbolic links resolved.

    Returns:
        tuple: ``(path, filename, full_path)`` - the containing directory,
        the file name, and the full resolved path.
    """
    resolved = os.path.realpath(__file__)
    return os.path.dirname(resolved), os.path.basename(resolved), resolved
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
def get_calling_folder():
    """
    Locate the source file of the code that called this function.

    Returns:
        tuple: ``(folder, calling_file)`` - the absolute directory of the
        caller's source file, and the caller's file name as reported by
        ``inspect``.
    """
    # Look only at the immediate caller's frame. inspect.stack() would
    # build a record, with source context, for every frame on the stack
    # just to read one filename.
    current = inspect.currentframe()
    caller = current.f_back if current is not None else None
    try:
        if caller is not None:
            calling_file = inspect.getframeinfo(caller, context=0).filename
        else:
            # Interpreters without frame support: fall back to the full
            # stack walk.
            calling_file = inspect.stack()[1].filename
    finally:
        # Drop frame references promptly to avoid reference cycles.
        del current, caller

    return os.path.dirname(os.path.abspath(calling_file)), calling_file
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def find_project_root(start_path, marker="module"):
    """
    Walk up the directory tree until a directory containing ``marker`` is found.

    Args:
        start_path: The path from which to start searching.
        marker: Name of the file or folder whose presence identifies the
            project root (default is "module").

    Returns:
        The absolute path of the first directory, starting at ``start_path``
        and moving upwards, that contains ``marker``.

    Raises:
        RuntimeError: If the filesystem root is reached without finding
            ``marker``.
    """
    candidate = os.path.abspath(start_path)

    while not os.path.exists(os.path.join(candidate, marker)):
        parent = os.path.dirname(candidate)
        # dirname() of the filesystem root is the root itself.
        if parent == candidate:
            raise RuntimeError("Project root not found")
        candidate = parent

    return candidate
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def mouse_drag_overlay(event, x, y, flags, param):
    """
    OpenCV mouse callback that lets the user drag an overlay.

    A left-button press inside the overlay starts a drag and records where
    inside the overlay it was grabbed; mouse moves while dragging update the
    overlay position; releasing the left button ends the drag.

    Args:
        event: The type of mouse event (e.g., cv2.EVENT_LBUTTONDOWN,
            cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONUP).
        x: The x-coordinate of the mouse event.
        y: The y-coordinate of the mouse event.
        flags: Flags passed by OpenCV (not used in this function).
        param: A dictionary with keys:
            "drag_position": dict holding the overlay's "x" and "y". The
                keys "dragging", "offset_x" and "offset_y" are written by
                this callback and need not be present up front.
            "overlay_w", "overlay_h": the overlay's width and height.

    Returns:
        None. ``param["drag_position"]`` is updated in place.
    """
    drag_position = param["drag_position"]
    overlay_w = param["overlay_w"]
    overlay_h = param["overlay_h"]

    ox = drag_position["x"]
    oy = drag_position["y"]

    inside_overlay = ox <= x <= ox + overlay_w and oy <= y <= oy + overlay_h

    # A state dict that only carries "x"/"y" must not raise KeyError on the
    # first mouse-move event, so the drag flag defaults to False.
    dragging = drag_position.get("dragging", False)

    if event == cv2.EVENT_LBUTTONDOWN and inside_overlay:
        drag_position["dragging"] = True
        # Remember the grab point so the overlay does not jump to the cursor.
        drag_position["offset_x"] = x - ox
        drag_position["offset_y"] = y - oy

    elif event == cv2.EVENT_MOUSEMOVE and dragging:
        drag_position["x"] = x - drag_position.get("offset_x", 0)
        drag_position["y"] = y - drag_position.get("offset_y", 0)

    elif event == cv2.EVENT_LBUTTONUP:
        drag_position["dragging"] = False
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def overlay_frame(
    background,
    overlay,
    position="top-left",
    padding=10,
    draggable=False,
    drag_position=None,
    draw_border=True,
):
    """
    Paste one image onto another at a named or dragged position, in place.

    The overlay replaces the background pixels it covers (no blending). Its
    position is clamped so that it stays inside the background, and an
    overlay larger than the background is cropped to fit.

    Args:
        background: The background image; it is modified in place.
        overlay: The image to be pasted onto the background.
        position: Where to place the overlay (default is "top-left").
            Options: "top-left", "top-right", "bottom-left", "bottom-right",
            "center". Any other value behaves like "top-left".
        padding: Distance in pixels from the background's edges for the
            corner positions (default is 10).
        draggable: If True and ``drag_position`` is given, the position is
            read from ``drag_position`` instead of ``position``.
        drag_position: A dictionary with "x" and "y" keys holding the
            overlay's current position (default is None). When used, the
            clamped position is written back into it.
        draw_border: Accepted for interface compatibility; this
            implementation does not draw a border.

    Returns:
        The same ``background`` object with the overlay pasted in.
    """
    bg_h, bg_w = background.shape[:2]

    # Crop an oversized overlay so the slice assignment below cannot fail
    # with a shape mismatch.
    overlay = overlay[:bg_h, :bg_w]
    ov_h, ov_w = overlay.shape[:2]

    use_drag = draggable and drag_position is not None

    if use_drag:
        x = drag_position["x"]
        y = drag_position["y"]
    elif position == "top-right":
        x, y = bg_w - ov_w - padding, padding
    elif position == "bottom-left":
        x, y = padding, bg_h - ov_h - padding
    elif position == "bottom-right":
        x, y = bg_w - ov_w - padding, bg_h - ov_h - padding
    elif position == "center":
        x = (bg_w - ov_w) // 2
        y = (bg_h - ov_h) // 2
    else:
        # "top-left" and any unrecognised position.
        x, y = padding, padding

    # keep inside image
    x = max(0, min(x, bg_w - ov_w))
    y = max(0, min(y, bg_h - ov_h))

    if use_drag:
        # Write the clamped position back so a drag cannot accumulate an
        # off-screen position.
        drag_position["x"] = x
        drag_position["y"] = y

    background[y : y + ov_h, x : x + ov_w] = overlay
    return background
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def auto_layout(drawings, frame_shape, cols=4, padding=20):
    """
    Arrange drawings on a uniform grid by assigning each one an origin.

    Every grid cell is as large as the widest and tallest drawing plus
    ``padding``. Drawings fill the grid row by row, ``cols`` per row, and
    each drawing's ``origin`` attribute is set to its cell's top-left
    corner.

    Args:
        drawings: Sequence of objects exposing ``size`` as (width, height)
            and accepting assignment to ``origin``.
        frame_shape: Shape of the target frame. Currently unused by the
            layout; accepted for interface compatibility.
        cols: Number of drawings per row (default is 4).
        padding: Space left before the first cell and between cells
            (default is 20).

    Returns:
        None. The drawings are updated in place.
    """
    if not drawings:
        # max() below would raise ValueError on an empty sequence.
        return

    cell_w = max(d.size[0] for d in drawings) + padding
    cell_h = max(d.size[1] for d in drawings) + padding

    for idx, drawing in enumerate(drawings):
        row, col = divmod(idx, cols)
        drawing.origin = (padding + col * cell_w, padding + row * cell_h)
|