openvisionkit 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openvisionkit/__init__.py +1 -0
- openvisionkit/_version.py +24 -0
- openvisionkit/capture/draw_object.py +296 -0
- openvisionkit/capture/image_template.py +61 -0
- openvisionkit/capture/screen_capture.py +13 -0
- openvisionkit/capture/video_recorder.py +128 -0
- openvisionkit/capture/video_template.py +336 -0
- openvisionkit/lib/classifier.py +186 -0
- openvisionkit/lib/face_detector.py +587 -0
- openvisionkit/lib/face_mesh_detector.py +913 -0
- openvisionkit/lib/form_detector.py +465 -0
- openvisionkit/lib/form_roi_annotator.py +679 -0
- openvisionkit/lib/form_roi_detector.py +1078 -0
- openvisionkit/lib/fps_counter.py +38 -0
- openvisionkit/lib/hair_segmentation.py +298 -0
- openvisionkit/lib/hand_detector.py +1230 -0
- openvisionkit/lib/image_detector.py +1095 -0
- openvisionkit/lib/object_detector.py +401 -0
- openvisionkit/lib/pose_detector.py +919 -0
- openvisionkit/lib/selfie_segmentation.py +528 -0
- openvisionkit/lib/text_detector.py +1229 -0
- openvisionkit/utility/live_plot.py +141 -0
- openvisionkit/utility/vision_utilis.py +871 -0
- openvisionkit-0.4.0.dist-info/METADATA +1018 -0
- openvisionkit-0.4.0.dist-info/RECORD +26 -0
- openvisionkit-0.4.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1230 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import time
|
|
3
|
+
from collections import deque
|
|
4
|
+
from itertools import combinations
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import cv2
|
|
8
|
+
import mediapipe as mp
|
|
9
|
+
import numpy as np
|
|
10
|
+
from mediapipe.tasks import python
|
|
11
|
+
from mediapipe.tasks.python import vision
|
|
12
|
+
|
|
13
|
+
# NOTE(review): this creates a capture handle for device index 0 as a side
# effect of importing the module; confirm that is intended for a library file.
cap = cv2.VideoCapture(0)
|
|
14
|
+
|
|
15
|
+
# Directory named "models" located next to this source file.
_MODEL_DIR = Path(__file__).parent / "models"
|
|
16
|
+
# String path of the bundled hand landmarker model; used as the default
# `model_path` of HandDetector.
_DEFAULT_MODEL = str(_MODEL_DIR / "hand_landmarker.task")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# There are 21 hand landmarks in total, and the tips of the fingers are represented by the following landmark indices: 4 (thumb), 8 (index finger), 12 (middle finger), 16 (ring finger), and 20 (little finger). These indices correspond to the specific landmarks that represent the tips of each finger in the hand landmark detection model. By accessing these landmarks, you can determine the position and state of each finger for various applications such as gesture recognition or hand tracking.
|
|
20
|
+
class HandDetector:
|
|
21
|
+
def __init__(
    self,
    model_path=_DEFAULT_MODEL,
    running_mode="IMAGE",
    max_hands=2,
    detection_confidence=0.5,
    hand_presence_confidence=0.5,
    tracking_confidence=0.5,
    smoothing_window=8,
    calibration_samples=None,
):
    """Build the MediaPipe hand landmarker and initialise per-instance state.

    Args:
        model_path (str): Path to the hand landmarker model asset.
        running_mode (str): Name of a ``vision.RunningMode`` member, looked
            up with ``getattr`` (for example "IMAGE" or "VIDEO").
        max_hands (int): Passed to the landmarker as ``num_hands``.
        detection_confidence (float): Passed as
            ``min_hand_detection_confidence``.
        hand_presence_confidence (float): Passed as
            ``min_hand_presence_confidence``.
        tracking_confidence (float): Passed as ``min_tracking_confidence``.
        smoothing_window (int): Maximum length of the ``distance_history``
            deque.
        calibration_samples: When truthy, forwarded to
            ``self.fit_polynomial`` as the last construction step.
            NOTE(review): ``fit_polynomial`` is defined outside this
            section; confirm the expected type of this argument.
    """
    self.running_mode = getattr(vision.RunningMode, running_mode)

    landmarker_options = vision.HandLandmarkerOptions(
        base_options=python.BaseOptions(model_asset_path=model_path),
        running_mode=self.running_mode,  # IMAGE | VIDEO | LIVE_STREAM
        num_hands=max_hands,
        min_hand_detection_confidence=detection_confidence,
        min_hand_presence_confidence=hand_presence_confidence,
        min_tracking_confidence=tracking_confidence,
    )
    self.detector = vision.HandLandmarker.create_from_options(landmarker_options)

    # MediaPipe helpers used when drawing landmarks and connections.
    tasks_vision = mp.tasks.vision
    self.mp_hands = tasks_vision.HandLandmarksConnections
    self.mp_drawing_styles = tasks_vision.drawing_styles
    self.mp_drawing_utils = tasks_vision.drawing_utils

    # Drawing constants.
    self.MARGIN = 5
    self.HANDEDNESS_TEXT_COLOR = (0, 165, 255)

    # Landmark index tables (thumb first where five entries are listed).
    self.fingerTips = [4, 8, 12, 16, 20]
    self.fingerPips = [6, 10, 14, 18]
    self.fingerDips = [7, 11, 15, 19]
    self.wrist = 0
    self.finger_mcp = [2, 5, 9, 13, 17]

    # Rolling window of recent distance values.
    self.distance_history = deque(maxlen=smoothing_window)
    # Landmarks of the first hand from the most recent get_landmarks() call.
    self._lm_list = []

    if calibration_samples:
        self.fit_polynomial(calibration_samples)
|
|
71
|
+
|
|
72
|
+
def _to_mp_image(self, image):
    """Wrap an OpenCV BGR frame as an ``mp.Image`` in SRGB format.

    Args:
        image: Input image in BGR channel order (as used by OpenCV).

    Returns:
        An ``mp.Image`` built from the RGB-converted pixel data.
    """
    return mp.Image(
        image_format=mp.ImageFormat.SRGB,
        data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
    )
|
|
82
|
+
|
|
83
|
+
def set_landmarks_image(self, image):
|
|
84
|
+
self.mp_image = self._to_mp_image(image)
|
|
85
|
+
# Perform hand detection
|
|
86
|
+
detection_result = self.detector.detect(self.mp_image)
|
|
87
|
+
# Get the hand landmarks list => list of detected hands
|
|
88
|
+
# Each hand has 21 landmarks with x, y, z coordinates
|
|
89
|
+
self.hand_landmarks_list = detection_result.hand_landmarks
|
|
90
|
+
|
|
91
|
+
# Tells if the hand is left or right
|
|
92
|
+
self.handedness_list = detection_result.handedness
|
|
93
|
+
|
|
94
|
+
def draw_landmarks(
|
|
95
|
+
self,
|
|
96
|
+
img_bgr,
|
|
97
|
+
to_draw_landmark=True,
|
|
98
|
+
to_draw_center_point=True,
|
|
99
|
+
to_draw_bounding_box=True,
|
|
100
|
+
to_put_handle_label=True,
|
|
101
|
+
flip_hands=False,
|
|
102
|
+
):
|
|
103
|
+
"""
|
|
104
|
+
Detect hand landmarks in the input BGR image and draw them on a copy of the image. The function processes the image using MediaPipe's hand landmark detection, retrieves the detected landmarks and handedness information, and optionally draws the landmarks and connections on the image for visualization.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
imgBGR (numpy array): The input image in BGR format on which to detect and draw hand landmarks.
|
|
108
|
+
to_draw_landmark (bool): Whether to draw the detected landmarks and connections on the image.
|
|
109
|
+
to_draw_center_point (bool): Whether to draw a circle at the center point of the detected hand landmarks on the image.
|
|
110
|
+
to_draw_bounding_box (bool): Whether to draw a bounding box around the detected hand landmarks on the image.
|
|
111
|
+
to_put_handle_label (bool): Whether to put the handedness label (e.g., "Left" or "Right") near the detected hand landmarks on the image.
|
|
112
|
+
flip_hands (bool): Whether to flip the left and right hand labels. This can be useful when displaying mirrored webcam feeds, where the left and right hands may appear reversed. If True, the function will swap the "Left" and "Right" labels for the detected hands in the output image.
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
annotated_image (numpy array): The image with detected hand landmarks and connections drawn (if to
|
|
116
|
+
Draw is True). The function returns a copy of the input image with the detected hand landmarks and connections drawn for visualization purposes.
|
|
117
|
+
output_landmarks: A list of tuples containing the landmarks list, bounding box, landmark parameters, and hand type for each detected hand. Each tuple in the list corresponds to a detected hand and contains the following information:
|
|
118
|
+
- landmarks_list: A list of 21 landmarks for the detected hand, each represented as a tuple (x, y, z).
|
|
119
|
+
- bounding_box: A tuple (xmin, ymin, width, height) representing the bounding box around the detected hand.
|
|
120
|
+
- landmark_params: Additional parameters related to the detected landmarks.
|
|
121
|
+
- hand_type: A string indicating the handedness of the detected hand ("Left" or "Right").
|
|
122
|
+
"""
|
|
123
|
+
# Convert to MediaPipe image
|
|
124
|
+
self.set_landmarks_image(img_bgr)
|
|
125
|
+
annotated_image = np.copy(self.mp_image.numpy_view())
|
|
126
|
+
|
|
127
|
+
if not self.hand_landmarks_list:
|
|
128
|
+
return img_bgr, []
|
|
129
|
+
|
|
130
|
+
height, width, _ = annotated_image.shape
|
|
131
|
+
|
|
132
|
+
# Get structured data
|
|
133
|
+
all_hands = self.get_landmarks(img_bgr.copy(), flip_hands=flip_hands)
|
|
134
|
+
output_landmarks = []
|
|
135
|
+
for idx, hand_data in enumerate(all_hands):
|
|
136
|
+
hand_landmarks = self.hand_landmarks_list[idx]
|
|
137
|
+
bbox = hand_data["bounding_box"]
|
|
138
|
+
center = hand_data["center_point"]
|
|
139
|
+
label = hand_data["hand_type"]
|
|
140
|
+
|
|
141
|
+
xmin, ymin, w, h = bbox
|
|
142
|
+
center_x, center_y = center
|
|
143
|
+
|
|
144
|
+
# -------- Draw landmarks --------
|
|
145
|
+
if to_draw_landmark:
|
|
146
|
+
self.mp_drawing_utils.draw_landmarks(
|
|
147
|
+
annotated_image,
|
|
148
|
+
hand_landmarks,
|
|
149
|
+
self.mp_hands.HAND_CONNECTIONS,
|
|
150
|
+
self.mp_drawing_styles.get_default_hand_landmarks_style(),
|
|
151
|
+
self.mp_drawing_styles.get_default_hand_connections_style(),
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# -------- Draw bounding box --------
|
|
155
|
+
if to_draw_bounding_box:
|
|
156
|
+
cv2.rectangle(
|
|
157
|
+
annotated_image,
|
|
158
|
+
(xmin - 20, ymin - 20),
|
|
159
|
+
(xmin + w + 20, ymin + h + 20),
|
|
160
|
+
(0, 255, 0),
|
|
161
|
+
2,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
# -------- Draw center --------
|
|
165
|
+
if to_draw_center_point:
|
|
166
|
+
cv2.circle(
|
|
167
|
+
annotated_image, (center_x, center_y), 8, (0, 0, 255), cv2.FILLED
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# -------- Label --------
|
|
171
|
+
if to_put_handle_label and label:
|
|
172
|
+
cv2.putText(
|
|
173
|
+
annotated_image,
|
|
174
|
+
label,
|
|
175
|
+
(xmin, ymin - 10),
|
|
176
|
+
cv2.FONT_HERSHEY_SIMPLEX,
|
|
177
|
+
0.7,
|
|
178
|
+
self.HANDEDNESS_TEXT_COLOR,
|
|
179
|
+
2,
|
|
180
|
+
cv2.LINE_AA,
|
|
181
|
+
)
|
|
182
|
+
landmarks_list = hand_data["landmarks_list"]
|
|
183
|
+
hand_bounding_box = bbox
|
|
184
|
+
landmark_params = {
|
|
185
|
+
"center_point": center,
|
|
186
|
+
"width": w,
|
|
187
|
+
"height": h,
|
|
188
|
+
"bbox": (xmin, ymin, w, h),
|
|
189
|
+
}
|
|
190
|
+
hand_type = label
|
|
191
|
+
|
|
192
|
+
output_landmarks.append(
|
|
193
|
+
(landmarks_list, hand_bounding_box, landmark_params, hand_type)
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
|
|
197
|
+
|
|
198
|
+
return annotated_image, output_landmarks
|
|
199
|
+
|
|
200
|
+
def get_landmarks(self, img, flip_hands=False):
|
|
201
|
+
"""
|
|
202
|
+
Extracts landmarks, bounding boxes, and center points for each detected hand.
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
img (numpy.ndarray): Input image.
|
|
206
|
+
flip_hands (bool): Swap Left/Right hand labels. Useful when
|
|
207
|
+
displaying mirrored webcam feeds.
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
list: Hand information dictionaries.
|
|
211
|
+
|
|
212
|
+
Example output:
|
|
213
|
+
[
|
|
214
|
+
{
|
|
215
|
+
"landmarks_list": [[0, x0, y0, z0], [1, x1, y1, z1], ..., [20, x20, y20, z20]],
|
|
216
|
+
"bounding_box": (xmin, ymin, width, height),
|
|
217
|
+
"center_point": (center_x, center_y),
|
|
218
|
+
"hand_type": "Left" or "Right"
|
|
219
|
+
},
|
|
220
|
+
...
|
|
221
|
+
]
|
|
222
|
+
The function processes the input image to detect hand landmarks and returns a list of dictionaries, where
|
|
223
|
+
"""
|
|
224
|
+
height, width, _ = img.shape
|
|
225
|
+
all_hands = []
|
|
226
|
+
|
|
227
|
+
# Generate landmarks if not already available
|
|
228
|
+
if not hasattr(self, "hand_landmarks_list") or not self.hand_landmarks_list:
|
|
229
|
+
self.set_landmarks_image(img.copy())
|
|
230
|
+
|
|
231
|
+
for idx, hand_landmarks in enumerate(self.hand_landmarks_list):
|
|
232
|
+
landmarks_list = []
|
|
233
|
+
x_coords, y_coords = [], []
|
|
234
|
+
|
|
235
|
+
# Extract landmarks
|
|
236
|
+
for landmark_id, landmark in enumerate(hand_landmarks):
|
|
237
|
+
cx = int(landmark.x * width)
|
|
238
|
+
cy = int(landmark.y * height)
|
|
239
|
+
cz = int(landmark.z * width)
|
|
240
|
+
|
|
241
|
+
landmarks_list.append([landmark_id, cx, cy, cz])
|
|
242
|
+
|
|
243
|
+
x_coords.append(cx)
|
|
244
|
+
y_coords.append(cy)
|
|
245
|
+
|
|
246
|
+
# Bounding box
|
|
247
|
+
xmin, xmax = min(x_coords), max(x_coords)
|
|
248
|
+
ymin, ymax = min(y_coords), max(y_coords)
|
|
249
|
+
|
|
250
|
+
bbox_width = xmax - xmin
|
|
251
|
+
bbox_height = ymax - ymin
|
|
252
|
+
|
|
253
|
+
bbox = (xmin, ymin, bbox_width, bbox_height)
|
|
254
|
+
|
|
255
|
+
# Center point
|
|
256
|
+
center_x = xmin + bbox_width // 2
|
|
257
|
+
center_y = ymin + bbox_height // 2
|
|
258
|
+
|
|
259
|
+
# Hand label
|
|
260
|
+
hand_label = None
|
|
261
|
+
if idx < len(self.handedness_list):
|
|
262
|
+
hand_label = self.handedness_list[idx][0].category_name
|
|
263
|
+
|
|
264
|
+
if flip_hands:
|
|
265
|
+
hand_label = (
|
|
266
|
+
"Left"
|
|
267
|
+
if hand_label == "Right"
|
|
268
|
+
else "Right"
|
|
269
|
+
if hand_label == "Left"
|
|
270
|
+
else hand_label
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
if idx == 0:
|
|
274
|
+
self._lm_list = landmarks_list
|
|
275
|
+
|
|
276
|
+
all_hands.append(
|
|
277
|
+
{
|
|
278
|
+
"landmarks_list": landmarks_list,
|
|
279
|
+
"bounding_box": bbox,
|
|
280
|
+
"center_point": (center_x, center_y),
|
|
281
|
+
"hand_type": hand_label,
|
|
282
|
+
}
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
return all_hands
|
|
286
|
+
|
|
287
|
+
def is_finger_point_inside_rect(self, point, rect):
    """Report whether a point lies inside (or on the edge of) a rectangle.

    Args:
        point: ``(x, y)`` pair.
        rect: ``(x, y, w, h)`` rectangle; no other layout is supported.

    Returns:
        bool: False for a rectangle with negative width or height,
        otherwise whether the point is within the inclusive bounds.
    """
    x, y = point
    left, top, rect_w, rect_h = rect

    # A negative extent is treated as an invalid rectangle.
    if rect_w < 0 or rect_h < 0:
        return False

    within_x = left <= x <= left + rect_w
    within_y = top <= y <= top + rect_h
    return within_x and within_y
|
|
301
|
+
|
|
302
|
+
def finger_joined(self, p1, p2, image, landmarks, threshold=0.25):
|
|
303
|
+
"""
|
|
304
|
+
Check if two fingers are joined based on the normalized distance between their landmarks. The function calculates the normalized distance between the specified landmarks and compares it to a threshold to determine if the fingers are considered joined. It also provides an annotated image for visualization.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
p1 (int): The index of the first finger landmark.
|
|
308
|
+
p2 (int): The index of the second finger landmark.
|
|
309
|
+
image (numpy.ndarray): The image on which to annotate the finger status.
|
|
310
|
+
landmarks (list): A list of hand landmarks. Each landmark is expected to be a tuple of (x, y) coordinates.
|
|
311
|
+
threshold (float): The normalized distance threshold below which the fingers are considered joined.
|
|
312
|
+
Returns:
|
|
313
|
+
bool: True if the fingers are joined, False otherwise.
|
|
314
|
+
numpy.ndarray: The annotated image.
|
|
315
|
+
"""
|
|
316
|
+
annotated = image.copy()
|
|
317
|
+
|
|
318
|
+
if not landmarks:
|
|
319
|
+
return False, annotated
|
|
320
|
+
|
|
321
|
+
normalized = self._normalize(landmarks, p1, p2)
|
|
322
|
+
print(f"Normalized distance between landmarks {p1} and {p2}: {normalized:.4f}")
|
|
323
|
+
is_joined = normalized < threshold
|
|
324
|
+
return is_joined, annotated
|
|
325
|
+
|
|
326
|
+
def get_distance_between_landmarks(
    self,
    landmark_id_1: int,
    landmark_id_2: int,
    hand_landmarks,
    frame_shape=None,
    return_points: bool = True,
):
    """Calculate the 2D distance between two MediaPipe hand landmarks.

    Args:
        landmark_id_1: First landmark index.
        landmark_id_2: Second landmark index.
        hand_landmarks: Either an indexable sequence of landmarks (as found
            in ``self.hand_landmarks_list``) or an object exposing that
            sequence through a ``.landmark`` attribute. Each landmark must
            provide normalized ``x`` and ``y`` attributes.
        frame_shape: ``frame.shape`` to get a pixel distance. If None, the
            distance is measured in normalized landmark coordinates.
        return_points: If True, also return the two point coordinates.

    Returns:
        ``(distance, point1, point2)`` when ``return_points`` is True,
        otherwise just ``distance``.
    """
    # Accept both the plain list stored by this class and the container
    # form that wraps the list in a `.landmark` attribute.
    points = getattr(hand_landmarks, "landmark", hand_landmarks)
    lm1 = points[landmark_id_1]
    lm2 = points[landmark_id_2]

    if frame_shape is not None:
        h, w = frame_shape[:2]
        p1 = (int(lm1.x * w), int(lm1.y * h))
        p2 = (int(lm2.x * w), int(lm2.y * h))
    else:
        p1 = (lm1.x, lm1.y)
        p2 = (lm2.x, lm2.y)

    distance = math.hypot(p1[0] - p2[0], p1[1] - p2[1])

    if return_points:
        return distance, p1, p2

    return distance
|
|
372
|
+
|
|
373
|
+
def fingers_up(self, hand_landmarks=None):
|
|
374
|
+
"""
|
|
375
|
+
Determine which fingers are raised (up) based on the detected hand landmarks. The function analyzes the positions of the landmarks for each finger and compares them to determine if a finger is raised or not.
|
|
376
|
+
|
|
377
|
+
Args:
|
|
378
|
+
hand_landmarks (list, optional): A list of landmarks for a detected hand, where each landmark contains x, y, z coordinates normalized to the image dimensions. Defaults to the cached landmarks from the last get_landmarks() call.
|
|
379
|
+
|
|
380
|
+
Returns:
|
|
381
|
+
fingers (list): A list of integers representing the state of each finger, where 1 indicates that the finger is raised (up) and 0 indicates that it is not raised (down). The order of the fingers in the list corresponds to the thumb, index, middle, ring, and little fingers.
|
|
382
|
+
"""
|
|
383
|
+
if hand_landmarks is None:
|
|
384
|
+
hand_landmarks = self._lm_list
|
|
385
|
+
fingers = []
|
|
386
|
+
# Thumb
|
|
387
|
+
# We need to calculate thumb separately because it moves in a different plane compared to the other fingers.
|
|
388
|
+
# The code checks the x-coordinate of the thumb tip (landmark 4) and compares it to the x-coordinate of landmark 3 (the joint before the thumb tip)
|
|
389
|
+
# to determine if the thumb is raised or not. For a right hand, if the thumb tip is to the right of landmark 3, it is considered raised (1), otherwise it is considered not raised (0).
|
|
390
|
+
# For a left hand, the logic would be reversed.
|
|
391
|
+
# When the thumb point 3 is on the left of the thumb tip point 4, it means the thumb is raised (1) for a right hand. If the thumb tip is on the right of point 3, it means the thumb is not raised (0). This logic is based on the typical orientation of the hand and how the thumb moves in relation to the other fingers.
|
|
392
|
+
|
|
393
|
+
# Check if the hand is right or left based on the x-coordinates of the thumb tip and the joint before the thumb tip. For a right hand, if the thumb tip (landmark 4) is to the right of landmark 3, it is considered raised (1), otherwise it is considered not raised (0). For a left hand, the logic would be reversed, where if the thumb tip is to the left of landmark 3, it would be considered raised (1), and if it is to the right, it would be considered not raised (0).
|
|
394
|
+
for _idx, handedness in enumerate(self.handedness_list):
|
|
395
|
+
hand_label = handedness[0].category_name # 'Left' or 'Right'
|
|
396
|
+
if hand_label == "Right":
|
|
397
|
+
if (
|
|
398
|
+
hand_landmarks[self.fingerTips[0]][1]
|
|
399
|
+
> hand_landmarks[self.fingerTips[0] - 1][1]
|
|
400
|
+
): # For right hand
|
|
401
|
+
fingers.append(1)
|
|
402
|
+
else:
|
|
403
|
+
fingers.append(0)
|
|
404
|
+
else:
|
|
405
|
+
# For a left hand, the logic is reversed. If the thumb tip (landmark 4) is to the left of landmark 3, it is considered raised (1), and if it is to the right, it is considered not raised (0). This is because the orientation of the hand is different for left and right hands, and the thumb moves in opposite directions relative to the other fingers.
|
|
406
|
+
if (
|
|
407
|
+
hand_landmarks[self.fingerTips[0]][1]
|
|
408
|
+
< hand_landmarks[self.fingerTips[0] - 1][1]
|
|
409
|
+
): # For left hand
|
|
410
|
+
fingers.append(1)
|
|
411
|
+
else:
|
|
412
|
+
fingers.append(0)
|
|
413
|
+
|
|
414
|
+
# Fingers (index, middle, ring, little)
|
|
415
|
+
for id in range(1, 5):
|
|
416
|
+
if (
|
|
417
|
+
hand_landmarks[self.fingerTips[id]][2]
|
|
418
|
+
< hand_landmarks[self.fingerTips[id] - 2][2]
|
|
419
|
+
):
|
|
420
|
+
fingers.append(1)
|
|
421
|
+
else:
|
|
422
|
+
fingers.append(0)
|
|
423
|
+
|
|
424
|
+
return fingers
|
|
425
|
+
|
|
426
|
+
def get_distance(
    self, p1, p2, img, to_draw_circle_key_point=True, to_draw_line=True
):
    """Measure the distance between two pixel points and optionally draw them.

    Args:
        p1: ``(x, y)`` of the first point.
        p2: ``(x, y)`` of the second point.
        img: Image that receives the overlays; it is drawn on in place.
        to_draw_circle_key_point (bool): Draw filled circles at both points
            and at their midpoint.
        to_draw_line (bool): Draw a line joining the two points.

    Returns:
        tuple: ``(length, img, [x1, y1, x2, y2, cx, cy])`` where
        ``(cx, cy)`` is the floor-divided midpoint.
    """
    start_x, start_y = p1
    end_x, end_y = p2

    mid_x = (start_x + end_x) // 2
    mid_y = (start_y + end_y) // 2

    length = math.hypot(end_x - start_x, end_y - start_y)

    if to_draw_line:
        cv2.line(img, (start_x, start_y), (end_x, end_y), (255, 0, 255), 3)

    if to_draw_circle_key_point:
        cv2.circle(img, (start_x, start_y), 8, (255, 0, 255), cv2.FILLED)
        cv2.circle(img, (end_x, end_y), 8, (255, 0, 255), cv2.FILLED)
        cv2.circle(img, (mid_x, mid_y), 8, (0, 0, 255), cv2.FILLED)

    return length, img, [start_x, start_y, end_x, end_y, mid_x, mid_y]
|
|
449
|
+
|
|
450
|
+
def euclidean_distance(self, p1, p2):
    """Return the straight-line distance between two points.

    Both points are converted to NumPy arrays, subtracted, and the norm of
    the difference is returned, so every component of the inputs
    contributes to the result.

    Args:
        p1 (tuple): First point, e.g. ``(x, y)``.
        p2 (tuple): Second point, with the same number of components.

    Returns:
        float: Euclidean distance between ``p1`` and ``p2``.
    """
    offset = np.array(p1) - np.array(p2)
    return float(np.linalg.norm(offset))
|
|
463
|
+
|
|
464
|
+
def compute_real_palm_width(pixel_width, distance_cm, focal_length_px):
|
|
465
|
+
"""
|
|
466
|
+
Compute the real palm width in centimeters based on the detected palm width in pixels, the known distance from the camera to the hand, and the focal length of the camera in pixels.
|
|
467
|
+
The function uses the formula: real_palm_width_cm = (pixel_width * distance_cm) / focal_length_px, where pixel_width is the measured width of the palm in pixels, distance_cm is the known distance from the camera to the hand in centimeters, and focal_length_px is the focal length of the camera in pixels. This calculation allows for estimating the actual size of the palm in real-world units (centimeters) based on the detected size in pixels and the known distance from the camera. This can be useful for applications that require understanding the physical dimensions of the hand or for distance estimation based on the size of the detected palm in the image.
|
|
468
|
+
|
|
469
|
+
Args:
|
|
470
|
+
pixel_width (float): The measured width of the palm in pixels as detected by the hand landmark detection model. This value is used in the calculation of the real palm width in centimeters.
|
|
471
|
+
distance_cm (float): The known distance from the camera to the hand in centimeters. This value is used in the calculation of the real palm width in centimeters.
|
|
472
|
+
focal_length_px (float): The focal length of the camera in pixels. This value is used in the calculation of the real palm width in centimeters.
|
|
473
|
+
|
|
474
|
+
Returns:
|
|
475
|
+
float: The calculated real palm width in centimeters based on the detected pixel width, known distance, and focal length. If the pixel width is zero or negative, or if the focal length is zero or negative, the function returns None to indicate that the real palm width cannot be computed with the given input.
|
|
476
|
+
"""
|
|
477
|
+
if pixel_width <= 0 or focal_length_px <= 0:
|
|
478
|
+
return None
|
|
479
|
+
|
|
480
|
+
return (pixel_width * distance_cm) / focal_length_px
|
|
481
|
+
|
|
482
|
+
def landmark_to_pixel(self, landmark, image_width, image_height):
    """Convert a landmark's normalized coordinates to integer pixel coordinates.

    Args:
        landmark: Object with normalized ``x`` and ``y`` attributes.
        image_width (int): Width of the image.
        image_height (int): Height of the image.

    Returns:
        tuple: ``(x, y)`` where x is ``landmark.x * image_width`` and y is
        ``landmark.y * image_height``, each truncated with ``int``.
    """
    pixel_x = int(landmark.x * image_width)
    pixel_y = int(landmark.y * image_height)
    return pixel_x, pixel_y
|
|
495
|
+
|
|
496
|
+
def palm_width_px(self, img, hand_landmarks, drawLandmarks=False):
|
|
497
|
+
"""
|
|
498
|
+
Uses distance between INDEX_MCP and PINKY_MCP as palm width.
|
|
499
|
+
This is more stable than fingertip distance.
|
|
500
|
+
|
|
501
|
+
Args:
|
|
502
|
+
hand_landmarks (list): A list of hand landmarks, where each landmark is expected to be a tuple of (x, y) coordinates. The function specifically uses the landmarks corresponding to the INDEX_MCP and PINKY_MCP to calculate the palm width in pixels.
|
|
503
|
+
These landmarks represent the base joints of the index and pinky fingers, respectively, and their distance provides a more stable measurement of the palm width compared to using fingertip landmarks, which can be more variable due to finger bending and movement.
|
|
504
|
+
drawLandmarks (bool): Whether to draw the landmarks and connections on the image for visualization purposes. If True, the function will draw a line between the INDEX_MCP and PINKY_MCP landmarks, as well as circles at these landmark points on the input image.
|
|
505
|
+
|
|
506
|
+
Returns:
|
|
507
|
+
float: The calculated palm width in pixels, which is the distance between the INDEX_MCP
|
|
508
|
+
"""
|
|
509
|
+
index_mcp_landmarks = hand_landmarks[self.finger_mcp[1]]
|
|
510
|
+
pinky_mcp_landmarks = hand_landmarks[self.finger_mcp[4]]
|
|
511
|
+
index_mcp = (
|
|
512
|
+
index_mcp_landmarks[1],
|
|
513
|
+
index_mcp_landmarks[2],
|
|
514
|
+
) # (x, y) for INDEX_MCP
|
|
515
|
+
pinky_mcp = (
|
|
516
|
+
pinky_mcp_landmarks[1],
|
|
517
|
+
pinky_mcp_landmarks[2],
|
|
518
|
+
) # (x, y) for PINKY_MCP
|
|
519
|
+
if drawLandmarks:
|
|
520
|
+
cv2.line(img, index_mcp, pinky_mcp, (0, 255, 0), 2)
|
|
521
|
+
cv2.circle(img, index_mcp, 6, (255, 0, 0), -1)
|
|
522
|
+
cv2.circle(img, pinky_mcp, 6, (255, 0, 0), -1)
|
|
523
|
+
return self.euclidean_distance(index_mcp, pinky_mcp), index_mcp, pinky_mcp
|
|
524
|
+
|
|
525
|
+
def _palm_scale(self, hand):
|
|
526
|
+
"""
|
|
527
|
+
Calculate the palm scale of a hand based on specific landmarks.
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
hand (list): A list of hand landmarks. Each landmark is expected to be a tuple of (x, y) coordinates.
|
|
531
|
+
|
|
532
|
+
Returns:
|
|
533
|
+
float: The calculated palm scale of the hand, which is an average of the distances between
|
|
534
|
+
"""
|
|
535
|
+
# stable reference (best practice)
|
|
536
|
+
return (
|
|
537
|
+
self.euclidean_distance(
|
|
538
|
+
hand[self.wrist], hand[self.finger_mcp[2]]
|
|
539
|
+
) # wrist → middle MCP
|
|
540
|
+
+ self.euclidean_distance(
|
|
541
|
+
hand[self.finger_mcp[1]], hand[self.finger_mcp[4]]
|
|
542
|
+
) # index MCP → pinky MCP
|
|
543
|
+
) / 2
|
|
544
|
+
|
|
545
|
+
def calibrate_focal_length(self, palm_width_px):
    """
    Calibrate and store the camera focal length (in pixels).

    Uses the pinhole relation
    ``focal_length = palm_width_px * known_distance_cm / real_palm_width_cm``
    with ``self.known_distance_cm`` and ``self.real_palm_width_cm``.

    Args:
        palm_width_px (float): Palm width measured in the image, in pixels,
            while the hand is held at ``self.known_distance_cm``.

    Returns:
        float | None: The focal length, which is also stored on
        ``self.focal_length_px``. Returns None (and stores nothing) when
        ``palm_width_px`` is zero or negative.
    """
    if palm_width_px <= 0:
        return None

    scaled_width = palm_width_px * self.known_distance_cm
    self.focal_length_px = scaled_width / self.real_palm_width_cm
    return self.focal_length_px
|
|
564
|
+
|
|
565
|
+
def get_dynamic_palm_width(self, hand, image_shape, distance_cm, focal_length_px):
    """
    Estimate the real palm width (cm) from the on-screen palm width.

    The pixel width is the distance between the landmarks at
    ``self.finger_mcp[1]`` and ``self.finger_mcp[4]`` (index MCP and pinky
    MCP), after scaling their normalised ``x``/``y`` to pixels. The real width
    follows from the pinhole relation
    ``real_width_cm = pixel_width * distance_cm / focal_length_px``.

    Args:
        hand (object): Object exposing ``.landmark``, a sequence whose items
            have normalised ``.x`` and ``.y`` attributes.
        image_shape (tuple): ``(height, width)`` or ``(height, width, channels)``.
            Only the first two entries are used, so grayscale frames work.
        distance_cm (float): Known camera-to-hand distance in centimetres.
        focal_length_px (float): Camera focal length in pixels.

    Returns:
        tuple: ``(real_width_cm, pixel_width, p1, p2)`` where ``p1``/``p2`` are
        the integer pixel positions of the two MCP landmarks.
        ``real_width_cm`` is None when ``focal_length_px`` is None, zero or
        negative, because the width cannot be computed then.
    """
    # Only height and width matter; slicing accepts both 2-D and 3-D shapes.
    h, w = image_shape[:2]

    lm = hand.landmark
    index_mcp = lm[self.finger_mcp[1]]
    pinky_mcp = lm[self.finger_mcp[4]]

    p1 = (int(index_mcp.x * w), int(index_mcp.y * h))
    p2 = (int(pinky_mcp.x * w), int(pinky_mcp.y * h))

    pixel_width = np.linalg.norm(np.array(p1) - np.array(p2))

    # Keep the 4-tuple shape for callers that unpack it, but do not divide by
    # an unusable focal length.
    if focal_length_px is None or focal_length_px <= 0:
        real_width_cm = None
    else:
        real_width_cm = (pixel_width * distance_cm) / focal_length_px

    return real_width_cm, pixel_width, p1, p2
|
|
592
|
+
|
|
593
|
+
def fit_polynomial(self, calibration_samples, polynomial_degree=2):
    """
    Fit a polynomial that maps palm width (px) to distance (cm).

    The fitted model is stored on ``self.model`` as a ``numpy.poly1d``.

    Args:
        calibration_samples: Indexable samples of the form
            ``[(palm_width_px, distance_cm), ...]``.
        polynomial_degree (int): Degree of the fitted polynomial
            (2 gives ``y = Ax^2 + Bx + C``).

    Returns:
        numpy.ndarray: Polynomial coefficients, highest power first.
    """

    def column(position):
        # Pull one field out of every sample as a float32 vector.
        return np.array(
            [sample[position] for sample in calibration_samples], dtype=np.float32
        )

    widths_px = column(0)
    distances_cm = column(1)

    coeffs = np.polyfit(widths_px, distances_cm, polynomial_degree)
    self.model = np.poly1d(coeffs)
    return coeffs
|
|
610
|
+
|
|
611
|
+
def adaptive_distance_cm(
    self,
    palm_width_px,
    frame_width_px,
    horizontal_fov_deg=60,
    estimated_palm_width_cm=8.5,
):
    """
    Estimate camera-to-hand distance without prior calibration.

    The focal length is derived from the frame width and the horizontal field
    of view, ``f = frame_width_px / (2 * tan(fov / 2))``, and then used in the
    pinhole relation
    ``distance_cm = estimated_palm_width_cm * f / palm_width_px``.

    Args:
        palm_width_px (float): Palm width measured in the image, in pixels.
        frame_width_px (float): Width of the frame in pixels.
        horizontal_fov_deg (float): Horizontal field of view in degrees.
        estimated_palm_width_cm (float): Assumed real palm width in cm.

    Returns:
        float | None: Estimated distance in centimetres. Returns None when
        ``palm_width_px`` or ``frame_width_px`` is not positive, or when
        ``horizontal_fov_deg`` is outside the open interval (0, 180).
    """
    if palm_width_px <= 0:
        return None

    # tan(fov / 2) is zero at 0 degrees and negative past 180 degrees; either
    # would raise or produce a meaningless (negative) distance.
    if frame_width_px <= 0 or not 0 < horizontal_fov_deg < 180:
        return None

    half_fov_rad = math.radians(horizontal_fov_deg / 2)
    focal_length_px = frame_width_px / (2 * math.tan(half_fov_rad))

    return (estimated_palm_width_cm * focal_length_px) / palm_width_px
|
|
628
|
+
|
|
629
|
+
def estimate_distance_cm(
    self,
    palm_width_px,
):
    """
    Estimate the camera-to-hand distance (cm) from the palm width in pixels.

    The distance is read from the calibration polynomial stored on
    ``self.model`` (see ``fit_polynomial``). Each accepted estimate is
    appended to ``self.distance_history`` and the mean of that history is
    returned, which smooths frame-to-frame jitter.

    Args:
        palm_width_px (float): Palm width measured in the image, in pixels.

    Returns:
        float | None: Smoothed distance in centimetres. Returns None, without
        touching the history, when no model has been fitted, when
        ``palm_width_px`` is None, zero or negative, or when the raw estimate
        is not a number in the range (0, 300] cm.
    """
    if self.model is None:
        return None

    # A non-positive width is not a real measurement; the polynomial would
    # still return a value for it, so it has to be rejected explicitly.
    if palm_width_px is None or palm_width_px <= 0:
        return None

    distance = float(self.model(palm_width_px))

    # Reject impossible values. The chained comparison is also False for NaN,
    # which keeps NaN out of the running mean.
    if not 0 < distance <= 300:
        return None

    self.distance_history.append(distance)
    return float(np.mean(self.distance_history))
|
|
655
|
+
|
|
656
|
+
def _normalize(self, hand, p1, p2):
    """
    Return the distance between two landmarks relative to the palm scale.

    Dividing by ``self._palm_scale(hand)`` makes the value comparable across
    hands that appear at different sizes in the image.

    Args:
        hand (list): Landmark sequence for one hand.
        p1 (int): Index of the first landmark.
        p2 (int): Index of the second landmark.

    Returns:
        The landmark distance divided by the palm scale, or 0 when the palm
        scale is zero.
    """
    raw_distance = self.euclidean_distance(hand[p1], hand[p2])
    palm = self._palm_scale(hand)
    # A degenerate (zero-size) palm cannot be used as a divisor.
    return raw_distance / palm if palm != 0 else 0
|
|
672
|
+
|
|
673
|
+
def is_fingers_joined(
    self,
    p1,
    p2,
    image,
    landmarks,
    threshold=0.25,
    draw_intersection_point=True,
    debug=False,
):
    """
    Check whether two landmarks are close enough to count as joined.

    The distance between the landmarks is normalised by the palm scale
    (``self._normalize``) and compared against ``threshold``.

    Args:
        p1 (int): Index of the first landmark.
        p2 (int): Index of the second landmark.
        image (numpy.ndarray): Image that is annotated in place when the
            landmarks are joined and ``draw_intersection_point`` is True.
        landmarks (list): Landmarks as ``[id, x, y, ...]`` entries.
        threshold (float): Normalised distance below which the landmarks are
            considered joined.
        draw_intersection_point (bool): Draw a filled green circle at the
            midpoint of the two landmarks when they are joined.
        debug (bool): Print the normalised distance. Off by default so that
            per-frame calls do not flood stdout.

    Returns:
        bool: True if the landmarks are joined, False otherwise (including
        when ``landmarks`` is empty or too short to contain both indices).
    """
    if not landmarks or len(landmarks) <= max(p1, p2):
        return False

    normalized = self._normalize(landmarks, p1, p2)
    if debug:
        print(f"Normalized distance between landmarks {p1} and {p2}: {normalized:.4f}")

    is_joined = normalized < threshold
    if is_joined and draw_intersection_point:
        x1, y1 = landmarks[p1][1], landmarks[p1][2]
        x2, y2 = landmarks[p2][1], landmarks[p2][2]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        # Mark the midpoint of the joined pair with a filled green dot.
        cv2.circle(image, (cx, cy), 10, (0, 255, 0), cv2.FILLED)

    return is_joined
|
|
703
|
+
|
|
704
|
+
def is_fingers_joined_2(
    self,
    p1,
    p2,
    image,
    landmarks,
    threshold=0.18,  # slightly relaxed (important)
    draw_intersection_point=True,
    debug=False,
):
    """
    Check whether two landmarks are joined, using a wrist-based palm scale.

    The pixel distance between the two landmarks is divided by a palm size
    computed as the mean of wrist -> index MCP (landmarks 0 and 5) and
    wrist -> pinky MCP (landmarks 0 and 17). The result is compared against
    ``threshold``.

    Args:
        p1 (int): Index of the first landmark.
        p2 (int): Index of the second landmark.
        image (numpy.ndarray): Image annotated in place when
            ``draw_intersection_point`` is True.
        landmarks (list): Landmarks as ``[id, x, y]`` or ``[id, x, y, z]``.
        threshold (float): Normalised distance below which the landmarks are
            considered joined.
        draw_intersection_point (bool): Draw a circle at the midpoint: filled
            red when joined, outlined yellow otherwise.
        debug (bool): Print the intermediate values.

    Returns:
        bool: True if the landmarks are joined, False otherwise (including
        when ``landmarks`` is empty or too short to contain both indices).
    """
    if not landmarks or len(landmarks) <= max(p1, p2):
        return False

    # Slice instead of unpacking three values so that landmarks without a z
    # component are accepted as well.
    x1, y1 = landmarks[p1][1:3]
    x2, y2 = landmarks[p2][1:3]

    # Raw distance in pixels.
    pixel_dist = self.euclidean_distance((x1, y1), (x2, y2))

    # Palm scale from wrist (0), index MCP (5) and pinky MCP (17).
    try:
        wrist = landmarks[0][1:3]
        index_mcp = landmarks[5][1:3]
        pinky_mcp = landmarks[17][1:3]
        palm_size = (
            self.euclidean_distance(wrist, index_mcp)
            + self.euclidean_distance(wrist, pinky_mcp)
        ) / 2
    except (IndexError, TypeError, ValueError):
        # Incomplete or malformed landmark list: fall back to the generic scale.
        palm_size = self._palm_scale(landmarks)

    # Guard against division by zero for a degenerate palm.
    palm_size = max(palm_size, 1e-6)

    normalized_dist = pixel_dist / palm_size
    is_joined = normalized_dist < threshold

    if debug:
        print(
            f"[JOIN DEBUG] pixel={pixel_dist:.2f}, "
            f"palm={palm_size:.2f}, "
            f"norm={normalized_dist:.4f}, "
            f"threshold={threshold}"
        )

    if draw_intersection_point:
        cx = int((x1 + x2) / 2)
        cy = int((y1 + y2) / 2)

        color = (0, 0, 255) if is_joined else (0, 255, 255)
        thickness = cv2.FILLED if is_joined else 1

        cv2.circle(image, (cx, cy), 8, color, thickness)

    return is_joined
|
|
797
|
+
|
|
798
|
+
def joined_fingers(self, image, landmarks, threshold=0.25):
    """
    Report which fingers have their tip joined to another fingertip.

    Every pair of fingertips from ``self.fingerTips`` is compared using the
    palm-normalised distance (``self._normalize``). When a pair is closer
    than ``threshold``, both fingers of that pair are flagged.

    Args:
        image (numpy.ndarray): Input image; it is copied, not modified.
        landmarks (list): Landmarks of one hand; at least 21 entries are
            required.
        threshold (float): Normalised distance below which two fingertips are
            considered joined.

    Returns:
        tuple: ``(joined_state, annotated)``. ``joined_state`` is a list of
        five 0/1 flags, positioned like ``self.fingerTips`` (assumed to hold
        five entries), where 1 means the finger is joined to at least one
        other finger. ``annotated`` is a copy of ``image``.
    """
    annotated = image.copy()
    joined_state = [0, 0, 0, 0, 0]
    if not landmarks or len(landmarks) < 21:
        return joined_state, annotated

    # Pair up (finger position, landmark id) so each joined pair flags the
    # two fingers it actually involves. The position of the pair within the
    # list of combinations says nothing about which fingers they are.
    finger_tips = enumerate(self.fingerTips)
    for (i, tip_a), (j, tip_b) in combinations(finger_tips, 2):
        if self._normalize(landmarks, tip_a, tip_b) < threshold:
            joined_state[i] = 1
            joined_state[j] = 1

    return joined_state, annotated
|
|
827
|
+
|
|
828
|
+
def count_fingers(self):
    """
    Return how many fingers are currently up.

    Sums the per-finger flags reported by ``self.fingers_up()``.
    """
    raised_flags = self.fingers_up()
    return sum(raised_flags)
|
|
833
|
+
|
|
834
|
+
def is_fist(self):
    """
    Return True for a classic fist: five finger flags, all of them down.
    """
    states = self.fingers_up()
    if len(states) != 5:
        return False
    for state in states:
        if not state == 0:
            return False
    return True
|
|
840
|
+
|
|
841
|
+
def is_open_hand(self):
    """
    Return True for an open palm: five finger flags, all of them up.
    """
    states = self.fingers_up()
    if len(states) != 5:
        return False
    for state in states:
        if not state == 1:
            return False
    return True
|
|
847
|
+
|
|
848
|
+
def is_thumbs_up(self):
    """
    Return True for a thumbs-up: thumb up and the other four fingers down.
    """
    states = self.fingers_up()
    if len(states) != 5:
        return False
    thumb, *others = states
    return thumb == 1 and all(state == 0 for state in others)
|
|
856
|
+
|
|
857
|
+
def is_peace_sign(self):
    """
    Return True for a peace / V sign: index and middle up, the rest down.
    """
    states = self.fingers_up()
    if len(states) != 5:
        return False
    thumb, index, middle, ring, little = states
    return (
        index == 1
        and middle == 1
        and thumb == 0
        and ring == 0
        and little == 0
    )
|
|
870
|
+
|
|
871
|
+
# ─────────────────────────── NEW METHODS ───────────────────────────
|
|
872
|
+
|
|
873
|
+
def get_gesture_name(self, hand_landmarks):
    """Return a human-readable gesture label for the given hand.

    Gestures are tested in priority order (Fist, Open, ThumbsUp, Peace) and
    the first match wins.

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        str: 'Fist' | 'Open' | 'ThumbsUp' | 'Peace' | 'Unknown'
    """
    states = self.fingers_up(hand_landmarks)
    if len(states) < 5:
        return "Unknown"

    # Pre-compute the per-finger "is down" / "is up" tests once.
    down = [state == 0 for state in states]
    up = [state == 1 for state in states]

    if all(down):
        return "Fist"
    if all(up):
        return "Open"
    if up[0] and all(down[1:]):
        return "ThumbsUp"
    if up[1] and up[2] and down[0] and down[3] and down[4]:
        return "Peace"
    return "Unknown"
|
|
900
|
+
|
|
901
|
+
def get_finger_count(self, hand_landmarks):
    """Return the number of fingers currently raised.

    Args:
        hand_landmarks: landmarks_list from get_landmarks().
    Returns:
        int: Sum of the flags reported by ``self.fingers_up(hand_landmarks)``.
    """
    raised_flags = self.fingers_up(hand_landmarks)
    return sum(raised_flags)
|
|
910
|
+
|
|
911
|
+
def get_angle_between_landmarks(self, landmarks_list, a, b, c):
    """Compute the joint angle at landmark b formed by landmarks a-b-c.

    Only the 2D (x, y) components of each landmark are used.

    Args:
        landmarks_list: List of [id, x, y, z] from get_landmarks().
        a, b, c: Landmark indices (e.g. 5, 6, 7 for index finger PIP joint).
    Returns:
        float: Angle in degrees (0–180).
    """

    def xy(idx):
        # Drop the id (and z) and keep the planar position as floats.
        entry = landmarks_list[idx]
        return np.array([entry[1], entry[2]], dtype=float)

    pa, pb, pc = (xy(idx) for idx in (a, b, c))
    ray_a = pa - pb
    ray_c = pc - pb

    # The small epsilon keeps the division defined when a ray has zero length.
    denominator = np.linalg.norm(ray_a) * np.linalg.norm(ray_c) + 1e-6
    cosine = np.clip(np.dot(ray_a, ray_c) / denominator, -1.0, 1.0)
    return float(np.degrees(np.arccos(cosine)))
|
|
928
|
+
|
|
929
|
+
def get_hand_bbox(self, hand_data):
    """Return the bounding box stored in a hand data dict.

    Args:
        hand_data: Single dict from the get_landmarks() list.
    Returns:
        The value under the 'bounding_box' key, expected to be (x, y, w, h).
    Raises:
        KeyError: If 'bounding_box' is missing.
    """
    bounding_box = hand_data["bounding_box"]
    return bounding_box
|
|
938
|
+
|
|
939
|
+
def get_hand_center(self, hand_data):
    """Return the centre point stored in a hand data dict.

    Args:
        hand_data: Single dict from the get_landmarks() list.
    Returns:
        The value under the 'center_point' key, expected to be (cx, cy).
    Raises:
        KeyError: If 'center_point' is missing.
    """
    center_point = hand_data["center_point"]
    return center_point
|
|
948
|
+
|
|
949
|
+
def is_pointing(self, hand_landmarks):
    """Return True for a pointing gesture: only the index finger raised.

    Args:
        hand_landmarks: landmarks_list from get_landmarks().
    Returns:
        bool
    """
    states = self.fingers_up(hand_landmarks)
    if len(states) != 5:
        return False
    thumb, index, middle, ring, little = states
    return (
        index == 1
        and thumb == 0
        and middle == 0
        and ring == 0
        and little == 0
    )
|
|
966
|
+
|
|
967
|
+
def get_wrist_position(self, hand_landmarks):
    """Return the (x, y) position of the wrist landmark (id 0).

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        tuple: (x, y) taken from positions 1 and 2 of the first landmark.
    """
    wrist = hand_landmarks[0]
    return (wrist[1], wrist[2])
|
|
976
|
+
|
|
977
|
+
def get_fingertip_positions(self, hand_landmarks):
|
|
978
|
+
"""Return pixel positions of all five fingertips.
|
|
979
|
+
|
|
980
|
+
Args:
|
|
981
|
+
hand_landmarks: landmarks_list from get_landmarks().
|
|
982
|
+
Returns:
|
|
983
|
+
dict: {'thumb': (x,y), 'index': (x,y), 'middle': (x,y), 'ring': (x,y), 'little': (x,y)}
|
|
984
|
+
"""
|
|
985
|
+
names = ["thumb", "index", "middle", "ring", "little"]
|
|
986
|
+
return {
|
|
987
|
+
name: (hand_landmarks[tip][1], hand_landmarks[tip][2])
|
|
988
|
+
for name, tip in zip(names, self.fingerTips, strict=False)
|
|
989
|
+
}
|
|
990
|
+
|
|
991
|
+
def is_ok_sign(self, hand_landmarks):
    """Return True if the hand is making an OK sign.

    The OK sign requires the thumb tip (landmark 4) and index tip (landmark 8)
    to be close together (normalized distance < 0.08) while the middle, ring,
    and little fingers are raised.

    Landmark coordinates are pixel positions, so the tip-to-tip distance is
    divided by the palm size (mean of wrist -> index MCP and wrist -> pinky
    MCP, landmarks 0/5/17) before it is compared with the threshold. This
    makes the test independent of how large the hand appears in the frame.

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        bool: False when fewer than 21 landmarks or fewer than five finger
        flags are available, or when the palm has zero size.
    """
    fingers = self.fingers_up(hand_landmarks)
    if len(fingers) < 5 or not hand_landmarks or len(hand_landmarks) < 21:
        return False

    thumb_tip = hand_landmarks[4][1:3]
    index_tip = hand_landmarks[8][1:3]
    wrist = hand_landmarks[0][1:3]
    index_mcp = hand_landmarks[5][1:3]
    pinky_mcp = hand_landmarks[17][1:3]

    palm_size = (
        self.euclidean_distance(wrist, index_mcp)
        + self.euclidean_distance(wrist, pinky_mcp)
    ) / 2
    if palm_size <= 0:
        return False

    # NOTE(review): 0.08 is the documented threshold; it may need tuning now
    # that it is applied to a palm-relative distance.
    pinch = self.euclidean_distance(thumb_tip, index_tip) / palm_size
    return bool(
        pinch < 0.08 and fingers[2] == 1 and fingers[3] == 1 and fingers[4] == 1
    )
|
|
1008
|
+
|
|
1009
|
+
def is_call_me(self, hand_landmarks):
    """Return True if the hand is making a 'call me' gesture.

    The call-me gesture has the thumb and little finger extended while the
    index, middle, and ring fingers are folded down.

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        bool: False when ``fingers_up`` does not report exactly five flags
        (for example when no hand is available), matching the other gesture
        checks.
    """
    fingers = self.fingers_up(hand_landmarks)
    if len(fingers) != 5:
        return False
    thumb, index, middle, ring, little = fingers
    return bool(
        thumb == 1 and index == 0 and middle == 0 and ring == 0 and little == 1
    )
|
|
1028
|
+
|
|
1029
|
+
def is_rock_sign(self, hand_landmarks):
    """Return True if the hand is making a rock/devil-horns sign.

    The rock sign has the index and little fingers extended while the thumb,
    middle, and ring fingers are folded down.

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        bool: False when ``fingers_up`` does not report exactly five flags
        (for example when no hand is available), matching the other gesture
        checks.
    """
    fingers = self.fingers_up(hand_landmarks)
    if len(fingers) != 5:
        return False
    thumb, index, middle, ring, little = fingers
    return bool(
        thumb == 0 and index == 1 and middle == 0 and ring == 0 and little == 1
    )
|
|
1048
|
+
|
|
1049
|
+
def recognize_number(self, hand_landmarks):
    """Return the number shown by the hand, i.e. the count of raised fingers.

    This is a thin alias for ``get_finger_count``.

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        int: Number of fingers raised.
    """
    shown_number = self.get_finger_count(hand_landmarks)
    return shown_number
|
|
1060
|
+
|
|
1061
|
+
def get_hand_orientation(self, hand_landmarks):
    """Return the cardinal direction of the wrist-to-middle-MCP vector.

    The vector runs from the wrist (landmark 0) to the middle finger MCP
    (landmark 9). The dominant axis decides between a horizontal and a
    vertical label; ties go to the horizontal axis. Image coordinates are
    used, so a negative dy means "up".

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        str: One of 'palm_up', 'palm_down', 'palm_left', 'palm_right'.
    """
    wrist = hand_landmarks[0]
    knuckle = hand_landmarks[9]
    dx = knuckle[1] - wrist[1]
    dy = knuckle[2] - wrist[2]

    if abs(dx) >= abs(dy):
        if dx > 0:
            return "palm_right"
        return "palm_left"

    if dy < 0:
        return "palm_up"
    return "palm_down"
|
|
1079
|
+
|
|
1080
|
+
def get_swipe_direction(self, prev_wrist, curr_wrist, threshold=20):
    """Classify the swipe direction between two wrist positions.

    The axis with the larger displacement decides the direction; ties go to
    the horizontal axis. Image coordinates are used, so a positive dy means
    "down".

    Args:
        prev_wrist (tuple): Previous wrist position as (x, y).
        curr_wrist (tuple): Current wrist position as (x, y).
        threshold (int): Minimum displacement on the dominant axis for the
            movement to count as a swipe.
    Returns:
        str: One of 'right', 'left', 'up', 'down', 'none'.
    """
    dx = curr_wrist[0] - prev_wrist[0]
    dy = curr_wrist[1] - prev_wrist[1]
    span_x, span_y = abs(dx), abs(dy)

    if max(span_x, span_y) < threshold:
        return "none"

    if span_x >= span_y:
        if dx > 0:
            return "right"
        return "left"

    if dy > 0:
        return "down"
    return "up"
|
|
1101
|
+
|
|
1102
|
+
def get_all_finger_angles(self, hand_landmarks):
    """Compute one joint angle per finger.

    For every finger a fixed landmark triplet (a, b, c) is passed to
    ``get_angle_between_landmarks``, which measures the angle at b.

    Args:
        hand_landmarks: landmarks_list from get_landmarks() — list of [id, x, y, z].
    Returns:
        dict: Keys are finger names ('thumb', 'index', 'middle', 'ring', 'little'),
            values are the angles returned by ``get_angle_between_landmarks``.
    """
    triplets = (
        ("thumb", (1, 2, 3)),
        ("index", (5, 6, 7)),
        ("middle", (9, 10, 11)),
        ("ring", (13, 14, 15)),
        ("little", (17, 18, 19)),
    )
    angles = {}
    for finger, (a, b, c) in triplets:
        angles[finger] = self.get_angle_between_landmarks(hand_landmarks, a, b, c)
    return angles
|
|
1124
|
+
|
|
1125
|
+
def draw_gesture_label(self, image, hand_data, label):
    """Draw a gesture label above the hand bounding box on a copy of the image.

    The text is anchored at the left edge of the box, 10 pixels above its top,
    but never higher than y = 10 so it stays inside the frame.

    Args:
        image: BGR numpy array.
        hand_data (dict): Hand dict from get_landmarks() with key 'bounding_box' (x, y, w, h).
        label (str): Gesture label text to render.
    Returns:
        numpy.ndarray: Annotated copy of the input image (BGR).
    """
    canvas = image.copy()
    box_x, box_y, _box_w, _box_h = hand_data["bounding_box"]
    anchor = (box_x, max(box_y - 10, 10))
    cv2.putText(
        canvas,
        label,
        anchor,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 0, 0),
        2,
    )
    return canvas
|
|
1147
|
+
|
|
1148
|
+
def to_json(self, hand_data):
    """Convert a hand data dict into a structure built from lists.

    Tuples are turned into lists and missing keys fall back to neutral
    defaults. Note that the 'landmarks_list' input key is emitted as
    'landmarks'.

    Args:
        hand_data (dict): Hand dict from get_landmarks() containing 'hand_type',
            'center_point', 'bounding_box', and 'landmarks_list'.
    Returns:
        dict: Dict with keys 'hand_type', 'center_point', 'bounding_box',
            and 'landmarks'.
    """
    lookup = hand_data.get
    payload = {}
    payload["hand_type"] = lookup("hand_type", "Unknown")
    payload["center_point"] = list(lookup("center_point", (0, 0)))
    payload["bounding_box"] = list(lookup("bounding_box", (0, 0, 0, 0)))
    payload["landmarks"] = [list(entry) for entry in lookup("landmarks_list", [])]
    return payload
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
def main():
    """Run the interactive webcam demo for HandDetector.

    Reads frames from the module-level ``cap`` capture, draws hand landmarks,
    prints detected taps and gestures to stdout, overlays an FPS counter and
    shows the result in an OpenCV window. The loop ends when a frame cannot
    be read or when the Esc key is pressed.

    The capture device and all OpenCV windows are released on exit, including
    when an exception interrupts the loop.
    """
    previous_time = 0
    try:
        hand_detector = HandDetector()
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # The detector is fed RGB frames; OpenCV captures in BGR.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            annotated_image = hand_detector.draw_landmarks(frame_rgb)

            # Return value intentionally unused; presumably this refreshes the
            # detector state read by the gesture checks below (verify against
            # HandDetector.get_landmarks).
            hand_detector.get_landmarks(annotated_image)

            # cvtColor returns a new array rather than converting in place, so
            # the result must be kept; otherwise imshow (which expects BGR)
            # would display the frame with red and blue swapped.
            annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)

            taps = hand_detector.detect_finger_tapping()
            # Result unused; call kept in case it updates detector state.
            hand_detector.count_fingers()

            # Example prints (replace with your own logic).
            if any(taps):
                print(f"🔥 TAP DETECTED! Fingers: {taps} | Finger indices [4,8,12,16,20]")

            if hand_detector.is_fist():
                print("👊 Fist detected")
            if hand_detector.is_thumbs_up():
                print("👍 Thumbs up")
            if hand_detector.is_peace_sign():
                print("✌️ Peace sign")
            if hand_detector.is_open_hand():
                print("🖐️ Open hand")

            # FPS from the wall-clock time between consecutive frames; the
            # guard avoids a division by zero when two frames share a timestamp.
            current_time = time.time()
            elapsed = current_time - previous_time
            fps = 1 / elapsed if elapsed > 0 else 0
            previous_time = current_time
            cv2.putText(
                annotated_image,
                f"FPS: {int(fps)}",
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                (0, 255, 0),
                2,
            )
            cv2.imshow(
                "Extended Hand Tracking (MediaPipe) - Tapping + Gestures", annotated_image
            )

            # waitKey(1) pumps the GUI event loop for 1 ms; 27 is the Esc key.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # Always free the camera and close windows, even after an error.
        cap.release()
        cv2.destroyAllWindows()
|
|
1227
|
+
|
|
1228
|
+
|
|
1229
|
+
# Start the webcam demo only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|