hand_tracking_teleop-0.1.0-py3-none-any.whl
- hand_tracking/__init__.py +17 -0
- hand_tracking/cli.py +279 -0
- hand_tracking/hand_detector.py +475 -0
- hand_tracking/visualizer_3d.py +214 -0
- hand_tracking/visualizer_combined.py +221 -0
- hand_tracking/visualizer_rerun.py +255 -0
- hand_tracking_teleop-0.1.0.dist-info/METADATA +436 -0
- hand_tracking_teleop-0.1.0.dist-info/RECORD +14 -0
- hand_tracking_teleop-0.1.0.dist-info/WHEEL +5 -0
- hand_tracking_teleop-0.1.0.dist-info/entry_points.txt +6 -0
- hand_tracking_teleop-0.1.0.dist-info/top_level.txt +3 -0
- mapping/__init__.py +12 -0
- mapping/kinematic_mapper.py +405 -0
- robot_interface/dual_interface.py +780 -0
hand_tracking/hand_detector.py
@@ -0,0 +1,475 @@
#!/usr/bin/env python3
"""
Hand detection using MediaPipe for data collection.

This module handles real-time hand landmark detection and tracking.
"""

import cv2
import mediapipe as mp
import numpy as np
from typing import Dict, List, Optional, Tuple, NamedTuple
import yaml
from dataclasses import dataclass


@dataclass
class HandLandmarks:
    """Container for hand landmark data."""
    landmarks: np.ndarray  # Shape: (21, 3) - x, y, z coordinates
    handedness: str  # "Left" or "Right"
    confidence: float
    bbox: Tuple[int, int, int, int]  # x, y, width, height

    def get_normalized_landmarks(self, palm_size_meters: float = 0.08,
                                 image_center: Tuple[float, float] = (640, 360),
                                 reference_distance_m: float = 0.5,
                                 reference_palm_width_px: float = 180.0) -> np.ndarray:
        """
        Get landmarks scaled to realistic hand proportions with depth estimation.

        Centers on the image center (not the wrist) so hand position tracks
        movement in the frame. Scales so palm width ≈ palm_size_meters with
        uniform scaling on all axes. Estimates Z depth from palm size using
        perspective geometry.

        Args:
            palm_size_meters: Target palm width in meters (default 0.08m for an adult hand)
            image_center: Center of image in pixels (x, y); default (640, 360) for 1280x720
            reference_distance_m: Distance from camera during calibration in meters
            reference_palm_width_px: Palm width in pixels at the reference distance

        Returns:
            np.ndarray: Normalized landmarks in meters, shape (21, 3)
        """
        # Calculate current palm width in pixels (wrist to middle finger MCP)
        wrist = self.landmarks[0]
        middle_mcp = self.landmarks[9]
        palm_width_px = np.sqrt((middle_mcp[0] - wrist[0])**2 +
                                (middle_mcp[1] - wrist[1])**2)

        # Scale factor: convert pixels to meters based on palm size
        if palm_width_px > 1.0:
            scale = palm_size_meters / palm_width_px
        else:
            scale = 0.001  # fallback for invalid detection

        # Estimate camera distance from palm size (perspective geometry):
        # a closer hand projects a bigger palm, a farther hand a smaller one
        if palm_width_px > 1.0:
            estimated_distance_m = reference_distance_m * (reference_palm_width_px / palm_width_px)
        else:
            estimated_distance_m = reference_distance_m

        # Z offset from the reference distance (positive = farther, negative = closer)
        z_base = estimated_distance_m - reference_distance_m

        # Center on the IMAGE CENTER (not the wrist) and scale all axes uniformly.
        # This lets the hand move in 3D space as it moves in the frame.
        normalized = self.landmarks.copy()
        normalized[:, 0] = (self.landmarks[:, 0] - image_center[0]) * scale
        normalized[:, 1] = (self.landmarks[:, 1] - image_center[1]) * scale
        # MediaPipe z is relative finger depth within the hand - add it to the
        # estimated camera distance offset
        normalized[:, 2] = z_base + (self.landmarks[:, 2] * scale)

        return normalized

    def get_palm_width(self) -> float:
        """
        Get the palm width in pixels (distance between wrist and middle finger MCP).

        Returns:
            float: Palm width in pixels
        """
        wrist = self.landmarks[0]
        middle_mcp = self.landmarks[9]
        return np.sqrt((middle_mcp[0] - wrist[0])**2 + (middle_mcp[1] - wrist[1])**2)
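A quick numeric check of the normalization above, outside the camera loop (not part of the packaged file; the landmark array is synthetic, and the reference values are the calibration defaults used elsewhere in the package):

```python
import numpy as np
from hand_tracking.hand_detector import HandLandmarks

# Synthetic hand: wrist at the image center, middle-finger MCP 120 px above
# it, so the measured palm width is 120 px.
lm = np.zeros((21, 3))
lm[:, 0] = 640.0   # x: all points at the horizontal center of a 1280x720 frame
lm[:, 1] = 360.0   # y: vertical center
lm[9, 1] = 240.0   # middle MCP 120 px above the wrist

hand = HandLandmarks(landmarks=lm, handedness="Right",
                     confidence=0.99, bbox=(0, 0, 0, 0))

norm = hand.get_normalized_landmarks(reference_distance_m=0.5,
                                     reference_palm_width_px=166.37)

# scale = 0.08 m / 120 px ≈ 0.00067 m/px, and the estimated distance is
# 0.5 m * (166.37 / 120) ≈ 0.69 m, i.e. z_base ≈ +0.19 m (farther than reference).
print(norm[0])  # wrist ≈ [0.0, 0.0, 0.19]
```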

class HandDetector:
    """Real-time hand detection and tracking using MediaPipe."""

    def __init__(self, config_path: Optional[str] = None):
        """Initialize hand detector with configuration."""
        self.config = self._load_config(config_path)
        self._setup_mediapipe()
        self._setup_camera()

        # Load calibrated reference palm width if available
        self.reference_palm_width = self._get_reference_palm_width()

        # Load calibration parameters for metric conversion
        self.reference_distance_m, self.calibrated_palm_width_px = self._get_calibration_params()

        # Tracking state
        self.is_tracking = False
        self.frame_count = 0
        self.last_landmarks = {}
        self.last_hands = []  # Cache for frame skipping

    def _load_config(self, config_path: Optional[str]) -> Dict:
        """Load configuration from a YAML file."""
        if config_path is None:
            # Default configuration
            return {
                'hand_tracking': {
                    'camera': {'device_id': 0, 'width': 1280, 'height': 720, 'fps': 30},
                    'mediapipe': {
                        'max_num_hands': 2,
                        'min_detection_confidence': 0.7,
                        'min_tracking_confidence': 0.5,
                        'model_complexity': 1
                    },
                    'landmarks': {'smoothing_factor': 0.8, 'confidence_threshold': 0.6},
                    'display': {
                        'show_landmarks': True,
                        'show_connections': True,
                        'show_bounding_box': True,
                        'window_name': 'Hand Tracking'
                    }
                }
            }

        with open(config_path, 'r') as f:
            return yaml.safe_load(f)
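When config_path is given, the YAML must mirror the default dictionary above. A minimal sketch of such a file, embedded as a string (the calibration block is the one read by _get_calibration_params below; not part of the packaged file):

```python
import yaml

CONFIG_YAML = """
hand_tracking:
  camera: {device_id: 0, width: 1280, height: 720, fps: 30}
  mediapipe:
    max_num_hands: 2
    min_detection_confidence: 0.7
    min_tracking_confidence: 0.5
    model_complexity: 1
  landmarks: {smoothing_factor: 0.8, confidence_threshold: 0.6}
  display:
    show_landmarks: true
    show_connections: true
    show_bounding_box: true
    window_name: Hand Tracking
calibration:
  reference_distance_cm: 50.0
  reference_palm_width: 166.37
"""

config = yaml.safe_load(CONFIG_YAML)
assert config['hand_tracking']['camera']['width'] == 1280
```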

    def _get_reference_palm_width(self) -> float:
        """Get the reference palm width from config, or use the default."""
        # Check for a calibrated value first
        if 'hand_tracking' in self.config:
            if 'normalization' in self.config['hand_tracking']:
                ref = self.config['hand_tracking']['normalization'].get('reference_palm_width')
                if ref is not None:
                    print(f"✓ Using calibrated reference palm width: {ref:.2f}px")
                    return float(ref)

        # Check the old calibration format
        if 'calibration' in self.config:
            ref = self.config['calibration'].get('reference_palm_width')
            if ref is not None:
                print(f"✓ Using calibrated reference palm width: {ref:.2f}px")
                return float(ref)

        # Default value
        return 100.0

    def _get_calibration_params(self) -> Tuple[float, float]:
        """Get calibration parameters from config.

        Returns:
            Tuple of (reference_distance_m, calibrated_palm_width_px)
        """
        if 'calibration' in self.config:
            ref_dist = self.config['calibration'].get('reference_distance_cm', 50.0) / 100.0  # convert to meters
            ref_palm = self.config['calibration'].get('reference_palm_width', 166.37)
            return ref_dist, ref_palm
        return 0.50, 166.37  # defaults (meters, pixels)

    def _get_reference_distance_m(self) -> float:
        """Get the reference distance in meters from config."""
        if 'calibration' in self.config:
            ref_cm = self.config['calibration'].get('reference_distance_cm', 50.0)
            return ref_cm / 100.0
        return 0.50

    def _setup_mediapipe(self):
        """Initialize the MediaPipe Hands solution."""
        self.mp_hands = mp.solutions.hands
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

        # Initialize the hands detector
        mp_config = self.config['hand_tracking']['mediapipe']
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=mp_config['max_num_hands'],
            min_detection_confidence=mp_config['min_detection_confidence'],
            min_tracking_confidence=mp_config['min_tracking_confidence'],
            model_complexity=mp_config['model_complexity']
        )

    def _setup_camera(self):
        """Initialize camera capture."""
        camera_config = self.config['hand_tracking']['camera']
        self.cap = cv2.VideoCapture(camera_config['device_id'])

        # Set camera properties
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_config['width'])
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_config['height'])
        self.cap.set(cv2.CAP_PROP_FPS, camera_config['fps'])

        if not self.cap.isOpened():
            raise RuntimeError(f"Failed to open camera {camera_config['device_id']}")

    def start_tracking(self):
        """Start hand tracking."""
        self.is_tracking = True
        self.frame_count = 0
        print("Hand tracking started. Press 'q' to quit.")

    def stop_tracking(self):
        """Stop hand tracking."""
        self.is_tracking = False
        print("Hand tracking stopped.")

    def get_frame(self) -> Optional[np.ndarray]:
        """Capture a frame from the camera."""
        if not self.cap.isOpened():
            return None

        ret, frame = self.cap.read()
        if not ret:
            return None

        # Flip the frame horizontally for a mirror effect
        frame = cv2.flip(frame, 1)
        return frame

    def detect_hands(self, frame: np.ndarray) -> List[HandLandmarks]:
        """Detect hands in the given frame."""
        if frame is None:
            return []

        # Convert BGR to RGB (MediaPipe requires RGB)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Improve performance by marking the image as not writeable
        rgb_frame.flags.writeable = False

        # Process the frame with MediaPipe
        results = self.hands.process(rgb_frame)

        # Mark the image as writeable again
        rgb_frame.flags.writeable = True

        detected_hands = []

        if results.multi_hand_landmarks and results.multi_handedness:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                # Extract landmark coordinates
                landmarks = self._extract_landmarks(hand_landmarks, frame.shape)

                # Get handedness
                hand_label = handedness.classification[0].label
                confidence = handedness.classification[0].score

                # Calculate the bounding box
                bbox = self._calculate_bbox(landmarks, frame.shape)

                # Apply smoothing if we have previous landmarks for this hand
                if hand_label in self.last_landmarks:
                    landmarks = self._apply_smoothing(landmarks, self.last_landmarks[hand_label])

                # Store the current landmarks for the next frame
                self.last_landmarks[hand_label] = landmarks.copy()

                # Create the HandLandmarks object
                hand_data = HandLandmarks(
                    landmarks=landmarks,
                    handedness=hand_label,
                    confidence=confidence,
                    bbox=bbox
                )

                detected_hands.append(hand_data)

        return detected_hands
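detect_hands can also be exercised on a single image rather than the live loop; a sketch (not part of the packaged file; the image path is illustrative, and note that constructing HandDetector still opens the configured camera):

```python
import cv2
from hand_tracking.hand_detector import HandDetector

detector = HandDetector()        # raises RuntimeError if no camera is available
frame = cv2.imread("hand.jpg")   # hypothetical BGR test image
for hand in detector.detect_hands(frame):
    print(hand.handedness, f"{hand.confidence:.2f}", hand.bbox)
detector.cleanup()
```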

    def _extract_landmarks(self, hand_landmarks, frame_shape: Tuple[int, int, int]) -> np.ndarray:
        """Extract landmark coordinates from MediaPipe results."""
        height, width = frame_shape[:2]
        landmarks = np.zeros((21, 3))

        for i, landmark in enumerate(hand_landmarks.landmark):
            landmarks[i] = [
                landmark.x * width,   # x in pixel space
                landmark.y * height,  # y in pixel space
                landmark.z * width    # z in pixel space (same scale as x per MediaPipe docs)
            ]

        return landmarks

    def _calculate_bbox(self, landmarks: np.ndarray, frame_shape: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
        """Calculate a bounding box around the hand landmarks."""
        x_coords = landmarks[:, 0]
        y_coords = landmarks[:, 1]

        x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords))
        y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords))

        # Add padding, clamped to the frame
        padding = 20
        x_min = max(0, x_min - padding)
        y_min = max(0, y_min - padding)
        x_max = min(frame_shape[1], x_max + padding)
        y_max = min(frame_shape[0], y_max + padding)

        return (x_min, y_min, x_max - x_min, y_max - y_min)

    def _apply_smoothing(self, current_landmarks: np.ndarray, previous_landmarks: np.ndarray) -> np.ndarray:
        """Apply temporal (exponential) smoothing to landmarks."""
        alpha = self.config['hand_tracking']['landmarks']['smoothing_factor']
        return alpha * previous_landmarks + (1 - alpha) * current_landmarks
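_apply_smoothing is an exponential moving average, s_t = alpha * s_{t-1} + (1 - alpha) * x_t, with alpha = 0.8 taken from smoothing_factor; a higher alpha suppresses jitter more but adds lag, since the update weights the previous estimate rather than the new sample. A self-contained sketch of the noise reduction (not part of the packaged file):

```python
import numpy as np

rng = np.random.default_rng(0)
alpha = 0.8                                    # smoothing_factor from the config
target = np.ones(300)                          # a stationary landmark coordinate
noisy = target + rng.normal(0.0, 0.1, 300)     # simulated per-frame jitter

smoothed = np.empty_like(noisy)
smoothed[0] = noisy[0]
for t in range(1, len(noisy)):
    # Same update rule as _apply_smoothing, applied to a single value
    smoothed[t] = alpha * smoothed[t - 1] + (1 - alpha) * noisy[t]

# For stationary input, the EMA shrinks the noise std by
# sqrt((1 - alpha) / (1 + alpha)) ≈ 1/3 for alpha = 0.8.
print(noisy[50:].std(), smoothed[50:].std())
```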

    def draw_landmarks(self, frame: np.ndarray, hands: List[HandLandmarks]) -> np.ndarray:
        """Draw hand landmarks and connections on the frame."""
        display_config = self.config['hand_tracking']['display']

        for hand in hands:
            if display_config['show_landmarks']:
                # Convert landmarks back to MediaPipe format for drawing
                mp_landmarks = self._landmarks_to_mediapipe(hand.landmarks, frame.shape)

                # Draw landmarks and connections
                self.mp_drawing.draw_landmarks(
                    frame,
                    mp_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style()
                )

            if display_config['show_bounding_box']:
                # Draw the bounding box
                x, y, w, h = hand.bbox
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

                # Draw the hand label
                label = f"{hand.handedness} ({hand.confidence:.2f})"
                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        return frame

    def _landmarks_to_mediapipe(self, landmarks: np.ndarray, frame_shape: Tuple[int, int, int]):
        """Convert pixel-space landmarks back to MediaPipe's normalized format for drawing."""
        from mediapipe.framework.formats import landmark_pb2

        height, width = frame_shape[:2]

        # Create a proper MediaPipe NormalizedLandmarkList
        landmark_list = landmark_pb2.NormalizedLandmarkList()

        for i in range(21):
            landmark = landmark_list.landmark.add()
            landmark.x = landmarks[i, 0] / width
            landmark.y = landmarks[i, 1] / height
            landmark.z = landmarks[i, 2]

        return landmark_list

    def run_interactive(self, mode='combined'):
        """Run interactive hand tracking with visualization.

        Args:
            mode: '2d' for an OpenCV window, '3d' for matplotlib 3D,
                  'combined' for both, 'rerun' for Rerun SDK visualization
        """
        if mode == '3d':
            return self.run_3d_visualization()
        elif mode == 'combined':
            return self.run_combined_visualization()
        elif mode == 'rerun':
            return self.run_rerun_visualization()

        # Default 2D mode
        self.start_tracking()

        try:
            while self.is_tracking:
                frame = self.get_frame()
                if frame is None:
                    break

                # Detect hands
                hands = self.detect_hands(frame)

                # Draw landmarks
                frame = self.draw_landmarks(frame, hands)

                # Add frame info
                cv2.putText(frame, f"Frame: {self.frame_count}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(frame, f"Hands: {len(hands)}", (10, 70),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

                # Display the frame
                window_name = self.config['hand_tracking']['display']['window_name']
                cv2.imshow(window_name, frame)

                # Check for quit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                self.frame_count += 1

        except KeyboardInterrupt:
            print("\nInterrupted by user")
        finally:
            self.cleanup()

    def run_3d_visualization(self):
        """Run 3D visualization using matplotlib."""
        try:
            import matplotlib.pyplot as plt
            from matplotlib.animation import FuncAnimation
            from mpl_toolkits.mplot3d import Axes3D
            import matplotlib
            matplotlib.use('TkAgg')
        except ImportError:
            print("❌ matplotlib is required for 3D visualization")
            print("Install with: pip install matplotlib")
            return

        from .visualizer_3d import Hand3DVisualizer
        visualizer = Hand3DVisualizer(self)
        visualizer.run()

    def run_combined_visualization(self):
        """Run combined 2D + 3D visualization."""
        try:
            import matplotlib.pyplot as plt
            from matplotlib.animation import FuncAnimation
            from mpl_toolkits.mplot3d import Axes3D
            import matplotlib
            matplotlib.use('TkAgg')
        except ImportError:
            print("❌ matplotlib is required for combined visualization")
            print("Install with: pip install matplotlib")
            return

        from .visualizer_combined import CombinedVisualizer
        visualizer = CombinedVisualizer(self)
        visualizer.run()

    def run_rerun_visualization(self):
        """Run Rerun-based visualization.

        Provides improved real-time performance and better 3D interaction
        compared to the matplotlib-based visualization.
        """
        try:
            import rerun as rr
        except ImportError:
            print("❌ Rerun is required for this visualization mode")
            print("Install with: pip install rerun-sdk")
            return

        from .visualizer_rerun import RerunVisualizer
        visualizer = RerunVisualizer(self)
        visualizer.run()

    def cleanup(self):
        """Clean up resources."""
        self.stop_tracking()
        if hasattr(self, 'cap'):
            self.cap.release()
        cv2.destroyAllWindows()

    def __del__(self):
        """Destructor to ensure cleanup."""
        self.cleanup()


if __name__ == "__main__":
    # Test the hand detector
    detector = HandDetector()
    detector.run_interactive()
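Beyond run_interactive, the detector can be driven headlessly; a sketch assuming a camera is available at the configured device_id (not part of the packaged file):

```python
from hand_tracking.hand_detector import HandDetector

detector = HandDetector()
try:
    for _ in range(100):                      # process ~100 frames
        frame = detector.get_frame()
        if frame is None:
            break
        for hand in detector.detect_hands(frame):
            wrist = hand.get_normalized_landmarks(
                reference_distance_m=detector.reference_distance_m,
                reference_palm_width_px=detector.calibrated_palm_width_px,
            )[0]
            print(f"{hand.handedness} wrist at "
                  f"({wrist[0]:.3f}, {wrist[1]:.3f}, {wrist[2]:.3f}) m")
finally:
    detector.cleanup()
```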

hand_tracking/visualizer_3d.py
@@ -0,0 +1,214 @@
"""3D visualization module for hand tracking."""

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
import matplotlib


class Hand3DVisualizer:
    """3D visualization of hand landmarks using matplotlib."""

    # MediaPipe hand connections
    HAND_CONNECTIONS = [
        (0, 1), (1, 2), (2, 3), (3, 4),         # Thumb
        (0, 5), (5, 6), (6, 7), (7, 8),         # Index
        (0, 9), (9, 10), (10, 11), (11, 12),    # Middle
        (0, 13), (13, 14), (14, 15), (15, 16),  # Ring
        (0, 17), (17, 18), (18, 19), (19, 20),  # Pinky
        (5, 9), (9, 13), (13, 17)               # Palm
    ]

    FINGER_COLORS = {
        'thumb': '#FF6B6B',
        'index': '#4ECDC4',
        'middle': '#45B7D1',
        'ring': '#96CEB4',
        'pinky': '#FFEAA7',
        'palm': '#DFE6E9'
    }

    def __init__(self, detector):
        """Initialize with a hand detector."""
        self.detector = detector
        self.is_running = False

        # Create the figure
        self.fig = plt.figure(figsize=(15, 6))

        # 3D plots
        self.ax_left = self.fig.add_subplot(121, projection='3d')
        self.ax_left.set_title('Left Hand', fontsize=14, fontweight='bold')

        self.ax_right = self.fig.add_subplot(122, projection='3d')
        self.ax_right.set_title('Right Hand', fontsize=14, fontweight='bold')

        # Set up the axes
        for ax in [self.ax_left, self.ax_right]:
            self._setup_axis(ax)

        # Initialize the plots
        self.left_hand_plots = self._init_hand_plots(self.ax_left)
        self.right_hand_plots = self._init_hand_plots(self.ax_right)

        plt.tight_layout()

    def _setup_axis(self, ax):
        """Set up a 3D axis with metric units (meters).

        Axis limits accommodate hand movement across the frame:
        - X/Y: ±0.3m (a hand ~300px from center maps to 300 * ~0.0005 m/px ≈ 0.15m)
        - Z: ±0.15m (depth range for hand distance variations)
        """
        ax.set_xlim([-0.30, 0.30])
        ax.set_ylim([-0.30, 0.30])
        ax.set_zlim([-0.15, 0.15])
        ax.set_xlabel('X (m)', fontsize=10)
        ax.set_ylabel('Y (m)', fontsize=10)
        ax.set_zlabel('Z (m)', fontsize=10)
        ax.view_init(elev=15, azim=45)
        ax.grid(True, alpha=0.3)
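A quick consistency check of those limits against the normalization in hand_detector.py, using the calibration default shipped with the package (166.37 px at 0.5 m):

```python
# Pixel-to-meter scale when the palm spans the calibrated 166.37 px
scale = 0.08 / 166.37          # ≈ 4.8e-4 m/px
half_frame_px = 1280 / 2       # max horizontal offset from the image center
print(half_frame_px * scale)   # ≈ 0.31 m, matching the ±0.30 m X limit
```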

    def _init_hand_plots(self, ax):
        """Initialize plot elements."""
        plots = {}

        # Landmarks
        plots['landmarks'] = ax.scatter([], [], [], c='red', s=50, alpha=0.8)

        # Connections
        plots['connections'] = []
        for _ in self.HAND_CONNECTIONS:
            line, = ax.plot([], [], [], 'b-', linewidth=2, alpha=0.6)
            plots['connections'].append(line)

        # Coordinate axes for key landmarks (wrist and fingertips)
        plots['axes'] = []
        key_landmarks = [0, 4, 8, 12, 16, 20]
        for _ in key_landmarks:
            x_axis, = ax.plot([], [], [], 'r-', linewidth=1, alpha=0.7)
            y_axis, = ax.plot([], [], [], 'g-', linewidth=1, alpha=0.7)
            z_axis, = ax.plot([], [], [], 'b-', linewidth=1, alpha=0.7)
            plots['axes'].append([x_axis, y_axis, z_axis])

        # Text
        plots['text'] = ax.text2D(0.05, 0.95, '', transform=ax.transAxes,
                                  fontsize=10, verticalalignment='top')

        return plots

    def _get_connection_color(self, conn_idx):
        """Get the color for a connection, grouped by finger."""
        start, end = self.HAND_CONNECTIONS[conn_idx]

        # Handle the palm bridges (5,9), (9,13), (13,17) first; otherwise the
        # per-finger index ranges below would claim them and the 'palm' color
        # would never be used.
        if (start, end) in [(5, 9), (9, 13), (13, 17)]:
            return self.FINGER_COLORS['palm']

        if max(start, end) <= 4:
            return self.FINGER_COLORS['thumb']
        elif max(start, end) <= 8:
            return self.FINGER_COLORS['index']
        elif max(start, end) <= 12:
            return self.FINGER_COLORS['middle']
        elif max(start, end) <= 16:
            return self.FINGER_COLORS['ring']
        else:
            return self.FINGER_COLORS['pinky']

    def _update_hand_plot(self, ax, plots, hand_data):
        """Update the 3D plot for one hand."""
        if hand_data is None:
            plots['landmarks']._offsets3d = ([], [], [])
            for line in plots['connections']:
                line.set_data([], [])
                line.set_3d_properties([])
            for axis_lines in plots['axes']:
                for line in axis_lines:
                    line.set_data([], [])
                    line.set_3d_properties([])
            plots['text'].set_text('No hand detected')
            return

        # Get normalized landmarks in meters (palm width ≈ 0.08m), passing
        # calibration params for depth estimation from palm size
        normalized = hand_data.get_normalized_landmarks(
            palm_size_meters=0.08,
            reference_distance_m=self.detector.reference_distance_m,
            reference_palm_width_px=self.detector.calibrated_palm_width_px
        )

        # Update landmarks
        plots['landmarks']._offsets3d = (normalized[:, 0], normalized[:, 1], normalized[:, 2])

        # Update connections
        for idx, (start, end) in enumerate(self.HAND_CONNECTIONS):
            x_data = [normalized[start, 0], normalized[end, 0]]
            y_data = [normalized[start, 1], normalized[end, 1]]
            z_data = [normalized[start, 2], normalized[end, 2]]
            plots['connections'][idx].set_data(x_data, y_data)
            plots['connections'][idx].set_3d_properties(z_data)
            plots['connections'][idx].set_color(self._get_connection_color(idx))

        # Update the coordinate axes at the wrist and fingertips
        key_landmarks = [0, 4, 8, 12, 16, 20]
        axis_length = 0.01  # 1cm axis lines, in meters

        for i, landmark_idx in enumerate(key_landmarks):
            if i < len(plots['axes']):
                point = normalized[landmark_idx]

                # X axis (red)
                plots['axes'][i][0].set_data([point[0], point[0] + axis_length], [point[1], point[1]])
                plots['axes'][i][0].set_3d_properties([point[2], point[2]])

                # Y axis (green)
                plots['axes'][i][1].set_data([point[0], point[0]], [point[1], point[1] + axis_length])
                plots['axes'][i][1].set_3d_properties([point[2], point[2]])

                # Z axis (blue)
                plots['axes'][i][2].set_data([point[0], point[0]], [point[1], point[1]])
                plots['axes'][i][2].set_3d_properties([point[2], point[2] + axis_length])

        # Update the text overlay (show the wrist position to verify hand
        # tracking in 3D space)
        wrist = normalized[0]
        plots['text'].set_text(
            f"{hand_data.handedness}\n"
            f"Conf: {hand_data.confidence:.2f}\n"
            f"Wrist: ({wrist[0]*100:.1f}, {wrist[1]*100:.1f}, {wrist[2]*100:.1f})cm"
        )

    def update(self, frame_num):
        """Animation update callback."""
        frame = self.detector.get_frame()
        if frame is None:
            return []

        hands = self.detector.detect_hands(frame)

        left_hand = next((h for h in hands if h.handedness == 'Left'), None)
        right_hand = next((h for h in hands if h.handedness == 'Right'), None)

        self._update_hand_plot(self.ax_left, self.left_hand_plots, left_hand)
        self._update_hand_plot(self.ax_right, self.right_hand_plots, right_hand)

        self.fig.suptitle(f'3D Hand Tracking - Frame: {self.detector.frame_count}',
                          fontsize=16, fontweight='bold')

        self.detector.frame_count += 1
        return []

    def run(self):
        """Run the visualization."""
        print("Starting 3D visualization...")
        print("Controls: Rotate (drag), Zoom (scroll), Close window to exit")

        self.is_running = True

        try:
            anim = FuncAnimation(self.fig, self.update, interval=33, blit=False)
            plt.show()
        except KeyboardInterrupt:
            print("\nVisualization stopped")
        finally:
            self.cleanup()

    def cleanup(self):
        """Cleanup."""
        self.is_running = False
        plt.close('all')
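To drive this visualizer directly rather than through HandDetector.run_interactive(mode='3d'), a minimal sketch (not part of the packaged files; assumes a camera and a GUI-capable matplotlib backend):

```python
from hand_tracking.hand_detector import HandDetector
from hand_tracking.visualizer_3d import Hand3DVisualizer

detector = HandDetector()        # opens the camera configured in the YAML/defaults
viz = Hand3DVisualizer(detector)
viz.run()                        # blocks until the matplotlib window is closed
```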