hand-tracking-teleop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hand-tracking-teleop might be problematic. Click here for more details.

@@ -0,0 +1,221 @@
1
+ """Combined 2D + 3D visualization module for hand tracking."""
2
+
3
+ import numpy as np
4
+ import cv2
5
+ import matplotlib.pyplot as plt
6
+ from matplotlib.animation import FuncAnimation
7
+ from mpl_toolkits.mplot3d import Axes3D
8
+ from matplotlib.gridspec import GridSpec
9
+ import matplotlib
10
+
11
+
12
class CombinedVisualizer:
    """Combined 2D camera + 3D hand visualization.

    Builds a single matplotlib figure holding the annotated camera image, one
    3D skeleton plot for the left hand, one for the right hand, and a text
    info panel.

    The ``detector`` object is used through these members only:
    ``get_frame()``, ``detect_hands(frame)``, ``draw_landmarks(frame, hands)``,
    ``frame_count``, ``reference_distance_m`` and ``calibrated_palm_width_px``.
    """

    # Pairs of landmark indices joined by a line in the 3D skeleton.
    HAND_CONNECTIONS = [
        (0, 1), (1, 2), (2, 3), (3, 4),
        (0, 5), (5, 6), (6, 7), (7, 8),
        (0, 9), (9, 10), (10, 11), (11, 12),
        (0, 13), (13, 14), (14, 15), (15, 16),
        (0, 17), (17, 18), (18, 19), (19, 20),
        (5, 9), (9, 13), (13, 17)
    ]

    # Landmarks that get a small XYZ triad: index 0 plus the last landmark of
    # each four-link chain in HAND_CONNECTIONS.
    _KEY_LANDMARKS = (0, 4, 8, 12, 16, 20)

    # One colour per four-landmark chain (1-4, 5-8, 9-12, 13-16, 17-20), plus
    # a separate colour for links that join two different chains.
    _FINGER_HEX_COLORS = ('#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7')
    _PALM_HEX_COLOR = '#DFE6E9'

    def __init__(self, detector):
        """Initialize with hand detector.

        Args:
            detector: Object providing frames, hand detections and the
                calibration attributes listed in the class docstring.
        """
        self.detector = detector
        self.is_running = False
        self._anim = None

        # Create figure: camera spans both rows of column 0, the two 3D plots
        # share the top row, and the info panel spans the bottom-right cells.
        self.fig = plt.figure(figsize=(18, 7))
        gs = GridSpec(2, 3, figure=self.fig, width_ratios=[2, 1, 1], height_ratios=[1, 1])

        # Camera view
        self.ax_camera = self.fig.add_subplot(gs[:, 0])
        self.ax_camera.set_title('Camera View', fontsize=14, fontweight='bold')
        self.ax_camera.axis('off')

        # 3D plots
        self.ax_left = self.fig.add_subplot(gs[0, 1], projection='3d')
        self.ax_left.set_title('Left Hand', fontsize=12, fontweight='bold')

        self.ax_right = self.fig.add_subplot(gs[0, 2], projection='3d')
        self.ax_right.set_title('Right Hand', fontsize=12, fontweight='bold')

        # Info panel
        self.ax_info = self.fig.add_subplot(gs[1, 1:])
        self.ax_info.axis('off')

        # Setup 3D axes
        for ax in (self.ax_left, self.ax_right):
            self._setup_3d_axis(ax)

        # Initialize plots; the image artist is created lazily on first frame.
        self.camera_img = None
        self.left_hand_plots = self._init_hand_plots(self.ax_left)
        self.right_hand_plots = self._init_hand_plots(self.ax_right)
        self.info_text = self.ax_info.text(0.1, 0.5, '', fontsize=10, family='monospace')

        plt.tight_layout()

        # The title never changes, so set it once here (after the layout pass)
        # instead of on every animation tick.
        self.fig.suptitle('Hand Tracking - 2D + 3D View', fontsize=16, fontweight='bold')

    def _setup_3d_axis(self, ax):
        """Configure limits, labels, camera angle and grid of a 3D axis.

        All limits are in meters: X and Y span -0.30..0.30, Z spans
        -0.15..0.15.

        Args:
            ax: The 3D axes to configure.
        """
        ax.set_xlim([-0.30, 0.30])
        ax.set_ylim([-0.30, 0.30])
        ax.set_zlim([-0.15, 0.15])
        ax.set_xlabel('X (m)', fontsize=8)
        ax.set_ylabel('Y (m)', fontsize=8)
        ax.set_zlabel('Z (m)', fontsize=8)
        ax.view_init(elev=15, azim=45)
        ax.grid(True, alpha=0.3)

    def _init_hand_plots(self, ax):
        """Create the (initially empty) artists used to draw one hand.

        Args:
            ax: The 3D axes the artists are added to.

        Returns:
            dict with keys ``'landmarks'`` (scatter artist), ``'connections'``
            (one line per entry of ``HAND_CONNECTIONS``) and ``'axes'`` (one
            ``[x, y, z]`` triple of lines per key landmark).
        """
        plots = {}
        plots['landmarks'] = ax.scatter([], [], [], c='red', s=30, alpha=0.8)

        # Connection colours depend only on the connection index, so assign
        # them once at creation instead of on every frame.
        plots['connections'] = []
        for conn_idx, _ in enumerate(self.HAND_CONNECTIONS):
            line, = ax.plot(
                [], [], [], '-',
                color=self._get_connection_color(conn_idx),
                linewidth=1.5, alpha=0.6,
            )
            plots['connections'].append(line)

        # Coordinate axes: red = X, green = Y, blue = Z.
        plots['axes'] = []
        for _ in self._KEY_LANDMARKS:
            x_axis, = ax.plot([], [], [], 'r-', linewidth=1, alpha=0.6)
            y_axis, = ax.plot([], [], [], 'g-', linewidth=1, alpha=0.6)
            z_axis, = ax.plot([], [], [], 'b-', linewidth=1, alpha=0.6)
            plots['axes'].append([x_axis, y_axis, z_axis])

        return plots

    @staticmethod
    def _finger_index(landmark_idx):
        """Map a landmark index to its chain number (1-4 -> 0, 5-8 -> 1, ...)."""
        return max(landmark_idx - 1, 0) // 4

    def _get_connection_color(self, conn_idx):
        """Get the hex colour for a connection.

        A link is coloured by the chain of its higher-numbered endpoint when
        it starts at landmark 0 or stays inside one chain. A link joining two
        different chains (the (5, 9), (9, 13), (13, 17) entries) gets the palm
        colour; classifying purely by the larger index would give those links
        a finger colour and never reach the palm colour.

        Args:
            conn_idx: Index into ``HAND_CONNECTIONS``.

        Returns:
            Hex colour string.
        """
        start, end = self.HAND_CONNECTIONS[conn_idx]
        lo, hi = min(start, end), max(start, end)

        finger = self._finger_index(hi)
        crosses_fingers = lo != 0 and self._finger_index(lo) != finger
        if crosses_fingers or finger >= len(self._FINGER_HEX_COLORS):
            return self._PALM_HEX_COLOR
        return self._FINGER_HEX_COLORS[finger]

    def _update_camera_view(self, frame, hands):
        """Update 2D camera view.

        Args:
            frame: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2RGB``
                after landmarks are drawn on a copy; ``None`` is ignored.
            hands: Detections forwarded to ``detector.draw_landmarks``.
        """
        if frame is None:
            return

        # Draw on a copy so the caller's frame is left untouched.
        frame_with_landmarks = self.detector.draw_landmarks(frame.copy(), hands)
        frame_rgb = cv2.cvtColor(frame_with_landmarks, cv2.COLOR_BGR2RGB)

        if self.camera_img is None:
            self.camera_img = self.ax_camera.imshow(frame_rgb)
        else:
            # Reuse the existing image artist rather than creating a new one.
            self.camera_img.set_data(frame_rgb)

    @staticmethod
    def _set_line_3d(line, xs, ys, zs):
        """Replace the data of a 3D line artist."""
        line.set_data(xs, ys)
        line.set_3d_properties(zs)

    def _update_3d_plot(self, ax, plots, hand_data):
        """Update the 3D artists of one hand.

        Args:
            ax: The 3D axes the artists live on (not used here; kept for
                interface compatibility).
            plots: Artist dict produced by ``_init_hand_plots``.
            hand_data: Detection for this hand, or ``None`` to blank the plot.
        """
        if hand_data is None:
            plots['landmarks']._offsets3d = ([], [], [])
            for line in plots['connections']:
                self._set_line_3d(line, [], [], [])
            for axis_lines in plots['axes']:
                for line in axis_lines:
                    self._set_line_3d(line, [], [], [])
            return

        # Get normalized landmarks in meters (palm width ≈ 0.08m)
        # Pass calibration params for depth estimation from palm size
        normalized = hand_data.get_normalized_landmarks(
            palm_size_meters=0.08,
            reference_distance_m=self.detector.reference_distance_m,
            reference_palm_width_px=self.detector.calibrated_palm_width_px
        )

        # Update landmarks. NOTE(review): ``_offsets3d`` is a private
        # matplotlib attribute; kept as in the original implementation.
        plots['landmarks']._offsets3d = (normalized[:, 0], normalized[:, 1], normalized[:, 2])

        # Update connections
        for line, (start, end) in zip(plots['connections'], self.HAND_CONNECTIONS):
            self._set_line_3d(
                line,
                [normalized[start, 0], normalized[end, 0]],
                [normalized[start, 1], normalized[end, 1]],
                [normalized[start, 2], normalized[end, 2]],
            )

        # Update axes: a short segment along +X, +Y and +Z at each key landmark.
        axis_length = 0.01  # 1cm axis lines in meters
        for (x_line, y_line, z_line), landmark_idx in zip(plots['axes'], self._KEY_LANDMARKS):
            px, py, pz = normalized[landmark_idx][:3]
            self._set_line_3d(x_line, [px, px + axis_length], [py, py], [pz, pz])
            self._set_line_3d(y_line, [px, px], [py, py + axis_length], [pz, pz])
            self._set_line_3d(z_line, [px, px], [py, py], [pz, pz + axis_length])

    def _update_info_display(self, hands):
        """Update info panel with the frame counter and one line per hand.

        Args:
            hands: Detections; each must expose ``handedness`` and
                ``confidence``.
        """
        info = [f"Frame: {self.detector.frame_count}", f"Hands: {len(hands)}", ""]
        info.extend(
            f"Hand {i+1}: {hand.handedness} (Conf: {hand.confidence:.3f})"
            for i, hand in enumerate(hands)
        )
        if not hands:
            info.append("No hands detected")
        self.info_text.set_text('\n'.join(info))

    def update(self, frame_num):
        """Animation callback: grab a frame, detect hands, refresh all panels.

        Args:
            frame_num: Frame counter supplied by ``FuncAnimation`` (unused).

        Returns:
            An empty list; the animation runs with ``blit=False`` so no
            artist list is needed.
        """
        frame = self.detector.get_frame()
        if frame is None:
            return []

        hands = self.detector.detect_hands(frame)

        # Only the first detection of each handedness is displayed.
        left_hand = next((h for h in hands if h.handedness == 'Left'), None)
        right_hand = next((h for h in hands if h.handedness == 'Right'), None)

        self._update_camera_view(frame, hands)
        self._update_3d_plot(self.ax_left, self.left_hand_plots, left_hand)
        self._update_3d_plot(self.ax_right, self.right_hand_plots, right_hand)
        self._update_info_display(hands)

        self.detector.frame_count += 1
        return []

    def run(self):
        """Run visualization until the window is closed or Ctrl+C is pressed."""
        print("Starting combined 2D + 3D visualization...")
        print("Controls: Rotate 3D (drag), Zoom (scroll), Close window to exit")

        self.is_running = True

        try:
            # Keep a reference so the animation is not garbage-collected.
            # The frame source is unbounded, so frame-data caching is disabled
            # explicitly.
            self._anim = FuncAnimation(
                self.fig, self.update, interval=33, blit=False,
                cache_frame_data=False,
            )
            plt.show()
        except KeyboardInterrupt:
            print("\nVisualization stopped")
        finally:
            self.cleanup()

    def cleanup(self):
        """Mark the visualizer as stopped and close all matplotlib figures."""
        self.is_running = False
        plt.close('all')
@@ -0,0 +1,255 @@
1
+ """Rerun-based visualization module for hand tracking.
2
+
3
+ This module provides real-time hand visualization using the Rerun SDK,
4
+ offering improved performance and better 3D interaction compared to matplotlib.
5
+ """
6
+
7
+ import numpy as np
8
+ import cv2
9
+ from typing import Optional, List
10
+
11
+ try:
12
+ import rerun as rr
13
+ import rerun.blueprint as rrb
14
+ RERUN_AVAILABLE = True
15
+ except ImportError:
16
+ RERUN_AVAILABLE = False
17
+
18
+
19
class RerunVisualizer:
    """Real-time hand visualization using Rerun SDK.

    Logs the annotated camera image under ``world/camera`` and, per hand,
    3D landmark points plus per-finger line strips under
    ``world/hands_3d/<left|right>``.

    The ``detector`` object is used through these members only:
    ``get_frame()``, ``detect_hands(frame)``, ``draw_landmarks(frame, hands)``,
    ``frame_count``, ``reference_distance_m`` and ``calibrated_palm_width_px``.
    """

    # MediaPipe hand connections grouped by finger
    FINGER_CONNECTIONS = {
        'thumb': [(0, 1), (1, 2), (2, 3), (3, 4)],
        'index': [(0, 5), (5, 6), (6, 7), (7, 8)],
        'middle': [(0, 9), (9, 10), (10, 11), (11, 12)],
        'ring': [(0, 13), (13, 14), (14, 15), (15, 16)],
        'pinky': [(0, 17), (17, 18), (18, 19), (19, 20)],
        'palm': [(5, 9), (9, 13), (13, 17)],
    }

    # Colors for each finger (RGB format)
    FINGER_COLORS = {
        'thumb': [255, 107, 107],   # Red
        'index': [78, 205, 196],    # Teal
        'middle': [69, 183, 209],   # Blue
        'ring': [150, 206, 180],    # Green
        'pinky': [255, 234, 167],   # Yellow
        'palm': [223, 230, 233],    # Gray
    }

    # Landmark indices for each finger
    FINGER_LANDMARKS = {
        'thumb': [1, 2, 3, 4],
        'index': [5, 6, 7, 8],
        'middle': [9, 10, 11, 12],
        'ring': [13, 14, 15, 16],
        'pinky': [17, 18, 19, 20],
        'palm': [0],  # Wrist
    }

    # Reverse lookup (landmark index -> finger name), built once so the colour
    # of a landmark is a dict lookup instead of a scan over every finger.
    _LANDMARK_FINGER = {
        idx: finger
        for finger, indices in FINGER_LANDMARKS.items()
        for idx in indices
    }

    def __init__(self, detector):
        """Initialize with hand detector.

        Args:
            detector: HandDetector instance for accessing camera and detection.

        Raises:
            ImportError: If the Rerun SDK could not be imported.
        """
        if not RERUN_AVAILABLE:
            raise ImportError(
                "Rerun SDK is required for this visualization mode.\n"
                "Install with: pip install rerun-sdk"
            )

        self.detector = detector
        self.is_running = False
        self._setup_rerun()

    def _setup_rerun(self):
        """Initialize Rerun recording and send blueprint."""
        # Initialize Rerun with application name; spawn=True launches a viewer.
        rr.init("hand_tracking", spawn=True)

        # Create blueprint for layout: camera and 3D view side by side,
        # with equal column shares.
        camera_view = rrb.Spatial2DView(
            name="Camera",
            origin="world/camera",
        )
        hands_view = rrb.Spatial3DView(
            name="3D Hands",
            origin="world/hands_3d",
        )
        blueprint = rrb.Horizontal(
            camera_view,
            hands_view,
            column_shares=[1, 1],
        )

        # Send the blueprint
        rr.send_blueprint(blueprint)

        # Log initial coordinate system info (static: applies to all frames).
        rr.log("world/hands_3d", rr.ViewCoordinates.RIGHT_HAND_Y_DOWN, static=True)

    def _get_landmark_color(self, landmark_idx: int) -> List[int]:
        """Get color for a specific landmark index.

        Args:
            landmark_idx: Index of the landmark (0-20). Any index not listed
                in ``FINGER_LANDMARKS`` gets the palm colour.

        Returns:
            RGB color as a new list of 3 integers. A copy is returned so
            callers cannot accidentally mutate the shared class-level palette.
        """
        finger = self._LANDMARK_FINGER.get(landmark_idx, 'palm')
        return list(self.FINGER_COLORS[finger])

    def _log_hand_3d(self, hand_data, handedness: str):
        """Log 3D hand landmarks and connections to Rerun.

        Args:
            hand_data: HandLandmarks object with detection data, or ``None``
                to clear what was previously logged for this hand.
            handedness: 'left' or 'right'; used as the entity path segment.
        """
        base_path = f"world/hands_3d/{handedness}"

        if hand_data is None:
            # Clear the hand data if no detection
            rr.log(f"{base_path}/landmarks", rr.Clear(recursive=False))
            for finger in self.FINGER_CONNECTIONS:
                rr.log(f"{base_path}/{finger}", rr.Clear(recursive=False))
            return

        # Get normalized landmarks in meters (palm width ≈ 0.08m)
        # Pass calibration params for depth estimation from palm size
        normalized = hand_data.get_normalized_landmarks(
            palm_size_meters=0.08,
            reference_distance_m=self.detector.reference_distance_m,
            reference_palm_width_px=self.detector.calibrated_palm_width_px
        )

        # Prepare landmark data with colors
        # Radii in meters (coordinates are now metric). The per-point lists
        # are sized from the data so they always match the position count.
        positions = normalized.tolist()
        point_count = len(positions)
        colors = [self._get_landmark_color(i) for i in range(point_count)]
        radii = [0.008 if i == 0 else 0.005 for i in range(point_count)]  # ~8mm wrist, ~5mm joints

        # Log landmarks as 3D points
        rr.log(
            f"{base_path}/landmarks",
            rr.Points3D(
                positions=positions,
                colors=colors,
                radii=radii,
            )
        )

        # Log connections as line strips for each finger
        for finger, connections in self.FINGER_CONNECTIONS.items():
            strips = [
                [positions[start_idx], positions[end_idx]]
                for start_idx, end_idx in connections
            ]

            rr.log(
                f"{base_path}/{finger}",
                rr.LineStrips3D(
                    strips=strips,
                    colors=[self.FINGER_COLORS[finger]],
                    radii=[0.002],  # ~2mm line thickness
                )
            )

        # Log metadata as text (positions now in meters)
        wrist_pos = normalized[0]
        rr.log(
            f"{base_path}/info",
            rr.TextLog(
                f"{hand_data.handedness} Hand | "
                f"Conf: {hand_data.confidence:.2f} | "
                f"Wrist: ({wrist_pos[0]:.3f}m, {wrist_pos[1]:.3f}m, {wrist_pos[2]:.3f}m)"
            )
        )

    def _log_camera_frame(self, frame: np.ndarray, hands: List):
        """Log camera frame with optional overlay to Rerun.

        Args:
            frame: BGR image from camera; ``None`` is ignored.
            hands: List of detected HandLandmarks.
        """
        if frame is None:
            return

        # Draw landmarks on a copy so the caller's frame is left untouched.
        frame_with_landmarks = self.detector.draw_landmarks(frame.copy(), hands)

        # Convert BGR to RGB for Rerun
        frame_rgb = cv2.cvtColor(frame_with_landmarks, cv2.COLOR_BGR2RGB)

        # Log the image
        rr.log("world/camera/image", rr.Image(frame_rgb))

        # Log frame metadata
        rr.log(
            "world/camera/info",
            rr.TextLog(f"Frame: {self.detector.frame_count} | Hands: {len(hands)}")
        )

    def update(self) -> bool:
        """Process one frame and log to Rerun.

        Returns:
            True if frame was processed successfully, False otherwise.
        """
        frame = self.detector.get_frame()
        if frame is None:
            return False

        # Detect hands
        hands = self.detector.detect_hands(frame)

        # Log camera frame with overlays
        self._log_camera_frame(frame, hands)

        # Find left and right hands (first detection of each handedness).
        left_hand = next((h for h in hands if h.handedness == 'Left'), None)
        right_hand = next((h for h in hands if h.handedness == 'Right'), None)

        # Log 3D hand data; a missing hand clears its previous entities.
        self._log_hand_3d(left_hand, 'left')
        self._log_hand_3d(right_hand, 'right')

        self.detector.frame_count += 1
        return True

    def run(self):
        """Run the main visualization loop until a frame fails or Ctrl+C."""
        print("Starting Rerun visualization...")
        print("The Rerun viewer should open automatically.")
        print("Features:")
        print("  - 2D camera view with landmark overlay")
        print("  - 3D hand visualization with color-coded fingers")
        print("  - Timeline scrubbing and playback controls")
        print("Press Ctrl+C to stop.")

        self.is_running = True

        try:
            while self.is_running:
                if not self.update():
                    print("Failed to get frame, stopping...")
                    break

                # Check for OpenCV window quit (optional backup).
                # NOTE(review): this class opens no OpenCV window itself, so
                # the key check presumably only fires if the caller has one
                # open - confirm it is still wanted.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        except KeyboardInterrupt:
            print("\nVisualization stopped by user")
        finally:
            self.cleanup()

    def cleanup(self):
        """Clean up resources."""
        self.is_running = False
        print("Rerun visualization cleanup complete.")