foodforthought-cli 0.2.7-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. ate/__init__.py +6 -0
  2. ate/__main__.py +16 -0
  3. ate/auth/__init__.py +1 -0
  4. ate/auth/device_flow.py +141 -0
  5. ate/auth/token_store.py +96 -0
  6. ate/behaviors/__init__.py +100 -0
  7. ate/behaviors/approach.py +399 -0
  8. ate/behaviors/common.py +686 -0
  9. ate/behaviors/tree.py +454 -0
  10. ate/cli.py +855 -3995
  11. ate/client.py +90 -0
  12. ate/commands/__init__.py +168 -0
  13. ate/commands/auth.py +389 -0
  14. ate/commands/bridge.py +448 -0
  15. ate/commands/data.py +185 -0
  16. ate/commands/deps.py +111 -0
  17. ate/commands/generate.py +384 -0
  18. ate/commands/memory.py +907 -0
  19. ate/commands/parts.py +166 -0
  20. ate/commands/primitive.py +399 -0
  21. ate/commands/protocol.py +288 -0
  22. ate/commands/recording.py +524 -0
  23. ate/commands/repo.py +154 -0
  24. ate/commands/simulation.py +291 -0
  25. ate/commands/skill.py +303 -0
  26. ate/commands/skills.py +487 -0
  27. ate/commands/team.py +147 -0
  28. ate/commands/workflow.py +271 -0
  29. ate/detection/__init__.py +38 -0
  30. ate/detection/base.py +142 -0
  31. ate/detection/color_detector.py +399 -0
  32. ate/detection/trash_detector.py +322 -0
  33. ate/drivers/__init__.py +39 -0
  34. ate/drivers/ble_transport.py +405 -0
  35. ate/drivers/mechdog.py +942 -0
  36. ate/drivers/wifi_camera.py +477 -0
  37. ate/interfaces/__init__.py +187 -0
  38. ate/interfaces/base.py +273 -0
  39. ate/interfaces/body.py +267 -0
  40. ate/interfaces/detection.py +282 -0
  41. ate/interfaces/locomotion.py +422 -0
  42. ate/interfaces/manipulation.py +408 -0
  43. ate/interfaces/navigation.py +389 -0
  44. ate/interfaces/perception.py +362 -0
  45. ate/interfaces/sensors.py +247 -0
  46. ate/interfaces/types.py +371 -0
  47. ate/llm_proxy.py +239 -0
  48. ate/mcp_server.py +387 -0
  49. ate/memory/__init__.py +35 -0
  50. ate/memory/cloud.py +244 -0
  51. ate/memory/context.py +269 -0
  52. ate/memory/embeddings.py +184 -0
  53. ate/memory/export.py +26 -0
  54. ate/memory/merge.py +146 -0
  55. ate/memory/migrate/__init__.py +34 -0
  56. ate/memory/migrate/base.py +89 -0
  57. ate/memory/migrate/pipeline.py +189 -0
  58. ate/memory/migrate/sources/__init__.py +13 -0
  59. ate/memory/migrate/sources/chroma.py +170 -0
  60. ate/memory/migrate/sources/pinecone.py +120 -0
  61. ate/memory/migrate/sources/qdrant.py +110 -0
  62. ate/memory/migrate/sources/weaviate.py +160 -0
  63. ate/memory/reranker.py +353 -0
  64. ate/memory/search.py +26 -0
  65. ate/memory/store.py +548 -0
  66. ate/recording/__init__.py +83 -0
  67. ate/recording/demonstration.py +378 -0
  68. ate/recording/session.py +415 -0
  69. ate/recording/upload.py +304 -0
  70. ate/recording/visual.py +416 -0
  71. ate/recording/wrapper.py +95 -0
  72. ate/robot/__init__.py +221 -0
  73. ate/robot/agentic_servo.py +856 -0
  74. ate/robot/behaviors.py +493 -0
  75. ate/robot/ble_capture.py +1000 -0
  76. ate/robot/ble_enumerate.py +506 -0
  77. ate/robot/calibration.py +668 -0
  78. ate/robot/calibration_state.py +388 -0
  79. ate/robot/commands.py +3735 -0
  80. ate/robot/direction_calibration.py +554 -0
  81. ate/robot/discovery.py +441 -0
  82. ate/robot/introspection.py +330 -0
  83. ate/robot/llm_system_id.py +654 -0
  84. ate/robot/locomotion_calibration.py +508 -0
  85. ate/robot/manager.py +270 -0
  86. ate/robot/marker_generator.py +611 -0
  87. ate/robot/perception.py +502 -0
  88. ate/robot/primitives.py +614 -0
  89. ate/robot/profiles.py +281 -0
  90. ate/robot/registry.py +322 -0
  91. ate/robot/servo_mapper.py +1153 -0
  92. ate/robot/skill_upload.py +675 -0
  93. ate/robot/target_calibration.py +500 -0
  94. ate/robot/teach.py +515 -0
  95. ate/robot/types.py +242 -0
  96. ate/robot/visual_labeler.py +1048 -0
  97. ate/robot/visual_servo_loop.py +494 -0
  98. ate/robot/visual_servoing.py +570 -0
  99. ate/robot/visual_system_id.py +906 -0
  100. ate/transports/__init__.py +121 -0
  101. ate/transports/base.py +394 -0
  102. ate/transports/ble.py +405 -0
  103. ate/transports/hybrid.py +444 -0
  104. ate/transports/serial.py +345 -0
  105. ate/urdf/__init__.py +30 -0
  106. ate/urdf/capture.py +582 -0
  107. ate/urdf/cloud.py +491 -0
  108. ate/urdf/collision.py +271 -0
  109. ate/urdf/commands.py +708 -0
  110. ate/urdf/depth.py +360 -0
  111. ate/urdf/inertial.py +312 -0
  112. ate/urdf/kinematics.py +330 -0
  113. ate/urdf/lifting.py +415 -0
  114. ate/urdf/meshing.py +300 -0
  115. ate/urdf/models/__init__.py +110 -0
  116. ate/urdf/models/depth_anything.py +253 -0
  117. ate/urdf/models/sam2.py +324 -0
  118. ate/urdf/motion_analysis.py +396 -0
  119. ate/urdf/pipeline.py +468 -0
  120. ate/urdf/scale.py +256 -0
  121. ate/urdf/scan_session.py +411 -0
  122. ate/urdf/segmentation.py +299 -0
  123. ate/urdf/synthesis.py +319 -0
  124. ate/urdf/topology.py +336 -0
  125. ate/urdf/validation.py +371 -0
  126. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/METADATA +9 -1
  127. foodforthought_cli-0.3.0.dist-info/RECORD +166 -0
  128. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/WHEEL +1 -1
  129. foodforthought_cli-0.2.7.dist-info/RECORD +0 -44
  130. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/entry_points.txt +0 -0
  131. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/top_level.txt +0 -0
ate/robot/agentic_servo.py (new file)
@@ -0,0 +1,856 @@
+"""
+Agentic Visual Servoing with LLM Decision Making.
+
+Uses a cheap LLM (Haiku) for high-level decisions while CV handles
+low-level detection. This hybrid approach gives flexibility for
+complex scenarios while keeping costs low.
+
+Cost comparison per pickup attempt (~20 iterations):
+- Pure CV: $0 (no API calls)
+- Haiku for each frame: ~$0.001 (20 calls * 1K tokens * $0.25/1M)
+- Opus for each frame: ~$0.30 (20 calls * 1K tokens * $15/1M)
+
+Architecture:
+- CV: Ball detection (fast, deterministic)
+- Haiku: Decision making (when to search, give up, recover)
+- Main Agent: Orchestration, reporting results
+"""
+
+import time
+import json
+import base64
+from dataclasses import dataclass
+from typing import Optional, List, Dict, Any, Callable
+from enum import Enum
+
+try:
+    import cv2
+    import numpy as np
+    HAS_CV = True
+except ImportError:
+    HAS_CV = False
+
+try:
+    import anthropic
+    HAS_ANTHROPIC = True
+except ImportError:
+    HAS_ANTHROPIC = False
+
+# LLM Proxy for metered/billable requests through FoodforThought
+try:
+    from ..llm_proxy import LLMProxy, LLMProxyError
+    HAS_LLM_PROXY = True
+except ImportError:
+    HAS_LLM_PROXY = False
+
+from .visual_servoing import (
+    GreenBallDetector,
+    TargetDetection,
+    ServoState,
+)
+
+
+@dataclass
+class AgentDecision:
+    """Decision from the LLM agent."""
+    action: str  # "turn_left", "turn_right", "forward", "grab", "search", "give_up"
+    confidence: float
+    reasoning: str
+    parameters: Dict[str, Any]
+
+
+class HaikuServoAgent:
+    """
+    LLM-powered decision maker for visual servoing.
+
+    Uses Haiku for cheap, fast decisions about robot actions.
+    Falls back to rule-based decisions if API unavailable.
+
+    Supports two modes:
+    - use_proxy=True (default): Routes through FoodforThought edge function
+      - Automatically tracks usage and billing
+      - Requires 'ate login' authentication
+      - Cost metered per user
+    - use_proxy=False: Direct Anthropic API calls
+      - Requires ANTHROPIC_API_KEY env var
+      - No metering/billing
+    """
+
+    SYSTEM_PROMPT = """You are a robot control agent. You analyze camera images and ball detection data to decide the next action for a quadruped robot trying to pick up a green ball.
+
+Available actions:
+- turn_left: Rotate left to center the ball in view
+- turn_right: Rotate right to center the ball in view
+- forward: Move forward toward the ball
+- grab: Execute pickup sequence (only when very close)
+- search: Rotate to look for the ball (when not visible)
+- give_up: Stop trying (after many failed attempts)
+
+You will receive:
+1. Ball detection data (position, size, confidence)
+2. History of recent detections
+3. Current iteration count
+
+Respond with JSON:
+{
+  "action": "action_name",
+  "confidence": 0.0-1.0,
+  "reasoning": "brief explanation",
+  "parameters": {"speed": 25, ...}  // optional
+}
+
+Decision guidelines:
+- Ball x < 0.4: turn_left
+- Ball x > 0.6: turn_right
+- Ball centered (0.4-0.6) and small (size < 0.15): forward
+- Ball centered and large (size >= 0.15): grab
+- Ball not found: search (but give_up after 30+ iterations with no progress)
+- Lost ball multiple times: search more aggressively"""
+
+    def __init__(
+        self,
+        use_vision: bool = False,  # Whether to send images to Haiku
+        max_tokens: int = 150,
+        use_proxy: bool = True,  # Route through FoodforThought for metering/billing
+    ):
+        self.use_vision = use_vision
+        self.max_tokens = max_tokens
+        self.use_proxy = use_proxy
+        self.client = None
+        self.proxy = None
+
+        # Try proxy first (preferred for metering/billing)
+        if use_proxy and HAS_LLM_PROXY:
+            try:
+                self.proxy = LLMProxy()
+                if self.proxy.is_authenticated():
+                    print(" [Agent] Using FoodforThought proxy (metered)")
+                else:
+                    print(" [Agent] Not logged in - run 'ate login' for metered access")
+                    self.proxy = None
+            except Exception as e:
+                print(f" [Agent] Proxy init failed: {e}")
+                self.proxy = None
+
+        # Fallback to direct Anthropic if proxy unavailable
+        if not self.proxy and HAS_ANTHROPIC:
+            try:
+                self.client = anthropic.Anthropic()
+                print(" [Agent] Using direct Anthropic API (unmetered)")
+            except Exception:
+                pass
+
+        if not self.proxy and not self.client:
+            print(" [Agent] No LLM available - using rule-based fallback")
+
+    def decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+        image: Optional[np.ndarray] = None,
+    ) -> AgentDecision:
+        """
+        Get decision from Haiku agent.
+
+        Args:
+            detection: Current ball detection
+            history: Recent detection history
+            iteration: Current iteration number
+            image: Optional camera image for vision-based decision
+
+        Returns:
+            AgentDecision with recommended action
+        """
+        # Try proxy first (metered/billable)
+        if self.proxy:
+            try:
+                return self._proxy_decide(detection, history, iteration, image)
+            except LLMProxyError as e:
+                print(f" [Agent] Proxy error: {e}")
+                if e.status_code == 402:
+                    # Rate limit - don't fallback, let user upgrade
+                    return AgentDecision(
+                        action="give_up",
+                        confidence=1.0,
+                        reasoning=f"Rate limit exceeded: {e}",
+                        parameters={},
+                    )
+            except Exception as e:
+                print(f" [Agent] Proxy error, trying direct: {e}")
+
+        # Try direct Anthropic client
+        if self.client:
+            try:
+                return self._llm_decide(detection, history, iteration, image)
+            except Exception as e:
+                print(f" [Agent] LLM error, using rules: {e}")
+
+        # Fallback to rule-based
+        return self._rule_based_decide(detection, history, iteration)
+
+    def _llm_decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+        image: Optional[np.ndarray],
+    ) -> AgentDecision:
+        """Get decision from Haiku."""
+
+        # Build context message
+        context = {
+            "current_detection": {
+                "found": detection.found,
+                "x": round(detection.x, 3),
+                "y": round(detection.y, 3),
+                "size": round(detection.size, 3),
+                "confidence": round(detection.confidence, 3),
+                "center_offset": round(detection.center_offset, 3),
+                "is_centered": detection.is_centered,
+                "is_close": detection.is_close,
+            },
+            "history_summary": {
+                "total_frames": len(history),
+                "found_count": sum(1 for h in history if h.found),
+                "recent_found": [h.found for h in history[-5:]],
+            },
+            "iteration": iteration,
+        }
+
+        messages = [
+            {
+                "role": "user",
+                "content": f"Iteration {iteration}. Detection data:\n{json.dumps(context, indent=2)}\n\nWhat action should the robot take?"
+            }
+        ]
+
+        # Optionally include image
+        if self.use_vision and image is not None and image.size > 0:
+            # Resize for efficiency
+            small = cv2.resize(image, (320, 240))
+            _, buffer = cv2.imencode('.jpg', small, [cv2.IMWRITE_JPEG_QUALITY, 70])
+            b64_image = base64.b64encode(buffer).decode('utf-8')
+
+            messages[0]["content"] = [
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": b64_image,
+                    }
+                },
+                {
+                    "type": "text",
+                    "text": messages[0]["content"]
+                }
+            ]
+
+        response = self.client.messages.create(
+            model="claude-3-5-haiku-20241022",
+            max_tokens=self.max_tokens,
+            system=self.SYSTEM_PROMPT,
+            messages=messages,
+        )
+
+        # Parse response
+        text = response.content[0].text.strip()
+
+        # Extract JSON from response
+        try:
+            # Handle markdown code blocks
+            if "```" in text:
+                text = text.split("```")[1]
+                if text.startswith("json"):
+                    text = text[4:]
+
+            data = json.loads(text)
+            return AgentDecision(
+                action=data.get("action", "search"),
+                confidence=data.get("confidence", 0.5),
+                reasoning=data.get("reasoning", ""),
+                parameters=data.get("parameters", {}),
+            )
+        except json.JSONDecodeError:
+            # Try to extract action from text
+            text_lower = text.lower()
+            if "turn_left" in text_lower:
+                return AgentDecision("turn_left", 0.7, text, {})
+            elif "turn_right" in text_lower:
+                return AgentDecision("turn_right", 0.7, text, {})
+            elif "forward" in text_lower:
+                return AgentDecision("forward", 0.7, text, {})
+            elif "grab" in text_lower:
+                return AgentDecision("grab", 0.7, text, {})
+            elif "give_up" in text_lower:
+                return AgentDecision("give_up", 0.7, text, {})
+            else:
+                return AgentDecision("search", 0.5, text, {})
+
+    def _proxy_decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+        image: Optional[np.ndarray],
+    ) -> AgentDecision:
+        """Get decision via FoodforThought proxy (metered/billable)."""
+
+        # Build context message (same as _llm_decide)
+        context = {
+            "current_detection": {
+                "found": detection.found,
+                "x": round(detection.x, 3),
+                "y": round(detection.y, 3),
+                "size": round(detection.size, 3),
+                "confidence": round(detection.confidence, 3),
+                "center_offset": round(detection.center_offset, 3),
+                "is_centered": detection.is_centered,
+                "is_close": detection.is_close,
+            },
+            "history_summary": {
+                "total_frames": len(history),
+                "found_count": sum(1 for h in history if h.found),
+                "recent_found": [h.found for h in history[-5:]],
+            },
+            "iteration": iteration,
+        }
+
+        user_content = f"Iteration {iteration}. Detection data:\n{json.dumps(context, indent=2)}\n\nWhat action should the robot take?"
+
+        # Note: Proxy doesn't support vision yet (would need base64 in messages)
+        # For now, use data-only mode through proxy
+        messages = [{"role": "user", "content": user_content}]
+
+        # Call proxy
+        response = self.proxy.chat(
+            messages=messages,
+            model="claude-3-5-haiku-20241022",
+            max_tokens=self.max_tokens,
+            system=self.SYSTEM_PROMPT,
+        )
+
+        # Parse response (same logic as _llm_decide)
+        text = response.content.strip()
+
+        try:
+            # Handle markdown code blocks
+            if "```" in text:
+                text = text.split("```")[1]
+                if text.startswith("json"):
+                    text = text[4:]
+
+            data = json.loads(text)
+            return AgentDecision(
+                action=data.get("action", "search"),
+                confidence=data.get("confidence", 0.5),
+                reasoning=data.get("reasoning", ""),
+                parameters=data.get("parameters", {}),
+            )
+        except json.JSONDecodeError:
+            # Try to extract action from text
+            text_lower = text.lower()
+            if "turn_left" in text_lower:
+                return AgentDecision("turn_left", 0.7, text, {})
+            elif "turn_right" in text_lower:
+                return AgentDecision("turn_right", 0.7, text, {})
+            elif "forward" in text_lower:
+                return AgentDecision("forward", 0.7, text, {})
+            elif "grab" in text_lower:
+                return AgentDecision("grab", 0.7, text, {})
+            elif "give_up" in text_lower:
+                return AgentDecision("give_up", 0.7, text, {})
+            else:
+                return AgentDecision("search", 0.5, text, {})
+
+    def _rule_based_decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+    ) -> AgentDecision:
+        """Rule-based fallback when LLM unavailable."""
+
+        # Check for give up conditions
+        if iteration > 50:
+            return AgentDecision(
+                action="give_up",
+                confidence=0.9,
+                reasoning="Max iterations exceeded",
+                parameters={},
+            )
+
+        if not detection.found:
+            # Check if we've been searching too long
+            recent_found = sum(1 for h in history[-10:] if h.found)
+            if recent_found == 0 and iteration > 20:
+                return AgentDecision(
+                    action="give_up",
+                    confidence=0.7,
+                    reasoning="No detections in last 10 frames after 20 iterations",
+                    parameters={},
+                )
+
+            return AgentDecision(
+                action="search",
+                confidence=0.8,
+                reasoning="Ball not visible, searching",
+                parameters={"speed": 25},
+            )
+
+        # Ball found - decide based on position and size
+        offset = detection.center_offset
+
+        if detection.is_close and detection.is_centered:
+            return AgentDecision(
+                action="grab",
+                confidence=0.9,
+                reasoning=f"Ball centered and close (size={detection.size:.2f})",
+                parameters={},
+            )
+
+        if not detection.is_centered:
+            if offset < -0.1:
+                return AgentDecision(
+                    action="turn_left",
+                    confidence=0.8,
+                    reasoning=f"Ball on left (offset={offset:.2f})",
+                    parameters={"speed": 20},
+                )
+            else:
+                return AgentDecision(
+                    action="turn_right",
+                    confidence=0.8,
+                    reasoning=f"Ball on right (offset={offset:.2f})",
+                    parameters={"speed": 20},
+                )
+
+        # Centered but not close
+        return AgentDecision(
+            action="forward",
+            confidence=0.8,
+            reasoning=f"Ball centered but far (size={detection.size:.2f})",
+            parameters={"speed": 20},
+        )
+
+
+class AgenticServoController:
+    """
+    Visual servoing controller with LLM decision making.
+
+    Combines:
+    - Fast CV-based ball detection
+    - Haiku-based decision making for complex scenarios
+    """
+
+    def __init__(
+        self,
+        capture_fn: Callable[[], np.ndarray],
+        move_fn: Callable[[float, float], None],
+        stop_fn: Callable[[], None],
+        gripper_open_fn: Callable[[], None],
+        gripper_close_fn: Callable[[], None],
+        arm_down_fn: Callable[[], None],
+        arm_up_fn: Callable[[], None],
+        detector: Optional[GreenBallDetector] = None,
+        agent: Optional[HaikuServoAgent] = None,
+        max_iterations: int = 50,
+        save_frames: bool = True,
+    ):
+        self.capture = capture_fn
+        self.move = move_fn
+        self.stop = stop_fn
+        self.gripper_open = gripper_open_fn
+        self.gripper_close = gripper_close_fn
+        self.arm_down = arm_down_fn
+        self.arm_up = arm_up_fn
+
+        self.detector = detector or GreenBallDetector()
+        self.agent = agent or HaikuServoAgent()
+
+        self.max_iterations = max_iterations
+        self.save_frames = save_frames
+
+        self.history: List[TargetDetection] = []
+        self.decisions: List[AgentDecision] = []
+        self.iteration = 0
+
+    def pickup_target(self) -> bool:
+        """
+        Execute pickup with agentic decision making.
+
+        Returns:
+            True if successful
+        """
+        print("\n" + "=" * 50)
+        print("AGENTIC VISUAL SERVOING")
+        print("=" * 50)
+
+        self.history = []
+        self.decisions = []
+        self.iteration = 0
+
+        while self.iteration < self.max_iterations:
+            # Capture and detect
+            image = self.capture()
+            detection = self.detector.detect(image)
+            self.history.append(detection)
+
+            # Save frame for debugging
+            if self.save_frames and image is not None and image.size > 0:
+                path = f"/tmp/agentic_{self.iteration:03d}.jpg"
+                cv2.imwrite(path, image)
+
+            # Get decision from agent
+            decision = self.agent.decide(
+                detection, self.history, self.iteration, image
+            )
+            self.decisions.append(decision)
+
+            print(f"\n[{self.iteration}] {decision.action} (conf={decision.confidence:.2f})")
+            print(f" {decision.reasoning}")
+
+            # Execute decision
+            if decision.action == "give_up":
+                print("\n[FAILED] Agent decided to give up")
+                self.stop()
+                return False
+
+            elif decision.action == "grab":
+                return self._execute_grab()
+
+            elif decision.action == "turn_left":
+                speed = decision.parameters.get("speed", 25)
+                self.move(0, -speed)
+                time.sleep(0.3)
+                self.stop()
+
+            elif decision.action == "turn_right":
+                speed = decision.parameters.get("speed", 25)
+                self.move(0, speed)
+                time.sleep(0.3)
+                self.stop()
+
+            elif decision.action == "forward":
+                speed = decision.parameters.get("speed", 20)
+                self.move(speed, 0)
+                time.sleep(0.4)
+                self.stop()
+
+            elif decision.action == "search":
+                speed = decision.parameters.get("speed", 25)
+                self.move(0, speed)
+                time.sleep(0.5)
+                self.stop()
+
+            time.sleep(0.2)
+            self.iteration += 1
+
+        print("\n[FAILED] Max iterations reached")
+        self.stop()
+        return False
+
+    def _execute_grab(self) -> bool:
+        """Execute the pickup sequence."""
+        print("\n[GRABBING]")
+
+        print(" Opening gripper...")
+        self.gripper_open()
+        time.sleep(0.4)
+
+        print(" Lowering arm...")
+        self.arm_down()
+        time.sleep(0.5)
+
+        print(" Closing gripper...")
+        self.gripper_close()
+        time.sleep(0.4)
+
+        print(" Lifting arm...")
+        self.arm_up()
+        time.sleep(0.4)
+
+        print("\n[SUCCESS] Grab sequence complete!")
+        return True
+
+    def get_summary(self) -> Dict[str, Any]:
+        """Get summary of the pickup attempt."""
+        action_counts = {}
+        for d in self.decisions:
+            action_counts[d.action] = action_counts.get(d.action, 0) + 1
+
+        return {
+            "total_iterations": self.iteration,
+            "action_counts": action_counts,
+            "detection_rate": sum(1 for h in self.history if h.found) / max(1, len(self.history)),
+            "final_action": self.decisions[-1].action if self.decisions else None,
+        }
+
+
+def run_agentic_pickup(
+    serial_port: str = "/dev/cu.usbserial-10",
+    webcam_index: int = 0,
+    use_llm_vision: bool = False,
+    use_proxy: bool = True,
+) -> bool:
+    """
+    Run the agentic pickup behavior.
+
+    Args:
+        serial_port: MechDog serial port
+        webcam_index: Webcam device index
+        use_llm_vision: Whether to send images to Haiku (more expensive)
+        use_proxy: Route through FoodforThought for metering/billing (default: True)
+            Requires 'ate login' authentication
+
+    Returns:
+        True if pickup succeeded
+    """
+    if not HAS_CV:
+        print("OpenCV required. Install: pip install opencv-python")
+        return False
+
+    try:
+        import serial
+    except ImportError:
+        print("pyserial required. Install: pip install pyserial")
+        return False
+
+    # Setup serial
+    try:
+        ser = serial.Serial(serial_port, 115200, timeout=1)
+        time.sleep(0.5)
+        ser.read(ser.in_waiting)
+
+        def cmd(command: str, wait: float = 0.3):
+            ser.write(f"{command}\r\n".encode())
+            time.sleep(wait)
+            return ser.read(ser.in_waiting).decode('utf-8', errors='ignore')
+
+        cmd("from HW_MechDog import MechDog", 1.5)
+        cmd("dog = MechDog()", 1.5)
+        print(f"Connected to MechDog")
+
+    except Exception as e:
+        print(f"Failed to connect: {e}")
+        return False
+
+    # Setup webcam
+    cap = cv2.VideoCapture(webcam_index)
+    if not cap.isOpened():
+        print("Failed to open webcam")
+        ser.close()
+        return False
+
+    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
+    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
+
+    # Create controller
+    def capture():
+        for _ in range(2):
+            cap.read()
+        ret, frame = cap.read()
+        return frame if ret else np.array([])
+
+    controller = AgenticServoController(
+        capture_fn=capture,
+        move_fn=lambda f, t: cmd(f"dog.move({int(f)}, {int(t)})", 0.2),
+        stop_fn=lambda: cmd("dog.move(0, 0)", 0.2),
+        gripper_open_fn=lambda: cmd("dog.set_servo(11, 500, 400)", 0.5),
+        gripper_close_fn=lambda: cmd("dog.set_servo(11, 2400, 400)", 0.5),
+        # CORRECTED 2026-01-03: Vision analysis confirmed servos 8,9,11 are arm
+        arm_down_fn=lambda: (cmd("dog.set_servo(8, 800, 600)", 0.7), cmd("dog.set_servo(9, 800, 600)", 0.7)),
+        arm_up_fn=lambda: (cmd("dog.set_servo(9, 2000, 600)", 0.7), cmd("dog.set_servo(8, 2200, 600)", 0.7)),
+        agent=HaikuServoAgent(use_vision=use_llm_vision, use_proxy=use_proxy),
+    )
+
+    # Run pickup
+    success = controller.pickup_target()
+
+    # Print summary
+    summary = controller.get_summary()
+    print("\n" + "=" * 50)
+    print("SUMMARY")
+    print("=" * 50)
+    print(f"Iterations: {summary['total_iterations']}")
+    print(f"Detection rate: {summary['detection_rate']:.1%}")
+    print(f"Actions: {summary['action_counts']}")
+
+    # Cleanup
+    cap.release()
+    ser.close()
+
+    return success
+
+
+def run_agentic_pickup_wifi(
+    serial_port: str = "/dev/cu.usbserial-10",
+    camera_ip: str = "192.168.4.1",
+    use_llm_vision: bool = False,
+    use_proxy: bool = True,
+) -> bool:
+    """
+    Run agentic pickup using robot's onboard WiFi camera.
+
+    This is the preferred method for visual servoing as the camera
+    moves with the robot, enabling true closed-loop control.
+
+    Prerequisites:
+    - Connect your computer to the robot's WiFi AP
+    - Robot's camera typically at 192.168.4.1 (ESP32-CAM)
+
+    Args:
+        serial_port: MechDog serial port
+        camera_ip: WiFi camera IP address
+        use_llm_vision: Whether to send images to Haiku
+        use_proxy: Route through FoodforThought for metering
+
+    Returns:
+        True if pickup succeeded
+    """
+    if not HAS_CV:
+        print("OpenCV required. Install: pip install opencv-python")
+        return False
+
+    try:
+        import serial
+    except ImportError:
+        print("pyserial required. Install: pip install pyserial")
+        return False
+
+    try:
+        from ..drivers.wifi_camera import WiFiCamera, WiFiCameraConfig
+    except ImportError:
+        print("WiFi camera driver not available")
+        return False
+
+    # Check camera connectivity first
+    print(f"Checking WiFi camera at {camera_ip}...")
+    import requests
+    try:
+        response = requests.get(f"http://{camera_ip}/status", timeout=2)
+        if response.status_code != 200:
+            print(f"Camera not responding properly (status: {response.status_code})")
+            print("Make sure you're connected to the robot's WiFi AP")
+            return False
+        print("Camera connected!")
+    except requests.RequestException as e:
+        print(f"Cannot reach camera: {e}")
+        print("\nTo use the robot's WiFi camera:")
+        print("1. Look for WiFi network starting with 'Hiwonder' or similar")
+        print("2. Connect to it (password may be printed on robot)")
+        print("3. Camera should be at 192.168.4.1")
+        return False
+
+    # Setup serial
+    try:
+        ser = serial.Serial(serial_port, 115200, timeout=1)
+        time.sleep(0.5)
+        ser.read(ser.in_waiting)
+
+        def cmd(command: str, wait: float = 0.3):
+            ser.write(f"{command}\r\n".encode())
+            time.sleep(wait)
+            return ser.read(ser.in_waiting).decode('utf-8', errors='ignore')
+
+        cmd("from HW_MechDog import MechDog", 1.5)
+        cmd("dog = MechDog()", 1.5)
+        print(f"Connected to MechDog")
+
+    except Exception as e:
+        print(f"Failed to connect to robot: {e}")
+        return False
+
+    # Setup WiFi camera
+    config = WiFiCameraConfig(
+        ip=camera_ip,
+        port=80,
+        stream_port=81,
+    )
+    wifi_cam = WiFiCamera(config)
+
+    # Create capture function that returns numpy array for OpenCV
+    def capture():
+        try:
+            from PIL import Image as PILImage
+            import io
+
+            # Get image from WiFi camera
+            response = requests.get(
+                f"http://{camera_ip}/capture",
+                timeout=2,
+                stream=True
+            )
+            if response.status_code != 200:
+                return np.array([])
+
+            # Decode JPEG to numpy array via PIL
+            pil_image = PILImage.open(io.BytesIO(response.content))
+            if pil_image.mode != "RGB":
+                pil_image = pil_image.convert("RGB")
+
+            # Convert to BGR for OpenCV
+            frame = np.array(pil_image)
+            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+            return frame
+
+        except Exception as e:
+            print(f" [Camera] Error: {e}")
+            return np.array([])
+
+    # Test capture
+    print("Testing camera capture...")
+    test_frame = capture()
+    if test_frame.size == 0:
+        print("Failed to capture test frame")
+        ser.close()
+        return False
+    print(f"Camera working: {test_frame.shape}")
+
+    # Create controller
+    controller = AgenticServoController(
+        capture_fn=capture,
+        move_fn=lambda f, t: cmd(f"dog.move({int(f)}, {int(t)})", 0.2),
+        stop_fn=lambda: cmd("dog.move(0, 0)", 0.2),
+        gripper_open_fn=lambda: cmd("dog.set_servo(11, 500, 400)", 0.5),
+        gripper_close_fn=lambda: cmd("dog.set_servo(11, 2400, 400)", 0.5),
+        # CORRECTED 2026-01-03: Vision analysis confirmed servos 8,9,11 are arm
+        arm_down_fn=lambda: (cmd("dog.set_servo(8, 800, 600)", 0.7), cmd("dog.set_servo(9, 800, 600)", 0.7)),
+        arm_up_fn=lambda: (cmd("dog.set_servo(9, 2000, 600)", 0.7), cmd("dog.set_servo(8, 2200, 600)", 0.7)),
+        agent=HaikuServoAgent(use_vision=use_llm_vision, use_proxy=use_proxy),
+    )
+
+    # Run pickup
+    success = controller.pickup_target()
+
+    # Print summary
+    summary = controller.get_summary()
+    print("\n" + "=" * 50)
+    print("SUMMARY")
+    print("=" * 50)
+    print(f"Iterations: {summary['total_iterations']}")
+    print(f"Detection rate: {summary['detection_rate']:.1%}")
+    print(f"Actions: {summary['action_counts']}")
+
+    # Cleanup
+    ser.close()
+
+    return success
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Check for --wifi flag
+    use_wifi = "--wifi" in sys.argv
+
+    if use_wifi:
+        print("Using WiFi camera mode")
+        success = run_agentic_pickup_wifi()
+    else:
+        print("Using external webcam mode")
+        print("(Use --wifi to use robot's onboard camera)")
+        success = run_agentic_pickup()
+
+    print(f"\nFinal result: {'SUCCESS' if success else 'FAILED'}")