foodforthought-cli 0.2.7-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. ate/__init__.py +6 -0
  2. ate/__main__.py +16 -0
  3. ate/auth/__init__.py +1 -0
  4. ate/auth/device_flow.py +141 -0
  5. ate/auth/token_store.py +96 -0
  6. ate/behaviors/__init__.py +100 -0
  7. ate/behaviors/approach.py +399 -0
  8. ate/behaviors/common.py +686 -0
  9. ate/behaviors/tree.py +454 -0
  10. ate/cli.py +855 -3995
  11. ate/client.py +90 -0
  12. ate/commands/__init__.py +168 -0
  13. ate/commands/auth.py +389 -0
  14. ate/commands/bridge.py +448 -0
  15. ate/commands/data.py +185 -0
  16. ate/commands/deps.py +111 -0
  17. ate/commands/generate.py +384 -0
  18. ate/commands/memory.py +907 -0
  19. ate/commands/parts.py +166 -0
  20. ate/commands/primitive.py +399 -0
  21. ate/commands/protocol.py +288 -0
  22. ate/commands/recording.py +524 -0
  23. ate/commands/repo.py +154 -0
  24. ate/commands/simulation.py +291 -0
  25. ate/commands/skill.py +303 -0
  26. ate/commands/skills.py +487 -0
  27. ate/commands/team.py +147 -0
  28. ate/commands/workflow.py +271 -0
  29. ate/detection/__init__.py +38 -0
  30. ate/detection/base.py +142 -0
  31. ate/detection/color_detector.py +399 -0
  32. ate/detection/trash_detector.py +322 -0
  33. ate/drivers/__init__.py +39 -0
  34. ate/drivers/ble_transport.py +405 -0
  35. ate/drivers/mechdog.py +942 -0
  36. ate/drivers/wifi_camera.py +477 -0
  37. ate/interfaces/__init__.py +187 -0
  38. ate/interfaces/base.py +273 -0
  39. ate/interfaces/body.py +267 -0
  40. ate/interfaces/detection.py +282 -0
  41. ate/interfaces/locomotion.py +422 -0
  42. ate/interfaces/manipulation.py +408 -0
  43. ate/interfaces/navigation.py +389 -0
  44. ate/interfaces/perception.py +362 -0
  45. ate/interfaces/sensors.py +247 -0
  46. ate/interfaces/types.py +371 -0
  47. ate/llm_proxy.py +239 -0
  48. ate/mcp_server.py +387 -0
  49. ate/memory/__init__.py +35 -0
  50. ate/memory/cloud.py +244 -0
  51. ate/memory/context.py +269 -0
  52. ate/memory/embeddings.py +184 -0
  53. ate/memory/export.py +26 -0
  54. ate/memory/merge.py +146 -0
  55. ate/memory/migrate/__init__.py +34 -0
  56. ate/memory/migrate/base.py +89 -0
  57. ate/memory/migrate/pipeline.py +189 -0
  58. ate/memory/migrate/sources/__init__.py +13 -0
  59. ate/memory/migrate/sources/chroma.py +170 -0
  60. ate/memory/migrate/sources/pinecone.py +120 -0
  61. ate/memory/migrate/sources/qdrant.py +110 -0
  62. ate/memory/migrate/sources/weaviate.py +160 -0
  63. ate/memory/reranker.py +353 -0
  64. ate/memory/search.py +26 -0
  65. ate/memory/store.py +548 -0
  66. ate/recording/__init__.py +83 -0
  67. ate/recording/demonstration.py +378 -0
  68. ate/recording/session.py +415 -0
  69. ate/recording/upload.py +304 -0
  70. ate/recording/visual.py +416 -0
  71. ate/recording/wrapper.py +95 -0
  72. ate/robot/__init__.py +221 -0
  73. ate/robot/agentic_servo.py +856 -0
  74. ate/robot/behaviors.py +493 -0
  75. ate/robot/ble_capture.py +1000 -0
  76. ate/robot/ble_enumerate.py +506 -0
  77. ate/robot/calibration.py +668 -0
  78. ate/robot/calibration_state.py +388 -0
  79. ate/robot/commands.py +3735 -0
  80. ate/robot/direction_calibration.py +554 -0
  81. ate/robot/discovery.py +441 -0
  82. ate/robot/introspection.py +330 -0
  83. ate/robot/llm_system_id.py +654 -0
  84. ate/robot/locomotion_calibration.py +508 -0
  85. ate/robot/manager.py +270 -0
  86. ate/robot/marker_generator.py +611 -0
  87. ate/robot/perception.py +502 -0
  88. ate/robot/primitives.py +614 -0
  89. ate/robot/profiles.py +281 -0
  90. ate/robot/registry.py +322 -0
  91. ate/robot/servo_mapper.py +1153 -0
  92. ate/robot/skill_upload.py +675 -0
  93. ate/robot/target_calibration.py +500 -0
  94. ate/robot/teach.py +515 -0
  95. ate/robot/types.py +242 -0
  96. ate/robot/visual_labeler.py +1048 -0
  97. ate/robot/visual_servo_loop.py +494 -0
  98. ate/robot/visual_servoing.py +570 -0
  99. ate/robot/visual_system_id.py +906 -0
  100. ate/transports/__init__.py +121 -0
  101. ate/transports/base.py +394 -0
  102. ate/transports/ble.py +405 -0
  103. ate/transports/hybrid.py +444 -0
  104. ate/transports/serial.py +345 -0
  105. ate/urdf/__init__.py +30 -0
  106. ate/urdf/capture.py +582 -0
  107. ate/urdf/cloud.py +491 -0
  108. ate/urdf/collision.py +271 -0
  109. ate/urdf/commands.py +708 -0
  110. ate/urdf/depth.py +360 -0
  111. ate/urdf/inertial.py +312 -0
  112. ate/urdf/kinematics.py +330 -0
  113. ate/urdf/lifting.py +415 -0
  114. ate/urdf/meshing.py +300 -0
  115. ate/urdf/models/__init__.py +110 -0
  116. ate/urdf/models/depth_anything.py +253 -0
  117. ate/urdf/models/sam2.py +324 -0
  118. ate/urdf/motion_analysis.py +396 -0
  119. ate/urdf/pipeline.py +468 -0
  120. ate/urdf/scale.py +256 -0
  121. ate/urdf/scan_session.py +411 -0
  122. ate/urdf/segmentation.py +299 -0
  123. ate/urdf/synthesis.py +319 -0
  124. ate/urdf/topology.py +336 -0
  125. ate/urdf/validation.py +371 -0
  126. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/METADATA +9 -1
  127. foodforthought_cli-0.3.0.dist-info/RECORD +166 -0
  128. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/WHEEL +1 -1
  129. foodforthought_cli-0.2.7.dist-info/RECORD +0 -44
  130. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/entry_points.txt +0 -0
  131. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/top_level.txt +0 -0
ate/robot/agentic_servo.py (new file)
@@ -0,0 +1,856 @@
+"""
+Agentic Visual Servoing with LLM Decision Making.
+
+Uses a cheap LLM (Haiku) for high-level decisions while CV handles
+low-level detection. This hybrid approach gives flexibility for
+complex scenarios while keeping costs low.
+
+Cost comparison per pickup attempt (~20 iterations):
+- Pure CV: $0 (no API calls)
+- Haiku for each frame: ~$0.001 (20 calls * 1K tokens * $0.25/1M)
+- Opus for each frame: ~$0.30 (20 calls * 1K tokens * $15/1M)
+
+Architecture:
+- CV: Ball detection (fast, deterministic)
+- Haiku: Decision making (when to search, give up, recover)
+- Main Agent: Orchestration, reporting results
+"""
+
+import time
+import json
+import base64
+from dataclasses import dataclass
+from typing import Optional, List, Dict, Any, Callable
+from enum import Enum
+
+try:
+    import cv2
+    import numpy as np
+    HAS_CV = True
+except ImportError:
+    HAS_CV = False
+
+try:
+    import anthropic
+    HAS_ANTHROPIC = True
+except ImportError:
+    HAS_ANTHROPIC = False
+
+# LLM Proxy for metered/billable requests through FoodforThought
+try:
+    from ..llm_proxy import LLMProxy, LLMProxyError
+    HAS_LLM_PROXY = True
+except ImportError:
+    HAS_LLM_PROXY = False
+
+from .visual_servoing import (
+    GreenBallDetector,
+    TargetDetection,
+    ServoState,
+)
+
+
+@dataclass
+class AgentDecision:
+    """Decision from the LLM agent."""
+    action: str  # "turn_left", "turn_right", "forward", "grab", "search", "give_up"
+    confidence: float
+    reasoning: str
+    parameters: Dict[str, Any]
+
+
+class HaikuServoAgent:
+    """
+    LLM-powered decision maker for visual servoing.
+
+    Uses Haiku for cheap, fast decisions about robot actions.
+    Falls back to rule-based decisions if API unavailable.
+
+    Supports two modes:
+    - use_proxy=True (default): Routes through FoodforThought edge function
+      - Automatically tracks usage and billing
+      - Requires 'ate login' authentication
+      - Cost metered per user
+    - use_proxy=False: Direct Anthropic API calls
+      - Requires ANTHROPIC_API_KEY env var
+      - No metering/billing
+    """
+
+    SYSTEM_PROMPT = """You are a robot control agent. You analyze camera images and ball detection data to decide the next action for a quadruped robot trying to pick up a green ball.
+
+Available actions:
+- turn_left: Rotate left to center the ball in view
+- turn_right: Rotate right to center the ball in view
+- forward: Move forward toward the ball
+- grab: Execute pickup sequence (only when very close)
+- search: Rotate to look for the ball (when not visible)
+- give_up: Stop trying (after many failed attempts)
+
+You will receive:
+1. Ball detection data (position, size, confidence)
+2. History of recent detections
+3. Current iteration count
+
+Respond with JSON:
+{
+  "action": "action_name",
+  "confidence": 0.0-1.0,
+  "reasoning": "brief explanation",
+  "parameters": {"speed": 25, ...}  // optional
+}
+
+Decision guidelines:
+- Ball x < 0.4: turn_left
+- Ball x > 0.6: turn_right
+- Ball centered (0.4-0.6) and small (size < 0.15): forward
+- Ball centered and large (size >= 0.15): grab
+- Ball not found: search (but give_up after 30+ iterations with no progress)
+- Lost ball multiple times: search more aggressively"""
+
+    def __init__(
+        self,
+        use_vision: bool = False,  # Whether to send images to Haiku
+        max_tokens: int = 150,
+        use_proxy: bool = True,  # Route through FoodforThought for metering/billing
+    ):
+        self.use_vision = use_vision
+        self.max_tokens = max_tokens
+        self.use_proxy = use_proxy
+        self.client = None
+        self.proxy = None
+
+        # Try proxy first (preferred for metering/billing)
+        if use_proxy and HAS_LLM_PROXY:
+            try:
+                self.proxy = LLMProxy()
+                if self.proxy.is_authenticated():
+                    print(" [Agent] Using FoodforThought proxy (metered)")
+                else:
+                    print(" [Agent] Not logged in - run 'ate login' for metered access")
+                    self.proxy = None
+            except Exception as e:
+                print(f" [Agent] Proxy init failed: {e}")
+                self.proxy = None
+
+        # Fallback to direct Anthropic if proxy unavailable
+        if not self.proxy and HAS_ANTHROPIC:
+            try:
+                self.client = anthropic.Anthropic()
+                print(" [Agent] Using direct Anthropic API (unmetered)")
+            except Exception:
+                pass
+
+        if not self.proxy and not self.client:
+            print(" [Agent] No LLM available - using rule-based fallback")
+
+    def decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+        image: Optional[np.ndarray] = None,
+    ) -> AgentDecision:
+        """
+        Get decision from Haiku agent.
+
+        Args:
+            detection: Current ball detection
+            history: Recent detection history
+            iteration: Current iteration number
+            image: Optional camera image for vision-based decision
+
+        Returns:
+            AgentDecision with recommended action
+        """
+        # Try proxy first (metered/billable)
+        if self.proxy:
+            try:
+                return self._proxy_decide(detection, history, iteration, image)
+            except LLMProxyError as e:
+                print(f" [Agent] Proxy error: {e}")
+                if e.status_code == 402:
+                    # Rate limit - don't fallback, let user upgrade
+                    return AgentDecision(
+                        action="give_up",
+                        confidence=1.0,
+                        reasoning=f"Rate limit exceeded: {e}",
+                        parameters={},
+                    )
+            except Exception as e:
+                print(f" [Agent] Proxy error, trying direct: {e}")
+
+        # Try direct Anthropic client
+        if self.client:
+            try:
+                return self._llm_decide(detection, history, iteration, image)
+            except Exception as e:
+                print(f" [Agent] LLM error, using rules: {e}")
+
+        # Fallback to rule-based
+        return self._rule_based_decide(detection, history, iteration)
+
+    def _llm_decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+        image: Optional[np.ndarray],
+    ) -> AgentDecision:
+        """Get decision from Haiku."""
+
+        # Build context message
+        context = {
+            "current_detection": {
+                "found": detection.found,
+                "x": round(detection.x, 3),
+                "y": round(detection.y, 3),
+                "size": round(detection.size, 3),
+                "confidence": round(detection.confidence, 3),
+                "center_offset": round(detection.center_offset, 3),
+                "is_centered": detection.is_centered,
+                "is_close": detection.is_close,
+            },
+            "history_summary": {
+                "total_frames": len(history),
+                "found_count": sum(1 for h in history if h.found),
+                "recent_found": [h.found for h in history[-5:]],
+            },
+            "iteration": iteration,
+        }
+
+        messages = [
+            {
+                "role": "user",
+                "content": f"Iteration {iteration}. Detection data:\n{json.dumps(context, indent=2)}\n\nWhat action should the robot take?"
+            }
+        ]
+
+        # Optionally include image
+        if self.use_vision and image is not None and image.size > 0:
+            # Resize for efficiency
+            small = cv2.resize(image, (320, 240))
+            _, buffer = cv2.imencode('.jpg', small, [cv2.IMWRITE_JPEG_QUALITY, 70])
+            b64_image = base64.b64encode(buffer).decode('utf-8')
+
+            messages[0]["content"] = [
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": b64_image,
+                    }
+                },
+                {
+                    "type": "text",
+                    "text": messages[0]["content"]
+                }
+            ]
+
+        response = self.client.messages.create(
+            model="claude-3-5-haiku-20241022",
+            max_tokens=self.max_tokens,
+            system=self.SYSTEM_PROMPT,
+            messages=messages,
+        )
+
+        # Parse response
+        text = response.content[0].text.strip()
+
+        # Extract JSON from response
+        try:
+            # Handle markdown code blocks
+            if "```" in text:
+                text = text.split("```")[1]
+                if text.startswith("json"):
+                    text = text[4:]
+
+            data = json.loads(text)
+            return AgentDecision(
+                action=data.get("action", "search"),
+                confidence=data.get("confidence", 0.5),
+                reasoning=data.get("reasoning", ""),
+                parameters=data.get("parameters", {}),
+            )
+        except json.JSONDecodeError:
+            # Try to extract action from text
+            text_lower = text.lower()
+            if "turn_left" in text_lower:
+                return AgentDecision("turn_left", 0.7, text, {})
+            elif "turn_right" in text_lower:
+                return AgentDecision("turn_right", 0.7, text, {})
+            elif "forward" in text_lower:
+                return AgentDecision("forward", 0.7, text, {})
+            elif "grab" in text_lower:
+                return AgentDecision("grab", 0.7, text, {})
+            elif "give_up" in text_lower:
+                return AgentDecision("give_up", 0.7, text, {})
+            else:
+                return AgentDecision("search", 0.5, text, {})
+
+    def _proxy_decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+        image: Optional[np.ndarray],
+    ) -> AgentDecision:
+        """Get decision via FoodforThought proxy (metered/billable)."""
+
+        # Build context message (same as _llm_decide)
+        context = {
+            "current_detection": {
+                "found": detection.found,
+                "x": round(detection.x, 3),
+                "y": round(detection.y, 3),
+                "size": round(detection.size, 3),
+                "confidence": round(detection.confidence, 3),
+                "center_offset": round(detection.center_offset, 3),
+                "is_centered": detection.is_centered,
+                "is_close": detection.is_close,
+            },
+            "history_summary": {
+                "total_frames": len(history),
+                "found_count": sum(1 for h in history if h.found),
+                "recent_found": [h.found for h in history[-5:]],
+            },
+            "iteration": iteration,
+        }
+
+        user_content = f"Iteration {iteration}. Detection data:\n{json.dumps(context, indent=2)}\n\nWhat action should the robot take?"
+
+        # Note: Proxy doesn't support vision yet (would need base64 in messages)
+        # For now, use data-only mode through proxy
+        messages = [{"role": "user", "content": user_content}]
+
+        # Call proxy
+        response = self.proxy.chat(
+            messages=messages,
+            model="claude-3-5-haiku-20241022",
+            max_tokens=self.max_tokens,
+            system=self.SYSTEM_PROMPT,
+        )
+
+        # Parse response (same logic as _llm_decide)
+        text = response.content.strip()
+
+        try:
+            # Handle markdown code blocks
+            if "```" in text:
+                text = text.split("```")[1]
+                if text.startswith("json"):
+                    text = text[4:]
+
+            data = json.loads(text)
+            return AgentDecision(
+                action=data.get("action", "search"),
+                confidence=data.get("confidence", 0.5),
+                reasoning=data.get("reasoning", ""),
+                parameters=data.get("parameters", {}),
+            )
+        except json.JSONDecodeError:
+            # Try to extract action from text
+            text_lower = text.lower()
+            if "turn_left" in text_lower:
+                return AgentDecision("turn_left", 0.7, text, {})
+            elif "turn_right" in text_lower:
+                return AgentDecision("turn_right", 0.7, text, {})
+            elif "forward" in text_lower:
+                return AgentDecision("forward", 0.7, text, {})
+            elif "grab" in text_lower:
+                return AgentDecision("grab", 0.7, text, {})
+            elif "give_up" in text_lower:
+                return AgentDecision("give_up", 0.7, text, {})
+            else:
+                return AgentDecision("search", 0.5, text, {})
+
+    def _rule_based_decide(
+        self,
+        detection: TargetDetection,
+        history: List[TargetDetection],
+        iteration: int,
+    ) -> AgentDecision:
+        """Rule-based fallback when LLM unavailable."""
+
+        # Check for give up conditions
+        if iteration > 50:
+            return AgentDecision(
+                action="give_up",
+                confidence=0.9,
+                reasoning="Max iterations exceeded",
+                parameters={},
+            )
+
+        if not detection.found:
+            # Check if we've been searching too long
+            recent_found = sum(1 for h in history[-10:] if h.found)
+            if recent_found == 0 and iteration > 20:
+                return AgentDecision(
+                    action="give_up",
+                    confidence=0.7,
+                    reasoning="No detections in last 10 frames after 20 iterations",
+                    parameters={},
+                )
+
+            return AgentDecision(
+                action="search",
+                confidence=0.8,
+                reasoning="Ball not visible, searching",
+                parameters={"speed": 25},
+            )
+
+        # Ball found - decide based on position and size
+        offset = detection.center_offset
+
+        if detection.is_close and detection.is_centered:
+            return AgentDecision(
+                action="grab",
+                confidence=0.9,
+                reasoning=f"Ball centered and close (size={detection.size:.2f})",
+                parameters={},
+            )
+
+        if not detection.is_centered:
+            if offset < -0.1:
+                return AgentDecision(
+                    action="turn_left",
+                    confidence=0.8,
+                    reasoning=f"Ball on left (offset={offset:.2f})",
+                    parameters={"speed": 20},
+                )
+            else:
+                return AgentDecision(
+                    action="turn_right",
+                    confidence=0.8,
+                    reasoning=f"Ball on right (offset={offset:.2f})",
+                    parameters={"speed": 20},
+                )
+
+        # Centered but not close
+        return AgentDecision(
+            action="forward",
+            confidence=0.8,
+            reasoning=f"Ball centered but far (size={detection.size:.2f})",
+            parameters={"speed": 20},
+        )
+
+
+class AgenticServoController:
+    """
+    Visual servoing controller with LLM decision making.
+
+    Combines:
+    - Fast CV-based ball detection
+    - Haiku-based decision making for complex scenarios
+    """
+
+    def __init__(
+        self,
+        capture_fn: Callable[[], np.ndarray],
+        move_fn: Callable[[float, float], None],
+        stop_fn: Callable[[], None],
+        gripper_open_fn: Callable[[], None],
+        gripper_close_fn: Callable[[], None],
+        arm_down_fn: Callable[[], None],
+        arm_up_fn: Callable[[], None],
+        detector: Optional[GreenBallDetector] = None,
+        agent: Optional[HaikuServoAgent] = None,
+        max_iterations: int = 50,
+        save_frames: bool = True,
+    ):
+        self.capture = capture_fn
+        self.move = move_fn
+        self.stop = stop_fn
+        self.gripper_open = gripper_open_fn
+        self.gripper_close = gripper_close_fn
+        self.arm_down = arm_down_fn
+        self.arm_up = arm_up_fn
+
+        self.detector = detector or GreenBallDetector()
+        self.agent = agent or HaikuServoAgent()
+
+        self.max_iterations = max_iterations
+        self.save_frames = save_frames
+
+        self.history: List[TargetDetection] = []
+        self.decisions: List[AgentDecision] = []
+        self.iteration = 0
+
+    def pickup_target(self) -> bool:
+        """
+        Execute pickup with agentic decision making.
+
+        Returns:
+            True if successful
+        """
+        print("\n" + "=" * 50)
+        print("AGENTIC VISUAL SERVOING")
+        print("=" * 50)
+
+        self.history = []
+        self.decisions = []
+        self.iteration = 0
+
+        while self.iteration < self.max_iterations:
+            # Capture and detect
+            image = self.capture()
+            detection = self.detector.detect(image)
+            self.history.append(detection)
+
+            # Save frame for debugging
+            if self.save_frames and image is not None and image.size > 0:
+                path = f"/tmp/agentic_{self.iteration:03d}.jpg"
+                cv2.imwrite(path, image)
+
+            # Get decision from agent
+            decision = self.agent.decide(
+                detection, self.history, self.iteration, image
+            )
+            self.decisions.append(decision)
+
+            print(f"\n[{self.iteration}] {decision.action} (conf={decision.confidence:.2f})")
+            print(f" {decision.reasoning}")
+
+            # Execute decision
+            if decision.action == "give_up":
+                print("\n[FAILED] Agent decided to give up")
+                self.stop()
+                return False
+
+            elif decision.action == "grab":
+                return self._execute_grab()
+
+            elif decision.action == "turn_left":
+                speed = decision.parameters.get("speed", 25)
+                self.move(0, -speed)
+                time.sleep(0.3)
+                self.stop()
+
+            elif decision.action == "turn_right":
+                speed = decision.parameters.get("speed", 25)
+                self.move(0, speed)
+                time.sleep(0.3)
+                self.stop()
+
+            elif decision.action == "forward":
+                speed = decision.parameters.get("speed", 20)
+                self.move(speed, 0)
+                time.sleep(0.4)
+                self.stop()
+
+            elif decision.action == "search":
+                speed = decision.parameters.get("speed", 25)
+                self.move(0, speed)
+                time.sleep(0.5)
+                self.stop()
+
+            time.sleep(0.2)
+            self.iteration += 1
+
+        print("\n[FAILED] Max iterations reached")
+        self.stop()
+        return False
+
+    def _execute_grab(self) -> bool:
+        """Execute the pickup sequence."""
+        print("\n[GRABBING]")
+
+        print(" Opening gripper...")
+        self.gripper_open()
+        time.sleep(0.4)
+
+        print(" Lowering arm...")
+        self.arm_down()
+        time.sleep(0.5)
+
+        print(" Closing gripper...")
+        self.gripper_close()
+        time.sleep(0.4)
+
+        print(" Lifting arm...")
+        self.arm_up()
+        time.sleep(0.4)
+
+        print("\n[SUCCESS] Grab sequence complete!")
+        return True
+
+    def get_summary(self) -> Dict[str, Any]:
+        """Get summary of the pickup attempt."""
+        action_counts = {}
+        for d in self.decisions:
+            action_counts[d.action] = action_counts.get(d.action, 0) + 1
+
+        return {
+            "total_iterations": self.iteration,
+            "action_counts": action_counts,
+            "detection_rate": sum(1 for h in self.history if h.found) / max(1, len(self.history)),
+            "final_action": self.decisions[-1].action if self.decisions else None,
+        }
+
+
+def run_agentic_pickup(
+    serial_port: str = "/dev/cu.usbserial-10",
+    webcam_index: int = 0,
+    use_llm_vision: bool = False,
+    use_proxy: bool = True,
+) -> bool:
+    """
+    Run the agentic pickup behavior.
+
+    Args:
+        serial_port: MechDog serial port
+        webcam_index: Webcam device index
+        use_llm_vision: Whether to send images to Haiku (more expensive)
+        use_proxy: Route through FoodforThought for metering/billing (default: True)
+            Requires 'ate login' authentication
+
+    Returns:
+        True if pickup succeeded
+    """
+    if not HAS_CV:
+        print("OpenCV required. Install: pip install opencv-python")
+        return False
+
+    try:
+        import serial
+    except ImportError:
+        print("pyserial required. Install: pip install pyserial")
+        return False
+
+    # Setup serial
+    try:
+        ser = serial.Serial(serial_port, 115200, timeout=1)
+        time.sleep(0.5)
+        ser.read(ser.in_waiting)
+
+        def cmd(command: str, wait: float = 0.3):
+            ser.write(f"{command}\r\n".encode())
+            time.sleep(wait)
+            return ser.read(ser.in_waiting).decode('utf-8', errors='ignore')
+
+        cmd("from HW_MechDog import MechDog", 1.5)
+        cmd("dog = MechDog()", 1.5)
+        print(f"Connected to MechDog")
+
+    except Exception as e:
+        print(f"Failed to connect: {e}")
+        return False
+
+    # Setup webcam
+    cap = cv2.VideoCapture(webcam_index)
+    if not cap.isOpened():
+        print("Failed to open webcam")
+        ser.close()
+        return False
+
+    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
+    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
+
+    # Create controller
+    def capture():
+        for _ in range(2):
+            cap.read()
+        ret, frame = cap.read()
+        return frame if ret else np.array([])
+
+    controller = AgenticServoController(
+        capture_fn=capture,
+        move_fn=lambda f, t: cmd(f"dog.move({int(f)}, {int(t)})", 0.2),
+        stop_fn=lambda: cmd("dog.move(0, 0)", 0.2),
+        gripper_open_fn=lambda: cmd("dog.set_servo(11, 500, 400)", 0.5),
+        gripper_close_fn=lambda: cmd("dog.set_servo(11, 2400, 400)", 0.5),
+        # CORRECTED 2026-01-03: Vision analysis confirmed servos 8,9,11 are arm
+        arm_down_fn=lambda: (cmd("dog.set_servo(8, 800, 600)", 0.7), cmd("dog.set_servo(9, 800, 600)", 0.7)),
+        arm_up_fn=lambda: (cmd("dog.set_servo(9, 2000, 600)", 0.7), cmd("dog.set_servo(8, 2200, 600)", 0.7)),
+        agent=HaikuServoAgent(use_vision=use_llm_vision, use_proxy=use_proxy),
+    )
+
+    # Run pickup
+    success = controller.pickup_target()
+
+    # Print summary
+    summary = controller.get_summary()
+    print("\n" + "=" * 50)
+    print("SUMMARY")
+    print("=" * 50)
+    print(f"Iterations: {summary['total_iterations']}")
+    print(f"Detection rate: {summary['detection_rate']:.1%}")
+    print(f"Actions: {summary['action_counts']}")
+
+    # Cleanup
+    cap.release()
+    ser.close()
+
+    return success
+
+
+def run_agentic_pickup_wifi(
+    serial_port: str = "/dev/cu.usbserial-10",
+    camera_ip: str = "192.168.4.1",
+    use_llm_vision: bool = False,
+    use_proxy: bool = True,
+) -> bool:
+    """
+    Run agentic pickup using robot's onboard WiFi camera.
+
+    This is the preferred method for visual servoing as the camera
+    moves with the robot, enabling true closed-loop control.
+
+    Prerequisites:
+    - Connect your computer to the robot's WiFi AP
+    - Robot's camera typically at 192.168.4.1 (ESP32-CAM)
+
+    Args:
+        serial_port: MechDog serial port
+        camera_ip: WiFi camera IP address
+        use_llm_vision: Whether to send images to Haiku
+        use_proxy: Route through FoodforThought for metering
+
+    Returns:
+        True if pickup succeeded
+    """
+    if not HAS_CV:
+        print("OpenCV required. Install: pip install opencv-python")
+        return False
+
+    try:
+        import serial
+    except ImportError:
+        print("pyserial required. Install: pip install pyserial")
+        return False
+
+    try:
+        from ..drivers.wifi_camera import WiFiCamera, WiFiCameraConfig
+    except ImportError:
+        print("WiFi camera driver not available")
+        return False
+
+    # Check camera connectivity first
+    print(f"Checking WiFi camera at {camera_ip}...")
+    import requests
+    try:
+        response = requests.get(f"http://{camera_ip}/status", timeout=2)
+        if response.status_code != 200:
+            print(f"Camera not responding properly (status: {response.status_code})")
+            print("Make sure you're connected to the robot's WiFi AP")
+            return False
+        print("Camera connected!")
+    except requests.RequestException as e:
+        print(f"Cannot reach camera: {e}")
+        print("\nTo use the robot's WiFi camera:")
+        print("1. Look for WiFi network starting with 'Hiwonder' or similar")
+        print("2. Connect to it (password may be printed on robot)")
+        print("3. Camera should be at 192.168.4.1")
+        return False
+
+    # Setup serial
+    try:
+        ser = serial.Serial(serial_port, 115200, timeout=1)
+        time.sleep(0.5)
+        ser.read(ser.in_waiting)
+
+        def cmd(command: str, wait: float = 0.3):
+            ser.write(f"{command}\r\n".encode())
+            time.sleep(wait)
+            return ser.read(ser.in_waiting).decode('utf-8', errors='ignore')
+
+        cmd("from HW_MechDog import MechDog", 1.5)
+        cmd("dog = MechDog()", 1.5)
+        print(f"Connected to MechDog")
+
+    except Exception as e:
+        print(f"Failed to connect to robot: {e}")
+        return False
+
+    # Setup WiFi camera
+    config = WiFiCameraConfig(
+        ip=camera_ip,
+        port=80,
+        stream_port=81,
+    )
+    wifi_cam = WiFiCamera(config)
+
+    # Create capture function that returns numpy array for OpenCV
+    def capture():
+        try:
+            from PIL import Image as PILImage
+            import io
+
+            # Get image from WiFi camera
+            response = requests.get(
+                f"http://{camera_ip}/capture",
+                timeout=2,
+                stream=True
+            )
+            if response.status_code != 200:
+                return np.array([])
+
+            # Decode JPEG to numpy array via PIL
+            pil_image = PILImage.open(io.BytesIO(response.content))
+            if pil_image.mode != "RGB":
+                pil_image = pil_image.convert("RGB")
+
+            # Convert to BGR for OpenCV
+            frame = np.array(pil_image)
+            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+            return frame
+
+        except Exception as e:
+            print(f" [Camera] Error: {e}")
+            return np.array([])
+
+    # Test capture
+    print("Testing camera capture...")
+    test_frame = capture()
+    if test_frame.size == 0:
+        print("Failed to capture test frame")
+        ser.close()
+        return False
+    print(f"Camera working: {test_frame.shape}")
+
+    # Create controller
+    controller = AgenticServoController(
+        capture_fn=capture,
+        move_fn=lambda f, t: cmd(f"dog.move({int(f)}, {int(t)})", 0.2),
+        stop_fn=lambda: cmd("dog.move(0, 0)", 0.2),
+        gripper_open_fn=lambda: cmd("dog.set_servo(11, 500, 400)", 0.5),
+        gripper_close_fn=lambda: cmd("dog.set_servo(11, 2400, 400)", 0.5),
+        # CORRECTED 2026-01-03: Vision analysis confirmed servos 8,9,11 are arm
+        arm_down_fn=lambda: (cmd("dog.set_servo(8, 800, 600)", 0.7), cmd("dog.set_servo(9, 800, 600)", 0.7)),
+        arm_up_fn=lambda: (cmd("dog.set_servo(9, 2000, 600)", 0.7), cmd("dog.set_servo(8, 2200, 600)", 0.7)),
+        agent=HaikuServoAgent(use_vision=use_llm_vision, use_proxy=use_proxy),
+    )
+
+    # Run pickup
+    success = controller.pickup_target()
+
+    # Print summary
+    summary = controller.get_summary()
+    print("\n" + "=" * 50)
+    print("SUMMARY")
+    print("=" * 50)
+    print(f"Iterations: {summary['total_iterations']}")
+    print(f"Detection rate: {summary['detection_rate']:.1%}")
+    print(f"Actions: {summary['action_counts']}")
+
+    # Cleanup
+    ser.close()
+
+    return success
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Check for --wifi flag
+    use_wifi = "--wifi" in sys.argv
+
+    if use_wifi:
+        print("Using WiFi camera mode")
+        success = run_agentic_pickup_wifi()
+    else:
+        print("Using external webcam mode")
+        print("(Use --wifi to use robot's onboard camera)")
+        success = run_agentic_pickup()
+
+    print(f"\nFinal result: {'SUCCESS' if success else 'FAILED'}")