PyPI - quash-mcp - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

quash-mcp 0.2.2.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of quash-mcp might be problematic. Click here for more details.

Files changed (25) hide show

{quash_mcp-0.2.2 → quash_mcp-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: quash-mcp
-Version: 0.2.2
+Version: 0.2.4
 Summary: Model Context Protocol server for Quash - AI-powered mobile automation agent
 Project-URL: Homepage, https://quashbugs.com
 Project-URL: Repository, https://github.com/quash/quash-mcp

{quash_mcp-0.2.2 → quash_mcp-0.2.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "quash-mcp"
-version = "0.2.2"
+version = "0.2.4"
 description = "Model Context Protocol server for Quash - AI-powered mobile automation agent"
 readme = "README.md"
 license = {text = "MIT"}

{quash_mcp-0.2.2 → quash_mcp-0.2.4}/quash_mcp/backend_client.py RENAMED Viewed

@@ -190,6 +190,97 @@ class BackendClient:
             logger.error(f"Failed to log execution: {e}")
             return {"logged": False, "error": str(e)}
+    async def execute_step(
+        self,
+        api_key: str,
+        session_id: str,
+        step_number: int,
+        task: str,
+        ui_state: Dict[str, Any],
+        chat_history: list,
+        config: Dict[str, Any],
+        screenshot_bytes: Optional[bytes] = None
+    ) -> Dict[str, Any]:
+        """
+        Execute single agent step (V3 - Step-by-step execution).
+        Sends device state to backend, receives next action to execute.
+        Args:
+            api_key: Quash API key
+            session_id: Unique session identifier
+            step_number: Current step number
+            task: Original task description
+            ui_state: Device UI state (a11y_tree, phone_state)
+            chat_history: Previous conversation messages
+            config: Execution configuration
+            screenshot_bytes: Optional screenshot (only if vision=True)
+        Returns:
+            Dict with action to execute:
+            {
+                "action": {"type": str, "code": str, "reasoning": str},
+                "completed": bool,
+                "success": bool (if completed),
+                "final_message": str (if completed),
+                "assistant_response": str,
+                "tokens_used": {"prompt": int, "completion": int, "total": int},
+                "cost": float
+            }
+        """
+        import json
+        try:
+            # Prepare form data (multipart)
+            data_dict = {
+                "api_key": api_key,
+                "session_id": session_id,
+                "step_number": step_number,
+                "task": task,
+                "ui_state": ui_state,
+                "chat_history": chat_history,
+                "config": config
+            }
+            # Convert to JSON string
+            data_json = json.dumps(data_dict)
+            # Prepare form data (data field as string)
+            form_data = {"data": data_json}
+            # Prepare files dict (only screenshot if provided)
+            files = {}
+            if screenshot_bytes:
+                files["screenshot"] = ("screenshot.png", screenshot_bytes, "image/png")
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                # Send both form data and files (multipart/form-data)
+                response = await client.post(
+                    f"{self.base_url}/api/agent/step",
+                    data=form_data,
+                    files=files if files else None
+                )
+                if response.status_code == 200:
+                    return response.json()
+                else:
+                    error_msg = f"Backend error: HTTP {response.status_code}"
+                    logger.error(error_msg)
+                    return {
+                        "status": "error",
+                        "message": error_msg,
+                        "error": error_msg
+                    }
+        except Exception as e:
+            error_msg = f"Failed to execute step: {str(e)}"
+            logger.error(error_msg)
+            return {
+                "status": "error",
+                "message": error_msg,
+                "error": str(e)
+            }
 # Singleton instance
 _backend_client = None

quash_mcp-0.2.4/quash_mcp/device/state_capture.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""
+Device state capture utilities.
+Captures UI state and screenshots from Android devices.
+"""
+import logging
+import requests
+from typing import Dict, Any, Optional, Tuple
+from adbutils import adb
+logger = logging.getLogger("quash-device")
+def get_current_package(serial: str) -> str:
+    """
+    Get the currently focused app package.
+    Args:
+        serial: Device serial number
+    Returns:
+        Package name of current app
+    """
+    try:
+        device = adb.device(serial)
+        output = device.shell("dumpsys window windows | grep -E 'mCurrentFocus'")
+        # Parse output like: mCurrentFocus=Window{abc123 u0 com.android.settings/com.android.settings.MainActivity}
+        if "/" in output:
+            package = output.split("/")[0].split()[-1]
+            return package
+        return "unknown"
+    except Exception as e:
+        logger.warning(f"Failed to get current package: {e}")
+        return "unknown"
+def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
+    """
+    Get accessibility tree from Portal app via TCP.
+    Args:
+        serial: Device serial number
+        tcp_port: Local TCP port for Portal communication
+    Returns:
+        Accessibility tree XML string
+    """
+    try:
+        device = adb.device(serial)
+        local_port = device.forward_port(tcp_port)
+        response = requests.get(
+            f"http://localhost:{local_port}/get_a11y_tree",
+            timeout=10
+        )
+        if response.status_code == 200:
+            return response.text
+        else:
+            logger.warning(f"Failed to get accessibility tree: HTTP {response.status_code}")
+            return "<hierarchy></hierarchy>"
+    except Exception as e:
+        logger.warning(f"Failed to get accessibility tree: {e}")
+        return "<hierarchy></hierarchy>"
+def capture_screenshot(serial: str) -> Optional[bytes]:
+    """
+    Capture screenshot from device.
+    Args:
+        serial: Device serial number
+    Returns:
+        Screenshot as PNG bytes, or None if failed
+    """
+    try:
+        device = adb.device(serial)
+        screenshot_bytes = device.shell("screencap -p", stream=True)
+        return screenshot_bytes
+    except Exception as e:
+        logger.error(f"Failed to capture screenshot: {e}")
+        return None
+def get_device_state(serial: str) -> Tuple[Dict[str, Any], Optional[bytes]]:
+    """
+    Get complete device state: UI state and screenshot.
+    Args:
+        serial: Device serial number
+    Returns:
+        Tuple of (ui_state_dict, screenshot_bytes)
+    """
+    # Get current package
+    current_package = get_current_package(serial)
+    # Get accessibility tree
+    a11y_tree = get_accessibility_tree(serial)
+    # Build UI state
+    ui_state = {
+        "a11y_tree": a11y_tree,
+        "phone_state": {
+            "package": current_package,
+            "activity": "unknown",  # Can be added later
+        }
+    }
+    # Capture screenshot
+    screenshot = capture_screenshot(serial)
+    return ui_state, screenshot

quash_mcp-0.2.4/quash_mcp/tools/execute.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+Execute tool - Run automation tasks via step-by-step backend communication.
+V3: Hybrid architecture - AI logic on backend (private), device access local (public).
+"""
+from typing import Dict, Any, Callable, Optional
+from .execute_v3 import execute_v3
+async def execute(
+    task: str,
+    progress_callback: Optional[Callable[[str], None]] = None
+) -> Dict[str, Any]:
+    """
+    Execute an automation task on the connected Android device.
+    Uses step-by-step execution:
+    - Captures device state locally
+    - Sends to backend for AI decision
+    - Executes actions locally
+    - Keeps proprietary AI logic private on backend
+    Args:
+        task: Natural language task description
+        progress_callback: Optional callback for progress updates
+    Returns:
+        Dict with execution result and details
+    """
+    return await execute_v3(task=task, progress_callback=progress_callback)

quash_mcp-0.2.4/quash_mcp/tools/execute_v3.py ADDED Viewed

@@ -0,0 +1,306 @@
+"""
+Execute tool V3 - Step-by-step execution with local device access.
+AI logic runs on backend (private), device access happens locally (public).
+This hybrid approach keeps proprietary code private while allowing local device control.
+"""
+import time
+import uuid
+from typing import Dict, Any, Callable, Optional
+from ..state import get_state
+from ..backend_client import get_backend_client
+from ..device.state_capture import get_device_state
+from ..device.adb_tools import AdbTools
+async def execute_v3(
+    task: str,
+    progress_callback: Optional[Callable[[str], None]] = None
+) -> Dict[str, Any]:
+    """
+    Execute automation task using step-by-step backend communication.
+    Each step:
+    1. Capture device state locally (UI + optional screenshot)
+    2. Send to backend for AI decision
+    3. Execute returned action locally
+    4. Repeat until complete
+    Args:
+        task: Natural language task description
+        progress_callback: Optional callback for progress updates
+    Returns:
+        Dict with execution result and details
+    """
+    state = get_state()
+    backend = get_backend_client()
+    # Check prerequisites
+    if not state.is_device_connected():
+        return {
+            "status": "error",
+            "message": "❌ No device connected. Please run 'connect' first.",
+            "prerequisite": "connect"
+        }
+    if not state.is_configured():
+        return {
+            "status": "error",
+            "message": "❌ Configuration incomplete. Please run 'configure' with your Quash API key.",
+            "prerequisite": "configure"
+        }
+    if not state.portal_ready:
+        return {
+            "status": "error",
+            "message": "⚠️ Portal accessibility service not ready. Please ensure it's enabled on the device.",
+            "prerequisite": "connect"
+        }
+    # Get API key and config
+    quash_api_key = state.config["api_key"]
+    config = {
+        "model": state.config["model"],
+        "temperature": state.config["temperature"],
+        "vision": state.config["vision"],
+        "reasoning": state.config["reasoning"],
+        "reflection": state.config["reflection"],
+        "debug": state.config["debug"]
+    }
+    # Validate API key
+    validation_result = await backend.validate_api_key(quash_api_key)
+    if not validation_result.get("valid", False):
+        error_msg = validation_result.get("error", "Invalid API key")
+        return {
+            "status": "error",
+            "message": f"❌ API Key validation failed: {error_msg}",
+            "prerequisite": "configure"
+        }
+    # Check credits
+    user_info = validation_result.get("user", {})
+    credits = user_info.get("credits", 0)
+    if credits <= 0:
+        return {
+            "status": "error",
+            "message": f"❌ Insufficient credits. Current balance: ${credits:.2f}",
+            "user": user_info
+        }
+    # Progress logging helper
+    def log_progress(message: str):
+        if progress_callback:
+            progress_callback(message)
+    log_progress(f"✅ API Key validated - Credits: ${credits:.2f}")
+    log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
+    log_progress(f"🚀 Starting task: {task}")
+    log_progress(f"📱 Device: {state.device_serial}")
+    log_progress(f"🧠 Model: {config['model']}")
+    # Initialize execution
+    start_time = time.time()
+    session_id = f"session_{uuid.uuid4().hex[:12]}"
+    step_number = 0
+    chat_history = []
+    total_tokens = {"prompt": 0, "completion": 0, "total": 0}
+    total_cost = 0.0
+    # Initialize local ADB tools for code execution
+    adb_tools = AdbTools(serial=state.device_serial, use_tcp=True)
+    # Code executor namespace
+    executor_globals = {
+        "__builtins__": __builtins__,
+        "adb_tools": adb_tools
+    }
+    executor_locals = {}
+    try:
+        # ============================================================
+        # STEP-BY-STEP EXECUTION LOOP
+        # ============================================================
+        while step_number < 15:  # Max 15 steps
+            step_number += 1
+            log_progress(f"🧠 Step {step_number}: Thinking...")
+            # 1. Capture device state
+            try:
+                ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)
+                # Only include screenshot if vision is enabled
+                if not config["vision"]:
+                    screenshot_bytes = None
+            except Exception as e:
+                log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
+                ui_state_dict = {
+                    "a11y_tree": "<hierarchy></hierarchy>",
+                    "phone_state": {"package": "unknown"}
+                }
+                screenshot_bytes = None
+            # 2. Send to backend for AI decision
+            step_result = await backend.execute_step(
+                api_key=quash_api_key,
+                session_id=session_id,
+                step_number=step_number,
+                task=task,
+                ui_state=ui_state_dict,
+                chat_history=chat_history,
+                config=config,
+                screenshot_bytes=screenshot_bytes
+            )
+            # Handle backend errors
+            if "error" in step_result:
+                log_progress(f"💥 Backend error: {step_result['message']}")
+                return {
+                    "status": "error",
+                    "message": step_result["message"],
+                    "error": step_result["error"],
+                    "steps_taken": step_number,
+                    "tokens": total_tokens,
+                    "cost": total_cost,
+                    "duration_seconds": time.time() - start_time
+                }
+            # Update usage tracking
+            step_tokens = step_result.get("tokens_used", {})
+            step_cost = step_result.get("cost", 0.0)
+            total_tokens["prompt"] += step_tokens.get("prompt", 0)
+            total_tokens["completion"] += step_tokens.get("completion", 0)
+            total_tokens["total"] += step_tokens.get("total", 0)
+            total_cost += step_cost
+            # Get action from backend
+            action = step_result.get("action", {})
+            action_type = action.get("type")
+            code = action.get("code")
+            reasoning = action.get("reasoning")
+            # Log reasoning
+            if reasoning:
+                log_progress(f"🤔 Reasoning: {reasoning}")
+            # Update chat history
+            assistant_response = step_result.get("assistant_response", "")
+            chat_history.append({"role": "assistant", "content": assistant_response})
+            # 3. Check if task is complete
+            if step_result.get("completed", False):
+                success = step_result.get("success", False)
+                final_message = step_result.get("final_message", "Task completed")
+                duration = time.time() - start_time
+                if success:
+                    log_progress(f"✅ Task completed successfully in {step_number} steps")
+                    log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")
+                    return {
+                        "status": "success",
+                        "steps_taken": step_number,
+                        "final_message": final_message,
+                        "message": f"✅ Success: {final_message}",
+                        "tokens": total_tokens,
+                        "cost": total_cost,
+                        "duration_seconds": duration
+                    }
+                else:
+                    log_progress(f"❌ Task failed: {final_message}")
+                    log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")
+                    return {
+                        "status": "failed",
+                        "steps_taken": step_number,
+                        "final_message": final_message,
+                        "message": f"❌ Failed: {final_message}",
+                        "tokens": total_tokens,
+                        "cost": total_cost,
+                        "duration_seconds": duration
+                    }
+            # 4. Execute action locally
+            if code and action_type == "execute_code":
+                log_progress(f"⚡ Executing action...")
+                try:
+                    # Execute code in sandbox
+                    exec(code, executor_globals, executor_locals)
+                    # Get execution result
+                    execution_output = executor_locals.get("_result", "Code executed successfully")
+                    # Add execution result to chat history
+                    chat_history.append({
+                        "role": "user",
+                        "content": f"Execution Result:\n```\n{execution_output}\n```"
+                    })
+                except Exception as e:
+                    error_msg = f"Error during execution: {str(e)}"
+                    log_progress(f"💥 Action failed: {error_msg}")
+                    # Add error to chat history
+                    chat_history.append({
+                        "role": "user",
+                        "content": f"Execution Result:\n```\n{error_msg}\n```"
+                    })
+            else:
+                # No code to execute
+                log_progress("⚠️ No action code provided by backend")
+                chat_history.append({
+                    "role": "user",
+                    "content": "No code was provided. Please provide code to execute."
+                })
+        # Max steps reached
+        log_progress(f"⚠️ Reached maximum steps ({step_number})")
+        log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")
+        return {
+            "status": "failed",
+            "steps_taken": step_number,
+            "final_message": f"Reached maximum step limit of {step_number}",
+            "message": "❌ Failed: Maximum steps reached",
+            "tokens": total_tokens,
+            "cost": total_cost,
+            "duration_seconds": time.time() - start_time
+        }
+    except KeyboardInterrupt:
+        log_progress("⏹️ Task interrupted by user")
+        return {
+            "status": "interrupted",
+            "message": "⏹️ Task execution interrupted",
+            "steps_taken": step_number,
+            "tokens": total_tokens,
+            "cost": total_cost,
+            "duration_seconds": time.time() - start_time
+        }
+    except Exception as e:
+        error_msg = str(e)
+        log_progress(f"💥 Error: {error_msg}")
+        return {
+            "status": "error",
+            "message": f"💥 Execution error: {error_msg}",
+            "error": error_msg,
+            "steps_taken": step_number,
+            "tokens": total_tokens,
+            "cost": total_cost,
+            "duration_seconds": time.time() - start_time
+        }
+    finally:
+        # Cleanup TCP forwarding
+        if adb_tools:
+            adb_tools.teardown_tcp_forward()