quash-mcp 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of quash-mcp might be problematic. Click here for more details.

Files changed (27) hide show
  1. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/PKG-INFO +19 -3
  2. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/README.md +18 -2
  3. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/pyproject.toml +1 -1
  4. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/backend_client.py +71 -0
  5. quash_mcp-0.2.11/quash_mcp/tools/execute_v3.py +636 -0
  6. quash_mcp-0.2.9/quash_mcp/tools/execute_v3.py +0 -371
  7. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/.gitignore +0 -0
  8. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/SETUP_CLAUDE_CODE.md +0 -0
  9. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/__init__.py +0 -0
  10. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/__main__.py +0 -0
  11. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/device/__init__.py +0 -0
  12. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/device/adb_tools.py +0 -0
  13. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/device/portal.py +0 -0
  14. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/device/state_capture.py +0 -0
  15. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/server.py +0 -0
  16. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/state.py +0 -0
  17. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/__init__.py +0 -0
  18. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/build.py +0 -0
  19. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/build_old.py +0 -0
  20. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/configure.py +0 -0
  21. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/connect.py +0 -0
  22. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/execute.py +0 -0
  23. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/execute_v2_backup.py +0 -0
  24. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/runsuite.py +0 -0
  25. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/quash_mcp/tools/usage.py +0 -0
  26. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/test_backend_integration.py +0 -0
  27. {quash_mcp-0.2.9 → quash_mcp-0.2.11}/test_tools_loading.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quash-mcp
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: Model Context Protocol server for Quash - AI-powered mobile automation agent
5
5
  Project-URL: Homepage, https://quashbugs.com
6
6
  Project-URL: Repository, https://github.com/quash/quash-mcp
@@ -57,7 +57,7 @@ All dependencies (including ADB tools and device connectivity) are automatically
57
57
 
58
58
  ### 1. Get Your API Key
59
59
 
60
- 1. Visit [quashbugs.com](https://quashbugs.com) (or your deployment URL)
60
+ 1. Visit [quashbugs.com/mcp](http://13.220.180.140.nip.io/) (or your deployment URL)
61
61
  2. Sign in with Google
62
62
  3. Go to Dashboard → API Keys
63
63
  4. Create a new API key
@@ -92,6 +92,22 @@ Add to your MCP host's config file:
92
92
  - No PATH configuration needed
93
93
  - Uses whichever Python has quash-mcp installed
94
94
 
95
+ #### CLI Configuration (If Supported by Host)
96
+
97
+ Some MCP hosts might provide a command-line interface to add servers.
98
+
99
+ **Examples:**
100
+
101
+ - **Claude Code:**
102
+ ```bash
103
+ claude mcp add quash quash-mcp
104
+ ```
105
+
106
+ - **Gemini CLI:**
107
+ ```bash
108
+ gemini mcp add quash quash-mcp
109
+ ```
110
+
95
111
  #### Alternative: Direct Command (if in PATH)
96
112
 
97
113
  If `quash-mcp` is in your PATH:
@@ -220,7 +236,7 @@ User: "Show me my usage statistics"
220
236
 
221
237
  - **Python 3.11+** - Required for the MCP server
222
238
  - **Android Device** - Emulator or physical device with USB debugging enabled
223
- - **Quash API Key** - Get from [quashbugs.com](https://quashbugs.com)
239
+ - **Quash API Key** - Get from [quashbugs.com/mcp](http://13.220.180.140.nip.io/)
224
240
 
225
241
  Dependencies automatically installed:
226
242
  - Android Debug Bridge (ADB) - via `adbutils`
@@ -24,7 +24,7 @@ All dependencies (including ADB tools and device connectivity) are automatically
24
24
 
25
25
  ### 1. Get Your API Key
26
26
 
27
- 1. Visit [quashbugs.com](https://quashbugs.com) (or your deployment URL)
27
+ 1. Visit [quashbugs.com/mcp](http://13.220.180.140.nip.io/) (or your deployment URL)
28
28
  2. Sign in with Google
29
29
  3. Go to Dashboard → API Keys
30
30
  4. Create a new API key
@@ -59,6 +59,22 @@ Add to your MCP host's config file:
59
59
  - No PATH configuration needed
60
60
  - Uses whichever Python has quash-mcp installed
61
61
 
62
+ #### CLI Configuration (If Supported by Host)
63
+
64
+ Some MCP hosts might provide a command-line interface to add servers.
65
+
66
+ **Examples:**
67
+
68
+ - **Claude Code:**
69
+ ```bash
70
+ claude mcp add quash quash-mcp
71
+ ```
72
+
73
+ - **Gemini CLI:**
74
+ ```bash
75
+ gemini mcp add quash quash-mcp
76
+ ```
77
+
62
78
  #### Alternative: Direct Command (if in PATH)
63
79
 
64
80
  If `quash-mcp` is in your PATH:
@@ -187,7 +203,7 @@ User: "Show me my usage statistics"
187
203
 
188
204
  - **Python 3.11+** - Required for the MCP server
189
205
  - **Android Device** - Emulator or physical device with USB debugging enabled
190
- - **Quash API Key** - Get from [quashbugs.com](https://quashbugs.com)
206
+ - **Quash API Key** - Get from [quashbugs.com/mcp](http://13.220.180.140.nip.io/)
191
207
 
192
208
  Dependencies automatically installed:
193
209
  - Android Debug Bridge (ADB) - via `adbutils`
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "quash-mcp"
3
- version = "0.2.9"
3
+ version = "0.2.11"
4
4
  description = "Model Context Protocol server for Quash - AI-powered mobile automation agent"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -281,6 +281,77 @@ class BackendClient:
281
281
  "error": str(e)
282
282
  }
283
283
 
284
async def finalize_session(
    self,
    api_key: str,
    session_id: str,
    task: str,
    device_serial: str,
    status: str,
    final_message: Optional[str] = None,
    error: Optional[str] = None,
    duration_seconds: float = 0.0,
    config: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Finalize a session and aggregate execution record.

    Called when task ends for ANY reason: normal completion, max steps, error, interrupt.
    POSTs the session summary to ``{base_url}/api/agent/finalize`` and never
    raises: every failure is reported through the returned dict instead.

    Args:
        api_key: Quash API key
        session_id: Session identifier to finalize
        task: Original task description
        device_serial: Device serial number
        status: "success", "failed", "max_steps", "error", "interrupted"
        final_message: Final message from agent
        error: Error message if failed
        duration_seconds: Total execution time
        config: Execution configuration (sent as ``{}`` when omitted)

    Returns:
        Dict with finalization result:
        {
            "finalized": bool,
            "execution_id": str,
            "total_steps": int,
            "total_tokens": {"prompt": int, "completion": int, "total": int},
            "total_cost": float,
            "error": str (if failed)
        }
        On a non-200 response, an unexpected body, or any exception the
        result is ``{"finalized": False, "error": <description>}``.
    """
    logger.info(f"🏁 Finalizing session {session_id} - Status: {status}")

    payload = {
        "api_key": api_key,
        "session_id": session_id,
        "task": task,
        "device_serial": device_serial,
        "status": status,
        "final_message": final_message,
        "error": error,
        "duration_seconds": duration_seconds,
        "config": config or {}
    }

    try:
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}/api/agent/finalize",
                json=payload
            )

            if response.status_code != 200:
                logger.warning(f"Failed to finalize session: HTTP {response.status_code}")
                return {"finalized": False, "error": f"HTTP {response.status_code}"}

            result = response.json()

    except Exception as e:
        logger.error(f"Failed to finalize session: {e}")
        return {"finalized": False, "error": str(e)}

    if not isinstance(result, dict):
        # A JSON list/scalar cannot carry the documented fields.
        logger.warning("Failed to finalize session: unexpected response format")
        return {"finalized": False, "error": "Unexpected response format"}

    if result.get("finalized"):
        # Logging must never turn a successful finalization into a reported
        # failure, so tolerate a missing/null/non-numeric cost here.
        total_cost = result.get("total_cost", 0)
        if isinstance(total_cost, bool) or not isinstance(total_cost, (int, float)):
            total_cost = 0
        logger.info(f"✅ Session finalized: {result.get('total_steps')} steps, ${total_cost:.4f}")

    return result
354
+
284
355
 
285
356
  # Singleton instance
286
357
  _backend_client = None
@@ -0,0 +1,636 @@
1
+ """
2
+ Execute tool V3 - Step-by-step execution with state-change verification.
3
+
4
+ This reimplements the event-driven state verification from the original Mahoraga agent
5
+ using a polling-based approach suitable for the client-server architecture.
6
+
7
+ All state-change detection logic is contained in this file.
8
+ """
9
+
10
+ import time
11
+ import uuid
12
+ import asyncio
13
+ import hashlib
14
+ import json
15
+ from typing import Dict, Any, Callable, Optional, Tuple
16
+ from ..state import get_state
17
+ from ..backend_client import get_backend_client
18
+ from ..device.state_capture import get_device_state
19
+ from ..device.adb_tools import AdbTools
20
+
21
+ # Import mahoraga components for tool functions
22
+ try:
23
+ from mahoraga.tools import Tools, describe_tools
24
+ from mahoraga.tools.adb import AdbTools as MahoragaAdbTools
25
+ from mahoraga.agent.context.personas import DEFAULT
26
+ from mahoraga.agent.utils.async_utils import async_to_sync
27
+ except ImportError as e:
28
+ print(f"Warning: Could not import mahoraga components: {e}")
29
+ Tools = None
30
+ describe_tools = None
31
+ MahoragaAdbTools = None
32
+ DEFAULT = None
33
+ async_to_sync = None
34
+
35
+
36
def get_ui_state_hash(ui_state_dict: Dict[str, Any]) -> str:
    """
    Generate a stable hash of the UI state for comparison.

    The hash covers the foreground package name plus, for every dict node in
    the accessibility tree, its ``className``, ``text``, ``resourceId``,
    ``bounds`` and (recursively) its ``children``. All other node keys are
    ignored, so two captures that differ only in those keys hash identically.

    Missing or ``None`` values for ``phone_state``, ``a11y_tree`` or
    ``children`` are treated as empty, and a tree given as a single root dict
    is handled like a one-element list.

    Args:
        ui_state_dict: Captured state with optional ``phone_state`` and
            ``a11y_tree`` entries.

    Returns:
        Hex-encoded SHA-256 digest of the normalized state.
    """
    def normalize_tree(tree):
        """Extract stable elements from UI tree."""
        if isinstance(tree, dict):
            # Single root node: treat it as a one-element level.
            tree = [tree]
        if not isinstance(tree, list):
            return []

        normalized = []
        for item in tree:
            if not isinstance(item, dict):
                continue
            element = {
                "className": item.get("className", ""),
                "text": item.get("text", ""),
                "resourceId": item.get("resourceId", ""),
                "bounds": item.get("bounds", ""),
            }
            children = item.get("children")
            if children:
                element["children"] = normalize_tree(children)
            normalized.append(element)
        return normalized

    ui_state_dict = ui_state_dict or {}
    phone_state = ui_state_dict.get("phone_state")
    if not isinstance(phone_state, dict):
        phone_state = {}

    state_repr = {
        "package": phone_state.get("package", ""),
        "tree": normalize_tree(ui_state_dict.get("a11y_tree")),
    }

    # sort_keys keeps the serialization independent of dict insertion order.
    state_json = json.dumps(state_repr, sort_keys=True)
    return hashlib.sha256(state_json.encode()).hexdigest()
70
+
71
+
72
def get_action_timeout(code: str) -> float:
    """
    Pick how long (in seconds) to wait for the UI to react to an action.

    The action code is lower-cased and scanned for known tool-name
    substrings; the first matching rule, in the order listed below, decides
    the timeout. Code matching no rule gets the default of 5 seconds.
    """
    lowered = code.lower()

    # (substrings, timeout) pairs - order matters, first hit wins.
    rules = (
        (("start_app",), 10.0),                 # App launches can be slow
        (("tap", "click"), 5.0),                # Screen transitions
        (("swipe", "scroll"), 2.0),             # Scroll animations
        (("drag",), 2.0),
        (("input_text",), 2.0),                 # Text input is fast
        (("press_back", "press_home"), 3.0),    # Navigation
        (("press_key",), 1.0),
    )

    for keywords, seconds in rules:
        if any(keyword in lowered for keyword in keywords):
            return seconds

    return 5.0  # Default timeout
96
+
97
+
98
def wait_for_state_change(
    get_state_func,
    device_serial: str,
    old_state_hash: str,
    max_wait: float = 10.0,
    poll_interval: float = 0.5,
    min_wait: float = 0.3
) -> Tuple[Dict[str, Any], bytes, bool]:
    """
    Poll device until UI state changes or timeout.

    This is the core polling mechanism that replaces Mahoraga's event-driven approach.
    The call blocks the current thread (it uses ``time.sleep``).

    Args:
        get_state_func: Callable taking the device serial and returning a
            ``(ui_state_dict, screenshot_bytes)`` pair.
        device_serial: Serial passed through to ``get_state_func``.
        old_state_hash: Hash (from ``get_ui_state_hash``) of the state
            captured before the action.
        max_wait: Polling budget in seconds, counted after ``min_wait``.
        poll_interval: Pause between two captures, in seconds.
        min_wait: Unconditional delay before the first capture, giving the
            action time to take effect.

    Returns:
        Tuple of (ui_state_dict, screenshot_bytes, state_changed: bool).
        On timeout one last capture is taken and compared, so
        ``state_changed`` always describes the state that is returned.
    """
    # Always wait minimum time for action to take effect
    if min_wait > 0:
        time.sleep(min_wait)

    # Monotonic clock: immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + max_wait

    while time.monotonic() < deadline:
        # Capture current state
        ui_state_dict, screenshot_bytes = get_state_func(device_serial)

        # Check if state changed
        if get_ui_state_hash(ui_state_dict) != old_state_hash:
            return ui_state_dict, screenshot_bytes, True

        # State hasn't changed - wait, but never sleep past the deadline
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))

    # Timeout - take a final capture and report whether it differs
    ui_state_dict, screenshot_bytes = get_state_func(device_serial)
    state_changed = get_ui_state_hash(ui_state_dict) != old_state_hash
    return ui_state_dict, screenshot_bytes, state_changed
134
+
135
+
136
def wait_for_action_effect(
    get_state_func,
    device_serial: str,
    old_ui_state: Dict[str, Any],
    executed_code: str,
    min_wait: float = 0.3,
    poll_interval: float = 0.5
) -> Tuple[Dict[str, Any], bytes, bool]:
    """
    Block until the executed action has visibly affected the device UI.

    Code containing ``get_state`` or ``complete(`` is treated as not
    changing the UI: after a short pause the current state is returned
    with no screenshot and ``state_changed`` set to False. Any other code
    is polled via ``wait_for_state_change`` with a timeout chosen by
    ``get_action_timeout``.

    Returns:
        Tuple of (new_ui_state_dict, screenshot_bytes, state_changed: bool)
    """
    lowered = executed_code.lower()
    passive_markers = ("get_state", "complete(")

    if any(marker in lowered for marker in passive_markers):
        # Nothing on screen is expected to move - skip the polling loop.
        time.sleep(0.1)
        snapshot = get_state_func(device_serial)
        return snapshot[0], None, False

    # Fingerprint of the pre-action state, then the per-action time budget.
    baseline_hash = get_ui_state_hash(old_ui_state)
    budget = get_action_timeout(executed_code)

    fresh_state, fresh_screenshot, did_change = wait_for_state_change(
        get_state_func,
        device_serial,
        baseline_hash,
        max_wait=budget,
        poll_interval=poll_interval,
        min_wait=min_wait
    )
    return fresh_state, fresh_screenshot, did_change
174
+
175
+
176
+ # ============================================================
177
+ # MAIN EXECUTION FUNCTION
178
+ # ============================================================
179
+
180
async def execute_v3(
    task: str,
    max_steps: int = 15,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Dict[str, Any]:
    """
    Execute automation task using step-by-step backend communication.

    Each step:
    1. Capture device state (State A)
    2. Send to backend for AI decision
    3. Execute returned action locally
    4. POLL until state changes (State B ≠ State A) or timeout
    5. Send State B to backend in next iteration
    6. Repeat until complete

    This ensures the backend always sees the UPDATED state after each action,
    preventing the agent from making decisions based on stale state.

    Args:
        task: Natural language task description
        max_steps: Maximum number of steps to execute (default: 15)
        progress_callback: Optional callback for progress updates

    Returns:
        Dict with execution result and details. It always carries "status"
        ("success", "failed", "error" or "interrupted") and "message".
        Prerequisite failures add "prerequisite"; results produced once the
        loop has started add "steps_taken", "tokens", "cost" and
        "duration_seconds".
    """
    import contextlib
    import io
    import traceback

    state = get_state()
    backend = get_backend_client()

    # Check prerequisites
    if not state.is_device_connected():
        return {
            "status": "error",
            "message": "❌ No device connected. Please run 'connect' first.",
            "prerequisite": "connect"
        }

    if not state.is_configured():
        return {
            "status": "error",
            "message": "❌ Configuration incomplete. Please run 'configure' with your Quash API key.",
            "prerequisite": "configure"
        }

    if not state.portal_ready:
        return {
            "status": "error",
            "message": "⚠️ Portal accessibility service not ready. Please ensure it's enabled on the device.",
            "prerequisite": "connect"
        }

    # Get API key and config
    quash_api_key = state.config["api_key"]
    config = {
        "model": state.config["model"],
        "temperature": state.config["temperature"],
        "vision": state.config["vision"],
        "reasoning": state.config["reasoning"],
        "reflection": state.config["reflection"],
        "debug": state.config["debug"]
    }

    # Validate API key
    validation_result = await backend.validate_api_key(quash_api_key)

    if not validation_result.get("valid", False):
        error_msg = validation_result.get("error", "Invalid API key")
        return {
            "status": "error",
            "message": f"❌ API Key validation failed: {error_msg}",
            "prerequisite": "configure"
        }

    # Check credits
    user_info = validation_result.get("user", {})
    credits = user_info.get("credits", 0)

    if credits <= 0:
        return {
            "status": "error",
            "message": f"❌ Insufficient credits. Current balance: ${credits:.2f}",
            "user": user_info
        }

    # Progress logging helper
    def log_progress(message: str):
        if progress_callback:
            progress_callback(message)

    log_progress(f"✅ API Key validated - Credits: ${credits:.2f}")
    log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
    log_progress(f"🚀 Starting task: {task}")
    log_progress(f"📱 Device: {state.device_serial}")
    log_progress(f"🧠 Model: {config['model']}")
    log_progress(f"🔢 Max steps: {max_steps}")

    # Initialize execution
    start_time = time.time()
    session_id = f"session_{uuid.uuid4().hex[:12]}"
    step_number = 0
    chat_history = []
    total_tokens = {"prompt": 0, "completion": 0, "total": 0}
    total_cost = 0.0

    async def finalize(status, final_message, error, duration):
        """Report the end of this session to the backend."""
        await backend.finalize_session(
            api_key=quash_api_key,
            session_id=session_id,
            task=task,
            device_serial=state.device_serial,
            status=status,
            final_message=final_message,
            error=error,
            duration_seconds=duration,
            config=config
        )

    def make_printing_wrapper(func):
        """Wrap a tool function to print its return value."""
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            # Print the result so stdout captures it
            if result is not None:
                print(result)
            return result
        return wrapper

    # Initialize local ADB tools for code execution
    adb_tools = AdbTools(serial=state.device_serial, use_tcp=True)

    # Code executor namespace - add tool functions so generated code can call them
    executor_globals = {
        "__builtins__": __builtins__,
        "adb_tools": adb_tools
    }

    # Add tool functions to executor namespace (like start_app, swipe, etc.)
    if describe_tools and DEFAULT and MahoragaAdbTools:
        try:
            # Create a mahoraga AdbTools instance for tool execution
            mahoraga_tools = MahoragaAdbTools(
                serial=state.device_serial,
                use_tcp=True,
                remote_tcp_port=8080
            )

            # Get all tool functions from mahoraga AdbTools instance
            tool_list = describe_tools(mahoraga_tools, exclude_tools=None)

            # Filter by allowed tools from DEFAULT persona
            allowed_tool_names = DEFAULT.allowed_tools if hasattr(DEFAULT, 'allowed_tools') else []
            filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}

            # Add each tool function to executor globals with print wrapper
            for tool_name, tool_function in filtered_tools.items():
                # Convert async functions to sync if needed
                if asyncio.iscoroutinefunction(tool_function) and async_to_sync:
                    tool_function = async_to_sync(tool_function)

                # Add wrapped function to globals so code can call it directly
                executor_globals[tool_name] = make_printing_wrapper(tool_function)

            log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
        except Exception as e:
            log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
            log_progress(f"Traceback: {traceback.format_exc()}")

    executor_locals = {}

    try:
        # ============================================================
        # STEP-BY-STEP EXECUTION LOOP
        # ============================================================
        while step_number < max_steps:  # Use user-provided max_steps
            step_number += 1
            log_progress(f"🧠 Step {step_number}/{max_steps}: Analyzing...")

            # 1. Capture device state (State A)
            try:
                ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)

                # Only include screenshot if vision is enabled
                if not config["vision"]:
                    screenshot_bytes = None

                # Log current state
                current_package = ui_state_dict.get("phone_state", {}).get("package", "unknown")
                log_progress(f"📱 Current app: {current_package}")

            except Exception as e:
                log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
                ui_state_dict = {
                    "a11y_tree": [{"index": 0, "text": "Error capturing UI", "children": []}],
                    "phone_state": {"package": "unknown"}
                }
                screenshot_bytes = None

            # 2. Send to backend for AI decision
            step_result = await backend.execute_step(
                api_key=quash_api_key,
                session_id=session_id,
                step_number=step_number,
                task=task,
                ui_state=ui_state_dict,
                chat_history=chat_history,
                config=config,
                screenshot_bytes=screenshot_bytes
            )

            # Handle backend errors
            if "error" in step_result:
                # Fall back to the error text so a response without "message"
                # does not raise KeyError and mask the real backend error.
                backend_message = step_result.get("message") or str(step_result["error"])
                log_progress(f"💥 Backend error: {backend_message}")
                # NOTE(review): this path and the completion path below return
                # without calling finalize_session, although its docstring says
                # it is called when a task ends for any reason - confirm whether
                # the backend finalizes these sessions itself.
                return {
                    "status": "error",
                    "message": backend_message,
                    "error": step_result["error"],
                    "steps_taken": step_number,
                    "tokens": total_tokens,
                    "cost": total_cost,
                    "duration_seconds": time.time() - start_time
                }

            # Update usage tracking
            step_tokens = step_result.get("tokens_used", {})
            step_cost = step_result.get("cost", 0.0)

            total_tokens["prompt"] += step_tokens.get("prompt", 0)
            total_tokens["completion"] += step_tokens.get("completion", 0)
            total_tokens["total"] += step_tokens.get("total", 0)
            total_cost += step_cost

            # Get action from backend
            action = step_result.get("action", {})
            action_type = action.get("type")
            code = action.get("code")
            reasoning = action.get("reasoning")

            # Log reasoning
            if reasoning:
                log_progress(f"🤔 Reasoning: {reasoning}")

            # Update chat history
            assistant_response = step_result.get("assistant_response", "")
            chat_history.append({"role": "assistant", "content": assistant_response})

            # 3. Check if task is complete
            if step_result.get("completed", False):
                success = step_result.get("success", False)
                final_message = step_result.get("final_message", "Task completed")

                duration = time.time() - start_time

                if success:
                    log_progress(f"✅ Task completed successfully in {step_number} steps")
                    log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

                    return {
                        "status": "success",
                        "steps_taken": step_number,
                        "final_message": final_message,
                        "message": f"✅ Success: {final_message}",
                        "tokens": total_tokens,
                        "cost": total_cost,
                        "duration_seconds": duration
                    }

                log_progress(f"❌ Task failed: {final_message}")
                log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

                return {
                    "status": "failed",
                    "steps_taken": step_number,
                    "final_message": final_message,
                    "message": f"❌ Failed: {final_message}",
                    "tokens": total_tokens,
                    "cost": total_cost,
                    "duration_seconds": duration
                }

            # 4. Execute action locally
            if code and action_type == "execute_code":
                log_progress("⚡ Executing action...")

                # Store old UI state for comparison
                old_ui_state = ui_state_dict.copy()

                try:
                    # Capture stdout and stderr to get tool function outputs
                    stdout = io.StringIO()
                    stderr = io.StringIO()

                    with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
                        # SECURITY NOTE(review): `code` comes from the backend
                        # response and runs with full builtins - exec() is not
                        # a sandbox. Left as-is because the tool design depends
                        # on it; the backend must be trusted.
                        exec(code, executor_globals, executor_locals)

                    # Get captured output
                    execution_output = stdout.getvalue()
                    error_output = stderr.getvalue()

                    # ============================================================
                    # CRITICAL: Wait for state change (polling-based event detection)
                    # ============================================================
                    log_progress("⏳ Waiting for UI state to update...")

                    try:
                        # The poller sleeps and talks to the device
                        # synchronously; run it in a worker thread so the
                        # event loop stays responsive while we wait.
                        new_ui_state_dict, _, state_changed = await asyncio.to_thread(
                            wait_for_action_effect,
                            get_device_state,
                            state.device_serial,
                            old_ui_state,
                            code,
                            min_wait=0.3,
                            poll_interval=0.5
                        )

                        # Log what happened
                        if state_changed:
                            old_pkg = old_ui_state.get("phone_state", {}).get("package", "")
                            new_pkg = new_ui_state_dict.get("phone_state", {}).get("package", "")

                            if old_pkg != new_pkg:
                                log_progress(f"✅ State changed: App switched ({old_pkg} → {new_pkg})")
                            else:
                                log_progress("✅ State changed: UI updated")
                        else:
                            log_progress("⚠️ WARNING: State did NOT change after action (timeout)")
                            log_progress(" This might mean the action had no effect or took too long")

                    except Exception as e:
                        log_progress(f"⚠️ Error during state change detection: {e}")
                        state_changed = False
                        # Fallback: Just wait a bit (without blocking the loop)
                        await asyncio.sleep(1.5)

                    # Build feedback message
                    feedback_parts = []

                    if execution_output:
                        feedback_parts.append(f"Action output: {execution_output.strip()}")

                    if state_changed:
                        feedback_parts.append("UI state updated successfully")
                    else:
                        feedback_parts.append("WARNING: UI state did not change (action may have failed)")

                    if error_output:
                        feedback_parts.append(f"Warnings: {error_output.strip()}")

                    feedback = " | ".join(feedback_parts) if feedback_parts else "Action executed"

                    log_progress(f"✅ {feedback[:200]}")

                    # Add execution result to chat history
                    chat_history.append({
                        "role": "user",
                        "content": f"Execution Result:\n```\n{feedback}\n```"
                    })

                except Exception as e:
                    error_msg = f"Error during execution: {str(e)}"
                    log_progress(f"💥 Action failed: {error_msg}")

                    # Add error to chat history so the agent can react to it
                    chat_history.append({
                        "role": "user",
                        "content": f"Execution Error:\n```\n{error_msg}\n```"
                    })

            else:
                # No code to execute
                log_progress("⚠️ No action code provided by backend")
                chat_history.append({
                    "role": "user",
                    "content": "No code was provided. Please provide code to execute."
                })

        # Max steps reached
        log_progress(f"⚠️ Reached maximum steps ({max_steps})")
        log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

        duration = time.time() - start_time

        # Finalize session on backend to create execution record
        await finalize(
            "max_steps",
            f"Reached maximum step limit of {max_steps}",
            None,
            duration
        )

        return {
            "status": "failed",
            "steps_taken": step_number,
            "final_message": f"Reached maximum step limit of {max_steps}",
            "message": "❌ Failed: Maximum steps reached",
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": duration
        }

    except KeyboardInterrupt:
        log_progress("ℹ️ Task interrupted by user")
        duration = time.time() - start_time

        # Finalize session on backend
        await finalize("interrupted", "Task interrupted by user", None, duration)

        return {
            "status": "interrupted",
            "message": "ℹ️ Task execution interrupted",
            "steps_taken": step_number,
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": duration
        }

    except Exception as e:
        error_msg = str(e)
        log_progress(f"💥 Error: {error_msg}")
        duration = time.time() - start_time

        # Finalize session on backend
        await finalize("error", None, error_msg, duration)

        return {
            "status": "error",
            "message": f"💥 Execution error: {error_msg}",
            "error": error_msg,
            "steps_taken": step_number,
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": duration
        }

    finally:
        # Cleanup TCP forwarding
        if adb_tools:
            adb_tools.teardown_tcp_forward()
@@ -1,371 +0,0 @@
1
- """
2
- Execute tool V3 - Step-by-step execution with local device access.
3
-
4
- AI logic runs on backend (private), device access happens locally (public).
5
- This hybrid approach keeps proprietary code private while allowing local device control.
6
- """
7
-
8
- import time
9
- import uuid
10
- import asyncio
11
- from typing import Dict, Any, Callable, Optional
12
- from ..state import get_state
13
- from ..backend_client import get_backend_client
14
- from ..device.state_capture import get_device_state
15
- from ..device.adb_tools import AdbTools
16
-
17
- # Import mahoraga components for tool functions
18
- try:
19
- from mahoraga.tools import Tools, describe_tools
20
- from mahoraga.tools.adb import AdbTools as MahoragaAdbTools
21
- from mahoraga.agent.context.personas import DEFAULT
22
- from mahoraga.agent.utils.async_utils import async_to_sync
23
- except ImportError as e:
24
- print(f"Warning: Could not import mahoraga components: {e}")
25
- Tools = None
26
- describe_tools = None
27
- MahoragaAdbTools = None
28
- DEFAULT = None
29
- async_to_sync = None
30
-
31
-
32
async def execute_v3(
    task: str,
    progress_callback: Optional[Callable[[str], None]] = None,
    max_steps: int = 15,
) -> Dict[str, Any]:
    """
    Execute an automation task using step-by-step backend communication.

    Each step:
    1. Capture device state locally (UI + optional screenshot)
    2. Send to backend for AI decision
    3. Execute returned action locally
    4. Repeat until the backend reports completion or ``max_steps`` is hit

    Security note: the action code returned by the backend is run with
    ``exec`` and full builtins in this process. It is NOT sandboxed, so the
    backend must be trusted.

    Args:
        task: Natural language task description
        progress_callback: Optional callback for progress updates
        max_steps: Maximum number of steps to run before reporting failure
            (defaults to 15, the previously hard-coded limit)

    Returns:
        Dict with execution result and details. ``status`` is one of
        "success", "failed", "error" or "interrupted". Once execution has
        started the dict also carries ``steps_taken``, ``tokens``, ``cost``
        and ``duration_seconds``. Prerequisite failures carry a
        ``prerequisite`` key naming the tool to run first.
    """
    state = get_state()
    backend = get_backend_client()

    # Check prerequisites
    if not state.is_device_connected():
        return {
            "status": "error",
            "message": "❌ No device connected. Please run 'connect' first.",
            "prerequisite": "connect"
        }

    if not state.is_configured():
        return {
            "status": "error",
            "message": "❌ Configuration incomplete. Please run 'configure' with your Quash API key.",
            "prerequisite": "configure"
        }

    if not state.portal_ready:
        return {
            "status": "error",
            "message": "⚠️ Portal accessibility service not ready. Please ensure it's enabled on the device.",
            "prerequisite": "connect"
        }

    # Get API key and config
    quash_api_key = state.config["api_key"]
    config = {
        key: state.config[key]
        for key in ("model", "temperature", "vision", "reasoning", "reflection", "debug")
    }

    # Validate API key
    validation_result = await backend.validate_api_key(quash_api_key)

    if not validation_result.get("valid", False):
        error_msg = validation_result.get("error", "Invalid API key")
        return {
            "status": "error",
            "message": f"❌ API Key validation failed: {error_msg}",
            "prerequisite": "configure"
        }

    # Check credits. `or` guards against explicit None values in the
    # validation payload, which would otherwise crash the comparison below.
    user_info = validation_result.get("user") or {}
    credit_balance = user_info.get("credits") or 0

    if credit_balance <= 0:
        return {
            "status": "error",
            "message": f"❌ Insufficient credits. Current balance: ${credit_balance:.2f}",
            "user": user_info
        }

    # Progress logging helper
    def log_progress(message: str):
        if progress_callback:
            progress_callback(message)

    log_progress(f"✅ API Key validated - Credits: ${credit_balance:.2f}")
    log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
    log_progress(f"🚀 Starting task: {task}")
    log_progress(f"📱 Device: {state.device_serial}")
    log_progress(f"🧠 Model: {config['model']}")

    # Initialize execution
    start_time = time.time()
    session_id = f"session_{uuid.uuid4().hex[:12]}"
    step_number = 0
    chat_history = []
    total_tokens = {"prompt": 0, "completion": 0, "total": 0}
    total_cost = 0.0

    def usage_fields() -> Dict[str, Any]:
        # Usage/timing fields shared by every result returned after start.
        return {
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": time.time() - start_time
        }

    # Initialize local ADB tools for code execution
    adb_tools = AdbTools(serial=state.device_serial, use_tcp=True)

    # Code executor namespace - add tool functions so generated code can call them
    executor_globals = {
        "__builtins__": __builtins__,
        "adb_tools": adb_tools
    }

    # Add tool functions to executor namespace (like start_app, swipe, etc.)
    if describe_tools and DEFAULT and MahoragaAdbTools:
        try:
            # Create a mahoraga AdbTools instance for tool execution
            # This instance has all the tool methods like swipe, start_app, etc.
            mahoraga_tools = MahoragaAdbTools(
                serial=state.device_serial,
                use_tcp=True,
                remote_tcp_port=8080
            )

            # Get all tool functions from mahoraga AdbTools instance
            tool_list = describe_tools(mahoraga_tools, exclude_tools=None)

            # Filter by allowed tools from DEFAULT persona
            allowed_tool_names = getattr(DEFAULT, "allowed_tools", [])
            filtered_tools = {
                name: func for name, func in tool_list.items()
                if name in allowed_tool_names
            }

            # Add each tool function to executor globals
            for tool_name, tool_function in filtered_tools.items():
                # Generated code calls tools synchronously, so wrap coroutines
                # when the converter is available.
                if async_to_sync and asyncio.iscoroutinefunction(tool_function):
                    tool_function = async_to_sync(tool_function)

                # Add to globals so code can call it directly
                executor_globals[tool_name] = tool_function

            log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
        except Exception as e:
            # Best effort: execution continues with only `adb_tools` exposed.
            log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
            import traceback
            log_progress(f"Traceback: {traceback.format_exc()}")

    executor_locals = {}

    try:
        # ============================================================
        # STEP-BY-STEP EXECUTION LOOP
        # ============================================================
        while step_number < max_steps:
            step_number += 1
            log_progress(f"🧠 Step {step_number}: Thinking...")

            # 1. Capture device state
            try:
                ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)

                # Only include screenshot if vision is enabled
                if not config["vision"]:
                    screenshot_bytes = None

                # DEBUG: Log UI state
                a11y_preview = ui_state_dict.get("a11y_tree", "")[:150]
                log_progress(f"📱 UI State captured - A11y tree preview: {a11y_preview}...")
                log_progress(f"📷 Screenshot: {'Present' if screenshot_bytes else 'None'}")

            except Exception as e:
                # Fall back to an empty hierarchy so the backend still gets a
                # well-formed state and can decide how to proceed.
                log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
                ui_state_dict = {
                    "a11y_tree": "<hierarchy></hierarchy>",
                    "phone_state": {"package": "unknown"}
                }
                screenshot_bytes = None

            # 2. Send to backend for AI decision
            step_result = await backend.execute_step(
                api_key=quash_api_key,
                session_id=session_id,
                step_number=step_number,
                task=task,
                ui_state=ui_state_dict,
                chat_history=chat_history,
                config=config,
                screenshot_bytes=screenshot_bytes
            )

            # Handle backend errors. "message" may be absent when only
            # "error" is set, so fall back to the error itself rather than
            # raising KeyError and hiding the real failure.
            if "error" in step_result:
                backend_error = step_result["error"]
                backend_message = step_result.get("message") or str(backend_error)
                log_progress(f"💥 Backend error: {backend_message}")
                return {
                    "status": "error",
                    "message": backend_message,
                    "error": backend_error,
                    "steps_taken": step_number,
                    **usage_fields()
                }

            # Update usage tracking
            step_tokens = step_result.get("tokens_used") or {}
            for token_kind in total_tokens:
                total_tokens[token_kind] += step_tokens.get(token_kind, 0)
            total_cost += step_result.get("cost") or 0.0

            # Get action from backend
            action = step_result.get("action") or {}
            action_type = action.get("type")
            code = action.get("code")
            reasoning = action.get("reasoning")
            assistant_response = step_result.get("assistant_response") or ""

            # DEBUG: Log full backend response
            log_progress("\n📋 DEBUG - Backend Response:")
            log_progress(f" - Action type: {action_type}")
            log_progress(f" - Completed: {step_result.get('completed', False)}")
            log_progress(f" - Success: {step_result.get('success', None)}")
            log_progress(f" - Code present: {bool(code)}")
            if code:
                log_progress(f" - Code: {code[:100]}..." if len(code) > 100 else f" - Code: {code}")
            log_progress(f" - Assistant response: {assistant_response[:200]}...\n")

            # Log reasoning
            if reasoning:
                log_progress(f"🤔 Reasoning: {reasoning}")

            # Update chat history
            chat_history.append({"role": "assistant", "content": assistant_response})

            # 3. Check if task is complete
            if step_result.get("completed", False):
                success = step_result.get("success", False)
                final_message = step_result.get("final_message", "Task completed")

                if success:
                    log_progress(f"✅ Task completed successfully in {step_number} steps")
                    status, summary = "success", f"✅ Success: {final_message}"
                else:
                    log_progress(f"❌ Task failed: {final_message}")
                    status, summary = "failed", f"❌ Failed: {final_message}"
                log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

                return {
                    "status": status,
                    "steps_taken": step_number,
                    "final_message": final_message,
                    "message": summary,
                    **usage_fields()
                }

            # 4. Execute action locally
            if code and action_type == "execute_code":
                log_progress("⚡ Executing action...")

                # Drop the previous step's `_result` so a step that sets none
                # does not report stale output back to the model.
                executor_locals.pop("_result", None)

                try:
                    # NOTE(security): this is NOT a sandbox. The backend-supplied
                    # code runs in-process with full builtins and device access.
                    exec(code, executor_globals, executor_locals)

                    # Get execution result
                    execution_output = executor_locals.get("_result", "Code executed successfully")

                except Exception as e:
                    # Report the failure to the model instead of aborting, so
                    # it can retry with different code on the next step.
                    execution_output = f"Error during execution: {str(e)}"
                    log_progress(f"💥 Action failed: {execution_output}")

                # Add execution result (or error) to chat history
                chat_history.append({
                    "role": "user",
                    "content": f"Execution Result:\n```\n{execution_output}\n```"
                })

            else:
                # No code to execute
                log_progress("⚠️ No action code provided by backend")
                chat_history.append({
                    "role": "user",
                    "content": "No code was provided. Please provide code to execute."
                })

        # Max steps reached
        log_progress(f"⚠️ Reached maximum steps ({step_number})")
        log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

        return {
            "status": "failed",
            "steps_taken": step_number,
            "final_message": f"Reached maximum step limit of {step_number}",
            "message": "❌ Failed: Maximum steps reached",
            **usage_fields()
        }

    except KeyboardInterrupt:
        log_progress("⏹️ Task interrupted by user")
        return {
            "status": "interrupted",
            "message": "⏹️ Task execution interrupted",
            "steps_taken": step_number,
            **usage_fields()
        }

    except Exception as e:
        # Top-level boundary: convert any unexpected failure into an error
        # result so the caller always receives a dict.
        error_msg = str(e)
        log_progress(f"💥 Error: {error_msg}")
        return {
            "status": "error",
            "message": f"💥 Execution error: {error_msg}",
            "error": error_msg,
            "steps_taken": step_number,
            **usage_fields()
        }

    finally:
        # Cleanup TCP forwarding. An exception raised here would replace the
        # result being returned, so cleanup failures are only logged.
        if adb_tools:
            try:
                adb_tools.teardown_tcp_forward()
            except Exception as cleanup_error:
                log_progress(f"⚠️ Warning: Failed to tear down TCP forwarding: {cleanup_error}")
File without changes
File without changes