quash-mcp 0.2.8__tar.gz → 0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quash-mcp might be problematic. Click here for more details.
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/PKG-INFO +19 -3
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/README.md +18 -2
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/pyproject.toml +1 -1
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/backend_client.py +71 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/device/state_capture.py +6 -2
- quash_mcp-0.2.10/quash_mcp/tools/execute_v3.py +636 -0
- quash_mcp-0.2.8/quash_mcp/tools/execute_v3.py +0 -371
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/.gitignore +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/SETUP_CLAUDE_CODE.md +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/__init__.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/__main__.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/device/__init__.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/device/adb_tools.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/device/portal.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/server.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/state.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/__init__.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/build.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/build_old.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/configure.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/connect.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/execute.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/execute_v2_backup.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/runsuite.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/quash_mcp/tools/usage.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/test_backend_integration.py +0 -0
- {quash_mcp-0.2.8 → quash_mcp-0.2.10}/test_tools_loading.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: quash-mcp
|
|
3
|
-
Version: 0.2.8
|
|
3
|
+
Version: 0.2.10
|
|
4
4
|
Summary: Model Context Protocol server for Quash - AI-powered mobile automation agent
|
|
5
5
|
Project-URL: Homepage, https://quashbugs.com
|
|
6
6
|
Project-URL: Repository, https://github.com/quash/quash-mcp
|
|
@@ -57,7 +57,7 @@ All dependencies (including ADB tools and device connectivity) are automatically
|
|
|
57
57
|
|
|
58
58
|
### 1. Get Your API Key
|
|
59
59
|
|
|
60
|
-
1. Visit [quashbugs.com](
|
|
60
|
+
1. Visit [quashbugs.com/mcp](http://13.220.180.140.nip.io/) (or your deployment URL)
|
|
61
61
|
2. Sign in with Google
|
|
62
62
|
3. Go to Dashboard → API Keys
|
|
63
63
|
4. Create a new API key
|
|
@@ -92,6 +92,22 @@ Add to your MCP host's config file:
|
|
|
92
92
|
- No PATH configuration needed
|
|
93
93
|
- Uses whichever Python has quash-mcp installed
|
|
94
94
|
|
|
95
|
+
#### CLI Configuration (If Supported by Host)
|
|
96
|
+
|
|
97
|
+
Some MCP hosts might provide a command-line interface to add servers.
|
|
98
|
+
|
|
99
|
+
**Examples:**
|
|
100
|
+
|
|
101
|
+
- **Claude Code:**
|
|
102
|
+
```bash
|
|
103
|
+
claude mcp add quash quash-mcp
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
- **Gemini CLI:**
|
|
107
|
+
```bash
|
|
108
|
+
gemini mcp add quash quash-mcp
|
|
109
|
+
```
|
|
110
|
+
|
|
95
111
|
#### Alternative: Direct Command (if in PATH)
|
|
96
112
|
|
|
97
113
|
If `quash-mcp` is in your PATH:
|
|
@@ -220,7 +236,7 @@ User: "Show me my usage statistics"
|
|
|
220
236
|
|
|
221
237
|
- **Python 3.11+** - Required for the MCP server
|
|
222
238
|
- **Android Device** - Emulator or physical device with USB debugging enabled
|
|
223
|
-
- **Quash API Key** - Get from [quashbugs.com](
|
|
239
|
+
- **Quash API Key** - Get from [quashbugs.com/mcp](http://13.220.180.140.nip.io/)
|
|
224
240
|
|
|
225
241
|
Dependencies automatically installed:
|
|
226
242
|
- Android Debug Bridge (ADB) - via `adbutils`
|
|
@@ -24,7 +24,7 @@ All dependencies (including ADB tools and device connectivity) are automatically
|
|
|
24
24
|
|
|
25
25
|
### 1. Get Your API Key
|
|
26
26
|
|
|
27
|
-
1. Visit [quashbugs.com](
|
|
27
|
+
1. Visit [quashbugs.com/mcp](http://13.220.180.140.nip.io/) (or your deployment URL)
|
|
28
28
|
2. Sign in with Google
|
|
29
29
|
3. Go to Dashboard → API Keys
|
|
30
30
|
4. Create a new API key
|
|
@@ -59,6 +59,22 @@ Add to your MCP host's config file:
|
|
|
59
59
|
- No PATH configuration needed
|
|
60
60
|
- Uses whichever Python has quash-mcp installed
|
|
61
61
|
|
|
62
|
+
#### CLI Configuration (If Supported by Host)
|
|
63
|
+
|
|
64
|
+
Some MCP hosts might provide a command-line interface to add servers.
|
|
65
|
+
|
|
66
|
+
**Examples:**
|
|
67
|
+
|
|
68
|
+
- **Claude Code:**
|
|
69
|
+
```bash
|
|
70
|
+
claude mcp add quash quash-mcp
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
- **Gemini CLI:**
|
|
74
|
+
```bash
|
|
75
|
+
gemini mcp add quash quash-mcp
|
|
76
|
+
```
|
|
77
|
+
|
|
62
78
|
#### Alternative: Direct Command (if in PATH)
|
|
63
79
|
|
|
64
80
|
If `quash-mcp` is in your PATH:
|
|
@@ -187,7 +203,7 @@ User: "Show me my usage statistics"
|
|
|
187
203
|
|
|
188
204
|
- **Python 3.11+** - Required for the MCP server
|
|
189
205
|
- **Android Device** - Emulator or physical device with USB debugging enabled
|
|
190
|
-
- **Quash API Key** - Get from [quashbugs.com](
|
|
206
|
+
- **Quash API Key** - Get from [quashbugs.com/mcp](http://13.220.180.140.nip.io/)
|
|
191
207
|
|
|
192
208
|
Dependencies automatically installed:
|
|
193
209
|
- Android Debug Bridge (ADB) - via `adbutils`
|
|
@@ -281,6 +281,77 @@ class BackendClient:
|
|
|
281
281
|
"error": str(e)
|
|
282
282
|
}
|
|
283
283
|
|
|
284
|
+
async def finalize_session(
    self,
    api_key: str,
    session_id: str,
    task: str,
    device_serial: str,
    status: str,
    final_message: Optional[str] = None,
    error: Optional[str] = None,
    duration_seconds: float = 0.0,
    config: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Finalize a session and aggregate execution record.

    Called when task ends for ANY reason: normal completion, max steps, error, interrupt.

    POSTs the session summary to ``{base_url}/api/agent/finalize``. This
    method never raises: transport errors, non-200 responses and malformed
    payloads are all reported through the returned dict.

    Args:
        api_key: Quash API key
        session_id: Session identifier to finalize
        task: Original task description
        device_serial: Device serial number
        status: "success", "failed", "max_steps", "error", "interrupted"
        final_message: Final message from agent
        error: Error message if failed
        duration_seconds: Total execution time
        config: Execution configuration (sent as ``{}`` when omitted)

    Returns:
        Dict with finalization result:
        {
            "finalized": bool,
            "execution_id": str,
            "total_steps": int,
            "total_tokens": {"prompt": int, "completion": int, "total": int},
            "total_cost": float,
            "error": str (if failed)
        }
    """
    logger.info(f"🏁 Finalizing session {session_id} - Status: {status}")

    payload = {
        "api_key": api_key,
        "session_id": session_id,
        "task": task,
        "device_serial": device_serial,
        "status": status,
        "final_message": final_message,
        "error": error,
        "duration_seconds": duration_seconds,
        "config": config or {}
    }

    try:
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}/api/agent/finalize",
                json=payload
            )

        if response.status_code != 200:
            logger.warning(f"Failed to finalize session: HTTP {response.status_code}")
            return {"finalized": False, "error": f"HTTP {response.status_code}"}

        result = response.json()

    except Exception as e:
        # Best-effort call: finalization problems must not mask the task result.
        logger.error(f"Failed to finalize session: {e}")
        return {"finalized": False, "error": str(e)}

    if not isinstance(result, dict):
        # A 200 with a non-object body cannot carry the documented fields.
        logger.warning("Failed to finalize session: unexpected response payload")
        return {"finalized": False, "error": "Unexpected response payload"}

    if result.get("finalized"):
        # Format the cost defensively: a missing/None/non-numeric value must
        # not turn a successful finalization into a reported failure.
        try:
            cost_text = f"${float(result.get('total_cost') or 0):.4f}"
        except (TypeError, ValueError):
            cost_text = "unknown cost"
        logger.info(f"✅ Session finalized: {result.get('total_steps')} steps, {cost_text}")
    else:
        logger.warning(
            f"Backend did not finalize session {session_id}: "
            f"{result.get('error', 'no reason given')}"
        )

    return result
|
|
354
|
+
|
|
284
355
|
|
|
285
356
|
# Singleton instance
|
|
286
357
|
_backend_client = None
|
|
@@ -55,9 +55,13 @@ def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
|
|
|
55
55
|
)
|
|
56
56
|
|
|
57
57
|
if response.status_code == 200:
|
|
58
|
-
# Portal returns JSON with
|
|
58
|
+
# Portal returns JSON with status and data fields
|
|
59
59
|
data = response.json()
|
|
60
|
-
|
|
60
|
+
if data.get("status") == "success":
|
|
61
|
+
return data.get("data", "<hierarchy></hierarchy>")
|
|
62
|
+
else:
|
|
63
|
+
logger.warning(f"Portal error: {data.get('error', 'Unknown error')}")
|
|
64
|
+
return "<hierarchy></hierarchy>"
|
|
61
65
|
else:
|
|
62
66
|
logger.warning(f"Failed to get accessibility tree: HTTP {response.status_code}")
|
|
63
67
|
return "<hierarchy></hierarchy>"
|
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Execute tool V3 - Step-by-step execution with state-change verification.
|
|
3
|
+
|
|
4
|
+
This reimplements the event-driven state verification from the original Mahoraga agent
|
|
5
|
+
using a polling-based approach suitable for the client-server architecture.
|
|
6
|
+
|
|
7
|
+
All state-change detection logic is contained in this file.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
import asyncio
|
|
13
|
+
import hashlib
|
|
14
|
+
import json
|
|
15
|
+
from typing import Dict, Any, Callable, Optional, Tuple
|
|
16
|
+
from ..state import get_state
|
|
17
|
+
from ..backend_client import get_backend_client
|
|
18
|
+
from ..device.state_capture import get_device_state
|
|
19
|
+
from ..device.adb_tools import AdbTools
|
|
20
|
+
|
|
21
|
+
# Import mahoraga components for tool functions
|
|
22
|
+
try:
|
|
23
|
+
from mahoraga.tools import Tools, describe_tools
|
|
24
|
+
from mahoraga.tools.adb import AdbTools as MahoragaAdbTools
|
|
25
|
+
from mahoraga.agent.context.personas import DEFAULT
|
|
26
|
+
from mahoraga.agent.utils.async_utils import async_to_sync
|
|
27
|
+
except ImportError as e:
|
|
28
|
+
print(f"Warning: Could not import mahoraga components: {e}")
|
|
29
|
+
Tools = None
|
|
30
|
+
describe_tools = None
|
|
31
|
+
MahoragaAdbTools = None
|
|
32
|
+
DEFAULT = None
|
|
33
|
+
async_to_sync = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_ui_state_hash(ui_state_dict: Dict[str, Any]) -> str:
    """
    Generate a stable hash of the UI state for comparison.

    Only the foreground package name and, for every node of the
    accessibility tree, its ``className``, ``text``, ``resourceId`` and
    ``bounds`` (plus nested ``children``) contribute to the hash, so the
    hash changes when the UI updates after an action.

    Args:
        ui_state_dict: Device state with optional ``phone_state`` (dict with
            a ``package`` key) and ``a11y_tree`` (list of node dicts, or a
            single root node dict). Missing or ``None`` entries are treated
            as empty.

    Returns:
        Hex-encoded SHA-256 digest of the normalized state.
    """
    def normalize_tree(tree):
        """Extract stable elements from UI tree."""
        if isinstance(tree, dict):
            # A single root node is hashed like a one-element list; otherwise
            # every dict-rooted tree would collapse to the same empty value.
            tree = [tree]
        if not isinstance(tree, list):
            return []

        normalized = []
        for item in tree:
            if not isinstance(item, dict):
                continue
            element = {
                "className": item.get("className", ""),
                "text": item.get("text", ""),
                "resourceId": item.get("resourceId", ""),
                "bounds": item.get("bounds", ""),
            }
            children = item.get("children", [])
            if children:
                element["children"] = normalize_tree(children)
            normalized.append(element)
        return normalized

    # "phone_state" may be present but None; treat that like a missing entry.
    phone_state = ui_state_dict.get("phone_state")
    if not isinstance(phone_state, dict):
        phone_state = {}

    state_repr = {
        "package": phone_state.get("package", ""),
        "tree": normalize_tree(ui_state_dict.get("a11y_tree", []))
    }

    # sort_keys makes the serialization independent of dict insertion order.
    state_json = json.dumps(state_repr, sort_keys=True)
    return hashlib.sha256(state_json.encode()).hexdigest()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_action_timeout(code: str) -> float:
    """
    Pick how long to wait for the UI to react to a piece of action code.

    The code is lower-cased and scanned for action keywords as plain
    substrings; the first rule that matches (in the order listed below)
    decides the timeout.

    Returns timeout in seconds.
    """
    snippet = code.lower()

    # Ordered (keywords, seconds) rules - earlier rules take priority.
    timeout_rules = (
        (("start_app",), 10.0),                 # App launches can be slow
        (("tap", "click"), 5.0),                # Screen transitions
        (("swipe", "scroll"), 2.0),             # Scroll animations
        (("drag",), 2.0),
        (("input_text",), 2.0),                 # Text input is fast
        (("press_back", "press_home"), 3.0),    # Navigation
        (("press_key",), 1.0),
    )

    for keywords, seconds in timeout_rules:
        if any(keyword in snippet for keyword in keywords):
            return seconds

    return 5.0  # Default timeout
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def wait_for_state_change(
    get_state_func,
    device_serial: str,
    old_state_hash: str,
    max_wait: float = 10.0,
    poll_interval: float = 0.5,
    min_wait: float = 0.3
) -> Tuple[Dict[str, Any], bytes, bool]:
    """
    Poll device until UI state changes or timeout.

    This is the core polling mechanism that replaces Mahoraga's event-driven approach.

    Args:
        get_state_func: Callable taking the device serial and returning
            ``(ui_state_dict, screenshot_bytes)``.
        device_serial: Serial passed through to ``get_state_func``.
        old_state_hash: Hash (see ``get_ui_state_hash``) of the state captured
            before the action.
        max_wait: Polling budget in seconds, counted after ``min_wait``.
        poll_interval: Pause in seconds between two captures.
        min_wait: Unconditional pause in seconds before the first capture.

    Returns:
        Tuple of (ui_state_dict, screenshot_bytes, state_changed: bool)
    """
    # Always wait minimum time for action to take effect
    time.sleep(min_wait)

    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + max_wait

    while time.monotonic() < deadline:
        # Capture current state
        ui_state_dict, screenshot_bytes = get_state_func(device_serial)

        # Check if state changed
        if get_ui_state_hash(ui_state_dict) != old_state_hash:
            return ui_state_dict, screenshot_bytes, True

        # State hasn't changed - wait and try again
        time.sleep(poll_interval)

    # Timeout - take one last capture and report what it actually shows, so a
    # change that landed during the final sleep is not mislabelled as "no change".
    ui_state_dict, screenshot_bytes = get_state_func(device_serial)
    state_changed = get_ui_state_hash(ui_state_dict) != old_state_hash
    return ui_state_dict, screenshot_bytes, state_changed
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def wait_for_action_effect(
    get_state_func,
    device_serial: str,
    old_ui_state: Dict[str, Any],
    executed_code: str,
    min_wait: float = 0.3,
    poll_interval: float = 0.5
) -> Tuple[Dict[str, Any], bytes, bool]:
    """
    Block until the executed action is reflected on the device.

    Code containing ``get_state`` or ``complete(`` is treated as not
    touching the UI: after a short pause the current state is returned with
    no screenshot and ``state_changed=False``. Any other code is polled via
    ``wait_for_state_change`` with a timeout from ``get_action_timeout``.

    Returns:
        Tuple of (new_ui_state_dict, screenshot_bytes, state_changed: bool)
    """
    lowered = executed_code.lower()
    passive_markers = ("get_state", "complete(")

    if any(marker in lowered for marker in passive_markers):
        # Nothing on screen is expected to move - just re-read the state.
        time.sleep(0.1)
        current_state = get_state_func(device_serial)[0]
        return current_state, None, False

    # Baseline hash first, then the action-specific polling budget.
    baseline_hash = get_ui_state_hash(old_ui_state)
    budget = get_action_timeout(executed_code)

    return wait_for_state_change(
        get_state_func,
        device_serial,
        baseline_hash,
        max_wait=budget,
        poll_interval=poll_interval,
        min_wait=min_wait
    )
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ============================================================
|
|
177
|
+
# MAIN EXECUTION FUNCTION
|
|
178
|
+
# ============================================================
|
|
179
|
+
|
|
180
|
+
async def execute_v3(
    task: str,
    max_steps: int = 15,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Dict[str, Any]:
    """
    Execute automation task using step-by-step backend communication.

    Each step:
    1. Capture device state (State A)
    2. Send to backend for AI decision
    3. Execute returned action locally
    4. POLL until state changes (State B ≠ State A) or timeout
    5. Send State B to backend in next iteration
    6. Repeat until complete

    This ensures the backend always sees the UPDATED state after each action,
    preventing the agent from making decisions based on stale state.

    Args:
        task: Natural language task description
        max_steps: Maximum number of steps to execute (default: 15)
        progress_callback: Optional callback for progress updates

    Returns:
        Dict with execution result and details. "status" is one of
        "success", "failed", "error" or "interrupted"; runs that got past the
        prerequisite checks also report "steps_taken", "tokens", "cost" and
        "duration_seconds".
    """
    # Function-scope imports keep this block self-contained.
    import contextlib
    import io
    import traceback

    state = get_state()
    backend = get_backend_client()

    # Check prerequisites
    if not state.is_device_connected():
        return {
            "status": "error",
            "message": "❌ No device connected. Please run 'connect' first.",
            "prerequisite": "connect"
        }

    if not state.is_configured():
        return {
            "status": "error",
            "message": "❌ Configuration incomplete. Please run 'configure' with your Quash API key.",
            "prerequisite": "configure"
        }

    if not state.portal_ready:
        return {
            "status": "error",
            "message": "⚠️ Portal accessibility service not ready. Please ensure it's enabled on the device.",
            "prerequisite": "connect"
        }

    # Get API key and config
    quash_api_key = state.config["api_key"]
    config = {
        "model": state.config["model"],
        "temperature": state.config["temperature"],
        "vision": state.config["vision"],
        "reasoning": state.config["reasoning"],
        "reflection": state.config["reflection"],
        "debug": state.config["debug"]
    }

    # Validate API key
    validation_result = await backend.validate_api_key(quash_api_key)

    if not validation_result.get("valid", False):
        error_msg = validation_result.get("error", "Invalid API key")
        return {
            "status": "error",
            "message": f"❌ API Key validation failed: {error_msg}",
            "prerequisite": "configure"
        }

    # Check credits. "user"/"credits" may be present but null; treat that as
    # an empty profile / zero balance instead of crashing on the comparison.
    user_info = validation_result.get("user") or {}
    credits = user_info.get("credits") or 0

    if credits <= 0:
        return {
            "status": "error",
            "message": f"❌ Insufficient credits. Current balance: ${credits:.2f}",
            "user": user_info
        }

    # Progress logging helper
    def log_progress(message: str):
        if progress_callback:
            progress_callback(message)

    log_progress(f"✅ API Key validated - Credits: ${credits:.2f}")
    log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
    log_progress(f"🚀 Starting task: {task}")
    log_progress(f"📱 Device: {state.device_serial}")
    log_progress(f"🧠 Model: {config['model']}")
    log_progress(f"🔢 Max steps: {max_steps}")

    # Initialize execution
    start_time = time.time()
    session_id = f"session_{uuid.uuid4().hex[:12]}"
    step_number = 0
    chat_history = []
    total_tokens = {"prompt": 0, "completion": 0, "total": 0}
    total_cost = 0.0

    def usage_summary(duration: float) -> Dict[str, Any]:
        """Fields shared by every result produced after the run has started."""
        return {
            "steps_taken": step_number,
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": duration
        }

    async def finalize(status: str, final_message: Optional[str],
                       error: Optional[str], duration: float) -> None:
        """Ask the backend to close the session and create the execution record."""
        await backend.finalize_session(
            api_key=quash_api_key,
            session_id=session_id,
            task=task,
            device_serial=state.device_serial,
            status=status,
            final_message=final_message,
            error=error,
            duration_seconds=duration,
            config=config
        )

    def make_printing_wrapper(func):
        """Wrap a tool function to print its return value."""
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            # Print the result so stdout captures it
            if result is not None:
                print(result)
            return result
        return wrapper

    # Initialize local ADB tools for code execution
    adb_tools = AdbTools(serial=state.device_serial, use_tcp=True)

    # Code executor namespace - add tool functions so generated code can call them
    executor_globals = {
        "__builtins__": __builtins__,
        "adb_tools": adb_tools
    }

    # Add tool functions to executor namespace (like start_app, swipe, etc.)
    if describe_tools and DEFAULT and MahoragaAdbTools:
        try:
            # Create a mahoraga AdbTools instance for tool execution
            mahoraga_tools = MahoragaAdbTools(
                serial=state.device_serial,
                use_tcp=True,
                remote_tcp_port=8080
            )

            # Get all tool functions from mahoraga AdbTools instance
            tool_list = describe_tools(mahoraga_tools, exclude_tools=None)

            # Filter by allowed tools from DEFAULT persona
            allowed_tool_names = DEFAULT.allowed_tools if hasattr(DEFAULT, 'allowed_tools') else []
            filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}

            # Add each tool function to executor globals with print wrapper
            for tool_name, tool_function in filtered_tools.items():
                # Convert async functions to sync if needed
                if asyncio.iscoroutinefunction(tool_function) and async_to_sync:
                    tool_function = async_to_sync(tool_function)

                # Add wrapped function to globals so code can call it directly
                executor_globals[tool_name] = make_printing_wrapper(tool_function)

            log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
        except Exception as e:
            # Best effort: the run continues with only `adb_tools` in scope.
            log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
            log_progress(f"Traceback: {traceback.format_exc()}")

    executor_locals = {}

    try:
        # ============================================================
        # STEP-BY-STEP EXECUTION LOOP
        # ============================================================
        while step_number < max_steps:  # Use user-provided max_steps
            step_number += 1
            log_progress(f"🧠 Step {step_number}/{max_steps}: Analyzing...")

            # 1. Capture device state (State A)
            try:
                ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)

                # Only include screenshot if vision is enabled
                if not config["vision"]:
                    screenshot_bytes = None

                # Log current state
                current_package = ui_state_dict.get("phone_state", {}).get("package", "unknown")
                log_progress(f"📱 Current app: {current_package}")

            except Exception as e:
                # Keep going with a placeholder state so the backend can react.
                log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
                ui_state_dict = {
                    "a11y_tree": [{"index": 0, "text": "Error capturing UI", "children": []}],
                    "phone_state": {"package": "unknown"}
                }
                screenshot_bytes = None

            # 2. Send to backend for AI decision
            step_result = await backend.execute_step(
                api_key=quash_api_key,
                session_id=session_id,
                step_number=step_number,
                task=task,
                ui_state=ui_state_dict,
                chat_history=chat_history,
                config=config,
                screenshot_bytes=screenshot_bytes
            )

            # Handle backend errors
            if "error" in step_result:
                backend_error = step_result["error"]
                # "message" is optional: fall back to the error text rather
                # than raising KeyError and losing the real backend error.
                backend_message = step_result.get("message") or str(backend_error)
                log_progress(f"💥 Backend error: {backend_message}")
                # NOTE(review): this path (like the completed path below) does
                # not call finalize_session - confirm the backend closes the
                # session itself in these cases.
                return {
                    "status": "error",
                    "message": backend_message,
                    "error": backend_error,
                    **usage_summary(time.time() - start_time)
                }

            # Update usage tracking
            step_tokens = step_result.get("tokens_used", {})
            step_cost = step_result.get("cost", 0.0)

            total_tokens["prompt"] += step_tokens.get("prompt", 0)
            total_tokens["completion"] += step_tokens.get("completion", 0)
            total_tokens["total"] += step_tokens.get("total", 0)
            total_cost += step_cost

            # Get action from backend
            action = step_result.get("action", {})
            action_type = action.get("type")
            code = action.get("code")
            reasoning = action.get("reasoning")

            # Log reasoning
            if reasoning:
                log_progress(f"🤔 Reasoning: {reasoning}")

            # Update chat history
            assistant_response = step_result.get("assistant_response", "")
            chat_history.append({"role": "assistant", "content": assistant_response})

            # 3. Check if task is complete
            if step_result.get("completed", False):
                success = step_result.get("success", False)
                final_message = step_result.get("final_message", "Task completed")
                duration = time.time() - start_time

                if success:
                    log_progress(f"✅ Task completed successfully in {step_number} steps")
                    status, message = "success", f"✅ Success: {final_message}"
                else:
                    log_progress(f"❌ Task failed: {final_message}")
                    status, message = "failed", f"❌ Failed: {final_message}"
                log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

                return {
                    "status": status,
                    "steps_taken": step_number,
                    "final_message": final_message,
                    "message": message,
                    "tokens": total_tokens,
                    "cost": total_cost,
                    "duration_seconds": duration
                }

            # 4. Execute action locally
            if not (code and action_type == "execute_code"):
                # No code to execute
                log_progress("⚠️ No action code provided by backend")
                chat_history.append({
                    "role": "user",
                    "content": "No code was provided. Please provide code to execute."
                })
                continue

            log_progress(f"⚡ Executing action...")

            # Store old UI state for comparison
            old_ui_state = ui_state_dict.copy()

            try:
                # Capture stdout and stderr to get tool function outputs
                stdout = io.StringIO()
                stderr = io.StringIO()

                with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
                    # SECURITY: this runs backend-supplied code in-process with
                    # full builtins. It is NOT a sandbox; the backend is
                    # trusted to return only tool calls.
                    exec(code, executor_globals, executor_locals)

                # Get captured output
                execution_output = stdout.getvalue()
                error_output = stderr.getvalue()

                # ============================================================
                # CRITICAL: Wait for state change (polling-based event detection)
                # ============================================================
                log_progress(f"⏳ Waiting for UI state to update...")

                try:
                    # Poll until state changes or timeout. The polling helper
                    # sleeps synchronously, so run it in a worker thread to
                    # keep the event loop responsive while waiting.
                    new_ui_state_dict, _, state_changed = await asyncio.to_thread(
                        wait_for_action_effect,
                        get_device_state,
                        state.device_serial,
                        old_ui_state,
                        code,
                        min_wait=0.3,
                        poll_interval=0.5
                    )

                    # Log what happened
                    if state_changed:
                        old_pkg = old_ui_state.get("phone_state", {}).get("package", "")
                        new_pkg = new_ui_state_dict.get("phone_state", {}).get("package", "")

                        if old_pkg != new_pkg:
                            log_progress(f"✅ State changed: App switched ({old_pkg} → {new_pkg})")
                        else:
                            log_progress(f"✅ State changed: UI updated")
                    else:
                        log_progress(f"⚠️ WARNING: State did NOT change after action (timeout)")
                        log_progress(f" This might mean the action had no effect or took too long")

                except Exception as e:
                    log_progress(f"⚠️ Error during state change detection: {e}")
                    state_changed = False
                    # Fallback: Just wait a bit (without blocking the event loop)
                    await asyncio.sleep(1.5)

                # Build feedback message
                feedback_parts = []

                if execution_output:
                    feedback_parts.append(f"Action output: {execution_output.strip()}")

                if state_changed:
                    feedback_parts.append("UI state updated successfully")
                else:
                    feedback_parts.append("WARNING: UI state did not change (action may have failed)")

                if error_output:
                    feedback_parts.append(f"Warnings: {error_output.strip()}")

                feedback = " | ".join(feedback_parts) if feedback_parts else "Action executed"

                log_progress(f"✅ {feedback[:200]}")

                # Add execution result to chat history
                chat_history.append({
                    "role": "user",
                    "content": f"Execution Result:\n```\n{feedback}\n```"
                })

            except Exception as e:
                # A failing action is reported to the model, not fatal to the run.
                error_msg = f"Error during execution: {str(e)}"
                log_progress(f"💥 Action failed: {error_msg}")

                # Add error to chat history
                chat_history.append({
                    "role": "user",
                    "content": f"Execution Error:\n```\n{error_msg}\n```"
                })

        # Max steps reached
        log_progress(f"⚠️ Reached maximum steps ({max_steps})")
        log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

        duration = time.time() - start_time

        # Finalize session on backend to create execution record
        await finalize(
            "max_steps",
            f"Reached maximum step limit of {max_steps}",
            None,
            duration
        )

        return {
            "status": "failed",
            "steps_taken": step_number,
            "final_message": f"Reached maximum step limit of {max_steps}",
            "message": "❌ Failed: Maximum steps reached",
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": duration
        }

    except KeyboardInterrupt:
        log_progress("ℹ️ Task interrupted by user")
        duration = time.time() - start_time

        # Finalize session on backend
        await finalize("interrupted", "Task interrupted by user", None, duration)

        return {
            "status": "interrupted",
            "message": "ℹ️ Task execution interrupted",
            **usage_summary(duration)
        }

    except Exception as e:
        error_msg = str(e)
        log_progress(f"💥 Error: {error_msg}")
        duration = time.time() - start_time

        # Finalize session on backend
        await finalize("error", None, error_msg, duration)

        return {
            "status": "error",
            "message": f"💥 Execution error: {error_msg}",
            "error": error_msg,
            **usage_summary(duration)
        }

    finally:
        # Cleanup TCP forwarding
        if adb_tools:
            adb_tools.teardown_tcp_forward()
|
|
@@ -1,371 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Execute tool V3 - Step-by-step execution with local device access.
|
|
3
|
-
|
|
4
|
-
AI logic runs on backend (private), device access happens locally (public).
|
|
5
|
-
This hybrid approach keeps proprietary code private while allowing local device control.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import time
|
|
9
|
-
import uuid
|
|
10
|
-
import asyncio
|
|
11
|
-
from typing import Dict, Any, Callable, Optional
|
|
12
|
-
from ..state import get_state
|
|
13
|
-
from ..backend_client import get_backend_client
|
|
14
|
-
from ..device.state_capture import get_device_state
|
|
15
|
-
from ..device.adb_tools import AdbTools
|
|
16
|
-
|
|
17
|
-
# Import mahoraga components for tool functions
|
|
18
|
-
try:
|
|
19
|
-
from mahoraga.tools import Tools, describe_tools
|
|
20
|
-
from mahoraga.tools.adb import AdbTools as MahoragaAdbTools
|
|
21
|
-
from mahoraga.agent.context.personas import DEFAULT
|
|
22
|
-
from mahoraga.agent.utils.async_utils import async_to_sync
|
|
23
|
-
except ImportError as e:
|
|
24
|
-
print(f"Warning: Could not import mahoraga components: {e}")
|
|
25
|
-
Tools = None
|
|
26
|
-
describe_tools = None
|
|
27
|
-
MahoragaAdbTools = None
|
|
28
|
-
DEFAULT = None
|
|
29
|
-
async_to_sync = None
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
async def execute_v3(
    task: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Dict[str, Any]:
    """
    Execute automation task using step-by-step backend communication.

    Each step:
    1. Capture device state locally (UI + optional screenshot)
    2. Send to backend for AI decision
    3. Execute returned action locally
    4. Repeat until complete (or the step limit is reached)

    Args:
        task: Natural language task description
        progress_callback: Optional callback for progress updates

    Returns:
        Dict with execution result and details. "status" is one of
        "success", "failed", "error" or "interrupted". Prerequisite
        failures (no device, missing configuration, portal not ready,
        invalid API key) return early with a "prerequisite" key naming
        the tool to run first; results produced after the run has started
        carry "steps_taken", "tokens", "cost" and "duration_seconds".
    """
    max_steps = 15

    state = get_state()
    backend = get_backend_client()

    # Check prerequisites
    if not state.is_device_connected():
        return {
            "status": "error",
            "message": "❌ No device connected. Please run 'connect' first.",
            "prerequisite": "connect"
        }

    if not state.is_configured():
        return {
            "status": "error",
            "message": "❌ Configuration incomplete. Please run 'configure' with your Quash API key.",
            "prerequisite": "configure"
        }

    if not state.portal_ready:
        return {
            "status": "error",
            "message": "⚠️ Portal accessibility service not ready. Please ensure it's enabled on the device.",
            "prerequisite": "connect"
        }

    # Get API key and config
    quash_api_key = state.config["api_key"]
    config = {
        "model": state.config["model"],
        "temperature": state.config["temperature"],
        "vision": state.config["vision"],
        "reasoning": state.config["reasoning"],
        "reflection": state.config["reflection"],
        "debug": state.config["debug"]
    }

    # Validate API key
    validation_result = await backend.validate_api_key(quash_api_key)

    if not validation_result.get("valid", False):
        error_msg = validation_result.get("error", "Invalid API key")
        return {
            "status": "error",
            "message": f"❌ API Key validation failed: {error_msg}",
            "prerequisite": "configure"
        }

    # Check credits. `or` also covers an explicit null from the backend,
    # which would otherwise break the comparison / formatting below.
    user_info = validation_result.get("user") or {}
    credits = user_info.get("credits") or 0

    if credits <= 0:
        return {
            "status": "error",
            "message": f"❌ Insufficient credits. Current balance: ${credits:.2f}",
            "user": user_info
        }

    # Progress logging helper
    def log_progress(message: str) -> None:
        if progress_callback:
            progress_callback(message)

    log_progress(f"✅ API Key validated - Credits: ${credits:.2f}")
    log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
    log_progress(f"🚀 Starting task: {task}")
    log_progress(f"📱 Device: {state.device_serial}")
    log_progress(f"🧠 Model: {config['model']}")

    # Initialize execution
    start_time = time.time()
    session_id = f"session_{uuid.uuid4().hex[:12]}"
    step_number = 0
    chat_history = []
    total_tokens = {"prompt": 0, "completion": 0, "total": 0}
    total_cost = 0.0

    def usage_summary() -> Dict[str, Any]:
        # Shared tail of every result dict produced once the run has started.
        return {
            "tokens": total_tokens,
            "cost": total_cost,
            "duration_seconds": time.time() - start_time
        }

    # Initialize local ADB tools for code execution
    adb_tools = AdbTools(serial=state.device_serial, use_tcp=True)

    # Code executor namespace - add tool functions so generated code can call them
    executor_globals = {
        "__builtins__": __builtins__,
        "adb_tools": adb_tools
    }

    # Add tool functions to executor namespace (like start_app, swipe, etc.)
    if describe_tools and DEFAULT and MahoragaAdbTools:
        try:
            # Create a mahoraga AdbTools instance for tool execution
            # This instance has all the tool methods like swipe, start_app, etc.
            mahoraga_tools = MahoragaAdbTools(
                serial=state.device_serial,
                use_tcp=True,
                remote_tcp_port=8080
            )

            # Get all tool functions from mahoraga AdbTools instance
            tool_list = describe_tools(mahoraga_tools, exclude_tools=None)

            # Filter by allowed tools from DEFAULT persona
            allowed_tool_names = getattr(DEFAULT, "allowed_tools", [])
            filtered_tools = {
                name: func
                for name, func in tool_list.items()
                if name in allowed_tool_names
            }

            # Add each tool function to executor globals
            for tool_name, tool_function in filtered_tools.items():
                # Convert async functions to sync if needed
                if asyncio.iscoroutinefunction(tool_function) and async_to_sync:
                    tool_function = async_to_sync(tool_function)

                # Add to globals so code can call it directly
                executor_globals[tool_name] = tool_function

            log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
        except Exception as e:
            # Best effort: the run continues with only `adb_tools` available.
            log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
            import traceback
            log_progress(f"Traceback: {traceback.format_exc()}")

    # Persisted across steps so later code can reuse earlier variables.
    executor_locals = {}

    try:
        # ============================================================
        # STEP-BY-STEP EXECUTION LOOP
        # ============================================================
        while step_number < max_steps:
            step_number += 1
            log_progress(f"🧠 Step {step_number}: Thinking...")

            # 1. Capture device state
            try:
                ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)

                # Only include screenshot if vision is enabled
                if not config["vision"]:
                    screenshot_bytes = None

                # DEBUG: Log UI state. str() keeps a non-string tree from
                # raising here and discarding an otherwise good capture.
                a11y_preview = str(ui_state_dict.get("a11y_tree", ""))[:150]
                log_progress(f"📱 UI State captured - A11y tree preview: {a11y_preview}...")
                log_progress(f"📷 Screenshot: {'Present' if screenshot_bytes else 'None'}")

            except Exception as e:
                # Fall back to an empty hierarchy so the backend still gets a
                # well-formed state for this step.
                log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
                ui_state_dict = {
                    "a11y_tree": "<hierarchy></hierarchy>",
                    "phone_state": {"package": "unknown"}
                }
                screenshot_bytes = None

            # 2. Send to backend for AI decision
            step_result = await backend.execute_step(
                api_key=quash_api_key,
                session_id=session_id,
                step_number=step_number,
                task=task,
                ui_state=ui_state_dict,
                chat_history=chat_history,
                config=config,
                screenshot_bytes=screenshot_bytes
            )

            # Handle backend errors
            if "error" in step_result:
                backend_error = step_result["error"]
                # A missing "message" must not turn into an opaque KeyError.
                backend_message = step_result.get("message") or str(backend_error)
                log_progress(f"💥 Backend error: {backend_message}")
                return {
                    "status": "error",
                    "message": backend_message,
                    "error": backend_error,
                    "steps_taken": step_number,
                    **usage_summary()
                }

            # Update usage tracking
            step_tokens = step_result.get("tokens_used") or {}
            step_cost = step_result.get("cost") or 0.0

            total_tokens["prompt"] += step_tokens.get("prompt", 0)
            total_tokens["completion"] += step_tokens.get("completion", 0)
            total_tokens["total"] += step_tokens.get("total", 0)
            total_cost += step_cost

            # Get action from backend (tolerate an explicit null action)
            action = step_result.get("action") or {}
            action_type = action.get("type")
            code = action.get("code")
            reasoning = action.get("reasoning")
            assistant_response = step_result.get("assistant_response") or ""

            # DEBUG: Log full backend response
            log_progress("\n📋 DEBUG - Backend Response:")
            log_progress(f" - Action type: {action_type}")
            log_progress(f" - Completed: {step_result.get('completed', False)}")
            log_progress(f" - Success: {step_result.get('success', None)}")
            log_progress(f" - Code present: {bool(code)}")
            if code:
                log_progress(f" - Code: {code[:100]}..." if len(code) > 100 else f" - Code: {code}")
            log_progress(f" - Assistant response: {assistant_response[:200]}...\n")

            # Log reasoning
            if reasoning:
                log_progress(f"🤔 Reasoning: {reasoning}")

            # Update chat history
            chat_history.append({"role": "assistant", "content": assistant_response})

            # 3. Check if task is complete
            if step_result.get("completed", False):
                success = step_result.get("success", False)
                final_message = step_result.get("final_message", "Task completed")

                if success:
                    log_progress(f"✅ Task completed successfully in {step_number} steps")
                    status, headline = "success", f"✅ Success: {final_message}"
                else:
                    log_progress(f"❌ Task failed: {final_message}")
                    status, headline = "failed", f"❌ Failed: {final_message}"
                log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

                return {
                    "status": status,
                    "steps_taken": step_number,
                    "final_message": final_message,
                    "message": headline,
                    **usage_summary()
                }

            # 4. Execute action locally
            if code and action_type == "execute_code":
                log_progress("⚡ Executing action...")

                try:
                    # Drop any `_result` left by an earlier step so this step
                    # cannot report a stale value as its own output.
                    executor_locals.pop("_result", None)

                    # SECURITY NOTE: this runs backend-supplied code with full
                    # builtins in this process; it is not a real sandbox.
                    # NOTE: exec with separate globals/locals behaves like a
                    # class body, so functions defined in `code` cannot see
                    # names assigned in executor_locals.
                    exec(code, executor_globals, executor_locals)

                    # Get execution result
                    execution_output = executor_locals.get("_result", "Code executed successfully")

                    # Add execution result to chat history
                    chat_history.append({
                        "role": "user",
                        "content": f"Execution Result:\n```\n{execution_output}\n```"
                    })

                except Exception as e:
                    error_msg = f"Error during execution: {str(e)}"
                    log_progress(f"💥 Action failed: {error_msg}")

                    # Add error to chat history so the backend can react to it
                    chat_history.append({
                        "role": "user",
                        "content": f"Execution Result:\n```\n{error_msg}\n```"
                    })

            else:
                # No code to execute
                log_progress("⚠️ No action code provided by backend")
                chat_history.append({
                    "role": "user",
                    "content": "No code was provided. Please provide code to execute."
                })

        # Max steps reached
        log_progress(f"⚠️ Reached maximum steps ({step_number})")
        log_progress(f"💰 Usage: {total_tokens['total']} tokens, ${total_cost:.4f}")

        return {
            "status": "failed",
            "steps_taken": step_number,
            "final_message": f"Reached maximum step limit of {step_number}",
            "message": "❌ Failed: Maximum steps reached",
            **usage_summary()
        }

    except KeyboardInterrupt:
        log_progress("⏹️ Task interrupted by user")
        return {
            "status": "interrupted",
            "message": "⏹️ Task execution interrupted",
            "steps_taken": step_number,
            **usage_summary()
        }

    except Exception as e:
        error_msg = str(e)
        log_progress(f"💥 Error: {error_msg}")
        return {
            "status": "error",
            "message": f"💥 Execution error: {error_msg}",
            "error": error_msg,
            "steps_taken": step_number,
            **usage_summary()
        }

    finally:
        # Cleanup TCP forwarding. A failing teardown must not replace the
        # result being returned above with an exception.
        if adb_tools:
            try:
                adb_tools.teardown_tcp_forward()
            except Exception as e:
                log_progress(f"⚠️ Warning: Failed to tear down TCP forwarding: {e}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|