quash-mcp 0.2.13__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/PKG-INFO +1 -1
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/local_test.py +19 -5
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/pyproject.toml +1 -1
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/backend_client.py +3 -2
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/state_capture.py +20 -14
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/models.py +12 -1
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/execute_v3.py +170 -130
- quash_mcp-0.2.13/test_backend_integration.py +0 -100
- quash_mcp-0.2.13/test_tools_loading.py +0 -81
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/.gitignore +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/README.md +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/SETUP_CLAUDE_CODE.md +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/__init__.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/__main__.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/__init__.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/adb_tools.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/portal.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/server.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/state.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/__init__.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/build.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/build_old.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/configure.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/connect.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/execute.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/execute_v2_backup.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/runsuite.py +0 -0
- {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/usage.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: quash-mcp
|
|
3
|
-
Version: 0.2.13
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Model Context Protocol server for Quash - AI-powered mobile automation agent
|
|
5
5
|
Project-URL: Homepage, https://quashbugs.com
|
|
6
6
|
Project-URL: Repository, https://github.com/quash/quash-mcp
|
|
@@ -13,13 +13,27 @@ from quash_mcp.models import SessionDTO, ConfigInfo, UIStateInfo
|
|
|
13
13
|
async def main():
|
|
14
14
|
# Initialize the session state
|
|
15
15
|
state = get_state()
|
|
16
|
-
state.config["api_key"] = "
|
|
16
|
+
state.config["api_key"] = "mhg__Sacdd_AiKSJgTow49F8p9Fu-UGyi_Wd"
|
|
17
17
|
state.device_serial = "emulator-5554"
|
|
18
18
|
state.portal_ready = True # Assume portal is ready for local testing
|
|
19
19
|
|
|
20
|
+
# --- CONFIGURATION ---
|
|
21
|
+
state.config["model"] = "anthropic/claude-sonnet-4"
|
|
22
|
+
state.config["temperature"] = 0.2
|
|
23
|
+
state.config["vision"] = False
|
|
24
|
+
state.config["reasoning"] = True
|
|
25
|
+
state.config["reflection"] = True
|
|
26
|
+
state.config["debug"] = False
|
|
27
|
+
state.config["max_steps"] = 10
|
|
28
|
+
# ---------------------
|
|
29
|
+
|
|
20
30
|
# Define the task
|
|
21
|
-
task = "Open
|
|
22
|
-
# task = "Open settings
|
|
31
|
+
# task = "Open the google search app and search for bakeries."
|
|
32
|
+
# task = "Open settings."
|
|
33
|
+
# task = "Open Markor app, replace the contents of 'Abhinav.txt' to Hello World! and save the file as Done.md"
|
|
34
|
+
# task = "Tell me all the number drawn on the screen"
|
|
35
|
+
task = "Create a file with name test.md and write Hello World! and save it in markor app"
|
|
36
|
+
# task = "Tell me what you see drawn on screen"
|
|
23
37
|
|
|
24
38
|
# Define a progress callback
|
|
25
39
|
def progress_callback(message):
|
|
@@ -31,7 +45,7 @@ async def main():
|
|
|
31
45
|
api_key=state.config["api_key"],
|
|
32
46
|
task=task,
|
|
33
47
|
device_serial=state.device_serial,
|
|
34
|
-
config=ConfigInfo(**state.config)
|
|
48
|
+
config=ConfigInfo(**{k: v for k, v in state.config.items() if k in ConfigInfo.model_fields})
|
|
35
49
|
)
|
|
36
50
|
|
|
37
51
|
# Print the initial session DTO
|
|
@@ -50,4 +64,4 @@ async def main():
|
|
|
50
64
|
|
|
51
65
|
|
|
52
66
|
if __name__ == "__main__":
|
|
53
|
-
asyncio.run(main())
|
|
67
|
+
asyncio.run(main())
|
|
@@ -35,7 +35,8 @@ class BackendClient:
|
|
|
35
35
|
Dict with validation result:
|
|
36
36
|
{
|
|
37
37
|
"valid": bool,
|
|
38
|
-
"user": {"email": str, "name": str
|
|
38
|
+
"user": {"email": str, "name": str},
|
|
39
|
+
"organization_credits": float,
|
|
39
40
|
"openrouter_api_key": str,
|
|
40
41
|
"error": str (if invalid)
|
|
41
42
|
}
|
|
@@ -211,7 +212,7 @@ class BackendClient:
|
|
|
211
212
|
|
|
212
213
|
# Prepare files dict (only screenshot if provided)
|
|
213
214
|
files = {}
|
|
214
|
-
if screenshot_bytes:
|
|
215
|
+
if screenshot_bytes and len(screenshot_bytes) > 0:
|
|
215
216
|
files["screenshot"] = ("screenshot.png", screenshot_bytes, "image/png")
|
|
216
217
|
|
|
217
218
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
Device state capture utilities.
|
|
3
|
-
Captures UI state and screenshots from Android devices.
|
|
4
|
-
"""
|
|
5
|
-
|
|
1
|
+
import json
|
|
6
2
|
import logging
|
|
7
3
|
import requests
|
|
8
|
-
from typing import Dict, Any, Optional, Tuple
|
|
4
|
+
from typing import Dict, Any, Optional, Tuple, List
|
|
9
5
|
from adbutils import adb
|
|
10
6
|
|
|
11
7
|
logger = logging.getLogger("quash-device")
|
|
@@ -34,7 +30,7 @@ def get_current_package(serial: str) -> str:
|
|
|
34
30
|
return "unknown"
|
|
35
31
|
|
|
36
32
|
|
|
37
|
-
def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
|
|
33
|
+
def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> List[Dict[str, Any]]:
|
|
38
34
|
"""
|
|
39
35
|
Get accessibility tree from Portal app via TCP.
|
|
40
36
|
|
|
@@ -43,7 +39,7 @@ def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
|
|
|
43
39
|
tcp_port: Local TCP port for Portal communication
|
|
44
40
|
|
|
45
41
|
Returns:
|
|
46
|
-
Accessibility tree
|
|
42
|
+
Accessibility tree as a list of dictionaries, or an empty list if failed
|
|
47
43
|
"""
|
|
48
44
|
try:
|
|
49
45
|
device = adb.device(serial)
|
|
@@ -55,20 +51,27 @@ def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
|
|
|
55
51
|
)
|
|
56
52
|
|
|
57
53
|
if response.status_code == 200:
|
|
58
|
-
# Portal returns JSON with status and data fields
|
|
59
54
|
data = response.json()
|
|
60
55
|
if data.get("status") == "success":
|
|
61
|
-
|
|
56
|
+
# The 'data' field should contain the JSON string of the a11y_tree
|
|
57
|
+
a11y_tree_json_str = data.get("data", "[]")
|
|
58
|
+
try:
|
|
59
|
+
parsed_tree = json.loads(a11y_tree_json_str)
|
|
60
|
+
logger.debug(f"get_accessibility_tree returning tree of length: {len(parsed_tree)}")
|
|
61
|
+
return parsed_tree
|
|
62
|
+
except json.JSONDecodeError:
|
|
63
|
+
logger.warning(f"Failed to parse a11y_tree JSON string: {a11y_tree_json_str}")
|
|
64
|
+
return []
|
|
62
65
|
else:
|
|
63
66
|
logger.warning(f"Portal error: {data.get('error', 'Unknown error')}")
|
|
64
|
-
return
|
|
67
|
+
return []
|
|
65
68
|
else:
|
|
66
69
|
logger.warning(f"Failed to get accessibility tree: HTTP {response.status_code}")
|
|
67
|
-
return
|
|
70
|
+
return []
|
|
68
71
|
|
|
69
72
|
except Exception as e:
|
|
70
73
|
logger.warning(f"Failed to get accessibility tree: {e}")
|
|
71
|
-
return
|
|
74
|
+
return []
|
|
72
75
|
|
|
73
76
|
|
|
74
77
|
def capture_screenshot(serial: str) -> Optional[bytes]:
|
|
@@ -83,7 +86,10 @@ def capture_screenshot(serial: str) -> Optional[bytes]:
|
|
|
83
86
|
"""
|
|
84
87
|
try:
|
|
85
88
|
device = adb.device(serial)
|
|
86
|
-
|
|
89
|
+
# device.shell("screencap -p", stream=True) returns an AdbConnection object (file-like)
|
|
90
|
+
# We need to read the bytes from it.
|
|
91
|
+
with device.shell("screencap -p", stream=True) as conn:
|
|
92
|
+
screenshot_bytes = conn.read(1024 * 1024 * 10) # Read up to 10MB
|
|
87
93
|
return screenshot_bytes
|
|
88
94
|
except Exception as e:
|
|
89
95
|
logger.error(f"Failed to capture screenshot: {e}")
|
|
@@ -16,7 +16,7 @@ class ConfigInfo(BaseModel):
|
|
|
16
16
|
debug: bool = False
|
|
17
17
|
|
|
18
18
|
class UIStateInfo(BaseModel):
|
|
19
|
-
a11y_tree: str
|
|
19
|
+
a11y_tree: List[Dict[str, Any]]
|
|
20
20
|
phone_state: Dict[str, Any]
|
|
21
21
|
|
|
22
22
|
class ChatHistoryMessage(BaseModel):
|
|
@@ -31,6 +31,12 @@ class AgentStepDTO(BaseModel):
|
|
|
31
31
|
cost: float
|
|
32
32
|
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
33
33
|
|
|
34
|
+
class ReflectionInfo(BaseModel):
|
|
35
|
+
"""Information about a reflection step."""
|
|
36
|
+
goal_achieved: bool
|
|
37
|
+
advice: Optional[str] = None
|
|
38
|
+
summary: Optional[str] = None
|
|
39
|
+
|
|
34
40
|
class SessionDTO(BaseModel):
|
|
35
41
|
session_id: str
|
|
36
42
|
api_key: str
|
|
@@ -39,4 +45,9 @@ class SessionDTO(BaseModel):
|
|
|
39
45
|
config: ConfigInfo
|
|
40
46
|
chat_history: List[ChatHistoryMessage] = []
|
|
41
47
|
steps: List[AgentStepDTO] = []
|
|
48
|
+
current_plan: Optional[List[str]] = None
|
|
49
|
+
current_task_index: int = 0
|
|
42
50
|
ui_state: Optional[UIStateInfo] = None
|
|
51
|
+
last_reflection: Optional[ReflectionInfo] = None # Store the last reflection for the session
|
|
52
|
+
|
|
53
|
+
last_action_completed: Optional[bool] = None
|
|
@@ -17,6 +17,7 @@ from ..state import get_state
|
|
|
17
17
|
from ..backend_client import get_backend_client
|
|
18
18
|
from ..device.state_capture import get_device_state
|
|
19
19
|
from ..device.adb_tools import AdbTools
|
|
20
|
+
import logging
|
|
20
21
|
|
|
21
22
|
# Import mahoraga components for tool functions
|
|
22
23
|
try:
|
|
@@ -181,7 +182,6 @@ from ..models import SessionDTO, UIStateInfo, ChatHistoryMessage, ConfigInfo, Ag
|
|
|
181
182
|
|
|
182
183
|
async def execute_v3(
|
|
183
184
|
task: str,
|
|
184
|
-
max_steps: int = 15,
|
|
185
185
|
progress_callback: Optional[Callable[[str], None]] = None
|
|
186
186
|
) -> Dict[str, Any]:
|
|
187
187
|
"""
|
|
@@ -222,6 +222,7 @@ async def execute_v3(
|
|
|
222
222
|
"reflection": state.config["reflection"],
|
|
223
223
|
"debug": state.config["debug"]
|
|
224
224
|
}
|
|
225
|
+
max_steps = state.config.get("max_steps", 15)
|
|
225
226
|
|
|
226
227
|
# Validate API key
|
|
227
228
|
validation_result = await backend.validate_api_key(quash_api_key)
|
|
@@ -236,12 +237,12 @@ async def execute_v3(
|
|
|
236
237
|
|
|
237
238
|
# Check credits
|
|
238
239
|
user_info = validation_result.get("user", {})
|
|
239
|
-
|
|
240
|
+
organization_credits = validation_result.get("organization_credits", 0)
|
|
240
241
|
|
|
241
|
-
if
|
|
242
|
+
if organization_credits <= 0:
|
|
242
243
|
return {
|
|
243
244
|
"status": "error",
|
|
244
|
-
"message": f"❌ Insufficient credits. Current balance: ${
|
|
245
|
+
"message": f"❌ Insufficient credits. Current balance: ${organization_credits:.2f}",
|
|
245
246
|
"user": user_info
|
|
246
247
|
}
|
|
247
248
|
|
|
@@ -250,76 +251,78 @@ async def execute_v3(
|
|
|
250
251
|
if progress_callback:
|
|
251
252
|
progress_callback(message)
|
|
252
253
|
|
|
253
|
-
log_progress(f"✅ API Key validated - Credits: ${
|
|
254
|
+
log_progress(f"✅ API Key validated - Credits: ${organization_credits:.2f}")
|
|
254
255
|
log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
|
|
255
256
|
log_progress(f"🚀 Starting task: {task}")
|
|
256
257
|
log_progress(f"📱 Device: {state.device_serial}")
|
|
257
258
|
log_progress(f"🧠 Model: {config['model']}")
|
|
258
|
-
|
|
259
259
|
log_progress(f"🔢 Max steps: {max_steps}")
|
|
260
260
|
|
|
261
261
|
# Initialize Session DTO
|
|
262
|
-
|
|
263
262
|
session = SessionDTO(
|
|
264
263
|
session_id=f"session_{uuid.uuid4().hex[:12]}",
|
|
265
264
|
api_key=quash_api_key,
|
|
266
265
|
task=task,
|
|
267
266
|
device_serial=state.device_serial,
|
|
268
|
-
config=ConfigInfo(**config)
|
|
267
|
+
config=ConfigInfo(**config),
|
|
268
|
+
last_action_completed=None # Explicitly initialize the new field
|
|
269
269
|
)
|
|
270
270
|
|
|
271
|
-
# Initialize
|
|
272
|
-
|
|
271
|
+
# Initialize a single, powerful ADB tools instance from Mahoraga
|
|
272
|
+
mahoraga_tools = None
|
|
273
|
+
try:
|
|
274
|
+
mahoraga_tools = MahoragaAdbTools(
|
|
275
|
+
serial=state.device_serial,
|
|
276
|
+
use_tcp=True,
|
|
277
|
+
remote_tcp_port=8080
|
|
278
|
+
)
|
|
279
|
+
except Exception as e:
|
|
280
|
+
log_progress(f"⚠️ CRITICAL: Failed to initialize MahoragaAdbTools: {e}")
|
|
281
|
+
return {
|
|
282
|
+
"status": "error",
|
|
283
|
+
"message": f"💥 Failed to initialize ADB tools: {e}",
|
|
284
|
+
}
|
|
273
285
|
|
|
274
286
|
# Code executor namespace - add tool functions so generated code can call them
|
|
275
287
|
executor_globals = {
|
|
276
288
|
"__builtins__": __builtins__,
|
|
277
|
-
"adb_tools": adb_tools
|
|
278
289
|
}
|
|
279
290
|
|
|
280
|
-
# Add tool functions to executor namespace
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
# Add wrapped function to globals so code can call it directly
|
|
316
|
-
executor_globals[tool_name] = make_printing_wrapper(tool_function)
|
|
317
|
-
|
|
318
|
-
log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
|
|
319
|
-
except Exception as e:
|
|
320
|
-
log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
|
|
321
|
-
import traceback
|
|
322
|
-
log_progress(f"Traceback: {traceback.format_exc()}")
|
|
291
|
+
# Add tool functions to executor namespace
|
|
292
|
+
try:
|
|
293
|
+
# Get all tool functions from the single mahoraga_tools instance
|
|
294
|
+
tool_list = describe_tools(mahoraga_tools, exclude_tools=None)
|
|
295
|
+
|
|
296
|
+
# Filter by allowed tools from DEFAULT persona
|
|
297
|
+
allowed_tool_names = DEFAULT.allowed_tools if hasattr(DEFAULT, 'allowed_tools') else []
|
|
298
|
+
filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}
|
|
299
|
+
|
|
300
|
+
# Add each tool function to executor globals with print wrapper
|
|
301
|
+
for tool_name, tool_function in filtered_tools.items():
|
|
302
|
+
# Convert async functions to sync if needed
|
|
303
|
+
if asyncio.iscoroutinefunction(tool_function):
|
|
304
|
+
if async_to_sync:
|
|
305
|
+
tool_function = async_to_sync(tool_function)
|
|
306
|
+
|
|
307
|
+
# Wrap tool function to print its return value
|
|
308
|
+
def make_printing_wrapper(func):
|
|
309
|
+
"""Wrap a tool function to print its return value."""
|
|
310
|
+
def wrapper(*args, **kwargs):
|
|
311
|
+
result = func(*args, **kwargs)
|
|
312
|
+
# Print the result so stdout captures it
|
|
313
|
+
if result is not None:
|
|
314
|
+
print(result)
|
|
315
|
+
return result
|
|
316
|
+
return wrapper
|
|
317
|
+
|
|
318
|
+
# Add wrapped function to globals so code can call it directly
|
|
319
|
+
executor_globals[tool_name] = make_printing_wrapper(tool_function)
|
|
320
|
+
|
|
321
|
+
log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
|
|
322
|
+
except Exception as e:
|
|
323
|
+
log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
|
|
324
|
+
import traceback
|
|
325
|
+
log_progress(f"Traceback: {traceback.format_exc()}")
|
|
323
326
|
|
|
324
327
|
executor_locals = {}
|
|
325
328
|
|
|
@@ -339,14 +342,14 @@ async def execute_v3(
|
|
|
339
342
|
ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)
|
|
340
343
|
|
|
341
344
|
session.ui_state = UIStateInfo(**ui_state_dict)
|
|
345
|
+
|
|
342
346
|
# Update local tools with new state
|
|
343
|
-
if mahoraga_tools and "a11y_tree" in ui_state_dict and isinstance(ui_state_dict["a11y_tree"],
|
|
347
|
+
if mahoraga_tools and "a11y_tree" in ui_state_dict and isinstance(ui_state_dict["a11y_tree"], list):
|
|
344
348
|
try:
|
|
345
|
-
|
|
346
|
-
a11y_tree_obj = json.loads(ui_state_dict["a11y_tree"])
|
|
349
|
+
a11y_tree_obj = ui_state_dict["a11y_tree"]
|
|
347
350
|
mahoraga_tools.update_state(a11y_tree_obj)
|
|
348
|
-
except
|
|
349
|
-
|
|
351
|
+
except Exception as e:
|
|
352
|
+
log_progress(f"⚠️ Warning: Failed to update mahoraga_tools state: {e}")
|
|
350
353
|
|
|
351
354
|
if not config["vision"]:
|
|
352
355
|
screenshot_bytes = None
|
|
@@ -357,7 +360,7 @@ async def execute_v3(
|
|
|
357
360
|
except Exception as e:
|
|
358
361
|
log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
|
|
359
362
|
session.ui_state = UIStateInfo(
|
|
360
|
-
a11y_tree=
|
|
363
|
+
a11y_tree=[],
|
|
361
364
|
phone_state={"package": "unknown"}
|
|
362
365
|
)
|
|
363
366
|
screenshot_bytes = None
|
|
@@ -381,14 +384,29 @@ async def execute_v3(
|
|
|
381
384
|
"duration_seconds": time.time() - start_time
|
|
382
385
|
}
|
|
383
386
|
|
|
384
|
-
# Update
|
|
387
|
+
# CRITICAL: Update the client's session DTO with the one returned from the backend
|
|
388
|
+
updated_session_data = step_result.get("updated_session")
|
|
389
|
+
if updated_session_data:
|
|
390
|
+
# Ensure last_action_completed field exists
|
|
391
|
+
if "last_action_completed" not in updated_session_data:
|
|
392
|
+
updated_session_data["last_action_completed"] = None
|
|
393
|
+
session = SessionDTO(**updated_session_data)
|
|
394
|
+
else:
|
|
395
|
+
# Fallback: if updated_session not returned, update locally
|
|
396
|
+
new_step_data = step_result.get("new_step")
|
|
397
|
+
if new_step_data:
|
|
398
|
+
new_step = AgentStepDTO(**new_step_data)
|
|
399
|
+
session.steps.append(new_step)
|
|
400
|
+
assistant_response = step_result.get("assistant_response", "")
|
|
401
|
+
session.chat_history.append(ChatHistoryMessage(role="assistant", content=assistant_response))
|
|
402
|
+
|
|
403
|
+
# CRITICAL FIX: Handle plan generation responses (which have new_step=None)
|
|
404
|
+
# These don't create actual steps, just show the plan
|
|
385
405
|
new_step_data = step_result.get("new_step")
|
|
386
|
-
if new_step_data:
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
session.chat_history.append(ChatHistoryMessage(role="assistant", content=assistant_response))
|
|
391
|
-
|
|
406
|
+
if new_step_data is None and not updated_session_data:
|
|
407
|
+
# Plan was generated but no step was added
|
|
408
|
+
# This is normal - plan is informational only
|
|
409
|
+
pass
|
|
392
410
|
|
|
393
411
|
# Get action from backend
|
|
394
412
|
action = step_result.get("action", {})
|
|
@@ -396,60 +414,18 @@ async def execute_v3(
|
|
|
396
414
|
code = action.get("code")
|
|
397
415
|
reasoning = action.get("reasoning")
|
|
398
416
|
|
|
399
|
-
|
|
400
417
|
# Log reasoning
|
|
401
418
|
if reasoning:
|
|
402
419
|
log_progress(f"🤔 Reasoning: {reasoning}")
|
|
403
420
|
|
|
421
|
+
# CRITICAL FIX: Reset completion flag before executing
|
|
422
|
+
session.last_action_completed = False
|
|
404
423
|
|
|
405
|
-
# 3.
|
|
406
|
-
if
|
|
407
|
-
success = step_result.get("success", False)
|
|
408
|
-
final_message = step_result.get("final_message", "Task completed")
|
|
409
|
-
|
|
410
|
-
duration = time.time() - start_time
|
|
411
|
-
|
|
412
|
-
if success:
|
|
413
|
-
log_progress(f"✅ Task completed successfully!")
|
|
414
|
-
else:
|
|
415
|
-
log_progress(f"❌ Task marked as failed")
|
|
424
|
+
# 3. Execute action locally (if provided)
|
|
425
|
+
if code and (action_type == "execute_code" or action_type == "complete"):
|
|
416
426
|
|
|
417
|
-
# Finalize session on backend
|
|
418
|
-
finalize_result = await backend.finalize_session(session=session)
|
|
419
|
-
|
|
420
|
-
if success:
|
|
421
|
-
log_progress(f"✅ Task completed successfully in {len(session.steps)} steps")
|
|
422
|
-
log_progress(f"💰 Usage: {finalize_result.get('total_tokens', {}).get('total')} tokens, ${finalize_result.get('total_cost', 0):.4f}")
|
|
423
|
-
|
|
424
|
-
return {
|
|
425
|
-
"status": "success",
|
|
426
|
-
"steps_taken": len(session.steps),
|
|
427
|
-
"final_message": final_message,
|
|
428
|
-
"message": f"✅ Success: {final_message}",
|
|
429
|
-
"tokens": finalize_result.get("total_tokens"),
|
|
430
|
-
"cost": finalize_result.get("total_cost"),
|
|
431
|
-
"duration_seconds": duration
|
|
432
|
-
}
|
|
433
|
-
else:
|
|
434
|
-
log_progress(f"❌ Task failed: {final_message}")
|
|
435
|
-
log_progress(f"💰 Usage: {finalize_result.get('total_tokens', {}).get('total')} tokens, ${finalize_result.get('total_cost', 0):.4f}")
|
|
436
|
-
|
|
437
|
-
return {
|
|
438
|
-
"status": "failed",
|
|
439
|
-
"steps_taken": len(session.steps),
|
|
440
|
-
"final_message": final_message,
|
|
441
|
-
"message": f"❌ Failed: {final_message}",
|
|
442
|
-
"tokens": finalize_result.get("total_tokens"),
|
|
443
|
-
"cost": finalize_result.get("total_cost"),
|
|
444
|
-
"duration_seconds": duration
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
# 4. Execute action locally (only if task is not complete)
|
|
449
|
-
if code and action_type == "execute_code":
|
|
450
427
|
log_progress(f"⚡ Executing action...")
|
|
451
|
-
|
|
452
|
-
log_progress(f"```python\n{code}\n```") # Log the code
|
|
428
|
+
log_progress(f"```python\n{code}\n```")
|
|
453
429
|
|
|
454
430
|
old_ui_state = session.ui_state.model_dump().copy()
|
|
455
431
|
|
|
@@ -466,6 +442,13 @@ async def execute_v3(
|
|
|
466
442
|
execution_output = stdout.getvalue()
|
|
467
443
|
error_output = stderr.getvalue()
|
|
468
444
|
|
|
445
|
+
# CRITICAL FIX: Check if complete() was actually called
|
|
446
|
+
if mahoraga_tools and mahoraga_tools.finished:
|
|
447
|
+
log_progress("✅ Agent has signaled task completion via complete()")
|
|
448
|
+
session.last_action_completed = True
|
|
449
|
+
else:
|
|
450
|
+
session.last_action_completed = False
|
|
451
|
+
|
|
469
452
|
log_progress(f"⏳ Waiting for UI state to update...")
|
|
470
453
|
try:
|
|
471
454
|
new_ui_state_dict, _, state_changed = wait_for_action_effect(
|
|
@@ -485,7 +468,6 @@ async def execute_v3(
|
|
|
485
468
|
log_progress(f"✅ State changed: App switched ({old_pkg} → {new_pkg})")
|
|
486
469
|
else:
|
|
487
470
|
log_progress(f"✅ State changed: UI updated")
|
|
488
|
-
|
|
489
471
|
else:
|
|
490
472
|
log_progress(f"⚠️ WARNING: State did NOT change after action (timeout)")
|
|
491
473
|
log_progress(f" This might mean the action had no effect or took too long")
|
|
@@ -500,7 +482,10 @@ async def execute_v3(
|
|
|
500
482
|
if execution_output:
|
|
501
483
|
feedback_parts.append(f"Action output: {execution_output.strip()}")
|
|
502
484
|
|
|
503
|
-
|
|
485
|
+
# CRITICAL FIX: Report completion status in feedback
|
|
486
|
+
if session.last_action_completed:
|
|
487
|
+
feedback_parts.append("Sub-task completed successfully (complete() was called)")
|
|
488
|
+
elif state_changed:
|
|
504
489
|
feedback_parts.append("UI state updated successfully")
|
|
505
490
|
else:
|
|
506
491
|
feedback_parts.append("WARNING: UI state did not change (action may have failed)")
|
|
@@ -512,27 +497,86 @@ async def execute_v3(
|
|
|
512
497
|
|
|
513
498
|
log_progress(f"✅ {feedback[:200]}")
|
|
514
499
|
|
|
515
|
-
session.chat_history.append(ChatHistoryMessage(
|
|
500
|
+
session.chat_history.append(ChatHistoryMessage(
|
|
501
|
+
role="user",
|
|
502
|
+
content=f"Execution Result:\n```\n{feedback}\n```"
|
|
503
|
+
))
|
|
516
504
|
|
|
505
|
+
time.sleep(0.5)
|
|
517
506
|
|
|
518
507
|
except Exception as e:
|
|
519
508
|
error_msg = f"Error during execution: {str(e)}"
|
|
520
509
|
log_progress(f"💥 Action failed: {error_msg}")
|
|
510
|
+
session.last_action_completed = False
|
|
511
|
+
|
|
512
|
+
session.chat_history.append(ChatHistoryMessage(
|
|
513
|
+
role="user",
|
|
514
|
+
content=f"Execution Error:\n```\n{error_output.strip()}\n```"
|
|
515
|
+
))
|
|
516
|
+
|
|
517
|
+
# 4. Check if overall task is complete
|
|
518
|
+
# CRITICAL FIX: In reasoning mode with planning, DON'T exit on first complete() call
|
|
519
|
+
# The backend controls when all tasks are done via the "complete" action type
|
|
520
|
+
should_exit = False
|
|
521
|
+
|
|
522
|
+
if mahoraga_tools and mahoraga_tools.finished:
|
|
523
|
+
# Check if this is the FINAL completion from the backend
|
|
524
|
+
# In reasoning mode, the backend returns action.type="complete" when ALL tasks are done
|
|
525
|
+
action_type = action.get("type", "")
|
|
526
|
+
|
|
527
|
+
if action_type == "complete":
|
|
528
|
+
# Backend explicitly says we're done with ALL tasks
|
|
529
|
+
should_exit = True
|
|
530
|
+
success = mahoraga_tools.success
|
|
531
|
+
final_message = mahoraga_tools.reason
|
|
532
|
+
elif config["reasoning"] and session.current_plan:
|
|
533
|
+
# In reasoning mode with a plan, a single complete() call is just for one sub-task
|
|
534
|
+
# Continue the loop - the backend will advance to the next task
|
|
535
|
+
log_progress(f"✅ Sub-task completed. Moving to next task...")
|
|
536
|
+
should_exit = False
|
|
537
|
+
else:
|
|
538
|
+
# Non-reasoning mode: first complete() means done
|
|
539
|
+
should_exit = True
|
|
540
|
+
success = mahoraga_tools.success
|
|
541
|
+
final_message = mahoraga_tools.reason
|
|
542
|
+
|
|
543
|
+
if should_exit and mahoraga_tools and mahoraga_tools.finished:
|
|
544
|
+
success = mahoraga_tools.success
|
|
545
|
+
final_message = mahoraga_tools.reason
|
|
546
|
+
duration = time.time() - start_time
|
|
521
547
|
|
|
522
|
-
|
|
548
|
+
if success:
|
|
549
|
+
log_progress(f"✅ Task completed successfully!")
|
|
550
|
+
else:
|
|
551
|
+
log_progress(f"❌ Task marked as failed: {final_message}")
|
|
523
552
|
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
553
|
+
# Finalize session on backend
|
|
554
|
+
finalize_result = await backend.finalize_session(session=session)
|
|
555
|
+
total_tokens = finalize_result.get("total_tokens", {})
|
|
556
|
+
total_cost = finalize_result.get("total_cost", 0)
|
|
527
557
|
|
|
558
|
+
log_progress(f"💰 Usage: {total_tokens.get('total')} tokens, ${total_cost:.4f}")
|
|
528
559
|
|
|
560
|
+
return {
|
|
561
|
+
"status": "success" if success else "failed",
|
|
562
|
+
"steps_taken": len(session.steps),
|
|
563
|
+
"final_message": final_message,
|
|
564
|
+
"message": f"✅ Success: {final_message}" if success else f"❌ Failed: {final_message}",
|
|
565
|
+
"tokens": total_tokens,
|
|
566
|
+
"cost": total_cost,
|
|
567
|
+
"duration_seconds": duration
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
elif not code:
|
|
571
|
+
log_progress("⚠️ No action code provided by backend")
|
|
572
|
+
session.chat_history.append(ChatHistoryMessage(
|
|
573
|
+
role="user",
|
|
574
|
+
content="No code was provided. Please provide code to execute."
|
|
575
|
+
))
|
|
529
576
|
|
|
530
577
|
# Max steps reached
|
|
531
578
|
log_progress(f"⚠️ Reached maximum steps ({max_steps})")
|
|
532
|
-
|
|
533
579
|
duration = time.time() - start_time
|
|
534
|
-
|
|
535
|
-
# Finalize session on backend
|
|
536
580
|
finalize_result = await backend.finalize_session(session=session)
|
|
537
581
|
|
|
538
582
|
return {
|
|
@@ -548,8 +592,6 @@ async def execute_v3(
|
|
|
548
592
|
except KeyboardInterrupt:
|
|
549
593
|
log_progress("ℹ️ Task interrupted by user")
|
|
550
594
|
duration = time.time() - start_time
|
|
551
|
-
|
|
552
|
-
# Finalize session on backend
|
|
553
595
|
finalize_result = await backend.finalize_session(session=session)
|
|
554
596
|
|
|
555
597
|
return {
|
|
@@ -565,8 +607,6 @@ async def execute_v3(
|
|
|
565
607
|
error_msg = str(e)
|
|
566
608
|
log_progress(f"💥 Error: {error_msg}")
|
|
567
609
|
duration = time.time() - start_time
|
|
568
|
-
|
|
569
|
-
# Finalize session on backend
|
|
570
610
|
finalize_result = await backend.finalize_session(session=session)
|
|
571
611
|
|
|
572
612
|
return {
|
|
@@ -581,5 +621,5 @@ async def execute_v3(
|
|
|
581
621
|
|
|
582
622
|
finally:
|
|
583
623
|
# Cleanup TCP forwarding
|
|
584
|
-
if
|
|
585
|
-
|
|
624
|
+
if mahoraga_tools:
|
|
625
|
+
mahoraga_tools.teardown_tcp_forward()
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Test script to verify MCP -> Backend integration.
|
|
4
|
-
Tests API key validation and execution logging with real backend.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import asyncio
|
|
8
|
-
import os
|
|
9
|
-
import sys
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from dotenv import load_dotenv
|
|
12
|
-
|
|
13
|
-
# Load .env file from project root
|
|
14
|
-
project_root = Path(__file__).parent.parent
|
|
15
|
-
env_file = project_root / ".env"
|
|
16
|
-
load_dotenv(env_file)
|
|
17
|
-
|
|
18
|
-
# Add src to path
|
|
19
|
-
sys.path.insert(0, str(Path(__file__).parent / "src"))
|
|
20
|
-
|
|
21
|
-
from backend_client import get_backend_client
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
async def test_integration():
|
|
25
|
-
"""Test MCP backend integration."""
|
|
26
|
-
|
|
27
|
-
print("=" * 60)
|
|
28
|
-
print("🧪 Testing Mahoraga MCP -> Backend Integration")
|
|
29
|
-
print("=" * 60)
|
|
30
|
-
|
|
31
|
-
# Check environment
|
|
32
|
-
mock_mode = os.getenv("MAHORAGA_MOCK_BACKEND", "true")
|
|
33
|
-
backend_url = os.getenv("MAHORAGA_BACKEND_URL", "http://localhost:8000")
|
|
34
|
-
|
|
35
|
-
print(f"\n📋 Configuration:")
|
|
36
|
-
print(f" Mock Mode: {mock_mode}")
|
|
37
|
-
print(f" Backend URL: {backend_url}")
|
|
38
|
-
|
|
39
|
-
# Get backend client
|
|
40
|
-
client = get_backend_client()
|
|
41
|
-
|
|
42
|
-
# Test API key from database
|
|
43
|
-
test_api_key = "mah_test1234567890abcdef"
|
|
44
|
-
|
|
45
|
-
print(f"\n🔑 Testing API Key Validation...")
|
|
46
|
-
print(f" API Key: {test_api_key}")
|
|
47
|
-
|
|
48
|
-
# Test validation
|
|
49
|
-
result = await client.validate_api_key(test_api_key)
|
|
50
|
-
|
|
51
|
-
if result.get("valid"):
|
|
52
|
-
print(" ✅ API Key Valid!")
|
|
53
|
-
user = result.get("user", {})
|
|
54
|
-
print(f" 👤 User: {user.get('name')} ({user.get('email')})")
|
|
55
|
-
print(f" 💰 Credits: ${user.get('credits', 0):.2f}")
|
|
56
|
-
else:
|
|
57
|
-
print(f" ❌ API Key Invalid: {result.get('error')}")
|
|
58
|
-
return False
|
|
59
|
-
|
|
60
|
-
# Test execution logging
|
|
61
|
-
print(f"\n📊 Testing Execution Logging...")
|
|
62
|
-
|
|
63
|
-
log_result = await client.log_execution(
|
|
64
|
-
api_key=test_api_key,
|
|
65
|
-
execution_id="test_exec_integration_001",
|
|
66
|
-
status="completed",
|
|
67
|
-
tokens={"prompt": 150, "completion": 75, "total": 225},
|
|
68
|
-
cost=0.05
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
if log_result.get("logged"):
|
|
72
|
-
print(" ✅ Execution Logged!")
|
|
73
|
-
print(f" 💸 Credits Deducted: ${log_result.get('credits_deducted', 0):.2f}")
|
|
74
|
-
print(f" 💰 New Balance: ${log_result.get('new_balance', 0):.2f}")
|
|
75
|
-
else:
|
|
76
|
-
print(f" ❌ Logging Failed: {log_result.get('error')}")
|
|
77
|
-
return False
|
|
78
|
-
|
|
79
|
-
# Verify credits were deducted
|
|
80
|
-
print(f"\n🔍 Verifying Credit Deduction...")
|
|
81
|
-
|
|
82
|
-
verify_result = await client.validate_api_key(test_api_key)
|
|
83
|
-
if verify_result.get("valid"):
|
|
84
|
-
new_credits = verify_result.get("user", {}).get("credits", 0)
|
|
85
|
-
print(f" ✅ Credits Updated: ${new_credits:.2f}")
|
|
86
|
-
|
|
87
|
-
print(f"\n" + "=" * 60)
|
|
88
|
-
print("✅ All Integration Tests Passed!")
|
|
89
|
-
print("=" * 60)
|
|
90
|
-
print("\n💡 Next Steps:")
|
|
91
|
-
print(" 1. MCP is now connected to real backend")
|
|
92
|
-
print(" 2. Test with actual MCP execute command")
|
|
93
|
-
print(" 3. Build web portal for user management")
|
|
94
|
-
|
|
95
|
-
return True
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if __name__ == "__main__":
|
|
99
|
-
success = asyncio.run(test_integration())
|
|
100
|
-
sys.exit(0 if success else 1)
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Test script to verify tool functions are loaded correctly.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import sys
|
|
6
|
-
sys.path.insert(0, '/Users/abhinavsai/POC/mahoraga-mac/quash-mcp')
|
|
7
|
-
sys.path.insert(0, '/Users/abhinavsai/POC/mahoraga-mac/mahoraga')
|
|
8
|
-
|
|
9
|
-
def test_tool_loading():
|
|
10
|
-
print("Testing tool loading...")
|
|
11
|
-
|
|
12
|
-
try:
|
|
13
|
-
# Import mahoraga components
|
|
14
|
-
from mahoraga.tools import Tools, describe_tools
|
|
15
|
-
from mahoraga.tools.adb import AdbTools as MahoragaAdbTools
|
|
16
|
-
from mahoraga.agent.context.personas import DEFAULT
|
|
17
|
-
from mahoraga.agent.utils.async_utils import async_to_sync
|
|
18
|
-
|
|
19
|
-
print("✅ All imports successful")
|
|
20
|
-
|
|
21
|
-
# Create a mahoraga AdbTools instance
|
|
22
|
-
print("\nCreating mahoraga AdbTools instance...")
|
|
23
|
-
mahoraga_tools = MahoragaAdbTools(
|
|
24
|
-
serial="emulator-5554", # Use your device serial
|
|
25
|
-
use_tcp=True,
|
|
26
|
-
remote_tcp_port=8080
|
|
27
|
-
)
|
|
28
|
-
print(f"✅ Created mahoraga AdbTools instance")
|
|
29
|
-
print(f" - Serial: {mahoraga_tools.device.serial}")
|
|
30
|
-
print(f" - TCP forwarded: {mahoraga_tools.tcp_forwarded}")
|
|
31
|
-
|
|
32
|
-
# Get tool list
|
|
33
|
-
print("\nGetting tool list...")
|
|
34
|
-
tool_list = describe_tools(mahoraga_tools, exclude_tools=None)
|
|
35
|
-
print(f"✅ Got {len(tool_list)} tools:")
|
|
36
|
-
for tool_name, tool_func in tool_list.items():
|
|
37
|
-
print(f" - {tool_name}: {tool_func}")
|
|
38
|
-
|
|
39
|
-
# Filter by allowed tools
|
|
40
|
-
print(f"\nFiltering by DEFAULT persona allowed tools...")
|
|
41
|
-
allowed_tool_names = DEFAULT.allowed_tools
|
|
42
|
-
print(f" Allowed tools: {allowed_tool_names}")
|
|
43
|
-
|
|
44
|
-
filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}
|
|
45
|
-
print(f"✅ Filtered to {len(filtered_tools)} tools:")
|
|
46
|
-
for tool_name in filtered_tools.keys():
|
|
47
|
-
print(f" - {tool_name}")
|
|
48
|
-
|
|
49
|
-
# Test executor globals setup
|
|
50
|
-
print("\nSetting up executor globals...")
|
|
51
|
-
executor_globals = {"__builtins__": __builtins__}
|
|
52
|
-
|
|
53
|
-
for tool_name, tool_function in filtered_tools.items():
|
|
54
|
-
import asyncio
|
|
55
|
-
if asyncio.iscoroutinefunction(tool_function):
|
|
56
|
-
tool_function = async_to_sync(tool_function)
|
|
57
|
-
executor_globals[tool_name] = tool_function
|
|
58
|
-
|
|
59
|
-
print(f"✅ Executor globals set up with {len(executor_globals)} items")
|
|
60
|
-
|
|
61
|
-
# Test that functions are callable
|
|
62
|
-
print("\nTesting function availability...")
|
|
63
|
-
test_functions = ['start_app', 'swipe', 'press_key', 'tap_by_index']
|
|
64
|
-
for func_name in test_functions:
|
|
65
|
-
if func_name in executor_globals:
|
|
66
|
-
print(f" ✅ {func_name} is available")
|
|
67
|
-
else:
|
|
68
|
-
print(f" ❌ {func_name} is NOT available")
|
|
69
|
-
|
|
70
|
-
print("\n✅ All tests passed!")
|
|
71
|
-
return True
|
|
72
|
-
|
|
73
|
-
except Exception as e:
|
|
74
|
-
print(f"\n❌ Test failed: {e}")
|
|
75
|
-
import traceback
|
|
76
|
-
traceback.print_exc()
|
|
77
|
-
return False
|
|
78
|
-
|
|
79
|
-
if __name__ == "__main__":
|
|
80
|
-
success = test_tool_loading()
|
|
81
|
-
sys.exit(0 if success else 1)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|