quash-mcp 0.2.13__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/PKG-INFO +1 -1
  2. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/local_test.py +19 -5
  3. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/pyproject.toml +1 -1
  4. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/backend_client.py +3 -2
  5. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/state_capture.py +20 -14
  6. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/models.py +12 -1
  7. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/execute_v3.py +170 -130
  8. quash_mcp-0.2.13/test_backend_integration.py +0 -100
  9. quash_mcp-0.2.13/test_tools_loading.py +0 -81
  10. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/.gitignore +0 -0
  11. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/README.md +0 -0
  12. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/SETUP_CLAUDE_CODE.md +0 -0
  13. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/__init__.py +0 -0
  14. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/__main__.py +0 -0
  15. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/__init__.py +0 -0
  16. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/adb_tools.py +0 -0
  17. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/device/portal.py +0 -0
  18. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/server.py +0 -0
  19. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/state.py +0 -0
  20. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/__init__.py +0 -0
  21. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/build.py +0 -0
  22. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/build_old.py +0 -0
  23. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/configure.py +0 -0
  24. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/connect.py +0 -0
  25. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/execute.py +0 -0
  26. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/execute_v2_backup.py +0 -0
  27. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/runsuite.py +0 -0
  28. {quash_mcp-0.2.13 → quash_mcp-0.3.1}/quash_mcp/tools/usage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quash-mcp
3
- Version: 0.2.13
3
+ Version: 0.3.1
4
4
  Summary: Model Context Protocol server for Quash - AI-powered mobile automation agent
5
5
  Project-URL: Homepage, https://quashbugs.com
6
6
  Project-URL: Repository, https://github.com/quash/quash-mcp
@@ -13,13 +13,27 @@ from quash_mcp.models import SessionDTO, ConfigInfo, UIStateInfo
13
13
  async def main():
14
14
  # Initialize the session state
15
15
  state = get_state()
16
- state.config["api_key"] = "mhg_7S01mxJUB7q6k-sbFQeScYTX6ae3pehI"
16
+ state.config["api_key"] = "mhg__Sacdd_AiKSJgTow49F8p9Fu-UGyi_Wd"
17
17
  state.device_serial = "emulator-5554"
18
18
  state.portal_ready = True # Assume portal is ready for local testing
19
19
 
20
+ # --- CONFIGURATION ---
21
+ state.config["model"] = "anthropic/claude-sonnet-4"
22
+ state.config["temperature"] = 0.2
23
+ state.config["vision"] = False
24
+ state.config["reasoning"] = True
25
+ state.config["reflection"] = True
26
+ state.config["debug"] = False
27
+ state.config["max_steps"] = 10
28
+ # ---------------------
29
+
20
30
  # Define the task
21
- task = "Open chrome and open quashbugs.com website"
22
- # task = "Open settings and scroll 3 times."
31
+ # task = "Open the google search app and search for bakeries."
32
+ # task = "Open settings."
33
+ # task = "Open Markor app, replace the contents of 'Abhinav.txt' to Hello World! and save the file as Done.md"
34
+ # task = "Tell me all the number drawn on the screen"
35
+ task = "Create a file with name test.md and write Hello World! and save it in markor app"
36
+ # task = "Tell me what you see drawn on screen"
23
37
 
24
38
  # Define a progress callback
25
39
  def progress_callback(message):
@@ -31,7 +45,7 @@ async def main():
31
45
  api_key=state.config["api_key"],
32
46
  task=task,
33
47
  device_serial=state.device_serial,
34
- config=ConfigInfo(**state.config)
48
+ config=ConfigInfo(**{k: v for k, v in state.config.items() if k in ConfigInfo.model_fields})
35
49
  )
36
50
 
37
51
  # Print the initial session DTO
@@ -50,4 +64,4 @@ async def main():
50
64
 
51
65
 
52
66
  if __name__ == "__main__":
53
- asyncio.run(main())
67
+ asyncio.run(main())
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "quash-mcp"
3
- version = "0.2.13"
3
+ version = "0.3.1"
4
4
  description = "Model Context Protocol server for Quash - AI-powered mobile automation agent"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -35,7 +35,8 @@ class BackendClient:
35
35
  Dict with validation result:
36
36
  {
37
37
  "valid": bool,
38
- "user": {"email": str, "name": str, "credits": float},
38
+ "user": {"email": str, "name": str},
39
+ "organization_credits": float,
39
40
  "openrouter_api_key": str,
40
41
  "error": str (if invalid)
41
42
  }
@@ -211,7 +212,7 @@ class BackendClient:
211
212
 
212
213
  # Prepare files dict (only screenshot if provided)
213
214
  files = {}
214
- if screenshot_bytes:
215
+ if screenshot_bytes and len(screenshot_bytes) > 0:
215
216
  files["screenshot"] = ("screenshot.png", screenshot_bytes, "image/png")
216
217
 
217
218
  async with httpx.AsyncClient(timeout=self.timeout) as client:
@@ -1,11 +1,7 @@
1
- """
2
- Device state capture utilities.
3
- Captures UI state and screenshots from Android devices.
4
- """
5
-
1
+ import json
6
2
  import logging
7
3
  import requests
8
- from typing import Dict, Any, Optional, Tuple
4
+ from typing import Dict, Any, Optional, Tuple, List
9
5
  from adbutils import adb
10
6
 
11
7
  logger = logging.getLogger("quash-device")
@@ -34,7 +30,7 @@ def get_current_package(serial: str) -> str:
34
30
  return "unknown"
35
31
 
36
32
 
37
- def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
33
+ def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> List[Dict[str, Any]]:
38
34
  """
39
35
  Get accessibility tree from Portal app via TCP.
40
36
 
@@ -43,7 +39,7 @@ def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
43
39
  tcp_port: Local TCP port for Portal communication
44
40
 
45
41
  Returns:
46
- Accessibility tree XML string
42
+ Accessibility tree as a list of dictionaries, or an empty list if failed
47
43
  """
48
44
  try:
49
45
  device = adb.device(serial)
@@ -55,20 +51,27 @@ def get_accessibility_tree(serial: str, tcp_port: int = 8080) -> str:
55
51
  )
56
52
 
57
53
  if response.status_code == 200:
58
- # Portal returns JSON with status and data fields
59
54
  data = response.json()
60
55
  if data.get("status") == "success":
61
- return data.get("data", "<hierarchy></hierarchy>")
56
+ # The 'data' field should contain the JSON string of the a11y_tree
57
+ a11y_tree_json_str = data.get("data", "[]")
58
+ try:
59
+ parsed_tree = json.loads(a11y_tree_json_str)
60
+ logger.debug(f"get_accessibility_tree returning tree of length: {len(parsed_tree)}")
61
+ return parsed_tree
62
+ except json.JSONDecodeError:
63
+ logger.warning(f"Failed to parse a11y_tree JSON string: {a11y_tree_json_str}")
64
+ return []
62
65
  else:
63
66
  logger.warning(f"Portal error: {data.get('error', 'Unknown error')}")
64
- return "<hierarchy></hierarchy>"
67
+ return []
65
68
  else:
66
69
  logger.warning(f"Failed to get accessibility tree: HTTP {response.status_code}")
67
- return "<hierarchy></hierarchy>"
70
+ return []
68
71
 
69
72
  except Exception as e:
70
73
  logger.warning(f"Failed to get accessibility tree: {e}")
71
- return "<hierarchy></hierarchy>"
74
+ return []
72
75
 
73
76
 
74
77
  def capture_screenshot(serial: str) -> Optional[bytes]:
@@ -83,7 +86,10 @@ def capture_screenshot(serial: str) -> Optional[bytes]:
83
86
  """
84
87
  try:
85
88
  device = adb.device(serial)
86
- screenshot_bytes = device.shell("screencap -p", stream=True)
89
+ # device.shell("screencap -p", stream=True) returns an AdbConnection object (file-like)
90
+ # We need to read the bytes from it.
91
+ with device.shell("screencap -p", stream=True) as conn:
92
+ screenshot_bytes = conn.read(1024 * 1024 * 10) # Read up to 10MB
87
93
  return screenshot_bytes
88
94
  except Exception as e:
89
95
  logger.error(f"Failed to capture screenshot: {e}")
@@ -16,7 +16,7 @@ class ConfigInfo(BaseModel):
16
16
  debug: bool = False
17
17
 
18
18
  class UIStateInfo(BaseModel):
19
- a11y_tree: str
19
+ a11y_tree: List[Dict[str, Any]]
20
20
  phone_state: Dict[str, Any]
21
21
 
22
22
  class ChatHistoryMessage(BaseModel):
@@ -31,6 +31,12 @@ class AgentStepDTO(BaseModel):
31
31
  cost: float
32
32
  timestamp: datetime = Field(default_factory=datetime.utcnow)
33
33
 
34
+ class ReflectionInfo(BaseModel):
35
+ """Information about a reflection step."""
36
+ goal_achieved: bool
37
+ advice: Optional[str] = None
38
+ summary: Optional[str] = None
39
+
34
40
  class SessionDTO(BaseModel):
35
41
  session_id: str
36
42
  api_key: str
@@ -39,4 +45,9 @@ class SessionDTO(BaseModel):
39
45
  config: ConfigInfo
40
46
  chat_history: List[ChatHistoryMessage] = []
41
47
  steps: List[AgentStepDTO] = []
48
+ current_plan: Optional[List[str]] = None
49
+ current_task_index: int = 0
42
50
  ui_state: Optional[UIStateInfo] = None
51
+ last_reflection: Optional[ReflectionInfo] = None # Store the last reflection for the session
52
+
53
+ last_action_completed: Optional[bool] = None
@@ -17,6 +17,7 @@ from ..state import get_state
17
17
  from ..backend_client import get_backend_client
18
18
  from ..device.state_capture import get_device_state
19
19
  from ..device.adb_tools import AdbTools
20
+ import logging
20
21
 
21
22
  # Import mahoraga components for tool functions
22
23
  try:
@@ -181,7 +182,6 @@ from ..models import SessionDTO, UIStateInfo, ChatHistoryMessage, ConfigInfo, Ag
181
182
 
182
183
  async def execute_v3(
183
184
  task: str,
184
- max_steps: int = 15,
185
185
  progress_callback: Optional[Callable[[str], None]] = None
186
186
  ) -> Dict[str, Any]:
187
187
  """
@@ -222,6 +222,7 @@ async def execute_v3(
222
222
  "reflection": state.config["reflection"],
223
223
  "debug": state.config["debug"]
224
224
  }
225
+ max_steps = state.config.get("max_steps", 15)
225
226
 
226
227
  # Validate API key
227
228
  validation_result = await backend.validate_api_key(quash_api_key)
@@ -236,12 +237,12 @@ async def execute_v3(
236
237
 
237
238
  # Check credits
238
239
  user_info = validation_result.get("user", {})
239
- credits = user_info.get("credits", 0)
240
+ organization_credits = validation_result.get("organization_credits", 0)
240
241
 
241
- if credits <= 0:
242
+ if organization_credits <= 0:
242
243
  return {
243
244
  "status": "error",
244
- "message": f"❌ Insufficient credits. Current balance: ${credits:.2f}",
245
+ "message": f"❌ Insufficient credits. Current balance: ${organization_credits:.2f}",
245
246
  "user": user_info
246
247
  }
247
248
 
@@ -250,76 +251,78 @@ async def execute_v3(
250
251
  if progress_callback:
251
252
  progress_callback(message)
252
253
 
253
- log_progress(f"✅ API Key validated - Credits: ${credits:.2f}")
254
+ log_progress(f"✅ API Key validated - Credits: ${organization_credits:.2f}")
254
255
  log_progress(f"👤 User: {user_info.get('name', 'Unknown')}")
255
256
  log_progress(f"🚀 Starting task: {task}")
256
257
  log_progress(f"📱 Device: {state.device_serial}")
257
258
  log_progress(f"🧠 Model: {config['model']}")
258
-
259
259
  log_progress(f"🔢 Max steps: {max_steps}")
260
260
 
261
261
  # Initialize Session DTO
262
-
263
262
  session = SessionDTO(
264
263
  session_id=f"session_{uuid.uuid4().hex[:12]}",
265
264
  api_key=quash_api_key,
266
265
  task=task,
267
266
  device_serial=state.device_serial,
268
- config=ConfigInfo(**config)
267
+ config=ConfigInfo(**config),
268
+ last_action_completed=None # Explicitly initialize the new field
269
269
  )
270
270
 
271
- # Initialize local ADB tools for code execution
272
- adb_tools = AdbTools(serial=state.device_serial, use_tcp=True)
271
+ # Initialize a single, powerful ADB tools instance from Mahoraga
272
+ mahoraga_tools = None
273
+ try:
274
+ mahoraga_tools = MahoragaAdbTools(
275
+ serial=state.device_serial,
276
+ use_tcp=True,
277
+ remote_tcp_port=8080
278
+ )
279
+ except Exception as e:
280
+ log_progress(f"⚠️ CRITICAL: Failed to initialize MahoragaAdbTools: {e}")
281
+ return {
282
+ "status": "error",
283
+ "message": f"💥 Failed to initialize ADB tools: {e}",
284
+ }
273
285
 
274
286
  # Code executor namespace - add tool functions so generated code can call them
275
287
  executor_globals = {
276
288
  "__builtins__": __builtins__,
277
- "adb_tools": adb_tools
278
289
  }
279
290
 
280
- # Add tool functions to executor namespace (like start_app, swipe, etc.)
281
- if describe_tools and DEFAULT and MahoragaAdbTools:
282
- try:
283
- # Create a mahoraga AdbTools instance for tool execution
284
- mahoraga_tools = MahoragaAdbTools(
285
- serial=state.device_serial,
286
- use_tcp=True,
287
- remote_tcp_port=8080
288
- )
289
-
290
- # Get all tool functions from mahoraga AdbTools instance
291
- tool_list = describe_tools(mahoraga_tools, exclude_tools=None)
292
-
293
- # Filter by allowed tools from DEFAULT persona
294
- allowed_tool_names = DEFAULT.allowed_tools if hasattr(DEFAULT, 'allowed_tools') else []
295
- filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}
296
-
297
- # Add each tool function to executor globals with print wrapper
298
- for tool_name, tool_function in filtered_tools.items():
299
- # Convert async functions to sync if needed
300
- if asyncio.iscoroutinefunction(tool_function):
301
- if async_to_sync:
302
- tool_function = async_to_sync(tool_function)
303
-
304
- # Wrap tool function to print its return value
305
- def make_printing_wrapper(func):
306
- """Wrap a tool function to print its return value."""
307
- def wrapper(*args, **kwargs):
308
- result = func(*args, **kwargs)
309
- # Print the result so stdout captures it
310
- if result is not None:
311
- print(result)
312
- return result
313
- return wrapper
314
-
315
- # Add wrapped function to globals so code can call it directly
316
- executor_globals[tool_name] = make_printing_wrapper(tool_function)
317
-
318
- log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
319
- except Exception as e:
320
- log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
321
- import traceback
322
- log_progress(f"Traceback: {traceback.format_exc()}")
291
+ # Add tool functions to executor namespace
292
+ try:
293
+ # Get all tool functions from the single mahoraga_tools instance
294
+ tool_list = describe_tools(mahoraga_tools, exclude_tools=None)
295
+
296
+ # Filter by allowed tools from DEFAULT persona
297
+ allowed_tool_names = DEFAULT.allowed_tools if hasattr(DEFAULT, 'allowed_tools') else []
298
+ filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}
299
+
300
+ # Add each tool function to executor globals with print wrapper
301
+ for tool_name, tool_function in filtered_tools.items():
302
+ # Convert async functions to sync if needed
303
+ if asyncio.iscoroutinefunction(tool_function):
304
+ if async_to_sync:
305
+ tool_function = async_to_sync(tool_function)
306
+
307
+ # Wrap tool function to print its return value
308
+ def make_printing_wrapper(func):
309
+ """Wrap a tool function to print its return value."""
310
+ def wrapper(*args, **kwargs):
311
+ result = func(*args, **kwargs)
312
+ # Print the result so stdout captures it
313
+ if result is not None:
314
+ print(result)
315
+ return result
316
+ return wrapper
317
+
318
+ # Add wrapped function to globals so code can call it directly
319
+ executor_globals[tool_name] = make_printing_wrapper(tool_function)
320
+
321
+ log_progress(f"🔧 Loaded {len(filtered_tools)} tool functions: {list(filtered_tools.keys())}")
322
+ except Exception as e:
323
+ log_progress(f"⚠️ Warning: Could not load tool functions: {e}")
324
+ import traceback
325
+ log_progress(f"Traceback: {traceback.format_exc()}")
323
326
 
324
327
  executor_locals = {}
325
328
 
@@ -339,14 +342,14 @@ async def execute_v3(
339
342
  ui_state_dict, screenshot_bytes = get_device_state(state.device_serial)
340
343
 
341
344
  session.ui_state = UIStateInfo(**ui_state_dict)
345
+
342
346
  # Update local tools with new state
343
- if mahoraga_tools and "a11y_tree" in ui_state_dict and isinstance(ui_state_dict["a11y_tree"], str):
347
+ if mahoraga_tools and "a11y_tree" in ui_state_dict and isinstance(ui_state_dict["a11y_tree"], list):
344
348
  try:
345
- import json
346
- a11y_tree_obj = json.loads(ui_state_dict["a11y_tree"])
349
+ a11y_tree_obj = ui_state_dict["a11y_tree"]
347
350
  mahoraga_tools.update_state(a11y_tree_obj)
348
- except (json.JSONDecodeError, TypeError):
349
- pass # Ignore if not a valid JSON string
351
+ except Exception as e:
352
+ log_progress(f"⚠️ Warning: Failed to update mahoraga_tools state: {e}")
350
353
 
351
354
  if not config["vision"]:
352
355
  screenshot_bytes = None
@@ -357,7 +360,7 @@ async def execute_v3(
357
360
  except Exception as e:
358
361
  log_progress(f"⚠️ Warning: Failed to capture device state: {e}")
359
362
  session.ui_state = UIStateInfo(
360
- a11y_tree="<error>Failed to capture UI</error>",
363
+ a11y_tree=[],
361
364
  phone_state={"package": "unknown"}
362
365
  )
363
366
  screenshot_bytes = None
@@ -381,14 +384,29 @@ async def execute_v3(
381
384
  "duration_seconds": time.time() - start_time
382
385
  }
383
386
 
384
- # Update Session DTO with new step and chat history
387
+ # CRITICAL: Update the client's session DTO with the one returned from the backend
388
+ updated_session_data = step_result.get("updated_session")
389
+ if updated_session_data:
390
+ # Ensure last_action_completed field exists
391
+ if "last_action_completed" not in updated_session_data:
392
+ updated_session_data["last_action_completed"] = None
393
+ session = SessionDTO(**updated_session_data)
394
+ else:
395
+ # Fallback: if updated_session not returned, update locally
396
+ new_step_data = step_result.get("new_step")
397
+ if new_step_data:
398
+ new_step = AgentStepDTO(**new_step_data)
399
+ session.steps.append(new_step)
400
+ assistant_response = step_result.get("assistant_response", "")
401
+ session.chat_history.append(ChatHistoryMessage(role="assistant", content=assistant_response))
402
+
403
+ # CRITICAL FIX: Handle plan generation responses (which have new_step=None)
404
+ # These don't create actual steps, just show the plan
385
405
  new_step_data = step_result.get("new_step")
386
- if new_step_data:
387
- new_step = AgentStepDTO(**new_step_data)
388
- session.steps.append(new_step)
389
- assistant_response = step_result.get("assistant_response", "")
390
- session.chat_history.append(ChatHistoryMessage(role="assistant", content=assistant_response))
391
-
406
+ if new_step_data is None and not updated_session_data:
407
+ # Plan was generated but no step was added
408
+ # This is normal - plan is informational only
409
+ pass
392
410
 
393
411
  # Get action from backend
394
412
  action = step_result.get("action", {})
@@ -396,60 +414,18 @@ async def execute_v3(
396
414
  code = action.get("code")
397
415
  reasoning = action.get("reasoning")
398
416
 
399
-
400
417
  # Log reasoning
401
418
  if reasoning:
402
419
  log_progress(f"🤔 Reasoning: {reasoning}")
403
420
 
421
+ # CRITICAL FIX: Reset completion flag before executing
422
+ session.last_action_completed = False
404
423
 
405
- # 3. Check if task is complete BEFORE executing action
406
- if step_result.get("completed", False):
407
- success = step_result.get("success", False)
408
- final_message = step_result.get("final_message", "Task completed")
409
-
410
- duration = time.time() - start_time
411
-
412
- if success:
413
- log_progress(f"✅ Task completed successfully!")
414
- else:
415
- log_progress(f"❌ Task marked as failed")
424
+ # 3. Execute action locally (if provided)
425
+ if code and (action_type == "execute_code" or action_type == "complete"):
416
426
 
417
- # Finalize session on backend
418
- finalize_result = await backend.finalize_session(session=session)
419
-
420
- if success:
421
- log_progress(f"✅ Task completed successfully in {len(session.steps)} steps")
422
- log_progress(f"💰 Usage: {finalize_result.get('total_tokens', {}).get('total')} tokens, ${finalize_result.get('total_cost', 0):.4f}")
423
-
424
- return {
425
- "status": "success",
426
- "steps_taken": len(session.steps),
427
- "final_message": final_message,
428
- "message": f"✅ Success: {final_message}",
429
- "tokens": finalize_result.get("total_tokens"),
430
- "cost": finalize_result.get("total_cost"),
431
- "duration_seconds": duration
432
- }
433
- else:
434
- log_progress(f"❌ Task failed: {final_message}")
435
- log_progress(f"💰 Usage: {finalize_result.get('total_tokens', {}).get('total')} tokens, ${finalize_result.get('total_cost', 0):.4f}")
436
-
437
- return {
438
- "status": "failed",
439
- "steps_taken": len(session.steps),
440
- "final_message": final_message,
441
- "message": f"❌ Failed: {final_message}",
442
- "tokens": finalize_result.get("total_tokens"),
443
- "cost": finalize_result.get("total_cost"),
444
- "duration_seconds": duration
445
- }
446
-
447
-
448
- # 4. Execute action locally (only if task is not complete)
449
- if code and action_type == "execute_code":
450
427
  log_progress(f"⚡ Executing action...")
451
-
452
- log_progress(f"```python\n{code}\n```") # Log the code
428
+ log_progress(f"```python\n{code}\n```")
453
429
 
454
430
  old_ui_state = session.ui_state.model_dump().copy()
455
431
 
@@ -466,6 +442,13 @@ async def execute_v3(
466
442
  execution_output = stdout.getvalue()
467
443
  error_output = stderr.getvalue()
468
444
 
445
+ # CRITICAL FIX: Check if complete() was actually called
446
+ if mahoraga_tools and mahoraga_tools.finished:
447
+ log_progress("✅ Agent has signaled task completion via complete()")
448
+ session.last_action_completed = True
449
+ else:
450
+ session.last_action_completed = False
451
+
469
452
  log_progress(f"⏳ Waiting for UI state to update...")
470
453
  try:
471
454
  new_ui_state_dict, _, state_changed = wait_for_action_effect(
@@ -485,7 +468,6 @@ async def execute_v3(
485
468
  log_progress(f"✅ State changed: App switched ({old_pkg} → {new_pkg})")
486
469
  else:
487
470
  log_progress(f"✅ State changed: UI updated")
488
-
489
471
  else:
490
472
  log_progress(f"⚠️ WARNING: State did NOT change after action (timeout)")
491
473
  log_progress(f" This might mean the action had no effect or took too long")
@@ -500,7 +482,10 @@ async def execute_v3(
500
482
  if execution_output:
501
483
  feedback_parts.append(f"Action output: {execution_output.strip()}")
502
484
 
503
- if state_changed:
485
+ # CRITICAL FIX: Report completion status in feedback
486
+ if session.last_action_completed:
487
+ feedback_parts.append("Sub-task completed successfully (complete() was called)")
488
+ elif state_changed:
504
489
  feedback_parts.append("UI state updated successfully")
505
490
  else:
506
491
  feedback_parts.append("WARNING: UI state did not change (action may have failed)")
@@ -512,27 +497,86 @@ async def execute_v3(
512
497
 
513
498
  log_progress(f"✅ {feedback[:200]}")
514
499
 
515
- session.chat_history.append(ChatHistoryMessage(role="user", content=f"Execution Result:\n```\n{feedback}\n```"))
500
+ session.chat_history.append(ChatHistoryMessage(
501
+ role="user",
502
+ content=f"Execution Result:\n```\n{feedback}\n```"
503
+ ))
516
504
 
505
+ time.sleep(0.5)
517
506
 
518
507
  except Exception as e:
519
508
  error_msg = f"Error during execution: {str(e)}"
520
509
  log_progress(f"💥 Action failed: {error_msg}")
510
+ session.last_action_completed = False
511
+
512
+ session.chat_history.append(ChatHistoryMessage(
513
+ role="user",
514
+ content=f"Execution Error:\n```\n{error_output.strip()}\n```"
515
+ ))
516
+
517
+ # 4. Check if overall task is complete
518
+ # CRITICAL FIX: In reasoning mode with planning, DON'T exit on first complete() call
519
+ # The backend controls when all tasks are done via the "complete" action type
520
+ should_exit = False
521
+
522
+ if mahoraga_tools and mahoraga_tools.finished:
523
+ # Check if this is the FINAL completion from the backend
524
+ # In reasoning mode, the backend returns action.type="complete" when ALL tasks are done
525
+ action_type = action.get("type", "")
526
+
527
+ if action_type == "complete":
528
+ # Backend explicitly says we're done with ALL tasks
529
+ should_exit = True
530
+ success = mahoraga_tools.success
531
+ final_message = mahoraga_tools.reason
532
+ elif config["reasoning"] and session.current_plan:
533
+ # In reasoning mode with a plan, a single complete() call is just for one sub-task
534
+ # Continue the loop - the backend will advance to the next task
535
+ log_progress(f"✅ Sub-task completed. Moving to next task...")
536
+ should_exit = False
537
+ else:
538
+ # Non-reasoning mode: first complete() means done
539
+ should_exit = True
540
+ success = mahoraga_tools.success
541
+ final_message = mahoraga_tools.reason
542
+
543
+ if should_exit and mahoraga_tools and mahoraga_tools.finished:
544
+ success = mahoraga_tools.success
545
+ final_message = mahoraga_tools.reason
546
+ duration = time.time() - start_time
521
547
 
522
- session.chat_history.append(ChatHistoryMessage(role="user", content=f"Execution Error:\n```\n{error_msg}\n```"))
548
+ if success:
549
+ log_progress(f"✅ Task completed successfully!")
550
+ else:
551
+ log_progress(f"❌ Task marked as failed: {final_message}")
523
552
 
524
- elif not code:
525
- log_progress("⚠️ No action code provided by backend")
526
- session.chat_history.append(ChatHistoryMessage(role="user", content="No code was provided. Please provide code to execute."))
553
+ # Finalize session on backend
554
+ finalize_result = await backend.finalize_session(session=session)
555
+ total_tokens = finalize_result.get("total_tokens", {})
556
+ total_cost = finalize_result.get("total_cost", 0)
527
557
 
558
+ log_progress(f"💰 Usage: {total_tokens.get('total')} tokens, ${total_cost:.4f}")
528
559
 
560
+ return {
561
+ "status": "success" if success else "failed",
562
+ "steps_taken": len(session.steps),
563
+ "final_message": final_message,
564
+ "message": f"✅ Success: {final_message}" if success else f"❌ Failed: {final_message}",
565
+ "tokens": total_tokens,
566
+ "cost": total_cost,
567
+ "duration_seconds": duration
568
+ }
569
+
570
+ elif not code:
571
+ log_progress("⚠️ No action code provided by backend")
572
+ session.chat_history.append(ChatHistoryMessage(
573
+ role="user",
574
+ content="No code was provided. Please provide code to execute."
575
+ ))
529
576
 
530
577
  # Max steps reached
531
578
  log_progress(f"⚠️ Reached maximum steps ({max_steps})")
532
-
533
579
  duration = time.time() - start_time
534
-
535
- # Finalize session on backend
536
580
  finalize_result = await backend.finalize_session(session=session)
537
581
 
538
582
  return {
@@ -548,8 +592,6 @@ async def execute_v3(
548
592
  except KeyboardInterrupt:
549
593
  log_progress("ℹ️ Task interrupted by user")
550
594
  duration = time.time() - start_time
551
-
552
- # Finalize session on backend
553
595
  finalize_result = await backend.finalize_session(session=session)
554
596
 
555
597
  return {
@@ -565,8 +607,6 @@ async def execute_v3(
565
607
  error_msg = str(e)
566
608
  log_progress(f"💥 Error: {error_msg}")
567
609
  duration = time.time() - start_time
568
-
569
- # Finalize session on backend
570
610
  finalize_result = await backend.finalize_session(session=session)
571
611
 
572
612
  return {
@@ -581,5 +621,5 @@ async def execute_v3(
581
621
 
582
622
  finally:
583
623
  # Cleanup TCP forwarding
584
- if adb_tools:
585
- adb_tools.teardown_tcp_forward()
624
+ if mahoraga_tools:
625
+ mahoraga_tools.teardown_tcp_forward()
@@ -1,100 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script to verify MCP -> Backend integration.
4
- Tests API key validation and execution logging with real backend.
5
- """
6
-
7
- import asyncio
8
- import os
9
- import sys
10
- from pathlib import Path
11
- from dotenv import load_dotenv
12
-
13
- # Load .env file from project root
14
- project_root = Path(__file__).parent.parent
15
- env_file = project_root / ".env"
16
- load_dotenv(env_file)
17
-
18
- # Add src to path
19
- sys.path.insert(0, str(Path(__file__).parent / "src"))
20
-
21
- from backend_client import get_backend_client
22
-
23
-
24
- async def test_integration():
25
- """Test MCP backend integration."""
26
-
27
- print("=" * 60)
28
- print("🧪 Testing Mahoraga MCP -> Backend Integration")
29
- print("=" * 60)
30
-
31
- # Check environment
32
- mock_mode = os.getenv("MAHORAGA_MOCK_BACKEND", "true")
33
- backend_url = os.getenv("MAHORAGA_BACKEND_URL", "http://localhost:8000")
34
-
35
- print(f"\n📋 Configuration:")
36
- print(f" Mock Mode: {mock_mode}")
37
- print(f" Backend URL: {backend_url}")
38
-
39
- # Get backend client
40
- client = get_backend_client()
41
-
42
- # Test API key from database
43
- test_api_key = "mah_test1234567890abcdef"
44
-
45
- print(f"\n🔑 Testing API Key Validation...")
46
- print(f" API Key: {test_api_key}")
47
-
48
- # Test validation
49
- result = await client.validate_api_key(test_api_key)
50
-
51
- if result.get("valid"):
52
- print(" ✅ API Key Valid!")
53
- user = result.get("user", {})
54
- print(f" 👤 User: {user.get('name')} ({user.get('email')})")
55
- print(f" 💰 Credits: ${user.get('credits', 0):.2f}")
56
- else:
57
- print(f" ❌ API Key Invalid: {result.get('error')}")
58
- return False
59
-
60
- # Test execution logging
61
- print(f"\n📊 Testing Execution Logging...")
62
-
63
- log_result = await client.log_execution(
64
- api_key=test_api_key,
65
- execution_id="test_exec_integration_001",
66
- status="completed",
67
- tokens={"prompt": 150, "completion": 75, "total": 225},
68
- cost=0.05
69
- )
70
-
71
- if log_result.get("logged"):
72
- print(" ✅ Execution Logged!")
73
- print(f" 💸 Credits Deducted: ${log_result.get('credits_deducted', 0):.2f}")
74
- print(f" 💰 New Balance: ${log_result.get('new_balance', 0):.2f}")
75
- else:
76
- print(f" ❌ Logging Failed: {log_result.get('error')}")
77
- return False
78
-
79
- # Verify credits were deducted
80
- print(f"\n🔍 Verifying Credit Deduction...")
81
-
82
- verify_result = await client.validate_api_key(test_api_key)
83
- if verify_result.get("valid"):
84
- new_credits = verify_result.get("user", {}).get("credits", 0)
85
- print(f" ✅ Credits Updated: ${new_credits:.2f}")
86
-
87
- print(f"\n" + "=" * 60)
88
- print("✅ All Integration Tests Passed!")
89
- print("=" * 60)
90
- print("\n💡 Next Steps:")
91
- print(" 1. MCP is now connected to real backend")
92
- print(" 2. Test with actual MCP execute command")
93
- print(" 3. Build web portal for user management")
94
-
95
- return True
96
-
97
-
98
- if __name__ == "__main__":
99
- success = asyncio.run(test_integration())
100
- sys.exit(0 if success else 1)
@@ -1,81 +0,0 @@
1
- """
2
- Test script to verify tool functions are loaded correctly.
3
- """
4
-
5
- import sys
6
- sys.path.insert(0, '/Users/abhinavsai/POC/mahoraga-mac/quash-mcp')
7
- sys.path.insert(0, '/Users/abhinavsai/POC/mahoraga-mac/mahoraga')
8
-
9
- def test_tool_loading():
10
- print("Testing tool loading...")
11
-
12
- try:
13
- # Import mahoraga components
14
- from mahoraga.tools import Tools, describe_tools
15
- from mahoraga.tools.adb import AdbTools as MahoragaAdbTools
16
- from mahoraga.agent.context.personas import DEFAULT
17
- from mahoraga.agent.utils.async_utils import async_to_sync
18
-
19
- print("✅ All imports successful")
20
-
21
- # Create a mahoraga AdbTools instance
22
- print("\nCreating mahoraga AdbTools instance...")
23
- mahoraga_tools = MahoragaAdbTools(
24
- serial="emulator-5554", # Use your device serial
25
- use_tcp=True,
26
- remote_tcp_port=8080
27
- )
28
- print(f"✅ Created mahoraga AdbTools instance")
29
- print(f" - Serial: {mahoraga_tools.device.serial}")
30
- print(f" - TCP forwarded: {mahoraga_tools.tcp_forwarded}")
31
-
32
- # Get tool list
33
- print("\nGetting tool list...")
34
- tool_list = describe_tools(mahoraga_tools, exclude_tools=None)
35
- print(f"✅ Got {len(tool_list)} tools:")
36
- for tool_name, tool_func in tool_list.items():
37
- print(f" - {tool_name}: {tool_func}")
38
-
39
- # Filter by allowed tools
40
- print(f"\nFiltering by DEFAULT persona allowed tools...")
41
- allowed_tool_names = DEFAULT.allowed_tools
42
- print(f" Allowed tools: {allowed_tool_names}")
43
-
44
- filtered_tools = {name: func for name, func in tool_list.items() if name in allowed_tool_names}
45
- print(f"✅ Filtered to {len(filtered_tools)} tools:")
46
- for tool_name in filtered_tools.keys():
47
- print(f" - {tool_name}")
48
-
49
- # Test executor globals setup
50
- print("\nSetting up executor globals...")
51
- executor_globals = {"__builtins__": __builtins__}
52
-
53
- for tool_name, tool_function in filtered_tools.items():
54
- import asyncio
55
- if asyncio.iscoroutinefunction(tool_function):
56
- tool_function = async_to_sync(tool_function)
57
- executor_globals[tool_name] = tool_function
58
-
59
- print(f"✅ Executor globals set up with {len(executor_globals)} items")
60
-
61
- # Test that functions are callable
62
- print("\nTesting function availability...")
63
- test_functions = ['start_app', 'swipe', 'press_key', 'tap_by_index']
64
- for func_name in test_functions:
65
- if func_name in executor_globals:
66
- print(f" ✅ {func_name} is available")
67
- else:
68
- print(f" ❌ {func_name} is NOT available")
69
-
70
- print("\n✅ All tests passed!")
71
- return True
72
-
73
- except Exception as e:
74
- print(f"\n❌ Test failed: {e}")
75
- import traceback
76
- traceback.print_exc()
77
- return False
78
-
79
- if __name__ == "__main__":
80
- success = test_tool_loading()
81
- sys.exit(0 if success else 1)
File without changes
File without changes
File without changes