fleet-python 0.2.91__tar.gz → 0.2.92__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. {fleet_python-0.2.91/fleet_python.egg-info → fleet_python-0.2.92}/PKG-INFO +1 -1
  2. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/__init__.py +1 -1
  3. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/__init__.py +1 -1
  4. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/base.py +1 -1
  5. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/agent.py +75 -21
  6. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/mcp_server.py +28 -4
  7. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/orchestrator.py +88 -7
  8. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/types.py +1 -1
  9. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/base.py +1 -1
  10. {fleet_python-0.2.91 → fleet_python-0.2.92/fleet_python.egg-info}/PKG-INFO +1 -1
  11. {fleet_python-0.2.91 → fleet_python-0.2.92}/pyproject.toml +1 -1
  12. {fleet_python-0.2.91 → fleet_python-0.2.92}/LICENSE +0 -0
  13. {fleet_python-0.2.91 → fleet_python-0.2.92}/README.md +0 -0
  14. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/diff_example.py +0 -0
  15. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/dsl_example.py +0 -0
  16. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example.py +0 -0
  17. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/exampleResume.py +0 -0
  18. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_account.py +0 -0
  19. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_action_log.py +0 -0
  20. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_client.py +0 -0
  21. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_mcp_anthropic.py +0 -0
  22. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_mcp_openai.py +0 -0
  23. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_sync.py +0 -0
  24. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_task.py +0 -0
  25. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_tasks.py +0 -0
  26. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/example_verifier.py +0 -0
  27. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/export_tasks.py +0 -0
  28. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/fetch_tasks.py +0 -0
  29. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/gemini_example.py +0 -0
  30. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/import_tasks.py +0 -0
  31. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/iterate_verifiers.py +0 -0
  32. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/json_tasks_example.py +0 -0
  33. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/nova_act_example.py +0 -0
  34. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/openai_example.py +0 -0
  35. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/openai_simple_example.py +0 -0
  36. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/query_builder_example.py +0 -0
  37. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/quickstart.py +0 -0
  38. {fleet_python-0.2.91 → fleet_python-0.2.92}/examples/test_cdp_logging.py +0 -0
  39. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/client.py +0 -0
  40. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/env/__init__.py +0 -0
  41. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/env/client.py +0 -0
  42. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/exceptions.py +0 -0
  43. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/global_client.py +0 -0
  44. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/instance/__init__.py +0 -0
  45. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/instance/base.py +0 -0
  46. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/instance/client.py +0 -0
  47. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/models.py +0 -0
  48. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/resources/__init__.py +0 -0
  49. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/resources/base.py +0 -0
  50. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/resources/browser.py +0 -0
  51. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/resources/mcp.py +0 -0
  52. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/resources/sqlite.py +0 -0
  53. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/tasks.py +0 -0
  54. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/verifiers/__init__.py +0 -0
  55. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/verifiers/bundler.py +0 -0
  56. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/_async/verifiers/verifier.py +0 -0
  57. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/__init__.py +0 -0
  58. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/Dockerfile +0 -0
  59. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/__init__.py +0 -0
  60. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/playwright_utils.py +0 -0
  61. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/requirements.txt +0 -0
  62. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/gemini_cua/start.sh +0 -0
  63. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/agent/utils.py +0 -0
  64. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/cli.py +0 -0
  65. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/client.py +0 -0
  66. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/config.py +0 -0
  67. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/env/__init__.py +0 -0
  68. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/env/client.py +0 -0
  69. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/eval/__init__.py +0 -0
  70. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/eval/uploader.py +0 -0
  71. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/exceptions.py +0 -0
  72. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/global_client.py +0 -0
  73. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/instance/__init__.py +0 -0
  74. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/instance/base.py +0 -0
  75. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/instance/client.py +0 -0
  76. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/instance/models.py +0 -0
  77. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/models.py +0 -0
  78. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/proxy/__init__.py +0 -0
  79. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/proxy/proxy.py +0 -0
  80. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/proxy/whitelist.py +0 -0
  81. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/resources/__init__.py +0 -0
  82. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/resources/base.py +0 -0
  83. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/resources/browser.py +0 -0
  84. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/resources/mcp.py +0 -0
  85. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/resources/sqlite.py +0 -0
  86. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/tasks.py +0 -0
  87. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/types.py +0 -0
  88. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/utils/__init__.py +0 -0
  89. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/utils/http_logging.py +0 -0
  90. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/utils/logging.py +0 -0
  91. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/utils/playwright.py +0 -0
  92. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/__init__.py +0 -0
  93. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/bundler.py +0 -0
  94. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/code.py +0 -0
  95. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/db.py +0 -0
  96. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/decorator.py +0 -0
  97. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/parse.py +0 -0
  98. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/sql_differ.py +0 -0
  99. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet/verifiers/verifier.py +0 -0
  100. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet_python.egg-info/SOURCES.txt +0 -0
  101. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet_python.egg-info/dependency_links.txt +0 -0
  102. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet_python.egg-info/entry_points.txt +0 -0
  103. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet_python.egg-info/requires.txt +0 -0
  104. {fleet_python-0.2.91 → fleet_python-0.2.92}/fleet_python.egg-info/top_level.txt +0 -0
  105. {fleet_python-0.2.91 → fleet_python-0.2.92}/scripts/fix_sync_imports.py +0 -0
  106. {fleet_python-0.2.91 → fleet_python-0.2.92}/scripts/unasync.py +0 -0
  107. {fleet_python-0.2.91 → fleet_python-0.2.92}/setup.cfg +0 -0
  108. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/__init__.py +0 -0
  109. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/test_app_method.py +0 -0
  110. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/test_expect_only.py +0 -0
  111. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/test_instance_dispatch.py +0 -0
  112. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/test_sqlite_resource_dual_mode.py +0 -0
  113. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/test_sqlite_shared_memory_behavior.py +0 -0
  114. {fleet_python-0.2.91 → fleet_python-0.2.92}/tests/test_verifier_from_string.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.91
3
+ Version: 0.2.92
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -73,7 +73,7 @@ from . import env
73
73
  from . import global_client as _global_client
74
74
  from ._async import global_client as _async_global_client
75
75
 
76
- __version__ = "0.2.91"
76
+ __version__ = "0.2.92"
77
77
 
78
78
  __all__ = [
79
79
  # Core classes
@@ -44,7 +44,7 @@ from ..types import VerifierFunction
44
44
  from .. import env
45
45
  from . import global_client as _async_global_client
46
46
 
47
- __version__ = "0.2.91"
47
+ __version__ = "0.2.92"
48
48
 
49
49
  __all__ = [
50
50
  # Core classes
@@ -26,7 +26,7 @@ from .exceptions import (
26
26
  try:
27
27
  from .. import __version__
28
28
  except ImportError:
29
- __version__ = "0.2.91"
29
+ __version__ = "0.2.92"
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -8,7 +8,7 @@ Env vars:
8
8
  FLEET_TASK_PROMPT: Task prompt
9
9
  FLEET_TASK_KEY: Task key
10
10
  FLEET_MODEL: Model (default: gemini-2.5-pro)
11
- FLEET_MAX_STEPS: Max steps (default: 50)
11
+ FLEET_MAX_STEPS: Max steps (default: 200)
12
12
  FLEET_VERBOSE: Enable verbose logging (default: false)
13
13
  USE_OAUTH: Use gcloud OAuth instead of API key (default: false)
14
14
  GOOG_PROJECT: Google Cloud project for OAuth (default: gemini-agents-area)
@@ -95,22 +95,33 @@ class MCP:
95
95
 
96
96
  async def __aenter__(self):
97
97
  # Connect using streamable-http transport
98
- self._client = streamable_http_client(self.url)
99
- read, write, _ = await self._client.__aenter__()
100
- self._session = ClientSession(read, write)
101
- await self._session.__aenter__()
102
- await self._session.initialize()
98
+ print(f"MCP: Connecting to {self.url}...")
99
+ try:
100
+ self._client = streamable_http_client(self.url)
101
+ read, write, _ = await self._client.__aenter__()
102
+ self._session = ClientSession(read, write)
103
+ await self._session.__aenter__()
104
+ await self._session.initialize()
105
+ print(f"MCP: Connected successfully")
106
+ except Exception as e:
107
+ print(f"MCP: Connection failed: {type(e).__name__}: {e}")
108
+ raise
103
109
 
104
110
  # Fetch available tools from server
105
- result = await self._session.list_tools()
106
- self._tools = [
107
- {
108
- "name": tool.name,
109
- "description": tool.description or "",
110
- "inputSchema": tool.inputSchema,
111
- }
112
- for tool in result.tools
113
- ]
111
+ try:
112
+ result = await self._session.list_tools()
113
+ self._tools = [
114
+ {
115
+ "name": tool.name,
116
+ "description": tool.description or "",
117
+ "inputSchema": tool.inputSchema,
118
+ }
119
+ for tool in result.tools
120
+ ]
121
+ print(f"MCP: Loaded {len(self._tools)} tools")
122
+ except Exception as e:
123
+ print(f"MCP: Failed to list tools: {type(e).__name__}: {e}")
124
+ raise
114
125
  return self
115
126
 
116
127
  async def __aexit__(self, *args):
@@ -212,6 +223,8 @@ class GeminiAgent:
212
223
  self.client = get_gemini_client()
213
224
  self.transcript: List[Dict] = []
214
225
  self.session = session # Fleet session for live logging
226
+ self._consecutive_errors = 0
227
+ self._max_consecutive_errors = 5
215
228
 
216
229
  async def _execute_tool(self, name: str, args: Dict) -> Dict:
217
230
  return await self.mcp.call(name, args)
@@ -287,9 +300,27 @@ STRICT RULES:
287
300
  contents=history,
288
301
  config=config,
289
302
  )
303
+ self._consecutive_errors = 0 # Reset on success
290
304
  except Exception as e:
291
- print(f"API error: {e}")
292
- return self._result(False, str(e), step, start_time)
305
+ self._consecutive_errors += 1
306
+ error_type = type(e).__name__
307
+ print(f"API error ({error_type}): {e}")
308
+ print(f" Consecutive errors: {self._consecutive_errors}/{self._max_consecutive_errors}")
309
+
310
+ if self._consecutive_errors >= self._max_consecutive_errors:
311
+ return self._result(False, f"Too many consecutive API errors: {error_type}: {e}", step, start_time)
312
+
313
+ # Check for retryable errors
314
+ if "429" in str(e) or "quota" in str(e).lower() or "rate" in str(e).lower():
315
+ print(f" Rate limited, waiting 10s...")
316
+ await asyncio.sleep(10)
317
+ continue
318
+ elif "503" in str(e) or "500" in str(e) or "overloaded" in str(e).lower():
319
+ print(f" Server error, waiting 5s...")
320
+ await asyncio.sleep(5)
321
+ continue
322
+ else:
323
+ return self._result(False, f"{error_type}: {e}", step, start_time)
293
324
 
294
325
  if not response.candidates:
295
326
  print("[WARN] No candidates, retrying...")
@@ -309,6 +340,7 @@ STRICT RULES:
309
340
  if step == 1 and self.session.session_id:
310
341
  print(f"Session: https://fleetai.com/dashboard/sessions/{self.session.session_id}")
311
342
  except Exception as e:
343
+ print(f" [WARN] Session log failed: {type(e).__name__}: {e}")
312
344
  log_verbose(f" [WARN] Session log failed: {e}")
313
345
 
314
346
  # Log all parts for debugging
@@ -370,9 +402,28 @@ STRICT RULES:
370
402
  try:
371
403
  result = await self._execute_tool(name, args)
372
404
  log_verbose(f" Result: isError={result.get('isError', False)}, content_types={[c.get('type') for c in result.get('content', [])]}")
405
+
406
+ if result.get("isError"):
407
+ self._consecutive_errors += 1
408
+ error_text = ""
409
+ for c in result.get("content", []):
410
+ if c.get("type") == "text":
411
+ error_text = c.get("text", "")[:200]
412
+ print(f" Tool error: {error_text}")
413
+ else:
414
+ self._consecutive_errors = 0
373
415
  except Exception as e:
374
- print(f" Error: {e}")
375
- log_verbose(f" Exception: {type(e).__name__}: {e}")
416
+ self._consecutive_errors += 1
417
+ error_type = type(e).__name__
418
+ print(f" Tool exception ({error_type}): {e}")
419
+ print(f" Consecutive errors: {self._consecutive_errors}/{self._max_consecutive_errors}")
420
+ log_verbose(f" Exception: {error_type}: {e}")
421
+
422
+ # Check if this is a connection/MCP error that we should fail fast on
423
+ if "connection" in str(e).lower() or "closed" in str(e).lower():
424
+ print(f" MCP connection lost, failing task")
425
+ return self._result(False, f"MCP connection error: {e}", step, start_time)
426
+
376
427
  result = {"content": [{"type": "text", "text": str(e)}], "isError": True}
377
428
 
378
429
  # Build function response with image embedded (per reference format)
@@ -414,7 +465,10 @@ STRICT RULES:
414
465
  history.append(types.Content(role="model", parts=response_parts))
415
466
  log_verbose(f" Added {len(response_parts)} function response(s) to history")
416
467
 
417
- return self._result(False, "Max steps reached", max_steps, start_time)
468
+ # Max steps reached - still mark as completed so verification runs
469
+ # The agent may have done the task but just didn't say "DONE"
470
+ print(f"\n⚠ Max steps ({max_steps}) reached - will still run verification")
471
+ return self._result(True, "Max steps reached", max_steps, start_time, "Max steps reached - task may be complete")
418
472
 
419
473
  def _result(self, completed: bool, error: Optional[str], steps: int, start_time: float, answer: str = None) -> Dict:
420
474
  """Build result dict."""
@@ -437,7 +491,7 @@ async def main():
437
491
  "job_id": os.environ.get("FLEET_JOB_ID"),
438
492
  "instance_id": os.environ.get("FLEET_INSTANCE_ID"),
439
493
  "model": os.environ.get("FLEET_MODEL", "gemini-2.5-pro"),
440
- "max_steps": int(os.environ.get("FLEET_MAX_STEPS", "100")),
494
+ "max_steps": int(os.environ.get("FLEET_MAX_STEPS", "200")),
441
495
  }
442
496
 
443
497
  print(f"Gemini CUA Agent")
@@ -57,10 +57,20 @@ async def lifespan(app):
57
57
  )
58
58
 
59
59
  try:
60
+ logger.info("Starting Playwright browser...")
60
61
  await computer.start()
62
+ logger.info(f"Browser started, navigated to: {computer.current_url}")
61
63
  yield
64
+ except Exception as e:
65
+ logger.error(f"Browser startup FAILED: {type(e).__name__}: {e}")
66
+ raise
62
67
  finally:
63
- await computer.stop()
68
+ logger.info("Stopping Playwright browser...")
69
+ try:
70
+ await computer.stop()
71
+ logger.info("Browser stopped")
72
+ except Exception as e:
73
+ logger.error(f"Browser stop error: {type(e).__name__}: {e}")
64
74
 
65
75
 
66
76
  mcp = FastMCP("cua-server", lifespan=lifespan, host="0.0.0.0", port=PORT)
@@ -74,7 +84,13 @@ mcp = FastMCP("cua-server", lifespan=lifespan, host="0.0.0.0", port=PORT)
74
84
  async def computer_screenshot() -> list:
75
85
  """Takes a screenshot of the computer screen. Use this to see what's on screen."""
76
86
  logger.info("computer_screenshot()")
77
- return _screenshot_response(await computer.screenshot())
87
+ try:
88
+ result = await computer.screenshot()
89
+ logger.info(f"computer_screenshot() -> {len(result)} bytes")
90
+ return _screenshot_response(result)
91
+ except Exception as e:
92
+ logger.error(f"computer_screenshot() FAILED: {type(e).__name__}: {e}")
93
+ raise
78
94
 
79
95
 
80
96
  @mcp.tool()
@@ -88,7 +104,11 @@ async def mouse_click(x: int, y: int, button: str, repeats: int = 1) -> None:
88
104
  repeats: The number of times to click. Default is 1.
89
105
  """
90
106
  logger.info(f"mouse_click({x}, {y}, {button}, {repeats})")
91
- await computer.mouse_click(_dx(x), _dy(y), button, repeats)
107
+ try:
108
+ await computer.mouse_click(_dx(x), _dy(y), button, repeats)
109
+ except Exception as e:
110
+ logger.error(f"mouse_click FAILED: {type(e).__name__}: {e}")
111
+ raise
92
112
 
93
113
 
94
114
  @mcp.tool()
@@ -172,7 +192,11 @@ async def type_text(input_text: str, press_enter: bool) -> None:
172
192
  press_enter: Whether to press enter after typing.
173
193
  """
174
194
  logger.info(f"type_text({input_text[:50]}{'...' if len(input_text) > 50 else ''}, enter={press_enter})")
175
- await computer.type_text(input_text, press_enter)
195
+ try:
196
+ await computer.type_text(input_text, press_enter)
197
+ except Exception as e:
198
+ logger.error(f"type_text FAILED: {type(e).__name__}: {e}")
199
+ raise
176
200
 
177
201
 
178
202
  @mcp.tool()
@@ -168,6 +168,50 @@ class AgentOrchestrator:
168
168
  self._available_ports: List[Tuple[int, int]] = []
169
169
  # Register global cleanup handlers
170
170
  _register_cleanup()
171
+ # Stats tracking
172
+ self._stats = {"started": 0, "completed": 0, "failed": 0, "errors": {}}
173
+
174
+ def _track_error(self, category: str, message: str):
175
+ """Track an error for summary statistics."""
176
+ if category not in self._stats["errors"]:
177
+ self._stats["errors"][category] = []
178
+ # Keep up to 5 examples per category
179
+ if len(self._stats["errors"][category]) < 5:
180
+ self._stats["errors"][category].append(message[:200])
181
+
182
+ def _print_stats(self):
183
+ """Print summary statistics."""
184
+ from rich.console import Console
185
+ from rich.table import Table
186
+
187
+ console = Console()
188
+
189
+ total = self._stats["started"]
190
+ completed = self._stats["completed"]
191
+ failed = self._stats["failed"]
192
+
193
+ console.print()
194
+ console.print("[bold]Run Summary:[/bold]")
195
+ console.print(f" Started: {total}")
196
+ console.print(f" Completed: [green]{completed}[/green] ({100*completed/total:.1f}%)" if total > 0 else " Completed: 0")
197
+ console.print(f" Failed: [red]{failed}[/red] ({100*failed/total:.1f}%)" if total > 0 else " Failed: 0")
198
+
199
+ if self._stats["errors"]:
200
+ console.print()
201
+ console.print("[bold]Error Breakdown:[/bold]")
202
+ table = Table(show_header=True, header_style="bold")
203
+ table.add_column("Category")
204
+ table.add_column("Count")
205
+ table.add_column("Example")
206
+
207
+ for category, examples in sorted(self._stats["errors"].items(), key=lambda x: -len(x[1])):
208
+ table.add_row(
209
+ category,
210
+ str(len(examples)),
211
+ examples[0][:80] + "..." if len(examples[0]) > 80 else examples[0]
212
+ )
213
+
214
+ console.print(table)
171
215
 
172
216
  async def _get_next_ports(self) -> Tuple[int, int]:
173
217
  """Get next available MCP port and VNC port."""
@@ -282,6 +326,9 @@ class AgentOrchestrator:
282
326
  session_logs = list(self._log_dir.glob("*.jsonl"))
283
327
  console.print(f"Logs: {self._log_dir}/ ({len(session_logs)} sessions)")
284
328
 
329
+ # Print summary statistics
330
+ self._print_stats()
331
+
285
332
  return final
286
333
 
287
334
  async def _build_docker_image(self, agent_path: Path):
@@ -334,15 +381,18 @@ class AgentOrchestrator:
334
381
  task_prompt = task.prompt
335
382
  short_key = task_key[:20]
336
383
 
337
- logger.debug(f"[{short_key}] Starting")
384
+ self._stats["started"] += 1
385
+ logger.debug(f"[{short_key}] Starting (total started: {self._stats['started']})")
338
386
 
339
387
  env = None
340
388
  container_id = None
341
389
  port = None
342
390
  vnc_port = None
391
+ current_phase = "init"
343
392
 
344
393
  try:
345
394
  # 1. Create Fleet environment
395
+ current_phase = "create_env"
346
396
  logger.debug(f"[{short_key}] Creating env...")
347
397
  env = await make_async(
348
398
  env_key=task.env_key,
@@ -356,6 +406,7 @@ class AgentOrchestrator:
356
406
  await asyncio.sleep(3) # Wait for env to be ready
357
407
 
358
408
  # 2. Start Docker container with CUA server
409
+ current_phase = "start_container"
359
410
  port, vnc_port = await self._get_next_ports()
360
411
  logger.debug(f"[{short_key}] Starting container on port {port}...")
361
412
  container_id = await self._start_container(
@@ -373,11 +424,13 @@ class AgentOrchestrator:
373
424
  print(f"[{short_key}] Browser: http://localhost:{vnc_port}/vnc.html")
374
425
 
375
426
  # Wait for server to be ready
427
+ current_phase = "wait_for_server"
376
428
  logger.debug(f"[{short_key}] Waiting for CUA server...")
377
429
  await self._wait_for_server(port)
378
430
  logger.debug(f"[{short_key}] CUA server ready")
379
431
 
380
432
  # 3. Run agent
433
+ current_phase = "run_agent"
381
434
  logger.debug(f"[{short_key}] Running agent...")
382
435
  agent_result = await self._run_agent(
383
436
  port=port,
@@ -388,14 +441,17 @@ class AgentOrchestrator:
388
441
  logger.debug(
389
442
  f"[{short_key}] Agent done: completed={agent_result.completed}"
390
443
  )
444
+ if agent_result.error and agent_result.error != "Max steps reached":
445
+ print(f"[{short_key}] Agent error: {agent_result.error[:200]}")
391
446
 
392
447
  # 4. Run verification
448
+ current_phase = "verification"
393
449
  verification_success = None
394
450
  verification_score = None
395
451
  verifier_execution_id = None
396
452
 
397
453
  if agent_result.completed and task.verifier:
398
- logger.info(f"[{task_key}] Running verification...")
454
+ logger.info(f"[{short_key}] Running verification...")
399
455
  try:
400
456
  v = await task.verify_detailed_async(
401
457
  env=env,
@@ -407,9 +463,21 @@ class AgentOrchestrator:
407
463
  verification_score = (
408
464
  v.result if isinstance(v.result, (int, float)) else None
409
465
  )
410
- logger.info(f"[{task_key}] Verification: {verification_success}")
466
+ logger.info(f"[{short_key}] Verification: {verification_success}")
467
+ if verification_success:
468
+ self._stats["completed"] += 1
469
+ else:
470
+ self._stats["failed"] += 1
471
+ print(f"[{short_key}] Verification FAILED: score={verification_score}")
411
472
  except Exception as e:
412
- logger.error(f"[{task_key}] Verification error: {e}")
473
+ logger.error(f"[{short_key}] Verification error: {e}")
474
+ self._stats["failed"] += 1
475
+ self._track_error("verification_error", str(e))
476
+ elif not agent_result.completed:
477
+ self._stats["failed"] += 1
478
+ error_msg = agent_result.error or "unknown"
479
+ self._track_error("agent_not_completed", error_msg)
480
+ print(f"[{short_key}] Agent did not complete: {error_msg}")
413
481
 
414
482
  # 5. Complete/fail session (session was created by agent, we just complete it)
415
483
  session_id = getattr(agent_result, "session_id", None)
@@ -439,11 +507,24 @@ class AgentOrchestrator:
439
507
  )
440
508
 
441
509
  except Exception as e:
442
- logger.exception(f"[{short_key}] Failed: {e}")
510
+ import traceback
511
+ error_type = type(e).__name__
512
+ error_msg = str(e)
513
+ tb = traceback.format_exc()
514
+
515
+ # Categorize the error
516
+ error_category = f"{current_phase}:{error_type}"
517
+ self._track_error(error_category, error_msg)
518
+ self._stats["failed"] += 1
519
+
520
+ # Always print errors for visibility
521
+ print(f"[{short_key}] EXCEPTION in {current_phase}: {error_type}: {error_msg[:200]}")
522
+ logger.error(f"[{short_key}] Traceback:\n{tb}")
523
+
443
524
  return TaskResult(
444
525
  task_key=task_key,
445
526
  task_prompt=task_prompt,
446
- error=str(e),
527
+ error=f"[{current_phase}] {error_type}: {error_msg}",
447
528
  execution_time_ms=int((time.time() - start) * 1000),
448
529
  )
449
530
 
@@ -687,7 +768,7 @@ async def run_agent(
687
768
  agent: str = "gemini_cua",
688
769
  model: str = "gemini-2.5-pro",
689
770
  max_concurrent: int = 4,
690
- max_steps: int = 100,
771
+ max_steps: int = 200,
691
772
  timeout_seconds: int = 600,
692
773
  api_keys: Optional[Dict[str, str]] = None,
693
774
  headful: bool = False,
@@ -12,7 +12,7 @@ class AgentConfig(BaseModel):
12
12
  agent: str = "gemini_cua"
13
13
  model: str = "gemini-2.5-pro"
14
14
  max_concurrent: int = 4
15
- max_steps: int = 100
15
+ max_steps: int = 200
16
16
  timeout_seconds: int = 600
17
17
  screen_width: int = 1366
18
18
  screen_height: int = 768
@@ -27,7 +27,7 @@ from .exceptions import (
27
27
  try:
28
28
  from . import __version__
29
29
  except ImportError:
30
- __version__ = "0.2.91"
30
+ __version__ = "0.2.92"
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.91
3
+ Version: 0.2.92
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "fleet-python"
7
7
 
8
- version = "0.2.91"
8
+ version = "0.2.92"
9
9
  description = "Python SDK for Fleet environments"
10
10
  authors = [
11
11
  {name = "Fleet AI", email = "nic@fleet.so"},
File without changes
File without changes
File without changes