fleet-python 0.2.90__tar.gz → 0.2.92__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_python-0.2.90/fleet_python.egg-info → fleet_python-0.2.92}/PKG-INFO +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/__init__.py +5 -5
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/__init__.py +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/base.py +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/client.py +2 -2
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/agent.py +75 -21
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/mcp_server.py +28 -4
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/orchestrator.py +88 -7
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/types.py +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/base.py +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/client.py +2 -2
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/models.py +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/pyproject.toml +1 -1
- {fleet_python-0.2.90 → fleet_python-0.2.92}/LICENSE +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/README.md +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/diff_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_account.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_sync.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_task.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/export_tasks.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/fetch_tasks.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/import_tasks.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/iterate_verifiers.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/openai_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/quickstart.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/models.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/Dockerfile +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/playwright_utils.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/requirements.txt +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/gemini_cua/start.sh +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/agent/utils.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/cli.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/config.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/env/client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/eval/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/eval/uploader.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/global_client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/proxy/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/proxy/proxy.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/proxy/whitelist.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/tasks.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/types.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/utils/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/utils/http_logging.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/utils/logging.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/utils/playwright.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/db.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet_python.egg-info/SOURCES.txt +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet_python.egg-info/entry_points.txt +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/scripts/unasync.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/setup.cfg +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/__init__.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/test_app_method.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/test_expect_only.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/test_instance_dispatch.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/test_sqlite_resource_dual_mode.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/test_sqlite_shared_memory_behavior.py +0 -0
- {fleet_python-0.2.90 → fleet_python-0.2.92}/tests/test_verifier_from_string.py +0 -0
|
@@ -73,7 +73,7 @@ from . import env
|
|
|
73
73
|
from . import global_client as _global_client
|
|
74
74
|
from ._async import global_client as _async_global_client
|
|
75
75
|
|
|
76
|
-
__version__ = "0.2.90"
|
|
76
|
+
__version__ = "0.2.92"
|
|
77
77
|
|
|
78
78
|
__all__ = [
|
|
79
79
|
# Core classes
|
|
@@ -252,11 +252,11 @@ def session_async(
|
|
|
252
252
|
)
|
|
253
253
|
|
|
254
254
|
|
|
255
|
-
def job(name: str) -> str:
|
|
255
|
+
def job(name: Optional[str] = None) -> str:
|
|
256
256
|
"""Create a new trace job (sync).
|
|
257
257
|
|
|
258
258
|
Args:
|
|
259
|
-
name: Name of the job
|
|
259
|
+
name: Name of the job (generated server-side if not provided)
|
|
260
260
|
|
|
261
261
|
Returns:
|
|
262
262
|
The job_id string
|
|
@@ -269,11 +269,11 @@ def job(name: str) -> str:
|
|
|
269
269
|
return client.trace_job(name=name)
|
|
270
270
|
|
|
271
271
|
|
|
272
|
-
async def job_async(name: str) -> str:
|
|
272
|
+
async def job_async(name: Optional[str] = None) -> str:
|
|
273
273
|
"""Create a new trace job (async).
|
|
274
274
|
|
|
275
275
|
Args:
|
|
276
|
-
name: Name of the job
|
|
276
|
+
name: Name of the job (generated server-side if not provided)
|
|
277
277
|
|
|
278
278
|
Returns:
|
|
279
279
|
The job_id string
|
|
@@ -1419,11 +1419,11 @@ class AsyncFleet:
|
|
|
1419
1419
|
instance_id=instance_id,
|
|
1420
1420
|
)
|
|
1421
1421
|
|
|
1422
|
-
async def trace_job(self, name: str) -> str:
|
|
1422
|
+
async def trace_job(self, name: Optional[str] = None) -> str:
|
|
1423
1423
|
"""Create a new trace job.
|
|
1424
1424
|
|
|
1425
1425
|
Args:
|
|
1426
|
-
name: Name of the job
|
|
1426
|
+
name: Name of the job (generated server-side if not provided)
|
|
1427
1427
|
|
|
1428
1428
|
Returns:
|
|
1429
1429
|
The job_id string
|
|
@@ -8,7 +8,7 @@ Env vars:
|
|
|
8
8
|
FLEET_TASK_PROMPT: Task prompt
|
|
9
9
|
FLEET_TASK_KEY: Task key
|
|
10
10
|
FLEET_MODEL: Model (default: gemini-2.5-pro)
|
|
11
|
-
FLEET_MAX_STEPS: Max steps (default:
|
|
11
|
+
FLEET_MAX_STEPS: Max steps (default: 200)
|
|
12
12
|
FLEET_VERBOSE: Enable verbose logging (default: false)
|
|
13
13
|
USE_OAUTH: Use gcloud OAuth instead of API key (default: false)
|
|
14
14
|
GOOG_PROJECT: Google Cloud project for OAuth (default: gemini-agents-area)
|
|
@@ -95,22 +95,33 @@ class MCP:
|
|
|
95
95
|
|
|
96
96
|
async def __aenter__(self):
|
|
97
97
|
# Connect using streamable-http transport
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
98
|
+
print(f"MCP: Connecting to {self.url}...")
|
|
99
|
+
try:
|
|
100
|
+
self._client = streamable_http_client(self.url)
|
|
101
|
+
read, write, _ = await self._client.__aenter__()
|
|
102
|
+
self._session = ClientSession(read, write)
|
|
103
|
+
await self._session.__aenter__()
|
|
104
|
+
await self._session.initialize()
|
|
105
|
+
print(f"MCP: Connected successfully")
|
|
106
|
+
except Exception as e:
|
|
107
|
+
print(f"MCP: Connection failed: {type(e).__name__}: {e}")
|
|
108
|
+
raise
|
|
103
109
|
|
|
104
110
|
# Fetch available tools from server
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
111
|
+
try:
|
|
112
|
+
result = await self._session.list_tools()
|
|
113
|
+
self._tools = [
|
|
114
|
+
{
|
|
115
|
+
"name": tool.name,
|
|
116
|
+
"description": tool.description or "",
|
|
117
|
+
"inputSchema": tool.inputSchema,
|
|
118
|
+
}
|
|
119
|
+
for tool in result.tools
|
|
120
|
+
]
|
|
121
|
+
print(f"MCP: Loaded {len(self._tools)} tools")
|
|
122
|
+
except Exception as e:
|
|
123
|
+
print(f"MCP: Failed to list tools: {type(e).__name__}: {e}")
|
|
124
|
+
raise
|
|
114
125
|
return self
|
|
115
126
|
|
|
116
127
|
async def __aexit__(self, *args):
|
|
@@ -212,6 +223,8 @@ class GeminiAgent:
|
|
|
212
223
|
self.client = get_gemini_client()
|
|
213
224
|
self.transcript: List[Dict] = []
|
|
214
225
|
self.session = session # Fleet session for live logging
|
|
226
|
+
self._consecutive_errors = 0
|
|
227
|
+
self._max_consecutive_errors = 5
|
|
215
228
|
|
|
216
229
|
async def _execute_tool(self, name: str, args: Dict) -> Dict:
|
|
217
230
|
return await self.mcp.call(name, args)
|
|
@@ -287,9 +300,27 @@ STRICT RULES:
|
|
|
287
300
|
contents=history,
|
|
288
301
|
config=config,
|
|
289
302
|
)
|
|
303
|
+
self._consecutive_errors = 0 # Reset on success
|
|
290
304
|
except Exception as e:
|
|
291
|
-
|
|
292
|
-
|
|
305
|
+
self._consecutive_errors += 1
|
|
306
|
+
error_type = type(e).__name__
|
|
307
|
+
print(f"API error ({error_type}): {e}")
|
|
308
|
+
print(f" Consecutive errors: {self._consecutive_errors}/{self._max_consecutive_errors}")
|
|
309
|
+
|
|
310
|
+
if self._consecutive_errors >= self._max_consecutive_errors:
|
|
311
|
+
return self._result(False, f"Too many consecutive API errors: {error_type}: {e}", step, start_time)
|
|
312
|
+
|
|
313
|
+
# Check for retryable errors
|
|
314
|
+
if "429" in str(e) or "quota" in str(e).lower() or "rate" in str(e).lower():
|
|
315
|
+
print(f" Rate limited, waiting 10s...")
|
|
316
|
+
await asyncio.sleep(10)
|
|
317
|
+
continue
|
|
318
|
+
elif "503" in str(e) or "500" in str(e) or "overloaded" in str(e).lower():
|
|
319
|
+
print(f" Server error, waiting 5s...")
|
|
320
|
+
await asyncio.sleep(5)
|
|
321
|
+
continue
|
|
322
|
+
else:
|
|
323
|
+
return self._result(False, f"{error_type}: {e}", step, start_time)
|
|
293
324
|
|
|
294
325
|
if not response.candidates:
|
|
295
326
|
print("[WARN] No candidates, retrying...")
|
|
@@ -309,6 +340,7 @@ STRICT RULES:
|
|
|
309
340
|
if step == 1 and self.session.session_id:
|
|
310
341
|
print(f"Session: https://fleetai.com/dashboard/sessions/{self.session.session_id}")
|
|
311
342
|
except Exception as e:
|
|
343
|
+
print(f" [WARN] Session log failed: {type(e).__name__}: {e}")
|
|
312
344
|
log_verbose(f" [WARN] Session log failed: {e}")
|
|
313
345
|
|
|
314
346
|
# Log all parts for debugging
|
|
@@ -370,9 +402,28 @@ STRICT RULES:
|
|
|
370
402
|
try:
|
|
371
403
|
result = await self._execute_tool(name, args)
|
|
372
404
|
log_verbose(f" Result: isError={result.get('isError', False)}, content_types={[c.get('type') for c in result.get('content', [])]}")
|
|
405
|
+
|
|
406
|
+
if result.get("isError"):
|
|
407
|
+
self._consecutive_errors += 1
|
|
408
|
+
error_text = ""
|
|
409
|
+
for c in result.get("content", []):
|
|
410
|
+
if c.get("type") == "text":
|
|
411
|
+
error_text = c.get("text", "")[:200]
|
|
412
|
+
print(f" Tool error: {error_text}")
|
|
413
|
+
else:
|
|
414
|
+
self._consecutive_errors = 0
|
|
373
415
|
except Exception as e:
|
|
374
|
-
|
|
375
|
-
|
|
416
|
+
self._consecutive_errors += 1
|
|
417
|
+
error_type = type(e).__name__
|
|
418
|
+
print(f" Tool exception ({error_type}): {e}")
|
|
419
|
+
print(f" Consecutive errors: {self._consecutive_errors}/{self._max_consecutive_errors}")
|
|
420
|
+
log_verbose(f" Exception: {error_type}: {e}")
|
|
421
|
+
|
|
422
|
+
# Check if this is a connection/MCP error that we should fail fast on
|
|
423
|
+
if "connection" in str(e).lower() or "closed" in str(e).lower():
|
|
424
|
+
print(f" MCP connection lost, failing task")
|
|
425
|
+
return self._result(False, f"MCP connection error: {e}", step, start_time)
|
|
426
|
+
|
|
376
427
|
result = {"content": [{"type": "text", "text": str(e)}], "isError": True}
|
|
377
428
|
|
|
378
429
|
# Build function response with image embedded (per reference format)
|
|
@@ -414,7 +465,10 @@ STRICT RULES:
|
|
|
414
465
|
history.append(types.Content(role="model", parts=response_parts))
|
|
415
466
|
log_verbose(f" Added {len(response_parts)} function response(s) to history")
|
|
416
467
|
|
|
417
|
-
|
|
468
|
+
# Max steps reached - still mark as completed so verification runs
|
|
469
|
+
# The agent may have done the task but just didn't say "DONE"
|
|
470
|
+
print(f"\n⚠ Max steps ({max_steps}) reached - will still run verification")
|
|
471
|
+
return self._result(True, "Max steps reached", max_steps, start_time, "Max steps reached - task may be complete")
|
|
418
472
|
|
|
419
473
|
def _result(self, completed: bool, error: Optional[str], steps: int, start_time: float, answer: str = None) -> Dict:
|
|
420
474
|
"""Build result dict."""
|
|
@@ -437,7 +491,7 @@ async def main():
|
|
|
437
491
|
"job_id": os.environ.get("FLEET_JOB_ID"),
|
|
438
492
|
"instance_id": os.environ.get("FLEET_INSTANCE_ID"),
|
|
439
493
|
"model": os.environ.get("FLEET_MODEL", "gemini-2.5-pro"),
|
|
440
|
-
"max_steps": int(os.environ.get("FLEET_MAX_STEPS", "
|
|
494
|
+
"max_steps": int(os.environ.get("FLEET_MAX_STEPS", "200")),
|
|
441
495
|
}
|
|
442
496
|
|
|
443
497
|
print(f"Gemini CUA Agent")
|
|
@@ -57,10 +57,20 @@ async def lifespan(app):
|
|
|
57
57
|
)
|
|
58
58
|
|
|
59
59
|
try:
|
|
60
|
+
logger.info("Starting Playwright browser...")
|
|
60
61
|
await computer.start()
|
|
62
|
+
logger.info(f"Browser started, navigated to: {computer.current_url}")
|
|
61
63
|
yield
|
|
64
|
+
except Exception as e:
|
|
65
|
+
logger.error(f"Browser startup FAILED: {type(e).__name__}: {e}")
|
|
66
|
+
raise
|
|
62
67
|
finally:
|
|
63
|
-
|
|
68
|
+
logger.info("Stopping Playwright browser...")
|
|
69
|
+
try:
|
|
70
|
+
await computer.stop()
|
|
71
|
+
logger.info("Browser stopped")
|
|
72
|
+
except Exception as e:
|
|
73
|
+
logger.error(f"Browser stop error: {type(e).__name__}: {e}")
|
|
64
74
|
|
|
65
75
|
|
|
66
76
|
mcp = FastMCP("cua-server", lifespan=lifespan, host="0.0.0.0", port=PORT)
|
|
@@ -74,7 +84,13 @@ mcp = FastMCP("cua-server", lifespan=lifespan, host="0.0.0.0", port=PORT)
|
|
|
74
84
|
async def computer_screenshot() -> list:
|
|
75
85
|
"""Takes a screenshot of the computer screen. Use this to see what's on screen."""
|
|
76
86
|
logger.info("computer_screenshot()")
|
|
77
|
-
|
|
87
|
+
try:
|
|
88
|
+
result = await computer.screenshot()
|
|
89
|
+
logger.info(f"computer_screenshot() -> {len(result)} bytes")
|
|
90
|
+
return _screenshot_response(result)
|
|
91
|
+
except Exception as e:
|
|
92
|
+
logger.error(f"computer_screenshot() FAILED: {type(e).__name__}: {e}")
|
|
93
|
+
raise
|
|
78
94
|
|
|
79
95
|
|
|
80
96
|
@mcp.tool()
|
|
@@ -88,7 +104,11 @@ async def mouse_click(x: int, y: int, button: str, repeats: int = 1) -> None:
|
|
|
88
104
|
repeats: The number of times to click. Default is 1.
|
|
89
105
|
"""
|
|
90
106
|
logger.info(f"mouse_click({x}, {y}, {button}, {repeats})")
|
|
91
|
-
|
|
107
|
+
try:
|
|
108
|
+
await computer.mouse_click(_dx(x), _dy(y), button, repeats)
|
|
109
|
+
except Exception as e:
|
|
110
|
+
logger.error(f"mouse_click FAILED: {type(e).__name__}: {e}")
|
|
111
|
+
raise
|
|
92
112
|
|
|
93
113
|
|
|
94
114
|
@mcp.tool()
|
|
@@ -172,7 +192,11 @@ async def type_text(input_text: str, press_enter: bool) -> None:
|
|
|
172
192
|
press_enter: Whether to press enter after typing.
|
|
173
193
|
"""
|
|
174
194
|
logger.info(f"type_text({input_text[:50]}{'...' if len(input_text) > 50 else ''}, enter={press_enter})")
|
|
175
|
-
|
|
195
|
+
try:
|
|
196
|
+
await computer.type_text(input_text, press_enter)
|
|
197
|
+
except Exception as e:
|
|
198
|
+
logger.error(f"type_text FAILED: {type(e).__name__}: {e}")
|
|
199
|
+
raise
|
|
176
200
|
|
|
177
201
|
|
|
178
202
|
@mcp.tool()
|
|
@@ -168,6 +168,50 @@ class AgentOrchestrator:
|
|
|
168
168
|
self._available_ports: List[Tuple[int, int]] = []
|
|
169
169
|
# Register global cleanup handlers
|
|
170
170
|
_register_cleanup()
|
|
171
|
+
# Stats tracking
|
|
172
|
+
self._stats = {"started": 0, "completed": 0, "failed": 0, "errors": {}}
|
|
173
|
+
|
|
174
|
+
def _track_error(self, category: str, message: str):
|
|
175
|
+
"""Track an error for summary statistics."""
|
|
176
|
+
if category not in self._stats["errors"]:
|
|
177
|
+
self._stats["errors"][category] = []
|
|
178
|
+
# Keep up to 5 examples per category
|
|
179
|
+
if len(self._stats["errors"][category]) < 5:
|
|
180
|
+
self._stats["errors"][category].append(message[:200])
|
|
181
|
+
|
|
182
|
+
def _print_stats(self):
|
|
183
|
+
"""Print summary statistics."""
|
|
184
|
+
from rich.console import Console
|
|
185
|
+
from rich.table import Table
|
|
186
|
+
|
|
187
|
+
console = Console()
|
|
188
|
+
|
|
189
|
+
total = self._stats["started"]
|
|
190
|
+
completed = self._stats["completed"]
|
|
191
|
+
failed = self._stats["failed"]
|
|
192
|
+
|
|
193
|
+
console.print()
|
|
194
|
+
console.print("[bold]Run Summary:[/bold]")
|
|
195
|
+
console.print(f" Started: {total}")
|
|
196
|
+
console.print(f" Completed: [green]{completed}[/green] ({100*completed/total:.1f}%)" if total > 0 else " Completed: 0")
|
|
197
|
+
console.print(f" Failed: [red]{failed}[/red] ({100*failed/total:.1f}%)" if total > 0 else " Failed: 0")
|
|
198
|
+
|
|
199
|
+
if self._stats["errors"]:
|
|
200
|
+
console.print()
|
|
201
|
+
console.print("[bold]Error Breakdown:[/bold]")
|
|
202
|
+
table = Table(show_header=True, header_style="bold")
|
|
203
|
+
table.add_column("Category")
|
|
204
|
+
table.add_column("Count")
|
|
205
|
+
table.add_column("Example")
|
|
206
|
+
|
|
207
|
+
for category, examples in sorted(self._stats["errors"].items(), key=lambda x: -len(x[1])):
|
|
208
|
+
table.add_row(
|
|
209
|
+
category,
|
|
210
|
+
str(len(examples)),
|
|
211
|
+
examples[0][:80] + "..." if len(examples[0]) > 80 else examples[0]
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
console.print(table)
|
|
171
215
|
|
|
172
216
|
async def _get_next_ports(self) -> Tuple[int, int]:
|
|
173
217
|
"""Get next available MCP port and VNC port."""
|
|
@@ -282,6 +326,9 @@ class AgentOrchestrator:
|
|
|
282
326
|
session_logs = list(self._log_dir.glob("*.jsonl"))
|
|
283
327
|
console.print(f"Logs: {self._log_dir}/ ({len(session_logs)} sessions)")
|
|
284
328
|
|
|
329
|
+
# Print summary statistics
|
|
330
|
+
self._print_stats()
|
|
331
|
+
|
|
285
332
|
return final
|
|
286
333
|
|
|
287
334
|
async def _build_docker_image(self, agent_path: Path):
|
|
@@ -334,15 +381,18 @@ class AgentOrchestrator:
|
|
|
334
381
|
task_prompt = task.prompt
|
|
335
382
|
short_key = task_key[:20]
|
|
336
383
|
|
|
337
|
-
|
|
384
|
+
self._stats["started"] += 1
|
|
385
|
+
logger.debug(f"[{short_key}] Starting (total started: {self._stats['started']})")
|
|
338
386
|
|
|
339
387
|
env = None
|
|
340
388
|
container_id = None
|
|
341
389
|
port = None
|
|
342
390
|
vnc_port = None
|
|
391
|
+
current_phase = "init"
|
|
343
392
|
|
|
344
393
|
try:
|
|
345
394
|
# 1. Create Fleet environment
|
|
395
|
+
current_phase = "create_env"
|
|
346
396
|
logger.debug(f"[{short_key}] Creating env...")
|
|
347
397
|
env = await make_async(
|
|
348
398
|
env_key=task.env_key,
|
|
@@ -356,6 +406,7 @@ class AgentOrchestrator:
|
|
|
356
406
|
await asyncio.sleep(3) # Wait for env to be ready
|
|
357
407
|
|
|
358
408
|
# 2. Start Docker container with CUA server
|
|
409
|
+
current_phase = "start_container"
|
|
359
410
|
port, vnc_port = await self._get_next_ports()
|
|
360
411
|
logger.debug(f"[{short_key}] Starting container on port {port}...")
|
|
361
412
|
container_id = await self._start_container(
|
|
@@ -373,11 +424,13 @@ class AgentOrchestrator:
|
|
|
373
424
|
print(f"[{short_key}] Browser: http://localhost:{vnc_port}/vnc.html")
|
|
374
425
|
|
|
375
426
|
# Wait for server to be ready
|
|
427
|
+
current_phase = "wait_for_server"
|
|
376
428
|
logger.debug(f"[{short_key}] Waiting for CUA server...")
|
|
377
429
|
await self._wait_for_server(port)
|
|
378
430
|
logger.debug(f"[{short_key}] CUA server ready")
|
|
379
431
|
|
|
380
432
|
# 3. Run agent
|
|
433
|
+
current_phase = "run_agent"
|
|
381
434
|
logger.debug(f"[{short_key}] Running agent...")
|
|
382
435
|
agent_result = await self._run_agent(
|
|
383
436
|
port=port,
|
|
@@ -388,14 +441,17 @@ class AgentOrchestrator:
|
|
|
388
441
|
logger.debug(
|
|
389
442
|
f"[{short_key}] Agent done: completed={agent_result.completed}"
|
|
390
443
|
)
|
|
444
|
+
if agent_result.error and agent_result.error != "Max steps reached":
|
|
445
|
+
print(f"[{short_key}] Agent error: {agent_result.error[:200]}")
|
|
391
446
|
|
|
392
447
|
# 4. Run verification
|
|
448
|
+
current_phase = "verification"
|
|
393
449
|
verification_success = None
|
|
394
450
|
verification_score = None
|
|
395
451
|
verifier_execution_id = None
|
|
396
452
|
|
|
397
453
|
if agent_result.completed and task.verifier:
|
|
398
|
-
logger.info(f"[{
|
|
454
|
+
logger.info(f"[{short_key}] Running verification...")
|
|
399
455
|
try:
|
|
400
456
|
v = await task.verify_detailed_async(
|
|
401
457
|
env=env,
|
|
@@ -407,9 +463,21 @@ class AgentOrchestrator:
|
|
|
407
463
|
verification_score = (
|
|
408
464
|
v.result if isinstance(v.result, (int, float)) else None
|
|
409
465
|
)
|
|
410
|
-
logger.info(f"[{
|
|
466
|
+
logger.info(f"[{short_key}] Verification: {verification_success}")
|
|
467
|
+
if verification_success:
|
|
468
|
+
self._stats["completed"] += 1
|
|
469
|
+
else:
|
|
470
|
+
self._stats["failed"] += 1
|
|
471
|
+
print(f"[{short_key}] Verification FAILED: score={verification_score}")
|
|
411
472
|
except Exception as e:
|
|
412
|
-
logger.error(f"[{
|
|
473
|
+
logger.error(f"[{short_key}] Verification error: {e}")
|
|
474
|
+
self._stats["failed"] += 1
|
|
475
|
+
self._track_error("verification_error", str(e))
|
|
476
|
+
elif not agent_result.completed:
|
|
477
|
+
self._stats["failed"] += 1
|
|
478
|
+
error_msg = agent_result.error or "unknown"
|
|
479
|
+
self._track_error("agent_not_completed", error_msg)
|
|
480
|
+
print(f"[{short_key}] Agent did not complete: {error_msg}")
|
|
413
481
|
|
|
414
482
|
# 5. Complete/fail session (session was created by agent, we just complete it)
|
|
415
483
|
session_id = getattr(agent_result, "session_id", None)
|
|
@@ -439,11 +507,24 @@ class AgentOrchestrator:
|
|
|
439
507
|
)
|
|
440
508
|
|
|
441
509
|
except Exception as e:
|
|
442
|
-
|
|
510
|
+
import traceback
|
|
511
|
+
error_type = type(e).__name__
|
|
512
|
+
error_msg = str(e)
|
|
513
|
+
tb = traceback.format_exc()
|
|
514
|
+
|
|
515
|
+
# Categorize the error
|
|
516
|
+
error_category = f"{current_phase}:{error_type}"
|
|
517
|
+
self._track_error(error_category, error_msg)
|
|
518
|
+
self._stats["failed"] += 1
|
|
519
|
+
|
|
520
|
+
# Always print errors for visibility
|
|
521
|
+
print(f"[{short_key}] EXCEPTION in {current_phase}: {error_type}: {error_msg[:200]}")
|
|
522
|
+
logger.error(f"[{short_key}] Traceback:\n{tb}")
|
|
523
|
+
|
|
443
524
|
return TaskResult(
|
|
444
525
|
task_key=task_key,
|
|
445
526
|
task_prompt=task_prompt,
|
|
446
|
-
error=
|
|
527
|
+
error=f"[{current_phase}] {error_type}: {error_msg}",
|
|
447
528
|
execution_time_ms=int((time.time() - start) * 1000),
|
|
448
529
|
)
|
|
449
530
|
|
|
@@ -687,7 +768,7 @@ async def run_agent(
|
|
|
687
768
|
agent: str = "gemini_cua",
|
|
688
769
|
model: str = "gemini-2.5-pro",
|
|
689
770
|
max_concurrent: int = 4,
|
|
690
|
-
max_steps: int =
|
|
771
|
+
max_steps: int = 200,
|
|
691
772
|
timeout_seconds: int = 600,
|
|
692
773
|
api_keys: Optional[Dict[str, str]] = None,
|
|
693
774
|
headful: bool = False,
|
|
@@ -1532,11 +1532,11 @@ class Fleet:
|
|
|
1532
1532
|
instance_id=instance_id,
|
|
1533
1533
|
)
|
|
1534
1534
|
|
|
1535
|
-
def trace_job(self, name: str) -> str:
|
|
1535
|
+
def trace_job(self, name: Optional[str] = None) -> str:
|
|
1536
1536
|
"""Create a new trace job.
|
|
1537
1537
|
|
|
1538
1538
|
Args:
|
|
1539
|
-
name: Name of the job
|
|
1539
|
+
name: Name of the job (generated server-side if not provided)
|
|
1540
1540
|
|
|
1541
1541
|
Returns:
|
|
1542
1542
|
The job_id string
|
|
@@ -648,7 +648,7 @@ class SessionIngestResponse(BaseModel):
|
|
|
648
648
|
class TraceJobRequest(BaseModel):
|
|
649
649
|
"""Request to create a new trace job."""
|
|
650
650
|
|
|
651
|
-
name: str = Field(
|
|
651
|
+
name: Optional[str] = Field(None, title="Name", description="Name of the job (generated server-side if not provided)")
|
|
652
652
|
|
|
653
653
|
|
|
654
654
|
class TraceJobResponse(BaseModel):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|