fleet-python 0.2.82__tar.gz → 0.2.84__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_python-0.2.82/fleet_python.egg-info → fleet_python-0.2.84}/PKG-INFO +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/__init__.py +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/__init__.py +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/base.py +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/client.py +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/gemini_cua/agent.py +61 -26
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/gemini_cua/mcp_server.py +4 -2
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/orchestrator.py +26 -44
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/types.py +1 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/base.py +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/pyproject.toml +1 -1
- {fleet_python-0.2.82 → fleet_python-0.2.84}/LICENSE +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/README.md +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/diff_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_account.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_sync.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_task.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/export_tasks.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/fetch_tasks.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/import_tasks.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/iterate_verifiers.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/openai_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/quickstart.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/models.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/gemini_cua/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/agent/utils.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/cli.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/config.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/env/client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/eval/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/eval/uploader.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/global_client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/models.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/proxy/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/proxy/proxy.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/proxy/whitelist.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/tasks.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/types.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/utils/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/utils/http_logging.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/utils/logging.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/utils/playwright.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/db.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet_python.egg-info/SOURCES.txt +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet_python.egg-info/entry_points.txt +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/scripts/unasync.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/setup.cfg +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/__init__.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/test_app_method.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/test_expect_only.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/test_instance_dispatch.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/test_sqlite_resource_dual_mode.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/test_sqlite_shared_memory_behavior.py +0 -0
- {fleet_python-0.2.82 → fleet_python-0.2.84}/tests/test_verifier_from_string.py +0 -0
|
@@ -1428,7 +1428,7 @@ class AsyncFleet:
|
|
|
1428
1428
|
Returns:
|
|
1429
1429
|
The job_id string
|
|
1430
1430
|
"""
|
|
1431
|
-
from .models import TraceJobRequest, TraceJobResponse
|
|
1431
|
+
from fleet.models import TraceJobRequest, TraceJobResponse
|
|
1432
1432
|
|
|
1433
1433
|
request = TraceJobRequest(name=name)
|
|
1434
1434
|
response = await self.client.request(
|
|
@@ -25,6 +25,7 @@ from mcp import ClientSession
|
|
|
25
25
|
from mcp.client.streamable_http import streamable_http_client
|
|
26
26
|
from google import genai
|
|
27
27
|
from google.genai import types
|
|
28
|
+
import fleet
|
|
28
29
|
from fleet.utils.logging import log_verbose, VERBOSE
|
|
29
30
|
|
|
30
31
|
# Whitelist hooks for auto-detecting model endpoints (optional)
|
|
@@ -136,20 +137,36 @@ class MCP:
|
|
|
136
137
|
result = await self._session.call_tool(name, args or {})
|
|
137
138
|
duration_ms = int((time.time() - start_time) * 1000)
|
|
138
139
|
|
|
140
|
+
# Debug: log raw MCP result structure
|
|
141
|
+
log_verbose(f" MCP result.content ({len(result.content)} items):")
|
|
142
|
+
for i, item in enumerate(result.content):
|
|
143
|
+
log_verbose(f" [{i}] type={type(item).__name__}, attrs={dir(item)[:10]}...")
|
|
144
|
+
if hasattr(item, "type"):
|
|
145
|
+
log_verbose(f" .type = {repr(item.type)}")
|
|
146
|
+
if hasattr(item, "data"):
|
|
147
|
+
data_preview = str(item.data)[:50] if item.data else "None"
|
|
148
|
+
log_verbose(f" .data = {data_preview}...")
|
|
149
|
+
|
|
150
|
+
# Helper to get attribute or dict key
|
|
151
|
+
def _get(item, key, default=None):
|
|
152
|
+
if isinstance(item, dict):
|
|
153
|
+
return item.get(key, default)
|
|
154
|
+
return getattr(item, key, default)
|
|
155
|
+
|
|
139
156
|
# Convert MCP result to dict format expected by agent
|
|
140
157
|
content = []
|
|
141
158
|
for item in result.content:
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
159
|
+
item_type = _get(item, "type")
|
|
160
|
+
if item_type == "image":
|
|
161
|
+
content.append({
|
|
162
|
+
"type": "image",
|
|
163
|
+
"data": _get(item, "data", ""),
|
|
164
|
+
"mimeType": _get(item, "mimeType", "image/png"),
|
|
165
|
+
})
|
|
166
|
+
elif item_type == "text":
|
|
167
|
+
content.append({"type": "text", "text": _get(item, "text", "")})
|
|
151
168
|
|
|
152
|
-
# Log the call
|
|
169
|
+
# Log the call (just types, not data)
|
|
153
170
|
self._log({
|
|
154
171
|
"type": "mcp_call",
|
|
155
172
|
"tool": name,
|
|
@@ -158,20 +175,7 @@ class MCP:
|
|
|
158
175
|
"response_content_types": [c.get("type") for c in content],
|
|
159
176
|
"is_error": result.isError if hasattr(result, "isError") else False,
|
|
160
177
|
})
|
|
161
|
-
|
|
162
|
-
# Return full content (not truncated)
|
|
163
|
-
full_content = []
|
|
164
|
-
for item in result.content:
|
|
165
|
-
if hasattr(item, "type"):
|
|
166
|
-
if item.type == "image":
|
|
167
|
-
full_content.append({
|
|
168
|
-
"type": "image",
|
|
169
|
-
"data": item.data,
|
|
170
|
-
"mimeType": getattr(item, "mimeType", "image/png"),
|
|
171
|
-
})
|
|
172
|
-
elif item.type == "text":
|
|
173
|
-
full_content.append({"type": "text", "text": item.text})
|
|
174
|
-
return {"content": full_content, "isError": result.isError if hasattr(result, "isError") else False}
|
|
178
|
+
return {"content": content, "isError": result.isError if hasattr(result, "isError") else False}
|
|
175
179
|
|
|
176
180
|
def get_tools(self) -> List[Dict]:
|
|
177
181
|
"""Return the list of tools from the server."""
|
|
@@ -201,12 +205,13 @@ def get_image_data(result: Dict) -> Optional[str]:
|
|
|
201
205
|
class GeminiAgent:
|
|
202
206
|
"""Gemini Computer Use Agent."""
|
|
203
207
|
|
|
204
|
-
def __init__(self, mcp: MCP, model: str):
|
|
208
|
+
def __init__(self, mcp: MCP, model: str, session=None):
|
|
205
209
|
self.mcp = mcp
|
|
206
210
|
# Strip provider prefix if present
|
|
207
211
|
self.model = model.split("/")[-1] if "/" in model else model
|
|
208
212
|
self.client = get_gemini_client()
|
|
209
213
|
self.transcript: List[Dict] = []
|
|
214
|
+
self.session = session # Fleet session for live logging
|
|
210
215
|
|
|
211
216
|
async def _execute_tool(self, name: str, args: Dict) -> Dict:
|
|
212
217
|
return await self.mcp.call(name, args)
|
|
@@ -251,8 +256,13 @@ STRICT RULES:
|
|
|
251
256
|
max_output_tokens=4096,
|
|
252
257
|
system_instruction=system_prompt,
|
|
253
258
|
tools=[types.Tool(function_declarations=gemini_tools)],
|
|
259
|
+
thinking_config=types.ThinkingConfig(include_thoughts=True),
|
|
254
260
|
)
|
|
255
261
|
|
|
262
|
+
# Set config on session for logging (if session exists)
|
|
263
|
+
if self.session:
|
|
264
|
+
self.session.config = config
|
|
265
|
+
|
|
256
266
|
history: List[types.Content] = []
|
|
257
267
|
|
|
258
268
|
user_prompt = f"""###User instruction: {prompt}"""
|
|
@@ -292,6 +302,15 @@ STRICT RULES:
|
|
|
292
302
|
log_verbose(f" Candidate: {candidate}")
|
|
293
303
|
continue
|
|
294
304
|
|
|
305
|
+
# Log to Fleet session (live)
|
|
306
|
+
if self.session:
|
|
307
|
+
try:
|
|
308
|
+
await self.session.log(history, response)
|
|
309
|
+
if step == 1 and self.session.session_id:
|
|
310
|
+
print(f"Session: https://fleetai.com/dashboard/sessions/{self.session.session_id}")
|
|
311
|
+
except Exception as e:
|
|
312
|
+
log_verbose(f" [WARN] Session log failed: {e}")
|
|
313
|
+
|
|
295
314
|
# Log all parts for debugging
|
|
296
315
|
log_verbose(f"\n Response parts ({len(candidate.content.parts)}):")
|
|
297
316
|
for i, part in enumerate(candidate.content.parts):
|
|
@@ -415,6 +434,8 @@ async def main():
|
|
|
415
434
|
"url": os.environ.get("FLEET_MCP_URL", "http://localhost:8765"),
|
|
416
435
|
"prompt": os.environ.get("FLEET_TASK_PROMPT", ""),
|
|
417
436
|
"task_key": os.environ.get("FLEET_TASK_KEY", ""),
|
|
437
|
+
"job_id": os.environ.get("FLEET_JOB_ID"),
|
|
438
|
+
"instance_id": os.environ.get("FLEET_INSTANCE_ID"),
|
|
418
439
|
"model": os.environ.get("FLEET_MODEL", "gemini-2.5-pro"),
|
|
419
440
|
"max_steps": int(os.environ.get("FLEET_MAX_STEPS", "100")),
|
|
420
441
|
}
|
|
@@ -430,10 +451,24 @@ async def main():
|
|
|
430
451
|
print(json.dumps(result))
|
|
431
452
|
return result
|
|
432
453
|
|
|
454
|
+
# Create Fleet session for live logging
|
|
455
|
+
session = None
|
|
456
|
+
if os.environ.get("FLEET_API_KEY"):
|
|
457
|
+
session = fleet.session_async(
|
|
458
|
+
job_id=config["job_id"],
|
|
459
|
+
model=config["model"],
|
|
460
|
+
task_key=config["task_key"],
|
|
461
|
+
instance_id=config["instance_id"],
|
|
462
|
+
)
|
|
463
|
+
|
|
433
464
|
async with MCP(config["url"]) as mcp:
|
|
434
|
-
agent = GeminiAgent(mcp, config["model"])
|
|
465
|
+
agent = GeminiAgent(mcp, config["model"], session=session)
|
|
435
466
|
result = await agent.run(config["prompt"], config["max_steps"])
|
|
436
467
|
result["task_key"] = config["task_key"]
|
|
468
|
+
# Include session_id in result so orchestrator can complete it after verification
|
|
469
|
+
if session and session.session_id:
|
|
470
|
+
result["session_id"] = session.session_id
|
|
471
|
+
|
|
437
472
|
print(json.dumps(result))
|
|
438
473
|
return result
|
|
439
474
|
|
|
@@ -18,6 +18,7 @@ from contextlib import asynccontextmanager
|
|
|
18
18
|
from typing import Optional
|
|
19
19
|
|
|
20
20
|
from mcp.server.fastmcp import FastMCP
|
|
21
|
+
from mcp.types import ImageContent, TextContent
|
|
21
22
|
from starlette.requests import Request
|
|
22
23
|
from starlette.responses import JSONResponse
|
|
23
24
|
|
|
@@ -227,9 +228,10 @@ def _dy(y: int) -> int:
|
|
|
227
228
|
|
|
228
229
|
|
|
229
230
|
def _screenshot_response(img: bytes) -> list:
|
|
231
|
+
"""Return screenshot as proper MCP content types."""
|
|
230
232
|
return [
|
|
231
|
-
|
|
232
|
-
|
|
233
|
+
ImageContent(type="image", data=base64.b64encode(img).decode(), mimeType="image/png"),
|
|
234
|
+
TextContent(type="text", text=f"URL: {computer.current_url}"),
|
|
233
235
|
]
|
|
234
236
|
|
|
235
237
|
|
|
@@ -21,15 +21,14 @@ import asyncio
|
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
23
|
import os
|
|
24
|
-
import subprocess
|
|
25
24
|
import time
|
|
25
|
+
from datetime import datetime
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
from typing import Dict, List, Optional, Tuple
|
|
28
28
|
|
|
29
|
+
import fleet
|
|
29
30
|
from .utils import get_agent_path
|
|
30
31
|
from .types import AgentConfig, AgentResult, TaskResult
|
|
31
|
-
from fleet.proxy import ProxyManager
|
|
32
|
-
from fleet.eval import TrafficUploader
|
|
33
32
|
|
|
34
33
|
logger = logging.getLogger(__name__)
|
|
35
34
|
|
|
@@ -45,11 +44,6 @@ class AgentOrchestrator:
|
|
|
45
44
|
self._docker_image: Optional[str] = None
|
|
46
45
|
# Track available ports (recycled when tasks complete)
|
|
47
46
|
self._available_ports: List[Tuple[int, int]] = []
|
|
48
|
-
# MITM proxy for traffic capture
|
|
49
|
-
self._proxy: Optional[ProxyManager] = None
|
|
50
|
-
self._proxy_env: Dict[str, str] = {}
|
|
51
|
-
# Traffic uploader (tails proxy log, ships to backend)
|
|
52
|
-
self._uploader: Optional[TrafficUploader] = None
|
|
53
47
|
|
|
54
48
|
async def _get_next_ports(self) -> Tuple[int, int]:
|
|
55
49
|
"""Get next available MCP port and VNC port."""
|
|
@@ -75,38 +69,18 @@ class AgentOrchestrator:
|
|
|
75
69
|
from rich.console import Console
|
|
76
70
|
from rich.live import Live
|
|
77
71
|
from rich.spinner import Spinner
|
|
78
|
-
import uuid
|
|
79
72
|
|
|
80
73
|
console = Console()
|
|
81
74
|
|
|
82
|
-
#
|
|
83
|
-
|
|
84
|
-
|
|
75
|
+
# Create job via Fleet API
|
|
76
|
+
job_name = f"eval-{self.config.agent}-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
77
|
+
self._job_id = await fleet.job_async(name=job_name)
|
|
78
|
+
console.print(f"Job: https://fleetai.com/dashboard/jobs/{self._job_id}")
|
|
85
79
|
|
|
86
80
|
# Create log directory: ~/.fleet/logs/{job_id}/
|
|
87
81
|
self._log_dir = Path.home() / ".fleet" / "logs" / self._job_id
|
|
88
82
|
self._log_dir.mkdir(parents=True, exist_ok=True)
|
|
89
83
|
|
|
90
|
-
# Start MITM proxy for traffic capture
|
|
91
|
-
self._proxy = ProxyManager()
|
|
92
|
-
try:
|
|
93
|
-
self._proxy_env = await self._proxy.start()
|
|
94
|
-
console.print(f"Proxy started, logging to: {self._proxy.log_path}")
|
|
95
|
-
|
|
96
|
-
# Start traffic uploader (tails proxy log, ships raw to backend)
|
|
97
|
-
self._uploader = TrafficUploader(
|
|
98
|
-
job_id=self._job_id,
|
|
99
|
-
log_file=self._proxy.log_path,
|
|
100
|
-
whitelist=None, # No filter - upload everything
|
|
101
|
-
)
|
|
102
|
-
await self._uploader.start()
|
|
103
|
-
except Exception as e:
|
|
104
|
-
console.print(f"[yellow]⚠[/yellow] Proxy failed to start: {e}")
|
|
105
|
-
console.print("[dim] Proxy requires aiohttp: pip install aiohttp[/dim]")
|
|
106
|
-
self._proxy = None
|
|
107
|
-
self._proxy_env = {}
|
|
108
|
-
self._uploader = None
|
|
109
|
-
|
|
110
84
|
# Load tasks with spinner
|
|
111
85
|
with Live(Spinner("dots", text=f"Loading tasks from {self.config.project_key}..."), console=console, transient=True):
|
|
112
86
|
if self.config.task_keys:
|
|
@@ -168,16 +142,6 @@ class AgentOrchestrator:
|
|
|
168
142
|
else:
|
|
169
143
|
final.append(r)
|
|
170
144
|
|
|
171
|
-
# Stop uploader first (flushes remaining entries)
|
|
172
|
-
if self._uploader:
|
|
173
|
-
await self._uploader.stop()
|
|
174
|
-
stats = self._uploader.stats
|
|
175
|
-
console.print(f"Traffic: {stats['read']} read, {stats['uploaded']} uploaded")
|
|
176
|
-
|
|
177
|
-
# Stop proxy
|
|
178
|
-
if self._proxy:
|
|
179
|
-
await self._proxy.stop()
|
|
180
|
-
|
|
181
145
|
# Show logs location
|
|
182
146
|
if hasattr(self, '_log_dir') and self._log_dir.exists():
|
|
183
147
|
session_logs = list(self._log_dir.glob("*.jsonl"))
|
|
@@ -280,12 +244,14 @@ class AgentOrchestrator:
|
|
|
280
244
|
port=port,
|
|
281
245
|
task_prompt=task_prompt,
|
|
282
246
|
task_key=task_key,
|
|
247
|
+
instance_id=env.instance_id,
|
|
283
248
|
)
|
|
284
249
|
logger.debug(f"[{short_key}] Agent done: completed={agent_result.completed}")
|
|
285
250
|
|
|
286
251
|
# 4. Run verification
|
|
287
252
|
verification_success = None
|
|
288
253
|
verification_score = None
|
|
254
|
+
verifier_execution_id = None
|
|
289
255
|
|
|
290
256
|
if agent_result.completed and task.verifier:
|
|
291
257
|
logger.info(f"[{task_key}] Running verification...")
|
|
@@ -295,12 +261,27 @@ class AgentOrchestrator:
|
|
|
295
261
|
final_answer=agent_result.final_answer,
|
|
296
262
|
)
|
|
297
263
|
verification_success = v.success
|
|
264
|
+
verifier_execution_id = v.execution_id
|
|
298
265
|
# Score is in v.result (the verifier function's return value)
|
|
299
266
|
verification_score = v.result if isinstance(v.result, (int, float)) else None
|
|
300
267
|
logger.info(f"[{task_key}] Verification: {verification_success}")
|
|
301
268
|
except Exception as e:
|
|
302
269
|
logger.error(f"[{task_key}] Verification error: {e}")
|
|
303
270
|
|
|
271
|
+
# 5. Complete/fail session (session was created by agent, we just complete it)
|
|
272
|
+
session_id = getattr(agent_result, 'session_id', None)
|
|
273
|
+
if session_id:
|
|
274
|
+
try:
|
|
275
|
+
# Create session object to complete it
|
|
276
|
+
session = fleet.session_async(session_id=session_id)
|
|
277
|
+
if verification_success:
|
|
278
|
+
await session.complete(verifier_execution_id=verifier_execution_id)
|
|
279
|
+
else:
|
|
280
|
+
await session.fail(verifier_execution_id=verifier_execution_id)
|
|
281
|
+
logger.info(f"[{task_key}] Session: https://fleetai.com/dashboard/sessions/{session_id}")
|
|
282
|
+
except Exception as e:
|
|
283
|
+
logger.error(f"[{task_key}] Session complete error: {e}")
|
|
284
|
+
|
|
304
285
|
return TaskResult(
|
|
305
286
|
task_key=task_key,
|
|
306
287
|
task_prompt=task_prompt,
|
|
@@ -414,6 +395,7 @@ class AgentOrchestrator:
|
|
|
414
395
|
port: int,
|
|
415
396
|
task_prompt: str,
|
|
416
397
|
task_key: str,
|
|
398
|
+
instance_id: Optional[str] = None,
|
|
417
399
|
) -> AgentResult:
|
|
418
400
|
"""Run agent process."""
|
|
419
401
|
agent_path = get_agent_path(self.config.agent)
|
|
@@ -431,6 +413,7 @@ class AgentOrchestrator:
|
|
|
431
413
|
"FLEET_JOB_ID": self._job_id,
|
|
432
414
|
"FLEET_TASK_PROMPT": task_prompt,
|
|
433
415
|
"FLEET_TASK_KEY": task_key,
|
|
416
|
+
"FLEET_INSTANCE_ID": instance_id or "",
|
|
434
417
|
"FLEET_MODEL": self.config.model,
|
|
435
418
|
"FLEET_MAX_STEPS": str(self.config.max_steps),
|
|
436
419
|
"FLEET_SCREEN_WIDTH": str(self.config.screen_width),
|
|
@@ -438,8 +421,6 @@ class AgentOrchestrator:
|
|
|
438
421
|
"FLEET_VERBOSE": "true" if self.config.verbose else "false",
|
|
439
422
|
})
|
|
440
423
|
env.update(self.config.api_keys)
|
|
441
|
-
# Add proxy env vars for traffic capture
|
|
442
|
-
env.update(self._proxy_env)
|
|
443
424
|
|
|
444
425
|
proc = await asyncio.create_subprocess_exec(
|
|
445
426
|
"python", str(agent_script),
|
|
@@ -494,6 +475,7 @@ class AgentOrchestrator:
|
|
|
494
475
|
steps_taken=result_json.get("steps_taken", 0),
|
|
495
476
|
execution_time_ms=result_json.get("execution_time_ms", 0),
|
|
496
477
|
transcript=result_json.get("transcript", []),
|
|
478
|
+
session_id=result_json.get("session_id"),
|
|
497
479
|
)
|
|
498
480
|
|
|
499
481
|
# Include stderr in error message
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|