fleet-python 0.2.83__tar.gz → 0.2.85__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_python-0.2.83/fleet_python.egg-info → fleet_python-0.2.85}/PKG-INFO +1 -1
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/__init__.py +1 -1
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/__init__.py +1 -1
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/base.py +1 -1
- fleet_python-0.2.85/fleet/agent/gemini_cua/Dockerfile +44 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/gemini_cua/agent.py +61 -26
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/gemini_cua/mcp_server.py +4 -2
- fleet_python-0.2.85/fleet/agent/gemini_cua/requirements.txt +4 -0
- fleet_python-0.2.85/fleet/agent/gemini_cua/start.sh +31 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/orchestrator.py +28 -51
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/types.py +1 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/base.py +1 -1
- fleet_python-0.2.85/fleet/utils/playwright.py +440 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/SOURCES.txt +4 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/pyproject.toml +4 -1
- {fleet_python-0.2.83 → fleet_python-0.2.85}/LICENSE +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/README.md +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/diff_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_account.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_sync.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_task.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/export_tasks.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/fetch_tasks.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/import_tasks.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/iterate_verifiers.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/openai_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/quickstart.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/models.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/gemini_cua/__init__.py +0 -0
- /fleet_python-0.2.83/fleet/utils/playwright.py → /fleet_python-0.2.85/fleet/agent/gemini_cua/playwright_utils.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/utils.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/cli.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/config.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/env/client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/eval/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/eval/uploader.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/global_client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/models.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/proxy/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/proxy/proxy.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/proxy/whitelist.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/tasks.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/types.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/utils/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/utils/http_logging.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/utils/logging.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/db.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/entry_points.txt +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/scripts/unasync.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/setup.cfg +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/__init__.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_app_method.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_expect_only.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_instance_dispatch.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_sqlite_resource_dual_mode.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_sqlite_shared_memory_behavior.py +0 -0
- {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_verifier_from_string.py +0 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# MCP Server - Browser control in Docker with optional VNC
|
|
2
|
+
FROM python:3.11-slim
|
|
3
|
+
|
|
4
|
+
# Install dependencies for Chromium and VNC
|
|
5
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
6
|
+
# Chromium dependencies
|
|
7
|
+
wget fonts-liberation libasound2 libatk-bridge2.0-0 libatk1.0-0 \
|
|
8
|
+
libatspi2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
|
|
9
|
+
libnspr4 libnss3 libxcomposite1 libxdamage1 libxfixes3 libxkbcommon0 \
|
|
10
|
+
libxrandr2 xdg-utils \
|
|
11
|
+
# VNC and display for headful mode
|
|
12
|
+
xvfb x11vnc fluxbox \
|
|
13
|
+
# noVNC for web-based viewing
|
|
14
|
+
novnc websockify \
|
|
15
|
+
# Utilities
|
|
16
|
+
procps net-tools \
|
|
17
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
18
|
+
|
|
19
|
+
WORKDIR /app
|
|
20
|
+
|
|
21
|
+
# Install Python deps
|
|
22
|
+
COPY requirements.txt .
|
|
23
|
+
RUN pip install --no-cache-dir -r requirements.txt && playwright install chromium
|
|
24
|
+
|
|
25
|
+
# Copy server files (all from same directory)
|
|
26
|
+
COPY playwright_utils.py .
|
|
27
|
+
COPY mcp_server.py .
|
|
28
|
+
COPY start.sh .
|
|
29
|
+
RUN chmod +x start.sh
|
|
30
|
+
|
|
31
|
+
# Environment
|
|
32
|
+
ENV PORT=8765 \
|
|
33
|
+
SCREEN_WIDTH=1366 \
|
|
34
|
+
SCREEN_HEIGHT=768 \
|
|
35
|
+
HEADLESS=true \
|
|
36
|
+
VNC_PORT=5900 \
|
|
37
|
+
NOVNC_PORT=6080 \
|
|
38
|
+
DISPLAY=:99
|
|
39
|
+
|
|
40
|
+
# Expose ports: MCP server, VNC, noVNC
|
|
41
|
+
EXPOSE 8765 5900 6080
|
|
42
|
+
|
|
43
|
+
# Start script handles display setup
|
|
44
|
+
CMD ["./start.sh"]
|
|
@@ -25,6 +25,7 @@ from mcp import ClientSession
|
|
|
25
25
|
from mcp.client.streamable_http import streamable_http_client
|
|
26
26
|
from google import genai
|
|
27
27
|
from google.genai import types
|
|
28
|
+
import fleet
|
|
28
29
|
from fleet.utils.logging import log_verbose, VERBOSE
|
|
29
30
|
|
|
30
31
|
# Whitelist hooks for auto-detecting model endpoints (optional)
|
|
@@ -136,20 +137,36 @@ class MCP:
|
|
|
136
137
|
result = await self._session.call_tool(name, args or {})
|
|
137
138
|
duration_ms = int((time.time() - start_time) * 1000)
|
|
138
139
|
|
|
140
|
+
# Debug: log raw MCP result structure
|
|
141
|
+
log_verbose(f" MCP result.content ({len(result.content)} items):")
|
|
142
|
+
for i, item in enumerate(result.content):
|
|
143
|
+
log_verbose(f" [{i}] type={type(item).__name__}, attrs={dir(item)[:10]}...")
|
|
144
|
+
if hasattr(item, "type"):
|
|
145
|
+
log_verbose(f" .type = {repr(item.type)}")
|
|
146
|
+
if hasattr(item, "data"):
|
|
147
|
+
data_preview = str(item.data)[:50] if item.data else "None"
|
|
148
|
+
log_verbose(f" .data = {data_preview}...")
|
|
149
|
+
|
|
150
|
+
# Helper to get attribute or dict key
|
|
151
|
+
def _get(item, key, default=None):
|
|
152
|
+
if isinstance(item, dict):
|
|
153
|
+
return item.get(key, default)
|
|
154
|
+
return getattr(item, key, default)
|
|
155
|
+
|
|
139
156
|
# Convert MCP result to dict format expected by agent
|
|
140
157
|
content = []
|
|
141
158
|
for item in result.content:
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
159
|
+
item_type = _get(item, "type")
|
|
160
|
+
if item_type == "image":
|
|
161
|
+
content.append({
|
|
162
|
+
"type": "image",
|
|
163
|
+
"data": _get(item, "data", ""),
|
|
164
|
+
"mimeType": _get(item, "mimeType", "image/png"),
|
|
165
|
+
})
|
|
166
|
+
elif item_type == "text":
|
|
167
|
+
content.append({"type": "text", "text": _get(item, "text", "")})
|
|
151
168
|
|
|
152
|
-
# Log the call
|
|
169
|
+
# Log the call (just types, not data)
|
|
153
170
|
self._log({
|
|
154
171
|
"type": "mcp_call",
|
|
155
172
|
"tool": name,
|
|
@@ -158,20 +175,7 @@ class MCP:
|
|
|
158
175
|
"response_content_types": [c.get("type") for c in content],
|
|
159
176
|
"is_error": result.isError if hasattr(result, "isError") else False,
|
|
160
177
|
})
|
|
161
|
-
|
|
162
|
-
# Return full content (not truncated)
|
|
163
|
-
full_content = []
|
|
164
|
-
for item in result.content:
|
|
165
|
-
if hasattr(item, "type"):
|
|
166
|
-
if item.type == "image":
|
|
167
|
-
full_content.append({
|
|
168
|
-
"type": "image",
|
|
169
|
-
"data": item.data,
|
|
170
|
-
"mimeType": getattr(item, "mimeType", "image/png"),
|
|
171
|
-
})
|
|
172
|
-
elif item.type == "text":
|
|
173
|
-
full_content.append({"type": "text", "text": item.text})
|
|
174
|
-
return {"content": full_content, "isError": result.isError if hasattr(result, "isError") else False}
|
|
178
|
+
return {"content": content, "isError": result.isError if hasattr(result, "isError") else False}
|
|
175
179
|
|
|
176
180
|
def get_tools(self) -> List[Dict]:
|
|
177
181
|
"""Return the list of tools from the server."""
|
|
@@ -201,12 +205,13 @@ def get_image_data(result: Dict) -> Optional[str]:
|
|
|
201
205
|
class GeminiAgent:
|
|
202
206
|
"""Gemini Computer Use Agent."""
|
|
203
207
|
|
|
204
|
-
def __init__(self, mcp: MCP, model: str):
|
|
208
|
+
def __init__(self, mcp: MCP, model: str, session=None):
|
|
205
209
|
self.mcp = mcp
|
|
206
210
|
# Strip provider prefix if present
|
|
207
211
|
self.model = model.split("/")[-1] if "/" in model else model
|
|
208
212
|
self.client = get_gemini_client()
|
|
209
213
|
self.transcript: List[Dict] = []
|
|
214
|
+
self.session = session # Fleet session for live logging
|
|
210
215
|
|
|
211
216
|
async def _execute_tool(self, name: str, args: Dict) -> Dict:
|
|
212
217
|
return await self.mcp.call(name, args)
|
|
@@ -251,8 +256,13 @@ STRICT RULES:
|
|
|
251
256
|
max_output_tokens=4096,
|
|
252
257
|
system_instruction=system_prompt,
|
|
253
258
|
tools=[types.Tool(function_declarations=gemini_tools)],
|
|
259
|
+
thinking_config=types.ThinkingConfig(include_thoughts=True),
|
|
254
260
|
)
|
|
255
261
|
|
|
262
|
+
# Set config on session for logging (if session exists)
|
|
263
|
+
if self.session:
|
|
264
|
+
self.session.config = config
|
|
265
|
+
|
|
256
266
|
history: List[types.Content] = []
|
|
257
267
|
|
|
258
268
|
user_prompt = f"""###User instruction: {prompt}"""
|
|
@@ -292,6 +302,15 @@ STRICT RULES:
|
|
|
292
302
|
log_verbose(f" Candidate: {candidate}")
|
|
293
303
|
continue
|
|
294
304
|
|
|
305
|
+
# Log to Fleet session (live)
|
|
306
|
+
if self.session:
|
|
307
|
+
try:
|
|
308
|
+
await self.session.log(history, response)
|
|
309
|
+
if step == 1 and self.session.session_id:
|
|
310
|
+
print(f"Session: https://fleetai.com/dashboard/sessions/{self.session.session_id}")
|
|
311
|
+
except Exception as e:
|
|
312
|
+
log_verbose(f" [WARN] Session log failed: {e}")
|
|
313
|
+
|
|
295
314
|
# Log all parts for debugging
|
|
296
315
|
log_verbose(f"\n Response parts ({len(candidate.content.parts)}):")
|
|
297
316
|
for i, part in enumerate(candidate.content.parts):
|
|
@@ -415,6 +434,8 @@ async def main():
|
|
|
415
434
|
"url": os.environ.get("FLEET_MCP_URL", "http://localhost:8765"),
|
|
416
435
|
"prompt": os.environ.get("FLEET_TASK_PROMPT", ""),
|
|
417
436
|
"task_key": os.environ.get("FLEET_TASK_KEY", ""),
|
|
437
|
+
"job_id": os.environ.get("FLEET_JOB_ID"),
|
|
438
|
+
"instance_id": os.environ.get("FLEET_INSTANCE_ID"),
|
|
418
439
|
"model": os.environ.get("FLEET_MODEL", "gemini-2.5-pro"),
|
|
419
440
|
"max_steps": int(os.environ.get("FLEET_MAX_STEPS", "100")),
|
|
420
441
|
}
|
|
@@ -430,10 +451,24 @@ async def main():
|
|
|
430
451
|
print(json.dumps(result))
|
|
431
452
|
return result
|
|
432
453
|
|
|
454
|
+
# Create Fleet session for live logging
|
|
455
|
+
session = None
|
|
456
|
+
if os.environ.get("FLEET_API_KEY"):
|
|
457
|
+
session = fleet.session_async(
|
|
458
|
+
job_id=config["job_id"],
|
|
459
|
+
model=config["model"],
|
|
460
|
+
task_key=config["task_key"],
|
|
461
|
+
instance_id=config["instance_id"],
|
|
462
|
+
)
|
|
463
|
+
|
|
433
464
|
async with MCP(config["url"]) as mcp:
|
|
434
|
-
agent = GeminiAgent(mcp, config["model"])
|
|
465
|
+
agent = GeminiAgent(mcp, config["model"], session=session)
|
|
435
466
|
result = await agent.run(config["prompt"], config["max_steps"])
|
|
436
467
|
result["task_key"] = config["task_key"]
|
|
468
|
+
# Include session_id in result so orchestrator can complete it after verification
|
|
469
|
+
if session and session.session_id:
|
|
470
|
+
result["session_id"] = session.session_id
|
|
471
|
+
|
|
437
472
|
print(json.dumps(result))
|
|
438
473
|
return result
|
|
439
474
|
|
|
@@ -18,6 +18,7 @@ from contextlib import asynccontextmanager
|
|
|
18
18
|
from typing import Optional
|
|
19
19
|
|
|
20
20
|
from mcp.server.fastmcp import FastMCP
|
|
21
|
+
from mcp.types import ImageContent, TextContent
|
|
21
22
|
from starlette.requests import Request
|
|
22
23
|
from starlette.responses import JSONResponse
|
|
23
24
|
|
|
@@ -227,9 +228,10 @@ def _dy(y: int) -> int:
|
|
|
227
228
|
|
|
228
229
|
|
|
229
230
|
def _screenshot_response(img: bytes) -> list:
|
|
231
|
+
"""Return screenshot as proper MCP content types."""
|
|
230
232
|
return [
|
|
231
|
-
|
|
232
|
-
|
|
233
|
+
ImageContent(type="image", data=base64.b64encode(img).decode(), mimeType="image/png"),
|
|
234
|
+
TextContent(type="text", text=f"URL: {computer.current_url}"),
|
|
233
235
|
]
|
|
234
236
|
|
|
235
237
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
set -e
|
|
3
|
+
|
|
4
|
+
# Start virtual display if not headless
|
|
5
|
+
if [ "$HEADLESS" != "true" ]; then
|
|
6
|
+
echo "Starting Xvfb virtual display..."
|
|
7
|
+
Xvfb :99 -screen 0 ${SCREEN_WIDTH}x${SCREEN_HEIGHT}x24 &
|
|
8
|
+
sleep 1
|
|
9
|
+
|
|
10
|
+
echo "Starting fluxbox window manager..."
|
|
11
|
+
fluxbox &
|
|
12
|
+
sleep 1
|
|
13
|
+
|
|
14
|
+
echo "Starting VNC server on port $VNC_PORT..."
|
|
15
|
+
x11vnc -display :99 -forever -shared -rfbport $VNC_PORT -nopw &
|
|
16
|
+
sleep 1
|
|
17
|
+
|
|
18
|
+
echo "Starting noVNC on port $NOVNC_PORT..."
|
|
19
|
+
websockify --web=/usr/share/novnc/ $NOVNC_PORT localhost:$VNC_PORT &
|
|
20
|
+
sleep 1
|
|
21
|
+
|
|
22
|
+
echo ""
|
|
23
|
+
echo "=========================================="
|
|
24
|
+
echo " Browser visible at: http://localhost:$NOVNC_PORT/vnc.html"
|
|
25
|
+
echo "=========================================="
|
|
26
|
+
echo ""
|
|
27
|
+
fi
|
|
28
|
+
|
|
29
|
+
# Start the MCP server
|
|
30
|
+
exec python mcp_server.py
|
|
31
|
+
|
|
@@ -21,15 +21,14 @@ import asyncio
|
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
23
|
import os
|
|
24
|
-
import subprocess
|
|
25
24
|
import time
|
|
25
|
+
from datetime import datetime
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
from typing import Dict, List, Optional, Tuple
|
|
28
28
|
|
|
29
|
+
import fleet
|
|
29
30
|
from .utils import get_agent_path
|
|
30
31
|
from .types import AgentConfig, AgentResult, TaskResult
|
|
31
|
-
from fleet.proxy import ProxyManager
|
|
32
|
-
from fleet.eval import TrafficUploader
|
|
33
32
|
|
|
34
33
|
logger = logging.getLogger(__name__)
|
|
35
34
|
|
|
@@ -45,11 +44,6 @@ class AgentOrchestrator:
|
|
|
45
44
|
self._docker_image: Optional[str] = None
|
|
46
45
|
# Track available ports (recycled when tasks complete)
|
|
47
46
|
self._available_ports: List[Tuple[int, int]] = []
|
|
48
|
-
# MITM proxy for traffic capture
|
|
49
|
-
self._proxy: Optional[ProxyManager] = None
|
|
50
|
-
self._proxy_env: Dict[str, str] = {}
|
|
51
|
-
# Traffic uploader (tails proxy log, ships to backend)
|
|
52
|
-
self._uploader: Optional[TrafficUploader] = None
|
|
53
47
|
|
|
54
48
|
async def _get_next_ports(self) -> Tuple[int, int]:
|
|
55
49
|
"""Get next available MCP port and VNC port."""
|
|
@@ -75,38 +69,18 @@ class AgentOrchestrator:
|
|
|
75
69
|
from rich.console import Console
|
|
76
70
|
from rich.live import Live
|
|
77
71
|
from rich.spinner import Spinner
|
|
78
|
-
import uuid
|
|
79
72
|
|
|
80
73
|
console = Console()
|
|
81
74
|
|
|
82
|
-
#
|
|
83
|
-
|
|
84
|
-
|
|
75
|
+
# Create job via Fleet API
|
|
76
|
+
job_name = f"eval-{self.config.agent}-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
77
|
+
self._job_id = await fleet.job_async(name=job_name)
|
|
78
|
+
console.print(f"Job: https://fleetai.com/dashboard/jobs/{self._job_id}")
|
|
85
79
|
|
|
86
80
|
# Create log directory: ~/.fleet/logs/{job_id}/
|
|
87
81
|
self._log_dir = Path.home() / ".fleet" / "logs" / self._job_id
|
|
88
82
|
self._log_dir.mkdir(parents=True, exist_ok=True)
|
|
89
83
|
|
|
90
|
-
# Start MITM proxy for traffic capture
|
|
91
|
-
self._proxy = ProxyManager()
|
|
92
|
-
try:
|
|
93
|
-
self._proxy_env = await self._proxy.start()
|
|
94
|
-
console.print(f"Proxy started, logging to: {self._proxy.log_path}")
|
|
95
|
-
|
|
96
|
-
# Start traffic uploader (tails proxy log, ships raw to backend)
|
|
97
|
-
self._uploader = TrafficUploader(
|
|
98
|
-
job_id=self._job_id,
|
|
99
|
-
log_file=self._proxy.log_path,
|
|
100
|
-
whitelist=None, # No filter - upload everything
|
|
101
|
-
)
|
|
102
|
-
await self._uploader.start()
|
|
103
|
-
except Exception as e:
|
|
104
|
-
console.print(f"[yellow]⚠[/yellow] Proxy failed to start: {e}")
|
|
105
|
-
console.print("[dim] Proxy requires aiohttp: pip install aiohttp[/dim]")
|
|
106
|
-
self._proxy = None
|
|
107
|
-
self._proxy_env = {}
|
|
108
|
-
self._uploader = None
|
|
109
|
-
|
|
110
84
|
# Load tasks with spinner
|
|
111
85
|
with Live(Spinner("dots", text=f"Loading tasks from {self.config.project_key}..."), console=console, transient=True):
|
|
112
86
|
if self.config.task_keys:
|
|
@@ -168,16 +142,6 @@ class AgentOrchestrator:
|
|
|
168
142
|
else:
|
|
169
143
|
final.append(r)
|
|
170
144
|
|
|
171
|
-
# Stop uploader first (flushes remaining entries)
|
|
172
|
-
if self._uploader:
|
|
173
|
-
await self._uploader.stop()
|
|
174
|
-
stats = self._uploader.stats
|
|
175
|
-
console.print(f"Traffic: {stats['read']} read, {stats['uploaded']} uploaded")
|
|
176
|
-
|
|
177
|
-
# Stop proxy
|
|
178
|
-
if self._proxy:
|
|
179
|
-
await self._proxy.stop()
|
|
180
|
-
|
|
181
145
|
# Show logs location
|
|
182
146
|
if hasattr(self, '_log_dir') and self._log_dir.exists():
|
|
183
147
|
session_logs = list(self._log_dir.glob("*.jsonl"))
|
|
@@ -198,17 +162,12 @@ class AgentOrchestrator:
|
|
|
198
162
|
|
|
199
163
|
image_name = f"fleet-cua-{agent_path.name}"
|
|
200
164
|
|
|
201
|
-
#
|
|
202
|
-
# agent_path is like: .../fleet-sdk/fleet/agent/gemini_cua
|
|
203
|
-
# We want: .../fleet-sdk
|
|
204
|
-
fleet_root = agent_path.parent.parent.parent
|
|
205
|
-
|
|
165
|
+
# Build context is the agent directory (all files are self-contained)
|
|
206
166
|
with Live(Spinner("dots", text=f"Building Docker image {image_name}..."), console=console, transient=True):
|
|
207
167
|
proc = await asyncio.create_subprocess_exec(
|
|
208
168
|
"docker", "build",
|
|
209
169
|
"-t", image_name,
|
|
210
|
-
|
|
211
|
-
str(fleet_root), # Build context is repo root
|
|
170
|
+
str(agent_path), # Build context is agent directory
|
|
212
171
|
stdout=asyncio.subprocess.PIPE,
|
|
213
172
|
stderr=asyncio.subprocess.PIPE,
|
|
214
173
|
)
|
|
@@ -280,12 +239,14 @@ class AgentOrchestrator:
|
|
|
280
239
|
port=port,
|
|
281
240
|
task_prompt=task_prompt,
|
|
282
241
|
task_key=task_key,
|
|
242
|
+
instance_id=env.instance_id,
|
|
283
243
|
)
|
|
284
244
|
logger.debug(f"[{short_key}] Agent done: completed={agent_result.completed}")
|
|
285
245
|
|
|
286
246
|
# 4. Run verification
|
|
287
247
|
verification_success = None
|
|
288
248
|
verification_score = None
|
|
249
|
+
verifier_execution_id = None
|
|
289
250
|
|
|
290
251
|
if agent_result.completed and task.verifier:
|
|
291
252
|
logger.info(f"[{task_key}] Running verification...")
|
|
@@ -295,12 +256,27 @@ class AgentOrchestrator:
|
|
|
295
256
|
final_answer=agent_result.final_answer,
|
|
296
257
|
)
|
|
297
258
|
verification_success = v.success
|
|
259
|
+
verifier_execution_id = v.execution_id
|
|
298
260
|
# Score is in v.result (the verifier function's return value)
|
|
299
261
|
verification_score = v.result if isinstance(v.result, (int, float)) else None
|
|
300
262
|
logger.info(f"[{task_key}] Verification: {verification_success}")
|
|
301
263
|
except Exception as e:
|
|
302
264
|
logger.error(f"[{task_key}] Verification error: {e}")
|
|
303
265
|
|
|
266
|
+
# 5. Complete/fail session (session was created by agent, we just complete it)
|
|
267
|
+
session_id = getattr(agent_result, 'session_id', None)
|
|
268
|
+
if session_id:
|
|
269
|
+
try:
|
|
270
|
+
# Create session object to complete it
|
|
271
|
+
session = fleet.session_async(session_id=session_id)
|
|
272
|
+
if verification_success:
|
|
273
|
+
await session.complete(verifier_execution_id=verifier_execution_id)
|
|
274
|
+
else:
|
|
275
|
+
await session.fail(verifier_execution_id=verifier_execution_id)
|
|
276
|
+
logger.info(f"[{task_key}] Session: https://fleetai.com/dashboard/sessions/{session_id}")
|
|
277
|
+
except Exception as e:
|
|
278
|
+
logger.error(f"[{task_key}] Session complete error: {e}")
|
|
279
|
+
|
|
304
280
|
return TaskResult(
|
|
305
281
|
task_key=task_key,
|
|
306
282
|
task_prompt=task_prompt,
|
|
@@ -414,6 +390,7 @@ class AgentOrchestrator:
|
|
|
414
390
|
port: int,
|
|
415
391
|
task_prompt: str,
|
|
416
392
|
task_key: str,
|
|
393
|
+
instance_id: Optional[str] = None,
|
|
417
394
|
) -> AgentResult:
|
|
418
395
|
"""Run agent process."""
|
|
419
396
|
agent_path = get_agent_path(self.config.agent)
|
|
@@ -431,6 +408,7 @@ class AgentOrchestrator:
|
|
|
431
408
|
"FLEET_JOB_ID": self._job_id,
|
|
432
409
|
"FLEET_TASK_PROMPT": task_prompt,
|
|
433
410
|
"FLEET_TASK_KEY": task_key,
|
|
411
|
+
"FLEET_INSTANCE_ID": instance_id or "",
|
|
434
412
|
"FLEET_MODEL": self.config.model,
|
|
435
413
|
"FLEET_MAX_STEPS": str(self.config.max_steps),
|
|
436
414
|
"FLEET_SCREEN_WIDTH": str(self.config.screen_width),
|
|
@@ -438,8 +416,6 @@ class AgentOrchestrator:
|
|
|
438
416
|
"FLEET_VERBOSE": "true" if self.config.verbose else "false",
|
|
439
417
|
})
|
|
440
418
|
env.update(self.config.api_keys)
|
|
441
|
-
# Add proxy env vars for traffic capture
|
|
442
|
-
env.update(self._proxy_env)
|
|
443
419
|
|
|
444
420
|
proc = await asyncio.create_subprocess_exec(
|
|
445
421
|
"python", str(agent_script),
|
|
@@ -494,6 +470,7 @@ class AgentOrchestrator:
|
|
|
494
470
|
steps_taken=result_json.get("steps_taken", 0),
|
|
495
471
|
execution_time_ms=result_json.get("execution_time_ms", 0),
|
|
496
472
|
transcript=result_json.get("transcript", []),
|
|
473
|
+
session_id=result_json.get("session_id"),
|
|
497
474
|
)
|
|
498
475
|
|
|
499
476
|
# Include stderr in error message
|