fleet-python 0.2.83__tar.gz → 0.2.85__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. {fleet_python-0.2.83/fleet_python.egg-info → fleet_python-0.2.85}/PKG-INFO +1 -1
  2. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/__init__.py +1 -1
  3. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/__init__.py +1 -1
  4. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/base.py +1 -1
  5. fleet_python-0.2.85/fleet/agent/gemini_cua/Dockerfile +44 -0
  6. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/gemini_cua/agent.py +61 -26
  7. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/gemini_cua/mcp_server.py +4 -2
  8. fleet_python-0.2.85/fleet/agent/gemini_cua/requirements.txt +4 -0
  9. fleet_python-0.2.85/fleet/agent/gemini_cua/start.sh +31 -0
  10. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/orchestrator.py +28 -51
  11. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/types.py +1 -0
  12. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/base.py +1 -1
  13. fleet_python-0.2.85/fleet/utils/playwright.py +440 -0
  14. {fleet_python-0.2.83 → fleet_python-0.2.85/fleet_python.egg-info}/PKG-INFO +1 -1
  15. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/SOURCES.txt +4 -0
  16. {fleet_python-0.2.83 → fleet_python-0.2.85}/pyproject.toml +4 -1
  17. {fleet_python-0.2.83 → fleet_python-0.2.85}/LICENSE +0 -0
  18. {fleet_python-0.2.83 → fleet_python-0.2.85}/README.md +0 -0
  19. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/diff_example.py +0 -0
  20. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/dsl_example.py +0 -0
  21. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example.py +0 -0
  22. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/exampleResume.py +0 -0
  23. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_account.py +0 -0
  24. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_action_log.py +0 -0
  25. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_client.py +0 -0
  26. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_mcp_anthropic.py +0 -0
  27. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_mcp_openai.py +0 -0
  28. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_sync.py +0 -0
  29. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_task.py +0 -0
  30. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_tasks.py +0 -0
  31. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/example_verifier.py +0 -0
  32. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/export_tasks.py +0 -0
  33. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/fetch_tasks.py +0 -0
  34. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/gemini_example.py +0 -0
  35. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/import_tasks.py +0 -0
  36. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/iterate_verifiers.py +0 -0
  37. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/json_tasks_example.py +0 -0
  38. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/nova_act_example.py +0 -0
  39. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/openai_example.py +0 -0
  40. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/openai_simple_example.py +0 -0
  41. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/query_builder_example.py +0 -0
  42. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/quickstart.py +0 -0
  43. {fleet_python-0.2.83 → fleet_python-0.2.85}/examples/test_cdp_logging.py +0 -0
  44. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/client.py +0 -0
  45. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/env/__init__.py +0 -0
  46. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/env/client.py +0 -0
  47. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/exceptions.py +0 -0
  48. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/global_client.py +0 -0
  49. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/instance/__init__.py +0 -0
  50. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/instance/base.py +0 -0
  51. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/instance/client.py +0 -0
  52. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/models.py +0 -0
  53. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/__init__.py +0 -0
  54. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/base.py +0 -0
  55. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/browser.py +0 -0
  56. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/mcp.py +0 -0
  57. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/resources/sqlite.py +0 -0
  58. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/tasks.py +0 -0
  59. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/verifiers/__init__.py +0 -0
  60. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/verifiers/bundler.py +0 -0
  61. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/_async/verifiers/verifier.py +0 -0
  62. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/__init__.py +0 -0
  63. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/gemini_cua/__init__.py +0 -0
  64. /fleet_python-0.2.83/fleet/utils/playwright.py → /fleet_python-0.2.85/fleet/agent/gemini_cua/playwright_utils.py +0 -0
  65. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/agent/utils.py +0 -0
  66. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/cli.py +0 -0
  67. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/client.py +0 -0
  68. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/config.py +0 -0
  69. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/env/__init__.py +0 -0
  70. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/env/client.py +0 -0
  71. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/eval/__init__.py +0 -0
  72. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/eval/uploader.py +0 -0
  73. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/exceptions.py +0 -0
  74. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/global_client.py +0 -0
  75. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/__init__.py +0 -0
  76. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/base.py +0 -0
  77. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/client.py +0 -0
  78. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/instance/models.py +0 -0
  79. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/models.py +0 -0
  80. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/proxy/__init__.py +0 -0
  81. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/proxy/proxy.py +0 -0
  82. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/proxy/whitelist.py +0 -0
  83. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/__init__.py +0 -0
  84. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/base.py +0 -0
  85. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/browser.py +0 -0
  86. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/mcp.py +0 -0
  87. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/resources/sqlite.py +0 -0
  88. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/tasks.py +0 -0
  89. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/types.py +0 -0
  90. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/utils/__init__.py +0 -0
  91. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/utils/http_logging.py +0 -0
  92. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/utils/logging.py +0 -0
  93. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/__init__.py +0 -0
  94. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/bundler.py +0 -0
  95. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/code.py +0 -0
  96. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/db.py +0 -0
  97. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/decorator.py +0 -0
  98. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/parse.py +0 -0
  99. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/sql_differ.py +0 -0
  100. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet/verifiers/verifier.py +0 -0
  101. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/dependency_links.txt +0 -0
  102. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/entry_points.txt +0 -0
  103. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/requires.txt +0 -0
  104. {fleet_python-0.2.83 → fleet_python-0.2.85}/fleet_python.egg-info/top_level.txt +0 -0
  105. {fleet_python-0.2.83 → fleet_python-0.2.85}/scripts/fix_sync_imports.py +0 -0
  106. {fleet_python-0.2.83 → fleet_python-0.2.85}/scripts/unasync.py +0 -0
  107. {fleet_python-0.2.83 → fleet_python-0.2.85}/setup.cfg +0 -0
  108. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/__init__.py +0 -0
  109. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_app_method.py +0 -0
  110. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_expect_only.py +0 -0
  111. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_instance_dispatch.py +0 -0
  112. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_sqlite_resource_dual_mode.py +0 -0
  113. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_sqlite_shared_memory_behavior.py +0 -0
  114. {fleet_python-0.2.83 → fleet_python-0.2.85}/tests/test_verifier_from_string.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.83
3
+ Version: 0.2.85
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -73,7 +73,7 @@ from . import env
73
73
  from . import global_client as _global_client
74
74
  from ._async import global_client as _async_global_client
75
75
 
76
- __version__ = "0.2.83"
76
+ __version__ = "0.2.85"
77
77
 
78
78
  __all__ = [
79
79
  # Core classes
@@ -44,7 +44,7 @@ from ..types import VerifierFunction
44
44
  from .. import env
45
45
  from . import global_client as _async_global_client
46
46
 
47
- __version__ = "0.2.83"
47
+ __version__ = "0.2.85"
48
48
 
49
49
  __all__ = [
50
50
  # Core classes
@@ -26,7 +26,7 @@ from .exceptions import (
26
26
  try:
27
27
  from .. import __version__
28
28
  except ImportError:
29
- __version__ = "0.2.83"
29
+ __version__ = "0.2.85"
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -0,0 +1,44 @@
1
+ # MCP Server - Browser control in Docker with optional VNC
2
+ FROM python:3.11-slim
3
+
4
+ # Install dependencies for Chromium and VNC
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ # Chromium dependencies
7
+ wget fonts-liberation libasound2 libatk-bridge2.0-0 libatk1.0-0 \
8
+ libatspi2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
9
+ libnspr4 libnss3 libxcomposite1 libxdamage1 libxfixes3 libxkbcommon0 \
10
+ libxrandr2 xdg-utils \
11
+ # VNC and display for headful mode
12
+ xvfb x11vnc fluxbox \
13
+ # noVNC for web-based viewing
14
+ novnc websockify \
15
+ # Utilities
16
+ procps net-tools \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ WORKDIR /app
20
+
21
+ # Install Python deps
22
+ COPY requirements.txt .
23
+ RUN pip install --no-cache-dir -r requirements.txt && playwright install chromium
24
+
25
+ # Copy server files (all from same directory)
26
+ COPY playwright_utils.py .
27
+ COPY mcp_server.py .
28
+ COPY start.sh .
29
+ RUN chmod +x start.sh
30
+
31
+ # Environment
32
+ ENV PORT=8765 \
33
+ SCREEN_WIDTH=1366 \
34
+ SCREEN_HEIGHT=768 \
35
+ HEADLESS=true \
36
+ VNC_PORT=5900 \
37
+ NOVNC_PORT=6080 \
38
+ DISPLAY=:99
39
+
40
+ # Expose ports: MCP server, VNC, noVNC
41
+ EXPOSE 8765 5900 6080
42
+
43
+ # Start script handles display setup
44
+ CMD ["./start.sh"]
@@ -25,6 +25,7 @@ from mcp import ClientSession
25
25
  from mcp.client.streamable_http import streamable_http_client
26
26
  from google import genai
27
27
  from google.genai import types
28
+ import fleet
28
29
  from fleet.utils.logging import log_verbose, VERBOSE
29
30
 
30
31
  # Whitelist hooks for auto-detecting model endpoints (optional)
@@ -136,20 +137,36 @@ class MCP:
136
137
  result = await self._session.call_tool(name, args or {})
137
138
  duration_ms = int((time.time() - start_time) * 1000)
138
139
 
140
+ # Debug: log raw MCP result structure
141
+ log_verbose(f" MCP result.content ({len(result.content)} items):")
142
+ for i, item in enumerate(result.content):
143
+ log_verbose(f" [{i}] type={type(item).__name__}, attrs={dir(item)[:10]}...")
144
+ if hasattr(item, "type"):
145
+ log_verbose(f" .type = {repr(item.type)}")
146
+ if hasattr(item, "data"):
147
+ data_preview = str(item.data)[:50] if item.data else "None"
148
+ log_verbose(f" .data = {data_preview}...")
149
+
150
+ # Helper to get attribute or dict key
151
+ def _get(item, key, default=None):
152
+ if isinstance(item, dict):
153
+ return item.get(key, default)
154
+ return getattr(item, key, default)
155
+
139
156
  # Convert MCP result to dict format expected by agent
140
157
  content = []
141
158
  for item in result.content:
142
- if hasattr(item, "type"):
143
- if item.type == "image":
144
- content.append({
145
- "type": "image",
146
- "data": item.data[:100] + "..." if len(item.data) > 100 else item.data, # Truncate for logging
147
- "mimeType": getattr(item, "mimeType", "image/png"),
148
- })
149
- elif item.type == "text":
150
- content.append({"type": "text", "text": item.text})
159
+ item_type = _get(item, "type")
160
+ if item_type == "image":
161
+ content.append({
162
+ "type": "image",
163
+ "data": _get(item, "data", ""),
164
+ "mimeType": _get(item, "mimeType", "image/png"),
165
+ })
166
+ elif item_type == "text":
167
+ content.append({"type": "text", "text": _get(item, "text", "")})
151
168
 
152
- # Log the call
169
+ # Log the call (just types, not data)
153
170
  self._log({
154
171
  "type": "mcp_call",
155
172
  "tool": name,
@@ -158,20 +175,7 @@ class MCP:
158
175
  "response_content_types": [c.get("type") for c in content],
159
176
  "is_error": result.isError if hasattr(result, "isError") else False,
160
177
  })
161
-
162
- # Return full content (not truncated)
163
- full_content = []
164
- for item in result.content:
165
- if hasattr(item, "type"):
166
- if item.type == "image":
167
- full_content.append({
168
- "type": "image",
169
- "data": item.data,
170
- "mimeType": getattr(item, "mimeType", "image/png"),
171
- })
172
- elif item.type == "text":
173
- full_content.append({"type": "text", "text": item.text})
174
- return {"content": full_content, "isError": result.isError if hasattr(result, "isError") else False}
178
+ return {"content": content, "isError": result.isError if hasattr(result, "isError") else False}
175
179
 
176
180
  def get_tools(self) -> List[Dict]:
177
181
  """Return the list of tools from the server."""
@@ -201,12 +205,13 @@ def get_image_data(result: Dict) -> Optional[str]:
201
205
  class GeminiAgent:
202
206
  """Gemini Computer Use Agent."""
203
207
 
204
- def __init__(self, mcp: MCP, model: str):
208
+ def __init__(self, mcp: MCP, model: str, session=None):
205
209
  self.mcp = mcp
206
210
  # Strip provider prefix if present
207
211
  self.model = model.split("/")[-1] if "/" in model else model
208
212
  self.client = get_gemini_client()
209
213
  self.transcript: List[Dict] = []
214
+ self.session = session # Fleet session for live logging
210
215
 
211
216
  async def _execute_tool(self, name: str, args: Dict) -> Dict:
212
217
  return await self.mcp.call(name, args)
@@ -251,8 +256,13 @@ STRICT RULES:
251
256
  max_output_tokens=4096,
252
257
  system_instruction=system_prompt,
253
258
  tools=[types.Tool(function_declarations=gemini_tools)],
259
+ thinking_config=types.ThinkingConfig(include_thoughts=True),
254
260
  )
255
261
 
262
+ # Set config on session for logging (if session exists)
263
+ if self.session:
264
+ self.session.config = config
265
+
256
266
  history: List[types.Content] = []
257
267
 
258
268
  user_prompt = f"""###User instruction: {prompt}"""
@@ -292,6 +302,15 @@ STRICT RULES:
292
302
  log_verbose(f" Candidate: {candidate}")
293
303
  continue
294
304
 
305
+ # Log to Fleet session (live)
306
+ if self.session:
307
+ try:
308
+ await self.session.log(history, response)
309
+ if step == 1 and self.session.session_id:
310
+ print(f"Session: https://fleetai.com/dashboard/sessions/{self.session.session_id}")
311
+ except Exception as e:
312
+ log_verbose(f" [WARN] Session log failed: {e}")
313
+
295
314
  # Log all parts for debugging
296
315
  log_verbose(f"\n Response parts ({len(candidate.content.parts)}):")
297
316
  for i, part in enumerate(candidate.content.parts):
@@ -415,6 +434,8 @@ async def main():
415
434
  "url": os.environ.get("FLEET_MCP_URL", "http://localhost:8765"),
416
435
  "prompt": os.environ.get("FLEET_TASK_PROMPT", ""),
417
436
  "task_key": os.environ.get("FLEET_TASK_KEY", ""),
437
+ "job_id": os.environ.get("FLEET_JOB_ID"),
438
+ "instance_id": os.environ.get("FLEET_INSTANCE_ID"),
418
439
  "model": os.environ.get("FLEET_MODEL", "gemini-2.5-pro"),
419
440
  "max_steps": int(os.environ.get("FLEET_MAX_STEPS", "100")),
420
441
  }
@@ -430,10 +451,24 @@ async def main():
430
451
  print(json.dumps(result))
431
452
  return result
432
453
 
454
+ # Create Fleet session for live logging
455
+ session = None
456
+ if os.environ.get("FLEET_API_KEY"):
457
+ session = fleet.session_async(
458
+ job_id=config["job_id"],
459
+ model=config["model"],
460
+ task_key=config["task_key"],
461
+ instance_id=config["instance_id"],
462
+ )
463
+
433
464
  async with MCP(config["url"]) as mcp:
434
- agent = GeminiAgent(mcp, config["model"])
465
+ agent = GeminiAgent(mcp, config["model"], session=session)
435
466
  result = await agent.run(config["prompt"], config["max_steps"])
436
467
  result["task_key"] = config["task_key"]
468
+ # Include session_id in result so orchestrator can complete it after verification
469
+ if session and session.session_id:
470
+ result["session_id"] = session.session_id
471
+
437
472
  print(json.dumps(result))
438
473
  return result
439
474
 
@@ -18,6 +18,7 @@ from contextlib import asynccontextmanager
18
18
  from typing import Optional
19
19
 
20
20
  from mcp.server.fastmcp import FastMCP
21
+ from mcp.types import ImageContent, TextContent
21
22
  from starlette.requests import Request
22
23
  from starlette.responses import JSONResponse
23
24
 
@@ -227,9 +228,10 @@ def _dy(y: int) -> int:
227
228
 
228
229
 
229
230
  def _screenshot_response(img: bytes) -> list:
231
+ """Return screenshot as proper MCP content types."""
230
232
  return [
231
- {"type": "image", "data": base64.b64encode(img).decode(), "mimeType": "image/png"},
232
- {"type": "text", "text": f"URL: {computer.current_url}"}
233
+ ImageContent(type="image", data=base64.b64encode(img).decode(), mimeType="image/png"),
234
+ TextContent(type="text", text=f"URL: {computer.current_url}"),
233
235
  ]
234
236
 
235
237
 
@@ -0,0 +1,4 @@
1
+ playwright>=1.40.0
2
+ mcp[cli]>=1.2.0
3
+ uvicorn>=0.30.0
4
+ starlette>=0.38.0
@@ -0,0 +1,31 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ # Start virtual display if not headless
5
+ if [ "$HEADLESS" != "true" ]; then
6
+ echo "Starting Xvfb virtual display..."
7
+ Xvfb :99 -screen 0 ${SCREEN_WIDTH}x${SCREEN_HEIGHT}x24 &
8
+ sleep 1
9
+
10
+ echo "Starting fluxbox window manager..."
11
+ fluxbox &
12
+ sleep 1
13
+
14
+ echo "Starting VNC server on port $VNC_PORT..."
15
+ x11vnc -display :99 -forever -shared -rfbport $VNC_PORT -nopw &
16
+ sleep 1
17
+
18
+ echo "Starting noVNC on port $NOVNC_PORT..."
19
+ websockify --web=/usr/share/novnc/ $NOVNC_PORT localhost:$VNC_PORT &
20
+ sleep 1
21
+
22
+ echo ""
23
+ echo "=========================================="
24
+ echo " Browser visible at: http://localhost:$NOVNC_PORT/vnc.html"
25
+ echo "=========================================="
26
+ echo ""
27
+ fi
28
+
29
+ # Start the MCP server
30
+ exec python mcp_server.py
31
+
@@ -21,15 +21,14 @@ import asyncio
21
21
  import json
22
22
  import logging
23
23
  import os
24
- import subprocess
25
24
  import time
25
+ from datetime import datetime
26
26
  from pathlib import Path
27
27
  from typing import Dict, List, Optional, Tuple
28
28
 
29
+ import fleet
29
30
  from .utils import get_agent_path
30
31
  from .types import AgentConfig, AgentResult, TaskResult
31
- from fleet.proxy import ProxyManager
32
- from fleet.eval import TrafficUploader
33
32
 
34
33
  logger = logging.getLogger(__name__)
35
34
 
@@ -45,11 +44,6 @@ class AgentOrchestrator:
45
44
  self._docker_image: Optional[str] = None
46
45
  # Track available ports (recycled when tasks complete)
47
46
  self._available_ports: List[Tuple[int, int]] = []
48
- # MITM proxy for traffic capture
49
- self._proxy: Optional[ProxyManager] = None
50
- self._proxy_env: Dict[str, str] = {}
51
- # Traffic uploader (tails proxy log, ships to backend)
52
- self._uploader: Optional[TrafficUploader] = None
53
47
 
54
48
  async def _get_next_ports(self) -> Tuple[int, int]:
55
49
  """Get next available MCP port and VNC port."""
@@ -75,38 +69,18 @@ class AgentOrchestrator:
75
69
  from rich.console import Console
76
70
  from rich.live import Live
77
71
  from rich.spinner import Spinner
78
- import uuid
79
72
 
80
73
  console = Console()
81
74
 
82
- # Generate job ID for this run
83
- self._job_id = f"eval_{uuid.uuid4().hex[:12]}"
84
- console.print(f"Eval job: {self._job_id}")
75
+ # Create job via Fleet API
76
+ job_name = f"eval-{self.config.agent}-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
77
+ self._job_id = await fleet.job_async(name=job_name)
78
+ console.print(f"Job: https://fleetai.com/dashboard/jobs/{self._job_id}")
85
79
 
86
80
  # Create log directory: ~/.fleet/logs/{job_id}/
87
81
  self._log_dir = Path.home() / ".fleet" / "logs" / self._job_id
88
82
  self._log_dir.mkdir(parents=True, exist_ok=True)
89
83
 
90
- # Start MITM proxy for traffic capture
91
- self._proxy = ProxyManager()
92
- try:
93
- self._proxy_env = await self._proxy.start()
94
- console.print(f"Proxy started, logging to: {self._proxy.log_path}")
95
-
96
- # Start traffic uploader (tails proxy log, ships raw to backend)
97
- self._uploader = TrafficUploader(
98
- job_id=self._job_id,
99
- log_file=self._proxy.log_path,
100
- whitelist=None, # No filter - upload everything
101
- )
102
- await self._uploader.start()
103
- except Exception as e:
104
- console.print(f"[yellow]⚠[/yellow] Proxy failed to start: {e}")
105
- console.print("[dim] Proxy requires aiohttp: pip install aiohttp[/dim]")
106
- self._proxy = None
107
- self._proxy_env = {}
108
- self._uploader = None
109
-
110
84
  # Load tasks with spinner
111
85
  with Live(Spinner("dots", text=f"Loading tasks from {self.config.project_key}..."), console=console, transient=True):
112
86
  if self.config.task_keys:
@@ -168,16 +142,6 @@ class AgentOrchestrator:
168
142
  else:
169
143
  final.append(r)
170
144
 
171
- # Stop uploader first (flushes remaining entries)
172
- if self._uploader:
173
- await self._uploader.stop()
174
- stats = self._uploader.stats
175
- console.print(f"Traffic: {stats['read']} read, {stats['uploaded']} uploaded")
176
-
177
- # Stop proxy
178
- if self._proxy:
179
- await self._proxy.stop()
180
-
181
145
  # Show logs location
182
146
  if hasattr(self, '_log_dir') and self._log_dir.exists():
183
147
  session_logs = list(self._log_dir.glob("*.jsonl"))
@@ -198,17 +162,12 @@ class AgentOrchestrator:
198
162
 
199
163
  image_name = f"fleet-cua-{agent_path.name}"
200
164
 
201
- # Use fleet SDK root as build context (so Dockerfile can access fleet/utils)
202
- # agent_path is like: .../fleet-sdk/fleet/agent/gemini_cua
203
- # We want: .../fleet-sdk
204
- fleet_root = agent_path.parent.parent.parent
205
-
165
+ # Build context is the agent directory (all files are self-contained)
206
166
  with Live(Spinner("dots", text=f"Building Docker image {image_name}..."), console=console, transient=True):
207
167
  proc = await asyncio.create_subprocess_exec(
208
168
  "docker", "build",
209
169
  "-t", image_name,
210
- "-f", str(dockerfile),
211
- str(fleet_root), # Build context is repo root
170
+ str(agent_path), # Build context is agent directory
212
171
  stdout=asyncio.subprocess.PIPE,
213
172
  stderr=asyncio.subprocess.PIPE,
214
173
  )
@@ -280,12 +239,14 @@ class AgentOrchestrator:
280
239
  port=port,
281
240
  task_prompt=task_prompt,
282
241
  task_key=task_key,
242
+ instance_id=env.instance_id,
283
243
  )
284
244
  logger.debug(f"[{short_key}] Agent done: completed={agent_result.completed}")
285
245
 
286
246
  # 4. Run verification
287
247
  verification_success = None
288
248
  verification_score = None
249
+ verifier_execution_id = None
289
250
 
290
251
  if agent_result.completed and task.verifier:
291
252
  logger.info(f"[{task_key}] Running verification...")
@@ -295,12 +256,27 @@ class AgentOrchestrator:
295
256
  final_answer=agent_result.final_answer,
296
257
  )
297
258
  verification_success = v.success
259
+ verifier_execution_id = v.execution_id
298
260
  # Score is in v.result (the verifier function's return value)
299
261
  verification_score = v.result if isinstance(v.result, (int, float)) else None
300
262
  logger.info(f"[{task_key}] Verification: {verification_success}")
301
263
  except Exception as e:
302
264
  logger.error(f"[{task_key}] Verification error: {e}")
303
265
 
266
+ # 5. Complete/fail session (session was created by agent, we just complete it)
267
+ session_id = getattr(agent_result, 'session_id', None)
268
+ if session_id:
269
+ try:
270
+ # Create session object to complete it
271
+ session = fleet.session_async(session_id=session_id)
272
+ if verification_success:
273
+ await session.complete(verifier_execution_id=verifier_execution_id)
274
+ else:
275
+ await session.fail(verifier_execution_id=verifier_execution_id)
276
+ logger.info(f"[{task_key}] Session: https://fleetai.com/dashboard/sessions/{session_id}")
277
+ except Exception as e:
278
+ logger.error(f"[{task_key}] Session complete error: {e}")
279
+
304
280
  return TaskResult(
305
281
  task_key=task_key,
306
282
  task_prompt=task_prompt,
@@ -414,6 +390,7 @@ class AgentOrchestrator:
414
390
  port: int,
415
391
  task_prompt: str,
416
392
  task_key: str,
393
+ instance_id: Optional[str] = None,
417
394
  ) -> AgentResult:
418
395
  """Run agent process."""
419
396
  agent_path = get_agent_path(self.config.agent)
@@ -431,6 +408,7 @@ class AgentOrchestrator:
431
408
  "FLEET_JOB_ID": self._job_id,
432
409
  "FLEET_TASK_PROMPT": task_prompt,
433
410
  "FLEET_TASK_KEY": task_key,
411
+ "FLEET_INSTANCE_ID": instance_id or "",
434
412
  "FLEET_MODEL": self.config.model,
435
413
  "FLEET_MAX_STEPS": str(self.config.max_steps),
436
414
  "FLEET_SCREEN_WIDTH": str(self.config.screen_width),
@@ -438,8 +416,6 @@ class AgentOrchestrator:
438
416
  "FLEET_VERBOSE": "true" if self.config.verbose else "false",
439
417
  })
440
418
  env.update(self.config.api_keys)
441
- # Add proxy env vars for traffic capture
442
- env.update(self._proxy_env)
443
419
 
444
420
  proc = await asyncio.create_subprocess_exec(
445
421
  "python", str(agent_script),
@@ -494,6 +470,7 @@ class AgentOrchestrator:
494
470
  steps_taken=result_json.get("steps_taken", 0),
495
471
  execution_time_ms=result_json.get("execution_time_ms", 0),
496
472
  transcript=result_json.get("transcript", []),
473
+ session_id=result_json.get("session_id"),
497
474
  )
498
475
 
499
476
  # Include stderr in error message
@@ -33,6 +33,7 @@ class AgentResult(BaseModel):
33
33
  steps_taken: int = 0
34
34
  execution_time_ms: int = 0
35
35
  transcript: List[Dict[str, Any]] = Field(default_factory=list)
36
+ session_id: Optional[str] = None # Fleet session ID for completion
36
37
 
37
38
 
38
39
  class TaskResult(BaseModel):
@@ -27,7 +27,7 @@ from .exceptions import (
27
27
  try:
28
28
  from . import __version__
29
29
  except ImportError:
30
- __version__ = "0.2.83"
30
+ __version__ = "0.2.85"
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33