fleet-python 0.2.88__tar.gz → 0.2.90__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. {fleet_python-0.2.88/fleet_python.egg-info → fleet_python-0.2.90}/PKG-INFO +1 -1
  2. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/__init__.py +1 -1
  3. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/__init__.py +1 -1
  4. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/base.py +1 -1
  5. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/orchestrator.py +306 -118
  6. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/base.py +1 -1
  7. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/cli.py +0 -9
  8. {fleet_python-0.2.88 → fleet_python-0.2.90/fleet_python.egg-info}/PKG-INFO +1 -1
  9. {fleet_python-0.2.88 → fleet_python-0.2.90}/pyproject.toml +1 -1
  10. {fleet_python-0.2.88 → fleet_python-0.2.90}/LICENSE +0 -0
  11. {fleet_python-0.2.88 → fleet_python-0.2.90}/README.md +0 -0
  12. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/diff_example.py +0 -0
  13. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/dsl_example.py +0 -0
  14. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example.py +0 -0
  15. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/exampleResume.py +0 -0
  16. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_account.py +0 -0
  17. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_action_log.py +0 -0
  18. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_client.py +0 -0
  19. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_mcp_anthropic.py +0 -0
  20. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_mcp_openai.py +0 -0
  21. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_sync.py +0 -0
  22. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_task.py +0 -0
  23. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_tasks.py +0 -0
  24. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/example_verifier.py +0 -0
  25. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/export_tasks.py +0 -0
  26. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/fetch_tasks.py +0 -0
  27. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/gemini_example.py +0 -0
  28. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/import_tasks.py +0 -0
  29. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/iterate_verifiers.py +0 -0
  30. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/json_tasks_example.py +0 -0
  31. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/nova_act_example.py +0 -0
  32. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/openai_example.py +0 -0
  33. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/openai_simple_example.py +0 -0
  34. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/query_builder_example.py +0 -0
  35. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/quickstart.py +0 -0
  36. {fleet_python-0.2.88 → fleet_python-0.2.90}/examples/test_cdp_logging.py +0 -0
  37. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/client.py +0 -0
  38. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/env/__init__.py +0 -0
  39. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/env/client.py +0 -0
  40. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/exceptions.py +0 -0
  41. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/global_client.py +0 -0
  42. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/instance/__init__.py +0 -0
  43. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/instance/base.py +0 -0
  44. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/instance/client.py +0 -0
  45. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/models.py +0 -0
  46. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/resources/__init__.py +0 -0
  47. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/resources/base.py +0 -0
  48. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/resources/browser.py +0 -0
  49. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/resources/mcp.py +0 -0
  50. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/resources/sqlite.py +0 -0
  51. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/tasks.py +0 -0
  52. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/verifiers/__init__.py +0 -0
  53. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/verifiers/bundler.py +0 -0
  54. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/_async/verifiers/verifier.py +0 -0
  55. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/__init__.py +0 -0
  56. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/Dockerfile +0 -0
  57. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/__init__.py +0 -0
  58. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/agent.py +0 -0
  59. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/mcp_server.py +0 -0
  60. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/playwright_utils.py +0 -0
  61. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/requirements.txt +0 -0
  62. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/gemini_cua/start.sh +0 -0
  63. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/types.py +0 -0
  64. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/agent/utils.py +0 -0
  65. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/client.py +0 -0
  66. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/config.py +0 -0
  67. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/env/__init__.py +0 -0
  68. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/env/client.py +0 -0
  69. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/eval/__init__.py +0 -0
  70. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/eval/uploader.py +0 -0
  71. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/exceptions.py +0 -0
  72. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/global_client.py +0 -0
  73. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/instance/__init__.py +0 -0
  74. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/instance/base.py +0 -0
  75. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/instance/client.py +0 -0
  76. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/instance/models.py +0 -0
  77. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/models.py +0 -0
  78. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/proxy/__init__.py +0 -0
  79. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/proxy/proxy.py +0 -0
  80. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/proxy/whitelist.py +0 -0
  81. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/resources/__init__.py +0 -0
  82. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/resources/base.py +0 -0
  83. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/resources/browser.py +0 -0
  84. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/resources/mcp.py +0 -0
  85. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/resources/sqlite.py +0 -0
  86. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/tasks.py +0 -0
  87. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/types.py +0 -0
  88. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/utils/__init__.py +0 -0
  89. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/utils/http_logging.py +0 -0
  90. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/utils/logging.py +0 -0
  91. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/utils/playwright.py +0 -0
  92. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/__init__.py +0 -0
  93. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/bundler.py +0 -0
  94. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/code.py +0 -0
  95. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/db.py +0 -0
  96. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/decorator.py +0 -0
  97. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/parse.py +0 -0
  98. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/sql_differ.py +0 -0
  99. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet/verifiers/verifier.py +0 -0
  100. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet_python.egg-info/SOURCES.txt +0 -0
  101. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet_python.egg-info/dependency_links.txt +0 -0
  102. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet_python.egg-info/entry_points.txt +0 -0
  103. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet_python.egg-info/requires.txt +0 -0
  104. {fleet_python-0.2.88 → fleet_python-0.2.90}/fleet_python.egg-info/top_level.txt +0 -0
  105. {fleet_python-0.2.88 → fleet_python-0.2.90}/scripts/fix_sync_imports.py +0 -0
  106. {fleet_python-0.2.88 → fleet_python-0.2.90}/scripts/unasync.py +0 -0
  107. {fleet_python-0.2.88 → fleet_python-0.2.90}/setup.cfg +0 -0
  108. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/__init__.py +0 -0
  109. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/test_app_method.py +0 -0
  110. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/test_expect_only.py +0 -0
  111. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/test_instance_dispatch.py +0 -0
  112. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/test_sqlite_resource_dual_mode.py +0 -0
  113. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/test_sqlite_shared_memory_behavior.py +0 -0
  114. {fleet_python-0.2.88 → fleet_python-0.2.90}/tests/test_verifier_from_string.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.88
3
+ Version: 0.2.90
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -73,7 +73,7 @@ from . import env
73
73
  from . import global_client as _global_client
74
74
  from ._async import global_client as _async_global_client
75
75
 
76
- __version__ = "0.2.88"
76
+ __version__ = "0.2.90"
77
77
 
78
78
  __all__ = [
79
79
  # Core classes
@@ -44,7 +44,7 @@ from ..types import VerifierFunction
44
44
  from .. import env
45
45
  from . import global_client as _async_global_client
46
46
 
47
- __version__ = "0.2.88"
47
+ __version__ = "0.2.90"
48
48
 
49
49
  __all__ = [
50
50
  # Core classes
@@ -26,7 +26,7 @@ from .exceptions import (
26
26
  try:
27
27
  from .. import __version__
28
28
  except ImportError:
29
- __version__ = "0.2.88"
29
+ __version__ = "0.2.90"
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -18,14 +18,15 @@ Usage:
18
18
  """
19
19
 
20
20
  import asyncio
21
+ import atexit
21
22
  import json
22
23
  import logging
23
24
  import os
25
+ import signal
24
26
  import sys
25
27
  import time
26
- from datetime import datetime
27
28
  from pathlib import Path
28
- from typing import Dict, List, Optional, Tuple
29
+ from typing import Dict, List, Optional, Set, Tuple
29
30
 
30
31
  import fleet
31
32
  from .utils import get_agent_path
@@ -33,10 +34,130 @@ from .types import AgentConfig, AgentResult, TaskResult
33
34
 
34
35
  logger = logging.getLogger(__name__)
35
36
 
37
+ # Global tracking of running containers for cleanup on exit
38
+ _running_containers: Set[str] = set()
39
+ _cleanup_registered = False
40
+
41
+
42
+ def _cleanup_all_containers():
43
+ """Kill all tracked running containers. Called on exit/signal."""
44
+ import subprocess
45
+
46
+ if not _running_containers:
47
+ return
48
+
49
+ containers = list(_running_containers)
50
+ logger.debug(f"Cleaning up {len(containers)} container(s)...")
51
+
52
+ for container_id in containers:
53
+ try:
54
+ # Use docker kill for immediate termination
55
+ subprocess.run(
56
+ ["docker", "kill", container_id],
57
+ stdout=subprocess.DEVNULL,
58
+ stderr=subprocess.DEVNULL,
59
+ timeout=5,
60
+ )
61
+ _running_containers.discard(container_id)
62
+ except Exception as e:
63
+ logger.debug(f"Failed to kill container {container_id[:12]}: {e}")
64
+
65
+
66
+ def _register_cleanup():
67
+ """Register cleanup handlers (only once)."""
68
+ global _cleanup_registered
69
+ if _cleanup_registered:
70
+ return
71
+ _cleanup_registered = True
72
+
73
+ # Register atexit handler
74
+ atexit.register(_cleanup_all_containers)
75
+
76
+ # Register signal handlers for graceful shutdown
77
+ def signal_handler(signum, frame):
78
+ _cleanup_all_containers()
79
+ # Re-raise to allow normal signal handling
80
+ signal.default_int_handler(signum, frame)
81
+
82
+ try:
83
+ signal.signal(signal.SIGINT, signal_handler)
84
+ signal.signal(signal.SIGTERM, signal_handler)
85
+ except (ValueError, OSError):
86
+ # Can't set signal handlers in some contexts (e.g., non-main thread)
87
+ pass
88
+
89
+
90
+ def _cleanup_orphaned_containers(image_prefix: str = "fleet-cua-"):
91
+ """Kill any orphaned containers from previous runs.
92
+
93
+ This handles cases where a previous run was force-killed (SIGKILL)
94
+ and containers were left running, which could cause port conflicts.
95
+ """
96
+ import subprocess
97
+
98
+ try:
99
+ # List running containers with our image prefix
100
+ result = subprocess.run(
101
+ [
102
+ "docker",
103
+ "ps",
104
+ "--filter",
105
+ f"ancestor={image_prefix}",
106
+ "-q",
107
+ "--format",
108
+ "{{.ID}} {{.Image}}",
109
+ ],
110
+ capture_output=True,
111
+ text=True,
112
+ timeout=10,
113
+ )
114
+
115
+ if result.returncode != 0:
116
+ return
117
+
118
+ # Parse container IDs - we need to filter by image name pattern
119
+ # Use docker ps with format to get both ID and image
120
+ result = subprocess.run(
121
+ ["docker", "ps", "--format", "{{.ID}}\t{{.Image}}"],
122
+ capture_output=True,
123
+ text=True,
124
+ timeout=10,
125
+ )
126
+
127
+ if result.returncode != 0 or not result.stdout.strip():
128
+ return
129
+
130
+ orphaned = []
131
+ for line in result.stdout.strip().split("\n"):
132
+ if not line:
133
+ continue
134
+ parts = line.split("\t")
135
+ if len(parts) >= 2:
136
+ container_id, image = parts[0], parts[1]
137
+ if image.startswith(image_prefix):
138
+ orphaned.append(container_id)
139
+
140
+ if orphaned:
141
+ logger.info(
142
+ f"Cleaning up {len(orphaned)} orphaned container(s) from previous run..."
143
+ )
144
+ for container_id in orphaned:
145
+ try:
146
+ subprocess.run(
147
+ ["docker", "kill", container_id],
148
+ stdout=subprocess.DEVNULL,
149
+ stderr=subprocess.DEVNULL,
150
+ timeout=5,
151
+ )
152
+ except Exception:
153
+ pass
154
+ except Exception as e:
155
+ logger.debug(f"Failed to check for orphaned containers: {e}")
156
+
36
157
 
37
158
  class AgentOrchestrator:
38
159
  """Orchestrates running agents on Fleet tasks."""
39
-
160
+
40
161
  def __init__(self, config: AgentConfig):
41
162
  self.config = config
42
163
  self._port_counter = config.port_range_start
@@ -45,7 +166,9 @@ class AgentOrchestrator:
45
166
  self._docker_image: Optional[str] = None
46
167
  # Track available ports (recycled when tasks complete)
47
168
  self._available_ports: List[Tuple[int, int]] = []
48
-
169
+ # Register global cleanup handlers
170
+ _register_cleanup()
171
+
49
172
  async def _get_next_ports(self) -> Tuple[int, int]:
50
173
  """Get next available MCP port and VNC port."""
51
174
  async with self._port_lock:
@@ -58,51 +181,60 @@ class AgentOrchestrator:
58
181
  self._port_counter += 1
59
182
  self._vnc_port_counter += 1
60
183
  return port, vnc_port
61
-
184
+
62
185
  async def _release_ports(self, port: int, vnc_port: int):
63
186
  """Return ports to the pool for reuse."""
64
187
  async with self._port_lock:
65
188
  self._available_ports.append((port, vnc_port))
66
-
189
+
67
190
  async def run(self) -> List[TaskResult]:
68
191
  """Run agents on all tasks."""
69
192
  from fleet._async import load_tasks
70
193
  from rich.console import Console
71
194
  from rich.live import Live
72
195
  from rich.spinner import Spinner
73
-
196
+
74
197
  console = Console()
75
-
76
- # Create job via Fleet API
77
- job_name = f"eval-{self.config.agent}-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
78
- self._job_id = await fleet.job_async(name=job_name)
198
+
199
+ # Create job via Fleet API (name generated server-side)
200
+ self._job_id = await fleet.job_async()
79
201
  console.print(f"Job: https://fleetai.com/dashboard/jobs/{self._job_id}")
80
-
202
+
81
203
  # Create log directory: ~/.fleet/logs/{job_id}/
82
204
  self._log_dir = Path.home() / ".fleet" / "logs" / self._job_id
83
205
  self._log_dir.mkdir(parents=True, exist_ok=True)
84
-
206
+
85
207
  # Load tasks with spinner
86
- with Live(Spinner("dots", text=f"Loading tasks from {self.config.project_key}..."), console=console, transient=True):
208
+ with Live(
209
+ Spinner("dots", text=f"Loading tasks from {self.config.project_key}..."),
210
+ console=console,
211
+ transient=True,
212
+ ):
87
213
  if self.config.task_keys:
88
214
  tasks = await load_tasks(keys=self.config.task_keys)
89
215
  elif self.config.project_key:
90
216
  tasks = await load_tasks(project_key=self.config.project_key)
91
217
  else:
92
218
  raise ValueError("Either project_key or task_keys required")
93
-
219
+
94
220
  console.print(f"Loaded {len(tasks)} tasks")
95
-
221
+
96
222
  # Build Docker image
97
223
  agent_path = get_agent_path(self.config.agent)
98
224
  await self._build_docker_image(agent_path)
99
-
225
+
100
226
  # Run tasks with concurrency limit and progress
101
- from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
102
-
227
+ from rich.progress import (
228
+ Progress,
229
+ SpinnerColumn,
230
+ TextColumn,
231
+ BarColumn,
232
+ TaskProgressColumn,
233
+ )
234
+
103
235
  semaphore = asyncio.Semaphore(self.config.max_concurrent)
104
236
  results = [None] * len(tasks)
105
-
237
+
106
238
  with Progress(
107
239
  SpinnerColumn(),
108
240
  TextColumn("[progress.description]{task.description}"),
@@ -111,18 +243,18 @@ class AgentOrchestrator:
111
243
  console=console,
112
244
  ) as progress:
113
245
  task_progress = progress.add_task("Running tasks", total=len(tasks))
114
-
246
+
115
247
  async def run_with_semaphore(idx, task):
116
248
  async with semaphore:
117
249
  result = await self._run_task(task)
118
250
  progress.update(task_progress, advance=1)
119
251
  return idx, result
120
-
252
+
121
253
  completed = await asyncio.gather(
122
254
  *[run_with_semaphore(i, t) for i, t in enumerate(tasks)],
123
255
  return_exceptions=True,
124
256
  )
125
-
257
+
126
258
  # Convert to ordered list
127
259
  for item in completed:
128
260
  if isinstance(item, Exception):
@@ -130,74 +262,85 @@ class AgentOrchestrator:
130
262
  continue
131
263
  idx, result = item
132
264
  results[idx] = result
133
-
265
+
134
266
  # Fill any gaps with error results
135
267
  final = []
136
268
  for i, r in enumerate(results):
137
269
  if r is None:
138
- final.append(TaskResult(
139
- task_key=tasks[i].key,
140
- task_prompt=tasks[i].prompt,
141
- error="Task failed unexpectedly",
142
- ))
270
+ final.append(
271
+ TaskResult(
272
+ task_key=tasks[i].key,
273
+ task_prompt=tasks[i].prompt,
274
+ error="Task failed unexpectedly",
275
+ )
276
+ )
143
277
  else:
144
278
  final.append(r)
145
-
279
+
146
280
  # Show logs location
147
- if hasattr(self, '_log_dir') and self._log_dir.exists():
281
+ if hasattr(self, "_log_dir") and self._log_dir.exists():
148
282
  session_logs = list(self._log_dir.glob("*.jsonl"))
149
283
  console.print(f"Logs: {self._log_dir}/ ({len(session_logs)} sessions)")
150
-
284
+
151
285
  return final
152
-
286
+
153
287
  async def _build_docker_image(self, agent_path: Path):
154
288
  """Build Docker image for CUA server."""
155
289
  from rich.console import Console
156
290
  from rich.live import Live
157
291
  from rich.spinner import Spinner
158
-
292
+
159
293
  console = Console()
160
294
  dockerfile = agent_path / "Dockerfile"
161
295
  if not dockerfile.exists():
162
296
  raise FileNotFoundError(f"Dockerfile not found in {agent_path}")
163
-
297
+
164
298
  image_name = f"fleet-cua-{agent_path.name}"
165
-
299
+
300
+ # Clean up any orphaned containers from previous runs (prevents port conflicts)
301
+ _cleanup_orphaned_containers(image_name)
302
+
166
303
  # Build context is the agent directory (all files are self-contained)
167
- with Live(Spinner("dots", text=f"Building Docker image {image_name}..."), console=console, transient=True):
304
+ with Live(
305
+ Spinner("dots", text=f"Building Docker image {image_name}..."),
306
+ console=console,
307
+ transient=True,
308
+ ):
168
309
  proc = await asyncio.create_subprocess_exec(
169
- "docker", "build",
170
- "-t", image_name,
310
+ "docker",
311
+ "build",
312
+ "-t",
313
+ image_name,
171
314
  str(agent_path), # Build context is agent directory
172
315
  stdout=asyncio.subprocess.PIPE,
173
316
  stderr=asyncio.subprocess.PIPE,
174
317
  )
175
318
  stdout, stderr = await proc.communicate()
176
-
319
+
177
320
  if proc.returncode != 0:
178
321
  console.print(f"[red]✗[/red] Docker build failed")
179
322
  console.print(stderr.decode())
180
323
  raise RuntimeError(f"Docker build failed: {stderr.decode()}")
181
-
324
+
182
325
  self._docker_image = image_name
183
326
  console.print(f"Docker image ready: {image_name}")
184
-
327
+
185
328
  async def _run_task(self, task) -> TaskResult:
186
329
  """Run agent on a single task."""
187
330
  from fleet.env import make_async
188
-
331
+
189
332
  start = time.time()
190
333
  task_key = task.key
191
334
  task_prompt = task.prompt
192
335
  short_key = task_key[:20]
193
-
336
+
194
337
  logger.debug(f"[{short_key}] Starting")
195
-
338
+
196
339
  env = None
197
340
  container_id = None
198
341
  port = None
199
342
  vnc_port = None
200
-
343
+
201
344
  try:
202
345
  # 1. Create Fleet environment
203
346
  logger.debug(f"[{short_key}] Creating env...")
@@ -209,9 +352,9 @@ class AgentOrchestrator:
209
352
  )
210
353
  env_url = env.urls.root
211
354
  logger.debug(f"[{short_key}] Env: {env_url}")
212
-
355
+
213
356
  await asyncio.sleep(3) # Wait for env to be ready
214
-
357
+
215
358
  # 2. Start Docker container with CUA server
216
359
  port, vnc_port = await self._get_next_ports()
217
360
  logger.debug(f"[{short_key}] Starting container on port {port}...")
@@ -223,17 +366,17 @@ class AgentOrchestrator:
223
366
  task_key=task_key,
224
367
  )
225
368
  logger.debug(f"[{short_key}] Container: {container_id[:12]}")
226
-
369
+
227
370
  # Always show instance URL
228
371
  print(f"[{short_key}] Instance: {env_url}")
229
372
  if self.config.headful:
230
373
  print(f"[{short_key}] Browser: http://localhost:{vnc_port}/vnc.html")
231
-
374
+
232
375
  # Wait for server to be ready
233
376
  logger.debug(f"[{short_key}] Waiting for CUA server...")
234
377
  await self._wait_for_server(port)
235
378
  logger.debug(f"[{short_key}] CUA server ready")
236
-
379
+
237
380
  # 3. Run agent
238
381
  logger.debug(f"[{short_key}] Running agent...")
239
382
  agent_result = await self._run_agent(
@@ -242,13 +385,15 @@ class AgentOrchestrator:
242
385
  task_key=task_key,
243
386
  instance_id=env.instance_id,
244
387
  )
245
- logger.debug(f"[{short_key}] Agent done: completed={agent_result.completed}")
246
-
388
+ logger.debug(
389
+ f"[{short_key}] Agent done: completed={agent_result.completed}"
390
+ )
391
+
247
392
  # 4. Run verification
248
393
  verification_success = None
249
394
  verification_score = None
250
395
  verifier_execution_id = None
251
-
396
+
252
397
  if agent_result.completed and task.verifier:
253
398
  logger.info(f"[{task_key}] Running verification...")
254
399
  try:
@@ -259,25 +404,31 @@ class AgentOrchestrator:
259
404
  verification_success = v.success
260
405
  verifier_execution_id = v.execution_id
261
406
  # Score is in v.result (the verifier function's return value)
262
- verification_score = v.result if isinstance(v.result, (int, float)) else None
407
+ verification_score = (
408
+ v.result if isinstance(v.result, (int, float)) else None
409
+ )
263
410
  logger.info(f"[{task_key}] Verification: {verification_success}")
264
411
  except Exception as e:
265
412
  logger.error(f"[{task_key}] Verification error: {e}")
266
-
413
+
267
414
  # 5. Complete/fail session (session was created by agent, we just complete it)
268
- session_id = getattr(agent_result, 'session_id', None)
415
+ session_id = getattr(agent_result, "session_id", None)
269
416
  if session_id:
270
417
  try:
271
418
  # Create session object to complete it
272
419
  session = fleet.session_async(session_id=session_id)
273
420
  if verification_success:
274
- await session.complete(verifier_execution_id=verifier_execution_id)
421
+ await session.complete(
422
+ verifier_execution_id=verifier_execution_id
423
+ )
275
424
  else:
276
425
  await session.fail(verifier_execution_id=verifier_execution_id)
277
- logger.info(f"[{task_key}] Session: https://fleetai.com/dashboard/sessions/{session_id}")
426
+ logger.info(
427
+ f"[{task_key}] Session: https://fleetai.com/dashboard/sessions/{session_id}"
428
+ )
278
429
  except Exception as e:
279
430
  logger.error(f"[{task_key}] Session complete error: {e}")
280
-
431
+
281
432
  return TaskResult(
282
433
  task_key=task_key,
283
434
  task_prompt=task_prompt,
@@ -286,7 +437,7 @@ class AgentOrchestrator:
286
437
  verification_score=verification_score,
287
438
  execution_time_ms=int((time.time() - start) * 1000),
288
439
  )
289
-
440
+
290
441
  except Exception as e:
291
442
  logger.exception(f"[{short_key}] Failed: {e}")
292
443
  return TaskResult(
@@ -295,7 +446,7 @@ class AgentOrchestrator:
295
446
  error=str(e),
296
447
  execution_time_ms=int((time.time() - start) * 1000),
297
448
  )
298
-
449
+
299
450
  finally:
300
451
  # Cleanup
301
452
  if container_id:
@@ -307,7 +458,7 @@ class AgentOrchestrator:
307
458
  await env.close()
308
459
  except:
309
460
  pass
310
-
461
+
311
462
  async def _start_container(
312
463
  self,
313
464
  port: int,
@@ -318,62 +469,95 @@ class AgentOrchestrator:
318
469
  ) -> str:
319
470
  """Start Docker container with CUA server."""
320
471
  headless = "false" if self.config.headful else "true"
321
-
472
+
322
473
  cmd = [
323
- "docker", "run", "-d", "--rm",
324
- "-p", f"{port}:8765",
325
- "-e", f"FLEET_ENV_URL={env_url}",
326
- "-e", f"FLEET_TASK_PROMPT={task_prompt}",
327
- "-e", f"FLEET_TASK_KEY={task_key}",
328
- "-e", f"SCREEN_WIDTH={self.config.screen_width}",
329
- "-e", f"SCREEN_HEIGHT={self.config.screen_height}",
330
- "-e", f"HEADLESS={headless}",
474
+ "docker",
475
+ "run",
476
+ "-d",
477
+ "--rm",
478
+ "-p",
479
+ f"{port}:8765",
480
+ "-e",
481
+ f"FLEET_ENV_URL={env_url}",
482
+ "-e",
483
+ f"FLEET_TASK_PROMPT={task_prompt}",
484
+ "-e",
485
+ f"FLEET_TASK_KEY={task_key}",
486
+ "-e",
487
+ f"SCREEN_WIDTH={self.config.screen_width}",
488
+ "-e",
489
+ f"SCREEN_HEIGHT={self.config.screen_height}",
490
+ "-e",
491
+ f"HEADLESS={headless}",
331
492
  ]
332
-
493
+
333
494
  # Add noVNC port mapping if headful
334
495
  if self.config.headful:
335
496
  cmd.extend(["-p", f"{vnc_port}:6080"])
336
-
497
+
337
498
  cmd.append(self._docker_image)
338
-
499
+
339
500
  proc = await asyncio.create_subprocess_exec(
340
501
  *cmd,
341
502
  stdout=asyncio.subprocess.PIPE,
342
503
  stderr=asyncio.subprocess.PIPE,
343
504
  )
344
505
  stdout, stderr = await proc.communicate()
345
-
506
+
346
507
  if proc.returncode != 0:
347
- raise RuntimeError(f"Container start failed: {stderr.decode()}")
348
-
349
- return stdout.decode().strip()
350
-
508
+ stderr_str = stderr.decode()
509
+ # Check for port conflict
510
+ if (
511
+ "port is already allocated" in stderr_str
512
+ or "address already in use" in stderr_str.lower()
513
+ ):
514
+ raise RuntimeError(
515
+ f"Port conflict on {port} or {vnc_port}. Try again or check for orphaned containers with: docker ps"
516
+ )
517
+ raise RuntimeError(f"Container start failed: {stderr_str}")
518
+
519
+ container_id = stdout.decode().strip()
520
+
521
+ # Track container globally for cleanup on exit
522
+ _running_containers.add(container_id)
523
+
524
+ return container_id
525
+
351
526
  async def _stop_container(self, container_id: str):
352
527
  """Stop Docker container and capture logs."""
353
528
  # Get logs before stopping
354
529
  log_proc = await asyncio.create_subprocess_exec(
355
- "docker", "logs", "--tail", "50", container_id,
530
+ "docker",
531
+ "logs",
532
+ "--tail",
533
+ "50",
534
+ container_id,
356
535
  stdout=asyncio.subprocess.PIPE,
357
536
  stderr=asyncio.subprocess.STDOUT,
358
537
  )
359
538
  logs, _ = await log_proc.communicate()
360
539
  if logs:
361
540
  logger.debug(f"Container {container_id[:12]} logs:\n{logs.decode()}")
362
-
541
+
363
542
  proc = await asyncio.create_subprocess_exec(
364
- "docker", "stop", container_id,
543
+ "docker",
544
+ "stop",
545
+ container_id,
365
546
  stdout=asyncio.subprocess.DEVNULL,
366
547
  stderr=asyncio.subprocess.DEVNULL,
367
548
  )
368
549
  await proc.wait()
369
-
550
+
551
+ # Remove from global tracking
552
+ _running_containers.discard(container_id)
553
+
370
554
  async def _wait_for_server(self, port: int, timeout: int = 60):
371
555
  """Wait for CUA server to be ready."""
372
556
  import aiohttp
373
-
557
+
374
558
  url = f"http://localhost:{port}/health"
375
559
  start = time.time()
376
-
560
+
377
561
  while time.time() - start < timeout:
378
562
  try:
379
563
  async with aiohttp.ClientSession() as session:
@@ -383,9 +567,9 @@ class AgentOrchestrator:
383
567
  except:
384
568
  pass
385
569
  await asyncio.sleep(1)
386
-
570
+
387
571
  raise TimeoutError(f"CUA server not ready after {timeout}s")
388
-
572
+
389
573
  async def _run_agent(
390
574
  self,
391
575
  port: int,
@@ -396,35 +580,40 @@ class AgentOrchestrator:
396
580
  """Run agent process."""
397
581
  agent_path = get_agent_path(self.config.agent)
398
582
  agent_script = agent_path / "agent.py"
399
-
583
+
400
584
  # Set up environment
401
585
  env = os.environ.copy()
402
-
586
+
403
587
  # Session log file: ~/.fleet/logs/{job_id}/{task_key}.jsonl
404
588
  session_log_file = self._log_dir / f"{task_key}.jsonl"
405
-
406
- env.update({
407
- "FLEET_MCP_URL": f"http://localhost:{port}",
408
- "FLEET_SESSION_LOG": str(session_log_file), # Unified session log (MCP + HTTP)
409
- "FLEET_JOB_ID": self._job_id,
410
- "FLEET_TASK_PROMPT": task_prompt,
411
- "FLEET_TASK_KEY": task_key,
412
- "FLEET_INSTANCE_ID": instance_id or "",
413
- "FLEET_MODEL": self.config.model,
414
- "FLEET_MAX_STEPS": str(self.config.max_steps),
415
- "FLEET_SCREEN_WIDTH": str(self.config.screen_width),
416
- "FLEET_SCREEN_HEIGHT": str(self.config.screen_height),
417
- "FLEET_VERBOSE": "true" if self.config.verbose else "false",
418
- })
589
+
590
+ env.update(
591
+ {
592
+ "FLEET_MCP_URL": f"http://localhost:{port}",
593
+ "FLEET_SESSION_LOG": str(
594
+ session_log_file
595
+ ), # Unified session log (MCP + HTTP)
596
+ "FLEET_JOB_ID": self._job_id,
597
+ "FLEET_TASK_PROMPT": task_prompt,
598
+ "FLEET_TASK_KEY": task_key,
599
+ "FLEET_INSTANCE_ID": instance_id or "",
600
+ "FLEET_MODEL": self.config.model,
601
+ "FLEET_MAX_STEPS": str(self.config.max_steps),
602
+ "FLEET_SCREEN_WIDTH": str(self.config.screen_width),
603
+ "FLEET_SCREEN_HEIGHT": str(self.config.screen_height),
604
+ "FLEET_VERBOSE": "true" if self.config.verbose else "false",
605
+ }
606
+ )
419
607
  env.update(self.config.api_keys)
420
-
608
+
421
609
  proc = await asyncio.create_subprocess_exec(
422
- sys.executable, str(agent_script),
610
+ sys.executable,
611
+ str(agent_script),
423
612
  stdout=asyncio.subprocess.PIPE,
424
613
  stderr=asyncio.subprocess.PIPE,
425
614
  env=env,
426
615
  )
427
-
616
+
428
617
  try:
429
618
  stdout, stderr = await asyncio.wait_for(
430
619
  proc.communicate(),
@@ -438,11 +627,11 @@ class AgentOrchestrator:
438
627
  completed=False,
439
628
  error="Agent timeout",
440
629
  )
441
-
630
+
442
631
  # Parse result from stdout/stderr
443
632
  stdout_str = stdout.decode()
444
633
  stderr_str = stderr.decode()
445
-
634
+
446
635
  # Show full output in verbose mode
447
636
  if self.config.verbose:
448
637
  logger.info(f"Agent stdout:\n{stdout_str}")
@@ -452,13 +641,13 @@ class AgentOrchestrator:
452
641
  logger.debug(f"Agent stdout: {stdout_str[:500]}")
453
642
  if stderr_str:
454
643
  logger.debug(f"Agent stderr: {stderr_str[:500]}")
455
-
644
+
456
645
  # Always show stderr if agent crashed (non-zero exit or has stderr)
457
646
  if proc.returncode != 0 or stderr_str:
458
647
  short_key = task_key[:20]
459
648
  if stderr_str:
460
649
  print(f"[{short_key}] Agent stderr: {stderr_str[:500]}")
461
-
650
+
462
651
  result_json = None
463
652
  for line in stdout_str.split("\n"):
464
653
  line = line.strip()
@@ -467,7 +656,7 @@ class AgentOrchestrator:
467
656
  result_json = json.loads(line)
468
657
  except:
469
658
  continue
470
-
659
+
471
660
  if result_json:
472
661
  return AgentResult(
473
662
  task_key=result_json.get("task_key", task_key),
@@ -479,12 +668,12 @@ class AgentOrchestrator:
479
668
  transcript=result_json.get("transcript", []),
480
669
  session_id=result_json.get("session_id"),
481
670
  )
482
-
671
+
483
672
  # Include stderr in error message
484
673
  error_msg = f"Agent failed. stdout: {stdout_str[:300]}"
485
674
  if stderr_str:
486
675
  error_msg += f" | stderr: {stderr_str[:300]}"
487
-
676
+
488
677
  return AgentResult(
489
678
  task_key=task_key,
490
679
  completed=False,
@@ -505,7 +694,7 @@ async def run_agent(
505
694
  verbose: bool = False,
506
695
  ) -> List[TaskResult]:
507
696
  """Run agent on Fleet tasks.
508
-
697
+
509
698
  Args:
510
699
  project_key: Fleet project to run on
511
700
  task_keys: Specific tasks (alternative to project_key)
@@ -517,7 +706,7 @@ async def run_agent(
517
706
  api_keys: API keys (e.g., {"GEMINI_API_KEY": "xxx"})
518
707
  headful: Show browser via noVNC
519
708
  verbose: Enable verbose agent logging
520
-
709
+
521
710
  Returns:
522
711
  List of TaskResult
523
712
  """
@@ -533,7 +722,6 @@ async def run_agent(
533
722
  timeout_seconds=timeout_seconds,
534
723
  api_keys=api_keys or {},
535
724
  )
536
-
725
+
537
726
  orchestrator = AgentOrchestrator(config)
538
727
  return await orchestrator.run()
539
-
@@ -27,7 +27,7 @@ from .exceptions import (
27
27
  try:
28
28
  from . import __version__
29
29
  except ImportError:
30
- __version__ = "0.2.88"
30
+ __version__ = "0.2.90"
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
@@ -732,15 +732,6 @@ def eval_run(
732
732
  return
733
733
 
734
734
  client = get_client()
735
- base_url = os.getenv("FLEET_BASE_URL", CLI_DEFAULT_BASE_URL)
736
-
737
- # Generate a default name if not provided
738
- if not name:
739
- model_short = model[0].split("/")[-1].split("-")[0] if model else "eval"
740
- if project_key:
741
- name = f"{project_key}-{model_short}"
742
- else:
743
- name = f"all-tasks-{model_short}"
744
735
 
745
736
  # Parse BYOK keys
746
737
  byok_keys = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.88
3
+ Version: 0.2.90
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "fleet-python"
7
7
 
8
- version = "0.2.88"
8
+ version = "0.2.90"
9
9
  description = "Python SDK for Fleet environments"
10
10
  authors = [
11
11
  {name = "Fleet AI", email = "nic@fleet.so"},
File without changes
File without changes
File without changes