fleet-python 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/export_tasks.py +16 -5
- examples/export_tasks_filtered.py +245 -0
- examples/fetch_tasks.py +230 -0
- examples/import_tasks.py +140 -8
- examples/iterate_verifiers.py +725 -0
- fleet/__init__.py +128 -5
- fleet/_async/__init__.py +27 -3
- fleet/_async/base.py +24 -9
- fleet/_async/client.py +938 -41
- fleet/_async/env/client.py +60 -3
- fleet/_async/instance/client.py +52 -7
- fleet/_async/models.py +15 -0
- fleet/_async/resources/api.py +200 -0
- fleet/_async/resources/sqlite.py +1801 -46
- fleet/_async/tasks.py +122 -25
- fleet/_async/verifiers/bundler.py +22 -21
- fleet/_async/verifiers/verifier.py +25 -19
- fleet/agent/__init__.py +32 -0
- fleet/agent/gemini_cua/Dockerfile +45 -0
- fleet/agent/gemini_cua/__init__.py +10 -0
- fleet/agent/gemini_cua/agent.py +759 -0
- fleet/agent/gemini_cua/mcp/main.py +108 -0
- fleet/agent/gemini_cua/mcp_server/__init__.py +5 -0
- fleet/agent/gemini_cua/mcp_server/main.py +105 -0
- fleet/agent/gemini_cua/mcp_server/tools.py +178 -0
- fleet/agent/gemini_cua/requirements.txt +5 -0
- fleet/agent/gemini_cua/start.sh +30 -0
- fleet/agent/orchestrator.py +854 -0
- fleet/agent/types.py +49 -0
- fleet/agent/utils.py +34 -0
- fleet/base.py +34 -9
- fleet/cli.py +1061 -0
- fleet/client.py +1060 -48
- fleet/config.py +1 -1
- fleet/env/__init__.py +16 -0
- fleet/env/client.py +60 -3
- fleet/eval/__init__.py +15 -0
- fleet/eval/uploader.py +231 -0
- fleet/exceptions.py +8 -0
- fleet/instance/client.py +53 -8
- fleet/instance/models.py +1 -0
- fleet/models.py +303 -0
- fleet/proxy/__init__.py +25 -0
- fleet/proxy/proxy.py +453 -0
- fleet/proxy/whitelist.py +244 -0
- fleet/resources/api.py +200 -0
- fleet/resources/sqlite.py +1845 -46
- fleet/tasks.py +113 -20
- fleet/utils/__init__.py +7 -0
- fleet/utils/http_logging.py +178 -0
- fleet/utils/logging.py +13 -0
- fleet/utils/playwright.py +440 -0
- fleet/verifiers/bundler.py +22 -21
- fleet/verifiers/db.py +985 -1
- fleet/verifiers/decorator.py +1 -1
- fleet/verifiers/verifier.py +25 -19
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/METADATA +28 -1
- fleet_python-0.2.105.dist-info/RECORD +115 -0
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/WHEEL +1 -1
- fleet_python-0.2.105.dist-info/entry_points.txt +2 -0
- tests/test_app_method.py +85 -0
- tests/test_expect_exactly.py +4148 -0
- tests/test_expect_only.py +2593 -0
- tests/test_instance_dispatch.py +607 -0
- tests/test_sqlite_resource_dual_mode.py +263 -0
- tests/test_sqlite_shared_memory_behavior.py +117 -0
- fleet_python-0.2.66b2.dist-info/RECORD +0 -81
- tests/test_verifier_security.py +0 -427
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/licenses/LICENSE +0 -0
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/top_level.txt +0 -0
fleet/_async/tasks.py
CHANGED
|
@@ -38,6 +38,12 @@ class Task(BaseModel):
|
|
|
38
38
|
metadata: Optional[Dict[str, Any]] = Field(
|
|
39
39
|
default_factory=dict, description="Additional task metadata"
|
|
40
40
|
)
|
|
41
|
+
writer_metadata: Optional[Dict[str, Any]] = Field(
|
|
42
|
+
None, description="Metadata filled by task writer"
|
|
43
|
+
)
|
|
44
|
+
qa_metadata: Optional[Dict[str, Any]] = Field(
|
|
45
|
+
None, description="Metadata filled by QA reviewer"
|
|
46
|
+
)
|
|
41
47
|
output_json_schema: Optional[Dict[str, Any]] = Field(
|
|
42
48
|
None, description="JSON schema for expected output format"
|
|
43
49
|
)
|
|
@@ -209,23 +215,33 @@ class Task(BaseModel):
|
|
|
209
215
|
)
|
|
210
216
|
self.verifier = verifier
|
|
211
217
|
|
|
212
|
-
async def make_env(
|
|
218
|
+
async def make_env(
|
|
219
|
+
self,
|
|
220
|
+
region: Optional[str] = None,
|
|
221
|
+
image_type: Optional[str] = None,
|
|
222
|
+
ttl_seconds: Optional[int] = None,
|
|
223
|
+
run_id: Optional[str] = None,
|
|
224
|
+
heartbeat_interval: Optional[int] = None,
|
|
225
|
+
):
|
|
213
226
|
"""Create an environment instance for this task's environment.
|
|
214
227
|
|
|
215
|
-
Uses the task's env_id (and version if present) to create the env.
|
|
228
|
+
Alias for make() method. Uses the task's env_id (and version if present) to create the env.
|
|
216
229
|
"""
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
230
|
+
return await self.make(
|
|
231
|
+
region=region,
|
|
232
|
+
image_type=image_type,
|
|
233
|
+
ttl_seconds=ttl_seconds,
|
|
234
|
+
run_id=run_id,
|
|
235
|
+
heartbeat_interval=heartbeat_interval,
|
|
236
|
+
)
|
|
223
237
|
|
|
224
238
|
async def make(
|
|
225
239
|
self,
|
|
226
240
|
region: Optional[str] = None,
|
|
227
241
|
image_type: Optional[str] = None,
|
|
228
242
|
ttl_seconds: Optional[int] = None,
|
|
243
|
+
run_id: Optional[str] = None,
|
|
244
|
+
heartbeat_interval: Optional[int] = None,
|
|
229
245
|
):
|
|
230
246
|
"""Create an environment instance with task's configuration.
|
|
231
247
|
|
|
@@ -233,11 +249,15 @@ class Task(BaseModel):
|
|
|
233
249
|
- env_key (env_id + version)
|
|
234
250
|
- data_key (data_id + data_version, if present)
|
|
235
251
|
- env_variables (if present)
|
|
252
|
+
- run_id (if present)
|
|
253
|
+
- heartbeat_interval (if present)
|
|
236
254
|
|
|
237
255
|
Args:
|
|
238
256
|
region: Optional AWS region for the environment
|
|
239
257
|
image_type: Optional image type for the environment
|
|
240
258
|
ttl_seconds: Optional TTL in seconds for the instance
|
|
259
|
+
run_id: Optional run ID to group instances
|
|
260
|
+
heartbeat_interval: Optional heartbeat interval in seconds (30-3600)
|
|
241
261
|
|
|
242
262
|
Returns:
|
|
243
263
|
Environment instance configured for this task
|
|
@@ -245,7 +265,7 @@ class Task(BaseModel):
|
|
|
245
265
|
Example:
|
|
246
266
|
task = fleet.Task(key="my-task", prompt="...", env_id="my-env",
|
|
247
267
|
data_id="my-data", data_version="v1.0")
|
|
248
|
-
env = await task.make(region="us-west-2")
|
|
268
|
+
env = await task.make(region="us-west-2", run_id="my-batch-123", heartbeat_interval=60)
|
|
249
269
|
"""
|
|
250
270
|
if not self.env_id:
|
|
251
271
|
raise ValueError("Task has no env_id defined")
|
|
@@ -260,11 +280,13 @@ class Task(BaseModel):
|
|
|
260
280
|
env_variables=self.env_variables if self.env_variables else None,
|
|
261
281
|
image_type=image_type,
|
|
262
282
|
ttl_seconds=ttl_seconds,
|
|
283
|
+
run_id=run_id,
|
|
284
|
+
heartbeat_interval=heartbeat_interval,
|
|
263
285
|
)
|
|
264
286
|
|
|
265
287
|
|
|
266
288
|
def verifier_from_string(
|
|
267
|
-
verifier_func: str, verifier_id: str, verifier_key: str, sha256: str = ""
|
|
289
|
+
verifier_func: str, verifier_id: str, verifier_key: str, sha256: str = "", verifier_runtime_version: str = ""
|
|
268
290
|
) -> "VerifierFunction":
|
|
269
291
|
"""Create a verifier function from string code.
|
|
270
292
|
|
|
@@ -273,32 +295,87 @@ def verifier_from_string(
|
|
|
273
295
|
verifier_id: Unique identifier for the verifier
|
|
274
296
|
verifier_key: Key/name for the verifier
|
|
275
297
|
sha256: SHA256 hash of the verifier code
|
|
298
|
+
verifier_runtime_version: Verifier runtime version
|
|
276
299
|
|
|
277
300
|
Returns:
|
|
278
301
|
VerifierFunction instance that can be used to verify tasks
|
|
279
302
|
"""
|
|
280
303
|
try:
|
|
281
304
|
import inspect
|
|
305
|
+
import re
|
|
306
|
+
import json
|
|
307
|
+
import string
|
|
282
308
|
from .verifiers.verifier import AsyncVerifierFunction
|
|
283
309
|
from fleet.verifiers.code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
|
|
284
310
|
from fleet.verifiers.db import IgnoreConfig
|
|
285
|
-
from fleet.verifiers.parsing import parse_and_validate_verifier
|
|
286
311
|
|
|
287
|
-
#
|
|
288
|
-
#
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
#
|
|
292
|
-
|
|
312
|
+
# Strip @verifier decorator if present to avoid double-wrapping
|
|
313
|
+
# Remove lines like: @verifier(key="...")
|
|
314
|
+
cleaned_code = re.sub(r"@verifier\([^)]*\)\s*\n", "", verifier_func)
|
|
315
|
+
# Also remove the verifier import if present
|
|
316
|
+
# Use MULTILINE flag to match beginning of lines with ^
|
|
317
|
+
cleaned_code = re.sub(r"^from fleet\.verifiers.*import.*verifier.*$\n?", "", cleaned_code, flags=re.MULTILINE)
|
|
318
|
+
cleaned_code = re.sub(r"^from fleet import verifier.*$\n?", "", cleaned_code, flags=re.MULTILINE)
|
|
319
|
+
cleaned_code = re.sub(r"^import fleet\.verifiers.*$\n?", "", cleaned_code, flags=re.MULTILINE)
|
|
320
|
+
cleaned_code = re.sub(r"^import fleet$\n?", "", cleaned_code, flags=re.MULTILINE)
|
|
321
|
+
|
|
322
|
+
# Define helper functions for verifier execution
|
|
323
|
+
_TRANSLATOR = str.maketrans(string.punctuation, " " * len(string.punctuation))
|
|
324
|
+
|
|
325
|
+
def _normalize_text(value: str) -> str:
|
|
326
|
+
text = value.lower().translate(_TRANSLATOR)
|
|
327
|
+
return "".join(text.split())
|
|
328
|
+
|
|
329
|
+
def _stringify_content(content: Any) -> str:
|
|
330
|
+
if isinstance(content, (dict, list)):
|
|
331
|
+
return json.dumps(content, sort_keys=True)
|
|
332
|
+
return str(content)
|
|
333
|
+
|
|
334
|
+
def normalized_contains(target: str, blob: Any) -> bool:
|
|
335
|
+
normalized_target = _normalize_text(target)
|
|
336
|
+
normalized_blob = _normalize_text(_stringify_content(blob))
|
|
337
|
+
return normalized_target in normalized_blob
|
|
338
|
+
|
|
339
|
+
def extract_numbers(text: str) -> list:
|
|
340
|
+
cleaned_text = text.replace(',', '')
|
|
341
|
+
pattern = r'-?\d+\.?\d*'
|
|
342
|
+
matches = re.findall(pattern, cleaned_text)
|
|
343
|
+
return [float(num) for num in matches]
|
|
344
|
+
|
|
345
|
+
def contains_number(text: str, target_number) -> bool:
|
|
346
|
+
numbers = extract_numbers(text)
|
|
347
|
+
try:
|
|
348
|
+
if isinstance(target_number, str):
|
|
349
|
+
target_number = target_number.replace(',', '')
|
|
350
|
+
target = float(target_number)
|
|
351
|
+
except (ValueError, AttributeError):
|
|
352
|
+
return False
|
|
353
|
+
return target in numbers
|
|
354
|
+
|
|
355
|
+
# Create a globals namespace with all required imports
|
|
356
|
+
exec_globals = globals().copy()
|
|
357
|
+
exec_globals.update({
|
|
293
358
|
"TASK_SUCCESSFUL_SCORE": TASK_SUCCESSFUL_SCORE,
|
|
294
359
|
"TASK_FAILED_SCORE": TASK_FAILED_SCORE,
|
|
295
360
|
"IgnoreConfig": IgnoreConfig,
|
|
296
361
|
"Environment": object, # Add Environment type if needed
|
|
297
|
-
|
|
362
|
+
"normalized_contains": normalized_contains,
|
|
363
|
+
"extract_numbers": extract_numbers,
|
|
364
|
+
"contains_number": contains_number,
|
|
365
|
+
"json": json,
|
|
366
|
+
"re": re,
|
|
367
|
+
"string": string,
|
|
368
|
+
})
|
|
298
369
|
|
|
299
|
-
#
|
|
300
|
-
|
|
301
|
-
|
|
370
|
+
# Create a local namespace for executing the code
|
|
371
|
+
local_namespace = {}
|
|
372
|
+
|
|
373
|
+
# Execute the cleaned verifier code in the namespace
|
|
374
|
+
exec(cleaned_code, exec_globals, local_namespace)
|
|
375
|
+
|
|
376
|
+
# Merge local_namespace into exec_globals so helper functions are accessible
|
|
377
|
+
# from the main verifier function when it's called
|
|
378
|
+
exec_globals.update(local_namespace)
|
|
302
379
|
|
|
303
380
|
# Find the function that was defined (not imported)
|
|
304
381
|
# Functions defined via exec have co_filename == '<string>'
|
|
@@ -319,6 +396,7 @@ def verifier_from_string(
|
|
|
319
396
|
verifier_id=verifier_id,
|
|
320
397
|
sha256=sha256,
|
|
321
398
|
raw_code=verifier_func,
|
|
399
|
+
verifier_runtime_version=verifier_runtime_version if verifier_runtime_version else None,
|
|
322
400
|
)
|
|
323
401
|
|
|
324
402
|
return verifier_instance
|
|
@@ -384,7 +462,12 @@ async def load_tasks(
|
|
|
384
462
|
|
|
385
463
|
|
|
386
464
|
async def update_task(
|
|
387
|
-
task_key: str,
|
|
465
|
+
task_key: str,
|
|
466
|
+
prompt: Optional[str] = None,
|
|
467
|
+
verifier_code: Optional[str] = None,
|
|
468
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
469
|
+
writer_metadata: Optional[Dict[str, Any]] = None,
|
|
470
|
+
qa_metadata: Optional[Dict[str, Any]] = None,
|
|
388
471
|
):
|
|
389
472
|
"""Convenience function to update an existing task.
|
|
390
473
|
|
|
@@ -392,6 +475,9 @@ async def update_task(
|
|
|
392
475
|
task_key: The key of the task to update
|
|
393
476
|
prompt: New prompt text for the task (optional)
|
|
394
477
|
verifier_code: Python code for task verification (optional)
|
|
478
|
+
metadata: Additional metadata for the task (optional)
|
|
479
|
+
writer_metadata: Metadata filled by task writer (optional)
|
|
480
|
+
qa_metadata: Metadata filled by QA reviewer (optional)
|
|
395
481
|
|
|
396
482
|
Returns:
|
|
397
483
|
TaskResponse containing the updated task details
|
|
@@ -399,16 +485,25 @@ async def update_task(
|
|
|
399
485
|
Examples:
|
|
400
486
|
response = await fleet.update_task("my-task", prompt="New prompt text")
|
|
401
487
|
response = await fleet.update_task("my-task", verifier_code="def verify(env): return True")
|
|
488
|
+
response = await fleet.update_task("my-task", metadata={"seed": 42, "story": "Updated story"})
|
|
489
|
+
response = await fleet.update_task("my-task", writer_metadata={"author": "john"})
|
|
402
490
|
"""
|
|
403
491
|
from .global_client import get_client
|
|
404
492
|
|
|
405
493
|
client = get_client()
|
|
406
494
|
return await client.update_task(
|
|
407
|
-
task_key=task_key,
|
|
495
|
+
task_key=task_key,
|
|
496
|
+
prompt=prompt,
|
|
497
|
+
verifier_code=verifier_code,
|
|
498
|
+
metadata=metadata,
|
|
499
|
+
writer_metadata=writer_metadata,
|
|
500
|
+
qa_metadata=qa_metadata,
|
|
408
501
|
)
|
|
409
502
|
|
|
410
503
|
|
|
411
|
-
async def get_task(
|
|
504
|
+
async def get_task(
|
|
505
|
+
task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None
|
|
506
|
+
):
|
|
412
507
|
"""Convenience function to get a task by key and optional version.
|
|
413
508
|
|
|
414
509
|
Args:
|
|
@@ -427,7 +522,9 @@ async def get_task(task_key: str, version_id: Optional[str] = None, team_id: Opt
|
|
|
427
522
|
from .global_client import get_client
|
|
428
523
|
|
|
429
524
|
client = get_client()
|
|
430
|
-
return await client.get_task(
|
|
525
|
+
return await client.get_task(
|
|
526
|
+
task_key=task_key, version_id=version_id, team_id=team_id
|
|
527
|
+
)
|
|
431
528
|
|
|
432
529
|
|
|
433
530
|
async def import_task(task: Task, project_key: Optional[str] = None):
|
|
@@ -37,7 +37,7 @@ class FunctionBundler:
|
|
|
37
37
|
) -> bytes:
|
|
38
38
|
"""Create a function bundle with statically extracted code."""
|
|
39
39
|
|
|
40
|
-
logger.info(f"Creating function bundle for {func.__name__}")
|
|
40
|
+
# logger.info(f"Creating function bundle for {func.__name__}")
|
|
41
41
|
|
|
42
42
|
# 1. Parse the main function and find dependencies
|
|
43
43
|
mod_file = Path(func.__code__.co_filename)
|
|
@@ -115,7 +115,7 @@ class FunctionBundler:
|
|
|
115
115
|
|
|
116
116
|
# Find function calls within the verifier function
|
|
117
117
|
called_functions = self._extract_function_calls(main_func_ast)
|
|
118
|
-
logger.debug(f"Functions called in verifier: {called_functions}")
|
|
118
|
+
# logger.debug(f"Functions called in verifier: {called_functions}")
|
|
119
119
|
|
|
120
120
|
# Find all functions defined in the module
|
|
121
121
|
module_functions = {}
|
|
@@ -128,7 +128,7 @@ class FunctionBundler:
|
|
|
128
128
|
for func_name in called_functions:
|
|
129
129
|
if func_name in module_functions and func_name != func.__name__:
|
|
130
130
|
same_module_deps.append(func_name)
|
|
131
|
-
logger.debug(f"Found same-module dependency: {func_name}")
|
|
131
|
+
# logger.debug(f"Found same-module dependency: {func_name}")
|
|
132
132
|
|
|
133
133
|
# Separate local and external imports
|
|
134
134
|
local_imports = {}
|
|
@@ -292,7 +292,7 @@ class FunctionBundler:
|
|
|
292
292
|
code = ast.unparse(node)
|
|
293
293
|
extracted_code.append(code)
|
|
294
294
|
except Exception as e:
|
|
295
|
-
logger.warning(f"Could not unparse AST node: {e}")
|
|
295
|
+
# logger.warning(f"Could not unparse AST node: {e}")
|
|
296
296
|
# Fallback to original source extraction
|
|
297
297
|
lines = content.split("\n")
|
|
298
298
|
start_line = node.lineno - 1
|
|
@@ -305,11 +305,11 @@ class FunctionBundler:
|
|
|
305
305
|
extracted_code.append(code)
|
|
306
306
|
|
|
307
307
|
result = "\n\n".join(extracted_code)
|
|
308
|
-
logger.debug(f"Extracted {len(extracted_code)} items from {file_path}")
|
|
308
|
+
# logger.debug(f"Extracted {len(extracted_code)} items from {file_path}")
|
|
309
309
|
return result
|
|
310
310
|
|
|
311
311
|
except Exception as e:
|
|
312
|
-
logger.warning(f"Failed to extract functions from {file_path}: {e}")
|
|
312
|
+
# logger.warning(f"Failed to extract functions from {file_path}: {e}")
|
|
313
313
|
# Fallback to including the entire file
|
|
314
314
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
315
315
|
return f.read()
|
|
@@ -464,14 +464,14 @@ class FunctionBundler:
|
|
|
464
464
|
version = dist.version # Get the installed version
|
|
465
465
|
package_with_version = f"{package_name}=={version}"
|
|
466
466
|
packages.add(package_with_version)
|
|
467
|
-
logger.debug(f"Mapped {mod} -> {package_with_version}")
|
|
467
|
+
# logger.debug(f"Mapped {mod} -> {package_with_version}")
|
|
468
468
|
except imd.PackageNotFoundError:
|
|
469
469
|
# Skip stdlib or local modules
|
|
470
|
-
logger.debug(f"Skipping {mod} (stdlib or local)")
|
|
470
|
+
# logger.debug(f"Skipping {mod} (stdlib or local)")
|
|
471
471
|
continue
|
|
472
472
|
|
|
473
473
|
package_list = list(packages)
|
|
474
|
-
logger.debug(f"Final package list: {package_list}")
|
|
474
|
+
# logger.debug(f"Final package list: {package_list}")
|
|
475
475
|
return package_list
|
|
476
476
|
|
|
477
477
|
def _merge_requirements(
|
|
@@ -511,10 +511,10 @@ class FunctionBundler:
|
|
|
511
511
|
if pkg_name not in seen_packages:
|
|
512
512
|
final_requirements.append(req)
|
|
513
513
|
seen_packages.add(pkg_name)
|
|
514
|
-
else:
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
514
|
+
# else:
|
|
515
|
+
# logger.debug(
|
|
516
|
+
# f"Skipping auto-detected {req}, using explicit version instead"
|
|
517
|
+
# )
|
|
518
518
|
|
|
519
519
|
# Always ensure fleet-python is included
|
|
520
520
|
if "fleet-python" not in seen_packages:
|
|
@@ -565,9 +565,9 @@ class FunctionBundler:
|
|
|
565
565
|
)
|
|
566
566
|
if dep_src:
|
|
567
567
|
same_module_code += f"\n{dep_src}\n"
|
|
568
|
-
logger.debug(
|
|
569
|
-
|
|
570
|
-
)
|
|
568
|
+
# logger.debug(
|
|
569
|
+
# f"Extracted same-module dependency: {dep_name}"
|
|
570
|
+
# )
|
|
571
571
|
|
|
572
572
|
# Create verifier.py with the main function
|
|
573
573
|
verifier_file = build_dir / "verifier.py"
|
|
@@ -586,7 +586,7 @@ class FunctionBundler:
|
|
|
586
586
|
{code}
|
|
587
587
|
"""
|
|
588
588
|
dest_path.write_text(extracted_content)
|
|
589
|
-
logger.debug(f"Created extracted file: {relative_path}")
|
|
589
|
+
# logger.debug(f"Created extracted file: {relative_path}")
|
|
590
590
|
|
|
591
591
|
# Ensure __init__.py files exist
|
|
592
592
|
self._ensure_init_files(Path(relative_path), build_dir)
|
|
@@ -595,7 +595,7 @@ class FunctionBundler:
|
|
|
595
595
|
return self._create_zip_bundle(build_dir)
|
|
596
596
|
|
|
597
597
|
except Exception as e:
|
|
598
|
-
logger.error(f"Failed to build function bundle: {e}")
|
|
598
|
+
# logger.error(f"Failed to build function bundle: {e}")
|
|
599
599
|
raise RuntimeError(f"Function bundle creation failed: {e}")
|
|
600
600
|
|
|
601
601
|
def _ensure_init_files(self, rel_path: Path, build_dir: Path):
|
|
@@ -607,7 +607,7 @@ class FunctionBundler:
|
|
|
607
607
|
if not init_file.exists():
|
|
608
608
|
init_file.parent.mkdir(parents=True, exist_ok=True)
|
|
609
609
|
init_file.write_text("# Auto-generated __init__.py")
|
|
610
|
-
logger.debug(f"Created __init__.py: {current}")
|
|
610
|
+
# logger.debug(f"Created __init__.py: {current}")
|
|
611
611
|
current = current.parent
|
|
612
612
|
|
|
613
613
|
def _create_zip_bundle(self, build_dir: Path) -> bytes:
|
|
@@ -621,7 +621,7 @@ class FunctionBundler:
|
|
|
621
621
|
zf.write(file_path, arcname)
|
|
622
622
|
|
|
623
623
|
bundle_size = len(zip_buffer.getvalue())
|
|
624
|
-
logger.debug(f"Created function bundle ({bundle_size:,} bytes)")
|
|
624
|
+
# logger.debug(f"Created function bundle ({bundle_size:,} bytes)")
|
|
625
625
|
return zip_buffer.getvalue()
|
|
626
626
|
|
|
627
627
|
def _extract_function_source(
|
|
@@ -662,7 +662,8 @@ class FunctionBundler:
|
|
|
662
662
|
return "\n".join(func_lines)
|
|
663
663
|
|
|
664
664
|
except Exception as e:
|
|
665
|
-
logger.warning(f"Failed to extract function {function_name}: {e}")
|
|
665
|
+
# logger.warning(f"Failed to extract function {function_name}: {e}")
|
|
666
|
+
pass
|
|
666
667
|
|
|
667
668
|
return None
|
|
668
669
|
|
|
@@ -42,6 +42,7 @@ class AsyncVerifierFunction:
|
|
|
42
42
|
verifier_id: Optional[str] = None,
|
|
43
43
|
sha256: Optional[str] = None,
|
|
44
44
|
raw_code: Optional[str] = None,
|
|
45
|
+
verifier_runtime_version: Optional[str] = None,
|
|
45
46
|
):
|
|
46
47
|
self.func = func
|
|
47
48
|
self.key = key
|
|
@@ -52,6 +53,7 @@ class AsyncVerifierFunction:
|
|
|
52
53
|
self._bundle_data: Optional[bytes] = None # Cached bundle data
|
|
53
54
|
self._raw_code: Optional[str] = raw_code # Store raw code if provided
|
|
54
55
|
self._is_async = asyncio.iscoroutinefunction(func)
|
|
56
|
+
self.verifier_runtime_version = verifier_runtime_version
|
|
55
57
|
|
|
56
58
|
# Copy function metadata
|
|
57
59
|
functools.update_wrapper(self, func)
|
|
@@ -79,9 +81,9 @@ class AsyncVerifierFunction:
|
|
|
79
81
|
|
|
80
82
|
self._bundle_data = zip_buffer.getvalue()
|
|
81
83
|
self._bundle_sha = _get_bundle_sha(self._bundle_data)
|
|
82
|
-
logger.debug(
|
|
83
|
-
|
|
84
|
-
)
|
|
84
|
+
# logger.debug(
|
|
85
|
+
# f"Created bundle from raw code for {self.key} with SHA: {self._bundle_sha}"
|
|
86
|
+
# )
|
|
85
87
|
else:
|
|
86
88
|
# Try to create bundle from function source
|
|
87
89
|
try:
|
|
@@ -89,9 +91,9 @@ class AsyncVerifierFunction:
|
|
|
89
91
|
self.func, self.extra_requirements, self.verifier_id
|
|
90
92
|
)
|
|
91
93
|
self._bundle_sha = _get_bundle_sha(self._bundle_data)
|
|
92
|
-
logger.debug(
|
|
93
|
-
|
|
94
|
-
)
|
|
94
|
+
# logger.debug(
|
|
95
|
+
# f"Created bundle for {self.key} with SHA: {self._bundle_sha}"
|
|
96
|
+
# )
|
|
95
97
|
except OSError as e:
|
|
96
98
|
# Can't create bundle - no source and no raw code
|
|
97
99
|
raise OSError(f"Cannot create bundle for {self.key}: {e}")
|
|
@@ -104,20 +106,21 @@ class AsyncVerifierFunction:
|
|
|
104
106
|
|
|
105
107
|
# If bundle_data is empty, we're using server-side bundle
|
|
106
108
|
if not bundle_data:
|
|
107
|
-
logger.debug(f"Using server-side bundle {bundle_sha[:8]}...")
|
|
109
|
+
# logger.debug(f"Using server-side bundle {bundle_sha[:8]}...")
|
|
108
110
|
return bundle_sha, False # No upload needed, server has it
|
|
109
111
|
|
|
110
112
|
# Always check if bundle exists on server
|
|
111
113
|
try:
|
|
112
114
|
exists = await env.check_bundle_exists(bundle_sha)
|
|
113
115
|
if exists.success:
|
|
114
|
-
logger.info(f"Bundle {bundle_sha[:8]}... found on server")
|
|
116
|
+
# logger.info(f"Bundle {bundle_sha[:8]}... found on server")
|
|
115
117
|
return bundle_sha, False # Found on server, no upload needed
|
|
116
118
|
except Exception as e:
|
|
117
|
-
logger.warning(f"Failed to check bundle existence: {e}")
|
|
119
|
+
# logger.warning(f"Failed to check bundle existence: {e}")
|
|
120
|
+
pass
|
|
118
121
|
|
|
119
122
|
# Bundle not found on server - upload needed
|
|
120
|
-
logger.info(f"Bundle {bundle_sha[:8]}... needs to be uploaded")
|
|
123
|
+
# logger.info(f"Bundle {bundle_sha[:8]}... needs to be uploaded")
|
|
121
124
|
return bundle_sha, True # Upload needed
|
|
122
125
|
|
|
123
126
|
async def __call__(self, env: AsyncEnv, *args, **kwargs) -> float:
|
|
@@ -147,7 +150,7 @@ class AsyncVerifierFunction:
|
|
|
147
150
|
)
|
|
148
151
|
|
|
149
152
|
except Exception as e:
|
|
150
|
-
logger.error(f"Error in verifier {self.key}: {e}")
|
|
153
|
+
# logger.error(f"Error in verifier {self.key}: {e}")
|
|
151
154
|
# Return error score 0
|
|
152
155
|
return 0.0
|
|
153
156
|
|
|
@@ -179,7 +182,7 @@ class AsyncVerifierFunction:
|
|
|
179
182
|
try:
|
|
180
183
|
return float(result)
|
|
181
184
|
except (ValueError, TypeError):
|
|
182
|
-
logger.warning(f"Could not convert result to float: {result}")
|
|
185
|
+
# logger.warning(f"Could not convert result to float: {result}")
|
|
183
186
|
return 0.0
|
|
184
187
|
|
|
185
188
|
def _raise_remote_error(self, error_info: Dict[str, Any]):
|
|
@@ -238,7 +241,7 @@ Remote traceback:
|
|
|
238
241
|
|
|
239
242
|
if needs_upload:
|
|
240
243
|
# Need to upload bundle to S3
|
|
241
|
-
logger.info(f"Uploading bundle {bundle_sha[:8]}... for {self.key}")
|
|
244
|
+
# logger.info(f"Uploading bundle {bundle_sha[:8]}... for {self.key}")
|
|
242
245
|
bundle_data, _ = self._get_or_create_bundle()
|
|
243
246
|
|
|
244
247
|
response = await env.execute_verifier_remote(
|
|
@@ -250,13 +253,14 @@ Remote traceback:
|
|
|
250
253
|
args_array=args_array,
|
|
251
254
|
kwargs=kwargs,
|
|
252
255
|
needs_upload=True,
|
|
256
|
+
verifier_runtime_version=self.verifier_runtime_version,
|
|
253
257
|
)
|
|
254
258
|
|
|
255
|
-
logger.debug(f"Bundle {bundle_sha[:8]}... uploaded successfully")
|
|
259
|
+
# logger.debug(f"Bundle {bundle_sha[:8]}... uploaded successfully")
|
|
256
260
|
|
|
257
261
|
else:
|
|
258
262
|
# Bundle already available - execute without upload
|
|
259
|
-
logger.info(f"Bundle {bundle_sha[:8]}... already cached for {self.key}")
|
|
263
|
+
# logger.info(f"Bundle {bundle_sha[:8]}... already cached for {self.key}")
|
|
260
264
|
response = await env.execute_verifier_remote(
|
|
261
265
|
bundle_data=b"", # Empty bundle since it's cached
|
|
262
266
|
bundle_sha=bundle_sha,
|
|
@@ -266,6 +270,7 @@ Remote traceback:
|
|
|
266
270
|
args_array=args_array,
|
|
267
271
|
kwargs=kwargs,
|
|
268
272
|
needs_upload=False,
|
|
273
|
+
verifier_runtime_version=self.verifier_runtime_version,
|
|
269
274
|
)
|
|
270
275
|
|
|
271
276
|
return response
|
|
@@ -273,9 +278,9 @@ Remote traceback:
|
|
|
273
278
|
except Exception as e:
|
|
274
279
|
# Check if error indicates bundle not found and retry with upload
|
|
275
280
|
if self._is_bundle_not_found_error(e) and not needs_upload:
|
|
276
|
-
logger.info(
|
|
277
|
-
|
|
278
|
-
)
|
|
281
|
+
# logger.info(
|
|
282
|
+
# f"Bundle {bundle_sha[:8]}... not found on server, uploading..."
|
|
283
|
+
# )
|
|
279
284
|
bundle_data, _ = self._get_or_create_bundle()
|
|
280
285
|
response = await env.execute_verifier_remote(
|
|
281
286
|
bundle_data=bundle_data,
|
|
@@ -286,10 +291,11 @@ Remote traceback:
|
|
|
286
291
|
args_array=args_array,
|
|
287
292
|
kwargs=kwargs,
|
|
288
293
|
needs_upload=True,
|
|
294
|
+
verifier_runtime_version=self.verifier_runtime_version,
|
|
289
295
|
)
|
|
290
296
|
return response
|
|
291
297
|
else:
|
|
292
|
-
logger.error(f"Error in remote execution of {self.key}: {e}")
|
|
298
|
+
# logger.error(f"Error in remote execution of {self.key}: {e}")
|
|
293
299
|
raise
|
|
294
300
|
|
|
295
301
|
|
fleet/agent/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Fleet Agent - Run agents locally with Docker-based browser control.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
# Via CLI
|
|
5
|
+
flt eval run -p my-project -m google/gemini-2.5-pro --local gemini_cua
|
|
6
|
+
|
|
7
|
+
# Via Python
|
|
8
|
+
from fleet.agent import run_agent
|
|
9
|
+
|
|
10
|
+
results = await run_agent(
|
|
11
|
+
project_key="my-project",
|
|
12
|
+
agent="gemini_cua",
|
|
13
|
+
api_keys={"GEMINI_API_KEY": "xxx"},
|
|
14
|
+
)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .types import AgentConfig, AgentResult, TaskResult
|
|
18
|
+
from .utils import get_agent_path, AGENT_DIR
|
|
19
|
+
|
|
20
|
+
# Import these last to avoid circular imports
|
|
21
|
+
from .orchestrator import run_agent, AgentOrchestrator
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"AgentConfig",
|
|
25
|
+
"AgentResult",
|
|
26
|
+
"TaskResult",
|
|
27
|
+
"run_agent",
|
|
28
|
+
"AgentOrchestrator",
|
|
29
|
+
"get_agent_path",
|
|
30
|
+
"AGENT_DIR",
|
|
31
|
+
]
|
|
32
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# MCP Server - Browser control in Docker with optional VNC
|
|
2
|
+
FROM python:3.11-slim
|
|
3
|
+
|
|
4
|
+
# Install dependencies for Chromium and VNC
|
|
5
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
6
|
+
# Chromium dependencies
|
|
7
|
+
wget fonts-liberation libasound2 libatk-bridge2.0-0 libatk1.0-0 \
|
|
8
|
+
libatspi2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
|
|
9
|
+
libnspr4 libnss3 libxcomposite1 libxdamage1 libxfixes3 libxkbcommon0 \
|
|
10
|
+
libxrandr2 xdg-utils \
|
|
11
|
+
# VNC and display for headful mode
|
|
12
|
+
xvfb x11vnc fluxbox \
|
|
13
|
+
# noVNC for web-based viewing
|
|
14
|
+
novnc websockify \
|
|
15
|
+
# Utilities
|
|
16
|
+
procps net-tools \
|
|
17
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
18
|
+
|
|
19
|
+
WORKDIR /app
|
|
20
|
+
|
|
21
|
+
# Install Python deps (includes fleet-python for utils like fleet.utils.playwright)
|
|
22
|
+
COPY requirements.txt .
|
|
23
|
+
RUN pip install --no-cache-dir -r requirements.txt && playwright install chromium
|
|
24
|
+
|
|
25
|
+
# Copy MCP server files (standalone scripts that import from installed fleet-python)
|
|
26
|
+
COPY mcp_server/ ./mcp_server/
|
|
27
|
+
|
|
28
|
+
# Copy start script
|
|
29
|
+
COPY start.sh .
|
|
30
|
+
RUN chmod +x start.sh
|
|
31
|
+
|
|
32
|
+
# Environment
|
|
33
|
+
ENV PORT=8765 \
|
|
34
|
+
SCREEN_WIDTH=1366 \
|
|
35
|
+
SCREEN_HEIGHT=768 \
|
|
36
|
+
HEADLESS=true \
|
|
37
|
+
VNC_PORT=5900 \
|
|
38
|
+
NOVNC_PORT=6080 \
|
|
39
|
+
DISPLAY=:99
|
|
40
|
+
|
|
41
|
+
# Expose ports: MCP server, VNC, noVNC
|
|
42
|
+
EXPOSE 8765 5900 6080
|
|
43
|
+
|
|
44
|
+
# Start script handles display setup
|
|
45
|
+
CMD ["./start.sh"]
|