fleet-python 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. examples/export_tasks.py +16 -5
  2. examples/export_tasks_filtered.py +245 -0
  3. examples/fetch_tasks.py +230 -0
  4. examples/import_tasks.py +140 -8
  5. examples/iterate_verifiers.py +725 -0
  6. fleet/__init__.py +128 -5
  7. fleet/_async/__init__.py +27 -3
  8. fleet/_async/base.py +24 -9
  9. fleet/_async/client.py +938 -41
  10. fleet/_async/env/client.py +60 -3
  11. fleet/_async/instance/client.py +52 -7
  12. fleet/_async/models.py +15 -0
  13. fleet/_async/resources/api.py +200 -0
  14. fleet/_async/resources/sqlite.py +1801 -46
  15. fleet/_async/tasks.py +122 -25
  16. fleet/_async/verifiers/bundler.py +22 -21
  17. fleet/_async/verifiers/verifier.py +25 -19
  18. fleet/agent/__init__.py +32 -0
  19. fleet/agent/gemini_cua/Dockerfile +45 -0
  20. fleet/agent/gemini_cua/__init__.py +10 -0
  21. fleet/agent/gemini_cua/agent.py +759 -0
  22. fleet/agent/gemini_cua/mcp/main.py +108 -0
  23. fleet/agent/gemini_cua/mcp_server/__init__.py +5 -0
  24. fleet/agent/gemini_cua/mcp_server/main.py +105 -0
  25. fleet/agent/gemini_cua/mcp_server/tools.py +178 -0
  26. fleet/agent/gemini_cua/requirements.txt +5 -0
  27. fleet/agent/gemini_cua/start.sh +30 -0
  28. fleet/agent/orchestrator.py +854 -0
  29. fleet/agent/types.py +49 -0
  30. fleet/agent/utils.py +34 -0
  31. fleet/base.py +34 -9
  32. fleet/cli.py +1061 -0
  33. fleet/client.py +1060 -48
  34. fleet/config.py +1 -1
  35. fleet/env/__init__.py +16 -0
  36. fleet/env/client.py +60 -3
  37. fleet/eval/__init__.py +15 -0
  38. fleet/eval/uploader.py +231 -0
  39. fleet/exceptions.py +8 -0
  40. fleet/instance/client.py +53 -8
  41. fleet/instance/models.py +1 -0
  42. fleet/models.py +303 -0
  43. fleet/proxy/__init__.py +25 -0
  44. fleet/proxy/proxy.py +453 -0
  45. fleet/proxy/whitelist.py +244 -0
  46. fleet/resources/api.py +200 -0
  47. fleet/resources/sqlite.py +1845 -46
  48. fleet/tasks.py +113 -20
  49. fleet/utils/__init__.py +7 -0
  50. fleet/utils/http_logging.py +178 -0
  51. fleet/utils/logging.py +13 -0
  52. fleet/utils/playwright.py +440 -0
  53. fleet/verifiers/bundler.py +22 -21
  54. fleet/verifiers/db.py +985 -1
  55. fleet/verifiers/decorator.py +1 -1
  56. fleet/verifiers/verifier.py +25 -19
  57. {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/METADATA +28 -1
  58. fleet_python-0.2.105.dist-info/RECORD +115 -0
  59. {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/WHEEL +1 -1
  60. fleet_python-0.2.105.dist-info/entry_points.txt +2 -0
  61. tests/test_app_method.py +85 -0
  62. tests/test_expect_exactly.py +4148 -0
  63. tests/test_expect_only.py +2593 -0
  64. tests/test_instance_dispatch.py +607 -0
  65. tests/test_sqlite_resource_dual_mode.py +263 -0
  66. tests/test_sqlite_shared_memory_behavior.py +117 -0
  67. fleet_python-0.2.66b2.dist-info/RECORD +0 -81
  68. tests/test_verifier_security.py +0 -427
  69. {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/licenses/LICENSE +0 -0
  70. {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/top_level.txt +0 -0
fleet/_async/tasks.py CHANGED
@@ -38,6 +38,12 @@ class Task(BaseModel):
38
38
  metadata: Optional[Dict[str, Any]] = Field(
39
39
  default_factory=dict, description="Additional task metadata"
40
40
  )
41
+ writer_metadata: Optional[Dict[str, Any]] = Field(
42
+ None, description="Metadata filled by task writer"
43
+ )
44
+ qa_metadata: Optional[Dict[str, Any]] = Field(
45
+ None, description="Metadata filled by QA reviewer"
46
+ )
41
47
  output_json_schema: Optional[Dict[str, Any]] = Field(
42
48
  None, description="JSON schema for expected output format"
43
49
  )
@@ -209,23 +215,33 @@ class Task(BaseModel):
209
215
  )
210
216
  self.verifier = verifier
211
217
 
212
- async def make_env(self, region: Optional[str] = None):
218
+ async def make_env(
219
+ self,
220
+ region: Optional[str] = None,
221
+ image_type: Optional[str] = None,
222
+ ttl_seconds: Optional[int] = None,
223
+ run_id: Optional[str] = None,
224
+ heartbeat_interval: Optional[int] = None,
225
+ ):
213
226
  """Create an environment instance for this task's environment.
214
227
 
215
- Uses the task's env_id (and version if present) to create the env.
228
+ Alias for make() method. Uses the task's env_id (and version if present) to create the env.
216
229
  """
217
- if not self.env_id:
218
- raise ValueError("Task has no env_id defined")
219
- # Deferred import to avoid circular dependencies
220
- from .client import AsyncFleet
221
-
222
- return await AsyncFleet().make(env_key=self.env_key, region=region)
230
+ return await self.make(
231
+ region=region,
232
+ image_type=image_type,
233
+ ttl_seconds=ttl_seconds,
234
+ run_id=run_id,
235
+ heartbeat_interval=heartbeat_interval,
236
+ )
223
237
 
224
238
  async def make(
225
239
  self,
226
240
  region: Optional[str] = None,
227
241
  image_type: Optional[str] = None,
228
242
  ttl_seconds: Optional[int] = None,
243
+ run_id: Optional[str] = None,
244
+ heartbeat_interval: Optional[int] = None,
229
245
  ):
230
246
  """Create an environment instance with task's configuration.
231
247
 
@@ -233,11 +249,15 @@ class Task(BaseModel):
233
249
  - env_key (env_id + version)
234
250
  - data_key (data_id + data_version, if present)
235
251
  - env_variables (if present)
252
+ - run_id (if present)
253
+ - heartbeat_interval (if present)
236
254
 
237
255
  Args:
238
256
  region: Optional AWS region for the environment
239
257
  image_type: Optional image type for the environment
240
258
  ttl_seconds: Optional TTL in seconds for the instance
259
+ run_id: Optional run ID to group instances
260
+ heartbeat_interval: Optional heartbeat interval in seconds (30-3600)
241
261
 
242
262
  Returns:
243
263
  Environment instance configured for this task
@@ -245,7 +265,7 @@ class Task(BaseModel):
245
265
  Example:
246
266
  task = fleet.Task(key="my-task", prompt="...", env_id="my-env",
247
267
  data_id="my-data", data_version="v1.0")
248
- env = await task.make(region="us-west-2")
268
+ env = await task.make(region="us-west-2", run_id="my-batch-123", heartbeat_interval=60)
249
269
  """
250
270
  if not self.env_id:
251
271
  raise ValueError("Task has no env_id defined")
@@ -260,11 +280,13 @@ class Task(BaseModel):
260
280
  env_variables=self.env_variables if self.env_variables else None,
261
281
  image_type=image_type,
262
282
  ttl_seconds=ttl_seconds,
283
+ run_id=run_id,
284
+ heartbeat_interval=heartbeat_interval,
263
285
  )
264
286
 
265
287
 
266
288
  def verifier_from_string(
267
- verifier_func: str, verifier_id: str, verifier_key: str, sha256: str = ""
289
+ verifier_func: str, verifier_id: str, verifier_key: str, sha256: str = "", verifier_runtime_version: str = ""
268
290
  ) -> "VerifierFunction":
269
291
  """Create a verifier function from string code.
270
292
 
@@ -273,32 +295,87 @@ def verifier_from_string(
273
295
  verifier_id: Unique identifier for the verifier
274
296
  verifier_key: Key/name for the verifier
275
297
  sha256: SHA256 hash of the verifier code
298
+ verifier_runtime_version: Verifier runtime version
276
299
 
277
300
  Returns:
278
301
  VerifierFunction instance that can be used to verify tasks
279
302
  """
280
303
  try:
281
304
  import inspect
305
+ import re
306
+ import json
307
+ import string
282
308
  from .verifiers.verifier import AsyncVerifierFunction
283
309
  from fleet.verifiers.code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
284
310
  from fleet.verifiers.db import IgnoreConfig
285
- from fleet.verifiers.parsing import parse_and_validate_verifier
286
311
 
287
- # Validate the code and extract function name
288
- # This ensures no arbitrary code execution during import
289
- func_name = parse_and_validate_verifier(verifier_func)
290
-
291
- # Create a local namespace for executing the code
292
- local_namespace = {
312
+ # Strip @verifier decorator if present to avoid double-wrapping
313
+ # Remove lines like: @verifier(key="...")
314
+ cleaned_code = re.sub(r"@verifier\([^)]*\)\s*\n", "", verifier_func)
315
+ # Also remove the verifier import if present
316
+ # Use MULTILINE flag to match beginning of lines with ^
317
+ cleaned_code = re.sub(r"^from fleet\.verifiers.*import.*verifier.*$\n?", "", cleaned_code, flags=re.MULTILINE)
318
+ cleaned_code = re.sub(r"^from fleet import verifier.*$\n?", "", cleaned_code, flags=re.MULTILINE)
319
+ cleaned_code = re.sub(r"^import fleet\.verifiers.*$\n?", "", cleaned_code, flags=re.MULTILINE)
320
+ cleaned_code = re.sub(r"^import fleet$\n?", "", cleaned_code, flags=re.MULTILINE)
321
+
322
+ # Define helper functions for verifier execution
323
+ _TRANSLATOR = str.maketrans(string.punctuation, " " * len(string.punctuation))
324
+
325
+ def _normalize_text(value: str) -> str:
326
+ text = value.lower().translate(_TRANSLATOR)
327
+ return "".join(text.split())
328
+
329
+ def _stringify_content(content: Any) -> str:
330
+ if isinstance(content, (dict, list)):
331
+ return json.dumps(content, sort_keys=True)
332
+ return str(content)
333
+
334
+ def normalized_contains(target: str, blob: Any) -> bool:
335
+ normalized_target = _normalize_text(target)
336
+ normalized_blob = _normalize_text(_stringify_content(blob))
337
+ return normalized_target in normalized_blob
338
+
339
+ def extract_numbers(text: str) -> list:
340
+ cleaned_text = text.replace(',', '')
341
+ pattern = r'-?\d+\.?\d*'
342
+ matches = re.findall(pattern, cleaned_text)
343
+ return [float(num) for num in matches]
344
+
345
+ def contains_number(text: str, target_number) -> bool:
346
+ numbers = extract_numbers(text)
347
+ try:
348
+ if isinstance(target_number, str):
349
+ target_number = target_number.replace(',', '')
350
+ target = float(target_number)
351
+ except (ValueError, AttributeError):
352
+ return False
353
+ return target in numbers
354
+
355
+ # Create a globals namespace with all required imports
356
+ exec_globals = globals().copy()
357
+ exec_globals.update({
293
358
  "TASK_SUCCESSFUL_SCORE": TASK_SUCCESSFUL_SCORE,
294
359
  "TASK_FAILED_SCORE": TASK_FAILED_SCORE,
295
360
  "IgnoreConfig": IgnoreConfig,
296
361
  "Environment": object, # Add Environment type if needed
297
- }
362
+ "normalized_contains": normalized_contains,
363
+ "extract_numbers": extract_numbers,
364
+ "contains_number": contains_number,
365
+ "json": json,
366
+ "re": re,
367
+ "string": string,
368
+ })
298
369
 
299
- # Execute the verifier code in the namespace
300
- # This is now safe because we validated it contains only declarative code
301
- exec(verifier_func, globals(), local_namespace)
370
+ # Create a local namespace for executing the code
371
+ local_namespace = {}
372
+
373
+ # Execute the cleaned verifier code in the namespace
374
+ exec(cleaned_code, exec_globals, local_namespace)
375
+
376
+ # Merge local_namespace into exec_globals so helper functions are accessible
377
+ # from the main verifier function when it's called
378
+ exec_globals.update(local_namespace)
302
379
 
303
380
  # Find the function that was defined (not imported)
304
381
  # Functions defined via exec have co_filename == '<string>'
@@ -319,6 +396,7 @@ def verifier_from_string(
319
396
  verifier_id=verifier_id,
320
397
  sha256=sha256,
321
398
  raw_code=verifier_func,
399
+ verifier_runtime_version=verifier_runtime_version if verifier_runtime_version else None,
322
400
  )
323
401
 
324
402
  return verifier_instance
@@ -384,7 +462,12 @@ async def load_tasks(
384
462
 
385
463
 
386
464
  async def update_task(
387
- task_key: str, prompt: Optional[str] = None, verifier_code: Optional[str] = None
465
+ task_key: str,
466
+ prompt: Optional[str] = None,
467
+ verifier_code: Optional[str] = None,
468
+ metadata: Optional[Dict[str, Any]] = None,
469
+ writer_metadata: Optional[Dict[str, Any]] = None,
470
+ qa_metadata: Optional[Dict[str, Any]] = None,
388
471
  ):
389
472
  """Convenience function to update an existing task.
390
473
 
@@ -392,6 +475,9 @@ async def update_task(
392
475
  task_key: The key of the task to update
393
476
  prompt: New prompt text for the task (optional)
394
477
  verifier_code: Python code for task verification (optional)
478
+ metadata: Additional metadata for the task (optional)
479
+ writer_metadata: Metadata filled by task writer (optional)
480
+ qa_metadata: Metadata filled by QA reviewer (optional)
395
481
 
396
482
  Returns:
397
483
  TaskResponse containing the updated task details
@@ -399,16 +485,25 @@ async def update_task(
399
485
  Examples:
400
486
  response = await fleet.update_task("my-task", prompt="New prompt text")
401
487
  response = await fleet.update_task("my-task", verifier_code="def verify(env): return True")
488
+ response = await fleet.update_task("my-task", metadata={"seed": 42, "story": "Updated story"})
489
+ response = await fleet.update_task("my-task", writer_metadata={"author": "john"})
402
490
  """
403
491
  from .global_client import get_client
404
492
 
405
493
  client = get_client()
406
494
  return await client.update_task(
407
- task_key=task_key, prompt=prompt, verifier_code=verifier_code
495
+ task_key=task_key,
496
+ prompt=prompt,
497
+ verifier_code=verifier_code,
498
+ metadata=metadata,
499
+ writer_metadata=writer_metadata,
500
+ qa_metadata=qa_metadata,
408
501
  )
409
502
 
410
503
 
411
- async def get_task(task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None):
504
+ async def get_task(
505
+ task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None
506
+ ):
412
507
  """Convenience function to get a task by key and optional version.
413
508
 
414
509
  Args:
@@ -427,7 +522,9 @@ async def get_task(task_key: str, version_id: Optional[str] = None, team_id: Opt
427
522
  from .global_client import get_client
428
523
 
429
524
  client = get_client()
430
- return await client.get_task(task_key=task_key, version_id=version_id, team_id=team_id)
525
+ return await client.get_task(
526
+ task_key=task_key, version_id=version_id, team_id=team_id
527
+ )
431
528
 
432
529
 
433
530
  async def import_task(task: Task, project_key: Optional[str] = None):
@@ -37,7 +37,7 @@ class FunctionBundler:
37
37
  ) -> bytes:
38
38
  """Create a function bundle with statically extracted code."""
39
39
 
40
- logger.info(f"Creating function bundle for {func.__name__}")
40
+ # logger.info(f"Creating function bundle for {func.__name__}")
41
41
 
42
42
  # 1. Parse the main function and find dependencies
43
43
  mod_file = Path(func.__code__.co_filename)
@@ -115,7 +115,7 @@ class FunctionBundler:
115
115
 
116
116
  # Find function calls within the verifier function
117
117
  called_functions = self._extract_function_calls(main_func_ast)
118
- logger.debug(f"Functions called in verifier: {called_functions}")
118
+ # logger.debug(f"Functions called in verifier: {called_functions}")
119
119
 
120
120
  # Find all functions defined in the module
121
121
  module_functions = {}
@@ -128,7 +128,7 @@ class FunctionBundler:
128
128
  for func_name in called_functions:
129
129
  if func_name in module_functions and func_name != func.__name__:
130
130
  same_module_deps.append(func_name)
131
- logger.debug(f"Found same-module dependency: {func_name}")
131
+ # logger.debug(f"Found same-module dependency: {func_name}")
132
132
 
133
133
  # Separate local and external imports
134
134
  local_imports = {}
@@ -292,7 +292,7 @@ class FunctionBundler:
292
292
  code = ast.unparse(node)
293
293
  extracted_code.append(code)
294
294
  except Exception as e:
295
- logger.warning(f"Could not unparse AST node: {e}")
295
+ # logger.warning(f"Could not unparse AST node: {e}")
296
296
  # Fallback to original source extraction
297
297
  lines = content.split("\n")
298
298
  start_line = node.lineno - 1
@@ -305,11 +305,11 @@ class FunctionBundler:
305
305
  extracted_code.append(code)
306
306
 
307
307
  result = "\n\n".join(extracted_code)
308
- logger.debug(f"Extracted {len(extracted_code)} items from {file_path}")
308
+ # logger.debug(f"Extracted {len(extracted_code)} items from {file_path}")
309
309
  return result
310
310
 
311
311
  except Exception as e:
312
- logger.warning(f"Failed to extract functions from {file_path}: {e}")
312
+ # logger.warning(f"Failed to extract functions from {file_path}: {e}")
313
313
  # Fallback to including the entire file
314
314
  with open(file_path, "r", encoding="utf-8") as f:
315
315
  return f.read()
@@ -464,14 +464,14 @@ class FunctionBundler:
464
464
  version = dist.version # Get the installed version
465
465
  package_with_version = f"{package_name}=={version}"
466
466
  packages.add(package_with_version)
467
- logger.debug(f"Mapped {mod} -> {package_with_version}")
467
+ # logger.debug(f"Mapped {mod} -> {package_with_version}")
468
468
  except imd.PackageNotFoundError:
469
469
  # Skip stdlib or local modules
470
- logger.debug(f"Skipping {mod} (stdlib or local)")
470
+ # logger.debug(f"Skipping {mod} (stdlib or local)")
471
471
  continue
472
472
 
473
473
  package_list = list(packages)
474
- logger.debug(f"Final package list: {package_list}")
474
+ # logger.debug(f"Final package list: {package_list}")
475
475
  return package_list
476
476
 
477
477
  def _merge_requirements(
@@ -511,10 +511,10 @@ class FunctionBundler:
511
511
  if pkg_name not in seen_packages:
512
512
  final_requirements.append(req)
513
513
  seen_packages.add(pkg_name)
514
- else:
515
- logger.debug(
516
- f"Skipping auto-detected {req}, using explicit version instead"
517
- )
514
+ # else:
515
+ # logger.debug(
516
+ # f"Skipping auto-detected {req}, using explicit version instead"
517
+ # )
518
518
 
519
519
  # Always ensure fleet-python is included
520
520
  if "fleet-python" not in seen_packages:
@@ -565,9 +565,9 @@ class FunctionBundler:
565
565
  )
566
566
  if dep_src:
567
567
  same_module_code += f"\n{dep_src}\n"
568
- logger.debug(
569
- f"Extracted same-module dependency: {dep_name}"
570
- )
568
+ # logger.debug(
569
+ # f"Extracted same-module dependency: {dep_name}"
570
+ # )
571
571
 
572
572
  # Create verifier.py with the main function
573
573
  verifier_file = build_dir / "verifier.py"
@@ -586,7 +586,7 @@ class FunctionBundler:
586
586
  {code}
587
587
  """
588
588
  dest_path.write_text(extracted_content)
589
- logger.debug(f"Created extracted file: {relative_path}")
589
+ # logger.debug(f"Created extracted file: {relative_path}")
590
590
 
591
591
  # Ensure __init__.py files exist
592
592
  self._ensure_init_files(Path(relative_path), build_dir)
@@ -595,7 +595,7 @@ class FunctionBundler:
595
595
  return self._create_zip_bundle(build_dir)
596
596
 
597
597
  except Exception as e:
598
- logger.error(f"Failed to build function bundle: {e}")
598
+ # logger.error(f"Failed to build function bundle: {e}")
599
599
  raise RuntimeError(f"Function bundle creation failed: {e}")
600
600
 
601
601
  def _ensure_init_files(self, rel_path: Path, build_dir: Path):
@@ -607,7 +607,7 @@ class FunctionBundler:
607
607
  if not init_file.exists():
608
608
  init_file.parent.mkdir(parents=True, exist_ok=True)
609
609
  init_file.write_text("# Auto-generated __init__.py")
610
- logger.debug(f"Created __init__.py: {current}")
610
+ # logger.debug(f"Created __init__.py: {current}")
611
611
  current = current.parent
612
612
 
613
613
  def _create_zip_bundle(self, build_dir: Path) -> bytes:
@@ -621,7 +621,7 @@ class FunctionBundler:
621
621
  zf.write(file_path, arcname)
622
622
 
623
623
  bundle_size = len(zip_buffer.getvalue())
624
- logger.debug(f"Created function bundle ({bundle_size:,} bytes)")
624
+ # logger.debug(f"Created function bundle ({bundle_size:,} bytes)")
625
625
  return zip_buffer.getvalue()
626
626
 
627
627
  def _extract_function_source(
@@ -662,7 +662,8 @@ class FunctionBundler:
662
662
  return "\n".join(func_lines)
663
663
 
664
664
  except Exception as e:
665
- logger.warning(f"Failed to extract function {function_name}: {e}")
665
+ # logger.warning(f"Failed to extract function {function_name}: {e}")
666
+ pass
666
667
 
667
668
  return None
668
669
 
@@ -42,6 +42,7 @@ class AsyncVerifierFunction:
42
42
  verifier_id: Optional[str] = None,
43
43
  sha256: Optional[str] = None,
44
44
  raw_code: Optional[str] = None,
45
+ verifier_runtime_version: Optional[str] = None,
45
46
  ):
46
47
  self.func = func
47
48
  self.key = key
@@ -52,6 +53,7 @@ class AsyncVerifierFunction:
52
53
  self._bundle_data: Optional[bytes] = None # Cached bundle data
53
54
  self._raw_code: Optional[str] = raw_code # Store raw code if provided
54
55
  self._is_async = asyncio.iscoroutinefunction(func)
56
+ self.verifier_runtime_version = verifier_runtime_version
55
57
 
56
58
  # Copy function metadata
57
59
  functools.update_wrapper(self, func)
@@ -79,9 +81,9 @@ class AsyncVerifierFunction:
79
81
 
80
82
  self._bundle_data = zip_buffer.getvalue()
81
83
  self._bundle_sha = _get_bundle_sha(self._bundle_data)
82
- logger.debug(
83
- f"Created bundle from raw code for {self.key} with SHA: {self._bundle_sha}"
84
- )
84
+ # logger.debug(
85
+ # f"Created bundle from raw code for {self.key} with SHA: {self._bundle_sha}"
86
+ # )
85
87
  else:
86
88
  # Try to create bundle from function source
87
89
  try:
@@ -89,9 +91,9 @@ class AsyncVerifierFunction:
89
91
  self.func, self.extra_requirements, self.verifier_id
90
92
  )
91
93
  self._bundle_sha = _get_bundle_sha(self._bundle_data)
92
- logger.debug(
93
- f"Created bundle for {self.key} with SHA: {self._bundle_sha}"
94
- )
94
+ # logger.debug(
95
+ # f"Created bundle for {self.key} with SHA: {self._bundle_sha}"
96
+ # )
95
97
  except OSError as e:
96
98
  # Can't create bundle - no source and no raw code
97
99
  raise OSError(f"Cannot create bundle for {self.key}: {e}")
@@ -104,20 +106,21 @@ class AsyncVerifierFunction:
104
106
 
105
107
  # If bundle_data is empty, we're using server-side bundle
106
108
  if not bundle_data:
107
- logger.debug(f"Using server-side bundle {bundle_sha[:8]}...")
109
+ # logger.debug(f"Using server-side bundle {bundle_sha[:8]}...")
108
110
  return bundle_sha, False # No upload needed, server has it
109
111
 
110
112
  # Always check if bundle exists on server
111
113
  try:
112
114
  exists = await env.check_bundle_exists(bundle_sha)
113
115
  if exists.success:
114
- logger.info(f"Bundle {bundle_sha[:8]}... found on server")
116
+ # logger.info(f"Bundle {bundle_sha[:8]}... found on server")
115
117
  return bundle_sha, False # Found on server, no upload needed
116
118
  except Exception as e:
117
- logger.warning(f"Failed to check bundle existence: {e}")
119
+ # logger.warning(f"Failed to check bundle existence: {e}")
120
+ pass
118
121
 
119
122
  # Bundle not found on server - upload needed
120
- logger.info(f"Bundle {bundle_sha[:8]}... needs to be uploaded")
123
+ # logger.info(f"Bundle {bundle_sha[:8]}... needs to be uploaded")
121
124
  return bundle_sha, True # Upload needed
122
125
 
123
126
  async def __call__(self, env: AsyncEnv, *args, **kwargs) -> float:
@@ -147,7 +150,7 @@ class AsyncVerifierFunction:
147
150
  )
148
151
 
149
152
  except Exception as e:
150
- logger.error(f"Error in verifier {self.key}: {e}")
153
+ # logger.error(f"Error in verifier {self.key}: {e}")
151
154
  # Return error score 0
152
155
  return 0.0
153
156
 
@@ -179,7 +182,7 @@ class AsyncVerifierFunction:
179
182
  try:
180
183
  return float(result)
181
184
  except (ValueError, TypeError):
182
- logger.warning(f"Could not convert result to float: {result}")
185
+ # logger.warning(f"Could not convert result to float: {result}")
183
186
  return 0.0
184
187
 
185
188
  def _raise_remote_error(self, error_info: Dict[str, Any]):
@@ -238,7 +241,7 @@ Remote traceback:
238
241
 
239
242
  if needs_upload:
240
243
  # Need to upload bundle to S3
241
- logger.info(f"Uploading bundle {bundle_sha[:8]}... for {self.key}")
244
+ # logger.info(f"Uploading bundle {bundle_sha[:8]}... for {self.key}")
242
245
  bundle_data, _ = self._get_or_create_bundle()
243
246
 
244
247
  response = await env.execute_verifier_remote(
@@ -250,13 +253,14 @@ Remote traceback:
250
253
  args_array=args_array,
251
254
  kwargs=kwargs,
252
255
  needs_upload=True,
256
+ verifier_runtime_version=self.verifier_runtime_version,
253
257
  )
254
258
 
255
- logger.debug(f"Bundle {bundle_sha[:8]}... uploaded successfully")
259
+ # logger.debug(f"Bundle {bundle_sha[:8]}... uploaded successfully")
256
260
 
257
261
  else:
258
262
  # Bundle already available - execute without upload
259
- logger.info(f"Bundle {bundle_sha[:8]}... already cached for {self.key}")
263
+ # logger.info(f"Bundle {bundle_sha[:8]}... already cached for {self.key}")
260
264
  response = await env.execute_verifier_remote(
261
265
  bundle_data=b"", # Empty bundle since it's cached
262
266
  bundle_sha=bundle_sha,
@@ -266,6 +270,7 @@ Remote traceback:
266
270
  args_array=args_array,
267
271
  kwargs=kwargs,
268
272
  needs_upload=False,
273
+ verifier_runtime_version=self.verifier_runtime_version,
269
274
  )
270
275
 
271
276
  return response
@@ -273,9 +278,9 @@ Remote traceback:
273
278
  except Exception as e:
274
279
  # Check if error indicates bundle not found and retry with upload
275
280
  if self._is_bundle_not_found_error(e) and not needs_upload:
276
- logger.info(
277
- f"Bundle {bundle_sha[:8]}... not found on server, uploading..."
278
- )
281
+ # logger.info(
282
+ # f"Bundle {bundle_sha[:8]}... not found on server, uploading..."
283
+ # )
279
284
  bundle_data, _ = self._get_or_create_bundle()
280
285
  response = await env.execute_verifier_remote(
281
286
  bundle_data=bundle_data,
@@ -286,10 +291,11 @@ Remote traceback:
286
291
  args_array=args_array,
287
292
  kwargs=kwargs,
288
293
  needs_upload=True,
294
+ verifier_runtime_version=self.verifier_runtime_version,
289
295
  )
290
296
  return response
291
297
  else:
292
- logger.error(f"Error in remote execution of {self.key}: {e}")
298
+ # logger.error(f"Error in remote execution of {self.key}: {e}")
293
299
  raise
294
300
 
295
301
 
@@ -0,0 +1,32 @@
1
+ """Fleet Agent - Run agents locally with Docker-based browser control.
2
+
3
+ Usage:
4
+ # Via CLI
5
+ flt eval run -p my-project -m google/gemini-2.5-pro --local gemini_cua
6
+
7
+ # Via Python
8
+ from fleet.agent import run_agent
9
+
10
+ results = await run_agent(
11
+ project_key="my-project",
12
+ agent="gemini_cua",
13
+ api_keys={"GEMINI_API_KEY": "xxx"},
14
+ )
15
+ """
16
+
17
+ from .types import AgentConfig, AgentResult, TaskResult
18
+ from .utils import get_agent_path, AGENT_DIR
19
+
20
+ # Import these last to avoid circular imports
21
+ from .orchestrator import run_agent, AgentOrchestrator
22
+
23
+ __all__ = [
24
+ "AgentConfig",
25
+ "AgentResult",
26
+ "TaskResult",
27
+ "run_agent",
28
+ "AgentOrchestrator",
29
+ "get_agent_path",
30
+ "AGENT_DIR",
31
+ ]
32
+
@@ -0,0 +1,45 @@
1
+ # MCP Server - Browser control in Docker with optional VNC
2
+ FROM python:3.11-slim
3
+
4
+ # Install dependencies for Chromium and VNC
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ # Chromium dependencies
7
+ wget fonts-liberation libasound2 libatk-bridge2.0-0 libatk1.0-0 \
8
+ libatspi2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
9
+ libnspr4 libnss3 libxcomposite1 libxdamage1 libxfixes3 libxkbcommon0 \
10
+ libxrandr2 xdg-utils \
11
+ # VNC and display for headful mode
12
+ xvfb x11vnc fluxbox \
13
+ # noVNC for web-based viewing
14
+ novnc websockify \
15
+ # Utilities
16
+ procps net-tools \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ WORKDIR /app
20
+
21
+ # Install Python deps (includes fleet-python for utils like fleet.utils.playwright)
22
+ COPY requirements.txt .
23
+ RUN pip install --no-cache-dir -r requirements.txt && playwright install chromium
24
+
25
+ # Copy MCP server files (standalone scripts that import from installed fleet-python)
26
+ COPY mcp_server/ ./mcp_server/
27
+
28
+ # Copy start script
29
+ COPY start.sh .
30
+ RUN chmod +x start.sh
31
+
32
+ # Environment
33
+ ENV PORT=8765 \
34
+ SCREEN_WIDTH=1366 \
35
+ SCREEN_HEIGHT=768 \
36
+ HEADLESS=true \
37
+ VNC_PORT=5900 \
38
+ NOVNC_PORT=6080 \
39
+ DISPLAY=:99
40
+
41
+ # Expose ports: MCP server, VNC, noVNC
42
+ EXPOSE 8765 5900 6080
43
+
44
+ # Start script handles display setup
45
+ CMD ["./start.sh"]
@@ -0,0 +1,10 @@
1
+ """Gemini Computer Use Agent.
2
+
3
+ - agent.py: Runs on HOST, calls Gemini API
4
+ - cua_server.py: Runs in Docker, controls browser via Playwright
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ AGENT_DIR = Path(__file__).parent
10
+