fleet-python 0.2.111__tar.gz → 0.2.113__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {fleet_python-0.2.111/fleet_python.egg-info → fleet_python-0.2.113}/PKG-INFO +1 -1
  2. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/__init__.py +9 -1
  3. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/__init__.py +1 -1
  4. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/base.py +1 -1
  5. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/client.py +14 -0
  6. fleet_python-0.2.113/fleet/_async/judge.py +121 -0
  7. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/base.py +1 -1
  8. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/client.py +14 -0
  9. fleet_python-0.2.113/fleet/judge.py +603 -0
  10. {fleet_python-0.2.111 → fleet_python-0.2.113/fleet_python.egg-info}/PKG-INFO +1 -1
  11. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet_python.egg-info/SOURCES.txt +2 -0
  12. {fleet_python-0.2.111 → fleet_python-0.2.113}/pyproject.toml +1 -1
  13. {fleet_python-0.2.111 → fleet_python-0.2.113}/LICENSE +0 -0
  14. {fleet_python-0.2.111 → fleet_python-0.2.113}/README.md +0 -0
  15. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/diff_example.py +0 -0
  16. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/dsl_example.py +0 -0
  17. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example.py +0 -0
  18. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/exampleResume.py +0 -0
  19. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_account.py +0 -0
  20. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_action_log.py +0 -0
  21. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_client.py +0 -0
  22. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_mcp_anthropic.py +0 -0
  23. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_mcp_openai.py +0 -0
  24. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_sync.py +0 -0
  25. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_task.py +0 -0
  26. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_tasks.py +0 -0
  27. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/example_verifier.py +0 -0
  28. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/export_tasks.py +0 -0
  29. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/export_tasks_filtered.py +0 -0
  30. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/fetch_tasks.py +0 -0
  31. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/gemini_example.py +0 -0
  32. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/import_tasks.py +0 -0
  33. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/iterate_verifiers.py +0 -0
  34. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/json_tasks_example.py +0 -0
  35. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/nova_act_example.py +0 -0
  36. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/openai_example.py +0 -0
  37. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/openai_simple_example.py +0 -0
  38. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/query_builder_example.py +0 -0
  39. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/quickstart.py +0 -0
  40. {fleet_python-0.2.111 → fleet_python-0.2.113}/examples/test_cdp_logging.py +0 -0
  41. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/env/__init__.py +0 -0
  42. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/env/client.py +0 -0
  43. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/exceptions.py +0 -0
  44. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/global_client.py +0 -0
  45. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/instance/__init__.py +0 -0
  46. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/instance/base.py +0 -0
  47. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/instance/client.py +0 -0
  48. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/models.py +0 -0
  49. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/__init__.py +0 -0
  50. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/api.py +0 -0
  51. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/base.py +0 -0
  52. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/browser.py +0 -0
  53. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/filesystem.py +0 -0
  54. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/mcp.py +0 -0
  55. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/resources/sqlite.py +0 -0
  56. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/tasks.py +0 -0
  57. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/verifiers/__init__.py +0 -0
  58. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/verifiers/bundler.py +0 -0
  59. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/verifiers/verifier.py +0 -0
  60. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/__init__.py +0 -0
  61. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/Dockerfile +0 -0
  62. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/__init__.py +0 -0
  63. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/agent.py +0 -0
  64. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/mcp/main.py +0 -0
  65. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/mcp_server/__init__.py +0 -0
  66. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/mcp_server/main.py +0 -0
  67. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/mcp_server/tools.py +0 -0
  68. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/requirements.txt +0 -0
  69. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/gemini_cua/start.sh +0 -0
  70. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/orchestrator.py +0 -0
  71. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/types.py +0 -0
  72. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/agent/utils.py +0 -0
  73. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/cli.py +0 -0
  74. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/config.py +0 -0
  75. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/env/__init__.py +0 -0
  76. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/env/client.py +0 -0
  77. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/eval/__init__.py +0 -0
  78. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/eval/uploader.py +0 -0
  79. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/exceptions.py +0 -0
  80. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/global_client.py +0 -0
  81. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/instance/__init__.py +0 -0
  82. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/instance/base.py +0 -0
  83. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/instance/client.py +0 -0
  84. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/instance/models.py +0 -0
  85. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/models.py +0 -0
  86. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/proxy/__init__.py +0 -0
  87. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/proxy/proxy.py +0 -0
  88. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/proxy/whitelist.py +0 -0
  89. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/__init__.py +0 -0
  90. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/api.py +0 -0
  91. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/base.py +0 -0
  92. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/browser.py +0 -0
  93. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/filesystem.py +0 -0
  94. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/mcp.py +0 -0
  95. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/resources/sqlite.py +0 -0
  96. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/tasks.py +0 -0
  97. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/types.py +0 -0
  98. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/utils/__init__.py +0 -0
  99. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/utils/http_logging.py +0 -0
  100. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/utils/logging.py +0 -0
  101. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/utils/playwright.py +0 -0
  102. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/__init__.py +0 -0
  103. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/bundler.py +0 -0
  104. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/code.py +0 -0
  105. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/db.py +0 -0
  106. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/decorator.py +0 -0
  107. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/parse.py +0 -0
  108. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/sql_differ.py +0 -0
  109. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/verifiers/verifier.py +0 -0
  110. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet_python.egg-info/dependency_links.txt +0 -0
  111. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet_python.egg-info/entry_points.txt +0 -0
  112. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet_python.egg-info/requires.txt +0 -0
  113. {fleet_python-0.2.111 → fleet_python-0.2.113}/fleet_python.egg-info/top_level.txt +0 -0
  114. {fleet_python-0.2.111 → fleet_python-0.2.113}/scripts/fix_sync_imports.py +0 -0
  115. {fleet_python-0.2.111 → fleet_python-0.2.113}/scripts/unasync.py +0 -0
  116. {fleet_python-0.2.111 → fleet_python-0.2.113}/setup.cfg +0 -0
  117. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/__init__.py +0 -0
  118. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_app_method.py +0 -0
  119. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_expect_exactly.py +0 -0
  120. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_expect_only.py +0 -0
  121. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_instance_dispatch.py +0 -0
  122. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_sqlite_resource_dual_mode.py +0 -0
  123. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_sqlite_shared_memory_behavior.py +0 -0
  124. {fleet_python-0.2.111 → fleet_python-0.2.113}/tests/test_verifier_from_string.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.111
3
+ Version: 0.2.113
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -68,12 +68,15 @@ from .tasks import (
68
68
  # Import shared types
69
69
  from .types import VerifierFunction
70
70
 
71
+ # Import judge data classes
72
+ from .judge import Rubric, Criterion, Image, JudgeResult
73
+
71
74
  # Create a module-level env attribute for convenient access
72
75
  from . import env
73
76
  from . import global_client as _global_client
74
77
  from ._async import global_client as _async_global_client
75
78
 
76
- __version__ = "0.2.111"
79
+ __version__ = "0.2.113"
77
80
 
78
81
  __all__ = [
79
82
  # Core classes
@@ -90,6 +93,11 @@ __all__ = [
90
93
  # Task models
91
94
  "Task",
92
95
  "VerifierFunction",
96
+ # Judge
97
+ "Rubric",
98
+ "Criterion",
99
+ "Image",
100
+ "JudgeResult",
93
101
  # Exceptions
94
102
  "FleetError",
95
103
  "FleetAPIError",
@@ -44,7 +44,7 @@ from ..types import VerifierFunction
44
44
  from .. import env
45
45
  from . import global_client as _async_global_client
46
46
 
47
- __version__ = "0.2.111"
47
+ __version__ = "0.2.113"
48
48
 
49
49
  __all__ = [
50
50
  # Core classes
@@ -26,7 +26,7 @@ from .exceptions import (
26
26
  try:
27
27
  from .. import __version__
28
28
  except ImportError:
29
- __version__ = "0.2.111"
29
+ __version__ = "0.2.113"
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -54,6 +54,7 @@ from .tasks import Task
54
54
 
55
55
  if TYPE_CHECKING:
56
56
  from .verifiers import AsyncVerifierFunction
57
+ from .judge import AsyncJudge
57
58
 
58
59
 
59
60
  def _json_default(x: Any) -> Any:
@@ -344,6 +345,7 @@ class AsyncEnv(EnvironmentBase):
344
345
  self._client = client
345
346
  self._apps: Dict[str, AsyncInstanceClient] = {}
346
347
  self._instance: Optional[AsyncInstanceClient] = None
348
+ self._judge: Optional["AsyncJudge"] = None
347
349
 
348
350
  @property
349
351
  def instance(self) -> AsyncInstanceClient:
@@ -419,6 +421,18 @@ class AsyncEnv(EnvironmentBase):
419
421
  mcp_url = f"{self.urls.root}mcp"
420
422
  return AsyncMCPResource(url=mcp_url, env_key=self.env_key)
421
423
 
424
@property
def judge(self) -> "AsyncJudge":
    """Return the judge helper for this environment, creating it on first use.

    The helper performs LLM-as-judge grading via the orchestrator API. It is
    built from this environment's loaded client and instance id, cached on
    ``self._judge``, and reused on every later access.
    """
    cached = self._judge
    if cached is not None:
        return cached

    # Imported here rather than at module level; the module-level import
    # of AsyncJudge is guarded by TYPE_CHECKING.
    from .judge import AsyncJudge

    cached = AsyncJudge(
        client=self._load_client,
        instance_id=self.instance_id,
    )
    self._judge = cached
    return cached
435
+
422
436
  def state(self, uri: str) -> Resource:
423
437
  return self.instance.state(uri)
424
438
 
@@ -0,0 +1,121 @@
1
+ """Fleet SDK Judge - Async version.
2
+
3
+ Provides env.judge.grade() for async verifier scripts.
4
+ """
5
+
6
+ from typing import Dict, List, Optional, Union, TYPE_CHECKING
7
+
8
+ # Import shared classes and helpers from the sync module
9
+ from ..judge import (
10
+ Criterion,
11
+ Image,
12
+ JudgeResult,
13
+ Rubric,
14
+ _build_grade_request,
15
+ _collect_image_from_env_async,
16
+ _guess_media_type,
17
+ _parse_grade_response,
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from .base import AsyncWrapper
22
+
23
+ # Re-export data classes so `from fleet._async.judge import ...` works
24
+ __all__ = [
25
+ "AsyncJudge",
26
+ "Criterion",
27
+ "Image",
28
+ "JudgeResult",
29
+ "Rubric",
30
+ ]
31
+
32
+
33
class AsyncJudge:
    """Async LLM-as-judge grader.

    Grading requests are sent to the orchestrator API, not the environment
    API. Instances are exposed as ``env.judge`` on AsyncEnv objects.
    """

    def __init__(self, client: "AsyncWrapper", instance_id: str):
        self._instance_id = instance_id
        self._client = client

    async def grade(
        self,
        rubric: Union[str, Rubric],
        submission: Optional[str] = None,
        *,
        ground_truth: Optional[Union[str, dict]] = None,
        problem: Optional[str] = None,
        context: Optional[str] = None,
        reference_claims: Optional[str] = None,
        conversation: Optional[List[dict]] = None,
        images: Optional[Dict[str, Image]] = None,
        model: Optional[str] = None,
        provider: Optional[str] = None,
        agentic: bool = False,
        collect: Optional[Dict[str, List[str]]] = None,
        task_id: Optional[str] = None,
    ) -> JudgeResult:
        """Ask the orchestrator's LLM judge to grade a submission.

        The returned JudgeResult is a float subclass carrying ``.details``,
        ``.criteria`` and ``.feedback``, so a verifier function can return it
        as-is.

        Args:
            rubric: Grading rubric, given as plain text or as a Rubric object.
            submission: Final answer / submission text produced by the agent.
            ground_truth: Expected answer, as a string or a dict.
            problem: Original problem statement.
            context: Extra context handed to the judge.
            reference_claims: Reference analysis claims.
            conversation: Conversation history as a list of message dicts.
            images: Named images for the judge (e.g. gold reference, agent
                output).
            model: LLM model override; the server chooses a default when None.
            provider: LLM provider override; the server chooses a default
                when None.
            agentic: When True, the orchestrator collects artifacts from the
                instance itself.
            collect: File patterns the orchestrator should collect (agentic
                mode).
            task_id: Optional task ID used for tracking.
        """
        # Env-backed images are fetched asynchronously up front, before the
        # request body is built. In agentic mode (or with no images) the
        # caller's mapping is forwarded untouched.
        request_images = images
        if images and not agentic:
            request_images = {}
            for name, image in images.items():
                is_env_backed = image.source == "env" and image._env is not None
                if not is_env_backed:
                    request_images[name] = image
                    continue

                encoded = await _collect_image_from_env_async(
                    image._env, image.filename
                )
                if encoded is None:
                    # Async fetch failed: switch to a "collect" source rather
                    # than keeping the env image, otherwise serialize() would
                    # retry the fetch synchronously.
                    request_images[name] = Image(
                        source="collect",
                        filename=image.filename,
                    )
                    continue

                effective_name = image.filename or "image.png"
                request_images[name] = Image.from_base64(
                    encoded,
                    effective_name,
                    _guess_media_type(effective_name),
                )

        payload = _build_grade_request(
            self._instance_id,
            rubric,
            submission,
            ground_truth=ground_truth,
            problem=problem,
            context=context,
            reference_claims=reference_claims,
            conversation=conversation,
            images=request_images,
            model=model,
            provider=provider,
            agentic=agentic,
            collect=collect,
            task_id=task_id,
        )

        reply = await self._client.request("POST", "/v1/judge/grade", json=payload)
        return _parse_grade_response(reply.json())
@@ -27,7 +27,7 @@ from .exceptions import (
27
27
  try:
28
28
  from . import __version__
29
29
  except ImportError:
30
- __version__ = "0.2.111"
30
+ __version__ = "0.2.113"
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
@@ -59,6 +59,7 @@ from .tasks import Task
59
59
 
60
60
  if TYPE_CHECKING:
61
61
  from .verifiers import SyncVerifierFunction
62
+ from .judge import SyncJudge
62
63
 
63
64
 
64
65
  def _json_default(x: Any) -> Any:
@@ -348,6 +349,7 @@ class SyncEnv(EnvironmentBase):
348
349
  self._client = client
349
350
  self._apps: Dict[str, InstanceClient] = {}
350
351
  self._instance: Optional[InstanceClient] = None
352
+ self._judge: Optional["SyncJudge"] = None
351
353
  self._manager_url_override: Optional[str] = None # For URL mode
352
354
 
353
355
  @property
@@ -431,6 +433,18 @@ class SyncEnv(EnvironmentBase):
431
433
  mcp_url = f"{self.urls.root}mcp"
432
434
  return SyncMCPResource(url=mcp_url, env_key=self.env_key)
433
435
 
436
@property
def judge(self) -> "SyncJudge":
    """Return the judge helper for this environment, creating it on first use.

    The helper performs LLM-as-judge grading via the orchestrator API. It is
    built from this environment's loaded client and instance id, cached on
    ``self._judge``, and reused on every later access.
    """
    existing = self._judge
    if existing is not None:
        return existing

    # Imported here rather than at module level; the module-level import
    # of SyncJudge is guarded by TYPE_CHECKING.
    from .judge import SyncJudge

    existing = SyncJudge(
        client=self._load_client,
        instance_id=self.instance_id,
    )
    self._judge = existing
    return existing
447
+
434
448
  def state(self, uri: str) -> Resource:
435
449
  return self.instance.state(uri)
436
450