fleet-python 0.2.114__tar.gz → 0.2.116__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. {fleet_python-0.2.114/fleet_python.egg-info → fleet_python-0.2.116}/PKG-INFO +1 -1
  2. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/__init__.py +3 -2
  3. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/__init__.py +1 -1
  4. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/base.py +1 -1
  5. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/client.py +0 -2
  6. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/judge.py +30 -1
  7. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/base.py +1 -1
  8. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/client.py +0 -2
  9. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/judge.py +336 -2
  10. {fleet_python-0.2.114 → fleet_python-0.2.116/fleet_python.egg-info}/PKG-INFO +1 -1
  11. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet_python.egg-info/SOURCES.txt +1 -0
  12. {fleet_python-0.2.114 → fleet_python-0.2.116}/pyproject.toml +1 -1
  13. fleet_python-0.2.116/tests/test_judge_criteria_markers.py +192 -0
  14. {fleet_python-0.2.114 → fleet_python-0.2.116}/LICENSE +0 -0
  15. {fleet_python-0.2.114 → fleet_python-0.2.116}/README.md +0 -0
  16. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/diff_example.py +0 -0
  17. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/dsl_example.py +0 -0
  18. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example.py +0 -0
  19. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/exampleResume.py +0 -0
  20. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_account.py +0 -0
  21. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_action_log.py +0 -0
  22. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_client.py +0 -0
  23. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_mcp_anthropic.py +0 -0
  24. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_mcp_openai.py +0 -0
  25. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_sync.py +0 -0
  26. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_task.py +0 -0
  27. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_tasks.py +0 -0
  28. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/example_verifier.py +0 -0
  29. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/export_tasks.py +0 -0
  30. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/export_tasks_filtered.py +0 -0
  31. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/fetch_tasks.py +0 -0
  32. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/gemini_example.py +0 -0
  33. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/import_tasks.py +0 -0
  34. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/iterate_verifiers.py +0 -0
  35. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/json_tasks_example.py +0 -0
  36. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/nova_act_example.py +0 -0
  37. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/openai_example.py +0 -0
  38. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/openai_simple_example.py +0 -0
  39. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/query_builder_example.py +0 -0
  40. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/quickstart.py +0 -0
  41. {fleet_python-0.2.114 → fleet_python-0.2.116}/examples/test_cdp_logging.py +0 -0
  42. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/env/__init__.py +0 -0
  43. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/env/client.py +0 -0
  44. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/exceptions.py +0 -0
  45. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/global_client.py +0 -0
  46. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/instance/__init__.py +0 -0
  47. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/instance/base.py +0 -0
  48. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/instance/client.py +0 -0
  49. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/models.py +0 -0
  50. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/__init__.py +0 -0
  51. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/api.py +0 -0
  52. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/base.py +0 -0
  53. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/browser.py +0 -0
  54. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/filesystem.py +0 -0
  55. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/mcp.py +0 -0
  56. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/resources/sqlite.py +0 -0
  57. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/tasks.py +0 -0
  58. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/verifiers/__init__.py +0 -0
  59. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/verifiers/bundler.py +0 -0
  60. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/verifiers/verifier.py +0 -0
  61. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/__init__.py +0 -0
  62. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/Dockerfile +0 -0
  63. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/__init__.py +0 -0
  64. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/agent.py +0 -0
  65. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/mcp/main.py +0 -0
  66. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/mcp_server/__init__.py +0 -0
  67. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/mcp_server/main.py +0 -0
  68. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/mcp_server/tools.py +0 -0
  69. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/requirements.txt +0 -0
  70. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/gemini_cua/start.sh +0 -0
  71. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/orchestrator.py +0 -0
  72. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/types.py +0 -0
  73. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/agent/utils.py +0 -0
  74. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/cli.py +0 -0
  75. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/config.py +0 -0
  76. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/env/__init__.py +0 -0
  77. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/env/client.py +0 -0
  78. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/eval/__init__.py +0 -0
  79. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/eval/uploader.py +0 -0
  80. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/exceptions.py +0 -0
  81. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/global_client.py +0 -0
  82. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/instance/__init__.py +0 -0
  83. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/instance/base.py +0 -0
  84. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/instance/client.py +0 -0
  85. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/instance/models.py +0 -0
  86. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/models.py +0 -0
  87. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/proxy/__init__.py +0 -0
  88. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/proxy/proxy.py +0 -0
  89. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/proxy/whitelist.py +0 -0
  90. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/__init__.py +0 -0
  91. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/api.py +0 -0
  92. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/base.py +0 -0
  93. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/browser.py +0 -0
  94. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/filesystem.py +0 -0
  95. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/mcp.py +0 -0
  96. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/resources/sqlite.py +0 -0
  97. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/tasks.py +0 -0
  98. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/types.py +0 -0
  99. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/utils/__init__.py +0 -0
  100. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/utils/http_logging.py +0 -0
  101. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/utils/logging.py +0 -0
  102. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/utils/playwright.py +0 -0
  103. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/__init__.py +0 -0
  104. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/bundler.py +0 -0
  105. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/code.py +0 -0
  106. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/db.py +0 -0
  107. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/decorator.py +0 -0
  108. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/parse.py +0 -0
  109. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/sql_differ.py +0 -0
  110. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/verifiers/verifier.py +0 -0
  111. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet_python.egg-info/dependency_links.txt +0 -0
  112. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet_python.egg-info/entry_points.txt +0 -0
  113. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet_python.egg-info/requires.txt +0 -0
  114. {fleet_python-0.2.114 → fleet_python-0.2.116}/fleet_python.egg-info/top_level.txt +0 -0
  115. {fleet_python-0.2.114 → fleet_python-0.2.116}/scripts/fix_sync_imports.py +0 -0
  116. {fleet_python-0.2.114 → fleet_python-0.2.116}/scripts/unasync.py +0 -0
  117. {fleet_python-0.2.114 → fleet_python-0.2.116}/setup.cfg +0 -0
  118. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/__init__.py +0 -0
  119. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_app_method.py +0 -0
  120. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_expect_exactly.py +0 -0
  121. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_expect_only.py +0 -0
  122. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_instance_dispatch.py +0 -0
  123. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_sqlite_resource_dual_mode.py +0 -0
  124. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_sqlite_shared_memory_behavior.py +0 -0
  125. {fleet_python-0.2.114 → fleet_python-0.2.116}/tests/test_verifier_from_string.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.114
3
+ Version: 0.2.116
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -69,14 +69,14 @@ from .tasks import (
69
69
  from .types import VerifierFunction
70
70
 
71
71
  # Import judge data classes
72
- from .judge import Rubric, Criterion, Image, JudgeResult
72
+ from .judge import Rubric, Criterion, File, Image, JudgeResult
73
73
 
74
74
  # Create a module-level env attribute for convenient access
75
75
  from . import env
76
76
  from . import global_client as _global_client
77
77
  from ._async import global_client as _async_global_client
78
78
 
79
- __version__ = "0.2.114"
79
+ __version__ = "0.2.116"
80
80
 
81
81
  __all__ = [
82
82
  # Core classes
@@ -96,6 +96,7 @@ __all__ = [
96
96
  # Judge
97
97
  "Rubric",
98
98
  "Criterion",
99
+ "File",
99
100
  "Image",
100
101
  "JudgeResult",
101
102
  # Exceptions
@@ -44,7 +44,7 @@ from ..types import VerifierFunction
44
44
  from .. import env
45
45
  from . import global_client as _async_global_client
46
46
 
47
- __version__ = "0.2.114"
47
+ __version__ = "0.2.116"
48
48
 
49
49
  __all__ = [
50
50
  # Core classes
@@ -26,7 +26,7 @@ from .exceptions import (
26
26
  try:
27
27
  from .. import __version__
28
28
  except ImportError:
29
- __version__ = "0.2.114"
29
+ __version__ = "0.2.116"
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -601,7 +601,6 @@ class AsyncFleet:
601
601
  )
602
602
 
603
603
  instance = AsyncEnv(client=self.client, **response.json())
604
- await instance.instance.load()
605
604
  return instance
606
605
 
607
606
  async def make_for_task(self, task: Task) -> AsyncEnv:
@@ -653,7 +652,6 @@ class AsyncFleet:
653
652
  else:
654
653
  response = await self.client.request("GET", f"/v1/env/instances/{instance_id}")
655
654
  instance = AsyncEnv(client=self.client, **response.json())
656
- await instance.instance.load()
657
655
  return instance
658
656
 
659
657
  def _create_url_instance(self, base_url: str) -> AsyncEnv:
@@ -8,11 +8,14 @@ from typing import Dict, List, Optional, Union, TYPE_CHECKING
8
8
  # Import shared classes and helpers from the sync module
9
9
  from ..judge import (
10
10
  Criterion,
11
+ File,
11
12
  Image,
12
13
  JudgeResult,
13
14
  Rubric,
14
15
  _build_grade_request,
16
+ _collect_file_from_env_async,
15
17
  _collect_image_from_env_async,
18
+ _guess_file_media_type,
16
19
  _guess_media_type,
17
20
  _parse_grade_response,
18
21
  _print_judge_call_start,
@@ -25,6 +28,7 @@ if TYPE_CHECKING:
25
28
  __all__ = [
26
29
  "AsyncJudge",
27
30
  "Criterion",
31
+ "File",
28
32
  "Image",
29
33
  "JudgeResult",
30
34
  "Rubric",
@@ -52,6 +56,7 @@ class AsyncJudge:
52
56
  reference_claims: Optional[str] = None,
53
57
  conversation: Optional[List[dict]] = None,
54
58
  images: Optional[Dict[str, Image]] = None,
59
+ files: Optional[Dict[str, File]] = None,
55
60
  model: Optional[str] = None,
56
61
  provider: Optional[str] = None,
57
62
  agentic: bool = False,
@@ -72,6 +77,7 @@ class AsyncJudge:
72
77
  reference_claims: Reference analysis claims.
73
78
  conversation: Conversation history as list of message dicts.
74
79
  images: Named images for the judge (e.g., gold reference, agent output).
80
+ files: Named files for the judge (PDF, CSV, STEP, etc.).
75
81
  model: Override LLM model (server picks default if None).
76
82
  provider: Override LLM provider (server picks default if None).
77
83
  agentic: If True, the orchestrator collects artifacts from the instance.
@@ -101,6 +107,28 @@ class AsyncJudge:
101
107
  else:
102
108
  resolved_images[label] = img
103
109
 
110
+ # Resolve File.from_env files asynchronously before building request
111
+ resolved_files = files
112
+ if files and not agentic:
113
+ resolved_files = {}
114
+ for label, f in files.items():
115
+ if f.source == "env" and f._env is not None:
116
+ b64 = await _collect_file_from_env_async(f._env, f.filename)
117
+ if b64 is not None:
118
+ resolved_files[label] = File.from_base64(
119
+ b64,
120
+ f.filename or "file",
121
+ _guess_file_media_type(f.filename or "file"),
122
+ )
123
+ else:
124
+ # Async collection failed — use collect source directly
125
+ resolved_files[label] = File(
126
+ source="collect",
127
+ filename=f.filename,
128
+ )
129
+ else:
130
+ resolved_files[label] = f
131
+
104
132
  body = _build_grade_request(
105
133
  self._instance_id,
106
134
  rubric,
@@ -111,6 +139,7 @@ class AsyncJudge:
111
139
  reference_claims=reference_claims,
112
140
  conversation=conversation,
113
141
  images=resolved_images,
142
+ files=resolved_files,
114
143
  model=model,
115
144
  provider=provider,
116
145
  agentic=agentic,
@@ -118,6 +147,6 @@ class AsyncJudge:
118
147
  task_id=task_id,
119
148
  )
120
149
 
121
- _print_judge_call_start(rubric, resolved_images, agentic, model)
150
+ _print_judge_call_start(rubric, resolved_images, agentic, model, files=resolved_files)
122
151
  response = await self._client.request("POST", "/v1/judge/grade", json=body)
123
152
  return _parse_grade_response(response.json())
@@ -27,7 +27,7 @@ from .exceptions import (
27
27
  try:
28
28
  from . import __version__
29
29
  except ImportError:
30
- __version__ = "0.2.114"
30
+ __version__ = "0.2.116"
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
@@ -613,7 +613,6 @@ class Fleet:
613
613
  )
614
614
 
615
615
  instance = SyncEnv(client=self.client, **response.json())
616
- instance.instance.load()
617
616
  return instance
618
617
 
619
618
  def make_for_task(self, task: Task) -> SyncEnv:
@@ -665,7 +664,6 @@ class Fleet:
665
664
  else:
666
665
  response = self.client.request("GET", f"/v1/env/instances/{instance_id}")
667
666
  instance = SyncEnv(client=self.client, **response.json())
668
- instance.instance.load()
669
667
  return instance
670
668
 
671
669
  def _create_url_instance(self, base_url: str) -> SyncEnv:
@@ -38,6 +38,47 @@ def _guess_media_type(filename: str) -> str:
38
38
  }.get(ext, "image/png")
39
39
 
40
40
 
41
+ def _guess_file_media_type(filename: str) -> str:
42
+ """Guess media type from filename extension for arbitrary files.
43
+
44
+ Broader than _guess_media_type — covers documents, CAD, data formats, etc.
45
+ """
46
+ ext = filename.lower().rsplit(".", 1)[-1] if "." in filename else ""
47
+ return {
48
+ # Images
49
+ "png": "image/png",
50
+ "jpg": "image/jpeg",
51
+ "jpeg": "image/jpeg",
52
+ "gif": "image/gif",
53
+ "webp": "image/webp",
54
+ "svg": "image/svg+xml",
55
+ # Documents
56
+ "pdf": "application/pdf",
57
+ "txt": "text/plain",
58
+ "md": "text/markdown",
59
+ "html": "text/html",
60
+ "htm": "text/html",
61
+ "csv": "text/csv",
62
+ "tsv": "text/tab-separated-values",
63
+ # Data
64
+ "json": "application/json",
65
+ "xml": "application/xml",
66
+ "yaml": "application/x-yaml",
67
+ "yml": "application/x-yaml",
68
+ # CAD / Engineering
69
+ "step": "application/step",
70
+ "stp": "application/step",
71
+ "stl": "model/stl",
72
+ "iges": "model/iges",
73
+ "igs": "model/iges",
74
+ "obj": "model/obj",
75
+ # Archives
76
+ "zip": "application/zip",
77
+ "gz": "application/gzip",
78
+ "tar": "application/x-tar",
79
+ }.get(ext, "application/octet-stream")
80
+
81
+
41
82
  @dataclass
42
83
  class Criterion:
43
84
  """A single rubric criterion for grading.
@@ -199,6 +240,99 @@ class Image:
199
240
  return d
200
241
 
201
242
 
243
+ class File:
244
+ """Reference to an arbitrary file for LLM judge grading.
245
+
246
+ Supports any file type (PDF, CSV, STEP, STL, etc.) via the Anthropic
247
+ Files API. Use the static constructors to create instances:
248
+ File.s3("s3://bucket/key") - S3 URL, fetched server-side
249
+ File.from_base64(data, "part.step", "application/step") - Inline base64 data
250
+ File.from_env(env, "exported_part.step") - Collect from environment
251
+ """
252
+
253
+ def __init__(
254
+ self,
255
+ *,
256
+ source: str,
257
+ url: Optional[str] = None,
258
+ data: Optional[str] = None,
259
+ filename: Optional[str] = None,
260
+ media_type: Optional[str] = None,
261
+ _env: Optional[Any] = None,
262
+ ):
263
+ self.source = source
264
+ self.url = url
265
+ self.data = data
266
+ self.filename = filename
267
+ self.media_type = media_type
268
+ self._env = _env
269
+
270
+ @staticmethod
271
+ def s3(url: str, media_type: Optional[str] = None) -> "File":
272
+ """Reference a file in S3. The orchestrator fetches it server-side."""
273
+ return File(source="s3", url=url, media_type=media_type)
274
+
275
+ @staticmethod
276
+ def from_base64(
277
+ data: str, filename: str, media_type: Optional[str] = None
278
+ ) -> "File":
279
+ """Inline base64 file data."""
280
+ return File(
281
+ source="base64",
282
+ data=data,
283
+ filename=filename,
284
+ media_type=media_type or _guess_file_media_type(filename),
285
+ )
286
+
287
+ @staticmethod
288
+ def from_env(env: Any, filename: str) -> "File":
289
+ """Collect a file from the environment.
290
+
291
+ In non-agentic mode, the SDK collects the file client-side (DB -> filesystem)
292
+ and sends base64 to the orchestrator.
293
+
294
+ In agentic mode, only the filename hint is sent and the orchestrator collects it.
295
+ """
296
+ return File(source="env", filename=filename, _env=env)
297
+
298
+ def serialize(self, *, label: Optional[str] = None, agentic: bool = False) -> dict:
299
+ """Serialize for the orchestrator API request body."""
300
+ d: dict
301
+ if self.source == "s3":
302
+ d = {"source": "s3", "url": self.url}
303
+ if self.media_type:
304
+ d["media_type"] = self.media_type
305
+ elif self.source == "base64":
306
+ d = {
307
+ "source": "base64",
308
+ "data": self.data,
309
+ "filename": self.filename,
310
+ "media_type": self.media_type or _guess_file_media_type(self.filename or "file"),
311
+ }
312
+ elif self.source == "collect":
313
+ d = {"source": "collect", "selector": self.filename}
314
+ elif self.source == "env":
315
+ if agentic:
316
+ d = {"source": "collect", "selector": self.filename}
317
+ else:
318
+ b64 = _collect_file_from_env(self._env, self.filename)
319
+ if b64 is None:
320
+ d = {"source": "collect", "selector": self.filename}
321
+ else:
322
+ d = {
323
+ "source": "base64",
324
+ "data": b64,
325
+ "filename": self.filename,
326
+ "media_type": _guess_file_media_type(self.filename or "file"),
327
+ }
328
+ else:
329
+ raise ValueError(f"Unknown file source: {self.source}")
330
+
331
+ if label is not None:
332
+ d["label"] = label
333
+ return d
334
+
335
+
202
336
  class JudgeResult(float):
203
337
  """Float subclass that carries grading details.
204
338
 
@@ -412,6 +546,102 @@ async def _collect_image_from_env_async(env: Any, filename: str) -> Optional[str
412
546
  return None
413
547
 
414
548
 
549
+ def _collect_file_from_env(env: Any, filename: str) -> Optional[str]:
550
+ """Collect a file from the environment using DB -> filesystem strategies.
551
+
552
+ Similar to _collect_image_from_env but skips notebook cell output strategy
553
+ (which is image-specific). Returns base64-encoded file data, or None if not found.
554
+ """
555
+ # Strategy 1: DB files table
556
+ try:
557
+ current = env.db("current")
558
+ where = f"path = '{filename}' OR path LIKE '%/{filename}'"
559
+ rows = _extract_query_rows(
560
+ current.query(f"SELECT path, hex(content) AS content_hex FROM files WHERE {where}")
561
+ )
562
+ candidates = {}
563
+ for row in rows:
564
+ path, chex = row.get("path", ""), row.get("content_hex", "")
565
+ if path and chex:
566
+ try:
567
+ candidates[path] = bytes.fromhex(chex)
568
+ except Exception:
569
+ pass
570
+ # Prefer non-dataroom paths
571
+ non_dr = [p for p in candidates if not p.startswith("dataroom/")]
572
+ best = sorted(non_dr or list(candidates.keys()), key=len)
573
+ if best:
574
+ logger.debug("Loaded file from DB: %s", best[0])
575
+ return base64.b64encode(candidates[best[0]]).decode()
576
+ except Exception as e:
577
+ logger.debug("DB file query failed: %s", e)
578
+
579
+ # Strategy 2: Filesystem fallback
580
+ search_paths = [
581
+ filename,
582
+ f"/app/workspace/{filename}",
583
+ f"/workspace/{filename}",
584
+ ]
585
+ for fp in search_paths:
586
+ try:
587
+ if os.path.exists(fp):
588
+ with open(fp, "rb") as f:
589
+ logger.debug("Loaded file from filesystem: %s", fp)
590
+ return base64.b64encode(f.read()).decode()
591
+ except Exception:
592
+ pass
593
+
594
+ return None
595
+
596
+
597
+ async def _collect_file_from_env_async(env: Any, filename: str) -> Optional[str]:
598
+ """Async version of _collect_file_from_env.
599
+
600
+ Collects a file from an AsyncEnv using DB -> filesystem strategies.
601
+ Returns base64-encoded file data, or None if not found.
602
+ """
603
+ # Strategy 1: DB files table
604
+ try:
605
+ current = env.db("current")
606
+ where = f"path = '{filename}' OR path LIKE '%/{filename}'"
607
+ rows = _extract_query_rows(
608
+ await current.query(f"SELECT path, hex(content) AS content_hex FROM files WHERE {where}")
609
+ )
610
+ candidates = {}
611
+ for row in rows:
612
+ path, chex = row.get("path", ""), row.get("content_hex", "")
613
+ if path and chex:
614
+ try:
615
+ candidates[path] = bytes.fromhex(chex)
616
+ except Exception:
617
+ pass
618
+ # Prefer non-dataroom paths
619
+ non_dr = [p for p in candidates if not p.startswith("dataroom/")]
620
+ best = sorted(non_dr or list(candidates.keys()), key=len)
621
+ if best:
622
+ logger.debug("Loaded file from DB (async): %s", best[0])
623
+ return base64.b64encode(candidates[best[0]]).decode()
624
+ except Exception as e:
625
+ logger.debug("DB file query failed (async): %s", e)
626
+
627
+ # Strategy 2: Filesystem fallback
628
+ search_paths = [
629
+ filename,
630
+ f"/app/workspace/{filename}",
631
+ f"/workspace/{filename}",
632
+ ]
633
+ for fp in search_paths:
634
+ try:
635
+ if os.path.exists(fp):
636
+ with open(fp, "rb") as f:
637
+ logger.debug("Loaded file from filesystem (async): %s", fp)
638
+ return base64.b64encode(f.read()).decode()
639
+ except Exception:
640
+ pass
641
+
642
+ return None
643
+
644
+
415
645
  # ---------------------------------------------------------------------------
416
646
  # Accumulator printing (verifier protocol)
417
647
  # ---------------------------------------------------------------------------
@@ -447,6 +677,12 @@ def _print_accumulators(data: dict) -> None:
447
677
  print(json.dumps(golden_urls))
448
678
  print("<<< GOLDEN_URLS <<<")
449
679
 
680
+ agent_steps = acc.get("agent_steps")
681
+ if agent_steps:
682
+ print(">>> AGENT_STEPS >>>")
683
+ print(json.dumps(agent_steps))
684
+ print("<<< AGENT_STEPS <<<")
685
+
450
686
  timing = acc.get("timing")
451
687
  if timing:
452
688
  print(
@@ -466,6 +702,7 @@ def _print_judge_call_start(
466
702
  images: Optional[Dict[str, "Image"]],
467
703
  agentic: bool,
468
704
  model: Optional[str],
705
+ files: Optional[Dict[str, "File"]] = None,
469
706
  ) -> None:
470
707
  """Print info when initiating a judge grading call."""
471
708
  mode = "agentic" if agentic else "standard"
@@ -488,6 +725,18 @@ def _print_judge_call_start(
488
725
  else:
489
726
  print("[C] No images provided")
490
727
 
728
+ if files:
729
+ for label, f in files.items():
730
+ src = f.source
731
+ detail = ""
732
+ if f.url:
733
+ detail = f" url={f.url}"
734
+ elif f.filename:
735
+ detail = f" file={f.filename}"
736
+ if f.media_type:
737
+ detail += f" type={f.media_type}"
738
+ print(f"[C] File '{label}': source={src}{detail}")
739
+
491
740
 
492
741
  def _build_grade_request(
493
742
  instance_id: str,
@@ -500,6 +749,7 @@ def _build_grade_request(
500
749
  reference_claims: Optional[str] = None,
501
750
  conversation: Optional[List[dict]] = None,
502
751
  images: Optional[Dict[str, Image]] = None,
752
+ files: Optional[Dict[str, "File"]] = None,
503
753
  model: Optional[str] = None,
504
754
  provider: Optional[str] = None,
505
755
  agentic: bool = False,
@@ -554,6 +804,13 @@ def _build_grade_request(
554
804
  for label, img in images.items()
555
805
  ]
556
806
 
807
+ # Serialize files as labeled array
808
+ if files:
809
+ body["files"] = [
810
+ f.serialize(label=label, agentic=agentic)
811
+ for label, f in files.items()
812
+ ]
813
+
557
814
  return body
558
815
 
559
816
 
@@ -566,6 +823,54 @@ def _parse_grade_response(data: dict) -> JudgeResult:
566
823
  return JudgeResult(score, details=data)
567
824
 
568
825
 
826
+ def _print_criteria_markers(criteria: list) -> None:
827
+ """Emit ``>>> CRITERIA >>>`` stdout markers for structured criteria display.
828
+
829
+ The orchestrator (theseus PR #1967) scans verifier stdout for these
830
+ markers and wraps the execution result so the client (client PR #1737)
831
+ can render an expandable rubric breakdown.
832
+
833
+ Converts from the orchestrator judge-response format::
834
+
835
+ {"name": str, "score": int, "max_score": int, "reasoning": str}
836
+
837
+ to the client-expected marker format::
838
+
839
+ {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
840
+
841
+ Each criterion's score is normalised to a 0.0–1.0 float using its own
842
+ ``max_score``.
843
+ """
844
+ marker_criteria = []
845
+ for c in criteria:
846
+ name = c.get("name", "")
847
+ cscore = c.get("score", 0)
848
+ cmax = c.get("max_score", 0)
849
+
850
+ # Normalise per-criterion score to 0.0–1.0
851
+ if cmax and float(cmax) > 0:
852
+ norm_score = float(cscore) / float(cmax)
853
+ else:
854
+ norm_score = float(cscore)
855
+
856
+ entry: dict = {
857
+ "criteria": name,
858
+ "score": round(norm_score, 4),
859
+ "score_out_of": 1.0,
860
+ }
861
+
862
+ reasoning = c.get("reasoning", "")
863
+ if reasoning:
864
+ entry["description"] = reasoning
865
+
866
+ marker_criteria.append(entry)
867
+
868
+ if marker_criteria:
869
+ print(">>> CRITERIA >>>")
870
+ print(json.dumps(marker_criteria))
871
+ print("<<< CRITERIA <<<")
872
+
873
+
569
874
  def _print_judge_result(data: dict) -> None:
570
875
  """Print detailed judge grading result for verifier stdout capture."""
571
876
  model = data.get("model_used", "unknown")
@@ -591,6 +896,12 @@ def _print_judge_result(data: dict) -> None:
591
896
  if len(reasoning) > 200:
592
897
  reasoning = reasoning[:200] + "..."
593
898
  print(f"[C] {name}: {cscore}/{cmax} — {reasoning}")
899
+
900
+ # Emit structured criteria via stdout markers so the orchestrator
901
+ # (_extract_criteria_from_stdout) and client can render a rubric
902
+ # breakdown. Schema per element:
903
+ # {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
904
+ _print_criteria_markers(criteria)
594
905
  else:
595
906
  print(f"[C] Score: {normalized:.2f}")
596
907
 
@@ -605,6 +916,26 @@ def _print_judge_result(data: dict) -> None:
605
916
  for url in golden_urls:
606
917
  print(f"[C] Gold reference: {url}")
607
918
 
919
+ # Print agentic judge steps if present
920
+ agent_steps = (data.get("accumulators") or {}).get("agent_steps")
921
+ if agent_steps:
922
+ print(f"[C] Agentic judge: {len(agent_steps)} steps")
923
+ for step in agent_steps:
924
+ stype = step.get("type", "?")
925
+ if stype == "mcp_connect":
926
+ print(f"[C] MCP connected ({step.get('tools_available', '?')} tools)")
927
+ elif stype == "tool_call":
928
+ tool = step.get("tool", "?")
929
+ turn = step.get("turn", "?")
930
+ is_err = step.get("is_error", False)
931
+ result_preview = step.get("result", "")[:100]
932
+ status = "ERROR" if is_err else "ok"
933
+ print(f"[C] Turn {turn}: {tool}() → {status}: {result_preview}")
934
+ elif stype == "final_response":
935
+ print(f"[C] Turn {step.get('turn', '?')}: final response")
936
+ elif stype == "max_turns_reached":
937
+ print(f"[C] Max turns reached ({step.get('turns_used', '?')})")
938
+
608
939
 
609
940
  # ---------------------------------------------------------------------------
610
941
  # Sync judge
@@ -632,6 +963,7 @@ class SyncJudge:
632
963
  reference_claims: Optional[str] = None,
633
964
  conversation: Optional[List[dict]] = None,
634
965
  images: Optional[Dict[str, Image]] = None,
966
+ files: Optional[Dict[str, File]] = None,
635
967
  model: Optional[str] = None,
636
968
  provider: Optional[str] = None,
637
969
  agentic: bool = False,
@@ -651,7 +983,8 @@ class SyncJudge:
651
983
  context: Additional context for the judge.
652
984
  reference_claims: Reference analysis claims (folded into context).
653
985
  conversation: Conversation history as list of message dicts.
654
- images: List of Image objects for the judge.
986
+ images: Named Image objects for the judge.
987
+ files: Named File objects for the judge (PDF, CSV, STEP, etc.).
655
988
  model: Override LLM model (server picks default if None).
656
989
  provider: Override LLM provider (server picks default if None).
657
990
  agentic: If True, the orchestrator collects artifacts from the instance.
@@ -668,6 +1001,7 @@ class SyncJudge:
668
1001
  reference_claims=reference_claims,
669
1002
  conversation=conversation,
670
1003
  images=images,
1004
+ files=files,
671
1005
  model=model,
672
1006
  provider=provider,
673
1007
  agentic=agentic,
@@ -675,6 +1009,6 @@ class SyncJudge:
675
1009
  task_id=task_id,
676
1010
  )
677
1011
 
678
- _print_judge_call_start(rubric, images, agentic, model)
1012
+ _print_judge_call_start(rubric, images, agentic, model, files=files)
679
1013
  response = self._client.request("POST", "/v1/judge/grade", json=body)
680
1014
  return _parse_grade_response(response.json())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.114
3
+ Version: 0.2.116
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -117,6 +117,7 @@ tests/test_app_method.py
117
117
  tests/test_expect_exactly.py
118
118
  tests/test_expect_only.py
119
119
  tests/test_instance_dispatch.py
120
+ tests/test_judge_criteria_markers.py
120
121
  tests/test_sqlite_resource_dual_mode.py
121
122
  tests/test_sqlite_shared_memory_behavior.py
122
123
  tests/test_verifier_from_string.py
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "fleet-python"
7
7
 
8
- version = "0.2.114"
8
+ version = "0.2.116"
9
9
  description = "Python SDK for Fleet environments"
10
10
  authors = [
11
11
  {name = "Fleet AI", email = "nic@fleet.so"},