fleet-python 0.2.66b3__tar.gz → 0.2.67__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fleet-python might be problematic. Click here for more details.
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/PKG-INFO +1 -1
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/export_tasks.py +11 -1
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/import_tasks.py +15 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/client.py +4 -2
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/models.py +3 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/tasks.py +29 -19
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/client.py +4 -2
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/models.py +3 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/tasks.py +24 -18
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet_python.egg-info/PKG-INFO +1 -1
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet_python.egg-info/SOURCES.txt +1 -3
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/pyproject.toml +1 -1
- fleet_python-0.2.66b3/fleet/verifiers/parsing.py +0 -106
- fleet_python-0.2.66b3/tests/test_verifier_security.py +0 -427
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/LICENSE +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/README.md +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/diff_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_account.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_sync.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_task.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/openai_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/quickstart.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/base.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/base.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/config.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/env/client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/global_client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/types.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/db.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/scripts/unasync.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/setup.cfg +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/tests/__init__.py +0 -0
- {fleet_python-0.2.66b3 → fleet_python-0.2.67}/tests/test_verifier_from_string.py +0 -0
|
@@ -27,6 +27,12 @@ def main():
|
|
|
27
27
|
help="Optional task project key to filter tasks",
|
|
28
28
|
default=None,
|
|
29
29
|
)
|
|
30
|
+
parser.add_argument(
|
|
31
|
+
"--env-key",
|
|
32
|
+
"-e",
|
|
33
|
+
help="Optional environment key to filter tasks",
|
|
34
|
+
default=None,
|
|
35
|
+
)
|
|
30
36
|
parser.add_argument(
|
|
31
37
|
"--output",
|
|
32
38
|
"-o",
|
|
@@ -42,12 +48,13 @@ def main():
|
|
|
42
48
|
args.project_key is not None,
|
|
43
49
|
args.task_keys is not None,
|
|
44
50
|
args.task_project_key is not None,
|
|
51
|
+
args.env_key is not None,
|
|
45
52
|
]
|
|
46
53
|
)
|
|
47
54
|
|
|
48
55
|
if filters_specified > 1:
|
|
49
56
|
parser.error(
|
|
50
|
-
"Cannot specify multiple filters. Use only one of --project-key, --task-keys,
|
|
57
|
+
"Cannot specify multiple filters. Use only one of --project-key, --task-keys, --task-project-key, or --env-key."
|
|
51
58
|
)
|
|
52
59
|
|
|
53
60
|
# Get account info
|
|
@@ -66,6 +73,9 @@ def main():
|
|
|
66
73
|
elif args.task_project_key:
|
|
67
74
|
print(f"Loading tasks from task project: {args.task_project_key}")
|
|
68
75
|
tasks = fleet.load_tasks(task_project_key=args.task_project_key)
|
|
76
|
+
elif args.env_key:
|
|
77
|
+
print(f"Loading tasks from environment: {args.env_key}")
|
|
78
|
+
tasks = fleet.load_tasks(env_key=args.env_key)
|
|
69
79
|
else:
|
|
70
80
|
print("Loading all tasks")
|
|
71
81
|
tasks = fleet.load_tasks()
|
|
@@ -210,9 +210,18 @@ async def main():
|
|
|
210
210
|
action="store_true",
|
|
211
211
|
help="Skip the verifier sanity check (not recommended)",
|
|
212
212
|
)
|
|
213
|
+
parser.add_argument(
|
|
214
|
+
"--sanity-check-only",
|
|
215
|
+
action="store_true",
|
|
216
|
+
help="Only run the sanity check without importing tasks",
|
|
217
|
+
)
|
|
213
218
|
|
|
214
219
|
args = parser.parse_args()
|
|
215
220
|
|
|
221
|
+
# Validate conflicting flags
|
|
222
|
+
if args.skip_sanity_check and args.sanity_check_only:
|
|
223
|
+
parser.error("Cannot use --skip-sanity-check and --sanity-check-only together")
|
|
224
|
+
|
|
216
225
|
# Load and parse the JSON file
|
|
217
226
|
try:
|
|
218
227
|
with open(args.json_file, "r", encoding="utf-8") as f:
|
|
@@ -286,6 +295,12 @@ async def main():
|
|
|
286
295
|
success, errors = await run_verifier_sanity_check(tasks, client)
|
|
287
296
|
if not success:
|
|
288
297
|
sys.exit(1)
|
|
298
|
+
|
|
299
|
+
# If only doing sanity check, exit successfully here
|
|
300
|
+
if args.sanity_check_only:
|
|
301
|
+
print("\n✓ Sanity check complete! (--sanity-check-only)")
|
|
302
|
+
print("Tasks are ready to import.")
|
|
303
|
+
sys.exit(0)
|
|
289
304
|
else:
|
|
290
305
|
print("\n⚠️ Skipping sanity check (--skip-sanity-check)")
|
|
291
306
|
|
|
@@ -553,7 +553,7 @@ class AsyncFleet:
|
|
|
553
553
|
env_variables=task_response.env_variables or {},
|
|
554
554
|
verifier_func=verifier_func, # Set verifier code
|
|
555
555
|
verifier=verifier, # Use created verifier or None
|
|
556
|
-
metadata={},
|
|
556
|
+
metadata=task_response.metadata or {},
|
|
557
557
|
output_json_schema=getattr(task_response, "output_json_schema", None), # Get output_json_schema if available
|
|
558
558
|
)
|
|
559
559
|
tasks.append(task)
|
|
@@ -708,6 +708,7 @@ class AsyncFleet:
|
|
|
708
708
|
task_key: str,
|
|
709
709
|
prompt: Optional[str] = None,
|
|
710
710
|
verifier_code: Optional[str] = None,
|
|
711
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
711
712
|
) -> TaskResponse:
|
|
712
713
|
"""Update an existing task.
|
|
713
714
|
|
|
@@ -715,11 +716,12 @@ class AsyncFleet:
|
|
|
715
716
|
task_key: The key of the task to update
|
|
716
717
|
prompt: New prompt text for the task (optional)
|
|
717
718
|
verifier_code: Python code for task verification (optional)
|
|
719
|
+
metadata: Additional metadata for the task (optional)
|
|
718
720
|
|
|
719
721
|
Returns:
|
|
720
722
|
TaskResponse containing the updated task details
|
|
721
723
|
"""
|
|
722
|
-
payload = TaskUpdateRequest(prompt=prompt, verifier_code=verifier_code)
|
|
724
|
+
payload = TaskUpdateRequest(prompt=prompt, verifier_code=verifier_code, metadata=metadata)
|
|
723
725
|
response = await self.client.request(
|
|
724
726
|
"PUT", f"/v1/tasks/{task_key}", json=payload.model_dump(exclude_none=True)
|
|
725
727
|
)
|
|
@@ -155,12 +155,14 @@ class TaskRequest(BaseModel):
|
|
|
155
155
|
verifier_id: Optional[str] = Field(None, title="Verifier Id")
|
|
156
156
|
version: Optional[str] = Field(None, title="Version")
|
|
157
157
|
env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
|
|
158
|
+
metadata: Optional[Dict[str, Any]] = Field(None, title="Metadata")
|
|
158
159
|
output_json_schema: Optional[Dict[str, Any]] = Field(None, title="Output Json Schema")
|
|
159
160
|
|
|
160
161
|
|
|
161
162
|
class TaskUpdateRequest(BaseModel):
|
|
162
163
|
prompt: Optional[str] = Field(None, title="Prompt")
|
|
163
164
|
verifier_code: Optional[str] = Field(None, title="Verifier Code")
|
|
165
|
+
metadata: Optional[Dict[str, Any]] = Field(None, title="Metadata")
|
|
164
166
|
|
|
165
167
|
|
|
166
168
|
class VerifierData(BaseModel):
|
|
@@ -186,6 +188,7 @@ class TaskResponse(BaseModel):
|
|
|
186
188
|
data_version: Optional[str] = Field(None, title="Data Version")
|
|
187
189
|
env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
|
|
188
190
|
verifier: Optional[VerifierData] = Field(None, title="Verifier")
|
|
191
|
+
metadata: Optional[Dict[str, Any]] = Field(None, title="Metadata")
|
|
189
192
|
output_json_schema: Optional[Dict[str, Any]] = Field(None, title="Output Json Schema")
|
|
190
193
|
|
|
191
194
|
|
|
@@ -209,17 +209,19 @@ class Task(BaseModel):
|
|
|
209
209
|
)
|
|
210
210
|
self.verifier = verifier
|
|
211
211
|
|
|
212
|
-
async def make_env(
|
|
212
|
+
async def make_env(
|
|
213
|
+
self,
|
|
214
|
+
region: Optional[str] = None,
|
|
215
|
+
image_type: Optional[str] = None,
|
|
216
|
+
ttl_seconds: Optional[int] = None,
|
|
217
|
+
):
|
|
213
218
|
"""Create an environment instance for this task's environment.
|
|
214
219
|
|
|
215
|
-
Uses the task's env_id (and version if present) to create the env.
|
|
220
|
+
Alias for make() method. Uses the task's env_id (and version if present) to create the env.
|
|
216
221
|
"""
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
from .client import AsyncFleet
|
|
221
|
-
|
|
222
|
-
return await AsyncFleet().make(env_key=self.env_key, region=region)
|
|
222
|
+
return await self.make(
|
|
223
|
+
region=region, image_type=image_type, ttl_seconds=ttl_seconds
|
|
224
|
+
)
|
|
223
225
|
|
|
224
226
|
async def make(
|
|
225
227
|
self,
|
|
@@ -279,14 +281,17 @@ def verifier_from_string(
|
|
|
279
281
|
"""
|
|
280
282
|
try:
|
|
281
283
|
import inspect
|
|
284
|
+
import re
|
|
282
285
|
from .verifiers.verifier import AsyncVerifierFunction
|
|
283
286
|
from fleet.verifiers.code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
|
|
284
287
|
from fleet.verifiers.db import IgnoreConfig
|
|
285
|
-
from fleet.verifiers.parsing import parse_and_validate_verifier
|
|
286
288
|
|
|
287
|
-
#
|
|
288
|
-
#
|
|
289
|
-
|
|
289
|
+
# Strip @verifier decorator if present to avoid double-wrapping
|
|
290
|
+
# Remove lines like: @verifier(key="...")
|
|
291
|
+
cleaned_code = re.sub(r"@verifier\([^)]*\)\s*\n", "", verifier_func)
|
|
292
|
+
# Also remove the verifier import if present
|
|
293
|
+
cleaned_code = re.sub(r"from fleet import.*verifier.*\n", "", cleaned_code)
|
|
294
|
+
cleaned_code = re.sub(r"import.*verifier.*\n", "", cleaned_code)
|
|
290
295
|
|
|
291
296
|
# Create a local namespace for executing the code
|
|
292
297
|
local_namespace = {
|
|
@@ -296,9 +301,8 @@ def verifier_from_string(
|
|
|
296
301
|
"Environment": object, # Add Environment type if needed
|
|
297
302
|
}
|
|
298
303
|
|
|
299
|
-
# Execute the verifier code in the namespace
|
|
300
|
-
|
|
301
|
-
exec(verifier_func, globals(), local_namespace)
|
|
304
|
+
# Execute the cleaned verifier code in the namespace
|
|
305
|
+
exec(cleaned_code, globals(), local_namespace)
|
|
302
306
|
|
|
303
307
|
# Find the function that was defined (not imported)
|
|
304
308
|
# Functions defined via exec have co_filename == '<string>'
|
|
@@ -384,7 +388,7 @@ async def load_tasks(
|
|
|
384
388
|
|
|
385
389
|
|
|
386
390
|
async def update_task(
|
|
387
|
-
task_key: str, prompt: Optional[str] = None, verifier_code: Optional[str] = None
|
|
391
|
+
task_key: str, prompt: Optional[str] = None, verifier_code: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None
|
|
388
392
|
):
|
|
389
393
|
"""Convenience function to update an existing task.
|
|
390
394
|
|
|
@@ -392,6 +396,7 @@ async def update_task(
|
|
|
392
396
|
task_key: The key of the task to update
|
|
393
397
|
prompt: New prompt text for the task (optional)
|
|
394
398
|
verifier_code: Python code for task verification (optional)
|
|
399
|
+
metadata: Additional metadata for the task (optional)
|
|
395
400
|
|
|
396
401
|
Returns:
|
|
397
402
|
TaskResponse containing the updated task details
|
|
@@ -399,16 +404,19 @@ async def update_task(
|
|
|
399
404
|
Examples:
|
|
400
405
|
response = await fleet.update_task("my-task", prompt="New prompt text")
|
|
401
406
|
response = await fleet.update_task("my-task", verifier_code="def verify(env): return True")
|
|
407
|
+
response = await fleet.update_task("my-task", metadata={"seed": 42, "story": "Updated story"})
|
|
402
408
|
"""
|
|
403
409
|
from .global_client import get_client
|
|
404
410
|
|
|
405
411
|
client = get_client()
|
|
406
412
|
return await client.update_task(
|
|
407
|
-
task_key=task_key, prompt=prompt, verifier_code=verifier_code
|
|
413
|
+
task_key=task_key, prompt=prompt, verifier_code=verifier_code, metadata=metadata
|
|
408
414
|
)
|
|
409
415
|
|
|
410
416
|
|
|
411
|
-
async def get_task(
|
|
417
|
+
async def get_task(
|
|
418
|
+
task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None
|
|
419
|
+
):
|
|
412
420
|
"""Convenience function to get a task by key and optional version.
|
|
413
421
|
|
|
414
422
|
Args:
|
|
@@ -427,7 +435,9 @@ async def get_task(task_key: str, version_id: Optional[str] = None, team_id: Opt
|
|
|
427
435
|
from .global_client import get_client
|
|
428
436
|
|
|
429
437
|
client = get_client()
|
|
430
|
-
return await client.get_task(
|
|
438
|
+
return await client.get_task(
|
|
439
|
+
task_key=task_key, version_id=version_id, team_id=team_id
|
|
440
|
+
)
|
|
431
441
|
|
|
432
442
|
|
|
433
443
|
async def import_task(task: Task, project_key: Optional[str] = None):
|
|
@@ -559,7 +559,7 @@ class Fleet:
|
|
|
559
559
|
env_variables=task_response.env_variables or {},
|
|
560
560
|
verifier_func=verifier_func, # Set verifier code
|
|
561
561
|
verifier=verifier, # Use created verifier or None
|
|
562
|
-
metadata={},
|
|
562
|
+
metadata=task_response.metadata or {},
|
|
563
563
|
output_json_schema=getattr(task_response, "output_json_schema", None), # Get output_json_schema if available
|
|
564
564
|
)
|
|
565
565
|
tasks.append(task)
|
|
@@ -706,6 +706,7 @@ class Fleet:
|
|
|
706
706
|
task_key: str,
|
|
707
707
|
prompt: Optional[str] = None,
|
|
708
708
|
verifier_code: Optional[str] = None,
|
|
709
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
709
710
|
) -> TaskResponse:
|
|
710
711
|
"""Update an existing task.
|
|
711
712
|
|
|
@@ -713,11 +714,12 @@ class Fleet:
|
|
|
713
714
|
task_key: The key of the task to update
|
|
714
715
|
prompt: New prompt text for the task (optional)
|
|
715
716
|
verifier_code: Python code for task verification (optional)
|
|
717
|
+
metadata: Additional metadata for the task (optional)
|
|
716
718
|
|
|
717
719
|
Returns:
|
|
718
720
|
TaskResponse containing the updated task details
|
|
719
721
|
"""
|
|
720
|
-
payload = TaskUpdateRequest(prompt=prompt, verifier_code=verifier_code)
|
|
722
|
+
payload = TaskUpdateRequest(prompt=prompt, verifier_code=verifier_code, metadata=metadata)
|
|
721
723
|
response = self.client.request(
|
|
722
724
|
"PUT", f"/v1/tasks/{task_key}", json=payload.model_dump(exclude_none=True)
|
|
723
725
|
)
|
|
@@ -158,6 +158,7 @@ class TaskRequest(BaseModel):
|
|
|
158
158
|
verifier_id: Optional[str] = Field(None, title="Verifier Id")
|
|
159
159
|
version: Optional[str] = Field(None, title="Version")
|
|
160
160
|
env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
|
|
161
|
+
metadata: Optional[Dict[str, Any]] = Field(None, title="Metadata")
|
|
161
162
|
output_json_schema: Optional[Dict[str, Any]] = Field(
|
|
162
163
|
None, title="Output Json Schema"
|
|
163
164
|
)
|
|
@@ -166,6 +167,7 @@ class TaskRequest(BaseModel):
|
|
|
166
167
|
class TaskUpdateRequest(BaseModel):
|
|
167
168
|
prompt: Optional[str] = Field(None, title="Prompt")
|
|
168
169
|
verifier_code: Optional[str] = Field(None, title="Verifier Code")
|
|
170
|
+
metadata: Optional[Dict[str, Any]] = Field(None, title="Metadata")
|
|
169
171
|
|
|
170
172
|
|
|
171
173
|
class VerifierData(BaseModel):
|
|
@@ -191,6 +193,7 @@ class TaskResponse(BaseModel):
|
|
|
191
193
|
data_version: Optional[str] = Field(None, title="Data Version")
|
|
192
194
|
env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
|
|
193
195
|
verifier: Optional[VerifierData] = Field(None, title="Verifier")
|
|
196
|
+
metadata: Optional[Dict[str, Any]] = Field(None, title="Metadata")
|
|
194
197
|
output_json_schema: Optional[Dict[str, Any]] = Field(
|
|
195
198
|
None, title="Output Json Schema"
|
|
196
199
|
)
|
|
@@ -202,17 +202,17 @@ class Task(BaseModel):
|
|
|
202
202
|
)
|
|
203
203
|
self.verifier = verifier
|
|
204
204
|
|
|
205
|
-
def make_env(
|
|
205
|
+
def make_env(
|
|
206
|
+
self,
|
|
207
|
+
region: Optional[str] = None,
|
|
208
|
+
image_type: Optional[str] = None,
|
|
209
|
+
ttl_seconds: Optional[int] = None,
|
|
210
|
+
):
|
|
206
211
|
"""Create an environment instance for this task's environment.
|
|
207
212
|
|
|
208
|
-
Uses the task's env_id (and version if present) to create the env.
|
|
213
|
+
Alias for make() method. Uses the task's env_id (and version if present) to create the env.
|
|
209
214
|
"""
|
|
210
|
-
|
|
211
|
-
raise ValueError("Task has no env_id defined")
|
|
212
|
-
# Deferred import to avoid circular dependencies
|
|
213
|
-
from .client import Fleet
|
|
214
|
-
|
|
215
|
-
return Fleet().make(env_key=self.env_key, region=region)
|
|
215
|
+
return self.make(region=region, image_type=image_type, ttl_seconds=ttl_seconds)
|
|
216
216
|
|
|
217
217
|
def make(
|
|
218
218
|
self,
|
|
@@ -272,14 +272,17 @@ def verifier_from_string(
|
|
|
272
272
|
"""
|
|
273
273
|
try:
|
|
274
274
|
import inspect
|
|
275
|
+
import re
|
|
275
276
|
from .verifiers import SyncVerifierFunction
|
|
276
277
|
from .verifiers.code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
|
|
277
278
|
from .verifiers.db import IgnoreConfig
|
|
278
|
-
from .verifiers.parsing import parse_and_validate_verifier
|
|
279
279
|
|
|
280
|
-
#
|
|
281
|
-
#
|
|
282
|
-
|
|
280
|
+
# Strip @verifier decorator if present to avoid double-wrapping
|
|
281
|
+
# Remove lines like: @verifier(key="...")
|
|
282
|
+
cleaned_code = re.sub(r"@verifier\([^)]*\)\s*\n", "", verifier_func)
|
|
283
|
+
# Also remove the verifier import if present
|
|
284
|
+
cleaned_code = re.sub(r"from fleet import.*verifier.*\n", "", cleaned_code)
|
|
285
|
+
cleaned_code = re.sub(r"import.*verifier.*\n", "", cleaned_code)
|
|
283
286
|
|
|
284
287
|
# Create a globals namespace with all required imports
|
|
285
288
|
exec_globals = globals().copy()
|
|
@@ -295,9 +298,8 @@ def verifier_from_string(
|
|
|
295
298
|
# Create a local namespace for executing the code
|
|
296
299
|
local_namespace = {}
|
|
297
300
|
|
|
298
|
-
# Execute the verifier code in the namespace
|
|
299
|
-
|
|
300
|
-
exec(verifier_func, exec_globals, local_namespace)
|
|
301
|
+
# Execute the cleaned verifier code in the namespace
|
|
302
|
+
exec(cleaned_code, exec_globals, local_namespace)
|
|
301
303
|
|
|
302
304
|
# Find the function that was defined (not imported)
|
|
303
305
|
# Functions defined via exec have co_filename == '<string>'
|
|
@@ -387,7 +389,7 @@ def load_tasks(
|
|
|
387
389
|
|
|
388
390
|
|
|
389
391
|
def update_task(
|
|
390
|
-
task_key: str, prompt: Optional[str] = None, verifier_code: Optional[str] = None
|
|
392
|
+
task_key: str, prompt: Optional[str] = None, verifier_code: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None
|
|
391
393
|
):
|
|
392
394
|
"""Convenience function to update an existing task.
|
|
393
395
|
|
|
@@ -395,6 +397,7 @@ def update_task(
|
|
|
395
397
|
task_key: The key of the task to update
|
|
396
398
|
prompt: New prompt text for the task (optional)
|
|
397
399
|
verifier_code: Python code for task verification (optional)
|
|
400
|
+
metadata: Additional metadata for the task (optional)
|
|
398
401
|
|
|
399
402
|
Returns:
|
|
400
403
|
TaskResponse containing the updated task details
|
|
@@ -402,16 +405,19 @@ def update_task(
|
|
|
402
405
|
Examples:
|
|
403
406
|
response = fleet.update_task("my-task", prompt="New prompt text")
|
|
404
407
|
response = fleet.update_task("my-task", verifier_code="def verify(env): return True")
|
|
408
|
+
response = fleet.update_task("my-task", metadata={"seed": 42, "story": "Updated story"})
|
|
405
409
|
"""
|
|
406
410
|
from .global_client import get_client
|
|
407
411
|
|
|
408
412
|
client = get_client()
|
|
409
413
|
return client.update_task(
|
|
410
|
-
task_key=task_key, prompt=prompt, verifier_code=verifier_code
|
|
414
|
+
task_key=task_key, prompt=prompt, verifier_code=verifier_code, metadata=metadata
|
|
411
415
|
)
|
|
412
416
|
|
|
413
417
|
|
|
414
|
-
def get_task(
|
|
418
|
+
def get_task(
|
|
419
|
+
task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None
|
|
420
|
+
):
|
|
415
421
|
"""Convenience function to get a task by key and optional version.
|
|
416
422
|
|
|
417
423
|
Args:
|
|
@@ -70,7 +70,6 @@ fleet/verifiers/code.py
|
|
|
70
70
|
fleet/verifiers/db.py
|
|
71
71
|
fleet/verifiers/decorator.py
|
|
72
72
|
fleet/verifiers/parse.py
|
|
73
|
-
fleet/verifiers/parsing.py
|
|
74
73
|
fleet/verifiers/sql_differ.py
|
|
75
74
|
fleet/verifiers/verifier.py
|
|
76
75
|
fleet_python.egg-info/PKG-INFO
|
|
@@ -81,5 +80,4 @@ fleet_python.egg-info/top_level.txt
|
|
|
81
80
|
scripts/fix_sync_imports.py
|
|
82
81
|
scripts/unasync.py
|
|
83
82
|
tests/__init__.py
|
|
84
|
-
tests/test_verifier_from_string.py
|
|
85
|
-
tests/test_verifier_security.py
|
|
83
|
+
tests/test_verifier_from_string.py
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
"""Verifier code parsing and validation utilities."""
|
|
2
|
-
|
|
3
|
-
import ast
|
|
4
|
-
from typing import Set
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def parse_and_validate_verifier(code: str) -> str:
|
|
8
|
-
"""Parse and validate verifier code, returning the first function name.
|
|
9
|
-
|
|
10
|
-
This function ensures that the verifier code only contains safe declarative
|
|
11
|
-
statements and does not execute arbitrary code during import.
|
|
12
|
-
|
|
13
|
-
Args:
|
|
14
|
-
code: Python code string containing the verifier function
|
|
15
|
-
|
|
16
|
-
Returns:
|
|
17
|
-
Name of the first function found in the code
|
|
18
|
-
|
|
19
|
-
Raises:
|
|
20
|
-
ValueError: If code is invalid or contains unsafe statements
|
|
21
|
-
SyntaxError: If code has syntax errors
|
|
22
|
-
"""
|
|
23
|
-
try:
|
|
24
|
-
tree = ast.parse(code)
|
|
25
|
-
except SyntaxError as e:
|
|
26
|
-
raise SyntaxError(f"Syntax error in verifier code: {e}")
|
|
27
|
-
|
|
28
|
-
first_function_name = None
|
|
29
|
-
|
|
30
|
-
for node in tree.body:
|
|
31
|
-
# Check for function definitions
|
|
32
|
-
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
33
|
-
# Validate that decorators don't contain function calls
|
|
34
|
-
for decorator in node.decorator_list:
|
|
35
|
-
if _contains_call(decorator):
|
|
36
|
-
raise ValueError(
|
|
37
|
-
f"Line {node.lineno}: Function decorators with function calls "
|
|
38
|
-
f"are not allowed. Decorators execute during import and could "
|
|
39
|
-
f"run arbitrary code."
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
if first_function_name is None:
|
|
43
|
-
first_function_name = node.name
|
|
44
|
-
continue
|
|
45
|
-
|
|
46
|
-
# Allow imports
|
|
47
|
-
if isinstance(node, (ast.Import, ast.ImportFrom)):
|
|
48
|
-
continue
|
|
49
|
-
|
|
50
|
-
# Allow class definitions
|
|
51
|
-
if isinstance(node, ast.ClassDef):
|
|
52
|
-
continue
|
|
53
|
-
|
|
54
|
-
# Allow docstrings and other expression statements (but not calls)
|
|
55
|
-
if isinstance(node, ast.Expr):
|
|
56
|
-
if isinstance(node.value, ast.Constant):
|
|
57
|
-
# Docstring or constant expression - safe
|
|
58
|
-
continue
|
|
59
|
-
else:
|
|
60
|
-
# Check if it's a call or other dangerous expression
|
|
61
|
-
raise ValueError(
|
|
62
|
-
f"Line {node.lineno}: Expression statements that are not "
|
|
63
|
-
f"constants are not allowed at module level. Found: {ast.dump(node.value)}"
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
# Allow variable assignments, but check the value
|
|
67
|
-
if isinstance(node, (ast.Assign, ast.AnnAssign)):
|
|
68
|
-
# Check if the assignment value contains any function calls
|
|
69
|
-
if _contains_call(node.value if isinstance(node, ast.AnnAssign) else node.value):
|
|
70
|
-
raise ValueError(
|
|
71
|
-
f"Line {node.lineno}: Variable assignments with function calls "
|
|
72
|
-
f"are not allowed at module level. This prevents arbitrary code "
|
|
73
|
-
f"execution during import."
|
|
74
|
-
)
|
|
75
|
-
continue
|
|
76
|
-
|
|
77
|
-
# If we get here, it's an unsupported statement type
|
|
78
|
-
raise ValueError(
|
|
79
|
-
f"Line {node.lineno}: Unsupported statement type at module level: "
|
|
80
|
-
f"{node.__class__.__name__}. Only imports, function/class definitions, "
|
|
81
|
-
f"and constant assignments are allowed."
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
if first_function_name is None:
|
|
85
|
-
raise ValueError("No function found in verifier code")
|
|
86
|
-
|
|
87
|
-
return first_function_name
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def _contains_call(node: ast.AST) -> bool:
|
|
91
|
-
"""Recursively check if an AST node contains any Call nodes.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
node: AST node to check
|
|
95
|
-
|
|
96
|
-
Returns:
|
|
97
|
-
True if the node or any of its children is a Call node
|
|
98
|
-
"""
|
|
99
|
-
if isinstance(node, ast.Call):
|
|
100
|
-
return True
|
|
101
|
-
|
|
102
|
-
for child in ast.walk(node):
|
|
103
|
-
if isinstance(child, ast.Call):
|
|
104
|
-
return True
|
|
105
|
-
|
|
106
|
-
return False
|
|
@@ -1,427 +0,0 @@
|
|
|
1
|
-
"""Security tests for verifier_from_string function.
|
|
2
|
-
|
|
3
|
-
Tests that the verifier parsing and validation properly blocks
|
|
4
|
-
arbitrary code execution during import.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import pytest
|
|
8
|
-
from fleet.tasks import verifier_from_string as sync_verifier_from_string
|
|
9
|
-
from fleet._async.tasks import verifier_from_string as async_verifier_from_string
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestSyncVerifierSecurity:
|
|
13
|
-
"""Security tests for sync version of verifier_from_string."""
|
|
14
|
-
|
|
15
|
-
def test_blocks_module_level_subprocess_run(self):
|
|
16
|
-
"""Test that module-level subprocess.run() is blocked."""
|
|
17
|
-
code = """
|
|
18
|
-
import subprocess
|
|
19
|
-
subprocess.run(['echo', 'malicious'])
|
|
20
|
-
|
|
21
|
-
def my_verifier(env):
|
|
22
|
-
return 1.0
|
|
23
|
-
"""
|
|
24
|
-
with pytest.raises(ValueError, match="Expression statements that are not constants"):
|
|
25
|
-
sync_verifier_from_string(
|
|
26
|
-
verifier_func=code,
|
|
27
|
-
verifier_id="test-verifier",
|
|
28
|
-
verifier_key="test-key",
|
|
29
|
-
sha256="test-sha",
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
def test_blocks_module_level_open(self):
|
|
33
|
-
"""Test that module-level open() is blocked."""
|
|
34
|
-
code = """
|
|
35
|
-
open('/etc/passwd', 'r')
|
|
36
|
-
|
|
37
|
-
def my_verifier(env):
|
|
38
|
-
return 1.0
|
|
39
|
-
"""
|
|
40
|
-
with pytest.raises(ValueError, match="Expression statements that are not constants"):
|
|
41
|
-
sync_verifier_from_string(
|
|
42
|
-
verifier_func=code,
|
|
43
|
-
verifier_id="test-verifier",
|
|
44
|
-
verifier_key="test-key",
|
|
45
|
-
sha256="test-sha",
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
def test_blocks_assignment_with_subprocess_call(self):
|
|
49
|
-
"""Test that variable assignment with subprocess call is blocked."""
|
|
50
|
-
code = """
|
|
51
|
-
import subprocess
|
|
52
|
-
result = subprocess.run(['echo', 'malicious'])
|
|
53
|
-
|
|
54
|
-
def my_verifier(env):
|
|
55
|
-
return 1.0
|
|
56
|
-
"""
|
|
57
|
-
with pytest.raises(ValueError, match="Variable assignments with function calls"):
|
|
58
|
-
sync_verifier_from_string(
|
|
59
|
-
verifier_func=code,
|
|
60
|
-
verifier_id="test-verifier",
|
|
61
|
-
verifier_key="test-key",
|
|
62
|
-
sha256="test-sha",
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
def test_blocks_assignment_with_open_call(self):
|
|
66
|
-
"""Test that variable assignment with open() is blocked."""
|
|
67
|
-
code = """
|
|
68
|
-
file_handle = open('/etc/passwd', 'r')
|
|
69
|
-
|
|
70
|
-
def my_verifier(env):
|
|
71
|
-
return 1.0
|
|
72
|
-
"""
|
|
73
|
-
with pytest.raises(ValueError, match="Variable assignments with function calls"):
|
|
74
|
-
sync_verifier_from_string(
|
|
75
|
-
verifier_func=code,
|
|
76
|
-
verifier_id="test-verifier",
|
|
77
|
-
verifier_key="test-key",
|
|
78
|
-
sha256="test-sha",
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
def test_blocks_assignment_with_any_function_call(self):
|
|
82
|
-
"""Test that variable assignment with any function call is blocked."""
|
|
83
|
-
code = """
|
|
84
|
-
import os
|
|
85
|
-
path = os.getcwd()
|
|
86
|
-
|
|
87
|
-
def my_verifier(env):
|
|
88
|
-
return 1.0
|
|
89
|
-
"""
|
|
90
|
-
with pytest.raises(ValueError, match="Variable assignments with function calls"):
|
|
91
|
-
sync_verifier_from_string(
|
|
92
|
-
verifier_func=code,
|
|
93
|
-
verifier_id="test-verifier",
|
|
94
|
-
verifier_key="test-key",
|
|
95
|
-
sha256="test-sha",
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
def test_allows_constant_assignment(self):
|
|
99
|
-
"""Test that constant variable assignments are allowed."""
|
|
100
|
-
code = """
|
|
101
|
-
CONSTANT_VALUE = 42
|
|
102
|
-
ANOTHER_CONSTANT = "test"
|
|
103
|
-
PI = 3.14159
|
|
104
|
-
|
|
105
|
-
def my_verifier(env):
|
|
106
|
-
return CONSTANT_VALUE
|
|
107
|
-
"""
|
|
108
|
-
# Should not raise
|
|
109
|
-
verifier = sync_verifier_from_string(
|
|
110
|
-
verifier_func=code,
|
|
111
|
-
verifier_id="test-verifier",
|
|
112
|
-
verifier_key="test-key",
|
|
113
|
-
sha256="test-sha",
|
|
114
|
-
)
|
|
115
|
-
assert verifier is not None
|
|
116
|
-
|
|
117
|
-
def test_allows_list_dict_constant_assignment(self):
|
|
118
|
-
"""Test that list/dict constant assignments are allowed."""
|
|
119
|
-
code = """
|
|
120
|
-
MY_LIST = [1, 2, 3]
|
|
121
|
-
MY_DICT = {"key": "value"}
|
|
122
|
-
MY_TUPLE = (1, 2, 3)
|
|
123
|
-
|
|
124
|
-
def my_verifier(env):
|
|
125
|
-
return 1.0
|
|
126
|
-
"""
|
|
127
|
-
# Should not raise
|
|
128
|
-
verifier = sync_verifier_from_string(
|
|
129
|
-
verifier_func=code,
|
|
130
|
-
verifier_id="test-verifier",
|
|
131
|
-
verifier_key="test-key",
|
|
132
|
-
sha256="test-sha",
|
|
133
|
-
)
|
|
134
|
-
assert verifier is not None
|
|
135
|
-
|
|
136
|
-
def test_allows_valid_imports(self):
|
|
137
|
-
"""Test that imports are allowed."""
|
|
138
|
-
code = """
|
|
139
|
-
import json
|
|
140
|
-
import os
|
|
141
|
-
from typing import Dict
|
|
142
|
-
|
|
143
|
-
def my_verifier(env):
|
|
144
|
-
return 1.0
|
|
145
|
-
"""
|
|
146
|
-
# Should not raise
|
|
147
|
-
verifier = sync_verifier_from_string(
|
|
148
|
-
verifier_func=code,
|
|
149
|
-
verifier_id="test-verifier",
|
|
150
|
-
verifier_key="test-key",
|
|
151
|
-
sha256="test-sha",
|
|
152
|
-
)
|
|
153
|
-
assert verifier is not None
|
|
154
|
-
|
|
155
|
-
def test_allows_class_definitions(self):
|
|
156
|
-
"""Test that class definitions are allowed."""
|
|
157
|
-
code = """
|
|
158
|
-
class MyHelper:
|
|
159
|
-
def __init__(self):
|
|
160
|
-
self.value = 42
|
|
161
|
-
|
|
162
|
-
def get_value(self):
|
|
163
|
-
return self.value
|
|
164
|
-
|
|
165
|
-
def my_verifier(env):
|
|
166
|
-
helper = MyHelper()
|
|
167
|
-
return helper.get_value()
|
|
168
|
-
"""
|
|
169
|
-
# Should not raise
|
|
170
|
-
verifier = sync_verifier_from_string(
|
|
171
|
-
verifier_func=code,
|
|
172
|
-
verifier_id="test-verifier",
|
|
173
|
-
verifier_key="test-key",
|
|
174
|
-
sha256="test-sha",
|
|
175
|
-
)
|
|
176
|
-
assert verifier is not None
|
|
177
|
-
|
|
178
|
-
def test_allows_multiple_functions(self):
|
|
179
|
-
"""Test that multiple function definitions are allowed."""
|
|
180
|
-
code = """
|
|
181
|
-
def helper_function(x):
|
|
182
|
-
return x * 2
|
|
183
|
-
|
|
184
|
-
def my_verifier(env):
|
|
185
|
-
return helper_function(0.5)
|
|
186
|
-
"""
|
|
187
|
-
# Should not raise
|
|
188
|
-
verifier = sync_verifier_from_string(
|
|
189
|
-
verifier_func=code,
|
|
190
|
-
verifier_id="test-verifier",
|
|
191
|
-
verifier_key="test-key",
|
|
192
|
-
sha256="test-sha",
|
|
193
|
-
)
|
|
194
|
-
assert verifier is not None
|
|
195
|
-
|
|
196
|
-
def test_extracts_first_function_name(self):
|
|
197
|
-
"""Test that the first function name is correctly extracted."""
|
|
198
|
-
code = """
|
|
199
|
-
def first_function(env):
|
|
200
|
-
return 1.0
|
|
201
|
-
|
|
202
|
-
def second_function(env):
|
|
203
|
-
return 0.5
|
|
204
|
-
"""
|
|
205
|
-
verifier = sync_verifier_from_string(
|
|
206
|
-
verifier_func=code,
|
|
207
|
-
verifier_id="test-verifier",
|
|
208
|
-
verifier_key="test-key",
|
|
209
|
-
sha256="test-sha",
|
|
210
|
-
)
|
|
211
|
-
# The first function should be used
|
|
212
|
-
assert verifier.func.__name__ == "first_function"
|
|
213
|
-
|
|
214
|
-
def test_error_message_includes_line_number(self):
|
|
215
|
-
"""Test that error messages include helpful line numbers."""
|
|
216
|
-
code = """
|
|
217
|
-
import subprocess
|
|
218
|
-
|
|
219
|
-
subprocess.run(['echo', 'test'])
|
|
220
|
-
|
|
221
|
-
def my_verifier(env):
|
|
222
|
-
return 1.0
|
|
223
|
-
"""
|
|
224
|
-
with pytest.raises(ValueError, match=r"Line \d+"):
|
|
225
|
-
sync_verifier_from_string(
|
|
226
|
-
verifier_func=code,
|
|
227
|
-
verifier_id="test-verifier",
|
|
228
|
-
verifier_key="test-key",
|
|
229
|
-
sha256="test-sha",
|
|
230
|
-
)
|
|
231
|
-
|
|
232
|
-
def test_blocks_nested_function_call_in_list(self):
|
|
233
|
-
"""Test that function calls nested in list assignments are blocked."""
|
|
234
|
-
code = """
|
|
235
|
-
import os
|
|
236
|
-
MY_LIST = [1, 2, os.getcwd()]
|
|
237
|
-
|
|
238
|
-
def my_verifier(env):
|
|
239
|
-
return 1.0
|
|
240
|
-
"""
|
|
241
|
-
with pytest.raises(ValueError, match="Variable assignments with function calls"):
|
|
242
|
-
sync_verifier_from_string(
|
|
243
|
-
verifier_func=code,
|
|
244
|
-
verifier_id="test-verifier",
|
|
245
|
-
verifier_key="test-key",
|
|
246
|
-
sha256="test-sha",
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
def test_blocks_nested_function_call_in_dict(self):
|
|
250
|
-
"""Test that function calls nested in dict assignments are blocked."""
|
|
251
|
-
code = """
|
|
252
|
-
import os
|
|
253
|
-
MY_DICT = {"cwd": os.getcwd()}
|
|
254
|
-
|
|
255
|
-
def my_verifier(env):
|
|
256
|
-
return 1.0
|
|
257
|
-
"""
|
|
258
|
-
with pytest.raises(ValueError, match="Variable assignments with function calls"):
|
|
259
|
-
sync_verifier_from_string(
|
|
260
|
-
verifier_func=code,
|
|
261
|
-
verifier_id="test-verifier",
|
|
262
|
-
verifier_key="test-key",
|
|
263
|
-
sha256="test-sha",
|
|
264
|
-
)
|
|
265
|
-
|
|
266
|
-
def test_allows_docstrings(self):
|
|
267
|
-
"""Test that module-level docstrings are allowed."""
|
|
268
|
-
code = '''
|
|
269
|
-
"""This is a module docstring."""
|
|
270
|
-
|
|
271
|
-
def my_verifier(env):
|
|
272
|
-
"""This is a function docstring."""
|
|
273
|
-
return 1.0
|
|
274
|
-
'''
|
|
275
|
-
# Should not raise
|
|
276
|
-
verifier = sync_verifier_from_string(
|
|
277
|
-
verifier_func=code,
|
|
278
|
-
verifier_id="test-verifier",
|
|
279
|
-
verifier_key="test-key",
|
|
280
|
-
sha256="test-sha",
|
|
281
|
-
)
|
|
282
|
-
assert verifier is not None
|
|
283
|
-
|
|
284
|
-
def test_function_with_decorator_extracts_correct_name(self):
|
|
285
|
-
"""Test that decorators don't affect function name extraction."""
|
|
286
|
-
code = """
|
|
287
|
-
def some_decorator(func):
|
|
288
|
-
return func
|
|
289
|
-
|
|
290
|
-
@some_decorator
|
|
291
|
-
def my_actual_function(env):
|
|
292
|
-
return 1.0
|
|
293
|
-
"""
|
|
294
|
-
verifier = sync_verifier_from_string(
|
|
295
|
-
verifier_func=code,
|
|
296
|
-
verifier_id="test-verifier",
|
|
297
|
-
verifier_key="test-key",
|
|
298
|
-
sha256="test-sha",
|
|
299
|
-
)
|
|
300
|
-
# Should extract 'some_decorator' (first function) or 'my_actual_function'
|
|
301
|
-
# depending on order, but NOT the decorator name itself
|
|
302
|
-
assert verifier.func.__name__ in ["some_decorator", "my_actual_function"]
|
|
303
|
-
|
|
304
|
-
def test_blocks_decorator_with_function_call(self):
|
|
305
|
-
"""Test that decorators with function calls are blocked."""
|
|
306
|
-
code = """
|
|
307
|
-
import subprocess
|
|
308
|
-
|
|
309
|
-
@subprocess.run(['echo', 'bad'])
|
|
310
|
-
def my_verifier(env):
|
|
311
|
-
return 1.0
|
|
312
|
-
"""
|
|
313
|
-
# Decorators execute during import, so calls in decorators are dangerous
|
|
314
|
-
with pytest.raises(ValueError, match="Function decorators with function calls"):
|
|
315
|
-
sync_verifier_from_string(
|
|
316
|
-
verifier_func=code,
|
|
317
|
-
verifier_id="test-verifier",
|
|
318
|
-
verifier_key="test-key",
|
|
319
|
-
sha256="test-sha",
|
|
320
|
-
)
|
|
321
|
-
|
|
322
|
-
def test_allows_simple_decorator_reference(self):
|
|
323
|
-
"""Test that simple decorator references (no calls) are allowed."""
|
|
324
|
-
code = """
|
|
325
|
-
def my_decorator(func):
|
|
326
|
-
return func
|
|
327
|
-
|
|
328
|
-
@my_decorator
|
|
329
|
-
def my_verifier(env):
|
|
330
|
-
return 1.0
|
|
331
|
-
"""
|
|
332
|
-
# Simple decorator reference (no call) should be allowed
|
|
333
|
-
verifier = sync_verifier_from_string(
|
|
334
|
-
verifier_func=code,
|
|
335
|
-
verifier_id="test-verifier",
|
|
336
|
-
verifier_key="test-key",
|
|
337
|
-
sha256="test-sha",
|
|
338
|
-
)
|
|
339
|
-
assert verifier is not None
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
class TestAsyncVerifierSecurity:
|
|
343
|
-
"""Security tests for async version of verifier_from_string."""
|
|
344
|
-
|
|
345
|
-
def test_blocks_module_level_subprocess_run(self):
|
|
346
|
-
"""Test that module-level subprocess.run() is blocked."""
|
|
347
|
-
code = """
|
|
348
|
-
import subprocess
|
|
349
|
-
subprocess.run(['echo', 'malicious'])
|
|
350
|
-
|
|
351
|
-
async def my_async_verifier(env):
|
|
352
|
-
return 1.0
|
|
353
|
-
"""
|
|
354
|
-
with pytest.raises(ValueError, match="Expression statements that are not constants"):
|
|
355
|
-
async_verifier_from_string(
|
|
356
|
-
verifier_func=code,
|
|
357
|
-
verifier_id="test-verifier",
|
|
358
|
-
verifier_key="test-key",
|
|
359
|
-
sha256="test-sha",
|
|
360
|
-
)
|
|
361
|
-
|
|
362
|
-
def test_blocks_assignment_with_function_call(self):
|
|
363
|
-
"""Test that variable assignment with function call is blocked."""
|
|
364
|
-
code = """
|
|
365
|
-
import subprocess
|
|
366
|
-
result = subprocess.run(['echo', 'malicious'])
|
|
367
|
-
|
|
368
|
-
async def my_async_verifier(env):
|
|
369
|
-
return 1.0
|
|
370
|
-
"""
|
|
371
|
-
with pytest.raises(ValueError, match="Variable assignments with function calls"):
|
|
372
|
-
async_verifier_from_string(
|
|
373
|
-
verifier_func=code,
|
|
374
|
-
verifier_id="test-verifier",
|
|
375
|
-
verifier_key="test-key",
|
|
376
|
-
sha256="test-sha",
|
|
377
|
-
)
|
|
378
|
-
|
|
379
|
-
def test_allows_constant_assignment(self):
|
|
380
|
-
"""Test that constant variable assignments are allowed."""
|
|
381
|
-
code = """
|
|
382
|
-
CONSTANT_VALUE = 42
|
|
383
|
-
|
|
384
|
-
async def my_async_verifier(env):
|
|
385
|
-
return CONSTANT_VALUE
|
|
386
|
-
"""
|
|
387
|
-
# Should not raise
|
|
388
|
-
verifier = async_verifier_from_string(
|
|
389
|
-
verifier_func=code,
|
|
390
|
-
verifier_id="test-verifier",
|
|
391
|
-
verifier_key="test-key",
|
|
392
|
-
sha256="test-sha",
|
|
393
|
-
)
|
|
394
|
-
assert verifier is not None
|
|
395
|
-
|
|
396
|
-
def test_allows_async_function_definitions(self):
|
|
397
|
-
"""Test that async function definitions are recognized."""
|
|
398
|
-
code = """
|
|
399
|
-
async def my_async_verifier(env):
|
|
400
|
-
return 1.0
|
|
401
|
-
"""
|
|
402
|
-
# Should not raise
|
|
403
|
-
verifier = async_verifier_from_string(
|
|
404
|
-
verifier_func=code,
|
|
405
|
-
verifier_id="test-verifier",
|
|
406
|
-
verifier_key="test-key",
|
|
407
|
-
sha256="test-sha",
|
|
408
|
-
)
|
|
409
|
-
assert verifier is not None
|
|
410
|
-
|
|
411
|
-
def test_extracts_first_async_function_name(self):
|
|
412
|
-
"""Test that the first async function name is correctly extracted."""
|
|
413
|
-
code = """
|
|
414
|
-
async def first_async_function(env):
|
|
415
|
-
return 1.0
|
|
416
|
-
|
|
417
|
-
async def second_async_function(env):
|
|
418
|
-
return 0.5
|
|
419
|
-
"""
|
|
420
|
-
verifier = async_verifier_from_string(
|
|
421
|
-
verifier_func=code,
|
|
422
|
-
verifier_id="test-verifier",
|
|
423
|
-
verifier_key="test-key",
|
|
424
|
-
sha256="test-sha",
|
|
425
|
-
)
|
|
426
|
-
# The first function should be used
|
|
427
|
-
assert verifier.func.__name__ == "first_async_function"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|