runloop_api_client 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runloop_api_client/__init__.py +95 -0
- runloop_api_client/_base_client.py +2127 -0
- runloop_api_client/_client.py +866 -0
- runloop_api_client/_compat.py +219 -0
- runloop_api_client/_constants.py +23 -0
- runloop_api_client/_exceptions.py +108 -0
- runloop_api_client/_files.py +123 -0
- runloop_api_client/_models.py +872 -0
- runloop_api_client/_qs.py +150 -0
- runloop_api_client/_resource.py +43 -0
- runloop_api_client/_response.py +832 -0
- runloop_api_client/_streaming.py +518 -0
- runloop_api_client/_types.py +270 -0
- runloop_api_client/_utils/__init__.py +65 -0
- runloop_api_client/_utils/_compat.py +45 -0
- runloop_api_client/_utils/_datetime_parse.py +136 -0
- runloop_api_client/_utils/_json.py +35 -0
- runloop_api_client/_utils/_logs.py +25 -0
- runloop_api_client/_utils/_proxy.py +65 -0
- runloop_api_client/_utils/_reflection.py +42 -0
- runloop_api_client/_utils/_resources_proxy.py +24 -0
- runloop_api_client/_utils/_streams.py +12 -0
- runloop_api_client/_utils/_sync.py +58 -0
- runloop_api_client/_utils/_transform.py +457 -0
- runloop_api_client/_utils/_typing.py +156 -0
- runloop_api_client/_utils/_utils.py +421 -0
- runloop_api_client/_utils/_validation.py +31 -0
- runloop_api_client/_version.py +4 -0
- runloop_api_client/lib/.keep +4 -0
- runloop_api_client/lib/__init__.py +3 -0
- runloop_api_client/lib/_ignore.py +496 -0
- runloop_api_client/lib/context_loader.py +78 -0
- runloop_api_client/lib/polling.py +75 -0
- runloop_api_client/lib/polling_async.py +60 -0
- runloop_api_client/pagination.py +986 -0
- runloop_api_client/py.typed +0 -0
- runloop_api_client/resources/__init__.py +173 -0
- runloop_api_client/resources/agents.py +431 -0
- runloop_api_client/resources/benchmark_jobs.py +394 -0
- runloop_api_client/resources/benchmark_runs.py +595 -0
- runloop_api_client/resources/benchmarks.py +1085 -0
- runloop_api_client/resources/blueprints.py +1563 -0
- runloop_api_client/resources/devboxes/__init__.py +89 -0
- runloop_api_client/resources/devboxes/browsers.py +267 -0
- runloop_api_client/resources/devboxes/computers.py +648 -0
- runloop_api_client/resources/devboxes/devboxes.py +3784 -0
- runloop_api_client/resources/devboxes/disk_snapshots.py +602 -0
- runloop_api_client/resources/devboxes/executions.py +1212 -0
- runloop_api_client/resources/devboxes/logs.py +197 -0
- runloop_api_client/resources/gateway_configs.py +658 -0
- runloop_api_client/resources/network_policies.py +680 -0
- runloop_api_client/resources/objects.py +870 -0
- runloop_api_client/resources/repositories.py +918 -0
- runloop_api_client/resources/scenarios/__init__.py +47 -0
- runloop_api_client/resources/scenarios/runs.py +973 -0
- runloop_api_client/resources/scenarios/scenarios.py +1101 -0
- runloop_api_client/resources/scenarios/scorers.py +629 -0
- runloop_api_client/resources/secrets.py +500 -0
- runloop_api_client/sdk/__init__.py +117 -0
- runloop_api_client/sdk/_helpers.py +49 -0
- runloop_api_client/sdk/_types.py +264 -0
- runloop_api_client/sdk/agent.py +70 -0
- runloop_api_client/sdk/async_.py +1036 -0
- runloop_api_client/sdk/async_agent.py +70 -0
- runloop_api_client/sdk/async_benchmark.py +169 -0
- runloop_api_client/sdk/async_benchmark_run.py +127 -0
- runloop_api_client/sdk/async_blueprint.py +104 -0
- runloop_api_client/sdk/async_devbox.py +797 -0
- runloop_api_client/sdk/async_execution.py +144 -0
- runloop_api_client/sdk/async_execution_result.py +175 -0
- runloop_api_client/sdk/async_network_policy.py +80 -0
- runloop_api_client/sdk/async_scenario.py +118 -0
- runloop_api_client/sdk/async_scenario_builder.py +480 -0
- runloop_api_client/sdk/async_scenario_run.py +242 -0
- runloop_api_client/sdk/async_scorer.py +77 -0
- runloop_api_client/sdk/async_snapshot.py +125 -0
- runloop_api_client/sdk/async_storage_object.py +188 -0
- runloop_api_client/sdk/benchmark.py +167 -0
- runloop_api_client/sdk/benchmark_run.py +127 -0
- runloop_api_client/sdk/blueprint.py +104 -0
- runloop_api_client/sdk/devbox.py +800 -0
- runloop_api_client/sdk/execution.py +132 -0
- runloop_api_client/sdk/execution_result.py +173 -0
- runloop_api_client/sdk/network_policy.py +80 -0
- runloop_api_client/sdk/scenario.py +118 -0
- runloop_api_client/sdk/scenario_builder.py +480 -0
- runloop_api_client/sdk/scenario_run.py +242 -0
- runloop_api_client/sdk/scorer.py +77 -0
- runloop_api_client/sdk/snapshot.py +125 -0
- runloop_api_client/sdk/storage_object.py +188 -0
- runloop_api_client/sdk/sync.py +1061 -0
- runloop_api_client/types/__init__.py +130 -0
- runloop_api_client/types/agent_create_params.py +21 -0
- runloop_api_client/types/agent_list_params.py +27 -0
- runloop_api_client/types/agent_list_view.py +24 -0
- runloop_api_client/types/agent_view.py +30 -0
- runloop_api_client/types/benchmark_create_params.py +40 -0
- runloop_api_client/types/benchmark_definitions_params.py +15 -0
- runloop_api_client/types/benchmark_job_create_params.py +220 -0
- runloop_api_client/types/benchmark_job_list_params.py +18 -0
- runloop_api_client/types/benchmark_job_list_view.py +19 -0
- runloop_api_client/types/benchmark_job_view.py +344 -0
- runloop_api_client/types/benchmark_list_params.py +18 -0
- runloop_api_client/types/benchmark_list_public_params.py +15 -0
- runloop_api_client/types/benchmark_run_list_params.py +21 -0
- runloop_api_client/types/benchmark_run_list_scenario_runs_params.py +18 -0
- runloop_api_client/types/benchmark_run_list_view.py +19 -0
- runloop_api_client/types/benchmark_run_view.py +58 -0
- runloop_api_client/types/benchmark_start_run_params.py +29 -0
- runloop_api_client/types/benchmark_update_params.py +42 -0
- runloop_api_client/types/benchmark_update_scenarios_params.py +18 -0
- runloop_api_client/types/benchmark_view.py +49 -0
- runloop_api_client/types/blueprint_build_log.py +16 -0
- runloop_api_client/types/blueprint_build_logs_list_view.py +16 -0
- runloop_api_client/types/blueprint_build_parameters.py +119 -0
- runloop_api_client/types/blueprint_create_from_inspection_params.py +49 -0
- runloop_api_client/types/blueprint_create_params.py +121 -0
- runloop_api_client/types/blueprint_list_params.py +21 -0
- runloop_api_client/types/blueprint_list_public_params.py +21 -0
- runloop_api_client/types/blueprint_list_view.py +19 -0
- runloop_api_client/types/blueprint_preview_params.py +121 -0
- runloop_api_client/types/blueprint_preview_view.py +10 -0
- runloop_api_client/types/blueprint_view.py +93 -0
- runloop_api_client/types/devbox_async_execution_detail_view.py +46 -0
- runloop_api_client/types/devbox_create_params.py +124 -0
- runloop_api_client/types/devbox_create_ssh_key_response.py +19 -0
- runloop_api_client/types/devbox_create_tunnel_params.py +12 -0
- runloop_api_client/types/devbox_download_file_params.py +15 -0
- runloop_api_client/types/devbox_enable_tunnel_params.py +13 -0
- runloop_api_client/types/devbox_execute_async_params.py +33 -0
- runloop_api_client/types/devbox_execute_params.py +37 -0
- runloop_api_client/types/devbox_execute_sync_params.py +31 -0
- runloop_api_client/types/devbox_execution_detail_view.py +24 -0
- runloop_api_client/types/devbox_list_disk_snapshots_params.py +32 -0
- runloop_api_client/types/devbox_list_params.py +20 -0
- runloop_api_client/types/devbox_list_view.py +19 -0
- runloop_api_client/types/devbox_read_file_contents_params.py +15 -0
- runloop_api_client/types/devbox_read_file_contents_response.py +7 -0
- runloop_api_client/types/devbox_remove_tunnel_params.py +12 -0
- runloop_api_client/types/devbox_send_std_in_result.py +16 -0
- runloop_api_client/types/devbox_snapshot_disk_async_params.py +19 -0
- runloop_api_client/types/devbox_snapshot_disk_params.py +19 -0
- runloop_api_client/types/devbox_snapshot_list_view.py +19 -0
- runloop_api_client/types/devbox_snapshot_view.py +30 -0
- runloop_api_client/types/devbox_tunnel_view.py +16 -0
- runloop_api_client/types/devbox_update_params.py +16 -0
- runloop_api_client/types/devbox_upload_file_params.py +19 -0
- runloop_api_client/types/devbox_view.py +121 -0
- runloop_api_client/types/devbox_wait_for_command_params.py +28 -0
- runloop_api_client/types/devbox_write_file_contents_params.py +18 -0
- runloop_api_client/types/devboxes/__init__.py +33 -0
- runloop_api_client/types/devboxes/browser_create_params.py +13 -0
- runloop_api_client/types/devboxes/browser_view.py +29 -0
- runloop_api_client/types/devboxes/computer_create_params.py +26 -0
- runloop_api_client/types/devboxes/computer_keyboard_interaction_params.py +16 -0
- runloop_api_client/types/devboxes/computer_keyboard_interaction_response.py +15 -0
- runloop_api_client/types/devboxes/computer_mouse_interaction_params.py +35 -0
- runloop_api_client/types/devboxes/computer_mouse_interaction_response.py +15 -0
- runloop_api_client/types/devboxes/computer_screen_interaction_params.py +12 -0
- runloop_api_client/types/devboxes/computer_screen_interaction_response.py +15 -0
- runloop_api_client/types/devboxes/computer_view.py +23 -0
- runloop_api_client/types/devboxes/devbox_logs_list_view.py +39 -0
- runloop_api_client/types/devboxes/devbox_snapshot_async_status_view.py +20 -0
- runloop_api_client/types/devboxes/disk_snapshot_list_params.py +32 -0
- runloop_api_client/types/devboxes/disk_snapshot_update_params.py +19 -0
- runloop_api_client/types/devboxes/execution_execute_async_params.py +31 -0
- runloop_api_client/types/devboxes/execution_execute_sync_params.py +31 -0
- runloop_api_client/types/devboxes/execution_kill_params.py +18 -0
- runloop_api_client/types/devboxes/execution_retrieve_params.py +14 -0
- runloop_api_client/types/devboxes/execution_send_std_in_params.py +18 -0
- runloop_api_client/types/devboxes/execution_stream_stderr_updates_params.py +17 -0
- runloop_api_client/types/devboxes/execution_stream_stdout_updates_params.py +17 -0
- runloop_api_client/types/devboxes/execution_update_chunk.py +15 -0
- runloop_api_client/types/devboxes/log_list_params.py +15 -0
- runloop_api_client/types/gateway_config_create_params.py +41 -0
- runloop_api_client/types/gateway_config_list_params.py +21 -0
- runloop_api_client/types/gateway_config_list_view.py +21 -0
- runloop_api_client/types/gateway_config_update_params.py +32 -0
- runloop_api_client/types/gateway_config_view.py +47 -0
- runloop_api_client/types/input_context.py +19 -0
- runloop_api_client/types/input_context_param.py +20 -0
- runloop_api_client/types/input_context_update_param.py +16 -0
- runloop_api_client/types/inspection_source_param.py +18 -0
- runloop_api_client/types/network_policy_create_params.py +40 -0
- runloop_api_client/types/network_policy_list_params.py +21 -0
- runloop_api_client/types/network_policy_list_view.py +21 -0
- runloop_api_client/types/network_policy_update_params.py +30 -0
- runloop_api_client/types/network_policy_view.py +52 -0
- runloop_api_client/types/object_create_params.py +30 -0
- runloop_api_client/types/object_download_params.py +12 -0
- runloop_api_client/types/object_download_url_view.py +12 -0
- runloop_api_client/types/object_list_params.py +27 -0
- runloop_api_client/types/object_list_public_params.py +27 -0
- runloop_api_client/types/object_list_view.py +24 -0
- runloop_api_client/types/object_view.py +36 -0
- runloop_api_client/types/repository_connection_list_view.py +19 -0
- runloop_api_client/types/repository_connection_view.py +18 -0
- runloop_api_client/types/repository_create_params.py +22 -0
- runloop_api_client/types/repository_inspect_params.py +13 -0
- runloop_api_client/types/repository_inspection_details.py +83 -0
- runloop_api_client/types/repository_inspection_list_view.py +13 -0
- runloop_api_client/types/repository_list_params.py +21 -0
- runloop_api_client/types/repository_manifest_view.py +174 -0
- runloop_api_client/types/repository_refresh_params.py +16 -0
- runloop_api_client/types/scenario_create_params.py +53 -0
- runloop_api_client/types/scenario_definition_list_view.py +19 -0
- runloop_api_client/types/scenario_environment.py +29 -0
- runloop_api_client/types/scenario_environment_param.py +31 -0
- runloop_api_client/types/scenario_list_params.py +24 -0
- runloop_api_client/types/scenario_list_public_params.py +18 -0
- runloop_api_client/types/scenario_run_list_view.py +19 -0
- runloop_api_client/types/scenario_run_view.py +55 -0
- runloop_api_client/types/scenario_start_run_params.py +30 -0
- runloop_api_client/types/scenario_update_params.py +49 -0
- runloop_api_client/types/scenario_view.py +61 -0
- runloop_api_client/types/scenarios/__init__.py +14 -0
- runloop_api_client/types/scenarios/run_list_params.py +27 -0
- runloop_api_client/types/scenarios/scorer_create_params.py +18 -0
- runloop_api_client/types/scenarios/scorer_create_response.py +18 -0
- runloop_api_client/types/scenarios/scorer_list_params.py +15 -0
- runloop_api_client/types/scenarios/scorer_list_response.py +18 -0
- runloop_api_client/types/scenarios/scorer_retrieve_response.py +18 -0
- runloop_api_client/types/scenarios/scorer_update_params.py +18 -0
- runloop_api_client/types/scenarios/scorer_update_response.py +18 -0
- runloop_api_client/types/scenarios/scorer_validate_params.py +17 -0
- runloop_api_client/types/scenarios/scorer_validate_response.py +23 -0
- runloop_api_client/types/scoring_contract.py +17 -0
- runloop_api_client/types/scoring_contract_param.py +19 -0
- runloop_api_client/types/scoring_contract_result_view.py +20 -0
- runloop_api_client/types/scoring_contract_update_param.py +15 -0
- runloop_api_client/types/scoring_function.py +157 -0
- runloop_api_client/types/scoring_function_param.py +153 -0
- runloop_api_client/types/scoring_function_result_view.py +25 -0
- runloop_api_client/types/secret_create_params.py +23 -0
- runloop_api_client/types/secret_list_params.py +12 -0
- runloop_api_client/types/secret_list_view.py +24 -0
- runloop_api_client/types/secret_update_params.py +16 -0
- runloop_api_client/types/secret_view.py +26 -0
- runloop_api_client/types/shared/__init__.py +10 -0
- runloop_api_client/types/shared/after_idle.py +15 -0
- runloop_api_client/types/shared/agent_mount.py +31 -0
- runloop_api_client/types/shared/agent_source.py +75 -0
- runloop_api_client/types/shared/code_mount_parameters.py +24 -0
- runloop_api_client/types/shared/launch_parameters.py +86 -0
- runloop_api_client/types/shared/mount.py +43 -0
- runloop_api_client/types/shared/object_mount.py +21 -0
- runloop_api_client/types/shared/run_profile.py +37 -0
- runloop_api_client/types/shared_params/__init__.py +10 -0
- runloop_api_client/types/shared_params/after_idle.py +15 -0
- runloop_api_client/types/shared_params/agent_mount.py +31 -0
- runloop_api_client/types/shared_params/agent_source.py +78 -0
- runloop_api_client/types/shared_params/code_mount_parameters.py +25 -0
- runloop_api_client/types/shared_params/launch_parameters.py +88 -0
- runloop_api_client/types/shared_params/mount.py +43 -0
- runloop_api_client/types/shared_params/object_mount.py +21 -0
- runloop_api_client/types/shared_params/run_profile.py +38 -0
- runloop_api_client/types/tunnel_view.py +34 -0
- runloop_api_client-1.5.1.dist-info/METADATA +522 -0
- runloop_api_client-1.5.1.dist-info/RECORD +261 -0
- runloop_api_client-1.5.1.dist-info/WHEEL +4 -0
- runloop_api_client-1.5.1.dist-info/licenses/LICENSE +7 -0
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
"""AsyncScenarioBuilder for constructing scenarios with a fluent API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Dict, List, Iterable, Optional
|
|
6
|
+
from typing_extensions import Self, Unpack, Literal, override
|
|
7
|
+
|
|
8
|
+
from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
|
|
9
|
+
from ._types import ScenarioPreview, LongRequestOptions
|
|
10
|
+
from .._client import AsyncRunloop
|
|
11
|
+
from .async_scenario import AsyncScenario
|
|
12
|
+
from .async_snapshot import AsyncSnapshot
|
|
13
|
+
from .async_blueprint import AsyncBlueprint
|
|
14
|
+
from ..types.scoring_function_param import (
|
|
15
|
+
Scorer,
|
|
16
|
+
ScoringFunctionParam,
|
|
17
|
+
ScorerCustomScoringFunction,
|
|
18
|
+
ScorerAstGrepScoringFunction,
|
|
19
|
+
ScorerCommandScoringFunction,
|
|
20
|
+
ScorerTestBasedScoringFunction,
|
|
21
|
+
ScorerBashScriptScoringFunction,
|
|
22
|
+
ScorerPythonScriptScoringFunction,
|
|
23
|
+
ScorerTestBasedScoringFunctionTestFile,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class AsyncScenarioBuilder:
|
|
28
|
+
"""Async builder for constructing scenarios with a fluent API.
|
|
29
|
+
|
|
30
|
+
Provides a step-by-step interface for configuring all aspects of a scenario
|
|
31
|
+
before pushing it to the platform.
|
|
32
|
+
|
|
33
|
+
Example:
|
|
34
|
+
>>> builder = (
|
|
35
|
+
... runloop.scenario.builder("my-scenario")
|
|
36
|
+
... .from_blueprint(blueprint)
|
|
37
|
+
... .with_working_directory("/app")
|
|
38
|
+
... .with_problem_statement("Fix the bug in main.py")
|
|
39
|
+
... .add_test_command_scorer("tests", test_command="pytest")
|
|
40
|
+
... )
|
|
41
|
+
>>> params = builder.build()
|
|
42
|
+
>>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push()
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
def __init__(self, name: str, client: AsyncRunloop) -> None:
|
|
46
|
+
"""Initialize the builder.
|
|
47
|
+
|
|
48
|
+
:param name: Name for the scenario
|
|
49
|
+
:type name: str
|
|
50
|
+
:param client: AsyncRunloop client instance
|
|
51
|
+
:type client: AsyncRunloop
|
|
52
|
+
"""
|
|
53
|
+
self._client = client
|
|
54
|
+
self._name = name
|
|
55
|
+
|
|
56
|
+
# Environment configuration
|
|
57
|
+
self._blueprint: Optional[AsyncBlueprint] = None
|
|
58
|
+
self._snapshot: Optional[AsyncSnapshot] = None
|
|
59
|
+
self._working_directory: Optional[str] = None
|
|
60
|
+
|
|
61
|
+
# Input context
|
|
62
|
+
self._problem_statement: Optional[str] = None
|
|
63
|
+
self._additional_context: Optional[object] = None
|
|
64
|
+
|
|
65
|
+
# Scoring
|
|
66
|
+
self._scorers: List[ScoringFunctionParam] = []
|
|
67
|
+
|
|
68
|
+
# Metadata and other options
|
|
69
|
+
self._metadata: Dict[str, str] = {}
|
|
70
|
+
self._reference_output: Optional[str] = None
|
|
71
|
+
self._required_env_vars: Optional[List[str]] = None
|
|
72
|
+
self._required_secrets: Optional[List[str]] = None
|
|
73
|
+
self._validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]] = None
|
|
74
|
+
|
|
75
|
+
@override
|
|
76
|
+
def __repr__(self) -> str:
|
|
77
|
+
return f"<AsyncScenarioBuilder name={self._name!r}>"
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def name(self) -> str:
|
|
81
|
+
"""Return the scenario name.
|
|
82
|
+
|
|
83
|
+
:return: Scenario name
|
|
84
|
+
:rtype: str
|
|
85
|
+
"""
|
|
86
|
+
return self._name
|
|
87
|
+
|
|
88
|
+
def from_blueprint(self, blueprint: AsyncBlueprint) -> Self:
|
|
89
|
+
"""Set a blueprint to define the baseline environment for the scenario.
|
|
90
|
+
|
|
91
|
+
:param blueprint: Blueprint to use
|
|
92
|
+
:type blueprint: AsyncBlueprint
|
|
93
|
+
:return: Self for method chaining
|
|
94
|
+
:rtype: Self
|
|
95
|
+
"""
|
|
96
|
+
self._blueprint = blueprint
|
|
97
|
+
self._snapshot = None # Clear snapshot if blueprint is set
|
|
98
|
+
return self
|
|
99
|
+
|
|
100
|
+
def from_snapshot(self, snapshot: AsyncSnapshot) -> Self:
|
|
101
|
+
"""Set a snapshot to define the baseline environment for the scenario.
|
|
102
|
+
|
|
103
|
+
:param snapshot: Snapshot to use
|
|
104
|
+
:type snapshot: AsyncSnapshot
|
|
105
|
+
:return: Self for method chaining
|
|
106
|
+
:rtype: Self
|
|
107
|
+
"""
|
|
108
|
+
self._snapshot = snapshot
|
|
109
|
+
self._blueprint = None # Clear blueprint if snapshot is set
|
|
110
|
+
return self
|
|
111
|
+
|
|
112
|
+
def with_working_directory(self, directory: str) -> Self:
|
|
113
|
+
"""Set the working directory for the scenario.
|
|
114
|
+
|
|
115
|
+
:param directory: Working directory path
|
|
116
|
+
:type directory: str
|
|
117
|
+
:return: Self for method chaining
|
|
118
|
+
:rtype: Self
|
|
119
|
+
"""
|
|
120
|
+
self._working_directory = directory
|
|
121
|
+
return self
|
|
122
|
+
|
|
123
|
+
def with_problem_statement(self, statement: str) -> Self:
|
|
124
|
+
"""Set the problem statement for the scenario; this will be provided as input context to the agent.
|
|
125
|
+
|
|
126
|
+
:param statement: Problem statement text
|
|
127
|
+
:type statement: str
|
|
128
|
+
:return: Self for method chaining
|
|
129
|
+
:rtype: Self
|
|
130
|
+
"""
|
|
131
|
+
self._problem_statement = statement
|
|
132
|
+
return self
|
|
133
|
+
|
|
134
|
+
def with_additional_context(self, context: object) -> Self:
|
|
135
|
+
"""Set additional structured context for the scenario.
|
|
136
|
+
This can be used to provide additional information to the agent, such as hints, examples, or other relevant information.
|
|
137
|
+
|
|
138
|
+
:param context: Additional context (JSON-serializable)
|
|
139
|
+
:type context: object
|
|
140
|
+
:return: Self for method chaining
|
|
141
|
+
:rtype: Self
|
|
142
|
+
"""
|
|
143
|
+
self._additional_context = context
|
|
144
|
+
return self
|
|
145
|
+
|
|
146
|
+
def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self:
|
|
147
|
+
"""Internal helper to add a scorer to the list.
|
|
148
|
+
|
|
149
|
+
:raises ValueError: If weight is not positive
|
|
150
|
+
"""
|
|
151
|
+
if weight <= 0:
|
|
152
|
+
raise ValueError(f"Scorer weight must be positive, got {weight}")
|
|
153
|
+
self._scorers.append({"name": name, "weight": weight, "scorer": scorer})
|
|
154
|
+
return self
|
|
155
|
+
|
|
156
|
+
def add_test_command_scorer(
|
|
157
|
+
self,
|
|
158
|
+
name: str,
|
|
159
|
+
*,
|
|
160
|
+
test_command: str,
|
|
161
|
+
weight: float = 1.0,
|
|
162
|
+
test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] = None,
|
|
163
|
+
) -> Self:
|
|
164
|
+
"""Add a test-based scorer that runs a test command.
|
|
165
|
+
|
|
166
|
+
:param name: Name of the scoring function
|
|
167
|
+
:type name: str
|
|
168
|
+
:param test_command: Command to run tests (e.g., "pytest")
|
|
169
|
+
:type test_command: str
|
|
170
|
+
:param weight: Weight for this scorer (normalized automatically)
|
|
171
|
+
:type weight: float
|
|
172
|
+
:param test_files: Optional test files to create before running
|
|
173
|
+
:type test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]]
|
|
174
|
+
:return: Self for method chaining
|
|
175
|
+
:rtype: Self
|
|
176
|
+
"""
|
|
177
|
+
scorer: ScorerTestBasedScoringFunction = {
|
|
178
|
+
"type": "test_based_scorer",
|
|
179
|
+
"test_command": test_command,
|
|
180
|
+
}
|
|
181
|
+
if test_files:
|
|
182
|
+
scorer["test_files"] = test_files
|
|
183
|
+
return self._add_scorer(name, weight, scorer)
|
|
184
|
+
|
|
185
|
+
def add_shell_command_scorer(
|
|
186
|
+
self,
|
|
187
|
+
name: str,
|
|
188
|
+
*,
|
|
189
|
+
command: str,
|
|
190
|
+
weight: float = 1.0,
|
|
191
|
+
) -> Self:
|
|
192
|
+
"""Add a command scorer that runs a shell command.
|
|
193
|
+
|
|
194
|
+
:param name: Name of the scoring function
|
|
195
|
+
:type name: str
|
|
196
|
+
:param command: Shell command to execute
|
|
197
|
+
:type command: str
|
|
198
|
+
:param weight: Weight for this scorer (normalized automatically)
|
|
199
|
+
:type weight: float
|
|
200
|
+
:return: Self for method chaining
|
|
201
|
+
:rtype: Self
|
|
202
|
+
"""
|
|
203
|
+
scorer: ScorerCommandScoringFunction = {
|
|
204
|
+
"type": "command_scorer",
|
|
205
|
+
"command": command,
|
|
206
|
+
}
|
|
207
|
+
return self._add_scorer(name, weight, scorer)
|
|
208
|
+
|
|
209
|
+
def add_bash_script_scorer(
|
|
210
|
+
self,
|
|
211
|
+
name: str,
|
|
212
|
+
*,
|
|
213
|
+
bash_script: str,
|
|
214
|
+
weight: float = 1.0,
|
|
215
|
+
) -> Self:
|
|
216
|
+
"""Add a standalone bash script scorer.
|
|
217
|
+
|
|
218
|
+
The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive.
|
|
219
|
+
|
|
220
|
+
:param name: Name of the scoring function
|
|
221
|
+
:type name: str
|
|
222
|
+
:param bash_script: Bash script content
|
|
223
|
+
:type bash_script: str
|
|
224
|
+
:param weight: Weight for this scorer (normalized automatically)
|
|
225
|
+
:type weight: float
|
|
226
|
+
:return: Self for method chaining
|
|
227
|
+
:rtype: Self
|
|
228
|
+
"""
|
|
229
|
+
scorer: ScorerBashScriptScoringFunction = {
|
|
230
|
+
"type": "bash_script_scorer",
|
|
231
|
+
"bash_script": bash_script,
|
|
232
|
+
}
|
|
233
|
+
return self._add_scorer(name, weight, scorer)
|
|
234
|
+
|
|
235
|
+
def add_python_script_scorer(
|
|
236
|
+
self,
|
|
237
|
+
name: str,
|
|
238
|
+
*,
|
|
239
|
+
python_script: str,
|
|
240
|
+
weight: float = 1.0,
|
|
241
|
+
python_version_constraint: Optional[str] = None,
|
|
242
|
+
requirements_contents: Optional[str] = None,
|
|
243
|
+
) -> Self:
|
|
244
|
+
"""Add a standalone Python script scorer.
|
|
245
|
+
|
|
246
|
+
The script is run in an isolated uv environment, and the dependencies are declared in the
|
|
247
|
+
`uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__.
|
|
248
|
+
|
|
249
|
+
The script should print the score in the range [0.0, 1.0] to stdout.
|
|
250
|
+
|
|
251
|
+
:param name: Name of the scoring function
|
|
252
|
+
:type name: str
|
|
253
|
+
:param python_script: Python script content
|
|
254
|
+
:type python_script: str
|
|
255
|
+
:param weight: Weight for this scorer (normalized automatically)
|
|
256
|
+
:type weight: float
|
|
257
|
+
:param python_version_constraint: Python version (default "==3.12.10")
|
|
258
|
+
:type python_version_constraint: Optional[str]
|
|
259
|
+
:param requirements_contents: pip requirements.txt content
|
|
260
|
+
:type requirements_contents: Optional[str]
|
|
261
|
+
:return: Self for method chaining
|
|
262
|
+
:rtype: Self
|
|
263
|
+
"""
|
|
264
|
+
scorer: ScorerPythonScriptScoringFunction = {
|
|
265
|
+
"type": "python_script_scorer",
|
|
266
|
+
"python_script": python_script,
|
|
267
|
+
}
|
|
268
|
+
if python_version_constraint:
|
|
269
|
+
scorer["python_version_constraint"] = python_version_constraint
|
|
270
|
+
if requirements_contents:
|
|
271
|
+
scorer["requirements_contents"] = requirements_contents
|
|
272
|
+
return self._add_scorer(name, weight, scorer)
|
|
273
|
+
|
|
274
|
+
def add_ast_grep_scorer(
|
|
275
|
+
self,
|
|
276
|
+
name: str,
|
|
277
|
+
*,
|
|
278
|
+
pattern: str,
|
|
279
|
+
weight: float = 1.0,
|
|
280
|
+
search_directory: str = ".",
|
|
281
|
+
lang: Optional[str] = None,
|
|
282
|
+
) -> Self:
|
|
283
|
+
"""Add an AST grep scorer that matches code patterns.
|
|
284
|
+
|
|
285
|
+
:param name: Name of the scoring function
|
|
286
|
+
:type name: str
|
|
287
|
+
:param pattern: AST pattern to match
|
|
288
|
+
:type pattern: str
|
|
289
|
+
:param weight: Weight for this scorer (normalized automatically)
|
|
290
|
+
:type weight: float
|
|
291
|
+
:param search_directory: Directory to search (default ".")
|
|
292
|
+
:type search_directory: str
|
|
293
|
+
:param lang: Language of the pattern (optional)
|
|
294
|
+
:type lang: Optional[str]
|
|
295
|
+
:return: Self for method chaining
|
|
296
|
+
:rtype: Self
|
|
297
|
+
"""
|
|
298
|
+
scorer: ScorerAstGrepScoringFunction = {
|
|
299
|
+
"type": "ast_grep_scorer",
|
|
300
|
+
"pattern": pattern,
|
|
301
|
+
"search_directory": search_directory,
|
|
302
|
+
}
|
|
303
|
+
if lang:
|
|
304
|
+
scorer["lang"] = lang
|
|
305
|
+
return self._add_scorer(name, weight, scorer)
|
|
306
|
+
|
|
307
|
+
def add_custom_scorer(
|
|
308
|
+
self,
|
|
309
|
+
name: str,
|
|
310
|
+
*,
|
|
311
|
+
custom_scorer_type: str,
|
|
312
|
+
weight: float = 1.0,
|
|
313
|
+
scorer_params: Optional[object] = None,
|
|
314
|
+
) -> Self:
|
|
315
|
+
"""Add a custom scorer registered with Runloop.
|
|
316
|
+
|
|
317
|
+
:param name: Name of the scoring function
|
|
318
|
+
:type name: str
|
|
319
|
+
:param custom_scorer_type: Type identifier registered with Runloop
|
|
320
|
+
:type custom_scorer_type: str
|
|
321
|
+
:param weight: Weight for this scorer (normalized automatically)
|
|
322
|
+
:type weight: float
|
|
323
|
+
:param scorer_params: Additional JSON parameters for the scorer
|
|
324
|
+
:type scorer_params: Optional[object]
|
|
325
|
+
:return: Self for method chaining
|
|
326
|
+
:rtype: Self
|
|
327
|
+
"""
|
|
328
|
+
scorer: ScorerCustomScoringFunction = {
|
|
329
|
+
"type": "custom_scorer",
|
|
330
|
+
"custom_scorer_type": custom_scorer_type,
|
|
331
|
+
}
|
|
332
|
+
if scorer_params:
|
|
333
|
+
scorer["scorer_params"] = scorer_params
|
|
334
|
+
return self._add_scorer(name, weight, scorer)
|
|
335
|
+
|
|
336
|
+
def with_metadata(self, metadata: Dict[str, str]) -> Self:
|
|
337
|
+
"""Set metadata for the scenario.
|
|
338
|
+
|
|
339
|
+
:param metadata: Key-value metadata
|
|
340
|
+
:type metadata: Dict[str, str]
|
|
341
|
+
:return: Self for method chaining
|
|
342
|
+
:rtype: Self
|
|
343
|
+
"""
|
|
344
|
+
self._metadata = metadata
|
|
345
|
+
return self
|
|
346
|
+
|
|
347
|
+
def with_reference_output(self, output: str) -> Self:
|
|
348
|
+
"""Set the reference solution or gold patch for validation.
|
|
349
|
+
After application, the scorer is expected to return a score of 1.0.
|
|
350
|
+
|
|
351
|
+
:param output: Reference solution or gold patch (e.g., git diff)
|
|
352
|
+
:type output: str
|
|
353
|
+
:return: Self for method chaining
|
|
354
|
+
:rtype: Self
|
|
355
|
+
"""
|
|
356
|
+
self._reference_output = output
|
|
357
|
+
return self
|
|
358
|
+
|
|
359
|
+
def with_required_env_vars(self, env_vars: List[str]) -> Self:
|
|
360
|
+
"""Set required environment variables.
|
|
361
|
+
|
|
362
|
+
:param env_vars: List of required environment variable names
|
|
363
|
+
:type env_vars: List[str]
|
|
364
|
+
:return: Self for method chaining
|
|
365
|
+
:rtype: Self
|
|
366
|
+
"""
|
|
367
|
+
self._required_env_vars = env_vars
|
|
368
|
+
return self
|
|
369
|
+
|
|
370
|
+
def with_required_secrets(self, secrets: List[str]) -> Self:
|
|
371
|
+
"""Set required secrets.
|
|
372
|
+
|
|
373
|
+
:param secrets: List of required secret names
|
|
374
|
+
:type secrets: List[str]
|
|
375
|
+
:return: Self for method chaining
|
|
376
|
+
:rtype: Self
|
|
377
|
+
"""
|
|
378
|
+
self._required_secrets = secrets
|
|
379
|
+
return self
|
|
380
|
+
|
|
381
|
+
def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self:
|
|
382
|
+
"""Set the validation strategy to specify how the reference solution or gold patch is applied to the scenario.
|
|
383
|
+
|
|
384
|
+
:param validation_type: Validation type
|
|
385
|
+
:type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]
|
|
386
|
+
:return: Self for method chaining
|
|
387
|
+
:rtype: Self
|
|
388
|
+
"""
|
|
389
|
+
self._validation_type = validation_type
|
|
390
|
+
return self
|
|
391
|
+
|
|
392
|
+
def _build_normalized_scorers(self) -> List[ScoringFunctionParam]:
|
|
393
|
+
"""Build normalized scorers list."""
|
|
394
|
+
total_weight = sum(s["weight"] for s in self._scorers)
|
|
395
|
+
return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
|
|
396
|
+
|
|
397
|
+
def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]:
|
|
398
|
+
"""Build environment parameters."""
|
|
399
|
+
if not self._blueprint and not self._snapshot and not self._working_directory:
|
|
400
|
+
return None
|
|
401
|
+
return {
|
|
402
|
+
"blueprint_id": self._blueprint.id if self._blueprint else None,
|
|
403
|
+
"snapshot_id": self._snapshot.id if self._snapshot else None,
|
|
404
|
+
"working_directory": self._working_directory if self._working_directory else None,
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
def build(self) -> ScenarioCreateParams:
|
|
408
|
+
"""Build the scenario creation parameters.
|
|
409
|
+
|
|
410
|
+
Weights are automatically normalized to sum to 1.0.
|
|
411
|
+
|
|
412
|
+
:raises ValueError: If required fields are missing
|
|
413
|
+
:return: Parameters for scenario creation
|
|
414
|
+
:rtype: ScenarioCreateParams
|
|
415
|
+
"""
|
|
416
|
+
if not self._problem_statement:
|
|
417
|
+
raise ValueError("Problem statement is required. Call with_problem_statement() first.")
|
|
418
|
+
|
|
419
|
+
if not self._scorers:
|
|
420
|
+
raise ValueError(
|
|
421
|
+
"At least one scorer is required. "
|
|
422
|
+
"Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first."
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
return {
|
|
426
|
+
"name": self._name,
|
|
427
|
+
"input_context": {
|
|
428
|
+
"problem_statement": self._problem_statement,
|
|
429
|
+
"additional_context": self._additional_context,
|
|
430
|
+
},
|
|
431
|
+
"scoring_contract": {
|
|
432
|
+
"scoring_function_parameters": self._build_normalized_scorers(),
|
|
433
|
+
},
|
|
434
|
+
"environment_parameters": self._build_environment_params(),
|
|
435
|
+
"metadata": self._metadata,
|
|
436
|
+
"reference_output": self._reference_output,
|
|
437
|
+
"required_environment_variables": self._required_env_vars,
|
|
438
|
+
"required_secret_names": self._required_secrets,
|
|
439
|
+
"validation_type": self._validation_type,
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
def preview(self) -> ScenarioPreview:
|
|
443
|
+
"""Preview the scenario configuration without pushing to the platform.
|
|
444
|
+
|
|
445
|
+
Returns the current configuration state as a ScenarioPreview object.
|
|
446
|
+
Does not validate or raise errors for missing required fields.
|
|
447
|
+
|
|
448
|
+
:return: Preview of the scenario configuration
|
|
449
|
+
:rtype: ScenarioPreview
|
|
450
|
+
"""
|
|
451
|
+
return ScenarioPreview.model_validate(
|
|
452
|
+
{
|
|
453
|
+
"name": self._name,
|
|
454
|
+
"input_context": {
|
|
455
|
+
"problem_statement": self._problem_statement,
|
|
456
|
+
"additional_context": self._additional_context,
|
|
457
|
+
},
|
|
458
|
+
"scoring_contract": {
|
|
459
|
+
"scoring_function_parameters": self._build_normalized_scorers(),
|
|
460
|
+
},
|
|
461
|
+
"environment": self._build_environment_params(),
|
|
462
|
+
"metadata": self._metadata,
|
|
463
|
+
"reference_output": self._reference_output,
|
|
464
|
+
"required_environment_variables": self._required_env_vars,
|
|
465
|
+
"required_secret_names": self._required_secrets,
|
|
466
|
+
"validation_type": self._validation_type,
|
|
467
|
+
}
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario:
|
|
471
|
+
"""Create the scenario on the platform.
|
|
472
|
+
|
|
473
|
+
:param options: Optional long-running request configuration
|
|
474
|
+
:raises ValueError: If required fields are missing
|
|
475
|
+
:return: Created scenario wrapper
|
|
476
|
+
:rtype: AsyncScenario
|
|
477
|
+
"""
|
|
478
|
+
params = self.build()
|
|
479
|
+
scenario_view = await self._client.scenarios.create(**params, **options)
|
|
480
|
+
return AsyncScenario(self._client, scenario_view.id)
|