eval-protocol 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- development/__init__.py +1 -0
- development/normalize_sandbox_fusion.py +628 -0
- development/utils/__init__.py +1 -0
- development/utils/generate_api_key.py +31 -0
- development/utils/subprocess_manager.py +481 -0
- eval_protocol/__init__.py +86 -0
- eval_protocol/__main__.py +10 -0
- eval_protocol/_version.py +21 -0
- eval_protocol/adapters/__init__.py +1 -0
- eval_protocol/adapters/braintrust.py +8 -0
- eval_protocol/adapters/trl.py +8 -0
- eval_protocol/agent/__init__.py +29 -0
- eval_protocol/agent/models.py +69 -0
- eval_protocol/agent/orchestrator.py +893 -0
- eval_protocol/agent/resource_abc.py +89 -0
- eval_protocol/agent/resource_pool.py +184 -0
- eval_protocol/agent/resources/__init__.py +44 -0
- eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
- eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
- eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
- eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
- eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
- eval_protocol/agent/resources/docker_resource.py +479 -0
- eval_protocol/agent/resources/filesystem_resource.py +371 -0
- eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
- eval_protocol/agent/resources/http_rollout_resource.py +325 -0
- eval_protocol/agent/resources/python_state_resource.py +170 -0
- eval_protocol/agent/resources/sql_resource.py +271 -0
- eval_protocol/agent/task_manager.py +1064 -0
- eval_protocol/agent/tool_registry.py +111 -0
- eval_protocol/auth.py +156 -0
- eval_protocol/cli.py +425 -0
- eval_protocol/cli_commands/__init__.py +1 -0
- eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
- eval_protocol/cli_commands/common.py +242 -0
- eval_protocol/cli_commands/deploy.py +486 -0
- eval_protocol/cli_commands/deploy_mcp.py +287 -0
- eval_protocol/cli_commands/preview.py +186 -0
- eval_protocol/cli_commands/run_eval_cmd.py +202 -0
- eval_protocol/common_utils.py +36 -0
- eval_protocol/config.py +180 -0
- eval_protocol/datasets/__init__.py +1 -0
- eval_protocol/datasets/loader.py +521 -0
- eval_protocol/evaluation.py +1045 -0
- eval_protocol/execution/__init__.py +1 -0
- eval_protocol/execution/pipeline.py +920 -0
- eval_protocol/gcp_tools.py +484 -0
- eval_protocol/generation/cache.py +141 -0
- eval_protocol/generation/clients/base.py +67 -0
- eval_protocol/generation/clients.py +248 -0
- eval_protocol/generic_server.py +165 -0
- eval_protocol/integrations/__init__.py +12 -0
- eval_protocol/integrations/braintrust.py +51 -0
- eval_protocol/integrations/deepeval.py +106 -0
- eval_protocol/integrations/openeval.py +40 -0
- eval_protocol/integrations/trl.py +187 -0
- eval_protocol/mcp/__init__.py +48 -0
- eval_protocol/mcp/adapter.py +131 -0
- eval_protocol/mcp/client/__init__.py +12 -0
- eval_protocol/mcp/client/connection.py +499 -0
- eval_protocol/mcp/clients.py +195 -0
- eval_protocol/mcp/execution/__init__.py +23 -0
- eval_protocol/mcp/execution/base_policy.py +227 -0
- eval_protocol/mcp/execution/fireworks_policy.py +209 -0
- eval_protocol/mcp/execution/manager.py +506 -0
- eval_protocol/mcp/execution/policy.py +421 -0
- eval_protocol/mcp/grid_renderer.py +54 -0
- eval_protocol/mcp/mcpgym.py +637 -0
- eval_protocol/mcp/process_manager.py +177 -0
- eval_protocol/mcp/session/__init__.py +11 -0
- eval_protocol/mcp/session/manager.py +228 -0
- eval_protocol/mcp/simple_process_manager.py +291 -0
- eval_protocol/mcp/simulation_server.py +458 -0
- eval_protocol/mcp/types.py +80 -0
- eval_protocol/mcp_agent/__init__.py +1 -0
- eval_protocol/mcp_agent/config.py +147 -0
- eval_protocol/mcp_agent/intermediary_server.py +542 -0
- eval_protocol/mcp_agent/main.py +210 -0
- eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
- eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
- eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
- eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
- eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
- eval_protocol/mcp_agent/session.py +79 -0
- eval_protocol/mcp_env.py +304 -0
- eval_protocol/models.py +366 -0
- eval_protocol/packaging.py +219 -0
- eval_protocol/platform_api.py +360 -0
- eval_protocol/playback_policy.py +396 -0
- eval_protocol/resources.py +128 -0
- eval_protocol/reward_function.py +410 -0
- eval_protocol/rewards/__init__.py +94 -0
- eval_protocol/rewards/accuracy.py +454 -0
- eval_protocol/rewards/accuracy_length.py +173 -0
- eval_protocol/rewards/apps_coding_reward.py +331 -0
- eval_protocol/rewards/apps_execution_utils.py +149 -0
- eval_protocol/rewards/apps_testing_util.py +559 -0
- eval_protocol/rewards/bfcl_reward.py +313 -0
- eval_protocol/rewards/code_execution.py +1620 -0
- eval_protocol/rewards/code_execution_utils.py +72 -0
- eval_protocol/rewards/cpp_code.py +861 -0
- eval_protocol/rewards/deepcoder_reward.py +161 -0
- eval_protocol/rewards/format.py +129 -0
- eval_protocol/rewards/function_calling.py +541 -0
- eval_protocol/rewards/json_schema.py +422 -0
- eval_protocol/rewards/language_consistency.py +700 -0
- eval_protocol/rewards/lean_prover.py +479 -0
- eval_protocol/rewards/length.py +375 -0
- eval_protocol/rewards/list_comparison_math_reward.py +221 -0
- eval_protocol/rewards/math.py +762 -0
- eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
- eval_protocol/rewards/reasoning_steps.py +249 -0
- eval_protocol/rewards/repetition.py +342 -0
- eval_protocol/rewards/tag_count.py +162 -0
- eval_protocol/rl_processing.py +82 -0
- eval_protocol/server.py +271 -0
- eval_protocol/typed_interface.py +260 -0
- eval_protocol/utils/__init__.py +8 -0
- eval_protocol/utils/batch_evaluation.py +217 -0
- eval_protocol/utils/batch_transformation.py +205 -0
- eval_protocol/utils/dataset_helpers.py +112 -0
- eval_protocol/utils/module_loader.py +56 -0
- eval_protocol/utils/packaging_utils.py +108 -0
- eval_protocol/utils/static_policy.py +305 -0
- eval_protocol-0.0.3.dist-info/METADATA +635 -0
- eval_protocol-0.0.3.dist-info/RECORD +130 -0
- eval_protocol-0.0.3.dist-info/WHEEL +5 -0
- eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
- eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
- eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FileSystemResource: A ForkableResource for managing a directory structure as state.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import tarfile
|
|
8
|
+
import uuid
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
|
+
|
|
12
|
+
from ..resource_abc import ForkableResource
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FileSystemResource(ForkableResource):
    """
    A ForkableResource that manages a directory and its contents as its state.

    Allows for initializing a directory structure, forking it (deep copy),
    checkpointing (archiving to tar.gz), and restoring. File system operations
    can be performed via the step() method.

    Caller-supplied paths (``base_dir_name``, the keys of ``initial_files``,
    the ``path`` of a step action and the members of a checkpoint archive) are
    confined to the directory they are meant to live in; anything that would
    escape it is rejected with an exception.

    Attributes:
        _config (Dict[str, Any]): Configuration for the resource.
        _managed_dir_path (Optional[Path]): Path to the root of the managed directory.
        _base_managed_dir_path (Optional[Path]): Path to the initially set up directory,
                                                 used as a template for forking.
        _temp_base_dir (Path): Base directory to store all managed directories and checkpoints.
    """

    def __init__(self) -> None:
        self._config: Dict[str, Any] = {}
        self._managed_dir_path: Optional[Path] = None
        self._base_managed_dir_path: Optional[Path] = None  # Stores the path of the initial setup
        # Relative to the current working directory at construction time.
        self._temp_base_dir = Path("./.rk_temp_fs").resolve()  # Resolve to absolute path
        self._temp_base_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _is_within(root: Path, candidate: Path) -> bool:
        """Return True when *candidate* is *root* itself or lies beneath it.

        The comparison is purely lexical, so both paths must already be
        normalized (resolved) by the caller.
        """
        return candidate == root or root in candidate.parents

    def _get_new_managed_path(self, prefix: str = "fs_") -> Path:
        """Generates a new unique path within the temp base directory."""
        return self._temp_base_dir / f"{prefix}{uuid.uuid4().hex}"

    def _relative_display(self, path: Path) -> str:
        """Return *path* relative to the managed directory, as a string.

        The managed root is resolved first because every path handed to this
        helper descends from ``_resolve_path``, which returns resolved paths;
        comparing against an unresolved root would make ``relative_to`` fail.
        """
        if self._managed_dir_path is None:
            return str(path)
        return str(path.relative_to(self._managed_dir_path.resolve()))

    async def setup(self, config: Dict[str, Any]) -> None:
        """
        Initializes the managed directory structure.

        Args:
            config: Configuration dictionary. Expected keys:
                - 'base_dir_name' (Optional[str]): Name for the root managed directory.
                                                   Defaults to a UUID-based name when
                                                   missing, None or empty.
                - 'initial_files' (Optional[Dict[str, str]]):
                    A dictionary where keys are relative file paths within the
                    managed directory and values are their content.
                    Example: {"subdir/file.txt": "Hello", "root_file.py": "print('world')"}

        Raises:
            ValueError: If 'base_dir_name' does not name a directory strictly
                inside the temp base directory, or an 'initial_files' key
                points outside the managed directory.
        """
        self._config = config.copy()

        temp_root = self._temp_base_dir.resolve()
        # `or` (rather than a .get default) also covers an explicit None / "".
        base_dir_name = self._config.get("base_dir_name") or f"fs_base_{uuid.uuid4().hex}"
        base_dir = self._temp_base_dir / base_dir_name
        base_root = base_dir.resolve()

        # The directory is wiped below, so it must be a strict descendant of the
        # temp base directory: an absolute path, "..", or "." would otherwise
        # delete unrelated data (or every other resource's state).
        if base_root == temp_root or temp_root not in base_root.parents:
            raise ValueError(
                f"Invalid 'base_dir_name' '{base_dir_name}': it must name a directory "
                "inside the temporary base directory."
            )

        self._base_managed_dir_path = base_dir
        self._managed_dir_path = base_dir  # Initially, current is base

        if base_dir.exists():
            shutil.rmtree(base_dir)  # Clean start
        base_dir.mkdir(parents=True)

        initial_files = self._config.get("initial_files") or {}

        # Validate every target before writing any file so a bad entry does not
        # leave a half-populated directory behind.
        planned: Dict[Path, str] = {}
        for rel_path_str, content in initial_files.items():
            target = (base_root / rel_path_str).resolve()
            if target == base_root or base_root not in target.parents:
                raise ValueError(
                    f"Initial file path '{rel_path_str}' attempts to access outside the managed directory."
                )
            planned[target] = content

        for target, content in planned.items():
            target.parent.mkdir(parents=True, exist_ok=True)
            with open(target, "w", encoding="utf-8") as f:
                f.write(content)

    async def fork(self) -> "FileSystemResource":
        """
        Creates a new FileSystemResource instance with a deep copy of the
        current managed directory state.

        Raises:
            RuntimeError: If the managed directory does not exist (setup or
                restore has not produced one yet).
        """
        if not self._managed_dir_path or not self._managed_dir_path.exists():
            raise RuntimeError("Cannot fork: managed directory does not exist or setup was not called.")

        forked_resource = FileSystemResource()
        forked_resource._config = self._config.copy()
        forked_resource._temp_base_dir = self._temp_base_dir

        # The fork is a live copy of this resource's *current* state.
        forked_dir_path = self._get_new_managed_path(prefix="fs_fork_")
        shutil.copytree(self._managed_dir_path, forked_dir_path)

        forked_resource._managed_dir_path = forked_dir_path
        # A fork has no "base template" of its own; if it forks again, its
        # current state is what gets copied.
        forked_resource._base_managed_dir_path = None

        return forked_resource

    async def checkpoint(self) -> Dict[str, Any]:
        """
        Creates a tar.gz archive of the current managed directory and returns its path.

        Returns:
            A dict with 'type' set to "filesystem_tar_gz" and 'checkpoint_path'
            set to the archive location inside the temp base directory.

        Raises:
            RuntimeError: If the managed directory does not exist.
        """
        if not self._managed_dir_path or not self._managed_dir_path.exists():
            raise RuntimeError("Cannot checkpoint: managed directory does not exist.")

        checkpoint_filename = f"checkpoint_fs_{self._managed_dir_path.name}_{uuid.uuid4().hex}.tar.gz"
        checkpoint_path = self._temp_base_dir / checkpoint_filename

        with tarfile.open(checkpoint_path, "w:gz") as tar:
            # Add files relative to the managed_dir_path so they extract correctly
            tar.add(str(self._managed_dir_path), arcname=".")

        return {"type": "filesystem_tar_gz", "checkpoint_path": str(checkpoint_path)}

    @staticmethod
    def _safe_extract(tar: tarfile.TarFile, dest: Path) -> None:
        """Extract *tar* into *dest* without letting any member escape *dest*.

        Uses the standard library's "data" extraction filter when the running
        interpreter provides it. On interpreters without extraction filters a
        conservative fallback is used: link members are refused outright and
        every member name must normalize to a location inside *dest*.
        """
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=str(dest), filter="data")
            return

        dest_root = dest.resolve()
        for member in tar.getmembers():
            if member.issym() or member.islnk():
                raise ValueError(f"Refusing to extract link member '{member.name}' from checkpoint archive.")
            # With links refused and an empty destination, lexical normalization
            # is equivalent to the real on-disk location.
            target = Path(os.path.normpath(dest_root / member.name))
            if target != dest_root and dest_root not in target.parents:
                raise ValueError(
                    f"Checkpoint member '{member.name}' attempts to extract outside the managed directory."
                )
        tar.extractall(path=str(dest))

    async def restore(self, state_data: Dict[str, Any]) -> None:
        """
        Restores the managed directory state from a tar.gz archive.
        The current managed directory will be replaced.

        Args:
            state_data: Dict as returned by checkpoint(); must have 'type'
                equal to "filesystem_tar_gz" and a non-empty 'checkpoint_path'.

        Raises:
            ValueError: If state_data is malformed, or (fallback extraction
                path) an archive member would escape the managed directory.
            FileNotFoundError: If the archive does not exist.
            tarfile.TarError: If the archive is unreadable or a member is
                rejected by the extraction filter.
        """
        archive_type = state_data.get("type")
        checkpoint_path_str = state_data.get("checkpoint_path")

        if archive_type != "filesystem_tar_gz" or not checkpoint_path_str:
            raise ValueError("Invalid state_data for FileSystemResource restore.")

        checkpoint_path = Path(checkpoint_path_str)
        if not checkpoint_path.exists():
            raise FileNotFoundError(f"Checkpoint archive not found: {checkpoint_path}")

        if not self._managed_dir_path:
            self._managed_dir_path = self._get_new_managed_path(prefix="fs_restored_")

        if self._managed_dir_path.exists():
            shutil.rmtree(self._managed_dir_path)  # Clean before restore
        self._managed_dir_path.mkdir(parents=True)

        # The archive path comes from the caller, so its members are treated as
        # untrusted and confined to the managed directory.
        with tarfile.open(checkpoint_path, "r:gz") as tar:
            self._safe_extract(tar, self._managed_dir_path)

        # Subsequent forks from this instance copy the restored state, because
        # fork() always copies _managed_dir_path.

    # _resolve_path is a synchronous helper, no need to make it async unless it performs async I/O
    def _resolve_path(self, rel_path: Union[str, Path]) -> Path:
        """Resolves a relative path against the managed directory and ensures it's within.

        Raises:
            RuntimeError: If the managed directory path is not set.
            ValueError: If the resolved path lies outside the managed directory.
        """
        if not self._managed_dir_path:
            raise RuntimeError("Managed directory path not set.")

        root = self._managed_dir_path.resolve()
        abs_path = (self._managed_dir_path / rel_path).resolve()

        # Security check: ensure the path is within the managed directory
        if not self._is_within(root, abs_path):
            raise ValueError(f"Path '{rel_path}' attempts to access outside the managed directory.")
        return abs_path

    async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
        """
        Performs a file system operation within the managed directory.

        Supported actions:
        - 'create_file' / 'write_file': Creates a file or overwrites an existing one.
            Params: {'path': str, 'content': Optional[str]}
        - 'read_file': Reads the content of a file.
            Params: {'path': str} -> Returns: str (content)
        - 'delete_file': Deletes a file.
            Params: {'path': str}
        - 'list_dir': Lists contents of a directory.
            Params: {'path': str (relative to managed_dir), 'recursive': Optional[bool]}
            -> Returns: List[str], sorted, relative to the managed directory
        - 'create_dir': Creates a directory.
            Params: {'path': str}
        - 'delete_dir': Deletes a directory recursively.
            Params: {'path': str}

        Mutating actions return {"status": "success", "path": <relative path>}.

        Raises:
            ValueError: If 'path' is missing/empty, escapes the managed
                directory, or 'delete_dir' targets the managed root.
            FileNotFoundError: If a file action targets a non-file.
            NotADirectoryError: If a directory action targets a non-directory.
            NotImplementedError: If action_name is not supported.
        """
        path_str = action_params.get("path")
        if path_str is None:  # Every currently supported action needs a path
            raise ValueError(f"Missing 'path' in action_params for '{action_name}'.")

        # An empty path is deliberately left unresolved so each action reports
        # it with its own "Path is required" error.
        abs_path = self._resolve_path(path_str) if path_str else None

        if action_name == "create_file" or action_name == "write_file":
            if not abs_path:
                raise ValueError("Path is required for create/write_file")
            content = action_params.get("content", "")
            abs_path.parent.mkdir(parents=True, exist_ok=True)
            with open(abs_path, "w", encoding="utf-8") as f:
                f.write(content)
            return {"status": "success", "path": self._relative_display(abs_path)}

        elif action_name == "read_file":
            if not abs_path:
                raise ValueError("Path is required for read_file")
            if not abs_path.is_file():
                raise FileNotFoundError(f"File not found: {path_str}")
            with open(abs_path, "r", encoding="utf-8") as f:
                return f.read()

        elif action_name == "delete_file":
            if not abs_path:
                raise ValueError("Path is required for delete_file")
            if not abs_path.is_file():
                raise FileNotFoundError(f"File not found: {path_str}")
            abs_path.unlink()
            return {"status": "success", "path": self._relative_display(abs_path)}

        elif action_name == "list_dir":
            if not abs_path:
                raise ValueError("Path is required for list_dir")
            if not abs_path.is_dir():
                raise NotADirectoryError(f"Not a directory: {path_str}")

            recursive = action_params.get("recursive", False)
            entries = abs_path.rglob("*") if recursive else abs_path.iterdir()
            # Directory iteration order is OS-dependent; sort for a stable result.
            return sorted(self._relative_display(item) for item in entries)

        elif action_name == "create_dir":
            if not abs_path:
                raise ValueError("Path is required for create_dir")
            abs_path.mkdir(parents=True, exist_ok=True)
            return {"status": "success", "path": self._relative_display(abs_path)}

        elif action_name == "delete_dir":
            if not abs_path:
                raise ValueError("Path is required for delete_dir")
            if not abs_path.is_dir():
                raise NotADirectoryError(f"Not a directory: {path_str}")
            # Safety: don't delete the root managed dir itself via step.
            # abs_path is resolved, so compare against the resolved root.
            if self._managed_dir_path is not None and abs_path == self._managed_dir_path.resolve():
                raise ValueError("Cannot delete the root managed directory via 'delete_dir' action.")
            shutil.rmtree(abs_path)
            return {"status": "success", "path": self._relative_display(abs_path)}

        else:
            raise NotImplementedError(f"Action '{action_name}' not supported by FileSystemResource.")

    async def get_observation(self) -> Dict[str, Any]:
        """
        Returns the path to the managed directory.

        'status' is "ready" when the managed directory exists on disk and
        "uninitialized" otherwise.
        """
        return {
            "type": "filesystem",
            "managed_dir_path": (str(self._managed_dir_path) if self._managed_dir_path else None),
            "status": ("ready" if self._managed_dir_path and self._managed_dir_path.exists() else "uninitialized"),
        }

    async def get_tools_spec(self) -> List[Dict[str, Any]]:
        """
        Returns tool specifications for file system operations.

        Only a subset of the actions accepted by step() is advertised here.
        """
        # This can be extensive. For now, a few examples.
        return [
            {
                "type": "function",
                "function": {
                    "name": "write_file",
                    "description": "Creates or overwrites a file with the given content within the managed directory.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "path": {
                                "type": "string",
                                "description": "Relative path to the file.",
                            },
                            "content": {
                                "type": "string",
                                "description": "Content to write to the file.",
                            },
                        },
                        "required": ["path", "content"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "read_file",
                    "description": "Reads the content of a file from the managed directory.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "path": {
                                "type": "string",
                                "description": "Relative path to the file.",
                            }
                        },
                        "required": ["path"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "list_dir",
                    "description": "Lists files and directories at the given relative path within the managed directory.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "path": {
                                "type": "string",
                                "description": "Relative path to the directory.",
                            },
                            "recursive": {
                                "type": "boolean",
                                "description": "List recursively (default: false).",
                            },
                        },
                        "required": ["path"],
                    },
                },
            },
            # Add more tools: delete_file, create_dir, delete_dir etc.
        ]

    async def close(self) -> None:
        """
        Cleans up by deleting the managed directory and any checkpoints if they are temporary.
        For now, it only deletes the current _managed_dir_path.
        A more robust strategy for _temp_base_dir cleanup is needed for production.
        """
        if self._managed_dir_path and self._managed_dir_path.exists():
            try:
                shutil.rmtree(self._managed_dir_path)
            except OSError as e:
                # Best-effort cleanup: report and continue so close() never raises here.
                print(f"Error deleting managed directory {self._managed_dir_path}: {e}")

        # self._base_managed_dir_path might also need cleanup if it's different and temporary.
        self._managed_dir_path = None
        self._base_managed_dir_path = None
        # Deleting _temp_base_dir itself could be too aggressive if it holds checkpoints
        # or other active resources.
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTTP Rollout Protocol - Standardized types for HTTP rollout communication.
|
|
3
|
+
|
|
4
|
+
This module defines the standard request/response models for HTTP rollout servers
|
|
5
|
+
and clients, ensuring consistent communication across different implementations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class StartEpisodeRequest(BaseModel):
    """Request to start a new episode.

    The model declares no fields of its own; the inner ``Config`` sets
    ``extra = "allow"`` so keys that are not declared here (for example a
    seed) are permitted on the request.
    """

    # NOTE(review): this is the pydantic v1-style inner Config class; confirm
    # which pydantic major version the project pins before changing it.
    class Config:
        extra = "allow"  # Allow arbitrary extra fields (like seed)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class StartEpisodeResponse(BaseModel):
    """Response from starting a new episode."""

    # Identifier of the episode; StepRequest and EndEpisodeRequest carry a
    # field of the same name (presumably this value; verify against the server).
    episode_id: str
    # Observation payload as a free-form mapping; no schema is enforced here.
    observation: Dict[str, Any]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class StepRequest(BaseModel):
    """Request to take a step in the environment."""

    # Identifier of the episode the step applies to.
    episode_id: str
    # Deliberately untyped: the action format is defined by the environment.
    action: Any  # Can be int, str, dict, etc. depending on environment
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class StepResponse(BaseModel):
    """Response from taking a step in the environment."""

    # Observation payload as a free-form mapping; no schema is enforced here.
    observation: Dict[str, Any]
    # Required flag; presumably True once the episode has terminated (confirm with server).
    is_done: bool
    # Optional free-form extra data; defaults to None when omitted.
    info: Optional[Dict[str, Any]] = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EndEpisodeRequest(BaseModel):
    """Request to end an episode."""

    # Identifier of the episode to end.
    episode_id: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class EndEpisodeResponse(BaseModel):
    """Response from ending an episode."""

    # Free-form text returned by the server; its content is not constrained here.
    message: str
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class HealthResponse(BaseModel):
    """Response from health check endpoint."""

    # Required status string; the set of valid values is not defined in this module.
    status: str
    # Optional; presumably the name of the game/environment being served (verify with server).
    game: Optional[str] = None
    # Optional version string; defaults to None when the server omits it.
    version: Optional[str] = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class HttpRolloutConfig(BaseModel):
    """Configuration for HTTP rollout resource.

    Only ``base_url`` is required; every other field has a default.
    """

    # Required; presumably the endpoint paths below are appended to it (verify in the client).
    base_url: str
    # Endpoint paths, one per protocol operation.
    start_episode_endpoint: str = "/start_episode"
    step_endpoint: str = "/step"
    end_episode_endpoint: str = "/end_episode"
    health_endpoint: str = "/health"
    # NOTE(review): presumably seconds per request; confirm against the HTTP client.
    timeout: float = 30.0
    # Retry budget; how it is applied is decided by the consumer of this config.
    max_retries: int = 3
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Observation structure for game environments
|
|
75
|
+
class GameObservation(BaseModel):
    """Standard observation structure for game environments.

    Every field has a default, so an instance can be built from a partial
    observation.
    """

    # Optional list of ints; presumably grid coordinates (axis order not defined here; TODO confirm).
    position: Optional[List[int]] = None
    # Optional string describing the current cell; its encoding is environment-specific.
    current_cell: Optional[str] = None
    # Boolean flags, both False by default.
    done: bool = False
    won: bool = False
    # Optional text; presumably a textual rendering of the environment (verify with producer).
    visual: Optional[str] = None
    # Optional free-form message.
    message: Optional[str] = None
    # Optional counters; None when the environment does not report them.
    step_count: Optional[int] = None
    max_steps: Optional[int] = None
|