eval-protocol 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- development/__init__.py +1 -0
- development/normalize_sandbox_fusion.py +628 -0
- development/utils/__init__.py +1 -0
- development/utils/generate_api_key.py +31 -0
- development/utils/subprocess_manager.py +481 -0
- eval_protocol/__init__.py +86 -0
- eval_protocol/__main__.py +10 -0
- eval_protocol/_version.py +21 -0
- eval_protocol/adapters/__init__.py +1 -0
- eval_protocol/adapters/braintrust.py +8 -0
- eval_protocol/adapters/trl.py +8 -0
- eval_protocol/agent/__init__.py +29 -0
- eval_protocol/agent/models.py +69 -0
- eval_protocol/agent/orchestrator.py +893 -0
- eval_protocol/agent/resource_abc.py +89 -0
- eval_protocol/agent/resource_pool.py +184 -0
- eval_protocol/agent/resources/__init__.py +44 -0
- eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
- eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
- eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
- eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
- eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
- eval_protocol/agent/resources/docker_resource.py +479 -0
- eval_protocol/agent/resources/filesystem_resource.py +371 -0
- eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
- eval_protocol/agent/resources/http_rollout_resource.py +325 -0
- eval_protocol/agent/resources/python_state_resource.py +170 -0
- eval_protocol/agent/resources/sql_resource.py +271 -0
- eval_protocol/agent/task_manager.py +1064 -0
- eval_protocol/agent/tool_registry.py +111 -0
- eval_protocol/auth.py +156 -0
- eval_protocol/cli.py +425 -0
- eval_protocol/cli_commands/__init__.py +1 -0
- eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
- eval_protocol/cli_commands/common.py +242 -0
- eval_protocol/cli_commands/deploy.py +486 -0
- eval_protocol/cli_commands/deploy_mcp.py +287 -0
- eval_protocol/cli_commands/preview.py +186 -0
- eval_protocol/cli_commands/run_eval_cmd.py +202 -0
- eval_protocol/common_utils.py +36 -0
- eval_protocol/config.py +180 -0
- eval_protocol/datasets/__init__.py +1 -0
- eval_protocol/datasets/loader.py +521 -0
- eval_protocol/evaluation.py +1045 -0
- eval_protocol/execution/__init__.py +1 -0
- eval_protocol/execution/pipeline.py +920 -0
- eval_protocol/gcp_tools.py +484 -0
- eval_protocol/generation/cache.py +141 -0
- eval_protocol/generation/clients/base.py +67 -0
- eval_protocol/generation/clients.py +248 -0
- eval_protocol/generic_server.py +165 -0
- eval_protocol/integrations/__init__.py +12 -0
- eval_protocol/integrations/braintrust.py +51 -0
- eval_protocol/integrations/deepeval.py +106 -0
- eval_protocol/integrations/openeval.py +40 -0
- eval_protocol/integrations/trl.py +187 -0
- eval_protocol/mcp/__init__.py +48 -0
- eval_protocol/mcp/adapter.py +131 -0
- eval_protocol/mcp/client/__init__.py +12 -0
- eval_protocol/mcp/client/connection.py +499 -0
- eval_protocol/mcp/clients.py +195 -0
- eval_protocol/mcp/execution/__init__.py +23 -0
- eval_protocol/mcp/execution/base_policy.py +227 -0
- eval_protocol/mcp/execution/fireworks_policy.py +209 -0
- eval_protocol/mcp/execution/manager.py +506 -0
- eval_protocol/mcp/execution/policy.py +421 -0
- eval_protocol/mcp/grid_renderer.py +54 -0
- eval_protocol/mcp/mcpgym.py +637 -0
- eval_protocol/mcp/process_manager.py +177 -0
- eval_protocol/mcp/session/__init__.py +11 -0
- eval_protocol/mcp/session/manager.py +228 -0
- eval_protocol/mcp/simple_process_manager.py +291 -0
- eval_protocol/mcp/simulation_server.py +458 -0
- eval_protocol/mcp/types.py +80 -0
- eval_protocol/mcp_agent/__init__.py +1 -0
- eval_protocol/mcp_agent/config.py +147 -0
- eval_protocol/mcp_agent/intermediary_server.py +542 -0
- eval_protocol/mcp_agent/main.py +210 -0
- eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
- eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
- eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
- eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
- eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
- eval_protocol/mcp_agent/session.py +79 -0
- eval_protocol/mcp_env.py +304 -0
- eval_protocol/models.py +366 -0
- eval_protocol/packaging.py +219 -0
- eval_protocol/platform_api.py +360 -0
- eval_protocol/playback_policy.py +396 -0
- eval_protocol/resources.py +128 -0
- eval_protocol/reward_function.py +410 -0
- eval_protocol/rewards/__init__.py +94 -0
- eval_protocol/rewards/accuracy.py +454 -0
- eval_protocol/rewards/accuracy_length.py +173 -0
- eval_protocol/rewards/apps_coding_reward.py +331 -0
- eval_protocol/rewards/apps_execution_utils.py +149 -0
- eval_protocol/rewards/apps_testing_util.py +559 -0
- eval_protocol/rewards/bfcl_reward.py +313 -0
- eval_protocol/rewards/code_execution.py +1620 -0
- eval_protocol/rewards/code_execution_utils.py +72 -0
- eval_protocol/rewards/cpp_code.py +861 -0
- eval_protocol/rewards/deepcoder_reward.py +161 -0
- eval_protocol/rewards/format.py +129 -0
- eval_protocol/rewards/function_calling.py +541 -0
- eval_protocol/rewards/json_schema.py +422 -0
- eval_protocol/rewards/language_consistency.py +700 -0
- eval_protocol/rewards/lean_prover.py +479 -0
- eval_protocol/rewards/length.py +375 -0
- eval_protocol/rewards/list_comparison_math_reward.py +221 -0
- eval_protocol/rewards/math.py +762 -0
- eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
- eval_protocol/rewards/reasoning_steps.py +249 -0
- eval_protocol/rewards/repetition.py +342 -0
- eval_protocol/rewards/tag_count.py +162 -0
- eval_protocol/rl_processing.py +82 -0
- eval_protocol/server.py +271 -0
- eval_protocol/typed_interface.py +260 -0
- eval_protocol/utils/__init__.py +8 -0
- eval_protocol/utils/batch_evaluation.py +217 -0
- eval_protocol/utils/batch_transformation.py +205 -0
- eval_protocol/utils/dataset_helpers.py +112 -0
- eval_protocol/utils/module_loader.py +56 -0
- eval_protocol/utils/packaging_utils.py +108 -0
- eval_protocol/utils/static_policy.py +305 -0
- eval_protocol-0.0.3.dist-info/METADATA +635 -0
- eval_protocol-0.0.3.dist-info/RECORD +130 -0
- eval_protocol-0.0.3.dist-info/WHEEL +5 -0
- eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
- eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
- eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Adapters for integrating reward-kit reward functions with TRL (Transformer Reinforcement Learning) trainers.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
7
|
+
|
|
8
|
+
from eval_protocol.models import Message
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_trl_adapter(
    reward_fn: Callable,
    dataset_to_reward_kwargs_map: Dict[str, str],
    static_reward_kwargs: Optional[Dict[str, Any]] = None,
    user_message_fn: Optional[Callable[[Any], str]] = None,  # Function to construct user message content
    assistant_message_fn: Optional[Callable[[Any], str]] = None,  # Function to construct assistant message content
) -> Callable[[List[Any], List[str]], List[float]]:
    """
    Build a TRL-compatible reward callable around a reward function.

    TRL trainers (e.g., GRPOTrainer, PPOTrainer) call the reward function as
        (prompts: List[Any], completions: List[str], **kwargs: Any) -> List[float]
    where **kwargs carries the remaining dataset columns, one list per column.

    Args:
        reward_fn: The reward function to adapt. It is called once per sample as
            ``reward_fn(messages=[user_msg, assistant_msg], **kwargs)`` and is
            expected to return an object exposing ``.get("score")``
            (e.g. a function decorated with @reward_function).
        dataset_to_reward_kwargs_map: Maps dataset column names (keys of the
            **kwargs passed by TRL) to parameter names of ``reward_fn``.
            Example: {"test_cases_column": "test_cases_param"} passes
            kwargs['test_cases_column'][i] as ``test_cases_param`` for sample i.
        static_reward_kwargs: Keyword arguments passed unchanged to ``reward_fn``
            for every sample, e.g. {"language": "python", "timeout": 10}.
            Per-sample (mapped) values win when a name appears in both.
        user_message_fn: Optional converter from a prompt item to the user
            message content. If None, ``str(prompt)`` is used.
        assistant_message_fn: Optional converter from a completion to the
            assistant message content. If None, a string completion is used
            as-is and a single-element ``[{"content": "..."}]`` list is unwrapped.

    Returns:
        An adapter function that can be passed to TRL trainers. It returns one
        float per processed sample; a sample whose reward call raises, or whose
        result has no score, is assigned 0.0.
    """
    if static_reward_kwargs is None:
        static_reward_kwargs = {}

    def _assistant_content(completion: Any) -> Any:
        """Derive the assistant message content from a raw completion."""
        if assistant_message_fn:
            return assistant_message_fn(completion)
        if isinstance(completion, str):
            return completion
        if (
            isinstance(completion, list)
            and len(completion) == 1
            and isinstance(completion[0], dict)
            and "content" in completion[0]
            and isinstance(completion[0].get("content"), str)
        ):
            # Handles cases like [{'role':'assistant', 'content':'actual_text'}]
            return completion[0]["content"]
        # Fallback if the completion is an unexpected type
        logger.warning(
            f"Completion for assistant message was not a string or expected list/dict structure: {completion}. Using str()."
        )
        return str(completion)

    def trl_reward_pipeline(
        prompts: List[Any],  # Prompt items; not necessarily plain strings
        completions: Optional[List[str]] = None,
        **kwargs: Any,  # Contains other dataset columns, e.g., kwargs['test_cases']
    ) -> List[float]:
        """
        Score a batch; this is the function TRL actually calls.

        ``completions`` is optional: when omitted, every sample is scored with
        an empty assistant message.
        """
        scores: List[float] = []
        batch_size = len(prompts)
        num_samples = batch_size

        if completions is None:
            completions = [""] * num_samples

        if len(completions) != num_samples:
            logger.warning(
                f"Mismatch in lengths of prompts ({num_samples}) and "
                f"completions ({len(completions)}). Using min length."
            )
            num_samples = min(num_samples, len(completions))

        # A column is usable when it matches the number of samples processed.
        # Dataset columns are presumably aligned with `prompts`, so one that
        # still has the full prompt batch length after truncation is also
        # accepted (only its first `num_samples` entries are read) instead of
        # being discarded as a length error.
        acceptable_lengths = {num_samples, batch_size}

        # Pre-extract per-sample data for every mapped reward_fn parameter.
        mapped_kwargs_data: Dict[str, List[Any]] = {}
        for dataset_col_name, reward_fn_param_name in dataset_to_reward_kwargs_map.items():
            if dataset_col_name not in kwargs:
                logger.warning(
                    f"Dataset column '{dataset_col_name}' (mapped to reward_fn param "
                    f"'{reward_fn_param_name}') not found in TRL kwargs. "
                    f"Reward function will receive None for this parameter for all samples."
                )
                mapped_kwargs_data[reward_fn_param_name] = [None] * num_samples
                continue

            data_list = kwargs[dataset_col_name]
            if not isinstance(data_list, list) or len(data_list) not in acceptable_lengths:
                logger.error(
                    f"Data for dataset column '{dataset_col_name}' is not a list of "
                    f"length {num_samples}. Received: {data_list}. "
                    f"Reward function will receive None for this parameter for all samples."
                )
                mapped_kwargs_data[reward_fn_param_name] = [None] * num_samples
            else:
                mapped_kwargs_data[reward_fn_param_name] = data_list

        # zip() stops at the shorter sequence, i.e. after exactly num_samples items.
        for i, (current_prompt_item, current_completion) in enumerate(zip(prompts, completions)):
            # Non-string prompt items are stringified unless user_message_fn is given.
            user_content = user_message_fn(current_prompt_item) if user_message_fn else str(current_prompt_item)
            final_assistant_str_content = _assistant_content(current_completion)

            messages_for_reward: List[Message] = [
                Message(role="user", content=user_content),
                Message(role="assistant", content=final_assistant_str_content),
            ]

            # Per-sample kwargs override static ones on name collisions.
            current_dynamic_kwargs: Dict[str, Any] = {
                param_name: column[i] for param_name, column in mapped_kwargs_data.items()
            }
            final_reward_fn_kwargs = {**static_reward_kwargs, **current_dynamic_kwargs}

            try:
                reward_output_dict: Dict[str, Any] = reward_fn(messages=messages_for_reward, **final_reward_fn_kwargs)

                score = reward_output_dict.get("score")
                if score is None:
                    logger.warning(
                        f"Sample {i}: 'score' key not found in reward_output_dict or is None. "
                        f"Output: {reward_output_dict}. Assigning 0.0."
                    )
                    scores.append(0.0)
                else:
                    scores.append(float(score))

            except Exception as e:
                # Deliberate best-effort: one failing sample must not abort the batch.
                logger.error(
                    f"Error calling reward_fn for sample {i} (prompt: '{str(current_prompt_item)[:50]}...'): {e}",
                    exc_info=True,
                )
                scores.append(0.0)  # Assign 0 score on error

        # Only compute the summary statistics when they will actually be emitted.
        if scores and logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                f"Batch rewards calculated by TRL adapter. Count: {len(scores)}, "
                f"Min: {min(scores)}, Max: {max(scores)}, Avg: {sum(scores)/len(scores):.2f}"
            )
        return scores

    return trl_reward_pipeline
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Reward-Kit MCP Integration Framework
|
|
3
|
+
|
|
4
|
+
This module provides utilities for creating MCP servers that integrate
|
|
5
|
+
with reward-kit environments and evaluation workflows.
|
|
6
|
+
|
|
7
|
+
It also provides the refactored MCP environment components for better modularity.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .adapter import EnvironmentAdapter
|
|
11
|
+
|
|
12
|
+
# New refactored components
|
|
13
|
+
from .client import MCPConnectionManager
|
|
14
|
+
from .execution import LLMBasePolicy, OpenAIPolicy, ExecutionManager
|
|
15
|
+
|
|
16
|
+
# FireworksPolicy is imported conditionally by execution.__init__.py
|
|
17
|
+
try:
|
|
18
|
+
from .execution import FireworksPolicy
|
|
19
|
+
except ImportError:
|
|
20
|
+
FireworksPolicy = None
|
|
21
|
+
|
|
22
|
+
# North Star MCP-Gym Framework
|
|
23
|
+
from .mcpgym import McpGym
|
|
24
|
+
from .session import GeneralMCPVectorEnv
|
|
25
|
+
from .simulation_server import SimulationServerBase
|
|
26
|
+
from .types import DatasetRow, MCPSession, MCPToolCall, Trajectory
|
|
27
|
+
|
|
28
|
+
# Public names of the package. "FireworksPolicy" is listed (directly after
# "OpenAIPolicy") only when the optional import above produced a real object.
__all__ = [
    # Legacy MCP server components
    "EnvironmentAdapter",
    "SimulationServerBase",
    # New refactored components
    "MCPConnectionManager",
    "LLMBasePolicy",
    "OpenAIPolicy",
    *(["FireworksPolicy"] if FireworksPolicy is not None else []),
    "ExecutionManager",
    "GeneralMCPVectorEnv",
    "MCPSession",
    "MCPToolCall",
    "DatasetRow",
    "Trajectory",
    # North Star MCP-Gym Framework
    "McpGym",
]
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Environment Adapter Interface
|
|
3
|
+
|
|
4
|
+
This defines the interface that users implement to connect their
|
|
5
|
+
environments to the MCP framework. It also provides default implementations
|
|
6
|
+
that work with most gymnasium-style and complex environments.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from typing import Any, Dict, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EnvironmentAdapter:
    """
    Environment adapter with default implementations.

    Users can either use this class directly by providing an env_class,
    or inherit from it to customize specific methods for their environment.
    This provides a clean separation between the MCP protocol layer
    and the environment implementation.

    The default implementations expect ``env_class`` to accept a ``config``
    keyword argument and the resulting environment to provide
    ``reset(seed=...)``, ``step(action)`` and ``close()``.
    """

    def __init__(self, env_class: Any = None, default_config: Optional[Dict[str, Any]] = None):
        """
        Initialize the environment adapter.

        Args:
            env_class: The environment class to instantiate (required for default implementation)
            default_config: Default configuration for environment creation
        """
        self.env_class = env_class
        self.default_config = default_config or {}

    def create_environment(self, config: Optional[Dict[str, Any]] = None) -> Any:
        """
        Create and return a new environment instance.

        The environment is constructed as ``env_class(config=merged)`` where
        ``merged`` is the default configuration overlaid with ``config``.

        Args:
            config: Optional configuration dict for environment creation;
                its keys override the defaults for this call only.

        Returns:
            Environment instance (type depends on the specific implementation)

        Raises:
            NotImplementedError: If no ``env_class`` was provided.
        """
        if self.env_class is None:
            raise NotImplementedError("env_class must be provided or create_environment must be overridden")

        # Merge into a copy: get_default_config() hands back the stored dict
        # itself, so updating it in place would leak per-call overrides into
        # the defaults used by every later environment.
        env_config = dict(self.get_default_config())
        if config:
            env_config.update(config)

        return self.env_class(config=env_config)

    def create_environment_with_seed(
        self, config: Optional[Dict[str, Any]] = None, seed: Optional[int] = None
    ) -> Tuple[Any, Any, Dict[str, Any]]:
        """
        Create a new environment instance and reset it with a specific seed.

        Args:
            config: Optional configuration dict for environment creation
            seed: Optional seed forwarded to ``env.reset``

        Returns:
            Tuple of (environment, initial_observation, info_dict)
        """
        env = self.create_environment(config)
        obs, info = env.reset(seed=seed)

        return env, obs, info

    def reset_environment(self, env: Any, seed: Optional[int] = None) -> Tuple[Any, Dict[str, Any]]:
        """
        Reset the environment to initial state.

        Args:
            env: Environment instance
            seed: Optional seed for reproducibility

        Returns:
            Tuple of (initial_observation, info_dict)
        """
        return env.reset(seed=seed)

    def step_environment(self, env: Any, action: Any) -> Tuple[Any, float, bool, bool, Dict[str, Any]]:
        """
        Execute one step in the environment.

        Args:
            env: Environment instance
            action: Action to execute (type depends on environment)

        Returns:
            Tuple of (observation, reward, terminated, truncated, info)
        """
        return env.step(action)

    def close_environment(self, env: Any) -> None:
        """
        Clean up environment resources.

        Args:
            env: Environment instance to close
        """
        env.close()

    def parse_action(self, action_str: str) -> Any:
        """
        Parse action string from MCP tool call into environment action.

        The default implementation decodes the string as JSON.

        Args:
            action_str: Action string from MCP client

        Returns:
            Action in format expected by environment

        Raises:
            json.JSONDecodeError: If ``action_str`` is not valid JSON.
        """
        return json.loads(action_str)

    def format_observation(self, observation: Any) -> Any:
        """
        Format environment observation for MCP response.

        The default implementation returns the observation unchanged;
        override it when raw observations are not JSON-serializable.

        Args:
            observation: Raw observation from environment

        Returns:
            JSON-serializable observation data
        """
        return observation

    def get_default_config(self) -> Dict[str, Any]:
        """
        Get the default environment configuration.

        Returns:
            The stored default configuration dict (not a copy).
        """
        return self.default_config
|