eval-protocol 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- development/__init__.py +1 -0
- development/normalize_sandbox_fusion.py +628 -0
- development/utils/__init__.py +1 -0
- development/utils/generate_api_key.py +31 -0
- development/utils/subprocess_manager.py +481 -0
- eval_protocol/__init__.py +86 -0
- eval_protocol/__main__.py +10 -0
- eval_protocol/_version.py +21 -0
- eval_protocol/adapters/__init__.py +1 -0
- eval_protocol/adapters/braintrust.py +8 -0
- eval_protocol/adapters/trl.py +8 -0
- eval_protocol/agent/__init__.py +29 -0
- eval_protocol/agent/models.py +69 -0
- eval_protocol/agent/orchestrator.py +893 -0
- eval_protocol/agent/resource_abc.py +89 -0
- eval_protocol/agent/resource_pool.py +184 -0
- eval_protocol/agent/resources/__init__.py +44 -0
- eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
- eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
- eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
- eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
- eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
- eval_protocol/agent/resources/docker_resource.py +479 -0
- eval_protocol/agent/resources/filesystem_resource.py +371 -0
- eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
- eval_protocol/agent/resources/http_rollout_resource.py +325 -0
- eval_protocol/agent/resources/python_state_resource.py +170 -0
- eval_protocol/agent/resources/sql_resource.py +271 -0
- eval_protocol/agent/task_manager.py +1064 -0
- eval_protocol/agent/tool_registry.py +111 -0
- eval_protocol/auth.py +156 -0
- eval_protocol/cli.py +425 -0
- eval_protocol/cli_commands/__init__.py +1 -0
- eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
- eval_protocol/cli_commands/common.py +242 -0
- eval_protocol/cli_commands/deploy.py +486 -0
- eval_protocol/cli_commands/deploy_mcp.py +287 -0
- eval_protocol/cli_commands/preview.py +186 -0
- eval_protocol/cli_commands/run_eval_cmd.py +202 -0
- eval_protocol/common_utils.py +36 -0
- eval_protocol/config.py +180 -0
- eval_protocol/datasets/__init__.py +1 -0
- eval_protocol/datasets/loader.py +521 -0
- eval_protocol/evaluation.py +1045 -0
- eval_protocol/execution/__init__.py +1 -0
- eval_protocol/execution/pipeline.py +920 -0
- eval_protocol/gcp_tools.py +484 -0
- eval_protocol/generation/cache.py +141 -0
- eval_protocol/generation/clients/base.py +67 -0
- eval_protocol/generation/clients.py +248 -0
- eval_protocol/generic_server.py +165 -0
- eval_protocol/integrations/__init__.py +12 -0
- eval_protocol/integrations/braintrust.py +51 -0
- eval_protocol/integrations/deepeval.py +106 -0
- eval_protocol/integrations/openeval.py +40 -0
- eval_protocol/integrations/trl.py +187 -0
- eval_protocol/mcp/__init__.py +48 -0
- eval_protocol/mcp/adapter.py +131 -0
- eval_protocol/mcp/client/__init__.py +12 -0
- eval_protocol/mcp/client/connection.py +499 -0
- eval_protocol/mcp/clients.py +195 -0
- eval_protocol/mcp/execution/__init__.py +23 -0
- eval_protocol/mcp/execution/base_policy.py +227 -0
- eval_protocol/mcp/execution/fireworks_policy.py +209 -0
- eval_protocol/mcp/execution/manager.py +506 -0
- eval_protocol/mcp/execution/policy.py +421 -0
- eval_protocol/mcp/grid_renderer.py +54 -0
- eval_protocol/mcp/mcpgym.py +637 -0
- eval_protocol/mcp/process_manager.py +177 -0
- eval_protocol/mcp/session/__init__.py +11 -0
- eval_protocol/mcp/session/manager.py +228 -0
- eval_protocol/mcp/simple_process_manager.py +291 -0
- eval_protocol/mcp/simulation_server.py +458 -0
- eval_protocol/mcp/types.py +80 -0
- eval_protocol/mcp_agent/__init__.py +1 -0
- eval_protocol/mcp_agent/config.py +147 -0
- eval_protocol/mcp_agent/intermediary_server.py +542 -0
- eval_protocol/mcp_agent/main.py +210 -0
- eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
- eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
- eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
- eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
- eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
- eval_protocol/mcp_agent/session.py +79 -0
- eval_protocol/mcp_env.py +304 -0
- eval_protocol/models.py +366 -0
- eval_protocol/packaging.py +219 -0
- eval_protocol/platform_api.py +360 -0
- eval_protocol/playback_policy.py +396 -0
- eval_protocol/resources.py +128 -0
- eval_protocol/reward_function.py +410 -0
- eval_protocol/rewards/__init__.py +94 -0
- eval_protocol/rewards/accuracy.py +454 -0
- eval_protocol/rewards/accuracy_length.py +173 -0
- eval_protocol/rewards/apps_coding_reward.py +331 -0
- eval_protocol/rewards/apps_execution_utils.py +149 -0
- eval_protocol/rewards/apps_testing_util.py +559 -0
- eval_protocol/rewards/bfcl_reward.py +313 -0
- eval_protocol/rewards/code_execution.py +1620 -0
- eval_protocol/rewards/code_execution_utils.py +72 -0
- eval_protocol/rewards/cpp_code.py +861 -0
- eval_protocol/rewards/deepcoder_reward.py +161 -0
- eval_protocol/rewards/format.py +129 -0
- eval_protocol/rewards/function_calling.py +541 -0
- eval_protocol/rewards/json_schema.py +422 -0
- eval_protocol/rewards/language_consistency.py +700 -0
- eval_protocol/rewards/lean_prover.py +479 -0
- eval_protocol/rewards/length.py +375 -0
- eval_protocol/rewards/list_comparison_math_reward.py +221 -0
- eval_protocol/rewards/math.py +762 -0
- eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
- eval_protocol/rewards/reasoning_steps.py +249 -0
- eval_protocol/rewards/repetition.py +342 -0
- eval_protocol/rewards/tag_count.py +162 -0
- eval_protocol/rl_processing.py +82 -0
- eval_protocol/server.py +271 -0
- eval_protocol/typed_interface.py +260 -0
- eval_protocol/utils/__init__.py +8 -0
- eval_protocol/utils/batch_evaluation.py +217 -0
- eval_protocol/utils/batch_transformation.py +205 -0
- eval_protocol/utils/dataset_helpers.py +112 -0
- eval_protocol/utils/module_loader.py +56 -0
- eval_protocol/utils/packaging_utils.py +108 -0
- eval_protocol/utils/static_policy.py +305 -0
- eval_protocol-0.0.3.dist-info/METADATA +635 -0
- eval_protocol-0.0.3.dist-info/RECORD +130 -0
- eval_protocol-0.0.3.dist-info/WHEEL +5 -0
- eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
- eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
- eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Implementation of MathAPI."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MathAPI:
    """A simple, stateless math API for BFCL evaluation.

    Every operation returns a dict: ``{"result": value}`` on success, or
    ``{"error": message}`` when the operation is not defined for the given
    operands.
    """

    def __init__(self):
        pass

    def _load_scenario(self, config):
        """Accept a scenario config for interface parity; nothing is stored."""
        # MathAPI is stateless, so no scenarios to load
        pass

    def add(self, a, b):
        """Add two numbers"""
        return {"result": a + b}

    def subtract(self, a, b):
        """Subtract b from a"""
        return {"result": a - b}

    def multiply(self, a, b):
        """Multiply two numbers"""
        return {"result": a * b}

    def divide(self, a, b):
        """Divide a by b"""
        if b == 0:
            return {"error": "Cannot divide by zero"}
        return {"result": a / b}

    def square_root(self, a):
        """Calculate the square root of a number"""
        if a < 0:
            return {"error": "Cannot calculate square root of negative number"}
        return {"result": a**0.5}

    def power(self, base, exponent):
        """Calculate base raised to the power of exponent.

        Returns an ``{"error": ...}`` dict, like ``divide`` and
        ``square_root`` do, instead of raising or returning a complex number
        when the result is undefined for real operands.
        """
        try:
            result = base**exponent
        except ZeroDivisionError:
            # e.g. 0 ** -1
            return {"error": "Cannot raise zero to a negative power"}
        except OverflowError:
            # e.g. 2.0 ** 10000
            return {"error": "Result is too large to represent"}

        # Python yields a complex number for a negative base with a fractional
        # exponent. Only report that as an error when the caller did not pass
        # complex operands on purpose.
        operands_are_real = not isinstance(base, complex) and not isinstance(exponent, complex)
        if isinstance(result, complex) and operands_are_real:
            return {"error": "Cannot raise a negative number to a fractional power"}
        return {"result": result}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Implementation of TwitterAPI."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TwitterAPI:
    """A Twitter API for BFCL evaluation.

    All state lives in plain public attributes (credentials, tweets,
    comments, retweets, following list, tweet counter). Every action returns
    a dict, and failures are reported through ``{"status": "error", ...}``
    rather than by raising.
    """

    def __init__(self):
        self.username = ""
        self.password = ""
        self.authenticated = False
        self.tweets = {}  # str(tweet_id) -> tweet dict
        self.comments = {}  # str(tweet_id) -> list of comment dicts
        self.retweets = {}  # username -> list of str(tweet_id)
        self.following_list = []
        self.tweet_counter = 0

    def _load_scenario(self, config):
        """Load the Twitter API state from configuration.

        Each key of ``config`` is assigned verbatim as an attribute, so the
        config is expected to use the attribute names set in ``__init__``.
        """
        for key, value in config.items():
            setattr(self, key, value)

    def login(self, username, password):
        """Log in to Twitter."""
        if username == self.username and password == self.password:
            self.authenticated = True
            return {"status": "success", "message": f"Logged in as {username}"}
        return {"status": "error", "message": "Invalid username or password"}

    def logout(self):
        """Log out from Twitter."""
        if not self.authenticated:
            return {"status": "error", "message": "Not logged in"}
        self.authenticated = False
        return {"status": "success", "message": "Logged out successfully"}

    def post_tweet(self, content, tags=None, mentions=None):
        """Post a new tweet.

        Returns the new ``tweet_id`` on success. Fails when the user is not
        authenticated or ``content`` is empty.
        """
        if not self.authenticated:
            return {"status": "error", "message": "Not authenticated"}

        if not content:
            return {"status": "error", "message": "Tweet content cannot be empty"}

        # A loaded scenario may pre-populate ``tweets`` without a matching
        # ``tweet_counter``; skip ahead so an existing tweet is never
        # overwritten. With a consistent counter this loop does not run.
        tweet_id = self.tweet_counter
        while str(tweet_id) in self.tweets:
            tweet_id += 1
        self.tweet_counter = tweet_id + 1

        self.tweets[str(tweet_id)] = {
            "id": tweet_id,
            "content": content,
            "username": self.username,
            "tags": tags or [],
            "mentions": mentions or [],
        }

        return {
            "status": "success",
            "message": "Tweet posted successfully",
            "tweet_id": tweet_id,
        }

    def get_tweets(self, username=None):
        """Get tweets by a specific user or all tweets if username is None."""
        return {
            tweet_id: tweet
            for tweet_id, tweet in self.tweets.items()
            if username is None or tweet["username"] == username
        }

    def search_tweets(self, query):
        """Search tweets by content (case-insensitive substring match)."""
        needle = query.lower()
        return {
            tweet_id: tweet
            for tweet_id, tweet in self.tweets.items()
            if needle in tweet["content"].lower()
        }

    def follow_user(self, username):
        """Follow a user."""
        if not self.authenticated:
            return {"status": "error", "message": "Not authenticated"}

        if username == self.username:
            return {"status": "error", "message": "Cannot follow yourself"}

        if username in self.following_list:
            return {"status": "error", "message": f"Already following {username}"}

        self.following_list.append(username)
        return {"status": "success", "message": f"Now following {username}"}

    def unfollow_user(self, username):
        """Unfollow a user."""
        if not self.authenticated:
            return {"status": "error", "message": "Not authenticated"}

        if username not in self.following_list:
            return {"status": "error", "message": f"Not following {username}"}

        self.following_list.remove(username)
        return {"status": "success", "message": f"Unfollowed {username}"}

    def get_following(self):
        """Get the list of users being followed."""
        if not self.authenticated:
            return {"status": "error", "message": "Not authenticated"}

        # Return a copy so callers cannot mutate internal state through the
        # result.
        return {"status": "success", "following": list(self.following_list)}

    def comment_on_tweet(self, tweet_id, content):
        """Comment on a tweet.

        ``tweet_id`` may be an int or a string; it is normalized to the
        string key used by ``self.tweets``.
        """
        if not self.authenticated:
            return {"status": "error", "message": "Not authenticated"}

        tweet_key = str(tweet_id)
        if tweet_key not in self.tweets:
            return {"status": "error", "message": f"Tweet {tweet_id} not found"}

        thread = self.comments.setdefault(tweet_key, [])
        # Comment ids are positional within each tweet's comment list.
        comment_id = len(thread)
        thread.append({"id": comment_id, "content": content, "username": self.username})

        return {
            "status": "success",
            "message": "Comment added successfully",
            "comment_id": comment_id,
        }

    def retweet(self, tweet_id):
        """Retweet a tweet (at most once per user and tweet)."""
        if not self.authenticated:
            return {"status": "error", "message": "Not authenticated"}

        tweet_key = str(tweet_id)
        if tweet_key not in self.tweets:
            return {"status": "error", "message": f"Tweet {tweet_id} not found"}

        own_retweets = self.retweets.setdefault(self.username, [])
        if tweet_key in own_retweets:
            return {"status": "error", "message": f"Already retweeted tweet {tweet_id}"}

        own_retweets.append(tweet_key)
        return {"status": "success", "message": f"Retweeted tweet {tweet_id}"}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import importlib
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Import BFCL File and Directory for isinstance checks from local implementation
|
|
7
|
+
from .bfcl_envs.gorilla_file_system import Directory as BFCLDirectory
|
|
8
|
+
from .bfcl_envs.gorilla_file_system import File as BFCLFile
|
|
9
|
+
|
|
10
|
+
# Flag consulted before isinstance checks against BFCLFile / BFCLDirectory.
# It is hard-coded to True here because those types are imported
# unconditionally above.
BFCL_TYPES_AVAILABLE = True
|
|
11
|
+
import gc
|
|
12
|
+
import inspect
|
|
13
|
+
import json
|
|
14
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
15
|
+
|
|
16
|
+
from ..resource_abc import ForkableResource
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BFCLSimAPIResource(ForkableResource):
    """Forkable resource that hosts in-process BFCL simulation environments.

    The resource owns one instance per BFCL class named in the setup config
    and exposes their public methods as tools. State is reported as a
    dictionary of each instance's public attributes, with BFCL ``File`` and
    ``Directory`` objects converted to plain dictionaries.
    """

    # Maps a BFCL class name to the module that defines it.
    CLASS_FILE_PATH_MAPPING = {
        "GorillaFileSystem": "eval_protocol.agent.resources.bfcl_envs.gorilla_file_system",
        "MathAPI": "eval_protocol.agent.resources.bfcl_envs.math_api",
        "TwitterAPI": "eval_protocol.agent.resources.bfcl_envs.posting_api",
        # Add these back when implemented:
        # "MessageAPI": "eval_protocol.agent.resources.bfcl_envs.message_api",
        # "TicketAPI": "eval_protocol.agent.resources.bfcl_envs.ticket_api",
        # "TradingBot": "eval_protocol.agent.resources.bfcl_envs.trading_bot",
        # "TravelAPI": "eval_protocol.agent.resources.bfcl_envs.travel_booking",
        # "VehicleControlAPI": "eval_protocol.agent.resources.bfcl_envs.vehicle_control",
    }
    # Classes for which ``setup`` does not call ``_load_scenario``.
    STATELESS_CLASSES = ["MathAPI"]

    # Python annotation -> JSON-schema type name used by tool specs.
    _JSON_TYPE_BY_ANNOTATION = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        list: "array",
        List: "array",
        dict: "object",
        Dict: "object",
        Any: "string",  # Default to string for Any or unknown
        type(None): "null",  # For Optional[str] = None
    }

    def _serialize_bfcl_file(self, file_obj: BFCLFile) -> Dict[str, Any]:
        """Serializes a BFCL File object into a canonical dictionary."""
        return {
            "type": "file",  # Add a type hint for clarity, though not in original __eq__
            "name": file_obj.name,
            "content": file_obj.content,
        }

    def _serialize_bfcl_directory(self, dir_obj: BFCLDirectory) -> Dict[str, Any]:
        """Serializes a BFCL Directory object into a canonical dictionary."""
        # Sort keys for canonical representation, crucial for reliable comparison
        serialized_contents: Dict[str, Any] = {
            item_name: self._serialize_value(item_value)
            for item_name, item_value in sorted(dir_obj.contents.items())
        }
        return {
            "type": "directory",  # Add a type hint
            "name": dir_obj.name,
            "contents": serialized_contents,
            # Parent is intentionally excluded to match original Directory.__eq__
        }

    def _serialize_value(self, value: Any) -> Any:
        """Return an independent, comparison-friendly form of ``value``.

        BFCL files and directories become dictionaries. JSON-serializable
        values are deep-copied, so the returned state does not alias the live
        environment. Anything else falls back to ``str(value)``.
        """
        if BFCL_TYPES_AVAILABLE and isinstance(value, BFCLDirectory):
            return self._serialize_bfcl_directory(value)
        if BFCL_TYPES_AVAILABLE and isinstance(value, BFCLFile):
            return self._serialize_bfcl_file(value)
        try:
            json.dumps(value)
        except (TypeError, OverflowError, ValueError):
            # ValueError covers circular references reported by json.dumps.
            return str(value)  # Convert non-serializable objects to string
        return copy.deepcopy(value)

    def __init__(self, env_instances: Optional[Dict[str, Any]] = None):
        """Create the resource, optionally adopting existing env instances."""
        self._env_instances = env_instances if env_instances is not None else {}
        self._initial_config: Dict[str, Any] = {}  # To store initial configuration for forking

    async def setup(self, config: Dict[str, Any]) -> None:
        """Initializes the resource with a given configuration.

        ``config["involved_classes"]`` lists the BFCL class names to
        instantiate; ``config["initial_config"]`` maps a class name to the
        scenario passed to that instance's ``_load_scenario``. Classes that
        already have an instance are left untouched.

        Raises:
            KeyError: if a class name is not in ``CLASS_FILE_PATH_MAPPING``.
        """
        self._initial_config = copy.deepcopy(config)
        involved_classes = config.get("involved_classes", [])
        initial_config_data = config.get("initial_config", {})

        for class_name in involved_classes:
            if class_name in self._env_instances:
                continue

            module = importlib.import_module(self.CLASS_FILE_PATH_MAPPING[class_name])
            instance = getattr(module, class_name)()

            if class_name not in self.STATELESS_CLASSES:
                class_initial_config = initial_config_data.get(class_name, {})
                # Deep copy so the instance cannot mutate the caller's config.
                instance._load_scenario(copy.deepcopy(class_initial_config))

            self._env_instances[class_name] = instance

    async def fork(self) -> "ForkableResource":
        """Creates and returns a new, independent instance of this resource
        with an identical copy of the current state.
        """
        # Deep copy the environment instances to create an independent fork
        new_resource = BFCLSimAPIResource(env_instances=copy.deepcopy(self._env_instances))
        # Copy initial config for potential re-setup
        new_resource._initial_config = copy.deepcopy(self._initial_config)
        return new_resource

    async def checkpoint(self) -> Dict[str, Any]:
        """Returns a serializable representation of the resource's current state.

        The snapshot is independent of the live environment, so later
        actions do not change it.
        """
        return self.get_comparable_state()

    async def restore(self, state_data: Dict[str, Any]) -> None:
        """Restores the resource's state from a previously checkpointed state_data.

        NOTE(review): ``setup`` skips classes that already have an instance,
        so only the attributes present in ``state_data`` are reset here.
        NOTE(review): a serialized ``Directory``/``File`` is stored in the
        checkpoint as a plain dict, and is assigned back as a dict rather than
        being rebuilt into a BFCL object - confirm whether callers rely on
        restoring GorillaFileSystem state.
        """
        # Re-initialize based on initial config
        await self.setup(self._initial_config)
        # Restore state from the provided state_data using _set_comparable_state
        self._set_comparable_state(state_data)

    async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
        """Executes a named action with given parameters on the resource.

        The first environment instance exposing a public callable named
        ``action_name`` handles the call. A string result that parses as a
        JSON object is returned as a dict; any other result is returned
        unchanged. Failures are returned as ``{"error": ...}`` dicts.
        """
        not_found = {"error": f"Tool {action_name} not found in available resources."}
        # Only public methods are tools, matching what get_tools_spec advertises.
        if action_name.startswith("_"):
            return not_found

        # Convert tuples back to lists if needed by the tool function. Work on
        # a copy so the caller's dict is not modified.
        call_params = {
            key: list(value) if isinstance(value, tuple) else value
            for key, value in (action_params or {}).items()
        }

        for instance in self._env_instances.values():
            tool = getattr(instance, action_name, None)
            if not callable(tool):
                # Missing, or a data attribute that happens to share the name.
                continue
            try:
                result = tool(**call_params)
            except Exception as e:
                # Tool boundary: report the failure to the agent instead of raising.
                return {"error": f"Error executing tool {action_name}: {e}"}

            # BFCL envs might return results directly or modify state
            if isinstance(result, str):
                try:
                    parsed_result = json.loads(result)
                except json.JSONDecodeError:
                    return result
                if isinstance(parsed_result, dict):
                    return parsed_result
            return result

        return not_found

    async def get_observation(self) -> Dict[str, Any]:
        """Returns the current observable state of the resource for the agent."""
        # Return comparable state as observation for now
        return self.get_comparable_state()

    async def get_tools_spec(self) -> List[Dict[str, Any]]:
        """Returns a list of tool specifications (e.g., OpenAPI format)
        that are currently available or applicable to this resource's state.
        """
        tool_specs = []
        for instance in self._env_instances.values():
            for name, method in inspect.getmembers(instance, predicate=inspect.ismethod):
                if name.startswith("_"):  # Exclude private methods
                    continue
                try:
                    tool_specs.append(self._infer_schema_from_method(method))
                except Exception as e:
                    print(f"Could not infer schema for {name}: {e}")
        return tool_specs

    async def close(self) -> None:
        """Performs any necessary cleanup for the resource."""
        self._env_instances.clear()
        gc.collect()

    def get_comparable_state(self) -> Dict[str, Any]:
        """
        Returns a serializable representation of the resource's state for comparison.
        This method is synchronous for use in reward functions.

        The result maps each class name to a dict of that instance's public
        attributes. Values are copies, so two snapshots taken at different
        times can be compared reliably.
        """
        state: Dict[str, Any] = {}
        for class_name, instance in self._env_instances.items():
            instance_state: Dict[str, Any] = {}

            # GorillaFileSystem's ``root`` is handled first so it stays the
            # leading key of that instance's state.
            root = getattr(instance, "root", None)
            if (
                class_name == "GorillaFileSystem"
                and BFCL_TYPES_AVAILABLE
                and isinstance(root, BFCLDirectory)
            ):
                instance_state["root"] = self._serialize_bfcl_directory(root)

            for attr_name, value in vars(instance).items():
                if attr_name.startswith("_") or attr_name in instance_state:
                    continue
                instance_state[attr_name] = self._serialize_value(value)

            state[class_name] = instance_state
        return state

    def _set_comparable_state(self, state_data: Dict[str, Any]) -> None:
        """Helper to set state on BFCL environment instances from a comparable state dict."""
        for class_name, state in state_data.items():
            instance = self._env_instances.get(class_name)
            if instance is None:
                continue
            for attr_name, value in state.items():
                if not hasattr(instance, attr_name):
                    continue
                try:
                    # Copy so the checkpoint stays intact and can be restored again.
                    setattr(instance, attr_name, copy.deepcopy(value))
                except Exception as e:
                    print(f"Could not set attribute {attr_name} on {instance.__class__.__name__}: {e}")

    @classmethod
    def _list_schema(cls, list_annotation: Any) -> Dict[str, Any]:
        """Build an array schema for a ``List[T]``-style annotation."""
        item_args = getattr(list_annotation, "__args__", None)
        # Fall back to string items when the item type cannot be inferred.
        item_json_type = cls._JSON_TYPE_BY_ANNOTATION.get(item_args[0], "string") if item_args else "string"
        return {"type": "array", "items": {"type": item_json_type}}

    @classmethod
    def _annotation_to_schema(cls, annotation: Any) -> Dict[str, Any]:
        """Translate one parameter annotation into a JSON-schema property."""
        if annotation is inspect.Parameter.empty:
            return {"type": "string"}  # Default for unannotated parameters

        origin = getattr(annotation, "__origin__", None)
        if origin is Union:
            # Handle Optional[T]: use the first non-None member of the Union.
            union_args = [arg for arg in annotation.__args__ if arg is not type(None)]
            if not union_args:  # Should not happen for valid Optional[T]
                return {"type": "string"}
            annotation = union_args[0]
            origin = getattr(annotation, "__origin__", None)

        # Handle List[str] etc.
        if origin is not None and origin in (list, List):
            return cls._list_schema(annotation)
        return {"type": cls._JSON_TYPE_BY_ANNOTATION.get(annotation, "string")}

    def _infer_schema_from_method(self, method: Any) -> Dict[str, Any]:
        """Helper to infer tool schema from a method signature.

        The schema uses the method name, its docstring as the description,
        and one property per parameter. Parameters without a default value
        are listed as required.
        """
        properties: Dict[str, Any] = {}
        required: List[str] = []

        for name, param in inspect.signature(method).parameters.items():
            if name == "self":
                continue
            properties[name] = self._annotation_to_schema(param.annotation)
            # Identity check: ``empty`` is a sentinel, and ``==`` would invoke
            # the default value's own __eq__.
            if param.default is inspect.Parameter.empty:
                required.append(name)

        return {
            "name": method.__name__,
            "description": method.__doc__ if method.__doc__ else "",
            "parameters": {"type": "object", "properties": properties, "required": required},
        }
|