eval-protocol 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. development/__init__.py +1 -0
  2. development/normalize_sandbox_fusion.py +628 -0
  3. development/utils/__init__.py +1 -0
  4. development/utils/generate_api_key.py +31 -0
  5. development/utils/subprocess_manager.py +481 -0
  6. eval_protocol/__init__.py +86 -0
  7. eval_protocol/__main__.py +10 -0
  8. eval_protocol/_version.py +21 -0
  9. eval_protocol/adapters/__init__.py +1 -0
  10. eval_protocol/adapters/braintrust.py +8 -0
  11. eval_protocol/adapters/trl.py +8 -0
  12. eval_protocol/agent/__init__.py +29 -0
  13. eval_protocol/agent/models.py +69 -0
  14. eval_protocol/agent/orchestrator.py +893 -0
  15. eval_protocol/agent/resource_abc.py +89 -0
  16. eval_protocol/agent/resource_pool.py +184 -0
  17. eval_protocol/agent/resources/__init__.py +44 -0
  18. eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
  19. eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
  20. eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
  21. eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
  22. eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
  23. eval_protocol/agent/resources/docker_resource.py +479 -0
  24. eval_protocol/agent/resources/filesystem_resource.py +371 -0
  25. eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
  26. eval_protocol/agent/resources/http_rollout_resource.py +325 -0
  27. eval_protocol/agent/resources/python_state_resource.py +170 -0
  28. eval_protocol/agent/resources/sql_resource.py +271 -0
  29. eval_protocol/agent/task_manager.py +1064 -0
  30. eval_protocol/agent/tool_registry.py +111 -0
  31. eval_protocol/auth.py +156 -0
  32. eval_protocol/cli.py +425 -0
  33. eval_protocol/cli_commands/__init__.py +1 -0
  34. eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
  35. eval_protocol/cli_commands/common.py +242 -0
  36. eval_protocol/cli_commands/deploy.py +486 -0
  37. eval_protocol/cli_commands/deploy_mcp.py +287 -0
  38. eval_protocol/cli_commands/preview.py +186 -0
  39. eval_protocol/cli_commands/run_eval_cmd.py +202 -0
  40. eval_protocol/common_utils.py +36 -0
  41. eval_protocol/config.py +180 -0
  42. eval_protocol/datasets/__init__.py +1 -0
  43. eval_protocol/datasets/loader.py +521 -0
  44. eval_protocol/evaluation.py +1045 -0
  45. eval_protocol/execution/__init__.py +1 -0
  46. eval_protocol/execution/pipeline.py +920 -0
  47. eval_protocol/gcp_tools.py +484 -0
  48. eval_protocol/generation/cache.py +141 -0
  49. eval_protocol/generation/clients/base.py +67 -0
  50. eval_protocol/generation/clients.py +248 -0
  51. eval_protocol/generic_server.py +165 -0
  52. eval_protocol/integrations/__init__.py +12 -0
  53. eval_protocol/integrations/braintrust.py +51 -0
  54. eval_protocol/integrations/deepeval.py +106 -0
  55. eval_protocol/integrations/openeval.py +40 -0
  56. eval_protocol/integrations/trl.py +187 -0
  57. eval_protocol/mcp/__init__.py +48 -0
  58. eval_protocol/mcp/adapter.py +131 -0
  59. eval_protocol/mcp/client/__init__.py +12 -0
  60. eval_protocol/mcp/client/connection.py +499 -0
  61. eval_protocol/mcp/clients.py +195 -0
  62. eval_protocol/mcp/execution/__init__.py +23 -0
  63. eval_protocol/mcp/execution/base_policy.py +227 -0
  64. eval_protocol/mcp/execution/fireworks_policy.py +209 -0
  65. eval_protocol/mcp/execution/manager.py +506 -0
  66. eval_protocol/mcp/execution/policy.py +421 -0
  67. eval_protocol/mcp/grid_renderer.py +54 -0
  68. eval_protocol/mcp/mcpgym.py +637 -0
  69. eval_protocol/mcp/process_manager.py +177 -0
  70. eval_protocol/mcp/session/__init__.py +11 -0
  71. eval_protocol/mcp/session/manager.py +228 -0
  72. eval_protocol/mcp/simple_process_manager.py +291 -0
  73. eval_protocol/mcp/simulation_server.py +458 -0
  74. eval_protocol/mcp/types.py +80 -0
  75. eval_protocol/mcp_agent/__init__.py +1 -0
  76. eval_protocol/mcp_agent/config.py +147 -0
  77. eval_protocol/mcp_agent/intermediary_server.py +542 -0
  78. eval_protocol/mcp_agent/main.py +210 -0
  79. eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
  80. eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
  81. eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
  82. eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
  83. eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
  84. eval_protocol/mcp_agent/session.py +79 -0
  85. eval_protocol/mcp_env.py +304 -0
  86. eval_protocol/models.py +366 -0
  87. eval_protocol/packaging.py +219 -0
  88. eval_protocol/platform_api.py +360 -0
  89. eval_protocol/playback_policy.py +396 -0
  90. eval_protocol/resources.py +128 -0
  91. eval_protocol/reward_function.py +410 -0
  92. eval_protocol/rewards/__init__.py +94 -0
  93. eval_protocol/rewards/accuracy.py +454 -0
  94. eval_protocol/rewards/accuracy_length.py +173 -0
  95. eval_protocol/rewards/apps_coding_reward.py +331 -0
  96. eval_protocol/rewards/apps_execution_utils.py +149 -0
  97. eval_protocol/rewards/apps_testing_util.py +559 -0
  98. eval_protocol/rewards/bfcl_reward.py +313 -0
  99. eval_protocol/rewards/code_execution.py +1620 -0
  100. eval_protocol/rewards/code_execution_utils.py +72 -0
  101. eval_protocol/rewards/cpp_code.py +861 -0
  102. eval_protocol/rewards/deepcoder_reward.py +161 -0
  103. eval_protocol/rewards/format.py +129 -0
  104. eval_protocol/rewards/function_calling.py +541 -0
  105. eval_protocol/rewards/json_schema.py +422 -0
  106. eval_protocol/rewards/language_consistency.py +700 -0
  107. eval_protocol/rewards/lean_prover.py +479 -0
  108. eval_protocol/rewards/length.py +375 -0
  109. eval_protocol/rewards/list_comparison_math_reward.py +221 -0
  110. eval_protocol/rewards/math.py +762 -0
  111. eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
  112. eval_protocol/rewards/reasoning_steps.py +249 -0
  113. eval_protocol/rewards/repetition.py +342 -0
  114. eval_protocol/rewards/tag_count.py +162 -0
  115. eval_protocol/rl_processing.py +82 -0
  116. eval_protocol/server.py +271 -0
  117. eval_protocol/typed_interface.py +260 -0
  118. eval_protocol/utils/__init__.py +8 -0
  119. eval_protocol/utils/batch_evaluation.py +217 -0
  120. eval_protocol/utils/batch_transformation.py +205 -0
  121. eval_protocol/utils/dataset_helpers.py +112 -0
  122. eval_protocol/utils/module_loader.py +56 -0
  123. eval_protocol/utils/packaging_utils.py +108 -0
  124. eval_protocol/utils/static_policy.py +305 -0
  125. eval_protocol-0.0.3.dist-info/METADATA +635 -0
  126. eval_protocol-0.0.3.dist-info/RECORD +130 -0
  127. eval_protocol-0.0.3.dist-info/WHEEL +5 -0
  128. eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
  129. eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
  130. eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,40 @@
1
+ """Implementation of MathAPI."""
2
+
3
+
4
class MathAPI:
    """A simple math API for BFCL evaluation.

    The class holds no state.  Every operation returns ``{"result": value}``
    on success, or ``{"error": message}`` when the inputs fall outside the
    operation's domain (division by zero, square root of a negative number,
    or a power that has no real, finite result).

    NOTE: the one-line docstrings on the public methods are left untouched on
    purpose; they are short descriptions that tooling may surface verbatim.
    """

    def __init__(self):
        pass

    def _load_scenario(self, config):
        # MathAPI is stateless, so no scenarios to load
        pass

    def add(self, a, b):
        """Add two numbers"""
        return {"result": a + b}

    def subtract(self, a, b):
        """Subtract b from a"""
        return {"result": a - b}

    def multiply(self, a, b):
        """Multiply two numbers"""
        return {"result": a * b}

    def divide(self, a, b):
        """Divide a by b"""
        if b == 0:
            return {"error": "Cannot divide by zero"}
        return {"result": a / b}

    def square_root(self, a):
        """Calculate the square root of a number"""
        if a < 0:
            return {"error": "Cannot calculate square root of negative number"}
        return {"result": a**0.5}

    def power(self, base, exponent):
        """Calculate base raised to the power of exponent"""
        # Report domain problems as an error payload, the same way divide()
        # and square_root() do, instead of letting the exception escape.
        try:
            result = base**exponent
        except ZeroDivisionError:
            return {"error": "Cannot raise zero to a negative power"}
        except OverflowError:
            return {"error": "Result is too large to represent"}

        # A negative real base with a fractional exponent silently yields a
        # complex number, which is not JSON-serializable.  Only flag it when
        # the caller did not pass complex operands in the first place.
        if isinstance(result, complex) and not isinstance(base, complex) and not isinstance(exponent, complex):
            return {"error": "Cannot raise negative number to a fractional power"}
        return {"result": result}
@@ -0,0 +1,157 @@
1
+ """Implementation of TwitterAPI."""
2
+
3
+
4
+ class TwitterAPI:
5
+ """A Twitter API for BFCL evaluation."""
6
+
7
+ def __init__(self):
8
+ self.username = ""
9
+ self.password = ""
10
+ self.authenticated = False
11
+ self.tweets = {}
12
+ self.comments = {}
13
+ self.retweets = {}
14
+ self.following_list = []
15
+ self.tweet_counter = 0
16
+
17
+ def _load_scenario(self, config):
18
+ """Load the Twitter API state from configuration."""
19
+ for key, value in config.items():
20
+ setattr(self, key, value)
21
+
22
+ def login(self, username, password):
23
+ """Log in to Twitter."""
24
+ if username == self.username and password == self.password:
25
+ self.authenticated = True
26
+ return {"status": "success", "message": f"Logged in as {username}"}
27
+ else:
28
+ return {"status": "error", "message": "Invalid username or password"}
29
+
30
+ def logout(self):
31
+ """Log out from Twitter."""
32
+ if self.authenticated:
33
+ self.authenticated = False
34
+ return {"status": "success", "message": "Logged out successfully"}
35
+ else:
36
+ return {"status": "error", "message": "Not logged in"}
37
+
38
+ def post_tweet(self, content, tags=None, mentions=None):
39
+ """Post a new tweet."""
40
+ if not self.authenticated:
41
+ return {"status": "error", "message": "Not authenticated"}
42
+
43
+ if not content:
44
+ return {"status": "error", "message": "Tweet content cannot be empty"}
45
+
46
+ tweet_id = self.tweet_counter
47
+ self.tweet_counter += 1
48
+
49
+ self.tweets[str(tweet_id)] = {
50
+ "id": tweet_id,
51
+ "content": content,
52
+ "username": self.username,
53
+ "tags": tags or [],
54
+ "mentions": mentions or [],
55
+ }
56
+
57
+ return {
58
+ "status": "success",
59
+ "message": "Tweet posted successfully",
60
+ "tweet_id": tweet_id,
61
+ }
62
+
63
+ def get_tweets(self, username=None):
64
+ """Get tweets by a specific user or all tweets if username is None."""
65
+ tweets_to_return = {}
66
+
67
+ for tweet_id, tweet in self.tweets.items():
68
+ if username is None or tweet["username"] == username:
69
+ tweets_to_return[tweet_id] = tweet
70
+
71
+ return tweets_to_return
72
+
73
+ def search_tweets(self, query):
74
+ """Search tweets by content."""
75
+ results = {}
76
+
77
+ for tweet_id, tweet in self.tweets.items():
78
+ if query.lower() in tweet["content"].lower():
79
+ results[tweet_id] = tweet
80
+
81
+ return results
82
+
83
+ def follow_user(self, username):
84
+ """Follow a user."""
85
+ if not self.authenticated:
86
+ return {"status": "error", "message": "Not authenticated"}
87
+
88
+ if username == self.username:
89
+ return {"status": "error", "message": "Cannot follow yourself"}
90
+
91
+ if username in self.following_list:
92
+ return {"status": "error", "message": f"Already following {username}"}
93
+
94
+ self.following_list.append(username)
95
+
96
+ return {"status": "success", "message": f"Now following {username}"}
97
+
98
+ def unfollow_user(self, username):
99
+ """Unfollow a user."""
100
+ if not self.authenticated:
101
+ return {"status": "error", "message": "Not authenticated"}
102
+
103
+ if username not in self.following_list:
104
+ return {"status": "error", "message": f"Not following {username}"}
105
+
106
+ self.following_list.remove(username)
107
+
108
+ return {"status": "success", "message": f"Unfollowed {username}"}
109
+
110
+ def get_following(self):
111
+ """Get the list of users being followed."""
112
+ if not self.authenticated:
113
+ return {"status": "error", "message": "Not authenticated"}
114
+
115
+ return {"status": "success", "following": self.following_list}
116
+
117
+ def comment_on_tweet(self, tweet_id, content):
118
+ """Comment on a tweet."""
119
+ if not self.authenticated:
120
+ return {"status": "error", "message": "Not authenticated"}
121
+
122
+ tweet_id_str = str(tweet_id)
123
+ if tweet_id_str not in self.tweets:
124
+ return {"status": "error", "message": f"Tweet {tweet_id} not found"}
125
+
126
+ if tweet_id_str not in self.comments:
127
+ self.comments[tweet_id_str] = []
128
+
129
+ comment_id = len(self.comments[tweet_id_str])
130
+ comment = {"id": comment_id, "content": content, "username": self.username}
131
+
132
+ self.comments[tweet_id_str].append(comment)
133
+
134
+ return {
135
+ "status": "success",
136
+ "message": "Comment added successfully",
137
+ "comment_id": comment_id,
138
+ }
139
+
140
+ def retweet(self, tweet_id):
141
+ """Retweet a tweet."""
142
+ if not self.authenticated:
143
+ return {"status": "error", "message": "Not authenticated"}
144
+
145
+ tweet_id_str = str(tweet_id)
146
+ if tweet_id_str not in self.tweets:
147
+ return {"status": "error", "message": f"Tweet {tweet_id} not found"}
148
+
149
+ if self.username not in self.retweets:
150
+ self.retweets[self.username] = []
151
+
152
+ if tweet_id_str in self.retweets[self.username]:
153
+ return {"status": "error", "message": f"Already retweeted tweet {tweet_id}"}
154
+
155
+ self.retweets[self.username].append(tweet_id_str)
156
+
157
+ return {"status": "success", "message": f"Retweeted tweet {tweet_id}"}
@@ -0,0 +1,314 @@
1
+ import copy
2
+ import importlib
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ # Import BFCL File and Directory for isinstance checks from local implementation
7
+ from .bfcl_envs.gorilla_file_system import Directory as BFCLDirectory
8
+ from .bfcl_envs.gorilla_file_system import File as BFCLFile
9
+
10
+ BFCL_TYPES_AVAILABLE = True
11
+ import gc
12
+ import inspect
13
+ import json
14
+ from typing import Any, Dict, List, Optional, Tuple, Union
15
+
16
+ from ..resource_abc import ForkableResource
17
+
18
+
19
+ class BFCLSimAPIResource(ForkableResource):
20
+ CLASS_FILE_PATH_MAPPING = {
21
+ "GorillaFileSystem": "eval_protocol.agent.resources.bfcl_envs.gorilla_file_system",
22
+ "MathAPI": "eval_protocol.agent.resources.bfcl_envs.math_api",
23
+ "TwitterAPI": "eval_protocol.agent.resources.bfcl_envs.posting_api",
24
+ # Add these back when implemented:
25
+ # "MessageAPI": "eval_protocol.agent.resources.bfcl_envs.message_api",
26
+ # "TicketAPI": "eval_protocol.agent.resources.bfcl_envs.ticket_api",
27
+ # "TradingBot": "eval_protocol.agent.resources.bfcl_envs.trading_bot",
28
+ # "TravelAPI": "eval_protocol.agent.resources.bfcl_envs.travel_booking",
29
+ # "VehicleControlAPI": "eval_protocol.agent.resources.bfcl_envs.vehicle_control",
30
+ }
31
+ STATELESS_CLASSES = ["MathAPI"]
32
+
33
+ def _serialize_bfcl_file(self, file_obj: BFCLFile) -> Dict[str, Any]:
34
+ """Serializes a BFCL File object into a canonical dictionary."""
35
+ return {
36
+ "type": "file", # Add a type hint for clarity, though not in original __eq__
37
+ "name": file_obj.name,
38
+ "content": file_obj.content,
39
+ }
40
+
41
+ def _serialize_bfcl_directory(self, dir_obj: BFCLDirectory) -> Dict[str, Any]:
42
+ """Serializes a BFCL Directory object into a canonical dictionary."""
43
+ serialized_contents: Dict[str, Any] = {}
44
+ # Sort keys for canonical representation, crucial for reliable comparison
45
+ for item_name, item_value in sorted(dir_obj.contents.items()):
46
+ if BFCL_TYPES_AVAILABLE and isinstance(item_value, BFCLFile):
47
+ serialized_contents[item_name] = self._serialize_bfcl_file(item_value)
48
+ elif BFCL_TYPES_AVAILABLE and isinstance(item_value, BFCLDirectory):
49
+ serialized_contents[item_name] = self._serialize_bfcl_directory(item_value)
50
+ else:
51
+ # Fallback for other types if any, or if BFCL types weren't imported
52
+ try:
53
+ json.dumps(item_value)
54
+ serialized_contents[item_name] = item_value
55
+ except (TypeError, OverflowError):
56
+ serialized_contents[item_name] = str(item_value)
57
+ return {
58
+ "type": "directory", # Add a type hint
59
+ "name": dir_obj.name,
60
+ "contents": serialized_contents,
61
+ # Parent is intentionally excluded to match original Directory.__eq__
62
+ }
63
+
64
+ def __init__(self, env_instances: Optional[Dict[str, Any]] = None):
65
+ self._env_instances = env_instances if env_instances is not None else {}
66
+ self._initial_config: Dict[str, Any] = {} # To store initial configuration for forking
67
+
68
+ async def setup(self, config: Dict[str, Any]) -> None:
69
+ """Initializes the resource with a given configuration."""
70
+ self._initial_config = copy.deepcopy(config)
71
+ involved_classes = config.get("involved_classes", [])
72
+ initial_config_data = config.get("initial_config", {})
73
+
74
+ for class_name in involved_classes:
75
+ if class_name not in self._env_instances:
76
+ module_name = self.CLASS_FILE_PATH_MAPPING[class_name]
77
+ module = importlib.import_module(module_name)
78
+ class_ = getattr(module, class_name)
79
+ instance = class_()
80
+
81
+ if class_name not in self.STATELESS_CLASSES:
82
+ class_initial_config = initial_config_data.get(class_name, {})
83
+ instance._load_scenario(copy.deepcopy(class_initial_config))
84
+
85
+ self._env_instances[class_name] = instance
86
+
87
+ async def fork(self) -> "ForkableResource":
88
+ """Creates and returns a new, independent instance of this resource
89
+ with an identical copy of the current state.
90
+ """
91
+ # Deep copy the environment instances to create an independent fork
92
+ forked_instances = copy.deepcopy(self._env_instances)
93
+ new_resource = BFCLSimAPIResource(env_instances=forked_instances)
94
+ new_resource._initial_config = copy.deepcopy(
95
+ self._initial_config
96
+ ) # Copy initial config for potential re-setup
97
+ return new_resource
98
+
99
+ async def checkpoint(self) -> Dict[str, Any]:
100
+ """Returns a serializable representation of the resource's current state."""
101
+ # Use get_comparable_state for checkpointing
102
+ state_data = self.get_comparable_state()
103
+ return state_data
104
+
105
+ async def restore(self, state_data: Dict[str, Any]) -> None:
106
+ """Restores the resource's state from a previously checkpointed state_data."""
107
+ # Re-initialize based on initial config
108
+ await self.setup(self._initial_config)
109
+ # Restore state from the provided state_data using _set_comparable_state
110
+ self._set_comparable_state(state_data)
111
+
112
+ async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
113
+ """Executes a named action with given parameters on the resource."""
114
+ # Find the correct environment instance and call the method
115
+ for instance in self._env_instances.values():
116
+ if hasattr(instance, action_name):
117
+ try:
118
+ # Convert tuple back to list if needed by the tool function
119
+ for key, value in action_params.items():
120
+ if isinstance(value, tuple):
121
+ action_params[key] = list(value)
122
+ result = getattr(instance, action_name)(**action_params)
123
+ # BFCL envs might return results directly or modify state
124
+ if isinstance(result, str):
125
+ # Convert string result to dict if needed by type checker
126
+ try:
127
+ parsed_result = json.loads(result)
128
+ if isinstance(parsed_result, dict):
129
+ return parsed_result
130
+ except json.JSONDecodeError:
131
+ pass
132
+ return result
133
+ except Exception as e:
134
+ return {"error": f"Error executing tool {action_name}: {e}"}
135
+ return {"error": f"Tool {action_name} not found in available resources."}
136
+
137
+ async def get_observation(self) -> Dict[str, Any]:
138
+ """Returns the current observable state of the resource for the agent."""
139
+ # This needs to be defined based on what the agent should observe from the BFCL envs.
140
+ # It might be a summary of the environment state or specific attributes.
141
+ # For now, return a placeholder or a simple representation.
142
+ observation = self.get_comparable_state() # Return comparable state as observation for now
143
+ return observation
144
+
145
+ async def get_tools_spec(self) -> List[Dict[str, Any]]:
146
+ """Returns a list of tool specifications (e.g., OpenAPI format)
147
+ that are currently available or applicable to this resource's state.
148
+ """
149
+ # This needs to generate tool specifications from the methods of the BFCL env instances.
150
+ # It can adapt the logic from verifiers.envs.tool_env.infer_schema_from_function
151
+ tool_specs = []
152
+ for instance in self._env_instances.values():
153
+ # Inspect methods of the instance
154
+ for name, method in inspect.getmembers(instance, predicate=inspect.ismethod):
155
+ if not name.startswith("_"): # Exclude private methods
156
+ # Infer schema from method signature
157
+ try:
158
+ schema = self._infer_schema_from_method(method)
159
+ tool_specs.append(schema)
160
+ except Exception as e:
161
+ print(f"Could not infer schema for {name}: {e}")
162
+ return tool_specs
163
+
164
+ async def close(self) -> None:
165
+ """Performs any necessary cleanup for the resource."""
166
+ self._env_instances.clear()
167
+ gc.collect()
168
+
169
+ def get_comparable_state(self) -> Dict[str, Any]:
170
+ """
171
+ Returns a serializable representation of the resource's state for comparison.
172
+ This method is synchronous for use in reward functions.
173
+ """
174
+ state = {}
175
+ for class_name, instance in self._env_instances.items():
176
+ instance_state = {}
177
+ # Specifically handle GorillaFileSystem's root attribute if it's the one
178
+ # This is a bit of a special case due to its recursive nature and importance.
179
+ if (
180
+ class_name == "GorillaFileSystem"
181
+ and hasattr(instance, "root")
182
+ and BFCL_TYPES_AVAILABLE
183
+ and isinstance(instance.root, BFCLDirectory)
184
+ ):
185
+ # Serialize 'root' attribute using the new method
186
+ instance_state["root"] = self._serialize_bfcl_directory(instance.root) # type: ignore[assignment]
187
+ # Serialize other public attributes normally
188
+ for attr_name, value in vars(instance).items():
189
+ if not attr_name.startswith("_") and attr_name != "root":
190
+ if BFCL_TYPES_AVAILABLE and isinstance(value, BFCLDirectory):
191
+ instance_state[attr_name] = self._serialize_bfcl_directory(value)
192
+ elif BFCL_TYPES_AVAILABLE and isinstance(value, BFCLFile):
193
+ instance_state[attr_name] = self._serialize_bfcl_file(value)
194
+ else:
195
+ try:
196
+ json.dumps(value)
197
+ instance_state[attr_name] = value
198
+ except (TypeError, OverflowError):
199
+ instance_state[attr_name] = str( # type: ignore[assignment]
200
+ value
201
+ ) # Convert non-serializable objects to string
202
+ else: # For other classes or if GorillaFileSystem doesn't have 'root' or types unavailable
203
+ for attr_name, value in vars(instance).items():
204
+ if not attr_name.startswith("_"):
205
+ # Check if value is an instance of BFCLDirectory or BFCLFile first
206
+ if BFCL_TYPES_AVAILABLE and isinstance(value, BFCLDirectory):
207
+ instance_state[attr_name] = self._serialize_bfcl_directory(value)
208
+ elif BFCL_TYPES_AVAILABLE and isinstance(value, BFCLFile):
209
+ instance_state[attr_name] = self._serialize_bfcl_file(value)
210
+ else:
211
+ try:
212
+ json.dumps(value)
213
+ instance_state[attr_name] = value
214
+ except (TypeError, OverflowError):
215
+ instance_state[attr_name] = str( # type: ignore[assignment]
216
+ value
217
+ ) # Convert non-serializable objects to string
218
+ state[class_name] = instance_state
219
+ return state
220
+
221
+ def _set_comparable_state(self, state_data: Dict[str, Any]) -> None:
222
+ """Helper to set state on BFCL environment instances from a comparable state dict."""
223
+ for class_name, state in state_data.items():
224
+ if class_name in self._env_instances:
225
+ instance = self._env_instances[class_name]
226
+ for attr_name, value in state.items():
227
+ if hasattr(instance, attr_name):
228
+ try:
229
+ setattr(instance, attr_name, value)
230
+ except Exception as e:
231
+ print(f"Could not set attribute {attr_name} on {instance.__class__.__name__}: {e}")
232
+
233
+ def _infer_schema_from_method(self, method: Any) -> Dict[str, Any]:
234
+ """Helper to infer tool schema from a method signature."""
235
+ # This is a simplified version, can be expanded based on verifiers.envs.tool_env.infer_schema_from_function
236
+ schema = {
237
+ "name": method.__name__,
238
+ "description": method.__doc__ if method.__doc__ else "",
239
+ "parameters": {"type": "object", "properties": {}, "required": []},
240
+ }
241
+ sig = inspect.signature(method)
242
+ type_mapping = {
243
+ str: "string",
244
+ int: "integer",
245
+ float: "number",
246
+ bool: "boolean",
247
+ list: "array",
248
+ List: "array",
249
+ dict: "object",
250
+ Dict: "object",
251
+ Any: "string", # Default to string for Any or unknown
252
+ type(None): "null", # For Optional[str] = None
253
+ }
254
+
255
+ for name, param in sig.parameters.items():
256
+ if name == "self":
257
+ continue
258
+
259
+ param_type_annotation = param.annotation
260
+ json_type = "string" # Default
261
+
262
+ if param_type_annotation != inspect.Parameter.empty:
263
+ # Handle Optional types like Optional[str]
264
+ if hasattr(param_type_annotation, "__origin__") and param_type_annotation.__origin__ is Union:
265
+ # Get the first non-None type from Union for Optional[T]
266
+ union_args = [arg for arg in param_type_annotation.__args__ if arg is not type(None)]
267
+ if union_args:
268
+ actual_type = union_args[0]
269
+ json_type = type_mapping.get(actual_type, "string")
270
+ # Handle List[str] etc.
271
+ if hasattr(actual_type, "__origin__") and actual_type.__origin__ in [list, List]:
272
+ json_type = "array"
273
+ # Try to infer item type for List[T]
274
+ if hasattr(actual_type, "__args__") and actual_type.__args__:
275
+ item_type_annotation = actual_type.__args__[0]
276
+ item_json_type = type_mapping.get(item_type_annotation, "string")
277
+ schema["parameters"]["properties"][name] = {
278
+ "type": "array",
279
+ "items": {"type": item_json_type},
280
+ }
281
+ else: # Fallback if item type can't be inferred
282
+ schema["parameters"]["properties"][name] = {
283
+ "type": "array",
284
+ "items": {"type": "string"},
285
+ }
286
+ if param.default == inspect.Parameter.empty:
287
+ schema["parameters"]["required"].append(name)
288
+ continue # Skip default property assignment below
289
+ else: # Should not happen for valid Optional[T]
290
+ json_type = "string"
291
+ elif hasattr(param_type_annotation, "__origin__") and param_type_annotation.__origin__ in [list, List]:
292
+ json_type = "array"
293
+ if hasattr(param_type_annotation, "__args__") and param_type_annotation.__args__:
294
+ item_type_annotation = param_type_annotation.__args__[0]
295
+ item_json_type = type_mapping.get(item_type_annotation, "string")
296
+ schema["parameters"]["properties"][name] = {
297
+ "type": "array",
298
+ "items": {"type": item_json_type},
299
+ }
300
+ else: # Fallback
301
+ schema["parameters"]["properties"][name] = {
302
+ "type": "array",
303
+ "items": {"type": "string"},
304
+ }
305
+ if param.default == inspect.Parameter.empty:
306
+ schema["parameters"]["required"].append(name)
307
+ continue # Skip default property assignment
308
+ else:
309
+ json_type = type_mapping.get(param_type_annotation, "string")
310
+
311
+ schema["parameters"]["properties"][name] = {"type": json_type}
312
+ if param.default == inspect.Parameter.empty:
313
+ schema["parameters"]["required"].append(name)
314
+ return schema