sandboxy 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandboxy/__init__.py +3 -0
- sandboxy/agents/__init__.py +21 -0
- sandboxy/agents/base.py +66 -0
- sandboxy/agents/llm_prompt.py +308 -0
- sandboxy/agents/loader.py +222 -0
- sandboxy/api/__init__.py +5 -0
- sandboxy/api/app.py +76 -0
- sandboxy/api/routes/__init__.py +1 -0
- sandboxy/api/routes/agents.py +92 -0
- sandboxy/api/routes/local.py +1388 -0
- sandboxy/api/routes/tools.py +106 -0
- sandboxy/cli/__init__.py +1 -0
- sandboxy/cli/main.py +1196 -0
- sandboxy/cli/type_detector.py +48 -0
- sandboxy/config.py +49 -0
- sandboxy/core/__init__.py +1 -0
- sandboxy/core/async_runner.py +824 -0
- sandboxy/core/mdl_parser.py +441 -0
- sandboxy/core/runner.py +599 -0
- sandboxy/core/safe_eval.py +165 -0
- sandboxy/core/state.py +234 -0
- sandboxy/datasets/__init__.py +20 -0
- sandboxy/datasets/loader.py +193 -0
- sandboxy/datasets/runner.py +442 -0
- sandboxy/errors.py +166 -0
- sandboxy/local/context.py +235 -0
- sandboxy/local/results.py +173 -0
- sandboxy/logging.py +31 -0
- sandboxy/mcp/__init__.py +25 -0
- sandboxy/mcp/client.py +360 -0
- sandboxy/mcp/wrapper.py +99 -0
- sandboxy/providers/__init__.py +34 -0
- sandboxy/providers/anthropic_provider.py +271 -0
- sandboxy/providers/base.py +123 -0
- sandboxy/providers/http_client.py +101 -0
- sandboxy/providers/openai_provider.py +282 -0
- sandboxy/providers/openrouter.py +958 -0
- sandboxy/providers/registry.py +199 -0
- sandboxy/scenarios/__init__.py +11 -0
- sandboxy/scenarios/comparison.py +491 -0
- sandboxy/scenarios/loader.py +262 -0
- sandboxy/scenarios/runner.py +468 -0
- sandboxy/scenarios/unified.py +1434 -0
- sandboxy/session/__init__.py +21 -0
- sandboxy/session/manager.py +278 -0
- sandboxy/tools/__init__.py +34 -0
- sandboxy/tools/base.py +127 -0
- sandboxy/tools/loader.py +270 -0
- sandboxy/tools/yaml_tools.py +708 -0
- sandboxy/ui/__init__.py +27 -0
- sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
- sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
- sandboxy/ui/dist/index.html +14 -0
- sandboxy/utils/__init__.py +3 -0
- sandboxy/utils/time.py +20 -0
- sandboxy-0.0.1.dist-info/METADATA +241 -0
- sandboxy-0.0.1.dist-info/RECORD +60 -0
- sandboxy-0.0.1.dist-info/WHEEL +4 -0
- sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
- sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,708 @@
|
|
|
1
|
+
"""YAML-defined mock tools for declarative scenario creation.
|
|
2
|
+
|
|
3
|
+
This module enables defining tools entirely in YAML without writing Python code.
|
|
4
|
+
Tools can have static returns, parameterized returns, conditional logic, and
|
|
5
|
+
side effects that modify scenario state.
|
|
6
|
+
|
|
7
|
+
Example tool definition:
|
|
8
|
+
tools:
|
|
9
|
+
power_off_rack:
|
|
10
|
+
description: "Power off a server rack"
|
|
11
|
+
params:
|
|
12
|
+
rack_id:
|
|
13
|
+
type: string
|
|
14
|
+
required: true
|
|
15
|
+
returns: "Rack {rack_id} powered off."
|
|
16
|
+
side_effects:
|
|
17
|
+
- set: "rack_{rack_id}_status"
|
|
18
|
+
value: "offline"
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
import re
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Literal
|
|
27
|
+
|
|
28
|
+
import yaml
|
|
29
|
+
from pydantic import BaseModel, Field, field_validator
|
|
30
|
+
|
|
31
|
+
from sandboxy.tools.base import ToolConfig, ToolResult
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
# -----------------------------------------------------------------------------
|
|
36
|
+
# Schema Models
|
|
37
|
+
# -----------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ParamSchema(BaseModel):
    """Declarative description of one parameter accepted by a tool action.

    Attributes:
        type: JSON-Schema-style type name: string, number, boolean, integer,
            array or object. Defaults to "string".
        description: Free-text explanation of the parameter.
        required: True when callers must supply the parameter.
        default: Fallback value used when the caller omits the parameter;
            None means "no default".
        enum: Optional list of allowed values.
    """

    type: Literal["string", "number", "boolean", "integer", "array", "object"] = Field(
        default="string"
    )
    description: str = Field(default="")
    required: bool = Field(default=False)
    default: Any = Field(default=None)
    enum: list[Any] | None = Field(default=None)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class SideEffect(BaseModel):
    """State mutation applied to the scenario state when a tool is called.

    Attributes:
        set: Name of the state key to write; may contain {param} placeholders.
        value: Value to store. Strings may contain {param} and {state.key}
            placeholders; values of any other type are stored unchanged.
    """

    set: str
    value: Any

    def apply(self, state: dict[str, Any], params: dict[str, Any]) -> None:
        """Write this side effect into ``state``.

        Args:
            state: Environment state dict; mutated in place.
            params: Arguments of the tool invocation, used for placeholder
                substitution.
        """
        # _interpolate hands non-string values back untouched, so it can be
        # applied to the value without an explicit type check.
        target_key = _interpolate(self.set, params, state)
        state[target_key] = _interpolate(self.value, params, state)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class ConditionalReturn(BaseModel):
    """One branch of a conditional ``returns`` list.

    Attributes:
        when: Condition expression deciding whether this branch applies.
        value: Return template produced when ``when`` holds.
    """

    when: str = Field(...)
    value: str = Field(...)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class ActionSpec(BaseModel):
    """Specification for a single tool action.

    Attributes:
        description: Human-readable description of the action.
        params: Parameter definitions for this action, keyed by name.
        returns: Return template, or an ordered list of ConditionalReturn
            branches.
        returns_error: Error message template used when error_when is true.
        error_when: Condition expression that triggers an error result.
        side_effects: State modifications applied when no error is triggered.
    """

    description: str = ""
    params: dict[str, ParamSchema] = Field(default_factory=dict)
    returns: str | list[ConditionalReturn] = ""
    returns_error: str | None = None
    error_when: str | None = None
    side_effects: list[SideEffect] = Field(default_factory=list)

    @field_validator("returns", mode="before")
    @classmethod
    def parse_returns(cls, v: Any) -> str | list[ConditionalReturn]:
        """Normalize the raw ``returns`` value before field validation.

        Accepted shapes:
            * a string, kept as-is;
            * a list of branches (dict items become ConditionalReturn);
            * a mapping of the form ``{conditions: [...]}``;
            * any other value, converted with ``str()``.

        None and an empty mapping yield an empty string.
        """
        if isinstance(v, str):
            return v
        if isinstance(v, list):
            return [ConditionalReturn(**item) if isinstance(item, dict) else item for item in v]
        if isinstance(v, dict) and "conditions" in v:
            # Support { conditions: [...] } format. A null "conditions" entry
            # is treated as "no branches" instead of failing on iteration.
            return [
                ConditionalReturn(**item) if isinstance(item, dict) else item
                for item in (v["conditions"] or [])
            ]
        if v is None or (isinstance(v, dict) and not v):
            return ""
        # Falsy scalars such as 0 or False are legitimate return values and
        # must not be collapsed into an empty string.
        return str(v)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class ToolSpec(BaseModel):
    """Specification for a complete YAML-defined tool.

    A tool either declares several named ``actions`` or uses the shorthand
    form, where the top-level ``params``/``returns``/``returns_error``/
    ``error_when``/``side_effects`` fields describe a single action that is
    exposed under the name "call".

    Attributes:
        name: Tool name.
        description: Human-readable description of the tool.
        actions: Named actions of a multi-action tool.
        params: Shorthand parameter definitions.
        returns: Shorthand return template or conditional branches.
        returns_error: Shorthand error message template.
        error_when: Shorthand error condition expression.
        side_effects: Shorthand state modifications.
    """

    name: str = ""
    description: str = ""

    # For multi-action tools
    actions: dict[str, ActionSpec] = Field(default_factory=dict)

    # For single-action tools (shorthand) - these become the default "call" action
    params: dict[str, ParamSchema] = Field(default_factory=dict)
    returns: str | list[ConditionalReturn] = ""
    returns_error: str | None = None
    error_when: str | None = None
    side_effects: list[SideEffect] = Field(default_factory=list)

    @field_validator("returns", mode="before")
    @classmethod
    def parse_returns(cls, v: Any) -> str | list[ConditionalReturn]:
        """Normalize the shorthand ``returns`` value before field validation.

        Accepts the same shapes as an action-level ``returns``: a string, a
        list of branches, a ``{conditions: [...]}`` mapping, or any other
        value (converted with ``str()``). None and an empty mapping yield an
        empty string.
        """
        if isinstance(v, str):
            return v
        if isinstance(v, list):
            return [ConditionalReturn(**item) if isinstance(item, dict) else item for item in v]
        if isinstance(v, dict) and "conditions" in v:
            return [
                ConditionalReturn(**item) if isinstance(item, dict) else item
                for item in (v["conditions"] or [])
            ]
        if v is None or (isinstance(v, dict) and not v):
            return ""
        return str(v)

    def get_effective_actions(self) -> dict[str, ActionSpec]:
        """Return all actions, synthesizing a "call" action for shorthand tools.

        Returns:
            ``actions`` when any are declared. Otherwise a single-entry dict
            with a "call" action built from the top-level shorthand fields,
            or an empty dict when none of those fields is set.
        """
        if self.actions:
            return self.actions

        # Any shorthand field makes the tool callable. Side-effect-only and
        # error-only tools count too, otherwise they would expose no action.
        uses_shorthand = any(
            (
                self.returns,
                self.params,
                self.returns_error,
                self.error_when,
                self.side_effects,
            )
        )
        if not uses_shorthand:
            return {}

        return {
            "call": ActionSpec(
                description=self.description,
                params=self.params,
                returns=self.returns,
                returns_error=self.returns_error,
                error_when=self.error_when,
                side_effects=self.side_effects,
            )
        }
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class ToolLibrary(BaseModel):
    """Named collection of YAML-defined tools loaded from one file.

    Attributes:
        name: Library name.
        description: Human-readable description of the library.
        tools: Tool specifications keyed by tool name.
    """

    name: str = Field(default="")
    description: str = Field(default="")
    tools: dict[str, ToolSpec] = Field(default_factory=dict)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# -----------------------------------------------------------------------------
|
|
184
|
+
# Expression Evaluation
|
|
185
|
+
# -----------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _interpolate(template: Any, params: dict[str, Any], state: dict[str, Any]) -> Any:
|
|
189
|
+
"""Interpolate {param} and {state.key} placeholders in a template string.
|
|
190
|
+
|
|
191
|
+
Non-string values are returned unchanged.
|
|
192
|
+
"""
|
|
193
|
+
if not isinstance(template, str):
|
|
194
|
+
return template
|
|
195
|
+
|
|
196
|
+
def replace(match: re.Match[str]) -> str:
|
|
197
|
+
key = match.group(1)
|
|
198
|
+
|
|
199
|
+
# Check params first
|
|
200
|
+
if key in params:
|
|
201
|
+
return str(params[key])
|
|
202
|
+
|
|
203
|
+
# Check state with state. prefix
|
|
204
|
+
if key.startswith("state."):
|
|
205
|
+
state_key = key[6:]
|
|
206
|
+
return str(state.get(state_key, f"{{{key}}}"))
|
|
207
|
+
|
|
208
|
+
# Check state directly
|
|
209
|
+
if key in state:
|
|
210
|
+
return str(state[key])
|
|
211
|
+
|
|
212
|
+
# Not found - return original placeholder
|
|
213
|
+
return match.group(0)
|
|
214
|
+
|
|
215
|
+
return re.sub(r"\{(\w+(?:\.\w+)*)\}", replace, template)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class _StateView(dict):
    """Snapshot of scenario state that also supports ``state.key`` access."""

    def __getattr__(self, name: str) -> Any:
        # Only reached when normal attribute lookup fails, so dict methods
        # (get, keys, items, ...) keep working. Dunder probes keep raising so
        # attribute-based protocols are not confused.
        if name.startswith("__"):
            raise AttributeError(name)
        # Missing keys read as None so "state.flag" is simply falsy.
        return self.get(name)


def _evaluate_condition(expr: str, params: dict[str, Any], state: dict[str, Any]) -> bool:
    """Evaluate a condition expression against params and state.

    Supports:
    - Simple comparisons: "param == value", "state.key != 'foo'"
    - Boolean state checks: "state.is_active", "!state.is_disabled"
    - Parameterized keys: "state.rack_{rack_id}_powered == false"

    {param} placeholders are interpolated first, then the expression is
    evaluated as a Python expression. Params and state keys that are valid
    identifiers are available as bare names, and state is also available as
    ``state`` (by attribute or by item).

    Args:
        expr: Condition expression; empty or blank evaluates to False.
        params: Tool invocation parameters.
        state: Scenario state. Evaluation works on a copy, so an expression
            cannot modify the real state.

    Returns:
        Truthiness of the expression, or False if evaluation raises.
    """
    if not expr or not expr.strip():
        return False

    # Interpolate any {param} references in the expression
    expr = _interpolate(expr, params, state).strip()

    # A leading "!" is the documented negation shorthand; Python spells it
    # "not". "!=" at the very start is left alone.
    if expr.startswith("!") and not expr.startswith("!="):
        expr = "not " + expr[1:]

    # Build evaluation context
    context: dict[str, Any] = {
        "true": True,
        "false": False,
        "True": True,
        "False": False,
        "none": None,
        "None": None,
    }
    context.update(params)
    # A plain dict has no attribute access, which made every "state.key"
    # expression raise and silently evaluate to False.
    context["state"] = _StateView(state)

    # Also expose state keys directly for convenience
    for key, value in state.items():
        if isinstance(key, str) and key.isidentifier():
            context[key] = value

    try:
        # SECURITY NOTE(review): restricting __builtins__ does NOT make eval()
        # a sandbox; attribute traversal from any object can still reach
        # arbitrary code. Only evaluate expressions from trusted YAML files.
        safe_builtins = {
            "True": True,
            "False": False,
            "None": None,
            "len": len,
            "str": str,
            "int": int,
            "float": float,
            "bool": bool,
        }
        return bool(eval(expr, {"__builtins__": safe_builtins}, context))  # noqa: S307
    except Exception as e:
        logger.debug("Condition evaluation failed for expression '%s': %s", expr, e)
        return False
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# -----------------------------------------------------------------------------
|
|
268
|
+
# YAML Mock Tool Implementation
|
|
269
|
+
# -----------------------------------------------------------------------------
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class YamlMockTool:
    """A tool implementation backed by YAML definitions.

    Implements the Tool protocol for seamless integration with the existing
    runner and agent systems.
    """

    def __init__(self, config: ToolConfig, spec: ToolSpec) -> None:
        """Initialize from config and spec.

        Args:
            config: Tool configuration (name, description, etc.)
            spec: YAML tool specification
        """
        self.name = config.name
        self.description = config.description or spec.description
        self.config = config.config
        self.spec = spec
        self._call_log: list[dict[str, Any]] = []
        # Response overrides keyed by "<tool name>.<action>".
        self._overrides: dict[str, Any] = {}

    @property
    def call_log(self) -> list[dict[str, Any]]:
        """Get log of all tool calls made."""
        return self._call_log

    def set_overrides(self, overrides: dict[str, Any] | None) -> None:
        """Install response overrides for this tool.

        Args:
            overrides: Mapping of "tool.action" to the data returned in place
                of the action's computed return value. Entries for other
                tools are ignored at invoke time. None clears all overrides.
        """
        # Copy so later changes to the caller's dict do not leak in.
        self._overrides = dict(overrides or {})

    def invoke(
        self,
        action: str,
        args: dict[str, Any],
        env_state: dict[str, Any],
    ) -> ToolResult:
        """Invoke a tool action.

        Order of operations: resolve the action, validate parameters, log the
        call, check the error condition, apply side effects, then produce the
        return value (an installed override, else the ``returns`` template).

        Args:
            action: The action to perform
            args: Arguments from the caller
            env_state: Current environment state (will be modified by side effects)

        Returns:
            ToolResult with success/error and data
        """
        actions = self.spec.get_effective_actions()

        if action not in actions:
            available = ", ".join(actions.keys()) or "none"
            return ToolResult(
                success=False,
                error=f"Unknown action '{action}'. Available: {available}",
            )

        action_spec = actions[action]

        # Validate and apply defaults to params
        validated_args = self._validate_params(action_spec, args)
        if isinstance(validated_args, ToolResult):
            return validated_args  # Validation error

        # Log the call (shallow snapshots of args and state)
        self._call_log.append(
            {
                "action": action,
                "args": validated_args.copy(),
                "state_before": env_state.copy(),
            }
        )

        # Check for error condition
        if action_spec.error_when and _evaluate_condition(
            action_spec.error_when, validated_args, env_state
        ):
            error_msg = action_spec.returns_error or "Operation failed"
            error_msg = _interpolate(error_msg, validated_args, env_state)
            return ToolResult(success=False, error=error_msg)

        # Apply side effects
        for effect in action_spec.side_effects:
            effect.apply(env_state, validated_args)

        # An override replaces only the computed return value.
        # NOTE(review): confirm with the dataset runner whether overrides
        # should also bypass error_when and side effects.
        override_key = f"{self.name}.{action}"
        if override_key in self._overrides:
            return ToolResult(success=True, data=self._overrides[override_key])

        # Compute return value
        result_value = self._compute_return(action_spec, validated_args, env_state)

        return ToolResult(success=True, data=result_value)

    def _validate_params(
        self,
        action_spec: ActionSpec,
        args: dict[str, Any],
    ) -> dict[str, Any] | ToolResult:
        """Validate and normalize parameters.

        Declared parameters are taken from ``args``, or filled from their
        default when absent. A missing required parameter produces an error
        result. Undeclared arguments are passed through unchanged.

        Returns validated args dict or ToolResult with error.
        """
        validated: dict[str, Any] = {}

        for name, schema in action_spec.params.items():
            if name in args:
                # TODO: Type coercion/validation
                validated[name] = args[name]
            elif schema.required:
                return ToolResult(
                    success=False,
                    error=f"Missing required parameter: {name}",
                )
            elif schema.default is not None:
                validated[name] = schema.default

        # Include any extra args not in schema (permissive mode)
        for name, value in args.items():
            if name not in validated:
                validated[name] = value

        return validated

    def _compute_return(
        self,
        action_spec: ActionSpec,
        params: dict[str, Any],
        state: dict[str, Any],
    ) -> Any:
        """Compute the return value for an action.

        For a list of conditional branches the first matching branch wins.
        The ``when`` values "default", "else", "otherwise", "true" and "True"
        always match. If no branch matches, an empty string is returned.
        """
        returns = action_spec.returns

        # Handle conditional returns
        if isinstance(returns, list):
            for cond in returns:
                if isinstance(cond, ConditionalReturn):
                    # Check "default" condition (always true)
                    if cond.when in ("default", "else", "otherwise", "true", "True"):
                        return _interpolate(cond.value, params, state)

                    if _evaluate_condition(cond.when, params, state):
                        return _interpolate(cond.value, params, state)

            # No condition matched - return empty
            return ""

        # Simple string return with interpolation
        return _interpolate(returns, params, state)

    def get_actions(self) -> list[dict[str, Any]]:
        """Get available actions with their schemas for the agent.

        Returns:
            One dict per action with "name", "description" and a JSON-Schema
            style "parameters" object (type, properties, required).
        """
        result = []

        for name, action_spec in self.spec.get_effective_actions().items():
            # Build JSON Schema for parameters
            properties: dict[str, Any] = {}
            required: list[str] = []

            for param_name, param_schema in action_spec.params.items():
                prop: dict[str, Any] = {
                    "type": param_schema.type,
                }
                if param_schema.description:
                    prop["description"] = param_schema.description
                if param_schema.enum:
                    prop["enum"] = param_schema.enum
                if param_schema.default is not None:
                    prop["default"] = param_schema.default

                properties[param_name] = prop

                if param_schema.required:
                    required.append(param_name)

            result.append(
                {
                    "name": name,
                    "description": action_spec.description or self.description,
                    "parameters": {
                        "type": "object",
                        "properties": properties,
                        "required": required,
                    },
                }
            )

        return result
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# -----------------------------------------------------------------------------
|
|
452
|
+
# Tool Loading
|
|
453
|
+
# -----------------------------------------------------------------------------
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class YamlToolLoader:
    """Loads YAML-defined tools from files and inline definitions."""

    # Default search paths for tool libraries
    DEFAULT_TOOL_DIRS = [
        Path("tools"),
        Path("sandboxy/tools/libraries"),
    ]

    def __init__(self, tool_dirs: list[Path] | None = None) -> None:
        """Initialize loader with search directories.

        Args:
            tool_dirs: Directories to search for tool library files.
                Defaults to DEFAULT_TOOL_DIRS (also used for an empty list).
        """
        # Copy so instances never share or mutate the class-level default.
        self.tool_dirs = list(tool_dirs) if tool_dirs else list(self.DEFAULT_TOOL_DIRS)
        self._library_cache: dict[str, ToolLibrary] = {}

    def load_library(self, name: str) -> ToolLibrary:
        """Load a tool library by name.

        Searches for {name}.yml or {name}.yaml in tool directories, in order.
        Results are cached per name.

        Args:
            name: Library name (without extension)

        Returns:
            Loaded ToolLibrary

        Raises:
            FileNotFoundError: If library file not found
        """
        if name in self._library_cache:
            return self._library_cache[name]

        # Search for the file
        for dir_path in self.tool_dirs:
            for ext in (".yml", ".yaml"):
                file_path = dir_path / f"{name}{ext}"
                if file_path.exists():
                    library = self._load_library_file(file_path)
                    self._library_cache[name] = library
                    return library

        msg = f"Tool library '{name}' not found in: {self.tool_dirs}"
        raise FileNotFoundError(msg)

    def load_library_file(self, path: Path) -> ToolLibrary:
        """Load a tool library from a specific file path (not cached).

        Args:
            path: Path to the YAML file

        Returns:
            Loaded ToolLibrary
        """
        return self._load_library_file(path)

    def _load_library_file(self, path: Path) -> ToolLibrary:
        """Internal: Load and parse a library file.

        Raises:
            ValueError: If the file's top-level YAML value is not a mapping.
        """
        content = path.read_text(encoding="utf-8")
        raw = yaml.safe_load(content)

        if not raw:
            return ToolLibrary(name=path.stem)

        if not isinstance(raw, dict):
            msg = f"Tool library file '{path}' must contain a YAML mapping at the top level"
            raise ValueError(msg)

        # Parse tools; a bare "tools:" key parses as None.
        tools: dict[str, ToolSpec] = {}
        for tool_name, tool_data in (raw.get("tools") or {}).items():
            if isinstance(tool_data, dict):
                # Build a new dict instead of mutating the parsed YAML.
                tools[tool_name] = self._parse_tool_spec({**tool_data, "name": tool_name})

        return ToolLibrary(
            name=raw.get("name") or path.stem,
            description=raw.get("description") or "",
            tools=tools,
        )

    @staticmethod
    def _parse_params(raw_params: Any) -> dict[str, ParamSchema]:
        """Build ParamSchema objects from a raw ``params`` section.

        Each entry is either a mapping of ParamSchema fields or a bare string
        naming the type. Entries of any other kind are skipped.
        """
        params: dict[str, ParamSchema] = {}
        for name, param_data in (raw_params or {}).items():
            if isinstance(param_data, dict):
                params[name] = ParamSchema(**param_data)
            elif isinstance(param_data, str):
                # Shorthand: just the type
                params[name] = ParamSchema(type=param_data)  # type: ignore[arg-type]
        return params

    @staticmethod
    def _parse_side_effects(raw_effects: Any) -> list[SideEffect]:
        """Build SideEffect objects from a raw ``side_effects`` list.

        Non-mapping entries are skipped.
        """
        return [
            SideEffect(**effect_data)
            for effect_data in (raw_effects or [])
            if isinstance(effect_data, dict)
        ]

    def _parse_tool_spec(self, data: dict[str, Any]) -> ToolSpec:
        """Parse a tool specification from raw YAML data."""
        actions: dict[str, ActionSpec] = {}
        for name, action_data in (data.get("actions") or {}).items():
            if isinstance(action_data, dict):
                actions[name] = self._parse_action_spec(action_data)

        raw_returns = data.get("returns")
        return ToolSpec(
            name=data.get("name") or "",
            description=data.get("description") or "",
            actions=actions,
            params=self._parse_params(data.get("params")),
            returns="" if raw_returns is None else raw_returns,
            returns_error=data.get("returns_error"),
            error_when=data.get("error_when"),
            side_effects=self._parse_side_effects(data.get("side_effects")),
        )

    def _parse_action_spec(self, data: dict[str, Any]) -> ActionSpec:
        """Parse an action specification from raw YAML data."""
        return ActionSpec(
            description=data.get("description") or "",
            params=self._parse_params(data.get("params")),
            returns=data.get("returns", ""),
            returns_error=data.get("returns_error"),
            error_when=data.get("error_when"),
            side_effects=self._parse_side_effects(data.get("side_effects")),
        )

    def parse_inline_tools(
        self,
        tools_data: dict[str, Any] | list[dict[str, Any]],
    ) -> dict[str, ToolSpec]:
        """Parse inline tool definitions from scenario YAML.

        Args:
            tools_data: Raw tools section from scenario YAML.
                Can be a dict (name -> spec) or list of specs with names.
                The input is not modified.

        Returns:
            Dictionary mapping tool name to ToolSpec
        """
        result: dict[str, ToolSpec] = {}

        if isinstance(tools_data, dict):
            for name, data in tools_data.items():
                if isinstance(data, dict):
                    # The mapping key is authoritative for the tool name.
                    result[name] = self._parse_tool_spec({**data, "name": name})
        elif isinstance(tools_data, list):
            for item in tools_data:
                if isinstance(item, dict) and "name" in item:
                    result[item["name"]] = self._parse_tool_spec(item)

        return result

    def create_tool_instances(
        self,
        specs: dict[str, ToolSpec],
    ) -> dict[str, YamlMockTool]:
        """Create tool instances from specifications.

        Args:
            specs: Dictionary of tool name to ToolSpec

        Returns:
            Dictionary of tool name to YamlMockTool instance
        """
        tools: dict[str, YamlMockTool] = {}

        for name, spec in specs.items():
            config = ToolConfig(
                name=name,
                type="yaml_mock",
                description=spec.description,
            )
            tools[name] = YamlMockTool(config, spec)

        return tools
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
# -----------------------------------------------------------------------------
|
|
655
|
+
# Integration Helper
|
|
656
|
+
# -----------------------------------------------------------------------------
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def load_scenario_tools(
    scenario_data: dict[str, Any],
    tool_dirs: list[Path] | None = None,
    tool_overrides: dict[str, Any] | None = None,
) -> dict[str, YamlMockTool]:
    """Load all tools for a scenario from YAML data.

    Handles both `tools_from` library references and inline `tools` definitions.
    Inline tools override library tools with the same name. Libraries that
    cannot be found are skipped with a warning.

    Args:
        scenario_data: Parsed scenario YAML
        tool_dirs: Optional tool search directories
        tool_overrides: Optional dict mapping "tool.action" to override response data.
            Used by dataset benchmarking to inject test case data.
            The full mapping is handed to every tool's ``set_overrides``.

    Returns:
        Dictionary of tool name to YamlMockTool
    """
    loader = YamlToolLoader(tool_dirs)
    all_specs: dict[str, ToolSpec] = {}

    # Load from libraries first. A bare "tools_from:" key parses as None, so
    # fall back to an empty list instead of failing on iteration.
    tools_from = scenario_data.get("tools_from") or []
    if isinstance(tools_from, str):
        tools_from = [tools_from]

    for lib_ref in tools_from:
        # Strip extension if provided
        lib_name = Path(lib_ref).stem
        try:
            library = loader.load_library(lib_name)
        except FileNotFoundError:
            logger.warning("Tool library '%s' not found, skipping", lib_name)
            continue
        all_specs.update(library.tools)

    # Load inline tools (override library tools)
    inline_tools = scenario_data.get("tools", {})
    if inline_tools:
        all_specs.update(loader.parse_inline_tools(inline_tools))

    tools = loader.create_tool_instances(all_specs)

    # Apply tool overrides if provided (for dataset benchmarking)
    if tool_overrides:
        for tool in tools.values():
            tool.set_overrides(tool_overrides)

    return tools
|