sandboxy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. sandboxy/__init__.py +3 -0
  2. sandboxy/agents/__init__.py +21 -0
  3. sandboxy/agents/base.py +66 -0
  4. sandboxy/agents/llm_prompt.py +308 -0
  5. sandboxy/agents/loader.py +222 -0
  6. sandboxy/api/__init__.py +5 -0
  7. sandboxy/api/app.py +76 -0
  8. sandboxy/api/routes/__init__.py +1 -0
  9. sandboxy/api/routes/agents.py +92 -0
  10. sandboxy/api/routes/local.py +1388 -0
  11. sandboxy/api/routes/tools.py +106 -0
  12. sandboxy/cli/__init__.py +1 -0
  13. sandboxy/cli/main.py +1196 -0
  14. sandboxy/cli/type_detector.py +48 -0
  15. sandboxy/config.py +49 -0
  16. sandboxy/core/__init__.py +1 -0
  17. sandboxy/core/async_runner.py +824 -0
  18. sandboxy/core/mdl_parser.py +441 -0
  19. sandboxy/core/runner.py +599 -0
  20. sandboxy/core/safe_eval.py +165 -0
  21. sandboxy/core/state.py +234 -0
  22. sandboxy/datasets/__init__.py +20 -0
  23. sandboxy/datasets/loader.py +193 -0
  24. sandboxy/datasets/runner.py +442 -0
  25. sandboxy/errors.py +166 -0
  26. sandboxy/local/context.py +235 -0
  27. sandboxy/local/results.py +173 -0
  28. sandboxy/logging.py +31 -0
  29. sandboxy/mcp/__init__.py +25 -0
  30. sandboxy/mcp/client.py +360 -0
  31. sandboxy/mcp/wrapper.py +99 -0
  32. sandboxy/providers/__init__.py +34 -0
  33. sandboxy/providers/anthropic_provider.py +271 -0
  34. sandboxy/providers/base.py +123 -0
  35. sandboxy/providers/http_client.py +101 -0
  36. sandboxy/providers/openai_provider.py +282 -0
  37. sandboxy/providers/openrouter.py +958 -0
  38. sandboxy/providers/registry.py +199 -0
  39. sandboxy/scenarios/__init__.py +11 -0
  40. sandboxy/scenarios/comparison.py +491 -0
  41. sandboxy/scenarios/loader.py +262 -0
  42. sandboxy/scenarios/runner.py +468 -0
  43. sandboxy/scenarios/unified.py +1434 -0
  44. sandboxy/session/__init__.py +21 -0
  45. sandboxy/session/manager.py +278 -0
  46. sandboxy/tools/__init__.py +34 -0
  47. sandboxy/tools/base.py +127 -0
  48. sandboxy/tools/loader.py +270 -0
  49. sandboxy/tools/yaml_tools.py +708 -0
  50. sandboxy/ui/__init__.py +27 -0
  51. sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
  52. sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
  53. sandboxy/ui/dist/index.html +14 -0
  54. sandboxy/utils/__init__.py +3 -0
  55. sandboxy/utils/time.py +20 -0
  56. sandboxy-0.0.1.dist-info/METADATA +241 -0
  57. sandboxy-0.0.1.dist-info/RECORD +60 -0
  58. sandboxy-0.0.1.dist-info/WHEEL +4 -0
  59. sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
  60. sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,708 @@
1
+ """YAML-defined mock tools for declarative scenario creation.
2
+
3
+ This module enables defining tools entirely in YAML without writing Python code.
4
+ Tools can have static returns, parameterized returns, conditional logic, and
5
+ side effects that modify scenario state.
6
+
7
+ Example tool definition:
8
+ tools:
9
+ power_off_rack:
10
+ description: "Power off a server rack"
11
+ params:
12
+ rack_id:
13
+ type: string
14
+ required: true
15
+ returns: "Rack {rack_id} powered off."
16
+ side_effects:
17
+ - set: "rack_{rack_id}_status"
18
+ value: "offline"
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ import re
25
+ from pathlib import Path
26
+ from typing import Any, Literal
27
+
28
+ import yaml
29
+ from pydantic import BaseModel, Field, field_validator
30
+
31
+ from sandboxy.tools.base import ToolConfig, ToolResult
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # -----------------------------------------------------------------------------
36
+ # Schema Models
37
+ # -----------------------------------------------------------------------------
38
+
39
+
40
class ParamSchema(BaseModel):
    """Declarative description of a single parameter accepted by a tool action.

    Every field is optional; anything omitted falls back to the default
    declared on the field.
    """

    # JSON-Schema style type name; only the six literals below are accepted.
    type: Literal["string", "number", "boolean", "integer", "array", "object"] = "string"
    # Free-text explanation of what the parameter is for.
    description: str = ""
    # Whether a call must supply this parameter.
    required: bool = False
    # Fallback value used when the caller omits the parameter (None = no default).
    default: Any = None
    # Optional list of permitted values.
    enum: list[Any] | None = None
56
+
57
+
58
class SideEffect(BaseModel):
    """State mutation carried out as part of a tool call.

    Fields:
        set: Name of the state entry to write; may contain ``{param}``
            placeholders.
        value: What to store; string values may contain ``{param}`` and
            ``{state.key}`` placeholders, other values are stored as-is.
    """

    set: str
    value: Any

    def apply(self, state: dict[str, Any], params: dict[str, Any]) -> None:
        """Write this effect's value into ``state`` in place.

        Args:
            state: Mutable environment state that receives the entry.
            params: Arguments of the tool invocation, used for placeholder
                substitution in both the key and the value.
        """
        target_key = _interpolate(self.set, params, state)

        # Only strings carry placeholders; anything else is stored verbatim.
        resolved = (
            _interpolate(self.value, params, state)
            if isinstance(self.value, str)
            else self.value
        )

        state[target_key] = resolved
84
+
85
+
86
class ConditionalReturn(BaseModel):
    """One branch of a conditional ``returns`` list.

    A branch pairs a condition expression with the response text that is
    produced when that condition holds.
    """

    # Condition expression deciding whether this branch is selected.
    when: str
    # Response text returned when the condition is true.
    value: str
96
+
97
+
98
class ActionSpec(BaseModel):
    """Declarative definition of one callable action of a tool.

    Fields:
        description: Human-readable summary of the action.
        params: Mapping of parameter name to its schema.
        returns: Either a single response string or an ordered list of
            conditional branches.
        returns_error: Message used when ``error_when`` evaluates to true.
        error_when: Condition expression that turns the call into an error.
        side_effects: State writes performed when the call succeeds.
    """

    description: str = ""
    params: dict[str, ParamSchema] = Field(default_factory=dict)
    returns: str | list[ConditionalReturn] = ""
    returns_error: str | None = None
    error_when: str | None = None
    side_effects: list[SideEffect] = Field(default_factory=list)

    @field_validator("returns", mode="before")
    @classmethod
    def parse_returns(cls, v: Any) -> str | list[ConditionalReturn]:
        """Normalise the raw ``returns`` value before field validation.

        Accepted shapes: a plain string, a list of branches, or a mapping of
        the form ``{conditions: [...]}``. Any other truthy value is
        stringified; falsy values become the empty string.
        """
        if isinstance(v, str):
            return v

        # Locate the branch list, whichever of the two container forms is used.
        if isinstance(v, list):
            branches = v
        elif isinstance(v, dict) and "conditions" in v:
            branches = v["conditions"]
        else:
            return str(v) if v else ""

        # Mappings become ConditionalReturn objects; other entries pass through.
        return [
            ConditionalReturn(**entry) if isinstance(entry, dict) else entry
            for entry in branches
        ]
132
+
133
+
134
class ToolSpec(BaseModel):
    """Specification for a complete YAML-defined tool.

    A tool is written in one of two ways:

    * multi-action: named entries under ``actions``;
    * single-action shorthand: ``params`` / ``returns`` / ``returns_error`` /
      ``error_when`` / ``side_effects`` placed directly on the tool, which
      are exposed as one synthesized action named ``"call"``.
    """

    name: str = ""
    description: str = ""

    # For multi-action tools
    actions: dict[str, ActionSpec] = Field(default_factory=dict)

    # For single-action tools (shorthand) - these become the default "call" action
    params: dict[str, ParamSchema] = Field(default_factory=dict)
    returns: str | list[ConditionalReturn] = ""
    returns_error: str | None = None
    error_when: str | None = None
    side_effects: list[SideEffect] = Field(default_factory=list)

    @field_validator("returns", mode="before")
    @classmethod
    def parse_returns(cls, v: Any) -> str | list[ConditionalReturn]:
        """Normalise the shorthand ``returns`` value before field validation.

        Accepts the same shapes as an action-level ``returns``: a string, a
        list of branches, or ``{conditions: [...]}``. Other truthy values
        are stringified and falsy values (including ``None``) become ``""``,
        so the shorthand form is not stricter than the multi-action form.
        """
        if isinstance(v, str):
            return v
        if isinstance(v, dict) and "conditions" in v:
            v = v["conditions"]
        if isinstance(v, list):
            return [
                ConditionalReturn(**item) if isinstance(item, dict) else item
                for item in v
            ]
        return str(v) if v else ""

    def get_effective_actions(self) -> dict[str, ActionSpec]:
        """Return the tool's actions, synthesizing ``"call"`` for shorthand tools.

        Returns:
            ``self.actions`` when any are declared. Otherwise a one-entry
            mapping ``{"call": ActionSpec}`` built from the top-level fields
            when at least one of them is set, or an empty dict when the tool
            declares nothing callable.
        """
        if self.actions:
            return self.actions

        # Any shorthand field makes the tool callable. A tool that only
        # declares side_effects or error_when must not be silently dropped.
        shorthand_fields = (
            self.returns,
            self.params,
            self.returns_error,
            self.error_when,
            self.side_effects,
        )
        if not any(shorthand_fields):
            return {}

        return {
            "call": ActionSpec(
                description=self.description,
                params=self.params,
                returns=self.returns,
                returns_error=self.returns_error,
                error_when=self.error_when,
                side_effects=self.side_effects,
            )
        }
173
+
174
+
175
class ToolLibrary(BaseModel):
    """Named collection of tool specifications read from one YAML file."""

    # Library name.
    name: str = ""
    # Free-text summary of what the library provides.
    description: str = ""
    # Tool specifications keyed by tool name.
    tools: dict[str, ToolSpec] = Field(default_factory=dict)
181
+
182
+
183
+ # -----------------------------------------------------------------------------
184
+ # Expression Evaluation
185
+ # -----------------------------------------------------------------------------
186
+
187
+
188
def _interpolate(template: Any, params: dict[str, Any], state: dict[str, Any]) -> Any:
    """Fill ``{name}`` / ``{state.name}`` placeholders in ``template``.

    Lookup order for a placeholder ``{name}``:

    1. ``params[name]``
    2. if the name starts with ``state.``, the state entry after that prefix
    3. ``state[name]``

    Placeholders that cannot be resolved are left in the text untouched.
    Anything that is not a string is handed back as-is.
    """
    if isinstance(template, str):
        prefix = "state."

        def _resolve(hit: re.Match[str]) -> str:
            name = hit.group(1)
            untouched = hit.group(0)  # the full "{name}" text

            if name in params:
                return str(params[name])
            if name.startswith(prefix):
                # Explicit state reference; keep the placeholder when absent.
                return str(state.get(name[len(prefix):], untouched))
            if name in state:
                return str(state[name])
            return untouched

        return re.sub(r"\{(\w+(?:\.\w+)*)\}", _resolve, template)

    return template
216
+
217
+
218
class _StateView(dict):
    """Copy of the state dict whose entries can also be read as attributes.

    Lets condition expressions use ``state.key`` in addition to
    ``state["key"]`` / ``state.get("key")``.
    """

    def __getattr__(self, name: str) -> Any:
        # Only reached when normal attribute lookup fails, so dict methods
        # (``get``, ``items``, ...) keep working and win over same-named keys.
        if name.startswith("_"):
            raise AttributeError(name)
        # Missing entries read as None so "not state.flag" is true when unset.
        return self.get(name)


# Group 1 matches a quoted string literal (left untouched); the alternative
# matches a bare "!" that is not the start of "!=".
_NEGATION_RE = re.compile(r"""('(?:\\.|[^'\\])*'|"(?:\\.|[^"\\])*")|!(?!=)""")


def _evaluate_condition(expr: str, params: dict[str, Any], state: dict[str, Any]) -> bool:
    """Evaluate a condition expression against call parameters and state.

    Supports:
    - Simple comparisons: "param == value", "state.key != 'foo'"
    - Boolean state checks: "state.is_active", "!state.is_disabled"
    - Parameterized keys: "state.rack_{rack_id}_powered == false"

    Name resolution inside the expression, lowest to highest priority:
    the literals ``true``/``false``/``none`` (and capitalised forms), state
    entries whose key is a valid identifier, call parameters, and finally
    ``state`` itself. Parameters win over bare state keys, matching the
    lookup order used for placeholder interpolation.

    Args:
        expr: Condition text; ``{placeholder}`` references are interpolated
            before evaluation.
        params: Arguments of the tool invocation.
        state: Current environment state (read-only here; a copy is exposed
            to the expression).

    Returns:
        The truthiness of the expression. Empty expressions and expressions
        that fail to evaluate yield ``False``.
    """
    if not expr or not expr.strip():
        return False

    # Interpolate any {param} references in the expression
    expr = _interpolate(expr, params, state)

    # Translate the documented "!flag" shorthand into Python's "not flag",
    # leaving "!=" and the contents of quoted strings alone.
    expr = _NEGATION_RE.sub(lambda m: m.group(1) or " not ", expr).strip()

    context: dict[str, Any] = {
        "true": True,
        "false": False,
        "True": True,
        "False": False,
        "none": None,
        "None": None,
    }
    # Bare state keys are a convenience; non-string keys cannot be names.
    context.update(
        {key: value for key, value in state.items() if isinstance(key, str) and key.isidentifier()}
    )
    context.update(params)
    # Bound last so neither a param nor a state key can hide the state object.
    context["state"] = _StateView(state)

    try:
        safe_builtins = {
            "True": True,
            "False": False,
            "None": None,
            "len": len,
            "str": str,
            "int": int,
            "float": float,
            "bool": bool,
        }
        # SECURITY: eval() with a reduced __builtins__ is NOT a sandbox, and
        # the expression text includes interpolated caller arguments. Only
        # use with trusted scenario definitions and trusted argument values.
        return bool(eval(expr, {"__builtins__": safe_builtins}, context))  # noqa: S307
    except Exception as e:
        # Deliberate best-effort: a broken condition counts as "not met".
        logger.debug("Condition evaluation failed for expression '%s': %s", expr, e)
        return False
265
+
266
+
267
+ # -----------------------------------------------------------------------------
268
+ # YAML Mock Tool Implementation
269
+ # -----------------------------------------------------------------------------
270
+
271
+
272
class YamlMockTool:
    """A tool implementation backed by a YAML ``ToolSpec``.

    Each invocation validates the arguments, records the call, evaluates the
    action's error condition, applies its side effects to the environment
    state and produces the configured (or overridden) response.
    """

    def __init__(self, config: ToolConfig, spec: ToolSpec) -> None:
        """Initialize from config and spec.

        Args:
            config: Tool configuration; supplies the name, an optional
                description and the raw ``config`` payload.
            spec: YAML tool specification; its description is used when the
                config does not provide one.
        """
        self.name = config.name
        self.description = config.description or spec.description
        self.config = config.config
        self.spec = spec
        self._call_log: list[dict[str, Any]] = []
        self._overrides: dict[str, Any] = {}

    @property
    def call_log(self) -> list[dict[str, Any]]:
        """Log of all validated tool calls (action, args, state snapshot)."""
        return self._call_log

    def set_overrides(self, overrides: dict[str, Any] | None) -> None:
        """Install response overrides for this tool.

        Args:
            overrides: Mapping of ``"<tool name>.<action>"`` to the data that
                should be returned for that action. Entries for other tools
                are kept but never matched. ``None`` clears all overrides.
        """
        # Copy so later changes to the caller's dict do not leak in.
        self._overrides = dict(overrides or {})

    def _lookup_override(self, action: str) -> tuple[bool, Any]:
        """Return ``(True, data)`` if an override exists for ``action``."""
        # getattr: tolerate instances created without running __init__.
        overrides = getattr(self, "_overrides", None) or {}
        key = f"{self.name}.{action}"
        if key in overrides:
            return True, overrides[key]
        return False, None

    def invoke(
        self,
        action: str,
        args: dict[str, Any],
        env_state: dict[str, Any],
    ) -> ToolResult:
        """Invoke a tool action.

        Args:
            action: The action to perform.
            args: Arguments from the caller.
            env_state: Current environment state (modified in place by the
                action's side effects).

        Returns:
            ``ToolResult`` with ``success=False`` and an error message for an
            unknown action, a missing required parameter or a triggered
            ``error_when`` condition; otherwise ``success=True`` with the
            response data.
        """
        actions = self.spec.get_effective_actions()

        if action not in actions:
            available = ", ".join(actions.keys()) or "none"
            return ToolResult(
                success=False,
                error=f"Unknown action '{action}'. Available: {available}",
            )

        action_spec = actions[action]

        # Validate and apply defaults to params
        validated_args = self._validate_params(action_spec, args)
        if isinstance(validated_args, ToolResult):
            return validated_args  # Validation error

        # Record the call before any state change; copies are shallow.
        self._call_log.append(
            {
                "action": action,
                "args": validated_args.copy(),
                "state_before": env_state.copy(),
            }
        )

        # Check for error condition
        if action_spec.error_when and _evaluate_condition(
            action_spec.error_when, validated_args, env_state
        ):
            error_msg = action_spec.returns_error or "Operation failed"
            error_msg = _interpolate(error_msg, validated_args, env_state)
            return ToolResult(success=False, error=error_msg)

        # Apply side effects
        for effect in action_spec.side_effects:
            effect.apply(env_state, validated_args)

        # NOTE(review): an override replaces only the success payload; the
        # error condition and side effects above still run. Confirm this
        # matches what the dataset benchmarking caller expects.
        overridden, result_value = self._lookup_override(action)
        if not overridden:
            result_value = self._compute_return(action_spec, validated_args, env_state)

        return ToolResult(success=True, data=result_value)

    def _validate_params(
        self,
        action_spec: ActionSpec,
        args: dict[str, Any],
    ) -> dict[str, Any] | ToolResult:
        """Validate and normalize parameters.

        Declared parameters are taken from ``args`` or filled from their
        default; a missing required parameter aborts validation. Arguments
        not declared in the schema are passed through unchanged.

        Returns:
            The validated argument dict, or a failed ``ToolResult`` naming
            the first missing required parameter.
        """
        validated: dict[str, Any] = {}

        for name, schema in action_spec.params.items():
            if name in args:
                # TODO: Type coercion/validation
                validated[name] = args[name]
            elif schema.required:
                return ToolResult(
                    success=False,
                    error=f"Missing required parameter: {name}",
                )
            elif schema.default is not None:
                validated[name] = schema.default

        # Include any extra args not in schema (permissive mode)
        for name, value in args.items():
            if name not in validated:
                validated[name] = value

        return validated

    def _compute_return(
        self,
        action_spec: ActionSpec,
        params: dict[str, Any],
        state: dict[str, Any],
    ) -> Any:
        """Compute the return value for an action.

        For a conditional ``returns`` list the first matching branch wins;
        the words ``default``/``else``/``otherwise``/``true``/``True`` always
        match. If no branch matches the result is the empty string. A plain
        ``returns`` string is interpolated and returned.
        """
        returns = action_spec.returns

        # Handle conditional returns
        if isinstance(returns, list):
            for cond in returns:
                if isinstance(cond, ConditionalReturn):
                    # Check "default" condition (always true)
                    if cond.when in ("default", "else", "otherwise", "true", "True"):
                        return _interpolate(cond.value, params, state)

                    if _evaluate_condition(cond.when, params, state):
                        return _interpolate(cond.value, params, state)

            # No condition matched - return empty
            return ""

        # Simple string return with interpolation
        return _interpolate(returns, params, state)

    def get_actions(self) -> list[dict[str, Any]]:
        """Describe the available actions for the agent.

        Returns:
            One dict per action with ``name``, ``description`` (falling back
            to the tool description) and ``parameters`` as a JSON-Schema
            style object built from the action's parameter schemas.
        """
        result = []

        for name, action_spec in self.spec.get_effective_actions().items():
            # Build JSON Schema for parameters
            properties: dict[str, Any] = {}
            required: list[str] = []

            for param_name, param_schema in action_spec.params.items():
                prop: dict[str, Any] = {
                    "type": param_schema.type,
                }
                if param_schema.description:
                    prop["description"] = param_schema.description
                if param_schema.enum:
                    prop["enum"] = param_schema.enum
                if param_schema.default is not None:
                    prop["default"] = param_schema.default

                properties[param_name] = prop

                if param_schema.required:
                    required.append(param_name)

            result.append(
                {
                    "name": name,
                    "description": action_spec.description or self.description,
                    "parameters": {
                        "type": "object",
                        "properties": properties,
                        "required": required,
                    },
                }
            )

        return result
449
+
450
+
451
+ # -----------------------------------------------------------------------------
452
+ # Tool Loading
453
+ # -----------------------------------------------------------------------------
454
+
455
+
456
class YamlToolLoader:
    """Loads YAML-defined tools from library files and inline definitions."""

    # Default search paths for tool libraries (relative to the working directory).
    DEFAULT_TOOL_DIRS = [
        Path("tools"),
        Path("sandboxy/tools/libraries"),
    ]

    def __init__(self, tool_dirs: list[Path] | None = None) -> None:
        """Initialize loader with search directories.

        Args:
            tool_dirs: Directories to search for tool library files.
                ``None`` or an empty list selects ``DEFAULT_TOOL_DIRS``.
        """
        # Copy the class-level default so per-instance edits never alter it.
        self.tool_dirs = tool_dirs if tool_dirs else list(self.DEFAULT_TOOL_DIRS)
        self._library_cache: dict[str, ToolLibrary] = {}

    def load_library(self, name: str) -> ToolLibrary:
        """Load a tool library by name.

        Searches each tool directory in order for ``{name}.yml`` and then
        ``{name}.yaml``; the first hit is parsed and cached under ``name``.

        Args:
            name: Library name (without extension).

        Returns:
            Loaded ToolLibrary.

        Raises:
            FileNotFoundError: If no matching library file exists.
        """
        if name in self._library_cache:
            return self._library_cache[name]

        for dir_path in self.tool_dirs:
            for ext in (".yml", ".yaml"):
                file_path = dir_path / f"{name}{ext}"
                # is_file(): a directory that happens to match is not a library.
                if file_path.is_file():
                    library = self._load_library_file(file_path)
                    self._library_cache[name] = library
                    return library

        msg = f"Tool library '{name}' not found in: {self.tool_dirs}"
        raise FileNotFoundError(msg)

    def load_library_file(self, path: Path) -> ToolLibrary:
        """Load a tool library from a specific file path (not cached).

        Args:
            path: Path to the YAML file.

        Returns:
            Loaded ToolLibrary.
        """
        return self._load_library_file(path)

    def _load_library_file(self, path: Path) -> ToolLibrary:
        """Read and parse one library file.

        An empty file yields an empty library named after the file stem.

        Raises:
            ValueError: If the document's top level is not a mapping.
        """
        # Explicit encoding: do not depend on the platform's locale default.
        raw = yaml.safe_load(path.read_text(encoding="utf-8"))

        if not raw:
            return ToolLibrary(name=path.stem)
        if not isinstance(raw, dict):
            msg = f"Tool library file '{path}' must contain a YAML mapping at the top level"
            raise ValueError(msg)

        return ToolLibrary(
            name=raw.get("name", path.stem),
            description=raw.get("description", ""),
            # "tools:" with no value parses as None; treat it as empty.
            tools=self.parse_inline_tools(raw.get("tools") or {}),
        )

    @staticmethod
    def _parse_params(raw_params: dict[str, Any] | None) -> dict[str, ParamSchema]:
        """Build parameter schemas from a raw ``params`` mapping.

        A mapping entry is expanded into a full schema; a bare string is the
        shorthand for "just the type". Other entry shapes are skipped.
        """
        params: dict[str, ParamSchema] = {}
        for name, param_data in (raw_params or {}).items():
            if isinstance(param_data, dict):
                params[name] = ParamSchema(**param_data)
            elif isinstance(param_data, str):
                params[name] = ParamSchema(type=param_data)  # type: ignore[arg-type]
        return params

    @staticmethod
    def _parse_side_effects(raw_effects: list[Any] | None) -> list[SideEffect]:
        """Build side effects from a raw list; non-mapping entries are skipped."""
        return [
            SideEffect(**effect_data)
            for effect_data in (raw_effects or [])
            if isinstance(effect_data, dict)
        ]

    def _parse_tool_spec(self, data: dict[str, Any]) -> ToolSpec:
        """Parse a tool specification from raw YAML data.

        Sections present but empty in the YAML (parsed as ``None``) are
        treated the same as absent sections.
        """
        actions: dict[str, ActionSpec] = {
            name: self._parse_action_spec(action_data)
            for name, action_data in (data.get("actions") or {}).items()
            if isinstance(action_data, dict)
        }

        raw_returns = data.get("returns")
        return ToolSpec(
            name=data.get("name", ""),
            description=data.get("description", ""),
            actions=actions,
            params=self._parse_params(data.get("params")),
            returns="" if raw_returns is None else raw_returns,
            returns_error=data.get("returns_error"),
            error_when=data.get("error_when"),
            side_effects=self._parse_side_effects(data.get("side_effects")),
        )

    def _parse_action_spec(self, data: dict[str, Any]) -> ActionSpec:
        """Parse an action specification from raw YAML data."""
        return ActionSpec(
            description=data.get("description", ""),
            params=self._parse_params(data.get("params")),
            returns=data.get("returns", ""),
            returns_error=data.get("returns_error"),
            error_when=data.get("error_when"),
            side_effects=self._parse_side_effects(data.get("side_effects")),
        )

    def parse_inline_tools(
        self,
        tools_data: dict[str, Any] | list[dict[str, Any]],
    ) -> dict[str, ToolSpec]:
        """Parse inline tool definitions from scenario YAML.

        The input is not modified.

        Args:
            tools_data: Raw tools section. Either a mapping of name to spec,
                or a list of specs that each carry a ``name`` key. Entries
                that are not mappings (or list items without a name) are
                skipped.

        Returns:
            Dictionary mapping tool name to ToolSpec.
        """
        result: dict[str, ToolSpec] = {}

        if isinstance(tools_data, dict):
            for name, data in tools_data.items():
                if isinstance(data, dict):
                    # Merge into a new dict so the caller's data is not mutated;
                    # the mapping key always wins over an embedded "name".
                    result[name] = self._parse_tool_spec({**data, "name": name})
        elif isinstance(tools_data, list):
            for item in tools_data:
                if isinstance(item, dict) and "name" in item:
                    result[item["name"]] = self._parse_tool_spec(item)

        return result

    def create_tool_instances(
        self,
        specs: dict[str, ToolSpec],
    ) -> dict[str, YamlMockTool]:
        """Create tool instances from specifications.

        Args:
            specs: Dictionary of tool name to ToolSpec.

        Returns:
            Dictionary of tool name to YamlMockTool instance, each configured
            with type ``"yaml_mock"`` and the spec's description.
        """
        return {
            name: YamlMockTool(
                ToolConfig(
                    name=name,
                    type="yaml_mock",
                    description=spec.description,
                ),
                spec,
            )
            for name, spec in specs.items()
        }
652
+
653
+
654
+ # -----------------------------------------------------------------------------
655
+ # Integration Helper
656
+ # -----------------------------------------------------------------------------
657
+
658
+
659
def load_scenario_tools(
    scenario_data: dict[str, Any],
    tool_dirs: list[Path] | None = None,
    tool_overrides: dict[str, Any] | None = None,
) -> dict[str, YamlMockTool]:
    """Load all tools for a scenario from YAML data.

    Handles both `tools_from` library references and inline `tools` definitions.
    Libraries are applied in the order listed (later ones win on name
    clashes) and inline tools override library tools with the same name.
    Libraries that cannot be found are skipped with a warning.

    Args:
        scenario_data: Parsed scenario YAML.
        tool_dirs: Optional tool search directories.
        tool_overrides: Optional dict mapping "tool.action" to override response data.
            Used by dataset benchmarking to inject test case data.

    Returns:
        Dictionary of tool name to YamlMockTool.
    """
    loader = YamlToolLoader(tool_dirs)
    all_specs: dict[str, ToolSpec] = {}

    # "tools_from:" with no value parses as None; treat it like an absent key.
    tools_from = scenario_data.get("tools_from") or []
    if isinstance(tools_from, str):
        tools_from = [tools_from]

    for lib_ref in tools_from:
        # Drop any directory part, then strip only a YAML extension so that
        # dotted library names (e.g. "net.v2") are not truncated.
        lib_name = Path(lib_ref).name
        for ext in (".yml", ".yaml"):
            if lib_name.endswith(ext):
                lib_name = lib_name[: -len(ext)]
                break
        try:
            library = loader.load_library(lib_name)
        except FileNotFoundError:
            logger.warning("Tool library '%s' not found, skipping", lib_name)
        else:
            all_specs.update(library.tools)

    # Load inline tools (override library tools)
    inline_tools = scenario_data.get("tools", {})
    if inline_tools:
        all_specs.update(loader.parse_inline_tools(inline_tools))

    tools = loader.create_tool_instances(all_specs)

    # Apply tool overrides if provided (for dataset benchmarking).
    # NOTE(review): requires the tool class to provide set_overrides().
    if tool_overrides:
        for tool in tools.values():
            tool.set_overrides(tool_overrides)

    return tools