proxilion 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,651 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema validation system for Proxilion.
|
|
3
|
+
|
|
4
|
+
This module provides dataclass-based schema definitions and validation
|
|
5
|
+
for tool call arguments. It validates types, required fields, constraints,
|
|
6
|
+
and includes security-focused validations like path traversal detection.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import re
|
|
13
|
+
import threading
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Any, Literal
|
|
16
|
+
|
|
17
|
+
from proxilion.exceptions import SchemaValidationError
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# Type alias for risk levels
|
|
22
|
+
RiskLevel = Literal["low", "medium", "high", "critical"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class ParameterSchema:
    """
    Schema definition for a single tool parameter.

    Defines the expected type, constraints, and metadata for a parameter.

    Attributes:
        name: Parameter name.
        type: Schema type name ("str", "int", "float", "bool", "list",
            "dict" or "any").
        description: Human-readable description.
        constraints: Validation constraints (min, max, pattern, enum, etc.).
        sensitive: Whether this parameter contains sensitive data (for redaction).
        default: Default value if not provided.
        required: Whether the parameter is required (can also be set at ToolSchema level).

    Constraint Options (as interpreted by SchemaValidator):
        - min: Minimum value ("int" / "float" parameters only).
        - max: Maximum value ("int" / "float" parameters only).
        - pattern: Regex pattern for string validation.
        - enum: List of allowed values.
        - min_length: Minimum string/list length.
        - max_length: Maximum string/list length.
        - allow_path_traversal: If False, reject ".." in paths. The check is
          opt-in: when the key is absent it is treated as True and no
          path traversal check runs.
        - allow_sql_keywords: If False, reject SQL injection patterns
          (default: True, i.e. no check).

    Example:
        >>> param = ParameterSchema(
        ...     name="query",
        ...     type="str",
        ...     description="SQL query to execute",
        ...     constraints={"max_length": 1000, "allow_sql_keywords": False},
        ...     sensitive=True,
        ... )
    """

    name: str
    type: str = "str"
    description: str = ""
    constraints: dict[str, Any] = field(default_factory=dict)
    sensitive: bool = False
    default: Any = None
    required: bool = True

    def __post_init__(self) -> None:
        """Warn (without raising) when ``type`` is not a known schema type."""
        valid_types = {"str", "int", "float", "bool", "list", "dict", "any"}
        if self.type not in valid_types:
            # Sorted so the message is stable between runs; a raw set of
            # strings prints in hash-randomized order.
            logger.warning(
                f"Unknown type '{self.type}' for parameter '{self.name}'. "
                f"Valid types: {sorted(valid_types)}"
            )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class ToolSchema:
    """
    Describes one tool: its parameters and the rules applied to them.

    A ToolSchema bundles every parameter definition for a tool together
    with bookkeeping metadata. It is the object checked against incoming
    tool call arguments before the tool runs.

    Attributes:
        name: Tool name (should match the tool function name).
        description: Human-readable tool description.
        parameters: Dictionary mapping parameter names to ParameterSchema.
        required_parameters: List of required parameter names.
        risk_level: Risk level for audit and approval decisions.
        tags: Optional tags for categorization.
        version: Schema version for compatibility tracking.

    Example:
        >>> schema = ToolSchema(
        ...     name="file_read",
        ...     description="Read contents of a file",
        ...     parameters={
        ...         "path": ParameterSchema(
        ...             name="path",
        ...             type="str",
        ...             constraints={"allow_path_traversal": False},
        ...         ),
        ...         "encoding": ParameterSchema(
        ...             name="encoding",
        ...             type="str",
        ...             default="utf-8",
        ...             required=False,
        ...         ),
        ...     },
        ...     required_parameters=["path"],
        ...     risk_level="medium",
        ... )
    """

    name: str
    description: str = ""
    parameters: dict[str, ParameterSchema] = field(default_factory=dict)
    required_parameters: list[str] = field(default_factory=list)
    risk_level: RiskLevel = "low"
    tags: list[str] = field(default_factory=list)
    version: str = "1.0"

    def get_sensitive_parameters(self) -> list[str]:
        """Return the names of all parameters whose ``sensitive`` flag is set."""
        flagged: list[str] = []
        for key, spec in self.parameters.items():
            if spec.sensitive:
                flagged.append(key)
        return flagged

    def get_parameter(self, name: str) -> ParameterSchema | None:
        """Look up a parameter schema by name; ``None`` when it is not defined."""
        registry = self.parameters
        return registry.get(name)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@dataclass
class ValidationResult:
    """
    Outcome of validating a set of arguments against a schema.

    Attributes:
        valid: Whether validation passed.
        errors: List of validation error messages.
        warnings: List of validation warnings.
        sanitized_arguments: Arguments after sanitization (if applicable).
    """

    valid: bool
    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    sanitized_arguments: dict[str, Any] | None = None

    @classmethod
    def success(
        cls,
        sanitized: dict[str, Any] | None = None,
        warnings: list[str] | None = None,
    ) -> ValidationResult:
        """Build a passing result, optionally carrying sanitized arguments and warnings."""
        # A missing (or empty) warnings argument becomes a fresh empty list.
        notes = warnings if warnings else []
        return cls(valid=True, warnings=notes, sanitized_arguments=sanitized)

    @classmethod
    def failure(cls, errors: list[str]) -> ValidationResult:
        """Build a failing result that carries the given error messages."""
        return cls(errors=errors, valid=False)

    def raise_if_invalid(self, tool_name: str) -> None:
        """Raise SchemaValidationError for ``tool_name`` when this result is not valid."""
        if self.valid:
            return
        raise SchemaValidationError(
            tool_name=tool_name,
            validation_errors=self.errors,
        )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class SchemaValidator:
    """
    Validates tool call arguments against registered schemas.

    The SchemaValidator maintains a registry of tool schemas and
    validates arguments against them. It performs type checking,
    constraint validation, and security checks.

    Features:
        - Type validation (str, int, float, bool, list, dict)
        - Constraint validation (min, max, pattern, enum, etc.)
        - Security validations (path traversal, SQL injection)
        - Required field checking
        - Default value application for missing required parameters
        - Schema registry access guarded by a re-entrant lock

    Example:
        >>> validator = SchemaValidator()
        >>> validator.register_schema("calculator", calculator_schema)
        >>> result = validator.validate("calculator", {
        ...     "operation": "add",
        ...     "a": 5,
        ...     "b": 3,
        ... })
        >>> if result.valid:
        ...     print("Validation passed!")
    """

    # SQL injection patterns to detect
    SQL_INJECTION_PATTERNS = [
        r"(?i)\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER)\b",
        r"(?i)\b(EXEC|EXECUTE|INTO|FROM|WHERE)\b.*[;'\"]",
        r"--",  # SQL comment
        r"/\*",  # Block comment start
        r";\s*(SELECT|INSERT|UPDATE|DELETE|DROP)",  # Chained statements
    ]

    # Schema type name -> Python type(s) accepted by isinstance().
    _TYPE_MAP = {
        "str": str,
        "int": int,
        "float": (int, float),  # Allow int for float
        "bool": bool,
        "list": list,
        "dict": dict,
    }

    # Resolved type hint (or generic origin) -> schema type name.
    _HINT_TO_SCHEMA_TYPE = {
        str: "str",
        int: "int",
        float: "float",
        bool: "bool",
        list: "list",
        dict: "dict",
    }

    # Bare annotation name -> schema type name, used only for annotations
    # that could not be resolved to real types.
    _NAME_TO_SCHEMA_TYPE = {
        "str": "str",
        "int": "int",
        "float": "float",
        "bool": "bool",
        "list": "list",
        "dict": "dict",
        "List": "list",
        "Dict": "dict",
    }

    # UUID format: 8-4-4-4-12 hex characters. Compiled once; used with
    # fullmatch() so no anchors are needed.
    _UUID_PATTERN = re.compile(
        r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
        re.IGNORECASE,
    )

    # How many rounds of percent-decoding the path traversal check peels off.
    _MAX_URL_DECODE_PASSES = 3

    def __init__(self, strict_mode: bool = False) -> None:
        """
        Initialize the schema validator.

        Args:
            strict_mode: If True, reject unknown parameters and tools
                without a registered schema. If False, both are allowed
                with a warning.
        """
        self._schemas: dict[str, ToolSchema] = {}
        self._lock = threading.RLock()
        self.strict_mode = strict_mode

        # Compile SQL injection patterns
        self._sql_patterns = [
            re.compile(pattern) for pattern in self.SQL_INJECTION_PATTERNS
        ]

    def register_schema(self, tool_name: str, schema: ToolSchema) -> None:
        """
        Register a schema for a tool.

        An existing schema under the same name is overwritten (a warning
        is logged).

        Args:
            tool_name: The tool name (should match function name).
            schema: The ToolSchema definition.

        Example:
            >>> validator.register_schema("my_tool", my_schema)
        """
        with self._lock:
            if tool_name in self._schemas:
                logger.warning(f"Overwriting schema for tool '{tool_name}'")
            self._schemas[tool_name] = schema
            logger.debug(f"Registered schema for tool '{tool_name}'")

    def unregister_schema(self, tool_name: str) -> bool:
        """
        Unregister a tool schema.

        Args:
            tool_name: The tool name to unregister.

        Returns:
            True if a schema was removed.
        """
        with self._lock:
            if tool_name in self._schemas:
                del self._schemas[tool_name]
                return True
            return False

    def get_schema(self, tool_name: str) -> ToolSchema | None:
        """Get the schema for a tool, or None if none is registered."""
        with self._lock:
            return self._schemas.get(tool_name)

    def has_schema(self, tool_name: str) -> bool:
        """Check if a schema is registered for a tool."""
        with self._lock:
            return tool_name in self._schemas

    def list_schemas(self) -> list[str]:
        """List all registered tool names."""
        with self._lock:
            return list(self._schemas.keys())

    def validate(
        self,
        tool_name: str,
        arguments: dict[str, Any],
    ) -> ValidationResult:
        """
        Validate arguments against a tool's schema.

        Args:
            tool_name: The name of the tool.
            arguments: The arguments to validate.

        Returns:
            ValidationResult with validation status and any errors. On
            success, ``sanitized_arguments`` holds a copy of ``arguments``
            plus defaults filled in for missing required parameters.

        Example:
            >>> result = validator.validate("calculator", {"op": "add", "a": 5})
            >>> if not result.valid:
            ...     print("Errors:", result.errors)
        """
        # Only the registry lookup needs the lock; validation itself works
        # on the schema object that was fetched.
        with self._lock:
            schema = self._schemas.get(tool_name)

        if schema is None:
            if self.strict_mode:
                return ValidationResult.failure(
                    [f"No schema registered for tool '{tool_name}'"]
                )
            # No schema = allow everything (with warning)
            logger.warning(f"No schema for tool '{tool_name}', skipping validation")
            return ValidationResult.success(
                sanitized=arguments,
                warnings=[f"No schema registered for tool '{tool_name}'"],
            )

        errors: list[str] = []
        warnings: list[str] = []
        sanitized = dict(arguments)

        # Check required parameters
        for param_name in schema.required_parameters:
            if param_name in arguments:
                continue
            param_schema = schema.get_parameter(param_name)
            # A default of None cannot be told apart from "no default",
            # so only non-None defaults satisfy a missing required parameter.
            if param_schema and param_schema.default is not None:
                sanitized[param_name] = param_schema.default
            else:
                errors.append(f"Missing required parameter: '{param_name}'")

        # Validate each provided argument
        for arg_name, arg_value in arguments.items():
            param_schema = schema.get_parameter(arg_name)

            if param_schema is None:
                if self.strict_mode:
                    errors.append(f"Unknown parameter: '{arg_name}'")
                else:
                    warnings.append(f"Unknown parameter: '{arg_name}'")
                continue

            # Validate the parameter
            errors.extend(
                self._validate_parameter(param_schema, arg_value, arg_name)
            )

        if errors:
            return ValidationResult.failure(errors)

        return ValidationResult.success(sanitized=sanitized, warnings=warnings)

    def _validate_parameter(
        self,
        schema: ParameterSchema,
        value: Any,
        name: str,
    ) -> list[str]:
        """
        Validate a single parameter value.

        Args:
            schema: The parameter schema.
            value: The value to validate.
            name: The parameter name (for error messages).

        Returns:
            List of error messages (empty if valid).
        """
        # Type validation
        type_error = self._validate_type(schema.type, value, name)
        if type_error:
            return [type_error]  # Skip other validations if type is wrong

        # Constraint validation
        return list(
            self._validate_constraints(
                schema.constraints, value, name, schema.type
            )
        )

    def _validate_type(
        self,
        expected_type: str,
        value: Any,
        name: str,
    ) -> str | None:
        """
        Validate value type.

        ``bool`` values are rejected for "int" and "float" parameters even
        though ``bool`` is a subclass of ``int`` in Python. Unknown type
        names and "any" are not checked.

        Returns error message if invalid, None if valid.
        """
        if expected_type == "any":
            return None

        expected = self._TYPE_MAP.get(expected_type)
        if expected is None:
            return None  # Unknown type, skip validation

        # isinstance(True, int) is True, so booleans need an explicit
        # exclusion or they would pass as numbers.
        bool_as_number = isinstance(value, bool) and expected_type in ("int", "float")

        if bool_as_number or not isinstance(value, expected):
            return (
                f"Parameter '{name}' expected type '{expected_type}', "
                f"got '{type(value).__name__}'"
            )

        return None

    def _validate_constraints(
        self,
        constraints: dict[str, Any],
        value: Any,
        name: str,
        value_type: str,
    ) -> list[str]:
        """
        Validate value against constraints.

        ``min``/``max`` apply to numeric parameters, ``min_length``/
        ``max_length`` to strings and lists, ``pattern`` and the security
        checks to strings, and ``enum`` to any value.

        Returns list of error messages.
        """
        errors: list[str] = []

        # Numeric constraints
        if value_type in ("int", "float") and isinstance(value, (int, float)):
            if "min" in constraints and value < constraints["min"]:
                errors.append(
                    f"Parameter '{name}' value {value} is less than "
                    f"minimum {constraints['min']}"
                )
            if "max" in constraints and value > constraints["max"]:
                errors.append(
                    f"Parameter '{name}' value {value} is greater than "
                    f"maximum {constraints['max']}"
                )

        # String constraints
        if value_type == "str" and isinstance(value, str):
            # Length constraints
            if "min_length" in constraints and len(value) < constraints["min_length"]:
                errors.append(
                    f"Parameter '{name}' length {len(value)} is less than "
                    f"minimum {constraints['min_length']}"
                )
            if "max_length" in constraints and len(value) > constraints["max_length"]:
                errors.append(
                    f"Parameter '{name}' length {len(value)} exceeds "
                    f"maximum {constraints['max_length']}"
                )

            # Pattern constraint (re.match: anchored at the start only)
            if "pattern" in constraints:
                pattern = constraints["pattern"]
                if not re.match(pattern, value):
                    errors.append(
                        f"Parameter '{name}' does not match pattern '{pattern}'"
                    )

            # Path traversal check (opt-in: runs only when the constraint
            # is explicitly set to a falsy value)
            if not constraints.get("allow_path_traversal", True):
                if self._check_path_traversal(value):
                    errors.append(
                        f"Parameter '{name}' contains path traversal sequence"
                    )

            # SQL injection check (opt-in, same convention as above)
            if not constraints.get("allow_sql_keywords", True):
                if self._check_sql_injection(value):
                    errors.append(
                        f"Parameter '{name}' contains potential SQL injection"
                    )

        # Enum constraint
        if "enum" in constraints and value not in constraints["enum"]:
            errors.append(
                f"Parameter '{name}' value '{value}' not in allowed values: "
                f"{constraints['enum']}"
            )

        # List constraints
        if value_type == "list" and isinstance(value, list):
            if "min_length" in constraints and len(value) < constraints["min_length"]:
                errors.append(
                    f"Parameter '{name}' has {len(value)} items, "
                    f"minimum is {constraints['min_length']}"
                )
            if "max_length" in constraints and len(value) > constraints["max_length"]:
                errors.append(
                    f"Parameter '{name}' has {len(value)} items, "
                    f"maximum is {constraints['max_length']}"
                )

        return errors

    def _check_path_traversal(self, value: str) -> bool:
        """
        Check for path traversal attempts.

        Looks for ".." in the value itself, in its percent-decoded forms
        (up to ``_MAX_URL_DECODE_PASSES`` rounds, which also covers mixed
        forms such as ".%2e"), and in the NFKC-normalized text so that
        compatibility characters such as the fullwidth full stop are
        treated like an ASCII dot.

        Args:
            value: The string to check.

        Returns:
            True if path traversal detected.
        """
        import unicodedata
        from urllib.parse import unquote

        candidate = value
        for _ in range(self._MAX_URL_DECODE_PASSES):
            if ".." in unicodedata.normalize("NFKC", candidate):
                return True
            decoded = unquote(candidate)
            if decoded == candidate:
                # Nothing left to decode, so no deeper layer can hide "..".
                return False
            candidate = decoded

        return ".." in unicodedata.normalize("NFKC", candidate)

    def _check_sql_injection(self, value: str) -> bool:
        """
        Check for SQL injection patterns.

        Args:
            value: The string to check.

        Returns:
            True if potential SQL injection detected.
        """
        return any(pattern.search(value) for pattern in self._sql_patterns)

    def validate_object_id(
        self,
        value: str,
        id_type: str = "uuid",
    ) -> bool:
        """
        Validate object ID format to prevent IDOR attacks.

        Only ASCII characters are accepted for the known formats, and the
        whole string must match (a trailing newline is rejected).

        Args:
            value: The ID value to validate.
            id_type: Expected ID format ("uuid", "numeric", "alphanumeric").

        Returns:
            True if the ID format is valid. An unrecognized ``id_type``
            is not checked and yields True. A non-string ``value`` yields
            False for the known formats.
        """
        if id_type not in ("uuid", "numeric", "alphanumeric"):
            return True  # Unknown type, allow

        if not isinstance(value, str):
            return False

        if id_type == "uuid":
            return self._UUID_PATTERN.fullmatch(value) is not None

        # str.isdigit()/isalnum() accept non-ASCII digits and letters
        # (e.g. superscripts), so restrict to ASCII first.
        if not value.isascii():
            return False

        if id_type == "numeric":
            return value.isdigit()

        return value.isalnum()

    def create_schema_from_function(
        self,
        func: Any,
        risk_level: RiskLevel = "low",
    ) -> ToolSchema:
        """
        Create a ToolSchema from a function's type hints.

        This provides a convenient way to auto-generate schemas
        from function signatures. Parameters named "self", "cls", "user"
        or "context" are skipped, as are ``*args`` / ``**kwargs``
        collectors. Parameters without a default are marked required.

        Args:
            func: The function to analyze.
            risk_level: Risk level for the schema.

        Returns:
            A ToolSchema derived from the function.

        Example:
            >>> def my_tool(query: str, limit: int = 10) -> dict:
            ...     pass
            >>> schema = validator.create_schema_from_function(my_tool)
        """
        import inspect
        import typing

        sig = inspect.signature(func)

        # Resolve string annotations (e.g. under
        # "from __future__ import annotations") to real types. Resolution
        # evaluates arbitrary annotation expressions and can fail in many
        # ways, so fall back to the raw annotations instead of aborting.
        try:
            hints = typing.get_type_hints(func)
        except Exception:
            hints = dict(getattr(func, "__annotations__", None) or {})

        collectors = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )

        parameters: dict[str, ParameterSchema] = {}
        required: list[str] = []

        for param_name, param in sig.parameters.items():
            if param_name in ("self", "cls", "user", "context"):
                continue  # Skip common non-argument parameters
            if param.kind in collectors:
                continue  # *args / **kwargs are not named tool arguments

            # Determine type
            type_hint = hints.get(param_name, Any)
            param_type = self._python_type_to_schema_type(type_hint)

            # Check if required
            has_default = param.default is not inspect.Parameter.empty
            default_value = param.default if has_default else None

            if not has_default:
                required.append(param_name)

            parameters[param_name] = ParameterSchema(
                name=param_name,
                type=param_type,
                description="",
                default=default_value,
                required=not has_default,
            )

        return ToolSchema(
            # Callables such as functools.partial objects have no __name__.
            name=getattr(func, "__name__", type(func).__name__),
            description=func.__doc__ or "",
            parameters=parameters,
            required_parameters=required,
            risk_level=risk_level,
        )

    def _python_type_to_schema_type(self, type_hint: Any) -> str:
        """
        Convert Python type hint to schema type string.

        Handles the plain builtins, parameterized generics such as
        ``list[int]`` or ``typing.Dict[str, int]`` (mapped through their
        origin), and unresolved string annotations (matched by bare
        name). Anything else, including unions, maps to "any".
        """
        import typing

        if isinstance(type_hint, str):
            # "typing.List[int]" -> "List"
            base = type_hint.split("[", 1)[0].strip().rsplit(".", 1)[-1]
            return self._NAME_TO_SCHEMA_TYPE.get(base, "any")

        origin = typing.get_origin(type_hint)
        candidate = type_hint if origin is None else origin
        try:
            return self._HINT_TO_SCHEMA_TYPE.get(candidate, "any")
        except TypeError:
            # Unhashable annotation objects cannot be looked up.
            return "any"
|