kailash 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/access_control.py +740 -0
- kailash/api/__main__.py +6 -0
- kailash/api/auth.py +668 -0
- kailash/api/custom_nodes.py +285 -0
- kailash/api/custom_nodes_secure.py +377 -0
- kailash/api/database.py +620 -0
- kailash/api/studio.py +915 -0
- kailash/api/studio_secure.py +893 -0
- kailash/mcp/__init__.py +53 -0
- kailash/mcp/__main__.py +13 -0
- kailash/mcp/ai_registry_server.py +712 -0
- kailash/mcp/client.py +447 -0
- kailash/mcp/client_new.py +334 -0
- kailash/mcp/server.py +293 -0
- kailash/mcp/server_new.py +336 -0
- kailash/mcp/servers/__init__.py +12 -0
- kailash/mcp/servers/ai_registry.py +289 -0
- kailash/nodes/__init__.py +4 -2
- kailash/nodes/ai/__init__.py +38 -0
- kailash/nodes/ai/a2a.py +1790 -0
- kailash/nodes/ai/agents.py +116 -2
- kailash/nodes/ai/ai_providers.py +206 -8
- kailash/nodes/ai/intelligent_agent_orchestrator.py +2108 -0
- kailash/nodes/ai/iterative_llm_agent.py +1280 -0
- kailash/nodes/ai/llm_agent.py +324 -1
- kailash/nodes/ai/self_organizing.py +1623 -0
- kailash/nodes/api/http.py +106 -25
- kailash/nodes/api/rest.py +116 -21
- kailash/nodes/base.py +15 -2
- kailash/nodes/base_async.py +45 -0
- kailash/nodes/base_cycle_aware.py +374 -0
- kailash/nodes/base_with_acl.py +338 -0
- kailash/nodes/code/python.py +135 -27
- kailash/nodes/data/readers.py +116 -53
- kailash/nodes/data/writers.py +16 -6
- kailash/nodes/logic/__init__.py +8 -0
- kailash/nodes/logic/async_operations.py +48 -9
- kailash/nodes/logic/convergence.py +642 -0
- kailash/nodes/logic/loop.py +153 -0
- kailash/nodes/logic/operations.py +212 -27
- kailash/nodes/logic/workflow.py +26 -18
- kailash/nodes/mixins/__init__.py +11 -0
- kailash/nodes/mixins/mcp.py +228 -0
- kailash/nodes/mixins.py +387 -0
- kailash/nodes/transform/__init__.py +8 -1
- kailash/nodes/transform/processors.py +119 -4
- kailash/runtime/__init__.py +2 -1
- kailash/runtime/access_controlled.py +458 -0
- kailash/runtime/local.py +106 -33
- kailash/runtime/parallel_cyclic.py +529 -0
- kailash/sdk_exceptions.py +90 -5
- kailash/security.py +845 -0
- kailash/tracking/manager.py +38 -15
- kailash/tracking/models.py +1 -1
- kailash/tracking/storage/filesystem.py +30 -2
- kailash/utils/__init__.py +8 -0
- kailash/workflow/__init__.py +18 -0
- kailash/workflow/convergence.py +270 -0
- kailash/workflow/cycle_analyzer.py +768 -0
- kailash/workflow/cycle_builder.py +573 -0
- kailash/workflow/cycle_config.py +709 -0
- kailash/workflow/cycle_debugger.py +760 -0
- kailash/workflow/cycle_exceptions.py +601 -0
- kailash/workflow/cycle_profiler.py +671 -0
- kailash/workflow/cycle_state.py +338 -0
- kailash/workflow/cyclic_runner.py +985 -0
- kailash/workflow/graph.py +500 -39
- kailash/workflow/migration.py +768 -0
- kailash/workflow/safety.py +365 -0
- kailash/workflow/templates.py +744 -0
- kailash/workflow/validation.py +693 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/METADATA +446 -13
- kailash-0.2.0.dist-info/RECORD +125 -0
- kailash/nodes/mcp/__init__.py +0 -11
- kailash/nodes/mcp/client.py +0 -554
- kailash/nodes/mcp/resource.py +0 -682
- kailash/nodes/mcp/server.py +0 -577
- kailash-0.1.4.dist-info/RECORD +0 -85
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
kailash/security.py
ADDED
@@ -0,0 +1,845 @@
"""
Comprehensive Security Framework for the Kailash SDK.

This module provides an extensive security framework designed to protect against
common vulnerabilities and ensure safe execution of workflows, particularly in
cyclic patterns where long-running processes may be exposed to additional risks.
It implements defense-in-depth strategies with configurable policies.

Design Philosophy:
    Implements a comprehensive security-first approach with fail-safe defaults,
    defense-in-depth strategies, and extensive monitoring. Designed to protect
    against both common web vulnerabilities and workflow-specific attack vectors
    while maintaining usability and performance.

Key Security Features:
    - **Path Security**: Comprehensive path traversal prevention
    - **Input Validation**: Multi-layer input sanitization and validation
    - **Execution Security**: Safe code execution with sandboxing
    - **Resource Limits**: Memory, CPU, and execution time constraints
    - **Injection Protection**: Command and code injection prevention
    - **Audit Logging**: Comprehensive security event logging

Cycle Security Enhancements (v0.2.0):
    Enhanced security specifically for cyclic workflows including:
    - Long-running process monitoring and limits
    - Iteration-based resource accumulation detection
    - Parameter injection attack prevention in cycles
    - State corruption detection and prevention
    - Convergence manipulation attack detection

Security Layers:
    1. **Input Layer**: Validation and sanitization of all inputs
    2. **Execution Layer**: Sandboxed execution with resource limits
    3. **File System Layer**: Controlled file access with path validation
    4. **Network Layer**: Controlled external communication
    5. **Monitoring Layer**: Real-time security event detection

Vulnerability Protection:
    - **Path Traversal**: Comprehensive path validation and canonicalization
    - **Command Injection**: Input sanitization and safe command execution
    - **Code Injection**: AST validation and safe code execution
    - **Resource Exhaustion**: Memory, CPU, and time limits
    - **Information Disclosure**: Controlled error messages and logging
    - **Privilege Escalation**: Sandboxed execution environments

Core Components:
    - SecurityConfig: Centralized security policy configuration
    - ValidationFramework: Multi-layer input validation system
    - ExecutionSandbox: Safe code execution environment
    - ResourceMonitor: Real-time resource usage monitoring
    - AuditLogger: Comprehensive security event logging

Upstream Dependencies:
    - Operating system security features for sandboxing
    - Python security libraries for validation and monitoring
    - Workflow execution framework for integration points

Downstream Consumers:
    - All workflow execution components requiring security
    - Node implementations with external resource access
    - Runtime engines executing user-provided code
    - API endpoints handling external workflow requests

Examples:
    Basic security configuration:

    >>> from kailash.security import SecurityConfig, validate_node_parameters
    >>> # Configure security policy
    >>> config = SecurityConfig(
    ...     execution_timeout=300.0,
    ...     memory_limit=1024 * 1024 * 1024,
    ...     allowed_directories=["/safe/directory"],
    ... )
    >>> # Validate node parameters
    >>> validate_node_parameters(parameters, config)

    Secure file operations:

    >>> from kailash.security import safe_open, validate_file_path
    >>> # Validate and access file safely
    >>> safe_path = validate_file_path("/user/input/path")
    >>> with safe_open(safe_path, "r") as f:
    ...     content = f.read()

    Execution timeout protection:

    >>> from kailash.security import execution_timeout
    >>> with execution_timeout(30.0):
    ...     # Raises ExecutionTimeoutError if the block exceeds 30 seconds
    ...     process_data()

    Comprehensive monitoring:

    >>> from kailash.security import SecurityMonitor
    >>> monitor = SecurityMonitor()
    >>> with monitor.track_execution("workflow_execution"):
    ...     # All security events will be monitored and logged
    ...     runtime.execute(workflow)

Security Policies:
    Configurable security policies allow adaptation to different environments:
    - **Development**: Relaxed policies for debugging and testing
    - **Staging**: Moderate policies balancing security and functionality
    - **Production**: Strict policies prioritizing security
    - **High-Security**: Maximum security for sensitive environments

See Also:
    - :mod:`kailash.nodes.code.python` for secure code execution
    - :mod:`kailash.workflow.safety` for workflow-specific safety measures
    - :doc:`/guides/security` for comprehensive security best practices
"""

import logging
import os
import re
import tempfile
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

logger = logging.getLogger(__name__)


class SecurityError(Exception):
    """Raised when a security policy violation is detected."""

    pass


class PathTraversalError(SecurityError):
    """Raised when path traversal attempt is detected."""

    pass


class CommandInjectionError(SecurityError):
    """Raised when command injection attempt is detected."""

    pass


class ExecutionTimeoutError(SecurityError):
    """Raised when execution exceeds allowed time limit."""

    pass


class MemoryLimitError(SecurityError):
    """Raised when memory usage exceeds allowed limit."""

    pass


class SecurityConfig:
    """Configuration for security policies and limits."""

    def __init__(
        self,
        allowed_directories: Optional[List[str]] = None,
        max_file_size: int = 100 * 1024 * 1024,  # 100MB
        execution_timeout: float = 300.0,  # 5 minutes
        memory_limit: int = 512 * 1024 * 1024,  # 512MB
        allowed_file_extensions: Optional[List[str]] = None,
        enable_audit_logging: bool = True,
        enable_path_validation: bool = True,
        enable_command_validation: bool = True,
    ):
        """
        Initialize security configuration.

        Args:
            allowed_directories: List of directories where file operations are permitted
            max_file_size: Maximum file size in bytes
            execution_timeout: Maximum execution time in seconds
            memory_limit: Maximum memory usage in bytes
            allowed_file_extensions: List of allowed file extensions
            enable_audit_logging: Whether to log security events
            enable_path_validation: Whether to validate file paths
            enable_command_validation: Whether to validate command strings
        """
        default_dirs = [
            os.path.expanduser("~/.kailash"),
            tempfile.gettempdir(),  # Allow all temp directories
            os.getcwd(),
            "/tmp",  # Unix temp directory
            "/var/tmp",  # Unix temp directory
        ]

        # Check for additional allowed directories from environment
        env_dirs = os.environ.get("KAILASH_ALLOWED_DIRS", "")
        if env_dirs:
            for dir_path in env_dirs.split(":"):
                if dir_path and os.path.isdir(dir_path):
                    default_dirs.append(os.path.abspath(dir_path))

        self.allowed_directories = allowed_directories or default_dirs
        self.max_file_size = max_file_size
        self.execution_timeout = execution_timeout
        self.memory_limit = memory_limit
        self.allowed_file_extensions = allowed_file_extensions or [
            ".txt",
            ".csv",
            ".tsv",
            ".json",
            ".yaml",
            ".yml",
            ".py",
            ".md",
            ".xml",
            ".log",
            ".dat",
            ".conf",
            ".cfg",
            ".ini",
            ".properties",
            ".html",
            ".htm",
            ".xhtml",
            ".jsonl",
            ".ndjson",
        ]
        self.enable_audit_logging = enable_audit_logging
        self.enable_path_validation = enable_path_validation
        self.enable_command_validation = enable_command_validation


# Global security configuration
_security_config = SecurityConfig()


def get_security_config() -> SecurityConfig:
    """Get the current security configuration."""
    return _security_config


def set_security_config(config: SecurityConfig) -> None:
    """Set the global security configuration."""
    global _security_config
    _security_config = config


def validate_file_path(
    file_path: Union[str, Path],
    config: Optional[SecurityConfig] = None,
    operation: str = "access",
) -> Path:
    """
    Validate and sanitize file paths to prevent traversal attacks.

    Args:
        file_path: The file path to validate
        config: Security configuration (uses global if None)
        operation: Description of the operation for logging

    Returns:
        Validated and normalized Path object

    Raises:
        PathTraversalError: If path traversal attempt is detected
        SecurityError: If path is outside allowed directories

    Examples:
        >>> # Safe paths
        >>> validate_file_path("data/file.txt")
        PosixPath('data/file.txt')

        >>> # Blocked paths
        >>> validate_file_path("../../../etc/passwd")
        Traceback (most recent call last):
        PathTraversalError: Path traversal attempt detected
    """
    if config is None:
        config = get_security_config()

    if not config.enable_path_validation:
        return Path(file_path)

    try:
        # Convert to Path and resolve to absolute path
        path = Path(file_path).resolve()

        # Check for path traversal indicators
        path_str = str(path)
        if ".." in str(file_path):
            if config.enable_audit_logging:
                logger.warning(
                    f"Path traversal attempt detected: {file_path} -> {path}"
                )
            raise PathTraversalError(f"Path traversal attempt detected: {file_path}")

        # Check for access to sensitive system directories
        sensitive_dirs = ["/etc", "/var", "/usr", "/root", "/boot", "/sys", "/proc"]
        if any(path_str.startswith(sensitive) for sensitive in sensitive_dirs):
            if config.enable_audit_logging:
                logger.warning(
                    f"Path traversal attempt detected: {file_path} -> {path}"
                )
            raise PathTraversalError(f"Path traversal attempt detected: {file_path}")

        # Validate file extension
        if path.suffix and path.suffix.lower() not in config.allowed_file_extensions:
            if config.enable_audit_logging:
                logger.warning(f"File extension not allowed: {path.suffix} in {path}")
            raise SecurityError(f"File extension not allowed: {path.suffix}")

        # Check if path is within allowed directories
        path_in_allowed_dir = False
        for allowed_dir in config.allowed_directories:
            try:
                allowed_path = Path(allowed_dir).resolve()
                # Use more robust relative path checking
                try:
                    path.relative_to(allowed_path)
                    path_in_allowed_dir = True
                    break
                except ValueError:
                    # Try alternative method for compatibility
                    if str(path).startswith(str(allowed_path)):
                        path_in_allowed_dir = True
                        break
            except (ValueError, OSError):
                # Handle cases where path resolution fails
                if str(path).startswith(str(allowed_dir)):
                    path_in_allowed_dir = True
                    break

        if not path_in_allowed_dir:
            if config.enable_audit_logging:
                logger.warning(f"Path outside allowed directories: {path}")
            raise SecurityError(f"Path outside allowed directories: {path}")

        if config.enable_audit_logging:
            logger.info(f"File path validated for {operation}: {path}")

        return path

    except (OSError, ValueError) as e:
        if config.enable_audit_logging:
            logger.error(f"Path validation error: {e}")
        raise SecurityError(f"Invalid file path: {file_path}")


def safe_open(
    file_path: Union[str, Path],
    mode: str = "r",
    config: Optional[SecurityConfig] = None,
    **kwargs,
):
    """
    Safely open a file with security validation.

    Args:
        file_path: Path to the file
        mode: File open mode
        config: Security configuration
        **kwargs: Additional arguments for open()

    Returns:
        File handle

    Raises:
        SecurityError: If security validation fails

    Examples:
        >>> with safe_open("data/file.txt", "r") as f:
        ...     content = f.read()
    """
    if config is None:
        config = get_security_config()

    # Validate the file path
    validated_path = validate_file_path(file_path, config, f"open({mode})")

    # Check file size for read operations
    if "r" in mode and validated_path.exists():
        file_size = validated_path.stat().st_size
        if file_size > config.max_file_size:
            raise SecurityError(
                f"File too large: {file_size} bytes > {config.max_file_size}"
            )

    # Create directory if writing and it doesn't exist
    if "w" in mode or "a" in mode:
        validated_path.parent.mkdir(parents=True, exist_ok=True)

    if config.enable_audit_logging:
        logger.info(f"Opening file: {validated_path} (mode: {mode})")

    return open(validated_path, mode, **kwargs)


def validate_command_string(
    command: str, config: Optional[SecurityConfig] = None
) -> str:
    """
    Validate command strings to prevent injection attacks.

    Args:
        command: Command string to validate
        config: Security configuration

    Returns:
        Validated command string

    Raises:
        CommandInjectionError: If command injection attempt is detected
    """
    if config is None:
        config = get_security_config()

    if not config.enable_command_validation:
        return command

    # Check for common injection patterns
    dangerous_patterns = [
        r";",  # Command chaining
        r"&&",  # Logical AND command chaining
        r"\|\|",  # Logical OR command chaining
        r"\|",  # Pipe operations
        r"\$\(",  # Command substitution
        r"`.*`",  # Backtick command substitution
        r">\s*/dev/",  # Redirect to devices
        r"<.*>",  # Input/output redirection
        r"\beval\b",  # eval command
        r"\bexec\b",  # exec command
        r"rm\s+.*(\/|\*)",  # rm with dangerous paths
        r"cat\s+\/etc\/",  # reading system files
    ]

    for pattern in dangerous_patterns:
        if re.search(pattern, command, re.IGNORECASE):
            if config.enable_audit_logging:
                logger.warning(f"Command injection attempt detected: {command}")
            raise CommandInjectionError(f"Potentially dangerous command: {command}")

    if config.enable_audit_logging:
        logger.info(
            f"Command validated: {command[:100]}{'...' if len(command) > 100 else ''}"
        )

    return command


@contextmanager
def execution_timeout(
    timeout: Optional[float] = None, config: Optional[SecurityConfig] = None
):
    """
    Context manager to enforce execution timeouts.

    Args:
        timeout: Timeout in seconds (uses config default if None)
        config: Security configuration

    Raises:
        ExecutionTimeoutError: If execution exceeds timeout

    Examples:
        >>> with execution_timeout(30.0):
        ...     # Code that should complete within 30 seconds
        ...     time.sleep(5)
    """
    if config is None:
        config = get_security_config()

    if timeout is None:
        timeout = config.execution_timeout

    start_time = time.time()

    try:
        yield
    finally:
        elapsed_time = time.time() - start_time
        if elapsed_time > timeout:
            if config.enable_audit_logging:
                logger.warning(f"Execution timeout: {elapsed_time:.2f}s > {timeout}s")
            raise ExecutionTimeoutError(
                f"Execution timeout: {elapsed_time:.2f}s > {timeout}s"
            )


def sanitize_input(
    value: Any,
    max_length: int = 10000,
    allowed_types: Optional[List[type]] = None,
    config: Optional[SecurityConfig] = None,
) -> Any:
    """
    Sanitize input values to prevent injection attacks.

    Args:
        value: Value to sanitize
        max_length: Maximum string length
        allowed_types: List of allowed types
        config: Security configuration

    Returns:
        Sanitized value

    Raises:
        SecurityError: If input fails validation
    """
    if config is None:
        config = get_security_config()

    if allowed_types is None:
        allowed_types = [str, int, float, bool, list, dict, tuple, set, type(None)]

    # Core data science types
    try:
        import pandas as pd

        allowed_types.extend(
            [
                pd.DataFrame,
                pd.Series,
                pd.Index,
                pd.MultiIndex,
                pd.Categorical,
                pd.Timestamp,
                pd.Timedelta,
                pd.Period,
                pd.DatetimeIndex,
                pd.TimedeltaIndex,
                pd.PeriodIndex,
            ]
        )
    except ImportError:
        pass

    try:
        import numpy as np

        numpy_types = [
            np.ndarray,
            np.ma.MaskedArray,
            # All numpy scalar types
            np.int8,
            np.int16,
            np.int32,
            np.int64,
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
            np.float16,
            np.float32,
            np.float64,
            np.complex64,
            np.complex128,
            np.bool_,
            np.object_,
            np.datetime64,
            np.timedelta64,
        ]

        # Add matrix if available (deprecated in NumPy 2.0)
        if hasattr(np, "matrix"):
            numpy_types.append(np.matrix)

        # Handle NumPy version differences
        if hasattr(np, "string_"):
            numpy_types.append(np.string_)
        elif hasattr(np, "bytes_"):
            numpy_types.append(np.bytes_)

        if hasattr(np, "unicode_"):
            numpy_types.append(np.unicode_)
        elif hasattr(np, "str_"):
            numpy_types.append(np.str_)

        # Add platform-specific types if available
        if hasattr(np, "float128"):
            numpy_types.append(np.float128)
        if hasattr(np, "complex256"):
            numpy_types.append(np.complex256)

        # Add generic numpy type to catch all numpy scalars
        if hasattr(np, "generic"):
            numpy_types.append(np.generic)

        allowed_types.extend(numpy_types)
    except ImportError:
        pass

    # Deep learning frameworks
    try:
        import torch

        allowed_types.extend(
            [
                torch.Tensor,
                torch.nn.Module,
                torch.nn.Parameter,
                torch.cuda.FloatTensor,
                torch.cuda.DoubleTensor,
                torch.cuda.IntTensor,
                torch.cuda.LongTensor,
            ]
        )
    except ImportError:
        pass

    try:
        import tensorflow as tf

        allowed_types.extend(
            [
                tf.Tensor,
                tf.Variable,
                tf.constant,
                tf.keras.Model,
                tf.keras.layers.Layer,
                tf.data.Dataset,
            ]
        )
    except ImportError:
        pass

    # Scientific computing
    try:
        import scipy.sparse

        allowed_types.extend(
            [
                scipy.sparse.csr_matrix,
                scipy.sparse.csc_matrix,
                scipy.sparse.coo_matrix,
                scipy.sparse.dia_matrix,
                scipy.sparse.dok_matrix,
                scipy.sparse.lil_matrix,
            ]
        )
    except ImportError:
        pass

    # Machine learning frameworks
    try:
        from sklearn.base import BaseEstimator, TransformerMixin

        allowed_types.extend([BaseEstimator, TransformerMixin])
    except ImportError:
        pass

    try:
        import xgboost as xgb

        allowed_types.extend([xgb.DMatrix, xgb.Booster])
    except ImportError:
        pass

    try:
        import lightgbm as lgb

        allowed_types.extend([lgb.Dataset, lgb.Booster])
    except ImportError:
        pass

    # Data visualization
    try:
        from matplotlib.axes import Axes
        from matplotlib.figure import Figure

        allowed_types.extend([Figure, Axes])
    except ImportError:
        pass

    try:
        import plotly.graph_objects as go

        allowed_types.append(go.Figure)
    except ImportError:
        pass

    # Statistical modeling
    try:
        import statsmodels.api as sm

        allowed_types.extend([sm.OLS, sm.GLM, sm.GLS, sm.WLS, sm.RegressionResults])
    except ImportError:
        pass

    # Image processing
    try:
        from PIL import Image

        allowed_types.append(Image.Image)
    except ImportError:
        pass

    try:
        # OpenCV uses numpy arrays, already covered
        import cv2  # noqa: F401
    except ImportError:
        pass

    # NLP libraries
    try:
        from spacy.tokens import Doc, Span, Token

        allowed_types.extend([Doc, Span, Token])
    except ImportError:
        pass

    # Graph/Network analysis
    try:
        import networkx as nx

        allowed_types.extend([nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph])
    except ImportError:
        pass

    # Time series
    try:
        from prophet import Prophet
        from prophet.forecaster import Prophet as ProphetModel

        allowed_types.extend([Prophet, ProphetModel])
    except ImportError:
        pass

    # Type validation - allow data science types
    type_allowed = any(isinstance(value, t) for t in allowed_types)

    # Additional check for numpy scalar types
    if not type_allowed:
        try:
            import numpy as np

            # Check if it's any numpy type
            if isinstance(value, np.generic):
                type_allowed = True
        except ImportError:
            pass

    if not type_allowed:
        raise SecurityError(f"Input type not allowed: {type(value)}")

    # String sanitization
    if isinstance(value, str):
        if len(value) > max_length:
            raise SecurityError(f"Input too long: {len(value)} > {max_length}")

        # Remove potentially dangerous characters and patterns
        sanitized = re.sub(r"[<>;&|`$()]", "", value)
        # Remove script tags and javascript
        sanitized = re.sub(
            r"<script.*?</script>", "", sanitized, flags=re.IGNORECASE | re.DOTALL
        )
        sanitized = re.sub(r"javascript:", "", sanitized, flags=re.IGNORECASE)

        if sanitized != value and config.enable_audit_logging:
            logger.warning(f"Input sanitized: {value[:50]}... -> {sanitized[:50]}...")

        return sanitized

    # Dictionary sanitization (recursive)
    if isinstance(value, dict):
        return {
            sanitize_input(k, max_length, allowed_types, config): sanitize_input(
                v, max_length, allowed_types, config
            )
            for k, v in value.items()
        }

    # List sanitization (recursive)
    if isinstance(value, list):
        return [
            sanitize_input(item, max_length, allowed_types, config) for item in value
        ]

    return value


def create_secure_temp_dir(
    prefix: str = "kailash_", config: Optional[SecurityConfig] = None
) -> Path:
    """
    Create a secure temporary directory.

    Args:
        prefix: Prefix for the directory name
        config: Security configuration

    Returns:
        Path to the secure temporary directory
    """
    if config is None:
        config = get_security_config()

    # Create temp directory with secure permissions
    temp_dir = Path(tempfile.mkdtemp(prefix=prefix))

    # Set restrictive permissions (owner only)
    temp_dir.chmod(0o700)

    if config.enable_audit_logging:
        logger.info(f"Created secure temp directory: {temp_dir}")

    return temp_dir


def validate_node_parameters(
    parameters: Dict[str, Any], config: Optional[SecurityConfig] = None
) -> Dict[str, Any]:
    """
    Validate and sanitize node parameters.

    Args:
        parameters: Node parameters to validate
        config: Security configuration

    Returns:
        Validated and sanitized parameters

    Raises:
        SecurityError: If parameters fail validation
    """
    if config is None:
        config = get_security_config()

    validated_params = {}

    for key, value in parameters.items():
        # Sanitize parameter key
        clean_key = sanitize_input(key, config=config)

        # Special handling for file paths
        if "path" in key.lower() or "file" in key.lower():
            if isinstance(value, (str, Path)):
                validated_value = validate_file_path(value, config, f"parameter {key}")
            else:
                validated_value = sanitize_input(value, config=config)
        else:
            validated_value = sanitize_input(value, config=config)

        validated_params[clean_key] = validated_value

    if config.enable_audit_logging:
        logger.info(f"Node parameters validated: {list(validated_params.keys())}")

    return validated_params
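
For orientation, here is a minimal usage sketch that exercises the public helpers added above. It is not part of the release; the scratch directory, file name, and limit values are illustrative assumptions.

# Illustrative sketch only: paths and values below are hypothetical.
import tempfile

from kailash.security import (
    SecurityConfig,
    execution_timeout,
    safe_open,
    sanitize_input,
    set_security_config,
    validate_node_parameters,
)

# Confine file access to a scratch directory and tighten limits.
scratch = tempfile.mkdtemp(prefix="kailash_demo_")
set_security_config(
    SecurityConfig(
        allowed_directories=[scratch],
        max_file_size=10 * 1024 * 1024,  # 10 MB
        execution_timeout=60.0,  # seconds
    )
)

# Keys containing "path" or "file" are path-validated; other values are sanitized.
params = validate_node_parameters(
    {"name": "csv_loader", "file_path": f"{scratch}/input.csv"}
)

with safe_open(f"{scratch}/input.csv", "w") as f:
    f.write("a,b\n1,2\n")

# Note: elapsed time is checked when the block exits, so an overrun raises
# ExecutionTimeoutError afterwards rather than interrupting the block.
with execution_timeout(5.0):
    with safe_open(params["file_path"], "r") as f:
        rows = [sanitize_input(line) for line in f]

Because set_security_config replaces the process-wide default, every helper called without an explicit config argument picks up the same policy.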