kailash 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. kailash/__init__.py +1 -1
  2. kailash/access_control.py +740 -0
  3. kailash/api/__main__.py +6 -0
  4. kailash/api/auth.py +668 -0
  5. kailash/api/custom_nodes.py +285 -0
  6. kailash/api/custom_nodes_secure.py +377 -0
  7. kailash/api/database.py +620 -0
  8. kailash/api/studio.py +915 -0
  9. kailash/api/studio_secure.py +893 -0
  10. kailash/mcp/__init__.py +53 -0
  11. kailash/mcp/__main__.py +13 -0
  12. kailash/mcp/ai_registry_server.py +712 -0
  13. kailash/mcp/client.py +447 -0
  14. kailash/mcp/client_new.py +334 -0
  15. kailash/mcp/server.py +293 -0
  16. kailash/mcp/server_new.py +336 -0
  17. kailash/mcp/servers/__init__.py +12 -0
  18. kailash/mcp/servers/ai_registry.py +289 -0
  19. kailash/nodes/__init__.py +4 -2
  20. kailash/nodes/ai/__init__.py +38 -0
  21. kailash/nodes/ai/a2a.py +1790 -0
  22. kailash/nodes/ai/agents.py +116 -2
  23. kailash/nodes/ai/ai_providers.py +206 -8
  24. kailash/nodes/ai/intelligent_agent_orchestrator.py +2108 -0
  25. kailash/nodes/ai/iterative_llm_agent.py +1280 -0
  26. kailash/nodes/ai/llm_agent.py +324 -1
  27. kailash/nodes/ai/self_organizing.py +1623 -0
  28. kailash/nodes/api/http.py +106 -25
  29. kailash/nodes/api/rest.py +116 -21
  30. kailash/nodes/base.py +15 -2
  31. kailash/nodes/base_async.py +45 -0
  32. kailash/nodes/base_cycle_aware.py +374 -0
  33. kailash/nodes/base_with_acl.py +338 -0
  34. kailash/nodes/code/python.py +135 -27
  35. kailash/nodes/data/readers.py +116 -53
  36. kailash/nodes/data/writers.py +16 -6
  37. kailash/nodes/logic/__init__.py +8 -0
  38. kailash/nodes/logic/async_operations.py +48 -9
  39. kailash/nodes/logic/convergence.py +642 -0
  40. kailash/nodes/logic/loop.py +153 -0
  41. kailash/nodes/logic/operations.py +212 -27
  42. kailash/nodes/logic/workflow.py +26 -18
  43. kailash/nodes/mixins/__init__.py +11 -0
  44. kailash/nodes/mixins/mcp.py +228 -0
  45. kailash/nodes/mixins.py +387 -0
  46. kailash/nodes/transform/__init__.py +8 -1
  47. kailash/nodes/transform/processors.py +119 -4
  48. kailash/runtime/__init__.py +2 -1
  49. kailash/runtime/access_controlled.py +458 -0
  50. kailash/runtime/local.py +106 -33
  51. kailash/runtime/parallel_cyclic.py +529 -0
  52. kailash/sdk_exceptions.py +90 -5
  53. kailash/security.py +845 -0
  54. kailash/tracking/manager.py +38 -15
  55. kailash/tracking/models.py +1 -1
  56. kailash/tracking/storage/filesystem.py +30 -2
  57. kailash/utils/__init__.py +8 -0
  58. kailash/workflow/__init__.py +18 -0
  59. kailash/workflow/convergence.py +270 -0
  60. kailash/workflow/cycle_analyzer.py +768 -0
  61. kailash/workflow/cycle_builder.py +573 -0
  62. kailash/workflow/cycle_config.py +709 -0
  63. kailash/workflow/cycle_debugger.py +760 -0
  64. kailash/workflow/cycle_exceptions.py +601 -0
  65. kailash/workflow/cycle_profiler.py +671 -0
  66. kailash/workflow/cycle_state.py +338 -0
  67. kailash/workflow/cyclic_runner.py +985 -0
  68. kailash/workflow/graph.py +500 -39
  69. kailash/workflow/migration.py +768 -0
  70. kailash/workflow/safety.py +365 -0
  71. kailash/workflow/templates.py +744 -0
  72. kailash/workflow/validation.py +693 -0
  73. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/METADATA +446 -13
  74. kailash-0.2.0.dist-info/RECORD +125 -0
  75. kailash/nodes/mcp/__init__.py +0 -11
  76. kailash/nodes/mcp/client.py +0 -554
  77. kailash/nodes/mcp/resource.py +0 -682
  78. kailash/nodes/mcp/server.py +0 -577
  79. kailash-0.1.4.dist-info/RECORD +0 -85
  80. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
  81. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
  82. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
  83. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
kailash/security.py ADDED
@@ -0,0 +1,845 @@
1
+ """
2
+ Comprehensive Security Framework for the Kailash SDK.
3
+
4
+ This module provides an extensive security framework designed to protect against
5
+ common vulnerabilities and ensure safe execution of workflows, particularly in
6
+ cyclic patterns where long-running processes may be exposed to additional risks.
7
+ It implements defense-in-depth strategies with configurable policies.
8
+
9
+ Design Philosophy:
10
+ Implements a comprehensive security-first approach with fail-safe defaults,
11
+ defense-in-depth strategies, and extensive monitoring. Designed to protect
12
+ against both common web vulnerabilities and workflow-specific attack vectors
13
+ while maintaining usability and performance.
14
+
15
+ Key Security Features:
16
+ - **Path Security**: Comprehensive path traversal prevention
17
+ - **Input Validation**: Multi-layer input sanitization and validation
18
+ - **Execution Security**: Safe code execution with sandboxing
19
+ - **Resource Limits**: Memory, CPU, and execution time constraints
20
+ - **Injection Protection**: Command and code injection prevention
21
+ - **Audit Logging**: Comprehensive security event logging
22
+
23
+ Cycle Security Enhancements (v0.2.0):
24
+ Enhanced security specifically for cyclic workflows including:
25
+ - Long-running process monitoring and limits
26
+ - Iteration-based resource accumulation detection
27
+ - Parameter injection attack prevention in cycles
28
+ - State corruption detection and prevention
29
+ - Convergence manipulation attack detection
30
+
31
+ Security Layers:
32
+ 1. **Input Layer**: Validation and sanitization of all inputs
33
+ 2. **Execution Layer**: Sandboxed execution with resource limits
34
+ 3. **File System Layer**: Controlled file access with path validation
35
+ 4. **Network Layer**: Controlled external communication
36
+ 5. **Monitoring Layer**: Real-time security event detection
37
+
38
+ Vulnerability Protection:
39
+ - **Path Traversal**: Comprehensive path validation and canonicalization
40
+ - **Command Injection**: Input sanitization and safe command execution
41
+ - **Code Injection**: AST validation and safe code execution
42
+ - **Resource Exhaustion**: Memory, CPU, and time limits
43
+ - **Information Disclosure**: Controlled error messages and logging
44
+ - **Privilege Escalation**: Sandboxed execution environments
45
+
46
+ Core Components:
47
+ - SecurityConfig: Centralized security policy configuration
48
+ - ValidationFramework: Multi-layer input validation system
49
+ - ExecutionSandbox: Safe code execution environment
50
+ - ResourceMonitor: Real-time resource usage monitoring
51
+ - AuditLogger: Comprehensive security event logging
52
+
53
+ Upstream Dependencies:
54
+ - Operating system security features for sandboxing
55
+ - Python security libraries for validation and monitoring
56
+ - Workflow execution framework for integration points
57
+
58
+ Downstream Consumers:
59
+ - All workflow execution components requiring security
60
+ - Node implementations with external resource access
61
+ - Runtime engines executing user-provided code
62
+ - API endpoints handling external workflow requests
63
+
64
+ Examples:
65
+ Basic security configuration:
66
+
67
+ >>> from kailash.security import SecurityConfig, validate_node_parameters
68
+ >>> # Configure security policy
69
+ >>> config = SecurityConfig(
70
+ ... execution_timeout=300.0,
71
+ ... memory_limit=1024 * 1024 * 1024,
72
+ ... allowed_directories=["/safe/directory"]
73
+ ... )
74
+ >>> # Validate node parameters
75
+ >>> validate_node_parameters(parameters, config)
76
+
77
+ Secure file operations:
78
+
79
+ >>> from kailash.security import safe_open, validate_file_path
80
+ >>> # Validate and access file safely
81
+ >>> safe_path = validate_file_path("/safe/directory/data.txt", config)
82
+ >>> with safe_open(safe_path, "r", config) as f:
83
+ ... content = f.read()
84
+
85
+ Execution timeout protection:
86
+
87
+ >>> from kailash.security import execution_timeout
88
+ >>> @execution_timeout(timeout=30)
89
+ ... def potentially_long_running_function():
90
+ ... # ExecutionTimeoutError is raised after the call finishes if it took longer than 30 seconds
91
+ ... return process_data()
92
+
93
+ Comprehensive monitoring:
94
+
95
+ >>> from kailash.security import SecurityMonitor
96
+ >>> monitor = SecurityMonitor()
97
+ >>> with monitor.track_execution("workflow_execution"):
98
+ ... # All security events will be monitored and logged
99
+ ... runtime.execute(workflow)
100
+
101
+ Security Policies:
102
+ Configurable security policies allow adaptation to different environments:
103
+ - **Development**: Relaxed policies for debugging and testing
104
+ - **Staging**: Moderate policies balancing security and functionality
105
+ - **Production**: Strict policies prioritizing security
106
+ - **High-Security**: Maximum security for sensitive environments
107
+
108
+ See Also:
109
+ - :mod:`kailash.nodes.code.python` for secure code execution
110
+ - :mod:`kailash.workflow.safety` for workflow-specific safety measures
111
+ - :doc:`/guides/security` for comprehensive security best practices
112
+ """
113
+
114
+ import logging
115
+ import os
116
+ import re
117
+ import tempfile
118
+ import time
119
+ from contextlib import contextmanager
120
+ from pathlib import Path
121
+ from typing import Any, Dict, List, Optional, Union
122
+
123
+ logger = logging.getLogger(__name__)
124
+
125
+
126
class SecurityError(Exception):
    """Base exception for security policy violations reported by this module."""
130
+
131
+
132
class PathTraversalError(SecurityError):
    """Security violation: a file path was rejected as a traversal attempt."""
136
+
137
+
138
class CommandInjectionError(SecurityError):
    """Security violation: a command string matched an injection pattern."""
142
+
143
+
144
class ExecutionTimeoutError(SecurityError):
    """Security violation: execution ran longer than the permitted time."""
148
+
149
+
150
class MemoryLimitError(SecurityError):
    """Security violation: memory usage went beyond the permitted limit."""
154
+
155
+
156
class SecurityConfig:
    """Configuration for security policies and limits.

    Every constructor argument is stored unchanged on an attribute of the
    same name, except that ``allowed_directories`` and
    ``allowed_file_extensions`` fall back to built-in defaults when they are
    omitted (or empty).
    """

    def __init__(
        self,
        allowed_directories: Optional[List[str]] = None,
        max_file_size: int = 100 * 1024 * 1024,  # 100MB
        execution_timeout: float = 300.0,  # 5 minutes
        memory_limit: int = 512 * 1024 * 1024,  # 512MB
        allowed_file_extensions: Optional[List[str]] = None,
        enable_audit_logging: bool = True,
        enable_path_validation: bool = True,
        enable_command_validation: bool = True,
    ):
        """
        Initialize security configuration.

        Args:
            allowed_directories: List of directories where file operations are
                permitted. When omitted or empty, the defaults are used:
                ``~/.kailash``, the system temp directory, the current working
                directory, ``/tmp``, ``/var/tmp`` and every existing directory
                listed in the ``KAILASH_ALLOWED_DIRS`` environment variable.
            max_file_size: Maximum file size in bytes
            execution_timeout: Maximum execution time in seconds
            memory_limit: Maximum memory usage in bytes
            allowed_file_extensions: List of allowed file extensions (with the
                leading dot). When omitted or empty, a built-in list of text
                and data formats is used.
            enable_audit_logging: Whether to log security events
            enable_path_validation: Whether to validate file paths
            enable_command_validation: Whether to validate command strings
        """
        # NOTE(review): validate_file_path rejects everything under /var before
        # it consults this list, so on systems where /var is a real directory
        # the "/var/tmp" entry cannot currently be reached.
        default_dirs = [
            os.path.expanduser("~/.kailash"),
            tempfile.gettempdir(),  # Allow all temp directories
            os.getcwd(),
            "/tmp",  # Unix temp directory
            "/var/tmp",  # Unix temp directory
        ]

        # Extra directories from the environment. The list uses the platform's
        # PATH-style separator (":" on POSIX, ";" on Windows) so that Windows
        # drive letters such as "C:\data" are not torn apart.
        env_dirs = os.environ.get("KAILASH_ALLOWED_DIRS", "")
        for dir_path in env_dirs.split(os.pathsep):
            # Empty segments and non-existent directories are ignored.
            if dir_path and os.path.isdir(dir_path):
                default_dirs.append(os.path.abspath(dir_path))

        # The environment additions only apply to the default list; an explicit
        # allowed_directories argument is taken as given.
        self.allowed_directories = allowed_directories or default_dirs
        self.max_file_size = max_file_size
        self.execution_timeout = execution_timeout
        self.memory_limit = memory_limit
        self.allowed_file_extensions = allowed_file_extensions or [
            ".txt",
            ".csv",
            ".tsv",
            ".json",
            ".yaml",
            ".yml",
            ".py",
            ".md",
            ".xml",
            ".log",
            ".dat",
            ".conf",
            ".cfg",
            ".ini",
            ".properties",
            ".html",
            ".htm",
            ".xhtml",
            ".jsonl",
            ".ndjson",
        ]
        self.enable_audit_logging = enable_audit_logging
        self.enable_path_validation = enable_path_validation
        self.enable_command_validation = enable_command_validation
227
+
228
+
229
# Module-wide default configuration, built once when this module is imported.
# Read it through get_security_config(); replace it with set_security_config().
_security_config = SecurityConfig()
231
+
232
+
233
def get_security_config() -> SecurityConfig:
    """Return the module-wide SecurityConfig currently in effect.

    This is the object the helpers in this module fall back to when their
    ``config`` argument is left as ``None``.
    """
    return _security_config
236
+
237
+
238
def set_security_config(config: SecurityConfig) -> None:
    """Replace the module-wide SecurityConfig.

    Args:
        config: Configuration that later get_security_config() calls return.
    """
    # Rebinds the module-level name; the previous config object is not mutated.
    global _security_config
    _security_config = config
242
+
243
+
244
def _is_path_within(candidate: Path, parent: Path) -> bool:
    """Return True when *candidate* equals *parent* or lies beneath it."""
    try:
        candidate.relative_to(parent)
    except ValueError:
        return False
    return True


def validate_file_path(
    file_path: Union[str, Path],
    config: Optional["SecurityConfig"] = None,
    operation: str = "access",
) -> Path:
    """
    Validate and sanitize file paths to prevent traversal attacks.

    The checks run in this order: explicit ``..`` components, location under
    a sensitive system directory, file extension (paths without a suffix skip
    this check), and finally containment in one of the allowed directories.
    Containment is decided on whole path components, so an allowed directory
    ``/data/safe`` does not admit ``/data/safe_other/file.txt``.

    Args:
        file_path: The file path to validate
        config: Security configuration (uses global if None)
        operation: Description of the operation for logging

    Returns:
        The resolved, absolute Path. When path validation is disabled in the
        configuration, ``Path(file_path)`` is returned without any checks.

    Raises:
        PathTraversalError: If the path contains a ``..`` component or points
            into a sensitive system directory
        SecurityError: If the extension is not allowed, the path is outside
            the allowed directories, or the path cannot be resolved

    Examples:
        >>> # Safe paths (relative to an allowed working directory)
        >>> validate_file_path("data/file.txt")  # doctest: +SKIP
        PosixPath('/current/working/dir/data/file.txt')

        >>> # Blocked paths
        >>> validate_file_path("../../../etc/passwd")
        Traceback (most recent call last):
        PathTraversalError: Path traversal attempt detected
    """
    if config is None:
        config = get_security_config()

    if not config.enable_path_validation:
        return Path(file_path)

    try:
        # Convert to Path and resolve to absolute path
        path = Path(file_path).resolve()

        # Reject explicit parent-directory components. Splitting on both
        # separators keeps "..\\x" blocked on every platform while no longer
        # rejecting ordinary names that merely contain two dots ("a..b.txt").
        if ".." in re.split(r"[\\/]", str(file_path)):
            if config.enable_audit_logging:
                logger.warning(
                    f"Path traversal attempt detected: {file_path} -> {path}"
                )
            raise PathTraversalError(f"Path traversal attempt detected: {file_path}")

        # Check for access to sensitive system directories. The comparison is
        # component-wise so that "/etcfoo" is not mistaken for "/etc".
        sensitive_dirs = ["/etc", "/var", "/usr", "/root", "/boot", "/sys", "/proc"]
        if any(_is_path_within(path, Path(sensitive)) for sensitive in sensitive_dirs):
            if config.enable_audit_logging:
                logger.warning(
                    f"Path traversal attempt detected: {file_path} -> {path}"
                )
            raise PathTraversalError(f"Path traversal attempt detected: {file_path}")

        # Validate file extension
        if path.suffix and path.suffix.lower() not in config.allowed_file_extensions:
            if config.enable_audit_logging:
                logger.warning(f"File extension not allowed: {path.suffix} in {path}")
            raise SecurityError(f"File extension not allowed: {path.suffix}")

        # Check if path is within allowed directories
        path_in_allowed_dir = False
        for allowed_dir in config.allowed_directories:
            try:
                allowed_path = Path(allowed_dir).resolve()
            except (ValueError, OSError):
                # An allowed directory that cannot be resolved cannot vouch
                # for anything; skip it instead of guessing from a prefix.
                continue
            # No string-prefix fallback here: a prefix match would let
            # "/tmp/safe_evil" pass for an allowed "/tmp/safe".
            if _is_path_within(path, allowed_path):
                path_in_allowed_dir = True
                break

        if not path_in_allowed_dir:
            if config.enable_audit_logging:
                logger.warning(f"Path outside allowed directories: {path}")
            raise SecurityError(f"Path outside allowed directories: {path}")

        if config.enable_audit_logging:
            logger.info(f"File path validated for {operation}: {path}")

        return path

    except (OSError, ValueError) as e:
        if config.enable_audit_logging:
            logger.error(f"Path validation error: {e}")
        raise SecurityError(f"Invalid file path: {file_path}") from e
343
+
344
+
345
def safe_open(
    file_path: Union[str, Path],
    mode: str = "r",
    config: Optional[SecurityConfig] = None,
    **kwargs,
):
    """
    Open a file after running it through the module's path checks.

    Args:
        file_path: Path to the file
        mode: File open mode, passed on to open()
        config: Security configuration (uses global if None)
        **kwargs: Additional arguments for open()

    Returns:
        The file object returned by open()

    Raises:
        SecurityError: If path validation fails, or if the mode contains "r"
            and the existing file is larger than ``config.max_file_size``

    Examples:
        >>> with safe_open("data/file.txt", "r") as f:
        ...     content = f.read()
    """
    cfg = get_security_config() if config is None else config

    # All path policy decisions are delegated to validate_file_path.
    target = validate_file_path(file_path, cfg, f"open({mode})")

    # Size limit applies to any mode containing "r", and only to existing files.
    reading = "r" in mode
    if reading and target.exists():
        size_in_bytes = target.stat().st_size
        if size_in_bytes > cfg.max_file_size:
            raise SecurityError(
                f"File too large: {size_in_bytes} bytes > {cfg.max_file_size}"
            )

    # Write/append modes get their parent directory created on demand.
    if any(flag in mode for flag in ("w", "a")):
        target.parent.mkdir(parents=True, exist_ok=True)

    if cfg.enable_audit_logging:
        logger.info(f"Opening file: {target} (mode: {mode})")

    return open(target, mode, **kwargs)
392
+
393
+
394
def validate_command_string(
    command: str, config: Optional[SecurityConfig] = None
) -> str:
    """
    Reject command strings that match a deny-list of shell injection patterns.

    Args:
        command: Command string to validate
        config: Security configuration (uses global if None)

    Returns:
        The command string, unchanged. When command validation is disabled in
        the configuration it is returned without being inspected.

    Raises:
        CommandInjectionError: If any deny-list pattern is found in the command
    """
    policy = config if config is not None else get_security_config()

    if not policy.enable_command_validation:
        return command

    # Deny-list of regular expressions, matched case-insensitively.
    dangerous_patterns = (
        r";",  # Command chaining
        r"&&",  # Logical AND command chaining
        r"\|\|",  # Logical OR command chaining
        r"\|",  # Pipe operations
        r"\$\(",  # Command substitution
        r"`.*`",  # Backtick command substitution
        r">\s*/dev/",  # Redirect to devices
        r"<.*>",  # Input/output redirection
        r"\beval\b",  # eval command
        r"\bexec\b",  # exec command
        r"rm\s+.*(\/|\*)",  # rm with dangerous paths
        r"cat\s+\/etc\/",  # reading system files
    )

    looks_dangerous = any(
        re.search(regex, command, re.IGNORECASE) for regex in dangerous_patterns
    )
    if looks_dangerous:
        if policy.enable_audit_logging:
            logger.warning(f"Command injection attempt detected: {command}")
        raise CommandInjectionError(f"Potentially dangerous command: {command}")

    if policy.enable_audit_logging:
        # Only the first 100 characters of an accepted command are logged.
        logger.info(
            f"Command validated: {command[:100]}{'...' if len(command) > 100 else ''}"
        )

    return command
444
+
445
+
446
@contextmanager
def execution_timeout(
    timeout: Optional[float] = None, config: Optional["SecurityConfig"] = None
):
    """
    Context manager that reports blocks which ran longer than allowed.

    The guarded block is never interrupted: the elapsed time is measured when
    the block finishes (normally or with an exception) and an error is raised
    at that point if the limit was exceeded.

    Args:
        timeout: Timeout in seconds (uses config default if None)
        config: Security configuration (uses global if None)

    Raises:
        ExecutionTimeoutError: If the block took longer than the timeout. When
            the block itself raised, that exception is kept as the implicit
            ``__context__`` of the timeout error.

    Examples:
        >>> with execution_timeout(30.0):
        ...     # Code that should complete within 30 seconds
        ...     time.sleep(5)
    """
    if config is None:
        config = get_security_config()

    if timeout is None:
        timeout = config.execution_timeout

    # Use the monotonic clock: wall-clock time can jump (NTP sync, manual
    # changes), which would produce spurious or missed timeouts.
    start_time = time.monotonic()

    try:
        yield
    finally:
        elapsed_time = time.monotonic() - start_time
        if elapsed_time > timeout:
            if config.enable_audit_logging:
                logger.warning(f"Execution timeout: {elapsed_time:.2f}s > {timeout}s")
            raise ExecutionTimeoutError(
                f"Execution timeout: {elapsed_time:.2f}s > {timeout}s"
            )
483
+
484
+
485
def _optional_allowed_types() -> List[type]:
    """Collect allow-listed classes from whichever optional libraries are installed."""
    import importlib

    # (module to import, dotted attribute paths looked up on that module).
    # OpenCV images are NumPy arrays, so OpenCV needs no entry of its own.
    optional_types = (
        # Core data science types
        (
            "pandas",
            (
                "DataFrame",
                "Series",
                "Index",
                "MultiIndex",
                "Categorical",
                "Timestamp",
                "Timedelta",
                "Period",
                "DatetimeIndex",
                "TimedeltaIndex",
                "PeriodIndex",
            ),
        ),
        (
            "numpy",
            (
                "ndarray",
                "ma.MaskedArray",
                "int8",
                "int16",
                "int32",
                "int64",
                "uint8",
                "uint16",
                "uint32",
                "uint64",
                "float16",
                "float32",
                "float64",
                "complex64",
                "complex128",
                "bool_",
                "object_",
                "datetime64",
                "timedelta64",
                # Names below differ between NumPy versions and platforms;
                # whichever ones exist are picked up.
                "matrix",
                "string_",
                "bytes_",
                "unicode_",
                "str_",
                "float128",
                "complex256",
                # Base class of all NumPy scalars
                "generic",
            ),
        ),
        # Deep learning frameworks
        (
            "torch",
            (
                "Tensor",
                "nn.Module",
                "nn.Parameter",
                "cuda.FloatTensor",
                "cuda.DoubleTensor",
                "cuda.IntTensor",
                "cuda.LongTensor",
            ),
        ),
        (
            "tensorflow",
            (
                "Tensor",
                "Variable",
                "keras.Model",
                "keras.layers.Layer",
                "data.Dataset",
            ),
        ),
        # Scientific computing
        (
            "scipy.sparse",
            (
                "csr_matrix",
                "csc_matrix",
                "coo_matrix",
                "dia_matrix",
                "dok_matrix",
                "lil_matrix",
            ),
        ),
        # Machine learning frameworks
        ("sklearn.base", ("BaseEstimator", "TransformerMixin")),
        ("xgboost", ("DMatrix", "Booster")),
        ("lightgbm", ("Dataset", "Booster")),
        # Data visualization
        ("matplotlib.figure", ("Figure",)),
        ("matplotlib.axes", ("Axes",)),
        ("plotly.graph_objects", ("Figure",)),
        # Statistical modeling
        ("statsmodels.api", ("OLS", "GLM", "GLS", "WLS", "RegressionResults")),
        # Image processing
        ("PIL.Image", ("Image",)),
        # NLP libraries
        ("spacy.tokens", ("Doc", "Span", "Token")),
        # Graph/Network analysis
        ("networkx", ("Graph", "DiGraph", "MultiGraph", "MultiDiGraph")),
        # Time series
        ("prophet", ("Prophet",)),
        ("prophet.forecaster", ("Prophet",)),
    )

    found: List[type] = []
    for module_name, attr_paths in optional_types:
        try:
            module = importlib.import_module(module_name)
        except ImportError:
            continue  # Library not installed: nothing to allow
        for attr_path in attr_paths:
            obj = module
            try:
                for part in attr_path.split("."):
                    obj = getattr(obj, part)
            except (AttributeError, ImportError):
                # Name missing in this library version (or its lazy submodule
                # cannot be loaded); skip it rather than fail validation.
                continue
            # isinstance() only accepts classes, so factory functions and other
            # non-class objects must never end up in the allow-list.
            if isinstance(obj, type) and obj not in found:
                found.append(obj)
    return found


def _remove_until_stable(pattern: str, text: str, flags: int = 0) -> str:
    """Delete *pattern* from *text* repeatedly until no occurrence remains."""
    while True:
        reduced = re.sub(pattern, "", text, flags=flags)
        if reduced == text:
            return reduced
        text = reduced


def sanitize_input(
    value: Any,
    max_length: int = 10000,
    allowed_types: Optional[List[type]] = None,
    config: Optional["SecurityConfig"] = None,
) -> Any:
    """
    Sanitize input values to prevent injection attacks.

    Strings are cleaned, dict keys/values and list items are sanitized
    recursively, and every other allowed value (including tuples and sets)
    is returned unchanged.

    Args:
        value: Value to sanitize
        max_length: Maximum string length
        allowed_types: List of allowed types. When None, the builtin scalar
            and container types are allowed together with the main classes of
            any installed data-science libraries (pandas, NumPy, PyTorch,
            TensorFlow, SciPy, scikit-learn, ...). NumPy scalars are accepted
            even when an explicit list is given.
        config: Security configuration (uses global if None)

    Returns:
        Sanitized value

    Raises:
        SecurityError: If the value's type is not allowed or a string is
            longer than ``max_length``
    """
    if config is None:
        config = get_security_config()

    if allowed_types is None:
        allowed_types = [str, int, float, bool, list, dict, tuple, set, type(None)]
        allowed_types.extend(_optional_allowed_types())

    # Type validation - allow data science types
    type_allowed = any(isinstance(value, t) for t in allowed_types)

    # Additional check for numpy scalar types
    if not type_allowed:
        try:
            import numpy as np

            # Check if it's any numpy type
            if isinstance(value, np.generic):
                type_allowed = True
        except ImportError:
            pass

    if not type_allowed:
        raise SecurityError(f"Input type not allowed: {type(value)}")

    # String sanitization
    if isinstance(value, str):
        if len(value) > max_length:
            raise SecurityError(f"Input too long: {len(value)} > {max_length}")

        # Remove whole script blocks first: once "<" and ">" have been
        # stripped, the tags can no longer be recognised.
        sanitized = _remove_until_stable(
            r"<script.*?</script>", value, flags=re.IGNORECASE | re.DOTALL
        )
        # Remove potentially dangerous characters
        sanitized = re.sub(r"[<>;&|`$()]", "", sanitized)
        # Repeat the scheme removal so that nested input such as
        # "javajavascript:script:" cannot re-form the token.
        sanitized = _remove_until_stable(
            r"javascript:", sanitized, flags=re.IGNORECASE
        )

        if sanitized != value and config.enable_audit_logging:
            logger.warning(f"Input sanitized: {value[:50]}... -> {sanitized[:50]}...")

        return sanitized

    # Dictionary sanitization (recursive)
    if isinstance(value, dict):
        return {
            sanitize_input(k, max_length, allowed_types, config): sanitize_input(
                v, max_length, allowed_types, config
            )
            for k, v in value.items()
        }

    # List sanitization (recursive)
    if isinstance(value, list):
        return [
            sanitize_input(item, max_length, allowed_types, config) for item in value
        ]

    return value
776
+
777
+
778
def create_secure_temp_dir(
    prefix: str = "kailash_", config: Optional[SecurityConfig] = None
) -> Path:
    """
    Create a temporary directory that only its owner can access.

    Args:
        prefix: Prefix for the directory name
        config: Security configuration (uses global if None)

    Returns:
        Path to the newly created directory
    """
    settings = get_security_config() if config is None else config

    secure_dir = Path(tempfile.mkdtemp(prefix=prefix))
    # Explicitly restrict the mode to read/write/execute for the owner only.
    secure_dir.chmod(0o700)

    if settings.enable_audit_logging:
        logger.info(f"Created secure temp directory: {secure_dir}")

    return secure_dir
804
+
805
+
806
def validate_node_parameters(
    parameters: Dict[str, Any], config: Optional[SecurityConfig] = None
) -> Dict[str, Any]:
    """
    Run every node parameter through the module's sanitizers.

    Keys are cleaned with sanitize_input. A str or Path value whose key
    contains "path" or "file" (case-insensitive) is checked with
    validate_file_path, so it comes back as a resolved Path; every other
    value goes through sanitize_input.

    Args:
        parameters: Node parameters to validate
        config: Security configuration (uses global if None)

    Returns:
        New dict holding the sanitized keys and validated values

    Raises:
        SecurityError: If any key or value fails validation
    """
    cfg = get_security_config() if config is None else config

    validated_params = {}

    for key, value in parameters.items():
        clean_key = sanitize_input(key, config=cfg)

        # The key name is the only hint that a value is a filesystem path.
        lowered = key.lower()
        looks_like_path = "path" in lowered or "file" in lowered

        if looks_like_path and isinstance(value, (str, Path)):
            validated_value = validate_file_path(value, cfg, f"parameter {key}")
        else:
            validated_value = sanitize_input(value, config=cfg)

        validated_params[clean_key] = validated_value

    if cfg.enable_audit_logging:
        logger.info(f"Node parameters validated: {list(validated_params.keys())}")

    return validated_params