kailash 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/access_control.py +740 -0
- kailash/api/__main__.py +6 -0
- kailash/api/auth.py +668 -0
- kailash/api/custom_nodes.py +285 -0
- kailash/api/custom_nodes_secure.py +377 -0
- kailash/api/database.py +620 -0
- kailash/api/studio.py +915 -0
- kailash/api/studio_secure.py +893 -0
- kailash/mcp/__init__.py +53 -0
- kailash/mcp/__main__.py +13 -0
- kailash/mcp/ai_registry_server.py +712 -0
- kailash/mcp/client.py +447 -0
- kailash/mcp/client_new.py +334 -0
- kailash/mcp/server.py +293 -0
- kailash/mcp/server_new.py +336 -0
- kailash/mcp/servers/__init__.py +12 -0
- kailash/mcp/servers/ai_registry.py +289 -0
- kailash/nodes/__init__.py +4 -2
- kailash/nodes/ai/__init__.py +2 -0
- kailash/nodes/ai/a2a.py +714 -67
- kailash/nodes/ai/intelligent_agent_orchestrator.py +31 -37
- kailash/nodes/ai/iterative_llm_agent.py +1280 -0
- kailash/nodes/ai/llm_agent.py +324 -1
- kailash/nodes/ai/self_organizing.py +5 -6
- kailash/nodes/base.py +15 -2
- kailash/nodes/base_async.py +45 -0
- kailash/nodes/base_cycle_aware.py +374 -0
- kailash/nodes/base_with_acl.py +338 -0
- kailash/nodes/code/python.py +135 -27
- kailash/nodes/data/readers.py +16 -6
- kailash/nodes/data/writers.py +16 -6
- kailash/nodes/logic/__init__.py +8 -0
- kailash/nodes/logic/convergence.py +642 -0
- kailash/nodes/logic/loop.py +153 -0
- kailash/nodes/logic/operations.py +187 -27
- kailash/nodes/mixins/__init__.py +11 -0
- kailash/nodes/mixins/mcp.py +228 -0
- kailash/nodes/mixins.py +387 -0
- kailash/runtime/__init__.py +2 -1
- kailash/runtime/access_controlled.py +458 -0
- kailash/runtime/local.py +106 -33
- kailash/runtime/parallel_cyclic.py +529 -0
- kailash/sdk_exceptions.py +90 -5
- kailash/security.py +845 -0
- kailash/tracking/manager.py +38 -15
- kailash/tracking/models.py +1 -1
- kailash/tracking/storage/filesystem.py +30 -2
- kailash/utils/__init__.py +8 -0
- kailash/workflow/__init__.py +18 -0
- kailash/workflow/convergence.py +270 -0
- kailash/workflow/cycle_analyzer.py +768 -0
- kailash/workflow/cycle_builder.py +573 -0
- kailash/workflow/cycle_config.py +709 -0
- kailash/workflow/cycle_debugger.py +760 -0
- kailash/workflow/cycle_exceptions.py +601 -0
- kailash/workflow/cycle_profiler.py +671 -0
- kailash/workflow/cycle_state.py +338 -0
- kailash/workflow/cyclic_runner.py +985 -0
- kailash/workflow/graph.py +500 -39
- kailash/workflow/migration.py +768 -0
- kailash/workflow/safety.py +365 -0
- kailash/workflow/templates.py +744 -0
- kailash/workflow/validation.py +693 -0
- {kailash-0.1.5.dist-info → kailash-0.2.0.dist-info}/METADATA +256 -12
- kailash-0.2.0.dist-info/RECORD +125 -0
- kailash/nodes/mcp/__init__.py +0 -11
- kailash/nodes/mcp/client.py +0 -554
- kailash/nodes/mcp/resource.py +0 -682
- kailash/nodes/mcp/server.py +0 -577
- kailash-0.1.5.dist-info/RECORD +0 -88
- {kailash-0.1.5.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
- {kailash-0.1.5.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.5.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.5.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,338 @@
|
|
1
|
+
"""
|
2
|
+
Base Node with Optional Access Control Layer
|
3
|
+
|
4
|
+
This module extends the base Node class with optional access control capabilities.
|
5
|
+
The access control is completely transparent and disabled by default, ensuring
|
6
|
+
no interference with existing SDK usage.
|
7
|
+
|
8
|
+
Key Design Principles:
|
9
|
+
- Access control is OFF by default
|
10
|
+
- Zero performance impact when disabled
|
11
|
+
- Fully backward compatible
|
12
|
+
- Opt-in at workflow or node level
|
13
|
+
- No changes required to existing code
|
14
|
+
"""
|
15
|
+
|
16
|
+
import logging
|
17
|
+
from typing import Any, Dict, Optional
|
18
|
+
|
19
|
+
from kailash.access_control import (
|
20
|
+
AccessDecision,
|
21
|
+
NodePermission,
|
22
|
+
UserContext,
|
23
|
+
get_access_control_manager,
|
24
|
+
)
|
25
|
+
from kailash.nodes.base import Node
|
26
|
+
from kailash.nodes.base_async import AsyncNode
|
27
|
+
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
class NodeWithAccessControl(Node):
    """
    Node base class that can gate execution behind an access-control check.

    The check is opt-in. ``run()`` only consults the access control manager
    when all of the following hold:

    - the manager returned by ``get_access_control_manager()`` is truthy and
      its ``enabled`` attribute is truthy,
    - this node was configured with ``enable_access_control=True``,
    - the caller supplied a truthy ``_user_context`` input.

    In every other case ``run()`` goes straight to ``_execute()``.

    Configuration keys read from ``**config``:
        enable_access_control: Turn the check on for this node (default False).
        required_permission: Permission handed to the access control manager
            (default ``NodePermission.EXECUTE``).
        fallback_node: Value reported under ``_redirect_to`` when access is
            denied (default None).
        mask_output_fields: Stored as ``_mask_output_fields`` (default ``[]``).
            This class does not read it back; masking in ``run()`` is driven
            by ``decision.masked_fields``.

    Subclasses implement their logic in ``_execute()`` rather than ``run()``.

    Example:
        >>> class SecureProcessorNode(NodeWithAccessControl):
        ...     def _execute(self, **inputs):
        ...         return {"result": "processed"}
        >>>
        >>> node = SecureProcessorNode(
        ...     enable_access_control=True,
        ...     required_permission=NodePermission.EXECUTE,
        ...     mask_output_fields=["sensitive_data"]
        ... )
    """

    def __init__(self, **config):
        """Forward ``config`` to the parent node and record the ACL settings."""
        super().__init__(**config)
        # Every ACL setting is optional; the defaults leave access control off.
        self._access_control_enabled = config.get("enable_access_control", False)
        self._required_permission = config.get(
            "required_permission", NodePermission.EXECUTE
        )
        self._fallback_node = config.get("fallback_node", None)
        self._mask_output_fields = config.get("mask_output_fields", [])

    def run(self, **inputs) -> Any:
        """
        Run the node, performing an access check first when one applies.

        The reserved inputs ``_runtime_context`` and ``_user_context`` are
        removed before the remaining inputs are passed to ``_execute()``.

        Returns:
            The ``_execute()`` result (with fields masked when the decision
            lists any and the result is a dict), or whatever
            ``_handle_access_denied()`` returns when access is refused.
        """
        # Strip the reserved keys so they never reach _execute().
        runtime_context = inputs.pop("_runtime_context", None)
        user_context = inputs.pop("_user_context", None)

        # Fast path: nothing to check, behave like a plain node.
        if not self._should_check_access(user_context):
            return self._execute(**inputs)

        manager = get_access_control_manager()
        decision = manager.check_node_access(
            user_context,
            self._get_node_id(),
            self._required_permission,
            runtime_context or {},
        )

        if not decision.allowed:
            return self._handle_access_denied(decision, inputs)

        output = self._execute(**inputs)

        # Only dict results can be masked field-by-field.
        if decision.masked_fields and isinstance(output, dict):
            output = self._mask_fields(output, decision.masked_fields)

        return output

    def _execute(self, **inputs) -> Any:
        """
        Node logic hook; subclasses override this instead of ``run()``.

        The default delegates to the parent class's ``run()`` when it has one.

        Raises:
            NotImplementedError: If the parent class exposes no ``run``.
        """
        if not hasattr(super(), "run"):
            raise NotImplementedError("Node must implement _execute() method")
        return super().run(**inputs)

    def _should_check_access(self, user_context: Optional[UserContext]) -> bool:
        """
        Decide whether ``run()`` must consult the access control manager.

        Returns:
            False when the manager is missing or not enabled, when this node
            has not enabled access control, or when ``user_context`` is
            falsy; True otherwise.
        """
        manager = get_access_control_manager()
        globally_on = bool(manager) and getattr(manager, "enabled", False)
        if not globally_on:
            return False

        # Node-level switch first, then the presence of a user context.
        return bool(self._access_control_enabled) and bool(user_context)

    def _get_node_id(self) -> str:
        """Return ``config["node_id"]`` when present, else the class name."""
        settings = self.config
        return (
            settings["node_id"] if "node_id" in settings else self.__class__.__name__
        )

    def _mask_fields(self, data: Dict[str, Any], fields: list[str]) -> Dict[str, Any]:
        """
        Return a shallow copy of ``data`` with the listed fields masked.

        Fields that are not present in ``data`` are ignored; ``data`` itself
        is left unmodified.
        """
        masked = data.copy()
        masked.update({name: "***MASKED***" for name in fields if name in masked})
        return masked

    def _handle_access_denied(
        self, decision: AccessDecision, inputs: Dict[str, Any]
    ) -> Any:
        """
        Produce the result for a refused access check.

        Logs a warning, then returns a redirect marker dict when a fallback
        node is configured and an empty dict otherwise. Subclasses may
        override this for custom behavior.
        """
        message = f"Access denied for node {self._get_node_id()}: {decision.reason}"
        logger.warning(message)

        if not self._fallback_node:
            # No fallback configured: hand back an empty result.
            return {}

        # Marker consumed by the runtime to reroute execution.
        return {
            "_access_denied": True,
            "_redirect_to": self._fallback_node,
            "_original_inputs": inputs,
        }
|
213
|
+
|
214
|
+
|
215
|
+
class AsyncNodeWithAccessControl(AsyncNode):
    """
    Coroutine-based counterpart of NodeWithAccessControl.

    ``run()`` and ``_execute()`` are coroutines; the remaining helpers are
    the very same function objects defined on NodeWithAccessControl, bound
    here as class attributes.
    """

    def __init__(self, **config):
        """Forward ``config`` to the parent node and record the ACL settings."""
        super().__init__(**config)
        # Same optional keys and defaults as the synchronous class.
        self._access_control_enabled = config.get("enable_access_control", False)
        self._required_permission = config.get(
            "required_permission", NodePermission.EXECUTE
        )
        self._fallback_node = config.get("fallback_node", None)
        self._mask_output_fields = config.get("mask_output_fields", [])

    async def run(self, **inputs) -> Any:
        """
        Await the node, performing an access check first when one applies.

        ``_runtime_context`` and ``_user_context`` are removed from the
        inputs before they are forwarded to ``_execute()``.
        """
        runtime_context = inputs.pop("_runtime_context", None)
        user_context = inputs.pop("_user_context", None)

        # Fast path: nothing to check.
        if not self._should_check_access(user_context):
            return await self._execute(**inputs)

        manager = get_access_control_manager()
        # The access check itself is a plain synchronous call.
        decision = manager.check_node_access(
            user_context,
            self._get_node_id(),
            self._required_permission,
            runtime_context or {},
        )

        if not decision.allowed:
            return self._handle_access_denied(decision, inputs)

        output = await self._execute(**inputs)

        # Only dict results can be masked field-by-field.
        if decision.masked_fields and isinstance(output, dict):
            output = self._mask_fields(output, decision.masked_fields)

        return output

    async def _execute(self, **inputs) -> Any:
        """
        Async node logic hook; defaults to awaiting the parent's ``run()``.

        Raises:
            NotImplementedError: If the parent class exposes no ``run``.
        """
        if not hasattr(super(), "run"):
            raise NotImplementedError("Node must implement _execute() method")
        return await super().run(**inputs)

    # Shared, non-async helpers borrowed from the synchronous implementation.
    _should_check_access = NodeWithAccessControl._should_check_access
    _get_node_id = NodeWithAccessControl._get_node_id
    _mask_fields = NodeWithAccessControl._mask_fields
    _handle_access_denied = NodeWithAccessControl._handle_access_denied
|
265
|
+
|
266
|
+
|
267
|
+
def make_node_access_controlled(node_class, **acl_config):
    """
    Factory function to add access control to any existing node class.

    Builds and returns a new class that inherits from both
    NodeWithAccessControl and ``node_class``. The new class's ``run()`` is
    the access-controlled one; its ``_execute()`` calls ``node_class.run``.

    This allows adding access control to nodes without modifying their code:

        >>> from kailash.nodes.data.readers import CSVReaderNode
        >>> SecureCSVReader = make_node_access_controlled(
        ...     CSVReaderNode,
        ...     enable_access_control=True,
        ...     required_permission=NodePermission.READ_OUTPUT
        ... )

    Args:
        node_class: The node class to wrap.
        **acl_config: Default configuration merged into every instance's
            config; keys given at instantiation time take precedence.

    Returns:
        A class named ``Secure<node_class.__name__>`` that reports
        ``node_class``'s module as its own.
    """

    class AccessControlledNode(NodeWithAccessControl, node_class):
        # Instance config wins over the factory-level ACL defaults.
        def __init__(self, **config):
            full_config = {**acl_config, **config}
            super().__init__(**full_config)

        # Bypass NodeWithAccessControl.run and invoke the wrapped node's logic.
        def _execute(self, **inputs):
            return node_class.run(self, **inputs)

    # Present the generated class under the wrapped node's identity.
    # __qualname__ must be set together with __name__: otherwise repr() and
    # tracebacks show "<module>.make_node_access_controlled.<locals>.
    # AccessControlledNode" under the borrowed module name.
    secure_name = f"Secure{node_class.__name__}"
    AccessControlledNode.__name__ = secure_name
    AccessControlledNode.__qualname__ = secure_name
    AccessControlledNode.__module__ = node_class.__module__

    return AccessControlledNode
|
296
|
+
|
297
|
+
|
298
|
+
def add_access_control(node_instance, **acl_config):
    """
    Add access control to an existing node instance.

    The function does not wrap or subclass the node. It copies every entry
    of the ACL configuration onto ``node_instance`` as an attribute and sets
    ``_access_controlled = True``. Enforcement is left to whatever consumes
    those attributes (the original notes name AccessControlledRuntime).

    Args:
        node_instance: The node instance to annotate (modified in place).
        **acl_config: Access control configuration
            - enable_access_control: Whether to enable access control
              (default: True). The effective value is always written to the
              instance, including when the default is used.
            - required_permission: Permission required to execute the node
            - node_id: Unique identifier for access control rules
            - mask_output_fields: List of fields to mask in output
            - fallback_node: Node ID to execute if access is denied

    Returns:
        The same ``node_instance`` object. It is returned untouched when
        ``enable_access_control`` is falsy.

    Example:
        >>> reader = CSVReaderNode(file_path="data.csv")
        >>> secure_reader = add_access_control(
        ...     reader,
        ...     enable_access_control=True,
        ...     required_permission=NodePermission.EXECUTE,
        ...     node_id="secure_csv_reader"
        ... )
    """
    # Apply the documented default so the flag is present on the instance
    # even when the caller relies on it implicitly.
    settings = {"enable_access_control": True, **acl_config}

    # If access control is disabled, return the original node unchanged.
    if not settings["enable_access_control"]:
        return node_instance

    # Copy each ACL setting onto the node as a plain attribute.
    for key, value in settings.items():
        setattr(node_instance, key, value)

    # Mark this node as access-controlled.
    node_instance._access_controlled = True

    return node_instance
|
kailash/nodes/code/python.py
CHANGED
@@ -1,29 +1,56 @@
|
|
1
|
-
"""Python
|
2
|
-
|
3
|
-
This module provides nodes
|
4
|
-
to create custom processing logic without defining new node classes.
|
5
|
-
both function-based and class-based code execution with automatic type
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
1
|
+
"""Advanced Python Code Execution Node with Cycle Support.
|
2
|
+
|
3
|
+
This module provides sophisticated nodes for executing arbitrary Python code,
|
4
|
+
allowing users to create custom processing logic without defining new node classes.
|
5
|
+
It supports both function-based and class-based code execution with automatic type
|
6
|
+
inference, comprehensive error handling, and advanced cycle-aware capabilities.
|
7
|
+
|
8
|
+
Examples:
|
9
|
+
Basic code execution:
|
10
|
+
|
11
|
+
>>> node = PythonCodeNode(
|
12
|
+
... name="processor",
|
13
|
+
... code="result = {'value': input_value * 2, 'status': 'processed'}"
|
14
|
+
... )
|
15
|
+
|
16
|
+
Cycle-aware execution:
|
17
|
+
|
18
|
+
>>> cycle_node = PythonCodeNode(
|
19
|
+
... name="accumulator",
|
20
|
+
... code='''
|
21
|
+
... # Safe cycle parameter access
|
22
|
+
... try:
|
23
|
+
... count = count
|
24
|
+
... total = total
|
25
|
+
... except NameError:
|
26
|
+
... count = 0
|
27
|
+
... total = 0
|
28
|
+
...
|
29
|
+
... count += 1
|
30
|
+
... total += input_value
|
31
|
+
... average = total / count
|
32
|
+
...
|
33
|
+
... result = {
|
34
|
+
... 'count': count,
|
35
|
+
... 'total': total,
|
36
|
+
... 'average': average,
|
37
|
+
... 'converged': average > 10.0
|
38
|
+
... }
|
39
|
+
... '''
|
40
|
+
... )
|
41
|
+
|
42
|
+
Function integration:
|
43
|
+
|
44
|
+
>>> def custom_processor(data: dict) -> dict:
|
45
|
+
... return {'processed': data['value'] * 2}
|
46
|
+
>>> node = PythonCodeNode.from_function(custom_processor)
|
21
47
|
"""
|
22
48
|
|
23
49
|
import ast
|
24
50
|
import importlib.util
|
25
51
|
import inspect
|
26
52
|
import logging
|
53
|
+
import resource
|
27
54
|
import traceback
|
28
55
|
from pathlib import Path
|
29
56
|
from typing import Any, Callable, Dict, List, Optional, Type, Union, get_type_hints
|
@@ -34,6 +61,14 @@ from kailash.sdk_exceptions import (
|
|
34
61
|
NodeExecutionError,
|
35
62
|
SafetyViolationError,
|
36
63
|
)
|
64
|
+
from kailash.security import (
|
65
|
+
ExecutionTimeoutError,
|
66
|
+
MemoryLimitError,
|
67
|
+
SecurityConfig,
|
68
|
+
execution_timeout,
|
69
|
+
get_security_config,
|
70
|
+
validate_node_parameters,
|
71
|
+
)
|
37
72
|
|
38
73
|
logger = logging.getLogger(__name__)
|
39
74
|
|
@@ -48,7 +83,9 @@ ALLOWED_MODULES = {
|
|
48
83
|
"collections",
|
49
84
|
"functools",
|
50
85
|
"string",
|
86
|
+
"time",
|
51
87
|
"re",
|
88
|
+
"os", # For file operations in cycles
|
52
89
|
"pandas",
|
53
90
|
"numpy",
|
54
91
|
"scipy",
|
@@ -94,7 +131,7 @@ class SafeCodeChecker(ast.NodeVisitor):
|
|
94
131
|
if isinstance(node.func, ast.Name):
|
95
132
|
func_name = node.func.id
|
96
133
|
# Check for dangerous built-in functions
|
97
|
-
if func_name in {"eval", "exec", "compile"
|
134
|
+
if func_name in {"eval", "exec", "compile"}:
|
98
135
|
self.violations.append(f"Call to '{func_name}' is not allowed")
|
99
136
|
elif isinstance(node.func, ast.Attribute):
|
100
137
|
# Check for dangerous method calls
|
@@ -126,14 +163,20 @@ class CodeExecutor:
|
|
126
163
|
- Memory limits (future enhancement)
|
127
164
|
"""
|
128
165
|
|
129
|
-
def __init__(
|
166
|
+
def __init__(
|
167
|
+
self,
|
168
|
+
allowed_modules: Optional[List[str]] = None,
|
169
|
+
security_config: Optional[SecurityConfig] = None,
|
170
|
+
):
|
130
171
|
"""Initialize the code executor.
|
131
172
|
|
132
173
|
Args:
|
133
174
|
allowed_modules: List of module names allowed for import.
|
134
175
|
Defaults to common data processing modules.
|
176
|
+
security_config: Security configuration for execution limits.
|
135
177
|
"""
|
136
178
|
self.allowed_modules = set(allowed_modules or ALLOWED_MODULES)
|
179
|
+
self.security_config = security_config or get_security_config()
|
137
180
|
self.allowed_builtins = {
|
138
181
|
"abs",
|
139
182
|
"all",
|
@@ -159,6 +202,43 @@ class CodeExecutor:
|
|
159
202
|
"type",
|
160
203
|
"zip",
|
161
204
|
"print", # Allow print for debugging
|
205
|
+
"hasattr", # For attribute checking
|
206
|
+
# Exception classes for proper error handling
|
207
|
+
"Exception",
|
208
|
+
"ValueError",
|
209
|
+
"TypeError",
|
210
|
+
"KeyError",
|
211
|
+
"NameError",
|
212
|
+
"AttributeError",
|
213
|
+
"IndexError",
|
214
|
+
"RuntimeError",
|
215
|
+
"StopIteration",
|
216
|
+
"ImportError",
|
217
|
+
"OSError",
|
218
|
+
"IOError",
|
219
|
+
"FileNotFoundError",
|
220
|
+
"ZeroDivisionError",
|
221
|
+
"ArithmeticError",
|
222
|
+
"AssertionError",
|
223
|
+
# Useful built-ins for data science
|
224
|
+
"set",
|
225
|
+
"frozenset",
|
226
|
+
"bytes",
|
227
|
+
"bytearray",
|
228
|
+
"complex",
|
229
|
+
"divmod",
|
230
|
+
"pow",
|
231
|
+
"hex",
|
232
|
+
"oct",
|
233
|
+
"bin",
|
234
|
+
"format",
|
235
|
+
"ord",
|
236
|
+
"chr",
|
237
|
+
"repr",
|
238
|
+
"vars", # For debugging
|
239
|
+
"getattr", # For attribute access
|
240
|
+
"open", # For file operations
|
241
|
+
"__import__", # For imports (controlled by ALLOWED_MODULES)
|
162
242
|
}
|
163
243
|
self._execution_namespace = {}
|
164
244
|
|
@@ -195,10 +275,15 @@ class CodeExecutor:
|
|
195
275
|
|
196
276
|
Raises:
|
197
277
|
NodeExecutionError: If code execution fails
|
278
|
+
ExecutionTimeoutError: If execution exceeds timeout
|
279
|
+
MemoryLimitError: If memory usage exceeds limit
|
198
280
|
"""
|
199
281
|
# Check code safety first
|
200
282
|
self.check_code_safety(code)
|
201
283
|
|
284
|
+
# Sanitize inputs
|
285
|
+
sanitized_inputs = validate_node_parameters(inputs, self.security_config)
|
286
|
+
|
202
287
|
# Create isolated namespace
|
203
288
|
import builtins
|
204
289
|
|
@@ -218,19 +303,42 @@ class CodeExecutor:
|
|
218
303
|
except ImportError:
|
219
304
|
logger.warning(f"Module {module_name} not available")
|
220
305
|
|
221
|
-
# Add inputs
|
222
|
-
namespace.update(
|
306
|
+
# Add sanitized inputs
|
307
|
+
namespace.update(sanitized_inputs)
|
223
308
|
|
224
309
|
try:
|
225
|
-
|
310
|
+
# Set memory limit if supported (Unix systems)
|
311
|
+
if hasattr(resource, "RLIMIT_AS") and self.security_config.memory_limit:
|
312
|
+
try:
|
313
|
+
resource.setrlimit(
|
314
|
+
resource.RLIMIT_AS,
|
315
|
+
(
|
316
|
+
self.security_config.memory_limit,
|
317
|
+
self.security_config.memory_limit,
|
318
|
+
),
|
319
|
+
)
|
320
|
+
except (OSError, ValueError):
|
321
|
+
logger.warning(
|
322
|
+
"Could not set memory limit - continuing without limit"
|
323
|
+
)
|
324
|
+
|
325
|
+
# Execute with timeout
|
326
|
+
with execution_timeout(
|
327
|
+
self.security_config.execution_timeout, self.security_config
|
328
|
+
):
|
329
|
+
exec(code, namespace)
|
226
330
|
# Return all non-private variables that weren't in inputs
|
227
331
|
return {
|
228
332
|
k: v
|
229
333
|
for k, v in namespace.items()
|
230
334
|
if not k.startswith("_")
|
231
|
-
and k not in
|
335
|
+
and k not in sanitized_inputs
|
232
336
|
and k not in self.allowed_modules
|
233
337
|
}
|
338
|
+
except ExecutionTimeoutError:
|
339
|
+
raise
|
340
|
+
except MemoryLimitError:
|
341
|
+
raise
|
234
342
|
except Exception as e:
|
235
343
|
error_msg = f"Code execution failed: {str(e)}\n{traceback.format_exc()}"
|
236
344
|
logger.error(error_msg)
|
@@ -328,7 +436,7 @@ class FunctionWrapper:
|
|
328
436
|
"""Extract output type from function signature.
|
329
437
|
|
330
438
|
Returns:
|
331
|
-
Return type annotation or Any
|
439
|
+
Return type annotation or Any.
|
332
440
|
"""
|
333
441
|
return self.type_hints.get("return", Any)
|
334
442
|
|
kailash/nodes/data/readers.py
CHANGED
@@ -33,6 +33,7 @@ import json
|
|
33
33
|
from typing import Any, Dict
|
34
34
|
|
35
35
|
from kailash.nodes.base import Node, NodeParameter, register_node
|
36
|
+
from kailash.security import safe_open, validate_file_path
|
36
37
|
|
37
38
|
|
38
39
|
@register_node()
|
@@ -245,7 +246,7 @@ class CSVReaderNode(Node):
|
|
245
246
|
- Analyzers can process row-by-row
|
246
247
|
- data_indexed is useful for lookups and joins
|
247
248
|
"""
|
248
|
-
file_path = kwargs
|
249
|
+
file_path = kwargs.get("file_path") or self.config.get("file_path")
|
249
250
|
headers = kwargs.get("headers", True)
|
250
251
|
delimiter = kwargs.get("delimiter", ",")
|
251
252
|
index_column = kwargs.get("index_column")
|
@@ -253,7 +254,10 @@ class CSVReaderNode(Node):
|
|
253
254
|
data = []
|
254
255
|
data_indexed = {}
|
255
256
|
|
256
|
-
|
257
|
+
# Validate file path for security
|
258
|
+
validated_path = validate_file_path(file_path, operation="CSV read")
|
259
|
+
|
260
|
+
with safe_open(validated_path, "r", encoding="utf-8") as f:
|
257
261
|
reader = csv.reader(f, delimiter=delimiter)
|
258
262
|
|
259
263
|
if headers:
|
@@ -402,9 +406,12 @@ class JSONReaderNode(Node):
|
|
402
406
|
- Compatible with JSONWriter for round-trip
|
403
407
|
- Transform nodes can process nested data
|
404
408
|
"""
|
405
|
-
file_path = kwargs
|
409
|
+
file_path = kwargs.get("file_path") or self.config.get("file_path")
|
410
|
+
|
411
|
+
# Validate file path for security
|
412
|
+
validated_path = validate_file_path(file_path, operation="JSON read")
|
406
413
|
|
407
|
-
with
|
414
|
+
with safe_open(validated_path, "r", encoding="utf-8") as f:
|
408
415
|
data = json.load(f)
|
409
416
|
|
410
417
|
return {"data": data}
|
@@ -540,10 +547,13 @@ class TextReaderNode(Node):
|
|
540
547
|
- Pattern nodes can search content
|
541
548
|
- Writers can save processed text
|
542
549
|
"""
|
543
|
-
file_path = kwargs
|
550
|
+
file_path = kwargs.get("file_path") or self.config.get("file_path")
|
544
551
|
encoding = kwargs.get("encoding", "utf-8")
|
545
552
|
|
546
|
-
|
553
|
+
# Validate file path for security
|
554
|
+
validated_path = validate_file_path(file_path, operation="text read")
|
555
|
+
|
556
|
+
with safe_open(validated_path, "r", encoding=encoding) as f:
|
547
557
|
text = f.read()
|
548
558
|
|
549
559
|
return {"text": text}
|