kailash 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1021 @@
|
|
1
|
+
"""Python code execution node implementation.
|
2
|
+
|
3
|
+
This module provides nodes that can execute arbitrary Python code, allowing users
|
4
|
+
to create custom processing logic without defining new node classes. It supports
|
5
|
+
both function-based and class-based code execution with automatic type inference
|
6
|
+
and error handling.
|
7
|
+
|
8
|
+
Design Principles:
|
9
|
+
1. Safety - Code execution is sandboxed with proper error handling
|
10
|
+
2. Flexibility - Support functions, classes, and inline code
|
11
|
+
3. Type Safety - Automatic type inference with validation
|
12
|
+
4. Composability - Works seamlessly with other nodes in workflows
|
13
|
+
5. Simplicity - Easy to use for non-technical users
|
14
|
+
|
15
|
+
Components:
|
16
|
+
- PythonCodeNode: Main node for code execution
|
17
|
+
- CodeExecutor: Safe code execution environment
|
18
|
+
- FunctionWrapper: Converts functions to nodes
|
19
|
+
- ClassWrapper: Converts classes to nodes
|
20
|
+
- SafeCodeChecker: AST-based security validation
|
21
|
+
"""
|
22
|
+
|
23
|
+
import ast
|
24
|
+
import importlib.util
|
25
|
+
import inspect
|
26
|
+
import logging
|
27
|
+
import traceback
|
28
|
+
from pathlib import Path
|
29
|
+
from typing import Any, Callable, Dict, List, Optional, Type, Union, get_type_hints
|
30
|
+
|
31
|
+
from kailash.nodes.base import Node, NodeMetadata, NodeParameter, register_node
|
32
|
+
from kailash.sdk_exceptions import (
|
33
|
+
NodeConfigurationError,
|
34
|
+
NodeExecutionError,
|
35
|
+
SafetyViolationError,
|
36
|
+
)
|
37
|
+
|
38
|
+
logger = logging.getLogger(__name__)
|
39
|
+
|
40
|
+
# Top-level module names that user-supplied code may reference.
# SafeCodeChecker compares import statements against this set, and
# CodeExecutor uses it as its default whitelist. The grouping below is
# purely for readability; a set has no ordering.
ALLOWED_MODULES = {
    # Standard library: numbers and time
    "math",
    "statistics",
    "random",
    "datetime",
    # Standard library: data structures and functional helpers
    "collections",
    "itertools",
    "functools",
    # Standard library: text and serialization
    "string",
    "re",
    "json",
    # Third-party: data analysis and scientific computing
    "numpy",
    "pandas",
    "scipy",
    "sklearn",
    # Third-party: plotting
    "matplotlib",
    "seaborn",
    "plotly",
}
|
60
|
+
|
61
|
+
|
62
|
+
class SafeCodeChecker(ast.NodeVisitor):
    """AST visitor that records unsafe constructs found in Python source.

    Walk a parsed tree with ``visit()`` and then inspect ``violations``,
    a list of human-readable messages in the order the offending nodes
    were encountered. Three things are reported:

    - ``import x`` / ``from x import y`` where the top-level package of
      ``x`` is not in the whitelist,
    - calls by bare name to ``eval``, ``exec``, ``compile`` or
      ``__import__``,
    - calls to any attribute named ``system`` or ``popen``.

    The visitor only collects messages; it never raises on its own.
    """

    # Built-ins that are refused when called by bare name.
    _FORBIDDEN_CALLS = frozenset({"eval", "exec", "compile", "__import__"})
    # Attribute names that are refused when called as ``something.attr(...)``.
    _FORBIDDEN_METHODS = frozenset({"system", "popen"})

    def __init__(self, allowed_modules=None):
        """Create a checker.

        Args:
            allowed_modules: Iterable of top-level module names that may be
                imported. When omitted (``None``) the module-level
                ``ALLOWED_MODULES`` whitelist is used, which matches the
                behaviour of the argument-less constructor.
        """
        self.violations = []
        # ALLOWED_MODULES is only consulted when no explicit whitelist is
        # supplied, so an explicit empty iterable means "allow nothing".
        self.allowed_modules = set(
            ALLOWED_MODULES if allowed_modules is None else allowed_modules
        )

    def visit_Import(self, node):
        """Record every ``import a.b`` whose top-level package is not allowed."""
        for alias in node.names:
            module_name = alias.name.split(".")[0]
            if module_name not in self.allowed_modules:
                self.violations.append(
                    f"Import of module '{module_name}' is not allowed"
                )
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Record ``from a.b import c`` when the top-level package is not allowed.

        Relative imports of the form ``from . import x`` carry no module
        name and are not checked here.
        """
        if node.module:
            module_name = node.module.split(".")[0]
            if module_name not in self.allowed_modules:
                self.violations.append(
                    f"Import from module '{module_name}' is not allowed"
                )
        self.generic_visit(node)

    def visit_Call(self, node):
        """Record calls to forbidden built-ins and forbidden method names."""
        target = node.func
        if isinstance(target, ast.Name):
            if target.id in self._FORBIDDEN_CALLS:
                self.violations.append(f"Call to '{target.id}' is not allowed")
        elif isinstance(target, ast.Attribute):
            # Matching is by attribute name only; the object the attribute
            # is looked up on is not inspected.
            if target.attr in self._FORBIDDEN_METHODS:
                self.violations.append(
                    f"Call to method '{target.attr}' is not allowed"
                )
        # Keep descending so nested calls (arguments, chained calls) are seen.
        self.generic_visit(node)
|
106
|
+
|
107
|
+
|
108
|
+
class CodeExecutor:
    """Restricted executor for Python code strings and callables.

    Code strings are first screened by ``SafeCodeChecker`` and then run with
    ``exec`` in a fresh namespace that contains only a reduced set of
    built-ins, the whitelisted modules that could be imported, and the
    caller's inputs. Callables are invoked directly with keyword arguments
    selected from an input dictionary.

    No execution timeout or memory limit is enforced by this class.
    """

    def __init__(self, allowed_modules: Optional[List[str]] = None):
        """Initialize the code executor.

        Args:
            allowed_modules: Module names to pre-load into the execution
                namespace. ``None`` selects the module-level
                ``ALLOWED_MODULES`` defaults; an explicit empty list
                exposes no modules at all.
        """
        # Test against None rather than truthiness so that an explicit
        # empty list is honoured instead of silently widening to defaults.
        if allowed_modules is None:
            allowed_modules = ALLOWED_MODULES
        self.allowed_modules = set(allowed_modules)
        # Names copied from ``builtins`` into the execution namespace.
        self.allowed_builtins = {
            "abs",
            "all",
            "any",
            "bool",
            "dict",
            "enumerate",
            "filter",
            "float",
            "int",
            "len",
            "list",
            "map",
            "max",
            "min",
            "range",
            "round",
            "sorted",
            "str",
            "sum",
            "tuple",
            "type",
            "zip",
            "print",  # Allow print for debugging
        }
        self._execution_namespace = {}

    def check_code_safety(self, code: str) -> None:
        """Check if code is safe to execute.

        The checker is created without arguments, so imports are validated
        against the checker's own default whitelist rather than against
        ``self.allowed_modules``.

        Args:
            code: Python code to check

        Raises:
            NodeExecutionError: If the code cannot be parsed
            SafetyViolationError: If code contains unsafe operations
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            raise NodeExecutionError(f"Invalid Python syntax: {e}") from e

        checker = SafeCodeChecker()
        checker.visit(tree)

        if checker.violations:
            raise SafetyViolationError(
                f"Code contains unsafe operations: {'; '.join(checker.violations)}"
            )

    def execute_code(self, code: str, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Execute Python code with given inputs.

        Args:
            code: Python code to execute
            inputs: Dictionary of input variables

        Returns:
            Dictionary of the variables the code created: every name in the
            namespace after execution that does not start with ``_`` and is
            neither an input name nor a whitelisted module name.

        Raises:
            NodeExecutionError: If code execution fails
        """
        # Check code safety first
        self.check_code_safety(code)

        import builtins

        namespace = {}

        # Pre-bind whitelisted modules under their own names; modules that
        # are not installed are skipped with a warning.
        for module_name in self.allowed_modules:
            try:
                namespace[module_name] = importlib.import_module(module_name)
            except ImportError:
                logger.warning(f"Module {module_name} not available")

        # Inputs may shadow a module name, as before.
        namespace.update(inputs)

        # Installed after the inputs so that an input called "__builtins__"
        # cannot replace the restricted built-in set.
        # NOTE(review): the mapping has no "__import__" entry, so import
        # statements inside the executed code presumably cannot succeed;
        # whitelisted modules are reachable as pre-bound names instead.
        namespace["__builtins__"] = {
            name: getattr(builtins, name)
            for name in self.allowed_builtins
            if hasattr(builtins, name)
        }

        try:
            exec(code, namespace)
        except Exception as e:
            error_msg = f"Code execution failed: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            raise NodeExecutionError(error_msg) from e

        # Return all non-private variables that weren't in inputs
        return {
            k: v
            for k, v in namespace.items()
            if not k.startswith("_")
            and k not in inputs
            and k not in self.allowed_modules
        }

    def execute_function(self, func: Callable, inputs: Dict[str, Any]) -> Any:
        """Execute a Python function with given inputs.

        Each named parameter of ``func`` is filled from ``inputs`` when a
        matching key exists; otherwise its default is used. ``*args`` and
        ``**kwargs`` style parameters are never required and receive
        nothing. Keys in ``inputs`` that match no parameter are ignored.

        Args:
            func: Function to execute
            inputs: Dictionary of input arguments

        Returns:
            Function return value

        Raises:
            NodeExecutionError: If a parameter without a default is missing
                from ``inputs``, or if the function itself fails
        """
        variadic_kinds = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )
        try:
            # Get function signature
            sig = inspect.signature(func)

            # Map inputs to function parameters
            kwargs = {}
            for param_name, param in sig.parameters.items():
                if param.kind in variadic_kinds:
                    # Variadic parameters have no default but are optional
                    # by nature; treating them as required made any
                    # function declaring them impossible to run.
                    continue
                if param_name in inputs:
                    kwargs[param_name] = inputs[param_name]
                elif param.default is param.empty:
                    raise NodeExecutionError(
                        f"Missing required parameter: {param_name}"
                    )

            # Execute function
            return func(**kwargs)

        except NodeExecutionError:
            # Already describes the problem; do not bury it in a second
            # wrapper with an unrelated traceback.
            raise
        except Exception as e:
            error_msg = f"Function execution failed: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            raise NodeExecutionError(error_msg) from e
|
275
|
+
|
276
|
+
|
277
|
+
class FunctionWrapper:
    """Wrapper for converting Python callables to nodes.

    The wrapper inspects a callable's signature and type hints, can run it
    through a ``CodeExecutor``, and can build a ``PythonCodeNode`` around it.

    Example:
        def process(data: pd.DataFrame) -> pd.DataFrame:
            return data.dropna()

        wrapper = FunctionWrapper(process)
        node = wrapper.to_node(name="dropna_processor")
    """

    def __init__(self, func: Callable, executor: Optional["CodeExecutor"] = None):
        """Initialize the function wrapper.

        Args:
            func: Python callable to wrap
            executor: Code executor instance; a new ``CodeExecutor`` is
                created when omitted
        """
        self.func = func
        self.executor = executor or CodeExecutor()
        self.signature = inspect.signature(func)
        # functools.partial objects and callable instances have no
        # __name__; fall back to the name of their type instead of failing.
        self.name = getattr(func, "__name__", type(func).__name__)
        self.doc = inspect.getdoc(func) or ""
        try:
            self.type_hints = get_type_hints(func)
        except (NameError, TypeError):
            # Handle cases where type hints can't be resolved
            self.type_hints = {}

    def get_input_types(self) -> Dict[str, Type]:
        """Extract input types from function signature.

        ``self`` and variadic parameters (``*args`` / ``**kwargs``) are
        left out because they are not individually named inputs.
        Parameters without a resolvable annotation map to ``Any``.

        Returns:
            Dictionary mapping parameter names to types
        """
        variadic_kinds = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )
        return {
            param_name: self.type_hints.get(param_name, Any)
            for param_name, param in self.signature.parameters.items()
            if param_name != "self" and param.kind not in variadic_kinds
        }

    def get_output_type(self) -> Type:
        """Extract output type from function signature.

        Returns:
            Return type annotation or Any
        """
        return self.type_hints.get("return", Any)

    def execute(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the wrapped function.

        Args:
            inputs: Dictionary of input arguments

        Returns:
            The function's return value if it is a dict, otherwise
            ``{"result": <return value>}``
        """
        result = self.executor.execute_function(self.func, inputs)

        # Wrap non-dict results in a dict
        if not isinstance(result, dict):
            result = {"result": result}

        return result

    def to_node(
        self,
        name: Optional[str] = None,
        description: Optional[str] = None,
        input_schema: Optional[Dict[str, "NodeParameter"]] = None,
        output_schema: Optional[Dict[str, "NodeParameter"]] = None,
    ) -> "PythonCodeNode":
        """Convert function to a PythonCodeNode.

        Args:
            name: Node name (defaults to function name)
            description: Node description (defaults to function docstring)
            input_schema: Explicit input parameter schema for validation
            output_schema: Explicit output parameter schema for validation

        Returns:
            PythonCodeNode instance
        """
        return PythonCodeNode(
            name=name or self.name,
            function=self.func,
            description=description or self.doc,
            input_types=self.get_input_types(),
            output_type=self.get_output_type(),
            input_schema=input_schema,
            output_schema=output_schema,
        )
|
371
|
+
|
372
|
+
|
373
|
+
class ClassWrapper:
    """Wrapper for converting Python classes to stateful nodes.

    The wrapper picks one method of the class as its processing method,
    lazily creates a single instance on first execution, and keeps calling
    that instance afterwards so state can accumulate between executions
    made through the same wrapper.

    Example:
        class Accumulator:
            def __init__(self):
                self.total = 0

            def process(self, value: float) -> float:
                self.total += value
                return self.total

        wrapper = ClassWrapper(Accumulator)
        node = wrapper.to_node(name="accumulator")
    """

    def __init__(
        self,
        cls: Type,
        method_name: Optional[str] = None,
        executor: Optional["CodeExecutor"] = None,
    ):
        """Initialize the class wrapper.

        Args:
            cls: Python class to wrap
            method_name: Method name to call (auto-detected if not provided)
            executor: Code executor instance; a new ``CodeExecutor`` is
                created when omitted

        Raises:
            NodeConfigurationError: If no usable processing method exists
        """
        self.cls = cls
        self.method_name = method_name
        self.executor = executor or CodeExecutor()
        self.name = cls.__name__
        self.doc = inspect.getdoc(cls) or ""
        self.instance = None
        self._analyze_class()

    def _defines_call(self) -> bool:
        """Return True when the class or one of its bases defines ``__call__``."""
        # hasattr(cls, "__call__") is true for every class because the
        # metaclass provides it, so look in the class dictionaries instead.
        return any("__call__" in vars(base) for base in self.cls.__mro__)

    def _analyze_class(self):
        """Analyze class structure to find processing method.

        Sets ``process_method``, ``method``, ``signature`` and
        ``type_hints``.
        """
        if self.method_name:
            # Use provided method name
            if not hasattr(self.cls, self.method_name):
                raise NodeConfigurationError(
                    f"Class {self.name} has no method '{self.method_name}'"
                )
            self.process_method = self.method_name
        else:
            # Look for common method names, in priority order
            process_methods = ["process", "execute", "run", "transform", "__call__"]

            self.process_method = None
            for candidate in process_methods:
                if candidate == "__call__":
                    # Previously filtered out by a leading-underscore test,
                    # which made this advertised option unreachable.
                    usable = self._defines_call()
                else:
                    usable = callable(getattr(self.cls, candidate, None))
                if usable:
                    self.process_method = candidate
                    break

            if not self.process_method:
                raise NodeConfigurationError(
                    f"Class {self.name} must have a process method "
                    f"(one of: {', '.join(process_methods)})"
                )

        # Get method and signature
        method = getattr(self.cls, self.process_method)
        if not callable(method):
            # Covers attributes such as properties or plain values, which
            # would otherwise fail inside inspect.signature with TypeError.
            raise NodeConfigurationError(
                f"Class {self.name} does not have method '{self.process_method}'"
            )

        self.method = method
        self.signature = inspect.signature(method)

        # Get type hints
        try:
            self.type_hints = get_type_hints(method)
        except (TypeError, NameError):
            # Hints that cannot be resolved are treated as absent
            self.type_hints = {}

    def get_input_types(self) -> Dict[str, Type]:
        """Extract input types from method signature.

        Returns:
            Dictionary mapping every parameter except ``self`` to its type
            hint, or to ``Any`` when it has none
        """
        return {
            param_name: self.type_hints.get(param_name, Any)
            for param_name in self.signature.parameters
            if param_name != "self"
        }

    def get_output_type(self) -> Type:
        """Extract output type from method signature.

        Returns:
            Return type annotation or Any
        """
        return self.type_hints.get("return", Any)

    def execute(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the wrapped method.

        Args:
            inputs: Dictionary of input arguments

        Returns:
            The method's return value if it is a dict, otherwise
            ``{"result": <return value>}``

        Raises:
            NodeExecutionError: If the class cannot be instantiated without
                arguments
        """
        # Create instance if needed
        if self.instance is None:
            try:
                self.instance = self.cls()
            except Exception as e:
                raise NodeExecutionError(
                    f"Failed to create instance of {self.cls.__name__}: {e}"
                ) from e

        # Get the bound method from the instance
        method = getattr(self.instance, self.process_method)

        # Execute the method
        result = self.executor.execute_function(method, inputs)

        # Wrap non-dict results in a dict
        if not isinstance(result, dict):
            result = {"result": result}

        return result

    def to_node(
        self,
        name: Optional[str] = None,
        description: Optional[str] = None,
        input_schema: Optional[Dict[str, "NodeParameter"]] = None,
        output_schema: Optional[Dict[str, "NodeParameter"]] = None,
    ) -> "PythonCodeNode":
        """Convert class to a PythonCodeNode.

        Args:
            name: Node name (defaults to class name)
            description: Node description (defaults to class docstring)
            input_schema: Explicit input parameter schema for validation
            output_schema: Explicit output parameter schema for validation

        Returns:
            PythonCodeNode instance
        """
        return PythonCodeNode(
            name=name or self.name,
            class_type=self.cls,
            process_method=self.process_method,
            description=description or self.doc,
            # Passed along in the same way FunctionWrapper.to_node does.
            input_types=self.get_input_types(),
            output_type=self.get_output_type(),
            input_schema=input_schema,
            output_schema=output_schema,
        )
|
523
|
+
|
524
|
+
|
525
|
+
@register_node()
|
526
|
+
class PythonCodeNode(Node):
|
527
|
+
"""Node for executing arbitrary Python code.
|
528
|
+
|
529
|
+
This node allows users to execute custom Python code within a workflow.
|
530
|
+
It supports multiple input methods:
|
531
|
+
1. Direct code string execution
|
532
|
+
2. Function wrapping
|
533
|
+
3. Class wrapping
|
534
|
+
4. File-based code loading
|
535
|
+
|
536
|
+
Design Purpose:
|
537
|
+
- Provide maximum flexibility for custom logic
|
538
|
+
- Bridge gap between predefined nodes and custom requirements
|
539
|
+
- Enable rapid prototyping without node development
|
540
|
+
- Support both stateless and stateful processing
|
541
|
+
|
542
|
+
Key Features:
|
543
|
+
- Type inference from function signatures
|
544
|
+
- Safe code execution with error handling
|
545
|
+
- Support for external libraries
|
546
|
+
- State management for class-based nodes
|
547
|
+
- AST-based security validation
|
548
|
+
|
549
|
+
Example:
|
550
|
+
# Function-based node
|
551
|
+
def custom_filter(data: pd.DataFrame, threshold: float) -> pd.DataFrame:
|
552
|
+
return data[data['value'] > threshold]
|
553
|
+
|
554
|
+
node = PythonCodeNode.from_function(
|
555
|
+
func=custom_filter,
|
556
|
+
name="threshold_filter"
|
557
|
+
)
|
558
|
+
|
559
|
+
# Class-based stateful node
|
560
|
+
class MovingAverage:
|
561
|
+
def __init__(self, window_size: int = 3):
|
562
|
+
self.window_size = window_size
|
563
|
+
self.values = []
|
564
|
+
|
565
|
+
def process(self, value: float) -> float:
|
566
|
+
self.values.append(value)
|
567
|
+
if len(self.values) > self.window_size:
|
568
|
+
self.values.pop(0)
|
569
|
+
return sum(self.values) / len(self.values)
|
570
|
+
|
571
|
+
node = PythonCodeNode.from_class(
|
572
|
+
cls=MovingAverage,
|
573
|
+
name="moving_avg"
|
574
|
+
)
|
575
|
+
|
576
|
+
# Code string node
|
577
|
+
code = '''
|
578
|
+
result = []
|
579
|
+
for item in data:
|
580
|
+
if item > threshold:
|
581
|
+
result.append(item * 2)
|
582
|
+
'''
|
583
|
+
|
584
|
+
node = PythonCodeNode(
|
585
|
+
name="custom_processor",
|
586
|
+
code=code,
|
587
|
+
input_types={'data': list, 'threshold': float},
|
588
|
+
output_type=list
|
589
|
+
)
|
590
|
+
"""
|
591
|
+
|
592
|
+
def __init__(
    self,
    name: str,
    code: Optional[str] = None,
    function: Optional[Callable] = None,
    class_type: Optional[Type] = None,
    process_method: Optional[str] = None,
    input_types: Optional[Dict[str, Type]] = None,
    output_type: Optional[Type] = None,
    input_schema: Optional[Dict[str, "NodeParameter"]] = None,
    output_schema: Optional[Dict[str, "NodeParameter"]] = None,
    description: Optional[str] = None,
    **kwargs,
):
    """Set up a node backed by a code string, a function, or a class.

    Exactly one of ``code``, ``function`` and ``class_type`` must be given.

    Args:
        name: Node name; also used to derive the metadata id
        code: Python code string to execute
        function: Python function to wrap
        class_type: Python class to instantiate (called with no arguments)
        process_method: Method name for class-based execution
        input_types: Dictionary of input names to types
        output_type: Expected output type (``Any`` when omitted)
        input_schema: Explicit input parameter schema for validation
        output_schema: Explicit output parameter schema for validation
        description: Node description
        **kwargs: Additional node parameters forwarded to the base class

    Raises:
        NodeConfigurationError: If none, or more than one, of the three
            execution sources is supplied
    """
    # Count how many execution sources were actually supplied.
    supplied = sum(1 for source in (code, function, class_type) if source)
    if supplied == 0:
        raise NodeConfigurationError(
            "Must provide either code string, function, or class"
        )
    if supplied > 1:
        raise NodeConfigurationError(
            "Can only provide one of: code, function, or class"
        )

    # All attributes are assigned before the base initializer runs.
    self.code = code
    self.function = function
    self.class_type = class_type
    self.process_method = process_method
    self.input_types = input_types if input_types else {}
    self.output_type = output_type if output_type else Any
    self._input_schema = input_schema
    self._output_schema = output_schema

    # Class-based nodes get one instance up front; others hold None.
    self.instance = self.class_type() if self.class_type else None

    self.executor = CodeExecutor()

    # Only synthesize metadata when the caller did not pass their own.
    if "metadata" not in kwargs:
        node_id = name.replace(" ", "_").lower()
        kwargs["metadata"] = NodeMetadata(
            id=node_id,
            name=name,
            description=description or "Custom Python code node",
            tags={"custom", "python", "code"},
            version="1.0.0",
        )

    super().__init__(**kwargs)
|
661
|
+
|
662
|
+
def _validate_config(self):
    """Replace the base-class configuration check with a no-op marker.

    The parameters of a PythonCodeNode come from the wrapped function or
    class, so nothing is validated here. The method only makes sure the
    ``_skip_validation`` attribute exists, setting it to ``True`` when it
    has not been set before.
    """
    if hasattr(self, "_skip_validation"):
        return
    self._skip_validation = True
|
671
|
+
|
672
|
+
def get_parameters(self) -> Dict[str, "NodeParameter"]:
    """Describe the inputs this node accepts.

    An explicit input schema wins when one was supplied. Otherwise the
    schema is assembled from ``input_types`` first and then topped up
    with any further parameters found on the wrapped function, or on the
    wrapped class method when a process method name is set. Every
    generated parameter is marked required.

    Returns:
        Dictionary mapping parameter names to their definitions
    """
    if self._input_schema:
        return self._input_schema

    parameters = {}

    def declare(types_by_name):
        # Add a NodeParameter for each name not declared yet.
        for param_name, annotated in types_by_name.items():
            if param_name in parameters:
                continue
            # Generic aliases (anything exposing __origin__) are relaxed
            # to Any to avoid validation issues with complex types.
            if hasattr(annotated, "__origin__"):
                declared_type = Any
            else:
                declared_type = annotated
            parameters[param_name] = NodeParameter(
                name=param_name,
                type=declared_type,
                required=True,
                description=f"Input parameter {param_name}",
            )

    declare(self.input_types)

    if self.function:
        function_wrapper = FunctionWrapper(self.function, self.executor)
        declare(function_wrapper.get_input_types())
    elif self.class_type and self.process_method:
        class_wrapper = ClassWrapper(
            self.class_type, self.process_method, self.executor
        )
        declare(class_wrapper.get_input_types())

    return parameters
|
728
|
+
|
729
|
+
def get_output_schema(self) -> Dict[str, "NodeParameter"]:
    """Describe the outputs this node produces.

    Returns:
        The explicit output schema when one was supplied; otherwise a
        single required ``result`` entry typed as ``Any``
    """
    explicit_schema = self._output_schema
    if explicit_schema:
        return explicit_schema

    # Any is used rather than self.output_type to avoid validation issues.
    default_output = NodeParameter(
        name="result",
        type=Any,
        required=True,
        description="Output result",
    )
    return {"result": default_output}
|
748
|
+
|
749
|
+
def run(self, **kwargs) -> Dict[str, Any]:
    """Execute the node's logic.

    Dispatches on whichever execution source the node was built with:

    - code string: executed through the node's executor; if the code
      defined a ``result`` variable only that is returned, otherwise all
      variables the executor reports,
    - function: called through a ``FunctionWrapper``,
    - class: the process method (``"process"`` when no name is set) is
      called on the node's own instance, so state kept on that instance
      carries over from one run to the next.

    Args:
        **kwargs: Validated input data

    Returns:
        Dictionary of outputs

    Raises:
        NodeExecutionError: If execution fails or no execution source is
            available
    """
    try:
        if self.code:
            # Execute code string
            outputs = self.executor.execute_code(self.code, kwargs)
            # Return 'result' variable if it exists, otherwise all outputs
            if "result" in outputs:
                return {"result": outputs["result"]}
            return outputs

        if self.function:
            # Execute function
            wrapper = FunctionWrapper(self.function, self.executor)
            return wrapper.execute(kwargs)

        if self.class_type:
            # Execute class method
            wrapper = ClassWrapper(
                self.class_type, self.process_method or "process", self.executor
            )
            # A fresh wrapper would otherwise build a fresh instance on
            # every call and discard accumulated state; hand it the
            # instance this node already owns.
            wrapper.instance = self.instance
            result = wrapper.execute(kwargs)
            # Keep whatever instance the wrapper ended up using, in case
            # it had to create one.
            self.instance = wrapper.instance
            return result

        raise NodeExecutionError("No execution method available")

    except NodeExecutionError:
        raise
    except Exception as e:
        logger.error(f"Python code execution failed: {e}")
        raise NodeExecutionError(f"Execution failed: {str(e)}") from e
|
787
|
+
|
788
|
+
@classmethod
def from_function(
    cls,
    func: Callable,
    name: Optional[str] = None,
    description: Optional[str] = None,
    input_schema: Optional[Dict[str, "NodeParameter"]] = None,
    output_schema: Optional[Dict[str, "NodeParameter"]] = None,
    **kwargs,
) -> "PythonCodeNode":
    """Build a node around a Python function.

    Input and output types are read from the function's signature via a
    temporary ``FunctionWrapper``.

    Args:
        func: Python function to wrap
        name: Node name (defaults to function name)
        description: Node description (defaults to the function's
            ``__doc__``)
        input_schema: Explicit input parameter schema for validation
        output_schema: Explicit output parameter schema for validation
        **kwargs: Additional node parameters

    Returns:
        PythonCodeNode instance
    """
    # The wrapper is used for inspection only and then discarded.
    inspector = FunctionWrapper(func, CodeExecutor())
    inferred_inputs = inspector.get_input_types()
    inferred_output = inspector.get_output_type()

    node_name = name or func.__name__
    node_description = description or func.__doc__

    return cls(
        name=node_name,
        function=func,
        input_types=inferred_inputs,
        output_type=inferred_output,
        input_schema=input_schema,
        output_schema=output_schema,
        description=node_description,
        **kwargs,
    )
|
826
|
+
|
827
|
+
@classmethod
def from_class(
    cls,
    class_type: Type,
    process_method: Optional[str] = None,
    name: Optional[str] = None,
    description: Optional[str] = None,
    input_schema: Optional[Dict[str, "NodeParameter"]] = None,
    output_schema: Optional[Dict[str, "NodeParameter"]] = None,
    **kwargs,
) -> "PythonCodeNode":
    """Build a node that wraps a Python class.

    A temporary ``ClassWrapper`` is created to query input and output
    types. The method name forwarded to the node is the one stored on
    that wrapper (``wrapper.process_method``), not the raw argument.

    Args:
        class_type: Python class to wrap.
        process_method: Method name for processing (auto-detected if not
            provided).
        name: Node name; falls back to ``class_type.__name__`` when falsy.
        description: Node description; falls back to
            ``class_type.__doc__`` when falsy.
        input_schema: Explicit input parameter schema for validation.
        output_schema: Explicit output parameter schema for validation.
        **kwargs: Additional keyword arguments passed through to the
            node constructor.

    Returns:
        PythonCodeNode instance.
    """
    # The wrapper is used for introspection and to resolve the method name.
    probe = ClassWrapper(class_type, process_method, CodeExecutor())
    inferred_inputs = probe.get_input_types()
    inferred_output = probe.get_output_type()

    node_name = name if name else class_type.__name__
    resolved_method = probe.process_method
    node_description = description if description else class_type.__doc__

    return cls(
        name=node_name,
        class_type=class_type,
        process_method=resolved_method,
        input_types=inferred_inputs,
        output_type=inferred_output,
        input_schema=input_schema,
        output_schema=output_schema,
        description=node_description,
        **kwargs,
    )
|
868
|
+
|
869
|
+
@classmethod
def from_file(
    cls,
    file_path: Union[str, Path],
    function_name: Optional[str] = None,
    class_name: Optional[str] = None,
    name: Optional[str] = None,
    description: Optional[str] = None,
    input_schema: Optional[Dict[str, "NodeParameter"]] = None,
    output_schema: Optional[Dict[str, "NodeParameter"]] = None,
) -> "PythonCodeNode":
    """Create a node from a function or class defined in a Python file.

    The file is imported as a module named ``custom_module``. Selection
    order:

    1. ``function_name`` if given, via ``from_function``.
    2. ``class_name`` if given, via ``from_class``.
    3. Otherwise a callable named ``main``; failing that, the first
       public function *defined in the file itself* (definition order);
       failing that, the first public callable of any kind in sorted
       name order.

    NOTE: importing executes the file's top-level code, so only load
    files you trust.

    Args:
        file_path: Path to Python file.
        function_name: Function to use from file.
        class_name: Class to use from file (ignored when
            ``function_name`` is given).
        name: Node name.
        description: Node description.
        input_schema: Explicit input parameter schema for validation.
        output_schema: Explicit output parameter schema for validation.

    Returns:
        PythonCodeNode instance.

    Raises:
        NodeConfigurationError: If the file does not exist, cannot be
            loaded or executed as a module, or does not contain the
            requested (or any suitable) function or class.
    """
    file_path = Path(file_path)
    if not file_path.exists():
        raise NodeConfigurationError(f"File not found: {file_path}")

    # Load module from file
    spec = importlib.util.spec_from_file_location("custom_module", file_path)
    if not spec or not spec.loader:
        raise NodeConfigurationError(f"Cannot load module from {file_path}")

    module = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except Exception as e:
        # Syntax errors, failed imports, etc. are configuration problems
        # from the caller's point of view; keep the cause chained.
        raise NodeConfigurationError(
            f"Cannot load module from {file_path}: {e}"
        ) from e

    # Arguments forwarded unchanged to whichever factory is selected.
    node_kwargs = {
        "name": name,
        "description": description,
        "input_schema": input_schema,
        "output_schema": output_schema,
    }

    if function_name:
        if not hasattr(module, function_name):
            raise NodeConfigurationError(
                f"Function {function_name} not found in {file_path}"
            )
        return cls.from_function(getattr(module, function_name), **node_kwargs)

    if class_name:
        if not hasattr(module, class_name):
            raise NodeConfigurationError(
                f"Class {class_name} not found in {file_path}"
            )
        return cls.from_class(getattr(module, class_name), **node_kwargs)

    # No explicit target: pick an entry point. vars() preserves definition
    # order, unlike dir(), which sorts names and would put imported
    # capitalised names (Path, Any, ...) ahead of the file's own functions.
    public_callables = {
        attr_name: attr
        for attr_name, attr in vars(module).items()
        if callable(attr) and not attr_name.startswith("_")
    }

    entry = public_callables.get("main")
    if entry is None:
        entry = next(
            (
                attr
                for attr in public_callables.values()
                if inspect.isfunction(attr)
                and attr.__module__ == module.__name__
            ),
            None,
        )
    if entry is None and public_callables:
        # Last resort: keep the historical behaviour of taking the first
        # public callable in sorted-name order.
        entry = public_callables[min(public_callables)]

    if entry is None:
        raise NodeConfigurationError(
            f"No suitable function or class found in {file_path}"
        )
    return cls.from_function(entry, **node_kwargs)
|
952
|
+
|
953
|
+
def execute_code(self, inputs: Dict[str, Any]) -> Any:
    """Run the node's code, function or class directly on ``inputs``.

    Convenience entry point that bypasses the base node validation.
    Exactly one execution source is used, checked in this order:
    ``self.code``, ``self.function``, ``self.class_type``.

    Args:
        inputs: Dictionary of input values.

    Returns:
        The ``"result"`` entry of the execution output when present,
        otherwise the whole output.

    Raises:
        NodeExecutionError: If none of code, function or class is set.
    """
    if self.code:
        raw = self.executor.execute_code(self.code, inputs)
    elif self.function:
        raw = FunctionWrapper(self.function, self.executor).execute(inputs)
    elif self.class_type:
        runner = ClassWrapper(
            self.class_type, self.process_method or "process", self.executor
        )
        # Use the same instance for stateful behavior
        runner.instance = self.instance
        raw = runner.execute(inputs)
    else:
        raise NodeExecutionError("No execution method available")

    # Every branch unwraps the same way, so do it once here.
    return raw.get("result", raw)
|
983
|
+
|
984
|
+
def get_config(self) -> Dict[str, Any]:
    """Get node configuration for serialization.

    The result combines the node metadata (name, description, version,
    tags) with code-specific fields: the raw ``code`` string, the input
    and output types rendered as names, and, for function or class nodes,
    their source text.

    Source text is recorded as ``None`` when ``inspect.getsource`` cannot
    retrieve it (built-ins, callables without a source file) rather than
    failing the whole serialization.

    Returns:
        Configuration dictionary.
    """

    def _type_name(type_: Any) -> str:
        # Prefer the short name; fall back to the full repr-like string.
        return type_.__name__ if hasattr(type_, "__name__") else str(type_)

    def _source_or_none(obj: Any) -> Optional[str]:
        # getsource raises TypeError for built-ins and OSError when the
        # source file is unavailable.
        try:
            return inspect.getsource(obj)
        except (OSError, TypeError):
            return None

    # Metadata fields are read directly from this node's metadata object.
    config = {
        "name": self.metadata.name,
        "description": self.metadata.description,
        "version": self.metadata.version,
        "tags": list(self.metadata.tags) if self.metadata.tags else [],
    }

    # Add code-specific config
    config.update(
        {
            "code": self.code,
            "input_types": {
                name: _type_name(type_)
                for name, type_ in self.input_types.items()
            },
            "output_type": _type_name(self.output_type),
        }
    )

    # For function/class nodes, include source code
    if self.function:
        config["function_source"] = _source_or_none(self.function)
    elif self.class_type:
        config["class_source"] = _source_or_none(self.class_type)
        config["process_method"] = self.process_method

    return config
|