kailash 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. kailash/__init__.py +1 -1
  2. kailash/client/__init__.py +12 -0
  3. kailash/client/enhanced_client.py +306 -0
  4. kailash/core/actors/__init__.py +16 -0
  5. kailash/core/actors/connection_actor.py +566 -0
  6. kailash/core/actors/supervisor.py +364 -0
  7. kailash/edge/__init__.py +16 -0
  8. kailash/edge/compliance.py +834 -0
  9. kailash/edge/discovery.py +659 -0
  10. kailash/edge/location.py +582 -0
  11. kailash/gateway/__init__.py +33 -0
  12. kailash/gateway/api.py +289 -0
  13. kailash/gateway/enhanced_gateway.py +357 -0
  14. kailash/gateway/resource_resolver.py +217 -0
  15. kailash/gateway/security.py +227 -0
  16. kailash/middleware/auth/models.py +2 -2
  17. kailash/middleware/database/base_models.py +1 -7
  18. kailash/middleware/database/repositories.py +3 -1
  19. kailash/middleware/gateway/__init__.py +22 -0
  20. kailash/middleware/gateway/checkpoint_manager.py +398 -0
  21. kailash/middleware/gateway/deduplicator.py +382 -0
  22. kailash/middleware/gateway/durable_gateway.py +417 -0
  23. kailash/middleware/gateway/durable_request.py +498 -0
  24. kailash/middleware/gateway/event_store.py +459 -0
  25. kailash/nodes/admin/audit_log.py +364 -6
  26. kailash/nodes/admin/permission_check.py +817 -33
  27. kailash/nodes/admin/role_management.py +1242 -108
  28. kailash/nodes/admin/schema_manager.py +438 -0
  29. kailash/nodes/admin/user_management.py +1209 -681
  30. kailash/nodes/api/http.py +95 -71
  31. kailash/nodes/base.py +281 -164
  32. kailash/nodes/base_async.py +30 -31
  33. kailash/nodes/code/__init__.py +8 -1
  34. kailash/nodes/code/async_python.py +1035 -0
  35. kailash/nodes/code/python.py +1 -0
  36. kailash/nodes/data/async_sql.py +12 -25
  37. kailash/nodes/data/sql.py +20 -11
  38. kailash/nodes/data/workflow_connection_pool.py +643 -0
  39. kailash/nodes/rag/__init__.py +1 -4
  40. kailash/resources/__init__.py +40 -0
  41. kailash/resources/factory.py +533 -0
  42. kailash/resources/health.py +319 -0
  43. kailash/resources/reference.py +288 -0
  44. kailash/resources/registry.py +392 -0
  45. kailash/runtime/async_local.py +711 -302
  46. kailash/testing/__init__.py +34 -0
  47. kailash/testing/async_test_case.py +353 -0
  48. kailash/testing/async_utils.py +345 -0
  49. kailash/testing/fixtures.py +458 -0
  50. kailash/testing/mock_registry.py +495 -0
  51. kailash/utils/resource_manager.py +420 -0
  52. kailash/workflow/__init__.py +8 -0
  53. kailash/workflow/async_builder.py +621 -0
  54. kailash/workflow/async_patterns.py +766 -0
  55. kailash/workflow/builder.py +93 -10
  56. kailash/workflow/cyclic_runner.py +111 -41
  57. kailash/workflow/graph.py +7 -2
  58. kailash/workflow/resilience.py +11 -1
  59. {kailash-0.4.2.dist-info → kailash-0.6.0.dist-info}/METADATA +12 -7
  60. {kailash-0.4.2.dist-info → kailash-0.6.0.dist-info}/RECORD +64 -28
  61. {kailash-0.4.2.dist-info → kailash-0.6.0.dist-info}/WHEEL +0 -0
  62. {kailash-0.4.2.dist-info → kailash-0.6.0.dist-info}/entry_points.txt +0 -0
  63. {kailash-0.4.2.dist-info → kailash-0.6.0.dist-info}/licenses/LICENSE +0 -0
  64. {kailash-0.4.2.dist-info → kailash-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1035 @@
1
+ """Asynchronous Python Code Execution Node.
2
+
3
+ This module provides AsyncPythonCodeNode for executing async Python code
4
+ with proper security controls and resource management.
5
+
6
+ Key Features:
7
+ - Native async/await support for Python code execution
8
+ - Concurrent task management with resource limits
9
+ - Security sandbox with controlled module access
10
+ - Timeout and memory limit enforcement
11
+ - Integration with async libraries and databases
12
+
13
+ Example Usage:
14
+ Basic async execution:
15
+ ```python
16
+ node = AsyncPythonCodeNode(
17
+ code='''
18
+ import asyncio
19
+
20
+ # Fetch data concurrently
21
+ async def fetch_item(id):
22
+ await asyncio.sleep(0.1) # Simulate I/O
23
+ return {"id": id, "data": f"Item {id}"}
24
+
25
+ ids = [1, 2, 3, 4, 5]
26
+ tasks = [fetch_item(id) for id in ids]
27
+ items = await asyncio.gather(*tasks)
28
+
29
+ result = {"items": items, "count": len(items)}
30
+ ''',
31
+ timeout=30,
32
+ max_concurrent_tasks=10
33
+ )
34
+
35
+ output = await node.execute_async()
36
+ ```
37
+
38
+ Database operations with connection pool:
39
+ ```python
40
+ node = AsyncPythonCodeNode(
41
+ code='''
42
+ # Async database operations
43
+ conn = await pool.acquire()
44
+ try:
45
+ # Run multiple queries concurrently
46
+ results = await asyncio.gather(
47
+ pool.execute("SELECT * FROM users WHERE active = true"),
48
+ pool.execute("SELECT COUNT(*) FROM orders"),
49
+ pool.execute("SELECT * FROM products LIMIT 10")
50
+ )
51
+
52
+ result = {
53
+ "users": results[0],
54
+ "order_count": results[1],
55
+ "products": results[2]
56
+ }
57
+ finally:
58
+ await pool.release(conn)
59
+ '''
60
+ )
61
+
62
+ # Execute with runtime inputs
63
+ output = await node.execute_async(pool=database_pool)
64
+ ```
65
+
66
+ Security Model:
67
+ The node operates in a secure sandbox with:
68
+ - Whitelisted module imports only
69
+ - No access to filesystem (except through allowed modules)
70
+ - No subprocess or system command execution
71
+ - Resource limits on memory and concurrent tasks
72
+ - Timeout enforcement for runaway code
73
+
74
+ Performance Considerations:
75
+ - Best for I/O-bound operations (database, API calls)
76
+ - Overhead for CPU-bound tasks (use PythonCodeNode instead)
77
+ - Concurrent task limit prevents resource exhaustion
78
+ - Event loop is managed automatically
79
+ """
80
+
81
+ import ast
82
+ import asyncio
83
+ import inspect
84
+ import logging
85
+ import time
86
+ import traceback
87
+ from typing import Any, Dict, Optional, Set
88
+
89
+ from kailash.nodes.base import NodeMetadata, NodeParameter, register_node
90
+ from kailash.nodes.base_async import AsyncNode
91
+ from kailash.resources import ResourceRegistry
92
+ from kailash.sdk_exceptions import (
93
+ NodeConfigurationError,
94
+ NodeExecutionError,
95
+ SafetyViolationError,
96
+ )
97
+ from kailash.security import ExecutionTimeoutError
98
+
99
logger = logging.getLogger(__name__)

# Whitelist of modules that sandboxed async code may import.
#
# "concurrent" is listed next to "concurrent.futures" so that checks keyed on
# the top-level package name also accept ``import concurrent.futures``.
#
# NOTE(review): "os", "pathlib", "tempfile" and "pickle" expose filesystem,
# process and arbitrary-deserialization capabilities (for example
# ``os.system`` or ``pickle.loads``). They are kept for backward
# compatibility; confirm they are really meant to be reachable from
# untrusted code.
ALLOWED_ASYNC_MODULES = {
    # Core async functionality
    "asyncio",
    "contextvars",  # For async context management
    "concurrent",  # Root package of concurrent.futures
    "concurrent.futures",  # For thread/process pools
    # Standard library - data types and utilities
    "uuid",
    "json",
    "datetime",
    "time",
    "random",
    "collections",
    "functools",
    "itertools",
    "math",
    "statistics",
    "string",
    "re",
    "enum",
    "dataclasses",
    "typing",
    "copy",
    "pickle",  # For serialization (with caution)
    "base64",
    "hashlib",
    "hmac",
    "secrets",  # For cryptographic randomness
    # Data processing and analysis
    "pandas",
    "numpy",
    "scipy",
    "sklearn",
    # Async file operations
    "aiofiles",  # Async file I/O
    "pathlib",
    "os",  # Limited to safe operations
    "tempfile",  # For temporary files
    # Async HTTP and networking
    "aiohttp",  # Async HTTP client/server
    "httpx",  # Modern async HTTP client
    "websockets",  # WebSocket support
    # Database drivers (async-native)
    "asyncpg",  # PostgreSQL
    "aiomysql",  # MySQL
    "motor",  # MongoDB
    "aioredis",  # Redis
    "aiosqlite",  # SQLite
    # Message queues and streaming
    "aiokafka",  # Kafka
    "aio_pika",  # RabbitMQ
    # Cloud SDKs (async variants)
    "aioboto3",  # AWS
    "aioazure",  # Azure
    # Serialization and encoding
    "msgpack",
    "orjson",  # Fast JSON
    "yaml",
    "toml",
    # Monitoring and logging
    "structlog",  # Structured logging
    "prometheus_client",  # Metrics
    # Utilities
    "cachetools",  # Caching
    "tenacity",  # Retry logic
    "ratelimit",  # Rate limiting
}
168
+
169
# Function names that sandboxed code must not call directly (or through an
# object that is not a whitelisted module).
BLOCKED_ASYNC_PATTERNS = [
    # Process spawning
    "subprocess",
    "multiprocessing",
    # Dynamic import / code evaluation
    "__import__",
    "eval",
    "exec",
    "compile",
    # Blocking I/O
    "open",  # Use aiofiles instead
    "input",
    "raw_input",
]
181
+
182
+
183
class AsyncSafeCodeChecker(ast.NodeVisitor):
    """AST visitor that collects safety violations in user-supplied async code.

    After ``visit(tree)`` the following attributes are populated:

    * ``violations`` - list of dicts with ``type``, ``line``, ``message`` and
      either ``module`` (import violations) or ``function`` (call/access
      violations).
    * ``imports_found`` - top-level name of every module imported.
    * ``has_async`` - True if an ``async def``, ``async with`` or
      ``async for`` was seen.
    * ``concurrent_task_count`` - number of call sites named ``create_task``,
      ``ensure_future`` or ``gather`` (a static count, not a runtime one).
    """

    def __init__(self):
        self.violations = []
        self.imports_found = []
        self.has_async = False
        self.concurrent_task_count = 0
        # Local names bound by ``import x`` / ``import x as y`` for
        # whitelisted modules, so ``y.open()`` is treated like ``x.open()``.
        self._allowed_module_names = set()

    @staticmethod
    def _is_whitelisted(dotted_name):
        """Return True if any dotted prefix of the name is whitelisted.

        Prefix matching lets dotted whitelist entries such as
        ``concurrent.futures`` match, in addition to plain top-level names.
        """
        parts = dotted_name.split(".")
        return any(
            ".".join(parts[:depth]) in ALLOWED_ASYNC_MODULES
            for depth in range(1, len(parts) + 1)
        )

    def _check_module(self, node, dotted_name, kind, verb):
        """Record an import and report it if it is not whitelisted.

        Returns True when the module is allowed.
        """
        module_name = dotted_name.split(".")[0]
        self.imports_found.append(module_name)
        if self._is_whitelisted(dotted_name):
            return True
        self.violations.append(
            {
                "type": kind,
                "module": module_name,
                "line": node.lineno,
                "message": f"{verb} module '{module_name}' is not allowed in async context",
            }
        )
        return False

    def visit_Import(self, node):
        """Check ``import x`` / ``import x as y`` statements."""
        for alias in node.names:
            if self._check_module(node, alias.name, "import", "Import of"):
                # ``import a.b`` binds ``a``; ``import a.b as c`` binds ``c``.
                bound_name = alias.asname or alias.name.split(".")[0]
                self._allowed_module_names.add(bound_name)
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Check ``from x import y`` statements (relative imports have no module)."""
        if node.module:
            self._check_module(node, node.module, "import_from", "Import from")
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node):
        """Track async function definitions."""
        self.has_async = True
        self.generic_visit(node)

    def visit_AsyncWith(self, node):
        """Track async with statements."""
        self.has_async = True
        self.generic_visit(node)

    def visit_AsyncFor(self, node):
        """Track async for loops."""
        self.has_async = True
        self.generic_visit(node)

    def visit_Subscript(self, node):
        """Report ``__builtins__["name"]`` lookups of blocked functions."""
        target = node.value
        if isinstance(target, ast.Name) and target.id == "__builtins__":
            key = node.slice
            if isinstance(key, ast.Constant):
                func_name = key.value
                if func_name in BLOCKED_ASYNC_PATTERNS:
                    self.violations.append(
                        {
                            "type": "dangerous_access",
                            "function": func_name,
                            "line": node.lineno,
                            "message": f"Access to '{func_name}' through __builtins__ is not allowed for security reasons",
                        }
                    )
        self.generic_visit(node)

    def visit_Call(self, node):
        """Report calls to blocked functions and count task-spawning calls."""
        func = node.func
        func_name = None
        is_builtin_call = False

        if isinstance(func, ast.Name):
            func_name = func.id
            is_builtin_call = True  # Direct function call like open()
        elif isinstance(func, ast.Attribute):
            func_name = func.attr
            if isinstance(func.value, ast.Name):
                owner = func.value.id
                # A blocked name is tolerated only as an attribute of a
                # whitelisted module, referenced by name or import alias.
                from_allowed_module = (
                    owner in ALLOWED_ASYNC_MODULES
                    or owner in self._allowed_module_names
                )
                if not from_allowed_module:
                    is_builtin_call = True

        if func_name in BLOCKED_ASYNC_PATTERNS and is_builtin_call:
            self.violations.append(
                {
                    "type": "dangerous_call",
                    "function": func_name,
                    "line": node.lineno,
                    "message": f"Call to '{func_name}' is not allowed for security reasons",
                }
            )

        # Track concurrent task creation
        if func_name in ["create_task", "ensure_future", "gather"]:
            self.concurrent_task_count += 1

        self.generic_visit(node)
290
+
291
+
292
+ @register_node()
293
+ class AsyncPythonCodeNode(AsyncNode):
294
+ """Execute asynchronous Python code with security controls and resource management.
295
+
296
+ AsyncPythonCodeNode provides a secure environment for executing async Python code
297
+ within Kailash workflows. It's designed for I/O-bound operations that benefit from
298
+ concurrent execution, such as database queries, API calls, and file operations.
299
+
300
+ Features:
301
+ - **Native async/await support**: Write natural async Python code
302
+ - **Concurrent execution**: Run multiple async operations in parallel
303
+ - **Resource limits**: Control memory usage and concurrent task count
304
+ - **Security sandbox**: Only whitelisted modules can be imported
305
+ - **Timeout protection**: Prevent runaway code execution
306
+ - **Rich module ecosystem**: Access to async database drivers, HTTP clients, etc.
307
+
308
+ Security Model:
309
+ The node executes code in a restricted environment where:
310
+ - Only modules in ALLOWED_ASYNC_MODULES can be imported
311
+ - Dangerous operations (subprocess, eval, exec) are blocked
312
+ - File system access is limited to safe operations
313
+ - Network access is allowed through whitelisted libraries
314
+ - Resource limits prevent memory and CPU exhaustion
315
+
316
+ Parameters:
317
+ code (str): The async Python code to execute. Must be valid Python with
318
+ proper async/await syntax. The code should set a 'result' variable
319
+ with the output data.
320
+ timeout (int): Maximum execution time in seconds (default: 30).
321
+ Prevents infinite loops and runaway code.
322
+ max_concurrent_tasks (int): Maximum number of concurrent asyncio tasks
323
+ (default: 10). Prevents resource exhaustion from too many parallel operations.
324
+ max_memory_mb (int): Maximum memory usage in MB (default: 512).
325
+ Note: Only enforced on Unix systems with resource module support.
326
+
327
+ Inputs:
328
+ The node accepts arbitrary keyword arguments that will be available as
329
+ variables in the execution context. All inputs must be JSON-serializable
330
+ when used through the gateway API.
331
+
332
+ Outputs:
333
+ Returns a dictionary containing the 'result' variable from the executed code.
334
+ If no 'result' variable is set, returns an empty dictionary.
335
+
336
+ Example - Basic async operation:
337
+ ```python
338
+ node = AsyncPythonCodeNode(
339
+ code='''
340
+ import asyncio
341
+
342
+ # Simple async operation
343
+ await asyncio.sleep(0.1)
344
+ result = {"status": "completed", "duration": 0.1}
345
+ '''
346
+ )
347
+
348
+ output = await node.execute_async()
349
+ # Returns: {"status": "completed", "duration": 0.1}
350
+ ```
351
+
352
+ Example - Concurrent database queries:
353
+ ```python
354
+ node = AsyncPythonCodeNode(
355
+ code='''
356
+ import asyncio
357
+ import asyncpg
358
+
359
+ # Connect to database
360
+ conn = await asyncpg.connect(database_url)
361
+
362
+ try:
363
+ # Run queries concurrently
364
+ user_query = conn.fetch("SELECT * FROM users WHERE active = true")
365
+ stats_query = conn.fetch("SELECT COUNT(*) as total FROM orders")
366
+ recent_query = conn.fetch("SELECT * FROM orders ORDER BY created DESC LIMIT 10")
367
+
368
+ users, stats, recent = await asyncio.gather(
369
+ user_query, stats_query, recent_query
370
+ )
371
+
372
+ result = {
373
+ "active_users": len(users),
374
+ "total_orders": stats[0]['total'],
375
+ "recent_orders": [dict(order) for order in recent]
376
+ }
377
+ finally:
378
+ await conn.close()
379
+ ''',
380
+ timeout=10,
381
+ max_concurrent_tasks=5
382
+ )
383
+
384
+ output = await node.execute_async(database_url="postgresql://...")
385
+ ```
386
+
387
+ Example - Parallel API calls with rate limiting:
388
+ ```python
389
+ node = AsyncPythonCodeNode(
390
+ code='''
391
+ import asyncio
392
+ import aiohttp
393
+ from asyncio import Semaphore
394
+
395
+ # Rate limit to 5 concurrent requests
396
+ semaphore = Semaphore(5)
397
+
398
+ async def fetch_data(session, url):
399
+ async with semaphore:
400
+ async with session.get(url) as response:
401
+ return await response.json()
402
+
403
+ # Process URLs in parallel
404
+ async with aiohttp.ClientSession() as session:
405
+ tasks = [fetch_data(session, url) for url in urls]
406
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
407
+
408
+ # Filter out errors
409
+ valid_responses = [r for r in responses if not isinstance(r, Exception)]
410
+
411
+ result = {
412
+ "success_count": len(valid_responses),
413
+ "error_count": len(responses) - len(valid_responses),
414
+ "data": valid_responses
415
+ }
416
+ ''',
417
+ max_concurrent_tasks=20 # Allow more tasks for API calls
418
+ )
419
+
420
+ urls = ["https://api.example.com/data/1", ...]
421
+ output = await node.execute_async(urls=urls)
422
+ ```
423
+
424
+ Best Practices:
425
+ 1. Always use try/finally blocks for cleanup (closing connections, files)
426
+ 2. Use asyncio.gather() for concurrent operations
427
+ 3. Implement proper error handling for network operations
428
+ 4. Set appropriate timeouts for external service calls
429
+ 5. Use semaphores to limit concurrent operations when needed
430
+ 6. Return results in a dictionary format for consistency
431
+
432
+ Performance Tips:
433
+ - Use for I/O-bound operations (network, database, file I/O)
434
+ - Not ideal for CPU-bound tasks (use PythonCodeNode instead)
435
+ - Batch operations when possible to reduce overhead
436
+ - Monitor concurrent task count to avoid overwhelming resources
437
+
438
+ Limitations:
439
+ - Cannot import modules not in ALLOWED_ASYNC_MODULES
440
+ - Cannot execute system commands or create subprocesses
441
+ - Limited file system access (use dedicated file nodes for complex operations)
442
+ - All inputs must be serializable when used through gateway
443
+ - Memory limits may not be enforced on all platforms
444
+ """
445
+
446
# Static descriptor for this node type (name, category, UI hints).
metadata = NodeMetadata(
    # Identity
    name="AsyncPythonCodeNode",
    display_name="Async Python Code",
    version="1.0.0",
    # Classification
    category="code",
    tags=["code", "async", "python", "script"],
    # Presentation
    description="Execute asynchronous Python code with security controls",
    icon="mdi-language-python",
)
455
+
456
def __init__(self, **config):
    """Initialize the node and validate its code immediately.

    Args:
        code (str): Async Python source to execute. It should assign its
            output to a variable named ``result``.
        timeout (int, optional): Maximum execution time in seconds.
            Defaults to 30.
        max_concurrent_tasks (int, optional): Limit applied to tasks created
            through ``asyncio.create_task`` while the code runs.
            Defaults to 10.
        max_memory_mb (int, optional): Requested memory ceiling in MB.
            Defaults to 512. The value is only stored here.
        imports (list[str], optional): Extra module names. Stored as
            ``allowed_imports`` but not consulted by validation; ``None``
            is treated as an empty list.
        **config: Forwarded unchanged to the parent class.

    Raises:
        NodeConfigurationError: If the code is empty or not valid Python.
        SafetyViolationError: If the code imports a non-whitelisted module
            or calls a blocked function.

    Example:
        ```python
        node = AsyncPythonCodeNode(
            code="await asyncio.sleep(0.1); result = {'done': True}",
            timeout=60,
            max_concurrent_tasks=50,
        )
        ```
    """
    super().__init__(**config)

    self.code = config.get("code", "")
    self.timeout = config.get("timeout", 30)
    self.max_concurrent_tasks = config.get("max_concurrent_tasks", 10)
    self.max_memory_mb = config.get("max_memory_mb", 512)
    # ``imports=None`` (e.g. from a serialized config) must not crash set().
    self.allowed_imports = set(config.get("imports") or [])

    # Fail fast: surface syntax errors and safety violations at construction.
    self._validate_code()
517
+
518
def _validate_code(self):
    """Parse ``self.code`` and reject it if the safety checker finds violations.

    Raises:
        NodeConfigurationError: If the code is empty or fails to parse.
        SafetyViolationError: If the checker reports any violation. The
            message lists each violation with its line number, followed by
            suggestions for disallowed imports.

    A warning is logged when the static count of task-spawning calls exceeds
    ``self.max_concurrent_tasks``.
    """
    if not self.code:
        raise NodeConfigurationError("Code cannot be empty")

    try:
        parsed = ast.parse(self.code)
    except SyntaxError as e:
        raise NodeConfigurationError(f"Invalid Python syntax: {e}")

    inspector = AsyncSafeCodeChecker()
    inspector.visit(parsed)
    found = inspector.violations

    if found:
        report_lines = [f"Line {item['line']}: {item['message']}" for item in found]
        # One suggestion per import violation, each listing the whitelist.
        hints = [
            f"- Module '{item['module']}' is not allowed. "
            f"Available async modules: {', '.join(sorted(ALLOWED_ASYNC_MODULES))}"
            for item in found
            if item["type"] in ["import", "import_from"]
        ]

        error_msg = "Code safety violations found:\n" + "\n".join(report_lines)
        if hints:
            error_msg += "\n\nSuggestions:\n" + "\n".join(hints)

        raise SafetyViolationError(error_msg)

    # Advisory only: the count is static (call sites), not a runtime measure.
    planned_tasks = inspector.concurrent_task_count
    if planned_tasks > self.max_concurrent_tasks:
        logger.warning(
            f"Code may create {planned_tasks} concurrent tasks, "
            f"but limit is {self.max_concurrent_tasks}"
        )
561
+
562
def get_parameters(self) -> Dict[str, NodeParameter]:
    """Describe the node's declared parameters: code, timeout, max_concurrent_tasks."""
    # (name, type, description, required, default)
    specs = (
        ("code", str, "Async Python code to execute", True, ""),
        ("timeout", int, "Execution timeout in seconds", False, 30),
        (
            "max_concurrent_tasks",
            int,
            "Maximum concurrent asyncio tasks",
            False,
            10,
        ),
    )
    return {
        name: NodeParameter(
            name=name,
            type=kind,
            description=text,
            required=needed,
            default=fallback,
        )
        for name, kind, text, needed, fallback in specs
    }
587
+
588
def validate_inputs(self, **kwargs) -> Dict[str, Any]:
    """Return the runtime inputs unchanged.

    This node performs no type or required-field checks on its inputs: every
    keyword argument is handed through as-is so it can be exposed as a
    variable to the user's code. Any validation is the responsibility of that
    code, for example:

    ```python
    if not database_url:
        raise ValueError("database_url is required")
    if not isinstance(user_ids, list):
        raise TypeError("user_ids must be a list")
    ```

    Args:
        **kwargs: Arbitrary keyword arguments supplied at execution time
            (connections, credentials, data to process, settings, ...).

    Returns:
        Dict[str, Any]: The same keyword arguments, unmodified.
    """
    # Intentional pass-through; no filtering or coercion happens here.
    return kwargs
643
+
644
+ def _create_safe_namespace(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
645
+ """Create a safe execution namespace."""
646
+
647
+ # Custom import function that only allows whitelisted modules
648
+ def safe_import(name, *args, **kwargs):
649
+ """Restricted import that only allows whitelisted modules."""
650
+ module_name = name.split(".")[0]
651
+ if module_name not in ALLOWED_ASYNC_MODULES:
652
+ raise ImportError(f"Import of module '{module_name}' is not allowed")
653
+ return __import__(name, *args, **kwargs)
654
+
655
+ # Safe builtins (limited set)
656
+ safe_builtins = {
657
+ "__import__": safe_import, # Controlled import
658
+ "locals": locals,
659
+ "globals": globals,
660
+ "len": len,
661
+ "range": range,
662
+ "enumerate": enumerate,
663
+ "zip": zip,
664
+ "map": map,
665
+ "filter": filter,
666
+ "sum": sum,
667
+ "min": min,
668
+ "max": max,
669
+ "abs": abs,
670
+ "round": round,
671
+ "sorted": sorted,
672
+ "reversed": reversed,
673
+ "all": all,
674
+ "any": any,
675
+ "bool": bool,
676
+ "int": int,
677
+ "float": float,
678
+ "str": str,
679
+ "list": list,
680
+ "dict": dict,
681
+ "set": set,
682
+ "tuple": tuple,
683
+ "print": print, # For debugging
684
+ "isinstance": isinstance,
685
+ "hasattr": hasattr,
686
+ "getattr": getattr,
687
+ "setattr": setattr,
688
+ "type": type,
689
+ "callable": callable,
690
+ "hash": hash,
691
+ "Exception": Exception,
692
+ "ValueError": ValueError,
693
+ "TypeError": TypeError,
694
+ "KeyError": KeyError,
695
+ "IndexError": IndexError,
696
+ "RuntimeError": RuntimeError,
697
+ "ConnectionError": ConnectionError,
698
+ "OSError": OSError,
699
+ "FileNotFoundError": FileNotFoundError,
700
+ }
701
+
702
+ # Create namespace with inputs and safe builtins
703
+ namespace = {
704
+ "__builtins__": safe_builtins,
705
+ **inputs, # Make inputs available as variables
706
+ }
707
+
708
+ return namespace
709
+
710
+ def _indent_code(self, code: str, indent: str = " ") -> str:
711
+ """Indent code by the specified amount."""
712
+ lines = code.split("\n")
713
+ indented_lines = []
714
+ for line in lines:
715
+ if line.strip(): # Non-empty lines
716
+ indented_lines.append(indent + line)
717
+ else: # Preserve empty lines
718
+ indented_lines.append("")
719
+ return "\n".join(indented_lines)
720
+
721
async def async_run(
    self, resource_registry: Optional[ResourceRegistry] = None, **kwargs
) -> Dict[str, Any]:
    """Execute the configured async code and return its ``result``.

    Steps:
        1. Drop node-configuration keys from ``kwargs``; the rest are the
           runtime inputs.
        2. Build the restricted namespace and, when a registry is supplied,
           expose ``get_resource(name)`` in it.
        3. Wrap the user code in a generated ``async def`` whose first
           statements copy each input into a local variable.
        4. Temporarily replace ``asyncio.create_task`` with a wrapper that
           runs each task under a semaphore of ``max_concurrent_tasks``.
        5. Await the generated function under ``asyncio.wait_for`` with
           ``self.timeout``.

    Args:
        resource_registry: Optional registry backing ``get_resource``.
        **kwargs: Runtime inputs. Only keys that are valid, non-keyword
            Python identifiers become local variables; other keys remain
            reachable through the namespace only.

    Returns:
        Dict[str, Any]: The value of ``result`` if it is a dict,
        ``{"value": result}`` for any other value, or ``{}`` when the code
        never assigns ``result``.

    Raises:
        NodeExecutionError: On timeout, a syntax error in the generated
            function, or any exception raised by the user code.
    """
    import keyword

    try:
        # Filter out node configuration parameters from runtime inputs
        config_params = {
            "code",
            "timeout",
            "max_concurrent_tasks",
            "max_memory_mb",
            "imports",
            "config",
        }
        runtime_inputs = {k: v for k, v in kwargs.items() if k not in config_params}

        namespace = self._create_safe_namespace(runtime_inputs)

        if resource_registry:

            async def get_resource(name: str):
                """Get resource from registry."""
                return await resource_registry.get_resource(name)

            namespace["get_resource"] = get_resource

        # Generate unique function name to avoid conflicts
        func_name = f"_async_user_func_{id(self)}"
        body_indent = "    "

        # Input names are spliced into generated source, so only plain
        # identifiers are accepted. Anything else would either break the
        # generated code or smuggle statements past the AST safety check.
        bindable = [
            key
            for key in runtime_inputs
            if key.isidentifier() and not keyword.iskeyword(key)
        ]
        skipped = [key for key in runtime_inputs if key not in bindable]
        if skipped:
            logger.warning(
                "Ignoring inputs whose names are not valid identifiers: %r",
                skipped,
            )

        input_assignments = [
            f"{body_indent}{key} = _namespace[{key!r}]" for key in bindable
        ]
        input_code = (
            "\n".join(input_assignments)
            if input_assignments
            else f"{body_indent}pass"
        )

        # The whole coroutine function is compiled as one unit. The trailing
        # try/except returns {} when the user code never assigned `result`.
        async_func_code = (
            f"\nasync def {func_name}(_namespace):\n"
            f"{body_indent}# Extract input variables\n"
            f"{input_code}\n"
            "\n"
            f"{body_indent}# User's async code\n"
            f"{self._indent_code(self.code, body_indent)}\n"
            "\n"
            f"{body_indent}# Return result if defined\n"
            f"{body_indent}try:\n"
            f"{body_indent}{body_indent}return result\n"
            f"{body_indent}except NameError:\n"
            f"{body_indent}{body_indent}return {{}}\n"
        )

        try:
            compiled = compile(async_func_code, "<async_user_code>", "exec")
            exec(compiled, namespace)
        except SyntaxError as e:
            raise NodeExecutionError(f"Syntax error in async code: {e}") from e

        user_function = namespace[func_name]

        task_semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
        # If another execution already installed a wrapper, restore to the
        # real function rather than to that wrapper. Otherwise overlapping
        # runs could leave a stale wrapper installed permanently.
        original_create_task = getattr(
            asyncio.create_task, "_kailash_original", asyncio.create_task
        )

        def limited_create_task(coro, **task_kwargs):
            """Create a task whose body runs while holding the semaphore."""

            async def wrapped():
                async with task_semaphore:
                    return await coro

            # Forward keyword arguments such as name= to the real function.
            return original_create_task(wrapped(), **task_kwargs)

        limited_create_task._kailash_original = original_create_task

        # NOTE: this patch is process-wide for the duration of the run.
        asyncio.create_task = limited_create_task

        try:
            start_time = time.monotonic()
            result = await asyncio.wait_for(
                user_function(namespace), timeout=self.timeout
            )
            execution_time = time.monotonic() - start_time

            logger.debug(
                f"AsyncPythonCodeNode executed successfully in {execution_time:.2f}s"
            )

            # Ensure result is a dictionary
            if not isinstance(result, dict):
                result = {"value": result}

            return result

        finally:
            # Restore original create_task
            asyncio.create_task = original_create_task

    except asyncio.TimeoutError as e:
        raise NodeExecutionError(
            f"Async code execution exceeded {self.timeout}s timeout"
        ) from e
    except Exception as e:
        logger.error(f"Async code execution failed: {e}")
        logger.debug(f"Traceback: {traceback.format_exc()}")
        raise NodeExecutionError(f"Execution failed: {str(e)}") from e
887
+
888
def validate_outputs(self, outputs: Dict[str, Any]) -> Dict[str, Any]:
    """Check that ``outputs`` can be serialized to JSON and return it unchanged.

    The check is a trial ``json.dumps`` call; the encoded string is discarded
    and the very same ``outputs`` object is handed back to the caller.

    Args:
        outputs: Mapping produced by the executed code.

    Returns:
        The ``outputs`` object that was passed in (not a copy).

    Raises:
        NodeExecutionError: If ``json.dumps`` rejects the value (unsupported
            type, circular reference, or nesting too deep to encode). The
            underlying exception is attached as ``__cause__``.
    """
    import json

    try:
        # Only the success/failure of the encoding matters, not its result.
        json.dumps(outputs)
    except (TypeError, ValueError, RecursionError) as e:
        # RecursionError is included so that pathologically deep structures
        # are reported as a node error rather than a raw interpreter error.
        raise NodeExecutionError(f"Output must be JSON-serializable: {e}") from e
    return outputs
898
+
899
@classmethod
def from_function(cls, func, **config):
    """Create a node whose code is the body of an existing async function.

    The source of ``func`` is read with ``inspect.getsource()``, everything
    up to and including the ``async def ...:`` header (decorators and a
    signature spanning several lines included) is dropped, and the remaining
    body is dedented and passed to the constructor as ``code``.

    Args:
        func (Callable): A coroutine function (defined with ``async def``)
            whose source is available on disk.
        **config: Keyword arguments forwarded unchanged to the constructor
            (for example ``timeout`` or ``max_concurrent_tasks``). If
            ``name`` is not given it defaults to ``func.__name__``.

    Returns:
        An instance of ``cls`` built as ``cls(code=<body>, **config)``.

    Raises:
        ValueError: If ``func`` is not a coroutine function, or if no
            ``async def`` header can be located in its source.
        OSError: Propagated from ``inspect.getsource()`` when the source
            cannot be retrieved (e.g. a function typed into a REPL).
        TypeError: Propagated from ``inspect.getsource()`` for built-in
            objects.

    Example:
        ```python
        async def process_user_data(
            user_ids: list,
            database_url: str,
        ) -> dict:
            found = [uid for uid in user_ids if uid]
            return {"count": len(found)}

        node = AsyncPythonCodeNode.from_function(
            process_user_data,
            name="user_processor",
            timeout=30,
            max_concurrent_tasks=20,
        )
        ```

    Technical Notes:
        - Only the function body is kept; the signature, its default
          argument values and any decorators are discarded.
        - ``return`` statements are left exactly as written; no rewriting
          of the body takes place here.
        - The function's own docstring, if any, stays in the extracted
          body as a plain string expression.
        - A body written on the same line as the header
          (``async def f(): return 1``) yields empty code.

    Limitations:
        - Variables captured from an enclosing scope (closures) and module
          globals used by ``func`` are not carried over.
    """
    # Imported locally: nothing else in this block needs these modules.
    import io
    import textwrap
    import tokenize

    if not inspect.iscoroutinefunction(func):
        raise ValueError("Function must be async (defined with 'async def')")

    source = inspect.getsource(func)
    lines = source.split("\n")

    # Locate the last physical line of the ``async def`` header. The header
    # is one *logical* line, so the first NEWLINE token after ``async def``
    # marks its end even when the parameter list is wrapped across lines.
    header_end = None  # 1-based row number
    try:
        previous = None
        in_header = False
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if in_header:
                if tok.type == tokenize.NEWLINE:
                    header_end = tok.start[0]
                    break
            elif previous == "async" and tok.string == "def":
                in_header = True
            previous = tok.string
    except (tokenize.TokenError, SyntaxError):
        # Source that cannot be tokenized falls back to the line scan below.
        header_end = None

    if header_end is None:
        # Fallback: treat the first line starting with "async def" as the
        # complete header.
        for row, line in enumerate(lines, start=1):
            if line.strip().startswith("async def"):
                header_end = row
                break
        else:
            raise ValueError("Could not find async function definition")

    # Everything after the header is the body; strip its common indentation
    # (whitespace-only lines are normalized to empty lines by dedent).
    body_lines = lines[header_end:]
    code = textwrap.dedent("\n".join(body_lines))

    # Merge the default name into a copy of the config so that ``name`` is
    # passed to the constructor exactly once, even when the caller supplies it.
    node_config = dict(config)
    node_config.setdefault("name", func.__name__)
    return cls(code=code, **node_config)