kailash-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/nodes/base.py
ADDED
@@ -0,0 +1,1032 @@
"""Base node class and node system implementation.

This module provides the foundation for all nodes in the Kailash system. It defines
the abstract base class that all nodes must inherit from, along with supporting
classes for metadata, configuration, and registration.

The node system is designed to be:
1. Type-safe through parameter validation
2. Discoverable through the node registry
3. Composable in workflows
4. Serializable for export/import
5. Extensible for custom implementations

Key Components:
- Node: Abstract base class for all nodes
- NodeMetadata: Metadata about nodes for discovery and documentation
- NodeParameter: Type definitions for node inputs/outputs
- NodeRegistry: Global registry for node discovery
"""

import json
import logging
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, Optional, Set, Type

from pydantic import BaseModel, Field, ValidationError

from kailash.sdk_exceptions import (
    NodeConfigurationError,
    NodeExecutionError,
    NodeValidationError,
)


class NodeMetadata(BaseModel):
    """Metadata for a node.

    This class stores descriptive information about a node that is used for:
    1. Discovery in the UI/CLI (name, description, tags)
    2. Version tracking and compatibility checks
    3. Documentation and tooltips
    4. Workflow export metadata

    Upstream consumers:
    - Node.__init__: Creates metadata during node instantiation
    - NodeRegistry: Uses metadata for discovery and filtering
    - WorkflowExporter: Includes metadata in exported workflows

    Downstream usage:
    - Workflow visualization: Shows node names and descriptions
    - CLI help: Displays available nodes with their metadata
    - Kailash UI: Node palette and property panels
    """

    id: str = Field("", description="Node ID")
    name: str = Field(..., description="Node name")
    description: str = Field("", description="Node description")
    version: str = Field("1.0.0", description="Node version")
    author: str = Field("", description="Node author")
    created_at: datetime = Field(
        default_factory=datetime.utcnow, description="Node creation date"
    )
    tags: Set[str] = Field(default_factory=set, description="Node tags")


class NodeParameter(BaseModel):
    """Definition of a node parameter.

    This class defines the schema for node inputs and outputs, providing:
    1. Type information for validation
    2. Default values for optional parameters
    3. Documentation for users
    4. Requirements specification

    Design Purpose:
    - Enables static analysis of workflow connections
    - Provides runtime validation of data types
    - Supports automatic UI generation for node configuration
    - Facilitates workflow validation before execution

    Upstream usage:
    - Node.get_parameters(): Returns dict of parameters
    - Custom nodes: Define their input/output schemas

    Downstream consumers:
    - Node._validate_config(): Validates configuration against parameters
    - Node.validate_inputs(): Validates runtime inputs
    - Workflow.connect(): Validates connections between nodes
    - WorkflowExporter: Exports parameter schemas
    """

    name: str
    type: Type
    required: bool = True
    default: Any = None
    description: str = ""


class Node(ABC):
    """Base class for all nodes in the Kailash system.

    This abstract class defines the contract that all nodes must implement.
    It provides the foundation for:
    1. Parameter validation and type checking
    2. Execution lifecycle management
    3. Error handling and reporting
    4. Serialization for workflow export
    5. Configuration management

    Design Philosophy:
    - Nodes are stateless processors of data
    - All configuration is provided at initialization
    - Runtime inputs are validated against schemas
    - Outputs must be JSON-serializable
    - Errors are wrapped in appropriate exception types

    Inheritance Pattern:
    All concrete nodes must:
    1. Implement get_parameters() to define inputs
    2. Implement run() to process data
    3. Call super().__init__() with configuration
    4. Use self.logger for logging

    Upstream components:
    - Workflow: Creates and manages node instances
    - NodeRegistry: Provides node classes for instantiation
    - CLI/UI: Configures nodes based on user input

    Downstream usage:
    - LocalRuntime: Executes nodes in workflows
    - TaskManager: Tracks node execution status
    - WorkflowExporter: Serializes nodes for export
    """

    def __init__(self, **kwargs):
        """Initialize the node with configuration parameters.

        This method performs the following initialization steps:
        1. Sets the node ID (defaults to class name)
        2. Creates metadata from provided arguments
        3. Sets up logging for the node
        4. Stores configuration in self.config
        5. Validates configuration against parameters

        The configuration is validated by calling _validate_config(), which
        checks that all required parameters are present and of the correct type.

        Args:
            **kwargs: Configuration parameters including:
                - id: Optional custom node ID
                - name: Display name for the node
                - description: Node description
                - version: Node version
                - author: Node author
                - tags: Set of tags for discovery
                - Any parameters defined in get_parameters()

        Raises:
            NodeConfigurationError: If configuration is invalid or
                if metadata validation fails

        Downstream effects:
            - Creates self.metadata for discovery
            - Sets up self.logger for execution logging
            - Stores self.config for runtime access
            - Validates parameters are correctly specified
        """
        try:
            self.id = kwargs.get("id", self.__class__.__name__)
            self.metadata = kwargs.get(
                "metadata",
                NodeMetadata(
                    id=self.id,
                    name=kwargs.get("name", self.__class__.__name__),
                    description=kwargs.get("description", self.__doc__ or ""),
                    version=kwargs.get("version", "1.0.0"),
                    author=kwargs.get("author", ""),
                    tags=kwargs.get("tags", set()),
                ),
            )
            self.logger = logging.getLogger(f"kailash.nodes.{self.id}")
            self.config = kwargs
            self._validate_config()
        except ValidationError as e:
            raise NodeConfigurationError(f"Invalid node metadata: {e}") from e
        except Exception as e:
            raise NodeConfigurationError(
                f"Failed to initialize node '{self.id}': {e}"
            ) from e

    @abstractmethod
    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Define the parameters this node accepts.

        This abstract method must be implemented by all concrete nodes to
        specify their input schema. The parameters define:
        1. What inputs the node expects
        2. Type requirements for each input
        3. Whether inputs are required or optional
        4. Default values for optional inputs
        5. Documentation for each parameter

        The returned dictionary is used throughout the node lifecycle:
        - During initialization: _validate_config() checks configuration
        - During execution: validate_inputs() validates runtime data
        - During workflow creation: Used for connection validation
        - During export: Included in workflow manifests

        Example implementation:
            def get_parameters(self):
                return {
                    'input_file': NodeParameter(
                        name='input_file',
                        type=str,
                        required=True,
                        description='Path to input CSV file'
                    ),
                    'delimiter': NodeParameter(
                        name='delimiter',
                        type=str,
                        required=False,
                        default=',',
                        description='CSV delimiter character'
                    )
                }

        Returns:
            Dictionary mapping parameter names to their definitions

        Used by:
            - _validate_config(): Validates configuration matches parameters
            - validate_inputs(): Validates runtime inputs
            - to_dict(): Includes parameters in serialization
            - Workflow.connect(): Validates compatible connections
        """
        pass

    def get_output_schema(self) -> Dict[str, NodeParameter]:
        """Define output parameters for this node.

        This optional method allows nodes to specify their output schema for validation.
        If not overridden, outputs will only be validated for JSON-serializability.

        Design purpose:
        - Enables static analysis of node outputs
        - Provides runtime validation of output types
        - Supports automatic documentation of outputs
        - Facilitates workflow validation and type checking

        The output schema serves similar purposes as input parameters:
        1. Type validation during execution
        2. Documentation for downstream consumers
        3. Workflow connection validation
        4. Export manifest generation

        Example implementation:
            def get_output_schema(self):
                return {
                    'dataframe': NodeParameter(
                        name='dataframe',
                        type=dict,
                        required=True,
                        description='Processed data as dictionary'
                    ),
                    'row_count': NodeParameter(
                        name='row_count',
                        type=int,
                        required=True,
                        description='Number of rows processed'
                    ),
                    'processing_time': NodeParameter(
                        name='processing_time',
                        type=float,
                        required=False,
                        description='Time taken to process in seconds'
                    )
                }

        Returns:
            Dictionary mapping output names to their parameter definitions
            Empty dict by default (no schema validation)

        Used by:
            - validate_outputs(): Validates runtime outputs
            - Workflow.connect(): Validates connections between nodes
            - Documentation generators: Create output documentation
            - Export systems: Include output schemas in manifests
        """
        return {}

    @abstractmethod
    def run(self, **kwargs) -> Dict[str, Any]:
        """Execute the node's logic.

        This is the core method that implements the node's data processing logic.
        It receives validated inputs and must return a dictionary of outputs.

        Design requirements:
        1. Must be stateless - no side effects between runs
        2. All inputs are provided as keyword arguments
        3. Must return a dictionary (JSON-serializable)
        4. Should handle errors gracefully
        5. Can use self.config for configuration values
        6. Should use self.logger for status reporting

        The method is called by execute() which handles:
        - Input validation before calling run()
        - Output validation after run() completes
        - Error wrapping and logging
        - Execution timing and metrics

        Example implementation:
            def run(self, input_file, delimiter=','):
                df = pd.read_csv(input_file, delimiter=delimiter)
                return {
                    'dataframe': df.to_dict(),
                    'row_count': len(df),
                    'columns': list(df.columns)
                }

        Args:
            **kwargs: Validated input parameters matching get_parameters()

        Returns:
            Dictionary of outputs that will be validated and passed
            to downstream nodes

        Raises:
            NodeExecutionError: If execution fails (will be caught and
                re-raised by execute())

        Called by:
            - execute(): Wraps with validation and error handling
            - LocalRuntime: During workflow execution
            - TestRunner: During unit testing
        """
        pass

    def _validate_config(self):
        """Validate node configuration against defined parameters.

        This internal method is called during __init__ to ensure that the
        provided configuration matches the node's parameter requirements.

        Validation process:
        1. Calls get_parameters() to get schema
        2. For each parameter, checks if:
            - Required parameters are present
            - Values match expected types
            - Type conversion is possible if needed
        3. Sets default values for missing optional parameters
        4. Updates self.config with validated values

        Type conversion:
        - If a value doesn't match the expected type, attempts conversion
        - For example: string "123" -> int 123
        - Conversion failures result in descriptive errors

        Called by:
            - __init__(): During node initialization

        Modifies:
            - self.config: Updates with defaults and converted values

        Raises:
            NodeConfigurationError: If configuration is invalid, including:
                - Missing required parameters
                - Type mismatches that can't be converted
                - get_parameters() implementation errors
        """
        try:
            params = self.get_parameters()
        except Exception as e:
            raise NodeConfigurationError(f"Failed to get node parameters: {e}") from e

        for param_name, param_def in params.items():
            if param_name not in self.config:
                if param_def.required and param_def.default is None:
                    raise NodeConfigurationError(
                        f"Required parameter '{param_name}' not provided in configuration"
                    )
                elif param_def.default is not None:
                    self.config[param_name] = param_def.default

            if param_name in self.config:
                value = self.config[param_name]
                # Skip type checking for Any type
                if param_def.type is Any:
                    continue
                if not isinstance(value, param_def.type):
                    try:
                        self.config[param_name] = param_def.type(value)
                    except (ValueError, TypeError) as e:
                        raise NodeConfigurationError(
                            f"Configuration parameter '{param_name}' must be of type "
                            f"{param_def.type.__name__}, got {type(value).__name__}. "
                            f"Conversion failed: {e}"
                        ) from e

    def validate_inputs(self, **kwargs) -> Dict[str, Any]:
        """Validate runtime inputs against node requirements.

        This method validates inputs provided at execution time against the
        node's parameter schema. It ensures type safety and provides helpful
        error messages for invalid inputs.

        Validation steps:
        1. Gets parameter definitions from get_parameters()
        2. Checks each parameter for:
            - Presence (if required)
            - Type compatibility
            - Null handling for optional parameters
        3. Attempts type conversion if needed
        4. Applies default values for missing optional parameters

        Key behaviors:
        - Required parameters must be provided or have defaults
        - Optional parameters can be None
        - Type mismatches attempt conversion before failing
        - Error messages include parameter descriptions

        Example flow:
            # Node expects: {'count': int, 'name': str (optional)}
            inputs = {'count': '42', 'name': None}
            validated = validate_inputs(**inputs)
            # Returns: {'count': 42}  # Converted and None removed

        Args:
            **kwargs: Runtime inputs to validate

        Returns:
            Dictionary of validated inputs with:
            - Type conversions applied
            - Defaults for missing optional parameters
            - None values removed for optional parameters

        Raises:
            NodeValidationError: If inputs are invalid:
                - Missing required parameters
                - Type conversion failures
                - get_parameters() errors

        Called by:
            - execute(): Before passing inputs to run()
            - Workflow validation: During connection checks
        """
        try:
            params = self.get_parameters()
        except Exception as e:
            raise NodeValidationError(
                f"Failed to get node parameters for validation: {e}"
            ) from e

        validated = {}

        for param_name, param_def in params.items():
            if param_def.required and param_name not in kwargs:
                if param_def.default is not None:
                    validated[param_name] = param_def.default
                else:
                    raise NodeValidationError(
                        f"Required input '{param_name}' not provided. "
                        f"Description: {param_def.description or 'No description available'}"
                    )

            if param_name in kwargs:
                value = kwargs[param_name]
                if value is None and not param_def.required:
                    continue

                # Skip type checking for Any type
                if param_def.type is Any:
                    validated[param_name] = value
                elif not isinstance(value, param_def.type):
                    try:
                        validated[param_name] = param_def.type(value)
                    except (ValueError, TypeError) as e:
                        raise NodeValidationError(
                            f"Input '{param_name}' must be of type {param_def.type.__name__}, "
                            f"got {type(value).__name__}. Conversion failed: {e}"
                        ) from e
                else:
                    validated[param_name] = value

        return validated

    def validate_outputs(self, outputs: Dict[str, Any]) -> Dict[str, Any]:
        """Validate outputs against schema and JSON-serializability.

        This enhanced method validates outputs in two ways:
        1. Schema validation: If get_output_schema() is defined, validates
            types and required fields
        2. JSON serialization: Ensures all outputs can be serialized

        Validation process:
        1. Check outputs is a dictionary
        2. If output schema exists:
            - Validate required fields are present
            - Check type compatibility
            - Attempt type conversion if needed
        3. Verify JSON-serializability
        4. Return validated outputs

        Schema validation features:
        - Required outputs must be present
        - Optional outputs can be None or missing
        - Type mismatches attempt conversion
        - Clear error messages with field details

        Args:
            outputs: Outputs to validate from run() method

        Returns:
            The same outputs dictionary if valid

        Raises:
            NodeValidationError: If outputs are invalid:
                - Not a dictionary
                - Missing required outputs
                - Type validation failures
                - Non-serializable values

        Called by:
            - execute(): After run() completes
            - Test utilities: For output validation
        """
        if not isinstance(outputs, dict):
            raise NodeValidationError(
                f"Node outputs must be a dictionary, got {type(outputs).__name__}"
            )

        # First, validate against output schema if defined
        output_schema = self.get_output_schema()
        if output_schema:
            validated_outputs = {}

            for param_name, param_def in output_schema.items():
                if param_def.required and param_name not in outputs:
                    raise NodeValidationError(
                        f"Required output '{param_name}' not provided. "
                        f"Description: {param_def.description or 'No description available'}"
                    )

                if param_name in outputs:
                    value = outputs[param_name]
                    if value is None and not param_def.required:
                        continue  # Optional outputs can be None

                    if value is not None:
                        # Skip type checking for Any type
                        if param_def.type is Any:
                            validated_outputs[param_name] = value
                        elif not isinstance(value, param_def.type):
                            try:
                                # Attempt type conversion
                                converted_value = param_def.type(value)
                                validated_outputs[param_name] = converted_value
                            except (ValueError, TypeError) as e:
                                raise NodeValidationError(
                                    f"Output '{param_name}' must be of type {param_def.type.__name__}, "
                                    f"got {type(value).__name__}. Conversion failed: {e}"
                                ) from e
                        else:
                            validated_outputs[param_name] = value
                    else:
                        validated_outputs[param_name] = None

            # Include any additional outputs not in schema (for flexibility)
            for key, value in outputs.items():
                if key not in validated_outputs:
                    validated_outputs[key] = value

            outputs = validated_outputs

        # Then validate JSON-serializability
        # Skip JSON validation for state management objects
        from pydantic import BaseModel

        from kailash.workflow.state import WorkflowStateWrapper

        non_serializable = []
        for k, v in outputs.items():
            # Allow WorkflowStateWrapper objects to pass through
            if isinstance(v, WorkflowStateWrapper):
                continue
            # Allow Pydantic models (they can be serialized with .model_dump())
            if isinstance(v, BaseModel):
                continue
            if not self._is_json_serializable(v):
                non_serializable.append(k)

        if non_serializable:
            raise NodeValidationError(
                f"Node outputs must be JSON-serializable. Failed keys: {non_serializable}"
            )

        return outputs

    def _is_json_serializable(self, obj: Any) -> bool:
        """Check if an object is JSON-serializable.

        Helper method that attempts JSON serialization to determine
        if an object can be serialized. Used by validate_outputs()
        to identify problematic values.

        Args:
            obj: Any object to test for JSON serializability

        Returns:
            True if object can be JSON serialized, False otherwise

        Used by:
            - validate_outputs(): To identify non-serializable keys
        """
        try:
            json.dumps(obj)
            return True
        except (TypeError, ValueError):
            return False

    def execute(self, **runtime_inputs) -> Dict[str, Any]:
        """Execute the node with validation and error handling.

        This is the main entry point for node execution that orchestrates
        the complete execution lifecycle:

        1. Input validation (validate_inputs)
        2. Execution (run)
        3. Output validation (validate_outputs)
        4. Error handling and logging
        5. Performance metrics

        Execution flow:
        1. Logs execution start
        2. Validates inputs against parameter schema
        3. Calls run() with validated inputs
        4. Validates outputs are JSON-serializable
        5. Logs execution time
        6. Returns validated outputs

        Error handling strategy:
        - NodeValidationError: Re-raised as-is (input/output issues)
        - NodeExecutionError: Re-raised as-is (run() failures)
        - Other exceptions: Wrapped in NodeExecutionError

        Performance tracking:
        - Records execution start/end times
        - Logs total execution duration
        - Includes timing in execution logs

        Returns:
            Dictionary of validated outputs from run()

        Raises:
            NodeExecutionError: If execution fails in run()
            NodeValidationError: If input/output validation fails

        Called by:
            - LocalRuntime: During workflow execution
            - TaskManager: With execution tracking
            - Unit tests: For node testing

        Downstream effects:
            - Logs provide execution history
            - Metrics enable performance monitoring
            - Validation ensures data integrity
        """
        start_time = datetime.now(timezone.utc)
        try:
            self.logger.info(f"Executing node {self.id}")

            # Merge runtime inputs with config (runtime inputs take precedence)
            merged_inputs = {**self.config, **runtime_inputs}

            # Handle nested config case (for nodes that store parameters in config['config'])
            if "config" in merged_inputs and isinstance(merged_inputs["config"], dict):
                # Extract nested config
                nested_config = merged_inputs["config"]
                merged_inputs.update(nested_config)
                # Don't remove the config key as some nodes might need it

            # Validate inputs
            validated_inputs = self.validate_inputs(**merged_inputs)
            self.logger.debug(f"Validated inputs for {self.id}: {validated_inputs}")

            # Execute node logic
            outputs = self.run(**validated_inputs)

            # Validate outputs
            validated_outputs = self.validate_outputs(outputs)

            execution_time = (datetime.now(timezone.utc) - start_time).total_seconds()
            self.logger.info(
                f"Node {self.id} executed successfully in {execution_time:.3f}s"
            )
            return validated_outputs

        except NodeValidationError:
            # Re-raise validation errors as-is
            raise
        except NodeExecutionError:
            # Re-raise execution errors as-is
            raise
        except Exception as e:
            # Wrap any other exception in NodeExecutionError
            self.logger.error(f"Node {self.id} execution failed: {e}", exc_info=True)
            raise NodeExecutionError(
                f"Node '{self.id}' execution failed: {type(e).__name__}: {e}"
            ) from e

    def to_dict(self) -> Dict[str, Any]:
        """Convert node to dictionary representation.

        Serializes the node instance to a dictionary format suitable for:
        1. Workflow export
        2. Node persistence
        3. API responses
        4. Configuration sharing

        The serialized format includes:
        - id: Unique node identifier
        - type: Node class name
        - metadata: Complete node metadata
        - config: Current configuration
        - parameters: Parameter definitions with types

        Type serialization:
        - Python types are converted to string names
        - Complex types may require custom handling
        - Parameter defaults are included

        Returns:
            Dictionary representation containing:
            - Node identification and type
            - Complete metadata
            - Configuration values
            - Parameter schemas

        Raises:
            NodeExecutionError: If serialization fails due to:
                - get_parameters() errors
                - Metadata serialization issues
                - Type conversion problems

        Used by:
            - WorkflowExporter: For workflow serialization
            - CLI: For node inspection
            - API: For node information endpoints
            - Debugging: For node state inspection
        """
        try:
            return {
                "id": self.id,
                "type": self.__class__.__name__,
                "metadata": self.metadata.model_dump(),
                "config": self.config,
                "parameters": {
                    name: {
                        "type": param.type.__name__,
                        "required": param.required,
                        "default": param.default,
                        "description": param.description,
                    }
                    for name, param in self.get_parameters().items()
                },
            }
        except Exception as e:
            raise NodeExecutionError(
                f"Failed to serialize node '{self.id}': {e}"
            ) from e


# Node Registry
class NodeRegistry:
    """Registry for discovering and managing available nodes.

    This singleton class provides a global registry for node types,
    enabling:
    1. Dynamic node discovery
    2. Node class registration
    3. Workflow deserialization
    4. CLI/UI node palettes

    Design pattern: Singleton
    - Single global instance (_instance)
    - Shared registry of node classes (_nodes)
    - Thread-safe through class methods

    Registration flow:
    1. Nodes register via @register_node decorator
    2. Registry validates node inheritance
    3. Stores class reference by name/alias
    4. Available for instantiation

    Usage patterns:
    - Automatic: @register_node decorator
    - Manual: NodeRegistry.register(NodeClass)
    - Discovery: NodeRegistry.list_nodes()
    - Instantiation: NodeRegistry.get('NodeName')

    Upstream components:
    - Node implementations: Register themselves
    - Module imports: Trigger registration
    - Setup scripts: Bulk registration

    Downstream consumers:
    - Workflow: Creates nodes by name
    - CLI: Lists available nodes
    - UI: Populates node palette
    - WorkflowImporter: Deserializes nodes
    """

    _instance = None
    _nodes: Dict[str, Type[Node]] = {}

    def __new__(cls):
        """Ensure singleton instance.

        Implements the singleton pattern to maintain a single
        global registry of nodes.

        Returns:
            The single NodeRegistry instance
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def register(cls, node_class: Type[Node], alias: Optional[str] = None):
        """Register a node class.

        Adds a node class to the global registry, making it available
        for discovery and instantiation.

        Registration process:
        1. Validates node_class inherits from Node
        2. Determines registration name (alias or class name)
        3. Warns if overwriting existing registration
        4. Stores class reference in registry

        Thread safety:
        - Class method ensures single registry
        - Dictionary operations are atomic
        - Safe for concurrent registration

        Example usage:
            NodeRegistry.register(CSVReaderNode)
            NodeRegistry.register(CustomNode, alias='MyNode')

        Args:
            node_class: Node class to register (must inherit from Node)
            alias: Optional alias for the node (defaults to class name)

        Raises:
            NodeConfigurationError: If registration fails:
                - node_class doesn't inherit from Node
                - Invalid class type provided

        Side effects:
            - Updates cls._nodes dictionary
            - Logs registration success/warnings
            - Overwrites existing registrations

        Used by:
            - @register_node decorator
            - Manual registration in setup
            - Plugin loading systems
        """
        if not issubclass(node_class, Node):
            raise NodeConfigurationError(
                f"Cannot register {node_class.__name__}: must be a subclass of Node"
            )

        node_name = alias or node_class.__name__

        if node_name in cls._nodes:
            logging.warning(f"Overwriting existing node registration for '{node_name}'")

        cls._nodes[node_name] = node_class
        logging.info(f"Registered node '{node_name}'")

    @classmethod
    def get(cls, node_name: str) -> Type[Node]:
        """Get a registered node class by name.

        Retrieves a node class from the registry for instantiation.
        Used during workflow creation and deserialization.

        Lookup process:
        1. Searches registry by exact name match
        2. Returns class reference if found
        3. Provides helpful error with available nodes

        Example usage:
            NodeClass = NodeRegistry.get('CSVReader')
            node = NodeClass(config={'file': 'data.csv'})

        Args:
            node_name: Name of the node (class name or alias)

        Returns:
            Node class ready for instantiation

        Raises:
            NodeConfigurationError: If node is not registered:
                - Includes list of available nodes
                - Suggests similar names if possible

        Used by:
            - Workflow.add_node(): Creates nodes by name
            - WorkflowImporter: Deserializes nodes
            - CLI commands: Instantiates nodes
            - Factory methods: Dynamic node creation
        """
        if node_name not in cls._nodes:
            available_nodes = ", ".join(sorted(cls._nodes.keys()))
            raise NodeConfigurationError(
                f"Node '{node_name}' not found in registry. "
                f"Available nodes: {available_nodes}"
            )
        return cls._nodes[node_name]

    @classmethod
    def list_nodes(cls) -> Dict[str, Type[Node]]:
        """List all registered nodes.

        Returns a copy of the registry for discovery purposes.
        Used by CLI help, UI node palettes, and documentation.

        Returns:
            Dictionary mapping node names to their classes:
            - Keys: Node names/aliases
            - Values: Node class references
            - Safe copy prevents registry modification

        Used by:
            - CLI 'list-nodes' command
            - UI node palette population
            - Documentation generators
            - Testing and debugging
        """
        return cls._nodes.copy()

    @classmethod
    def clear(cls):
        """Clear all registered nodes.

        Removes all nodes from the registry. Primarily used for:
        1. Testing - Clean state between tests
        2. Reloading - Before re-registering nodes
        3. Cleanup - Memory management

        Side effects:
            - Empties the _nodes dictionary
            - Logs the clearing action
            - Existing node instances remain valid

        Warning:
            - Subsequent get() calls will fail
            - Workflows may not deserialize
            - Should re-register needed nodes
        """
        cls._nodes.clear()
        logging.info("Cleared all registered nodes")


def register_node(alias: Optional[str] = None):
    """Decorator to register a node class.

    Provides a convenient decorator pattern for automatic node
    registration when the module is imported.

    Usage patterns:
        @register_node()
        class MyNode(Node):
            pass

        @register_node(alias='CustomName')
        class MyNode(Node):
            pass

    Registration timing:
    - Occurs when module is imported
    - Before any workflow creation
    - Enables automatic discovery

    Error handling:
    - Wraps registration errors
    - Provides clear error messages
    - Preserves original class

    Args:
        alias: Optional alias for the node (defaults to class name)

    Returns:
        Decorator function that:
        - Registers the node class
        - Returns the unmodified class
        - Handles registration errors

    Example:
        @register_node(alias='CSV')
        class CSVReaderNode(Node):
            def get_parameters(self):
                return {'file': NodeParameter(...)}
            def run(self, file):
                return pd.read_csv(file)
    """

    def decorator(node_class: Type[Node]):
        """Inner decorator that performs registration.

        Args:
            node_class: The node class to register

        Returns:
            The unmodified node class

        Raises:
            NodeConfigurationError: If registration fails
        """
        try:
            NodeRegistry.register(node_class, alias)
        except Exception as e:
            raise NodeConfigurationError(
                f"Failed to register node {node_class.__name__}: {e}"
            ) from e
        return node_class

    return decorator
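
For readers skimming the diff, the following is a minimal sketch of how the contract above is meant to be exercised. It is illustrative only and not part of the wheel: the WordCountNode class, its alias, and its parameters are invented for the example, while Node, NodeParameter, register_node, and execute() are taken directly from the file shown above.

    # Illustrative only -- not shipped in kailash 0.1.0.
    from typing import Any, Dict

    from kailash.nodes.base import Node, NodeParameter, register_node


    @register_node(alias="WordCount")  # hypothetical node, registered at import time
    class WordCountNode(Node):
        """Count words in a text input."""

        def get_parameters(self) -> Dict[str, NodeParameter]:
            # Input schema: one required string, one optional flag with a default.
            return {
                "text": NodeParameter(
                    name="text",
                    type=str,
                    required=True,
                    description="Text to analyze",
                ),
                "lowercase": NodeParameter(
                    name="lowercase",
                    type=bool,
                    required=False,
                    default=True,
                    description="Normalize case before counting",
                ),
            }

        def run(self, text: str, lowercase: bool = True) -> Dict[str, Any]:
            # Outputs must be a JSON-serializable dictionary.
            words = text.lower().split() if lowercase else text.split()
            return {"word_count": len(words)}


    # _validate_config() runs in __init__, so a required parameter with no
    # default must be supplied at construction time, even though execute()
    # lets runtime inputs override it (runtime inputs take precedence).
    node = WordCountNode(text="placeholder")
    result = node.execute(text="Kailash nodes are stateless")
    print(result)  # {'word_count': 4}

Note the construction-time detail in the comment above: because execute() merges self.config with runtime inputs before validate_inputs(), a node can be configured once and then fed different inputs per call, but required parameters without defaults must already be present in the constructor kwargs or NodeConfigurationError is raised.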