kailash-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. kailash/__init__.py +31 -0
  2. kailash/__main__.py +11 -0
  3. kailash/cli/__init__.py +5 -0
  4. kailash/cli/commands.py +563 -0
  5. kailash/manifest.py +778 -0
  6. kailash/nodes/__init__.py +23 -0
  7. kailash/nodes/ai/__init__.py +26 -0
  8. kailash/nodes/ai/agents.py +417 -0
  9. kailash/nodes/ai/models.py +488 -0
  10. kailash/nodes/api/__init__.py +52 -0
  11. kailash/nodes/api/auth.py +567 -0
  12. kailash/nodes/api/graphql.py +480 -0
  13. kailash/nodes/api/http.py +598 -0
  14. kailash/nodes/api/rate_limiting.py +572 -0
  15. kailash/nodes/api/rest.py +665 -0
  16. kailash/nodes/base.py +1032 -0
  17. kailash/nodes/base_async.py +128 -0
  18. kailash/nodes/code/__init__.py +32 -0
  19. kailash/nodes/code/python.py +1021 -0
  20. kailash/nodes/data/__init__.py +125 -0
  21. kailash/nodes/data/readers.py +496 -0
  22. kailash/nodes/data/sharepoint_graph.py +623 -0
  23. kailash/nodes/data/sql.py +380 -0
  24. kailash/nodes/data/streaming.py +1168 -0
  25. kailash/nodes/data/vector_db.py +964 -0
  26. kailash/nodes/data/writers.py +529 -0
  27. kailash/nodes/logic/__init__.py +6 -0
  28. kailash/nodes/logic/async_operations.py +702 -0
  29. kailash/nodes/logic/operations.py +551 -0
  30. kailash/nodes/transform/__init__.py +5 -0
  31. kailash/nodes/transform/processors.py +379 -0
  32. kailash/runtime/__init__.py +6 -0
  33. kailash/runtime/async_local.py +356 -0
  34. kailash/runtime/docker.py +697 -0
  35. kailash/runtime/local.py +434 -0
  36. kailash/runtime/parallel.py +557 -0
  37. kailash/runtime/runner.py +110 -0
  38. kailash/runtime/testing.py +347 -0
  39. kailash/sdk_exceptions.py +307 -0
  40. kailash/tracking/__init__.py +7 -0
  41. kailash/tracking/manager.py +885 -0
  42. kailash/tracking/metrics_collector.py +342 -0
  43. kailash/tracking/models.py +535 -0
  44. kailash/tracking/storage/__init__.py +0 -0
  45. kailash/tracking/storage/base.py +113 -0
  46. kailash/tracking/storage/database.py +619 -0
  47. kailash/tracking/storage/filesystem.py +543 -0
  48. kailash/utils/__init__.py +0 -0
  49. kailash/utils/export.py +924 -0
  50. kailash/utils/templates.py +680 -0
  51. kailash/visualization/__init__.py +62 -0
  52. kailash/visualization/api.py +732 -0
  53. kailash/visualization/dashboard.py +951 -0
  54. kailash/visualization/performance.py +808 -0
  55. kailash/visualization/reports.py +1471 -0
  56. kailash/workflow/__init__.py +15 -0
  57. kailash/workflow/builder.py +245 -0
  58. kailash/workflow/graph.py +827 -0
  59. kailash/workflow/mermaid_visualizer.py +628 -0
  60. kailash/workflow/mock_registry.py +63 -0
  61. kailash/workflow/runner.py +302 -0
  62. kailash/workflow/state.py +238 -0
  63. kailash/workflow/visualization.py +588 -0
  64. kailash-0.1.0.dist-info/METADATA +710 -0
  65. kailash-0.1.0.dist-info/RECORD +69 -0
  66. kailash-0.1.0.dist-info/WHEEL +5 -0
  67. kailash-0.1.0.dist-info/entry_points.txt +2 -0
  68. kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/nodes/base.py ADDED
@@ -0,0 +1,1032 @@
+ """Base node class and node system implementation.
+
+ This module provides the foundation for all nodes in the Kailash system. It defines
+ the abstract base class that all nodes must inherit from, along with supporting
+ classes for metadata, configuration, and registration.
+
+ The node system is designed to be:
+ 1. Type-safe through parameter validation
+ 2. Discoverable through the node registry
+ 3. Composable in workflows
+ 4. Serializable for export/import
+ 5. Extensible for custom implementations
+
+ Key Components:
+ - Node: Abstract base class for all nodes
+ - NodeMetadata: Metadata about nodes for discovery and documentation
+ - NodeParameter: Type definitions for node inputs/outputs
+ - NodeRegistry: Global registry for node discovery
+ """
+
+ import json
+ import logging
+ from abc import ABC, abstractmethod
+ from datetime import datetime, timezone
+ from typing import Any, Dict, Optional, Set, Type
+
+ from pydantic import BaseModel, Field, ValidationError
+
+ from kailash.sdk_exceptions import (
+     NodeConfigurationError,
+     NodeExecutionError,
+     NodeValidationError,
+ )
+
+
+ class NodeMetadata(BaseModel):
+     """Metadata for a node.
+
+     This class stores descriptive information about a node that is used for:
+     1. Discovery in the UI/CLI (name, description, tags)
+     2. Version tracking and compatibility checks
+     3. Documentation and tooltips
+     4. Workflow export metadata
+
+     Upstream consumers:
+     - Node.__init__: Creates metadata during node instantiation
+     - NodeRegistry: Uses metadata for discovery and filtering
+     - WorkflowExporter: Includes metadata in exported workflows
+
+     Downstream usage:
+     - Workflow visualization: Shows node names and descriptions
+     - CLI help: Displays available nodes with their metadata
+     - Kailash UI: Node palette and property panels
+     """
+
+     id: str = Field("", description="Node ID")
+     name: str = Field(..., description="Node name")
+     description: str = Field("", description="Node description")
+     version: str = Field("1.0.0", description="Node version")
+     author: str = Field("", description="Node author")
+     created_at: datetime = Field(
+         # Timezone-aware, matching execute()'s use of datetime.now(timezone.utc);
+         # datetime.utcnow is naive and deprecated.
+         default_factory=lambda: datetime.now(timezone.utc),
+         description="Node creation date",
+     )
+     tags: Set[str] = Field(default_factory=set, description="Node tags")
+
+
+ class NodeParameter(BaseModel):
+     """Definition of a node parameter.
+
+     This class defines the schema for node inputs and outputs, providing:
+     1. Type information for validation
+     2. Default values for optional parameters
+     3. Documentation for users
+     4. Requirements specification
+
+     Design Purpose:
+     - Enables static analysis of workflow connections
+     - Provides runtime validation of data types
+     - Supports automatic UI generation for node configuration
+     - Facilitates workflow validation before execution
+
+     Upstream usage:
+     - Node.get_parameters(): Returns dict of parameters
+     - Custom nodes: Define their input/output schemas
+
+     Downstream consumers:
+     - Node._validate_config(): Validates configuration against parameters
+     - Node.validate_inputs(): Validates runtime inputs
+     - Workflow.connect(): Validates connections between nodes
+     - WorkflowExporter: Exports parameter schemas
+     """
+
+     name: str
+     type: Type
+     required: bool = True
+     default: Any = None
+     description: str = ""
+
+
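As a quick illustration of the schema above (a sketch only; the parameter names are hypothetical, not from the package), a file-reading node might declare its inputs like this:

    # Hypothetical input schema built from NodeParameter as defined above.
    csv_params = {
        "input_file": NodeParameter(
            name="input_file",
            type=str,
            required=True,
            description="Path to the input CSV file",
        ),
        "delimiter": NodeParameter(
            name="delimiter",
            type=str,
            required=False,
            default=",",
            description="CSV delimiter character",
        ),
    }
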
+ class Node(ABC):
+     """Base class for all nodes in the Kailash system.
+
+     This abstract class defines the contract that all nodes must implement.
+     It provides the foundation for:
+     1. Parameter validation and type checking
+     2. Execution lifecycle management
+     3. Error handling and reporting
+     4. Serialization for workflow export
+     5. Configuration management
+
+     Design Philosophy:
+     - Nodes are stateless processors of data
+     - All configuration is provided at initialization
+     - Runtime inputs are validated against schemas
+     - Outputs must be JSON-serializable
+     - Errors are wrapped in appropriate exception types
+
+     Inheritance Pattern:
+     All concrete nodes must:
+     1. Implement get_parameters() to define inputs
+     2. Implement run() to process data
+     3. Call super().__init__() with configuration
+     4. Use self.logger for logging
+
+     Upstream components:
+     - Workflow: Creates and manages node instances
+     - NodeRegistry: Provides node classes for instantiation
+     - CLI/UI: Configures nodes based on user input
+
+     Downstream usage:
+     - LocalRuntime: Executes nodes in workflows
+     - TaskManager: Tracks node execution status
+     - WorkflowExporter: Serializes nodes for export
+     """
+
+     def __init__(self, **kwargs):
+         """Initialize the node with configuration parameters.
+
+         This method performs the following initialization steps:
+         1. Sets the node ID (defaults to class name)
+         2. Creates metadata from provided arguments
+         3. Sets up logging for the node
+         4. Stores configuration in self.config
+         5. Validates configuration against parameters
+
+         The configuration is validated by calling _validate_config(), which
+         checks that all required parameters are present and of the correct type.
+
+         Args:
+             **kwargs: Configuration parameters including:
+                 - id: Optional custom node ID
+                 - name: Display name for the node
+                 - description: Node description
+                 - version: Node version
+                 - author: Node author
+                 - tags: Set of tags for discovery
+                 - Any parameters defined in get_parameters()
+
+         Raises:
+             NodeConfigurationError: If configuration is invalid or
+                 if metadata validation fails
+
+         Downstream effects:
+             - Creates self.metadata for discovery
+             - Sets up self.logger for execution logging
+             - Stores self.config for runtime access
+             - Validates parameters are correctly specified
+         """
+         try:
+             self.id = kwargs.get("id", self.__class__.__name__)
+             self.metadata = kwargs.get(
+                 "metadata",
+                 NodeMetadata(
+                     id=self.id,
+                     name=kwargs.get("name", self.__class__.__name__),
+                     description=kwargs.get("description", self.__doc__ or ""),
+                     version=kwargs.get("version", "1.0.0"),
+                     author=kwargs.get("author", ""),
+                     tags=kwargs.get("tags", set()),
+                 ),
+             )
+             self.logger = logging.getLogger(f"kailash.nodes.{self.id}")
+             self.config = kwargs
+             self._validate_config()
+         except ValidationError as e:
+             raise NodeConfigurationError(f"Invalid node metadata: {e}") from e
+         except Exception as e:
+             raise NodeConfigurationError(
+                 f"Failed to initialize node '{self.id}': {e}"
+             ) from e
+
+     @abstractmethod
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         """Define the parameters this node accepts.
+
+         This abstract method must be implemented by all concrete nodes to
+         specify their input schema. The parameters define:
+         1. What inputs the node expects
+         2. Type requirements for each input
+         3. Whether inputs are required or optional
+         4. Default values for optional inputs
+         5. Documentation for each parameter
+
+         The returned dictionary is used throughout the node lifecycle:
+         - During initialization: _validate_config() checks configuration
+         - During execution: validate_inputs() validates runtime data
+         - During workflow creation: Used for connection validation
+         - During export: Included in workflow manifests
+
+         Example implementation:
+             def get_parameters(self):
+                 return {
+                     'input_file': NodeParameter(
+                         name='input_file',
+                         type=str,
+                         required=True,
+                         description='Path to input CSV file'
+                     ),
+                     'delimiter': NodeParameter(
+                         name='delimiter',
+                         type=str,
+                         required=False,
+                         default=',',
+                         description='CSV delimiter character'
+                     )
+                 }
+
+         Returns:
+             Dictionary mapping parameter names to their definitions
+
+         Used by:
+             - _validate_config(): Validates configuration matches parameters
+             - validate_inputs(): Validates runtime inputs
+             - to_dict(): Includes parameters in serialization
+             - Workflow.connect(): Validates compatible connections
+         """
+         pass
+
+     def get_output_schema(self) -> Dict[str, NodeParameter]:
+         """Define output parameters for this node.
+
+         This optional method allows nodes to specify their output schema for
+         validation. If not overridden, outputs will only be validated for
+         JSON-serializability.
+
+         Design purpose:
+         - Enables static analysis of node outputs
+         - Provides runtime validation of output types
+         - Supports automatic documentation of outputs
+         - Facilitates workflow validation and type checking
+
+         The output schema serves similar purposes as input parameters:
+         1. Type validation during execution
+         2. Documentation for downstream consumers
+         3. Workflow connection validation
+         4. Export manifest generation
+
+         Example implementation:
+             def get_output_schema(self):
+                 return {
+                     'dataframe': NodeParameter(
+                         name='dataframe',
+                         type=dict,
+                         required=True,
+                         description='Processed data as dictionary'
+                     ),
+                     'row_count': NodeParameter(
+                         name='row_count',
+                         type=int,
+                         required=True,
+                         description='Number of rows processed'
+                     ),
+                     'processing_time': NodeParameter(
+                         name='processing_time',
+                         type=float,
+                         required=False,
+                         description='Time taken to process in seconds'
+                     )
+                 }
+
+         Returns:
+             Dictionary mapping output names to their parameter definitions.
+             Empty dict by default (no schema validation).
+
+         Used by:
+             - validate_outputs(): Validates runtime outputs
+             - Workflow.connect(): Validates connections between nodes
+             - Documentation generators: Create output documentation
+             - Export systems: Include output schemas in manifests
+         """
+         return {}
+
+     @abstractmethod
+     def run(self, **kwargs) -> Dict[str, Any]:
+         """Execute the node's logic.
+
+         This is the core method that implements the node's data processing logic.
+         It receives validated inputs and must return a dictionary of outputs.
+
+         Design requirements:
+         1. Must be stateless - no side effects between runs
+         2. All inputs are provided as keyword arguments
+         3. Must return a dictionary (JSON-serializable)
+         4. Should handle errors gracefully
+         5. Can use self.config for configuration values
+         6. Should use self.logger for status reporting
+
+         The method is called by execute(), which handles:
+         - Input validation before calling run()
+         - Output validation after run() completes
+         - Error wrapping and logging
+         - Execution timing and metrics
+
+         Example implementation:
+             def run(self, input_file, delimiter=','):
+                 df = pd.read_csv(input_file, delimiter=delimiter)
+                 return {
+                     'dataframe': df.to_dict(),
+                     'row_count': len(df),
+                     'columns': list(df.columns)
+                 }
+
+         Args:
+             **kwargs: Validated input parameters matching get_parameters()
+
+         Returns:
+             Dictionary of outputs that will be validated and passed
+             to downstream nodes
+
+         Raises:
+             NodeExecutionError: If execution fails (will be caught and
+                 re-raised by execute())
+
+         Called by:
+             - execute(): Wraps with validation and error handling
+             - LocalRuntime: During workflow execution
+             - TestRunner: During unit testing
+         """
+         pass
+
+     def _validate_config(self):
+         """Validate node configuration against defined parameters.
+
+         This internal method is called during __init__ to ensure that the
+         provided configuration matches the node's parameter requirements.
+
+         Validation process:
+         1. Calls get_parameters() to get schema
+         2. For each parameter, checks if:
+            - Required parameters are present
+            - Values match expected types
+            - Type conversion is possible if needed
+         3. Sets default values for missing optional parameters
+         4. Updates self.config with validated values
+
+         Type conversion:
+         - If a value doesn't match the expected type, attempts conversion
+         - For example: string "123" -> int 123
+         - Conversion failures result in descriptive errors
+
+         Called by:
+             - __init__(): During node initialization
+
+         Modifies:
+             - self.config: Updates with defaults and converted values
+
+         Raises:
+             NodeConfigurationError: If configuration is invalid, including:
+                 - Missing required parameters
+                 - Type mismatches that can't be converted
+                 - get_parameters() implementation errors
+         """
+         try:
+             params = self.get_parameters()
+         except Exception as e:
+             raise NodeConfigurationError(f"Failed to get node parameters: {e}") from e
+
+         for param_name, param_def in params.items():
+             if param_name not in self.config:
+                 if param_def.required and param_def.default is None:
+                     raise NodeConfigurationError(
+                         f"Required parameter '{param_name}' not provided in configuration"
+                     )
+                 elif param_def.default is not None:
+                     self.config[param_name] = param_def.default
+
+             if param_name in self.config:
+                 value = self.config[param_name]
+                 # Skip type checking for Any type
+                 if param_def.type is Any:
+                     continue
+                 if not isinstance(value, param_def.type):
+                     try:
+                         self.config[param_name] = param_def.type(value)
+                     except (ValueError, TypeError) as e:
+                         raise NodeConfigurationError(
+                             f"Configuration parameter '{param_name}' must be of type "
+                             f"{param_def.type.__name__}, got {type(value).__name__}. "
+                             f"Conversion failed: {e}"
+                         ) from e
+
+     def validate_inputs(self, **kwargs) -> Dict[str, Any]:
+         """Validate runtime inputs against node requirements.
+
+         This method validates inputs provided at execution time against the
+         node's parameter schema. It ensures type safety and provides helpful
+         error messages for invalid inputs.
+
+         Validation steps:
+         1. Gets parameter definitions from get_parameters()
+         2. Checks each parameter for:
+            - Presence (if required)
+            - Type compatibility
+            - Null handling for optional parameters
+         3. Attempts type conversion if needed
+         4. Applies default values for missing optional parameters
+
+         Key behaviors:
+         - Required parameters must be provided or have defaults
+         - Optional parameters can be None
+         - Type mismatches attempt conversion before failing
+         - Error messages include parameter descriptions
+
+         Example flow:
+             # Node expects: {'count': int, 'name': str (optional)}
+             inputs = {'count': '42', 'name': None}
+             validated = validate_inputs(**inputs)
+             # Returns: {'count': 42}  # Converted and None removed
+
+         Args:
+             **kwargs: Runtime inputs to validate
+
+         Returns:
+             Dictionary of validated inputs with:
+             - Type conversions applied
+             - Defaults for missing optional parameters
+             - None values removed for optional parameters
+
+         Raises:
+             NodeValidationError: If inputs are invalid:
+                 - Missing required parameters
+                 - Type conversion failures
+                 - get_parameters() errors
+
+         Called by:
+             - execute(): Before passing inputs to run()
+             - Workflow validation: During connection checks
+         """
+         try:
+             params = self.get_parameters()
+         except Exception as e:
+             raise NodeValidationError(
+                 f"Failed to get node parameters for validation: {e}"
+             ) from e
+
+         validated = {}
+
+         for param_name, param_def in params.items():
+             if param_def.required and param_name not in kwargs:
+                 if param_def.default is not None:
+                     validated[param_name] = param_def.default
+                 else:
+                     raise NodeValidationError(
+                         f"Required input '{param_name}' not provided. "
+                         f"Description: {param_def.description or 'No description available'}"
+                     )
+
+             if param_name in kwargs:
+                 value = kwargs[param_name]
+                 if value is None and not param_def.required:
+                     continue
+
+                 # Skip type checking for Any type
+                 if param_def.type is Any:
+                     validated[param_name] = value
+                 elif not isinstance(value, param_def.type):
+                     try:
+                         validated[param_name] = param_def.type(value)
+                     except (ValueError, TypeError) as e:
+                         raise NodeValidationError(
+                             f"Input '{param_name}' must be of type {param_def.type.__name__}, "
+                             f"got {type(value).__name__}. Conversion failed: {e}"
+                         ) from e
+                 else:
+                     validated[param_name] = value
+
+         return validated
+
+     def validate_outputs(self, outputs: Dict[str, Any]) -> Dict[str, Any]:
+         """Validate outputs against schema and JSON-serializability.
+
+         This enhanced method validates outputs in two ways:
+         1. Schema validation: If get_output_schema() is defined, validates
+            types and required fields
+         2. JSON serialization: Ensures all outputs can be serialized
+
+         Validation process:
+         1. Check outputs is a dictionary
+         2. If output schema exists:
+            - Validate required fields are present
+            - Check type compatibility
+            - Attempt type conversion if needed
+         3. Verify JSON-serializability
+         4. Return validated outputs
+
+         Schema validation features:
+         - Required outputs must be present
+         - Optional outputs can be None or missing
+         - Type mismatches attempt conversion
+         - Clear error messages with field details
+
+         Args:
+             outputs: Outputs to validate from run() method
+
+         Returns:
+             The same outputs dictionary if valid
+
+         Raises:
+             NodeValidationError: If outputs are invalid:
+                 - Not a dictionary
+                 - Missing required outputs
+                 - Type validation failures
+                 - Non-serializable values
+
+         Called by:
+             - execute(): After run() completes
+             - Test utilities: For output validation
+         """
+         if not isinstance(outputs, dict):
+             raise NodeValidationError(
+                 f"Node outputs must be a dictionary, got {type(outputs).__name__}"
+             )
+
+         # First, validate against output schema if defined
+         output_schema = self.get_output_schema()
+         if output_schema:
+             validated_outputs = {}
+
+             for param_name, param_def in output_schema.items():
+                 if param_def.required and param_name not in outputs:
+                     raise NodeValidationError(
+                         f"Required output '{param_name}' not provided. "
+                         f"Description: {param_def.description or 'No description available'}"
+                     )
+
+                 if param_name in outputs:
+                     value = outputs[param_name]
+                     if value is None and not param_def.required:
+                         continue  # Optional outputs can be None
+
+                     if value is not None:
+                         # Skip type checking for Any type
+                         if param_def.type is Any:
+                             validated_outputs[param_name] = value
+                         elif not isinstance(value, param_def.type):
+                             try:
+                                 # Attempt type conversion
+                                 converted_value = param_def.type(value)
+                                 validated_outputs[param_name] = converted_value
+                             except (ValueError, TypeError) as e:
+                                 raise NodeValidationError(
+                                     f"Output '{param_name}' must be of type {param_def.type.__name__}, "
+                                     f"got {type(value).__name__}. Conversion failed: {e}"
+                                 ) from e
+                         else:
+                             validated_outputs[param_name] = value
+                     else:
+                         validated_outputs[param_name] = None
+
+             # Include any additional outputs not in schema (for flexibility)
+             for key, value in outputs.items():
+                 if key not in validated_outputs:
+                     validated_outputs[key] = value
+
+             outputs = validated_outputs
+
+         # Then validate JSON-serializability
+         # Skip JSON validation for state management objects
+         from pydantic import BaseModel
+
+         from kailash.workflow.state import WorkflowStateWrapper
+
+         non_serializable = []
+         for k, v in outputs.items():
+             # Allow WorkflowStateWrapper objects to pass through
+             if isinstance(v, WorkflowStateWrapper):
+                 continue
+             # Allow Pydantic models (they can be serialized with .model_dump())
+             if isinstance(v, BaseModel):
+                 continue
+             if not self._is_json_serializable(v):
+                 non_serializable.append(k)
+
+         if non_serializable:
+             raise NodeValidationError(
+                 f"Node outputs must be JSON-serializable. Failed keys: {non_serializable}"
+             )
+
+         return outputs
+
+     def _is_json_serializable(self, obj: Any) -> bool:
+         """Check if an object is JSON-serializable.
+
+         Helper method that attempts JSON serialization to determine
+         if an object can be serialized. Used by validate_outputs()
+         to identify problematic values.
+
+         Args:
+             obj: Any object to test for JSON serializability
+
+         Returns:
+             True if object can be JSON serialized, False otherwise
+
+         Used by:
+             - validate_outputs(): To identify non-serializable keys
+         """
+         try:
+             json.dumps(obj)
+             return True
+         except (TypeError, ValueError):
+             return False
+
+     def execute(self, **runtime_inputs) -> Dict[str, Any]:
+         """Execute the node with validation and error handling.
+
+         This is the main entry point for node execution that orchestrates
+         the complete execution lifecycle:
+
+         1. Input validation (validate_inputs)
+         2. Execution (run)
+         3. Output validation (validate_outputs)
+         4. Error handling and logging
+         5. Performance metrics
+
+         Execution flow:
+         1. Logs execution start
+         2. Validates inputs against parameter schema
+         3. Calls run() with validated inputs
+         4. Validates outputs are JSON-serializable
+         5. Logs execution time
+         6. Returns validated outputs
+
+         Error handling strategy:
+         - NodeValidationError: Re-raised as-is (input/output issues)
+         - NodeExecutionError: Re-raised as-is (run() failures)
+         - Other exceptions: Wrapped in NodeExecutionError
+
+         Performance tracking:
+         - Records execution start/end times
+         - Logs total execution duration
+         - Includes timing in execution logs
+
+         Returns:
+             Dictionary of validated outputs from run()
+
+         Raises:
+             NodeExecutionError: If execution fails in run()
+             NodeValidationError: If input/output validation fails
+
+         Called by:
+             - LocalRuntime: During workflow execution
+             - TaskManager: With execution tracking
+             - Unit tests: For node testing
+
+         Downstream effects:
+             - Logs provide execution history
+             - Metrics enable performance monitoring
+             - Validation ensures data integrity
+         """
+         start_time = datetime.now(timezone.utc)
+         try:
+             self.logger.info(f"Executing node {self.id}")
+
+             # Merge runtime inputs with config (runtime inputs take precedence)
+             merged_inputs = {**self.config, **runtime_inputs}
+
+             # Handle nested config case (for nodes that store parameters in config['config'])
+             if "config" in merged_inputs and isinstance(merged_inputs["config"], dict):
+                 # Extract nested config
+                 nested_config = merged_inputs["config"]
+                 merged_inputs.update(nested_config)
+                 # Don't remove the config key as some nodes might need it
+
+             # Validate inputs
+             validated_inputs = self.validate_inputs(**merged_inputs)
+             self.logger.debug(f"Validated inputs for {self.id}: {validated_inputs}")
+
+             # Execute node logic
+             outputs = self.run(**validated_inputs)
+
+             # Validate outputs
+             validated_outputs = self.validate_outputs(outputs)
+
+             execution_time = (datetime.now(timezone.utc) - start_time).total_seconds()
+             self.logger.info(
+                 f"Node {self.id} executed successfully in {execution_time:.3f}s"
+             )
+             return validated_outputs
+
+         except NodeValidationError:
+             # Re-raise validation errors as-is
+             raise
+         except NodeExecutionError:
+             # Re-raise execution errors as-is
+             raise
+         except Exception as e:
+             # Wrap any other exception in NodeExecutionError
+             self.logger.error(f"Node {self.id} execution failed: {e}", exc_info=True)
+             raise NodeExecutionError(
+                 f"Node '{self.id}' execution failed: {type(e).__name__}: {e}"
+             ) from e
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert node to dictionary representation.
+
+         Serializes the node instance to a dictionary format suitable for:
+         1. Workflow export
+         2. Node persistence
+         3. API responses
+         4. Configuration sharing
+
+         The serialized format includes:
+         - id: Unique node identifier
+         - type: Node class name
+         - metadata: Complete node metadata
+         - config: Current configuration
+         - parameters: Parameter definitions with types
+
+         Type serialization:
+         - Python types are converted to string names
+         - Complex types may require custom handling
+         - Parameter defaults are included
+
+         Returns:
+             Dictionary representation containing:
+             - Node identification and type
+             - Complete metadata
+             - Configuration values
+             - Parameter schemas
+
+         Raises:
+             NodeExecutionError: If serialization fails due to:
+                 - get_parameters() errors
+                 - Metadata serialization issues
+                 - Type conversion problems
+
+         Used by:
+             - WorkflowExporter: For workflow serialization
+             - CLI: For node inspection
+             - API: For node information endpoints
+             - Debugging: For node state inspection
+         """
+         try:
+             return {
+                 "id": self.id,
+                 "type": self.__class__.__name__,
+                 "metadata": self.metadata.model_dump(),
+                 "config": self.config,
+                 "parameters": {
+                     name: {
+                         "type": param.type.__name__,
+                         "required": param.required,
+                         "default": param.default,
+                         "description": param.description,
+                     }
+                     for name, param in self.get_parameters().items()
+                 },
+             }
+         except Exception as e:
+             raise NodeExecutionError(
+                 f"Failed to serialize node '{self.id}': {e}"
+             ) from e
+
+
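Putting the contract together, a minimal concrete node might look like the sketch below (RepeatNode and its parameters are illustrative, not part of the package); note how execute() coerces the string "3" to an int before run() ever sees it:

    class RepeatNode(Node):
        """Repeats a message a configurable number of times."""

        def get_parameters(self):
            return {
                "message": NodeParameter(
                    name="message", type=str, required=True,
                    description="Text to repeat",
                ),
                "count": NodeParameter(
                    name="count", type=int, required=False, default=1,
                    description="Number of repetitions",
                ),
            }

        def run(self, message, count=1):
            return {"result": [message] * count}

    node = RepeatNode(message="hello")
    print(node.execute(count="3"))  # validate_inputs() coerces "3" -> 3
    # {'result': ['hello', 'hello', 'hello']}
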
+ # Node Registry
+ class NodeRegistry:
+     """Registry for discovering and managing available nodes.
+
+     This singleton class provides a global registry for node types,
+     enabling:
+     1. Dynamic node discovery
+     2. Node class registration
+     3. Workflow deserialization
+     4. CLI/UI node palettes
+
+     Design pattern: Singleton
+     - Single global instance (_instance)
+     - Shared registry of node classes (_nodes)
+     - Thread-safe through class methods
+
+     Registration flow:
+     1. Nodes register via @register_node decorator
+     2. Registry validates node inheritance
+     3. Stores class reference by name/alias
+     4. Available for instantiation
+
+     Usage patterns:
+     - Automatic: @register_node decorator
+     - Manual: NodeRegistry.register(NodeClass)
+     - Discovery: NodeRegistry.list_nodes()
+     - Instantiation: NodeRegistry.get('NodeName')
+
+     Upstream components:
+     - Node implementations: Register themselves
+     - Module imports: Trigger registration
+     - Setup scripts: Bulk registration
+
+     Downstream consumers:
+     - Workflow: Creates nodes by name
+     - CLI: Lists available nodes
+     - UI: Populates node palette
+     - WorkflowImporter: Deserializes nodes
+     """
+
+     _instance = None
+     _nodes: Dict[str, Type[Node]] = {}
+
+     def __new__(cls):
+         """Ensure singleton instance.
+
+         Implements the singleton pattern to maintain a single
+         global registry of nodes.
+
+         Returns:
+             The single NodeRegistry instance
+         """
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     @classmethod
+     def register(cls, node_class: Type[Node], alias: Optional[str] = None):
+         """Register a node class.
+
+         Adds a node class to the global registry, making it available
+         for discovery and instantiation.
+
+         Registration process:
+         1. Validates node_class inherits from Node
+         2. Determines registration name (alias or class name)
+         3. Warns if overwriting existing registration
+         4. Stores class reference in registry
+
+         Thread safety:
+         - Class method ensures single registry
+         - Dictionary operations are atomic
+         - Safe for concurrent registration
+
+         Example usage:
+             NodeRegistry.register(CSVReaderNode)
+             NodeRegistry.register(CustomNode, alias='MyNode')
+
+         Args:
+             node_class: Node class to register (must inherit from Node)
+             alias: Optional alias for the node (defaults to class name)
+
+         Raises:
+             NodeConfigurationError: If registration fails:
+                 - node_class doesn't inherit from Node
+                 - Invalid class type provided
+
+         Side effects:
+             - Updates cls._nodes dictionary
+             - Logs registration success/warnings
+             - Overwrites existing registrations
+
+         Used by:
+             - @register_node decorator
+             - Manual registration in setup
+             - Plugin loading systems
+         """
+         if not issubclass(node_class, Node):
+             raise NodeConfigurationError(
+                 f"Cannot register {node_class.__name__}: must be a subclass of Node"
+             )
+
+         node_name = alias or node_class.__name__
+
+         if node_name in cls._nodes:
+             logging.warning(f"Overwriting existing node registration for '{node_name}'")
+
+         cls._nodes[node_name] = node_class
+         logging.info(f"Registered node '{node_name}'")
+
+     @classmethod
+     def get(cls, node_name: str) -> Type[Node]:
+         """Get a registered node class by name.
+
+         Retrieves a node class from the registry for instantiation.
+         Used during workflow creation and deserialization.
+
+         Lookup process:
+         1. Searches registry by exact name match
+         2. Returns class reference if found
+         3. Provides helpful error with available nodes
+
+         Example usage:
+             NodeClass = NodeRegistry.get('CSVReader')
+             node = NodeClass(config={'file': 'data.csv'})
+
+         Args:
+             node_name: Name of the node (class name or alias)
+
+         Returns:
+             Node class ready for instantiation
+
+         Raises:
+             NodeConfigurationError: If node is not registered:
+                 - Includes list of available nodes
+                 - Suggests similar names if possible
+
+         Used by:
+             - Workflow.add_node(): Creates nodes by name
+             - WorkflowImporter: Deserializes nodes
+             - CLI commands: Instantiates nodes
+             - Factory methods: Dynamic node creation
+         """
+         if node_name not in cls._nodes:
+             available_nodes = ", ".join(sorted(cls._nodes.keys()))
+             raise NodeConfigurationError(
+                 f"Node '{node_name}' not found in registry. "
+                 f"Available nodes: {available_nodes}"
+             )
+         return cls._nodes[node_name]
+
+     @classmethod
+     def list_nodes(cls) -> Dict[str, Type[Node]]:
+         """List all registered nodes.
+
+         Returns a copy of the registry for discovery purposes.
+         Used by CLI help, UI node palettes, and documentation.
+
+         Returns:
+             Dictionary mapping node names to their classes:
+             - Keys: Node names/aliases
+             - Values: Node class references
+             - Safe copy prevents registry modification
+
+         Used by:
+             - CLI 'list-nodes' command
+             - UI node palette population
+             - Documentation generators
+             - Testing and debugging
+         """
+         return cls._nodes.copy()
+
+     @classmethod
+     def clear(cls):
+         """Clear all registered nodes.
+
+         Removes all nodes from the registry. Primarily used for:
+         1. Testing - Clean state between tests
+         2. Reloading - Before re-registering nodes
+         3. Cleanup - Memory management
+
+         Side effects:
+         - Empties the _nodes dictionary
+         - Logs the clearing action
+         - Existing node instances remain valid
+
+         Warning:
+         - Subsequent get() calls will fail
+         - Workflows may not deserialize
+         - Should re-register needed nodes
+         """
+         cls._nodes.clear()
+         logging.info("Cleared all registered nodes")
+
+
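Registry usage in practice can be sketched as follows (RepeatNode is the illustrative class from the earlier sketch, not a class shipped in the package):

    NodeRegistry.register(RepeatNode, alias="Repeat")
    NodeClass = NodeRegistry.get("Repeat")    # NodeConfigurationError if unknown
    print(sorted(NodeRegistry.list_nodes()))  # ['Repeat', ...]
    NodeRegistry.clear()                      # e.g., to reset state between tests
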
+ def register_node(alias: Optional[str] = None):
+     """Decorator to register a node class.
+
+     Provides a convenient decorator pattern for automatic node
+     registration when the module is imported.
+
+     Usage patterns:
+         @register_node()
+         class MyNode(Node):
+             pass
+
+         @register_node(alias='CustomName')
+         class MyNode(Node):
+             pass
+
+     Registration timing:
+     - Occurs when module is imported
+     - Before any workflow creation
+     - Enables automatic discovery
+
+     Error handling:
+     - Wraps registration errors
+     - Provides clear error messages
+     - Preserves original class
+
+     Args:
+         alias: Optional alias for the node (defaults to class name)
+
+     Returns:
+         Decorator function that:
+         - Registers the node class
+         - Returns the unmodified class
+         - Handles registration errors
+
+     Example:
+         @register_node(alias='CSV')
+         class CSVReaderNode(Node):
+             def get_parameters(self):
+                 return {'file': NodeParameter(...)}
+
+             def run(self, file):
+                 # run() must return a JSON-serializable dictionary
+                 return {'data': pd.read_csv(file).to_dict()}
+     """
+
+     def decorator(node_class: Type[Node]):
+         """Inner decorator that performs registration.
+
+         Args:
+             node_class: The node class to register
+
+         Returns:
+             The unmodified node class
+
+         Raises:
+             NodeConfigurationError: If registration fails
+         """
+         try:
+             NodeRegistry.register(node_class, alias)
+         except Exception as e:
+             raise NodeConfigurationError(
+                 f"Failed to register node {node_class.__name__}: {e}"
+             ) from e
+         return node_class
+
+     return decorator
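
As a closing sketch of the decorator in use (GreeterNode is a hypothetical example, not shipped in the wheel):

    @register_node(alias="Greeter")
    class GreeterNode(Node):
        def get_parameters(self):
            return {
                "name": NodeParameter(
                    name="name", type=str, required=True,
                    description="Who to greet",
                ),
            }

        def run(self, name):
            return {"greeting": f"Hello, {name}!"}

    # Registration happens at import time, so the class is discoverable by name:
    node = NodeRegistry.get("Greeter")(name="Ada")
    print(node.execute())  # {'greeting': 'Hello, Ada!'}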