kailash 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. kailash/api/__init__.py +7 -0
  2. kailash/api/workflow_api.py +383 -0
  3. kailash/nodes/__init__.py +2 -1
  4. kailash/nodes/ai/__init__.py +26 -0
  5. kailash/nodes/ai/ai_providers.py +1272 -0
  6. kailash/nodes/ai/embedding_generator.py +853 -0
  7. kailash/nodes/ai/llm_agent.py +1166 -0
  8. kailash/nodes/api/auth.py +3 -3
  9. kailash/nodes/api/graphql.py +2 -2
  10. kailash/nodes/api/http.py +391 -48
  11. kailash/nodes/api/rate_limiting.py +2 -2
  12. kailash/nodes/api/rest.py +465 -57
  13. kailash/nodes/base.py +71 -12
  14. kailash/nodes/code/python.py +2 -1
  15. kailash/nodes/data/__init__.py +7 -0
  16. kailash/nodes/data/readers.py +28 -26
  17. kailash/nodes/data/retrieval.py +178 -0
  18. kailash/nodes/data/sharepoint_graph.py +7 -7
  19. kailash/nodes/data/sources.py +65 -0
  20. kailash/nodes/data/sql.py +7 -5
  21. kailash/nodes/data/vector_db.py +2 -2
  22. kailash/nodes/data/writers.py +6 -3
  23. kailash/nodes/logic/__init__.py +2 -1
  24. kailash/nodes/logic/operations.py +2 -1
  25. kailash/nodes/logic/workflow.py +439 -0
  26. kailash/nodes/mcp/__init__.py +11 -0
  27. kailash/nodes/mcp/client.py +558 -0
  28. kailash/nodes/mcp/resource.py +682 -0
  29. kailash/nodes/mcp/server.py +577 -0
  30. kailash/nodes/transform/__init__.py +16 -1
  31. kailash/nodes/transform/chunkers.py +78 -0
  32. kailash/nodes/transform/formatters.py +96 -0
  33. kailash/nodes/transform/processors.py +5 -3
  34. kailash/runtime/docker.py +8 -6
  35. kailash/sdk_exceptions.py +24 -10
  36. kailash/tracking/metrics_collector.py +2 -1
  37. kailash/tracking/models.py +0 -20
  38. kailash/tracking/storage/database.py +4 -4
  39. kailash/tracking/storage/filesystem.py +0 -1
  40. kailash/utils/templates.py +6 -6
  41. kailash/visualization/performance.py +7 -7
  42. kailash/visualization/reports.py +1 -1
  43. kailash/workflow/graph.py +4 -4
  44. kailash/workflow/mock_registry.py +1 -1
  45. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/METADATA +441 -47
  46. kailash-0.1.3.dist-info/RECORD +83 -0
  47. kailash-0.1.1.dist-info/RECORD +0 -69
  48. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/WHEEL +0 -0
  49. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/entry_points.txt +0 -0
  50. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/licenses/LICENSE +0 -0
  51. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/top_level.txt +0 -0
kailash/nodes/base.py CHANGED
@@ -37,6 +37,7 @@ class NodeMetadata(BaseModel):
37
37
  """Metadata for a node.
38
38
 
39
39
  This class stores descriptive information about a node that is used for:
40
+
40
41
  1. Discovery in the UI/CLI (name, description, tags)
41
42
  2. Version tracking and compatibility checks
42
43
  3. Documentation and tooltips
@@ -68,6 +69,7 @@ class NodeParameter(BaseModel):
68
69
  """Definition of a node parameter.
69
70
 
70
71
  This class defines the schema for node inputs and outputs, providing:
72
+
71
73
  1. Type information for validation
72
74
  2. Default values for optional parameters
73
75
  3. Documentation for users
@@ -102,6 +104,7 @@ class Node(ABC):
102
104
 
103
105
  This abstract class defines the contract that all nodes must implement.
104
106
  It provides the foundation for:
107
+
105
108
  1. Parameter validation and type checking
106
109
  2. Execution lifecycle management
107
110
  3. Error handling and reporting
@@ -117,6 +120,7 @@ class Node(ABC):
117
120
 
118
121
  Inheritance Pattern:
119
122
  All concrete nodes must:
123
+
120
124
  1. Implement get_parameters() to define inputs
121
125
  2. Implement run() to process data
122
126
  3. Call super().__init__() with configuration
@@ -137,6 +141,7 @@ class Node(ABC):
137
141
  """Initialize the node with configuration parameters.
138
142
 
139
143
  This method performs the following initialization steps:
144
+
140
145
  1. Sets the node ID (defaults to class name)
141
146
  2. Creates metadata from provided arguments
142
147
  3. Sets up logging for the node
@@ -195,6 +200,7 @@ class Node(ABC):
195
200
 
196
201
  This abstract method must be implemented by all concrete nodes to
197
202
  specify their input schema. The parameters define:
203
+
198
204
  1. What inputs the node expects
199
205
  2. Type requirements for each input
200
206
  3. Whether inputs are required or optional
@@ -202,12 +208,14 @@ class Node(ABC):
202
208
  5. Documentation for each parameter
203
209
 
204
210
  The returned dictionary is used throughout the node lifecycle:
211
+
205
212
  - During initialization: _validate_config() checks configuration
206
213
  - During execution: validate_inputs() validates runtime data
207
214
  - During workflow creation: Used for connection validation
208
215
  - During export: Included in workflow manifests
209
216
 
210
- Example implementation:
217
+ Example::
218
+
211
219
  def get_parameters(self):
212
220
  return {
213
221
  'input_file': NodeParameter(
@@ -239,8 +247,10 @@ class Node(ABC):
239
247
  def get_output_schema(self) -> Dict[str, NodeParameter]:
240
248
  """Define output parameters for this node.
241
249
 
242
- This optional method allows nodes to specify their output schema for validation.
243
- If not overridden, outputs will only be validated for JSON-serializability.
250
+ This optional method allows nodes to specify their output schema for
251
+ validation.
252
+ If not overridden, outputs will only be validated for
253
+ JSON-serializability.
244
254
 
245
255
  Design purpose:
246
256
  - Enables static analysis of node outputs
@@ -249,12 +259,14 @@ class Node(ABC):
249
259
  - Facilitates workflow validation and type checking
250
260
 
251
261
  The output schema serves similar purposes as input parameters:
262
+
252
263
  1. Type validation during execution
253
264
  2. Documentation for downstream consumers
254
265
  3. Workflow connection validation
255
266
  4. Export manifest generation
256
267
 
257
- Example implementation:
268
+ Example::
269
+
258
270
  def get_output_schema(self):
259
271
  return {
260
272
  'dataframe': NodeParameter(
@@ -293,10 +305,12 @@ class Node(ABC):
293
305
  def run(self, **kwargs) -> Dict[str, Any]:
294
306
  """Execute the node's logic.
295
307
 
296
- This is the core method that implements the node's data processing logic.
308
+ This is the core method that implements the node's data processing
309
+ logic.
297
310
  It receives validated inputs and must return a dictionary of outputs.
298
311
 
299
312
  Design requirements:
313
+
300
314
  1. Must be stateless - no side effects between runs
301
315
  2. All inputs are provided as keyword arguments
302
316
  3. Must return a dictionary (JSON-serializable)
@@ -305,12 +319,14 @@ class Node(ABC):
305
319
  6. Should use self.logger for status reporting
306
320
 
307
321
  The method is called by execute() which handles:
322
+
308
323
  - Input validation before calling run()
309
324
  - Output validation after run() completes
310
325
  - Error wrapping and logging
311
326
  - Execution timing and metrics
312
327
 
313
- Example implementation:
328
+ Example::
329
+
314
330
  def run(self, input_file, delimiter=','):
315
331
  df = pd.read_csv(input_file, delimiter=delimiter)
316
332
  return {
@@ -344,15 +360,19 @@ class Node(ABC):
344
360
  provided configuration matches the node's parameter requirements.
345
361
 
346
362
  Validation process:
363
+
347
364
  1. Calls get_parameters() to get schema
348
365
  2. For each parameter, checks if:
366
+
349
367
  - Required parameters are present
350
368
  - Values match expected types
351
369
  - Type conversion is possible if needed
370
+
352
371
  3. Sets default values for missing optional parameters
353
372
  4. Updates self.config with validated values
354
373
 
355
374
  Type conversion:
375
+
356
376
  - If a value doesn't match the expected type, attempts conversion
357
377
  - For example: string "123" -> int 123
358
378
  - Conversion failures result in descriptive errors
@@ -406,15 +426,19 @@ class Node(ABC):
406
426
  error messages for invalid inputs.
407
427
 
408
428
  Validation steps:
429
+
409
430
  1. Gets parameter definitions from get_parameters()
410
431
  2. Checks each parameter for:
432
+
411
433
  - Presence (if required)
412
434
  - Type compatibility
413
435
  - Null handling for optional parameters
436
+
414
437
  3. Attempts type conversion if needed
415
438
  4. Applies default values for missing optional parameters
416
439
 
417
440
  Key behaviors:
441
+
418
442
  - Required parameters must be provided or have defaults
419
443
  - Optional parameters can be None
420
444
  - Type mismatches attempt conversion before failing
@@ -423,7 +447,7 @@ class Node(ABC):
423
447
  Example flow:
424
448
  # Node expects: {'count': int, 'name': str (optional)}
425
449
  inputs = {'count': '42', 'name': None}
426
- validated = validate_inputs(**inputs)
450
+ validated = validate_inputs(\**inputs)
427
451
  # Returns: {'count': 42} # Converted and None removed
428
452
 
429
453
  Args:
@@ -431,12 +455,14 @@ class Node(ABC):
431
455
 
432
456
  Returns:
433
457
  Dictionary of validated inputs with:
458
+
434
459
  - Type conversions applied
435
460
  - Defaults for missing optional parameters
436
461
  - None values removed for optional parameters
437
462
 
438
463
  Raises:
439
464
  NodeValidationError: If inputs are invalid:
465
+
440
466
  - Missing required parameters
441
467
  - Type conversion failures
442
468
  - get_parameters() errors
@@ -489,20 +515,25 @@ class Node(ABC):
489
515
  """Validate outputs against schema and JSON-serializability.
490
516
 
491
517
  This enhanced method validates outputs in two ways:
518
+
492
519
  1. Schema validation: If get_output_schema() is defined, validates
493
520
  types and required fields
494
521
  2. JSON serialization: Ensures all outputs can be serialized
495
522
 
496
523
  Validation process:
524
+
497
525
  1. Check outputs is a dictionary
498
526
  2. If output schema exists:
527
+
499
528
  - Validate required fields are present
500
529
  - Check type compatibility
501
530
  - Attempt type conversion if needed
531
+
502
532
  3. Verify JSON-serializability
503
533
  4. Return validated outputs
504
534
 
505
535
  Schema validation features:
536
+
506
537
  - Required outputs must be present
507
538
  - Optional outputs can be None or missing
508
539
  - Type mismatches attempt conversion
@@ -516,6 +547,7 @@ class Node(ABC):
516
547
 
517
548
  Raises:
518
549
  NodeValidationError: If outputs are invalid:
550
+
519
551
  - Not a dictionary
520
552
  - Missing required outputs
521
553
  - Type validation failures
@@ -632,6 +664,7 @@ class Node(ABC):
632
664
  5. Performance metrics
633
665
 
634
666
  Execution flow:
667
+
635
668
  1. Logs execution start
636
669
  2. Validates inputs against parameter schema
637
670
  3. Calls run() with validated inputs
@@ -640,11 +673,13 @@ class Node(ABC):
640
673
  6. Returns validated outputs
641
674
 
642
675
  Error handling strategy:
676
+
643
677
  - NodeValidationError: Re-raised as-is (input/output issues)
644
678
  - NodeExecutionError: Re-raised as-is (run() failures)
645
679
  - Other exceptions: Wrapped in NodeExecutionError
646
680
 
647
681
  Performance tracking:
682
+
648
683
  - Records execution start/end times
649
684
  - Logs total execution duration
650
685
  - Includes timing in execution logs
@@ -713,12 +748,14 @@ class Node(ABC):
713
748
  """Convert node to dictionary representation.
714
749
 
715
750
  Serializes the node instance to a dictionary format suitable for:
751
+
716
752
  1. Workflow export
717
753
  2. Node persistence
718
754
  3. API responses
719
755
  4. Configuration sharing
720
756
 
721
757
  The serialized format includes:
758
+
722
759
  - id: Unique node identifier
723
760
  - type: Node class name
724
761
  - metadata: Complete node metadata
@@ -726,12 +763,14 @@ class Node(ABC):
726
763
  - parameters: Parameter definitions with types
727
764
 
728
765
  Type serialization:
766
+
729
767
  - Python types are converted to string names
730
768
  - Complex types may require custom handling
731
769
  - Parameter defaults are included
732
770
 
733
771
  Returns:
734
772
  Dictionary representation containing:
773
+
735
774
  - Node identification and type
736
775
  - Complete metadata
737
776
  - Configuration values
@@ -739,6 +778,7 @@ class Node(ABC):
739
778
 
740
779
  Raises:
741
780
  NodeExecutionError: If serialization fails due to:
781
+
742
782
  - get_parameters() errors
743
783
  - Metadata serialization issues
744
784
  - Type conversion problems
@@ -777,23 +817,27 @@ class NodeRegistry:
777
817
 
778
818
  This singleton class provides a global registry for node types,
779
819
  enabling:
820
+
780
821
  1. Dynamic node discovery
781
822
  2. Node class registration
782
823
  3. Workflow deserialization
783
824
  4. CLI/UI node palettes
784
825
 
785
826
  Design pattern: Singleton
827
+
786
828
  - Single global instance (_instance)
787
829
  - Shared registry of node classes (_nodes)
788
830
  - Thread-safe through class methods
789
831
 
790
832
  Registration flow:
833
+
791
834
  1. Nodes register via @register_node decorator
792
835
  2. Registry validates node inheritance
793
836
  3. Stores class reference by name/alias
794
837
  4. Available for instantiation
795
838
 
796
839
  Usage patterns:
840
+
797
841
  - Automatic: @register_node decorator
798
842
  - Manual: NodeRegistry.register(NodeClass)
799
843
  - Discovery: NodeRegistry.list_nodes()
@@ -835,12 +879,14 @@ class NodeRegistry:
835
879
  for discovery and instantiation.
836
880
 
837
881
  Registration process:
882
+
838
883
  1. Validates node_class inherits from Node
839
884
  2. Determines registration name (alias or class name)
840
885
  3. Warns if overwriting existing registration
841
886
  4. Stores class reference in registry
842
887
 
843
888
  Thread safety:
889
+
844
890
  - Class method ensures single registry
845
891
  - Dictionary operations are atomic
846
892
  - Safe for concurrent registration
@@ -855,10 +901,12 @@ class NodeRegistry:
855
901
 
856
902
  Raises:
857
903
  NodeConfigurationError: If registration fails:
904
+
858
905
  - node_class doesn't inherit from Node
859
906
  - Invalid class type provided
860
907
 
861
908
  Side effects:
909
+
862
910
  - Updates cls._nodes dictionary
863
911
  - Logs registration success/warnings
864
912
  - Overwrites existing registrations
@@ -889,6 +937,7 @@ class NodeRegistry:
889
937
  Used during workflow creation and deserialization.
890
938
 
891
939
  Lookup process:
940
+
892
941
  1. Searches registry by exact name match
893
942
  2. Returns class reference if found
894
943
  3. Provides helpful error with available nodes
@@ -905,6 +954,7 @@ class NodeRegistry:
905
954
 
906
955
  Raises:
907
956
  NodeConfigurationError: If node is not registered:
957
+
908
958
  - Includes list of available nodes
909
959
  - Suggests similar names if possible
910
960
 
@@ -931,6 +981,7 @@ class NodeRegistry:
931
981
 
932
982
  Returns:
933
983
  Dictionary mapping node names to their classes:
984
+
934
985
  - Keys: Node names/aliases
935
986
  - Values: Node class references
936
987
  - Safe copy prevents registry modification
@@ -948,19 +999,22 @@ class NodeRegistry:
948
999
  """Clear all registered nodes.
949
1000
 
950
1001
  Removes all nodes from the registry. Primarily used for:
1002
+
951
1003
  1. Testing - Clean state between tests
952
1004
  2. Reloading - Before re-registering nodes
953
1005
  3. Cleanup - Memory management
954
1006
 
955
1007
  Side effects:
1008
+
956
1009
  - Empties the _nodes dictionary
957
1010
  - Logs the clearing action
958
1011
  - Existing node instances remain valid
959
1012
 
960
- Warning:
961
- - Subsequent get() calls will fail
962
- - Workflows may not deserialize
963
- - Should re-register needed nodes
1013
+ Warning::
1014
+
1015
+ - Subsequent get() calls will fail
1016
+ - Workflows may not deserialize
1017
+ - Should re-register needed nodes
964
1018
  """
965
1019
  cls._nodes.clear()
966
1020
  logging.info("Cleared all registered nodes")
@@ -982,11 +1036,13 @@ def register_node(alias: Optional[str] = None):
982
1036
  pass
983
1037
 
984
1038
  Registration timing:
1039
+
985
1040
  - Occurs when module is imported
986
1041
  - Before any workflow creation
987
1042
  - Enables automatic discovery
988
1043
 
989
1044
  Error handling:
1045
+
990
1046
  - Wraps registration errors
991
1047
  - Provides clear error messages
992
1048
  - Preserves original class
@@ -996,15 +1052,18 @@ def register_node(alias: Optional[str] = None):
996
1052
 
997
1053
  Returns:
998
1054
  Decorator function that:
1055
+
999
1056
  - Registers the node class
1000
1057
  - Returns the unmodified class
1001
1058
  - Handles registration errors
1002
1059
 
1003
- Example:
1060
+ Example::
1061
+
1004
1062
  @register_node(alias='CSV')
1005
1063
  class CSVReaderNode(Node):
1006
1064
  def get_parameters(self):
1007
1065
  return {'file': NodeParameter(...)}
1066
+
1008
1067
  def run(self, file):
1009
1068
  return pd.read_csv(file)
1010
1069
  """
kailash/nodes/code/python.py CHANGED
@@ -546,7 +546,8 @@ class PythonCodeNode(Node):
546
546
  - State management for class-based nodes
547
547
  - AST-based security validation
548
548
 
549
- Example:
549
+ Example::
550
+
550
551
  # Function-based node
551
552
  def custom_filter(data: pd.DataFrame, threshold: float) -> pd.DataFrame:
552
553
  return data[data['value'] > threshold]
kailash/nodes/data/__init__.py CHANGED
@@ -81,10 +81,12 @@ Example Workflows:
81
81
  """
82
82
 
83
83
  from kailash.nodes.data.readers import CSVReader, JSONReader, TextReader
84
+ from kailash.nodes.data.retrieval import RelevanceScorerNode
84
85
  from kailash.nodes.data.sharepoint_graph import (
85
86
  SharePointGraphReader,
86
87
  SharePointGraphWriter,
87
88
  )
89
+ from kailash.nodes.data.sources import DocumentSourceNode, QuerySourceNode
88
90
  from kailash.nodes.data.sql import SQLDatabaseNode, SQLQueryBuilderNode
89
91
  from kailash.nodes.data.streaming import (
90
92
  EventStreamNode,
@@ -110,6 +112,11 @@ __all__ = [
110
112
  "JSONWriter",
111
113
  "TextWriter",
112
114
  "SharePointGraphWriter",
115
+ # Sources
116
+ "DocumentSourceNode",
117
+ "QuerySourceNode",
118
+ # Retrieval
119
+ "RelevanceScorerNode",
113
120
  # SQL
114
121
  "SQLDatabaseNode",
115
122
  "SQLQueryBuilderNode",
kailash/nodes/data/readers.py CHANGED
@@ -78,7 +78,8 @@ class CSVReader(Node):
78
78
  - UnicodeDecodeError: Encoding mismatch
79
79
  - csv.Error: Malformed CSV data
80
80
 
81
- Example:
81
+ Example::
82
+
82
83
  # Read customer data with headers
83
84
  reader = CSVReader(
84
85
  file_path='customers.csv',
@@ -242,40 +243,40 @@ class JSONReader(Node):
242
243
  structure while ensuring compatibility with downstream nodes.
243
244
 
244
245
  Design Features:
245
- 1. Preserves JSON structure integrity
246
- 2. Handles nested objects and arrays
247
- 3. Unicode-safe reading
248
- 4. Automatic type preservation
249
- 5. Memory-efficient for reasonable file sizes
246
+ 1. Preserves JSON structure integrity
247
+ 2. Handles nested objects and arrays
248
+ 3. Unicode-safe reading
249
+ 4. Automatic type preservation
250
+ 5. Memory-efficient for reasonable file sizes
250
251
 
251
252
  Data Flow:
252
- - Input: JSON file path
253
- - Processing: Parse JSON maintaining structure
254
- - Output: Python objects matching JSON structure
253
+ - Input: JSON file path
254
+ - Processing: Parse JSON maintaining structure
255
+ - Output: Python objects matching JSON structure
255
256
 
256
257
  Common Usage Patterns:
257
- 1. Loading configuration files
258
- 2. Reading API response caches
259
- 3. Processing structured data exports
260
- 4. Loading machine learning datasets
258
+ 1. Loading configuration files
259
+ 2. Reading API response caches
260
+ 3. Processing structured data exports
261
+ 4. Loading machine learning datasets
261
262
 
262
263
  Upstream Sources:
263
- - API response saves
264
- - Configuration management
265
- - Data export systems
266
- - Previous JSONWriter outputs
264
+ - API response saves
265
+ - Configuration management
266
+ - Data export systems
267
+ - Previous JSONWriter outputs
267
268
 
268
269
  Downstream Consumers:
269
- - Transform nodes: Process structured data
270
- - Logic nodes: Navigate JSON structure
271
- - JSONWriter: Re-export with modifications
272
- - AI nodes: Use as structured input
270
+ - Transform nodes: Process structured data
271
+ - Logic nodes: Navigate JSON structure
272
+ - JSONWriter: Re-export with modifications
273
+ - AI nodes: Use as structured input
273
274
 
274
275
  Error Handling:
275
- - FileNotFoundError: Missing file
276
- - json.JSONDecodeError: Invalid JSON syntax
277
- - PermissionError: Access denied
278
- - MemoryError: File too large
276
+ - FileNotFoundError: Missing file
277
+ - json.JSONDecodeError: Invalid JSON syntax
278
+ - PermissionError: Access denied
279
+ - MemoryError: File too large
279
280
 
280
281
  Example:
281
282
  # Read API response data
@@ -402,7 +403,8 @@ class TextReader(Node):
402
403
  - UnicodeDecodeError: Wrong encoding
403
404
  - MemoryError: File too large
404
405
 
405
- Example:
406
+ Example::
407
+
406
408
  # Read a log file
407
409
  reader = TextReader(
408
410
  file_path='application.log',