kailash 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. kailash/__init__.py +1 -1
  2. kailash/api/custom_nodes_secure.py +2 -2
  3. kailash/api/studio_secure.py +1 -1
  4. kailash/mcp/client_new.py +1 -1
  5. kailash/nodes/ai/a2a.py +1 -1
  6. kailash/nodes/api/__init__.py +26 -0
  7. kailash/nodes/api/monitoring.py +463 -0
  8. kailash/nodes/api/security.py +822 -0
  9. kailash/nodes/base.py +3 -3
  10. kailash/nodes/code/python.py +6 -0
  11. kailash/nodes/data/__init__.py +9 -0
  12. kailash/nodes/data/directory.py +278 -0
  13. kailash/nodes/data/event_generation.py +297 -0
  14. kailash/nodes/data/file_discovery.py +601 -0
  15. kailash/nodes/data/sql.py +2 -2
  16. kailash/nodes/transform/processors.py +32 -1
  17. kailash/runtime/async_local.py +1 -1
  18. kailash/runtime/docker.py +4 -4
  19. kailash/runtime/local.py +41 -4
  20. kailash/runtime/parallel.py +2 -2
  21. kailash/runtime/parallel_cyclic.py +2 -2
  22. kailash/runtime/testing.py +2 -2
  23. kailash/utils/templates.py +6 -6
  24. kailash/visualization/performance.py +16 -3
  25. kailash/visualization/reports.py +5 -1
  26. kailash/workflow/convergence.py +1 -1
  27. kailash/workflow/cycle_analyzer.py +8 -1
  28. kailash/workflow/cyclic_runner.py +1 -1
  29. kailash/workflow/graph.py +33 -6
  30. kailash/workflow/visualization.py +10 -2
  31. kailash-0.3.0.dist-info/METADATA +428 -0
  32. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/RECORD +36 -31
  33. kailash-0.2.1.dist-info/METADATA +0 -1617
  34. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
  35. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
  36. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
  37. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/runtime/docker.py CHANGED
@@ -174,7 +174,7 @@ def main():
    logger.info(f"Loaded configuration for {node_data['class']} node")

    # Load runtime inputs if available
-   input_path = Path("/examples/data/input/inputs.json")
+   input_path = Path("/data/inputs/json/inputs.json")
    runtime_inputs = {}
    if input_path.exists():
        logger.info(f"Loading inputs from {input_path}")
@@ -206,7 +206,7 @@ def main():
    except Exception as e:
        logger.error(f"Node execution failed: {e}")
        # Save error information
-       with open("/examples/data/output/error.json", 'w') as f:
+       with open("/data/outputs/json/error.json", 'w') as f:
            json.dump({
                "error": str(e),
                "type": e.__class__.__name__
@@ -216,7 +216,7 @@ def main():
    # Save results
    logger.info("Saving execution results")
    try:
-       result_path = Path("/examples/data/output/result.json")
+       result_path = Path("/data/outputs/json/result.json")
        with open(result_path, 'w') as f:
            # Handle non-serializable objects with basic conversion
            try:
@@ -590,7 +590,7 @@ class DockerRuntime:

        try:
            # Validate workflow
-           workflow.validate()
+           workflow.validate(runtime_parameters=inputs)

            # Get execution order
            execution_order = workflow.get_execution_order()
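Note on the path change above: the container entrypoint now reads runtime inputs from /data/inputs/json/inputs.json and writes results and errors under /data/outputs/json/. A minimal host-side sketch of staging an inputs file for that layout; the bind-mount of the staging directory to /data and the parameter names are assumptions for illustration, not taken from this diff.

```python
import json
from pathlib import Path

# Host-side staging directory; assumed to be bind-mounted at /data in the container.
staging = Path("./docker_data")
inputs_dir = staging / "inputs" / "json"
inputs_dir.mkdir(parents=True, exist_ok=True)

# Hypothetical runtime inputs for the node being executed inside the container.
runtime_inputs = {"file_path": "data/inputs/csv/input.csv", "limit": 100}

with open(inputs_dir / "inputs.json", "w") as f:
    json.dump(runtime_inputs, f, indent=2)

print(f"Wrote {inputs_dir / 'inputs.json'}; mount {staging} at /data in the container.")
```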
kailash/runtime/local.py CHANGED
@@ -113,8 +113,8 @@ class LocalRuntime:
        run_id = None

        try:
-           # Validate workflow
-           workflow.validate()
+           # Validate workflow with runtime parameters (Session 061)
+           workflow.validate(runtime_parameters=parameters)

            # Initialize tracking
            if task_manager:
@@ -296,6 +296,10 @@ class LocalRuntime:
                parameters=parameters.get(node_id, {}),
            )

+           # Update node config with parameters (Session 061: direct config update)
+           {**node_instance.config, **parameters.get(node_id, {})}
+           node_instance.config.update(parameters.get(node_id, {}))
+
            if self.debug:
                self.logger.debug(f"Node {node_id} inputs: {inputs}")

@@ -391,16 +395,36 @@
        """
        inputs = {}

-       # Start with node configuration
-       inputs.update(node_instance.config)
+       # NOTE: Node configuration is handled separately in configure() call
+       # Only add runtime inputs and data from connected nodes here
+
+       # Add runtime parameters (those not used for node configuration)
+       # Map specific runtime parameters for known node types
+       if "consumer_timeout_ms" in parameters:
+           inputs["timeout_ms"] = parameters["consumer_timeout_ms"]
+
+       # Add other potential runtime parameters that are not configuration
+       runtime_param_names = {"max_messages", "timeout_ms", "limit", "offset"}
+       for param_name, param_value in parameters.items():
+           if param_name in runtime_param_names:
+               inputs[param_name] = param_value

        # Add connected inputs from other nodes
        for edge in workflow.graph.in_edges(node_id, data=True):
            source_node_id = edge[0]
            mapping = edge[2].get("mapping", {})

+           if self.debug:
+               self.logger.debug(f"Processing edge {source_node_id} -> {node_id}")
+               self.logger.debug(f"  Edge data: {edge[2]}")
+               self.logger.debug(f"  Mapping: {mapping}")
+
            if source_node_id in node_outputs:
                source_outputs = node_outputs[source_node_id]
+               if self.debug:
+                   self.logger.debug(
+                       f"  Source outputs: {list(source_outputs.keys())}"
+                   )

                # Check if the source node failed
                if isinstance(source_outputs, dict) and source_outputs.get("failed"):
@@ -411,11 +435,24 @@
                for source_key, target_key in mapping.items():
                    if source_key in source_outputs:
                        inputs[target_key] = source_outputs[source_key]
+                       if self.debug:
+                           self.logger.debug(
+                               f"  MAPPED: {source_key} -> {target_key} (type: {type(source_outputs[source_key])})"
+                           )
                    else:
+                       if self.debug:
+                           self.logger.debug(
+                               f"  MISSING: {source_key} not in {list(source_outputs.keys())}"
+                           )
                        self.logger.warning(
                            f"Source output '{source_key}' not found in node '{source_node_id}'. "
                            f"Available outputs: {list(source_outputs.keys())}"
                        )
+           else:
+               if self.debug:
+                   self.logger.debug(
+                       f"  No outputs found for source node {source_node_id}"
+                   )

        # Apply parameter overrides
        inputs.update(parameters)
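Taken together, these local.py hunks pass per-node runtime parameters into validation and merge them into each node's config before execution. A hedged sketch of how a caller might exercise this; it assumes the runtime exposes an execute(workflow, parameters=...) entry point keyed by node ID (as the parameters.get(node_id, {}) calls above suggest), and the node ID "reader" and its file_path parameter are illustrative.

```python
from kailash.runtime.local import LocalRuntime  # module path as shown in this diff

# `workflow` is assumed to be an already-built Workflow containing a node
# registered under the ID "reader" that declares a required `file_path` input
# which is absent from the node's static configuration.
runtime = LocalRuntime()

# Parameters are keyed by node ID. Because validate() now receives
# runtime_parameters=parameters, a required input supplied here passes
# validation even though it is not in the node's configuration.
runtime.execute(
    workflow,
    parameters={"reader": {"file_path": "data/inputs/csv/input.csv"}},
)
```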
kailash/runtime/parallel.py CHANGED
@@ -91,7 +91,7 @@ class ParallelRuntime:

        try:
            # Validate workflow
-           workflow.validate()
+           workflow.validate(runtime_parameters=parameters)

            # Initialize semaphore for concurrent execution control
            self.semaphore = asyncio.Semaphore(self.max_workers)
@@ -398,7 +398,7 @@ class ParallelRuntime:
        async def execute_with_metrics():
            with collector.collect(node_id=node_id) as context:
                result = await loop.run_in_executor(
-                   None, lambda: node_instance.execute(**inputs)
+                   None, lambda: node_instance.run(**inputs)
                )
                return result, context.result()
kailash/runtime/parallel_cyclic.py CHANGED
@@ -80,7 +80,7 @@ class ParallelCyclicRuntime:

        try:
            # Validate workflow
-           workflow.validate()
+           workflow.validate(runtime_parameters=parameters)

            # Check for cycles first
            if self.enable_cycles and workflow.has_cycles():
@@ -380,7 +380,7 @@ class ParallelCyclicRuntime:
        # Execute node with metrics collection
        collector = MetricsCollector()
        with collector.collect(node_id=node_id) as metrics_context:
-           outputs = node_instance.execute(**inputs)
+           outputs = node_instance.run(**inputs)

            # Get performance metrics
            performance_metrics = metrics_context.result()
kailash/runtime/testing.py CHANGED
@@ -229,12 +229,12 @@ class NodeTestHelper:
        """Test node execution with given inputs."""
        if should_fail:
            try:
-               result = node.execute(**inputs)
+               result = node.run(**inputs)
                assert False, "Node execution should have failed but didn't"
            except (NodeValidationError, WorkflowExecutionError):
                return {}
        else:
-           result = node.execute(**inputs)
+           result = node.run(**inputs)

            # Check expected output keys
            for key in expected_keys:
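Across the runtimes and this test helper, direct node invocation switches from node.execute(**inputs) to node.run(**inputs). A minimal sketch of the check-the-outputs pattern the helper implements, written against the renamed entry point; the helper name and assertion style are illustrative, not the SDK's API.

```python
def run_and_check(node, expected_keys, **inputs):
    """Call the node's run() entry point and verify the expected output keys exist."""
    result = node.run(**inputs)  # formerly node.execute(**inputs)
    missing = [key for key in expected_keys if key not in result]
    assert not missing, f"Missing expected outputs: {missing}"
    return result
```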
kailash/utils/templates.py CHANGED
@@ -327,7 +327,7 @@ A Kailash workflow project.

- `workflows/`: Workflow definitions
- `nodes/`: Custom node implementations
-- `examples/data/`: Input data files
+- `data/inputs/`: Input data files
- `outputs/`: Output files

## Usage
@@ -363,7 +363,7 @@ workflow = Workflow(
)

# Add nodes
-workflow.add_node("reader", CSVReaderNode(), file_path="examples/examples/data/input.csv")
+workflow.add_node("reader", CSVReaderNode(), file_path="data/inputs/csv/input.csv")
workflow.add_node("filter", Filter(), field="value", operator=">", value=100)
workflow.add_node("sort", Sort(), field="value", reverse=True)
workflow.add_node("aggregate", Aggregator(), group_by="category", operation="sum")
@@ -482,8 +482,8 @@ workflow = Workflow(
)

# Data ingestion
-workflow.add_node("csv_reader", CSVReaderNode(), file_path="examples/examples/data/sales_data.csv")
-workflow.add_node("json_reader", JSONReaderNode(), file_path="examples/examples/data/product_data.json")
+workflow.add_node("csv_reader", CSVReaderNode(), file_path="data/inputs/csv/sales_data.csv")
+workflow.add_node("json_reader", JSONReaderNode(), file_path="data/inputs/json/product_data.json")

# Transform data
workflow.add_node("filter_sales", Filter(), field="amount", operator=">", value=1000)
@@ -553,7 +553,7 @@ workflow = Workflow(
)

# Data ingestion
-workflow.add_node("read_data", CSVReaderNode(), file_path="examples/examples/data/text_data.csv")
+workflow.add_node("read_data", CSVReaderNode(), file_path="data/inputs/csv/text_data.csv")

# Preprocessing
workflow.add_node("extract_text", Map(), field="content")
@@ -616,7 +616,7 @@ workflow = Workflow(
)

# Read configuration
-workflow.add_node("read_config", JSONReaderNode(), file_path="examples/examples/data/api_config.json")
+workflow.add_node("read_config", JSONReaderNode(), file_path="data/inputs/json/api_config.json")

# Process with AI agent
workflow.add_node("chat_agent", ChatAgent(),
kailash/visualization/performance.py CHANGED
@@ -63,8 +63,12 @@ class PerformanceVisualizer:
            Dictionary mapping chart names to file paths
        """
        if output_dir is None:
-           # Use relative path that works from project root or create in current directory
-           output_dir = Path.cwd() / "outputs" / "performance"
+           # Use centralized output directory
+           # Get project root and use data/outputs/visualizations/performance
+           project_root = Path(__file__).parent.parent.parent.parent
+           output_dir = (
+               project_root / "data" / "outputs" / "visualizations" / "performance"
+           )
        output_dir.mkdir(parents=True, exist_ok=True)

        # Get run data
@@ -719,7 +723,16 @@ class PerformanceVisualizer:
    ) -> Path:
        """Compare performance across multiple runs."""
        if output_path is None:
-           output_path = Path.cwd() / "outputs" / "performance" / "comparison.png"
+           # Use centralized output directory
+           project_root = Path(__file__).parent.parent.parent.parent
+           output_path = (
+               project_root
+               / "data"
+               / "outputs"
+               / "visualizations"
+               / "performance"
+               / "comparison.png"
+           )
        output_path.parent.mkdir(parents=True, exist_ok=True)

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
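Several modules in this release (performance.py, reports.py, cycle_analyzer.py, visualization.py) replace Path.cwd()-relative output folders with a path derived from the package location. A standalone sketch of that pattern; the data/outputs layout mirrors these hunks, while the .parent depth and file names are placeholders.

```python
from pathlib import Path

# Resolve a root relative to this file, then build a centralized output
# directory beneath it, mirroring the pattern introduced in these hunks.
project_root = Path(__file__).resolve().parent  # adjust .parent depth for the real layout
output_dir = project_root / "data" / "outputs" / "visualizations" / "performance"
output_dir.mkdir(parents=True, exist_ok=True)

chart_path = output_dir / "comparison.png"
print(f"Charts would be written to: {chart_path}")
```

One consequence of this design choice: resolving the root from __file__ ties outputs to the install location rather than the current working directory, which matters when the package is installed into a read-only site-packages tree.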
kailash/visualization/reports.py CHANGED
@@ -168,9 +168,13 @@ class WorkflowPerformanceReporter:
        """
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+           # Use centralized output directory
+           project_root = Path(__file__).parent.parent.parent.parent
            output_path = (
-               Path.cwd()
+               project_root
+               / "data"
                / "outputs"
+               / "reports"
                / f"workflow_report_{run_id[:8]}_{timestamp}.{format.value}"
            )
kailash/workflow/convergence.py CHANGED
@@ -229,7 +229,7 @@ class AdaptiveCondition(ConvergenceCondition):


def create_convergence_condition(
-   spec: Union[str, int, Callable, Dict]
+   spec: Union[str, int, Callable, Dict],
) -> ConvergenceCondition:
    """Factory function to create convergence conditions from various specs.
kailash/workflow/cycle_analyzer.py CHANGED
@@ -84,7 +84,14 @@ class CycleAnalyzer:
        self.analysis_level = analysis_level
        self.enable_profiling = enable_profiling
        self.enable_debugging = enable_debugging
-       self.output_directory = Path(output_directory) if output_directory else None
+
+       # Set output directory - use centralized location if not specified
+       if output_directory:
+           self.output_directory = Path(output_directory)
+       else:
+           # Use centralized output directory by default
+           project_root = Path(__file__).parent.parent.parent.parent
+           self.output_directory = project_root / "data" / "outputs" / "cycle_analysis"

        # Initialize components based on configuration
        debug_level = {
kailash/workflow/cyclic_runner.py CHANGED
@@ -168,7 +168,7 @@ class CyclicWorkflowExecutor:
            WorkflowValidationError: If workflow is invalid
        """
        # Validate workflow (including cycles)
-       workflow.validate()
+       workflow.validate(runtime_parameters=parameters)

        # Generate run ID if not provided
        if not run_id:
kailash/workflow/graph.py CHANGED
@@ -191,10 +191,12 @@ class Workflow:

        # Store node instance and metadata
        try:
+           # Use the node instance's actual config, which includes both original config and any updates
+           actual_config = node_instance.config.copy()
            node_instance_data = NodeInstance(
                node_id=node_id,
                node_type=node_type,
-               config=config,
+               config=actual_config,
                position=(len(self.nodes) * 150, 100),
            )
            self.nodes[node_id] = node_instance_data
@@ -203,8 +205,10 @@ class Workflow:

        self._node_instances[node_id] = node_instance

-       # Add to graph
-       self.graph.add_node(node_id, node=node_instance, type=node_type, config=config)
+       # Add to graph with actual config
+       self.graph.add_node(
+           node_id, node=node_instance, type=node_type, config=actual_config
+       )
        logger.info(f"Added node '{node_id}' of type '{node_type}'")

    def _add_node_internal(
@@ -707,9 +711,12 @@ class Workflow:
            # This shouldn't happen, but handle gracefully
            raise WorkflowValidationError("Unable to determine execution order")

-   def validate(self) -> None:
+   def validate(self, runtime_parameters: Optional[Dict[str, Any]] = None) -> None:
        """Validate the workflow structure.

+       Args:
+           runtime_parameters: Parameters that will be provided at runtime (Session 061)
+
        Raises:
            WorkflowValidationError: If workflow is invalid
        """
@@ -758,14 +765,19 @@ class Workflow:
                    # Check nested config
                    found_in_config = param_name in node_instance.config["config"]

-               if not found_in_config:
+               # Session 061: Check if parameter will be provided at runtime
+               found_in_runtime = False
+               if runtime_parameters and node_id in runtime_parameters:
+                   found_in_runtime = param_name in runtime_parameters[node_id]
+
+               if not found_in_config and not found_in_runtime:
                    if param_def.default is None:
                        missing_inputs.append(param_name)

            if missing_inputs:
                raise WorkflowValidationError(
                    f"Node '{node_id}' missing required inputs: {missing_inputs}. "
-                   f"Provide these inputs via connections or node configuration"
+                   f"Provide these inputs via connections, node configuration, or runtime parameters"
                )

        logger.info(f"Workflow '{self.name}' validated successfully")
@@ -928,6 +940,14 @@ class Workflow:
            to_input = edge_data.get("to_input")
            mapping = edge_data.get("mapping", {})

+           print(f"CONNECTION DEBUG: {source_node_id} -> {node_id}")
+           print(f"  Edge data: {edge_data}")
+           print(f"  from_output: {from_output}, to_input: {to_input}")
+           print(f"  mapping: {mapping}")
+           print(
+               f"  source_results keys: {list(results.get(source_node_id, {}).keys())}"
+           )
+
            source_results = results.get(source_node_id, {})

            # Handle backward compatibility - from_output/to_input can be string or list
@@ -951,6 +971,13 @@
            for source_key, target_key in mapping.items():
                if source_key in source_results:
                    node_inputs[target_key] = source_results[source_key]
+                   print(
+                       f"MAPPING DEBUG: {source_key} -> {target_key}, value type: {type(source_results[source_key])}"
+                   )
+               else:
+                   print(
+                       f"MAPPING DEBUG: Source key '{source_key}' not found in source results: {list(source_results.keys())}"
+                   )

            # Apply overrides
            node_overrides = inputs.get(node_id, {})
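The validation change in the hunk above boils down to treating a required parameter as satisfied when the caller's runtime_parameters mapping supplies it for that node. A self-contained sketch of that check, independent of the SDK; names mirror the hunk, and the "reader"/file_path example values are made up.

```python
from typing import Any, Dict, Optional


def is_param_satisfied(
    node_id: str,
    param_name: str,
    found_in_config: bool,
    runtime_parameters: Optional[Dict[str, Dict[str, Any]]] = None,
) -> bool:
    """Mirror of the Session 061 check: config OR runtime parameters may supply the input."""
    found_in_runtime = bool(
        runtime_parameters
        and node_id in runtime_parameters
        and param_name in runtime_parameters[node_id]
    )
    return found_in_config or found_in_runtime


# "reader" gets file_path only at runtime, so validation should now pass...
assert is_param_satisfied("reader", "file_path", False, {"reader": {"file_path": "x.csv"}})
# ...but still fail when neither config nor runtime parameters provide it.
assert not is_param_satisfied("reader", "file_path", False, None)
```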
kailash/workflow/visualization.py CHANGED
@@ -399,8 +399,16 @@ class WorkflowVisualizer:

        # Determine output path
        if output_path is None:
-           # Create default directory if it doesn't exist
-           output_dir = Path.cwd() / "outputs" / "workflow_executions"
+           # Use centralized output directory
+           # Get project root and use data/outputs/visualizations
+           project_root = Path(__file__).parent.parent.parent.parent
+           output_dir = (
+               project_root
+               / "data"
+               / "outputs"
+               / "visualizations"
+               / "workflow_executions"
+           )
            output_dir.mkdir(parents=True, exist_ok=True)
            output_path = output_dir / f"execution_{run_id}.md"
        else: