kailash 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
- kailash/__init__.py +1 -1
- kailash/api/custom_nodes_secure.py +2 -2
- kailash/api/studio_secure.py +1 -1
- kailash/mcp/client_new.py +1 -1
- kailash/nodes/ai/a2a.py +1 -1
- kailash/nodes/api/__init__.py +26 -0
- kailash/nodes/api/monitoring.py +463 -0
- kailash/nodes/api/security.py +822 -0
- kailash/nodes/base.py +3 -3
- kailash/nodes/code/python.py +6 -0
- kailash/nodes/data/__init__.py +9 -0
- kailash/nodes/data/directory.py +278 -0
- kailash/nodes/data/event_generation.py +297 -0
- kailash/nodes/data/file_discovery.py +601 -0
- kailash/nodes/data/sql.py +2 -2
- kailash/nodes/transform/processors.py +32 -1
- kailash/runtime/async_local.py +1 -1
- kailash/runtime/docker.py +4 -4
- kailash/runtime/local.py +41 -4
- kailash/runtime/parallel.py +2 -2
- kailash/runtime/parallel_cyclic.py +2 -2
- kailash/runtime/testing.py +2 -2
- kailash/utils/templates.py +6 -6
- kailash/visualization/performance.py +16 -3
- kailash/visualization/reports.py +5 -1
- kailash/workflow/convergence.py +1 -1
- kailash/workflow/cycle_analyzer.py +8 -1
- kailash/workflow/cyclic_runner.py +1 -1
- kailash/workflow/graph.py +33 -6
- kailash/workflow/visualization.py +10 -2
- kailash-0.3.0.dist-info/METADATA +428 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/RECORD +36 -31
- kailash-0.2.1.dist-info/METADATA +0 -1617
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/runtime/docker.py
CHANGED
@@ -174,7 +174,7 @@ def main():
     logger.info(f"Loaded configuration for {node_data['class']} node")

     # Load runtime inputs if available
-    input_path = Path("/
+    input_path = Path("/data/inputs/json/inputs.json")
     runtime_inputs = {}
     if input_path.exists():
         logger.info(f"Loading inputs from {input_path}")
@@ -206,7 +206,7 @@ def main():
    except Exception as e:
        logger.error(f"Node execution failed: {e}")
        # Save error information
-       with open("/
+       with open("/data/outputs/json/error.json", 'w') as f:
            json.dump({
                "error": str(e),
                "type": e.__class__.__name__
@@ -216,7 +216,7 @@ def main():
    # Save results
    logger.info("Saving execution results")
    try:
-       result_path = Path("/
+       result_path = Path("/data/outputs/json/result.json")
        with open(result_path, 'w') as f:
            # Handle non-serializable objects with basic conversion
            try:
@@ -590,7 +590,7 @@ class DockerRuntime:

        try:
            # Validate workflow
-           workflow.validate()
+           workflow.validate(runtime_parameters=inputs)

            # Get execution order
            execution_order = workflow.get_execution_order()
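The path fixes above pin down the container's data contract: runtime inputs are read from `/data/inputs/json/inputs.json`, and results or errors are written under `/data/outputs/json/`. A minimal host-side sketch of staging that layout (the host directory name and the `docker run` invocation are illustrative, not part of the DockerRuntime API):

```python
import json
from pathlib import Path

# Host staging directory (name is an assumption); mounted at /data in the container.
host_data = Path("docker_data")
(host_data / "inputs" / "json").mkdir(parents=True, exist_ok=True)

# The container script reads runtime inputs from /data/inputs/json/inputs.json.
with open(host_data / "inputs" / "json" / "inputs.json", "w") as f:
    json.dump({"limit": 100}, f)

# e.g. docker run -v "$PWD/docker_data:/data" <node-image>
# On success the container writes /data/outputs/json/result.json;
# on failure, /data/outputs/json/error.json with "error" and "type" keys.
```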
kailash/runtime/local.py
CHANGED
@@ -113,8 +113,8 @@ class LocalRuntime:
        run_id = None

        try:
-           # Validate workflow
-           workflow.validate()
+           # Validate workflow with runtime parameters (Session 061)
+           workflow.validate(runtime_parameters=parameters)

            # Initialize tracking
            if task_manager:
@@ -296,6 +296,10 @@ class LocalRuntime:
                parameters=parameters.get(node_id, {}),
            )

+           # Update node config with parameters (Session 061: direct config update)
+           {**node_instance.config, **parameters.get(node_id, {})}
+           node_instance.config.update(parameters.get(node_id, {}))
+
            if self.debug:
                self.logger.debug(f"Node {node_id} inputs: {inputs}")

@@ -391,16 +395,36 @@ class LocalRuntime:
        """
        inputs = {}

-       #
-       inputs
+       # NOTE: Node configuration is handled separately in configure() call
+       # Only add runtime inputs and data from connected nodes here
+
+       # Add runtime parameters (those not used for node configuration)
+       # Map specific runtime parameters for known node types
+       if "consumer_timeout_ms" in parameters:
+           inputs["timeout_ms"] = parameters["consumer_timeout_ms"]
+
+       # Add other potential runtime parameters that are not configuration
+       runtime_param_names = {"max_messages", "timeout_ms", "limit", "offset"}
+       for param_name, param_value in parameters.items():
+           if param_name in runtime_param_names:
+               inputs[param_name] = param_value

        # Add connected inputs from other nodes
        for edge in workflow.graph.in_edges(node_id, data=True):
            source_node_id = edge[0]
            mapping = edge[2].get("mapping", {})

+           if self.debug:
+               self.logger.debug(f"Processing edge {source_node_id} -> {node_id}")
+               self.logger.debug(f" Edge data: {edge[2]}")
+               self.logger.debug(f" Mapping: {mapping}")
+
            if source_node_id in node_outputs:
                source_outputs = node_outputs[source_node_id]
+               if self.debug:
+                   self.logger.debug(
+                       f" Source outputs: {list(source_outputs.keys())}"
+                   )

                # Check if the source node failed
                if isinstance(source_outputs, dict) and source_outputs.get("failed"):
@@ -411,11 +435,24 @@ class LocalRuntime:
                for source_key, target_key in mapping.items():
                    if source_key in source_outputs:
                        inputs[target_key] = source_outputs[source_key]
+                       if self.debug:
+                           self.logger.debug(
+                               f" MAPPED: {source_key} -> {target_key} (type: {type(source_outputs[source_key])})"
+                           )
                    else:
+                       if self.debug:
+                           self.logger.debug(
+                               f" MISSING: {source_key} not in {list(source_outputs.keys())}"
+                           )
                        self.logger.warning(
                            f"Source output '{source_key}' not found in node '{source_node_id}'. "
                            f"Available outputs: {list(source_outputs.keys())}"
                        )
+           else:
+               if self.debug:
+                   self.logger.debug(
+                       f" No outputs found for source node {source_node_id}"
+                   )

        # Apply parameter overrides
        inputs.update(parameters)
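Taken together, the local.py changes give runtime parameters three jobs: they are forwarded to `workflow.validate(runtime_parameters=...)`, merged into each node's `config`, and a whitelisted subset (`max_messages`, `timeout_ms`, `limit`, `offset`, plus the `consumer_timeout_ms` → `timeout_ms` alias) is injected directly as node inputs. A hedged sketch of the calling side; the `execute()` signature is an assumption, but the per-node-ID parameter shape follows from `parameters.get(node_id, {})` in the diff:

```python
from kailash.runtime.local import LocalRuntime  # module path as listed in this diff

runtime = LocalRuntime()

# Parameters are keyed by node ID; each inner dict is merged into that
# node's config and consulted during workflow validation.
parameters = {
    "consumer": {
        "consumer_timeout_ms": 5000,  # aliased to the "timeout_ms" input
        "max_messages": 10,           # in runtime_param_names, passed through as an input
    }
}

# Hypothetical call shape, not confirmed by this diff:
# results, run_id = runtime.execute(workflow, parameters=parameters)
```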
kailash/runtime/parallel.py
CHANGED
@@ -91,7 +91,7 @@ class ParallelRuntime:

        try:
            # Validate workflow
-           workflow.validate()
+           workflow.validate(runtime_parameters=parameters)

            # Initialize semaphore for concurrent execution control
            self.semaphore = asyncio.Semaphore(self.max_workers)
@@ -398,7 +398,7 @@ class ParallelRuntime:
        async def execute_with_metrics():
            with collector.collect(node_id=node_id) as context:
                result = await loop.run_in_executor(
-                   None, lambda: node_instance.
+                   None, lambda: node_instance.run(**inputs)
                )
                return result, context.result()

kailash/runtime/parallel_cyclic.py
CHANGED
@@ -80,7 +80,7 @@ class ParallelCyclicRuntime:

        try:
            # Validate workflow
-           workflow.validate()
+           workflow.validate(runtime_parameters=parameters)

            # Check for cycles first
            if self.enable_cycles and workflow.has_cycles():
@@ -380,7 +380,7 @@ class ParallelCyclicRuntime:
        # Execute node with metrics collection
        collector = MetricsCollector()
        with collector.collect(node_id=node_id) as metrics_context:
-           outputs = node_instance.
+           outputs = node_instance.run(**inputs)

        # Get performance metrics
        performance_metrics = metrics_context.result()
kailash/runtime/testing.py
CHANGED
@@ -229,12 +229,12 @@ class NodeTestHelper:
        """Test node execution with given inputs."""
        if should_fail:
            try:
-               result = node.
+               result = node.run(**inputs)
                assert False, "Node execution should have failed but didn't"
            except (NodeValidationError, WorkflowExecutionError):
                return {}
        else:
-           result = node.
+           result = node.run(**inputs)

        # Check expected output keys
        for key in expected_keys:
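ParallelRuntime, ParallelCyclicRuntime, and NodeTestHelper now all invoke nodes as `node.run(**inputs)` (the method name this replaced is truncated in the diff view). A minimal sketch of the convention with a stand-in node class, since real node APIs live in `kailash.nodes`:

```python
class EchoNode:
    """Stand-in node for illustration only; not a kailash class."""

    def run(self, **inputs):
        # Nodes receive their inputs as keyword arguments and return a dict.
        return {"echo": inputs}

node = EchoNode()
result = node.run(value=42)  # the call shape used throughout the 0.3.0 runtimes
assert result["echo"] == {"value": 42}
```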
kailash/utils/templates.py
CHANGED
@@ -327,7 +327,7 @@ A Kailash workflow project.

 - `workflows/`: Workflow definitions
 - `nodes/`: Custom node implementations
-- `
+- `data/inputs/`: Input data files
 - `outputs/`: Output files

 ## Usage
@@ -363,7 +363,7 @@ workflow = Workflow(
)

# Add nodes
-workflow.add_node("reader", CSVReaderNode(), file_path="
+workflow.add_node("reader", CSVReaderNode(), file_path="data/inputs/csv/input.csv")
workflow.add_node("filter", Filter(), field="value", operator=">", value=100)
workflow.add_node("sort", Sort(), field="value", reverse=True)
workflow.add_node("aggregate", Aggregator(), group_by="category", operation="sum")
@@ -482,8 +482,8 @@ workflow = Workflow(
)

# Data ingestion
-workflow.add_node("csv_reader", CSVReaderNode(), file_path="
-workflow.add_node("json_reader", JSONReaderNode(), file_path="
+workflow.add_node("csv_reader", CSVReaderNode(), file_path="data/inputs/csv/sales_data.csv")
+workflow.add_node("json_reader", JSONReaderNode(), file_path="data/inputs/json/product_data.json")

# Transform data
workflow.add_node("filter_sales", Filter(), field="amount", operator=">", value=1000)
@@ -553,7 +553,7 @@ workflow = Workflow(
)

# Data ingestion
-workflow.add_node("read_data", CSVReaderNode(), file_path="
+workflow.add_node("read_data", CSVReaderNode(), file_path="data/inputs/csv/text_data.csv")

# Preprocessing
workflow.add_node("extract_text", Map(), field="content")
@@ -616,7 +616,7 @@ workflow = Workflow(
)

# Read configuration
-workflow.add_node("read_config", JSONReaderNode(), file_path="
+workflow.add_node("read_config", JSONReaderNode(), file_path="data/inputs/json/api_config.json")

# Process with AI agent
workflow.add_node("chat_agent", ChatAgent(),
kailash/visualization/performance.py
CHANGED
@@ -63,8 +63,12 @@ class PerformanceVisualizer:
            Dictionary mapping chart names to file paths
        """
        if output_dir is None:
-           # Use
-
+           # Use centralized output directory
+           # Get project root and use data/outputs/visualizations/performance
+           project_root = Path(__file__).parent.parent.parent.parent
+           output_dir = (
+               project_root / "data" / "outputs" / "visualizations" / "performance"
+           )
        output_dir.mkdir(parents=True, exist_ok=True)

        # Get run data
@@ -719,7 +723,16 @@ class PerformanceVisualizer:
    ) -> Path:
        """Compare performance across multiple runs."""
        if output_path is None:
-
+           # Use centralized output directory
+           project_root = Path(__file__).parent.parent.parent.parent
+           output_path = (
+               project_root
+               / "data"
+               / "outputs"
+               / "visualizations"
+               / "performance"
+               / "comparison.png"
+           )
        output_path.parent.mkdir(parents=True, exist_ok=True)

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
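performance.py, reports.py, cycle_analyzer.py, and visualization.py all adopt the same default in this release: climb four parents from the module file, then write under `data/outputs/`. A sketch of the shared pattern; note that for a wheel installed into site-packages, four `.parent` hops land outside the package, so the "project root" is wherever the package happens to live:

```python
from pathlib import Path

# Pattern repeated across the visualization/analysis modules in 0.3.0:
project_root = Path(__file__).parent.parent.parent.parent
output_dir = project_root / "data" / "outputs" / "visualizations" / "performance"
output_dir.mkdir(parents=True, exist_ok=True)
```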
kailash/visualization/reports.py
CHANGED
@@ -168,9 +168,13 @@ class WorkflowPerformanceReporter:
        """
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+           # Use centralized output directory
+           project_root = Path(__file__).parent.parent.parent.parent
            output_path = (
-
+               project_root
+               / "data"
                / "outputs"
+               / "reports"
                / f"workflow_report_{run_id[:8]}_{timestamp}.{format.value}"
            )

kailash/workflow/convergence.py
CHANGED
@@ -229,7 +229,7 @@ class AdaptiveCondition(ConvergenceCondition):


def create_convergence_condition(
-   spec: Union[str, int, Callable, Dict]
+   spec: Union[str, int, Callable, Dict],
) -> ConvergenceCondition:
    """Factory function to create convergence conditions from various specs.

kailash/workflow/cycle_analyzer.py
CHANGED
@@ -84,7 +84,14 @@ class CycleAnalyzer:
        self.analysis_level = analysis_level
        self.enable_profiling = enable_profiling
        self.enable_debugging = enable_debugging
-
+
+       # Set output directory - use centralized location if not specified
+       if output_directory:
+           self.output_directory = Path(output_directory)
+       else:
+           # Use centralized output directory by default
+           project_root = Path(__file__).parent.parent.parent.parent
+           self.output_directory = project_root / "data" / "outputs" / "cycle_analysis"

        # Initialize components based on configuration
        debug_level = {
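With this change an explicit `output_directory` is honored verbatim, and omitting it selects the centralized default. A hedged usage sketch, assuming the remaining `CycleAnalyzer` constructor arguments (`analysis_level`, `enable_profiling`, `enable_debugging`) have defaults:

```python
from kailash.workflow.cycle_analyzer import CycleAnalyzer  # module path per this diff

# An explicit directory wins:
analyzer = CycleAnalyzer(output_directory="analysis_runs/session_061")

# Left unset, output_directory falls back to
# <four parents above cycle_analyzer.py>/data/outputs/cycle_analysis.
analyzer = CycleAnalyzer()
```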
kailash/workflow/cyclic_runner.py
CHANGED
@@ -168,7 +168,7 @@ class CyclicWorkflowExecutor:
            WorkflowValidationError: If workflow is invalid
        """
        # Validate workflow (including cycles)
-       workflow.validate()
+       workflow.validate(runtime_parameters=parameters)

        # Generate run ID if not provided
        if not run_id:
kailash/workflow/graph.py
CHANGED
@@ -191,10 +191,12 @@ class Workflow:

        # Store node instance and metadata
        try:
+           # Use the node instance's actual config, which includes both original config and any updates
+           actual_config = node_instance.config.copy()
            node_instance_data = NodeInstance(
                node_id=node_id,
                node_type=node_type,
-               config=
+               config=actual_config,
                position=(len(self.nodes) * 150, 100),
            )
            self.nodes[node_id] = node_instance_data
@@ -203,8 +205,10 @@ class Workflow:

            self._node_instances[node_id] = node_instance

-           # Add to graph
-           self.graph.add_node(
+           # Add to graph with actual config
+           self.graph.add_node(
+               node_id, node=node_instance, type=node_type, config=actual_config
+           )
            logger.info(f"Added node '{node_id}' of type '{node_type}'")

    def _add_node_internal(
@@ -707,9 +711,12 @@ class Workflow:
            # This shouldn't happen, but handle gracefully
            raise WorkflowValidationError("Unable to determine execution order")

-   def validate(self) -> None:
+   def validate(self, runtime_parameters: Optional[Dict[str, Any]] = None) -> None:
        """Validate the workflow structure.

+       Args:
+           runtime_parameters: Parameters that will be provided at runtime (Session 061)
+
        Raises:
            WorkflowValidationError: If workflow is invalid
        """
@@ -758,14 +765,19 @@ class Workflow:
                # Check nested config
                found_in_config = param_name in node_instance.config["config"]

-               if
+               # Session 061: Check if parameter will be provided at runtime
+               found_in_runtime = False
+               if runtime_parameters and node_id in runtime_parameters:
+                   found_in_runtime = param_name in runtime_parameters[node_id]
+
+               if not found_in_config and not found_in_runtime:
                    if param_def.default is None:
                        missing_inputs.append(param_name)

        if missing_inputs:
            raise WorkflowValidationError(
                f"Node '{node_id}' missing required inputs: {missing_inputs}. "
-               f"Provide these inputs via connections or
+               f"Provide these inputs via connections, node configuration, or runtime parameters"
            )

        logger.info(f"Workflow '{self.name}' validated successfully")
@@ -928,6 +940,14 @@ class Workflow:
            to_input = edge_data.get("to_input")
            mapping = edge_data.get("mapping", {})

+           print(f"CONNECTION DEBUG: {source_node_id} -> {node_id}")
+           print(f" Edge data: {edge_data}")
+           print(f" from_output: {from_output}, to_input: {to_input}")
+           print(f" mapping: {mapping}")
+           print(
+               f" source_results keys: {list(results.get(source_node_id, {}).keys())}"
+           )
+
            source_results = results.get(source_node_id, {})

            # Handle backward compatibility - from_output/to_input can be string or list
@@ -951,6 +971,13 @@ class Workflow:
            for source_key, target_key in mapping.items():
                if source_key in source_results:
                    node_inputs[target_key] = source_results[source_key]
+                   print(
+                       f"MAPPING DEBUG: {source_key} -> {target_key}, value type: {type(source_results[source_key])}"
+                   )
+               else:
+                   print(
+                       f"MAPPING DEBUG: Source key '{source_key}' not found in source results: {list(source_results.keys())}"
+                   )

            # Apply overrides
            node_overrides = inputs.get(node_id, {})
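The widened `validate()` signature means inputs that will only arrive at execution time no longer trip validation. A sketch of calling it directly on an existing Workflow instance; the node ID and parameter name are invented for illustration:

```python
# "reader" and "file_path" are illustrative names, not part of the diff.
runtime_parameters = {
    "reader": {"file_path": "data/inputs/csv/input.csv"},
}

# Previously this raised WorkflowValidationError when "file_path" was neither
# connected nor present in the node's config; now runtime parameters count too.
workflow.validate(runtime_parameters=runtime_parameters)
```

Note also that the new connection and mapping traces in graph.py use bare `print()` calls, unlike the `self.debug`-gated logger calls added in local.py.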
kailash/workflow/visualization.py
CHANGED
@@ -399,8 +399,16 @@ class WorkflowVisualizer:

        # Determine output path
        if output_path is None:
-           #
-
+           # Use centralized output directory
+           # Get project root and use data/outputs/visualizations
+           project_root = Path(__file__).parent.parent.parent.parent
+           output_dir = (
+               project_root
+               / "data"
+               / "outputs"
+               / "visualizations"
+               / "workflow_executions"
+           )
            output_dir.mkdir(parents=True, exist_ok=True)
            output_path = output_dir / f"execution_{run_id}.md"
        else: