kailash 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/nodes/api/__init__.py +5 -0
- kailash/nodes/api/monitoring.py +463 -0
- kailash/nodes/api/security.py +822 -0
- kailash/nodes/base.py +3 -3
- kailash/nodes/data/__init__.py +6 -0
- kailash/nodes/data/event_generation.py +297 -0
- kailash/nodes/data/file_discovery.py +601 -0
- kailash/nodes/transform/processors.py +1 -1
- kailash/runtime/async_local.py +1 -1
- kailash/runtime/docker.py +4 -4
- kailash/runtime/local.py +39 -15
- kailash/runtime/parallel.py +2 -2
- kailash/runtime/parallel_cyclic.py +2 -2
- kailash/runtime/testing.py +2 -2
- kailash/utils/templates.py +6 -6
- kailash/visualization/performance.py +16 -3
- kailash/visualization/reports.py +5 -1
- kailash/workflow/cycle_analyzer.py +8 -1
- kailash/workflow/cyclic_runner.py +1 -1
- kailash/workflow/graph.py +18 -6
- kailash/workflow/visualization.py +10 -2
- kailash-0.3.0.dist-info/METADATA +428 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/RECORD +28 -24
- kailash-0.2.2.dist-info/METADATA +0 -121
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/runtime/local.py
CHANGED
@@ -113,8 +113,8 @@ class LocalRuntime:
             run_id = None

         try:
-            # Validate workflow
-            workflow.validate()
+            # Validate workflow with runtime parameters (Session 061)
+            workflow.validate(runtime_parameters=parameters)

             # Initialize tracking
             if task_manager:
@@ -296,6 +296,10 @@ class LocalRuntime:
                     parameters=parameters.get(node_id, {}),
                 )

+                # Update node config with parameters (Session 061: direct config update)
+                {**node_instance.config, **parameters.get(node_id, {})}
+                node_instance.config.update(parameters.get(node_id, {}))
+
                 if self.debug:
                     self.logger.debug(f"Node {node_id} inputs: {inputs}")

@@ -391,21 +395,36 @@ class LocalRuntime:
         """
         inputs = {}

-        #
-        inputs
+        # NOTE: Node configuration is handled separately in configure() call
+        # Only add runtime inputs and data from connected nodes here
+
+        # Add runtime parameters (those not used for node configuration)
+        # Map specific runtime parameters for known node types
+        if "consumer_timeout_ms" in parameters:
+            inputs["timeout_ms"] = parameters["consumer_timeout_ms"]
+
+        # Add other potential runtime parameters that are not configuration
+        runtime_param_names = {"max_messages", "timeout_ms", "limit", "offset"}
+        for param_name, param_value in parameters.items():
+            if param_name in runtime_param_names:
+                inputs[param_name] = param_value

         # Add connected inputs from other nodes
         for edge in workflow.graph.in_edges(node_id, data=True):
            source_node_id = edge[0]
            mapping = edge[2].get("mapping", {})

-
-
-
+            if self.debug:
+                self.logger.debug(f"Processing edge {source_node_id} -> {node_id}")
+                self.logger.debug(f"  Edge data: {edge[2]}")
+                self.logger.debug(f"  Mapping: {mapping}")

            if source_node_id in node_outputs:
                source_outputs = node_outputs[source_node_id]
-
+                if self.debug:
+                    self.logger.debug(
+                        f"  Source outputs: {list(source_outputs.keys())}"
+                    )

                # Check if the source node failed
                if isinstance(source_outputs, dict) and source_outputs.get("failed"):
@@ -416,19 +435,24 @@ class LocalRuntime:
                for source_key, target_key in mapping.items():
                    if source_key in source_outputs:
                        inputs[target_key] = source_outputs[source_key]
-
-
-
+                        if self.debug:
+                            self.logger.debug(
+                                f"  MAPPED: {source_key} -> {target_key} (type: {type(source_outputs[source_key])})"
+                            )
                    else:
-
-
-
+                        if self.debug:
+                            self.logger.debug(
+                                f"  MISSING: {source_key} not in {list(source_outputs.keys())}"
+                            )
                        self.logger.warning(
                            f"Source output '{source_key}' not found in node '{source_node_id}'. "
                            f"Available outputs: {list(source_outputs.keys())}"
                        )
            else:
-
+                if self.debug:
+                    self.logger.debug(
+                        f"  No outputs found for source node {source_node_id}"
+                    )

        # Apply parameter overrides
        inputs.update(parameters)
kailash/runtime/parallel.py
CHANGED
@@ -91,7 +91,7 @@ class ParallelRuntime:

         try:
             # Validate workflow
-            workflow.validate()
+            workflow.validate(runtime_parameters=parameters)

             # Initialize semaphore for concurrent execution control
             self.semaphore = asyncio.Semaphore(self.max_workers)
@@ -398,7 +398,7 @@ class ParallelRuntime:
             async def execute_with_metrics():
                 with collector.collect(node_id=node_id) as context:
                     result = await loop.run_in_executor(
-                        None, lambda: node_instance.
+                        None, lambda: node_instance.run(**inputs)
                     )
                     return result, context.result()

kailash/runtime/parallel_cyclic.py
CHANGED
@@ -80,7 +80,7 @@ class ParallelCyclicRuntime:

         try:
             # Validate workflow
-            workflow.validate()
+            workflow.validate(runtime_parameters=parameters)

             # Check for cycles first
             if self.enable_cycles and workflow.has_cycles():
@@ -380,7 +380,7 @@ class ParallelCyclicRuntime:
         # Execute node with metrics collection
         collector = MetricsCollector()
         with collector.collect(node_id=node_id) as metrics_context:
-            outputs = node_instance.
+            outputs = node_instance.run(**inputs)

         # Get performance metrics
         performance_metrics = metrics_context.result()
kailash/runtime/testing.py
CHANGED
@@ -229,12 +229,12 @@ class NodeTestHelper:
         """Test node execution with given inputs."""
         if should_fail:
             try:
-                result = node.
+                result = node.run(**inputs)
                 assert False, "Node execution should have failed but didn't"
             except (NodeValidationError, WorkflowExecutionError):
                 return {}
         else:
-            result = node.
+            result = node.run(**inputs)

         # Check expected output keys
         for key in expected_keys:
kailash/utils/templates.py
CHANGED
@@ -327,7 +327,7 @@ A Kailash workflow project.

 - `workflows/`: Workflow definitions
 - `nodes/`: Custom node implementations
-- `
+- `data/inputs/`: Input data files
 - `outputs/`: Output files

 ## Usage
@@ -363,7 +363,7 @@ workflow = Workflow(
 )

 # Add nodes
-workflow.add_node("reader", CSVReaderNode(), file_path="
+workflow.add_node("reader", CSVReaderNode(), file_path="data/inputs/csv/input.csv")
 workflow.add_node("filter", Filter(), field="value", operator=">", value=100)
 workflow.add_node("sort", Sort(), field="value", reverse=True)
 workflow.add_node("aggregate", Aggregator(), group_by="category", operation="sum")
@@ -482,8 +482,8 @@ workflow = Workflow(
 )

 # Data ingestion
-workflow.add_node("csv_reader", CSVReaderNode(), file_path="
-workflow.add_node("json_reader", JSONReaderNode(), file_path="
+workflow.add_node("csv_reader", CSVReaderNode(), file_path="data/inputs/csv/sales_data.csv")
+workflow.add_node("json_reader", JSONReaderNode(), file_path="data/inputs/json/product_data.json")

 # Transform data
 workflow.add_node("filter_sales", Filter(), field="amount", operator=">", value=1000)
@@ -553,7 +553,7 @@ workflow = Workflow(
 )

 # Data ingestion
-workflow.add_node("read_data", CSVReaderNode(), file_path="
+workflow.add_node("read_data", CSVReaderNode(), file_path="data/inputs/csv/text_data.csv")

 # Preprocessing
 workflow.add_node("extract_text", Map(), field="content")
@@ -616,7 +616,7 @@ workflow = Workflow(
 )

 # Read configuration
-workflow.add_node("read_config", JSONReaderNode(), file_path="
+workflow.add_node("read_config", JSONReaderNode(), file_path="data/inputs/json/api_config.json")

 # Process with AI agent
 workflow.add_node("chat_agent", ChatAgent(),
kailash/visualization/performance.py
CHANGED
@@ -63,8 +63,12 @@ class PerformanceVisualizer:
             Dictionary mapping chart names to file paths
         """
         if output_dir is None:
-            # Use
-
+            # Use centralized output directory
+            # Get project root and use data/outputs/visualizations/performance
+            project_root = Path(__file__).parent.parent.parent.parent
+            output_dir = (
+                project_root / "data" / "outputs" / "visualizations" / "performance"
+            )
         output_dir.mkdir(parents=True, exist_ok=True)

         # Get run data
@@ -719,7 +723,16 @@ class PerformanceVisualizer:
     ) -> Path:
         """Compare performance across multiple runs."""
         if output_path is None:
-
+            # Use centralized output directory
+            project_root = Path(__file__).parent.parent.parent.parent
+            output_path = (
+                project_root
+                / "data"
+                / "outputs"
+                / "visualizations"
+                / "performance"
+                / "comparison.png"
+            )
         output_path.parent.mkdir(parents=True, exist_ok=True)

         fig, axes = plt.subplots(2, 2, figsize=(15, 12))
kailash/visualization/reports.py
CHANGED
@@ -168,9 +168,13 @@ class WorkflowPerformanceReporter:
         """
         if output_path is None:
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            # Use centralized output directory
+            project_root = Path(__file__).parent.parent.parent.parent
             output_path = (
-
+                project_root
+                / "data"
                 / "outputs"
+                / "reports"
                 / f"workflow_report_{run_id[:8]}_{timestamp}.{format.value}"
             )

kailash/workflow/cycle_analyzer.py
CHANGED
@@ -84,7 +84,14 @@ class CycleAnalyzer:
         self.analysis_level = analysis_level
         self.enable_profiling = enable_profiling
         self.enable_debugging = enable_debugging
-
+
+        # Set output directory - use centralized location if not specified
+        if output_directory:
+            self.output_directory = Path(output_directory)
+        else:
+            # Use centralized output directory by default
+            project_root = Path(__file__).parent.parent.parent.parent
+            self.output_directory = project_root / "data" / "outputs" / "cycle_analysis"

         # Initialize components based on configuration
         debug_level = {
kailash/workflow/cyclic_runner.py
CHANGED
@@ -168,7 +168,7 @@ class CyclicWorkflowExecutor:
             WorkflowValidationError: If workflow is invalid
         """
         # Validate workflow (including cycles)
-        workflow.validate()
+        workflow.validate(runtime_parameters=parameters)

         # Generate run ID if not provided
         if not run_id:
kailash/workflow/graph.py
CHANGED
@@ -191,10 +191,12 @@ class Workflow:

         # Store node instance and metadata
         try:
+            # Use the node instance's actual config, which includes both original config and any updates
+            actual_config = node_instance.config.copy()
             node_instance_data = NodeInstance(
                 node_id=node_id,
                 node_type=node_type,
-                config=
+                config=actual_config,
                 position=(len(self.nodes) * 150, 100),
             )
             self.nodes[node_id] = node_instance_data
@@ -203,8 +205,10 @@ class Workflow:

         self._node_instances[node_id] = node_instance

-        # Add to graph
-        self.graph.add_node(
+        # Add to graph with actual config
+        self.graph.add_node(
+            node_id, node=node_instance, type=node_type, config=actual_config
+        )
         logger.info(f"Added node '{node_id}' of type '{node_type}'")

     def _add_node_internal(
@@ -707,9 +711,12 @@ class Workflow:
             # This shouldn't happen, but handle gracefully
             raise WorkflowValidationError("Unable to determine execution order")

-    def validate(self) -> None:
+    def validate(self, runtime_parameters: Optional[Dict[str, Any]] = None) -> None:
         """Validate the workflow structure.

+        Args:
+            runtime_parameters: Parameters that will be provided at runtime (Session 061)
+
         Raises:
             WorkflowValidationError: If workflow is invalid
         """
@@ -758,14 +765,19 @@ class Workflow:
                     # Check nested config
                     found_in_config = param_name in node_instance.config["config"]

-                if
+                # Session 061: Check if parameter will be provided at runtime
+                found_in_runtime = False
+                if runtime_parameters and node_id in runtime_parameters:
+                    found_in_runtime = param_name in runtime_parameters[node_id]
+
+                if not found_in_config and not found_in_runtime:
                     if param_def.default is None:
                         missing_inputs.append(param_name)

             if missing_inputs:
                 raise WorkflowValidationError(
                     f"Node '{node_id}' missing required inputs: {missing_inputs}. "
-                    f"Provide these inputs via connections or
+                    f"Provide these inputs via connections, node configuration, or runtime parameters"
                 )

         logger.info(f"Workflow '{self.name}' validated successfully")
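This validation change is the heart of the release: a required parameter no longer has to be baked into a node's config at graph-construction time if it will arrive through runtime parameters. A self-contained sketch of the new check (simplified; the real method also considers connections and nested config):

```python
from typing import Any, Dict, Optional

def param_satisfied(
    node_id: str,
    param_name: str,
    node_config: Dict[str, Any],
    default: Any,
    runtime_parameters: Optional[Dict[str, Dict[str, Any]]] = None,
) -> bool:
    """Simplified mirror of the 0.3.0 check inside Workflow.validate()."""
    found_in_config = param_name in node_config
    # New in 0.3.0: parameters promised per node at run time also satisfy validation.
    found_in_runtime = bool(
        runtime_parameters and param_name in runtime_parameters.get(node_id, {})
    )
    return found_in_config or found_in_runtime or default is not None

# A node missing "file_path" in its config still validates when runtime
# parameters will provide it:
assert param_satisfied("reader", "file_path", {}, None,
                       runtime_parameters={"reader": {"file_path": "x.csv"}})
assert not param_satisfied("reader", "file_path", {}, None)
```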
kailash/workflow/visualization.py
CHANGED
@@ -399,8 +399,16 @@ class WorkflowVisualizer:

         # Determine output path
         if output_path is None:
-            #
-
+            # Use centralized output directory
+            # Get project root and use data/outputs/visualizations
+            project_root = Path(__file__).parent.parent.parent.parent
+            output_dir = (
+                project_root
+                / "data"
+                / "outputs"
+                / "visualizations"
+                / "workflow_executions"
+            )
             output_dir.mkdir(parents=True, exist_ok=True)
             output_path = output_dir / f"execution_{run_id}.md"
         else: