kailash-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/runtime/parallel.py
@@ -0,0 +1,557 @@
"""Parallel runtime engine for executing workflows with concurrent node execution.

This module provides a parallel execution engine for Kailash workflows,
specifically designed to run independent nodes concurrently for maximum performance.
"""

import asyncio
import logging
import time
from collections import deque
from datetime import datetime, timezone
from typing import Any, Deque, Dict, Optional, Set, Tuple

import networkx as nx

from kailash.nodes.base_async import AsyncNode
from kailash.sdk_exceptions import (
    RuntimeExecutionError,
    WorkflowExecutionError,
    WorkflowValidationError,
)
from kailash.tracking import TaskManager, TaskStatus
from kailash.tracking.metrics_collector import MetricsCollector
from kailash.tracking.models import TaskMetrics
from kailash.workflow.graph import Workflow

logger = logging.getLogger(__name__)


class ParallelRuntime:
    """Parallel execution engine for workflows.

    This runtime provides true concurrent execution of independent nodes in a workflow,
    allowing for maximum performance with both synchronous and asynchronous nodes.

    Key features:
    - Concurrent execution of independent nodes
    - Dynamic scheduling based on dependency resolution
    - Support for both sync and async nodes
    - Configurable parallelism limits
    - Detailed execution metrics and visualization

    Usage:
        runtime = ParallelRuntime(max_workers=8)
        results, run_id = await runtime.execute(workflow, parameters={...})
    """

    def __init__(self, max_workers: int = 8, debug: bool = False):
        """Initialize the parallel runtime.

        Args:
            max_workers: Maximum number of concurrent node executions
            debug: Whether to enable debug logging
        """
        self.max_workers = max_workers
        self.debug = debug
        self.logger = logger

        if debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.semaphore = None  # Will be initialized during execution

    async def execute(
        self,
        workflow: Workflow,
        task_manager: Optional[TaskManager] = None,
        parameters: Optional[Dict[str, Dict[str, Any]]] = None,
    ) -> Tuple[Dict[str, Any], Optional[str]]:
        """Execute a workflow with parallel node execution.

        Args:
            workflow: Workflow to execute
            task_manager: Optional task manager for tracking
            parameters: Optional parameter overrides per node

        Returns:
            Tuple of (results dict, run_id)

        Raises:
            RuntimeExecutionError: If execution fails
            WorkflowValidationError: If workflow is invalid
        """
        if not workflow:
            raise RuntimeExecutionError("No workflow provided")

        run_id = None
        start_time = time.time()

        try:
            # Validate workflow
            workflow.validate()

            # Initialize semaphore for concurrent execution control
            self.semaphore = asyncio.Semaphore(self.max_workers)

            # Initialize tracking
            if task_manager:
                try:
                    run_id = task_manager.create_run(
                        workflow_name=workflow.name,
                        metadata={
                            "parameters": parameters,
                            "debug": self.debug,
                            "runtime": "parallel",
                            "max_workers": self.max_workers,
                        },
                    )
                except Exception as e:
                    self.logger.warning(f"Failed to create task run: {e}")
                    # Continue without tracking

            # Execute workflow with parallel node execution
            results = await self._execute_workflow_parallel(
                workflow=workflow,
                task_manager=task_manager,
                run_id=run_id,
                parameters=parameters or {},
            )

            # Mark run as completed
            if task_manager and run_id:
                try:
                    end_time = time.time()
                    execution_time = end_time - start_time
                    task_manager.update_run_status(
                        run_id, "completed", metadata={"execution_time": execution_time}
                    )
                except Exception as e:
                    self.logger.warning(f"Failed to update run status: {e}")

            return results, run_id

        except WorkflowValidationError:
            # Re-raise validation errors as-is
            if task_manager and run_id:
                try:
                    task_manager.update_run_status(
                        run_id, "failed", error="Validation failed"
                    )
                except Exception:
                    pass
            raise
        except Exception as e:
            # Mark run as failed
            if task_manager and run_id:
                try:
                    task_manager.update_run_status(run_id, "failed", error=str(e))
                except Exception:
                    pass

            # Wrap other errors in RuntimeExecutionError
            raise RuntimeExecutionError(
                f"Parallel workflow execution failed: {type(e).__name__}: {e}"
            ) from e

    async def _execute_workflow_parallel(
        self,
        workflow: Workflow,
        task_manager: Optional[TaskManager],
        run_id: Optional[str],
        parameters: Dict[str, Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Execute the workflow nodes in parallel where possible.

        This method uses a dynamic scheduling approach to run independent nodes
        concurrently while respecting dependencies.

        Args:
            workflow: Workflow to execute
            task_manager: Task manager for tracking
            run_id: Run ID for tracking
            parameters: Parameter overrides

        Returns:
            Dictionary of node results

        Raises:
            WorkflowExecutionError: If execution fails
        """
        # Initialize result storage and tracking
        results = {}
        node_outputs = {}
        node_tasks = {}
        failed_nodes = set()

        # Calculate initial dependencies for each node
        dependencies = {
            node: set(workflow.graph.predecessors(node))
            for node in workflow.graph.nodes()
        }
        ready_nodes = deque([node for node, deps in dependencies.items() if not deps])
        pending_nodes = set(workflow.graph.nodes()) - set(ready_nodes)

        self.logger.info(
            f"Starting parallel execution with {len(ready_nodes)} initially ready nodes"
        )

        # Process nodes until all are complete
        while ready_nodes or pending_nodes:
            # Schedule ready nodes up to max_workers limit
            while ready_nodes and len(node_tasks) < self.max_workers:
                node_id = ready_nodes.popleft()

                # Skip if node already failed
                if node_id in failed_nodes:
                    continue

                # Create and start task for this node
                task = asyncio.create_task(
                    self._execute_node(
                        workflow=workflow,
                        node_id=node_id,
                        node_outputs=node_outputs,
                        parameters=parameters.get(node_id, {}),
                        task_manager=task_manager,
                        run_id=run_id,
                    )
                )
                node_tasks[node_id] = task

                self.logger.debug(f"Scheduled node {node_id} for execution")

            # Wait for any node to complete if we have active tasks
            if node_tasks:
                # Wait for the first task to complete
                done, _ = await asyncio.wait(
                    node_tasks.values(), return_when=asyncio.FIRST_COMPLETED
                )

                # Process completed nodes
                for task in done:
                    # Find the node_id for this task
                    completed_node_id = next(
                        node_id
                        for node_id, node_task in node_tasks.items()
                        if node_task == task
                    )

                    # Remove from active tasks
                    node_tasks.pop(completed_node_id)

                    try:
                        # Get result and add to outputs
                        node_result, success = task.result()
                        results[completed_node_id] = node_result

                        if success:
                            node_outputs[completed_node_id] = node_result
                            self.logger.info(
                                f"Node {completed_node_id} completed successfully"
                            )

                            # Update dependent nodes
                            for dependent in workflow.graph.successors(
                                completed_node_id
                            ):
                                if dependent in pending_nodes:
                                    dependencies[dependent].remove(completed_node_id)
                                    # If all dependencies are satisfied, mark as ready
                                    if not dependencies[dependent]:
                                        ready_nodes.append(dependent)
                                        pending_nodes.remove(dependent)
                                        self.logger.debug(
                                            f"Node {dependent} is now ready"
                                        )
                        else:
                            # Node failed, mark it and check if we should continue
                            failed_nodes.add(completed_node_id)
                            self.logger.error(f"Node {completed_node_id} failed")

                            # Determine if we should stop execution
                            if self._should_stop_on_error(workflow, completed_node_id):
                                error_msg = f"Node '{completed_node_id}' failed"
                                raise WorkflowExecutionError(error_msg)

                            # Update dependent nodes to also mark as failed
                            self._mark_dependent_nodes_as_failed(
                                workflow,
                                completed_node_id,
                                failed_nodes,
                                pending_nodes,
                                ready_nodes,
                            )
                    except Exception as e:
                        # Handle unexpected task exceptions
                        failed_nodes.add(completed_node_id)
                        self.logger.error(
                            f"Unexpected error in node {completed_node_id}: {e}"
                        )

                        # Determine if we should stop execution
                        if self._should_stop_on_error(workflow, completed_node_id):
                            error_msg = f"Node '{completed_node_id}' failed with unexpected error: {e}"
                            raise WorkflowExecutionError(error_msg) from e

                        # Mark dependents as failed
                        self._mark_dependent_nodes_as_failed(
                            workflow,
                            completed_node_id,
                            failed_nodes,
                            pending_nodes,
                            ready_nodes,
                        )
            else:
                # No active tasks but we still have pending nodes - this indicates a deadlock
                if pending_nodes:
                    remaining = list(pending_nodes)
                    raise WorkflowExecutionError(
                        f"Deadlock detected. Nodes waiting for dependencies: {remaining}"
                    )
                # No tasks and no pending nodes means we're done
                break

        self.logger.info(
            f"Parallel execution complete. Succeeded: {len(results) - len(failed_nodes)}, Failed: {len(failed_nodes)}"
        )
        return results

    async def _execute_node(
        self,
        workflow: Workflow,
        node_id: str,
        node_outputs: Dict[str, Dict[str, Any]],
        parameters: Dict[str, Any],
        task_manager: Optional[TaskManager],
        run_id: Optional[str],
    ) -> Tuple[Dict[str, Any], bool]:
        """Execute a single node asynchronously.

        Args:
            workflow: The workflow being executed
            node_id: ID of the node to execute
            node_outputs: Dictionary of outputs from previously executed nodes
            parameters: Parameter overrides for this node
            task_manager: Task manager for tracking
            run_id: Run ID for tracking

        Returns:
            Tuple of (node_result, success)

        Note:
            This method never raises exceptions - it returns success=False instead
            to allow the caller to handle failures appropriately.
        """
        # Get node instance
        node_instance = workflow._node_instances.get(node_id)
        if not node_instance:
            self.logger.error(f"Node instance '{node_id}' not found in workflow")
            return {"error": "Node instance not found"}, False

        # Start task tracking
        task = None
        try:
            if task_manager and run_id:
                task = task_manager.create_task(
                    run_id=run_id,
                    node_id=node_id,
                    node_type=node_instance.__class__.__name__,
                    started_at=datetime.now(timezone.utc),
                )
        except Exception as e:
            self.logger.warning(f"Failed to create task for node '{node_id}': {e}")

        try:
            # Limit concurrent execution
            async with self.semaphore:
                # Update task status
                if task:
                    task.update_status(TaskStatus.RUNNING)

                # Prepare inputs
                inputs = self._prepare_node_inputs(
                    workflow=workflow,
                    node_id=node_id,
                    node_instance=node_instance,
                    node_outputs=node_outputs,
                    parameters=parameters,
                )

                if self.debug:
                    self.logger.debug(f"Node {node_id} inputs: {inputs}")

                # Execute node with metrics collection
                collector = MetricsCollector()

                if isinstance(node_instance, AsyncNode):
                    # Use async execution for AsyncNode
                    outputs, performance_metrics = await collector.collect_async(
                        node_instance.execute_async(**inputs), node_id=node_id
                    )
                else:
                    # Use sync execution in an executor for regular Node
                    loop = asyncio.get_running_loop()

                    async def execute_with_metrics():
                        with collector.collect(node_id=node_id) as context:
                            result = await loop.run_in_executor(
                                None, lambda: node_instance.execute(**inputs)
                            )
                        return result, context.result()

                    outputs, performance_metrics = await execute_with_metrics()

                # Update task status with enhanced metrics
                if task:
                    task.update_status(
                        TaskStatus.COMPLETED,
                        result=outputs,
                        ended_at=datetime.now(timezone.utc),
                        metadata={"execution_time": performance_metrics.duration},
                    )

                    # Convert and save performance metrics
                    if task_manager:
                        task_metrics_data = performance_metrics.to_task_metrics()
                        task_metrics = TaskMetrics(**task_metrics_data)
                        task_manager.update_task_metrics(task.task_id, task_metrics)

                self.logger.info(
                    f"Node {node_id} completed successfully in {performance_metrics.duration:.3f}s"
                )

                return outputs, True

        except Exception as e:
            self.logger.error(f"Node {node_id} failed: {e}", exc_info=self.debug)

            # Update task status
            if task:
                task.update_status(
                    TaskStatus.FAILED, error=str(e), ended_at=datetime.now(timezone.utc)
                )

            # Return error result
            error_result = {
                "error": str(e),
                "error_type": type(e).__name__,
                "failed": True,
            }

            return error_result, False

    def _prepare_node_inputs(
        self,
        workflow: Workflow,
        node_id: str,
        node_instance: Any,
        node_outputs: Dict[str, Dict[str, Any]],
        parameters: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Prepare inputs for a node execution.

        Args:
            workflow: The workflow being executed
            node_id: Current node ID
            node_instance: Current node instance
            node_outputs: Outputs from previously executed nodes
            parameters: Parameter overrides

        Returns:
            Dictionary of inputs for the node

        Raises:
            WorkflowExecutionError: If input preparation fails
        """
        inputs = {}

        # Start with node configuration
        inputs.update(node_instance.config)

        # Add connected inputs from other nodes
        for edge in workflow.graph.in_edges(node_id, data=True):
            source_node_id = edge[0]
            mapping = edge[2].get("mapping", {})

            if source_node_id in node_outputs:
                source_outputs = node_outputs[source_node_id]

                # Check if the source node failed
                if isinstance(source_outputs, dict) and source_outputs.get("failed"):
                    raise WorkflowExecutionError(
                        f"Cannot use outputs from failed node '{source_node_id}'"
                    )

                for source_key, target_key in mapping.items():
                    if source_key in source_outputs:
                        inputs[target_key] = source_outputs[source_key]
                    else:
                        self.logger.warning(
                            f"Source output '{source_key}' not found in node '{source_node_id}'. "
                            f"Available outputs: {list(source_outputs.keys())}"
                        )

        # Apply parameter overrides
        inputs.update(parameters)

        return inputs

    def _should_stop_on_error(self, workflow: Workflow, node_id: str) -> bool:
        """Determine if execution should stop when a node fails.

        Args:
            workflow: The workflow being executed
            node_id: Failed node ID

        Returns:
            Whether to stop execution
        """
        # Check if any downstream nodes depend on this node
        has_dependents = workflow.graph.out_degree(node_id) > 0

        # For now, stop if the failed node has dependents
        # Future: implement configurable error handling policies
        return has_dependents

    def _mark_dependent_nodes_as_failed(
        self,
        workflow: Workflow,
        failed_node: str,
        failed_nodes: Set[str],
        pending_nodes: Set[str],
        ready_nodes: Deque[str],
    ) -> None:
        """Mark all dependent nodes as failed.

        Args:
            workflow: The workflow being executed
            failed_node: The node that failed
            failed_nodes: Set to track failed nodes
            pending_nodes: Set of nodes waiting for dependencies
            ready_nodes: Queue of nodes ready to execute
        """
        # Get all descendants of the failed node
        descendants = set(nx.descendants(workflow.graph, failed_node))

        # Mark all descendants as failed
        for node in descendants:
            failed_nodes.add(node)

            # Remove from pending or ready as appropriate
            if node in pending_nodes:
                pending_nodes.remove(node)

            # Need to handle as list since deque doesn't support
            # efficient removal of arbitrary elements
            if node in ready_nodes:
                ready_nodes_list = list(ready_nodes)
                ready_nodes_list.remove(node)
                ready_nodes.clear()
                ready_nodes.extend(ready_nodes_list)

        self.logger.debug(
            f"Marked {len(descendants)} dependent nodes as failed due to failure of node {failed_node}"
        )
kailash/runtime/runner.py
@@ -0,0 +1,110 @@
"""Main runner for workflow execution."""

import logging
from typing import Any, Dict, Optional, Tuple

from kailash.runtime.local import LocalRuntime
from kailash.tracking import TaskManager
from kailash.workflow import Workflow


class WorkflowRunner:
    """High-level interface for running workflows."""

    def __init__(self, debug: bool = False, task_manager: Optional[TaskManager] = None):
        """Initialize the workflow runner.

        Args:
            debug: Whether to enable debug mode
            task_manager: Optional task manager for tracking
        """
        self.debug = debug
        self.task_manager = task_manager or TaskManager()
        self.logger = logging.getLogger("kailash.runner")

        # Configure logging
        if debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            )
        else:
            logging.basicConfig(
                level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
            )

    def run(
        self,
        workflow: Workflow,
        parameters: Optional[Dict[str, Dict[str, Any]]] = None,
        runtime_type: str = "local",
    ) -> Tuple[Dict[str, Any], str]:
        """Run a workflow.

        Args:
            workflow: Workflow to run
            parameters: Optional parameter overrides
            runtime_type: Type of runtime to use (currently only "local")

        Returns:
            Tuple of (results, run_id)
        """
        self.logger.info(f"Starting workflow: {workflow.name}")

        # Select runtime
        if runtime_type == "local":
            runtime = LocalRuntime(debug=self.debug)
        else:
            raise ValueError(f"Unknown runtime type: {runtime_type}")

        # Execute workflow
        try:
            results, run_id = runtime.execute(
                workflow=workflow, task_manager=self.task_manager, parameters=parameters
            )

            self.logger.info(f"Workflow completed successfully: {run_id}")
            return results, run_id

        except Exception as e:
            self.logger.error(f"Workflow failed: {e}")
            raise

    def validate(self, workflow: Workflow) -> list:
        """Validate a workflow.

        Args:
            workflow: Workflow to validate

        Returns:
            List of validation warnings
        """
        runtime = LocalRuntime(debug=self.debug)
        return runtime.validate_workflow(workflow)

    def get_run_status(self, run_id: str) -> Dict[str, Any]:
        """Get status of a workflow run.

        Args:
            run_id: Run ID to check

        Returns:
            Status information
        """
        return self.task_manager.get_run_status(run_id)

    def get_run_history(
        self, workflow_name: Optional[str] = None, limit: int = 10
    ) -> list:
        """Get run history.

        Args:
            workflow_name: Optional workflow name to filter by
            limit: Maximum number of runs to return

        Returns:
            List of run summaries
        """
        return self.task_manager.get_run_history(
            workflow_name=workflow_name, limit=limit
        )