kailash 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- kailash/__init__.py +1 -1
- kailash/access_control.py +740 -0
- kailash/api/__main__.py +6 -0
- kailash/api/auth.py +668 -0
- kailash/api/custom_nodes.py +285 -0
- kailash/api/custom_nodes_secure.py +377 -0
- kailash/api/database.py +620 -0
- kailash/api/studio.py +915 -0
- kailash/api/studio_secure.py +893 -0
- kailash/mcp/__init__.py +53 -0
- kailash/mcp/__main__.py +13 -0
- kailash/mcp/ai_registry_server.py +712 -0
- kailash/mcp/client.py +447 -0
- kailash/mcp/client_new.py +334 -0
- kailash/mcp/server.py +293 -0
- kailash/mcp/server_new.py +336 -0
- kailash/mcp/servers/__init__.py +12 -0
- kailash/mcp/servers/ai_registry.py +289 -0
- kailash/nodes/__init__.py +4 -2
- kailash/nodes/ai/__init__.py +38 -0
- kailash/nodes/ai/a2a.py +1790 -0
- kailash/nodes/ai/agents.py +116 -2
- kailash/nodes/ai/ai_providers.py +206 -8
- kailash/nodes/ai/intelligent_agent_orchestrator.py +2108 -0
- kailash/nodes/ai/iterative_llm_agent.py +1280 -0
- kailash/nodes/ai/llm_agent.py +324 -1
- kailash/nodes/ai/self_organizing.py +1623 -0
- kailash/nodes/api/http.py +106 -25
- kailash/nodes/api/rest.py +116 -21
- kailash/nodes/base.py +15 -2
- kailash/nodes/base_async.py +45 -0
- kailash/nodes/base_cycle_aware.py +374 -0
- kailash/nodes/base_with_acl.py +338 -0
- kailash/nodes/code/python.py +135 -27
- kailash/nodes/data/readers.py +116 -53
- kailash/nodes/data/writers.py +16 -6
- kailash/nodes/logic/__init__.py +8 -0
- kailash/nodes/logic/async_operations.py +48 -9
- kailash/nodes/logic/convergence.py +642 -0
- kailash/nodes/logic/loop.py +153 -0
- kailash/nodes/logic/operations.py +212 -27
- kailash/nodes/logic/workflow.py +26 -18
- kailash/nodes/mixins/__init__.py +11 -0
- kailash/nodes/mixins/mcp.py +228 -0
- kailash/nodes/mixins.py +387 -0
- kailash/nodes/transform/__init__.py +8 -1
- kailash/nodes/transform/processors.py +119 -4
- kailash/runtime/__init__.py +2 -1
- kailash/runtime/access_controlled.py +458 -0
- kailash/runtime/local.py +106 -33
- kailash/runtime/parallel_cyclic.py +529 -0
- kailash/sdk_exceptions.py +90 -5
- kailash/security.py +845 -0
- kailash/tracking/manager.py +38 -15
- kailash/tracking/models.py +1 -1
- kailash/tracking/storage/filesystem.py +30 -2
- kailash/utils/__init__.py +8 -0
- kailash/workflow/__init__.py +18 -0
- kailash/workflow/convergence.py +270 -0
- kailash/workflow/cycle_analyzer.py +768 -0
- kailash/workflow/cycle_builder.py +573 -0
- kailash/workflow/cycle_config.py +709 -0
- kailash/workflow/cycle_debugger.py +760 -0
- kailash/workflow/cycle_exceptions.py +601 -0
- kailash/workflow/cycle_profiler.py +671 -0
- kailash/workflow/cycle_state.py +338 -0
- kailash/workflow/cyclic_runner.py +985 -0
- kailash/workflow/graph.py +500 -39
- kailash/workflow/migration.py +768 -0
- kailash/workflow/safety.py +365 -0
- kailash/workflow/templates.py +744 -0
- kailash/workflow/validation.py +693 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/METADATA +446 -13
- kailash-0.2.0.dist-info/RECORD +125 -0
- kailash/nodes/mcp/__init__.py +0 -11
- kailash/nodes/mcp/client.py +0 -554
- kailash/nodes/mcp/resource.py +0 -682
- kailash/nodes/mcp/server.py +0 -577
- kailash-0.1.4.dist-info/RECORD +0 -85
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
kailash/workflow/graph.py
CHANGED
```diff
@@ -3,6 +3,7 @@
 import json
 import logging
 import uuid
+import warnings
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple
 
@@ -52,6 +53,33 @@ class Connection(BaseModel):
     target_input: str = Field(..., description="Input field on target")
 
 
+class CyclicConnection(Connection):
+    """Extended connection supporting cycle metadata."""
+
+    cycle: bool = Field(
+        default=False, description="Whether this connection creates a cycle"
+    )
+    max_iterations: Optional[int] = Field(
+        default=None, description="Maximum cycle iterations"
+    )
+    convergence_check: Optional[str] = Field(
+        default=None, description="Convergence condition expression"
+    )
+    cycle_id: Optional[str] = Field(
+        default=None, description="Logical cycle group identifier"
+    )
+    timeout: Optional[float] = Field(
+        default=None, description="Cycle timeout in seconds"
+    )
+    memory_limit: Optional[int] = Field(default=None, description="Memory limit in MB")
+    condition: Optional[str] = Field(
+        default=None, description="Conditional cycle routing expression"
+    )
+    parent_cycle: Optional[str] = Field(
+        default=None, description="Parent cycle for nested cycles"
+    )
+
+
 class Workflow:
     """Represents a workflow DAG of nodes."""
 
```
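For reference, `CyclicConnection` is a plain Pydantic extension of the existing `Connection` model, so it can be constructed directly. A minimal sketch, assuming kailash 0.2.0 is installed; the node names are hypothetical, and the base fields come from the hunk's context lines:

```python
from kailash.workflow.graph import Connection, CyclicConnection

# A regular DAG edge, unchanged from 0.1.4.
plain = Connection(
    source_node="reader",
    source_output="data",
    target_node="cleaner",
    target_input="raw",
)

# The same shape carrying cycle metadata; every cycle field is optional
# except the `cycle` flag itself, which defaults to False.
loop = CyclicConnection(
    source_node="cleaner",
    source_output="result",
    target_node="cleaner",
    target_input="raw",
    cycle=True,
    max_iterations=25,
    convergence_check="quality > 0.9",
    cycle_id="cleanup_loop",
)
print(loop.model_dump())  # Pydantic v2, matching the model_dump() call used in connect()
```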
```diff
@@ -198,6 +226,14 @@ class Workflow:
         source_node: str,
         target_node: str,
         mapping: Optional[Dict[str, str]] = None,
+        cycle: bool = False,
+        max_iterations: Optional[int] = None,
+        convergence_check: Optional[str] = None,
+        cycle_id: Optional[str] = None,
+        timeout: Optional[float] = None,
+        memory_limit: Optional[int] = None,
+        condition: Optional[str] = None,
+        parent_cycle: Optional[str] = None,
     ) -> None:
         """Connect two nodes in the workflow.
 
@@ -205,10 +241,18 @@ class Workflow:
             source_node: Source node ID
             target_node: Target node ID
             mapping: Dict mapping source outputs to target inputs
+            cycle: Whether this connection creates a cycle
+            max_iterations: Maximum cycle iterations (required if cycle=True)
+            convergence_check: Convergence condition expression
+            cycle_id: Logical cycle group identifier
+            timeout: Cycle timeout in seconds
+            memory_limit: Memory limit in MB
+            condition: Conditional cycle routing expression
+            parent_cycle: Parent cycle for nested cycles
 
         Raises:
             ConnectionError: If connection is invalid
-            WorkflowValidationError: If nodes don't exist
+            WorkflowValidationError: If nodes don't exist or cycle parameters invalid
         """
         if source_node not in self.nodes:
             available_nodes = ", ".join(self.nodes.keys())
```
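Put together, the extended signature lets existing code opt into cycles without switching APIs yet. A sketch of such a call, with hypothetical node names that must already exist in the workflow:

```python
# Deprecated in favor of CycleBuilder (see the next hunk), but still supported:
workflow.connect(
    "evaluator",                        # source_node
    "processor",                        # target_node
    mapping={"feedback": "adjustments"},
    cycle=True,                         # mark this edge as a back-edge
    max_iterations=50,                  # required unless convergence_check is set
    convergence_check="error < 0.01",
    cycle_id="optimization",
    timeout=120.0,                      # seconds
    memory_limit=512,                   # MB
)
```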
```diff
@@ -223,54 +267,259 @@ class Workflow:
                 f"Available nodes: {available_nodes}"
             )
 
-        # Self-connection check
-        if source_node == target_node:
-            raise ConnectionError(
+        # Self-connection check (allow for cycles)
+        if source_node == target_node and not cycle:
+            raise ConnectionError(
+                f"Cannot connect node '{source_node}' to itself unless it's a cycle"
+            )
+
+        # Validate cycle parameters and issue deprecation warning
+        if cycle:
+            # Issue deprecation warning for cycle usage via connect()
+            warnings.warn(
+                "Using workflow.connect() with cycle=True is deprecated and will be removed in v0.2.0. "
+                "Use the new CycleBuilder API instead:\n"
+                " workflow.create_cycle('cycle_name')\\\n"
+                " .connect(source_node, target_node)\\\n"
+                " .max_iterations(N)\\\n"
+                " .converge_when('condition')\\\n"
+                " .build()\n"
+                "See Phase 5 API documentation for details.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+            # Import enhanced exceptions for better error messaging
+            try:
+                from kailash.workflow.cycle_exceptions import CycleConfigurationError
+
+                if max_iterations is None and convergence_check is None:
+                    raise CycleConfigurationError(
+                        "Cycle connections must specify either max_iterations or convergence_check",
+                        error_code="CYCLE_CONFIG_001",
+                        suggestions=[
+                            "Add max_iterations parameter (recommended: 10-100)",
+                            "Add convergence_check expression (e.g., 'error < 0.01')",
+                            "Consider using the new CycleBuilder API for better validation",
+                        ],
+                    )
+                if max_iterations is not None and max_iterations <= 0:
+                    raise CycleConfigurationError(
+                        f"max_iterations must be positive, got {max_iterations}",
+                        error_code="CYCLE_CONFIG_002",
+                        invalid_params={"max_iterations": max_iterations},
+                        suggestions=[
+                            "Use 10-100 iterations for quick convergence",
+                            "Use 100-1000 iterations for complex optimization",
+                        ],
+                    )
+                if timeout is not None and timeout <= 0:
+                    raise CycleConfigurationError(
+                        f"timeout must be positive, got {timeout}",
+                        error_code="CYCLE_CONFIG_003",
+                        invalid_params={"timeout": timeout},
+                        suggestions=[
+                            "Use 30-300 seconds for most cycles",
+                            "Use longer timeouts for complex processing",
+                        ],
+                    )
+                if memory_limit is not None and memory_limit <= 0:
+                    raise CycleConfigurationError(
+                        f"memory_limit must be positive, got {memory_limit}",
+                        error_code="CYCLE_CONFIG_004",
+                        invalid_params={"memory_limit": memory_limit},
+                        suggestions=[
+                            "Use 100-1000 MB for most cycles",
+                            "Increase limit for data-intensive processing",
+                        ],
+                    )
+            except ImportError:
+                # Fallback to old exceptions if enhanced ones aren't available
+                if max_iterations is None and convergence_check is None:
+                    raise WorkflowValidationError(
+                        "Cycle connections must specify either max_iterations or convergence_check"
+                    )
+                if max_iterations is not None and max_iterations <= 0:
+                    raise WorkflowValidationError("max_iterations must be positive")
+                if timeout is not None and timeout <= 0:
+                    raise WorkflowValidationError("timeout must be positive")
+                if memory_limit is not None and memory_limit <= 0:
+                    raise WorkflowValidationError("memory_limit must be positive")
 
         # Default mapping if not provided
         if mapping is None:
             mapping = {"output": "input"}
 
-        # Check for existing connections
+        # Check for existing connections (allow multiple cycles with different IDs)
         existing_connections = [
             c
             for c in self.connections
             if c.source_node == source_node and c.target_node == target_node
         ]
-        if existing_connections:
+        if existing_connections and not cycle:
             raise ConnectionError(
                 f"Connection already exists between '{source_node}' and '{target_node}'. "
                 f"Existing mappings: {[c.model_dump() for c in existing_connections]}"
             )
 
-        # Create connections
+        # Create connections (store in self.connections list)
         for source_output, target_input in mapping.items():
            try:
-… (old lines 249-254 not rendered in the source diff view)
+                if cycle:
+                    # Create cyclic connection with all metadata
+                    connection = CyclicConnection(
+                        source_node=source_node,
+                        source_output=source_output,
+                        target_node=target_node,
+                        target_input=target_input,
+                        cycle=cycle,
+                        max_iterations=max_iterations,
+                        convergence_check=convergence_check,
+                        cycle_id=cycle_id,
+                        timeout=timeout,
+                        memory_limit=memory_limit,
+                        condition=condition,
+                        parent_cycle=parent_cycle,
+                    )
+                else:
+                    # Create regular connection
+                    connection = Connection(
+                        source_node=source_node,
+                        source_output=source_output,
+                        target_node=target_node,
+                        target_input=target_input,
+                    )
            except ValidationError as e:
                raise ConnectionError(f"Invalid connection data: {e}") from e
 
            self.connections.append(connection)
 
-… (old lines 260-268 not rendered in the source diff view)
+        # FIXED: Add edge to graph ONCE with the complete mapping
+        edge_data = {
+            "mapping": mapping,  # Complete mapping dictionary
+        }
+
+        # For backward compatibility, store single mappings as strings
+        # and multi-mappings as lists
+        if len(mapping) == 1:
+            # Single mapping - store as strings for backward compatibility
+            edge_data["from_output"] = list(mapping.keys())[0]
+            edge_data["to_input"] = list(mapping.values())[0]
+        else:
+            # Multiple mappings - store as lists
+            edge_data["from_output"] = list(mapping.keys())
+            edge_data["to_input"] = list(mapping.values())
+
+        # Add cycle metadata to edge
+        if cycle:
+            edge_data.update(
+                {
+                    "cycle": cycle,
+                    "max_iterations": max_iterations,
+                    "convergence_check": convergence_check,
+                    "cycle_id": cycle_id,
+                    "timeout": timeout,
+                    "memory_limit": memory_limit,
+                    "condition": condition,
+                    "parent_cycle": parent_cycle,
+                }
             )
 
-… (old lines 271-273 not rendered in the source diff view)
+        # Add or update the edge (NetworkX will update if edge exists)
+        self.graph.add_edge(source_node, target_node, **edge_data)
+
+        # Enhanced logging for cycles
+        if cycle:
+            cycle_info = f" (CYCLE: id={cycle_id}, max_iter={max_iterations}, conv={convergence_check})"
+            logger.info(
+                f"Connected '{source_node}' to '{target_node}' with mapping: {mapping}{cycle_info}"
+            )
+        else:
+            logger.info(
+                f"Connected '{source_node}' to '{target_node}' with mapping: {mapping}"
+            )
+
+    def create_cycle(self, cycle_id: Optional[str] = None):
+        """
+        Create a new CycleBuilder for intuitive cycle configuration.
+
+        This method provides the entry point to the enhanced CycleBuilder API,
+        which offers a fluent, chainable interface for creating cyclic workflow
+        connections with better developer experience than the raw connect() method.
+
+        Design Philosophy:
+            Replaces verbose parameter-heavy cycle creation with an intuitive
+            builder pattern that guides developers through cycle configuration
+            with IDE auto-completion and method chaining.
+
+        Upstream Dependencies:
+            - Requires source and target nodes to exist in workflow
+            - Uses existing connection validation and cycle infrastructure
+
+        Downstream Consumers:
+            - CycleBuilder.build() calls back to workflow.connect() internally
+            - CyclicWorkflowExecutor for execution of configured cycles
+            - Cycle debugging and visualization tools
+
+        Usage Patterns:
+            1. Simple cycles: create_cycle().connect().max_iterations().build()
+            2. Convergence-based: create_cycle().connect().converge_when().build()
+            3. Complex cycles: Full builder chain with timeouts and conditions
+
+        Implementation Details:
+            Creates a CycleBuilder instance that accumulates configuration
+            through method chaining, then applies it via workflow.connect()
+            when build() is called. Maintains full backward compatibility.
+
+        Error Handling:
+            - WorkflowValidationError: If cycle_id conflicts with existing cycles
+            - CycleConfigurationError: Raised by CycleBuilder for invalid config
+
+        Side Effects:
+            Creates CycleBuilder instance but does not modify workflow until
+            build() is called. No validation occurs until build() time.
+
+        Args:
+            cycle_id (Optional[str]): Optional identifier for the cycle group.
+                If None, cycles are grouped by connection pattern.
+                Used for nested cycles and debugging identification.
+
+        Returns:
+            CycleBuilder: Fluent builder instance for configuring the cycle
+
+        Raises:
+            ImportError: If CycleBuilder module cannot be imported
+
+        Example:
+            >>> # Basic cycle with iteration limit
+            >>> workflow.create_cycle("optimization") \\
+            ...     .connect("processor", "evaluator") \\
+            ...     .max_iterations(50) \\
+            ...     .build()
+
+            >>> # Convergence-based cycle with timeout
+            >>> workflow.create_cycle("quality_improvement") \\
+            ...     .connect("cleaner", "validator", {"result": "data"}) \\
+            ...     .converge_when("quality > 0.95") \\
+            ...     .timeout(300) \\
+            ...     .build()
+
+            >>> # Nested cycle with memory limit
+            >>> workflow.create_cycle("inner_optimization") \\
+            ...     .connect("fine_tuner", "evaluator") \\
+            ...     .max_iterations(10) \\
+            ...     .nested_in("outer_optimization") \\
+            ...     .memory_limit(1024) \\
+            ...     .build()
+        """
+        try:
+            from kailash.workflow.cycle_builder import CycleBuilder
+        except ImportError as e:
+            raise ImportError(
+                "CycleBuilder not available. Ensure kailash.workflow.cycle_builder is installed."
+            ) from e
+
+        return CycleBuilder(workflow=self, cycle_id=cycle_id)
 
     def _add_edge_internal(
         self, from_node: str, from_output: str, to_node: str, to_input: str
```
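Two behaviors of the body above are worth calling out: every `cycle=True` call emits a `DeprecationWarning` steering callers to the builder, and a cycle with no termination condition fails fast. A sketch of both, reusing the hypothetical workflow from the previous example; note the error type falls back to `WorkflowValidationError` if the enhanced exceptions module is unavailable:

```python
import warnings

from kailash.workflow.cycle_exceptions import CycleConfigurationError

# 1. cycle=True still works, but warns.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    workflow.connect("evaluator", "processor", cycle=True, max_iterations=10)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# 2. No max_iterations and no convergence_check: rejected up front.
try:
    workflow.connect("evaluator", "processor", cycle=True)
except CycleConfigurationError as exc:
    print(exc)  # CYCLE_CONFIG_001, with fix suggestions attached

# 3. The non-deprecated equivalent spelled out in the warning text:
workflow.create_cycle("optimization") \
    .connect("evaluator", "processor") \
    .max_iterations(10) \
    .build()
```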
```diff
@@ -308,23 +557,155 @@ class Workflow:
         # Fallback to _node_instances
         return self._node_instances.get(node_id)
 
+    def separate_dag_and_cycle_edges(self) -> Tuple[List[Tuple], List[Tuple]]:
+        """Separate DAG edges from cycle edges.
+
+        Returns:
+            Tuple of (dag_edges, cycle_edges) where each edge is (source, target, data)
+        """
+        dag_edges = []
+        cycle_edges = []
+
+        for source, target, data in self.graph.edges(data=True):
+            if data.get("cycle", False):
+                cycle_edges.append((source, target, data))
+            else:
+                dag_edges.append((source, target, data))
+
+        return dag_edges, cycle_edges
+
+    def get_cycle_groups(self) -> Dict[str, List[Tuple]]:
+        """Get cycle edges grouped by cycle_id with enhanced multi-node cycle detection.
+
+        For multi-node cycles like A → B → C → A where only C → A is marked as cycle,
+        this method identifies all nodes (A, B, C) that are part of the same strongly
+        connected component and groups them together.
+
+        Returns:
+            Dict mapping cycle_id to list of cycle edges
+        """
+        cycle_groups = {}
+        _, cycle_edges = self.separate_dag_and_cycle_edges()
+
+        # First pass: group by cycle_id as before
+        for source, target, data in cycle_edges:
+            cycle_id = data.get("cycle_id", "default")
+            if cycle_id not in cycle_groups:
+                cycle_groups[cycle_id] = []
+            cycle_groups[cycle_id].append((source, target, data))
+
+        # Second pass: enhance cycle groups with strongly connected components
+        enhanced_groups = {}
+        for cycle_id, edges in cycle_groups.items():
+            # Find all nodes that are part of strongly connected components
+            # containing any cycle edge nodes
+            cycle_nodes = set()
+            for source, target, data in edges:
+                cycle_nodes.add(source)
+                cycle_nodes.add(target)
+
+            # Find strongly connected components in the full graph
+            try:
+                # Get all strongly connected components
+                sccs = list(nx.strongly_connected_components(self.graph))
+
+                # Find which SCC contains our cycle nodes
+                target_scc = None
+                for scc in sccs:
+                    if any(node in scc for node in cycle_nodes):
+                        target_scc = scc
+                        break
+
+                if target_scc and len(target_scc) > 1:
+                    # Multi-node cycle detected - include all SCC nodes
+                    logger.debug(
+                        f"Enhanced cycle detection for {cycle_id}: {cycle_nodes} → {target_scc}"
+                    )
+
+                    # Add edges for all nodes in the SCC that are connected
+                    enhanced_edges = list(edges)  # Start with original cycle edges
+                    for node in target_scc:
+                        for successor in self.graph.successors(node):
+                            if successor in target_scc:
+                                # This is an edge within the SCC
+                                edge_data = self.graph.get_edge_data(node, successor)
+                                if not edge_data.get("cycle", False):
+                                    # Add as a synthetic cycle edge for execution planning
+                                    synthetic_edge_data = edge_data.copy()
+                                    synthetic_edge_data.update(
+                                        {
+                                            "cycle": True,
+                                            "cycle_id": cycle_id,
+                                            "synthetic": True,  # Mark as synthetic for reference
+                                            "max_iterations": edges[0][2].get(
+                                                "max_iterations"
+                                            ),
+                                            "convergence_check": edges[0][2].get(
+                                                "convergence_check"
+                                            ),
+                                            "timeout": edges[0][2].get("timeout"),
+                                            "memory_limit": edges[0][2].get(
+                                                "memory_limit"
+                                            ),
+                                        }
+                                    )
+                                    enhanced_edges.append(
+                                        (node, successor, synthetic_edge_data)
+                                    )
+
+                    enhanced_groups[cycle_id] = enhanced_edges
+                else:
+                    # Single-node cycle or no SCC found
+                    enhanced_groups[cycle_id] = edges
+
+            except Exception as e:
+                logger.warning(f"Could not enhance cycle detection for {cycle_id}: {e}")
+                # Fall back to original behavior
+                enhanced_groups[cycle_id] = edges
+
+        return enhanced_groups
+
+    def has_cycles(self) -> bool:
+        """Check if the workflow contains any cycle connections.
+
+        Returns:
+            True if workflow has cycle connections, False otherwise
+        """
+        _, cycle_edges = self.separate_dag_and_cycle_edges()
+        return len(cycle_edges) > 0
+
     def get_execution_order(self) -> List[str]:
-        """Get topological execution order for nodes.
+        """Get topological execution order for nodes, handling cycles gracefully.
 
         Returns:
             List of node IDs in execution order
 
         Raises:
-            WorkflowValidationError: If workflow contains cycles
+            WorkflowValidationError: If workflow contains unmarked cycles
         """
+        # Create a copy of the graph without cycle edges for topological sort
+        dag_edges, cycle_edges = self.separate_dag_and_cycle_edges()
+
+        # Create DAG-only graph
+        dag_graph = nx.DiGraph()
+        dag_graph.add_nodes_from(self.graph.nodes(data=True))
+        for source, target, data in dag_edges:
+            dag_graph.add_edge(source, target, **data)
+
         try:
-… (old line 321 not rendered in the source diff view)
+            # Get topological order for DAG portion
+            return list(nx.topological_sort(dag_graph))
         except nx.NetworkXUnfeasible:
-… (old lines 323-327 not rendered in the source diff view)
+            # Check if there are unmarked cycles
+            cycles = list(nx.simple_cycles(dag_graph))
+            if cycles:
+                raise WorkflowValidationError(
+                    f"Workflow contains unmarked cycles: {cycles}. "
+                    "Mark cycle connections with cycle=True or remove circular dependencies."
+                )
+            else:
+                # This shouldn't happen, but handle gracefully
+                raise WorkflowValidationError("Unable to determine execution order")
 
     def validate(self) -> None:
         """Validate the workflow structure.
```
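These helpers split the graph into two layers: marked cycle edges are set aside, and only the remaining DAG is topologically sorted, so a properly marked back-edge no longer makes `get_execution_order()` fail. A sketch of the intended behavior on a three-node loop, assuming nodes "A", "B", and "C" were already added to a `workflow`:

```python
# A → B → C → A, with only the closing edge marked as a cycle.
workflow.connect("A", "B")
workflow.connect("B", "C")
workflow.connect("C", "A", cycle=True, max_iterations=5, cycle_id="loop")

dag_edges, cycle_edges = workflow.separate_dag_and_cycle_edges()
assert len(dag_edges) == 2 and len(cycle_edges) == 1
assert workflow.has_cycles()

# get_cycle_groups() widens the group to the whole strongly connected
# component, adding A → B and B → C as synthetic cycle edges.
for source, target, data in workflow.get_cycle_groups()["loop"]:
    print(source, "->", target, data.get("synthetic", False))

# The sort runs on the DAG portion only, so no WorkflowValidationError here.
print(workflow.get_execution_order())  # ["A", "B", "C"]
```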
```diff
@@ -332,12 +713,15 @@ class Workflow:
         Raises:
             WorkflowValidationError: If workflow is invalid
         """
-        # Check for cycles
+        # Check for unmarked cycles and validate execution order
         try:
             self.get_execution_order()
         except WorkflowValidationError:
             raise
 
+        # Validate cycle configurations
+        self._validate_cycles()
+
         # Check all nodes have required inputs
         for node_id, node_instance in self._node_instances.items():
             try:
@@ -354,8 +738,12 @@ class Workflow:
             for _, _, data in incoming_edges:
                 to_input = data.get("to_input")
                 if to_input:
-… (old lines 357-358 not rendered in the source diff view)
+                    # Handle both string and list formats
+                    if isinstance(to_input, list):
+                        connected_inputs.update(to_input)
+                    else:
+                        connected_inputs.add(to_input)
+                # For backward compatibility and complete mapping
                 mapping = data.get("mapping", {})
                 connected_inputs.update(mapping.values())
 
```
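The string-or-list branch exists because `connect()` now stores `from_output`/`to_input` as plain strings for single mappings and as parallel lists for multi-key mappings (see the `edge_data` construction earlier). The two shapes side by side, as a standalone sketch:

```python
# Single mapping: strings, exactly as in 0.1.4.
edge_single = {
    "mapping": {"output": "input"},
    "from_output": "output",
    "to_input": "input",
}

# Multi-key mapping: parallel lists, new in 0.2.0.
edge_multi = {
    "mapping": {"result": "data", "score": "quality"},
    "from_output": ["result", "score"],
    "to_input": ["data", "quality"],
}

# validate() must therefore normalize both shapes when collecting inputs:
for edge in (edge_single, edge_multi):
    to_input = edge["to_input"]
    connected = set(to_input) if isinstance(to_input, list) else {to_input}
    print(connected)
```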
```diff
@@ -382,6 +770,66 @@ class Workflow:
 
         logger.info(f"Workflow '{self.name}' validated successfully")
 
+    def _validate_cycles(self) -> None:
+        """Validate cycle configurations and detect potential issues.
+
+        Raises:
+            WorkflowValidationError: If cycle configuration is invalid
+        """
+        cycle_groups = self.get_cycle_groups()
+
+        for cycle_id, cycle_edges in cycle_groups.items():
+            # Check for conflicting cycle parameters within the same group
+            max_iterations_set = set()
+            convergence_checks = set()
+            timeouts = set()
+
+            for source, target, data in cycle_edges:
+                if data.get("max_iterations") is not None:
+                    max_iterations_set.add(data["max_iterations"])
+                if data.get("convergence_check") is not None:
+                    convergence_checks.add(data["convergence_check"])
+                if data.get("timeout") is not None:
+                    timeouts.add(data["timeout"])
+
+            # Warn about conflicting parameters (but don't fail)
+            if len(max_iterations_set) > 1:
+                logger.warning(
+                    f"Cycle group '{cycle_id}' has conflicting max_iterations: {max_iterations_set}"
+                )
+            if len(convergence_checks) > 1:
+                logger.warning(
+                    f"Cycle group '{cycle_id}' has conflicting convergence_check: {convergence_checks}"
+                )
+            if len(timeouts) > 1:
+                logger.warning(
+                    f"Cycle group '{cycle_id}' has conflicting timeouts: {timeouts}"
+                )
+
+        # Check for nested cycle validity
+        parent_cycles = set()
+        child_cycles = set()
+
+        for cycle_id, cycle_edges in cycle_groups.items():
+            for source, target, data in cycle_edges:
+                if data.get("parent_cycle"):
+                    parent_cycles.add(data["parent_cycle"])
+                    child_cycles.add(cycle_id)
+
+        # Ensure parent cycles exist
+        for parent_cycle in parent_cycles:
+            if parent_cycle not in cycle_groups:
+                raise WorkflowValidationError(
+                    f"Parent cycle '{parent_cycle}' not found in workflow"
+                )
+
+        # Check for circular parent relationships
+        for child_cycle in child_cycles:
+            if child_cycle in parent_cycles:
+                raise WorkflowValidationError(
+                    f"Cycle '{child_cycle}' cannot be both parent and child"
+                )
+
     def run(
         self, task_manager: Optional[TaskManager] = None, **overrides
     ) -> Tuple[Dict[str, Any], Optional[str]]:
```
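`_validate_cycles()` is deliberately lenient about conflicting per-edge settings (warnings only) but strict about nesting: a `parent_cycle` must name an existing group, and no group may be both parent and child. A sketch of the strict path with hypothetical nodes; the `WorkflowValidationError` import path is an assumption based on the package layout:

```python
from kailash.sdk_exceptions import WorkflowValidationError  # assumed location

# An inner cycle that names a parent group that was never created:
workflow.connect(
    "fine_tuner",
    "evaluator",
    cycle=True,
    max_iterations=10,
    cycle_id="inner",
    parent_cycle="outer",  # no cycle group "outer" exists
)

try:
    workflow.validate()
except WorkflowValidationError as exc:
    print(exc)  # "Parent cycle 'outer' not found in workflow"
```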
```diff
@@ -482,9 +930,22 @@
 
                 source_results = results.get(source_node_id, {})
 
-                #
-                if from_output and to_input
-… (old line 487 not rendered in the source diff view; the two lines above appear truncated)
+                # Handle backward compatibility - from_output/to_input can be string or list
+                if from_output and to_input:
+                    # Convert to lists if they're strings (backward compatibility)
+                    from_outputs = (
+                        [from_output]
+                        if isinstance(from_output, str)
+                        else from_output
+                    )
+                    to_inputs = (
+                        [to_input] if isinstance(to_input, str) else to_input
+                    )
+
+                    # Process each mapping pair
+                    for i, (src, dst) in enumerate(zip(from_outputs, to_inputs)):
+                        if src in source_results:
+                            node_inputs[dst] = source_results[src]
 
                 # Also add connections using mapping format for backward compatibility
                 for source_key, target_key in mapping.items():
```