kailash 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kailash/runtime/local.py CHANGED
@@ -35,14 +35,20 @@ Examples:
35
35
  """
36
36
 
37
37
  import asyncio
38
+ import hashlib
38
39
  import logging
40
+ import time
41
+ from collections import defaultdict
39
42
  from datetime import UTC, datetime
40
- from typing import Any, Dict, Optional
43
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union
41
44
 
42
45
  import networkx as nx
46
+ import psutil
43
47
 
44
48
  from kailash.nodes import Node
49
+ from kailash.runtime.compatibility_reporter import CompatibilityReporter
45
50
  from kailash.runtime.parameter_injector import WorkflowParameterInjector
51
+ from kailash.runtime.performance_monitor import ExecutionMetrics, PerformanceMonitor
46
52
  from kailash.runtime.secret_provider import EnvironmentSecretProvider, SecretProvider
47
53
  from kailash.runtime.validation.connection_context import ConnectionContext
48
54
  from kailash.runtime.validation.enhanced_error_formatter import EnhancedErrorFormatter
@@ -67,6 +73,101 @@ from kailash.workflow.cyclic_runner import CyclicWorkflowExecutor
67
73
  logger = logging.getLogger(__name__)
68
74
 
69
75
 
76
+ class ContentAwareExecutionError(Exception):
77
+ """Exception raised when content-aware success detection identifies a failure."""
78
+
79
+ pass
80
+
81
+
82
+ def detect_success(result):
83
+ """Detect success or failure from a node execution result."""
84
+ # Handle None result (backward compatibility)
85
+ if result is None:
86
+ return True, None
87
+
88
+ # Handle non-dict results (backward compatibility)
89
+ if not isinstance(result, dict):
90
+ return True, None
91
+
92
+ # Handle empty dict (backward compatibility)
93
+ if not result:
94
+ return True, None
95
+
96
+ # Check for success field
97
+ if "success" not in result:
98
+ # No success field, default to success (backward compatibility)
99
+ return True, None
100
+
101
+ success_value = result["success"]
102
+
103
+ # Evaluate success value as boolean
104
+ is_success = bool(success_value)
105
+
106
+ if is_success:
107
+ # Operation succeeded
108
+ return True, None
109
+ else:
110
+ # Operation failed, extract error information
111
+ error_info = result.get("error", "Operation failed (no error details provided)")
112
+ return False, error_info
113
+
114
+
115
+ def should_stop_on_content_failure(result, content_aware_mode=True, stop_on_error=True):
116
+ """Check if execution should stop based on content indicating failure."""
117
+ if not content_aware_mode or not stop_on_error:
118
+ return False, None
119
+
120
+ # Use detect_success for the actual detection logic
121
+ is_success, error_info = detect_success(result)
122
+
123
+ if is_success:
124
+ # Operation succeeded, continue execution
125
+ return False, None
126
+ else:
127
+ # Operation failed, stop execution
128
+ return True, error_info
129
+
130
+
131
+ def create_content_aware_error(node_id, result, error_message=None):
132
+ """Create a ContentAwareExecutionError from node result."""
133
+ if error_message is None:
134
+ error_message = result.get("error", "Operation failed")
135
+
136
+ error = ContentAwareExecutionError(
137
+ f"Node '{node_id}' reported failure: {error_message}"
138
+ )
139
+ error.node_id = node_id
140
+ error.failure_data = result
141
+ return error
142
+
143
+
144
+ # Conditional execution imports (lazy-loaded to avoid circular imports)
145
+ _ConditionalBranchAnalyzer = None
146
+ _DynamicExecutionPlanner = None
147
+
148
+
149
+ def _get_conditional_analyzer():
150
+ """Lazy import ConditionalBranchAnalyzer to avoid circular imports."""
151
+ global _ConditionalBranchAnalyzer
152
+ if _ConditionalBranchAnalyzer is None:
153
+ from kailash.analysis.conditional_branch_analyzer import (
154
+ ConditionalBranchAnalyzer,
155
+ )
156
+
157
+ _ConditionalBranchAnalyzer = ConditionalBranchAnalyzer
158
+ return _ConditionalBranchAnalyzer
159
+
160
+
161
+ def _get_execution_planner():
162
+ """Lazy import DynamicExecutionPlanner to avoid circular imports."""
163
+ global _DynamicExecutionPlanner
164
+ if _DynamicExecutionPlanner is None:
165
+ from kailash.planning.dynamic_execution_planner import DynamicExecutionPlanner
166
+
167
+ _DynamicExecutionPlanner = DynamicExecutionPlanner
168
+ return _DynamicExecutionPlanner
169
+
170
+
70
171
  class LocalRuntime:
71
172
  """Unified runtime with enterprise capabilities.
72
173
 
@@ -96,6 +197,8 @@ class LocalRuntime:
96
197
  resource_limits: Optional[dict[str, Any]] = None,
97
198
  secret_provider: Optional[Any] = None,
98
199
  connection_validation: str = "warn",
200
+ conditional_execution: str = "route_data",
201
+ content_aware_success_detection: bool = True,
99
202
  ):
100
203
  """Initialize the unified runtime.
101
204
 
@@ -114,13 +217,27 @@ class LocalRuntime:
114
217
  - "off": No validation (backward compatibility)
115
218
  - "warn": Log warnings on validation errors (default)
116
219
  - "strict": Raise errors on validation failures
220
+ conditional_execution: Execution strategy for conditional routing:
221
+ - "route_data": Current behavior - all nodes execute, data routing only (default)
222
+ - "skip_branches": New behavior - skip unreachable branches entirely
223
+ content_aware_success_detection: Whether to enable content-aware success detection:
224
+ - True: Check return value content for success/failure patterns (default)
225
+ - False: Only use exception-based failure detection (legacy mode)
117
226
  """
118
227
  # Validate connection_validation parameter
119
- valid_modes = {"off", "warn", "strict"}
120
- if connection_validation not in valid_modes:
228
+ valid_conn_modes = {"off", "warn", "strict"}
229
+ if connection_validation not in valid_conn_modes:
121
230
  raise ValueError(
122
231
  f"Invalid connection_validation mode: {connection_validation}. "
123
- f"Must be one of: {valid_modes}"
232
+ f"Must be one of: {valid_conn_modes}"
233
+ )
234
+
235
+ # Validate conditional_execution parameter
236
+ valid_exec_modes = {"route_data", "skip_branches"}
237
+ if conditional_execution not in valid_exec_modes:
238
+ raise ValueError(
239
+ f"Invalid conditional_execution mode: {conditional_execution}. "
240
+ f"Must be one of: {valid_exec_modes}"
124
241
  )
125
242
 
126
243
  self.debug = debug
@@ -134,6 +251,8 @@ class LocalRuntime:
134
251
  self.enable_audit = enable_audit
135
252
  self.resource_limits = resource_limits or {}
136
253
  self.connection_validation = connection_validation
254
+ self.conditional_execution = conditional_execution
255
+ self.content_aware_success_detection = content_aware_success_detection
137
256
  self.logger = logger
138
257
 
139
258
  # Enterprise feature managers (lazy initialization)
@@ -143,6 +262,30 @@ class LocalRuntime:
143
262
  if enable_cycles:
144
263
  self.cyclic_executor = CyclicWorkflowExecutor()
145
264
 
265
+ # Initialize conditional execution components (lazy initialization)
266
+ self._conditional_branch_analyzer = None
267
+ self._dynamic_execution_planner = None
268
+
269
+ # Phase 3: Basic Integration features
270
+ self._performance_monitor = None
271
+ self._compatibility_reporter = None
272
+ self._enable_performance_monitoring = False
273
+ self._performance_switch_enabled = False
274
+ self._enable_compatibility_reporting = False
275
+
276
+ # Phase 5: Production readiness features
277
+ self._execution_plan_cache = {}
278
+ self._performance_metrics = {}
279
+ self._fallback_metrics = {}
280
+ self._analytics_data = {
281
+ "conditional_executions": [],
282
+ "performance_history": [],
283
+ "cache_hits": 0,
284
+ "cache_misses": 0,
285
+ "execution_patterns": {},
286
+ "optimization_stats": {},
287
+ }
288
+
146
289
  # Configure logging
147
290
  if debug:
148
291
  self.logger.setLevel(logging.DEBUG)
@@ -405,10 +548,112 @@ class LocalRuntime:
405
548
  raise RuntimeExecutionError(
406
549
  f"Cyclic workflow execution failed: {e}"
407
550
  ) from e
551
+ elif (
552
+ self.conditional_execution == "skip_branches"
553
+ and self._has_conditional_patterns(workflow)
554
+ ):
555
+ # Check for automatic mode switching based on performance
556
+ current_mode = self.conditional_execution
557
+ if (
558
+ self._enable_performance_monitoring
559
+ and self._performance_switch_enabled
560
+ ):
561
+ should_switch, recommended_mode, reason = (
562
+ self._check_performance_switch(current_mode)
563
+ )
564
+ if should_switch:
565
+ self.logger.info(f"Switching execution mode: {reason}")
566
+ self.conditional_execution = recommended_mode
567
+ # If switching to route_data, use standard execution
568
+ if recommended_mode == "route_data":
569
+ results = await self._execute_workflow_async(
570
+ workflow=workflow,
571
+ task_manager=task_manager,
572
+ run_id=run_id,
573
+ parameters=processed_parameters or {},
574
+ workflow_context=workflow_context,
575
+ )
576
+ else:
577
+ # Continue with conditional execution
578
+ try:
579
+ results = await self._execute_conditional_approach(
580
+ workflow=workflow,
581
+ parameters=processed_parameters or {},
582
+ task_manager=task_manager,
583
+ run_id=run_id,
584
+ workflow_context=workflow_context,
585
+ )
586
+ except Exception as e:
587
+ self.logger.warning(
588
+ f"Conditional execution failed, falling back to standard execution: {e}"
589
+ )
590
+ # Fallback to standard execution
591
+ results = await self._execute_workflow_async(
592
+ workflow=workflow,
593
+ task_manager=task_manager,
594
+ run_id=run_id,
595
+ parameters=processed_parameters or {},
596
+ workflow_context=workflow_context,
597
+ )
598
+ else:
599
+ # No switch recommended, continue with current mode
600
+ self.logger.info(
601
+ "Conditional workflow detected, using conditional execution optimization"
602
+ )
603
+ try:
604
+ results = await self._execute_conditional_approach(
605
+ workflow=workflow,
606
+ parameters=processed_parameters or {},
607
+ task_manager=task_manager,
608
+ run_id=run_id,
609
+ workflow_context=workflow_context,
610
+ )
611
+ except Exception as e:
612
+ self.logger.warning(
613
+ f"Conditional execution failed, falling back to standard execution: {e}"
614
+ )
615
+ # Fallback to standard execution
616
+ results = await self._execute_workflow_async(
617
+ workflow=workflow,
618
+ task_manager=task_manager,
619
+ run_id=run_id,
620
+ parameters=processed_parameters or {},
621
+ workflow_context=workflow_context,
622
+ )
623
+ else:
624
+ # Performance monitoring disabled
625
+ self.logger.info(
626
+ "Conditional workflow detected, using conditional execution optimization"
627
+ )
628
+ try:
629
+ results = await self._execute_conditional_approach(
630
+ workflow=workflow,
631
+ parameters=processed_parameters or {},
632
+ task_manager=task_manager,
633
+ run_id=run_id,
634
+ workflow_context=workflow_context,
635
+ )
636
+ except Exception as e:
637
+ self.logger.warning(
638
+ f"Conditional execution failed, falling back to standard execution: {e}"
639
+ )
640
+ # Fallback to standard execution
641
+ results = await self._execute_workflow_async(
642
+ workflow=workflow,
643
+ task_manager=task_manager,
644
+ run_id=run_id,
645
+ parameters=processed_parameters or {},
646
+ workflow_context=workflow_context,
647
+ )
408
648
  else:
409
649
  # Execute standard DAG workflow with enterprise features
650
+ execution_mode = (
651
+ "route_data"
652
+ if self.conditional_execution == "route_data"
653
+ else "standard"
654
+ )
410
655
  self.logger.info(
411
- "Standard DAG workflow detected, using unified enterprise execution"
656
+ f"Standard DAG workflow detected, using unified enterprise execution ({execution_mode} mode)"
412
657
  )
413
658
  results = await self._execute_workflow_async(
414
659
  workflow=workflow,
@@ -545,6 +790,9 @@ class LocalRuntime:
545
790
  node_outputs = {}
546
791
  failed_nodes = []
547
792
 
793
+ # Make results available to _should_skip_conditional_node for transitive dependency checking
794
+ self._current_results = results
795
+
548
796
  # Use the workflow context passed from _execute_async
549
797
  if workflow_context is None:
550
798
  workflow_context = {}
@@ -638,6 +886,10 @@ class LocalRuntime:
638
886
 
639
887
  # CONDITIONAL EXECUTION: Skip nodes that only receive None inputs from conditional routing
640
888
  if self._should_skip_conditional_node(workflow, node_id, inputs):
889
+ if self.debug:
890
+ self.logger.debug(
891
+ f"DEBUG: Skipping {node_id} - inputs: {inputs}"
892
+ )
641
893
  self.logger.info(
642
894
  f"Skipping node {node_id} - all conditional inputs are None"
643
895
  )
@@ -691,6 +943,43 @@ class LocalRuntime:
691
943
  if self.debug:
692
944
  self.logger.debug(f"Node {node_id} outputs: {outputs}")
693
945
 
946
+ # Content-aware success detection (CRITICAL FIX)
947
+ if self.content_aware_success_detection:
948
+ should_stop, error_message = should_stop_on_content_failure(
949
+ result=outputs,
950
+ content_aware_mode=True,
951
+ stop_on_error=True, # Always stop on content failures when content-aware mode is enabled
952
+ )
953
+
954
+ if should_stop:
955
+ # Create detailed error for content-aware failure
956
+ error = create_content_aware_error(
957
+ node_id=node_id,
958
+ result=(
959
+ outputs
960
+ if isinstance(outputs, dict)
961
+ else {"error": error_message}
962
+ ),
963
+ error_message=error_message,
964
+ )
965
+
966
+ # Log the content-aware failure
967
+ self.logger.error(
968
+ f"Content-aware failure detected in node {node_id}: {error_message}"
969
+ )
970
+
971
+ # Update task status to failed if task manager exists
972
+ if task and task_manager:
973
+ task_manager.update_task_status(
974
+ task.task_id,
975
+ TaskStatus.FAILED,
976
+ error=str(error),
977
+ ended_at=datetime.now(UTC),
978
+ )
979
+
980
+ # Raise the content-aware execution error
981
+ raise error
982
+
694
983
  # Update task status with enhanced metrics
695
984
  if task and task_manager:
696
985
  # Convert performance metrics to TaskMetrics format
@@ -744,7 +1033,12 @@ class LocalRuntime:
744
1033
  f"Error during node {node_id} cleanup after failure: {cleanup_error}"
745
1034
  )
746
1035
 
747
- # Determine if we should continue
1036
+ # Content-aware execution errors should always stop execution
1037
+ if isinstance(e, ContentAwareExecutionError):
1038
+ error_msg = f"Content-aware failure in node '{node_id}': {e}"
1039
+ raise WorkflowExecutionError(error_msg) from e
1040
+
1041
+ # Determine if we should continue for other exceptions
748
1042
  if self._should_stop_on_error(workflow, node_id):
749
1043
  error_msg = f"Node '{node_id}' failed: {e}"
750
1044
  if len(failed_nodes) > 1:
@@ -886,19 +1180,44 @@ class LocalRuntime:
886
1180
  break
887
1181
 
888
1182
  if found:
889
- inputs[target_key] = value
890
- if self.debug:
891
- self.logger.debug(
892
- f" MAPPED: {source_key} -> {target_key} (type: {type(value)})"
893
- )
1183
+ # CONDITIONAL EXECUTION FIX: Don't overwrite existing non-None values with None
1184
+ # This handles cases where multiple edges map to the same input parameter
1185
+ if (
1186
+ target_key in inputs
1187
+ and inputs[target_key] is not None
1188
+ and value is None
1189
+ ):
1190
+ if self.debug:
1191
+ self.logger.debug(
1192
+ f" SKIP: Not overwriting existing non-None value for {target_key} with None from {source_node_id}"
1193
+ )
1194
+ else:
1195
+ inputs[target_key] = value
1196
+ if self.debug:
1197
+ self.logger.debug(
1198
+ f" MAPPED: {source_key} -> {target_key} (type: {type(value)})"
1199
+ )
894
1200
  else:
895
1201
  # Simple key mapping
896
1202
  if source_key in source_outputs:
897
- inputs[target_key] = source_outputs[source_key]
898
- if self.debug:
899
- self.logger.debug(
900
- f" MAPPED: {source_key} -> {target_key} (type: {type(source_outputs[source_key])})"
901
- )
1203
+ value = source_outputs[source_key]
1204
+ # CONDITIONAL EXECUTION FIX: Don't overwrite existing non-None values with None
1205
+ # This handles cases where multiple edges map to the same input parameter
1206
+ if (
1207
+ target_key in inputs
1208
+ and inputs[target_key] is not None
1209
+ and value is None
1210
+ ):
1211
+ if self.debug:
1212
+ self.logger.debug(
1213
+ f" SKIP: Not overwriting existing non-None value for {target_key} with None from {source_node_id}"
1214
+ )
1215
+ else:
1216
+ inputs[target_key] = value
1217
+ if self.debug:
1218
+ self.logger.debug(
1219
+ f" MAPPED: {source_key} -> {target_key} (type: {type(value)})"
1220
+ )
902
1221
  else:
903
1222
  if self.debug:
904
1223
  self.logger.debug(
@@ -1169,18 +1488,39 @@ class LocalRuntime:
1169
1488
  if not incoming_edges:
1170
1489
  return False
1171
1490
 
1172
- # Check if any incoming edges are from conditional nodes
1491
+ # Check for conditional inputs and analyze the nature of the data
1173
1492
  has_conditional_inputs = False
1493
+ has_non_none_connected_input = False
1494
+
1174
1495
  for source_node_id, _, edge_data in incoming_edges:
1175
1496
  source_node = workflow._node_instances.get(source_node_id)
1497
+ mapping = edge_data.get("mapping", {})
1498
+
1499
+ # Check if this edge provides any non-None inputs
1500
+ for source_key, target_key in mapping.items():
1501
+ if target_key in inputs and inputs[target_key] is not None:
1502
+ has_non_none_connected_input = True
1503
+
1504
+ # Direct connection from SwitchNode
1176
1505
  if source_node and source_node.__class__.__name__ in ["SwitchNode"]:
1177
1506
  has_conditional_inputs = True
1178
- break
1507
+ # Transitive dependency: source node was skipped due to conditional routing
1508
+ elif (
1509
+ hasattr(self, "_current_results")
1510
+ and source_node_id in self._current_results
1511
+ ):
1512
+ if self._current_results[source_node_id] is None:
1513
+ has_conditional_inputs = True
1179
1514
 
1180
1515
  # If no conditional inputs, don't skip
1181
1516
  if not has_conditional_inputs:
1182
1517
  return False
1183
1518
 
1519
+ # If we have conditional inputs but also have non-None data, don't skip
1520
+ # This handles mixed scenarios where some inputs are skipped but others provide data
1521
+ if has_non_none_connected_input:
1522
+ return False
1523
+
1184
1524
  # Get the node instance to check for configuration parameters
1185
1525
  node_instance = workflow._node_instances.get(node_id)
1186
1526
  if not node_instance:
@@ -1213,17 +1553,41 @@ class LocalRuntime:
1213
1553
  # Check if all connected inputs are None
1214
1554
  # This is the main condition for conditional routing
1215
1555
  has_non_none_input = False
1216
- for _, _, edge_data in incoming_edges:
1556
+
1557
+ # Count total connected inputs and None inputs from conditional sources
1558
+ total_connected_inputs = 0
1559
+ none_conditional_inputs = 0
1560
+
1561
+ for source_node_id, _, edge_data in incoming_edges:
1217
1562
  mapping = edge_data.get("mapping", {})
1218
1563
  for source_key, target_key in mapping.items():
1219
- if target_key in inputs and inputs[target_key] is not None:
1220
- has_non_none_input = True
1221
- break
1222
- if has_non_none_input:
1223
- break
1564
+ if target_key in inputs:
1565
+ total_connected_inputs += 1
1566
+ if inputs[target_key] is not None:
1567
+ has_non_none_input = True
1568
+ else:
1569
+ # Check if this None input came from conditional routing
1570
+ source_node = workflow._node_instances.get(source_node_id)
1571
+ is_from_conditional = (
1572
+ source_node
1573
+ and source_node.__class__.__name__ in ["SwitchNode"]
1574
+ ) or (
1575
+ hasattr(self, "_current_results")
1576
+ and source_node_id in self._current_results
1577
+ and self._current_results[source_node_id] is None
1578
+ )
1579
+ if is_from_conditional:
1580
+ none_conditional_inputs += 1
1581
+
1582
+ # Skip the node only if ALL connected inputs are None AND from conditional routing
1583
+ # This means nodes with mixed inputs (some None from conditional, some real data) should still execute
1584
+ if (
1585
+ total_connected_inputs > 0
1586
+ and none_conditional_inputs == total_connected_inputs
1587
+ ):
1588
+ return True
1224
1589
 
1225
- # Skip the node if all connected inputs are None
1226
- return not has_non_none_input
1590
+ return False
1227
1591
 
1228
1592
  def _should_stop_on_error(self, workflow: Workflow, node_id: str) -> bool:
1229
1593
  """Determine if execution should stop when a node fails.
@@ -1236,7 +1600,11 @@ class LocalRuntime:
1236
1600
  Whether to stop execution.
1237
1601
  """
1238
1602
  # Check if any downstream nodes depend on this node
1239
- has_dependents = workflow.graph.out_degree(node_id) > 0
1603
+ try:
1604
+ has_dependents = workflow.graph.out_degree(node_id) > 0
1605
+ except (TypeError, KeyError):
1606
+ # Handle case where node doesn't exist or graph issues
1607
+ has_dependents = False
1240
1608
 
1241
1609
  # For now, stop if the failed node has dependents
1242
1610
  # Future: implement configurable error handling policies
@@ -1675,3 +2043,1512 @@ class LocalRuntime:
1675
2043
  )
1676
2044
 
1677
2045
  return violations
2046
+
2047
+ def _has_conditional_patterns(self, workflow: Workflow) -> bool:
2048
+ """
2049
+ Check if workflow has conditional patterns (SwitchNodes) and is suitable for conditional execution.
2050
+
2051
+ CRITICAL: Only enable conditional execution for DAG workflows.
2052
+ Cyclic workflows must use normal execution to preserve cycle safety mechanisms.
2053
+
2054
+ Args:
2055
+ workflow: Workflow to check
2056
+
2057
+ Returns:
2058
+ True if workflow contains SwitchNode instances AND is a DAG (no cycles)
2059
+ """
2060
+ try:
2061
+ if not hasattr(workflow, "graph") or workflow.graph is None:
2062
+ return False
2063
+
2064
+ # CRITICAL: Check for cycles first - conditional execution is only safe for DAGs
2065
+ if self._workflow_has_cycles(workflow):
2066
+ self.logger.info(
2067
+ "Cyclic workflow detected - using normal execution to preserve cycle safety mechanisms"
2068
+ )
2069
+ return False
2070
+
2071
+ # Import here to avoid circular dependencies
2072
+ from kailash.analysis import ConditionalBranchAnalyzer
2073
+
2074
+ analyzer = ConditionalBranchAnalyzer(workflow)
2075
+ switch_nodes = analyzer._find_switch_nodes()
2076
+
2077
+ has_switches = len(switch_nodes) > 0
2078
+
2079
+ if has_switches:
2080
+ self.logger.debug(
2081
+ f"Found {len(switch_nodes)} SwitchNodes in DAG workflow - eligible for conditional execution"
2082
+ )
2083
+ else:
2084
+ self.logger.debug("No SwitchNodes found - using normal execution")
2085
+
2086
+ return has_switches
2087
+
2088
+ except Exception as e:
2089
+ self.logger.warning(f"Error checking conditional patterns: {e}")
2090
+ return False
2091
+
2092
+ def _workflow_has_cycles(self, workflow: Workflow) -> bool:
2093
+ """
2094
+ Detect if workflow has cycles using multiple detection methods.
2095
+
2096
+ Args:
2097
+ workflow: Workflow to check
2098
+
2099
+ Returns:
2100
+ True if workflow contains any cycles
2101
+ """
2102
+ try:
2103
+ # Method 1: Check for explicitly marked cycle connections
2104
+ if hasattr(workflow, "has_cycles") and callable(workflow.has_cycles):
2105
+ if workflow.has_cycles():
2106
+ self.logger.debug("Detected cycles via workflow.has_cycles()")
2107
+ return True
2108
+
2109
+ # Method 2: Check for cycle edges in connections
2110
+ if hasattr(workflow, "connections"):
2111
+ for connection in workflow.connections:
2112
+ if hasattr(connection, "cycle") and connection.cycle:
2113
+ self.logger.debug("Detected cycle via connection.cycle flag")
2114
+ return True
2115
+
2116
+ # Method 3: NetworkX graph cycle detection
2117
+ if hasattr(workflow, "graph") and workflow.graph is not None:
2118
+ import networkx as nx
2119
+
2120
+ is_dag = nx.is_directed_acyclic_graph(workflow.graph)
2121
+ if not is_dag:
2122
+ self.logger.debug("Detected cycles via NetworkX graph analysis")
2123
+ return True
2124
+
2125
+ # Method 4: Check graph edges for cycle metadata
2126
+ if hasattr(workflow, "graph") and workflow.graph is not None:
2127
+ for u, v, edge_data in workflow.graph.edges(data=True):
2128
+ if edge_data.get("cycle", False):
2129
+ self.logger.debug("Detected cycle via edge metadata")
2130
+ return True
2131
+
2132
+ return False
2133
+
2134
+ except Exception as e:
2135
+ self.logger.warning(f"Error detecting cycles: {e}")
2136
+ # On error, assume cycles exist for safety
2137
+ return True
2138
+
2139
+ async def _execute_conditional_approach(
2140
+ self,
2141
+ workflow: Workflow,
2142
+ parameters: dict[str, Any],
2143
+ task_manager: TaskManager,
2144
+ run_id: str,
2145
+ workflow_context: dict[str, Any],
2146
+ ) -> dict[str, dict[str, Any]]:
2147
+ """
2148
+ Execute workflow using conditional approach with two-phase execution.
2149
+
2150
+ Phase 1: Execute SwitchNodes to determine branches
2151
+ Phase 2: Execute only reachable nodes based on switch results
2152
+
2153
+ Args:
2154
+ workflow: Workflow to execute
2155
+ parameters: Node-specific parameters
2156
+ task_manager: Task manager for execution
2157
+ run_id: Unique run identifier
2158
+ workflow_context: Workflow execution context
2159
+
2160
+ Returns:
2161
+ Dictionary mapping node_id -> execution results
2162
+ """
2163
+ self.logger.info("Starting conditional execution approach")
2164
+ results = {}
2165
+ fallback_reason = None
2166
+ start_time = time.time()
2167
+ total_nodes = len(workflow.graph.nodes())
2168
+
2169
+ try:
2170
+ # Enhanced pre-execution validation
2171
+ if not self._validate_conditional_execution_prerequisites(workflow):
2172
+ fallback_reason = "Prerequisites validation failed"
2173
+ raise ValueError(
2174
+ f"Conditional execution prerequisites not met: {fallback_reason}"
2175
+ )
2176
+
2177
+ # Phase 1: Execute SwitchNodes to determine conditional branches
2178
+ self.logger.info("Phase 1: Executing SwitchNodes")
2179
+ phase1_results = await self._execute_switch_nodes(
2180
+ workflow=workflow,
2181
+ parameters=parameters,
2182
+ task_manager=task_manager,
2183
+ run_id=run_id,
2184
+ workflow_context=workflow_context,
2185
+ )
2186
+
2187
+ # Extract just switch results for validation and planning
2188
+ from kailash.analysis import ConditionalBranchAnalyzer
2189
+
2190
+ analyzer = ConditionalBranchAnalyzer(workflow)
2191
+ switch_node_ids = analyzer._find_switch_nodes()
2192
+ switch_results = {
2193
+ node_id: phase1_results[node_id]
2194
+ for node_id in switch_node_ids
2195
+ if node_id in phase1_results
2196
+ }
2197
+
2198
+ # Validate switch results before proceeding
2199
+ if not self._validate_switch_results(switch_results):
2200
+ fallback_reason = "Invalid switch results detected"
2201
+ raise ValueError(f"Switch results validation failed: {fallback_reason}")
2202
+
2203
+ # Add all phase 1 results to overall results
2204
+ results.update(phase1_results)
2205
+
2206
+ # Phase 2: Create pruned execution plan and execute remaining nodes
2207
+ self.logger.info("Phase 2: Creating and executing pruned plan")
2208
+ remaining_results = await self._execute_pruned_plan(
2209
+ workflow=workflow,
2210
+ switch_results=switch_results,
2211
+ parameters=parameters,
2212
+ task_manager=task_manager,
2213
+ run_id=run_id,
2214
+ workflow_context=workflow_context,
2215
+ existing_results=results,
2216
+ )
2217
+
2218
+ # Merge remaining results
2219
+ results.update(remaining_results)
2220
+
2221
+ # Final validation of conditional execution results
2222
+ if not self._validate_conditional_execution_results(results, workflow):
2223
+ fallback_reason = "Results validation failed"
2224
+ raise ValueError(
2225
+ f"Conditional execution results invalid: {fallback_reason}"
2226
+ )
2227
+
2228
+ # Performance tracking
2229
+ self._track_conditional_execution_performance(results, workflow)
2230
+
2231
+ # Record execution metrics for performance monitoring
2232
+ execution_time = time.time() - start_time
2233
+ nodes_executed = len(results)
2234
+ nodes_skipped = total_nodes - nodes_executed
2235
+
2236
+ self._record_execution_metrics(
2237
+ workflow=workflow,
2238
+ execution_time=execution_time,
2239
+ node_count=nodes_executed,
2240
+ skipped_nodes=nodes_skipped,
2241
+ execution_mode="skip_branches",
2242
+ )
2243
+
2244
+ # Log performance improvement
2245
+ if nodes_skipped > 0:
2246
+ skip_percentage = (nodes_skipped / total_nodes) * 100
2247
+ self.logger.info(
2248
+ f"Conditional execution performance: {skip_percentage:.1f}% reduction in executed nodes "
2249
+ f"({nodes_skipped}/{total_nodes} skipped)"
2250
+ )
2251
+
2252
+ self.logger.info(
2253
+ f"Conditional execution completed successfully: {nodes_executed} nodes executed"
2254
+ )
2255
+ return results
2256
+
2257
+ except Exception as e:
2258
+ # Enhanced error logging with fallback reasoning
2259
+ self.logger.error(f"Error in conditional execution approach: {e}")
2260
+ if fallback_reason:
2261
+ self.logger.warning(f"Fallback reason: {fallback_reason}")
2262
+
2263
+ # Log performance impact before fallback
2264
+ self._log_conditional_execution_failure(e, workflow, len(results))
2265
+
2266
+ # Enhanced fallback with detailed logging
2267
+ self.logger.warning(
2268
+ "Falling back to normal execution approach due to conditional execution failure"
2269
+ )
2270
+
2271
+ try:
2272
+ # Execute fallback with additional monitoring
2273
+ fallback_results, _ = await self._execute_async(
2274
+ workflow=workflow,
2275
+ parameters=parameters,
2276
+ task_manager=task_manager,
2277
+ )
2278
+
2279
+ # Track fallback usage for monitoring
2280
+ self._track_fallback_usage(workflow, str(e), fallback_reason)
2281
+
2282
+ return fallback_results
2283
+
2284
+ except Exception as fallback_error:
2285
+ self.logger.error(f"Fallback execution also failed: {fallback_error}")
2286
+ # If both conditional and fallback fail, re-raise the original error
2287
+ raise e from fallback_error
2288
+
2289
async def _execute_switch_nodes(
    self,
    workflow: Workflow,
    parameters: dict[str, Any],
    task_manager: TaskManager,
    run_id: str,
    workflow_context: dict[str, Any],
) -> dict[str, dict[str, Any]]:
    """
    Execute SwitchNodes first to determine conditional branches.

    Phase 1 of conditional execution: run every SwitchNode plus all of its
    (transitive) dependencies in topological order, so that branch decisions
    are available before the remaining nodes are planned in Phase 2.

    Args:
        workflow: Workflow being executed
        parameters: Node-specific parameters
        task_manager: Task manager for execution
        run_id: Unique run identifier
        workflow_context: Workflow execution context

    Returns:
        Dictionary mapping node_id -> execution results for ALL nodes run in
        this phase (switches and their dependencies), not just the switches.
    """
    self.logger.info("Phase 1: Executing SwitchNodes and their dependencies")
    all_phase1_results = {}  # Store ALL results from Phase 1, not just switches

    try:
        # Import here to avoid circular dependencies
        from kailash.analysis import ConditionalBranchAnalyzer

        # Check if we should use hierarchical switch execution
        analyzer = ConditionalBranchAnalyzer(workflow)
        switch_node_ids = analyzer._find_switch_nodes()

        if switch_node_ids and self._should_use_hierarchical_execution(
            workflow, switch_node_ids
        ):
            # Use hierarchical switch executor for complex switch patterns
            self.logger.info(
                "Using hierarchical switch execution for optimized performance"
            )
            from kailash.runtime.hierarchical_switch_executor import (
                HierarchicalSwitchExecutor,
            )

            executor = HierarchicalSwitchExecutor(workflow, debug=self.debug)

            # Adapter so the hierarchical executor reuses this runtime's
            # input preparation and single-node execution logic.
            async def node_executor(
                node_id,
                node_instance,
                all_results,
                parameters,
                task_manager,
                workflow,
                workflow_context,
            ):
                node_inputs = self._prepare_node_inputs(
                    workflow=workflow,
                    node_id=node_id,
                    node_instance=node_instance,
                    node_outputs=all_results,
                    parameters=parameters,
                )
                return await self._execute_single_node(
                    node_id=node_id,
                    node_instance=node_instance,
                    node_inputs=node_inputs,
                    task_manager=task_manager,
                    workflow=workflow,
                    workflow_context=workflow_context,
                    run_id=run_id,
                )

            # Execute switches hierarchically
            all_results, switch_results = (
                await executor.execute_switches_hierarchically(
                    parameters=parameters,
                    task_manager=task_manager,
                    run_id=run_id,
                    workflow_context=workflow_context,
                    node_executor=node_executor,
                )
            )

            # Log execution summary
            if self.debug:
                summary = executor.get_execution_summary(switch_results)
                self.logger.debug(f"Hierarchical execution summary: {summary}")

            return all_results

        # Otherwise, use standard execution
        self.logger.info("Using standard switch execution")

        if not switch_node_ids:
            self.logger.info("No SwitchNodes found in workflow")
            return all_phase1_results

        # Get topological order for all nodes
        all_nodes_order = list(nx.topological_sort(workflow.graph))

        # Find all nodes that switches depend on (need to execute these too)
        nodes_to_execute = set(switch_node_ids)
        for switch_id in switch_node_ids:
            # Get all predecessors (direct and indirect) of this switch
            nodes_to_execute.update(nx.ancestors(workflow.graph, switch_id))

        # Execute nodes in topological order, but only those needed for switches
        execution_order = [
            node_id for node_id in all_nodes_order if node_id in nodes_to_execute
        ]

        self.logger.info(
            f"Executing {len(execution_order)} nodes in Phase 1 (switches and their dependencies)"
        )
        self.logger.debug(f"Phase 1 execution order: {execution_order}")

        # Execute all nodes needed for switches in dependency order
        for node_id in execution_order:
            try:
                node_data = workflow.graph.nodes[node_id]
                # Try both 'node' and 'instance' keys for compatibility
                node_instance = node_data.get("node") or node_data.get("instance")

                if node_instance is None:
                    self.logger.warning(f"No instance found for node {node_id}")
                    continue

                # Prepare inputs for the node
                node_inputs = self._prepare_node_inputs(
                    workflow=workflow,
                    node_id=node_id,
                    node_instance=node_instance,
                    node_outputs=all_phase1_results,  # Use all results so far
                    parameters=parameters,
                )

                # CRITICAL FIX: ensure SwitchNodes don't mistakenly fall back
                # to their 'value' config parameter as 'input_data' when the
                # connected input is not yet available.
                # BUGFIX: this guard previously inspected `switch_id` — a stale
                # loop variable left over from the dependency-collection loop
                # above — and ran for non-switch nodes as well. It now inspects
                # the node actually being executed and applies only to switches.
                if node_id in switch_node_ids and (
                    not node_inputs or "input_data" not in node_inputs
                ):
                    # Get incoming edges to check if input_data is expected
                    has_input_connection = False
                    for edge in workflow.graph.in_edges(node_id, data=True):
                        mapping = edge[2].get("mapping", {})
                        if "input_data" in mapping.values():
                            has_input_connection = True
                            break

                    if has_input_connection:
                        # If input_data is expected from a connection but not
                        # available, explicitly set it to None to prevent
                        # config fallback
                        node_inputs["input_data"] = None

                self.logger.debug(f"Executing Phase 1 node: {node_id}")
                result = await self._execute_single_node(
                    node_id=node_id,
                    node_instance=node_instance,
                    node_inputs=node_inputs,
                    task_manager=task_manager,
                    workflow=workflow,
                    run_id=run_id,
                    workflow_context=workflow_context,
                )

                all_phase1_results[node_id] = result
                self.logger.debug(
                    f"Node {node_id} completed with result keys: {list(result.keys()) if isinstance(result, dict) else type(result)}"
                )

            except Exception as e:
                self.logger.error(f"Error executing node {node_id}: {e}")
                # Record the failure and continue with other nodes
                all_phase1_results[node_id] = {
                    "error": str(e),
                    "error_type": type(e).__name__,
                    "failed": True,
                }

        # Extract just switch results for the summary log
        switch_results = {
            node_id: all_phase1_results[node_id]
            for node_id in switch_node_ids
            if node_id in all_phase1_results
        }

        self.logger.info(
            f"Phase 1 completed: {len(all_phase1_results)} nodes executed ({len(switch_results)} switches)"
        )
        return all_phase1_results  # Return ALL results, not just switches

    except Exception as e:
        self.logger.error(f"Error in switch execution phase: {e}")
        return all_phase1_results
2486
+
2487
async def _execute_pruned_plan(
    self,
    workflow: Workflow,
    switch_results: dict[str, dict[str, Any]],
    parameters: dict[str, Any],
    task_manager: TaskManager,
    run_id: str,
    workflow_context: dict[str, Any],
    existing_results: dict[str, dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    """
    Execute pruned execution plan based on SwitchNode results.

    Phase 2 of conditional execution: asks the DynamicExecutionPlanner for a
    pruned node order given the switch outcomes, skips nodes already run in
    Phase 1 (except switches that produced no output and now have upstream
    data available, which are re-executed), and runs the remainder in order.

    Args:
        workflow: Workflow being executed
        switch_results: Results from SwitchNode execution
        parameters: Node-specific parameters
        task_manager: Task manager for execution
        run_id: Unique run identifier
        workflow_context: Workflow execution context
        existing_results: Results from previous execution phases

    Returns:
        Dictionary mapping node_id -> execution results for remaining nodes
        (does NOT include the Phase 1 results passed in via existing_results).
    """
    self.logger.info("Phase 2: Executing pruned plan based on switch results")
    remaining_results = {}

    try:
        # Import here to avoid circular dependencies
        from kailash.planning import DynamicExecutionPlanner

        planner = DynamicExecutionPlanner(workflow)

        # Create execution plan based on switch results
        execution_plan = planner.create_execution_plan(switch_results)
        self.logger.debug(
            f"DynamicExecutionPlanner returned plan: {execution_plan}"
        )

        # Remove nodes that were already executed, but check if switches need re-execution
        already_executed = set(existing_results.keys())
        self.logger.debug(
            f"Already executed nodes from Phase 1: {already_executed}"
        )
        self.logger.debug(f"Full execution plan for Phase 2: {execution_plan}")

        # Check which switches had incomplete execution (no input_data in phase 1).
        # A switch whose true_output AND false_output are both None ran before its
        # upstream data existed; if a Phase 2 node feeds it, it must run again.
        switches_needing_reexecution = set()
        for switch_id, result in switch_results.items():
            # If a switch executed with None input in phase 1, it needs re-execution
            if (
                result.get("true_output") is None
                and result.get("false_output") is None
                and switch_id in execution_plan
            ):
                # Check if this switch has dependencies that will now provide data
                has_dependencies = False
                for edge in workflow.graph.in_edges(switch_id):
                    source_node = edge[0]
                    if source_node in execution_plan:
                        has_dependencies = True
                        break

                if has_dependencies:
                    switches_needing_reexecution.add(switch_id)
                    self.logger.debug(
                        f"Switch {switch_id} needs re-execution with actual data"
                    )

        # Include switches that need re-execution AND any nodes not yet executed
        remaining_nodes = [
            node_id
            for node_id in execution_plan
            if node_id not in already_executed
            or node_id in switches_needing_reexecution
        ]

        # Debug log to understand what's happening
        not_executed = set(execution_plan) - already_executed
        self.logger.debug(
            f"Nodes in execution plan but not executed: {not_executed}"
        )
        self.logger.debug(
            f"Switches needing re-execution: {switches_needing_reexecution}"
        )
        self.logger.debug(f"Filtering logic: remaining_nodes = {remaining_nodes}")

        self.logger.info(
            f"Executing {len(remaining_nodes)} remaining nodes after pruning"
        )
        self.logger.debug(f"Remaining execution plan: {remaining_nodes}")

        # Execute remaining nodes in the pruned order
        for node_id in remaining_nodes:
            try:
                # Get node instance
                node_data = workflow.graph.nodes[node_id]
                # Try both 'node' and 'instance' keys for compatibility
                node_instance = node_data.get("node") or node_data.get("instance")

                if node_instance is None:
                    self.logger.warning(f"No instance found for node {node_id}")
                    continue

                # Prepare inputs using all results so far (switches + remaining).
                # Merge order matters: Phase 2 results override Phase 1 for
                # re-executed switches.
                all_results = {**existing_results, **remaining_results}
                node_inputs = self._prepare_node_inputs(
                    workflow=workflow,
                    node_id=node_id,
                    node_instance=node_instance,
                    node_outputs=all_results,
                    parameters=parameters,
                )

                # Execute the node
                self.logger.debug(f"Executing remaining node: {node_id}")
                result = await self._execute_single_node(
                    node_id=node_id,
                    node_instance=node_instance,
                    node_inputs=node_inputs,
                    task_manager=task_manager,
                    workflow=workflow,
                    run_id=run_id,
                    workflow_context=workflow_context,
                )

                remaining_results[node_id] = result
                self.logger.debug(f"Node {node_id} completed")

            except Exception as e:
                self.logger.error(f"Error executing remaining node {node_id}: {e}")
                # Continue with other nodes or stop based on error handling
                if self._should_stop_on_error(workflow, node_id):
                    raise
                else:
                    remaining_results[node_id] = {
                        "error": str(e),
                        "error_type": type(e).__name__,
                        "failed": True,
                    }

        self.logger.info(
            f"Phase 2 completed: {len(remaining_results)} remaining nodes executed"
        )
        return remaining_results

    except Exception as e:
        # Best-effort: return whatever completed before the failure.
        self.logger.error(f"Error in pruned plan execution: {e}")
        return remaining_results
2637
+
2638
async def _execute_single_node(
    self,
    node_id: str,
    node_instance: Any,
    node_inputs: dict[str, Any],
    task_manager: Any,
    workflow: Workflow,
    run_id: str,
    workflow_context: dict[str, Any],
) -> dict[str, Any]:
    """
    Execute a single node with proper validation and context setup.

    Args:
        node_id: Node identifier
        node_instance: Node instance to execute
        node_inputs: Prepared inputs for the node
        task_manager: Task manager for tracking (currently unused here)
        workflow: Workflow being executed
        run_id: Unique run identifier
        workflow_context: Workflow execution context

    Returns:
        Node execution results
    """
    # Validate inputs before execution
    from kailash.utils.data_validation import DataTypeValidator

    validated_inputs = DataTypeValidator.validate_node_input(node_id, node_inputs)

    # Set workflow context on the node instance. (The previous
    # hasattr-if/else performed this identical assignment in both branches,
    # so the branch was dead code — setattr works either way.)
    node_instance._workflow_context = workflow_context

    # Execute the node with unified async/sync support
    if self.enable_async and hasattr(node_instance, "execute_async"):
        # Use async execution method that includes validation
        outputs = await node_instance.execute_async(**validated_inputs)
    else:
        # Standard synchronous execution
        outputs = node_instance.execute(**validated_inputs)

    return outputs
2684
+
2685
+ def _should_use_hierarchical_execution(
2686
+ self, workflow: Workflow, switch_node_ids: List[str]
2687
+ ) -> bool:
2688
+ """
2689
+ Determine if hierarchical switch execution should be used.
2690
+
2691
+ Args:
2692
+ workflow: The workflow to analyze
2693
+ switch_node_ids: List of switch node IDs
2694
+
2695
+ Returns:
2696
+ True if hierarchical execution would be beneficial
2697
+ """
2698
+ # Use hierarchical execution if:
2699
+ # 1. There are multiple switches
2700
+ if len(switch_node_ids) < 2:
2701
+ return False
2702
+
2703
+ # 2. Check if switches have dependencies on each other
2704
+ from kailash.analysis import ConditionalBranchAnalyzer
2705
+
2706
+ analyzer = ConditionalBranchAnalyzer(workflow)
2707
+ hierarchy_info = analyzer.analyze_switch_hierarchies(switch_node_ids)
2708
+
2709
+ # Use hierarchical if there are multiple execution layers
2710
+ execution_layers = hierarchy_info.get("execution_layers", [])
2711
+ if len(execution_layers) > 1:
2712
+ self.logger.debug(
2713
+ f"Detected {len(execution_layers)} execution layers in switch hierarchy"
2714
+ )
2715
+ return True
2716
+
2717
+ # Use hierarchical if there are dependency chains
2718
+ dependency_chains = hierarchy_info.get("dependency_chains", [])
2719
+ if dependency_chains and any(len(chain) > 1 for chain in dependency_chains):
2720
+ self.logger.debug("Detected dependency chains in switch hierarchy")
2721
+ return True
2722
+
2723
+ return False
2724
+
2725
+ def _validate_conditional_execution_prerequisites(self, workflow: Workflow) -> bool:
2726
+ """
2727
+ Validate that workflow meets prerequisites for conditional execution.
2728
+
2729
+ Args:
2730
+ workflow: Workflow to validate
2731
+
2732
+ Returns:
2733
+ True if prerequisites are met, False otherwise
2734
+ """
2735
+ try:
2736
+ # Check if workflow has at least one SwitchNode
2737
+ from kailash.analysis import ConditionalBranchAnalyzer
2738
+
2739
+ analyzer = ConditionalBranchAnalyzer(workflow)
2740
+ switch_nodes = analyzer._find_switch_nodes()
2741
+
2742
+ if not switch_nodes:
2743
+ self.logger.debug(
2744
+ "No SwitchNodes found - cannot use conditional execution"
2745
+ )
2746
+ return False
2747
+
2748
+ # Check if workflow is too complex for conditional execution
2749
+ if len(workflow.graph.nodes) > 100: # Configurable threshold
2750
+ self.logger.warning(
2751
+ "Workflow too large for conditional execution optimization"
2752
+ )
2753
+ return False
2754
+
2755
+ # Validate that all SwitchNodes have proper outputs
2756
+ for switch_id in switch_nodes:
2757
+ node_data = workflow.graph.nodes[switch_id]
2758
+ node_instance = node_data.get("node") or node_data.get("instance")
2759
+
2760
+ if node_instance is None:
2761
+ self.logger.warning(f"SwitchNode {switch_id} has no instance")
2762
+ return False
2763
+
2764
+ # Check if the SwitchNode has proper output configuration
2765
+ # SwitchNode might store condition_field in different ways
2766
+ has_condition = (
2767
+ hasattr(node_instance, "condition_field")
2768
+ or hasattr(node_instance, "_condition_field")
2769
+ or (
2770
+ hasattr(node_instance, "parameters")
2771
+ and "condition_field"
2772
+ in getattr(node_instance, "parameters", {})
2773
+ )
2774
+ or "SwitchNode"
2775
+ in str(type(node_instance)) # Type-based validation as fallback
2776
+ )
2777
+
2778
+ if not has_condition:
2779
+ self.logger.debug(
2780
+ f"SwitchNode {switch_id} condition validation unclear - allowing execution"
2781
+ )
2782
+ # Don't fail here - let conditional execution attempt and fall back if needed
2783
+
2784
+ return True
2785
+
2786
+ except Exception as e:
2787
+ self.logger.warning(
2788
+ f"Error validating conditional execution prerequisites: {e}"
2789
+ )
2790
+ return False
2791
+
2792
+ def _validate_switch_results(
2793
+ self, switch_results: dict[str, dict[str, Any]]
2794
+ ) -> bool:
2795
+ """
2796
+ Validate that switch results are valid for conditional execution.
2797
+
2798
+ Args:
2799
+ switch_results: Results from SwitchNode execution
2800
+
2801
+ Returns:
2802
+ True if results are valid, False otherwise
2803
+ """
2804
+ try:
2805
+ if not switch_results:
2806
+ self.logger.debug("No switch results to validate")
2807
+ return True
2808
+
2809
+ for switch_id, result in switch_results.items():
2810
+ # Check for execution errors
2811
+ if isinstance(result, dict) and result.get("failed"):
2812
+ self.logger.warning(
2813
+ f"SwitchNode {switch_id} failed during execution"
2814
+ )
2815
+ return False
2816
+
2817
+ # Validate result structure
2818
+ if not isinstance(result, dict):
2819
+ self.logger.warning(
2820
+ f"SwitchNode {switch_id} returned invalid result type: {type(result)}"
2821
+ )
2822
+ return False
2823
+
2824
+ # Check for required output keys (at least one branch should be present)
2825
+ has_output = any(
2826
+ key in result for key in ["true_output", "false_output"]
2827
+ )
2828
+ if not has_output:
2829
+ self.logger.warning(
2830
+ f"SwitchNode {switch_id} missing required output keys"
2831
+ )
2832
+ return False
2833
+
2834
+ return True
2835
+
2836
+ except Exception as e:
2837
+ self.logger.warning(f"Error validating switch results: {e}")
2838
+ return False
2839
+
2840
+ def _validate_conditional_execution_results(
2841
+ self, results: dict[str, dict[str, Any]], workflow: Workflow
2842
+ ) -> bool:
2843
+ """
2844
+ Validate final results from conditional execution.
2845
+
2846
+ Args:
2847
+ results: Execution results
2848
+ workflow: Original workflow
2849
+
2850
+ Returns:
2851
+ True if results are valid, False otherwise
2852
+ """
2853
+ try:
2854
+ # Check that at least some nodes executed
2855
+ if not results:
2856
+ self.logger.warning("No results from conditional execution")
2857
+ return False
2858
+
2859
+ # Validate that critical nodes (if any) were executed
2860
+ # This could be expanded based on workflow metadata
2861
+ total_nodes = len(workflow.graph.nodes)
2862
+ executed_nodes = len(results)
2863
+
2864
+ # If we executed less than 30% of nodes, might be an issue
2865
+ if executed_nodes < (total_nodes * 0.3):
2866
+ self.logger.warning(
2867
+ f"Conditional execution only ran {executed_nodes}/{total_nodes} nodes - might indicate an issue"
2868
+ )
2869
+ # Don't fail here, but log for monitoring
2870
+
2871
+ # Check for excessive failures
2872
+ failed_nodes = sum(
2873
+ 1
2874
+ for result in results.values()
2875
+ if isinstance(result, dict) and result.get("failed")
2876
+ )
2877
+
2878
+ if failed_nodes > (executed_nodes * 0.5):
2879
+ self.logger.warning(
2880
+ f"Too many node failures: {failed_nodes}/{executed_nodes}"
2881
+ )
2882
+ return False
2883
+
2884
+ return True
2885
+
2886
+ except Exception as e:
2887
+ self.logger.warning(f"Error validating conditional execution results: {e}")
2888
+ return False
2889
+
2890
+ def _track_conditional_execution_performance(
2891
+ self, results: dict[str, dict[str, Any]], workflow: Workflow
2892
+ ):
2893
+ """
2894
+ Track performance metrics for conditional execution.
2895
+
2896
+ Args:
2897
+ results: Execution results
2898
+ workflow: Original workflow
2899
+ """
2900
+ try:
2901
+ total_nodes = len(workflow.graph.nodes)
2902
+ executed_nodes = len(results)
2903
+ skipped_nodes = total_nodes - executed_nodes
2904
+
2905
+ # Log performance metrics
2906
+ if skipped_nodes > 0:
2907
+ performance_improvement = (skipped_nodes / total_nodes) * 100
2908
+ self.logger.info(
2909
+ f"Conditional execution performance: {performance_improvement:.1f}% reduction in executed nodes ({skipped_nodes}/{total_nodes} skipped)"
2910
+ )
2911
+
2912
+ # Track for monitoring (could be sent to metrics system)
2913
+ if hasattr(self, "_performance_metrics"):
2914
+ self._performance_metrics["conditional_execution"] = {
2915
+ "total_nodes": total_nodes,
2916
+ "executed_nodes": executed_nodes,
2917
+ "skipped_nodes": skipped_nodes,
2918
+ "performance_improvement_percent": (
2919
+ (skipped_nodes / total_nodes) * 100 if total_nodes > 0 else 0
2920
+ ),
2921
+ }
2922
+
2923
+ except Exception as e:
2924
+ self.logger.warning(
2925
+ f"Error tracking conditional execution performance: {e}"
2926
+ )
2927
+
2928
+ def _log_conditional_execution_failure(
2929
+ self, error: Exception, workflow: Workflow, nodes_completed: int
2930
+ ):
2931
+ """
2932
+ Log detailed information about conditional execution failure.
2933
+
2934
+ Args:
2935
+ error: Exception that caused the failure
2936
+ workflow: Workflow that failed
2937
+ nodes_completed: Number of nodes that completed before failure
2938
+ """
2939
+ try:
2940
+ total_nodes = len(workflow.graph.nodes)
2941
+
2942
+ self.logger.error(
2943
+ f"Conditional execution failed after {nodes_completed}/{total_nodes} nodes"
2944
+ )
2945
+ self.logger.error(f"Error type: {type(error).__name__}")
2946
+ self.logger.error(f"Error message: {str(error)}")
2947
+
2948
+ # Log workflow characteristics for debugging
2949
+ from kailash.analysis import ConditionalBranchAnalyzer
2950
+
2951
+ analyzer = ConditionalBranchAnalyzer(workflow)
2952
+ switch_nodes = analyzer._find_switch_nodes()
2953
+
2954
+ self.logger.debug(
2955
+ f"Workflow characteristics: {len(switch_nodes)} switches, {total_nodes} total nodes"
2956
+ )
2957
+
2958
+ except Exception as log_error:
2959
+ self.logger.warning(
2960
+ f"Error logging conditional execution failure: {log_error}"
2961
+ )
2962
+
2963
+ def _track_fallback_usage(
2964
+ self, workflow: Workflow, error_message: str, fallback_reason: str
2965
+ ):
2966
+ """
2967
+ Track fallback usage for monitoring and optimization.
2968
+
2969
+ Args:
2970
+ workflow: Workflow that required fallback
2971
+ error_message: Error that triggered fallback
2972
+ fallback_reason: Reason for fallback
2973
+ """
2974
+ try:
2975
+ import time
2976
+
2977
+ # Log fallback usage
2978
+ self.logger.info(
2979
+ f"Fallback used for workflow '{workflow.name}': {fallback_reason}"
2980
+ )
2981
+
2982
+ # Track for monitoring (could be sent to metrics system)
2983
+ if hasattr(self, "_fallback_metrics"):
2984
+ if "fallback_usage" not in self._fallback_metrics:
2985
+ self._fallback_metrics["fallback_usage"] = []
2986
+
2987
+ self._fallback_metrics["fallback_usage"].append(
2988
+ {
2989
+ "workflow_name": workflow.name,
2990
+ "workflow_id": workflow.workflow_id,
2991
+ "error_message": error_message,
2992
+ "fallback_reason": fallback_reason,
2993
+ "timestamp": time.time(),
2994
+ }
2995
+ )
2996
+
2997
+ # Limit tracking history to prevent memory growth
2998
+ if (
2999
+ hasattr(self, "_fallback_metrics")
3000
+ and len(self._fallback_metrics.get("fallback_usage", [])) > 100
3001
+ ):
3002
+ self._fallback_metrics["fallback_usage"] = self._fallback_metrics[
3003
+ "fallback_usage"
3004
+ ][-50:]
3005
+
3006
+ except Exception as e:
3007
+ self.logger.warning(f"Error tracking fallback usage: {e}")
3008
+
3009
+ # ===== PHASE 5: PRODUCTION READINESS =====
3010
+
3011
def get_execution_plan_cached(
    self, workflow: "Workflow", switch_results: "Dict[str, Dict[str, Any]]"
) -> "List[str]":
    """
    Get execution plan with caching for improved performance.

    Args:
        workflow: Workflow to create execution plan for
        switch_results: Results from SwitchNode execution

    Returns:
        Cached or newly computed execution plan
    """
    # Key is derived from workflow structure plus switch outcomes.
    cache_key = self._create_execution_plan_cache_key(workflow, switch_results)

    cached_plan = self._execution_plan_cache.get(cache_key)
    if cached_plan is not None:
        self._analytics_data["cache_hits"] += 1
        self.logger.debug(f"Cache hit for execution plan: {cache_key[:32]}...")
        return cached_plan

    # Cache miss - compute a fresh plan.
    self._analytics_data["cache_misses"] += 1
    self.logger.debug(f"Cache miss for execution plan: {cache_key[:32]}...")

    try:
        from kailash.planning import DynamicExecutionPlanner

        execution_plan = DynamicExecutionPlanner(workflow).create_execution_plan(
            switch_results
        )

        # Bounded FIFO cache: evict the oldest entry once at capacity.
        if len(self._execution_plan_cache) >= 100:
            del self._execution_plan_cache[next(iter(self._execution_plan_cache))]
        self._execution_plan_cache[cache_key] = execution_plan

    except Exception as e:
        self.logger.warning(f"Error creating cached execution plan: {e}")
        # Fallback to basic topological order
        execution_plan = list(nx.topological_sort(workflow.graph))

    return execution_plan
3056
+
3057
+ def _create_execution_plan_cache_key(
3058
+ self, workflow: Workflow, switch_results: Dict[str, Dict[str, Any]]
3059
+ ) -> str:
3060
+ """
3061
+ Create cache key for execution plan.
3062
+
3063
+ Args:
3064
+ workflow: Workflow instance
3065
+ switch_results: SwitchNode results
3066
+
3067
+ Returns:
3068
+ Cache key string
3069
+ """
3070
+ import json
3071
+
3072
+ try:
3073
+ # Create key from workflow structure + switch results
3074
+ workflow_key = f"{workflow.workflow_id}_{len(workflow.graph.nodes)}_{len(workflow.graph.edges)}"
3075
+
3076
+ # Sort switch results for consistent caching
3077
+ sorted_results = {}
3078
+ for switch_id, result in switch_results.items():
3079
+ if isinstance(result, dict):
3080
+ # Create deterministic representation
3081
+ sorted_results[switch_id] = {
3082
+ k: v
3083
+ for k, v in sorted(result.items())
3084
+ if k in ["true_output", "false_output", "condition_result"]
3085
+ }
3086
+
3087
+ results_str = json.dumps(sorted_results, sort_keys=True, default=str)
3088
+ combined_key = f"{workflow_key}:{results_str}"
3089
+
3090
+ # Hash to fixed length
3091
+ return hashlib.md5(combined_key.encode()).hexdigest()
3092
+
3093
+ except Exception as e:
3094
+ self.logger.warning(f"Error creating cache key: {e}")
3095
+ # Fallback to simple key
3096
+ return f"{workflow.workflow_id}_{hash(str(switch_results))}"
3097
+
3098
def get_execution_analytics(self) -> Dict[str, Any]:
    """
    Build a snapshot of execution analytics for monitoring and optimization.

    Returns:
        Dictionary containing cache performance, conditional-execution
        statistics, recent performance history, execution patterns and
        optimization stats.
    """
    data = self._analytics_data
    hits = data["cache_hits"]
    misses = data["cache_misses"]
    conditional_runs = data["conditional_executions"]

    analytics = {
        "cache_performance": {
            "hits": hits,
            "misses": misses,
            # max(1, ...) guards division by zero before any lookups happened.
            "hit_rate": hits / max(1, hits + misses),
        },
        "conditional_execution_stats": {
            "total_executions": len(conditional_runs),
            "average_performance_improvement": 0.0,
            "fallback_rate": 0.0,
        },
        # Expose only the 50 most recent executions.
        "performance_history": data["performance_history"][-50:],
        "execution_patterns": data["execution_patterns"],
        "optimization_stats": data["optimization_stats"],
    }

    # Derived statistics only make sense when at least one run is recorded.
    if conditional_runs:
        improvements = [
            run.get("performance_improvement", 0) for run in conditional_runs
        ]
        stats = analytics["conditional_execution_stats"]
        stats["average_performance_improvement"] = sum(improvements) / len(
            improvements
        )

        fallback_count = sum(
            1 for run in conditional_runs if run.get("used_fallback", False)
        )
        stats["fallback_rate"] = fallback_count / len(conditional_runs)

    # Cache statistics: efficiency is fill level relative to the 100-entry cap.
    cache_size = len(self._execution_plan_cache)
    analytics["cache_performance"]["cache_size"] = cache_size
    analytics["cache_performance"]["cache_efficiency"] = min(1.0, cache_size / 100.0)

    return analytics
3155
+
3156
def record_execution_performance(
    self,
    workflow: "Workflow",
    execution_time: float,
    nodes_executed: int,
    used_conditional: bool,
    performance_improvement: float = 0.0,
):
    """
    Record execution performance for analytics.

    Appends a record to the performance history (trimmed to 500 entries once
    it exceeds 1000), tracks conditional executions, and maintains running
    averages per workflow-size pattern.

    Args:
        workflow: Workflow that was executed
        execution_time: Total execution time in seconds
        nodes_executed: Number of nodes actually executed
        used_conditional: Whether conditional execution was used
        performance_improvement: Performance improvement percentage (0.0-1.0)
    """
    # Capture a single timestamp so the history record and the conditional
    # record stay consistent (previously time.time() was called twice, and a
    # redundant function-level `import time` shadowed the module-level import).
    now = time.time()

    performance_record = {
        "timestamp": now,
        "workflow_id": workflow.workflow_id,
        "workflow_name": workflow.name,
        "total_nodes": len(workflow.graph.nodes),
        "executed_nodes": nodes_executed,
        "execution_time": execution_time,
        "used_conditional_execution": used_conditional,
        "performance_improvement": performance_improvement,
        # max(0.001, ...) guards division by zero for sub-millisecond runs.
        "nodes_per_second": nodes_executed / max(0.001, execution_time),
    }

    # Add to performance history
    history = self._analytics_data["performance_history"]
    history.append(performance_record)

    # Limit history size to prevent unbounded memory growth.
    if len(history) > 1000:
        self._analytics_data["performance_history"] = history[-500:]

    # Record conditional execution if used
    if used_conditional:
        self._analytics_data["conditional_executions"].append(
            {
                "timestamp": now,
                "workflow_id": workflow.workflow_id,
                "performance_improvement": performance_improvement,
                "nodes_skipped": len(workflow.graph.nodes) - nodes_executed,
                "used_fallback": False,  # Set by fallback tracking
            }
        )

    # Update execution patterns: running averages keyed by workflow size.
    pattern_key = f"{len(workflow.graph.nodes)}_nodes"
    patterns = self._analytics_data["execution_patterns"]
    if pattern_key not in patterns:
        patterns[pattern_key] = {
            "count": 0,
            "avg_execution_time": 0.0,
            "avg_performance_improvement": 0.0,
        }

    pattern = patterns[pattern_key]
    pattern["count"] += 1
    # Incremental running-average update: new_avg = (old_avg*(n-1) + x) / n.
    pattern["avg_execution_time"] = (
        pattern["avg_execution_time"] * (pattern["count"] - 1) + execution_time
    ) / pattern["count"]
    if used_conditional:
        pattern["avg_performance_improvement"] = (
            pattern["avg_performance_improvement"] * (pattern["count"] - 1)
            + performance_improvement
        ) / pattern["count"]
3228
+
3229
def clear_analytics_data(self, keep_patterns: bool = True):
    """
    Reset analytics counters and caches for fresh monitoring.

    Args:
        keep_patterns: Whether to preserve execution patterns and
            optimization statistics.
    """
    data = self._analytics_data
    data["conditional_executions"] = []
    data["performance_history"] = []
    data["cache_hits"] = 0
    data["cache_misses"] = 0

    # Pattern/optimization data survives by default so long-term trends
    # are not lost on routine resets.
    if not keep_patterns:
        data["execution_patterns"] = {}
        data["optimization_stats"] = {}

    # Drop all cached execution plans.
    self._execution_plan_cache.clear()

    self.logger.info("Analytics data cleared")
3249
+
3250
+ def get_health_diagnostics(self) -> Dict[str, Any]:
3251
+ """
3252
+ Get health diagnostics for monitoring system health.
3253
+
3254
+ Returns:
3255
+ Dictionary containing health check results
3256
+ """
3257
+ import os
3258
+ import time
3259
+
3260
+ diagnostics = {
3261
+ "timestamp": time.time(),
3262
+ "runtime_health": "healthy",
3263
+ "cache_health": "healthy",
3264
+ "performance_health": "healthy",
3265
+ "memory_usage": {},
3266
+ "cache_statistics": {},
3267
+ "performance_indicators": {},
3268
+ "warnings": [],
3269
+ "errors": [],
3270
+ }
3271
+
3272
+ try:
3273
+ # Memory usage
3274
+ process = psutil.Process(os.getpid())
3275
+ memory_info = process.memory_info()
3276
+ diagnostics["memory_usage"] = {
3277
+ "rss_mb": memory_info.rss / 1024 / 1024,
3278
+ "vms_mb": memory_info.vms / 1024 / 1024,
3279
+ "percent": process.memory_percent(),
3280
+ }
3281
+
3282
+ # Cache health
3283
+ cache_size = len(self._execution_plan_cache)
3284
+ analytics = self.get_execution_analytics()
3285
+ cache_hit_rate = analytics["cache_performance"]["hit_rate"]
3286
+
3287
+ diagnostics["cache_statistics"] = {
3288
+ "size": cache_size,
3289
+ "hit_rate": cache_hit_rate,
3290
+ "hits": analytics["cache_performance"]["hits"],
3291
+ "misses": analytics["cache_performance"]["misses"],
3292
+ }
3293
+
3294
+ # Performance indicators
3295
+ recent_executions = self._analytics_data["performance_history"][-10:]
3296
+ if recent_executions:
3297
+ avg_execution_time = sum(
3298
+ e["execution_time"] for e in recent_executions
3299
+ ) / len(recent_executions)
3300
+ avg_improvement = sum(
3301
+ e["performance_improvement"] for e in recent_executions
3302
+ ) / len(recent_executions)
3303
+
3304
+ diagnostics["performance_indicators"] = {
3305
+ "avg_execution_time": avg_execution_time,
3306
+ "avg_performance_improvement": avg_improvement,
3307
+ "recent_executions": len(recent_executions),
3308
+ }
3309
+
3310
+ # Health checks
3311
+ if (
3312
+ cache_hit_rate < 0.3
3313
+ and analytics["cache_performance"]["hits"]
3314
+ + analytics["cache_performance"]["misses"]
3315
+ > 10
3316
+ ):
3317
+ diagnostics["warnings"].append(
3318
+ "Low cache hit rate - consider workflow optimization"
3319
+ )
3320
+ diagnostics["cache_health"] = "warning"
3321
+
3322
+ if diagnostics["memory_usage"]["percent"] > 80:
3323
+ diagnostics["warnings"].append("High memory usage detected")
3324
+ diagnostics["runtime_health"] = "warning"
3325
+
3326
+ if recent_executions and avg_execution_time > 5.0:
3327
+ diagnostics["warnings"].append("Slow execution times detected")
3328
+ diagnostics["performance_health"] = "warning"
3329
+
3330
+ except Exception as e:
3331
+ diagnostics["errors"].append(f"Health check error: {e}")
3332
+ diagnostics["runtime_health"] = "error"
3333
+
3334
+ return diagnostics
3335
+
3336
+ def optimize_runtime_performance(self) -> Dict[str, Any]:
3337
+ """
3338
+ Optimize runtime performance based on analytics data.
3339
+
3340
+ Returns:
3341
+ Dictionary describing optimizations applied
3342
+ """
3343
+ optimization_result = {
3344
+ "optimizations_applied": [],
3345
+ "performance_impact": {},
3346
+ "recommendations": [],
3347
+ "cache_optimizations": {},
3348
+ "memory_optimizations": {},
3349
+ }
3350
+
3351
+ try:
3352
+ # Cache optimization
3353
+ cache_analytics = self.get_execution_analytics()["cache_performance"]
3354
+
3355
+ if (
3356
+ cache_analytics["hit_rate"] < 0.5
3357
+ and cache_analytics["hits"] + cache_analytics["misses"] > 20
3358
+ ):
3359
+ # Poor cache performance - clear and rebuild
3360
+ old_size = len(self._execution_plan_cache)
3361
+ self._execution_plan_cache.clear()
3362
+ optimization_result["optimizations_applied"].append("cache_clear")
3363
+ optimization_result["cache_optimizations"]["cleared_entries"] = old_size
3364
+ optimization_result["recommendations"].append(
3365
+ "Consider using more consistent workflows for better caching"
3366
+ )
3367
+
3368
+ # Memory optimization
3369
+ if len(self._analytics_data["performance_history"]) > 500:
3370
+ old_count = len(self._analytics_data["performance_history"])
3371
+ self._analytics_data["performance_history"] = self._analytics_data[
3372
+ "performance_history"
3373
+ ][-250:]
3374
+ optimization_result["optimizations_applied"].append("history_cleanup")
3375
+ optimization_result["memory_optimizations"][
3376
+ "history_entries_removed"
3377
+ ] = (old_count - 250)
3378
+
3379
+ # Execution pattern analysis
3380
+ patterns = self._analytics_data["execution_patterns"]
3381
+ if patterns:
3382
+ most_common_pattern = max(patterns.items(), key=lambda x: x[1]["count"])
3383
+ optimization_result["recommendations"].append(
3384
+ f"Most common pattern: {most_common_pattern[0]} with {most_common_pattern[1]['count']} executions"
3385
+ )
3386
+
3387
+ # Suggest optimizations based on patterns
3388
+ for pattern_key, pattern_data in patterns.items():
3389
+ if pattern_data["avg_execution_time"] > 3.0:
3390
+ optimization_result["recommendations"].append(
3391
+ f"Consider optimizing workflows with {pattern_key} - avg time: {pattern_data['avg_execution_time']:.2f}s"
3392
+ )
3393
+
3394
+ self.logger.info(
3395
+ f"Runtime optimization completed: {len(optimization_result['optimizations_applied'])} optimizations applied"
3396
+ )
3397
+
3398
+ except Exception as e:
3399
+ self.logger.warning(f"Error during runtime optimization: {e}")
3400
+ optimization_result["error"] = str(e)
3401
+
3402
+ return optimization_result
3403
+
3404
+ # ===== PHASE 3 COMPLETION: Performance Monitoring & Compatibility =====
3405
+
3406
+ def _check_performance_switch(self, current_mode: str) -> Tuple[bool, str, str]:
3407
+ """Check if execution mode should be switched based on performance.
3408
+
3409
+ Args:
3410
+ current_mode: Current execution mode
3411
+
3412
+ Returns:
3413
+ Tuple of (should_switch, recommended_mode, reason)
3414
+ """
3415
+ # Initialize performance monitor if needed
3416
+ if self._performance_monitor is None:
3417
+ self._performance_monitor = PerformanceMonitor()
3418
+
3419
+ return self._performance_monitor.should_switch_mode(current_mode)
3420
+
3421
+ def _record_execution_metrics(
3422
+ self,
3423
+ workflow: Workflow,
3424
+ execution_time: float,
3425
+ node_count: int,
3426
+ skipped_nodes: int,
3427
+ execution_mode: str,
3428
+ ) -> None:
3429
+ """Record execution metrics for performance monitoring.
3430
+
3431
+ Args:
3432
+ workflow: Executed workflow
3433
+ execution_time: Total execution time
3434
+ node_count: Number of nodes executed
3435
+ skipped_nodes: Number of nodes skipped
3436
+ execution_mode: Execution mode used
3437
+ """
3438
+ if not self._enable_performance_monitoring:
3439
+ return
3440
+
3441
+ # Initialize performance monitor if needed
3442
+ if self._performance_monitor is None:
3443
+ self._performance_monitor = PerformanceMonitor()
3444
+
3445
+ metrics = ExecutionMetrics(
3446
+ execution_time=execution_time,
3447
+ node_count=node_count,
3448
+ skipped_nodes=skipped_nodes,
3449
+ execution_mode=execution_mode,
3450
+ )
3451
+
3452
+ self._performance_monitor.record_execution(metrics)
3453
+
3454
+ def get_performance_report(self) -> Dict[str, Any]:
3455
+ """Get performance monitoring report.
3456
+
3457
+ Returns:
3458
+ Performance statistics and recommendations
3459
+ """
3460
+ if self._performance_monitor is None:
3461
+ return {"status": "Performance monitoring not initialized"}
3462
+
3463
+ return self._performance_monitor.get_performance_report()
3464
+
3465
+ def generate_compatibility_report(self, workflow: Workflow) -> Dict[str, Any]:
3466
+ """Generate compatibility report for a workflow.
3467
+
3468
+ Args:
3469
+ workflow: Workflow to analyze
3470
+
3471
+ Returns:
3472
+ Compatibility report dictionary
3473
+ """
3474
+ if not self._enable_compatibility_reporting:
3475
+ return {"status": "Compatibility reporting disabled"}
3476
+
3477
+ # Initialize reporter if needed
3478
+ if self._compatibility_reporter is None:
3479
+ self._compatibility_reporter = CompatibilityReporter()
3480
+
3481
+ report = self._compatibility_reporter.analyze_workflow(workflow)
3482
+ return report.to_dict()
3483
+
3484
+ def get_compatibility_report_markdown(self, workflow: Workflow) -> str:
3485
+ """Generate compatibility report in markdown format.
3486
+
3487
+ Args:
3488
+ workflow: Workflow to analyze
3489
+
3490
+ Returns:
3491
+ Markdown formatted report
3492
+ """
3493
+ if not self._enable_compatibility_reporting:
3494
+ return "# Compatibility reporting disabled"
3495
+
3496
+ # Initialize reporter if needed
3497
+ if self._compatibility_reporter is None:
3498
+ self._compatibility_reporter = CompatibilityReporter()
3499
+
3500
+ report = self._compatibility_reporter.analyze_workflow(workflow)
3501
+ return report.to_markdown()
3502
+
3503
+ def set_performance_monitoring(self, enabled: bool) -> None:
3504
+ """Enable or disable performance monitoring.
3505
+
3506
+ Args:
3507
+ enabled: Whether to enable performance monitoring
3508
+ """
3509
+ self._enable_performance_monitoring = enabled
3510
+ self.logger.info(
3511
+ f"Performance monitoring {'enabled' if enabled else 'disabled'}"
3512
+ )
3513
+
3514
+ def set_automatic_mode_switching(self, enabled: bool) -> None:
3515
+ """Enable or disable automatic mode switching based on performance.
3516
+
3517
+ Args:
3518
+ enabled: Whether to enable automatic switching
3519
+ """
3520
+ self._performance_switch_enabled = enabled
3521
+ self.logger.info(
3522
+ f"Automatic mode switching {'enabled' if enabled else 'disabled'}"
3523
+ )
3524
+
3525
+ def set_compatibility_reporting(self, enabled: bool) -> None:
3526
+ """Enable or disable compatibility reporting.
3527
+
3528
+ Args:
3529
+ enabled: Whether to enable compatibility reporting
3530
+ """
3531
+ self._enable_compatibility_reporting = enabled
3532
+ self.logger.info(
3533
+ f"Compatibility reporting {'enabled' if enabled else 'disabled'}"
3534
+ )
3535
+
3536
+ def get_execution_path_debug_info(self) -> Dict[str, Any]:
3537
+ """Get detailed debug information about execution paths.
3538
+
3539
+ Returns:
3540
+ Debug information including execution decisions and paths
3541
+ """
3542
+ debug_info = {
3543
+ "conditional_execution_mode": self.conditional_execution,
3544
+ "performance_monitoring_enabled": self._enable_performance_monitoring,
3545
+ "automatic_switching_enabled": self._performance_switch_enabled,
3546
+ "compatibility_reporting_enabled": self._enable_compatibility_reporting,
3547
+ "fallback_metrics": self._fallback_metrics,
3548
+ "execution_analytics": self.get_execution_analytics(),
3549
+ }
3550
+
3551
+ if self._performance_monitor:
3552
+ debug_info["performance_report"] = self.get_performance_report()
3553
+
3554
+ return debug_info