kailash 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +35 -5
- kailash/adapters/__init__.py +5 -0
- kailash/adapters/mcp_platform_adapter.py +273 -0
- kailash/channels/__init__.py +21 -0
- kailash/channels/api_channel.py +409 -0
- kailash/channels/base.py +271 -0
- kailash/channels/cli_channel.py +661 -0
- kailash/channels/event_router.py +496 -0
- kailash/channels/mcp_channel.py +648 -0
- kailash/channels/session.py +423 -0
- kailash/mcp_server/discovery.py +1 -1
- kailash/middleware/mcp/enhanced_server.py +22 -16
- kailash/nexus/__init__.py +21 -0
- kailash/nexus/factory.py +413 -0
- kailash/nexus/gateway.py +545 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/iterative_llm_agent.py +988 -17
- kailash/nodes/ai/llm_agent.py +29 -9
- kailash/nodes/api/__init__.py +2 -2
- kailash/nodes/api/monitoring.py +1 -1
- kailash/nodes/base_async.py +54 -14
- kailash/nodes/code/async_python.py +1 -1
- kailash/nodes/data/bulk_operations.py +939 -0
- kailash/nodes/data/query_builder.py +373 -0
- kailash/nodes/data/query_cache.py +512 -0
- kailash/nodes/monitoring/__init__.py +10 -0
- kailash/nodes/monitoring/deadlock_detector.py +964 -0
- kailash/nodes/monitoring/performance_anomaly.py +1078 -0
- kailash/nodes/monitoring/race_condition_detector.py +1151 -0
- kailash/nodes/monitoring/transaction_metrics.py +790 -0
- kailash/nodes/monitoring/transaction_monitor.py +931 -0
- kailash/nodes/system/__init__.py +17 -0
- kailash/nodes/system/command_parser.py +820 -0
- kailash/nodes/transaction/__init__.py +48 -0
- kailash/nodes/transaction/distributed_transaction_manager.py +983 -0
- kailash/nodes/transaction/saga_coordinator.py +652 -0
- kailash/nodes/transaction/saga_state_storage.py +411 -0
- kailash/nodes/transaction/saga_step.py +467 -0
- kailash/nodes/transaction/transaction_context.py +756 -0
- kailash/nodes/transaction/two_phase_commit.py +978 -0
- kailash/nodes/transform/processors.py +17 -1
- kailash/nodes/validation/__init__.py +21 -0
- kailash/nodes/validation/test_executor.py +532 -0
- kailash/nodes/validation/validation_nodes.py +447 -0
- kailash/resources/factory.py +1 -1
- kailash/runtime/async_local.py +84 -21
- kailash/runtime/local.py +21 -2
- kailash/runtime/parameter_injector.py +187 -31
- kailash/security.py +16 -1
- kailash/servers/__init__.py +32 -0
- kailash/servers/durable_workflow_server.py +430 -0
- kailash/servers/enterprise_workflow_server.py +466 -0
- kailash/servers/gateway.py +183 -0
- kailash/servers/workflow_server.py +290 -0
- kailash/utils/data_validation.py +192 -0
- kailash/workflow/builder.py +291 -12
- kailash/workflow/validation.py +144 -8
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/METADATA +1 -1
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/RECORD +63 -25
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/WHEEL +0 -0
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/top_level.txt +0 -0
@@ -3,12 +3,21 @@
 import time
 from dataclasses import dataclass, field
 from datetime import datetime
-from
+from enum import Enum
+from typing import Any, Callable, Dict, List, Optional
 
 from kailash.nodes.ai.llm_agent import LLMAgentNode
 from kailash.nodes.base import NodeParameter, register_node
 
 
+class ConvergenceMode(Enum):
+    """Convergence modes for iterative agent."""
+
+    SATISFACTION = "satisfaction"  # Original confidence-based
+    TEST_DRIVEN = "test_driven"  # New validation-based
+    HYBRID = "hybrid"  # Combination of both
+
+
 @dataclass
 class IterationState:
     """State tracking for a single iteration."""
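The new enum is resolved from the plain string passed as `convergence_mode` (see the `ConvergenceMode(kwargs.get("convergence_mode", "satisfaction"))` call later in this diff). A minimal standalone sketch of that lookup, not taken from the package:

```python
from enum import Enum

# Stand-alone copy of the enum for illustration (matches the hunk above).
class ConvergenceMode(Enum):
    SATISFACTION = "satisfaction"
    TEST_DRIVEN = "test_driven"
    HYBRID = "hybrid"

# execute() resolves the string parameter through the enum constructor,
# so an unknown mode fails loudly instead of silently degrading.
assert ConvergenceMode("test_driven") is ConvergenceMode.TEST_DRIVEN
try:
    ConvergenceMode("typo")
except ValueError as exc:
    print(exc)  # 'typo' is not a valid ConvergenceMode
```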
@@ -85,30 +94,42 @@ class IterativeLLMAgentNode(LLMAgentNode):
     Key Features:
     - Progressive MCP discovery without pre-configuration
     - 6-phase iterative process (Discovery → Planning → Execution → Reflection → Convergence → Synthesis)
+    - **Real MCP tool execution** - Calls actual MCP tools instead of mock execution
     - Semantic tool understanding and capability mapping
     - Adaptive strategy based on iteration results
     - Smart convergence criteria and resource management
+    - Configurable execution modes (real MCP vs mock for testing)
 
     Examples:
-        >>> # Basic iterative agent
+        >>> # Basic iterative agent with real MCP execution
         >>> agent = IterativeLLMAgentNode()
         >>> result = agent.execute(
         ...     messages=[{"role": "user", "content": "Find and analyze healthcare AI trends"}],
         ...     mcp_servers=["http://localhost:8080"],
-        ...     max_iterations=3
+        ...     max_iterations=3,
+        ...     use_real_mcp=True  # Enables real MCP tool execution
         ... )
 
-        >>> # Advanced iterative agent with custom convergence
+        >>> # Advanced iterative agent with custom convergence and real MCP
         >>> result = agent.execute(
         ...     messages=[{"role": "user", "content": "Research and recommend AI implementation strategy"}],
         ...     mcp_servers=["http://ai-registry:8080", "http://knowledge-base:8081"],
         ...     max_iterations=5,
         ...     discovery_mode="semantic",
+        ...     use_real_mcp=True,  # Use real MCP tools
         ...     convergence_criteria={
         ...         "goal_satisfaction": {"threshold": 0.9},
         ...         "diminishing_returns": {"min_improvement": 0.1}
         ...     }
         ... )
+
+        >>> # Test mode with mock execution for development
+        >>> result = agent.execute(
+        ...     messages=[{"role": "user", "content": "Test query"}],
+        ...     mcp_servers=["http://localhost:8080"],
+        ...     max_iterations=2,
+        ...     use_real_mcp=False  # Uses mock execution for testing
+        ... )
     """
 
     def get_parameters(self) -> dict[str, NodeParameter]:
@@ -136,8 +157,17 @@ class IterativeLLMAgentNode(LLMAgentNode):
                 - quality_gates: {"min_confidence": 0.7, "custom_validator": callable} - Quality checks with optional custom validator
                 - resource_limits: {"max_cost": 1.0, "max_time": 300} - Hard resource limits
                 - custom_criteria: [{"name": "my_check", "function": callable, "weight": 0.5}] - User-defined criteria
+                - test_requirements: {"syntax_valid": True, "executes_without_error": True} - Test-driven requirements
                 """,
             ),
+            # Convergence Mode
+            "convergence_mode": NodeParameter(
+                name="convergence_mode",
+                type=str,
+                required=False,
+                default="satisfaction",
+                description="Convergence mode: satisfaction (default), test_driven, or hybrid",
+            ),
             # Discovery Configuration
             "discovery_mode": NodeParameter(
                 name="discovery_mode",
@@ -168,6 +198,25 @@ class IterativeLLMAgentNode(LLMAgentNode):
                 default="dynamic",
                 description="How to adapt strategy: static, dynamic, ml_guided",
             ),
+            # Test-driven specific parameters
+            "enable_auto_validation": NodeParameter(
+                name="enable_auto_validation",
+                type=bool,
+                required=False,
+                default=True,
+                description="Automatically add validation tools to MCP servers",
+            ),
+            "validation_strategy": NodeParameter(
+                name="validation_strategy",
+                type=dict,
+                required=False,
+                default={
+                    "progressive": True,  # Start with syntax, move to semantic
+                    "fail_fast": True,  # Stop on first validation failure
+                    "auto_fix": True,  # Attempt to fix validation errors
+                },
+                description="Strategy for validation execution",
+            ),
             # Performance and Monitoring
             "enable_detailed_logging": NodeParameter(
                 name="enable_detailed_logging",
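Combined with `convergence_mode` above, these parameters drive the new test-driven convergence path. A hedged usage sketch assembled from the parameter names and defaults in this diff (the task text and server URL are invented):

```python
agent = IterativeLLMAgentNode()
result = agent.execute(
    messages=[{"role": "user", "content": "Generate a CSV parsing helper"}],
    mcp_servers=["http://localhost:8080"],
    convergence_mode="test_driven",   # satisfaction | test_driven | hybrid
    enable_auto_validation=True,      # injects the builtin_validation server (see execute() hunk below)
    validation_strategy={"progressive": True, "fail_fast": False, "auto_fix": True},
    convergence_criteria={
        "test_requirements": {"syntax_valid": True, "executes_without_error": True},
        "resource_limits": {"max_time": 300, "max_iterations": 10},
    },
)
```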
@@ -183,6 +232,14 @@ class IterativeLLMAgentNode(LLMAgentNode):
                 default=300,
                 description="Timeout for each iteration in seconds",
             ),
+            # MCP Execution Control
+            "use_real_mcp": NodeParameter(
+                name="use_real_mcp",
+                type=bool,
+                required=False,
+                default=True,
+                description="Use real MCP tool execution instead of mock execution",
+            ),
         }
 
         # Merge base parameters with iterative parameters
@@ -209,6 +266,9 @@ class IterativeLLMAgentNode(LLMAgentNode):
         # Extract iterative-specific parameters
         max_iterations = kwargs.get("max_iterations", 5)
         convergence_criteria = kwargs.get("convergence_criteria", {})
+        convergence_mode = ConvergenceMode(
+            kwargs.get("convergence_mode", "satisfaction")
+        )
         discovery_mode = kwargs.get("discovery_mode", "progressive")
         discovery_budget = kwargs.get(
             "discovery_budget", {"max_servers": 5, "max_tools": 20, "max_resources": 50}
@@ -216,8 +276,27 @@ class IterativeLLMAgentNode(LLMAgentNode):
         reflection_enabled = kwargs.get("reflection_enabled", True)
         adaptation_strategy = kwargs.get("adaptation_strategy", "dynamic")
         enable_detailed_logging = kwargs.get("enable_detailed_logging", True)
+        enable_auto_validation = kwargs.get("enable_auto_validation", True)
         kwargs.get("iteration_timeout", 300)
 
+        # Auto-inject validation tools if in test-driven mode
+        if convergence_mode in [ConvergenceMode.TEST_DRIVEN, ConvergenceMode.HYBRID]:
+            if enable_auto_validation:
+                mcp_servers = kwargs.get("mcp_servers", [])
+                # Add internal validation server if not present
+                if not any(
+                    s == "builtin_validation"
+                    or (isinstance(s, dict) and s.get("type") == "internal")
+                    for s in mcp_servers
+                ):
+                    mcp_servers.append(
+                        {"type": "internal", "name": "builtin_validation"}
+                    )
+                kwargs["mcp_servers"] = mcp_servers
+
+        # Store mode in kwargs for convergence phase
+        kwargs["_convergence_mode"] = convergence_mode
+
         # Initialize iterative execution state
         start_time = time.time()
         iterations: list[IterationState] = []
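Illustratively, with one user-supplied server and test-driven mode, the auto-injection above leaves the server list with the built-in validation entry appended. A sketch of that check run outside the node (output shown as a comment, not captured from the package):

```python
mcp_servers = ["http://localhost:8080"]
if not any(
    s == "builtin_validation" or (isinstance(s, dict) and s.get("type") == "internal")
    for s in mcp_servers
):
    mcp_servers.append({"type": "internal", "name": "builtin_validation"})
print(mcp_servers)
# ['http://localhost:8080', {'type': 'internal', 'name': 'builtin_validation'}]
```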
@@ -270,16 +349,18 @@ class IterativeLLMAgentNode(LLMAgentNode):
                     kwargs, iteration_state.execution_results, iterations
                 )
 
-                # Phase 5: Convergence
-
-                convergence_result = self._phase_convergence(
+                # Phase 5: Convergence - mode-aware
+                convergence_result = self._phase_convergence_with_mode(
                     kwargs,
                     iteration_state,
                     iterations,
                     convergence_criteria,
                     global_discoveries,
+                    kwargs.get("_convergence_mode", ConvergenceMode.SATISFACTION),
                 )
                 iteration_state.convergence_decision = convergence_result
+                # Set phase after convergence check is complete
+                iteration_state.phase = "convergence"
 
                 if convergence_result["should_stop"]:
                     converged = True
@@ -670,6 +751,9 @@ class IterativeLLMAgentNode(LLMAgentNode):
             "errors": [],
         }
 
+        # Check if we should use real MCP tool execution
+        use_real_mcp = kwargs.get("use_real_mcp", True)
+
         # Execute each step in the plan
         for step in plan.get("execution_steps", []):
             step_num = step.get("step", 0)
@@ -677,22 +761,35 @@ class IterativeLLMAgentNode(LLMAgentNode):
             tools = step.get("tools", [])
 
             try:
-
-
-
-
-
-
-
-
-
+                if use_real_mcp:
+                    # Real MCP tool execution
+                    step_result = self._execute_tools_with_mcp(
+                        step_num, action, tools, discoveries, kwargs
+                    )
+                else:
+                    # Mock tool execution for backward compatibility
+                    step_result = {
+                        "step": step_num,
+                        "action": action,
+                        "tools_used": tools,
+                        "output": f"Mock execution result for {action} using tools: {', '.join(tools)}",
+                        "success": True,
+                        "duration": 1.5,
+                    }
 
                 execution_results["steps_completed"].append(step_result)
                 execution_results["intermediate_results"].append(step_result["output"])
 
                 # Store tool outputs
                 for tool in tools:
-
+                    if step_result["success"]:
+                        execution_results["tool_outputs"][tool] = step_result.get(
+                            "tool_outputs", {}
+                        ).get(tool, step_result["output"])
+                    else:
+                        execution_results["tool_outputs"][
+                            tool
+                        ] = f"Error executing {tool}: {step_result.get('error', 'Unknown error')}"
 
             except Exception as e:
                 error_result = {
@@ -708,6 +805,262 @@ class IterativeLLMAgentNode(LLMAgentNode):
 
         return execution_results
 
+    def _execute_tools_with_mcp(
+        self,
+        step_num: int,
+        action: str,
+        tools: list[str],
+        discoveries: dict[str, Any],
+        kwargs: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Execute tools using real MCP client."""
+        import time
+
+        start_time = time.time()
+        step_result = {
+            "step": step_num,
+            "action": action,
+            "tools_used": tools,
+            "output": "",
+            "success": True,
+            "duration": 0.0,
+            "tool_outputs": {},
+        }
+
+        # Initialize MCP client if not already done
+        if not hasattr(self, "_mcp_client"):
+            from kailash.mcp_server import MCPClient
+
+            self._mcp_client = MCPClient()
+
+        # Build tool-to-server mapping from discoveries
+        tool_server_map = self._build_tool_server_mapping(discoveries, kwargs)
+
+        # Execute each tool
+        tool_results = []
+        for tool_name in tools:
+            try:
+                # Find server configuration for this tool
+                server_config = tool_server_map.get(tool_name)
+                if not server_config:
+                    self.logger.warning(
+                        f"No server configuration found for tool: {tool_name}"
+                    )
+                    continue
+
+                # Get tool arguments from planning context
+                tool_args = self._extract_tool_arguments(tool_name, action, kwargs)
+
+                # Execute the tool
+                tool_result = self._run_async_in_sync_context(
+                    self._mcp_client.call_tool(server_config, tool_name, tool_args)
+                )
+
+                if tool_result.get("success", False):
+                    content = tool_result.get("content", "")
+                    step_result["tool_outputs"][tool_name] = content
+                    tool_results.append(f"Tool {tool_name}: {content}")
+                else:
+                    error_msg = tool_result.get("error", "Unknown error")
+                    step_result["tool_outputs"][tool_name] = f"Error: {error_msg}"
+                    tool_results.append(f"Tool {tool_name} failed: {error_msg}")
+
+            except Exception as e:
+                error_msg = str(e)
+                step_result["tool_outputs"][tool_name] = f"Error: {error_msg}"
+                tool_results.append(f"Tool {tool_name} failed: {error_msg}")
+                self.logger.error(f"Tool execution failed for {tool_name}: {e}")
+
+        # Combine all tool outputs
+        step_result["output"] = (
+            "\n".join(tool_results)
+            if tool_results
+            else f"No tools executed for action: {action}"
+        )
+        step_result["duration"] = time.time() - start_time
+
+        # Mark as failed if no tools executed successfully
+        if tool_results:
+            step_result["success"] = any(
+                "failed" not in result for result in tool_results
+            )
+        else:
+            step_result["success"] = False
+
+        return step_result
+
+    def _build_tool_server_mapping(
+        self, discoveries: dict[str, Any], kwargs: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Build mapping from tool names to server configurations."""
+        tool_server_map = {}
+
+        # Get MCP servers from kwargs with platform adapter support
+        mcp_servers = kwargs.get("mcp_servers", [])
+
+        # Check if we have platform-format server configurations
+        if "server_config" in kwargs or "server_configs" in kwargs:
+            from kailash.adapters import MCPPlatformAdapter
+
+            try:
+                platform_config = {
+                    k: v
+                    for k, v in kwargs.items()
+                    if k in ["server_config", "server_configs"]
+                }
+                translated_config = MCPPlatformAdapter.translate_llm_agent_config(
+                    platform_config
+                )
+                if "mcp_servers" in translated_config:
+                    mcp_servers = translated_config["mcp_servers"]
+                    self.logger.debug(
+                        f"Translated platform MCP servers: {len(mcp_servers)} servers"
+                    )
+            except Exception as e:
+                self.logger.warning(f"Failed to translate platform MCP config: {e}")
+                # Continue with original mcp_servers
+
+        # Create fallback server config if we have mcp_servers
+        fallback_server = mcp_servers[0] if mcp_servers else None
+
+        # Process discovered tools to map them to servers
+        for tool in discoveries.get("new_tools", []):
+            if isinstance(tool, dict):
+                # Extract tool name and server info
+                if "function" in tool:
+                    tool_name = tool["function"].get("name", "unknown")
+                    server_config = tool["function"].get("mcp_server_config")
+                else:
+                    tool_name = tool.get("name", "unknown")
+                    server_config = tool.get("mcp_server_config")
+
+                # Skip tools with unknown names
+                if tool_name == "unknown":
+                    self.logger.warning(f"Skipping tool with unknown name: {tool}")
+                    continue
+
+                # Find matching server configuration
+                if server_config:
+                    tool_server_map[tool_name] = server_config
+                elif fallback_server:
+                    # Use fallback server and log the mapping
+                    tool_server_map[tool_name] = fallback_server
+                    self.logger.debug(
+                        f"Mapped tool '{tool_name}' to fallback server: {fallback_server.get('name', 'unnamed')}"
+                    )
+                else:
+                    # No server available for this tool
+                    self.logger.warning(
+                        f"No MCP server configuration available for tool: {tool_name}"
+                    )
+
+        # Also map any tools that might be explicitly listed in mcp_servers
+        for server in mcp_servers:
+            server_tools = server.get("tools", [])
+            for tool_name in server_tools:
+                if tool_name not in tool_server_map:
+                    tool_server_map[tool_name] = server
+                    self.logger.debug(
+                        f"Pre-mapped tool '{tool_name}' from server configuration"
+                    )
+
+        return tool_server_map
+
+    def _extract_tool_arguments(
+        self, tool_name: str, action: str, kwargs: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Extract arguments for tool execution based on action context."""
+        # Get user query for context
+        messages = kwargs.get("messages", [])
+        user_query = ""
+        for msg in reversed(messages):
+            if msg.get("role") == "user":
+                user_query = msg.get("content", "")
+                break
+
+        # Check if explicit tool arguments are provided in kwargs
+        tool_args_key = f"{tool_name}_args"
+        if tool_args_key in kwargs:
+            explicit_args = kwargs[tool_args_key]
+            if isinstance(explicit_args, dict):
+                return explicit_args
+
+        # Check if there are general tool parameters provided
+        if "tool_parameters" in kwargs and isinstance(kwargs["tool_parameters"], dict):
+            tool_params = kwargs["tool_parameters"]
+            if tool_name in tool_params and isinstance(tool_params[tool_name], dict):
+                return tool_params[tool_name]
+
+        # Try to extract structured parameters from the action string
+        import re
+
+        # Look for JSON-like structures in action
+        json_match = re.search(r"\{[^}]+\}", action)
+        if json_match:
+            try:
+                import json
+
+                parsed_params = json.loads(json_match.group())
+                if isinstance(parsed_params, dict):
+                    return parsed_params
+            except (json.JSONDecodeError, ValueError):
+                # Fall through to default behavior
+                pass
+
+        # Look for key=value pairs in action
+        param_matches = re.findall(r'(\w+)=(["\']?)([^,\s]+)\2', action)
+        if param_matches:
+            extracted_params = {}
+            for key, _, value in param_matches:
+                # Try to convert to appropriate type
+                if value.lower() in ("true", "false"):
+                    extracted_params[key] = value.lower() == "true"
+                elif value.isdigit():
+                    extracted_params[key] = int(value)
+                else:
+                    extracted_params[key] = value
+
+            # Add default parameters
+            extracted_params.update({"query": user_query, "action": action})
+            return extracted_params
+
+        # Generate basic arguments based on action and user query (fallback)
+        if action == "gather_data":
+            return {"query": user_query, "action": "search", "source": "default"}
+        elif action == "perform_analysis":
+            return {"data": user_query, "action": "analyze", "format": "structured"}
+        elif action == "generate_insights":
+            return {"input": user_query, "action": "generate", "type": "insights"}
+        elif "search" in action.lower():
+            return {"query": user_query, "search_type": "general"}
+        elif "file" in action.lower() or "read" in action.lower():
+            return {"path": user_query, "operation": "read"}
+        elif "write" in action.lower() or "create" in action.lower():
+            return {"content": user_query, "operation": "write"}
+        else:
+            return {"query": user_query, "action": action, "context": "default"}
+
+    def _run_async_in_sync_context(self, coro):
+        """Run async coroutine in sync context using existing pattern from parent class."""
+        try:
+            import asyncio
+
+            # Try to get existing event loop
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                # If loop is running, we need to use a new thread
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(asyncio.run, coro)
+                    return future.result()
+            else:
+                # Loop exists but not running, use run_until_complete
+                return loop.run_until_complete(coro)
+        except RuntimeError:
+            # No event loop exists, create one
+            return asyncio.run(coro)
+
     def _phase_reflection(
         self,
         kwargs: dict[str, Any],
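The key=value fallback in `_extract_tool_arguments` above can be exercised in isolation. A standalone sketch that reproduces that branch (the action string is invented; the expected output follows from the regex and type-coercion logic in the hunk):

```python
import re

def parse_action_params(action: str) -> dict:
    """Reproduces the key=value fallback used by _extract_tool_arguments (sketch)."""
    params = {}
    for key, _, value in re.findall(r'(\w+)=(["\']?)([^,\s]+)\2', action):
        if value.lower() in ("true", "false"):
            params[key] = value.lower() == "true"
        elif value.isdigit():
            params[key] = int(value)
        else:
            params[key] = value
    return params

print(parse_action_params("gather_data source=registry limit=10 refresh=true"))
# -> {'source': 'registry', 'limit': 10, 'refresh': True}
```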
@@ -1278,3 +1631,621 @@ class IterativeLLMAgentNode(LLMAgentNode):
             "average_iteration_time": total_duration / max(len(iterations), 1),
             "estimated_cost_usd": total_api_calls * 0.01,  # Mock cost calculation
         }
+
+    def _phase_convergence_with_mode(
+        self,
+        kwargs: dict[str, Any],
+        iteration_state: IterationState,
+        previous_iterations: list[IterationState],
+        convergence_criteria: dict[str, Any],
+        global_discoveries: dict[str, Any],
+        mode: ConvergenceMode,
+    ) -> dict[str, Any]:
+        """Execute convergence check based on selected mode."""
+
+        if mode == ConvergenceMode.SATISFACTION:
+            # Use existing convergence logic
+            return self._phase_convergence(
+                kwargs,
+                iteration_state,
+                previous_iterations,
+                convergence_criteria,
+                global_discoveries,
+            )
+
+        elif mode == ConvergenceMode.TEST_DRIVEN:
+            # Use new test-driven convergence
+            return self._phase_convergence_test_driven(
+                kwargs,
+                iteration_state,
+                previous_iterations,
+                convergence_criteria,
+                global_discoveries,
+            )
+
+        elif mode == ConvergenceMode.HYBRID:
+            # Combine both approaches
+            return self._phase_convergence_hybrid(
+                kwargs,
+                iteration_state,
+                previous_iterations,
+                convergence_criteria,
+                global_discoveries,
+            )
+
+    def _phase_convergence_test_driven(
+        self,
+        kwargs: dict[str, Any],
+        iteration_state: IterationState,
+        previous_iterations: list[IterationState],
+        convergence_criteria: dict[str, Any],
+        global_discoveries: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Test-driven convergence - only stop when deliverables pass validation."""
+
+        convergence_result = {
+            "should_stop": False,
+            "reason": "",
+            "confidence": 0.0,
+            "validation_results": {},
+            "tests_summary": {"total": 0, "passed": 0, "failed": 0, "skipped": 0},
+            "recommendations": [],
+        }
+
+        # Get test requirements from convergence criteria
+        test_requirements = convergence_criteria.get(
+            "test_requirements",
+            {
+                "syntax_valid": True,
+                "imports_resolve": True,
+                "executes_without_error": True,
+                "unit_tests_pass": False,
+                "integration_tests_pass": False,
+                "output_schema_valid": False,
+            },
+        )
+
+        # Get validation strategy
+        validation_strategy = kwargs.get(
+            "validation_strategy",
+            {"progressive": True, "fail_fast": True, "auto_fix": True},
+        )
+
+        # Extract all validation results from execution
+        validation_results = self._extract_validation_results(iteration_state)
+
+        # If no validation results found, look for code/workflow outputs to validate
+        if not validation_results:
+            validation_results = self._perform_implicit_validation(
+                iteration_state, test_requirements, validation_strategy
+            )
+
+        # Analyze validation results against requirements
+        test_status = self._analyze_test_results(validation_results, test_requirements)
+
+        # Update summary
+        convergence_result["tests_summary"]["total"] = len(test_status)
+        convergence_result["tests_summary"]["passed"] = sum(
+            1 for r in test_status.values() if r["passed"]
+        )
+        convergence_result["tests_summary"]["failed"] = sum(
+            1 for r in test_status.values() if not r["passed"] and not r.get("skipped")
+        )
+        convergence_result["tests_summary"]["skipped"] = sum(
+            1 for r in test_status.values() if r.get("skipped")
+        )
+        convergence_result["validation_results"] = test_status
+
+        # Determine convergence
+        required_tests = [name for name, req in test_requirements.items() if req]
+        required_passed = all(
+            test_status.get(name, {}).get("passed", False) for name in required_tests
+        )
+
+        if required_passed and convergence_result["tests_summary"]["total"] > 0:
+            convergence_result["should_stop"] = True
+            convergence_result["reason"] = (
+                f"test_driven_success: All {len(required_tests)} required tests passed"
+            )
+            convergence_result["confidence"] = 0.95
+        else:
+            # Provide detailed failure analysis
+            failed_required = [
+                name
+                for name in required_tests
+                if not test_status.get(name, {}).get("passed", False)
+            ]
+
+            convergence_result["reason"] = (
+                f"test_driven_continue: {len(failed_required)} required tests failed: {failed_required}"
+            )
+            convergence_result["confidence"] = convergence_result["tests_summary"][
+                "passed"
+            ] / max(len(required_tests), 1)
+
+            # Generate recommendations for next iteration
+            convergence_result["recommendations"] = self._generate_fix_recommendations(
+                test_status, failed_required, iteration_state
+            )
+
+        # Check resource limits even in test-driven mode
+        if not convergence_result["should_stop"]:
+            resource_check = self._check_resource_limits(
+                kwargs, iteration_state, previous_iterations, convergence_criteria
+            )
+            if resource_check["exceeded"]:
+                convergence_result["should_stop"] = True
+                convergence_result["reason"] = (
+                    f"resource_limit: {resource_check['reason']}"
+                )
+
+        return convergence_result
+
+    def _phase_convergence_hybrid(
+        self,
+        kwargs: dict[str, Any],
+        iteration_state: IterationState,
+        previous_iterations: list[IterationState],
+        convergence_criteria: dict[str, Any],
+        global_discoveries: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Hybrid convergence combining test-driven and satisfaction-based approaches."""
+
+        # Get both convergence results
+        test_result = self._phase_convergence_test_driven(
+            kwargs,
+            iteration_state,
+            previous_iterations,
+            convergence_criteria,
+            global_discoveries,
+        )
+
+        satisfaction_result = self._phase_convergence(
+            kwargs,
+            iteration_state,
+            previous_iterations,
+            convergence_criteria,
+            global_discoveries,
+        )
+
+        # Combine results with configurable weights
+        hybrid_config = convergence_criteria.get(
+            "hybrid_config",
+            {
+                "test_weight": 0.7,
+                "satisfaction_weight": 0.3,
+                "require_both": False,  # If True, both must pass
+            },
+        )
+
+        test_weight = hybrid_config.get("test_weight", 0.7)
+        satisfaction_weight = hybrid_config.get("satisfaction_weight", 0.3)
+        require_both = hybrid_config.get("require_both", False)
+
+        # Calculate combined confidence
+        combined_confidence = (
+            test_result["confidence"] * test_weight
+            + satisfaction_result["confidence"] * satisfaction_weight
+        )
+
+        # Determine convergence
+        if require_both:
+            should_stop = (
+                test_result["should_stop"] and satisfaction_result["should_stop"]
+            )
+            reason = f"hybrid_both: tests={'passed' if test_result['should_stop'] else 'failed'}, satisfaction={'met' if satisfaction_result['should_stop'] else 'unmet'}"
+        else:
+            # Stop if weighted score is high enough
+            threshold = convergence_criteria.get("hybrid_threshold", 0.85)
+            should_stop = combined_confidence >= threshold
+
+            if should_stop:
+                reason = f"hybrid_threshold: combined confidence {combined_confidence:.2f} >= {threshold}"
+            else:
+                reason = f"hybrid_continue: combined confidence {combined_confidence:.2f} < {threshold}"
+
+        return {
+            "should_stop": should_stop,
+            "reason": reason,
+            "confidence": combined_confidence,
+            "test_results": test_result.get("validation_results", {}),
+            "satisfaction_metrics": satisfaction_result.get("criteria_met", {}),
+            "recommendations": test_result.get("recommendations", [])
+            + satisfaction_result.get("recommendations", []),
+        }
+
+    def _extract_validation_results(
+        self, iteration_state: IterationState
+    ) -> list[dict]:
+        """Extract validation results from execution outputs."""
+        validation_results = []
+        execution_results = iteration_state.execution_results or {}
+        tool_outputs = execution_results.get("tool_outputs", {})
+
+        # Look for validation tool outputs with expanded keyword matching
+        validation_keywords = [
+            "validate",
+            "test",
+            "check",
+            "verify",
+            "assert",
+            "confirm",
+            "audit",
+            "review",
+            "inspect",
+            "examine",
+            "eval",
+            "run",
+        ]
+
+        for tool_name, output in tool_outputs.items():
+            is_validation_tool = any(
+                keyword in tool_name.lower() for keyword in validation_keywords
+            )
+
+            if isinstance(output, dict):
+                # Check for validation-related content in the output structure
+                has_validation_content = any(
+                    key in output
+                    for key in [
+                        "validation_results",
+                        "test_results",
+                        "validated",
+                        "passed",
+                        "failed",
+                        "success",
+                        "errors",
+                        "warnings",
+                        "status",
+                        "result",
+                    ]
+                )
+
+                if is_validation_tool or has_validation_content:
+                    if "validation_results" in output:
+                        # Standard validation node output
+                        results = output["validation_results"]
+                        if isinstance(results, list):
+                            validation_results.extend(results)
+                        elif isinstance(results, dict):
+                            validation_results.append(results)
+                    elif "test_results" in output:
+                        # Test suite output
+                        results = output["test_results"]
+                        if isinstance(results, list):
+                            validation_results.extend(results)
+                        elif isinstance(results, dict):
+                            validation_results.append(results)
+                    elif "validated" in output or "passed" in output:
+                        # Simple validation result
+                        passed = output.get("validated", output.get("passed", False))
+                        validation_results.append(
+                            {
+                                "test_name": tool_name,
+                                "passed": passed,
+                                "details": output,
+                            }
+                        )
+                    elif "success" in output or "status" in output:
+                        # Status-based validation
+                        success = output.get(
+                            "success", output.get("status") == "success"
+                        )
+                        validation_results.append(
+                            {
+                                "test_name": tool_name,
+                                "passed": success,
+                                "details": output,
+                            }
+                        )
+                    elif "result" in output:
+                        # Generic result output - try to extract validation info
+                        result = output["result"]
+                        if isinstance(result, dict):
+                            # Check if result contains validation data
+                            if any(
+                                key in result
+                                for key in ["passed", "failed", "success", "errors"]
+                            ):
+                                validation_results.append(
+                                    {
+                                        "test_name": tool_name,
+                                        "passed": result.get(
+                                            "passed", result.get("success", False)
+                                        ),
+                                        "details": result,
+                                    }
+                                )
+                            else:
+                                # Treat non-empty result as successful validation
+                                validation_results.append(
+                                    {
+                                        "test_name": tool_name,
+                                        "passed": bool(result),
+                                        "details": output,
+                                    }
+                                )
+                        elif isinstance(result, (str, bool, int)):
+                            # Simple result types
+                            validation_results.append(
+                                {
+                                    "test_name": tool_name,
+                                    "passed": bool(result)
+                                    and result != "false"
+                                    and result != 0,
+                                    "details": output,
+                                }
+                            )
+            elif isinstance(output, str):
+                # String output - look for validation patterns
+                if is_validation_tool:
+                    # Simple heuristic: look for success/fail indicators in string
+                    success_indicators = ["pass", "success", "ok", "valid", "true"]
+                    failure_indicators = ["fail", "error", "invalid", "false"]
+
+                    output_lower = output.lower()
+                    has_success = any(
+                        indicator in output_lower for indicator in success_indicators
+                    )
+                    has_failure = any(
+                        indicator in output_lower for indicator in failure_indicators
+                    )
+
+                    if has_success or has_failure:
+                        validation_results.append(
+                            {
+                                "test_name": tool_name,
+                                "passed": has_success and not has_failure,
+                                "details": {"output": output},
+                            }
+                        )
+
+        return validation_results
+
+    def _perform_implicit_validation(
+        self,
+        iteration_state: IterationState,
+        test_requirements: dict,
+        validation_strategy: dict,
+    ) -> list[dict]:
+        """Perform validation on discovered code/workflow outputs."""
+        from kailash.nodes.validation import CodeValidationNode, WorkflowValidationNode
+
+        validation_results = []
+        execution_results = iteration_state.execution_results or {}
+        tool_outputs = execution_results.get("tool_outputs", {})
+
+        # Look for code generation outputs
+        for tool_name, output in tool_outputs.items():
+            if not isinstance(output, (dict, str)):
+                continue
+
+            # Detect code outputs
+            code_content = None
+            if isinstance(output, str) and any(
+                keyword in output for keyword in ["def ", "class ", "import "]
+            ):
+                code_content = output
+            elif isinstance(output, dict):
+                # Check various possible keys
+                for key in ["code", "generated_code", "result", "output"]:
+                    if key in output and isinstance(output[key], str):
+                        potential_code = output[key]
+                        if any(
+                            keyword in potential_code
+                            for keyword in ["def ", "class ", "import "]
+                        ):
+                            code_content = potential_code
+                            break
+
+            # Validate discovered code
+            if code_content:
+                validator = CodeValidationNode()
+
+                # Determine validation levels based on requirements
+                levels = []
+                if test_requirements.get("syntax_valid", True):
+                    levels.append("syntax")
+                if test_requirements.get("imports_resolve", True):
+                    levels.append("imports")
+                if test_requirements.get("executes_without_error", True):
+                    levels.append("semantic")
+
+                if levels:
+                    try:
+                        result = validator.execute(
+                            code=code_content,
+                            validation_levels=levels,
+                            test_inputs={},  # Could extract from context
+                        )
+
+                        if "validation_results" in result:
+                            validation_results.extend(result["validation_results"])
+                    except Exception as e:
+                        self.logger.warning(f"Implicit validation failed: {e}")
+
+            # Detect workflow outputs
+            if isinstance(output, (dict, str)) and "WorkflowBuilder" in str(output):
+                workflow_content = (
+                    str(output)
+                    if isinstance(output, str)
+                    else output.get("workflow", "")
+                )
+
+                if workflow_content:
+                    validator = WorkflowValidationNode()
+                    try:
+                        result = validator.execute(
+                            workflow_code=workflow_content,
+                            validate_execution=test_requirements.get(
+                                "executes_without_error", False
+                            ),
+                        )
+
+                        if result.get("validated"):
+                            validation_results.append(
+                                {
+                                    "test_name": "workflow_structure",
+                                    "level": "semantic",
+                                    "passed": True,
+                                    "details": result.get("validation_details", {}),
+                                }
+                            )
+                        else:
+                            validation_results.append(
+                                {
+                                    "test_name": "workflow_structure",
+                                    "level": "semantic",
+                                    "passed": False,
+                                    "error": "; ".join(
+                                        result.get("validation_details", {}).get(
+                                            "errors", []
+                                        )
+                                    ),
+                                }
+                            )
+                    except Exception as e:
+                        self.logger.warning(f"Workflow validation failed: {e}")
+
+        return validation_results
+
+    def _analyze_test_results(
+        self, validation_results: list[dict], test_requirements: dict
+    ) -> dict[str, dict]:
+        """Analyze validation results against requirements."""
+        test_status = {}
+
+        # Map validation results to requirements
+        requirement_mapping = {
+            "syntax_valid": ["syntax", "python_syntax"],
+            "imports_resolve": ["imports", "import_validation"],
+            "executes_without_error": ["semantic", "code_execution", "execution"],
+            "unit_tests_pass": ["unit_tests", "test_suite"],
+            "integration_tests_pass": ["integration", "integration_tests"],
+            "output_schema_valid": ["schema", "output_schema"],
+        }
+
+        for req_name, req_enabled in test_requirements.items():
+            if not req_enabled:
+                test_status[req_name] = {"passed": True, "skipped": True}
+                continue
+
+            # Find matching validation results
+            matching_results = []
+            for result in validation_results:
+                test_name = result.get("test_name", "").lower()
+                level = result.get("level", "").lower()
+
+                for keyword in requirement_mapping.get(req_name, []):
+                    if keyword in test_name or keyword in level:
+                        matching_results.append(result)
+                        break
+
+            if matching_results:
+                # Requirement passes if ALL matching tests pass
+                all_passed = all(r.get("passed", False) for r in matching_results)
+                first_error = next(
+                    (r.get("error") for r in matching_results if not r.get("passed")),
+                    None,
+                )
+
+                test_status[req_name] = {
+                    "passed": all_passed,
+                    "test_count": len(matching_results),
+                    "error": first_error,
+                    "details": matching_results,
+                }
+            else:
+                # No matching tests found
+                test_status[req_name] = {
+                    "passed": False,
+                    "error": "No validation tests found for this requirement",
+                    "missing": True,
+                }
+
+        return test_status
+
+    def _generate_fix_recommendations(
+        self,
+        test_status: dict,
+        failed_tests: list[str],
+        iteration_state: IterationState,
+    ) -> list[str]:
+        """Generate recommendations for fixing failed tests."""
+        recommendations = []
+
+        for test_name in failed_tests:
+            test_result = test_status.get(test_name, {})
+            error = test_result.get("error", "")
+
+            if test_name == "syntax_valid":
+                recommendations.append(
+                    "Fix syntax errors in generated code - check for missing colons, incorrect indentation"
+                )
+                if "SyntaxError" in error:
+                    recommendations.append(f"Syntax error details: {error}")
+
+            elif test_name == "imports_resolve":
+                recommendations.append(
+                    "Ensure all imports are valid - use only standard library or explicitly available packages"
+                )
+                if test_result.get("details"):
+                    unresolved = [
+                        d.get("unresolved_list", []) for d in test_result["details"]
+                    ]
+                    if unresolved:
+                        recommendations.append(f"Unresolved imports: {unresolved}")
+
+            elif test_name == "executes_without_error":
+                recommendations.append(
+                    "Fix runtime errors - check variable names, function calls, and logic"
+                )
+                if error:
+                    recommendations.append(f"Execution error: {error}")
+
+            elif test_name == "unit_tests_pass":
+                recommendations.append("Ensure code logic matches test expectations")
+
+            elif test_name == "output_schema_valid":
+                recommendations.append("Ensure output format matches expected schema")
+
+        # Add general recommendations based on iteration count
+        if len(iteration_state.discoveries.get("new_tools", [])) == 0:
+            recommendations.append(
+                "Consider discovering more tools to help with the task"
+            )
+
+        return recommendations
+
+    def _check_resource_limits(
+        self,
+        kwargs: dict[str, Any],
+        iteration_state: IterationState,
+        previous_iterations: list[IterationState],
+        convergence_criteria: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Check if resource limits have been exceeded."""
+        resource_limits = convergence_criteria.get(
+            "resource_limits", {"max_time": 300, "max_iterations": 10}
+        )
+
+        # Calculate total time
+        total_time = sum(
+            (state.end_time - state.start_time)
+            for state in previous_iterations + [iteration_state]
+            if state.end_time
+        )
+
+        # Check limits
+        exceeded = False
+        reason = ""
+
+        if total_time > resource_limits.get("max_time", 300):
+            exceeded = True
+            reason = f"Time limit exceeded: {total_time:.1f}s > {resource_limits['max_time']}s"
+
+        elif len(previous_iterations) + 1 >= resource_limits.get("max_iterations", 10):
+            exceeded = True
+            reason = f"Iteration limit reached: {len(previous_iterations) + 1}"
+
+        return {"exceeded": exceeded, "reason": reason}
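The hybrid path above reads its weights and threshold from `convergence_criteria`. A hedged example of a criteria dict it would understand, with weights and threshold mirroring the defaults in the diff (the specific values chosen here are illustrative):

```python
convergence_criteria = {
    "hybrid_config": {
        "test_weight": 0.7,          # weight on test-driven confidence
        "satisfaction_weight": 0.3,  # weight on satisfaction-based confidence
        "require_both": False,       # True would require both checks to pass
    },
    "hybrid_threshold": 0.85,        # weighted confidence needed to stop
    "test_requirements": {"syntax_valid": True, "executes_without_error": True},
    "resource_limits": {"max_time": 300, "max_iterations": 10},
}
```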