alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +156 -0
- alita_sdk/cli/agent_loader.py +245 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3113 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +91 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +388 -46
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +157 -39
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
- alita_sdk/runtime/langchain/langraph_agent.py +405 -84
- alita_sdk/runtime/langchain/utils.py +106 -7
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +31 -0
- alita_sdk/runtime/toolkits/application.py +29 -10
- alita_sdk/runtime/toolkits/artifact.py +20 -11
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +783 -0
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +356 -69
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +10 -3
- alita_sdk/runtime/tools/application.py +27 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +67 -35
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +148 -46
- alita_sdk/runtime/tools/llm.py +1003 -128
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +65 -48
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +9 -3
- alita_sdk/runtime/tools/vectorstore_base.py +70 -14
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +30 -9
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +134 -35
- alita_sdk/tools/ado/repos/__init__.py +51 -32
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -13
- alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +271 -84
- alita_sdk/tools/bitbucket/__init__.py +17 -11
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +11 -8
- alita_sdk/tools/code_indexer_toolkit.py +82 -22
- alita_sdk/tools/confluence/__init__.py +22 -16
- alita_sdk/tools/confluence/api_wrapper.py +107 -30
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +14 -15
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +16 -11
- alita_sdk/tools/gitlab/api_wrapper.py +218 -48
- alita_sdk/tools/gitlab_org/__init__.py +10 -9
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +11 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -10
- alita_sdk/tools/jira/api_wrapper.py +92 -41
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +12 -4
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +10 -9
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +31 -11
- alita_sdk/tools/qtest/api_wrapper.py +2135 -86
- alita_sdk/tools/rally/__init__.py +10 -9
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -8
- alita_sdk/tools/salesforce/__init__.py +10 -8
- alita_sdk/tools/servicenow/__init__.py +17 -15
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -7
- alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +10 -7
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +12 -9
- alita_sdk/tools/testio/__init__.py +10 -7
- alita_sdk/tools/testrail/__init__.py +11 -10
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +103 -18
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
- alita_sdk/tools/xray/__init__.py +13 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +10 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
- alita_sdk/tools/zephyr_essential/__init__.py +10 -7
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -7
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.379.dist-info/RECORD +0 -360
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Single test case execution utilities.
|
|
3
|
+
|
|
4
|
+
Handles executing a single test case with the test runner agent.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import uuid
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, Any, List, Optional
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from langchain_core.runnables import RunnableConfig
|
|
14
|
+
|
|
15
|
+
from ..callbacks import create_cli_callback
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
console = Console()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def execute_single_test_case(
    tc_info: Dict[str, Any],
    idx: int,
    total_tests: int,
    bulk_gen_chat_history: List[Dict[str, str]],
    test_cases_path: Path,
    executor_cache: Dict,
    client,
    agent_def: Dict[str, Any],
    config,
    model: Optional[str],
    temperature: Optional[float],
    max_tokens: Optional[int],
    work_dir: str,
    master_log,
    setup_executor_func,
    verbose: bool = True,
    debug: bool = False,
) -> Optional[str]:
    """Run one test case through the test runner agent.

    Args:
        tc_info: Test case info dict; 'data' holds the parsed test case and
            'file' the test case file.
        idx: Test case index (1-based), used in log output and the thread id.
        total_tests: Total number of test cases (log output only).
        bulk_gen_chat_history: Chat history from data generation; passed to
            the agent as-is on every call.
        test_cases_path: Path to the test cases directory.
        executor_cache: Cache of executors handed to create_executor_from_cache.
        client: API client.
        agent_def: Agent definition.
        config: CLI configuration.
        model: Model override.
        temperature: Temperature override.
        max_tokens: Max tokens override.
        work_dir: Working directory.
        master_log: Log capture instance (must provide print() and status()).
        setup_executor_func: Function used to set up an executor.
        verbose: When true, a CLI callback is attached to the invocation.
        debug: Forwarded to create_cli_callback.

    Returns:
        Execution output string, or None when no agent executor is available.
    """
    from .parser import resolve_toolkit_config_path
    from .prompts import build_single_test_execution_prompt
    from .utils import extract_toolkit_name
    from .executor import create_executor_from_cache
    from ..agent_ui import extract_output_from_result

    case_data = tc_info['data']
    case_file = tc_info['file']
    case_name = case_data['name']
    raw_config_path = case_data.get('config_path', '')

    # Resolve the toolkit config location and a display name for this case.
    toolkit_config_path = resolve_toolkit_config_path(
        raw_config_path,
        case_file,
        test_cases_path,
    )
    toolkit_name = extract_toolkit_name(raw_config_path)

    # Cases without a toolkit config share the '__no_config__' cache slot.
    cache_key = toolkit_config_path or '__no_config__'
    thread_id = f"test_case_{idx}_{uuid.uuid4().hex[:8]}"

    # Test case banner in the master log.
    rule = "=" * 80
    banner_lines = (
        "\n\n" + rule,
        f"[bold cyan]Test Case {idx}/{total_tests} - {case_name}[/bold cyan]",
        f"[dim]Toolkit: {toolkit_name}[/dim]",
        f"[dim]Config: {toolkit_config_path or 'None'}[/dim]",
        rule + "\n",
    )
    for banner_line in banner_lines:
        master_log.print(banner_line)

    # Fetch (or build) the executor for this toolkit configuration.
    agent_executor, _memory, _mcp_session_manager = create_executor_from_cache(
        executor_cache, cache_key, client, agent_def, toolkit_config_path,
        config, model, temperature, max_tokens, work_dir, setup_executor_func
    )

    # Prompt for this single test case.
    execution_prompt = build_single_test_execution_prompt(tc_info, idx)
    master_log.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
    master_log.print(f"[dim]Executing test case with the prompt {execution_prompt}[/dim]")

    if not agent_executor:
        master_log.print("[red]✗ No agent executor available[/red]")
        return None

    # Each run gets its own thread id; callbacks are attached only when verbose.
    if verbose:
        cli_callback = create_cli_callback(verbose=True, debug=debug)
        run_config = RunnableConfig(callbacks=[cli_callback], configurable={"thread_id": thread_id})
    else:
        run_config = {"configurable": {"thread_id": thread_id}}

    agent_input = {
        "input": execution_prompt,
        "chat_history": bulk_gen_chat_history,  # ONLY data gen history, no accumulation
    }
    with master_log.status("[yellow]Executing test case...[/yellow]", spinner="dots"):
        exec_result = agent_executor.invoke(agent_input, config=run_config)

    execution_output = extract_output_from_result(exec_result)

    master_log.print("[green]✓ Test case executed[/green]")
    master_log.print(f"[dim]{execution_output}[/dim]\n")

    return execution_output
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def validate_single_test_case(
    tc_info: Dict[str, Any],
    idx: int,
    execution_output: str,
    bulk_gen_chat_history: List[Dict[str, str]],
    validation_executor_cache: Dict,
    cache_key: str,
    client,
    validator_def: Optional[Dict[str, Any]],
    agent_def: Dict[str, Any],
    toolkit_config_path: Optional[str],
    config,
    model: Optional[str],
    temperature: Optional[float],
    max_tokens: Optional[int],
    work_dir: str,
    master_log,
    setup_executor_func,
    verbose: bool = True,
    debug: bool = False,
) -> Dict[str, Any]:
    """Validate a single test case execution.

    Args:
        tc_info: Test case info dict with 'data' and 'file'
        idx: Test case index (1-based)
        execution_output: Output from test execution
        bulk_gen_chat_history: Chat history passed to the validator as-is
        validation_executor_cache: Cache of validation executors
        cache_key: Cache key for executor; "_validation" is appended to it
        client: API client
        validator_def: Validator agent definition (optional)
        agent_def: Test runner agent definition (used when validator_def is falsy)
        toolkit_config_path: Path to toolkit config
        config: CLI configuration
        model: Model override
        temperature: Temperature override
        max_tokens: Max tokens override
        work_dir: Working directory
        master_log: Log capture instance
        setup_executor_func: Function to setup executor
        verbose: When true, a CLI callback is attached to the invocation
        debug: Forwarded to create_cli_callback

    Returns:
        Test result dict with 'title', 'passed', 'file' and 'step_results'.
        When no validation executor is available or the validator output
        cannot be parsed, the fallback result from
        create_fallback_result_for_test is returned instead.
    """
    from .prompts import build_single_test_validation_prompt
    from .validation import extract_json_from_text, print_validation_diagnostics, create_fallback_result_for_test
    from .executor import create_executor_from_cache
    from ..agent_ui import extract_output_from_result

    test_case = tc_info['data']
    test_file = tc_info['file']
    test_name = test_case['name']

    # Validate test case using validation executor with accumulated history
    validation_prompt = build_single_test_validation_prompt(tc_info, idx, execution_output)

    master_log.print(f"[bold yellow]🔍 Validating test case (with execution history)...[/bold yellow]")
    master_log.print(f"[dim]{validation_prompt}[/dim]\n")

    # Create or retrieve isolated validation executor
    validation_cache_key = f"{cache_key}_validation"
    validation_agent_def = validator_def if validator_def else agent_def

    # Capture cache membership BEFORE the lookup. Checking afterwards cannot
    # distinguish "created" from "reused": if create_executor_from_cache stores
    # new executors in the cache (presumably it does; its body is not visible
    # here), a post-call check always reports a cache hit.
    was_cached = validation_cache_key in validation_executor_cache

    validation_executor, _validation_memory, _validation_mcp_session = create_executor_from_cache(
        validation_executor_cache, validation_cache_key, client, validation_agent_def,
        toolkit_config_path, config, model, temperature, max_tokens, work_dir, setup_executor_func
    )

    if was_cached:
        master_log.print(f"[dim]Using cached validation executor[/dim]")
    else:
        master_log.print(f"[dim]Created new isolated validation executor[/dim]")

    # For validation, use a separate thread id so it never shares state with the execution run
    validation_thread_id = f"validation_{idx}_{uuid.uuid4().hex[:8]}"

    if not validation_executor:
        master_log.print(f"[red]✗ No validation executor available[/red]")
        return create_fallback_result_for_test(test_case, test_file, 'No validation executor')

    invoke_config = None
    if verbose:
        cli_callback = create_cli_callback(verbose=True, debug=debug)
        invoke_config = RunnableConfig(callbacks=[cli_callback], configurable={"thread_id": validation_thread_id})

    master_log.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
    with master_log.status(f"[yellow]Validating test case...[/yellow]", spinner="dots"):
        validation_result = validation_executor.invoke(
            {
                "input": validation_prompt,
                "chat_history": bulk_gen_chat_history,  # Includes data gen and execution history
            },
            config=invoke_config or {"configurable": {"thread_id": validation_thread_id}},
        )

    validation_output = extract_output_from_result(validation_result)

    # Parse validation JSON. The broad except is deliberate: any problem with
    # the validator's output must degrade to a failed fallback result rather
    # than abort the whole run.
    try:
        validation_json = extract_json_from_text(validation_output)
        step_results = validation_json.get('steps', [])

        # Determine if test passed (all steps must pass; no steps means failed)
        test_passed = all(step.get('passed', False) for step in step_results) if step_results else False

        if test_passed:
            master_log.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
        else:
            master_log.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")

        # Display individual step results
        for step_result in step_results:
            step_num = step_result.get('step_number')
            step_title = step_result.get('title', '')
            passed = step_result.get('passed', False)
            details = step_result.get('details', '')

            if passed:
                master_log.print(f"  [green]✓ Step {step_num}: {step_title}[/green]")
                master_log.print(f"  [dim]{details}[/dim]")
            else:
                master_log.print(f"  [red]✗ Step {step_num}: {step_title}[/red]")
                master_log.print(f"  [dim]{details}[/dim]")

        master_log.print()

        return {
            'title': test_name,
            'passed': test_passed,
            'file': test_file.name,
            'step_results': step_results
        }

    except Exception as e:
        logger.debug(f"Validation parsing failed for {test_name}: {e}", exc_info=True)
        master_log.print(f"[yellow]⚠ Warning: Could not parse validation results for {test_name}[/yellow]")
        master_log.print(f"[yellow]Error: {str(e)}[/yellow]")

        # Enhanced diagnostic output
        print_validation_diagnostics(validation_output)

        # Generate fallback result
        master_log.print(f"\n[yellow]🔄 Generating fallback validation result...[/yellow]")
        fallback_result = create_fallback_result_for_test(
            test_case,
            test_file,
            f'Validation failed - could not parse validator output: {str(e)}'
        )
        master_log.print(f"[dim]Created {len(fallback_result['step_results'])} fallback step results[/dim]\n")

        return fallback_result
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
General utility functions for test execution.
|
|
3
|
+
|
|
4
|
+
Includes toolkit name extraction and other helper functions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def extract_toolkit_name(config_path: Optional[str]) -> str:
    """
    Derive a toolkit name from a toolkit config path.

    Args:
        config_path: Path to toolkit config (e.g., '.alita/tool_configs/github-config.json')

    Returns:
        Toolkit name (e.g., 'github'), or 'unknown' when the path is None/empty
        or neither the file name nor its parent directory yields a usable name
    """
    if not config_path:
        return 'unknown'

    cfg = Path(config_path)

    # Preferred source: the file name with the config markers stripped out,
    # e.g. '.alita/tool_configs/confluence-config.json' -> 'confluence'.
    base_name = cfg.stem
    for marker in ('_config', '-config'):
        base_name = base_name.replace(marker, '')
    if base_name and base_name.lower() != 'config':
        return base_name

    # Otherwise use the parent directory unless it is a generic container,
    # e.g. 'toolkits/github/config.yaml' -> 'github'.
    generic_dirs = ('.', 'toolkits', 'tool_configs', 'configs')
    folder = cfg.parent.name
    if folder and folder not in generic_dirs:
        return folder

    return 'unknown'
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validation utilities for test execution.
|
|
3
|
+
|
|
4
|
+
Handles JSON extraction, fallback results, and diagnostics.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, Any
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
console = Console()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def extract_json_from_text(text: str) -> dict:
    """Extract the first JSON object embedded in free-form text.

    Scanning starts at the first '{' and tracks brace depth until the matching
    '}' is found. Braces inside JSON string literals (including strings that
    contain escaped quotes) are ignored, so a value such as "}" does not end
    the object early.

    Args:
        text: Text that contains a JSON object somewhere inside it.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text contains no '{' or the opening brace is never
            closed. json.JSONDecodeError (a ValueError subclass) is raised
            when the delimited span is not valid JSON.
    """
    start_idx = text.find('{')
    if start_idx == -1:
        raise ValueError("No JSON found in text")

    depth = 0
    in_string = False
    escaped = False
    for i in range(start_idx, len(text)):
        char = text[i]

        if in_string:
            # Inside a string only an unescaped quote is significant.
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return json.loads(text[start_idx:i + 1])

    raise ValueError("Could not find matching closing brace")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def create_fallback_result_for_test(test_case: Dict[str, Any], test_file: Path, reason: str = 'Validation failed') -> Dict[str, Any]:
    """Build a failed result for one test case, marking every step as failed.

    Args:
        test_case: Parsed test case data; 'name' is required, 'steps' is
            optional and each step must provide 'number' and 'title'
        test_file: Path to test case file (only its file name is reported)
        reason: Reason for fallback, recorded on every step and on the result

    Returns:
        Fallback test result dict with step details
    """
    # One failed entry per declared step, all carrying the same reason.
    failed_steps = [
        {
            'step_number': step['number'],
            'title': step['title'],
            'passed': False,
            'details': reason,
        }
        for step in test_case.get('steps', [])
    ]

    return dict(
        title=test_case['name'],
        passed=False,
        file=test_file.name,
        step_results=failed_steps,
        validation_error=reason,
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def print_validation_diagnostics(validation_output: str) -> None:
    """Print diagnostic information for validation output.

    Reports the output length, whether it contains braces and the expected
    field names, and an excerpt of the text (first and last 200 characters
    when longer than 400, otherwise all of it).

    The validator text is escaped before being embedded in Rich markup, so
    bracketed content in it (for example "[/tag]") is shown literally instead
    of being parsed as markup. Unescaped, an unmatched closing tag would make
    Rich raise while diagnostics are being printed.

    Args:
        validation_output: The validation output to diagnose
    """
    # Local import: only this function needs it.
    from rich.markup import escape

    console.print(f"\n[bold red]🔍 Diagnostic Information:[/bold red]")
    console.print(f"[dim]Output length: {len(validation_output)} characters[/dim]")

    # Check for key JSON elements (plain substring checks, not a parse)
    has_json = '{' in validation_output and '}' in validation_output
    has_fields = 'test_number' in validation_output and 'steps' in validation_output

    console.print(f"[dim]Has JSON structure: {has_json}[/dim]")
    console.print(f"[dim]Has required fields: {has_fields}[/dim]")

    # Show relevant excerpt
    if len(validation_output) > 400:
        console.print(f"\n[red]First 200 chars:[/red] [dim]{escape(validation_output[:200])}[/dim]")
        console.print(f"[red]Last 200 chars:[/red] [dim]{escape(validation_output[-200:])}[/dim]")
    else:
        console.print(f"\n[red]Full output:[/red] [dim]{escape(validation_output)}[/dim]")
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main workflow orchestration for test case execution.
|
|
3
|
+
|
|
4
|
+
Coordinates the entire test execution flow from parsing to reporting.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import uuid
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Dict, Any, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def parse_all_test_cases(
    test_case_files_list: List[Path],
    master_log
) -> List[Dict[str, Any]]:
    """Parse every test case file, leaving out the ones that fail.

    Args:
        test_case_files_list: Paths of the test case files to parse
        master_log: Object whose ``print`` method receives one warning per
            file that could not be parsed

    Returns:
        One dict per successfully parsed file, with the path under 'file'
        and the parser's result under 'data'
    """
    from .parser import parse_test_case

    collected: List[Dict[str, Any]] = []
    for path in test_case_files_list:
        try:
            parsed = parse_test_case(str(path))
        except Exception as exc:
            # Best effort: one unreadable file must not stop the remaining ones.
            master_log.print(f"[yellow]⚠ Warning: Failed to parse {path.name}: {exc}[/yellow]")
            logger.debug(f"Parse error for {path.name}: {exc}", exc_info=True)
            continue

        collected.append({'file': path, 'data': parsed})

    return collected
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def filter_test_cases_needing_data_gen(
    parsed_test_cases: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Select the test cases for which test data should be generated.

    An entry is kept when the 'generate_test_data' value in its 'data'
    mapping is truthy; a missing key counts as True.

    Args:
        parsed_test_cases: All parsed test cases

    Returns:
        New list holding the kept entries in their original order
    """
    def wants_generated_data(entry: Dict[str, Any]) -> bool:
        # Absent flag means generation is wanted.
        return bool(entry['data'].get('generate_test_data', True))

    return list(filter(wants_generated_data, parsed_test_cases))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def execute_all_test_cases(
    parsed_test_cases: List[Dict[str, Any]],
    bulk_gen_chat_history: List[Dict[str, str]],
    test_cases_path: Path,
    agent_def: Dict[str, Any],
    validator_def: Optional[Dict[str, Any]],
    client,
    config,
    model: Optional[str],
    temperature: Optional[float],
    max_tokens: Optional[int],
    work_dir: str,
    master_log,
    setup_executor_func,
    verbose: bool = True,
    debug: bool = False,
) -> List[Dict[str, Any]]:
    """Execute all test cases sequentially and return their results.

    Each test case is executed, then validated. A test whose execution
    yields no output, or whose processing raises, is recorded as failed and
    the run continues with the next test. Both executor caches are cleaned
    up when the run ends, including when it ends with an exception.

    Args:
        parsed_test_cases: List of parsed test cases ('file' and 'data' keys)
        bulk_gen_chat_history: Chat history from data generation
        test_cases_path: Path to test cases directory
        agent_def: Test runner agent definition
        validator_def: Validator agent definition (optional)
        client: API client
        config: CLI configuration
        model: Model override
        temperature: Temperature override
        max_tokens: Max tokens override
        work_dir: Working directory
        master_log: Log capture instance
        setup_executor_func: Function to setup executor
        verbose: Forwarded to the execution and validation helpers
        debug: Forwarded to the execution and validation helpers

    Returns:
        List of test result dicts, one per test case
    """
    # Nothing to run: report and leave before importing the helper modules.
    if not parsed_test_cases:
        master_log.print("[yellow]No test cases to execute[/yellow]")
        return []

    from .parser import resolve_toolkit_config_path
    from .executor import cleanup_executor_cache
    from .prompts import build_single_test_execution_prompt
    from .test_runner import execute_single_test_case, validate_single_test_case
    from .validation import create_fallback_result_for_test

    master_log.print("\n[bold yellow]📋 Executing test cases sequentially...[/bold yellow]\n")

    # Show data generation context availability
    if bulk_gen_chat_history:
        master_log.print(f"[dim]✓ Data generation history available ({len(bulk_gen_chat_history)} messages) - shared with all test cases[/dim]\n")
    else:
        master_log.print("[dim]ℹ No data generation history (skipped or disabled)[/dim]\n")

    # Executor caches, shared by all test cases of this run
    executor_cache = {}
    validation_executor_cache = {}

    test_results = []
    total_tests = len(parsed_test_cases)

    try:
        # Execute each test case sequentially
        for idx, tc_info in enumerate(parsed_test_cases, 1):
            test_case = tc_info['data']
            test_file = tc_info['file']
            test_name = test_case['name']

            try:
                # Resolve toolkit config path
                toolkit_config_path = resolve_toolkit_config_path(
                    test_case.get('config_path', ''),
                    test_file,
                    test_cases_path
                )

                # Tests without a toolkit config share one cache entry
                cache_key = toolkit_config_path if toolkit_config_path else '__no_config__'

                # Execute single test case
                execution_output = execute_single_test_case(
                    tc_info, idx, total_tests, bulk_gen_chat_history, test_cases_path,
                    executor_cache, client, agent_def, config, model, temperature,
                    max_tokens, work_dir, master_log, setup_executor_func,
                    verbose=verbose,
                    debug=debug,
                )

                if not execution_output:
                    # No output to validate: record the test as failed
                    test_results.append({
                        'title': test_name,
                        'passed': False,
                        'file': test_file.name,
                        'step_results': []
                    })
                    continue

                # Append execution to history for validation; the shared
                # history list itself is not modified.
                validation_chat_history = bulk_gen_chat_history + [
                    {"role": "user", "content": build_single_test_execution_prompt(tc_info, idx)},
                    {"role": "assistant", "content": execution_output}
                ]

                # Validate test case
                test_result = validate_single_test_case(
                    tc_info, idx, execution_output, validation_chat_history,
                    validation_executor_cache, cache_key, client, validator_def,
                    agent_def, toolkit_config_path, config, model, temperature,
                    max_tokens, work_dir, master_log, setup_executor_func,
                    verbose=verbose,
                    debug=debug,
                )

                test_results.append(test_result)

            except Exception as e:
                # Boundary for a single test: log, record a failure, move on.
                logger.debug("Test execution failed for %s: %s", test_name, e, exc_info=True)
                master_log.print(f"[red]✗ Test execution failed: {e}[/red]")

                # Create fallback result
                fallback_result = create_fallback_result_for_test(
                    test_case,
                    test_file,
                    f'Test execution failed: {e}'
                )
                test_results.append(fallback_result)
                master_log.print()
    finally:
        # Cleanup executor caches even if the loop was interrupted or the
        # fallback handling itself raised.
        cleanup_executor_cache(executor_cache, "executor")
        cleanup_executor_cache(validation_executor_cache, "validation executor")

    return test_results
|