alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +156 -0
  6. alita_sdk/cli/agent_loader.py +245 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3113 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/testcases/__init__.py +94 -0
  23. alita_sdk/cli/testcases/data_generation.py +119 -0
  24. alita_sdk/cli/testcases/discovery.py +96 -0
  25. alita_sdk/cli/testcases/executor.py +84 -0
  26. alita_sdk/cli/testcases/logger.py +85 -0
  27. alita_sdk/cli/testcases/parser.py +172 -0
  28. alita_sdk/cli/testcases/prompts.py +91 -0
  29. alita_sdk/cli/testcases/reporting.py +125 -0
  30. alita_sdk/cli/testcases/setup.py +108 -0
  31. alita_sdk/cli/testcases/test_runner.py +282 -0
  32. alita_sdk/cli/testcases/utils.py +39 -0
  33. alita_sdk/cli/testcases/validation.py +90 -0
  34. alita_sdk/cli/testcases/workflow.py +196 -0
  35. alita_sdk/cli/toolkit.py +327 -0
  36. alita_sdk/cli/toolkit_loader.py +85 -0
  37. alita_sdk/cli/tools/__init__.py +43 -0
  38. alita_sdk/cli/tools/approval.py +224 -0
  39. alita_sdk/cli/tools/filesystem.py +1751 -0
  40. alita_sdk/cli/tools/planning.py +389 -0
  41. alita_sdk/cli/tools/terminal.py +414 -0
  42. alita_sdk/community/__init__.py +72 -12
  43. alita_sdk/community/inventory/__init__.py +236 -0
  44. alita_sdk/community/inventory/config.py +257 -0
  45. alita_sdk/community/inventory/enrichment.py +2137 -0
  46. alita_sdk/community/inventory/extractors.py +1469 -0
  47. alita_sdk/community/inventory/ingestion.py +3172 -0
  48. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  49. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  50. alita_sdk/community/inventory/parsers/base.py +295 -0
  51. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  52. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  53. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  54. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  55. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  56. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  57. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  58. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  59. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  60. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  61. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  62. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  63. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  64. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  65. alita_sdk/community/inventory/patterns/loader.py +348 -0
  66. alita_sdk/community/inventory/patterns/registry.py +198 -0
  67. alita_sdk/community/inventory/presets.py +535 -0
  68. alita_sdk/community/inventory/retrieval.py +1403 -0
  69. alita_sdk/community/inventory/toolkit.py +173 -0
  70. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  71. alita_sdk/community/inventory/visualize.py +1370 -0
  72. alita_sdk/configurations/__init__.py +1 -1
  73. alita_sdk/configurations/ado.py +141 -20
  74. alita_sdk/configurations/bitbucket.py +94 -2
  75. alita_sdk/configurations/confluence.py +130 -1
  76. alita_sdk/configurations/figma.py +76 -0
  77. alita_sdk/configurations/gitlab.py +91 -0
  78. alita_sdk/configurations/jira.py +103 -0
  79. alita_sdk/configurations/openapi.py +329 -0
  80. alita_sdk/configurations/qtest.py +72 -1
  81. alita_sdk/configurations/report_portal.py +96 -0
  82. alita_sdk/configurations/sharepoint.py +148 -0
  83. alita_sdk/configurations/testio.py +83 -0
  84. alita_sdk/configurations/testrail.py +88 -0
  85. alita_sdk/configurations/xray.py +93 -0
  86. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  87. alita_sdk/configurations/zephyr_essential.py +75 -0
  88. alita_sdk/runtime/clients/artifact.py +3 -3
  89. alita_sdk/runtime/clients/client.py +388 -46
  90. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  91. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  92. alita_sdk/runtime/clients/sandbox_client.py +8 -21
  93. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  94. alita_sdk/runtime/langchain/assistant.py +157 -39
  95. alita_sdk/runtime/langchain/constants.py +647 -1
  96. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  97. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
  100. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  102. alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
  103. alita_sdk/runtime/langchain/langraph_agent.py +405 -84
  104. alita_sdk/runtime/langchain/utils.py +106 -7
  105. alita_sdk/runtime/llms/preloaded.py +2 -6
  106. alita_sdk/runtime/models/mcp_models.py +61 -0
  107. alita_sdk/runtime/skills/__init__.py +91 -0
  108. alita_sdk/runtime/skills/callbacks.py +498 -0
  109. alita_sdk/runtime/skills/discovery.py +540 -0
  110. alita_sdk/runtime/skills/executor.py +610 -0
  111. alita_sdk/runtime/skills/input_builder.py +371 -0
  112. alita_sdk/runtime/skills/models.py +330 -0
  113. alita_sdk/runtime/skills/registry.py +355 -0
  114. alita_sdk/runtime/skills/skill_runner.py +330 -0
  115. alita_sdk/runtime/toolkits/__init__.py +31 -0
  116. alita_sdk/runtime/toolkits/application.py +29 -10
  117. alita_sdk/runtime/toolkits/artifact.py +20 -11
  118. alita_sdk/runtime/toolkits/datasource.py +13 -6
  119. alita_sdk/runtime/toolkits/mcp.py +783 -0
  120. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  121. alita_sdk/runtime/toolkits/planning.py +178 -0
  122. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  123. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  124. alita_sdk/runtime/toolkits/tools.py +356 -69
  125. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  126. alita_sdk/runtime/tools/__init__.py +10 -3
  127. alita_sdk/runtime/tools/application.py +27 -6
  128. alita_sdk/runtime/tools/artifact.py +511 -28
  129. alita_sdk/runtime/tools/data_analysis.py +183 -0
  130. alita_sdk/runtime/tools/function.py +67 -35
  131. alita_sdk/runtime/tools/graph.py +10 -4
  132. alita_sdk/runtime/tools/image_generation.py +148 -46
  133. alita_sdk/runtime/tools/llm.py +1003 -128
  134. alita_sdk/runtime/tools/loop.py +3 -1
  135. alita_sdk/runtime/tools/loop_output.py +3 -1
  136. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  137. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  138. alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
  139. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  140. alita_sdk/runtime/tools/planning/models.py +246 -0
  141. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  142. alita_sdk/runtime/tools/router.py +2 -4
  143. alita_sdk/runtime/tools/sandbox.py +65 -48
  144. alita_sdk/runtime/tools/skill_router.py +776 -0
  145. alita_sdk/runtime/tools/tool.py +3 -1
  146. alita_sdk/runtime/tools/vectorstore.py +9 -3
  147. alita_sdk/runtime/tools/vectorstore_base.py +70 -14
  148. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  149. alita_sdk/runtime/utils/constants.py +5 -1
  150. alita_sdk/runtime/utils/mcp_client.py +492 -0
  151. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  152. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  153. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  154. alita_sdk/runtime/utils/serialization.py +155 -0
  155. alita_sdk/runtime/utils/streamlit.py +40 -13
  156. alita_sdk/runtime/utils/toolkit_utils.py +30 -9
  157. alita_sdk/runtime/utils/utils.py +36 -0
  158. alita_sdk/tools/__init__.py +134 -35
  159. alita_sdk/tools/ado/repos/__init__.py +51 -32
  160. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  161. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  162. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  163. alita_sdk/tools/ado/utils.py +1 -18
  164. alita_sdk/tools/ado/wiki/__init__.py +25 -12
  165. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  166. alita_sdk/tools/ado/work_item/__init__.py +26 -13
  167. alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
  168. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  169. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  170. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  171. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  172. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  173. alita_sdk/tools/base/tool.py +5 -1
  174. alita_sdk/tools/base_indexer_toolkit.py +271 -84
  175. alita_sdk/tools/bitbucket/__init__.py +17 -11
  176. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  177. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  178. alita_sdk/tools/browser/__init__.py +5 -4
  179. alita_sdk/tools/carrier/__init__.py +5 -6
  180. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  181. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  182. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  183. alita_sdk/tools/chunkers/__init__.py +3 -1
  184. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  185. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  186. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  187. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  188. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  189. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  190. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  191. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  192. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  193. alita_sdk/tools/code/linter/__init__.py +10 -8
  194. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  195. alita_sdk/tools/code/sonar/__init__.py +11 -8
  196. alita_sdk/tools/code_indexer_toolkit.py +82 -22
  197. alita_sdk/tools/confluence/__init__.py +22 -16
  198. alita_sdk/tools/confluence/api_wrapper.py +107 -30
  199. alita_sdk/tools/confluence/loader.py +14 -2
  200. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  201. alita_sdk/tools/elastic/__init__.py +11 -8
  202. alita_sdk/tools/elitea_base.py +493 -30
  203. alita_sdk/tools/figma/__init__.py +58 -11
  204. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  205. alita_sdk/tools/figma/figma_client.py +73 -0
  206. alita_sdk/tools/figma/toon_tools.py +2748 -0
  207. alita_sdk/tools/github/__init__.py +14 -15
  208. alita_sdk/tools/github/github_client.py +224 -100
  209. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  210. alita_sdk/tools/github/schemas.py +14 -5
  211. alita_sdk/tools/github/tool.py +5 -1
  212. alita_sdk/tools/github/tool_prompts.py +9 -22
  213. alita_sdk/tools/gitlab/__init__.py +16 -11
  214. alita_sdk/tools/gitlab/api_wrapper.py +218 -48
  215. alita_sdk/tools/gitlab_org/__init__.py +10 -9
  216. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  217. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  218. alita_sdk/tools/google/bigquery/tool.py +5 -1
  219. alita_sdk/tools/google_places/__init__.py +11 -8
  220. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  221. alita_sdk/tools/jira/__init__.py +17 -10
  222. alita_sdk/tools/jira/api_wrapper.py +92 -41
  223. alita_sdk/tools/keycloak/__init__.py +11 -8
  224. alita_sdk/tools/localgit/__init__.py +9 -3
  225. alita_sdk/tools/localgit/local_git.py +62 -54
  226. alita_sdk/tools/localgit/tool.py +5 -1
  227. alita_sdk/tools/memory/__init__.py +12 -4
  228. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  229. alita_sdk/tools/ocr/__init__.py +11 -8
  230. alita_sdk/tools/openapi/__init__.py +491 -106
  231. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  232. alita_sdk/tools/openapi/tool.py +20 -0
  233. alita_sdk/tools/pandas/__init__.py +20 -12
  234. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  235. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  236. alita_sdk/tools/postman/__init__.py +10 -9
  237. alita_sdk/tools/pptx/__init__.py +11 -10
  238. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  239. alita_sdk/tools/qtest/__init__.py +31 -11
  240. alita_sdk/tools/qtest/api_wrapper.py +2135 -86
  241. alita_sdk/tools/rally/__init__.py +10 -9
  242. alita_sdk/tools/rally/api_wrapper.py +1 -1
  243. alita_sdk/tools/report_portal/__init__.py +12 -8
  244. alita_sdk/tools/salesforce/__init__.py +10 -8
  245. alita_sdk/tools/servicenow/__init__.py +17 -15
  246. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  247. alita_sdk/tools/sharepoint/__init__.py +10 -7
  248. alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
  249. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  250. alita_sdk/tools/sharepoint/utils.py +8 -2
  251. alita_sdk/tools/slack/__init__.py +10 -7
  252. alita_sdk/tools/slack/api_wrapper.py +2 -2
  253. alita_sdk/tools/sql/__init__.py +12 -9
  254. alita_sdk/tools/testio/__init__.py +10 -7
  255. alita_sdk/tools/testrail/__init__.py +11 -10
  256. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  257. alita_sdk/tools/utils/__init__.py +9 -4
  258. alita_sdk/tools/utils/content_parser.py +103 -18
  259. alita_sdk/tools/utils/text_operations.py +410 -0
  260. alita_sdk/tools/utils/tool_prompts.py +79 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
  262. alita_sdk/tools/xray/__init__.py +13 -9
  263. alita_sdk/tools/yagmail/__init__.py +9 -3
  264. alita_sdk/tools/zephyr/__init__.py +10 -7
  265. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
  266. alita_sdk/tools/zephyr_essential/__init__.py +10 -7
  267. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  268. alita_sdk/tools/zephyr_essential/client.py +2 -2
  269. alita_sdk/tools/zephyr_scale/__init__.py +11 -8
  270. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  271. alita_sdk/tools/zephyr_squad/__init__.py +10 -7
  272. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
  273. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  274. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  275. alita_sdk-0.3.379.dist-info/RECORD +0 -360
  276. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,282 @@
1
+ """
2
+ Single test case execution utilities.
3
+
4
+ Handles executing a single test case with the test runner agent.
5
+ """
6
+
7
+ import logging
8
+ import uuid
9
+ from pathlib import Path
10
+ from typing import Dict, Any, List, Optional
11
+ from rich.console import Console
12
+
13
+ from langchain_core.runnables import RunnableConfig
14
+
15
+ from ..callbacks import create_cli_callback
16
+
17
+ logger = logging.getLogger(__name__)
18
+ console = Console()
19
+
20
+
21
def execute_single_test_case(
    tc_info: Dict[str, Any],
    idx: int,
    total_tests: int,
    bulk_gen_chat_history: List[Dict[str, str]],
    test_cases_path: Path,
    executor_cache: Dict,
    client,
    agent_def: Dict[str, Any],
    config,
    model: Optional[str],
    temperature: Optional[float],
    max_tokens: Optional[int],
    work_dir: str,
    master_log,
    setup_executor_func,
    verbose: bool = True,
    debug: bool = False,
) -> Optional[str]:
    """Run one test case through the test runner agent and return its output.

    Args:
        tc_info: Test case entry; its 'data' and 'file' keys are read here.
        idx: Test case index shown in the log header and used in the thread id.
        total_tests: Total number of test cases (log header only).
        bulk_gen_chat_history: Messages passed unchanged as ``chat_history``.
        test_cases_path: Directory used when resolving the toolkit config path.
        executor_cache: Executor cache handed to ``create_executor_from_cache``.
        client: API client forwarded to the executor factory.
        agent_def: Agent definition forwarded to the executor factory.
        config: CLI configuration forwarded to the executor factory.
        model: Model override forwarded to the executor factory.
        temperature: Temperature override forwarded to the executor factory.
        max_tokens: Max tokens override forwarded to the executor factory.
        work_dir: Working directory forwarded to the executor factory.
        master_log: Log sink; its ``print`` and ``status`` methods are used.
        setup_executor_func: Executor setup callable forwarded to the factory.
        verbose: When true, a CLI callback is attached to the invocation.
        debug: Forwarded to ``create_cli_callback`` when ``verbose`` is true.

    Returns:
        The extracted execution output, or None when no agent executor
        is available.
    """
    from .parser import resolve_toolkit_config_path
    from .prompts import build_single_test_execution_prompt
    from .utils import extract_toolkit_name
    from .executor import create_executor_from_cache
    from ..agent_ui import extract_output_from_result

    case_data = tc_info['data']
    case_file = tc_info['file']
    case_name = case_data['name']

    # The declared config path drives both the resolved path and the
    # toolkit name shown in the log.
    resolved_config = resolve_toolkit_config_path(
        case_data.get('config_path', ''), case_file, test_cases_path
    )
    toolkit_label = extract_toolkit_name(case_data.get('config_path', ''))

    # Test cases without a toolkit config share one sentinel cache slot.
    executor_key = resolved_config or '__no_config__'
    thread_id = f"test_case_{idx}_{uuid.uuid4().hex[:8]}"

    # Header block in the master log.
    separator = "=" * 80
    master_log.print("\n\n" + separator)
    master_log.print(f"[bold cyan]Test Case {idx}/{total_tests} - {case_name}[/bold cyan]")
    master_log.print(f"[dim]Toolkit: {toolkit_label}[/dim]")
    master_log.print(f"[dim]Config: {resolved_config or 'None'}[/dim]")
    master_log.print(separator + "\n")

    # Only the executor is needed here; the other two returned values are unused.
    agent_executor, _memory, _mcp_session_manager = create_executor_from_cache(
        executor_cache, executor_key, client, agent_def, resolved_config,
        config, model, temperature, max_tokens, work_dir, setup_executor_func
    )

    prompt_text = build_single_test_execution_prompt(tc_info, idx)
    master_log.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
    master_log.print(f"[dim]Executing test case with the prompt {prompt_text}[/dim]")

    if not agent_executor:
        master_log.print("[red]✗ No agent executor available[/red]")
        return None

    # Verbose runs attach the CLI callback; otherwise only the thread id is passed.
    callback_config = None
    if verbose:
        callback_config = RunnableConfig(
            callbacks=[create_cli_callback(verbose=True, debug=debug)],
            configurable={"thread_id": thread_id},
        )
    run_config = callback_config or {"configurable": {"thread_id": thread_id}}

    payload = {
        "input": prompt_text,
        "chat_history": bulk_gen_chat_history,  # ONLY data gen history, no accumulation
    }
    with master_log.status("[yellow]Executing test case...[/yellow]", spinner="dots"):
        raw_result = agent_executor.invoke(payload, config=run_config)

    execution_output = extract_output_from_result(raw_result)

    master_log.print("[green]✓ Test case executed[/green]")
    master_log.print(f"[dim]{execution_output}[/dim]\n")

    return execution_output
129
+
130
+
131
def validate_single_test_case(
    tc_info: Dict[str, Any],
    idx: int,
    execution_output: str,
    bulk_gen_chat_history: List[Dict[str, str]],
    validation_executor_cache: Dict,
    cache_key: str,
    client,
    validator_def: Optional[Dict[str, Any]],
    agent_def: Dict[str, Any],
    toolkit_config_path: Optional[str],
    config,
    model: Optional[str],
    temperature: Optional[float],
    max_tokens: Optional[int],
    work_dir: str,
    master_log,
    setup_executor_func,
    verbose: bool = True,
    debug: bool = False,
) -> Dict[str, Any]:
    """Validate a single test case execution.

    Args:
        tc_info: Test case info dict; its 'data' and 'file' keys are read here.
        idx: Test case index, used in the prompt and the validation thread id.
        execution_output: Output from test execution, embedded in the prompt.
        bulk_gen_chat_history: Messages passed unchanged as ``chat_history``.
        validation_executor_cache: Cache of validation executors.
        cache_key: Base cache key; ``"_validation"`` is appended to it.
        client: API client.
        validator_def: Validator agent definition (optional).
        agent_def: Test runner agent definition, used when ``validator_def``
            is falsy.
        toolkit_config_path: Path to toolkit config.
        config: CLI configuration.
        model: Model override.
        temperature: Temperature override.
        max_tokens: Max tokens override.
        work_dir: Working directory.
        master_log: Log sink; its ``print`` and ``status`` methods are used.
        setup_executor_func: Function to setup executor.
        verbose: When true, a CLI callback is attached to the invocation.
        debug: Forwarded to ``create_cli_callback`` when ``verbose`` is true.

    Returns:
        Test result dict with 'title', 'passed', 'file' and 'step_results'.
        When no executor is available or the validator output cannot be
        parsed, the fallback result from ``create_fallback_result_for_test``
        is returned instead.
    """
    from .prompts import build_single_test_validation_prompt
    from .validation import extract_json_from_text, print_validation_diagnostics, create_fallback_result_for_test
    from .executor import create_executor_from_cache
    from ..agent_ui import extract_output_from_result

    test_case = tc_info['data']
    test_file = tc_info['file']
    test_name = test_case['name']

    # Build the validation prompt from the test case and its execution output
    validation_prompt = build_single_test_validation_prompt(tc_info, idx, execution_output)

    master_log.print(f"[bold yellow]🔍 Validating test case (with execution history)...[/bold yellow]")
    master_log.print(f"[dim]{validation_prompt}[/dim]\n")

    # Create or retrieve isolated validation executor
    validation_cache_key = f"{cache_key}_validation"
    validation_agent_def = validator_def if validator_def else agent_def

    # Capture cache membership BEFORE the lookup. Checking afterwards would
    # always report "cached" if create_executor_from_cache stores what it
    # creates (NOTE(review): presumed from its name - confirm in executor.py).
    executor_was_cached = validation_cache_key in validation_executor_cache

    validation_executor, _validation_memory, _validation_mcp_session = create_executor_from_cache(
        validation_executor_cache, validation_cache_key, client, validation_agent_def,
        toolkit_config_path, config, model, temperature, max_tokens, work_dir, setup_executor_func
    )

    if executor_was_cached:
        master_log.print(f"[dim]Using cached validation executor[/dim]")
    else:
        master_log.print(f"[dim]Created new isolated validation executor[/dim]")

    # Validation runs on its own thread id, separate from the execution thread
    validation_thread_id = f"validation_{idx}_{uuid.uuid4().hex[:8]}"

    if not validation_executor:
        master_log.print(f"[red]✗ No validation executor available[/red]")
        return create_fallback_result_for_test(test_case, test_file, 'No validation executor')

    invoke_config = None
    if verbose:
        cli_callback = create_cli_callback(verbose=True, debug=debug)
        invoke_config = RunnableConfig(callbacks=[cli_callback], configurable={"thread_id": validation_thread_id})

    master_log.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
    with master_log.status(f"[yellow]Validating test case...[/yellow]", spinner="dots"):
        validation_result = validation_executor.invoke(
            {
                "input": validation_prompt,
                "chat_history": bulk_gen_chat_history,  # Includes data gen and execution history
            },
            config=invoke_config or {"configurable": {"thread_id": validation_thread_id}},
        )

    validation_output = extract_output_from_result(validation_result)

    # Parse validation JSON; any failure below falls through to the fallback result
    try:
        validation_json = extract_json_from_text(validation_output)
        step_results = validation_json.get('steps', [])

        # The test passes only if there is at least one step and every step passed
        test_passed = all(step.get('passed', False) for step in step_results) if step_results else False

        if test_passed:
            master_log.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
        else:
            master_log.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")

        # Display individual step results
        for step_result in step_results:
            step_num = step_result.get('step_number')
            step_title = step_result.get('title', '')
            passed = step_result.get('passed', False)
            details = step_result.get('details', '')

            if passed:
                master_log.print(f" [green]✓ Step {step_num}: {step_title}[/green]")
            else:
                master_log.print(f" [red]✗ Step {step_num}: {step_title}[/red]")
            # The details line is identical for passed and failed steps
            master_log.print(f" [dim]{details}[/dim]")

        master_log.print()

        return {
            'title': test_name,
            'passed': test_passed,
            'file': test_file.name,
            'step_results': step_results
        }

    except Exception as e:
        logger.debug(f"Validation parsing failed for {test_name}: {e}", exc_info=True)
        master_log.print(f"[yellow]⚠ Warning: Could not parse validation results for {test_name}[/yellow]")
        master_log.print(f"[yellow]Error: {str(e)}[/yellow]")

        # Enhanced diagnostic output
        print_validation_diagnostics(validation_output)

        # Generate fallback result
        master_log.print(f"\n[yellow]🔄 Generating fallback validation result...[/yellow]")
        fallback_result = create_fallback_result_for_test(
            test_case,
            test_file,
            f'Validation failed - could not parse validator output: {str(e)}'
        )
        master_log.print(f"[dim]Created {len(fallback_result['step_results'])} fallback step results[/dim]\n")

        return fallback_result
@@ -0,0 +1,39 @@
1
+ """
2
+ General utility functions for test execution.
3
+
4
+ Includes toolkit name extraction and other helper functions.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+
11
def extract_toolkit_name(config_path: Optional[str]) -> str:
    """
    Extract toolkit name from config path.

    Args:
        config_path: Path to toolkit config (e.g., '.alita/tool_configs/github-config.json')

    Returns:
        Toolkit name (e.g., 'github') or 'unknown' if path is None/empty
        or no name can be derived from the file or parent directory name
    """
    if not config_path:
        return 'unknown'

    path = Path(config_path)

    # First, try the filename with one trailing config suffix removed.
    # For paths like '.alita/tool_configs/confluence-config.json' -> 'confluence'.
    # Only a trailing suffix is stripped: removing the substring anywhere
    # would mangle names such as 'github_configuration' -> 'githuburation'.
    stem = path.stem
    for suffix in ('_config', '-config'):
        if stem.endswith(suffix):
            stem = stem[:-len(suffix)]
            break
    if stem and stem.lower() != 'config':
        return stem

    # Fallback: use parent directory name if it's not a common directory
    # For paths like 'toolkits/github/config.yaml' -> 'github'
    parent_name = path.parent.name
    if parent_name and parent_name not in ('.', 'toolkits', 'tool_configs', 'configs'):
        return parent_name

    # Last resort
    return 'unknown'
@@ -0,0 +1,90 @@
1
+ """
2
+ Validation utilities for test execution.
3
+
4
+ Handles JSON extraction, fallback results, and diagnostics.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Dict, Any
11
+ from rich.console import Console
12
+
13
+ logger = logging.getLogger(__name__)
14
+ console = Console()
15
+
16
+
17
def extract_json_from_text(text: str) -> dict:
    """Extract the first JSON object embedded in text.

    Parsing starts at the first '{' and is delegated to the JSON decoder,
    so braces inside string values (e.g. '{"a": "}"}') do not end the
    object early, and any text after the object is ignored.

    Args:
        text: Arbitrary text expected to contain a JSON object.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text contains no '{'. If the content starting at
            the first '{' is not valid JSON, ``json.JSONDecodeError``
            (a ``ValueError`` subclass) is raised.
    """
    start_idx = text.find('{')
    if start_idx == -1:
        raise ValueError("No JSON found in text")

    # raw_decode stops at the end of the first complete value and tolerates
    # trailing text, which manual brace counting cannot do reliably.
    parsed, _end_idx = json.JSONDecoder().raw_decode(text, start_idx)
    return parsed
38
+
39
+
40
def create_fallback_result_for_test(test_case: Dict[str, Any], test_file: Path, reason: str = 'Validation failed') -> Dict[str, Any]:
    """Build a failed result for one test case, marking every step as failed.

    Args:
        test_case: Parsed test case data; 'name' is required, 'steps' is
            optional, and each step must provide 'number' and 'title'.
        test_file: Path to the test case file; only its name is reported.
        reason: Text recorded as each step's details and as the
            result's 'validation_error'.

    Returns:
        Result dict with 'title', 'passed' (always False), 'file',
        'step_results' and 'validation_error'.
    """
    # One failed entry per declared step, all carrying the same reason.
    failed_steps = [
        {
            'step_number': step['number'],
            'title': step['title'],
            'passed': False,
            'details': reason,
        }
        for step in test_case.get('steps', [])
    ]

    return {
        'title': test_case['name'],
        'passed': False,
        'file': test_file.name,
        'step_results': failed_steps,
        'validation_error': reason,
    }
67
+
68
+
69
def print_validation_diagnostics(validation_output: str) -> None:
    """Print diagnostic information for validation output.

    Reports the output length, whether it contains braces and the expected
    field names, and an excerpt: the first and last 200 characters when the
    output exceeds 400 characters, otherwise the full output.

    Args:
        validation_output: The validation output to diagnose
    """
    # rich is already used by this module (Console). The raw output is
    # escaped so bracketed text in it is printed literally instead of being
    # interpreted as console markup, which could hide text or raise on an
    # unmatched closing tag.
    from rich.markup import escape

    console.print(f"\n[bold red]🔍 Diagnostic Information:[/bold red]")
    console.print(f"[dim]Output length: {len(validation_output)} characters[/dim]")

    # Check for key JSON elements
    has_json = '{' in validation_output and '}' in validation_output
    has_fields = 'test_number' in validation_output and 'steps' in validation_output

    console.print(f"[dim]Has JSON structure: {has_json}[/dim]")
    console.print(f"[dim]Has required fields: {has_fields}[/dim]")

    # Show relevant excerpt (slice first, then escape, so the excerpt
    # boundaries refer to the raw output)
    if len(validation_output) > 400:
        console.print(f"\n[red]First 200 chars:[/red] [dim]{escape(validation_output[:200])}[/dim]")
        console.print(f"[red]Last 200 chars:[/red] [dim]{escape(validation_output[-200:])}[/dim]")
    else:
        console.print(f"\n[red]Full output:[/red] [dim]{escape(validation_output)}[/dim]")
@@ -0,0 +1,196 @@
1
+ """
2
+ Main workflow orchestration for test case execution.
3
+
4
+ Coordinates the entire test execution flow from parsing to reporting.
5
+ """
6
+
7
+ import logging
8
+ import uuid
9
+ from pathlib import Path
10
+ from typing import List, Dict, Any, Optional, Tuple
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def parse_all_test_cases(
    test_case_files_list: List[Path],
    master_log
) -> List[Dict[str, Any]]:
    """Parse every test case file, skipping the ones that fail to parse.

    A file whose parsing raises is reported as a warning through
    ``master_log``, logged at debug level with the traceback, and left out of
    the result; the remaining files are still processed.

    Args:
        test_case_files_list: List of test case files to parse
        master_log: Log capture instance

    Returns:
        List of parsed test case dicts with 'file' and 'data' keys
    """
    from .parser import parse_test_case

    collected: List[Dict[str, Any]] = []
    for path in test_case_files_list:
        try:
            parsed = parse_test_case(str(path))
        except Exception as exc:
            # Best effort: one unreadable file must not stop the others
            master_log.print(f"[yellow]⚠ Warning: Failed to parse {path.name}: {exc}[/yellow]")
            logger.debug(f"Parse error for {path.name}: {exc}", exc_info=True)
        else:
            collected.append({'file': path, 'data': parsed})

    return collected
43
+
44
+
45
def filter_test_cases_needing_data_gen(
    parsed_test_cases: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Select the test cases that ask for test data generation.

    Args:
        parsed_test_cases: All parsed test cases (each with a 'data' dict)

    Returns:
        The entries, in their original order, whose 'data' has a truthy
        'generate_test_data' value or no such key at all
    """
    selected: List[Dict[str, Any]] = []
    for entry in parsed_test_cases:
        # A missing flag counts as opted in
        wants_data = entry['data'].get('generate_test_data', True)
        if wants_data:
            selected.append(entry)
    return selected
60
+
61
+
62
def execute_all_test_cases(
    parsed_test_cases: List[Dict[str, Any]],
    bulk_gen_chat_history: List[Dict[str, str]],
    test_cases_path: Path,
    agent_def: Dict[str, Any],
    validator_def: Optional[Dict[str, Any]],
    client,
    config,
    model: Optional[str],
    temperature: Optional[float],
    max_tokens: Optional[int],
    work_dir: str,
    master_log,
    setup_executor_func,
    verbose: bool = True,
    debug: bool = False,
) -> List[Dict[str, Any]]:
    """Execute all test cases and return results.

    Test cases run one after another. Each one is executed, then validated
    against a chat history made of the data generation history plus this
    test's execution prompt and output. A test whose execution yields no
    output, or that raises, is recorded as failed and the run continues.
    The executor caches are cleaned up when the run ends, including when it
    is interrupted or aborted by an exception.

    Args:
        parsed_test_cases: List of parsed test cases
        bulk_gen_chat_history: Chat history from data generation (an empty
            or missing history is treated as "no history")
        test_cases_path: Path to test cases directory
        agent_def: Test runner agent definition
        validator_def: Validator agent definition (optional)
        client: API client
        config: CLI configuration
        model: Model override
        temperature: Temperature override
        max_tokens: Max tokens override
        work_dir: Working directory
        master_log: Log capture instance
        setup_executor_func: Function to setup executor
        verbose: Forwarded to the single-test execution and validation calls
        debug: Forwarded to the single-test execution and validation calls

    Returns:
        List of test result dicts
    """
    from .parser import resolve_toolkit_config_path
    from .utils import extract_toolkit_name  # NOTE(review): not used below
    from .executor import cleanup_executor_cache
    from .prompts import build_single_test_execution_prompt
    from .test_runner import execute_single_test_case, validate_single_test_case
    from .validation import create_fallback_result_for_test

    if not parsed_test_cases:
        master_log.print("[yellow]No test cases to execute[/yellow]")
        return []

    master_log.print("\n[bold yellow]📋 Executing test cases sequentially...[/bold yellow]\n")

    # Show data generation context availability
    if bulk_gen_chat_history:
        master_log.print(f"[dim]✓ Data generation history available ({len(bulk_gen_chat_history)} messages) - shared with all test cases[/dim]\n")
    else:
        master_log.print("[dim]ℹ No data generation history (skipped or disabled)[/dim]\n")

    # Executor caches
    executor_cache = {}
    validation_executor_cache = {}

    # Execute each test case sequentially
    test_results = []
    total_tests = len(parsed_test_cases)

    try:
        for idx, tc_info in enumerate(parsed_test_cases, 1):
            test_case = tc_info['data']
            test_file = tc_info['file']
            test_name = test_case['name']

            try:
                # Resolve toolkit config path
                toolkit_config_path = resolve_toolkit_config_path(
                    test_case.get('config_path', ''),
                    test_file,
                    test_cases_path
                )

                # Validation executors are cached per toolkit config
                cache_key = toolkit_config_path if toolkit_config_path else '__no_config__'

                # Execute single test case
                execution_output = execute_single_test_case(
                    tc_info, idx, total_tests, bulk_gen_chat_history, test_cases_path,
                    executor_cache, client, agent_def, config, model, temperature,
                    max_tokens, work_dir, master_log, setup_executor_func,
                    verbose=verbose,
                    debug=debug,
                )

                if not execution_output:
                    # Create fallback result for failed execution
                    test_results.append({
                        'title': test_name,
                        'passed': False,
                        'file': test_file.name,
                        'step_results': []
                    })
                    continue

                # Append execution to history for validation. The `or []`
                # keeps a missing history from failing the concatenation.
                validation_chat_history = (bulk_gen_chat_history or []) + [
                    {"role": "user", "content": build_single_test_execution_prompt(tc_info, idx)},
                    {"role": "assistant", "content": execution_output}
                ]

                # Validate test case
                test_result = validate_single_test_case(
                    tc_info, idx, execution_output, validation_chat_history,
                    validation_executor_cache, cache_key, client, validator_def,
                    agent_def, toolkit_config_path, config, model, temperature,
                    max_tokens, work_dir, master_log, setup_executor_func,
                    verbose=verbose,
                    debug=debug,
                )

                test_results.append(test_result)

            except Exception as e:
                # Deliberate best effort: one failing test must not stop the run
                logger.debug(f"Test execution failed for {test_name}: {e}", exc_info=True)
                master_log.print(f"[red]✗ Test execution failed: {e}[/red]")

                # Create fallback result
                fallback_result = create_fallback_result_for_test(
                    test_case,
                    test_file,
                    f'Test execution failed: {str(e)}'
                )
                test_results.append(fallback_result)
                master_log.print()
    finally:
        # Cleanup executor caches even when the loop is interrupted or an
        # error escapes it; the nested finally makes sure the second cache
        # is still cleaned up if cleaning the first one raises.
        try:
            cleanup_executor_cache(executor_cache, "executor")
        finally:
            cleanup_executor_cache(validation_executor_cache, "validation executor")

    return test_results