alita-sdk 0.3.602__py3-none-any.whl → 0.3.609__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (32)
  1. alita_sdk/cli/agents.py +108 -826
  2. alita_sdk/cli/testcases/__init__.py +94 -0
  3. alita_sdk/cli/testcases/data_generation.py +119 -0
  4. alita_sdk/cli/testcases/discovery.py +96 -0
  5. alita_sdk/cli/testcases/executor.py +84 -0
  6. alita_sdk/cli/testcases/logger.py +85 -0
  7. alita_sdk/cli/testcases/parser.py +172 -0
  8. alita_sdk/cli/testcases/prompts.py +91 -0
  9. alita_sdk/cli/testcases/reporting.py +125 -0
  10. alita_sdk/cli/testcases/setup.py +108 -0
  11. alita_sdk/cli/testcases/test_runner.py +282 -0
  12. alita_sdk/cli/testcases/utils.py +39 -0
  13. alita_sdk/cli/testcases/validation.py +90 -0
  14. alita_sdk/cli/testcases/workflow.py +196 -0
  15. alita_sdk/configurations/openapi.py +2 -2
  16. alita_sdk/runtime/clients/artifact.py +1 -1
  17. alita_sdk/runtime/tools/artifact.py +253 -8
  18. alita_sdk/runtime/tools/llm.py +12 -11
  19. alita_sdk/tools/bitbucket/api_wrapper.py +31 -30
  20. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  21. alita_sdk/tools/confluence/api_wrapper.py +8 -1
  22. alita_sdk/tools/elitea_base.py +40 -36
  23. alita_sdk/tools/figma/api_wrapper.py +140 -83
  24. alita_sdk/tools/github/github_client.py +18 -10
  25. alita_sdk/tools/github/graphql_client_wrapper.py +1 -0
  26. alita_sdk/tools/utils/text_operations.py +156 -52
  27. {alita_sdk-0.3.602.dist-info → alita_sdk-0.3.609.dist-info}/METADATA +1 -1
  28. {alita_sdk-0.3.602.dist-info → alita_sdk-0.3.609.dist-info}/RECORD +32 -19
  29. {alita_sdk-0.3.602.dist-info → alita_sdk-0.3.609.dist-info}/WHEEL +0 -0
  30. {alita_sdk-0.3.602.dist-info → alita_sdk-0.3.609.dist-info}/entry_points.txt +0 -0
  31. {alita_sdk-0.3.602.dist-info → alita_sdk-0.3.609.dist-info}/licenses/LICENSE +0 -0
  32. {alita_sdk-0.3.602.dist-info → alita_sdk-0.3.609.dist-info}/top_level.txt +0 -0
alita_sdk/cli/testcases/prompts.py
@@ -0,0 +1,91 @@
+ """
+ Prompt building utilities for test execution.
+
+ Builds prompts for data generation, test execution, and validation.
+ """
+
+ from typing import Dict, Any
+
+
+ def build_bulk_data_gen_prompt(parsed_test_cases: list) -> str:
+     """Build consolidated requirements text for bulk test data generation."""
+     requirements = []
+     for idx, tc in enumerate(parsed_test_cases, 1):
+         test_case = tc['data']
+         test_file = tc['file']
+         # Build parts for this test case (do not include separator lines here;
+         # the entire block is wrapped with separators at the top-level)
+         parts = [f"Test Case #{idx}: {test_case['name']}", f"File: {test_file.name}", ""]
+
+         if test_case.get('test_data_config'):
+             parts.append("Test Data Configuration:")
+             td = test_case['test_data_config']
+             raw_lines = str(td).splitlines()
+             for line in raw_lines:
+                 parts.append(f"{line}")
+
+         if test_case.get('prerequisites'):
+             parts.append(f"\nPre-requisites:\n{test_case['prerequisites']}")
+
+         requirements.append("\n".join(parts))
+
+     # If no requirements were collected, return an empty string to avoid
+     # producing a prompt with only separator lines.
+     if not requirements:
+         return ""
+
+     # Use a visible divider between test cases so each entry is clearly separated
+     divider = '-' * 40
+     body = f"\n\n{divider}\n\n".join(requirements)
+     return f"{('='*60)}\n\n{body}\n\n{('='*60)}"
+
+
+ def build_single_test_execution_prompt(test_case_info: Dict[str, Any], test_number: int) -> str:
+     """Build execution prompt for a single test case."""
+     test_case = test_case_info['data']
+     test_file = test_case_info['file']
+
+     parts = [
+         f"\n{'='*80}",
+         f"TEST CASE #{test_number}: {test_case['name']}",
+         f"File: {test_file.name}",
+         f"{'='*80}"
+     ]
+
+     if test_case['steps']:
+         for step in test_case['steps']:
+             parts.append(f"\nStep {step['number']}: {step['title']}")
+             parts.append(step['instruction'])
+     else:
+         parts.append("\n(No steps defined)")
+
+     return "\n".join(parts)
+
+
+ def build_single_test_validation_prompt(test_case_info: Dict[str, Any], test_number: int, execution_output: str) -> str:
+     """Build validation prompt for a single test case."""
+     test_case = test_case_info['data']
+
+     parts = [
+         f"\nTest Case #{test_number}: {test_case['name']}"
+     ]
+
+     if test_case['steps']:
+         for step in test_case['steps']:
+             parts.append(f" Step {step['number']}: {step['title']}")
+             if step['expectation']:
+                 parts.append(f" Expected: {step['expectation']}")
+
+     parts.append(f"\n\nActual Execution Results:\n{execution_output}\n")
+
+     # Escape quotes in test name for valid JSON in prompt
+     escaped_test_name = test_case['name'].replace('"', '\\"')
+
+     parts.append(f"""\nBased on the execution results above, validate this test case.
+ {{
+ "test_number": {test_number},
+ "test_name": "{escaped_test_name}"
+ }}
+ """)
+
+     return "\n".join(parts)
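To illustrate how these builders are meant to be called, here is a minimal sketch. It assumes the new modules are importable as alita_sdk.cli.testcases.* (per the file list above); the test-case file name and field values are invented for the example.

    from pathlib import Path
    from alita_sdk.cli.testcases.prompts import (
        build_bulk_data_gen_prompt,
        build_single_test_execution_prompt,
    )

    # Hypothetical parsed test case in the {'file': ..., 'data': ...} shape
    # the builders expect.
    tc = {
        "file": Path("login.testcase.md"),
        "data": {
            "name": "User can log in",
            "test_data_config": {"users": 1},
            "prerequisites": "A registered account exists",
            "steps": [{"number": 1, "title": "Open login page",
                       "instruction": "Navigate to /login",
                       "expectation": "Login form is shown"}],
        },
    }

    print(build_bulk_data_gen_prompt([tc]))           # consolidated data-generation requirements
    print(build_single_test_execution_prompt(tc, 1))  # per-test execution prompt

The validation prompt builder follows the same pattern but also embeds the execution output and a JSON skeleton for the validator to fill in.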
alita_sdk/cli/testcases/reporting.py
@@ -0,0 +1,125 @@
+ """
+ Test result reporting and summary generation.
+
+ Handles generating test reports and displaying summaries.
+ """
+
+ import json
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Dict, Any
+ from rich.console import Console
+ from rich.table import Table
+ from rich import box
+
+ console = Console()
+
+
+ def generate_summary_report(test_results: List[Dict[str, Any]]) -> Table:
+     """Generate a summary table for test results.
+
+     Args:
+         test_results: List of test result dicts
+
+     Returns:
+         Rich Table with summary statistics
+     """
+     total_tests = len(test_results)
+     passed_tests = sum(1 for r in test_results if r['passed'])
+     failed_tests = total_tests - passed_tests
+     pass_rate = (passed_tests / total_tests * 100) if total_tests > 0 else 0
+
+     summary_table = Table(box=box.ROUNDED, border_style="cyan")
+     summary_table.add_column("Metric", style="bold")
+     summary_table.add_column("Value", justify="right")
+
+     summary_table.add_row("Total Tests", str(total_tests))
+     summary_table.add_row("Passed", f"[green]{passed_tests}[/green]")
+     summary_table.add_row("Failed", f"[red]{failed_tests}[/red]")
+     summary_table.add_row("Pass Rate", f"{pass_rate:.1f}%")
+
+     return summary_table
+
+
+ def save_structured_report(
+     test_results: List[Dict[str, Any]],
+     results_dir: str,
+     log_file: Path = None
+ ) -> Path:
+     """Save structured JSON report of test results.
+
+     Args:
+         test_results: List of test result dicts
+         results_dir: Directory to save report
+         log_file: Optional path to log file
+
+     Returns:
+         Path to saved report file
+     """
+     results_path = Path(results_dir)
+     results_path.mkdir(parents=True, exist_ok=True)
+
+     total_tests = len(test_results)
+     passed_tests = sum(1 for r in test_results if r['passed'])
+     failed_tests = total_tests - passed_tests
+     pass_rate = (passed_tests / total_tests * 100) if total_tests > 0 else 0
+     overall_result = "pass" if failed_tests == 0 else "fail"
+
+     structured_report = {
+         "test_cases": [
+             {
+                 "title": r['title'],
+                 "passed": r['passed'],
+                 "steps": r.get('step_results', [])
+             }
+             for r in test_results
+         ],
+         "overall_result": overall_result,
+         "summary": {
+             "total_tests": total_tests,
+             "passed": passed_tests,
+             "failed": failed_tests,
+             "pass_rate": f"{pass_rate:.1f}%"
+         },
+         "timestamp": datetime.now().isoformat(),
+         "log_file": str(log_file) if log_file else None
+     }
+
+     summary_file = results_path / "test_execution_summary.json"
+
+     console.print(f"\n[bold yellow]💾 Saving test execution summary...[/bold yellow]")
+     with open(summary_file, 'w') as f:
+         json.dump(structured_report, f, indent=2)
+     console.print(f"[green]✓ Summary saved to {summary_file}[/green]\n")
+
+     return summary_file
+
+
+ def print_test_execution_summary(
+     test_results: List[Dict[str, Any]],
+     results_dir: str,
+     session_name: str
+ ) -> None:
+     """Print test execution summary to console.
+
+     Args:
+         test_results: List of test result dicts
+         results_dir: Directory where results are saved
+         session_name: Session name for finding log file
+     """
+     console.print(f"\n[bold]{'='*60}[/bold]")
+     console.print(f"[bold cyan]📊 Test Execution Summary[/bold cyan]")
+     console.print(f"[bold]{'='*60}[/bold]\n")
+
+     summary_table = generate_summary_report(test_results)
+     console.print(summary_table)
+
+     # Show log file location
+     results_path = Path(results_dir)
+     toolkit_name = session_name.replace('test-execution-', '')
+     toolkit_dir = results_path / toolkit_name
+     log_files = sorted(toolkit_dir.glob(f"*{session_name}.txt")) if toolkit_dir.exists() else []
+
+     console.print(f"\n[bold cyan]📁 Log File[/bold cyan]")
+     if log_files:
+         console.print(f" [dim]{log_files[0]}[/dim]")
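A short sketch of the reporting helpers above, again assuming the alita_sdk.cli.testcases.reporting import path; the result dicts and the test_results directory name are fabricated for illustration.

    from rich.console import Console
    from alita_sdk.cli.testcases.reporting import generate_summary_report, save_structured_report

    results = [
        {"title": "User can log in", "passed": True,
         "step_results": [{"step_number": 1, "title": "Open login page",
                           "passed": True, "details": "Page loaded"}]},
        {"title": "Password reset", "passed": False, "step_results": []},
    ]

    Console().print(generate_summary_report(results))         # Rich table: totals and pass rate
    report = save_structured_report(results, "test_results")  # writes test_execution_summary.json
    print(report)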
alita_sdk/cli/testcases/setup.py
@@ -0,0 +1,108 @@
+ """
+ Agent setup utilities for test execution.
+
+ Handles loading and validating test runner, data generator, and validator agents.
+ """
+
+ import logging
+ from pathlib import Path
+ from typing import Optional, Dict, Any, Tuple
+ from rich.console import Console
+
+ logger = logging.getLogger(__name__)
+ console = Console()
+
+
+ def load_test_runner_agent(agent_source: str) -> Tuple[Dict[str, Any], str]:
+     """Load test runner agent definition.
+
+     Args:
+         agent_source: Path to agent definition file
+
+     Returns:
+         Tuple of (agent_def, agent_name)
+
+     Raises:
+         FileNotFoundError: If agent file doesn't exist
+     """
+     from ..agent_loader import load_agent_definition
+
+     agent_source_path = Path(agent_source)
+     if not agent_source_path.exists():
+         raise FileNotFoundError(
+             f"Agent definition not found: {agent_source}. "
+             f"Make sure you are running from the repository root, "
+             f"or pass --agent_source explicitly."
+         )
+
+     agent_def = load_agent_definition(agent_source)
+     agent_name = agent_def.get('name', agent_source_path.stem)
+
+     return agent_def, agent_name
+
+
+ def load_data_generator_agent(data_generator: str, skip_data_generation: bool) -> Optional[Dict[str, Any]]:
+     """Load data generator agent definition if needed.
+
+     Args:
+         data_generator: Path to data generator agent file
+         skip_data_generation: Whether data generation is skipped
+
+     Returns:
+         Agent definition dict or None if skipped/failed
+     """
+     from ..agent_loader import load_agent_definition
+
+     if skip_data_generation:
+         return None
+
+     if not data_generator:
+         return None
+
+     try:
+         data_gen_def = load_agent_definition(data_generator)
+         data_gen_name = data_gen_def.get('name', Path(data_generator).stem)
+         console.print(f"Data Generator Agent: [bold]{data_gen_name}[/bold]\n")
+         return data_gen_def
+     except Exception as e:
+         console.print(f"[yellow]⚠ Warning: Failed to setup data generator: {e}[/yellow]")
+         console.print("[yellow]Continuing with test execution...[/yellow]\n")
+         logger.debug(f"Data generator setup error: {e}", exc_info=True)
+         return None
+
+
+ def load_validator_agent(validator: Optional[str]) -> Tuple[Optional[Dict[str, Any]], str, Optional[str]]:
+     """Load validator agent definition.
+
+     Args:
+         validator: Path to validator agent file (optional)
+
+     Returns:
+         Tuple of (validator_def, validator_name, validator_path)
+     """
+     from ..agent_loader import load_agent_definition
+
+     validator_def = None
+     validator_agent_name = "Default Validator"
+     validator_path = validator
+
+     # Try to load validator from specified path or default location
+     if not validator_path:
+         default_validator = Path.cwd() / '.alita' / 'agents' / 'test-validator.agent.md'
+         if default_validator.exists():
+             validator_path = str(default_validator)
+
+     if validator_path and Path(validator_path).exists():
+         try:
+             validator_def = load_agent_definition(validator_path)
+             validator_agent_name = validator_def.get('name', Path(validator_path).stem)
+             console.print(f"Validator Agent: [bold]{validator_agent_name}[/bold]")
+             console.print(f"[dim]Using: {validator_path}[/dim]\n")
+         except Exception as e:
+             console.print(f"[yellow]⚠ Warning: Failed to load validator agent: {e}[/yellow]")
+             console.print(f"[yellow]Will use test runner agent for validation[/yellow]\n")
+             logger.debug(f"Validator load error: {e}", exc_info=True)
+     else:
+         console.print(f"[dim]No validator agent specified, using test runner agent for validation[/dim]\n")
+
+     return validator_def, validator_agent_name, validator_path
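A hedged sketch of how the three loaders above might be wired together at the start of a run; the agent file paths are examples only, not paths shipped with the package (load_test_runner_agent raises FileNotFoundError if its file is missing, and load_validator_agent falls back to .alita/agents/test-validator.agent.md when given None).

    from alita_sdk.cli.testcases.setup import (
        load_test_runner_agent,
        load_data_generator_agent,
        load_validator_agent,
    )

    # Hypothetical agent definition files under the current working directory.
    agent_def, agent_name = load_test_runner_agent(".alita/agents/test-runner.agent.md")
    data_gen_def = load_data_generator_agent(".alita/agents/data-generator.agent.md",
                                             skip_data_generation=False)
    validator_def, validator_name, validator_path = load_validator_agent(None)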
alita_sdk/cli/testcases/test_runner.py
@@ -0,0 +1,282 @@
+ """
+ Single test case execution utilities.
+
+ Handles executing a single test case with the test runner agent.
+ """
+
+ import logging
+ import uuid
+ from pathlib import Path
+ from typing import Dict, Any, List, Optional
+ from rich.console import Console
+
+ from langchain_core.runnables import RunnableConfig
+
+ from ..callbacks import create_cli_callback
+
+ logger = logging.getLogger(__name__)
+ console = Console()
+
+
+ def execute_single_test_case(
+     tc_info: Dict[str, Any],
+     idx: int,
+     total_tests: int,
+     bulk_gen_chat_history: List[Dict[str, str]],
+     test_cases_path: Path,
+     executor_cache: Dict,
+     client,
+     agent_def: Dict[str, Any],
+     config,
+     model: Optional[str],
+     temperature: Optional[float],
+     max_tokens: Optional[int],
+     work_dir: str,
+     master_log,
+     setup_executor_func,
+     verbose: bool = True,
+     debug: bool = False,
+ ) -> Optional[str]:
+     """Execute a single test case.
+
+     Args:
+         tc_info: Test case info dict with 'data' and 'file'
+         idx: Test case index (1-based)
+         total_tests: Total number of test cases
+         bulk_gen_chat_history: Chat history from data generation
+         test_cases_path: Path to test cases directory
+         executor_cache: Cache of executors
+         client: API client
+         agent_def: Agent definition
+         config: CLI configuration
+         model: Model override
+         temperature: Temperature override
+         max_tokens: Max tokens override
+         work_dir: Working directory
+         master_log: Log capture instance
+         setup_executor_func: Function to setup executor
+
+     Returns:
+         Execution output string, or None if execution failed
+     """
+     from .parser import resolve_toolkit_config_path
+     from .prompts import build_single_test_execution_prompt
+     from .utils import extract_toolkit_name
+     from .executor import create_executor_from_cache
+     from ..agent_ui import extract_output_from_result
+
+     test_case = tc_info['data']
+     test_file = tc_info['file']
+     test_name = test_case['name']
+
+     # Resolve toolkit config path for this test case
+     toolkit_config_path = resolve_toolkit_config_path(
+         test_case.get('config_path', ''),
+         test_file,
+         test_cases_path
+     )
+
+     # Extract toolkit name
+     toolkit_name = extract_toolkit_name(test_case.get('config_path', ''))
+
+     # Use cache key (None if no config)
+     cache_key = toolkit_config_path if toolkit_config_path else '__no_config__'
+     thread_id = f"test_case_{idx}_{uuid.uuid4().hex[:8]}"
+
+     # Log test case header to master log
+     master_log.print(f"\n\n" + "=" * 80)
+     master_log.print(f"[bold cyan]Test Case {idx}/{total_tests} - {test_name}[/bold cyan]")
+     master_log.print(f"[dim]Toolkit: {toolkit_name}[/dim]")
+     master_log.print(f"[dim]Config: {toolkit_config_path or 'None'}[/dim]")
+     master_log.print("=" * 80 + "\n")
+
+     # Get or create executor from cache
+     agent_executor, memory, mcp_session_manager = create_executor_from_cache(
+         executor_cache, cache_key, client, agent_def, toolkit_config_path,
+         config, model, temperature, max_tokens, work_dir, setup_executor_func
+     )
+
+     # Build execution prompt for single test case
+     execution_prompt = build_single_test_execution_prompt(tc_info, idx)
+     master_log.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
+     master_log.print(f"[dim]Executing test case with the prompt {execution_prompt}[/dim]")
+
+     # Execute test case
+     if not agent_executor:
+         master_log.print(f"[red]✗ No agent executor available[/red]")
+         return None
+
+     invoke_config = None
+     if verbose:
+         cli_callback = create_cli_callback(verbose=True, debug=debug)
+         invoke_config = RunnableConfig(callbacks=[cli_callback], configurable={"thread_id": thread_id})
+
+     with master_log.status(f"[yellow]Executing test case...[/yellow]", spinner="dots"):
+         exec_result = agent_executor.invoke(
+             {
+                 "input": execution_prompt,
+                 "chat_history": bulk_gen_chat_history,  # ONLY data gen history, no accumulation
+             },
+             config=invoke_config or {"configurable": {"thread_id": thread_id}},
+         )
+
+     execution_output = extract_output_from_result(exec_result)
+
+     master_log.print(f"[green]✓ Test case executed[/green]")
+     master_log.print(f"[dim]{execution_output}[/dim]\n")
+
+     return execution_output
+
+
+ def validate_single_test_case(
+     tc_info: Dict[str, Any],
+     idx: int,
+     execution_output: str,
+     bulk_gen_chat_history: List[Dict[str, str]],
+     validation_executor_cache: Dict,
+     cache_key: str,
+     client,
+     validator_def: Optional[Dict[str, Any]],
+     agent_def: Dict[str, Any],
+     toolkit_config_path: Optional[str],
+     config,
+     model: Optional[str],
+     temperature: Optional[float],
+     max_tokens: Optional[int],
+     work_dir: str,
+     master_log,
+     setup_executor_func,
+     verbose: bool = True,
+     debug: bool = False,
+ ) -> Dict[str, Any]:
+     """Validate a single test case execution.
+
+     Args:
+         tc_info: Test case info dict
+         idx: Test case index (1-based)
+         execution_output: Output from test execution
+         bulk_gen_chat_history: Chat history including data gen and execution
+         validation_executor_cache: Cache of validation executors
+         cache_key: Cache key for executor
+         client: API client
+         validator_def: Validator agent definition (optional)
+         agent_def: Test runner agent definition (fallback)
+         toolkit_config_path: Path to toolkit config
+         config: CLI configuration
+         model: Model override
+         temperature: Temperature override
+         max_tokens: Max tokens override
+         work_dir: Working directory
+         master_log: Log capture instance
+         setup_executor_func: Function to setup executor
+
+     Returns:
+         Test result dict with validation results
+     """
+     from .prompts import build_single_test_validation_prompt
+     from .validation import extract_json_from_text, print_validation_diagnostics, create_fallback_result_for_test
+     from .executor import create_executor_from_cache
+     from ..agent_ui import extract_output_from_result
+
+     test_case = tc_info['data']
+     test_file = tc_info['file']
+     test_name = test_case['name']
+
+     # Validate test case using validation executor with accumulated history
+     validation_prompt = build_single_test_validation_prompt(tc_info, idx, execution_output)
+
+     master_log.print(f"[bold yellow]🔍 Validating test case (with execution history)...[/bold yellow]")
+     master_log.print(f"[dim]{validation_prompt}[/dim]\n")
+
+     # Create or retrieve isolated validation executor
+     validation_cache_key = f"{cache_key}_validation"
+     validation_agent_def = validator_def if validator_def else agent_def
+
+     validation_executor, validation_memory, validation_mcp_session = create_executor_from_cache(
+         validation_executor_cache, validation_cache_key, client, validation_agent_def,
+         toolkit_config_path, config, model, temperature, max_tokens, work_dir, setup_executor_func
+     )
+
+     if validation_cache_key not in validation_executor_cache:
+         master_log.print(f"[dim]Created new isolated validation executor[/dim]")
+     else:
+         master_log.print(f"[dim]Using cached validation executor[/dim]")
+
+     # For validation, use a separate thread with accumulated chat history (data gen + execution)
+     validation_thread_id = f"validation_{idx}_{uuid.uuid4().hex[:8]}"
+
+     if not validation_executor:
+         master_log.print(f"[red]✗ No validation executor available[/red]")
+         return create_fallback_result_for_test(test_case, test_file, 'No validation executor')
+
+     invoke_config = None
+     if verbose:
+         cli_callback = create_cli_callback(verbose=True, debug=debug)
+         invoke_config = RunnableConfig(callbacks=[cli_callback], configurable={"thread_id": validation_thread_id})
+
+     master_log.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
+     with master_log.status(f"[yellow]Validating test case...[/yellow]", spinner="dots"):
+         validation_result = validation_executor.invoke(
+             {
+                 "input": validation_prompt,
+                 "chat_history": bulk_gen_chat_history,  # Includes data gen and execution history
+             },
+             config=invoke_config or {"configurable": {"thread_id": validation_thread_id}},
+         )
+
+     validation_output = extract_output_from_result(validation_result)
+
+     # Parse validation JSON
+     try:
+         validation_json = extract_json_from_text(validation_output)
+         step_results = validation_json.get('steps', [])
+
+         # Determine if test passed (all steps must pass)
+         test_passed = all(step.get('passed', False) for step in step_results) if step_results else False
+
+         if test_passed:
+             master_log.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
+         else:
+             master_log.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")
+
+         # Display individual step results
+         for step_result in step_results:
+             step_num = step_result.get('step_number')
+             step_title = step_result.get('title', '')
+             passed = step_result.get('passed', False)
+             details = step_result.get('details', '')
+
+             if passed:
+                 master_log.print(f" [green]✓ Step {step_num}: {step_title}[/green]")
+                 master_log.print(f" [dim]{details}[/dim]")
+             else:
+                 master_log.print(f" [red]✗ Step {step_num}: {step_title}[/red]")
+                 master_log.print(f" [dim]{details}[/dim]")
+
+         master_log.print()
+
+         return {
+             'title': test_name,
+             'passed': test_passed,
+             'file': test_file.name,
+             'step_results': step_results
+         }
+
+     except Exception as e:
+         logger.debug(f"Validation parsing failed for {test_name}: {e}", exc_info=True)
+         master_log.print(f"[yellow]⚠ Warning: Could not parse validation results for {test_name}[/yellow]")
+         master_log.print(f"[yellow]Error: {str(e)}[/yellow]")
+
+         # Enhanced diagnostic output
+         print_validation_diagnostics(validation_output)
+
+         # Generate fallback result
+         master_log.print(f"\n[yellow]🔄 Generating fallback validation result...[/yellow]")
+         fallback_result = create_fallback_result_for_test(
+             test_case,
+             test_file,
+             f'Validation failed - could not parse validator output: {str(e)}'
+         )
+         master_log.print(f"[dim]Created {len(fallback_result['step_results'])} fallback step results[/dim]\n")
+
+         return fallback_result
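For reference, validate_single_test_case expects the validator's reply to contain a JSON object roughly like the sketch below; only the entries under "steps" are read for pass/fail, and the values shown are invented.

    # Illustrative shape only; the field names follow the keys read by the
    # parsing code above (step_number, title, passed, details).
    example_validator_output = {
        "test_number": 1,
        "test_name": "User can log in",
        "steps": [
            {"step_number": 1, "title": "Open login page", "passed": True, "details": "Page loaded"},
            {"step_number": 2, "title": "Submit credentials", "passed": False, "details": "Request timed out"},
        ],
    }
    # The test is marked passed only when every step has passed == True;
    # if no JSON can be extracted, a fallback result is generated instead.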
alita_sdk/cli/testcases/utils.py
@@ -0,0 +1,39 @@
+ """
+ General utility functions for test execution.
+
+ Includes toolkit name extraction and other helper functions.
+ """
+
+ from pathlib import Path
+ from typing import Optional
+
+
+ def extract_toolkit_name(config_path: Optional[str]) -> str:
+     """
+     Extract toolkit name from config path.
+
+     Args:
+         config_path: Path to toolkit config (e.g., '.alita/tool_configs/github-config.json')
+
+     Returns:
+         Toolkit name (e.g., 'github') or 'unknown' if path is None/empty
+     """
+     if not config_path:
+         return 'unknown'
+
+     # Convert to Path
+     path = Path(config_path)
+
+     # First, try to extract from filename by removing common config suffixes
+     # For paths like '.alita/tool_configs/confluence-config.json' -> 'confluence'
+     stem = path.stem.replace('_config', '').replace('-config', '')
+     if stem and stem.lower() != 'config':
+         return stem
+
+     # Fallback: use parent directory name if it's not a common directory
+     # For paths like 'toolkits/github/config.yaml' -> 'github'
+     if path.parent.name and path.parent.name not in ['.', 'toolkits', 'tool_configs', 'configs']:
+         return path.parent.name
+
+     # Last resort
+     return 'unknown'
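The suffix-stripping and parent-directory fallback behave as follows; a quick sketch using the import path implied by the file list, with example config paths.

    from alita_sdk.cli.testcases.utils import extract_toolkit_name

    print(extract_toolkit_name(".alita/tool_configs/github-config.json"))  # -> "github"
    print(extract_toolkit_name("toolkits/jira/config.yaml"))               # -> "jira"
    print(extract_toolkit_name(None))                                      # -> "unknown"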