alita-sdk 0.3.554__py3-none-any.whl → 0.3.603__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/agent_executor.py +2 -1
- alita_sdk/cli/agent_loader.py +34 -4
- alita_sdk/cli/agents.py +433 -203
- alita_sdk/configurations/openapi.py +227 -15
- alita_sdk/runtime/clients/client.py +4 -2
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +61 -11
- alita_sdk/runtime/langchain/constants.py +419 -171
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -2
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/langraph_agent.py +106 -21
- alita_sdk/runtime/langchain/utils.py +30 -14
- alita_sdk/runtime/toolkits/__init__.py +3 -0
- alita_sdk/runtime/toolkits/artifact.py +2 -1
- alita_sdk/runtime/toolkits/mcp.py +6 -3
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/skill_router.py +2 -2
- alita_sdk/runtime/toolkits/tools.py +64 -2
- alita_sdk/runtime/toolkits/vectorstore.py +1 -1
- alita_sdk/runtime/tools/artifact.py +15 -0
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/llm.py +30 -11
- alita_sdk/runtime/tools/mcp_server_tool.py +6 -3
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +9 -6
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +1 -1
- alita_sdk/runtime/utils/mcp_sse_client.py +1 -1
- alita_sdk/runtime/utils/toolkit_utils.py +2 -0
- alita_sdk/tools/__init__.py +3 -1
- alita_sdk/tools/ado/repos/__init__.py +26 -8
- alita_sdk/tools/ado/repos/repos_wrapper.py +78 -52
- alita_sdk/tools/ado/test_plan/__init__.py +3 -2
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +2 -1
- alita_sdk/tools/ado/wiki/ado_wrapper.py +23 -1
- alita_sdk/tools/ado/work_item/__init__.py +3 -2
- alita_sdk/tools/ado/work_item/ado_wrapper.py +23 -1
- alita_sdk/tools/advanced_jira_mining/__init__.py +2 -1
- alita_sdk/tools/aws/delta_lake/__init__.py +2 -1
- alita_sdk/tools/azure_ai/search/__init__.py +2 -1
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base_indexer_toolkit.py +15 -6
- alita_sdk/tools/bitbucket/__init__.py +2 -1
- alita_sdk/tools/bitbucket/api_wrapper.py +1 -1
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +3 -3
- alita_sdk/tools/browser/__init__.py +1 -1
- alita_sdk/tools/carrier/__init__.py +1 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/cloud/aws/__init__.py +2 -1
- alita_sdk/tools/cloud/azure/__init__.py +2 -1
- alita_sdk/tools/cloud/gcp/__init__.py +2 -1
- alita_sdk/tools/cloud/k8s/__init__.py +2 -1
- alita_sdk/tools/code/linter/__init__.py +2 -1
- alita_sdk/tools/code/sonar/__init__.py +2 -1
- alita_sdk/tools/code_indexer_toolkit.py +19 -2
- alita_sdk/tools/confluence/__init__.py +7 -6
- alita_sdk/tools/confluence/api_wrapper.py +2 -2
- alita_sdk/tools/custom_open_api/__init__.py +2 -1
- alita_sdk/tools/elastic/__init__.py +2 -1
- alita_sdk/tools/elitea_base.py +28 -9
- alita_sdk/tools/figma/__init__.py +52 -6
- alita_sdk/tools/figma/api_wrapper.py +1158 -123
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +2 -1
- alita_sdk/tools/github/github_client.py +69 -97
- alita_sdk/tools/github/schemas.py +4 -4
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +118 -38
- alita_sdk/tools/gitlab_org/__init__.py +2 -1
- alita_sdk/tools/gitlab_org/api_wrapper.py +60 -62
- alita_sdk/tools/google/bigquery/__init__.py +2 -1
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +2 -1
- alita_sdk/tools/keycloak/__init__.py +2 -1
- alita_sdk/tools/localgit/__init__.py +2 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/ocr/__init__.py +2 -1
- alita_sdk/tools/openapi/__init__.py +227 -15
- alita_sdk/tools/openapi/api_wrapper.py +1287 -802
- alita_sdk/tools/pandas/__init__.py +11 -5
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/pptx/__init__.py +2 -1
- alita_sdk/tools/qtest/__init__.py +21 -2
- alita_sdk/tools/qtest/api_wrapper.py +430 -13
- alita_sdk/tools/rally/__init__.py +2 -1
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +2 -1
- alita_sdk/tools/salesforce/__init__.py +2 -1
- alita_sdk/tools/servicenow/__init__.py +2 -1
- alita_sdk/tools/sharepoint/__init__.py +2 -1
- alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
- alita_sdk/tools/slack/__init__.py +3 -2
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +3 -2
- alita_sdk/tools/testio/__init__.py +2 -1
- alita_sdk/tools/testrail/__init__.py +2 -1
- alita_sdk/tools/utils/content_parser.py +77 -3
- alita_sdk/tools/utils/text_operations.py +163 -71
- alita_sdk/tools/xray/__init__.py +3 -2
- alita_sdk/tools/yagmail/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +2 -1
- alita_sdk/tools/zephyr_essential/__init__.py +2 -1
- alita_sdk/tools/zephyr_scale/__init__.py +3 -2
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +2 -1
- {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.603.dist-info}/METADATA +7 -6
- {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.603.dist-info}/RECORD +116 -111
- {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.603.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.603.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.603.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.603.dist-info}/top_level.txt +0 -0
alita_sdk/cli/agents.py
CHANGED
@@ -141,15 +141,16 @@ def parse_test_case(test_case_path: str) -> Dict[str, Any]:
     if gen_data_match:
         generate_test_data = gen_data_match.group(1).lower() == 'true'
 
-    # Extract Test Data Configuration
-
+    # Extract Test Data Configuration section as a raw fenced code block string
+    # NOTE: We intentionally store the entire section as a single string rather than parsing
+    # individual table rows. This preserves the original formatting for downstream tools
+    # which may prefer the raw markdown block.
+    test_data_config = None
     config_section_match = re.search(r'##\s+Test Data Configuration\s*\n(.+?)(?=\n##|\Z)', content, re.DOTALL)
     if config_section_match:
-        config_section = config_section_match.group(1)
-        #
-
-        for param, value in table_rows:
-            test_data_config[param.strip()] = value.strip()
+        config_section = config_section_match.group(1).strip()
+        # Store as a fenced code block to make it clear this is a raw block of text
+        test_data_config = f"```\n{config_section}\n```"
 
     # Extract Pre-requisites section
     prerequisites = ""
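The rewrite above stops parsing individual table rows and instead keeps the whole "## Test Data Configuration" section as one raw block. A minimal sketch of that behavior, reusing the regex from the hunk on an invented sample file (the sample content is illustrative only):

```python
# Illustration of the raw-block extraction introduced above.
# The regex is copied from the diff; the sample test case content is made up.
import re

content = """## Test Data Configuration
| Parameter | Value        |
|-----------|--------------|
| username  | {{username}} |

## Pre-requisites
None
"""

test_data_config = None
match = re.search(r'##\s+Test Data Configuration\s*\n(.+?)(?=\n##|\Z)', content, re.DOTALL)
if match:
    section = match.group(1).strip()
    # Stored verbatim (wrapped as a fenced block), not parsed row by row
    test_data_config = f"```\n{section}\n```"

print(test_data_config)
```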
@@ -252,86 +253,80 @@ def _build_bulk_data_gen_prompt(parsed_test_cases: list) -> str:
     for idx, tc in enumerate(parsed_test_cases, 1):
         test_case = tc['data']
         test_file = tc['file']
-
+        # Build parts for this test case (do not include separator lines here;
+        # the entire block is wrapped with separators at the top-level)
         parts = [f"Test Case #{idx}: {test_case['name']}", f"File: {test_file.name}", ""]
 
         if test_case.get('test_data_config'):
             parts.append("Test Data Configuration:")
-
-
-
+            td = test_case['test_data_config']
+            raw_lines = str(td).splitlines()
+            for line in raw_lines:
+                parts.append(f"{line}")
+
         if test_case.get('prerequisites'):
             parts.append(f"\nPre-requisites:\n{test_case['prerequisites']}")
 
-        if test_case.get('variables'):
-            parts.append(f"\nVariables to generate: {', '.join(test_case['variables'])}")
-
         requirements.append("\n".join(parts))
 
-    return
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # If no requirements were collected, return an empty string to avoid
+    # producing a prompt with only separator lines.
+    if not requirements:
+        return ""
+
+    # Use a visible divider between test cases so each entry is clearly separated
+    divider = '-' * 40
+    body = f"\n\n{divider}\n\n".join(requirements)
+    return f"{('='*60)}\n\n{body}\n\n{('='*60)}"
+
+
+def _build_single_test_execution_prompt(test_case_info: dict, test_number: int) -> str:
+    """Build execution prompt for a single test case."""
+    test_case = test_case_info['data']
+    test_file = test_case_info['file']
+
+    parts = [
+        f"\n{'='*80}",
+        f"TEST CASE #{test_number}: {test_case['name']}",
+        f"File: {test_file.name}",
+        f"{'='*80}"
+    ]
+
+    if test_case['steps']:
+        for step in test_case['steps']:
+            parts.append(f"\nStep {step['number']}: {step['title']}")
+            parts.append(step['instruction'])
+    else:
+        parts.append("\n(No steps defined)")
 
     return "\n".join(parts)
 
 
-def
-"""Build prompt for
-
+def _build_single_test_validation_prompt(test_case_info: dict, test_number: int, execution_output: str) -> str:
+    """Build validation prompt for a single test case."""
+    test_case = test_case_info['data']
+
+    parts = [
+        f"\nTest Case #{test_number}: {test_case['name']}"
+    ]
 
-
-
-
-
-
-        parts.append(f"  Step {step['number']}: {step['title']}")
-        if step['expectation']:
-            parts.append(f"    Expected: {step['expectation']}")
+    if test_case['steps']:
+        for step in test_case['steps']:
+            parts.append(f"  Step {step['number']}: {step['title']}")
+            if step['expectation']:
+                parts.append(f"    Expected: {step['expectation']}")
 
     parts.append(f"\n\nActual Execution Results:\n{execution_output}\n")
-
-
-
-
-
-
-
-
-
-
-    {{"step_number": 2, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}}
-    ]
-}},
-{{
-    "test_number": 2,
-    "test_name": "<test case name>",
-    "steps": [...]
-}}
-]
-}}
-
-Validate all {len(parsed_test_cases)} test cases and their steps.""")
+
+    # Escape quotes in test name for valid JSON in prompt
+    escaped_test_name = test_case['name'].replace('"', '\\"')
+
+    parts.append(f"""\nBased on the execution results above, validate this test case.
+{{
+    "test_number": {test_number},
+    "test_name": "{escaped_test_name}"
+}}
+""")
 
     return "\n".join(parts)
 
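To make the new separator scheme concrete, here is the layout `_build_bulk_data_gen_prompt` now emits, in isolation (the requirement strings are invented; the divider logic mirrors the hunk above):

```python
# Sketch of the prompt framing: '=' banners around the whole block,
# '-' dividers between individual test case entries.
requirements = ["Test Case #1: Login\nFile: TC-001.md",
                "Test Case #2: Logout\nFile: TC-002.md"]

if not requirements:
    prompt = ""  # avoid a prompt that is only separator lines
else:
    divider = '-' * 40
    body = f"\n\n{divider}\n\n".join(requirements)
    prompt = f"{('='*60)}\n\n{body}\n\n{('='*60)}"

print(prompt)
```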
@@ -359,17 +354,119 @@ def _extract_json_from_text(text: str) -> dict:
         return json.loads(text[start_idx:end_idx])
 
 
-def
-"""Create fallback
-
-
-
-
+def _create_fallback_result_for_test(test_case: dict, test_file: Path, reason: str = 'Validation failed') -> dict:
+    """Create a fallback result for a single test case with detailed step information.
+
+    Args:
+        test_case: Parsed test case data
+        test_file: Path to test case file
+        reason: Reason for fallback
+
+    Returns:
+        Fallback test result dict with step details
+    """
+    fallback_steps = []
+    for step_info in test_case.get('steps', []):
+        fallback_steps.append({
+            'step_number': step_info['number'],
+            'title': step_info['title'],
             'passed': False,
-            '
-            'step_results': []
+            'details': reason
         })
-
+
+    return {
+        'title': test_case['name'],
+        'passed': False,
+        'file': test_file.name,
+        'step_results': fallback_steps,
+        'validation_error': reason
+    }
+
+
+def _cleanup_executor_cache(cache: Dict[str, tuple], cache_name: str = "executor") -> None:
+    """Clean up executor cache resources.
+
+    Args:
+        cache: Dictionary of cached executors
+        cache_name: Name of cache for logging
+    """
+    console.print(f"[dim]Cleaning up {cache_name} cache...[/dim]")
+    for cache_key, cached_items in cache.items():
+        try:
+            # Extract memory from tuple (second element)
+            memory = cached_items[1] if len(cached_items) > 1 else None
+
+            # Close SQLite memory connection
+            if memory and hasattr(memory, 'conn') and memory.conn:
+                memory.conn.close()
+        except Exception as e:
+            logger.debug(f"Error cleaning up {cache_name} cache for {cache_key}: {e}")
+
+
+def _create_executor_from_cache(cache: Dict[str, tuple], cache_key: str,
+                                client, agent_def: Dict, toolkit_config_path: Optional[str],
+                                config, model: Optional[str], temperature: Optional[float],
+                                max_tokens: Optional[int], work_dir: Optional[str]) -> tuple:
+    """Get or create executor from cache.
+
+    Args:
+        cache: Executor cache dictionary
+        cache_key: Key for caching
+        client: API client
+        agent_def: Agent definition
+        toolkit_config_path: Path to toolkit config
+        config: CLI configuration
+        model: Model override
+        temperature: Temperature override
+        max_tokens: Max tokens override
+        work_dir: Working directory
+
+    Returns:
+        Tuple of (agent_executor, memory, mcp_session_manager)
+    """
+    if cache_key in cache:
+        return cache[cache_key]
+
+    # Create new executor
+    from langgraph.checkpoint.sqlite import SqliteSaver
+    import sqlite3
+
+    memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
+    toolkit_config_tuple = (toolkit_config_path,) if toolkit_config_path else ()
+
+    agent_executor, mcp_session_manager, _, _, _, _, _ = _setup_local_agent_executor(
+        client, agent_def, toolkit_config_tuple, config, model, temperature,
+        max_tokens, memory, work_dir
+    )
+
+    # Cache the executor
+    cached_tuple = (agent_executor, memory, mcp_session_manager)
+    cache[cache_key] = cached_tuple
+    return cached_tuple
+
+
+def _print_validation_diagnostics(validation_output: str) -> None:
+    """Print diagnostic information for validation output.
+
+    Args:
+        validation_output: The validation output to diagnose
+    """
+    console.print(f"\n[bold red]🔍 Diagnostic Information:[/bold red]")
+    console.print(f"[dim]Output length: {len(validation_output)} characters[/dim]")
+
+    # Check for key JSON elements
+    has_json = '{' in validation_output and '}' in validation_output
+    has_fields = 'test_number' in validation_output and 'steps' in validation_output
+
+    console.print(f"[dim]Has JSON structure: {has_json}[/dim]")
+    console.print(f"[dim]Has required fields: {has_fields}[/dim]")
+
+    # Show relevant excerpt
+    if len(validation_output) > 400:
+        console.print(f"\n[red]First 200 chars:[/red] [dim]{validation_output[:200]}[/dim]")
+        console.print(f"[red]Last 200 chars:[/red] [dim]{validation_output[-200:]}[/dim]")
+    else:
+        console.print(f"\n[red]Full output:[/red] [dim]{validation_output}[/dim]")
 
 
 def _get_alita_system_prompt(config) -> str:
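Together, `_create_executor_from_cache` and `_cleanup_executor_cache` implement a get-or-create cache whose entries own an in-memory SQLite connection that must be closed on teardown. A stripped-down sketch of that lifecycle, with a hypothetical `build_executor` standing in for `_setup_local_agent_executor` (which takes CLI-specific arguments):

```python
# Minimal sketch, assuming build_executor() is the expensive factory being cached.
import sqlite3
from typing import Dict, Tuple

def build_executor(conn: sqlite3.Connection) -> object:
    return object()  # stand-in for the real agent executor

ExecutorEntry = Tuple[object, sqlite3.Connection]
cache: Dict[str, ExecutorEntry] = {}

def get_or_create(cache_key: str) -> ExecutorEntry:
    if cache_key in cache:
        return cache[cache_key]
    conn = sqlite3.connect(":memory:", check_same_thread=False)
    cache[cache_key] = (build_executor(conn), conn)
    return cache[cache_key]

def cleanup(cache: Dict[str, ExecutorEntry]) -> None:
    for _, (_, conn) in cache.items():
        try:
            conn.close()  # mirrors memory.conn.close() in the helper above
        except Exception:
            pass
```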
@@ -1263,6 +1360,10 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
         details.append("Temperature: ", style="bold")
         details.append(f"{agent_def['temperature']}\n", style="cyan")
 
+    if agent_def.get('persona'):
+        details.append("Persona: ", style="bold")
+        details.append(f"{agent_def['persona']}\n", style="cyan")
+
     panel = Panel(
         details,
         title=f"Local Agent: {agent_def.get('name', 'Unknown')}",
@@ -3212,27 +3313,42 @@ def agent_run(ctx, agent_source: str, message: str, version: Optional[str],
 
 
 @agent.command('execute-test-cases')
-@click.
+@click.option(
+    '--agent_source',
+    '--agent-source',
+    'agent_source',
+    required=False,
+    default=str(Path('.alita') / 'agents' / 'test-runner.agent.md'),
+    show_default=True,
+    type=click.Path(exists=False, file_okay=True, dir_okay=False),
+    help='Path to test runner agent definition file'
+)
 @click.option('--test-cases-dir', required=True, type=click.Path(exists=True, file_okay=False, dir_okay=True),
               help='Directory containing test case files')
-@click.option('--results-dir', required=
+@click.option('--results-dir', required=False, default=str(Path('.alita') / 'tests' / 'results'),
+              type=click.Path(file_okay=False, dir_okay=True),
               help='Directory where test results will be saved')
 @click.option('--test-case', 'test_case_files', multiple=True,
               help='Specific test case file(s) to execute (e.g., TC-001.md). Can specify multiple times. If not specified, executes all test cases.')
 @click.option('--model', help='Override LLM model')
 @click.option('--temperature', type=float, help='Override temperature')
 @click.option('--max-tokens', type=int, help='Override max tokens')
-@click.option('--dir', 'work_dir',
+@click.option('--dir', 'work_dir', required=False, default=str(Path('.alita')),
+              type=click.Path(exists=True, file_okay=False, dir_okay=True),
               help='Grant agent filesystem access to this directory')
-@click.option('--data-generator',
+@click.option('--data-generator', required=False, default=str(Path('.alita') / 'agents' / 'test-data-generator.agent.md'),
+              type=click.Path(exists=True),
               help='Path to test data generator agent definition file')
+@click.option('--validator', type=click.Path(exists=True),
+              help='Path to test validator agent definition file (default: .alita/agents/test-validator.agent.md)')
 @click.option('--skip-data-generation', is_flag=True,
               help='Skip test data generation step')
 @click.pass_context
 def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir: str,
                        test_case_files: tuple, model: Optional[str], temperature: Optional[float],
-                       max_tokens: Optional[int], work_dir:
-                       data_generator: Optional[str],
+                       max_tokens: Optional[int], work_dir: str,
+                       data_generator: str, validator: Optional[str],
+                       skip_data_generation: bool):
     """
     Execute test cases from a directory and save results.
 
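The rewritten decorator registers `--agent_source` and `--agent-source` as two spellings of one parameter with a committed default path. A minimal standalone sketch of that click pattern (the `demo` command is invented):

```python
# Both option spellings bind to the single 'agent_source' parameter;
# exists=False lets the command start even when the default file is absent,
# so it can fail later with a friendlier message.
from pathlib import Path
import click

@click.command()
@click.option('--agent_source', '--agent-source', 'agent_source',
              default=str(Path('.alita') / 'agents' / 'test-runner.agent.md'),
              show_default=True,
              type=click.Path(exists=False, file_okay=True, dir_okay=False))
def demo(agent_source: str) -> None:
    click.echo(f"Using agent definition: {agent_source}")

if __name__ == '__main__':
    demo()
```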
@@ -3247,24 +3363,44 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
        - Generates a test result file
     4. Saves all results to RESULTS_DIR
 
-
+    --agent_source: Path to test runner agent definition file
 
     \b
     Examples:
-        alita execute-test-cases
-        alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
+        alita agent execute-test-cases --test-cases-dir ./tests --results-dir ./results
+        alita agent execute-test-cases --agent_source ./agent.json --test-cases-dir ./tests --results-dir ./results \
             --data-generator ./data-gen.json
-        alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
+        alita agent execute-test-cases --agent_source ./agent.json --test-cases-dir ./tests --results-dir ./results \
             --test-case TC-001.md --test-case TC-002.md
-        alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
+        alita agent execute-test-cases --agent_source ./agent.json --test-cases-dir ./tests --results-dir ./results \
             --skip-data-generation --model gpt-4o
     """
+    # Import dependencies at function start
+    import sqlite3
+    import uuid
+    from langgraph.checkpoint.sqlite import SqliteSaver
+
     config = ctx.obj['config']
     client = get_client(ctx)
+
+    # Sanity-check committed defaults (should exist; fail early with a clear message if not)
+    if results_dir and not Path(results_dir).exists():
+        raise click.ClickException(
+            f"Results directory not found: {results_dir}. "
+            f"If you are running outside the repo root, pass --results-dir explicitly."
+        )
 
     try:
         # Load agent definition
-
+        agent_source_path = Path(agent_source)
+        if not agent_source_path.exists():
+            default_path = Path('.alita') / 'agents' / 'test-runner.agent.md'
+            if agent_source_path == default_path:
+                raise click.ClickException(
+                    f"Default agent definition not found: {agent_source}. "
+                    f"Run this command from the repo root (so {default_path} resolves correctly) "
+                    f"or pass --agent_source explicitly."
+                )
             raise click.ClickException(f"Agent definition not found: {agent_source}")
 
         agent_def = load_agent_definition(agent_source)
@@ -3317,11 +3453,30 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
             console.print("[yellow]Continuing with test execution...[/yellow]\n")
             logger.debug(f"Data generator setup error: {e}", exc_info=True)
 
-        #
-
-
-
-
+        # Load validator agent definition
+        validator_def = None
+        validator_agent_name = "Default Validator"
+
+        # Try to load validator from specified path or default location
+        validator_path = validator
+        if not validator_path:
+            # Default to .alita/agents/test-validator.agent.md
+            default_validator = Path.cwd() / '.alita' / 'agents' / 'test-validator.agent.md'
+            if default_validator.exists():
+                validator_path = str(default_validator)
+
+        if validator_path and Path(validator_path).exists():
+            try:
+                validator_def = load_agent_definition(validator_path)
+                validator_agent_name = validator_def.get('name', Path(validator_path).stem)
+                console.print(f"Validator Agent: [bold]{validator_agent_name}[/bold]")
+                console.print(f"[dim]Using: {validator_path}[/dim]\n")
+            except Exception as e:
+                console.print(f"[yellow]⚠ Warning: Failed to load validator agent: {e}[/yellow]")
+                console.print(f"[yellow]Will use test runner agent for validation[/yellow]\n")
+                logger.debug(f"Validator load error: {e}", exc_info=True)
+        else:
+            console.print(f"[dim]No validator agent specified, using test runner agent for validation[/dim]\n")
 
         # Store bulk data generation chat history to pass to test executors
         bulk_gen_chat_history = []
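The lookup order introduced here is: explicit `--validator` path first, then the committed default under `.alita/agents/`, otherwise fall back to the test runner agent. Distilled into a small helper (the name `resolve_validator_path` is hypothetical; the default path is the one used above):

```python
from pathlib import Path
from typing import Optional

def resolve_validator_path(validator: Optional[str]) -> Optional[str]:
    """Return the validator agent path, or None to reuse the test runner agent."""
    if validator:
        return validator
    default_validator = Path.cwd() / '.alita' / 'agents' / 'test-validator.agent.md'
    return str(default_validator) if default_validator.exists() else None
```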
@@ -3353,11 +3508,10 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
 
             bulk_data_gen_prompt = _build_bulk_data_gen_prompt(test_cases_needing_data_gen)
 
-            console.print(f"Executing test data generation prompt {bulk_data_gen_prompt}\n")
+            console.print(f"Executing test data generation prompt \n{bulk_data_gen_prompt}\n")
 
             try:
                 # Setup data generator agent
-                from langgraph.checkpoint.sqlite import SqliteSaver
                 bulk_memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
 
                 # Use first test case's config or empty tuple
@@ -3398,138 +3552,214 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
                 console.print("[yellow]Continuing with test execution...[/yellow]\n")
                 logger.debug(f"Bulk data generation error: {e}", exc_info=True)
 
-        # Execute
+        # Execute test cases sequentially with executor caching
         if not parsed_test_cases:
             console.print("[yellow]No test cases to execute[/yellow]")
             return
 
-        console.print(f"\n[bold yellow]📋 Executing
-
-        # Use first test case's config for agent setup
-        first_tc = parsed_test_cases[0]
-        first_test_file = first_tc['file']
-        toolkit_config_path = resolve_toolkit_config_path(
-            first_tc['data'].get('config_path', ''),
-            first_test_file,
-            test_cases_path
-        )
-        toolkit_config_tuple = (toolkit_config_path,) if toolkit_config_path else ()
-
-        # Create memory for bulk execution
-        from langgraph.checkpoint.sqlite import SqliteSaver
-        memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
+        console.print(f"\n[bold yellow]📋 Executing test cases sequentially...[/bold yellow]\n")
 
-        #
-
+        # Show data generation context availability
+        if bulk_gen_chat_history:
+            console.print(f"[dim]✓ Data generation history available ({len(bulk_gen_chat_history)} messages) - shared with all test cases[/dim]\n")
+        else:
+            console.print(f"[dim]ℹ No data generation history (skipped or disabled)[/dim]\n")
 
-        #
-
-            client, agent_def, toolkit_config_tuple, config, model, temperature, max_tokens, memory, work_dir
-        )
+        # Executor cache: key = toolkit_config_path, value = (agent_executor, memory, mcp_session_manager)
+        executor_cache = {}
 
-        #
-
-
-        console.print(f"Executing the prompt: {bulk_all_prompt}\n")
+        # Validation executor cache: separate isolated executors for validation
+        # key = toolkit_config_path, value = (agent_executor, memory, mcp_session_manager)
+        validation_executor_cache = {}
 
-        # Execute
+        # Execute each test case sequentially
         test_results = []
-
+        total_tests = len(parsed_test_cases)
 
-
-
-
-
-
-
-
-
-
-
+        for idx, tc_info in enumerate(parsed_test_cases, 1):
+            test_case = tc_info['data']
+            test_file = tc_info['file']
+            test_name = test_case['name']
+
+            # Display progress
+            console.print(f"[bold cyan]Test Case {idx}/{total_tests} - {test_name}[/bold cyan]")
+
+            try:
+                # Resolve toolkit config path for this test case
+                toolkit_config_path = resolve_toolkit_config_path(
+                    test_case.get('config_path', ''),
+                    test_file,
+                    test_cases_path
+                )
 
-        #
-
-
+                # Use cache key (None if no config)
+                cache_key = toolkit_config_path if toolkit_config_path else '__no_config__'
+                thread_id = f"test_case_{idx}_{uuid.uuid4().hex[:8]}"
 
-        #
-
+                # Get or create executor from cache
+                agent_executor, memory, mcp_session_manager = _create_executor_from_cache(
+                    executor_cache, cache_key, client, agent_def, toolkit_config_path,
+                    config, model, temperature, max_tokens, work_dir
+                )
 
-
-
-        console.print(f"[dim]{
+                # Build execution prompt for single test case
+                execution_prompt = _build_single_test_execution_prompt(tc_info, idx)
+                console.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
+                console.print(f"[dim]Executing test case with the prompt {execution_prompt}[/dim]")
 
-
-
-
-
+                # Execute test case
+                execution_output = ""
+                if agent_executor:
+                    with console.status(f"[yellow]Executing test case...[/yellow]", spinner="dots"):
+                        exec_result = agent_executor.invoke({
+                            "input": execution_prompt,
+                            "chat_history": bulk_gen_chat_history  # ONLY data gen history, no accumulation
+                        }, config={"configurable": {"thread_id": thread_id}})
+                        execution_output = extract_output_from_result(exec_result)
+
+                    console.print(f"[green]✓ Test case executed[/green]")
+                    console.print(f"[dim]{execution_output}[/dim]\n")
+
+                    # Append execution to bulk gen chat history for validation
+                    test_case_history_start = len(bulk_gen_chat_history)
+                    bulk_gen_chat_history.extend([
+                        {"role": "user", "content": execution_prompt},
+                        {"role": "assistant", "content": execution_output}
+                    ])
+
+                    # No history accumulation - each test case is independent
+                else:
+                    console.print(f"[red]✗ No agent executor available[/red]")
+                    # Create fallback result for this test
+                    test_results.append({
+                        'title': test_name,
+                        'passed': False,
+                        'file': test_file.name,
+                        'step_results': []
                     })
+                    continue
+
+                # Validate test case using validation executor with accumulated history
+                validation_prompt = _build_single_test_validation_prompt(tc_info, idx, execution_output)
+
+                console.print(f"[bold yellow]🔍 Validating test case (with execution history)...[/bold yellow]")
+                console.print(f"[dim]{validation_prompt}[/dim]\n")
 
-
+                # Create or retrieve isolated validation executor
+                validation_cache_key = f"{cache_key}_validation"
+                validation_agent_def = validator_def if validator_def else agent_def
+
+                validation_executor, validation_memory, validation_mcp_session = _create_executor_from_cache(
+                    validation_executor_cache, validation_cache_key, client, validation_agent_def,
+                    toolkit_config_path, config, model, temperature, max_tokens, work_dir
+                )
+
+                if validation_cache_key not in validation_executor_cache:
+                    console.print(f"[dim]Created new isolated validation executor[/dim]")
+                else:
+                    console.print(f"[dim]Using cached validation executor[/dim]")
 
-
+                # For validation, use a separate thread with accumulated chat history (data gen + execution)
+                # This provides context to the validator about the test execution
+                validation_thread_id = f"validation_{idx}_{uuid.uuid4().hex[:8]}"
+
+                validation_output = ""
+                if validation_executor:
+                    with console.status(f"[yellow]Validating test case...[/yellow]", spinner="dots"):
+                        validation_result = validation_executor.invoke({
+                            "input": validation_prompt,
+                            "chat_history": bulk_gen_chat_history  # Includes data gen and execution history
+                        }, {"configurable": {"thread_id": validation_thread_id}})
+
+                        validation_output = extract_output_from_result(validation_result)
+                else:
+                    console.print(f"[red]✗ No validation executor available[/red]")
+                    validation_output = "{}"
+
+                # No further history update - validation completes the cycle
 
                 # Parse validation JSON
                 try:
                     validation_json = _extract_json_from_text(validation_output)
-
+                    step_results = validation_json.get('steps', [])
 
-                    #
-
-                    passed_tests = 0
-                    failed_tests = 0
+                    # Determine if test passed (all steps must pass)
+                    test_passed = all(step.get('passed', False) for step in step_results) if step_results else False
 
-
-
-
-
-
-
+                    if test_passed:
+                        console.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
+                    else:
+                        console.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")
+
+                    # Display individual step results
+                    for step_result in step_results:
+                        step_num = step_result.get('step_number')
+                        step_title = step_result.get('title', '')
+                        passed = step_result.get('passed', False)
+                        details = step_result.get('details', '')
 
-
-
-
-                    console.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
+                        if passed:
+                            console.print(f"  [green]✓ Step {step_num}: {step_title}[/green]")
+                            console.print(f"    [dim]{details}[/dim]")
                         else:
-
-                    console.print(f"[
-
-
-
-
-
-
-
-
-
-
-                    console.print(f"    [dim]{details}[/dim]")
-                    else:
-                    console.print(f"  [red]✗ Step {step_num}: {step_title}[/red]")
-                    console.print(f"    [dim]{details}[/dim]")
-
-                    console.print()
-
-                    # Store result
-                    test_results.append({
-                        'title': test_name,
-                        'passed': test_passed,
-                        'file': parsed_test_cases[tc_result.get('test_number', 1) - 1]['file'].name if tc_result.get('test_number', 1) - 1 < len(parsed_test_cases) else 'unknown',
-                        'step_results': step_results
-                    })
+                            console.print(f"  [red]✗ Step {step_num}: {step_title}[/red]")
+                            console.print(f"    [dim]{details}[/dim]")
+
+                    console.print()
+
+                    # Store result
+                    test_results.append({
+                        'title': test_name,
+                        'passed': test_passed,
+                        'file': test_file.name,
+                        'step_results': step_results
+                    })
 
                 except Exception as e:
-                    logger.debug(f"Validation parsing failed: {e}")
-                    console.print(f"[yellow]⚠ Warning: Could not parse validation results
-
-                    else:
-                    console.print(f"[red]✗ No agent executor available[/red]\n")
-                    test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
+                    logger.debug(f"Validation parsing failed for {test_name}: {e}", exc_info=True)
+                    console.print(f"[yellow]⚠ Warning: Could not parse validation results for {test_name}[/yellow]")
+                    console.print(f"[yellow]Error: {str(e)}[/yellow]")
 
-
-
-
-
+                    # Enhanced diagnostic output
+                    _print_validation_diagnostics(validation_output)
+
+                    # Generate fallback result using helper function
+                    console.print(f"\n[yellow]🔄 Generating fallback validation result...[/yellow]")
+                    fallback_result = _create_fallback_result_for_test(
+                        test_case,
+                        test_file,
+                        f'Validation failed - could not parse validator output: {str(e)}'
+                    )
+                    console.print(f"[dim]Created {len(fallback_result['step_results'])} fallback step results[/dim]\n")
+
+                    test_results.append(fallback_result)
+                    console.print()
+
+                # After validation, remove the test case execution from history to prevent accumulation
+                # Remove the entries added for this test case
+                del bulk_gen_chat_history[test_case_history_start:]
+
+            except Exception as e:
+                logger.debug(f"Test execution failed for {test_name}: {e}", exc_info=True)
+                console.print(f"[red]✗ Test execution failed: {e}[/red]")
+
+                # Create fallback result using helper function
+                fallback_result = _create_fallback_result_for_test(
+                    test_case,
+                    test_file,
+                    f'Test execution failed: {str(e)}'
+                )
+                test_results.append(fallback_result)
+                console.print()
+
+        # Cleanup: Close executor cache resources
+        _cleanup_executor_cache(executor_cache, "executor")
+        _cleanup_executor_cache(validation_executor_cache, "validation executor")
+
+        # Calculate totals
+        total_tests = len(test_results)
+        passed_tests = sum(1 for r in test_results if r['passed'])
+        failed_tests = total_tests - passed_tests
 
         # Generate summary report
         console.print(f"\n[bold]{'='*60}[/bold]")