alita-sdk 0.3.554__py3-none-any.whl → 0.3.602__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic.

Files changed (116)
  1. alita_sdk/cli/agent_executor.py +2 -1
  2. alita_sdk/cli/agent_loader.py +34 -4
  3. alita_sdk/cli/agents.py +433 -203
  4. alita_sdk/configurations/openapi.py +227 -15
  5. alita_sdk/runtime/clients/client.py +4 -2
  6. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  7. alita_sdk/runtime/langchain/assistant.py +61 -11
  8. alita_sdk/runtime/langchain/constants.py +419 -171
  9. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -2
  10. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  11. alita_sdk/runtime/langchain/langraph_agent.py +106 -21
  12. alita_sdk/runtime/langchain/utils.py +30 -14
  13. alita_sdk/runtime/toolkits/__init__.py +3 -0
  14. alita_sdk/runtime/toolkits/artifact.py +2 -1
  15. alita_sdk/runtime/toolkits/mcp.py +6 -3
  16. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  17. alita_sdk/runtime/toolkits/skill_router.py +2 -2
  18. alita_sdk/runtime/toolkits/tools.py +64 -2
  19. alita_sdk/runtime/toolkits/vectorstore.py +1 -1
  20. alita_sdk/runtime/tools/artifact.py +15 -0
  21. alita_sdk/runtime/tools/data_analysis.py +183 -0
  22. alita_sdk/runtime/tools/llm.py +30 -11
  23. alita_sdk/runtime/tools/mcp_server_tool.py +6 -3
  24. alita_sdk/runtime/tools/router.py +2 -4
  25. alita_sdk/runtime/tools/sandbox.py +9 -6
  26. alita_sdk/runtime/utils/constants.py +5 -1
  27. alita_sdk/runtime/utils/mcp_client.py +1 -1
  28. alita_sdk/runtime/utils/mcp_sse_client.py +1 -1
  29. alita_sdk/runtime/utils/toolkit_utils.py +2 -0
  30. alita_sdk/tools/__init__.py +3 -1
  31. alita_sdk/tools/ado/repos/__init__.py +26 -8
  32. alita_sdk/tools/ado/repos/repos_wrapper.py +78 -52
  33. alita_sdk/tools/ado/test_plan/__init__.py +3 -2
  34. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  35. alita_sdk/tools/ado/utils.py +1 -18
  36. alita_sdk/tools/ado/wiki/__init__.py +2 -1
  37. alita_sdk/tools/ado/wiki/ado_wrapper.py +23 -1
  38. alita_sdk/tools/ado/work_item/__init__.py +3 -2
  39. alita_sdk/tools/ado/work_item/ado_wrapper.py +23 -1
  40. alita_sdk/tools/advanced_jira_mining/__init__.py +2 -1
  41. alita_sdk/tools/aws/delta_lake/__init__.py +2 -1
  42. alita_sdk/tools/azure_ai/search/__init__.py +2 -1
  43. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  44. alita_sdk/tools/base_indexer_toolkit.py +15 -6
  45. alita_sdk/tools/bitbucket/__init__.py +2 -1
  46. alita_sdk/tools/bitbucket/api_wrapper.py +1 -1
  47. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +3 -3
  48. alita_sdk/tools/browser/__init__.py +1 -1
  49. alita_sdk/tools/carrier/__init__.py +1 -1
  50. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  51. alita_sdk/tools/cloud/aws/__init__.py +2 -1
  52. alita_sdk/tools/cloud/azure/__init__.py +2 -1
  53. alita_sdk/tools/cloud/gcp/__init__.py +2 -1
  54. alita_sdk/tools/cloud/k8s/__init__.py +2 -1
  55. alita_sdk/tools/code/linter/__init__.py +2 -1
  56. alita_sdk/tools/code/sonar/__init__.py +2 -1
  57. alita_sdk/tools/code_indexer_toolkit.py +19 -2
  58. alita_sdk/tools/confluence/__init__.py +7 -6
  59. alita_sdk/tools/confluence/api_wrapper.py +2 -2
  60. alita_sdk/tools/custom_open_api/__init__.py +2 -1
  61. alita_sdk/tools/elastic/__init__.py +2 -1
  62. alita_sdk/tools/elitea_base.py +28 -9
  63. alita_sdk/tools/figma/__init__.py +52 -6
  64. alita_sdk/tools/figma/api_wrapper.py +1158 -123
  65. alita_sdk/tools/figma/figma_client.py +73 -0
  66. alita_sdk/tools/figma/toon_tools.py +2748 -0
  67. alita_sdk/tools/github/__init__.py +2 -1
  68. alita_sdk/tools/github/github_client.py +56 -92
  69. alita_sdk/tools/github/schemas.py +4 -4
  70. alita_sdk/tools/gitlab/__init__.py +2 -1
  71. alita_sdk/tools/gitlab/api_wrapper.py +118 -38
  72. alita_sdk/tools/gitlab_org/__init__.py +2 -1
  73. alita_sdk/tools/gitlab_org/api_wrapper.py +60 -62
  74. alita_sdk/tools/google/bigquery/__init__.py +2 -1
  75. alita_sdk/tools/google_places/__init__.py +2 -1
  76. alita_sdk/tools/jira/__init__.py +2 -1
  77. alita_sdk/tools/keycloak/__init__.py +2 -1
  78. alita_sdk/tools/localgit/__init__.py +2 -1
  79. alita_sdk/tools/memory/__init__.py +1 -1
  80. alita_sdk/tools/ocr/__init__.py +2 -1
  81. alita_sdk/tools/openapi/__init__.py +227 -15
  82. alita_sdk/tools/openapi/api_wrapper.py +1287 -802
  83. alita_sdk/tools/pandas/__init__.py +11 -5
  84. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  85. alita_sdk/tools/postman/__init__.py +2 -1
  86. alita_sdk/tools/pptx/__init__.py +2 -1
  87. alita_sdk/tools/qtest/__init__.py +21 -2
  88. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  89. alita_sdk/tools/rally/__init__.py +2 -1
  90. alita_sdk/tools/rally/api_wrapper.py +1 -1
  91. alita_sdk/tools/report_portal/__init__.py +2 -1
  92. alita_sdk/tools/salesforce/__init__.py +2 -1
  93. alita_sdk/tools/servicenow/__init__.py +2 -1
  94. alita_sdk/tools/sharepoint/__init__.py +2 -1
  95. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  96. alita_sdk/tools/slack/__init__.py +3 -2
  97. alita_sdk/tools/slack/api_wrapper.py +2 -2
  98. alita_sdk/tools/sql/__init__.py +3 -2
  99. alita_sdk/tools/testio/__init__.py +2 -1
  100. alita_sdk/tools/testrail/__init__.py +2 -1
  101. alita_sdk/tools/utils/content_parser.py +77 -3
  102. alita_sdk/tools/utils/text_operations.py +163 -71
  103. alita_sdk/tools/xray/__init__.py +3 -2
  104. alita_sdk/tools/yagmail/__init__.py +2 -1
  105. alita_sdk/tools/zephyr/__init__.py +2 -1
  106. alita_sdk/tools/zephyr_enterprise/__init__.py +2 -1
  107. alita_sdk/tools/zephyr_essential/__init__.py +2 -1
  108. alita_sdk/tools/zephyr_scale/__init__.py +3 -2
  109. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  110. alita_sdk/tools/zephyr_squad/__init__.py +2 -1
  111. {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.602.dist-info}/METADATA +7 -6
  112. {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.602.dist-info}/RECORD +116 -111
  113. {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.602.dist-info}/WHEEL +0 -0
  114. {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.602.dist-info}/entry_points.txt +0 -0
  115. {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.602.dist-info}/licenses/LICENSE +0 -0
  116. {alita_sdk-0.3.554.dist-info → alita_sdk-0.3.602.dist-info}/top_level.txt +0 -0
alita_sdk/cli/agents.py CHANGED
@@ -141,15 +141,16 @@ def parse_test_case(test_case_path: str) -> Dict[str, Any]:
  if gen_data_match:
  generate_test_data = gen_data_match.group(1).lower() == 'true'
 
- # Extract Test Data Configuration table
- test_data_config = {}
+ # Extract Test Data Configuration section as a raw fenced code block string
+ # NOTE: We intentionally store the entire section as a single string rather than parsing
+ # individual table rows. This preserves the original formatting for downstream tools
+ # which may prefer the raw markdown block.
+ test_data_config = None
  config_section_match = re.search(r'##\s+Test Data Configuration\s*\n(.+?)(?=\n##|\Z)', content, re.DOTALL)
  if config_section_match:
- config_section = config_section_match.group(1)
- # Parse markdown table (format: | Parameter | Value | Description |)
- table_rows = re.findall(r'\|\s*\*\*([^*]+)\*\*\s*\|\s*`?([^|`]+)`?\s*\|', config_section)
- for param, value in table_rows:
- test_data_config[param.strip()] = value.strip()
+ config_section = config_section_match.group(1).strip()
+ # Store as a fenced code block to make it clear this is a raw block of text
+ test_data_config = f"\n{config_section}\n"
 
  # Extract Pre-requisites section
  prerequisites = ""
@@ -252,86 +253,80 @@ def _build_bulk_data_gen_prompt(parsed_test_cases: list) -> str:
  for idx, tc in enumerate(parsed_test_cases, 1):
  test_case = tc['data']
  test_file = tc['file']
-
+ # Build parts for this test case (do not include separator lines here;
+ # the entire block is wrapped with separators at the top-level)
  parts = [f"Test Case #{idx}: {test_case['name']}", f"File: {test_file.name}", ""]
 
  if test_case.get('test_data_config'):
  parts.append("Test Data Configuration:")
- for param, value in test_case['test_data_config'].items():
- parts.append(f" - {param}: {value}")
-
+ td = test_case['test_data_config']
+ raw_lines = str(td).splitlines()
+ for line in raw_lines:
+ parts.append(f"{line}")
+
  if test_case.get('prerequisites'):
  parts.append(f"\nPre-requisites:\n{test_case['prerequisites']}")
 
- if test_case.get('variables'):
- parts.append(f"\nVariables to generate: {', '.join(test_case['variables'])}")
-
  requirements.append("\n".join(parts))
 
- return f"""{'='*60}
-
- {chr(10).join(requirements)}
-
- {'='*60}"""
-
-
- def _build_bulk_execution_prompt(parsed_test_cases: list) -> str:
- """Build consolidated prompt for bulk test execution."""
- parts = []
-
- for idx, tc_info in enumerate(parsed_test_cases, 1):
- test_case = tc_info['data']
- test_file = tc_info['file']
-
- parts.append(f"\n{'='*80}\nTEST CASE #{idx}: {test_case['name']}\nFile: {test_file.name}\n{'='*80}")
-
- if test_case['steps']:
- for step in test_case['steps']:
- parts.append(f"\nStep {step['number']}: {step['title']}\n{step['instruction']}")
- if step['expectation']:
- parts.append(f"Expected Result: {step['expectation']}")
- else:
- parts.append("\n(No steps defined)")
+ # If no requirements were collected, return an empty string to avoid
+ # producing a prompt with only separator lines.
+ if not requirements:
+ return ""
+
+ # Use a visible divider between test cases so each entry is clearly separated
+ divider = '-' * 40
+ body = f"\n\n{divider}\n\n".join(requirements)
+ return f"{('='*60)}\n\n{body}\n\n{('='*60)}"
+
+
+ def _build_single_test_execution_prompt(test_case_info: dict, test_number: int) -> str:
+ """Build execution prompt for a single test case."""
+ test_case = test_case_info['data']
+ test_file = test_case_info['file']
+
+ parts = [
+ f"\n{'='*80}",
+ f"TEST CASE #{test_number}: {test_case['name']}",
+ f"File: {test_file.name}",
+ f"{'='*80}"
+ ]
+
+ if test_case['steps']:
+ for step in test_case['steps']:
+ parts.append(f"\nStep {step['number']}: {step['title']}")
+ parts.append(step['instruction'])
+ else:
+ parts.append("\n(No steps defined)")
 
  return "\n".join(parts)
 
 
- def _build_validation_prompt(parsed_test_cases: list, execution_output: str) -> str:
- """Build prompt for bulk validation of test results."""
- parts = ["You are a test validator. Review the test execution results and validate each test case.\n\nTest Cases to Validate:\n"]
+ def _build_single_test_validation_prompt(test_case_info: dict, test_number: int, execution_output: str) -> str:
+ """Build validation prompt for a single test case."""
+ test_case = test_case_info['data']
+
+ parts = [
+ f"\nTest Case #{test_number}: {test_case['name']}"
+ ]
 
- for idx, tc_info in enumerate(parsed_test_cases, 1):
- test_case = tc_info['data']
- parts.append(f"\nTest Case #{idx}: {test_case['name']}")
- if test_case['steps']:
- for step in test_case['steps']:
- parts.append(f" Step {step['number']}: {step['title']}")
- if step['expectation']:
- parts.append(f" Expected: {step['expectation']}")
+ if test_case['steps']:
+ for step in test_case['steps']:
+ parts.append(f" Step {step['number']}: {step['title']}")
+ if step['expectation']:
+ parts.append(f" Expected: {step['expectation']}")
 
  parts.append(f"\n\nActual Execution Results:\n{execution_output}\n")
- parts.append(f"""\nBased on the execution results above, validate each test case.
-
- Respond with valid JSON in this EXACT format:
- {{
- "test_cases": [
- {{
- "test_number": 1,
- "test_name": "<test case name>",
- "steps": [
- {{"step_number": 1, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}},
- {{"step_number": 2, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}}
- ]
- }},
- {{
- "test_number": 2,
- "test_name": "<test case name>",
- "steps": [...]
- }}
- ]
- }}
-
- Validate all {len(parsed_test_cases)} test cases and their steps.""")
+
+ # Escape quotes in test name for valid JSON in prompt
+ escaped_test_name = test_case['name'].replace('"', '\\"')
+
+ parts.append(f"""\nBased on the execution results above, validate this test case.
+ {{
+ "test_number": {test_number},
+ "test_name": "{escaped_test_name}"
+ }}
+ """)
 
  return "\n".join(parts)
 
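
The new per-test prompt builders replace the old bulk prompts. A rough, self-contained illustration of the execution prompt shape they produce; the tc_info structure (keys 'file', 'data', 'steps') is inferred from how the surrounding code indexes it, and the sample values are made up:

from pathlib import Path

tc_info = {
    "file": Path("TC-001.md"),
    "data": {
        "name": "Login works",
        "steps": [
            {"number": 1, "title": "Open login page", "instruction": "Navigate to /login", "expectation": "Form is shown"},
        ],
    },
}

test_case = tc_info["data"]
parts = [
    f"\n{'='*80}",
    f"TEST CASE #1: {test_case['name']}",
    f"File: {tc_info['file'].name}",
    f"{'='*80}",
]
for step in test_case["steps"]:
    parts.append(f"\nStep {step['number']}: {step['title']}")
    parts.append(step["instruction"])

print("\n".join(parts))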
@@ -359,17 +354,119 @@ def _extract_json_from_text(text: str) -> dict:
  return json.loads(text[start_idx:end_idx])
 
 
- def _create_fallback_results(parsed_test_cases: list) -> tuple[list, int, int, int]:
- """Create fallback results when execution/validation fails."""
- test_results = []
- for tc_info in parsed_test_cases:
- test_results.append({
- 'title': tc_info['data']['name'],
+ def _create_fallback_result_for_test(test_case: dict, test_file: Path, reason: str = 'Validation failed') -> dict:
+ """Create a fallback result for a single test case with detailed step information.
+
+ Args:
+ test_case: Parsed test case data
+ test_file: Path to test case file
+ reason: Reason for fallback
+
+ Returns:
+ Fallback test result dict with step details
+ """
+ fallback_steps = []
+ for step_info in test_case.get('steps', []):
+ fallback_steps.append({
+ 'step_number': step_info['number'],
+ 'title': step_info['title'],
  'passed': False,
- 'file': tc_info['file'].name,
- 'step_results': []
+ 'details': reason
  })
- return test_results, len(parsed_test_cases), 0, len(parsed_test_cases)
+
+ return {
+ 'title': test_case['name'],
+ 'passed': False,
+ 'file': test_file.name,
+ 'step_results': fallback_steps,
+ 'validation_error': reason
+ }
+
+
+ def _cleanup_executor_cache(cache: Dict[str, tuple], cache_name: str = "executor") -> None:
+ """Clean up executor cache resources.
+
+ Args:
+ cache: Dictionary of cached executors
+ cache_name: Name of cache for logging
+ """
+ console.print(f"[dim]Cleaning up {cache_name} cache...[/dim]")
+ for cache_key, cached_items in cache.items():
+ try:
+ # Extract memory from tuple (second element)
+ memory = cached_items[1] if len(cached_items) > 1 else None
+
+ # Close SQLite memory connection
+ if memory and hasattr(memory, 'conn') and memory.conn:
+ memory.conn.close()
+ except Exception as e:
+ logger.debug(f"Error cleaning up {cache_name} cache for {cache_key}: {e}")
+
+
+ def _create_executor_from_cache(cache: Dict[str, tuple], cache_key: str,
+ client, agent_def: Dict, toolkit_config_path: Optional[str],
+ config, model: Optional[str], temperature: Optional[float],
+ max_tokens: Optional[int], work_dir: Optional[str]) -> tuple:
+ """Get or create executor from cache.
+
+ Args:
+ cache: Executor cache dictionary
+ cache_key: Key for caching
+ client: API client
+ agent_def: Agent definition
+ toolkit_config_path: Path to toolkit config
+ config: CLI configuration
+ model: Model override
+ temperature: Temperature override
+ max_tokens: Max tokens override
+ work_dir: Working directory
+
+ Returns:
+ Tuple of (agent_executor, memory, mcp_session_manager)
+ """
+ if cache_key in cache:
+ return cache[cache_key]
+
+ # Create new executor
+ from langgraph.checkpoint.sqlite import SqliteSaver
+ import sqlite3
+
+ memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
+ toolkit_config_tuple = (toolkit_config_path,) if toolkit_config_path else ()
+
+ agent_executor, mcp_session_manager, _, _, _, _, _ = _setup_local_agent_executor(
+ client, agent_def, toolkit_config_tuple, config, model, temperature,
+ max_tokens, memory, work_dir
+ )
+
+ # Cache the executor
+ cached_tuple = (agent_executor, memory, mcp_session_manager)
+ cache[cache_key] = cached_tuple
+ return cached_tuple
+
+
+ def _print_validation_diagnostics(validation_output: str) -> None:
+ """Print diagnostic information for validation output.
+
+ Args:
+ validation_output: The validation output to diagnose
+ """
+ console.print(f"\n[bold red]🔍 Diagnostic Information:[/bold red]")
+ console.print(f"[dim]Output length: {len(validation_output)} characters[/dim]")
+
+ # Check for key JSON elements
+ has_json = '{' in validation_output and '}' in validation_output
+ has_fields = 'test_number' in validation_output and 'steps' in validation_output
+
+ console.print(f"[dim]Has JSON structure: {has_json}[/dim]")
+ console.print(f"[dim]Has required fields: {has_fields}[/dim]")
+
+ # Show relevant excerpt
+ if len(validation_output) > 400:
+ console.print(f"\n[red]First 200 chars:[/red] [dim]{validation_output[:200]}[/dim]")
+ console.print(f"[red]Last 200 chars:[/red] [dim]{validation_output[-200:]}[/dim]")
+ else:
+ console.print(f"\n[red]Full output:[/red] [dim]{validation_output}[/dim]")
 
 
  def _get_alita_system_prompt(config) -> str:
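
The cache helpers added here follow a plain get-or-create pattern keyed by toolkit config path, with a cleanup pass that closes the SQLite connection held by each cached memory. A stripped-down sketch of the same idea; the Memory class and the "fake-executor" string are placeholders, not the SDK's real objects:

import sqlite3
from typing import Callable, Dict, Tuple

class Memory:
    """Placeholder for a SqliteSaver-style memory object exposing a .conn attribute."""
    def __init__(self) -> None:
        self.conn = sqlite3.connect(":memory:", check_same_thread=False)

def get_or_create(cache: Dict[str, Tuple], key: str, factory: Callable[[], Tuple]) -> Tuple:
    # Reuse the executor/memory pair already built for this toolkit config
    if key not in cache:
        cache[key] = factory()
    return cache[key]

def cleanup(cache: Dict[str, Tuple]) -> None:
    # Close each cached memory's SQLite connection, as _cleanup_executor_cache does
    for items in cache.values():
        memory = items[1] if len(items) > 1 else None
        if memory is not None and getattr(memory, "conn", None):
            memory.conn.close()

cache: Dict[str, Tuple] = {}
executor, memory = get_or_create(cache, "__no_config__", lambda: ("fake-executor", Memory()))
cleanup(cache)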
@@ -1263,6 +1360,10 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
  details.append("Temperature: ", style="bold")
  details.append(f"{agent_def['temperature']}\n", style="cyan")
 
+ if agent_def.get('persona'):
+ details.append("Persona: ", style="bold")
+ details.append(f"{agent_def['persona']}\n", style="cyan")
+
  panel = Panel(
  details,
  title=f"Local Agent: {agent_def.get('name', 'Unknown')}",
@@ -3212,27 +3313,42 @@ def agent_run(ctx, agent_source: str, message: str, version: Optional[str],
 
 
  @agent.command('execute-test-cases')
- @click.argument('agent_source')
+ @click.option(
+ '--agent_source',
+ '--agent-source',
+ 'agent_source',
+ required=False,
+ default=str(Path('.alita') / 'agents' / 'test-runner.agent.md'),
+ show_default=True,
+ type=click.Path(exists=False, file_okay=True, dir_okay=False),
+ help='Path to test runner agent definition file'
+ )
  @click.option('--test-cases-dir', required=True, type=click.Path(exists=True, file_okay=False, dir_okay=True),
  help='Directory containing test case files')
- @click.option('--results-dir', required=True, type=click.Path(file_okay=False, dir_okay=True),
+ @click.option('--results-dir', required=False, default=str(Path('.alita') / 'tests' / 'results'),
+ type=click.Path(file_okay=False, dir_okay=True),
  help='Directory where test results will be saved')
  @click.option('--test-case', 'test_case_files', multiple=True,
  help='Specific test case file(s) to execute (e.g., TC-001.md). Can specify multiple times. If not specified, executes all test cases.')
  @click.option('--model', help='Override LLM model')
  @click.option('--temperature', type=float, help='Override temperature')
  @click.option('--max-tokens', type=int, help='Override max tokens')
- @click.option('--dir', 'work_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True),
+ @click.option('--dir', 'work_dir', required=False, default=str(Path('.alita')),
+ type=click.Path(exists=True, file_okay=False, dir_okay=True),
  help='Grant agent filesystem access to this directory')
- @click.option('--data-generator', type=click.Path(exists=True),
+ @click.option('--data-generator', required=False, default=str(Path('.alita') / 'agents' / 'test-data-generator.agent.md'),
+ type=click.Path(exists=True),
  help='Path to test data generator agent definition file')
+ @click.option('--validator', type=click.Path(exists=True),
+ help='Path to test validator agent definition file (default: .alita/agents/test-validator.agent.md)')
  @click.option('--skip-data-generation', is_flag=True,
  help='Skip test data generation step')
  @click.pass_context
  def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir: str,
  test_case_files: tuple, model: Optional[str], temperature: Optional[float],
- max_tokens: Optional[int], work_dir: Optional[str],
- data_generator: Optional[str], skip_data_generation: bool):
+ max_tokens: Optional[int], work_dir: str,
+ data_generator: str, validator: Optional[str],
+ skip_data_generation: bool):
  """
  Execute test cases from a directory and save results.
 
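
The option block above accepts both --agent_source and --agent-source spellings and falls back to a committed default path. A self-contained sketch of that click pattern (the demo command name is made up; the option itself mirrors the diff):

from pathlib import Path
import click

@click.command()
@click.option(
    '--agent_source', '--agent-source', 'agent_source',  # both spellings feed one parameter
    default=str(Path('.alita') / 'agents' / 'test-runner.agent.md'),
    show_default=True,
    type=click.Path(exists=False, file_okay=True, dir_okay=False),
    help='Path to test runner agent definition file',
)
def demo(agent_source: str) -> None:
    click.echo(f"Using agent definition: {agent_source}")

if __name__ == '__main__':
    demo()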
@@ -3247,24 +3363,44 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
  - Generates a test result file
  4. Saves all results to RESULTS_DIR
 
- AGENT_SOURCE: Path to agent definition file (e.g., .github/agents/test-runner.agent.md)
+ --agent_source: Path to test runner agent definition file
 
  \b
  Examples:
- alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results
- alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
+ alita agent execute-test-cases --test-cases-dir ./tests --results-dir ./results
+ alita agent execute-test-cases --agent_source ./agent.json --test-cases-dir ./tests --results-dir ./results \
  --data-generator ./data-gen.json
- alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
+ alita agent execute-test-cases --agent_source ./agent.json --test-cases-dir ./tests --results-dir ./results \
  --test-case TC-001.md --test-case TC-002.md
- alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
+ alita agent execute-test-cases --agent_source ./agent.json --test-cases-dir ./tests --results-dir ./results \
  --skip-data-generation --model gpt-4o
  """
+ # Import dependencies at function start
+ import sqlite3
+ import uuid
+ from langgraph.checkpoint.sqlite import SqliteSaver
+
  config = ctx.obj['config']
  client = get_client(ctx)
+
+ # Sanity-check committed defaults (should exist; fail early with a clear message if not)
+ if results_dir and not Path(results_dir).exists():
+ raise click.ClickException(
+ f"Results directory not found: {results_dir}. "
+ f"If you are running outside the repo root, pass --results-dir explicitly."
+ )
 
  try:
  # Load agent definition
- if not Path(agent_source).exists():
+ agent_source_path = Path(agent_source)
+ if not agent_source_path.exists():
+ default_path = Path('.alita') / 'agents' / 'test-runner.agent.md'
+ if agent_source_path == default_path:
+ raise click.ClickException(
+ f"Default agent definition not found: {agent_source}. "
+ f"Run this command from the repo root (so {default_path} resolves correctly) "
+ f"or pass --agent_source explicitly."
+ )
  raise click.ClickException(f"Agent definition not found: {agent_source}")
 
  agent_def = load_agent_definition(agent_source)
@@ -3317,11 +3453,30 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
  console.print("[yellow]Continuing with test execution...[/yellow]\n")
  logger.debug(f"Data generator setup error: {e}", exc_info=True)
 
- # Track overall results
- total_tests = 0
- passed_tests = 0
- failed_tests = 0
- test_results = [] # Store structured results for final report
+ # Load validator agent definition
+ validator_def = None
+ validator_agent_name = "Default Validator"
+
+ # Try to load validator from specified path or default location
+ validator_path = validator
+ if not validator_path:
+ # Default to .alita/agents/test-validator.agent.md
+ default_validator = Path.cwd() / '.alita' / 'agents' / 'test-validator.agent.md'
+ if default_validator.exists():
+ validator_path = str(default_validator)
+
+ if validator_path and Path(validator_path).exists():
+ try:
+ validator_def = load_agent_definition(validator_path)
+ validator_agent_name = validator_def.get('name', Path(validator_path).stem)
+ console.print(f"Validator Agent: [bold]{validator_agent_name}[/bold]")
+ console.print(f"[dim]Using: {validator_path}[/dim]\n")
+ except Exception as e:
+ console.print(f"[yellow]⚠ Warning: Failed to load validator agent: {e}[/yellow]")
+ console.print(f"[yellow]Will use test runner agent for validation[/yellow]\n")
+ logger.debug(f"Validator load error: {e}", exc_info=True)
+ else:
+ console.print(f"[dim]No validator agent specified, using test runner agent for validation[/dim]\n")
 
  # Store bulk data generation chat history to pass to test executors
  bulk_gen_chat_history = []
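
The validator is resolved in three steps: an explicit --validator path, then the conventional .alita/agents/test-validator.agent.md under the current directory, otherwise the test runner agent is reused for validation. A hypothetical condensed sketch of that resolution order:

from pathlib import Path
from typing import Optional

def resolve_validator_path(explicit: Optional[str]) -> Optional[str]:
    # Prefer an explicit --validator path, then the conventional default location
    if explicit:
        return explicit
    default = Path.cwd() / '.alita' / 'agents' / 'test-validator.agent.md'
    return str(default) if default.exists() else None

# Returning None (or failing to load the file) means validation falls back to the runner agent
print(resolve_validator_path(None))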
@@ -3353,11 +3508,10 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
 
  bulk_data_gen_prompt = _build_bulk_data_gen_prompt(test_cases_needing_data_gen)
 
- console.print(f"Executing test data generation prompt {bulk_data_gen_prompt}\n")
+ console.print(f"Executing test data generation prompt \n{bulk_data_gen_prompt}\n")
 
  try:
  # Setup data generator agent
- from langgraph.checkpoint.sqlite import SqliteSaver
  bulk_memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
 
  # Use first test case's config or empty tuple
@@ -3398,138 +3552,214 @@ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir:
  console.print("[yellow]Continuing with test execution...[/yellow]\n")
  logger.debug(f"Bulk data generation error: {e}", exc_info=True)
 
- # Execute ALL test cases in one bulk operation
+ # Execute test cases sequentially with executor caching
  if not parsed_test_cases:
  console.print("[yellow]No test cases to execute[/yellow]")
  return
 
- console.print(f"\n[bold yellow]📋 Executing ALL test cases in bulk...[/bold yellow]\n")
-
- # Use first test case's config for agent setup
- first_tc = parsed_test_cases[0]
- first_test_file = first_tc['file']
- toolkit_config_path = resolve_toolkit_config_path(
- first_tc['data'].get('config_path', ''),
- first_test_file,
- test_cases_path
- )
- toolkit_config_tuple = (toolkit_config_path,) if toolkit_config_path else ()
-
- # Create memory for bulk execution
- from langgraph.checkpoint.sqlite import SqliteSaver
- memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
+ console.print(f"\n[bold yellow]📋 Executing test cases sequentially...[/bold yellow]\n")
 
- # Initialize chat history with bulk data generation context
- chat_history = bulk_gen_chat_history.copy()
+ # Show data generation context availability
+ if bulk_gen_chat_history:
+ console.print(f"[dim]✓ Data generation history available ({len(bulk_gen_chat_history)} messages) - shared with all test cases[/dim]\n")
+ else:
+ console.print(f"[dim]ℹ No data generation history (skipped or disabled)[/dim]\n")
 
- # Setup agent executor
- agent_executor, _, _, _, _, _, _ = _setup_local_agent_executor(
- client, agent_def, toolkit_config_tuple, config, model, temperature, max_tokens, memory, work_dir
- )
+ # Executor cache: key = toolkit_config_path, value = (agent_executor, memory, mcp_session_manager)
+ executor_cache = {}
 
- # Build bulk execution prompt
- bulk_all_prompt = _build_bulk_execution_prompt(parsed_test_cases)
-
- console.print(f"Executing the prompt: {bulk_all_prompt}\n")
+ # Validation executor cache: separate isolated executors for validation
+ # key = toolkit_config_path, value = (agent_executor, memory, mcp_session_manager)
+ validation_executor_cache = {}
 
- # Execute all test cases in bulk
+ # Execute each test case sequentially
  test_results = []
- all_execution_output = ""
+ total_tests = len(parsed_test_cases)
 
- try:
- if agent_executor:
- with console.status(f"[yellow]Executing {len(parsed_test_cases)} test cases in bulk...[/yellow]", spinner="dots"):
- bulk_result = agent_executor.invoke({
- "input": bulk_all_prompt,
- "chat_history": chat_history
- })
- all_execution_output = extract_output_from_result(bulk_result)
-
- console.print(f"[green]✓ All test cases executed[/green]")
- console.print(f"[dim]{all_execution_output}...[/dim]\n")
+ for idx, tc_info in enumerate(parsed_test_cases, 1):
+ test_case = tc_info['data']
+ test_file = tc_info['file']
+ test_name = test_case['name']
+
+ # Display progress
+ console.print(f"[bold cyan]Test Case {idx}/{total_tests} - {test_name}[/bold cyan]")
+
+ try:
+ # Resolve toolkit config path for this test case
+ toolkit_config_path = resolve_toolkit_config_path(
+ test_case.get('config_path', ''),
+ test_file,
+ test_cases_path
+ )
 
- # Update chat history
- chat_history.append({"role": "user", "content": bulk_all_prompt})
- chat_history.append({"role": "assistant", "content": all_execution_output})
+ # Use cache key (None if no config)
+ cache_key = toolkit_config_path if toolkit_config_path else '__no_config__'
+ thread_id = f"test_case_{idx}_{uuid.uuid4().hex[:8]}"
 
- # Now validate ALL test cases in bulk
- console.print(f"[bold yellow]✅ Validating all test cases...[/bold yellow]\n")
+ # Get or create executor from cache
+ agent_executor, memory, mcp_session_manager = _create_executor_from_cache(
+ executor_cache, cache_key, client, agent_def, toolkit_config_path,
+ config, model, temperature, max_tokens, work_dir
+ )
 
- validation_prompt = _build_validation_prompt(parsed_test_cases, all_execution_output)
-
- console.print(f"[dim]{validation_prompt}[/dim]\n")
+ # Build execution prompt for single test case
+ execution_prompt = _build_single_test_execution_prompt(tc_info, idx)
+ console.print(f"[dim]Executing with {len(bulk_gen_chat_history)} history messages[/dim]")
+ console.print(f"[dim]Executing test case with the prompt {execution_prompt}[/dim]")
 
- with console.status("[yellow]Validating all results...[/yellow]", spinner="dots"):
- validation_result = agent_executor.invoke({
- "input": validation_prompt,
- "chat_history": chat_history
+ # Execute test case
+ execution_output = ""
+ if agent_executor:
+ with console.status(f"[yellow]Executing test case...[/yellow]", spinner="dots"):
+ exec_result = agent_executor.invoke({
+ "input": execution_prompt,
+ "chat_history": bulk_gen_chat_history # ONLY data gen history, no accumulation
+ }, config={"configurable": {"thread_id": thread_id}})
+ execution_output = extract_output_from_result(exec_result)
+
+ console.print(f"[green]✓ Test case executed[/green]")
+ console.print(f"[dim]{execution_output}[/dim]\n")
+
+ # Append execution to bulk gen chat history for validation
+ test_case_history_start = len(bulk_gen_chat_history)
+ bulk_gen_chat_history.extend([
+ {"role": "user", "content": execution_prompt},
+ {"role": "assistant", "content": execution_output}
+ ])
+
+ # No history accumulation - each test case is independent
+ else:
+ console.print(f"[red]✗ No agent executor available[/red]")
+ # Create fallback result for this test
+ test_results.append({
+ 'title': test_name,
+ 'passed': False,
+ 'file': test_file.name,
+ 'step_results': []
  })
+ continue
+
+ # Validate test case using validation executor with accumulated history
+ validation_prompt = _build_single_test_validation_prompt(tc_info, idx, execution_output)
+
+ console.print(f"[bold yellow]🔍 Validating test case (with execution history)...[/bold yellow]")
+ console.print(f"[dim]{validation_prompt}[/dim]\n")
 
- validation_output = extract_output_from_result(validation_result)
+ # Create or retrieve isolated validation executor
+ validation_cache_key = f"{cache_key}_validation"
+ validation_agent_def = validator_def if validator_def else agent_def
+
+ validation_executor, validation_memory, validation_mcp_session = _create_executor_from_cache(
+ validation_executor_cache, validation_cache_key, client, validation_agent_def,
+ toolkit_config_path, config, model, temperature, max_tokens, work_dir
+ )
+
+ if validation_cache_key not in validation_executor_cache:
+ console.print(f"[dim]Created new isolated validation executor[/dim]")
+ else:
+ console.print(f"[dim]Using cached validation executor[/dim]")
 
- console.print(f"[dim]Validation Response: {validation_output}...[/dim]\n")
+ # For validation, use a separate thread with accumulated chat history (data gen + execution)
+ # This provides context to the validator about the test execution
+ validation_thread_id = f"validation_{idx}_{uuid.uuid4().hex[:8]}"
+
+ validation_output = ""
+ if validation_executor:
+ with console.status(f"[yellow]Validating test case...[/yellow]", spinner="dots"):
+ validation_result = validation_executor.invoke({
+ "input": validation_prompt,
+ "chat_history": bulk_gen_chat_history # Includes data gen and execution history
+ }, {"configurable": {"thread_id": validation_thread_id}})
+
+ validation_output = extract_output_from_result(validation_result)
+ else:
+ console.print(f"[red]✗ No validation executor available[/red]")
+ validation_output = "{}"
+
+ # No further history update - validation completes the cycle
 
  # Parse validation JSON
  try:
  validation_json = _extract_json_from_text(validation_output)
- test_cases_results = validation_json.get('test_cases', [])
+ step_results = validation_json.get('steps', [])
 
- # Process results for each test case
- total_tests = 0
- passed_tests = 0
- failed_tests = 0
+ # Determine if test passed (all steps must pass)
+ test_passed = all(step.get('passed', False) for step in step_results) if step_results else False
 
- for tc_result in test_cases_results:
- test_name = tc_result.get('test_name', f"Test #{tc_result.get('test_number', '?')}")
- step_results = tc_result.get('steps', [])
-
- # Determine if test passed (all steps must pass)
- test_passed = all(step.get('passed', False) for step in step_results) if step_results else False
+ if test_passed:
+ console.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
+ else:
+ console.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")
+
+ # Display individual step results
+ for step_result in step_results:
+ step_num = step_result.get('step_number')
+ step_title = step_result.get('title', '')
+ passed = step_result.get('passed', False)
+ details = step_result.get('details', '')
 
- total_tests += 1
- if test_passed:
- passed_tests += 1
- console.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
+ if passed:
+ console.print(f" [green]✓ Step {step_num}: {step_title}[/green]")
+ console.print(f" [dim]{details}[/dim]")
  else:
- failed_tests += 1
- console.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")
-
- # Display individual step results
- for step_result in step_results:
- step_num = step_result.get('step_number')
- step_title = step_result.get('title', '')
- passed = step_result.get('passed', False)
- details = step_result.get('details', '')
-
- if passed:
- console.print(f" [green]✓ Step {step_num}: {step_title}[/green]")
- console.print(f" [dim]{details}[/dim]")
- else:
- console.print(f" [red]✗ Step {step_num}: {step_title}[/red]")
- console.print(f" [dim]{details}[/dim]")
-
- console.print()
-
- # Store result
- test_results.append({
- 'title': test_name,
- 'passed': test_passed,
- 'file': parsed_test_cases[tc_result.get('test_number', 1) - 1]['file'].name if tc_result.get('test_number', 1) - 1 < len(parsed_test_cases) else 'unknown',
- 'step_results': step_results
- })
+ console.print(f" [red]✗ Step {step_num}: {step_title}[/red]")
+ console.print(f" [dim]{details}[/dim]")
+
+ console.print()
+
+ # Store result
+ test_results.append({
+ 'title': test_name,
+ 'passed': test_passed,
+ 'file': test_file.name,
+ 'step_results': step_results
+ })
 
  except Exception as e:
- logger.debug(f"Validation parsing failed: {e}")
- console.print(f"[yellow]⚠ Warning: Could not parse validation results: {e}[/yellow]\n")
- test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
- else:
- console.print(f"[red]✗ No agent executor available[/red]\n")
- test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
+ logger.debug(f"Validation parsing failed for {test_name}: {e}", exc_info=True)
+ console.print(f"[yellow]⚠ Warning: Could not parse validation results for {test_name}[/yellow]")
+ console.print(f"[yellow]Error: {str(e)}[/yellow]")
 
- except Exception as e:
- console.print(f"[red]✗ Bulk execution failed: {e}[/red]\n")
- logger.debug(f"Bulk execution error: {e}", exc_info=True)
- test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
+ # Enhanced diagnostic output
+ _print_validation_diagnostics(validation_output)
+
+ # Generate fallback result using helper function
+ console.print(f"\n[yellow]🔄 Generating fallback validation result...[/yellow]")
+ fallback_result = _create_fallback_result_for_test(
+ test_case,
+ test_file,
+ f'Validation failed - could not parse validator output: {str(e)}'
+ )
+ console.print(f"[dim]Created {len(fallback_result['step_results'])} fallback step results[/dim]\n")
+
+ test_results.append(fallback_result)
+ console.print()
+
+ # After validation, remove the test case execution from history to prevent accumulation
+ # Remove the entries added for this test case
+ del bulk_gen_chat_history[test_case_history_start:]
+
+ except Exception as e:
+ logger.debug(f"Test execution failed for {test_name}: {e}", exc_info=True)
+ console.print(f"[red]✗ Test execution failed: {e}[/red]")
+
+ # Create fallback result using helper function
+ fallback_result = _create_fallback_result_for_test(
+ test_case,
+ test_file,
+ f'Test execution failed: {str(e)}'
+ )
+ test_results.append(fallback_result)
+ console.print()
+
+ # Cleanup: Close executor cache resources
+ _cleanup_executor_cache(executor_cache, "executor")
+ _cleanup_executor_cache(validation_executor_cache, "validation executor")
+
+ # Calculate totals
+ total_tests = len(test_results)
+ passed_tests = sum(1 for r in test_results if r['passed'])
+ failed_tests = total_tests - passed_tests
 
  # Generate summary report
  console.print(f"\n[bold]{'='*60}[/bold]")
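
Since the bulk validation JSON is gone, totals are now derived from the per-test results list, and a test passes only when every reported step passed. A small sketch with made-up step data to show the aggregation:

# Illustrative step results in the shape the validator is asked to return
step_results = [
    {"step_number": 1, "title": "Open login page", "passed": True, "details": "OK"},
    {"step_number": 2, "title": "Submit credentials", "passed": False, "details": "Timeout"},
]

# A test passes only if every step passed; an empty step list counts as a failure
test_passed = all(step.get("passed", False) for step in step_results) if step_results else False

test_results = [{"title": "Login works", "passed": test_passed, "step_results": step_results}]
total_tests = len(test_results)
passed_tests = sum(1 for r in test_results if r["passed"])
failed_tests = total_tests - passed_tests
print(total_tests, passed_tests, failed_tests)  # 1 0 1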