alita-sdk 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk has been flagged as potentially problematic.

Files changed (90)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +6 -9
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1866 -185
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +815 -55
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +84 -26
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/langraph_agent.py +122 -31
  56. alita_sdk/runtime/llms/preloaded.py +2 -6
  57. alita_sdk/runtime/toolkits/__init__.py +2 -0
  58. alita_sdk/runtime/toolkits/application.py +1 -1
  59. alita_sdk/runtime/toolkits/mcp.py +46 -36
  60. alita_sdk/runtime/toolkits/planning.py +171 -0
  61. alita_sdk/runtime/toolkits/tools.py +39 -6
  62. alita_sdk/runtime/tools/llm.py +185 -8
  63. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  64. alita_sdk/runtime/tools/planning/models.py +246 -0
  65. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  66. alita_sdk/runtime/tools/vectorstore_base.py +41 -6
  67. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  68. alita_sdk/runtime/utils/streamlit.py +6 -10
  69. alita_sdk/runtime/utils/toolkit_utils.py +19 -4
  70. alita_sdk/tools/__init__.py +54 -27
  71. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  72. alita_sdk/tools/base_indexer_toolkit.py +98 -19
  73. alita_sdk/tools/bitbucket/__init__.py +2 -2
  74. alita_sdk/tools/chunkers/__init__.py +3 -1
  75. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +95 -6
  76. alita_sdk/tools/chunkers/universal_chunker.py +269 -0
  77. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  78. alita_sdk/tools/elitea_base.py +86 -21
  79. alita_sdk/tools/jira/__init__.py +1 -1
  80. alita_sdk/tools/jira/api_wrapper.py +91 -40
  81. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  82. alita_sdk/tools/qtest/__init__.py +1 -1
  83. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +8 -2
  84. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  85. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/METADATA +2 -1
  86. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/RECORD +90 -50
  87. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/WHEEL +0 -0
  88. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/entry_points.txt +0 -0
  89. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/licenses/LICENSE +0 -0
  90. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/top_level.txt +0 -0
alita_sdk/cli/agents.py CHANGED
@@ -11,8 +11,10 @@ import json
  import logging
  import sqlite3
  import sys
+ import re
  from typing import Optional, Dict, Any, List
  from pathlib import Path
+ from datetime import datetime
  import yaml

  from rich.console import Console
@@ -32,6 +34,8 @@ from .agent_executor import create_llm_instance, create_agent_executor, create_a
  from .toolkit_loader import load_toolkit_config, load_toolkit_configs
  from .callbacks import create_cli_callback, CLICallbackHandler
  from .input_handler import get_input_handler, styled_input, styled_selection_input
+ # Context management for chat history
+ from .context import CLIContextManager, CLIMessage, purge_old_sessions as purge_context_sessions

  logger = logging.getLogger(__name__)

@@ -39,6 +43,335 @@ logger = logging.getLogger(__name__)
  console = Console()


+ def resolve_toolkit_config_path(config_path_str: str, test_file: Path, test_cases_dir: Path) -> Optional[str]:
+     """
+     Resolve toolkit configuration file path from test case.
+
+     Tries multiple locations in order:
+     1. Absolute path
+     2. Relative to test case file directory
+     3. Relative to test cases directory
+     4. Relative to workspace root
+
+     Args:
+         config_path_str: Config path from test case
+         test_file: Path to the test case file
+         test_cases_dir: Path to test cases directory
+
+     Returns:
+         Absolute path to config file if found, None otherwise
+     """
+     if not config_path_str:
+         return None
+
+     # Normalize path separators
+     config_path_str = config_path_str.replace('\\', '/')
+
+     # Try absolute path first
+     config_path = Path(config_path_str)
+     if config_path.is_absolute() and config_path.exists():
+         return str(config_path)
+
+     # Try relative to test case file directory
+     config_path = test_file.parent / config_path_str
+     if config_path.exists():
+         return str(config_path)
+
+     # Try relative to test_cases_dir
+     config_path = test_cases_dir / config_path_str
+     if config_path.exists():
+         return str(config_path)
+
+     # Try relative to workspace root
+     workspace_root = Path.cwd()
+     config_path = workspace_root / config_path_str
+     if config_path.exists():
+         return str(config_path)
+
+     return None
+
+
+ def parse_test_case(test_case_path: str) -> Dict[str, Any]:
+     """
+     Parse a test case markdown file to extract configuration, steps, and expectations.
+
+     Args:
+         test_case_path: Path to the test case markdown file
+
+     Returns:
+         Dictionary containing:
+         - name: Test case name
+         - objective: Test objective
+         - config_path: Path to toolkit config file
+         - generate_test_data: Boolean flag indicating if test data generation is needed (default: True)
+         - test_data_config: Dictionary of test data configuration from table
+         - prerequisites: Pre-requisites section text
+         - variables: List of variable placeholders found (e.g., {{TEST_PR_NUMBER}})
+         - steps: List of test steps with their descriptions
+         - expectations: List of expectations/assertions
+     """
+     path = Path(test_case_path)
+     if not path.exists():
+         raise FileNotFoundError(f"Test case not found: {test_case_path}")
+
+     content = path.read_text(encoding='utf-8')
+
+     # Extract test case name from the first heading
+     name_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
+     name = name_match.group(1) if name_match else path.stem
+
+     # Extract objective
+     objective_match = re.search(r'##\s+Objective\s*\n\n(.+?)(?=\n\n##|\Z)', content, re.DOTALL)
+     objective = objective_match.group(1).strip() if objective_match else ""
+
+     # Extract config path and generateTestData flag
+     config_section_match = re.search(r'##\s+Config\s*\n\n(.+?)(?=\n\n##|\Z)', content, re.DOTALL)
+     config_path = None
+     generate_test_data = True  # Default to True if not specified
+
+     if config_section_match:
+         config_section = config_section_match.group(1)
+         # Extract path
+         path_match = re.search(r'path:\s*(.+?)(?=\n|$)', config_section, re.MULTILINE)
+         if path_match:
+             config_path = path_match.group(1).strip()
+
+         # Extract generateTestData flag
+         gen_data_match = re.search(r'generateTestData\s*:\s*(true|false)', config_section, re.IGNORECASE)
+         if gen_data_match:
+             generate_test_data = gen_data_match.group(1).lower() == 'true'
+
+     # Extract Test Data Configuration table
+     test_data_config = {}
+     config_section_match = re.search(r'##\s+Test Data Configuration\s*\n(.+?)(?=\n##|\Z)', content, re.DOTALL)
+     if config_section_match:
+         config_section = config_section_match.group(1)
+         # Parse markdown table (format: | Parameter | Value | Description |)
+         table_rows = re.findall(r'\|\s*\*\*([^*]+)\*\*\s*\|\s*`?([^|`]+)`?\s*\|', config_section)
+         for param, value in table_rows:
+             test_data_config[param.strip()] = value.strip()
+
+     # Extract Pre-requisites section
+     prerequisites = ""
+     prereq_match = re.search(r'##\s+Pre-requisites\s*\n\n(.+?)(?=\n\n##|\Z)', content, re.DOTALL)
+     if prereq_match:
+         prerequisites = prereq_match.group(1).strip()
+
+     # Find all variable placeholders ({{VARIABLE_NAME}})
+     variables = list(set(re.findall(r'\{\{([A-Z_]+)\}\}', content)))
+
+     # Extract test steps and expectations
+     steps = []
+     expectations = []
+
+     # Find all Step sections
+     step_pattern = r'###\s+Step\s+(\d+):\s+(.+?)\n\n(.+?)(?=\n\n###|\n\n##|\Z)'
+     for step_match in re.finditer(step_pattern, content, re.DOTALL):
+         step_num = step_match.group(1)
+         step_title = step_match.group(2).strip()
+         step_content = step_match.group(3).strip()
+
+         # Extract the actual instruction (first paragraph before "Expectation:")
+         instruction_match = re.search(r'(.+?)(?=\n\n\*\*Expectation:\*\*|\Z)', step_content, re.DOTALL)
+         instruction = instruction_match.group(1).strip() if instruction_match else step_content
+
+         # Extract expectation if present
+         expectation_match = re.search(r'\*\*Expectation:\*\*\s+(.+)', step_content, re.DOTALL)
+         expectation = expectation_match.group(1).strip() if expectation_match else None
+
+         steps.append({
+             'number': int(step_num),
+             'title': step_title,
+             'instruction': instruction,
+             'expectation': expectation
+         })
+
+         if expectation:
+             expectations.append({
+                 'step': int(step_num),
+                 'description': expectation
+             })
+
+     return {
+         'name': name,
+         'objective': objective,
+         'config_path': config_path,
+         'generate_test_data': generate_test_data,
+         'test_data_config': test_data_config,
+         'prerequisites': prerequisites,
+         'variables': variables,
+         'steps': steps,
+         'expectations': expectations
+     }
+
+
+ def validate_test_output(output: str, expectation: str) -> tuple[bool, str]:
+     """
+     Validate test output against expectations.
+
+     Args:
+         output: The actual output from the agent
+         expectation: The expected result description
+
+     Returns:
+         Tuple of (passed: bool, details: str)
+     """
+     # Simple keyword-based validation
+     # Extract key phrases from expectation
+
+     # Common patterns in expectations
+     if "contains" in expectation.lower():
+         # Extract what should be contained
+         contains_match = re.search(r'contains.*?["`]([^"`]+)["`]', expectation, re.IGNORECASE)
+         if contains_match:
+             expected_text = contains_match.group(1)
+             if expected_text in output:
+                 return True, f"Output contains expected text: '{expected_text}'"
+             else:
+                 return False, f"Output does not contain expected text: '{expected_text}'"
+
+     if "without errors" in expectation.lower() or "runs without errors" in expectation.lower():
+         # Check for common error indicators
+         error_indicators = ['error', 'exception', 'failed', 'traceback']
+         has_error = any(indicator in output.lower() for indicator in error_indicators)
+         if not has_error:
+             return True, "Execution completed without errors"
+         else:
+             return False, "Execution encountered errors"
+
+     # Default: assume pass if output is non-empty
+     if output and len(output.strip()) > 0:
+         return True, "Output generated successfully"
+
+     return False, "No output generated"
+
+
+ def _build_bulk_data_gen_prompt(parsed_test_cases: list) -> str:
+     """Build consolidated requirements text for bulk test data generation."""
+     requirements = []
+     for idx, tc in enumerate(parsed_test_cases, 1):
+         test_case = tc['data']
+         test_file = tc['file']
+
+         parts = [f"Test Case #{idx}: {test_case['name']}", f"File: {test_file.name}", ""]
+
+         if test_case.get('test_data_config'):
+             parts.append("Test Data Configuration:")
+             for param, value in test_case['test_data_config'].items():
+                 parts.append(f" - {param}: {value}")
+
+         if test_case.get('prerequisites'):
+             parts.append(f"\nPre-requisites:\n{test_case['prerequisites']}")
+
+         if test_case.get('variables'):
+             parts.append(f"\nVariables to generate: {', '.join(test_case['variables'])}")
+
+         requirements.append("\n".join(parts))
+
+     return f"""{'='*60}
+
+ {chr(10).join(requirements)}
+
+ {'='*60}"""
+
+
+ def _build_bulk_execution_prompt(parsed_test_cases: list) -> str:
+     """Build consolidated prompt for bulk test execution."""
+     parts = []
+
+     for idx, tc_info in enumerate(parsed_test_cases, 1):
+         test_case = tc_info['data']
+         test_file = tc_info['file']
+
+         parts.append(f"\n{'='*80}\nTEST CASE #{idx}: {test_case['name']}\nFile: {test_file.name}\n{'='*80}")
+
+         if test_case['steps']:
+             for step in test_case['steps']:
+                 parts.append(f"\nStep {step['number']}: {step['title']}\n{step['instruction']}")
+                 if step['expectation']:
+                     parts.append(f"Expected Result: {step['expectation']}")
+         else:
+             parts.append("\n(No steps defined)")
+
+     return "\n".join(parts)
+
+
+ def _build_validation_prompt(parsed_test_cases: list, execution_output: str) -> str:
+     """Build prompt for bulk validation of test results."""
+     parts = ["You are a test validator. Review the test execution results and validate each test case.\n\nTest Cases to Validate:\n"]
+
+     for idx, tc_info in enumerate(parsed_test_cases, 1):
+         test_case = tc_info['data']
+         parts.append(f"\nTest Case #{idx}: {test_case['name']}")
+         if test_case['steps']:
+             for step in test_case['steps']:
+                 parts.append(f" Step {step['number']}: {step['title']}")
+                 if step['expectation']:
+                     parts.append(f" Expected: {step['expectation']}")
+
+     parts.append(f"\n\nActual Execution Results:\n{execution_output}\n")
+     parts.append(f"""\nBased on the execution results above, validate each test case.
+
+ Respond with valid JSON in this EXACT format:
+ {{
+   "test_cases": [
+     {{
+       "test_number": 1,
+       "test_name": "<test case name>",
+       "steps": [
+         {{"step_number": 1, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}},
+         {{"step_number": 2, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}}
+       ]
+     }},
+     {{
+       "test_number": 2,
+       "test_name": "<test case name>",
+       "steps": [...]
+     }}
+   ]
+ }}
+
+ Validate all {len(parsed_test_cases)} test cases and their steps.""")
+
+     return "\n".join(parts)
+
+
+ def _extract_json_from_text(text: str) -> dict:
+     """Extract JSON object from text using brace counting."""
+     start_idx = text.find('{')
+     if start_idx == -1:
+         raise ValueError("No JSON found in text")
+
+     brace_count = 0
+     end_idx = -1
+     for i, char in enumerate(text[start_idx:], start=start_idx):
+         if char == '{':
+             brace_count += 1
+         elif char == '}':
+             brace_count -= 1
+             if brace_count == 0:
+                 end_idx = i + 1
+                 break
+
+     if end_idx == -1:
+         raise ValueError("Could not find matching closing brace")
+
+     return json.loads(text[start_idx:end_idx])
+
+
+ def _create_fallback_results(parsed_test_cases: list) -> tuple[list, int, int, int]:
+     """Create fallback results when execution/validation fails."""
+     test_results = []
+     for tc_info in parsed_test_cases:
+         test_results.append({
+             'title': tc_info['data']['name'],
+             'passed': False,
+             'file': tc_info['file'].name,
+             'step_results': []
+         })
+     return test_results, len(parsed_test_cases), 0, len(parsed_test_cases)
+
+
  def _get_alita_system_prompt(config) -> str:
      """
      Get the Alita system prompt from user config or fallback to default.
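
Annotation (not part of the diff): the sketch below shows how the test-case helpers added in the hunk above might be exercised end to end. The import path alita_sdk.cli.agents and the sample markdown layout are assumptions inferred from the regexes in parse_test_case.

# Hypothetical usage sketch; assumes the helpers above are importable from
# alita_sdk.cli.agents and that the markdown follows the layout the regexes expect.
from pathlib import Path
from tempfile import TemporaryDirectory

from alita_sdk.cli.agents import parse_test_case, validate_test_output

SAMPLE = """# Sample GitHub toolkit check

## Objective

Verify the toolkit lists open pull requests.

## Config

path: configs/github.json
generateTestData: false

### Step 1: List pull requests

Ask the agent to list open pull requests for {{TEST_REPO}}.

**Expectation:** The command runs without errors.
"""

with TemporaryDirectory() as tmp:
    case_file = Path(tmp) / "sample.md"
    case_file.write_text(SAMPLE, encoding="utf-8")

    case = parse_test_case(str(case_file))
    print(case["name"], case["config_path"], case["variables"])  # name, config path, ['TEST_REPO']

    # Check a fake agent transcript against the first step's expectation.
    passed, details = validate_test_output(
        "Listed 3 open pull requests.", case["steps"][0]["expectation"]
    )
    print(passed, details)
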
@@ -77,6 +410,210 @@ def _get_alita_system_prompt(config) -> str:
      return DEFAULT_PROMPT


+ def _get_inventory_system_prompt(config) -> str:
+     """
+     Get the Inventory agent system prompt from user config or fallback to default.
+
+     Checks for $ALITA_DIR/agents/inventory.agent.md first, then falls back
+     to the default prompt with inventory-specific instructions.
+
+     Returns:
+         The system prompt string for Inventory agent
+     """
+     from .agent.default import DEFAULT_PROMPT
+
+     # Check for user-customized prompt
+     custom_prompt_path = Path(config.agents_dir) / 'inventory.agent.md'
+
+     if custom_prompt_path.exists():
+         try:
+             content = custom_prompt_path.read_text(encoding='utf-8')
+             # Parse the agent.md file - extract system_prompt from frontmatter or use content
+             if content.startswith('---'):
+                 try:
+                     parts = content.split('---', 2)
+                     if len(parts) >= 3:
+                         frontmatter = yaml.safe_load(parts[1])
+                         body = parts[2].strip()
+                         return frontmatter.get('system_prompt', body) if frontmatter else body
+                 except Exception:
+                     pass
+             return content.strip()
+         except Exception as e:
+             logger.debug(f"Failed to load custom Inventory prompt from {custom_prompt_path}: {e}")
+
+     # Use default prompt + inventory toolkit instructions
+     inventory_context = """
+
+ ## Inventory Knowledge Graph
+
+ You have access to the Inventory toolkit for querying a knowledge graph of software entities and relationships.
+ Use these tools to help users understand their codebase:
+
+ - **search_entities**: Find entities by name, type, or path patterns
+ - **get_entity**: Get full details of a specific entity
+ - **get_relationships**: Find relationships from/to an entity
+ - **impact_analysis**: Analyze what depends on an entity (useful for change impact)
+ - **get_graph_stats**: Get statistics about the knowledge graph
+
+ When answering questions about the codebase, use these tools to provide accurate, citation-backed answers.
+ """
+     return DEFAULT_PROMPT + inventory_context
+
+
+ def _resolve_inventory_path(path: str, work_dir: Optional[str] = None) -> Optional[str]:
+     """
+     Resolve an inventory/knowledge graph file path.
+
+     Tries locations in order:
+     1. Absolute path
+     2. Relative to current working directory (or work_dir if provided)
+     3. Relative to .alita/inventory/ in current directory
+     4. Relative to .alita/inventory/ in work_dir (if different)
+
+     Args:
+         path: The path to resolve (can be relative or absolute)
+         work_dir: Optional workspace directory to check
+
+     Returns:
+         Absolute path to the file if found, None otherwise
+     """
+     # Expand user home directory
+     path = str(Path(path).expanduser())
+
+     # Try absolute path first
+     if Path(path).is_absolute() and Path(path).exists():
+         return str(Path(path).resolve())
+
+     # Try relative to current working directory
+     cwd = Path.cwd()
+     cwd_path = cwd / path
+     if cwd_path.exists():
+         return str(cwd_path.resolve())
+
+     # Try .alita/inventory/ in current directory
+     alita_inventory_path = cwd / '.alita' / 'inventory' / path
+     if alita_inventory_path.exists():
+         return str(alita_inventory_path.resolve())
+
+     # If work_dir is different from cwd, try there too
+     if work_dir:
+         work_path = Path(work_dir)
+         if work_path != cwd:
+             # Try relative to work_dir
+             work_rel_path = work_path / path
+             if work_rel_path.exists():
+                 return str(work_rel_path.resolve())
+
+             # Try .alita/inventory/ in work_dir
+             work_alita_path = work_path / '.alita' / 'inventory' / path
+             if work_alita_path.exists():
+                 return str(work_alita_path.resolve())
+
+     return None
+
+
+ def _build_inventory_config(path: str, work_dir: Optional[str] = None) -> Optional[Dict[str, Any]]:
+     """
+     Build an inventory toolkit configuration from a file path.
+
+     The toolkit name is derived from the filename (stem).
+     All available tools are included.
+
+     Args:
+         path: Path to the knowledge graph JSON file
+         work_dir: Optional workspace directory for path resolution
+
+     Returns:
+         Toolkit configuration dict if file found, None otherwise
+     """
+     # Resolve the path
+     resolved_path = _resolve_inventory_path(path, work_dir)
+     if not resolved_path:
+         return None
+
+     # Validate it's a JSON file
+     if not resolved_path.endswith('.json'):
+         return None
+
+     # Validate file exists and is readable
+     try:
+         with open(resolved_path, 'r') as f:
+             # Just check it's valid JSON
+             json.load(f)
+     except (IOError, json.JSONDecodeError):
+         return None
+
+     # Extract toolkit name from filename (e.g., 'alita' from 'alita.json')
+     toolkit_name = Path(resolved_path).stem
+
+     # Build configuration with all available tools
+     from .toolkit_loader import INVENTORY_TOOLS
+
+     return {
+         'type': 'inventory',
+         'toolkit_name': toolkit_name,
+         'graph_path': resolved_path,
+         'base_directory': work_dir,
+         'selected_tools': INVENTORY_TOOLS,
+     }
+
+
+ def _get_inventory_json_files(work_dir: Optional[str] = None) -> List[str]:
+     """
+     Get list of .json files for inventory path completion.
+
+     Searches:
+     1. Current working directory (*.json files)
+     2. .alita/inventory/ directory (*.json files)
+     3. work_dir and work_dir/.alita/inventory/ if different from cwd
+
+     Args:
+         work_dir: Optional workspace directory
+
+     Returns:
+         List of relative or display paths for completion
+     """
+     suggestions = []
+     seen = set()
+
+     cwd = Path.cwd()
+
+     # Current directory .json files
+     for f in cwd.glob('*.json'):
+         if f.name not in seen:
+             suggestions.append(f.name)
+             seen.add(f.name)
+
+     # .alita/inventory/ directory
+     alita_inv = cwd / '.alita' / 'inventory'
+     if alita_inv.exists():
+         for f in alita_inv.glob('*.json'):
+             display = f'.alita/inventory/{f.name}'
+             if display not in seen:
+                 suggestions.append(display)
+                 seen.add(display)
+
+     # work_dir if different
+     if work_dir:
+         work_path = Path(work_dir)
+         if work_path != cwd:
+             for f in work_path.glob('*.json'):
+                 if f.name not in seen:
+                     suggestions.append(f.name)
+                     seen.add(f.name)
+
+             work_alita_inv = work_path / '.alita' / 'inventory'
+             if work_alita_inv.exists():
+                 for f in work_alita_inv.glob('*.json'):
+                     display = f'.alita/inventory/{f.name}'
+                     if display not in seen:
+                         suggestions.append(display)
+                         seen.add(display)
+
+     return sorted(suggestions)
+
+
  def _load_mcp_tools(agent_def: Dict[str, Any], mcp_config_path: str) -> List[Dict[str, Any]]:
      """Load MCP tools from agent definition with tool-level filtering.

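Annotation (not part of the diff): a small, assumed usage sketch for the inventory helpers above, showing the toolkit-config shape that _build_inventory_config returns. The import path is an assumption, and the helper is private to the CLI module.

# Hypothetical sketch only. Assumes .alita/inventory/alita.json exists and
# contains valid JSON, and that the private helper is importable as shown.
from alita_sdk.cli.agents import _build_inventory_config

cfg = _build_inventory_config("alita.json", work_dir=".")
if cfg is None:
    print("inventory file not found or not valid JSON")
else:
    # Expected shape per _build_inventory_config above:
    # {'type': 'inventory', 'toolkit_name': 'alita',
    #  'graph_path': '<absolute path>', 'base_directory': '.',
    #  'selected_tools': INVENTORY_TOOLS}
    print(cfg["toolkit_name"], cfg["graph_path"])
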
@@ -93,10 +630,14 @@ def _load_mcp_tools(agent_def: Dict[str, Any], mcp_config_path: str) -> List[Dic
93
630
 
94
631
  def _setup_local_agent_executor(client, agent_def: Dict[str, Any], toolkit_config: tuple,
95
632
  config, model: Optional[str], temperature: Optional[float],
96
- max_tokens: Optional[int], memory, work_dir: Optional[str],
633
+ max_tokens: Optional[int], memory, allowed_directories: Optional[List[str]],
97
634
  plan_state: Optional[Dict] = None):
98
635
  """Setup local agent executor with all configurations.
99
636
 
637
+ Args:
638
+ allowed_directories: List of allowed directories for filesystem access.
639
+ First directory is the primary/base directory.
640
+
100
641
  Returns:
101
642
  Tuple of (agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools)
102
643
  """
@@ -112,21 +653,28 @@ def _setup_local_agent_executor(client, agent_def: Dict[str, Any], toolkit_confi
112
653
  client, model, agent_def, temperature, max_tokens
113
654
  )
114
655
 
115
- # Add filesystem tools if --dir is provided
656
+ # Add filesystem tools if directories are provided
116
657
  filesystem_tools = None
117
658
  terminal_tools = None
118
- if work_dir:
659
+ if allowed_directories:
119
660
  from .tools import get_filesystem_tools, get_terminal_tools
120
661
  preset = agent_def.get('filesystem_tools_preset')
121
662
  include_tools = agent_def.get('filesystem_tools_include')
122
663
  exclude_tools = agent_def.get('filesystem_tools_exclude')
123
- filesystem_tools = get_filesystem_tools(work_dir, include_tools, exclude_tools, preset)
124
664
 
125
- # Also add terminal tools when work_dir is set
126
- terminal_tools = get_terminal_tools(work_dir)
665
+ # First directory is the primary base directory
666
+ base_dir = allowed_directories[0]
667
+ extra_dirs = allowed_directories[1:] if len(allowed_directories) > 1 else None
668
+ filesystem_tools = get_filesystem_tools(base_dir, include_tools, exclude_tools, preset, extra_dirs)
669
+
670
+ # Terminal tools use primary directory as cwd
671
+ terminal_tools = get_terminal_tools(base_dir)
127
672
 
128
673
  tool_count = len(filesystem_tools) + len(terminal_tools)
129
- access_msg = f"✓ Granted filesystem & terminal access to: {work_dir} ({tool_count} tools)"
674
+ if len(allowed_directories) == 1:
675
+ access_msg = f"✓ Granted filesystem & terminal access to: {base_dir} ({tool_count} tools)"
676
+ else:
677
+ access_msg = f"✓ Granted filesystem & terminal access to {len(allowed_directories)} directories ({tool_count} tools)"
130
678
  if preset:
131
679
  access_msg += f" [preset: {preset}]"
132
680
  if include_tools:
@@ -396,20 +944,86 @@ def _select_toolkit_interactive(config) -> Optional[Dict[str, Any]]:
396
944
  return None
397
945
 
398
946
 
947
+ def _list_available_toolkits(config) -> List[str]:
948
+ """
949
+ List names of all available toolkits in $ALITA_DIR/tools.
950
+
951
+ Returns:
952
+ List of toolkit names
953
+ """
954
+ tools_dir = Path(config.tools_dir)
955
+
956
+ if not tools_dir.exists():
957
+ return []
958
+
959
+ toolkit_names = []
960
+ for pattern in ['*.json', '*.yaml', '*.yml']:
961
+ for file_path in tools_dir.glob(pattern):
962
+ try:
963
+ config_data = load_toolkit_config(str(file_path))
964
+ name = config_data.get('toolkit_name') or config_data.get('name') or file_path.stem
965
+ toolkit_names.append(name)
966
+ except Exception:
967
+ pass
968
+
969
+ return toolkit_names
970
+
971
+
972
+ def _find_toolkit_by_name(config, toolkit_name: str) -> Optional[Dict[str, Any]]:
973
+ """
974
+ Find a toolkit by name in $ALITA_DIR/tools.
975
+
976
+ Args:
977
+ config: CLI configuration
978
+ toolkit_name: Name of the toolkit to find (case-insensitive)
979
+
980
+ Returns:
981
+ Toolkit config dict or None if not found
982
+ """
983
+ tools_dir = Path(config.tools_dir)
984
+
985
+ if not tools_dir.exists():
986
+ return None
987
+
988
+ toolkit_name_lower = toolkit_name.lower()
989
+
990
+ for pattern in ['*.json', '*.yaml', '*.yml']:
991
+ for file_path in tools_dir.glob(pattern):
992
+ try:
993
+ config_data = load_toolkit_config(str(file_path))
994
+ name = config_data.get('toolkit_name') or config_data.get('name') or file_path.stem
995
+
996
+ # Match by name (case-insensitive) or file stem
997
+ if name.lower() == toolkit_name_lower or file_path.stem.lower() == toolkit_name_lower:
998
+ return {
999
+ 'file': str(file_path),
1000
+ 'name': name,
1001
+ 'type': config_data.get('toolkit_type') or config_data.get('type', 'unknown'),
1002
+ 'config': config_data
1003
+ }
1004
+ except Exception:
1005
+ pass
1006
+
1007
+ return None
1008
+
1009
+
399
1010
  def _select_agent_interactive(client, config) -> Optional[str]:
400
1011
  """
401
1012
  Show interactive menu to select an agent from platform and local agents.
402
1013
 
403
1014
  Returns:
404
- Agent source (name/id for platform, file path for local, '__direct__' for direct chat) or None if cancelled
1015
+ Agent source (name/id for platform, file path for local, '__direct__' for direct chat,
1016
+ '__inventory__' for inventory agent) or None if cancelled
405
1017
  """
406
1018
  from .config import CLIConfig
407
1019
 
408
1020
  console.print("\n🤖 [bold cyan]Select an agent to chat with:[/bold cyan]\n")
409
1021
 
410
- # First option: Alita (direct LLM chat, no agent)
1022
+ # Built-in agents
411
1023
  console.print(f"1. [[bold]💬 Alita[/bold]] [cyan]Chat directly with LLM (no agent)[/cyan]")
412
1024
  console.print(f" [dim]Direct conversation with the model without agent configuration[/dim]")
1025
+ console.print(f"2. [[bold]📊 Inventory[/bold]] [cyan]Knowledge graph builder agent[/cyan]")
1026
+ console.print(f" [dim]Build inventories from connected toolkits (use --toolkit-config to add sources)[/dim]")
413
1027
 
414
1028
  agents_list = []
415
1029
 
@@ -444,8 +1058,8 @@ def _select_agent_interactive(client, config) -> Optional[str]:
444
1058
  except Exception as e:
445
1059
  logger.debug(f"Failed to load {file_path}: {e}")
446
1060
 
447
- # Display agents with numbers using rich (starting from 2 since 1 is direct chat)
448
- for i, agent in enumerate(agents_list, 2):
1061
+ # Display agents with numbers using rich (starting from 3 since 1-2 are built-in)
1062
+ for i, agent in enumerate(agents_list, 3):
449
1063
  agent_type = "📦 Platform" if agent['type'] == 'platform' else "📁 Local"
450
1064
  console.print(f"{i}. [[bold]{agent_type}[/bold]] [cyan]{agent['name']}[/cyan]")
451
1065
  if agent['description']:
@@ -465,13 +1079,17 @@ def _select_agent_interactive(client, config) -> Optional[str]:
465
1079
  console.print(f"✓ [green]Selected:[/green] [bold]Alita[/bold]")
466
1080
  return '__direct__'
467
1081
 
468
- idx = int(choice) - 2 # Offset by 2 since 1 is direct chat
1082
+ if choice == '2':
1083
+ console.print(f"✓ [green]Selected:[/green] [bold]Inventory[/bold]")
1084
+ return '__inventory__'
1085
+
1086
+ idx = int(choice) - 3 # Offset by 3 since 1-2 are built-in agents
469
1087
  if 0 <= idx < len(agents_list):
470
1088
  selected = agents_list[idx]
471
1089
  console.print(f"✓ [green]Selected:[/green] [bold]{selected['name']}[/bold]")
472
1090
  return selected['source']
473
1091
  else:
474
- console.print(f"[yellow]Invalid selection. Please enter a number between 0 and {len(agents_list) + 1}[/yellow]")
1092
+ console.print(f"[yellow]Invalid selection. Please enter a number between 0 and {len(agents_list) + 2}[/yellow]")
475
1093
  except ValueError:
476
1094
  console.print("[yellow]Please enter a valid number[/yellow]")
477
1095
  except (KeyboardInterrupt, EOFError):
@@ -730,6 +1348,8 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
730
1348
  @click.option('--version', help='Agent version (for platform agents)')
731
1349
  @click.option('--toolkit-config', multiple=True, type=click.Path(exists=True),
732
1350
  help='Toolkit configuration files (can specify multiple)')
1351
+ @click.option('--inventory', 'inventory_path', type=str,
1352
+ help='Load inventory/knowledge graph from JSON file (e.g., alita.json or .alita/inventory/alita.json)')
733
1353
  @click.option('--thread-id', help='Continue existing conversation thread')
734
1354
  @click.option('--model', help='Override LLM model')
735
1355
  @click.option('--temperature', type=float, help='Override temperature')
@@ -740,47 +1360,23 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
740
1360
  help='Output verbosity level: quiet (final output only), default (tool calls + outputs), debug (all including LLM calls)')
741
1361
  @click.pass_context
742
1362
  def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
743
- toolkit_config: tuple, thread_id: Optional[str],
1363
+ toolkit_config: tuple, inventory_path: Optional[str], thread_id: Optional[str],
744
1364
  model: Optional[str], temperature: Optional[float],
745
1365
  max_tokens: Optional[int], work_dir: Optional[str],
746
1366
  verbose: str):
747
- """
748
- Start interactive chat with an agent.
749
-
750
- If AGENT_SOURCE is not provided, shows an interactive menu to select from
751
- available agents (both platform and local).
752
-
753
- AGENT_SOURCE can be:
754
- - Platform agent ID or name
755
- - Path to local agent file
1367
+ """Start interactive chat with an agent.
756
1368
 
1369
+ \b
757
1370
  Examples:
758
-
759
- # Interactive selection
760
- alita-cli agent chat
761
-
762
- # Chat with platform agent
763
- alita-cli agent chat my-agent
764
-
765
- # Chat with local agent
766
- alita-cli agent chat .github/agents/sdk-dev.agent.md
767
-
768
- # With toolkit configurations
769
- alita-cli agent chat my-agent \\
770
- --toolkit-config jira-config.json \\
771
- --toolkit-config github-config.json
772
-
773
- # With filesystem access
774
- alita-cli agent chat my-agent --dir ./workspace
775
-
776
- # Continue previous conversation
777
- alita-cli agent chat my-agent --thread-id abc123
778
-
779
- # Quiet mode (hide tool calls and thinking)
780
- alita-cli agent chat my-agent --verbose quiet
781
-
782
- # Debug mode (show all including LLM calls)
783
- alita-cli agent chat my-agent --verbose debug
1371
+ alita chat # Interactive agent selection
1372
+ alita chat my-agent # Chat with platform agent
1373
+ alita chat ./agent.md # Chat with local agent file
1374
+ alita chat --inventory alita.json
1375
+ alita chat my-agent --dir ./src
1376
+ alita chat my-agent --thread-id abc123
1377
+ alita chat my-agent -v quiet # Hide tool calls
1378
+ alita chat my-agent -v debug # Show all LLM calls
1379
+ alita chat __inventory__ --toolkit-config jira.json
784
1380
  """
785
1381
  formatter = ctx.obj['formatter']
786
1382
  config = ctx.obj['config']
@@ -795,9 +1391,16 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
795
1391
  if not agent_source:
796
1392
  agent_source = '__direct__'
797
1393
 
798
- # Check for direct chat mode
1394
+ # Check for built-in agent modes
799
1395
  is_direct = agent_source == '__direct__'
800
- is_local = not is_direct and Path(agent_source).exists()
1396
+ is_inventory = agent_source == '__inventory__'
1397
+ is_builtin = is_direct or is_inventory
1398
+ is_local = not is_builtin and Path(agent_source).exists()
1399
+
1400
+ # Get defaults from config
1401
+ default_model = config.default_model or 'gpt-4o'
1402
+ default_temperature = config.default_temperature if config.default_temperature is not None else 0.1
1403
+ default_max_tokens = config.default_max_tokens or 4096
801
1404
 
802
1405
  # Initialize variables for dynamic updates
803
1406
  current_model = model
@@ -814,9 +1417,19 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
814
1417
  planning_tools = None
815
1418
  plan_state = None
816
1419
 
1420
+ # Handle --inventory option: add inventory toolkit config at startup
1421
+ if inventory_path:
1422
+ inventory_config = _build_inventory_config(inventory_path, work_dir)
1423
+ if inventory_config:
1424
+ added_toolkit_configs.append(inventory_config)
1425
+ console.print(f"[dim]✓ Loading inventory: {inventory_config['toolkit_name']} ({inventory_config['graph_path']})[/dim]")
1426
+ else:
1427
+ console.print(f"[yellow]Warning: Inventory file not found: {inventory_path}[/yellow]")
1428
+ console.print("[dim]Searched in current directory and .alita/inventory/[/dim]")
1429
+
817
1430
  # Approval mode: 'always' (confirm each tool), 'auto' (no confirmation), 'yolo' (no safety checks)
818
1431
  approval_mode = 'always'
819
- current_work_dir = work_dir # Track work_dir for /dir command
1432
+ allowed_directories = [work_dir] if work_dir else [] # Track allowed directories for /dir command
820
1433
  current_agent_file = agent_source if is_local else None # Track agent file for /reload command
821
1434
 
822
1435
  if is_direct:
@@ -825,11 +1438,27 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
825
1438
  agent_type = "Direct LLM"
826
1439
  alita_prompt = _get_alita_system_prompt(config)
827
1440
  agent_def = {
828
- 'model': model or 'gpt-5',
829
- 'temperature': temperature if temperature is not None else 0.1,
830
- 'max_tokens': max_tokens or 4096,
1441
+ 'model': model or default_model,
1442
+ 'temperature': temperature if temperature is not None else default_temperature,
1443
+ 'max_tokens': max_tokens or default_max_tokens,
831
1444
  'system_prompt': alita_prompt
832
1445
  }
1446
+ elif is_inventory:
1447
+ # Inventory agent mode - knowledge graph builder with inventory toolkit
1448
+ agent_name = "Inventory"
1449
+ agent_type = "Built-in Agent"
1450
+ inventory_prompt = _get_inventory_system_prompt(config)
1451
+ agent_def = {
1452
+ 'name': 'inventory-agent',
1453
+ 'model': model or default_model,
1454
+ 'temperature': temperature if temperature is not None else 0.3,
1455
+ 'max_tokens': max_tokens or default_max_tokens,
1456
+ 'system_prompt': inventory_prompt,
1457
+ # Include inventory toolkit by default
1458
+ 'toolkit_configs': [
1459
+ {'type': 'inventory', 'graph_path': './knowledge_graph.json'}
1460
+ ]
1461
+ }
833
1462
  elif is_local:
834
1463
  agent_def = load_agent_definition(agent_source)
835
1464
  agent_name = agent_def.get('name', Path(agent_source).stem)
@@ -852,8 +1481,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
852
1481
  agent_type = "Platform Agent"
853
1482
 
854
1483
  # Get model and temperature for welcome banner
855
- llm_model_display = current_model or agent_def.get('model', 'gpt-4o')
856
- llm_temperature_display = current_temperature if current_temperature is not None else agent_def.get('temperature', 0.1)
1484
+ llm_model_display = current_model or agent_def.get('model', default_model)
1485
+ llm_temperature_display = current_temperature if current_temperature is not None else agent_def.get('temperature', default_temperature)
857
1486
 
858
1487
  # Print nice welcome banner
859
1488
  print_welcome(agent_name, llm_model_display, llm_temperature_display, approval_mode)
@@ -862,27 +1491,64 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
862
1491
  chat_history = []
863
1492
 
864
1493
  # Initialize session for persistence (memory + plan)
865
- from .tools import generate_session_id, create_session_memory, save_session_metadata
1494
+ from .tools import generate_session_id, create_session_memory, save_session_metadata, to_portable_path
866
1495
  current_session_id = generate_session_id()
867
1496
  plan_state = {'session_id': current_session_id}
868
1497
 
869
1498
  # Create persistent memory for agent (stored in session directory)
870
1499
  memory = create_session_memory(current_session_id)
871
1500
 
872
- # Save session metadata
1501
+ # Save session metadata with agent source for session resume
1502
+ agent_source_portable = to_portable_path(current_agent_file) if current_agent_file else None
1503
+ # Filter out transient inventory configs (dicts) - only save file paths
1504
+ serializable_toolkit_configs = [tc for tc in added_toolkit_configs if isinstance(tc, str)]
1505
+ # Extract inventory graph path if present
1506
+ inventory_graph = None
1507
+ for tc in added_toolkit_configs:
1508
+ if isinstance(tc, dict) and tc.get('type') == 'inventory':
1509
+ inventory_graph = tc.get('graph_path')
1510
+ break
873
1511
  save_session_metadata(current_session_id, {
874
1512
  'agent_name': agent_name,
875
1513
  'agent_type': agent_type if 'agent_type' in dir() else 'Direct LLM',
1514
+ 'agent_source': agent_source_portable,
876
1515
  'model': llm_model_display,
877
1516
  'temperature': llm_temperature_display,
878
1517
  'work_dir': work_dir,
879
1518
  'is_direct': is_direct,
880
1519
  'is_local': is_local,
1520
+ 'is_inventory': is_inventory,
1521
+ 'added_toolkit_configs': serializable_toolkit_configs,
1522
+ 'inventory_graph': inventory_graph,
1523
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])],
881
1524
  })
882
1525
  console.print(f"[dim]Session: {current_session_id}[/dim]")
883
1526
 
1527
+ # Initialize context manager for chat history management
1528
+ context_config = config.context_management
1529
+ ctx_manager = CLIContextManager(
1530
+ session_id=current_session_id,
1531
+ max_context_tokens=context_config.get('max_context_tokens', 8000),
1532
+ preserve_recent=context_config.get('preserve_recent_messages', 5),
1533
+ pruning_method=context_config.get('pruning_method', 'oldest_first'),
1534
+ enable_summarization=context_config.get('enable_summarization', True),
1535
+ summary_trigger_ratio=context_config.get('summary_trigger_ratio', 0.8),
1536
+ summaries_limit=context_config.get('summaries_limit_count', 5),
1537
+ llm=None # Will be set after LLM creation
1538
+ )
1539
+
1540
+ # Purge old sessions on startup (cleanup task)
1541
+ try:
1542
+ purge_context_sessions(
1543
+ sessions_dir=config.sessions_dir,
1544
+ max_age_days=context_config.get('session_max_age_days', 30),
1545
+ max_sessions=context_config.get('max_sessions', 100)
1546
+ )
1547
+ except Exception as e:
1548
+ logger.debug(f"Session cleanup failed: {e}")
1549
+
884
1550
  # Create agent executor
885
- if is_direct or is_local:
1551
+ if is_direct or is_local or is_inventory:
886
1552
  # Setup local agent executor (handles all config, tools, MCP, etc.)
887
1553
  try:
888
1554
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
@@ -916,25 +1582,53 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
916
1582
  )
917
1583
  llm = None # Platform agents don't use direct LLM
918
1584
 
1585
+ # Set LLM on context manager for summarization
1586
+ if llm is not None:
1587
+ ctx_manager.llm = llm
1588
+
919
1589
  # Initialize input handler for readline support
920
1590
  input_handler = get_input_handler()
921
1591
 
1592
+ # Set up toolkit names callback for tab completion
1593
+ from .input_handler import set_toolkit_names_callback, set_inventory_files_callback
1594
+ set_toolkit_names_callback(lambda: _list_available_toolkits(config))
1595
+
1596
+ # Set up inventory files callback for /inventory tab completion
1597
+ set_inventory_files_callback(lambda: _get_inventory_json_files(allowed_directories[0] if allowed_directories else None))
1598
+
922
1599
  # Interactive chat loop
923
1600
  while True:
924
1601
  try:
1602
+ # Get context info for the UI indicator
1603
+ context_info = ctx_manager.get_context_info()
1604
+
925
1605
  # Get input with styled prompt (prompt is part of input() for proper readline handling)
926
- user_input = styled_input().strip()
1606
+ user_input = styled_input(context_info=context_info).strip()
927
1607
 
928
1608
  if not user_input:
929
1609
  continue
930
1610
 
931
1611
  # Handle commands
932
1612
  if user_input.lower() in ['exit', 'quit']:
1613
+ # Save final session state before exiting
1614
+ try:
1615
+ from .tools import update_session_metadata, to_portable_path
1616
+ update_session_metadata(current_session_id, {
1617
+ 'agent_source': to_portable_path(current_agent_file) if current_agent_file else None,
1618
+ 'model': current_model or llm_model_display,
1619
+ 'temperature': current_temperature if current_temperature is not None else llm_temperature_display,
1620
+ 'allowed_directories': allowed_directories,
1621
+ 'added_toolkit_configs': list(added_toolkit_configs),
1622
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])],
1623
+ })
1624
+ except Exception as e:
1625
+ logger.debug(f"Failed to save session state on exit: {e}")
933
1626
  console.print("\n[bold cyan]👋 Goodbye![/bold cyan]\n")
934
1627
  break
935
1628
 
936
1629
  if user_input == '/clear':
937
1630
  chat_history = []
1631
+ ctx_manager.clear()
938
1632
  console.print("[green]✓ Conversation history cleared.[/green]")
939
1633
  continue
940
1634
 
@@ -947,7 +1641,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
947
1641
  role = msg.get('role', 'unknown')
948
1642
  content = msg.get('content', '')
949
1643
  role_color = 'blue' if role == 'user' else 'green'
950
- console.print(f"\n[bold {role_color}]{i}. {role.upper()}:[/bold {role_color}] {content[:100]}...")
1644
+ included_marker = "" if ctx_manager.is_message_included(i - 1) else " [dim](pruned)[/dim]"
1645
+ console.print(f"\n[bold {role_color}]{i}. {role.upper()}:[/bold {role_color}] {content[:100]}...{included_marker}")
951
1646
  continue
952
1647
 
953
1648
  if user_input == '/save':
@@ -975,12 +1670,17 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
975
1670
  agent_def['model'] = current_model
976
1671
 
977
1672
  # Recreate LLM and agent executor - use session memory to preserve history
978
- from .tools import create_session_memory
1673
+ from .tools import create_session_memory, update_session_metadata
979
1674
  memory = create_session_memory(current_session_id)
980
1675
  try:
981
1676
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
982
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
1677
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
983
1678
  )
1679
+ # Persist model change to session
1680
+ update_session_metadata(current_session_id, {
1681
+ 'model': current_model,
1682
+ 'temperature': current_temperature if current_temperature is not None else agent_def.get('temperature', 0.7)
1683
+ })
984
1684
  console.print(Panel(
985
1685
  f"[cyan]ℹ Model switched to [bold]{current_model}[/bold]. Agent state reset, chat history preserved.[/cyan]",
986
1686
  border_style="cyan",
@@ -993,8 +1693,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
993
1693
  # /reload command - reload agent definition from file
994
1694
  if user_input == '/reload':
995
1695
  if not is_local:
996
- if is_direct:
997
- console.print("[yellow]Cannot reload direct chat mode - no agent file to reload.[/yellow]")
1696
+ if is_direct or is_inventory:
1697
+ console.print("[yellow]Cannot reload built-in agent mode - no agent file to reload.[/yellow]")
998
1698
  else:
999
1699
  console.print("[yellow]Reload is only available for local agents (file-based).[/yellow]")
1000
1700
  continue
@@ -1036,7 +1736,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1036
1736
  from .tools import create_session_memory
1037
1737
  memory = create_session_memory(current_session_id)
1038
1738
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1039
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
1739
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1040
1740
  )
1041
1741
 
1042
1742
  # Show what changed
@@ -1063,8 +1763,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1063
1763
 
1064
1764
  # /add_mcp command - add MCP server
1065
1765
  if user_input == '/add_mcp':
1066
- if not (is_direct or is_local):
1067
- console.print("[yellow]Adding MCP is only available for local agents and direct chat.[/yellow]")
1766
+ if not (is_direct or is_local or is_inventory):
1767
+ console.print("[yellow]Adding MCP is only available for local agents and built-in agents.[/yellow]")
1068
1768
  continue
1069
1769
 
1070
1770
  selected_mcp = _select_mcp_interactive(config)
@@ -1077,12 +1777,16 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1077
1777
  agent_def['mcps'].append(mcp_name)
1078
1778
 
1079
1779
  # Recreate agent executor with new MCP - use session memory to preserve history
1080
- from .tools import create_session_memory
1780
+ from .tools import create_session_memory, update_session_metadata
1081
1781
  memory = create_session_memory(current_session_id)
1082
1782
  try:
1083
1783
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1084
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
1784
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1085
1785
  )
1786
+ # Persist added MCPs to session
1787
+ update_session_metadata(current_session_id, {
1788
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])]
1789
+ })
1086
1790
  console.print(Panel(
1087
1791
  f"[cyan]ℹ Added MCP: [bold]{mcp_name}[/bold]. Agent state reset, chat history preserved.[/cyan]",
1088
1792
  border_style="cyan",
@@ -1093,12 +1797,27 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1093
1797
  continue
1094
1798
 
1095
1799
  # /add_toolkit command - add toolkit
1096
- if user_input == '/add_toolkit':
1097
- if not (is_direct or is_local):
1098
- console.print("[yellow]Adding toolkit is only available for local agents and direct chat.[/yellow]")
1800
+ if user_input == '/add_toolkit' or user_input.startswith('/add_toolkit '):
1801
+ if not (is_direct or is_local or is_inventory):
1802
+ console.print("[yellow]Adding toolkit is only available for local agents and built-in agents.[/yellow]")
1099
1803
  continue
1100
1804
 
1101
- selected_toolkit = _select_toolkit_interactive(config)
1805
+ parts = user_input.split(maxsplit=1)
1806
+ if len(parts) == 2:
1807
+ # Direct toolkit selection by name
1808
+ toolkit_name_arg = parts[1].strip()
1809
+ selected_toolkit = _find_toolkit_by_name(config, toolkit_name_arg)
1810
+ if not selected_toolkit:
1811
+ console.print(f"[yellow]Toolkit '{toolkit_name_arg}' not found.[/yellow]")
1812
+ # Show available toolkits
1813
+ available = _list_available_toolkits(config)
1814
+ if available:
1815
+ console.print(f"[dim]Available toolkits: {', '.join(available)}[/dim]")
1816
+ continue
1817
+ else:
1818
+ # Interactive selection
1819
+ selected_toolkit = _select_toolkit_interactive(config)
1820
+
1102
1821
  if selected_toolkit:
1103
1822
  toolkit_name = selected_toolkit['name']
1104
1823
  toolkit_file = selected_toolkit['file']
@@ -1108,12 +1827,16 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1108
1827
  added_toolkit_configs.append(toolkit_file)
1109
1828
 
1110
1829
  # Recreate agent executor with new toolkit - use session memory to preserve history
1111
- from .tools import create_session_memory
1830
+ from .tools import create_session_memory, update_session_metadata
1112
1831
  memory = create_session_memory(current_session_id)
1113
1832
  try:
1114
1833
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1115
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
1834
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1116
1835
  )
1836
+ # Persist added toolkits to session
1837
+ update_session_metadata(current_session_id, {
1838
+ 'added_toolkit_configs': list(added_toolkit_configs)
1839
+ })
1117
1840
  console.print(Panel(
1118
1841
  f"[cyan]ℹ Added toolkit: [bold]{toolkit_name}[/bold]. Agent state reset, chat history preserved.[/cyan]",
1119
1842
  border_style="cyan",
@@ -1123,24 +1846,163 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
  console.print(f"[red]Error adding toolkit: {e}[/red]")
  continue

- # /mode command - set approval mode
- if user_input == '/mode' or user_input.startswith('/mode '):
+ # /rm_mcp command - remove MCP server
+ if user_input == '/rm_mcp' or user_input.startswith('/rm_mcp '):
+ if not (is_direct or is_local or is_inventory):
+ console.print("[yellow]Removing MCP is only available for local agents and built-in agents.[/yellow]")
+ continue
+
+ current_mcps = agent_def.get('mcps', [])
+ if not current_mcps:
+ console.print("[yellow]No MCP servers are currently loaded.[/yellow]")
+ continue
+
+ # Get list of MCP names
+ mcp_names = [m if isinstance(m, str) else m.get('name') for m in current_mcps]
+
  parts = user_input.split(maxsplit=1)
- if len(parts) == 1:
- # Show current mode and options
- mode_info = {
- 'always': ('yellow', 'Confirm before each tool execution'),
- 'auto': ('green', 'Execute tools without confirmation'),
- 'yolo': ('red', 'No confirmations, skip safety warnings')
- }
- console.print("\n🔧 [bold cyan]Approval Mode:[/bold cyan]\n")
- for mode_name, (color, desc) in mode_info.items():
- marker = "●" if mode_name == approval_mode else "○"
- console.print(f" [{color}]{marker}[/{color}] [bold]{mode_name}[/bold] - {desc}")
- console.print(f"\n[dim]Usage: /mode <always|auto|yolo>[/dim]")
+ if len(parts) == 2:
+ # Direct removal by name
+ mcp_name_to_remove = parts[1].strip()
+ if mcp_name_to_remove not in mcp_names:
+ console.print(f"[yellow]MCP '{mcp_name_to_remove}' not found.[/yellow]")
+ console.print(f"[dim]Loaded MCPs: {', '.join(mcp_names)}[/dim]")
+ continue
  else:
- new_mode = parts[1].lower().strip()
- if new_mode in ['always', 'auto', 'yolo']:
+ # Interactive selection
+ console.print("\n🔌 [bold cyan]Remove MCP Server[/bold cyan]\n")
+ for i, name in enumerate(mcp_names, 1):
+ console.print(f" [bold]{i}[/bold]. {name}")
+ console.print(f" [bold]0[/bold]. [dim]Cancel[/dim]")
+ console.print()
+
+ try:
+ choice = int(input("Select MCP to remove: ").strip())
+ if choice == 0:
+ continue
+ if 1 <= choice <= len(mcp_names):
+ mcp_name_to_remove = mcp_names[choice - 1]
+ else:
+ console.print("[yellow]Invalid selection.[/yellow]")
+ continue
+ except (ValueError, KeyboardInterrupt):
+ continue
+
+ # Remove the MCP
+ agent_def['mcps'] = [m for m in current_mcps if (m if isinstance(m, str) else m.get('name')) != mcp_name_to_remove]
+
+ # Recreate agent executor without the MCP
+ from .tools import create_session_memory, update_session_metadata
+ memory = create_session_memory(current_session_id)
+ try:
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
+ )
+ # Persist updated MCPs to session
+ update_session_metadata(current_session_id, {
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])]
+ })
+ console.print(Panel(
+ f"[cyan]ℹ Removed MCP: [bold]{mcp_name_to_remove}[/bold]. Agent state reset, chat history preserved.[/cyan]",
+ border_style="cyan",
+ box=box.ROUNDED
+ ))
+ except Exception as e:
+ console.print(f"[red]Error removing MCP: {e}[/red]")
+ continue
+
+ # /rm_toolkit command - remove toolkit
+ if user_input == '/rm_toolkit' or user_input.startswith('/rm_toolkit '):
+ if not (is_direct or is_local or is_inventory):
+ console.print("[yellow]Removing toolkit is only available for local agents and built-in agents.[/yellow]")
+ continue
+
+ if not added_toolkit_configs:
+ console.print("[yellow]No toolkits are currently loaded.[/yellow]")
+ continue
+
+ # Get toolkit names from config files
+ toolkit_info = [] # List of (name, file_path)
+ for toolkit_file in added_toolkit_configs:
+ try:
+ with open(toolkit_file, 'r') as f:
+ tk_config = json.load(f)
+ tk_name = tk_config.get('toolkit_name', Path(toolkit_file).stem)
+ toolkit_info.append((tk_name, toolkit_file))
+ except Exception:
+ toolkit_info.append((Path(toolkit_file).stem, toolkit_file))
+
+ parts = user_input.split(maxsplit=1)
+ if len(parts) == 2:
+ # Direct removal by name
+ toolkit_name_to_remove = parts[1].strip()
+ matching = [(name, path) for name, path in toolkit_info if name == toolkit_name_to_remove]
+ if not matching:
+ console.print(f"[yellow]Toolkit '{toolkit_name_to_remove}' not found.[/yellow]")
+ console.print(f"[dim]Loaded toolkits: {', '.join(name for name, _ in toolkit_info)}[/dim]")
+ continue
+ toolkit_file_to_remove = matching[0][1]
+ else:
+ # Interactive selection
+ console.print("\n🔧 [bold cyan]Remove Toolkit[/bold cyan]\n")
+ for i, (name, _) in enumerate(toolkit_info, 1):
+ console.print(f" [bold]{i}[/bold]. {name}")
+ console.print(f" [bold]0[/bold]. [dim]Cancel[/dim]")
+ console.print()
+
+ try:
+ choice = int(input("Select toolkit to remove: ").strip())
+ if choice == 0:
+ continue
+ if 1 <= choice <= len(toolkit_info):
+ toolkit_name_to_remove, toolkit_file_to_remove = toolkit_info[choice - 1]
+ else:
+ console.print("[yellow]Invalid selection.[/yellow]")
+ continue
+ except (ValueError, KeyboardInterrupt):
+ continue
+
+ # Remove the toolkit
+ added_toolkit_configs.remove(toolkit_file_to_remove)
+
+ # Recreate agent executor without the toolkit
+ from .tools import create_session_memory, update_session_metadata
+ memory = create_session_memory(current_session_id)
+ try:
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
+ )
+ # Persist updated toolkits to session
+ update_session_metadata(current_session_id, {
+ 'added_toolkit_configs': list(added_toolkit_configs)
+ })
+ console.print(Panel(
+ f"[cyan]ℹ Removed toolkit: [bold]{toolkit_name_to_remove}[/bold]. Agent state reset, chat history preserved.[/cyan]",
+ border_style="cyan",
+ box=box.ROUNDED
+ ))
+ except Exception as e:
+ console.print(f"[red]Error removing toolkit: {e}[/red]")
+ continue
+
+ # /mode command - set approval mode
+ if user_input == '/mode' or user_input.startswith('/mode '):
+ parts = user_input.split(maxsplit=1)
+ if len(parts) == 1:
+ # Show current mode and options
+ mode_info = {
+ 'always': ('yellow', 'Confirm before each tool execution'),
+ 'auto': ('green', 'Execute tools without confirmation'),
+ 'yolo': ('red', 'No confirmations, skip safety warnings')
+ }
+ console.print("\n🔧 [bold cyan]Approval Mode:[/bold cyan]\n")
+ for mode_name, (color, desc) in mode_info.items():
+ marker = "●" if mode_name == approval_mode else "○"
+ console.print(f" [{color}]{marker}[/{color}] [bold]{mode_name}[/bold] - {desc}")
+ console.print(f"\n[dim]Usage: /mode <always|auto|yolo>[/dim]")
+ else:
+ new_mode = parts[1].lower().strip()
+ if new_mode in ['always', 'auto', 'yolo']:
  approval_mode = new_mode
  mode_colors = {'always': 'yellow', 'auto': 'green', 'yolo': 'red'}
  console.print(f"✓ [green]Mode set to[/green] [{mode_colors[new_mode]}][bold]{new_mode}[/bold][/{mode_colors[new_mode]}]")
@@ -1148,44 +2010,214 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
  console.print(f"[yellow]Unknown mode: {new_mode}. Use: always, auto, or yolo[/yellow]")
  continue

- # /dir command - mount workspace directory
+ # /dir command - manage allowed directories
  if user_input == '/dir' or user_input.startswith('/dir '):
- parts = user_input.split(maxsplit=1)
+ parts = user_input.split()
+
  if len(parts) == 1:
- if current_work_dir:
- console.print(f"📁 [bold cyan]Current workspace:[/bold cyan] {current_work_dir}")
+ # /dir - list all allowed directories
+ if allowed_directories:
+ console.print("📁 [bold cyan]Allowed directories:[/bold cyan]")
+ for i, d in enumerate(allowed_directories):
+ marker = "●" if i == 0 else "○"
+ label = " [dim](primary)[/dim]" if i == 0 else ""
+ console.print(f" {marker} {d}{label}")
  else:
- console.print("[yellow]No workspace mounted. Usage: /dir /path/to/workspace[/yellow]")
+ console.print("[yellow]No directories allowed.[/yellow]")
+ console.print("[dim]Usage: /dir [add|rm|remove] /path/to/directory[/dim]")
+ continue
+
+ action = parts[1].lower()
+
+ # Handle /dir add /path or /dir /path (add is default)
+ if action in ['add', 'rm', 'remove']:
+ if len(parts) < 3:
+ console.print(f"[yellow]Missing path. Usage: /dir {action} /path/to/directory[/yellow]")
+ continue
+ dir_path = parts[2]
  else:
- new_dir = parts[1].strip()
- new_dir_path = Path(new_dir).expanduser().resolve()
+ # /dir /path - default to add
+ action = 'add'
+ dir_path = parts[1]
+
+ dir_path = str(Path(dir_path).expanduser().resolve())
+
+ if action == 'add':
+ if not Path(dir_path).exists():
+ console.print(f"[red]Directory not found: {dir_path}[/red]")
+ continue
+ if not Path(dir_path).is_dir():
+ console.print(f"[red]Not a directory: {dir_path}[/red]")
+ continue

- if not new_dir_path.exists():
- console.print(f"[red]Directory not found: {new_dir}[/red]")
+ if dir_path in allowed_directories:
+ console.print(f"[yellow]Directory already allowed: {dir_path}[/yellow]")
  continue
- if not new_dir_path.is_dir():
- console.print(f"[red]Not a directory: {new_dir}[/red]")
+
+ allowed_directories.append(dir_path)
+
+ # Recreate agent executor with updated directories
+ if is_direct or is_local or is_inventory:
+ from .tools import create_session_memory
+ memory = create_session_memory(current_session_id)
+ try:
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
+ )
+ console.print(Panel(
+ f"[cyan]✓ Added directory: [bold]{dir_path}[/bold]\n Total allowed: {len(allowed_directories)}[/cyan]",
+ border_style="cyan",
+ box=box.ROUNDED
+ ))
+ except Exception as e:
+ allowed_directories.remove(dir_path) # Rollback
+ console.print(f"[red]Error adding directory: {e}[/red]")
+ else:
+ console.print("[yellow]Directory mounting is only available for local agents and built-in agents.[/yellow]")
+
+ elif action in ['rm', 'remove']:
+ if dir_path not in allowed_directories:
+ console.print(f"[yellow]Directory not in allowed list: {dir_path}[/yellow]")
+ if allowed_directories:
+ console.print("[dim]Currently allowed:[/dim]")
+ for d in allowed_directories:
+ console.print(f"[dim] - {d}[/dim]")
+ continue
+
+ if len(allowed_directories) == 1:
+ console.print("[yellow]Cannot remove the last directory. Use /dir add first to add another.[/yellow]")
  continue

- current_work_dir = str(new_dir_path)
+ allowed_directories.remove(dir_path)

- # Recreate agent executor with new work_dir - use session memory
- if is_direct or is_local:
+ # Recreate agent executor with updated directories
+ if is_direct or is_local or is_inventory:
  from .tools import create_session_memory
  memory = create_session_memory(current_session_id)
  try:
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
  )
  console.print(Panel(
- f"[cyan]✓ Mounted: [bold]{current_work_dir}[/bold]\n Terminal + filesystem tools enabled.[/cyan]",
+ f"[cyan]✓ Removed directory: [bold]{dir_path}[/bold]\n Remaining: {len(allowed_directories)}[/cyan]",
  border_style="cyan",
  box=box.ROUNDED
  ))
  except Exception as e:
- console.print(f"[red]Error mounting directory: {e}[/red]")
+ allowed_directories.append(dir_path) # Rollback
+ console.print(f"[red]Error removing directory: {e}[/red]")
+ else:
+ console.print("[yellow]Directory mounting is only available for local agents and built-in agents.[/yellow]")
+ continue
+
+ # /inventory command - load inventory/knowledge graph from path
+ if user_input == '/inventory' or user_input.startswith('/inventory '):
+ if not (is_direct or is_local or is_inventory):
+ console.print("[yellow]Loading inventory is only available for local agents and built-in agents.[/yellow]")
+ continue
+
+ parts = user_input.split(maxsplit=1)
+ if len(parts) == 1:
+ # Show current inventory and available files
+ current_inventory = None
+ for tc in added_toolkit_configs:
+ if isinstance(tc, dict) and tc.get('type') == 'inventory':
+ current_inventory = tc.get('graph_path')
+ break
+ elif isinstance(tc, str):
+ try:
+ with open(tc, 'r') as f:
+ cfg = json.load(f)
+ if cfg.get('type') == 'inventory':
+ current_inventory = cfg.get('graph_path')
+ break
+ except Exception:
+ pass
+
+ if current_inventory:
+ console.print(f"📊 [bold cyan]Current inventory:[/bold cyan] {current_inventory}")
  else:
- console.print("[yellow]Directory mounting is only available for local agents and direct chat.[/yellow]")
+ console.print("[yellow]No inventory loaded.[/yellow]")
+
+ # Show available .json files
+ primary_dir = allowed_directories[0] if allowed_directories else None
+ available = _get_inventory_json_files(primary_dir)
+ if available:
+ console.print(f"[dim]Available files: {', '.join(available[:10])}")
+ if len(available) > 10:
+ console.print(f"[dim] ... and {len(available) - 10} more[/dim]")
+ console.print("[dim]Usage: /inventory <path/to/graph.json>[/dim]")
+ else:
+ inventory_path = parts[1].strip()
+
+ # Build inventory config from path
+ primary_dir = allowed_directories[0] if allowed_directories else None
+ inventory_config = _build_inventory_config(inventory_path, primary_dir)
+ if not inventory_config:
+ console.print(f"[red]Inventory file not found: {inventory_path}[/red]")
+ # Show search locations
+ console.print("[dim]Searched in:[/dim]")
+ console.print(f"[dim] - {Path.cwd()}[/dim]")
+ console.print(f"[dim] - {Path.cwd() / '.alita' / 'inventory'}[/dim]")
+ if primary_dir:
+ console.print(f"[dim] - {primary_dir}[/dim]")
+ console.print(f"[dim] - {Path(primary_dir) / '.alita' / 'inventory'}[/dim]")
+ continue
+
+ # Remove any existing inventory toolkit configs
+ new_toolkit_configs = []
+ removed_inventory = None
+ for tc in added_toolkit_configs:
+ if isinstance(tc, dict) and tc.get('type') == 'inventory':
+ removed_inventory = tc.get('toolkit_name', 'inventory')
+ continue # Skip existing inventory
+ elif isinstance(tc, str):
+ try:
+ with open(tc, 'r') as f:
+ cfg = json.load(f)
+ if cfg.get('type') == 'inventory':
+ removed_inventory = cfg.get('toolkit_name', Path(tc).stem)
+ continue # Skip existing inventory
+ except Exception:
+ pass
+ new_toolkit_configs.append(tc)
+
+ # Add new inventory config
+ new_toolkit_configs.append(inventory_config)
+ added_toolkit_configs = new_toolkit_configs
+
+ # Recreate agent executor with new inventory
+ from .tools import create_session_memory, update_session_metadata
+ memory = create_session_memory(current_session_id)
+ try:
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
+ )
+ # Persist updated toolkits to session (exclude transient inventory configs)
+ serializable_configs = [tc for tc in added_toolkit_configs if isinstance(tc, str)]
+ update_session_metadata(current_session_id, {
+ 'added_toolkit_configs': serializable_configs,
+ 'inventory_graph': inventory_config.get('graph_path') # Save just the graph path
+ })
+
+ toolkit_name = inventory_config['toolkit_name']
+ graph_path = inventory_config['graph_path']
+ if removed_inventory:
+ console.print(Panel(
+ f"[cyan]ℹ Replaced inventory [bold]{removed_inventory}[/bold] with [bold]{toolkit_name}[/bold]\n"
+ f" Graph: {graph_path}[/cyan]",
+ border_style="cyan",
+ box=box.ROUNDED
+ ))
+ else:
+ console.print(Panel(
+ f"[cyan]✓ Loaded inventory: [bold]{toolkit_name}[/bold]\n"
+ f" Graph: {graph_path}[/cyan]",
+ border_style="cyan",
+ box=box.ROUNDED
+ ))
+ except Exception as e:
+ console.print(f"[red]Error loading inventory: {e}[/red]")
  continue

  # /session command - list or resume sessions
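Note: the /dir handler above follows a mutate-then-rollback pattern: the directory list is updated first, the executor is rebuilt, and the mutation is reverted if the rebuild raises. A condensed sketch of that flow under illustrative names (the `rebuild` callable stands in for `_setup_local_agent_executor`):

    # Sketch only: demonstrates the rollback-on-failure pattern used by /dir add|rm.
    from pathlib import Path
    from typing import Callable, List

    def add_allowed_directory(path: str, allowed: List[str],
                              rebuild: Callable[[List[str]], None]) -> bool:
        """Validate, append, rebuild the executor; roll back if the rebuild fails."""
        resolved = str(Path(path).expanduser().resolve())
        if not Path(resolved).is_dir() or resolved in allowed:
            return False
        allowed.append(resolved)
        try:
            rebuild(allowed)          # stand-in for _setup_local_agent_executor(...)
            return True
        except Exception:
            allowed.remove(resolved)  # rollback, mirroring the handler above
            return False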
@@ -1230,7 +2262,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],

  elif parts[1] == 'resume' and len(parts) > 2:
  session_id = parts[2].strip()
- from .tools import load_session_metadata, create_session_memory
+ from .tools import load_session_metadata, create_session_memory, from_portable_path

  # Check if session exists (either plan or metadata)
  loaded_state = PlanState.load(session_id)
@@ -1251,6 +2283,67 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
  plan_state['session_id'] = session_id
  resume_info = ""

+ # Restore agent source and reload agent definition if available
+ restored_agent = False
+ if session_metadata:
+ agent_source = session_metadata.get('agent_source')
+ if agent_source:
+ agent_file_path = from_portable_path(agent_source)
+ if Path(agent_file_path).exists():
+ try:
+ agent_def = load_agent_definition(agent_file_path)
+ current_agent_file = agent_file_path
+ agent_name = agent_def.get('name', Path(agent_file_path).stem)
+ is_local = True
+ is_direct = False
+ restored_agent = True
+ except Exception as e:
+ console.print(f"[yellow]Warning: Could not reload agent from {agent_source}: {e}[/yellow]")
+
+ # Restore added toolkit configs
+ restored_toolkit_configs = session_metadata.get('added_toolkit_configs', [])
+ if restored_toolkit_configs:
+ added_toolkit_configs.clear()
+ added_toolkit_configs.extend(restored_toolkit_configs)
+
+ # Restore added MCPs to agent_def
+ restored_mcps = session_metadata.get('added_mcps', [])
+ if restored_mcps and restored_agent:
+ if 'mcps' not in agent_def:
+ agent_def['mcps'] = []
+ for mcp_name in restored_mcps:
+ if mcp_name not in [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])]:
+ agent_def['mcps'].append(mcp_name)
+
+ # Restore model/temperature overrides
+ if session_metadata.get('model'):
+ current_model = session_metadata['model']
+ if restored_agent:
+ agent_def['model'] = current_model
+ if session_metadata.get('temperature') is not None:
+ current_temperature = session_metadata['temperature']
+ if restored_agent:
+ agent_def['temperature'] = current_temperature
+
+ # Restore allowed directories
+ if session_metadata.get('allowed_directories'):
+ allowed_directories = session_metadata['allowed_directories']
+ elif session_metadata.get('work_dir'):
+ # Backward compatibility with old sessions
+ allowed_directories = [session_metadata['work_dir']]
+
+ # Reinitialize context manager with resumed session_id to load chat history
+ ctx_manager = CLIContextManager(
+ session_id=session_id,
+ max_context_tokens=context_config.get('max_context_tokens', 8000),
+ preserve_recent=context_config.get('preserve_recent_messages', 5),
+ pruning_method=context_config.get('pruning_method', 'oldest_first'),
+ enable_summarization=context_config.get('enable_summarization', True),
+ summary_trigger_ratio=context_config.get('summary_trigger_ratio', 0.8),
+ summaries_limit=context_config.get('summaries_limit_count', 5),
+ llm=llm if 'llm' in dir() else None
+ )
+
  # Show session info
  agent_info = session_metadata.get('agent_name', 'unknown') if session_metadata else 'unknown'
  model_info = session_metadata.get('model', '') if session_metadata else ''
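Note: the resume path above relies on `from_portable_path` (and its counterpart `to_portable_path`, used when the session is saved on exit) to turn stored agent paths back into usable ones. A guess at those helpers, assuming they simply make home-relative paths portable across machines; the real implementation in `alita_sdk/cli/tools` may differ:

    # Assumed behaviour only - illustrates why paths are stored in a "portable" form.
    from pathlib import Path

    def to_portable_path(path: str) -> str:
        """Store paths under the home directory as '~/...' so they stay valid elsewhere."""
        p = Path(path).expanduser().resolve()
        try:
            return "~/" + str(p.relative_to(Path.home()))
        except ValueError:
            return str(p)  # outside the home directory: keep absolute

    def from_portable_path(path: str) -> str:
        """Expand a stored '~/...' path back to an absolute path."""
        return str(Path(path).expanduser())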
@@ -1263,14 +2356,48 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1263
2356
  box=box.ROUNDED
1264
2357
  ))
1265
2358
 
1266
- # Recreate planning tools with loaded state
1267
- if is_direct or is_local:
2359
+ # Display restored chat history
2360
+ chat_history_export = ctx_manager.export_chat_history(include_only=False)
2361
+ if chat_history_export:
2362
+ preserve_recent = context_config.get('preserve_recent_messages', 5)
2363
+ total_messages = len(chat_history_export)
2364
+
2365
+ if total_messages > preserve_recent:
2366
+ console.print(f"\n[dim]... {total_messages - preserve_recent} earlier messages in context[/dim]")
2367
+ messages_to_show = chat_history_export[-preserve_recent:]
2368
+ else:
2369
+ messages_to_show = chat_history_export
2370
+
2371
+ for msg in messages_to_show:
2372
+ role = msg.get('role', 'user')
2373
+ content = msg.get('content', '')[:200] # Truncate for display
2374
+ if len(msg.get('content', '')) > 200:
2375
+ content += '...'
2376
+ role_color = 'cyan' if role == 'user' else 'green'
2377
+ role_label = 'You' if role == 'user' else 'Assistant'
2378
+ console.print(f"[dim][{role_color}]{role_label}:[/{role_color}] {content}[/dim]")
2379
+ console.print()
2380
+
2381
+ # Recreate agent executor with restored tools if we have a local/built-in agent
2382
+ if (is_direct or is_local or is_inventory) and restored_agent:
2383
+ try:
2384
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
2385
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
2386
+ )
2387
+ ctx_manager.llm = llm # Update LLM for summarization
2388
+
2389
+ # Warn about MCP state loss
2390
+ if restored_mcps:
2391
+ console.print("[yellow]Note: MCP connections re-initialized (stateful server state like browser sessions are lost)[/yellow]")
2392
+ except Exception as e:
2393
+ console.print(f"[red]Error recreating agent executor: {e}[/red]")
2394
+ console.print("[yellow]Session state loaded but agent not fully restored. Some tools may not work.[/yellow]")
2395
+ elif is_direct or is_local or is_inventory:
2396
+ # Just update planning tools if we couldn't restore agent
1268
2397
  try:
1269
2398
  from .tools import get_planning_tools
1270
2399
  if loaded_state:
1271
2400
  planning_tools, _ = get_planning_tools(loaded_state)
1272
- # Note: We'd need to rebuild the agent to inject new tools
1273
- # For now, the plan state dict is updated so new tool calls will see it
1274
2401
  except Exception as e:
1275
2402
  console.print(f"[yellow]Warning: Could not reload planning tools: {e}[/yellow]")
1276
2403
  else:
@@ -1282,7 +2409,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1282
2409
  # /agent command - switch to a different agent
1283
2410
  if user_input == '/agent':
1284
2411
  selected_agent = _select_agent_interactive(client, config)
1285
- if selected_agent and selected_agent != '__direct__':
2412
+ if selected_agent and selected_agent != '__direct__' and selected_agent != '__inventory__':
1286
2413
  # Load the new agent
1287
2414
  new_is_local = Path(selected_agent).exists()
1288
2415
 
@@ -1292,6 +2419,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1292
2419
  agent_type = "Local Agent"
1293
2420
  is_local = True
1294
2421
  is_direct = False
2422
+ is_inventory = False
1295
2423
  current_agent_file = selected_agent # Track for /reload
1296
2424
  else:
1297
2425
  # Platform agent
@@ -1333,7 +2461,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1333
2461
  added_toolkit_configs = []
1334
2462
  try:
1335
2463
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1336
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
2464
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1337
2465
  )
1338
2466
  console.print(Panel(
1339
2467
  f"[cyan]ℹ Switched to agent: [bold]{agent_name}[/bold] ({agent_type}). Agent state reset, chat history preserved.[/cyan]",
@@ -1346,21 +2474,22 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1346
2474
  # Switch back to direct mode
1347
2475
  is_direct = True
1348
2476
  is_local = False
2477
+ is_inventory = False
1349
2478
  current_agent_file = None # No file for direct mode
1350
2479
  agent_name = "Alita"
1351
2480
  agent_type = "Direct LLM"
1352
2481
  alita_prompt = _get_alita_system_prompt(config)
1353
2482
  agent_def = {
1354
- 'model': current_model or 'gpt-4o',
1355
- 'temperature': current_temperature if current_temperature is not None else 0.1,
1356
- 'max_tokens': current_max_tokens or 4096,
2483
+ 'model': current_model or default_model,
2484
+ 'temperature': current_temperature if current_temperature is not None else default_temperature,
2485
+ 'max_tokens': current_max_tokens or default_max_tokens,
1357
2486
  'system_prompt': alita_prompt
1358
2487
  }
1359
2488
  from .tools import create_session_memory
1360
2489
  memory = create_session_memory(current_session_id)
1361
2490
  try:
1362
2491
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1363
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
2492
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1364
2493
  )
1365
2494
  console.print(Panel(
1366
2495
  f"[cyan]ℹ Switched to [bold]Alita[/bold]. Agent state reset, chat history preserved.[/cyan]",
@@ -1369,18 +2498,55 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1369
2498
  ))
1370
2499
  except Exception as e:
1371
2500
  console.print(f"[red]Error switching to direct mode: {e}[/red]")
2501
+ elif selected_agent == '__inventory__':
2502
+ # Switch to inventory mode
2503
+ is_direct = False
2504
+ is_local = False
2505
+ is_inventory = True
2506
+ current_agent_file = None # No file for inventory mode
2507
+ agent_name = "Inventory"
2508
+ agent_type = "Built-in Agent"
2509
+ inventory_prompt = _get_inventory_system_prompt(config)
2510
+ agent_def = {
2511
+ 'name': 'inventory-agent',
2512
+ 'model': current_model or default_model,
2513
+ 'temperature': current_temperature if current_temperature is not None else 0.3,
2514
+ 'max_tokens': current_max_tokens or default_max_tokens,
2515
+ 'system_prompt': inventory_prompt,
2516
+ 'toolkit_configs': [
2517
+ {'type': 'inventory', 'graph_path': './knowledge_graph.json'}
2518
+ ]
2519
+ }
2520
+ from .tools import create_session_memory
2521
+ memory = create_session_memory(current_session_id)
2522
+ try:
2523
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
2524
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
2525
+ )
2526
+ console.print(Panel(
2527
+ f"[cyan]ℹ Switched to [bold]Inventory[/bold] agent. Use /add_toolkit to add source toolkits.[/cyan]",
2528
+ border_style="cyan",
2529
+ box=box.ROUNDED
2530
+ ))
2531
+ except Exception as e:
2532
+ console.print(f"[red]Error switching to inventory mode: {e}[/red]")
1372
2533
  continue
1373
2534
 
1374
2535
  # Execute agent
1375
- if (is_direct or is_local) and agent_executor is None:
2536
+ # Track if history was already added during continuation handling
2537
+ history_already_added = False
2538
+ original_user_input = user_input # Preserve for history tracking
2539
+
2540
+ if (is_direct or is_local or is_inventory) and agent_executor is None:
1376
2541
  # Local agent without tools: use direct LLM call with streaming
1377
2542
  system_prompt = agent_def.get('system_prompt', '')
1378
2543
  messages = []
1379
2544
  if system_prompt:
1380
2545
  messages.append({"role": "system", "content": system_prompt})
1381
2546
 
1382
- # Add chat history
1383
- for msg in chat_history:
2547
+ # Build pruned context from context manager
2548
+ context_messages = ctx_manager.build_context()
2549
+ for msg in context_messages:
1384
2550
  messages.append(msg)
1385
2551
 
1386
2552
  # Add user message
@@ -1444,38 +2610,134 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1444
2610
  from langchain_core.runnables import RunnableConfig
1445
2611
  from langgraph.errors import GraphRecursionError
1446
2612
 
1447
- invoke_config = None
2613
+ # Initialize invoke_config with thread_id for checkpointing
2614
+ # This ensures the same thread is used across continuations
2615
+ invoke_config = RunnableConfig(
2616
+ configurable={"thread_id": current_session_id}
2617
+ )
2618
+ cli_callback = None
1448
2619
  if show_verbose:
1449
2620
  cli_callback = create_cli_callback(verbose=True, debug=debug_mode)
1450
- invoke_config = RunnableConfig(callbacks=[cli_callback])
2621
+ invoke_config["callbacks"] = [cli_callback]
1451
2622
 
1452
2623
  # Track recursion continuation state
1453
2624
  continue_from_recursion = False
1454
2625
  recursion_attempts = 0
2626
+ tool_limit_attempts = 0 # Track tool limit continuation attempts
1455
2627
  max_recursion_continues = 5 # Prevent infinite continuation loops
2628
+ output = None # Initialize output before loop
2629
+ result = None # Initialize result before loop
1456
2630
 
1457
2631
  while True:
1458
2632
  try:
1459
- # Show status only when not verbose (verbose shows its own progress)
1460
- if not show_verbose:
1461
- with console.status("[yellow]Thinking...[/yellow]", spinner="dots"):
1462
- result = agent_executor.invoke(
1463
- {
1464
- "input": [user_input] if not is_local else user_input,
1465
- "chat_history": chat_history
1466
- },
1467
- config=invoke_config
1468
- )
1469
- else:
1470
- if not continue_from_recursion:
1471
- console.print() # Add spacing before tool calls
2633
+ # Always start with a thinking spinner
2634
+ status = console.status("[yellow]Thinking...[/yellow]", spinner="dots")
2635
+ status.start()
2636
+
2637
+ # Pass status to callback so it can stop it when tool calls start
2638
+ if cli_callback:
2639
+ cli_callback.status = status
2640
+
2641
+ try:
1472
2642
  result = agent_executor.invoke(
1473
2643
  {
1474
2644
  "input": [user_input] if not is_local else user_input,
1475
- "chat_history": chat_history
2645
+ "chat_history": ctx_manager.build_context()
1476
2646
  },
1477
2647
  config=invoke_config
1478
2648
  )
2649
+ finally:
2650
+ # Make sure spinner is stopped
2651
+ try:
2652
+ status.stop()
2653
+ except Exception:
2654
+ pass
2655
+
2656
+ # Extract output from result
2657
+ if result is not None:
2658
+ output = extract_output_from_result(result)
2659
+
2660
+ # Check if max tool iterations were reached and prompt user
2661
+ if output and "Maximum tool execution iterations" in output and "reached" in output:
2662
+ tool_limit_attempts += 1
2663
+
2664
+ console.print()
2665
+ console.print(Panel(
2666
+ f"[yellow]⚠ Tool execution limit reached[/yellow]\n\n"
2667
+ f"The agent has executed the maximum number of tool calls in a single turn.\n"
2668
+ f"This usually happens with complex tasks that require many sequential operations.\n\n"
2669
+ f"[dim]Attempt {tool_limit_attempts}/{max_recursion_continues}[/dim]",
2670
+ title="Tool Limit Reached",
2671
+ border_style="yellow",
2672
+ box=box.ROUNDED
2673
+ ))
2674
+
2675
+ if tool_limit_attempts >= max_recursion_continues:
2676
+ console.print("[red]Maximum continuation attempts reached. Please break down your request into smaller tasks.[/red]")
2677
+ break
2678
+
2679
+ console.print("\nWhat would you like to do?")
2680
+ console.print(" [bold cyan]c[/bold cyan] - Continue execution (tell agent to resume)")
2681
+ console.print(" [bold cyan]s[/bold cyan] - Stop and keep partial results")
2682
+ console.print(" [bold cyan]n[/bold cyan] - Start a new request")
2683
+ console.print()
2684
+
2685
+ try:
2686
+ choice = input_handler.get_input("Choice [c/s/n]: ").strip().lower()
2687
+ except (KeyboardInterrupt, EOFError):
2688
+ choice = 's'
2689
+
2690
+ if choice == 'c':
2691
+ # Continue - send a follow-up message to resume
2692
+ console.print("\n[cyan]Continuing execution...[/cyan]\n")
2693
+
2694
+ # Clean up the output - remove the tool limit warning message
2695
+ clean_output = output
2696
+ if "Maximum tool execution iterations" in output:
2697
+ # Strip the warning from the end of the output
2698
+ lines = output.split('\n')
2699
+ clean_lines = [l for l in lines if "Maximum tool execution iterations" not in l and "Stopping tool execution" not in l]
2700
+ clean_output = '\n'.join(clean_lines).strip()
2701
+
2702
+ # Add current output to history first (without the warning)
2703
+ # Use original user input for first continuation, current for subsequent
2704
+ history_input = original_user_input if not history_already_added else user_input
2705
+ if clean_output:
2706
+ chat_history.append({"role": "user", "content": history_input})
2707
+ chat_history.append({"role": "assistant", "content": clean_output})
2708
+ ctx_manager.add_message("user", history_input)
2709
+ ctx_manager.add_message("assistant", clean_output)
2710
+ history_already_added = True
2711
+
2712
+ # CRITICAL: Use a new thread_id when continuing to avoid corrupted
2713
+ # checkpoint state. The tool limit may have left the checkpoint with
2714
+ # an AIMessage containing tool_calls without corresponding ToolMessages.
2715
+ # Using a new thread_id starts fresh with our clean context manager state.
2716
+ import uuid
2717
+ continuation_thread_id = f"{current_session_id}-cont-{uuid.uuid4().hex[:8]}"
2718
+ invoke_config = RunnableConfig(
2719
+ configurable={"thread_id": continuation_thread_id}
2720
+ )
2721
+ if cli_callback:
2722
+ invoke_config["callbacks"] = [cli_callback]
2723
+
2724
+ # Set new input to continue with a more explicit continuation message
2725
+ # Include context about the task limit to help the agent understand
2726
+ user_input = (
2727
+ "The previous response was interrupted due to reaching the tool execution limit. "
2728
+ "Continue from where you left off and complete the remaining steps of the original task. "
2729
+ "Focus on what still needs to be done - do not repeat completed work."
2730
+ )
2731
+ continue # Retry the invoke in this inner loop
2732
+
2733
+ elif choice == 's':
2734
+ console.print("\n[yellow]Stopped. Partial work has been completed.[/yellow]")
2735
+ break # Exit retry loop and show output
2736
+
2737
+ else: # 'n' or anything else
2738
+ console.print("\n[dim]Skipped. Enter a new request.[/dim]")
2739
+ output = None
2740
+ break # Exit retry loop
1479
2741
 
1480
2742
  # Success - exit the retry loop
1481
2743
  break
@@ -1513,13 +2775,35 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1513
2775
  choice = 's'
1514
2776
 
1515
2777
  if choice == 'c':
1516
- # Continue - the checkpoint should preserve state
1517
- # We'll re-invoke with a continuation message
2778
+ # Continue - Use a new thread_id to avoid corrupted checkpoint state.
2779
+ # GraphRecursionError may have left the checkpoint with an AIMessage
2780
+ # containing tool_calls without corresponding ToolMessages.
2781
+ # Using a new thread_id starts fresh with our clean context manager state.
1518
2782
  continue_from_recursion = True
1519
- console.print("\n[cyan]Continuing from last checkpoint...[/cyan]\n")
2783
+ console.print("\n[cyan]Continuing with fresh context...[/cyan]\n")
2784
+
2785
+ # Add current progress to history if we have it
2786
+ # (GraphRecursionError doesn't give us partial output, but context may have been updated)
2787
+ history_input = original_user_input if not history_already_added else user_input
2788
+ ctx_manager.add_message("user", history_input)
2789
+ ctx_manager.add_message("assistant", "[Previous task interrupted - continuing...]")
2790
+ history_already_added = True
1520
2791
 
1521
- # Modify the input to signal continuation
1522
- user_input = "Continue from where you left off. Complete the remaining steps of the task."
2792
+ # Create new thread_id to avoid corrupted checkpoint
2793
+ import uuid
2794
+ continuation_thread_id = f"{current_session_id}-cont-{uuid.uuid4().hex[:8]}"
2795
+ invoke_config = RunnableConfig(
2796
+ configurable={"thread_id": continuation_thread_id}
2797
+ )
2798
+ if cli_callback:
2799
+ invoke_config["callbacks"] = [cli_callback]
2800
+
2801
+ # More explicit continuation message
2802
+ user_input = (
2803
+ "The previous response was interrupted due to reaching the step limit. "
2804
+ "Continue from where you left off and complete the remaining steps of the original task. "
2805
+ "Focus on what still needs to be done - do not repeat completed work."
2806
+ )
1523
2807
  continue # Retry the invoke
1524
2808
 
1525
2809
  elif choice == 's':
@@ -1532,30 +2816,53 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1532
2816
  console.print("\n[dim]Skipped. Enter a new request.[/dim]")
1533
2817
  output = None
1534
2818
  break
1535
-
1536
- # Skip chat history update if we bailed out
2819
+
2820
+ # Skip chat history update if we bailed out (no result)
1537
2821
  if output is None:
1538
2822
  continue
1539
-
1540
- # Extract output from result (if we have a result)
1541
- if 'result' in dir() and result is not None:
1542
- output = extract_output_from_result(result)
1543
2823
 
1544
- # Display response
1545
- console.print(f"\n[bold bright_cyan]{agent_name}:[/bold bright_cyan]")
2824
+ # Display response in a clear format
2825
+ console.print() # Add spacing
2826
+ console.print(f"[bold bright_cyan]{agent_name}:[/bold bright_cyan]")
2827
+ console.print() # Add spacing before response
1546
2828
  if any(marker in output for marker in ['```', '**', '##', '- ', '* ']):
1547
2829
  console.print(Markdown(output))
1548
2830
  else:
1549
2831
  console.print(output)
2832
+ console.print() # Add spacing after response
1550
2833
 
1551
- # Update chat history
1552
- chat_history.append({"role": "user", "content": user_input})
1553
- chat_history.append({"role": "assistant", "content": output})
2834
+ # Update chat history and context manager (skip if already added during continuation)
2835
+ if not history_already_added:
2836
+ chat_history.append({"role": "user", "content": original_user_input})
2837
+ chat_history.append({"role": "assistant", "content": output})
2838
+
2839
+ # Add messages to context manager for token tracking and pruning
2840
+ ctx_manager.add_message("user", original_user_input)
2841
+ ctx_manager.add_message("assistant", output)
2842
+ else:
2843
+ # During continuation, add the final response with continuation message
2844
+ chat_history.append({"role": "user", "content": user_input})
2845
+ chat_history.append({"role": "assistant", "content": output})
2846
+ ctx_manager.add_message("user", user_input)
2847
+ ctx_manager.add_message("assistant", output)
1554
2848
 
1555
2849
  except KeyboardInterrupt:
1556
2850
  console.print("\n\n[yellow]Interrupted. Type 'exit' to quit or continue chatting.[/yellow]")
1557
2851
  continue
1558
2852
  except EOFError:
2853
+ # Save final session state before exiting
2854
+ try:
2855
+ from .tools import update_session_metadata, to_portable_path
2856
+ update_session_metadata(current_session_id, {
2857
+ 'agent_source': to_portable_path(current_agent_file) if current_agent_file else None,
2858
+ 'model': current_model or llm_model_display,
2859
+ 'temperature': current_temperature if current_temperature is not None else llm_temperature_display,
2860
+ 'allowed_directories': allowed_directories,
2861
+ 'added_toolkit_configs': list(added_toolkit_configs),
2862
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])],
2863
+ })
2864
+ except Exception as e:
2865
+ logger.debug(f"Failed to save session state on exit: {e}")
1559
2866
  console.print("\n\n[bold cyan]Goodbye! 👋[/bold cyan]")
1560
2867
  break
1561
2868
 
@@ -1593,40 +2900,24 @@ def agent_run(ctx, agent_source: str, message: str, version: Optional[str],
  temperature: Optional[float], max_tokens: Optional[int],
  save_thread: Optional[str], work_dir: Optional[str],
  verbose: str):
- """
- Run agent with a single message (handoff mode).
+ """Run agent with a single message (handoff mode).

+ \b
  AGENT_SOURCE can be:
- - Platform agent ID or name
- - Path to local agent file
+ - Platform agent ID or name
+ - Path to local agent file

  MESSAGE is the input message to send to the agent.

+ \b
  Examples:
-
- # Simple query
- alita-cli agent run my-agent "What is the status of JIRA-123?"
-
- # With local agent
- alita-cli agent run .github/agents/sdk-dev.agent.md \\
- "Create a new toolkit for Stripe API"
-
- # With toolkit configs and JSON output
- alita-cli --output json agent run my-agent "Search for bugs" \\
- --toolkit-config jira-config.json
-
- # With filesystem access
- alita-cli agent run my-agent "Analyze the code in src/" --dir ./myproject
-
- # Save thread for continuation
- alita-cli agent run my-agent "Start task" \\
- --save-thread thread.txt
-
- # Quiet mode (hide tool calls and thinking)
- alita-cli agent run my-agent "Query" --verbose quiet
-
- # Debug mode (show all including LLM calls)
- alita-cli agent run my-agent "Query" --verbose debug
+ alita run my-agent "What is the status of JIRA-123?"
+ alita run ./agent.md "Create a new toolkit for Stripe API"
+ alita -o json run my-agent "Search for bugs" --toolkit-config jira.json
+ alita run my-agent "Analyze code" --dir ./myproject
+ alita run my-agent "Start task" --save-thread thread.txt
+ alita run my-agent "Query" -v quiet
+ alita run my-agent "Query" -v debug
  """
  formatter = ctx.obj['formatter']
  client = get_client(ctx)
@@ -1909,3 +3200,393 @@ def agent_run(ctx, agent_source: str, message: str, version: Optional[str],
  )
  console.print(error_panel, style="red")
  raise click.Abort()
+
3204
+
3205
+ @agent.command('execute-test-cases')
3206
+ @click.argument('agent_source')
3207
+ @click.option('--test-cases-dir', required=True, type=click.Path(exists=True, file_okay=False, dir_okay=True),
3208
+ help='Directory containing test case files')
3209
+ @click.option('--results-dir', required=True, type=click.Path(file_okay=False, dir_okay=True),
3210
+ help='Directory where test results will be saved')
3211
+ @click.option('--test-case', 'test_case_files', multiple=True,
3212
+ help='Specific test case file(s) to execute (e.g., TC-001.md). Can specify multiple times. If not specified, executes all test cases.')
3213
+ @click.option('--model', help='Override LLM model')
3214
+ @click.option('--temperature', type=float, help='Override temperature')
3215
+ @click.option('--max-tokens', type=int, help='Override max tokens')
3216
+ @click.option('--dir', 'work_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True),
3217
+ help='Grant agent filesystem access to this directory')
3218
+ @click.option('--data-generator', type=click.Path(exists=True),
3219
+ help='Path to test data generator agent definition file')
3220
+ @click.option('--skip-data-generation', is_flag=True,
3221
+ help='Skip test data generation step')
3222
+ @click.pass_context
3223
+ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir: str,
3224
+ test_case_files: tuple, model: Optional[str], temperature: Optional[float],
3225
+ max_tokens: Optional[int], work_dir: Optional[str],
3226
+ data_generator: Optional[str], skip_data_generation: bool):
3227
+ """
3228
+ Execute test cases from a directory and save results.
3229
+
3230
+ This command:
3231
+ 1. (Optional) Executes test data generator agent to provision test data
3232
+ 2. Scans TEST_CASES_DIR for test case markdown files (TC-*.md)
3233
+ 3. For each test case:
3234
+ - Parses the test case to extract config, steps, and expectations
3235
+ - Loads the agent with the toolkit config specified in the test case
3236
+ - Executes each test step
3237
+ - Validates output against expectations
3238
+ - Generates a test result file
3239
+ 4. Saves all results to RESULTS_DIR
3240
+
3241
+ AGENT_SOURCE: Path to agent definition file (e.g., .github/agents/test-runner.agent.md)
3242
+
3243
+ \b
3244
+ Examples:
3245
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results
3246
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
3247
+ --data-generator ./data-gen.json
3248
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
3249
+ --test-case TC-001.md --test-case TC-002.md
3250
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \
3251
+ --skip-data-generation --model gpt-4o
3252
+ """
3253
+ config = ctx.obj['config']
3254
+ client = get_client(ctx)
3255
+
3256
+ try:
3257
+ # Load agent definition
3258
+ if not Path(agent_source).exists():
3259
+ raise click.ClickException(f"Agent definition not found: {agent_source}")
3260
+
3261
+ agent_def = load_agent_definition(agent_source)
3262
+ agent_name = agent_def.get('name', Path(agent_source).stem)
3263
+
3264
+ # Find all test case files (recursively search subdirectories)
3265
+ test_cases_path = Path(test_cases_dir)
3266
+
3267
+ # Filter test cases based on --test-case options
3268
+ if test_case_files:
3269
+ # User specified specific test case files
3270
+ test_case_files_set = set(test_case_files)
3271
+ all_test_cases = sorted(test_cases_path.rglob('TC-*.md'))
3272
+ test_case_files_list = [
3273
+ tc for tc in all_test_cases
3274
+ if tc.name in test_case_files_set
3275
+ ]
3276
+
3277
+ # Check if all specified files were found
3278
+ found_names = {tc.name for tc in test_case_files_list}
3279
+ not_found = test_case_files_set - found_names
3280
+ if not_found:
3281
+ console.print(f"[yellow]⚠ Warning: Test case files not found: {', '.join(not_found)}[/yellow]")
3282
+ else:
3283
+ # Execute all test cases
3284
+ test_case_files_list = sorted(test_cases_path.rglob('TC-*.md'))
3285
+
3286
+ if not test_case_files_list:
3287
+ if test_case_files:
3288
+ console.print(f"[yellow]No matching test case files found in {test_cases_dir}[/yellow]")
3289
+ else:
3290
+ console.print(f"[yellow]No test case files found in {test_cases_dir}[/yellow]")
3291
+ return
3292
+
3293
+ console.print(f"\n[bold cyan]🧪 Test Execution Started[/bold cyan]")
3294
+ console.print(f"Agent: [bold]{agent_name}[/bold]")
3295
+ console.print(f"Test Cases: {len(test_case_files_list)}")
3296
+ if test_case_files:
3297
+ console.print(f"Selected: [cyan]{', '.join(test_case_files)}[/cyan]")
3298
+ console.print(f"Results Directory: {results_dir}\n")
3299
+
3300
+ data_gen_def = None
3301
+ if data_generator and not skip_data_generation:
3302
+ try:
3303
+ data_gen_def = load_agent_definition(data_generator)
3304
+ data_gen_name = data_gen_def.get('name', Path(data_generator).stem)
3305
+ console.print(f"Data Generator Agent: [bold]{data_gen_name}[/bold]\n")
3306
+ except Exception as e:
3307
+ console.print(f"[yellow]⚠ Warning: Failed to setup data generator: {e}[/yellow]")
3308
+ console.print("[yellow]Continuing with test execution...[/yellow]\n")
3309
+ logger.debug(f"Data generator setup error: {e}", exc_info=True)
3310
+
3311
+ # Track overall results
3312
+ total_tests = 0
3313
+ passed_tests = 0
3314
+ failed_tests = 0
3315
+ test_results = [] # Store structured results for final report
3316
+
3317
+ # Store bulk data generation chat history to pass to test executors
3318
+ bulk_gen_chat_history = []
3319
+
3320
+ # Parse all test cases upfront for bulk data generation
3321
+ parsed_test_cases = []
3322
+ for test_file in test_case_files_list:
3323
+ try:
3324
+ test_case = parse_test_case(str(test_file))
3325
+ parsed_test_cases.append({
3326
+ 'file': test_file,
3327
+ 'data': test_case
3328
+ })
3329
+ except Exception as e:
3330
+ console.print(f"[yellow]⚠ Warning: Failed to parse {test_file.name}: {e}[/yellow]")
3331
+ logger.debug(f"Parse error for {test_file.name}: {e}", exc_info=True)
3332
+
3333
+ # Filter test cases that need data generation
3334
+ test_cases_needing_data_gen = [
3335
+ tc for tc in parsed_test_cases
3336
+ if tc['data'].get('generate_test_data', True)
3337
+ ]
3338
+
3339
+ # Bulk test data generation (if enabled)
3340
+ if data_gen_def and not skip_data_generation and test_cases_needing_data_gen:
3341
+ console.print(f"\n[bold yellow]🔧 Bulk Test Data Generation[/bold yellow]")
3342
+ console.print(f"Generating test data for {len(test_cases_needing_data_gen)} test cases...\n")
3343
+ console.print(f"[dim]Skipping {len(parsed_test_cases) - len(test_cases_needing_data_gen)} test cases with generateTestData: false[/dim]\n")
3344
+
3345
+ bulk_data_gen_prompt = _build_bulk_data_gen_prompt(test_cases_needing_data_gen)
3346
+
3347
+ console.print(f"Executing test data generation prompt {bulk_data_gen_prompt}\n")
3348
+
3349
+ try:
3350
+ # Setup data generator agent
3351
+ from langgraph.checkpoint.sqlite import SqliteSaver
3352
+ bulk_memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
3353
+
3354
+ # Use first test case's config or empty tuple
3355
+ first_config_path = None
3356
+ if parsed_test_cases:
3357
+ first_tc = parsed_test_cases[0]
3358
+ first_config_path = resolve_toolkit_config_path(
3359
+ first_tc['data'].get('config_path', ''),
3360
+ first_tc['file'],
3361
+ test_cases_path
3362
+ )
3363
+
3364
+ data_gen_config_tuple = (first_config_path,) if first_config_path else ()
3365
+ data_gen_executor, _, _, _, _, _, _ = _setup_local_agent_executor(
3366
+ client, data_gen_def, data_gen_config_tuple, config,
3367
+ model, temperature, max_tokens, bulk_memory, work_dir
3368
+ )
3369
+
3370
+ if data_gen_executor:
3371
+ with console.status("[yellow]Generating test data for all test cases...[/yellow]", spinner="dots"):
3372
+ bulk_gen_result = data_gen_executor.invoke({
3373
+ "input": bulk_data_gen_prompt,
3374
+ "chat_history": []
3375
+ })
3376
+ bulk_gen_output = extract_output_from_result(bulk_gen_result)
3377
+ console.print(f"[green]✓ Bulk test data generation completed[/green]")
3378
+ console.print(f"[dim]{bulk_gen_output}...[/dim]\n")
3379
+
3380
+ # Store chat history from data generation to pass to test executors
3381
+ bulk_gen_chat_history = [
3382
+ {"role": "user", "content": bulk_data_gen_prompt},
3383
+ {"role": "assistant", "content": bulk_gen_output}
3384
+ ]
3385
+ else:
3386
+ console.print(f"[yellow]⚠ Warning: Data generator has no executor[/yellow]\n")
3387
+ except Exception as e:
3388
+ console.print(f"[yellow]⚠ Warning: Bulk data generation failed: {e}[/yellow]")
3389
+ console.print("[yellow]Continuing with test execution...[/yellow]\n")
3390
+ logger.debug(f"Bulk data generation error: {e}", exc_info=True)
3391
+
3392
+ # Execute ALL test cases in one bulk operation
3393
+ if not parsed_test_cases:
3394
+ console.print("[yellow]No test cases to execute[/yellow]")
3395
+ return
3396
+
3397
+ console.print(f"\n[bold yellow]📋 Executing ALL test cases in bulk...[/bold yellow]\n")
3398
+
3399
+ # Use first test case's config for agent setup
3400
+ first_tc = parsed_test_cases[0]
3401
+ first_test_file = first_tc['file']
3402
+ toolkit_config_path = resolve_toolkit_config_path(
3403
+ first_tc['data'].get('config_path', ''),
3404
+ first_test_file,
3405
+ test_cases_path
3406
+ )
3407
+ toolkit_config_tuple = (toolkit_config_path,) if toolkit_config_path else ()
3408
+
3409
+ # Create memory for bulk execution
3410
+ from langgraph.checkpoint.sqlite import SqliteSaver
3411
+ memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
3412
+
3413
+ # Initialize chat history with bulk data generation context
3414
+ chat_history = bulk_gen_chat_history.copy()
3415
+
3416
+ # Setup agent executor
3417
+ agent_executor, _, _, _, _, _, _ = _setup_local_agent_executor(
3418
+ client, agent_def, toolkit_config_tuple, config, model, temperature, max_tokens, memory, work_dir
3419
+ )
3420
+
3421
+ # Build bulk execution prompt
3422
+ bulk_all_prompt = _build_bulk_execution_prompt(parsed_test_cases)
3423
+
3424
+ console.print(f"Executing the prompt: {bulk_all_prompt}\n")
3425
+
3426
+ # Execute all test cases in bulk
3427
+ test_results = []
3428
+ all_execution_output = ""
3429
+
3430
+ try:
3431
+ if agent_executor:
3432
+ with console.status(f"[yellow]Executing {len(parsed_test_cases)} test cases in bulk...[/yellow]", spinner="dots"):
3433
+ bulk_result = agent_executor.invoke({
3434
+ "input": bulk_all_prompt,
3435
+ "chat_history": chat_history
3436
+ })
3437
+ all_execution_output = extract_output_from_result(bulk_result)
3438
+
3439
+ console.print(f"[green]✓ All test cases executed[/green]")
3440
+ console.print(f"[dim]{all_execution_output}...[/dim]\n")
3441
+
3442
+ # Update chat history
3443
+ chat_history.append({"role": "user", "content": bulk_all_prompt})
3444
+ chat_history.append({"role": "assistant", "content": all_execution_output})
3445
+
3446
+ # Now validate ALL test cases in bulk
3447
+ console.print(f"[bold yellow]✅ Validating all test cases...[/bold yellow]\n")
3448
+
3449
+ validation_prompt = _build_validation_prompt(parsed_test_cases, all_execution_output)
3450
+
3451
+ console.print(f"[dim]{validation_prompt}[/dim]\n")
3452
+
3453
+ with console.status("[yellow]Validating all results...[/yellow]", spinner="dots"):
3454
+ validation_result = agent_executor.invoke({
3455
+ "input": validation_prompt,
3456
+ "chat_history": chat_history
3457
+ })
3458
+
3459
+ validation_output = extract_output_from_result(validation_result)
3460
+
3461
+ console.print(f"[dim]Validation Response: {validation_output}...[/dim]\n")
3462
+
3463
+ # Parse validation JSON
3464
+ try:
3465
+ validation_json = _extract_json_from_text(validation_output)
3466
+ test_cases_results = validation_json.get('test_cases', [])
3467
+
3468
+ # Process results for each test case
3469
+ total_tests = 0
3470
+ passed_tests = 0
3471
+ failed_tests = 0
3472
+
3473
+ for tc_result in test_cases_results:
3474
+ test_name = tc_result.get('test_name', f"Test #{tc_result.get('test_number', '?')}")
3475
+ step_results = tc_result.get('steps', [])
3476
+
3477
+ # Determine if test passed (all steps must pass)
3478
+ test_passed = all(step.get('passed', False) for step in step_results) if step_results else False
3479
+
3480
+ total_tests += 1
3481
+ if test_passed:
3482
+ passed_tests += 1
3483
+ console.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
3484
+ else:
3485
+ failed_tests += 1
3486
+ console.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")
3487
+
3488
+ # Display individual step results
3489
+ for step_result in step_results:
3490
+ step_num = step_result.get('step_number')
3491
+ step_title = step_result.get('title', '')
3492
+ passed = step_result.get('passed', False)
3493
+ details = step_result.get('details', '')
3494
+
3495
+ if passed:
3496
+ console.print(f" [green]✓ Step {step_num}: {step_title}[/green]")
3497
+ console.print(f" [dim]{details}[/dim]")
3498
+ else:
3499
+ console.print(f" [red]✗ Step {step_num}: {step_title}[/red]")
3500
+ console.print(f" [dim]{details}[/dim]")
3501
+
3502
+ console.print()
3503
+
3504
+ # Store result
3505
+ test_results.append({
3506
+ 'title': test_name,
3507
+ 'passed': test_passed,
3508
+ 'file': parsed_test_cases[tc_result.get('test_number', 1) - 1]['file'].name if tc_result.get('test_number', 1) - 1 < len(parsed_test_cases) else 'unknown',
3509
+ 'step_results': step_results
3510
+ })
3511
+
3512
+ except Exception as e:
3513
+ logger.debug(f"Validation parsing failed: {e}")
3514
+ console.print(f"[yellow]⚠ Warning: Could not parse validation results: {e}[/yellow]\n")
3515
+ test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
3516
+ else:
3517
+ console.print(f"[red]✗ No agent executor available[/red]\n")
3518
+ test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
3519
+
3520
+ except Exception as e:
3521
+ console.print(f"[red]✗ Bulk execution failed: {e}[/red]\n")
3522
+ logger.debug(f"Bulk execution error: {e}", exc_info=True)
3523
+ test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
3524
+
3525
+ # Generate summary report
3526
+ console.print(f"\n[bold]{'='*60}[/bold]")
3527
+ console.print(f"[bold cyan]📊 Test Execution Summary[/bold cyan]")
3528
+ console.print(f"[bold]{'='*60}[/bold]\n")
3529
+
3530
+ summary_table = Table(box=box.ROUNDED, border_style="cyan")
3531
+ summary_table.add_column("Metric", style="bold")
3532
+ summary_table.add_column("Value", justify="right")
3533
+
3534
+ summary_table.add_row("Total Tests", str(total_tests))
3535
+ summary_table.add_row("Passed", f"[green]{passed_tests}[/green]")
3536
+ summary_table.add_row("Failed", f"[red]{failed_tests}[/red]")
3537
+
3538
+ if total_tests > 0:
3539
+ pass_rate = (passed_tests / total_tests) * 100
3540
+ summary_table.add_row("Pass Rate", f"{pass_rate:.1f}%")
3541
+
3542
+ console.print(summary_table)
3543
+
3544
+ # Generate structured JSON report
3545
+ overall_result = "pass" if failed_tests == 0 else "fail"
3546
+
3547
+ structured_report = {
3548
+ "test_cases": [
3549
+ {
3550
+ "title": r['title'],
3551
+ "passed": r['passed'],
3552
+ "steps": r.get('step_results', [])
3553
+ }
3554
+ for r in test_results
3555
+ ],
3556
+ "overall_result": overall_result,
3557
+ "summary": {
3558
+ "total_tests": total_tests,
3559
+ "passed": passed_tests,
3560
+ "failed": failed_tests,
3561
+ "pass_rate": f"{pass_rate:.1f}%" if total_tests > 0 else "0%"
3562
+ },
3563
+ "timestamp": datetime.now().isoformat()
3564
+ }
3565
+
3566
+ # Save structured report
3567
+ results_path = Path(results_dir)
3568
+ results_path.mkdir(parents=True, exist_ok=True)
3569
+ summary_file = results_path / "test_execution_summary.json"
3570
+
3571
+ console.print(f"\n[bold yellow]💾 Saving test execution summary...[/bold yellow]")
3572
+ with open(summary_file, 'w') as f:
3573
+ json.dump(structured_report, f, indent=2)
3574
+ console.print(f"[green]✓ Summary saved to {summary_file}[/green]\n")
3575
+
3576
+ # Exit with error code if any tests failed
3577
+ if failed_tests > 0:
3578
+ sys.exit(1)
3579
+
3580
+ except click.ClickException:
3581
+ raise
3582
+ except Exception as e:
3583
+ logger.exception("Failed to execute test cases")
3584
+ error_panel = Panel(
3585
+ str(e),
3586
+ title="Error",
3587
+ border_style="red",
3588
+ box=box.ROUNDED
3589
+ )
3590
+ console.print(error_panel, style="red")
3591
+ raise click.Abort()
3592
+
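Note: the `test_execution_summary.json` written by `execute-test-cases` above can be consumed directly by downstream tooling. A small sketch of reading it in CI, using the field names of the `structured_report` built in this command (the results directory is whatever `--results-dir` was given; everything else here is illustrative):

    # Reads the summary produced by execute-test-cases and mirrors its exit behaviour.
    import json
    import sys
    from pathlib import Path

    def check_summary(results_dir: str) -> int:
        report = json.loads((Path(results_dir) / "test_execution_summary.json").read_text())
        summary = report["summary"]
        print(f"{summary['passed']}/{summary['total_tests']} passed ({summary['pass_rate']})")
        for case in report["test_cases"]:
            status = "PASS" if case["passed"] else "FAIL"
            print(f"  [{status}] {case['title']}")
        return 0 if report["overall_result"] == "pass" else 1

    if __name__ == "__main__":
        sys.exit(check_summary(sys.argv[1] if len(sys.argv) > 1 else "./results"))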