alita-sdk 0.3.465__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk has been flagged as potentially problematic.
Files changed (103)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +22 -4
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1876 -186
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +910 -64
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +108 -31
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/constants.py +3 -1
  56. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  57. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  58. alita_sdk/runtime/langchain/langraph_agent.py +123 -31
  59. alita_sdk/runtime/llms/preloaded.py +2 -6
  60. alita_sdk/runtime/toolkits/__init__.py +2 -0
  61. alita_sdk/runtime/toolkits/application.py +1 -1
  62. alita_sdk/runtime/toolkits/mcp.py +107 -91
  63. alita_sdk/runtime/toolkits/planning.py +173 -0
  64. alita_sdk/runtime/toolkits/tools.py +59 -7
  65. alita_sdk/runtime/tools/artifact.py +46 -17
  66. alita_sdk/runtime/tools/function.py +2 -1
  67. alita_sdk/runtime/tools/llm.py +320 -32
  68. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  69. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  70. alita_sdk/runtime/tools/planning/models.py +246 -0
  71. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  72. alita_sdk/runtime/tools/vectorstore_base.py +44 -9
  73. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  74. alita_sdk/runtime/utils/mcp_client.py +465 -0
  75. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  76. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  77. alita_sdk/runtime/utils/streamlit.py +6 -10
  78. alita_sdk/runtime/utils/toolkit_utils.py +14 -5
  79. alita_sdk/tools/__init__.py +54 -27
  80. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  81. alita_sdk/tools/base_indexer_toolkit.py +99 -20
  82. alita_sdk/tools/bitbucket/__init__.py +2 -2
  83. alita_sdk/tools/chunkers/__init__.py +3 -1
  84. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  85. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  86. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  87. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  88. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  89. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  90. alita_sdk/tools/elitea_base.py +86 -21
  91. alita_sdk/tools/jira/__init__.py +1 -1
  92. alita_sdk/tools/jira/api_wrapper.py +91 -40
  93. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  94. alita_sdk/tools/qtest/__init__.py +1 -1
  95. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  96. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  97. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  98. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +2 -1
  99. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +103 -61
  100. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  101. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  102. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  103. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
alita_sdk/cli/agents.py CHANGED
@@ -11,8 +11,10 @@ import json
 import logging
 import sqlite3
 import sys
+import re
 from typing import Optional, Dict, Any, List
 from pathlib import Path
+from datetime import datetime
 import yaml
 
 from rich.console import Console
@@ -32,6 +34,8 @@ from .agent_executor import create_llm_instance, create_agent_executor, create_a
 from .toolkit_loader import load_toolkit_config, load_toolkit_configs
 from .callbacks import create_cli_callback, CLICallbackHandler
 from .input_handler import get_input_handler, styled_input, styled_selection_input
+# Context management for chat history
+from .context import CLIContextManager, CLIMessage, purge_old_sessions as purge_context_sessions
 
 logger = logging.getLogger(__name__)
 
@@ -39,6 +43,335 @@ logger = logging.getLogger(__name__)
 console = Console()
 
 
+def resolve_toolkit_config_path(config_path_str: str, test_file: Path, test_cases_dir: Path) -> Optional[str]:
+    """
+    Resolve toolkit configuration file path from test case.
+
+    Tries multiple locations in order:
+    1. Absolute path
+    2. Relative to test case file directory
+    3. Relative to test cases directory
+    4. Relative to workspace root
+
+    Args:
+        config_path_str: Config path from test case
+        test_file: Path to the test case file
+        test_cases_dir: Path to test cases directory
+
+    Returns:
+        Absolute path to config file if found, None otherwise
+    """
+    if not config_path_str:
+        return None
+
+    # Normalize path separators
+    config_path_str = config_path_str.replace('\\', '/')
+
+    # Try absolute path first
+    config_path = Path(config_path_str)
+    if config_path.is_absolute() and config_path.exists():
+        return str(config_path)
+
+    # Try relative to test case file directory
+    config_path = test_file.parent / config_path_str
+    if config_path.exists():
+        return str(config_path)
+
+    # Try relative to test_cases_dir
+    config_path = test_cases_dir / config_path_str
+    if config_path.exists():
+        return str(config_path)
+
+    # Try relative to workspace root
+    workspace_root = Path.cwd()
+    config_path = workspace_root / config_path_str
+    if config_path.exists():
+        return str(config_path)
+
+    return None
+
+
+def parse_test_case(test_case_path: str) -> Dict[str, Any]:
+    """
+    Parse a test case markdown file to extract configuration, steps, and expectations.
+
+    Args:
+        test_case_path: Path to the test case markdown file
+
+    Returns:
+        Dictionary containing:
+        - name: Test case name
+        - objective: Test objective
+        - config_path: Path to toolkit config file
+        - generate_test_data: Boolean flag indicating if test data generation is needed (default: True)
+        - test_data_config: Dictionary of test data configuration from table
+        - prerequisites: Pre-requisites section text
+        - variables: List of variable placeholders found (e.g., {{TEST_PR_NUMBER}})
+        - steps: List of test steps with their descriptions
+        - expectations: List of expectations/assertions
+    """
+    path = Path(test_case_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Test case not found: {test_case_path}")
+
+    content = path.read_text(encoding='utf-8')
+
+    # Extract test case name from the first heading
+    name_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
+    name = name_match.group(1) if name_match else path.stem
+
+    # Extract objective
+    objective_match = re.search(r'##\s+Objective\s*\n\n(.+?)(?=\n\n##|\Z)', content, re.DOTALL)
+    objective = objective_match.group(1).strip() if objective_match else ""
+
+    # Extract config path and generateTestData flag
+    config_section_match = re.search(r'##\s+Config\s*\n\n(.+?)(?=\n\n##|\Z)', content, re.DOTALL)
+    config_path = None
+    generate_test_data = True  # Default to True if not specified
+
+    if config_section_match:
+        config_section = config_section_match.group(1)
+        # Extract path
+        path_match = re.search(r'path:\s*(.+?)(?=\n|$)', config_section, re.MULTILINE)
+        if path_match:
+            config_path = path_match.group(1).strip()
+
+        # Extract generateTestData flag
+        gen_data_match = re.search(r'generateTestData\s*:\s*(true|false)', config_section, re.IGNORECASE)
+        if gen_data_match:
+            generate_test_data = gen_data_match.group(1).lower() == 'true'
+
+    # Extract Test Data Configuration table
+    test_data_config = {}
+    config_section_match = re.search(r'##\s+Test Data Configuration\s*\n(.+?)(?=\n##|\Z)', content, re.DOTALL)
+    if config_section_match:
+        config_section = config_section_match.group(1)
+        # Parse markdown table (format: | Parameter | Value | Description |)
+        table_rows = re.findall(r'\|\s*\*\*([^*]+)\*\*\s*\|\s*`?([^|`]+)`?\s*\|', config_section)
+        for param, value in table_rows:
+            test_data_config[param.strip()] = value.strip()
+
+    # Extract Pre-requisites section
+    prerequisites = ""
+    prereq_match = re.search(r'##\s+Pre-requisites\s*\n\n(.+?)(?=\n\n##|\Z)', content, re.DOTALL)
+    if prereq_match:
+        prerequisites = prereq_match.group(1).strip()
+
+    # Find all variable placeholders ({{VARIABLE_NAME}})
+    variables = list(set(re.findall(r'\{\{([A-Z_]+)\}\}', content)))
+
+    # Extract test steps and expectations
+    steps = []
+    expectations = []
+
+    # Find all Step sections
+    step_pattern = r'###\s+Step\s+(\d+):\s+(.+?)\n\n(.+?)(?=\n\n###|\n\n##|\Z)'
+    for step_match in re.finditer(step_pattern, content, re.DOTALL):
+        step_num = step_match.group(1)
+        step_title = step_match.group(2).strip()
+        step_content = step_match.group(3).strip()
+
+        # Extract the actual instruction (first paragraph before "Expectation:")
+        instruction_match = re.search(r'(.+?)(?=\n\n\*\*Expectation:\*\*|\Z)', step_content, re.DOTALL)
+        instruction = instruction_match.group(1).strip() if instruction_match else step_content
+
+        # Extract expectation if present
+        expectation_match = re.search(r'\*\*Expectation:\*\*\s+(.+)', step_content, re.DOTALL)
+        expectation = expectation_match.group(1).strip() if expectation_match else None
+
+        steps.append({
+            'number': int(step_num),
+            'title': step_title,
+            'instruction': instruction,
+            'expectation': expectation
+        })
+
+        if expectation:
+            expectations.append({
+                'step': int(step_num),
+                'description': expectation
+            })
+
+    return {
+        'name': name,
+        'objective': objective,
+        'config_path': config_path,
+        'generate_test_data': generate_test_data,
+        'test_data_config': test_data_config,
+        'prerequisites': prerequisites,
+        'variables': variables,
+        'steps': steps,
+        'expectations': expectations
+    }
+
+
+def validate_test_output(output: str, expectation: str) -> tuple[bool, str]:
+    """
+    Validate test output against expectations.
+
+    Args:
+        output: The actual output from the agent
+        expectation: The expected result description
+
+    Returns:
+        Tuple of (passed: bool, details: str)
+    """
+    # Simple keyword-based validation
+    # Extract key phrases from expectation
+
+    # Common patterns in expectations
+    if "contains" in expectation.lower():
+        # Extract what should be contained
+        contains_match = re.search(r'contains.*?["`]([^"`]+)["`]', expectation, re.IGNORECASE)
+        if contains_match:
+            expected_text = contains_match.group(1)
+            if expected_text in output:
+                return True, f"Output contains expected text: '{expected_text}'"
+            else:
+                return False, f"Output does not contain expected text: '{expected_text}'"
+
+    if "without errors" in expectation.lower() or "runs without errors" in expectation.lower():
+        # Check for common error indicators
+        error_indicators = ['error', 'exception', 'failed', 'traceback']
+        has_error = any(indicator in output.lower() for indicator in error_indicators)
+        if not has_error:
+            return True, "Execution completed without errors"
+        else:
+            return False, "Execution encountered errors"
+
+    # Default: assume pass if output is non-empty
+    if output and len(output.strip()) > 0:
+        return True, "Output generated successfully"
+
+    return False, "No output generated"
+
+
+def _build_bulk_data_gen_prompt(parsed_test_cases: list) -> str:
+    """Build consolidated requirements text for bulk test data generation."""
+    requirements = []
+    for idx, tc in enumerate(parsed_test_cases, 1):
+        test_case = tc['data']
+        test_file = tc['file']
+
+        parts = [f"Test Case #{idx}: {test_case['name']}", f"File: {test_file.name}", ""]
+
+        if test_case.get('test_data_config'):
+            parts.append("Test Data Configuration:")
+            for param, value in test_case['test_data_config'].items():
+                parts.append(f"  - {param}: {value}")
+
+        if test_case.get('prerequisites'):
+            parts.append(f"\nPre-requisites:\n{test_case['prerequisites']}")
+
+        if test_case.get('variables'):
+            parts.append(f"\nVariables to generate: {', '.join(test_case['variables'])}")
+
+        requirements.append("\n".join(parts))
+
+    return f"""{'='*60}
+
+{chr(10).join(requirements)}
+
+{'='*60}"""
+
+
+def _build_bulk_execution_prompt(parsed_test_cases: list) -> str:
+    """Build consolidated prompt for bulk test execution."""
+    parts = []
+
+    for idx, tc_info in enumerate(parsed_test_cases, 1):
+        test_case = tc_info['data']
+        test_file = tc_info['file']
+
+        parts.append(f"\n{'='*80}\nTEST CASE #{idx}: {test_case['name']}\nFile: {test_file.name}\n{'='*80}")
+
+        if test_case['steps']:
+            for step in test_case['steps']:
+                parts.append(f"\nStep {step['number']}: {step['title']}\n{step['instruction']}")
+                if step['expectation']:
+                    parts.append(f"Expected Result: {step['expectation']}")
+        else:
+            parts.append("\n(No steps defined)")
+
+    return "\n".join(parts)
+
+
+def _build_validation_prompt(parsed_test_cases: list, execution_output: str) -> str:
+    """Build prompt for bulk validation of test results."""
+    parts = ["You are a test validator. Review the test execution results and validate each test case.\n\nTest Cases to Validate:\n"]
+
+    for idx, tc_info in enumerate(parsed_test_cases, 1):
+        test_case = tc_info['data']
+        parts.append(f"\nTest Case #{idx}: {test_case['name']}")
+        if test_case['steps']:
+            for step in test_case['steps']:
+                parts.append(f"  Step {step['number']}: {step['title']}")
+                if step['expectation']:
+                    parts.append(f"    Expected: {step['expectation']}")
+
+    parts.append(f"\n\nActual Execution Results:\n{execution_output}\n")
+    parts.append(f"""\nBased on the execution results above, validate each test case.
+
+Respond with valid JSON in this EXACT format:
+{{
+  "test_cases": [
+    {{
+      "test_number": 1,
+      "test_name": "<test case name>",
+      "steps": [
+        {{"step_number": 1, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}},
+        {{"step_number": 2, "title": "<step title>", "passed": true/false, "details": "<brief explanation>"}}
+      ]
+    }},
+    {{
+      "test_number": 2,
+      "test_name": "<test case name>",
+      "steps": [...]
+    }}
+  ]
+}}
+
+Validate all {len(parsed_test_cases)} test cases and their steps.""")
+
+    return "\n".join(parts)
+
+
+def _extract_json_from_text(text: str) -> dict:
+    """Extract JSON object from text using brace counting."""
+    start_idx = text.find('{')
+    if start_idx == -1:
+        raise ValueError("No JSON found in text")
+
+    brace_count = 0
+    end_idx = -1
+    for i, char in enumerate(text[start_idx:], start=start_idx):
+        if char == '{':
+            brace_count += 1
+        elif char == '}':
+            brace_count -= 1
+            if brace_count == 0:
+                end_idx = i + 1
+                break
+
+    if end_idx == -1:
+        raise ValueError("Could not find matching closing brace")
+
+    return json.loads(text[start_idx:end_idx])
+
+
+def _create_fallback_results(parsed_test_cases: list) -> tuple[list, int, int, int]:
+    """Create fallback results when execution/validation fails."""
+    test_results = []
+    for tc_info in parsed_test_cases:
+        test_results.append({
+            'title': tc_info['data']['name'],
+            'passed': False,
+            'file': tc_info['file'].name,
+            'step_results': []
+        })
+    return test_results, len(parsed_test_cases), 0, len(parsed_test_cases)
+
+
 def _get_alita_system_prompt(config) -> str:
     """
     Get the Alita system prompt from user config or fallback to default.
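Editor's note on the test-runner helpers above: `parse_test_case` assumes a specific markdown layout (a single `#` heading for the name, `## Objective`, a `## Config` block with a `path:` line and an optional `generateTestData:` flag, `### Step N: <title>` sections carrying a `**Expectation:**` marker, and `{{UPPER_CASE}}` placeholders). A minimal sketch of a file it would accept — the file name, repository placeholder and step text below are invented for illustration:

```python
from pathlib import Path

from alita_sdk.cli.agents import parse_test_case  # module shown in this diff

# Hypothetical test case; section names mirror the regexes in parse_test_case.
sample = """# Verify PR listing

## Objective

List open pull requests through the configured toolkit.

## Config

path: configs/github-config.json
generateTestData: false

### Step 1: List pull requests

Ask the agent to list open PRs for {{TEST_REPO}}.

**Expectation:** Output contains "pull request" and runs without errors.
"""

Path("tc_pr_listing.md").write_text(sample, encoding="utf-8")
parsed = parse_test_case("tc_pr_listing.md")

print(parsed["name"])                      # Verify PR listing
print(parsed["config_path"])               # configs/github-config.json
print(parsed["generate_test_data"])        # False
print(parsed["variables"])                 # ['TEST_REPO']
print(parsed["steps"][0]["expectation"])   # Output contains "pull request" and runs without errors.
```

The printed fields correspond to the dictionary keys returned at the end of `parse_test_case`.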
@@ -77,6 +410,210 @@ def _get_alita_system_prompt(config) -> str:
     return DEFAULT_PROMPT
 
 
+def _get_inventory_system_prompt(config) -> str:
+    """
+    Get the Inventory agent system prompt from user config or fallback to default.
+
+    Checks for $ALITA_DIR/agents/inventory.agent.md first, then falls back
+    to the default prompt with inventory-specific instructions.
+
+    Returns:
+        The system prompt string for Inventory agent
+    """
+    from .agent.default import DEFAULT_PROMPT
+
+    # Check for user-customized prompt
+    custom_prompt_path = Path(config.agents_dir) / 'inventory.agent.md'
+
+    if custom_prompt_path.exists():
+        try:
+            content = custom_prompt_path.read_text(encoding='utf-8')
+            # Parse the agent.md file - extract system_prompt from frontmatter or use content
+            if content.startswith('---'):
+                try:
+                    parts = content.split('---', 2)
+                    if len(parts) >= 3:
+                        frontmatter = yaml.safe_load(parts[1])
+                        body = parts[2].strip()
+                        return frontmatter.get('system_prompt', body) if frontmatter else body
+                except Exception:
+                    pass
+            return content.strip()
+        except Exception as e:
+            logger.debug(f"Failed to load custom Inventory prompt from {custom_prompt_path}: {e}")
+
+    # Use default prompt + inventory toolkit instructions
+    inventory_context = """
+
+## Inventory Knowledge Graph
+
+You have access to the Inventory toolkit for querying a knowledge graph of software entities and relationships.
+Use these tools to help users understand their codebase:
+
+- **search_entities**: Find entities by name, type, or path patterns
+- **get_entity**: Get full details of a specific entity
+- **get_relationships**: Find relationships from/to an entity
+- **impact_analysis**: Analyze what depends on an entity (useful for change impact)
+- **get_graph_stats**: Get statistics about the knowledge graph
+
+When answering questions about the codebase, use these tools to provide accurate, citation-backed answers.
+"""
+    return DEFAULT_PROMPT + inventory_context
+
+
+def _resolve_inventory_path(path: str, work_dir: Optional[str] = None) -> Optional[str]:
+    """
+    Resolve an inventory/knowledge graph file path.
+
+    Tries locations in order:
+    1. Absolute path
+    2. Relative to current working directory (or work_dir if provided)
+    3. Relative to .alita/inventory/ in current directory
+    4. Relative to .alita/inventory/ in work_dir (if different)
+
+    Args:
+        path: The path to resolve (can be relative or absolute)
+        work_dir: Optional workspace directory to check
+
+    Returns:
+        Absolute path to the file if found, None otherwise
+    """
+    # Expand user home directory
+    path = str(Path(path).expanduser())
+
+    # Try absolute path first
+    if Path(path).is_absolute() and Path(path).exists():
+        return str(Path(path).resolve())
+
+    # Try relative to current working directory
+    cwd = Path.cwd()
+    cwd_path = cwd / path
+    if cwd_path.exists():
+        return str(cwd_path.resolve())
+
+    # Try .alita/inventory/ in current directory
+    alita_inventory_path = cwd / '.alita' / 'inventory' / path
+    if alita_inventory_path.exists():
+        return str(alita_inventory_path.resolve())
+
+    # If work_dir is different from cwd, try there too
+    if work_dir:
+        work_path = Path(work_dir)
+        if work_path != cwd:
+            # Try relative to work_dir
+            work_rel_path = work_path / path
+            if work_rel_path.exists():
+                return str(work_rel_path.resolve())
+
+            # Try .alita/inventory/ in work_dir
+            work_alita_path = work_path / '.alita' / 'inventory' / path
+            if work_alita_path.exists():
+                return str(work_alita_path.resolve())
+
+    return None
+
+
+def _build_inventory_config(path: str, work_dir: Optional[str] = None) -> Optional[Dict[str, Any]]:
+    """
+    Build an inventory toolkit configuration from a file path.
+
+    The toolkit name is derived from the filename (stem).
+    All available tools are included.
+
+    Args:
+        path: Path to the knowledge graph JSON file
+        work_dir: Optional workspace directory for path resolution
+
+    Returns:
+        Toolkit configuration dict if file found, None otherwise
+    """
+    # Resolve the path
+    resolved_path = _resolve_inventory_path(path, work_dir)
+    if not resolved_path:
+        return None
+
+    # Validate it's a JSON file
+    if not resolved_path.endswith('.json'):
+        return None
+
+    # Validate file exists and is readable
+    try:
+        with open(resolved_path, 'r') as f:
+            # Just check it's valid JSON
+            json.load(f)
+    except (IOError, json.JSONDecodeError):
+        return None
+
+    # Extract toolkit name from filename (e.g., 'alita' from 'alita.json')
+    toolkit_name = Path(resolved_path).stem
+
+    # Build configuration with all available tools
+    from .toolkit_loader import INVENTORY_TOOLS
+
+    return {
+        'type': 'inventory',
+        'toolkit_name': toolkit_name,
+        'graph_path': resolved_path,
+        'base_directory': work_dir,
+        'selected_tools': INVENTORY_TOOLS,
+    }
+
+
+def _get_inventory_json_files(work_dir: Optional[str] = None) -> List[str]:
+    """
+    Get list of .json files for inventory path completion.
+
+    Searches:
+    1. Current working directory (*.json files)
+    2. .alita/inventory/ directory (*.json files)
+    3. work_dir and work_dir/.alita/inventory/ if different from cwd
+
+    Args:
+        work_dir: Optional workspace directory
+
+    Returns:
+        List of relative or display paths for completion
+    """
+    suggestions = []
+    seen = set()
+
+    cwd = Path.cwd()
+
+    # Current directory .json files
+    for f in cwd.glob('*.json'):
+        if f.name not in seen:
+            suggestions.append(f.name)
+            seen.add(f.name)
+
+    # .alita/inventory/ directory
+    alita_inv = cwd / '.alita' / 'inventory'
+    if alita_inv.exists():
+        for f in alita_inv.glob('*.json'):
+            display = f'.alita/inventory/{f.name}'
+            if display not in seen:
+                suggestions.append(display)
+                seen.add(display)
+
+    # work_dir if different
+    if work_dir:
+        work_path = Path(work_dir)
+        if work_path != cwd:
+            for f in work_path.glob('*.json'):
+                if f.name not in seen:
+                    suggestions.append(f.name)
+                    seen.add(f.name)
+
+            work_alita_inv = work_path / '.alita' / 'inventory'
+            if work_alita_inv.exists():
+                for f in work_alita_inv.glob('*.json'):
+                    display = f'.alita/inventory/{f.name}'
+                    if display not in seen:
+                        suggestions.append(display)
+                        seen.add(display)
+
+    return sorted(suggestions)
+
+
 def _load_mcp_tools(agent_def: Dict[str, Any], mcp_config_path: str) -> List[Dict[str, Any]]:
     """Load MCP tools from agent definition with tool-level filtering.
 
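A small usage sketch for the inventory helpers above; the file layout and toolkit name are assumptions, and `_build_inventory_config` is a module-internal helper of `alita_sdk.cli.agents` rather than public API:

```python
from alita_sdk.cli.agents import _build_inventory_config  # internal helper defined above

# Assume the knowledge graph was exported to ./.alita/inventory/alita.json.
# _resolve_inventory_path tries, in order: the absolute path, the current
# working directory, ./.alita/inventory/, then the same two locations under
# work_dir - so a bare file name is enough here.
cfg = _build_inventory_config("alita.json", work_dir="./workspace")

if cfg:
    # Expected shape, per the return statement above:
    # {'type': 'inventory', 'toolkit_name': 'alita',
    #  'graph_path': '<resolved absolute path>', 'base_directory': './workspace',
    #  'selected_tools': INVENTORY_TOOLS}
    print(cfg["toolkit_name"], cfg["graph_path"])
else:
    print("inventory file not found or not valid JSON")
```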
@@ -93,10 +630,14 @@ def _load_mcp_tools(agent_def: Dict[str, Any], mcp_config_path: str) -> List[Dic
 
 def _setup_local_agent_executor(client, agent_def: Dict[str, Any], toolkit_config: tuple,
                                 config, model: Optional[str], temperature: Optional[float],
-                                max_tokens: Optional[int], memory, work_dir: Optional[str],
+                                max_tokens: Optional[int], memory, allowed_directories: Optional[List[str]],
                                 plan_state: Optional[Dict] = None):
     """Setup local agent executor with all configurations.
 
+    Args:
+        allowed_directories: List of allowed directories for filesystem access.
+            First directory is the primary/base directory.
+
     Returns:
         Tuple of (agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools)
     """
@@ -112,21 +653,28 @@
         client, model, agent_def, temperature, max_tokens
     )
 
-    # Add filesystem tools if --dir is provided
+    # Add filesystem tools if directories are provided
    filesystem_tools = None
    terminal_tools = None
-    if work_dir:
+    if allowed_directories:
         from .tools import get_filesystem_tools, get_terminal_tools
         preset = agent_def.get('filesystem_tools_preset')
         include_tools = agent_def.get('filesystem_tools_include')
         exclude_tools = agent_def.get('filesystem_tools_exclude')
-        filesystem_tools = get_filesystem_tools(work_dir, include_tools, exclude_tools, preset)
 
-        # Also add terminal tools when work_dir is set
-        terminal_tools = get_terminal_tools(work_dir)
+        # First directory is the primary base directory
+        base_dir = allowed_directories[0]
+        extra_dirs = allowed_directories[1:] if len(allowed_directories) > 1 else None
+        filesystem_tools = get_filesystem_tools(base_dir, include_tools, exclude_tools, preset, extra_dirs)
+
+        # Terminal tools use primary directory as cwd
+        terminal_tools = get_terminal_tools(base_dir)
 
         tool_count = len(filesystem_tools) + len(terminal_tools)
-        access_msg = f"✓ Granted filesystem & terminal access to: {work_dir} ({tool_count} tools)"
+        if len(allowed_directories) == 1:
+            access_msg = f"✓ Granted filesystem & terminal access to: {base_dir} ({tool_count} tools)"
+        else:
+            access_msg = f"✓ Granted filesystem & terminal access to {len(allowed_directories)} directories ({tool_count} tools)"
         if preset:
             access_msg += f" [preset: {preset}]"
         if include_tools:
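The `allowed_directories` handling above boils down to a simple split: the first entry becomes the filesystem base and the terminal working directory, the rest become additional allowed roots. A standalone sketch with placeholder paths (not code from the package):

```python
# Placeholder paths; mirrors the branch added to _setup_local_agent_executor.
allowed_directories = ["./src", "./docs", "/tmp/scratch"]

base_dir = allowed_directories[0]                                   # filesystem base + terminal cwd
extra_dirs = allowed_directories[1:] if len(allowed_directories) > 1 else None

# The wrapper then calls, per the diff above:
#   get_filesystem_tools(base_dir, include_tools, exclude_tools, preset, extra_dirs)
#   get_terminal_tools(base_dir)
print(base_dir, extra_dirs)
```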
@@ -396,20 +944,86 @@ def _select_toolkit_interactive(config) -> Optional[Dict[str, Any]]:
     return None
 
 
+def _list_available_toolkits(config) -> List[str]:
+    """
+    List names of all available toolkits in $ALITA_DIR/tools.
+
+    Returns:
+        List of toolkit names
+    """
+    tools_dir = Path(config.tools_dir)
+
+    if not tools_dir.exists():
+        return []
+
+    toolkit_names = []
+    for pattern in ['*.json', '*.yaml', '*.yml']:
+        for file_path in tools_dir.glob(pattern):
+            try:
+                config_data = load_toolkit_config(str(file_path))
+                name = config_data.get('toolkit_name') or config_data.get('name') or file_path.stem
+                toolkit_names.append(name)
+            except Exception:
+                pass
+
+    return toolkit_names
+
+
+def _find_toolkit_by_name(config, toolkit_name: str) -> Optional[Dict[str, Any]]:
+    """
+    Find a toolkit by name in $ALITA_DIR/tools.
+
+    Args:
+        config: CLI configuration
+        toolkit_name: Name of the toolkit to find (case-insensitive)
+
+    Returns:
+        Toolkit config dict or None if not found
+    """
+    tools_dir = Path(config.tools_dir)
+
+    if not tools_dir.exists():
+        return None
+
+    toolkit_name_lower = toolkit_name.lower()
+
+    for pattern in ['*.json', '*.yaml', '*.yml']:
+        for file_path in tools_dir.glob(pattern):
+            try:
+                config_data = load_toolkit_config(str(file_path))
+                name = config_data.get('toolkit_name') or config_data.get('name') or file_path.stem
+
+                # Match by name (case-insensitive) or file stem
+                if name.lower() == toolkit_name_lower or file_path.stem.lower() == toolkit_name_lower:
+                    return {
+                        'file': str(file_path),
+                        'name': name,
+                        'type': config_data.get('toolkit_type') or config_data.get('type', 'unknown'),
+                        'config': config_data
+                    }
+            except Exception:
+                pass
+
+    return None
+
+
 def _select_agent_interactive(client, config) -> Optional[str]:
     """
     Show interactive menu to select an agent from platform and local agents.
 
     Returns:
-        Agent source (name/id for platform, file path for local, '__direct__' for direct chat) or None if cancelled
+        Agent source (name/id for platform, file path for local, '__direct__' for direct chat,
+        '__inventory__' for inventory agent) or None if cancelled
     """
     from .config import CLIConfig
 
     console.print("\n🤖 [bold cyan]Select an agent to chat with:[/bold cyan]\n")
 
-    # First option: Alita (direct LLM chat, no agent)
+    # Built-in agents
     console.print(f"1. [[bold]💬 Alita[/bold]] [cyan]Chat directly with LLM (no agent)[/cyan]")
     console.print(f"   [dim]Direct conversation with the model without agent configuration[/dim]")
+    console.print(f"2. [[bold]📊 Inventory[/bold]] [cyan]Knowledge graph builder agent[/cyan]")
+    console.print(f"   [dim]Build inventories from connected toolkits (use --toolkit-config to add sources)[/dim]")
 
     agents_list = []
 
@@ -444,8 +1058,8 @@ def _select_agent_interactive(client, config) -> Optional[str]:
         except Exception as e:
             logger.debug(f"Failed to load {file_path}: {e}")
 
-    # Display agents with numbers using rich (starting from 2 since 1 is direct chat)
-    for i, agent in enumerate(agents_list, 2):
+    # Display agents with numbers using rich (starting from 3 since 1-2 are built-in)
+    for i, agent in enumerate(agents_list, 3):
         agent_type = "📦 Platform" if agent['type'] == 'platform' else "📁 Local"
         console.print(f"{i}. [[bold]{agent_type}[/bold]] [cyan]{agent['name']}[/cyan]")
         if agent['description']:
@@ -465,13 +1079,17 @@ def _select_agent_interactive(client, config) -> Optional[str]:
                 console.print(f"✓ [green]Selected:[/green] [bold]Alita[/bold]")
                 return '__direct__'
 
-            idx = int(choice) - 2  # Offset by 2 since 1 is direct chat
+            if choice == '2':
+                console.print(f"✓ [green]Selected:[/green] [bold]Inventory[/bold]")
+                return '__inventory__'
+
+            idx = int(choice) - 3  # Offset by 3 since 1-2 are built-in agents
             if 0 <= idx < len(agents_list):
                 selected = agents_list[idx]
                 console.print(f"✓ [green]Selected:[/green] [bold]{selected['name']}[/bold]")
                 return selected['source']
             else:
-                console.print(f"[yellow]Invalid selection. Please enter a number between 0 and {len(agents_list) + 1}[/yellow]")
+                console.print(f"[yellow]Invalid selection. Please enter a number between 0 and {len(agents_list) + 2}[/yellow]")
         except ValueError:
             console.print("[yellow]Please enter a valid number[/yellow]")
         except (KeyboardInterrupt, EOFError):
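With the Inventory entry added, menu slots 1-2 are reserved for built-ins and discovered agents start at 3, which is why the index math above now subtracts 3. A standalone paraphrase of that branch logic (not code from the package):

```python
def resolve_choice(choice: str, agents_list: list):
    """Map a menu choice to a selection, mirroring _select_agent_interactive."""
    if choice == '1':
        return '__direct__'      # Alita, direct LLM chat
    if choice == '2':
        return '__inventory__'   # built-in Inventory agent
    idx = int(choice) - 3        # agents_list entries start at menu slot 3
    if 0 <= idx < len(agents_list):
        return agents_list[idx]['source']
    return None                  # out of range -> invalid selection
```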
@@ -730,6 +1348,8 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
 @click.option('--version', help='Agent version (for platform agents)')
 @click.option('--toolkit-config', multiple=True, type=click.Path(exists=True),
               help='Toolkit configuration files (can specify multiple)')
+@click.option('--inventory', 'inventory_path', type=str,
+              help='Load inventory/knowledge graph from JSON file (e.g., alita.json or .alita/inventory/alita.json)')
 @click.option('--thread-id', help='Continue existing conversation thread')
 @click.option('--model', help='Override LLM model')
 @click.option('--temperature', type=float, help='Override temperature')
@@ -738,49 +1358,27 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
               help='Grant agent filesystem access to this directory')
 @click.option('--verbose', '-v', type=click.Choice(['quiet', 'default', 'debug']), default='default',
               help='Output verbosity level: quiet (final output only), default (tool calls + outputs), debug (all including LLM calls)')
+@click.option('--recursion-limit', type=int, default=50,
+              help='Maximum number of tool execution steps per turn')
 @click.pass_context
 def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
-               toolkit_config: tuple, thread_id: Optional[str],
+               toolkit_config: tuple, inventory_path: Optional[str], thread_id: Optional[str],
                model: Optional[str], temperature: Optional[float],
                max_tokens: Optional[int], work_dir: Optional[str],
-               verbose: str):
-    """
-    Start interactive chat with an agent.
-
-    If AGENT_SOURCE is not provided, shows an interactive menu to select from
-    available agents (both platform and local).
-
-    AGENT_SOURCE can be:
-    - Platform agent ID or name
-    - Path to local agent file
+               verbose: str, recursion_limit: Optional[int]):
+    """Start interactive chat with an agent.
 
+    \b
     Examples:
-
-        # Interactive selection
-        alita-cli agent chat
-
-        # Chat with platform agent
-        alita-cli agent chat my-agent
-
-        # Chat with local agent
-        alita-cli agent chat .github/agents/sdk-dev.agent.md
-
-        # With toolkit configurations
-        alita-cli agent chat my-agent \\
-            --toolkit-config jira-config.json \\
-            --toolkit-config github-config.json
-
-        # With filesystem access
-        alita-cli agent chat my-agent --dir ./workspace
-
-        # Continue previous conversation
-        alita-cli agent chat my-agent --thread-id abc123
-
-        # Quiet mode (hide tool calls and thinking)
-        alita-cli agent chat my-agent --verbose quiet
-
-        # Debug mode (show all including LLM calls)
-        alita-cli agent chat my-agent --verbose debug
+        alita chat                                    # Interactive agent selection
+        alita chat my-agent                           # Chat with platform agent
+        alita chat ./agent.md                         # Chat with local agent file
+        alita chat --inventory alita.json
+        alita chat my-agent --dir ./src
+        alita chat my-agent --thread-id abc123
+        alita chat my-agent -v quiet                  # Hide tool calls
+        alita chat my-agent -v debug                  # Show all LLM calls
+        alita chat __inventory__ --toolkit-config jira.json
     """
     formatter = ctx.obj['formatter']
     config = ctx.obj['config']
@@ -795,9 +1393,16 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
     if not agent_source:
         agent_source = '__direct__'
 
-    # Check for direct chat mode
+    # Check for built-in agent modes
     is_direct = agent_source == '__direct__'
-    is_local = not is_direct and Path(agent_source).exists()
+    is_inventory = agent_source == '__inventory__'
+    is_builtin = is_direct or is_inventory
+    is_local = not is_builtin and Path(agent_source).exists()
+
+    # Get defaults from config
+    default_model = config.default_model or 'gpt-4o'
+    default_temperature = config.default_temperature if config.default_temperature is not None else 0.1
+    default_max_tokens = config.default_max_tokens or 4096
 
     # Initialize variables for dynamic updates
     current_model = model
@@ -814,9 +1419,19 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
     planning_tools = None
     plan_state = None
 
+    # Handle --inventory option: add inventory toolkit config at startup
+    if inventory_path:
+        inventory_config = _build_inventory_config(inventory_path, work_dir)
+        if inventory_config:
+            added_toolkit_configs.append(inventory_config)
+            console.print(f"[dim]✓ Loading inventory: {inventory_config['toolkit_name']} ({inventory_config['graph_path']})[/dim]")
+        else:
+            console.print(f"[yellow]Warning: Inventory file not found: {inventory_path}[/yellow]")
+            console.print("[dim]Searched in current directory and .alita/inventory/[/dim]")
+
     # Approval mode: 'always' (confirm each tool), 'auto' (no confirmation), 'yolo' (no safety checks)
     approval_mode = 'always'
-    current_work_dir = work_dir  # Track work_dir for /dir command
+    allowed_directories = [work_dir] if work_dir else []  # Track allowed directories for /dir command
     current_agent_file = agent_source if is_local else None  # Track agent file for /reload command
 
     if is_direct:
@@ -825,11 +1440,27 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
         agent_type = "Direct LLM"
         alita_prompt = _get_alita_system_prompt(config)
         agent_def = {
-            'model': model or 'gpt-5',
-            'temperature': temperature if temperature is not None else 0.1,
-            'max_tokens': max_tokens or 4096,
+            'model': model or default_model,
+            'temperature': temperature if temperature is not None else default_temperature,
+            'max_tokens': max_tokens or default_max_tokens,
             'system_prompt': alita_prompt
         }
+    elif is_inventory:
+        # Inventory agent mode - knowledge graph builder with inventory toolkit
+        agent_name = "Inventory"
+        agent_type = "Built-in Agent"
+        inventory_prompt = _get_inventory_system_prompt(config)
+        agent_def = {
+            'name': 'inventory-agent',
+            'model': model or default_model,
+            'temperature': temperature if temperature is not None else 0.3,
+            'max_tokens': max_tokens or default_max_tokens,
+            'system_prompt': inventory_prompt,
+            # Include inventory toolkit by default
+            'toolkit_configs': [
+                {'type': 'inventory', 'graph_path': './knowledge_graph.json'}
+            ]
+        }
     elif is_local:
         agent_def = load_agent_definition(agent_source)
         agent_name = agent_def.get('name', Path(agent_source).stem)
@@ -852,8 +1483,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
         agent_type = "Platform Agent"
 
     # Get model and temperature for welcome banner
-    llm_model_display = current_model or agent_def.get('model', 'gpt-4o')
-    llm_temperature_display = current_temperature if current_temperature is not None else agent_def.get('temperature', 0.1)
+    llm_model_display = current_model or agent_def.get('model', default_model)
+    llm_temperature_display = current_temperature if current_temperature is not None else agent_def.get('temperature', default_temperature)
 
     # Print nice welcome banner
     print_welcome(agent_name, llm_model_display, llm_temperature_display, approval_mode)
@@ -862,27 +1493,64 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
     chat_history = []
 
     # Initialize session for persistence (memory + plan)
-    from .tools import generate_session_id, create_session_memory, save_session_metadata
+    from .tools import generate_session_id, create_session_memory, save_session_metadata, to_portable_path
     current_session_id = generate_session_id()
     plan_state = {'session_id': current_session_id}
 
     # Create persistent memory for agent (stored in session directory)
     memory = create_session_memory(current_session_id)
 
-    # Save session metadata
+    # Save session metadata with agent source for session resume
+    agent_source_portable = to_portable_path(current_agent_file) if current_agent_file else None
+    # Filter out transient inventory configs (dicts) - only save file paths
+    serializable_toolkit_configs = [tc for tc in added_toolkit_configs if isinstance(tc, str)]
+    # Extract inventory graph path if present
+    inventory_graph = None
+    for tc in added_toolkit_configs:
+        if isinstance(tc, dict) and tc.get('type') == 'inventory':
+            inventory_graph = tc.get('graph_path')
+            break
     save_session_metadata(current_session_id, {
         'agent_name': agent_name,
         'agent_type': agent_type if 'agent_type' in dir() else 'Direct LLM',
+        'agent_source': agent_source_portable,
         'model': llm_model_display,
         'temperature': llm_temperature_display,
         'work_dir': work_dir,
         'is_direct': is_direct,
         'is_local': is_local,
+        'is_inventory': is_inventory,
+        'added_toolkit_configs': serializable_toolkit_configs,
+        'inventory_graph': inventory_graph,
+        'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])],
     })
     console.print(f"[dim]Session: {current_session_id}[/dim]")
 
+    # Initialize context manager for chat history management
+    context_config = config.context_management
+    ctx_manager = CLIContextManager(
+        session_id=current_session_id,
+        max_context_tokens=context_config.get('max_context_tokens', 8000),
+        preserve_recent=context_config.get('preserve_recent_messages', 5),
+        pruning_method=context_config.get('pruning_method', 'oldest_first'),
+        enable_summarization=context_config.get('enable_summarization', True),
+        summary_trigger_ratio=context_config.get('summary_trigger_ratio', 0.8),
+        summaries_limit=context_config.get('summaries_limit_count', 5),
+        llm=None  # Will be set after LLM creation
+    )
+
+    # Purge old sessions on startup (cleanup task)
+    try:
+        purge_context_sessions(
+            sessions_dir=config.sessions_dir,
+            max_age_days=context_config.get('session_max_age_days', 30),
+            max_sessions=context_config.get('max_sessions', 100)
+        )
+    except Exception as e:
+        logger.debug(f"Session cleanup failed: {e}")
+
     # Create agent executor
-    if is_direct or is_local:
+    if is_direct or is_local or is_inventory:
         # Setup local agent executor (handles all config, tools, MCP, etc.)
         try:
             agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
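The `CLIContextManager` wiring above reads its settings from `config.context_management`. The keys and fall-back defaults below are taken from the `.get()` calls in this hunk; how and where that mapping is declared (YAML vs. JSON, file location) is not shown in this diff, so treat the container itself as an assumption:

```python
# Defaults as used by the .get() calls above.
context_management = {
    "max_context_tokens": 8000,        # token budget for the rolling chat context
    "preserve_recent_messages": 5,     # most recent messages are never pruned
    "pruning_method": "oldest_first",
    "enable_summarization": True,
    "summary_trigger_ratio": 0.8,      # summarize once 80% of the budget is used
    "summaries_limit_count": 5,
    # consumed by purge_old_sessions() at startup:
    "session_max_age_days": 30,
    "max_sessions": 100,
}
```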
@@ -916,25 +1584,53 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
         )
         llm = None  # Platform agents don't use direct LLM
 
+    # Set LLM on context manager for summarization
+    if llm is not None:
+        ctx_manager.llm = llm
+
     # Initialize input handler for readline support
     input_handler = get_input_handler()
 
+    # Set up toolkit names callback for tab completion
+    from .input_handler import set_toolkit_names_callback, set_inventory_files_callback
+    set_toolkit_names_callback(lambda: _list_available_toolkits(config))
+
+    # Set up inventory files callback for /inventory tab completion
+    set_inventory_files_callback(lambda: _get_inventory_json_files(allowed_directories[0] if allowed_directories else None))
+
     # Interactive chat loop
     while True:
         try:
+            # Get context info for the UI indicator
+            context_info = ctx_manager.get_context_info()
+
             # Get input with styled prompt (prompt is part of input() for proper readline handling)
-            user_input = styled_input().strip()
+            user_input = styled_input(context_info=context_info).strip()
 
             if not user_input:
                 continue
 
             # Handle commands
             if user_input.lower() in ['exit', 'quit']:
+                # Save final session state before exiting
+                try:
+                    from .tools import update_session_metadata, to_portable_path
+                    update_session_metadata(current_session_id, {
+                        'agent_source': to_portable_path(current_agent_file) if current_agent_file else None,
+                        'model': current_model or llm_model_display,
+                        'temperature': current_temperature if current_temperature is not None else llm_temperature_display,
+                        'allowed_directories': allowed_directories,
+                        'added_toolkit_configs': list(added_toolkit_configs),
+                        'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])],
+                    })
+                except Exception as e:
+                    logger.debug(f"Failed to save session state on exit: {e}")
                 console.print("\n[bold cyan]👋 Goodbye![/bold cyan]\n")
                 break
 
             if user_input == '/clear':
                 chat_history = []
+                ctx_manager.clear()
                 console.print("[green]✓ Conversation history cleared.[/green]")
                 continue
 
@@ -947,7 +1643,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                     role = msg.get('role', 'unknown')
                     content = msg.get('content', '')
                     role_color = 'blue' if role == 'user' else 'green'
-                    console.print(f"\n[bold {role_color}]{i}. {role.upper()}:[/bold {role_color}] {content[:100]}...")
+                    included_marker = "" if ctx_manager.is_message_included(i - 1) else " [dim](pruned)[/dim]"
+                    console.print(f"\n[bold {role_color}]{i}. {role.upper()}:[/bold {role_color}] {content[:100]}...{included_marker}")
                 continue
 
             if user_input == '/save':
@@ -975,12 +1672,17 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                 agent_def['model'] = current_model
 
                 # Recreate LLM and agent executor - use session memory to preserve history
-                from .tools import create_session_memory
+                from .tools import create_session_memory, update_session_metadata
                 memory = create_session_memory(current_session_id)
                 try:
                     agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
-                        client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
+                        client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
                     )
+                    # Persist model change to session
+                    update_session_metadata(current_session_id, {
+                        'model': current_model,
+                        'temperature': current_temperature if current_temperature is not None else agent_def.get('temperature', 0.7)
+                    })
                     console.print(Panel(
                         f"[cyan]ℹ Model switched to [bold]{current_model}[/bold]. Agent state reset, chat history preserved.[/cyan]",
                         border_style="cyan",
@@ -993,8 +1695,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
             # /reload command - reload agent definition from file
             if user_input == '/reload':
                 if not is_local:
-                    if is_direct:
-                        console.print("[yellow]Cannot reload direct chat mode - no agent file to reload.[/yellow]")
+                    if is_direct or is_inventory:
+                        console.print("[yellow]Cannot reload built-in agent mode - no agent file to reload.[/yellow]")
                     else:
                         console.print("[yellow]Reload is only available for local agents (file-based).[/yellow]")
                     continue
@@ -1036,7 +1738,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                 from .tools import create_session_memory
                 memory = create_session_memory(current_session_id)
                 agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
-                    client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
+                    client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
                 )
 
                 # Show what changed
@@ -1063,8 +1765,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
 
             # /add_mcp command - add MCP server
             if user_input == '/add_mcp':
-                if not (is_direct or is_local):
-                    console.print("[yellow]Adding MCP is only available for local agents and direct chat.[/yellow]")
+                if not (is_direct or is_local or is_inventory):
+                    console.print("[yellow]Adding MCP is only available for local agents and built-in agents.[/yellow]")
                     continue
 
                 selected_mcp = _select_mcp_interactive(config)
@@ -1077,12 +1779,16 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                     agent_def['mcps'].append(mcp_name)
 
                     # Recreate agent executor with new MCP - use session memory to preserve history
-                    from .tools import create_session_memory
+                    from .tools import create_session_memory, update_session_metadata
                     memory = create_session_memory(current_session_id)
                     try:
                         agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
-                            client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
+                            client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
                         )
+                        # Persist added MCPs to session
+                        update_session_metadata(current_session_id, {
+                            'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])]
+                        })
                         console.print(Panel(
                             f"[cyan]ℹ Added MCP: [bold]{mcp_name}[/bold]. Agent state reset, chat history preserved.[/cyan]",
                             border_style="cyan",
@@ -1093,12 +1799,27 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                continue
 
             # /add_toolkit command - add toolkit
-            if user_input == '/add_toolkit':
-                if not (is_direct or is_local):
-                    console.print("[yellow]Adding toolkit is only available for local agents and direct chat.[/yellow]")
+            if user_input == '/add_toolkit' or user_input.startswith('/add_toolkit '):
+                if not (is_direct or is_local or is_inventory):
+                    console.print("[yellow]Adding toolkit is only available for local agents and built-in agents.[/yellow]")
                     continue
 
-                selected_toolkit = _select_toolkit_interactive(config)
+                parts = user_input.split(maxsplit=1)
+                if len(parts) == 2:
+                    # Direct toolkit selection by name
+                    toolkit_name_arg = parts[1].strip()
+                    selected_toolkit = _find_toolkit_by_name(config, toolkit_name_arg)
+                    if not selected_toolkit:
+                        console.print(f"[yellow]Toolkit '{toolkit_name_arg}' not found.[/yellow]")
+                        # Show available toolkits
+                        available = _list_available_toolkits(config)
+                        if available:
+                            console.print(f"[dim]Available toolkits: {', '.join(available)}[/dim]")
+                        continue
+                else:
+                    # Interactive selection
+                    selected_toolkit = _select_toolkit_interactive(config)
+
                 if selected_toolkit:
                     toolkit_name = selected_toolkit['name']
                     toolkit_file = selected_toolkit['file']
@@ -1108,12 +1829,16 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                     added_toolkit_configs.append(toolkit_file)
 
                     # Recreate agent executor with new toolkit - use session memory to preserve history
-                    from .tools import create_session_memory
+                    from .tools import create_session_memory, update_session_metadata
                    memory = create_session_memory(current_session_id)
                     try:
                         agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
-                            client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
+                            client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
                         )
+                        # Persist added toolkits to session
+                        update_session_metadata(current_session_id, {
+                            'added_toolkit_configs': list(added_toolkit_configs)
+                        })
                         console.print(Panel(
                             f"[cyan]ℹ Added toolkit: [bold]{toolkit_name}[/bold]. Agent state reset, chat history preserved.[/cyan]",
                             border_style="cyan",
@@ -1123,24 +1848,163 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1123
1848
  console.print(f"[red]Error adding toolkit: {e}[/red]")
1124
1849
  continue
1125
1850
 
1126
- # /mode command - set approval mode
1127
- if user_input == '/mode' or user_input.startswith('/mode '):
1851
+ # /rm_mcp command - remove MCP server
1852
+ if user_input == '/rm_mcp' or user_input.startswith('/rm_mcp '):
1853
+ if not (is_direct or is_local or is_inventory):
1854
+ console.print("[yellow]Removing MCP is only available for local agents and built-in agents.[/yellow]")
1855
+ continue
1856
+
1857
+ current_mcps = agent_def.get('mcps', [])
1858
+ if not current_mcps:
1859
+ console.print("[yellow]No MCP servers are currently loaded.[/yellow]")
1860
+ continue
1861
+
1862
+ # Get list of MCP names
1863
+ mcp_names = [m if isinstance(m, str) else m.get('name') for m in current_mcps]
1864
+
1128
1865
  parts = user_input.split(maxsplit=1)
1129
- if len(parts) == 1:
1130
- # Show current mode and options
1131
- mode_info = {
1132
- 'always': ('yellow', 'Confirm before each tool execution'),
1133
- 'auto': ('green', 'Execute tools without confirmation'),
1134
- 'yolo': ('red', 'No confirmations, skip safety warnings')
1135
- }
1136
- console.print("\n🔧 [bold cyan]Approval Mode:[/bold cyan]\n")
1137
- for mode_name, (color, desc) in mode_info.items():
1138
- marker = "●" if mode_name == approval_mode else "○"
1139
- console.print(f" [{color}]{marker}[/{color}] [bold]{mode_name}[/bold] - {desc}")
1140
- console.print(f"\n[dim]Usage: /mode <always|auto|yolo>[/dim]")
1866
+ if len(parts) == 2:
1867
+ # Direct removal by name
1868
+ mcp_name_to_remove = parts[1].strip()
1869
+ if mcp_name_to_remove not in mcp_names:
1870
+ console.print(f"[yellow]MCP '{mcp_name_to_remove}' not found.[/yellow]")
1871
+ console.print(f"[dim]Loaded MCPs: {', '.join(mcp_names)}[/dim]")
1872
+ continue
1141
1873
  else:
1142
- new_mode = parts[1].lower().strip()
1143
- if new_mode in ['always', 'auto', 'yolo']:
1874
+ # Interactive selection
1875
+ console.print("\n🔌 [bold cyan]Remove MCP Server[/bold cyan]\n")
1876
+ for i, name in enumerate(mcp_names, 1):
1877
+ console.print(f" [bold]{i}[/bold]. {name}")
1878
+ console.print(f" [bold]0[/bold]. [dim]Cancel[/dim]")
1879
+ console.print()
1880
+
1881
+ try:
1882
+ choice = int(input("Select MCP to remove: ").strip())
1883
+ if choice == 0:
1884
+ continue
1885
+ if 1 <= choice <= len(mcp_names):
1886
+ mcp_name_to_remove = mcp_names[choice - 1]
1887
+ else:
1888
+ console.print("[yellow]Invalid selection.[/yellow]")
1889
+ continue
1890
+ except (ValueError, KeyboardInterrupt):
1891
+ continue
1892
+
1893
+ # Remove the MCP
1894
+ agent_def['mcps'] = [m for m in current_mcps if (m if isinstance(m, str) else m.get('name')) != mcp_name_to_remove]
1895
+
1896
+ # Recreate agent executor without the MCP
1897
+ from .tools import create_session_memory, update_session_metadata
1898
+ memory = create_session_memory(current_session_id)
1899
+ try:
1900
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1901
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1902
+ )
1903
+ # Persist updated MCPs to session
1904
+ update_session_metadata(current_session_id, {
1905
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])]
1906
+ })
1907
+ console.print(Panel(
1908
+ f"[cyan]ℹ Removed MCP: [bold]{mcp_name_to_remove}[/bold]. Agent state reset, chat history preserved.[/cyan]",
1909
+ border_style="cyan",
1910
+ box=box.ROUNDED
1911
+ ))
1912
+ except Exception as e:
1913
+ console.print(f"[red]Error removing MCP: {e}[/red]")
1914
+ continue
1915
+
1916
+ # /rm_toolkit command - remove toolkit
1917
+ if user_input == '/rm_toolkit' or user_input.startswith('/rm_toolkit '):
1918
+ if not (is_direct or is_local or is_inventory):
1919
+ console.print("[yellow]Removing toolkit is only available for local agents and built-in agents.[/yellow]")
1920
+ continue
1921
+
1922
+ if not added_toolkit_configs:
1923
+ console.print("[yellow]No toolkits are currently loaded.[/yellow]")
1924
+ continue
1925
+
1926
+ # Get toolkit names from config files
1927
+ toolkit_info = [] # List of (name, file_path)
1928
+ for toolkit_file in added_toolkit_configs:
1929
+ try:
1930
+ with open(toolkit_file, 'r') as f:
1931
+ tk_config = json.load(f)
1932
+ tk_name = tk_config.get('toolkit_name', Path(toolkit_file).stem)
1933
+ toolkit_info.append((tk_name, toolkit_file))
1934
+ except Exception:
1935
+ toolkit_info.append((Path(toolkit_file).stem, toolkit_file))
1936
+
1937
+ parts = user_input.split(maxsplit=1)
1938
+ if len(parts) == 2:
1939
+ # Direct removal by name
1940
+ toolkit_name_to_remove = parts[1].strip()
1941
+ matching = [(name, path) for name, path in toolkit_info if name == toolkit_name_to_remove]
1942
+ if not matching:
1943
+ console.print(f"[yellow]Toolkit '{toolkit_name_to_remove}' not found.[/yellow]")
1944
+ console.print(f"[dim]Loaded toolkits: {', '.join(name for name, _ in toolkit_info)}[/dim]")
1945
+ continue
1946
+ toolkit_file_to_remove = matching[0][1]
1947
+ else:
1948
+ # Interactive selection
1949
+ console.print("\n🔧 [bold cyan]Remove Toolkit[/bold cyan]\n")
1950
+ for i, (name, _) in enumerate(toolkit_info, 1):
1951
+ console.print(f" [bold]{i}[/bold]. {name}")
1952
+ console.print(f" [bold]0[/bold]. [dim]Cancel[/dim]")
1953
+ console.print()
1954
+
1955
+ try:
1956
+ choice = int(input("Select toolkit to remove: ").strip())
1957
+ if choice == 0:
1958
+ continue
1959
+ if 1 <= choice <= len(toolkit_info):
1960
+ toolkit_name_to_remove, toolkit_file_to_remove = toolkit_info[choice - 1]
1961
+ else:
1962
+ console.print("[yellow]Invalid selection.[/yellow]")
1963
+ continue
1964
+ except (ValueError, KeyboardInterrupt):
1965
+ continue
1966
+
1967
+ # Remove the toolkit
1968
+ added_toolkit_configs.remove(toolkit_file_to_remove)
1969
+
1970
+ # Recreate agent executor without the toolkit
1971
+ from .tools import create_session_memory, update_session_metadata
1972
+ memory = create_session_memory(current_session_id)
1973
+ try:
1974
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1975
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1976
+ )
1977
+ # Persist updated toolkits to session
1978
+ update_session_metadata(current_session_id, {
1979
+ 'added_toolkit_configs': list(added_toolkit_configs)
1980
+ })
1981
+ console.print(Panel(
1982
+ f"[cyan]ℹ Removed toolkit: [bold]{toolkit_name_to_remove}[/bold]. Agent state reset, chat history preserved.[/cyan]",
1983
+ border_style="cyan",
1984
+ box=box.ROUNDED
1985
+ ))
1986
+ except Exception as e:
1987
+ console.print(f"[red]Error removing toolkit: {e}[/red]")
1988
+ continue
1989
+
1990
+ # /mode command - set approval mode
1991
+ if user_input == '/mode' or user_input.startswith('/mode '):
1992
+ parts = user_input.split(maxsplit=1)
1993
+ if len(parts) == 1:
1994
+ # Show current mode and options
1995
+ mode_info = {
1996
+ 'always': ('yellow', 'Confirm before each tool execution'),
1997
+ 'auto': ('green', 'Execute tools without confirmation'),
1998
+ 'yolo': ('red', 'No confirmations, skip safety warnings')
1999
+ }
2000
+ console.print("\n🔧 [bold cyan]Approval Mode:[/bold cyan]\n")
2001
+ for mode_name, (color, desc) in mode_info.items():
2002
+ marker = "●" if mode_name == approval_mode else "○"
2003
+ console.print(f" [{color}]{marker}[/{color}] [bold]{mode_name}[/bold] - {desc}")
2004
+ console.print(f"\n[dim]Usage: /mode <always|auto|yolo>[/dim]")
2005
+ else:
2006
+ new_mode = parts[1].lower().strip()
2007
+ if new_mode in ['always', 'auto', 'yolo']:
1144
2008
  approval_mode = new_mode
1145
2009
  mode_colors = {'always': 'yellow', 'auto': 'green', 'yolo': 'red'}
1146
2010
  console.print(f"✓ [green]Mode set to[/green] [{mode_colors[new_mode]}][bold]{new_mode}[/bold][/{mode_colors[new_mode]}]")
@@ -1148,44 +2012,214 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1148
2012
  console.print(f"[yellow]Unknown mode: {new_mode}. Use: always, auto, or yolo[/yellow]")
1149
2013
  continue
1150
2014
 
1151
- # /dir command - mount workspace directory
2015
+ # /dir command - manage allowed directories
1152
2016
  if user_input == '/dir' or user_input.startswith('/dir '):
1153
- parts = user_input.split(maxsplit=1)
2017
+ parts = user_input.split()
2018
+
1154
2019
  if len(parts) == 1:
1155
- if current_work_dir:
1156
- console.print(f"📁 [bold cyan]Current workspace:[/bold cyan] {current_work_dir}")
2020
+ # /dir - list all allowed directories
2021
+ if allowed_directories:
2022
+ console.print("📁 [bold cyan]Allowed directories:[/bold cyan]")
2023
+ for i, d in enumerate(allowed_directories):
2024
+ marker = "●" if i == 0 else "○"
2025
+ label = " [dim](primary)[/dim]" if i == 0 else ""
2026
+ console.print(f" {marker} {d}{label}")
1157
2027
  else:
1158
- console.print("[yellow]No workspace mounted. Usage: /dir /path/to/workspace[/yellow]")
2028
+ console.print("[yellow]No directories allowed.[/yellow]")
2029
+ console.print("[dim]Usage: /dir [add|rm|remove] /path/to/directory[/dim]")
2030
+ continue
2031
+
2032
+ action = parts[1].lower()
2033
+
2034
+ # Handle /dir add /path or /dir /path (add is default)
2035
+ if action in ['add', 'rm', 'remove']:
2036
+ if len(parts) < 3:
2037
+ console.print(f"[yellow]Missing path. Usage: /dir {action} /path/to/directory[/yellow]")
2038
+ continue
2039
+ dir_path = parts[2]
1159
2040
  else:
1160
- new_dir = parts[1].strip()
1161
- new_dir_path = Path(new_dir).expanduser().resolve()
2041
+ # /dir /path - default to add
2042
+ action = 'add'
2043
+ dir_path = parts[1]
2044
+
2045
+ dir_path = str(Path(dir_path).expanduser().resolve())
2046
+
2047
+ if action == 'add':
2048
+ if not Path(dir_path).exists():
2049
+ console.print(f"[red]Directory not found: {dir_path}[/red]")
2050
+ continue
2051
+ if not Path(dir_path).is_dir():
2052
+ console.print(f"[red]Not a directory: {dir_path}[/red]")
2053
+ continue
1162
2054
 
1163
- if not new_dir_path.exists():
1164
- console.print(f"[red]Directory not found: {new_dir}[/red]")
2055
+ if dir_path in allowed_directories:
2056
+ console.print(f"[yellow]Directory already allowed: {dir_path}[/yellow]")
1165
2057
  continue
1166
- if not new_dir_path.is_dir():
1167
- console.print(f"[red]Not a directory: {new_dir}[/red]")
2058
+
2059
+ allowed_directories.append(dir_path)
2060
+
2061
+ # Recreate agent executor with updated directories
2062
+ if is_direct or is_local or is_inventory:
2063
+ from .tools import create_session_memory
2064
+ memory = create_session_memory(current_session_id)
2065
+ try:
2066
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
2067
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
2068
+ )
2069
+ console.print(Panel(
2070
+ f"[cyan]✓ Added directory: [bold]{dir_path}[/bold]\n Total allowed: {len(allowed_directories)}[/cyan]",
2071
+ border_style="cyan",
2072
+ box=box.ROUNDED
2073
+ ))
2074
+ except Exception as e:
2075
+ allowed_directories.remove(dir_path) # Rollback
2076
+ console.print(f"[red]Error adding directory: {e}[/red]")
2077
+ else:
2078
+ console.print("[yellow]Directory mounting is only available for local agents and built-in agents.[/yellow]")
2079
+
2080
+ elif action in ['rm', 'remove']:
2081
+ if dir_path not in allowed_directories:
2082
+ console.print(f"[yellow]Directory not in allowed list: {dir_path}[/yellow]")
2083
+ if allowed_directories:
2084
+ console.print("[dim]Currently allowed:[/dim]")
2085
+ for d in allowed_directories:
2086
+ console.print(f"[dim] - {d}[/dim]")
2087
+ continue
2088
+
2089
+ if len(allowed_directories) == 1:
2090
+ console.print("[yellow]Cannot remove the last directory. Use /dir add first to add another.[/yellow]")
1168
2091
  continue
1169
2092
 
1170
- current_work_dir = str(new_dir_path)
2093
+ allowed_directories.remove(dir_path)
1171
2094
 
1172
- # Recreate agent executor with new work_dir - use session memory
1173
- if is_direct or is_local:
2095
+ # Recreate agent executor with updated directories
2096
+ if is_direct or is_local or is_inventory:
1174
2097
  from .tools import create_session_memory
1175
2098
  memory = create_session_memory(current_session_id)
1176
2099
  try:
1177
2100
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1178
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
2101
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1179
2102
  )
1180
2103
  console.print(Panel(
1181
- f"[cyan]✓ Mounted: [bold]{current_work_dir}[/bold]\n Terminal + filesystem tools enabled.[/cyan]",
2104
+ f"[cyan]✓ Removed directory: [bold]{dir_path}[/bold]\n Remaining: {len(allowed_directories)}[/cyan]",
1182
2105
  border_style="cyan",
1183
2106
  box=box.ROUNDED
1184
2107
  ))
1185
2108
  except Exception as e:
1186
- console.print(f"[red]Error mounting directory: {e}[/red]")
2109
+ allowed_directories.append(dir_path) # Rollback
2110
+ console.print(f"[red]Error removing directory: {e}[/red]")
2111
+ else:
2112
+ console.print("[yellow]Directory mounting is only available for local agents and built-in agents.[/yellow]")
2113
+ continue
2114
+
2115
+ # /inventory command - load inventory/knowledge graph from path
2116
+ if user_input == '/inventory' or user_input.startswith('/inventory '):
2117
+ if not (is_direct or is_local or is_inventory):
2118
+ console.print("[yellow]Loading inventory is only available for local agents and built-in agents.[/yellow]")
2119
+ continue
2120
+
2121
+ parts = user_input.split(maxsplit=1)
2122
+ if len(parts) == 1:
2123
+ # Show current inventory and available files
2124
+ current_inventory = None
2125
+ for tc in added_toolkit_configs:
2126
+ if isinstance(tc, dict) and tc.get('type') == 'inventory':
2127
+ current_inventory = tc.get('graph_path')
2128
+ break
2129
+ elif isinstance(tc, str):
2130
+ try:
2131
+ with open(tc, 'r') as f:
2132
+ cfg = json.load(f)
2133
+ if cfg.get('type') == 'inventory':
2134
+ current_inventory = cfg.get('graph_path')
2135
+ break
2136
+ except Exception:
2137
+ pass
2138
+
2139
+ if current_inventory:
2140
+ console.print(f"📊 [bold cyan]Current inventory:[/bold cyan] {current_inventory}")
1187
2141
  else:
1188
- console.print("[yellow]Directory mounting is only available for local agents and direct chat.[/yellow]")
2142
+ console.print("[yellow]No inventory loaded.[/yellow]")
2143
+
2144
+ # Show available .json files
2145
+ primary_dir = allowed_directories[0] if allowed_directories else None
2146
+ available = _get_inventory_json_files(primary_dir)
2147
+ if available:
2148
+ console.print(f"[dim]Available files: {', '.join(available[:10])}")
2149
+ if len(available) > 10:
2150
+ console.print(f"[dim] ... and {len(available) - 10} more[/dim]")
2151
+ console.print("[dim]Usage: /inventory <path/to/graph.json>[/dim]")
2152
+ else:
2153
+ inventory_path = parts[1].strip()
2154
+
2155
+ # Build inventory config from path
2156
+ primary_dir = allowed_directories[0] if allowed_directories else None
2157
+ inventory_config = _build_inventory_config(inventory_path, primary_dir)
2158
+ if not inventory_config:
2159
+ console.print(f"[red]Inventory file not found: {inventory_path}[/red]")
2160
+ # Show search locations
2161
+ console.print("[dim]Searched in:[/dim]")
2162
+ console.print(f"[dim] - {Path.cwd()}[/dim]")
2163
+ console.print(f"[dim] - {Path.cwd() / '.alita' / 'inventory'}[/dim]")
2164
+ if primary_dir:
2165
+ console.print(f"[dim] - {primary_dir}[/dim]")
2166
+ console.print(f"[dim] - {Path(primary_dir) / '.alita' / 'inventory'}[/dim]")
2167
+ continue
2168
+
2169
+ # Remove any existing inventory toolkit configs
2170
+ new_toolkit_configs = []
2171
+ removed_inventory = None
2172
+ for tc in added_toolkit_configs:
2173
+ if isinstance(tc, dict) and tc.get('type') == 'inventory':
2174
+ removed_inventory = tc.get('toolkit_name', 'inventory')
2175
+ continue # Skip existing inventory
2176
+ elif isinstance(tc, str):
2177
+ try:
2178
+ with open(tc, 'r') as f:
2179
+ cfg = json.load(f)
2180
+ if cfg.get('type') == 'inventory':
2181
+ removed_inventory = cfg.get('toolkit_name', Path(tc).stem)
2182
+ continue # Skip existing inventory
2183
+ except Exception:
2184
+ pass
2185
+ new_toolkit_configs.append(tc)
2186
+
2187
+ # Add new inventory config
2188
+ new_toolkit_configs.append(inventory_config)
2189
+ added_toolkit_configs = new_toolkit_configs
2190
+
2191
+ # Recreate agent executor with new inventory
2192
+ from .tools import create_session_memory, update_session_metadata
2193
+ memory = create_session_memory(current_session_id)
2194
+ try:
2195
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
2196
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
2197
+ )
2198
+ # Persist updated toolkits to session (exclude transient inventory configs)
2199
+ serializable_configs = [tc for tc in added_toolkit_configs if isinstance(tc, str)]
2200
+ update_session_metadata(current_session_id, {
2201
+ 'added_toolkit_configs': serializable_configs,
2202
+ 'inventory_graph': inventory_config.get('graph_path') # Save just the graph path
2203
+ })
2204
+
2205
+ toolkit_name = inventory_config['toolkit_name']
2206
+ graph_path = inventory_config['graph_path']
2207
+ if removed_inventory:
2208
+ console.print(Panel(
2209
+ f"[cyan]ℹ Replaced inventory [bold]{removed_inventory}[/bold] with [bold]{toolkit_name}[/bold]\n"
2210
+ f" Graph: {graph_path}[/cyan]",
2211
+ border_style="cyan",
2212
+ box=box.ROUNDED
2213
+ ))
2214
+ else:
2215
+ console.print(Panel(
2216
+ f"[cyan]✓ Loaded inventory: [bold]{toolkit_name}[/bold]\n"
2217
+ f" Graph: {graph_path}[/cyan]",
2218
+ border_style="cyan",
2219
+ box=box.ROUNDED
2220
+ ))
2221
+ except Exception as e:
2222
+ console.print(f"[red]Error loading inventory: {e}[/red]")
1189
2223
  continue
1190
2224
 
1191
2225
  # /session command - list or resume sessions
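The /inventory handler above reports four search locations when a graph file cannot be found. A sketch of a resolver consistent with those locations, assuming _build_inventory_config looks in the current directory, .alita/inventory, and the primary allowed directory; the real helper's behaviour may differ:

    from pathlib import Path
    from typing import Optional

    def resolve_inventory_graph(name: str, primary_dir: Optional[str] = None) -> Optional[Path]:
        """Return the first existing graph file among the documented search locations."""
        candidates = [Path.cwd(), Path.cwd() / ".alita" / "inventory"]
        if primary_dir:
            candidates += [Path(primary_dir), Path(primary_dir) / ".alita" / "inventory"]
        for base in candidates:
            candidate = (base / name).expanduser()
            if candidate.is_file():
                return candidate.resolve()
        return None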
@@ -1230,7 +2264,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1230
2264
 
1231
2265
  elif parts[1] == 'resume' and len(parts) > 2:
1232
2266
  session_id = parts[2].strip()
1233
- from .tools import load_session_metadata, create_session_memory
2267
+ from .tools import load_session_metadata, create_session_memory, from_portable_path
1234
2268
 
1235
2269
  # Check if session exists (either plan or metadata)
1236
2270
  loaded_state = PlanState.load(session_id)
@@ -1251,6 +2285,67 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1251
2285
  plan_state['session_id'] = session_id
1252
2286
  resume_info = ""
1253
2287
 
2288
+ # Restore agent source and reload agent definition if available
2289
+ restored_agent = False
2290
+ if session_metadata:
2291
+ agent_source = session_metadata.get('agent_source')
2292
+ if agent_source:
2293
+ agent_file_path = from_portable_path(agent_source)
2294
+ if Path(agent_file_path).exists():
2295
+ try:
2296
+ agent_def = load_agent_definition(agent_file_path)
2297
+ current_agent_file = agent_file_path
2298
+ agent_name = agent_def.get('name', Path(agent_file_path).stem)
2299
+ is_local = True
2300
+ is_direct = False
2301
+ restored_agent = True
2302
+ except Exception as e:
2303
+ console.print(f"[yellow]Warning: Could not reload agent from {agent_source}: {e}[/yellow]")
2304
+
2305
+ # Restore added toolkit configs
2306
+ restored_toolkit_configs = session_metadata.get('added_toolkit_configs', [])
2307
+ if restored_toolkit_configs:
2308
+ added_toolkit_configs.clear()
2309
+ added_toolkit_configs.extend(restored_toolkit_configs)
2310
+
2311
+ # Restore added MCPs to agent_def
2312
+ restored_mcps = session_metadata.get('added_mcps', [])
2313
+ if restored_mcps and restored_agent:
2314
+ if 'mcps' not in agent_def:
2315
+ agent_def['mcps'] = []
2316
+ for mcp_name in restored_mcps:
2317
+ if mcp_name not in [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])]:
2318
+ agent_def['mcps'].append(mcp_name)
2319
+
2320
+ # Restore model/temperature overrides
2321
+ if session_metadata.get('model'):
2322
+ current_model = session_metadata['model']
2323
+ if restored_agent:
2324
+ agent_def['model'] = current_model
2325
+ if session_metadata.get('temperature') is not None:
2326
+ current_temperature = session_metadata['temperature']
2327
+ if restored_agent:
2328
+ agent_def['temperature'] = current_temperature
2329
+
2330
+ # Restore allowed directories
2331
+ if session_metadata.get('allowed_directories'):
2332
+ allowed_directories = session_metadata['allowed_directories']
2333
+ elif session_metadata.get('work_dir'):
2334
+ # Backward compatibility with old sessions
2335
+ allowed_directories = [session_metadata['work_dir']]
2336
+
2337
+ # Reinitialize context manager with resumed session_id to load chat history
2338
+ ctx_manager = CLIContextManager(
2339
+ session_id=session_id,
2340
+ max_context_tokens=context_config.get('max_context_tokens', 8000),
2341
+ preserve_recent=context_config.get('preserve_recent_messages', 5),
2342
+ pruning_method=context_config.get('pruning_method', 'oldest_first'),
2343
+ enable_summarization=context_config.get('enable_summarization', True),
2344
+ summary_trigger_ratio=context_config.get('summary_trigger_ratio', 0.8),
2345
+ summaries_limit=context_config.get('summaries_limit_count', 5),
2346
+ llm=llm if 'llm' in dir() else None
2347
+ )
2348
+
1254
2349
  # Show session info
1255
2350
  agent_info = session_metadata.get('agent_name', 'unknown') if session_metadata else 'unknown'
1256
2351
  model_info = session_metadata.get('model', '') if session_metadata else ''
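The resume path above reads a number of metadata keys that are written elsewhere in this file. An illustrative metadata shape (keys taken from the reads and writes in this diff; the values are made up) together with the backward-compatible directory restore:

    example_metadata = {
        "agent_source": "~/agents/sdk-dev.agent.md",   # portable path to the agent file
        "agent_name": "sdk-dev",
        "model": "gpt-4o",
        "temperature": 0.1,
        "allowed_directories": ["/home/user/project"],
        "added_toolkit_configs": ["jira-config.json"],
        "added_mcps": ["playwright"],
        "work_dir": "/home/user/project",               # legacy single-directory field
    }

    def restore_directories(metadata: dict, current: list) -> list:
        """Prefer the newer allowed_directories list, fall back to the legacy work_dir."""
        if metadata.get("allowed_directories"):
            return list(metadata["allowed_directories"])
        if metadata.get("work_dir"):
            return [metadata["work_dir"]]
        return current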
@@ -1263,14 +2358,48 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1263
2358
  box=box.ROUNDED
1264
2359
  ))
1265
2360
 
1266
- # Recreate planning tools with loaded state
1267
- if is_direct or is_local:
2361
+ # Display restored chat history
2362
+ chat_history_export = ctx_manager.export_chat_history(include_only=False)
2363
+ if chat_history_export:
2364
+ preserve_recent = context_config.get('preserve_recent_messages', 5)
2365
+ total_messages = len(chat_history_export)
2366
+
2367
+ if total_messages > preserve_recent:
2368
+ console.print(f"\n[dim]... {total_messages - preserve_recent} earlier messages in context[/dim]")
2369
+ messages_to_show = chat_history_export[-preserve_recent:]
2370
+ else:
2371
+ messages_to_show = chat_history_export
2372
+
2373
+ for msg in messages_to_show:
2374
+ role = msg.get('role', 'user')
2375
+ content = msg.get('content', '')[:200] # Truncate for display
2376
+ if len(msg.get('content', '')) > 200:
2377
+ content += '...'
2378
+ role_color = 'cyan' if role == 'user' else 'green'
2379
+ role_label = 'You' if role == 'user' else 'Assistant'
2380
+ console.print(f"[dim][{role_color}]{role_label}:[/{role_color}] {content}[/dim]")
2381
+ console.print()
2382
+
2383
+ # Recreate agent executor with restored tools if we have a local/built-in agent
2384
+ if (is_direct or is_local or is_inventory) and restored_agent:
2385
+ try:
2386
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
2387
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
2388
+ )
2389
+ ctx_manager.llm = llm # Update LLM for summarization
2390
+
2391
+ # Warn about MCP state loss
2392
+ if restored_mcps:
2393
+ console.print("[yellow]Note: MCP connections re-initialized (stateful server state like browser sessions are lost)[/yellow]")
2394
+ except Exception as e:
2395
+ console.print(f"[red]Error recreating agent executor: {e}[/red]")
2396
+ console.print("[yellow]Session state loaded but agent not fully restored. Some tools may not work.[/yellow]")
2397
+ elif is_direct or is_local or is_inventory:
2398
+ # Just update planning tools if we couldn't restore agent
1268
2399
  try:
1269
2400
  from .tools import get_planning_tools
1270
2401
  if loaded_state:
1271
2402
  planning_tools, _ = get_planning_tools(loaded_state)
1272
- # Note: We'd need to rebuild the agent to inject new tools
1273
- # For now, the plan state dict is updated so new tool calls will see it
1274
2403
  except Exception as e:
1275
2404
  console.print(f"[yellow]Warning: Could not reload planning tools: {e}[/yellow]")
1276
2405
  else:
@@ -1282,7 +2411,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1282
2411
  # /agent command - switch to a different agent
1283
2412
  if user_input == '/agent':
1284
2413
  selected_agent = _select_agent_interactive(client, config)
1285
- if selected_agent and selected_agent != '__direct__':
2414
+ if selected_agent and selected_agent != '__direct__' and selected_agent != '__inventory__':
1286
2415
  # Load the new agent
1287
2416
  new_is_local = Path(selected_agent).exists()
1288
2417
 
@@ -1292,6 +2421,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1292
2421
  agent_type = "Local Agent"
1293
2422
  is_local = True
1294
2423
  is_direct = False
2424
+ is_inventory = False
1295
2425
  current_agent_file = selected_agent # Track for /reload
1296
2426
  else:
1297
2427
  # Platform agent
@@ -1333,7 +2463,7 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1333
2463
  added_toolkit_configs = []
1334
2464
  try:
1335
2465
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1336
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
2466
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1337
2467
  )
1338
2468
  console.print(Panel(
1339
2469
  f"[cyan]ℹ Switched to agent: [bold]{agent_name}[/bold] ({agent_type}). Agent state reset, chat history preserved.[/cyan]",
@@ -1346,21 +2476,22 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1346
2476
  # Switch back to direct mode
1347
2477
  is_direct = True
1348
2478
  is_local = False
2479
+ is_inventory = False
1349
2480
  current_agent_file = None # No file for direct mode
1350
2481
  agent_name = "Alita"
1351
2482
  agent_type = "Direct LLM"
1352
2483
  alita_prompt = _get_alita_system_prompt(config)
1353
2484
  agent_def = {
1354
- 'model': current_model or 'gpt-4o',
1355
- 'temperature': current_temperature if current_temperature is not None else 0.1,
1356
- 'max_tokens': current_max_tokens or 4096,
2485
+ 'model': current_model or default_model,
2486
+ 'temperature': current_temperature if current_temperature is not None else default_temperature,
2487
+ 'max_tokens': current_max_tokens or default_max_tokens,
1357
2488
  'system_prompt': alita_prompt
1358
2489
  }
1359
2490
  from .tools import create_session_memory
1360
2491
  memory = create_session_memory(current_session_id)
1361
2492
  try:
1362
2493
  agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
1363
- client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, current_work_dir, plan_state
2494
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
1364
2495
  )
1365
2496
  console.print(Panel(
1366
2497
  f"[cyan]ℹ Switched to [bold]Alita[/bold]. Agent state reset, chat history preserved.[/cyan]",
@@ -1369,18 +2500,55 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1369
2500
  ))
1370
2501
  except Exception as e:
1371
2502
  console.print(f"[red]Error switching to direct mode: {e}[/red]")
2503
+ elif selected_agent == '__inventory__':
2504
+ # Switch to inventory mode
2505
+ is_direct = False
2506
+ is_local = False
2507
+ is_inventory = True
2508
+ current_agent_file = None # No file for inventory mode
2509
+ agent_name = "Inventory"
2510
+ agent_type = "Built-in Agent"
2511
+ inventory_prompt = _get_inventory_system_prompt(config)
2512
+ agent_def = {
2513
+ 'name': 'inventory-agent',
2514
+ 'model': current_model or default_model,
2515
+ 'temperature': current_temperature if current_temperature is not None else 0.3,
2516
+ 'max_tokens': current_max_tokens or default_max_tokens,
2517
+ 'system_prompt': inventory_prompt,
2518
+ 'toolkit_configs': [
2519
+ {'type': 'inventory', 'graph_path': './knowledge_graph.json'}
2520
+ ]
2521
+ }
2522
+ from .tools import create_session_memory
2523
+ memory = create_session_memory(current_session_id)
2524
+ try:
2525
+ agent_executor, mcp_session_manager, llm, llm_model, filesystem_tools, terminal_tools, planning_tools = _setup_local_agent_executor(
2526
+ client, agent_def, tuple(added_toolkit_configs), config, current_model, current_temperature, current_max_tokens, memory, allowed_directories, plan_state
2527
+ )
2528
+ console.print(Panel(
2529
+ f"[cyan]ℹ Switched to [bold]Inventory[/bold] agent. Use /add_toolkit to add source toolkits.[/cyan]",
2530
+ border_style="cyan",
2531
+ box=box.ROUNDED
2532
+ ))
2533
+ except Exception as e:
2534
+ console.print(f"[red]Error switching to inventory mode: {e}[/red]")
1372
2535
  continue
1373
2536
 
1374
2537
  # Execute agent
1375
- if (is_direct or is_local) and agent_executor is None:
2538
+ # Track if history was already added during continuation handling
2539
+ history_already_added = False
2540
+ original_user_input = user_input # Preserve for history tracking
2541
+
2542
+ if (is_direct or is_local or is_inventory) and agent_executor is None:
1376
2543
  # Local agent without tools: use direct LLM call with streaming
1377
2544
  system_prompt = agent_def.get('system_prompt', '')
1378
2545
  messages = []
1379
2546
  if system_prompt:
1380
2547
  messages.append({"role": "system", "content": system_prompt})
1381
2548
 
1382
- # Add chat history
1383
- for msg in chat_history:
2549
+ # Build pruned context from context manager
2550
+ context_messages = ctx_manager.build_context()
2551
+ for msg in context_messages:
1384
2552
  messages.append(msg)
1385
2553
 
1386
2554
  # Add user message
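For the tool-less direct-LLM path above, the prompt is assembled from the system prompt, the context manager's pruned history, and the new user turn. A compact sketch of that assembly; the method name build_context follows the calls in this diff, everything else is illustrative:

    def build_llm_messages(system_prompt: str, ctx_manager, user_input: str) -> list:
        """System prompt first, then the pruned context, then the new user message."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.extend(ctx_manager.build_context())  # already trimmed to the token budget
        messages.append({"role": "user", "content": user_input})
        return messages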
@@ -1444,38 +2612,141 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1444
2612
  from langchain_core.runnables import RunnableConfig
1445
2613
  from langgraph.errors import GraphRecursionError
1446
2614
 
1447
- invoke_config = None
2615
+ # Initialize invoke_config with thread_id for checkpointing
2616
+ # This ensures the same thread is used across continuations
2617
+ invoke_config = RunnableConfig(
2618
+ configurable={"thread_id": current_session_id}
2619
+ )
2620
+ # always proceed with continuation enabled
2621
+ invoke_config["should_continue"] = True
2622
+ # Set recursion limit for tool executions
2623
+ logger.debug(f"Setting tool steps limit to {recursion_limit}")
2624
+ invoke_config["recursion_limit"] = recursion_limit
2625
+ cli_callback = None
1448
2626
  if show_verbose:
1449
2627
  cli_callback = create_cli_callback(verbose=True, debug=debug_mode)
1450
- invoke_config = RunnableConfig(callbacks=[cli_callback])
2628
+ invoke_config["callbacks"] = [cli_callback]
1451
2629
 
1452
2630
  # Track recursion continuation state
1453
2631
  continue_from_recursion = False
1454
2632
  recursion_attempts = 0
2633
+ tool_limit_attempts = 0 # Track tool limit continuation attempts
1455
2634
  max_recursion_continues = 5 # Prevent infinite continuation loops
2635
+ output = None # Initialize output before loop
2636
+ result = None # Initialize result before loop
1456
2637
 
1457
2638
  while True:
1458
2639
  try:
1459
- # Show status only when not verbose (verbose shows its own progress)
1460
- if not show_verbose:
1461
- with console.status("[yellow]Thinking...[/yellow]", spinner="dots"):
1462
- result = agent_executor.invoke(
1463
- {
1464
- "input": [user_input] if not is_local else user_input,
1465
- "chat_history": chat_history
1466
- },
1467
- config=invoke_config
1468
- )
1469
- else:
1470
- if not continue_from_recursion:
1471
- console.print() # Add spacing before tool calls
2640
+ # Always start with a thinking spinner
2641
+ status = console.status("[yellow]Thinking...[/yellow]", spinner="dots")
2642
+ status.start()
2643
+
2644
+ # Pass status to callback so it can stop it when tool calls start
2645
+ if cli_callback:
2646
+ cli_callback.status = status
2647
+
2648
+ try:
1472
2649
  result = agent_executor.invoke(
1473
2650
  {
1474
2651
  "input": [user_input] if not is_local else user_input,
1475
- "chat_history": chat_history
2652
+ "chat_history": ctx_manager.build_context()
1476
2653
  },
1477
2654
  config=invoke_config
1478
2655
  )
2656
+ finally:
2657
+ # Make sure spinner is stopped
2658
+ try:
2659
+ status.stop()
2660
+ except Exception:
2661
+ pass
2662
+
2663
+ # Extract output from result
2664
+ if result is not None:
2665
+ output = extract_output_from_result(result)
2666
+
2667
+ # Check if max tool iterations were reached and prompt user
2668
+ if output and "Maximum tool execution iterations" in output and "reached" in output:
2669
+ tool_limit_attempts += 1
2670
+
2671
+ console.print()
2672
+ console.print(Panel(
2673
+ f"[yellow]⚠ Tool execution limit reached[/yellow]\n\n"
2674
+ f"The agent has executed the maximum number of tool calls in a single turn.\n"
2675
+ f"This usually happens with complex tasks that require many sequential operations.\n\n"
2676
+ f"[dim]Attempt {tool_limit_attempts}/{max_recursion_continues}[/dim]",
2677
+ title="Tool Limit Reached",
2678
+ border_style="yellow",
2679
+ box=box.ROUNDED
2680
+ ))
2681
+
2682
+ if tool_limit_attempts >= max_recursion_continues:
2683
+ console.print("[red]Maximum continuation attempts reached. Please break down your request into smaller tasks.[/red]")
2684
+ break
2685
+
2686
+ console.print("\nWhat would you like to do?")
2687
+ console.print(" [bold cyan]c[/bold cyan] - Continue execution (tell agent to resume)")
2688
+ console.print(" [bold cyan]s[/bold cyan] - Stop and keep partial results")
2689
+ console.print(" [bold cyan]n[/bold cyan] - Start a new request")
2690
+ console.print()
2691
+
2692
+ try:
2693
+ choice = input_handler.get_input("Choice [c/s/n]: ").strip().lower()
2694
+ except (KeyboardInterrupt, EOFError):
2695
+ choice = 's'
2696
+
2697
+ if choice == 'c':
2698
+ # Continue - send a follow-up message to resume
2699
+ console.print("\n[cyan]Continuing execution...[/cyan]\n")
2700
+
2701
+ # Clean up the output - remove the tool limit warning message
2702
+ clean_output = output
2703
+ if "Maximum tool execution iterations" in output:
2704
+ # Strip the warning from the end of the output
2705
+ lines = output.split('\n')
2706
+ clean_lines = [l for l in lines if "Maximum tool execution iterations" not in l and "Stopping tool execution" not in l]
2707
+ clean_output = '\n'.join(clean_lines).strip()
2708
+
2709
+ # Add current output to history first (without the warning)
2710
+ # Use original user input for first continuation, current for subsequent
2711
+ history_input = original_user_input if not history_already_added else user_input
2712
+ if clean_output:
2713
+ chat_history.append({"role": "user", "content": history_input})
2714
+ chat_history.append({"role": "assistant", "content": clean_output})
2715
+ ctx_manager.add_message("user", history_input)
2716
+ ctx_manager.add_message("assistant", clean_output)
2717
+ history_already_added = True
2718
+
2719
+ # CRITICAL: Use a new thread_id when continuing to avoid corrupted
2720
+ # checkpoint state. The tool limit may have left the checkpoint with
2721
+ # an AIMessage containing tool_calls without corresponding ToolMessages.
2722
+ # Using a new thread_id starts fresh with our clean context manager state.
2723
+ import uuid
2724
+ continuation_thread_id = f"{current_session_id}-cont-{uuid.uuid4().hex[:8]}"
2725
+ invoke_config = RunnableConfig(
2726
+ configurable={"thread_id": continuation_thread_id}
2727
+ )
2728
+ invoke_config["should_continue"] = True
2729
+ invoke_config["recursion_limit"] = recursion_limit
2730
+ if cli_callback:
2731
+ invoke_config["callbacks"] = [cli_callback]
2732
+
2733
+ # Set new input to continue with a more explicit continuation message
2734
+ # Include context about the task limit to help the agent understand
2735
+ user_input = (
2736
+ "The previous response was interrupted due to reaching the tool execution limit. "
2737
+ "Continue from where you left off and complete the remaining steps of the original task. "
2738
+ "Focus on what still needs to be done - do not repeat completed work."
2739
+ )
2740
+ continue # Retry the invoke in this inner loop
2741
+
2742
+ elif choice == 's':
2743
+ console.print("\n[yellow]Stopped. Partial work has been completed.[/yellow]")
2744
+ break # Exit retry loop and show output
2745
+
2746
+ else: # 'n' or anything else
2747
+ console.print("\n[dim]Skipped. Enter a new request.[/dim]")
2748
+ output = None
2749
+ break # Exit retry loop
1479
2750
 
1480
2751
  # Success - exit the retry loop
1481
2752
  break
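The continuation branch above deliberately switches to a fresh LangGraph thread id so that a checkpoint left with unresolved tool calls is not replayed. A sketch of that config construction, mirroring the calls made in this diff (the helper name is illustrative):

    import uuid
    from langchain_core.runnables import RunnableConfig

    def make_continuation_config(session_id: str, recursion_limit: int, callbacks=None) -> RunnableConfig:
        """Build an invoke config bound to a brand-new thread id for a clean checkpoint."""
        thread_id = f"{session_id}-cont-{uuid.uuid4().hex[:8]}"
        config = RunnableConfig(configurable={"thread_id": thread_id})
        config["recursion_limit"] = recursion_limit
        if callbacks:
            config["callbacks"] = callbacks
        return config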
@@ -1513,13 +2784,35 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1513
2784
  choice = 's'
1514
2785
 
1515
2786
  if choice == 'c':
1516
- # Continue - the checkpoint should preserve state
1517
- # We'll re-invoke with a continuation message
2787
+ # Continue - Use a new thread_id to avoid corrupted checkpoint state.
2788
+ # GraphRecursionError may have left the checkpoint with an AIMessage
2789
+ # containing tool_calls without corresponding ToolMessages.
2790
+ # Using a new thread_id starts fresh with our clean context manager state.
1518
2791
  continue_from_recursion = True
1519
- console.print("\n[cyan]Continuing from last checkpoint...[/cyan]\n")
2792
+ console.print("\n[cyan]Continuing with fresh context...[/cyan]\n")
2793
+
2794
+ # Add current progress to history if we have it
2795
+ # (GraphRecursionError doesn't give us partial output, but context may have been updated)
2796
+ history_input = original_user_input if not history_already_added else user_input
2797
+ ctx_manager.add_message("user", history_input)
2798
+ ctx_manager.add_message("assistant", "[Previous task interrupted - continuing...]")
2799
+ history_already_added = True
1520
2800
 
1521
- # Modify the input to signal continuation
1522
- user_input = "Continue from where you left off. Complete the remaining steps of the task."
2801
+ # Create new thread_id to avoid corrupted checkpoint
2802
+ import uuid
2803
+ continuation_thread_id = f"{current_session_id}-cont-{uuid.uuid4().hex[:8]}"
2804
+ invoke_config = RunnableConfig(
2805
+ configurable={"thread_id": continuation_thread_id}
2806
+ )
2807
+ if cli_callback:
2808
+ invoke_config["callbacks"] = [cli_callback]
2809
+
2810
+ # More explicit continuation message
2811
+ user_input = (
2812
+ "The previous response was interrupted due to reaching the step limit. "
2813
+ "Continue from where you left off and complete the remaining steps of the original task. "
2814
+ "Focus on what still needs to be done - do not repeat completed work."
2815
+ )
1523
2816
  continue # Retry the invoke
1524
2817
 
1525
2818
  elif choice == 's':
@@ -1532,30 +2825,53 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
1532
2825
  console.print("\n[dim]Skipped. Enter a new request.[/dim]")
1533
2826
  output = None
1534
2827
  break
1535
-
1536
- # Skip chat history update if we bailed out
2828
+
2829
+ # Skip chat history update if we bailed out (no result)
1537
2830
  if output is None:
1538
2831
  continue
1539
-
1540
- # Extract output from result (if we have a result)
1541
- if 'result' in dir() and result is not None:
1542
- output = extract_output_from_result(result)
1543
2832
 
1544
- # Display response
1545
- console.print(f"\n[bold bright_cyan]{agent_name}:[/bold bright_cyan]")
2833
+ # Display response in a clear format
2834
+ console.print() # Add spacing
2835
+ console.print(f"[bold bright_cyan]{agent_name}:[/bold bright_cyan]")
2836
+ console.print() # Add spacing before response
1546
2837
  if any(marker in output for marker in ['```', '**', '##', '- ', '* ']):
1547
2838
  console.print(Markdown(output))
1548
2839
  else:
1549
2840
  console.print(output)
2841
+ console.print() # Add spacing after response
1550
2842
 
1551
- # Update chat history
1552
- chat_history.append({"role": "user", "content": user_input})
1553
- chat_history.append({"role": "assistant", "content": output})
2843
+ # Update chat history and context manager (skip if already added during continuation)
2844
+ if not history_already_added:
2845
+ chat_history.append({"role": "user", "content": original_user_input})
2846
+ chat_history.append({"role": "assistant", "content": output})
2847
+
2848
+ # Add messages to context manager for token tracking and pruning
2849
+ ctx_manager.add_message("user", original_user_input)
2850
+ ctx_manager.add_message("assistant", output)
2851
+ else:
2852
+ # During continuation, add the final response with continuation message
2853
+ chat_history.append({"role": "user", "content": user_input})
2854
+ chat_history.append({"role": "assistant", "content": output})
2855
+ ctx_manager.add_message("user", user_input)
2856
+ ctx_manager.add_message("assistant", output)
1554
2857
 
1555
2858
  except KeyboardInterrupt:
1556
2859
  console.print("\n\n[yellow]Interrupted. Type 'exit' to quit or continue chatting.[/yellow]")
1557
2860
  continue
1558
2861
  except EOFError:
2862
+ # Save final session state before exiting
2863
+ try:
2864
+ from .tools import update_session_metadata, to_portable_path
2865
+ update_session_metadata(current_session_id, {
2866
+ 'agent_source': to_portable_path(current_agent_file) if current_agent_file else None,
2867
+ 'model': current_model or llm_model_display,
2868
+ 'temperature': current_temperature if current_temperature is not None else llm_temperature_display,
2869
+ 'allowed_directories': allowed_directories,
2870
+ 'added_toolkit_configs': list(added_toolkit_configs),
2871
+ 'added_mcps': [m if isinstance(m, str) else m.get('name') for m in agent_def.get('mcps', [])],
2872
+ })
2873
+ except Exception as e:
2874
+ logger.debug(f"Failed to save session state on exit: {e}")
1559
2875
  console.print("\n\n[bold cyan]Goodbye! 👋[/bold cyan]")
1560
2876
  break
1561
2877
 
@@ -1593,40 +2909,24 @@ def agent_run(ctx, agent_source: str, message: str, version: Optional[str],
1593
2909
  temperature: Optional[float], max_tokens: Optional[int],
1594
2910
  save_thread: Optional[str], work_dir: Optional[str],
1595
2911
  verbose: str):
1596
- """
1597
- Run agent with a single message (handoff mode).
2912
+ """Run agent with a single message (handoff mode).
1598
2913
 
2914
+ \b
1599
2915
  AGENT_SOURCE can be:
1600
- - Platform agent ID or name
1601
- - Path to local agent file
2916
+ - Platform agent ID or name
2917
+ - Path to local agent file
1602
2918
 
1603
2919
  MESSAGE is the input message to send to the agent.
1604
2920
 
2921
+ \b
1605
2922
  Examples:
1606
-
1607
- # Simple query
1608
- alita-cli agent run my-agent "What is the status of JIRA-123?"
1609
-
1610
- # With local agent
1611
- alita-cli agent run .github/agents/sdk-dev.agent.md \\
1612
- "Create a new toolkit for Stripe API"
1613
-
1614
- # With toolkit configs and JSON output
1615
- alita-cli --output json agent run my-agent "Search for bugs" \\
1616
- --toolkit-config jira-config.json
1617
-
1618
- # With filesystem access
1619
- alita-cli agent run my-agent "Analyze the code in src/" --dir ./myproject
1620
-
1621
- # Save thread for continuation
1622
- alita-cli agent run my-agent "Start task" \\
1623
- --save-thread thread.txt
1624
-
1625
- # Quiet mode (hide tool calls and thinking)
1626
- alita-cli agent run my-agent "Query" --verbose quiet
1627
-
1628
- # Debug mode (show all including LLM calls)
1629
- alita-cli agent run my-agent "Query" --verbose debug
2923
+ alita run my-agent "What is the status of JIRA-123?"
2924
+ alita run ./agent.md "Create a new toolkit for Stripe API"
2925
+ alita -o json run my-agent "Search for bugs" --toolkit-config jira.json
2926
+ alita run my-agent "Analyze code" --dir ./myproject
2927
+ alita run my-agent "Start task" --save-thread thread.txt
2928
+ alita run my-agent "Query" -v quiet
2929
+ alita run my-agent "Query" -v debug
1630
2930
  """
1631
2931
  formatter = ctx.obj['formatter']
1632
2932
  client = get_client(ctx)
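The docstring rewrite above leans on Click's \b marker, which tells the help formatter to print the following paragraph verbatim instead of re-wrapping it. A standalone illustration of the same convention; the command name and options are made up:

    import click

    @click.command()
    def demo():
        """Do something useful.

        \b
        Examples:
          demo --source ./agent.md
          demo --source my-agent --output json
        """
        # The paragraph after \b is rendered as-is in `demo --help`.

    if __name__ == "__main__":
        demo()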
@@ -1909,3 +3209,393 @@ def agent_run(ctx, agent_source: str, message: str, version: Optional[str],
1909
3209
  )
1910
3210
  console.print(error_panel, style="red")
1911
3211
  raise click.Abort()
3212
+
3213
+
3214
+ @agent.command('execute-test-cases')
3215
+ @click.argument('agent_source')
3216
+ @click.option('--test-cases-dir', required=True, type=click.Path(exists=True, file_okay=False, dir_okay=True),
3217
+ help='Directory containing test case files')
3218
+ @click.option('--results-dir', required=True, type=click.Path(file_okay=False, dir_okay=True),
3219
+ help='Directory where test results will be saved')
3220
+ @click.option('--test-case', 'test_case_files', multiple=True,
3221
+ help='Specific test case file(s) to execute (e.g., TC-001.md). Can specify multiple times. If not specified, executes all test cases.')
3222
+ @click.option('--model', help='Override LLM model')
3223
+ @click.option('--temperature', type=float, help='Override temperature')
3224
+ @click.option('--max-tokens', type=int, help='Override max tokens')
3225
+ @click.option('--dir', 'work_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True),
3226
+ help='Grant agent filesystem access to this directory')
3227
+ @click.option('--data-generator', type=click.Path(exists=True),
3228
+ help='Path to test data generator agent definition file')
3229
+ @click.option('--skip-data-generation', is_flag=True,
3230
+ help='Skip test data generation step')
3231
+ @click.pass_context
3232
+ def execute_test_cases(ctx, agent_source: str, test_cases_dir: str, results_dir: str,
3233
+ test_case_files: tuple, model: Optional[str], temperature: Optional[float],
3234
+ max_tokens: Optional[int], work_dir: Optional[str],
3235
+ data_generator: Optional[str], skip_data_generation: bool):
3236
+ """
3237
+ Execute test cases from a directory and save results.
3238
+
3239
+ This command:
3240
+ 1. (Optional) Executes test data generator agent to provision test data
3241
+ 2. Scans TEST_CASES_DIR for test case markdown files (TC-*.md)
3242
+ 3. For each test case:
3243
+ - Parses the test case to extract config, steps, and expectations
3244
+ - Loads the agent with the toolkit config specified in the test case
3245
+ - Executes each test step
3246
+ - Validates output against expectations
3247
+ - Generates a test result file
3248
+ 4. Saves all results to RESULTS_DIR
3249
+
3250
+ AGENT_SOURCE: Path to agent definition file (e.g., .github/agents/test-runner.agent.md)
3251
+
3252
+ \b
3253
+ Examples:
3254
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results
3255
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \\
3256
+ --data-generator ./data-gen.json
3257
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \\
3258
+ --test-case TC-001.md --test-case TC-002.md
3259
+ alita execute-test-cases ./agent.json --test-cases-dir ./tests --results-dir ./results \\
3260
+ --skip-data-generation --model gpt-4o
3261
+ """
3262
+ config = ctx.obj['config']
3263
+ client = get_client(ctx)
3264
+
3265
+ try:
3266
+ # Load agent definition
3267
+ if not Path(agent_source).exists():
3268
+ raise click.ClickException(f"Agent definition not found: {agent_source}")
3269
+
3270
+ agent_def = load_agent_definition(agent_source)
3271
+ agent_name = agent_def.get('name', Path(agent_source).stem)
3272
+
3273
+ # Find all test case files (recursively search subdirectories)
3274
+ test_cases_path = Path(test_cases_dir)
3275
+
3276
+ # Filter test cases based on --test-case options
3277
+ if test_case_files:
3278
+ # User specified specific test case files
3279
+ test_case_files_set = set(test_case_files)
3280
+ all_test_cases = sorted(test_cases_path.rglob('TC-*.md'))
3281
+ test_case_files_list = [
3282
+ tc for tc in all_test_cases
3283
+ if tc.name in test_case_files_set
3284
+ ]
3285
+
3286
+ # Check if all specified files were found
3287
+ found_names = {tc.name for tc in test_case_files_list}
3288
+ not_found = test_case_files_set - found_names
3289
+ if not_found:
3290
+ console.print(f"[yellow]⚠ Warning: Test case files not found: {', '.join(not_found)}[/yellow]")
3291
+ else:
3292
+ # Execute all test cases
3293
+ test_case_files_list = sorted(test_cases_path.rglob('TC-*.md'))
3294
+
3295
+ if not test_case_files_list:
3296
+ if test_case_files:
3297
+ console.print(f"[yellow]No matching test case files found in {test_cases_dir}[/yellow]")
3298
+ else:
3299
+ console.print(f"[yellow]No test case files found in {test_cases_dir}[/yellow]")
3300
+ return
3301
+
3302
+ console.print(f"\n[bold cyan]🧪 Test Execution Started[/bold cyan]")
3303
+ console.print(f"Agent: [bold]{agent_name}[/bold]")
3304
+ console.print(f"Test Cases: {len(test_case_files_list)}")
3305
+ if test_case_files:
3306
+ console.print(f"Selected: [cyan]{', '.join(test_case_files)}[/cyan]")
3307
+ console.print(f"Results Directory: {results_dir}\n")
3308
+
3309
+ data_gen_def = None
3310
+ if data_generator and not skip_data_generation:
3311
+ try:
3312
+ data_gen_def = load_agent_definition(data_generator)
3313
+ data_gen_name = data_gen_def.get('name', Path(data_generator).stem)
3314
+ console.print(f"Data Generator Agent: [bold]{data_gen_name}[/bold]\n")
3315
+ except Exception as e:
3316
+ console.print(f"[yellow]⚠ Warning: Failed to setup data generator: {e}[/yellow]")
3317
+ console.print("[yellow]Continuing with test execution...[/yellow]\n")
3318
+ logger.debug(f"Data generator setup error: {e}", exc_info=True)
3319
+
3320
+ # Track overall results
3321
+ total_tests = 0
3322
+ passed_tests = 0
3323
+ failed_tests = 0
3324
+ test_results = [] # Store structured results for final report
3325
+
3326
+ # Store bulk data generation chat history to pass to test executors
3327
+ bulk_gen_chat_history = []
3328
+
3329
+ # Parse all test cases upfront for bulk data generation
3330
+ parsed_test_cases = []
3331
+ for test_file in test_case_files_list:
3332
+ try:
3333
+ test_case = parse_test_case(str(test_file))
3334
+ parsed_test_cases.append({
3335
+ 'file': test_file,
3336
+ 'data': test_case
3337
+ })
3338
+ except Exception as e:
3339
+ console.print(f"[yellow]⚠ Warning: Failed to parse {test_file.name}: {e}[/yellow]")
3340
+ logger.debug(f"Parse error for {test_file.name}: {e}", exc_info=True)
3341
+
3342
+ # Filter test cases that need data generation
3343
+ test_cases_needing_data_gen = [
3344
+ tc for tc in parsed_test_cases
3345
+ if tc['data'].get('generate_test_data', True)
3346
+ ]
3347
+
3348
+ # Bulk test data generation (if enabled)
3349
+ if data_gen_def and not skip_data_generation and test_cases_needing_data_gen:
3350
+ console.print(f"\n[bold yellow]🔧 Bulk Test Data Generation[/bold yellow]")
3351
+ console.print(f"Generating test data for {len(test_cases_needing_data_gen)} test cases...\n")
3352
+ console.print(f"[dim]Skipping {len(parsed_test_cases) - len(test_cases_needing_data_gen)} test cases with generateTestData: false[/dim]\n")
3353
+
3354
+ bulk_data_gen_prompt = _build_bulk_data_gen_prompt(test_cases_needing_data_gen)
3355
+
3356
+ console.print(f"Executing test data generation prompt {bulk_data_gen_prompt}\n")
3357
+
3358
+ try:
3359
+ # Setup data generator agent
3360
+ from langgraph.checkpoint.sqlite import SqliteSaver
3361
+ bulk_memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
3362
+
3363
+ # Use first test case's config or empty tuple
3364
+ first_config_path = None
3365
+ if parsed_test_cases:
3366
+ first_tc = parsed_test_cases[0]
3367
+ first_config_path = resolve_toolkit_config_path(
3368
+ first_tc['data'].get('config_path', ''),
3369
+ first_tc['file'],
3370
+ test_cases_path
3371
+ )
3372
+
3373
+ data_gen_config_tuple = (first_config_path,) if first_config_path else ()
3374
+ data_gen_executor, _, _, _, _, _, _ = _setup_local_agent_executor(
3375
+ client, data_gen_def, data_gen_config_tuple, config,
3376
+ model, temperature, max_tokens, bulk_memory, work_dir
3377
+ )
3378
+
3379
+ if data_gen_executor:
3380
+ with console.status("[yellow]Generating test data for all test cases...[/yellow]", spinner="dots"):
3381
+ bulk_gen_result = data_gen_executor.invoke({
3382
+ "input": bulk_data_gen_prompt,
3383
+ "chat_history": []
3384
+ })
3385
+ bulk_gen_output = extract_output_from_result(bulk_gen_result)
3386
+ console.print(f"[green]✓ Bulk test data generation completed[/green]")
3387
+ console.print(f"[dim]{bulk_gen_output}...[/dim]\n")
3388
+
3389
+ # Store chat history from data generation to pass to test executors
3390
+ bulk_gen_chat_history = [
3391
+ {"role": "user", "content": bulk_data_gen_prompt},
3392
+ {"role": "assistant", "content": bulk_gen_output}
3393
+ ]
3394
+ else:
3395
+ console.print(f"[yellow]⚠ Warning: Data generator has no executor[/yellow]\n")
3396
+ except Exception as e:
3397
+ console.print(f"[yellow]⚠ Warning: Bulk data generation failed: {e}[/yellow]")
3398
+ console.print("[yellow]Continuing with test execution...[/yellow]\n")
3399
+ logger.debug(f"Bulk data generation error: {e}", exc_info=True)
3400
+
3401
+ # Execute ALL test cases in one bulk operation
3402
+ if not parsed_test_cases:
3403
+ console.print("[yellow]No test cases to execute[/yellow]")
3404
+ return
3405
+
3406
+ console.print(f"\n[bold yellow]📋 Executing ALL test cases in bulk...[/bold yellow]\n")
3407
+
3408
+ # Use first test case's config for agent setup
3409
+ first_tc = parsed_test_cases[0]
3410
+ first_test_file = first_tc['file']
3411
+ toolkit_config_path = resolve_toolkit_config_path(
3412
+ first_tc['data'].get('config_path', ''),
3413
+ first_test_file,
3414
+ test_cases_path
3415
+ )
3416
+ toolkit_config_tuple = (toolkit_config_path,) if toolkit_config_path else ()
3417
+
3418
+ # Create memory for bulk execution
3419
+ from langgraph.checkpoint.sqlite import SqliteSaver
3420
+ memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
3421
+
3422
+ # Initialize chat history with bulk data generation context
3423
+ chat_history = bulk_gen_chat_history.copy()
3424
+
3425
+ # Setup agent executor
3426
+ agent_executor, _, _, _, _, _, _ = _setup_local_agent_executor(
3427
+ client, agent_def, toolkit_config_tuple, config, model, temperature, max_tokens, memory, work_dir
3428
+ )
3429
+
3430
+ # Build bulk execution prompt
3431
+ bulk_all_prompt = _build_bulk_execution_prompt(parsed_test_cases)
3432
+
3433
+ console.print(f"Executing the prompt: {bulk_all_prompt}\n")
3434
+
3435
+ # Execute all test cases in bulk
3436
+ test_results = []
3437
+ all_execution_output = ""
3438
+
3439
+ try:
3440
+ if agent_executor:
3441
+ with console.status(f"[yellow]Executing {len(parsed_test_cases)} test cases in bulk...[/yellow]", spinner="dots"):
3442
+ bulk_result = agent_executor.invoke({
3443
+ "input": bulk_all_prompt,
3444
+ "chat_history": chat_history
3445
+ })
3446
+ all_execution_output = extract_output_from_result(bulk_result)
3447
+
3448
+ console.print(f"[green]✓ All test cases executed[/green]")
3449
+ console.print(f"[dim]{all_execution_output}...[/dim]\n")
3450
+
3451
+ # Update chat history
3452
+ chat_history.append({"role": "user", "content": bulk_all_prompt})
3453
+ chat_history.append({"role": "assistant", "content": all_execution_output})
3454
+
3455
+ # Now validate ALL test cases in bulk
3456
+ console.print(f"[bold yellow]✅ Validating all test cases...[/bold yellow]\n")
3457
+
3458
+ validation_prompt = _build_validation_prompt(parsed_test_cases, all_execution_output)
3459
+
3460
+ console.print(f"[dim]{validation_prompt}[/dim]\n")
3461
+
3462
+ with console.status("[yellow]Validating all results...[/yellow]", spinner="dots"):
3463
+ validation_result = agent_executor.invoke({
3464
+ "input": validation_prompt,
3465
+ "chat_history": chat_history
3466
+ })
3467
+
3468
+ validation_output = extract_output_from_result(validation_result)
3469
+
3470
+ console.print(f"[dim]Validation Response: {validation_output}...[/dim]\n")
3471
+
3472
+ # Parse validation JSON
3473
+ try:
3474
+ validation_json = _extract_json_from_text(validation_output)
3475
+ test_cases_results = validation_json.get('test_cases', [])
3476
+
3477
+ # Process results for each test case
3478
+ total_tests = 0
3479
+ passed_tests = 0
3480
+ failed_tests = 0
3481
+
3482
+ for tc_result in test_cases_results:
3483
+ test_name = tc_result.get('test_name', f"Test #{tc_result.get('test_number', '?')}")
3484
+ step_results = tc_result.get('steps', [])
3485
+
3486
+ # Determine if test passed (all steps must pass)
3487
+ test_passed = all(step.get('passed', False) for step in step_results) if step_results else False
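+ # A test case that comes back with no step results is counted as failed rather than
+ # passed, so missing validation data does not inflate the pass rate.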
+
+ total_tests += 1
+ if test_passed:
+ passed_tests += 1
+ console.print(f"[bold green]✅ Test PASSED: {test_name}[/bold green]")
+ else:
+ failed_tests += 1
+ console.print(f"[bold red]❌ Test FAILED: {test_name}[/bold red]")
+
+ # Display individual step results
+ for step_result in step_results:
+ step_num = step_result.get('step_number')
+ step_title = step_result.get('title', '')
+ passed = step_result.get('passed', False)
+ details = step_result.get('details', '')
+
+ if passed:
+ console.print(f" [green]✓ Step {step_num}: {step_title}[/green]")
+ console.print(f" [dim]{details}[/dim]")
+ else:
+ console.print(f" [red]✗ Step {step_num}: {step_title}[/red]")
+ console.print(f" [dim]{details}[/dim]")
+
+ console.print()
+
+ # Store result
+ test_results.append({
+ 'title': test_name,
+ 'passed': test_passed,
+ 'file': parsed_test_cases[tc_result.get('test_number', 1) - 1]['file'].name if tc_result.get('test_number', 1) - 1 < len(parsed_test_cases) else 'unknown',
+ 'step_results': step_results
+ })
+
+ except Exception as e:
+ logger.debug(f"Validation parsing failed: {e}")
+ console.print(f"[yellow]⚠ Warning: Could not parse validation results: {e}[/yellow]\n")
+ test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
+ else:
+ console.print(f"[red]✗ No agent executor available[/red]\n")
+ test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
+
+ except Exception as e:
+ console.print(f"[red]✗ Bulk execution failed: {e}[/red]\n")
+ logger.debug(f"Bulk execution error: {e}", exc_info=True)
+ test_results, total_tests, passed_tests, failed_tests = _create_fallback_results(parsed_test_cases)
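+ # _create_fallback_results is assumed (from the unpacking above) to return the tuple
+ # (test_results, total_tests, passed_tests, failed_tests), presumably marking the
+ # parsed test cases as not passed when execution or validation could not complete.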
+
+ # Generate summary report
+ console.print(f"\n[bold]{'='*60}[/bold]")
+ console.print(f"[bold cyan]📊 Test Execution Summary[/bold cyan]")
+ console.print(f"[bold]{'='*60}[/bold]\n")
+
+ summary_table = Table(box=box.ROUNDED, border_style="cyan")
+ summary_table.add_column("Metric", style="bold")
+ summary_table.add_column("Value", justify="right")
+
+ summary_table.add_row("Total Tests", str(total_tests))
+ summary_table.add_row("Passed", f"[green]{passed_tests}[/green]")
+ summary_table.add_row("Failed", f"[red]{failed_tests}[/red]")
+
+ if total_tests > 0:
+ pass_rate = (passed_tests / total_tests) * 100
+ summary_table.add_row("Pass Rate", f"{pass_rate:.1f}%")
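+ # e.g. 3 passed out of 4 executed tests gives a pass rate of (3 / 4) * 100 = 75.0%.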
+
+ console.print(summary_table)
+
+ # Generate structured JSON report
+ overall_result = "pass" if failed_tests == 0 else "fail"
+
+ structured_report = {
+ "test_cases": [
+ {
+ "title": r['title'],
+ "passed": r['passed'],
+ "steps": r.get('step_results', [])
+ }
+ for r in test_results
+ ],
+ "overall_result": overall_result,
+ "summary": {
+ "total_tests": total_tests,
+ "passed": passed_tests,
+ "failed": failed_tests,
+ "pass_rate": f"{pass_rate:.1f}%" if total_tests > 0 else "0%"
+ },
+ "timestamp": datetime.now().isoformat()
+ }
+
+ # Save structured report
+ results_path = Path(results_dir)
+ results_path.mkdir(parents=True, exist_ok=True)
+ summary_file = results_path / "test_execution_summary.json"
+
+ console.print(f"\n[bold yellow]💾 Saving test execution summary...[/bold yellow]")
+ with open(summary_file, 'w') as f:
+ json.dump(structured_report, f, indent=2)
+ console.print(f"[green]✓ Summary saved to {summary_file}[/green]\n")
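+ # The saved test_execution_summary.json mirrors structured_report above; an
+ # illustrative (not actual) example of its contents:
+ # {
+ #   "test_cases": [{"title": "Example test", "passed": true, "steps": []}],
+ #   "overall_result": "pass",
+ #   "summary": {"total_tests": 1, "passed": 1, "failed": 0, "pass_rate": "100.0%"},
+ #   "timestamp": "2024-01-01T12:00:00"
+ # }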
+
+ # Exit with error code if any tests failed
+ if failed_tests > 0:
+ sys.exit(1)
+
+ except click.ClickException:
+ raise
+ except Exception as e:
+ logger.exception("Failed to execute test cases")
+ error_panel = Panel(
+ str(e),
+ title="Error",
+ border_style="red",
+ box=box.ROUNDED
+ )
+ console.print(error_panel, style="red")
+ raise click.Abort()
+