amd-gaia 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5632
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -723
  180. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/agents/base/agent.py CHANGED
@@ -1,2177 +1,2177 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """
4
- Generic Agent class for building domain-specific agents.
5
- """
6
-
7
- # Standard library imports
8
- import abc
9
- import datetime
10
- import inspect
11
- import json
12
- import logging
13
- import os
14
- import re
15
- import subprocess
16
- import uuid
17
- from typing import Any, Dict, List, Optional
18
-
19
- from gaia.agents.base.console import AgentConsole, SilentConsole
20
- from gaia.agents.base.errors import format_execution_trace
21
- from gaia.agents.base.tools import _TOOL_REGISTRY
22
-
23
- # First-party imports
24
- from gaia.chat.sdk import ChatConfig, ChatSDK
25
-
26
- # Set up logging
27
- logging.basicConfig(level=logging.INFO)
28
- logger = logging.getLogger(__name__)
29
-
30
- # Content truncation thresholds
31
- CHUNK_TRUNCATION_THRESHOLD = 5000
32
- CHUNK_TRUNCATION_SIZE = 2500
33
-
34
-
35
- class Agent(abc.ABC):
36
- """
37
- Base Agent class that provides core functionality for domain-specific agents.
38
-
39
- The Agent class handles the core conversation loop, tool execution, and LLM
40
- interaction patterns. It provides:
41
- - Conversation management with an LLM
42
- - Tool registration and execution framework
43
- - JSON response parsing and validation
44
- - Error handling and recovery
45
- - State management for multi-step plans
46
- - Output formatting and file writing
47
- - Configurable prompt display for debugging
48
-
49
- Key Parameters:
50
- debug: Enable general debug output and logging
51
- show_prompts: Display prompts sent to LLM (useful for debugging prompts)
52
- debug_prompts: Include prompts in conversation history for analysis
53
- streaming: Enable real-time streaming of LLM responses
54
- silent_mode: Suppress all console output for JSON-only usage
55
- """
56
-
57
- # Define state constants
58
- STATE_PLANNING = "PLANNING"
59
- STATE_EXECUTING_PLAN = "EXECUTING_PLAN"
60
- STATE_DIRECT_EXECUTION = "DIRECT_EXECUTION"
61
- STATE_ERROR_RECOVERY = "ERROR_RECOVERY"
62
- STATE_COMPLETION = "COMPLETION"
63
-
64
- # Define tools that can execute directly without requiring a plan
65
- # Subclasses can override this to specify domain-specific simple tools
66
- SIMPLE_TOOLS = []
67
-
68
- def __init__(
69
- self,
70
- use_claude: bool = False,
71
- use_chatgpt: bool = False,
72
- claude_model: str = "claude-sonnet-4-20250514",
73
- base_url: Optional[str] = None,
74
- model_id: str = None,
75
- max_steps: int = 5,
76
- debug_prompts: bool = False,
77
- show_prompts: bool = False,
78
- output_dir: str = None,
79
- streaming: bool = False,
80
- show_stats: bool = False,
81
- silent_mode: bool = False,
82
- debug: bool = False,
83
- output_handler=None,
84
- max_plan_iterations: int = 3,
85
- min_context_size: int = 32768,
86
- skip_lemonade: bool = False,
87
- ):
88
- """
89
- Initialize the Agent with LLM client.
90
-
91
- Args:
92
- use_claude: If True, uses Claude API (default: False)
93
- use_chatgpt: If True, uses ChatGPT/OpenAI API (default: False)
94
- claude_model: Claude model to use when use_claude=True (default: "claude-sonnet-4-20250514")
95
- base_url: Base URL for local LLM server (default: reads from LEMONADE_BASE_URL env var, falls back to http://localhost:8000/api/v1)
96
- model_id: The ID of the model to use with LLM server (default for local)
97
- max_steps: Maximum number of steps the agent can take before terminating
98
- debug_prompts: If True, includes prompts in the conversation history
99
- show_prompts: If True, displays prompts sent to LLM in console (default: False)
100
- output_dir: Directory for storing JSON output files (default: current directory)
101
- streaming: If True, enables real-time streaming of LLM responses (default: False)
102
- show_stats: If True, displays LLM performance stats after each response (default: False)
103
- silent_mode: If True, suppresses all console output for JSON-only usage (default: False)
104
- debug: If True, enables debug output for troubleshooting (default: False)
105
- output_handler: Custom OutputHandler for displaying agent output (default: None, creates console based on silent_mode)
106
- max_plan_iterations: Maximum number of plan-execute-replan cycles (default: 3, 0 = unlimited)
107
- min_context_size: Minimum context size required for this agent (default: 32768).
108
- skip_lemonade: If True, skip Lemonade server initialization (default: False).
109
- Use this when connecting to a different OpenAI-compatible backend.
110
-
111
- Note: Uses local LLM server by default unless use_claude or use_chatgpt is True.
112
- """
113
- self.error_history = [] # Store error history for learning
114
- self.conversation_history = (
115
- []
116
- ) # Store conversation history for session persistence
117
- self.max_steps = max_steps
118
- self.debug_prompts = debug_prompts
119
- self.show_prompts = show_prompts # Separate flag for displaying prompts
120
- self.output_dir = output_dir if output_dir else os.getcwd()
121
- self.streaming = streaming
122
- self.show_stats = show_stats
123
- self.silent_mode = silent_mode
124
- self.debug = debug
125
- self.last_result = None # Store the most recent result
126
- self.max_plan_iterations = max_plan_iterations
127
- self._current_query: Optional[str] = (
128
- None # Store current query for error context
129
- )
130
-
131
- # Read base_url from environment if not provided
132
- if base_url is None:
133
- base_url = os.getenv("LEMONADE_BASE_URL", "http://localhost:8000/api/v1")
134
-
135
- # Lazy Lemonade initialization for local LLM users
136
- # This ensures Lemonade server is running before we try to use it
137
- if not (use_claude or use_chatgpt or skip_lemonade):
138
- from gaia.llm.lemonade_manager import LemonadeManager
139
-
140
- LemonadeManager.ensure_ready(
141
- min_context_size=min_context_size,
142
- quiet=silent_mode,
143
- base_url=base_url,
144
- )
145
-
146
- # Initialize state management
147
- self.execution_state = self.STATE_PLANNING
148
- self.current_plan = None
149
- self.current_step = 0
150
- self.total_plan_steps = 0
151
- self.plan_iterations = 0 # Track number of plan cycles
152
-
153
- # Initialize the console/output handler for display
154
- # If output_handler is provided, use it; otherwise create based on silent_mode
155
- if output_handler is not None:
156
- self.console = output_handler
157
- else:
158
- self.console = self._create_console()
159
-
160
- # Initialize LLM client for local model
161
- self.system_prompt = self._get_system_prompt()
162
-
163
- # Register tools for this agent
164
- self._register_tools()
165
-
166
- # Update system prompt with available tools and response format
167
- tools_description = self._format_tools_for_prompt()
168
- self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n"
169
-
170
- # Add JSON response format instructions (shared across all agents)
171
- self.system_prompt += """
172
- ==== RESPONSE FORMAT ====
173
- You must respond ONLY in valid JSON. No text before { or after }.
174
-
175
- **To call a tool:**
176
- {"thought": "reasoning", "goal": "objective", "tool": "tool_name", "tool_args": {"arg1": "value1"}}
177
-
178
- **To create a multi-step plan:**
179
- {
180
- "thought": "reasoning",
181
- "goal": "objective",
182
- "plan": [
183
- {"tool": "tool1", "tool_args": {"arg": "val"}},
184
- {"tool": "tool2", "tool_args": {"arg": "val"}}
185
- ],
186
- "tool": "tool1",
187
- "tool_args": {"arg": "val"}
188
- }
189
-
190
- **To provide a final answer:**
191
- {"thought": "reasoning", "goal": "achieved", "answer": "response to user"}
192
-
193
- **RULES:**
194
- 1. ALWAYS use tools for real data - NEVER hallucinate
195
- 2. Plan steps MUST be objects like {"tool": "x", "tool_args": {}}, NOT strings
196
- 3. After tool results, provide an "answer" summarizing them
197
- """
198
-
199
- # Initialize ChatSDK with proper configuration
200
- # Note: We don't set system_prompt in config, we pass it per request
201
- # Note: Context size is configured when starting Lemonade server, not here
202
- # Use Qwen3-Coder-30B by default for better reasoning and JSON formatting
203
- # The 0.5B model is too small for complex agent tasks
204
- chat_config = ChatConfig(
205
- model=model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF",
206
- use_claude=use_claude,
207
- use_chatgpt=use_chatgpt,
208
- claude_model=claude_model,
209
- base_url=base_url,
210
- show_stats=True, # Always collect stats for token tracking
211
- max_history_length=20, # Keep more history for agent conversations
212
- max_tokens=4096, # Increased for complex code generation
213
- )
214
- self.chat = ChatSDK(chat_config)
215
- self.model_id = model_id
216
-
217
- # Print system prompt if show_prompts is enabled
218
- # Debug: Check the actual value of show_prompts
219
- if self.debug:
220
- logger.debug(
221
- f"show_prompts={self.show_prompts}, debug={self.debug}, will show prompt: {self.show_prompts}"
222
- )
223
-
224
- if self.show_prompts:
225
- self.console.print_prompt(self.system_prompt, "Initial System Prompt")
226
-
227
- @abc.abstractmethod
228
- def _get_system_prompt(self) -> str:
229
- """
230
- Generate the system prompt for the agent.
231
- Subclasses must implement this to provide domain-specific prompts.
232
- """
233
- raise NotImplementedError("Subclasses must implement _get_system_prompt")
234
-
235
- def _create_console(self):
236
- """
237
- Create and return a console output handler.
238
- Returns SilentConsole if in silent_mode, otherwise AgentConsole.
239
- Subclasses can override this to provide domain-specific console output.
240
- """
241
- if self.silent_mode:
242
- # Check if we should completely silence everything (including final answer)
243
- # This would be true for JSON-only output or when output_dir is set
244
- silence_final_answer = getattr(self, "output_dir", None) is not None
245
- return SilentConsole(silence_final_answer=silence_final_answer)
246
- return AgentConsole()
247
-
248
- @abc.abstractmethod
249
- def _register_tools(self):
250
- """
251
- Register all domain-specific tools for the agent.
252
- Subclasses must implement this method.
253
- """
254
- raise NotImplementedError("Subclasses must implement _register_tools")
255
-
256
- def _format_tools_for_prompt(self) -> str:
257
- """Format the registered tools into a string for the prompt."""
258
- tool_descriptions = []
259
-
260
- for name, tool_info in _TOOL_REGISTRY.items():
261
- params_str = ", ".join(
262
- [
263
- f"{param_name}{'' if param_info['required'] else '?'}: {param_info['type']}"
264
- for param_name, param_info in tool_info["parameters"].items()
265
- ]
266
- )
267
-
268
- description = tool_info["description"].strip()
269
- tool_descriptions.append(f"- {name}({params_str}): {description}")
270
-
271
- return "\n".join(tool_descriptions)
272
-
273
- def list_tools(self, verbose: bool = True) -> None:
274
- """
275
- Display all tools registered for this agent with their parameters and descriptions.
276
-
277
- Args:
278
- verbose: If True, displays full descriptions and parameter details. If False, shows a compact list.
279
- """
280
- self.console.print_header(f"🛠️ Registered Tools for {self.__class__.__name__}")
281
- self.console.print_separator()
282
-
283
- for name, tool_info in _TOOL_REGISTRY.items():
284
- # Format parameters
285
- params = []
286
- for param_name, param_info in tool_info["parameters"].items():
287
- required = param_info.get("required", False)
288
- param_type = param_info.get("type", "Any")
289
- default = param_info.get("default", None)
290
-
291
- if required:
292
- params.append(f"{param_name}: {param_type}")
293
- else:
294
- default_str = f"={default}" if default is not None else "=None"
295
- params.append(f"{param_name}: {param_type}{default_str}")
296
-
297
- params_str = ", ".join(params)
298
-
299
- # Get description
300
- if verbose:
301
- description = tool_info["description"]
302
- else:
303
- description = (
304
- tool_info["description"].split("\n")[0]
305
- if tool_info["description"]
306
- else "No description"
307
- )
308
-
309
- # Print tool information
310
- self.console.print_tool_info(name, params_str, description)
311
-
312
- self.console.print_separator()
313
-
314
- return None
315
-
316
- def _extract_json_from_response(self, response: str) -> Optional[Dict[str, Any]]:
317
- """
318
- Apply multiple extraction strategies to find valid JSON in the response.
319
-
320
- Args:
321
- response: The raw response from the LLM
322
-
323
- Returns:
324
- Extracted JSON dictionary or None if extraction failed
325
- """
326
- # Strategy 1: Extract JSON from code blocks with various patterns
327
- json_patterns = [
328
- r"```(?:json)?\s*(.*?)\s*```", # Standard code block
329
- r"`json\s*(.*?)\s*`", # Single backtick with json tag
330
- r"<json>\s*(.*?)\s*</json>", # XML-style tags
331
- ]
332
-
333
- for pattern in json_patterns:
334
- matches = re.findall(pattern, response, re.DOTALL)
335
- for match in matches:
336
- try:
337
- result = json.loads(match)
338
- # Ensure tool_args exists if tool is present
339
- if "tool" in result and "tool_args" not in result:
340
- result["tool_args"] = {}
341
- logger.debug(f"Successfully extracted JSON with pattern {pattern}")
342
- return result
343
- except json.JSONDecodeError:
344
- continue
345
-
346
- start_idx = response.find("{")
347
- if start_idx >= 0:
348
- bracket_count = 0
349
- in_string = False
350
- escape_next = False
351
-
352
- for i, char in enumerate(response[start_idx:], start_idx):
353
- if escape_next:
354
- escape_next = False
355
- continue
356
- if char == "\\":
357
- escape_next = True
358
- continue
359
- if char == '"' and not escape_next:
360
- in_string = not in_string
361
- if not in_string:
362
- if char == "{":
363
- bracket_count += 1
364
- elif char == "}":
365
- bracket_count -= 1
366
- if bracket_count == 0:
367
- # Found complete JSON object
368
- try:
369
- extracted = response[start_idx : i + 1]
370
- # Fix common issues before parsing
371
- fixed = re.sub(r",\s*}", "}", extracted)
372
- fixed = re.sub(r",\s*]", "]", fixed)
373
- result = json.loads(fixed)
374
- # Ensure tool_args exists if tool is present
375
- if "tool" in result and "tool_args" not in result:
376
- result["tool_args"] = {}
377
- logger.debug(
378
- "Successfully extracted JSON using bracket-matching"
379
- )
380
- return result
381
- except json.JSONDecodeError as e:
382
- logger.debug(f"Bracket-matched JSON parse failed: {e}")
383
- break
384
-
385
- return None
386
-
387
- def validate_json_response(self, response_text: str) -> Dict[str, Any]:
388
- """
389
- Validates and attempts to fix JSON responses from the LLM.
390
-
391
- Attempts the following fixes in order:
392
- 1. Parse as-is if valid JSON
393
- 2. Extract JSON from code blocks
394
- 3. Truncate after first complete JSON object
395
- 4. Fix common JSON syntax errors
396
- 5. Extract JSON-like content using regex
397
-
398
- Args:
399
- response_text: The response string from the LLM
400
-
401
- Returns:
402
- A dictionary containing the parsed JSON if valid
403
-
404
- Raises:
405
- ValueError: If the response cannot be parsed as JSON or is missing required fields
406
- """
407
- original_response = response_text
408
- json_was_modified = False
409
-
410
- # Step 0: Sanitize control characters to ensure proper JSON format
411
- def sanitize_json_string(text: str) -> str:
412
- """
413
- Ensure JSON strings have properly escaped control characters.
414
-
415
- Args:
416
- text: JSON text that may contain unescaped control characters
417
-
418
- Returns:
419
- Sanitized JSON text with properly escaped control characters
420
- """
421
-
422
- def escape_string_content(match):
423
- """Ensure control characters are properly escaped in JSON string values."""
424
- quote = match.group(1)
425
- content = match.group(2)
426
- closing_quote = match.group(3)
427
-
428
- # Ensure proper escaping of control characters
429
- content = content.replace("\n", "\\n")
430
- content = content.replace("\r", "\\r")
431
- content = content.replace("\t", "\\t")
432
- content = content.replace("\b", "\\b")
433
- content = content.replace("\f", "\\f")
434
-
435
- return f"{quote}{content}{closing_quote}"
436
-
437
- # Match JSON strings: "..." handling escaped quotes
438
- pattern = r'(")([^"\\]*(?:\\.[^"\\]*)*)(")'
439
-
440
- try:
441
- return re.sub(pattern, escape_string_content, text)
442
- except Exception as e:
443
- logger.debug(
444
- f"[JSON] String sanitization encountered issue: {e}, using original"
445
- )
446
- return text
447
-
448
- response_text = sanitize_json_string(response_text)
449
-
450
- # Step 1: Try to parse as-is
451
- try:
452
- json_response = json.loads(response_text)
453
- logger.debug("[JSON] Successfully parsed response without modifications")
454
- except json.JSONDecodeError as initial_error:
455
- # Step 2: Try to extract from code blocks
456
- json_match = re.search(
457
- r"```(?:json)?\s*({.*?})\s*```", response_text, re.DOTALL
458
- )
459
- if json_match:
460
- try:
461
- response_text = json_match.group(1)
462
- json_response = json.loads(response_text)
463
- json_was_modified = True
464
- logger.warning("[JSON] Extracted JSON from code block")
465
- except json.JSONDecodeError as e:
466
- logger.debug(f"[JSON] Code block extraction failed: {e}")
467
-
468
- # Step 3: Try to find and extract first complete JSON object
469
- if not json_was_modified:
470
- # Find the first '{' and try to match brackets
471
- start_idx = response_text.find("{")
472
- if start_idx >= 0:
473
- bracket_count = 0
474
- in_string = False
475
- escape_next = False
476
-
477
- for i, char in enumerate(response_text[start_idx:], start_idx):
478
- if escape_next:
479
- escape_next = False
480
- continue
481
- if char == "\\":
482
- escape_next = True
483
- continue
484
- if char == '"' and not escape_next:
485
- in_string = not in_string
486
- if not in_string:
487
- if char == "{":
488
- bracket_count += 1
489
- elif char == "}":
490
- bracket_count -= 1
491
- if bracket_count == 0:
492
- # Found complete JSON object
493
- try:
494
- truncated = response_text[start_idx : i + 1]
495
- json_response = json.loads(truncated)
496
- json_was_modified = True
497
- logger.warning(
498
- f"[JSON] Truncated response after first complete JSON object (removed {len(response_text) - i - 1} chars)"
499
- )
500
- response_text = truncated
501
- break
502
- except json.JSONDecodeError:
503
- logger.debug(
504
- "[JSON] Truncated text is not valid JSON, trying next bracket pair"
505
- )
506
- continue
507
-
508
- # Step 4: Try to fix common JSON errors
509
- if not json_was_modified:
510
- fixed_text = response_text
511
-
512
- # Remove trailing commas
513
- fixed_text = re.sub(r",\s*}", "}", fixed_text)
514
- fixed_text = re.sub(r",\s*]", "]", fixed_text)
515
-
516
- # Fix single quotes to double quotes (carefully)
517
- if "'" in fixed_text and '"' not in fixed_text:
518
- fixed_text = fixed_text.replace("'", '"')
519
-
520
- # Remove any text before first '{' or '['
521
- json_start = min(
522
- fixed_text.find("{") if "{" in fixed_text else len(fixed_text),
523
- fixed_text.find("[") if "[" in fixed_text else len(fixed_text),
524
- )
525
- if json_start > 0 and json_start < len(fixed_text):
526
- fixed_text = fixed_text[json_start:]
527
-
528
- # Try to parse the fixed text
529
- if fixed_text != response_text:
530
- try:
531
- json_response = json.loads(fixed_text)
532
- json_was_modified = True
533
- logger.warning("[JSON] Applied automatic JSON fixes")
534
- response_text = fixed_text
535
- except json.JSONDecodeError as e:
536
- logger.debug(f"[JSON] Auto-fix failed: {e}")
537
-
538
- # If still no valid JSON, raise the original error
539
- if not json_was_modified:
540
- raise ValueError(
541
- f"Failed to parse response as JSON: {str(initial_error)}"
542
- )
543
-
544
- # Log warning if JSON was modified
545
- if json_was_modified:
546
- logger.warning(
547
- f"[JSON] Response was modified to extract valid JSON. Original length: {len(original_response)}, Fixed length: {len(response_text)}"
548
- )
549
-
550
- # Validate required fields
551
- # Note: 'goal' is optional for simple answer responses
552
- if "answer" in json_response:
553
- required_fields = ["thought", "answer"] # goal is optional
554
- elif "tool" in json_response:
555
- required_fields = ["thought", "tool", "tool_args"] # goal is optional
556
- else:
557
- required_fields = ["thought", "plan"] # goal is optional
558
-
559
- missing_fields = [
560
- field for field in required_fields if field not in json_response
561
- ]
562
- if missing_fields:
563
- raise ValueError(
564
- f"Response is missing required fields: {', '.join(missing_fields)}"
565
- )
566
-
567
- return json_response
568
-
569
- def _parse_llm_response(self, response: str) -> Dict[str, Any]:
570
- """
571
- Parse the LLM response to extract tool calls or conversational answers.
572
-
573
- ARCHITECTURE: Supports two response modes
574
- - Plain text for conversation (no JSON required)
575
- - JSON for tool invocations
576
-
577
- Args:
578
- response: The raw response from the LLM
579
-
580
- Returns:
581
- Parsed response as a dictionary
582
- """
583
- # Check for empty responses
584
- if not response or not response.strip():
585
- logger.warning("Empty LLM response received")
586
- self.error_history.append("Empty LLM response")
587
-
588
- # Provide more helpful error message based on context
589
- if hasattr(self, "api_mode") and self.api_mode: # pylint: disable=no-member
590
- answer = "I encountered an issue processing your request. This might be due to a connection problem with the language model. Please try again."
591
- else:
592
- answer = "I apologize, but I received an empty response from the language model. Please try again."
593
-
594
- return {
595
- "thought": "LLM returned empty response",
596
- "goal": "Handle empty response error",
597
- "answer": answer,
598
- }
599
-
600
- response = response.strip()
601
-
602
- # Log what we received for debugging (show more to see full JSON)
603
- if len(response) > 500:
604
- logger.debug(
605
- f"📥 LLM Response ({len(response)} chars): {response[:500]}..."
606
- )
607
- else:
608
- logger.debug(f"📥 LLM Response: {response}")
609
-
610
- # STEP 1: Fast path - detect plain text conversational responses
611
- # If response doesn't start with '{', it's likely plain text
612
- # Accept it immediately without logging errors
613
- if not response.startswith("{"):
614
- logger.debug(
615
- f"[PARSE] Plain text conversational response (length: {len(response)})"
616
- )
617
- return {"thought": "", "goal": "", "answer": response}
618
-
619
- # STEP 2: Response starts with '{' - looks like JSON
620
- # Try direct JSON parsing first (fastest path)
621
- try:
622
- result = json.loads(response)
623
- # Ensure tool_args exists if tool is present
624
- if "tool" in result and "tool_args" not in result:
625
- result["tool_args"] = {}
626
- logger.debug("[PARSE] Valid JSON response")
627
- return result
628
- except json.JSONDecodeError:
629
- # JSON parsing failed - continue to extraction methods
630
- logger.debug("[PARSE] Malformed JSON, trying extraction")
631
-
632
- # STEP 3: Try JSON extraction methods (handles code blocks, mixed text, etc.)
633
- extracted_json = self._extract_json_from_response(response)
634
- if extracted_json:
635
- logger.debug("[PARSE] Extracted JSON successfully")
636
- return extracted_json
637
-
638
- # STEP 4: JSON was expected (starts with '{') but all parsing failed
639
- # Log error ONLY for JSON that couldn't be parsed
640
- logger.debug("Attempting to extract fields using regex")
641
- thought_match = re.search(r'"thought":\s*"([^"]*)"', response)
642
- tool_match = re.search(r'"tool":\s*"([^"]*)"', response)
643
- answer_match = re.search(r'"answer":\s*"([^"]*)"', response)
644
- plan_match = re.search(r'"plan":\s*(\[.*?\])', response, re.DOTALL)
645
-
646
- if answer_match:
647
- result = {
648
- "thought": thought_match.group(1) if thought_match else "",
649
- "goal": "what was achieved",
650
- "answer": answer_match.group(1),
651
- }
652
- logger.debug(f"Extracted answer using regex: {result}")
653
- return result
654
-
655
- if tool_match:
656
- tool_args = {}
657
-
658
- tool_args_start = response.find('"tool_args"')
659
-
660
- if tool_args_start >= 0:
661
- # Find the opening brace after "tool_args":
662
- brace_start = response.find("{", tool_args_start)
663
- if brace_start >= 0:
664
- # Use bracket-matching to find the complete object
665
- bracket_count = 0
666
- in_string = False
667
- escape_next = False
668
- for i, char in enumerate(response[brace_start:], brace_start):
669
- if escape_next:
670
- escape_next = False
671
- continue
672
- if char == "\\":
673
- escape_next = True
674
- continue
675
- if char == '"' and not escape_next:
676
- in_string = not in_string
677
- if not in_string:
678
- if char == "{":
679
- bracket_count += 1
680
- elif char == "}":
681
- bracket_count -= 1
682
- if bracket_count == 0:
683
- # Found complete tool_args object
684
- tool_args_str = response[brace_start : i + 1]
685
- try:
686
- tool_args = json.loads(tool_args_str)
687
- except json.JSONDecodeError as e:
688
- error_msg = f"Failed to parse tool_args JSON: {str(e)}, content: {tool_args_str[:100]}..."
689
- logger.error(error_msg)
690
- self.error_history.append(error_msg)
691
- break
692
-
693
- result = {
694
- "thought": thought_match.group(1) if thought_match else "",
695
- "goal": "clear statement of what you're trying to achieve",
696
- "tool": tool_match.group(1),
697
- "tool_args": tool_args,
698
- }
699
-
700
- # Add plan if found
701
- if plan_match:
702
- try:
703
- result["plan"] = json.loads(plan_match.group(1))
704
- logger.debug(f"Extracted plan using regex: {result['plan']}")
705
- except json.JSONDecodeError as e:
706
- error_msg = f"Failed to parse plan JSON: {str(e)}, content: {plan_match.group(1)[:100]}..."
707
- logger.error(error_msg)
708
- self.error_history.append(error_msg)
709
-
710
- logger.debug(f"Extracted tool call using regex: {result}")
711
- return result
712
-
713
- # Try to match simple key-value patterns for object names (like ': "my_cube"')
714
- obj_name_match = re.search(
715
- r'["\':]?\s*["\'"]?([a-zA-Z0-9_\.]+)["\'"]?', response
716
- )
717
- if obj_name_match:
718
- object_name = obj_name_match.group(1)
719
- # If it looks like an object name and not just a random word
720
- if "." in object_name or "_" in object_name:
721
- logger.debug(f"Found potential object name: {object_name}")
722
- return {
723
- "thought": "Extracted object name",
724
- "goal": "Use the object name",
725
- "answer": object_name,
726
- }
727
-
728
- # CONVERSATIONAL MODE: No JSON found - treat as plain conversational response
729
- # This is normal and expected for chat agents responding to greetings, explanations, etc.
730
- logger.debug(
731
- f"[PARSE] No JSON structure found, treating as conversational response. Length: {len(response)}, preview: {response[:100]}..."
732
- )
733
-
734
- # If response is empty, provide a meaningful fallback
735
- if not response.strip():
736
- logger.warning("[PARSE] Empty response received from LLM")
737
- return {
738
- "thought": "",
739
- "goal": "",
740
- "answer": "I apologize, but I received an empty response. Please try again.",
741
- }
742
-
743
- # Valid conversational response - wrap it in expected format
744
- return {"thought": "", "goal": "", "answer": response.strip()}
745
-
746
- def _execute_tool(self, tool_name: str, tool_args: Dict[str, Any]) -> Any:
747
- """
748
- Execute a tool by name with the provided arguments.
749
-
750
- Args:
751
- tool_name: Name of the tool to execute
752
- tool_args: Arguments to pass to the tool
753
-
754
- Returns:
755
- Result of the tool execution
756
- """
757
- logger.debug(f"Executing tool {tool_name} with args: {tool_args}")
758
-
759
- if tool_name not in _TOOL_REGISTRY:
760
- logger.error(f"Tool '{tool_name}' not found in registry")
761
- return {"status": "error", "error": f"Tool '{tool_name}' not found"}
762
-
763
- tool = _TOOL_REGISTRY[tool_name]["function"]
764
- sig = inspect.signature(tool)
765
-
766
- # Get required parameters (those without defaults)
767
- required_args = {
768
- name: param
769
- for name, param in sig.parameters.items()
770
- if param.default == inspect.Parameter.empty and name != "return"
771
- }
772
-
773
- # Check for missing required arguments
774
- missing_args = [arg for arg in required_args if arg not in tool_args]
775
- if missing_args:
776
- error_msg = (
777
- f"Missing required arguments for {tool_name}: {', '.join(missing_args)}"
778
- )
779
- logger.error(error_msg)
780
- return {"status": "error", "error": error_msg}
781
-
782
- try:
783
- result = tool(**tool_args)
784
- logger.debug(f"Tool execution result: {result}")
785
- return result
786
- except subprocess.TimeoutExpired as e:
787
- # Handle subprocess timeout specifically
788
- error_msg = f"Tool {tool_name} timed out: {str(e)}"
789
- logger.error(error_msg)
790
- self.error_history.append(error_msg)
791
- return {"status": "error", "error": error_msg, "timeout": True}
792
- except Exception as e:
793
- # Format error with full execution trace for debugging
794
- formatted_error = format_execution_trace(
795
- exception=e,
796
- query=getattr(self, "_current_query", None),
797
- plan_step=self.current_step + 1 if self.current_plan else None,
798
- total_steps=self.total_plan_steps if self.current_plan else None,
799
- tool_name=tool_name,
800
- tool_args=tool_args,
801
- )
802
- logger.error(f"Error executing tool {tool_name}: {e}")
803
- self.error_history.append(str(e)) # Store brief error, not formatted
804
-
805
- # Print to console immediately so user sees it
806
- self.console.print_error(formatted_error)
807
-
808
- return {
809
- "status": "error",
810
- "error_brief": str(e), # Brief error message for quick reference
811
- "error_displayed": True, # Flag to prevent duplicate display
812
- "tool_name": tool_name,
813
- "tool_args": tool_args,
814
- "plan_step": self.current_step + 1 if self.current_plan else None,
815
- }
816
-
817
- def _generate_max_steps_message(
818
- self, conversation: List[Dict], steps_taken: int, steps_limit: int
819
- ) -> str:
820
- """Generate informative message when max steps is reached.
821
-
822
- Args:
823
- conversation: The conversation history
824
- steps_taken: Number of steps actually taken
825
- steps_limit: Maximum steps allowed
826
-
827
- Returns:
828
- Informative message about what was accomplished
829
- """
830
- # Analyze what was done
831
- tool_calls = [
832
- msg
833
- for msg in conversation
834
- if msg.get("role") == "assistant" and "tool_calls" in msg
835
- ]
836
-
837
- tools_used = []
838
- for msg in tool_calls:
839
- for tool_call in msg.get("tool_calls", []):
840
- if "function" in tool_call:
841
- tools_used.append(tool_call["function"]["name"])
842
-
843
- message = f"⚠️ Reached maximum steps limit ({steps_limit} steps)\n\n"
844
- message += f"Completed {steps_taken} steps using these tools:\n"
845
-
846
- # Count tool usage
847
- from collections import Counter
848
-
849
- tool_counts = Counter(tools_used)
850
- for tool, count in tool_counts.most_common(10):
851
- message += f" - {tool}: {count}x\n"
852
-
853
- message += "\nTo continue or complete this task:\n"
854
- message += "1. Review the generated files and progress so far\n"
855
- message += f"2. Run with --max-steps {steps_limit + 50} to allow more steps\n"
856
- message += "3. Or complete remaining tasks manually\n"
857
-
858
- return message
859
-
860
- def _write_json_to_file(self, data: Dict[str, Any], filename: str = None) -> str:
861
- """
862
- Write JSON data to a file and return the absolute path.
863
-
864
- Args:
865
- data: Dictionary data to write as JSON
866
- filename: Optional filename, if None a timestamped name will be generated
867
-
868
- Returns:
869
- Absolute path to the saved file
870
- """
871
- # Ensure output directory exists
872
- os.makedirs(self.output_dir, exist_ok=True)
873
-
874
- # Generate filename if not provided
875
- if not filename:
876
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
877
- filename = f"agent_output_{timestamp}.json"
878
-
879
- # Ensure filename has .json extension
880
- if not filename.endswith(".json"):
881
- filename += ".json"
882
-
883
- # Create absolute path
884
- file_path = os.path.join(self.output_dir, filename)
885
-
886
- # Write JSON data to file
887
- with open(file_path, "w", encoding="utf-8") as f:
888
- json.dump(data, f, indent=2)
889
-
890
- return os.path.abspath(file_path)
891
-
892
- def _handle_large_tool_result(
893
- self,
894
- tool_name: str,
895
- tool_result: Any,
896
- conversation: List[Dict[str, Any]],
897
- tool_args: Optional[Dict[str, Any]] = None,
898
- ) -> Any:
899
- """
900
- Handle large tool results by truncating them if necessary.
901
-
902
- Args:
903
- tool_name: Name of the executed tool
904
- tool_result: The result from tool execution
905
- conversation: The conversation list to append to
906
- tool_args: Arguments passed to the tool (optional)
907
-
908
- Returns:
909
- The truncated result or original if within limits
910
- """
911
- truncated_result = tool_result
912
- if isinstance(tool_result, (dict, list)):
913
- # Use custom encoder to handle bytes and other non-serializable types
914
- result_str = json.dumps(tool_result, default=self._json_serialize_fallback)
915
- if (
916
- len(result_str) > 30000
917
- ): # Threshold for truncation (appropriate for 32K context)
918
- # Truncate large results to prevent overwhelming the LLM
919
- truncated_str = self._truncate_large_content(
920
- tool_result, max_chars=20000 # Increased for 32K context
921
- )
922
- try:
923
- truncated_result = json.loads(truncated_str)
924
- except json.JSONDecodeError:
925
- # If truncated string isn't valid JSON, use it as-is
926
- truncated_result = truncated_str
927
- # Notify user about truncation
928
- self.console.print_info(
929
- f"Note: Large result ({len(result_str)} chars) truncated for LLM context"
930
- )
931
- if self.debug:
932
- print(f"[DEBUG] Tool result truncated from {len(result_str)} chars")
933
-
934
- # Add to conversation
935
- tool_entry: Dict[str, Any] = {
936
- "role": "tool",
937
- "name": tool_name,
938
- "content": truncated_result,
939
- }
940
- if tool_args is not None:
941
- tool_entry["tool_args"] = tool_args
942
- conversation.append(tool_entry)
943
- return truncated_result
944
-
945
- def _create_tool_message(self, tool_name: str, tool_output: Any) -> Dict[str, Any]:
946
- """
947
- Build a message structure representing a tool output for downstream LLM calls.
948
- """
949
- if isinstance(tool_output, str):
950
- text_content = tool_output
951
- else:
952
- text_content = self._truncate_large_content(tool_output, max_chars=2000)
953
-
954
- if not isinstance(text_content, str):
955
- text_content = json.dumps(
956
- tool_output, default=self._json_serialize_fallback
957
- )
958
-
959
- return {
960
- "role": "tool",
961
- "name": tool_name,
962
- "tool_call_id": uuid.uuid4().hex,
963
- "content": [{"type": "text", "text": text_content}],
964
- }
965
-
966
- def _json_serialize_fallback(self, obj: Any) -> Any:
967
- """
968
- Fallback serializer for JSON encoding non-standard types.
969
-
970
- Handles bytes, datetime, and other common non-serializable types.
971
- """
972
- try:
973
- import numpy as np # Local import to avoid hard dependency at module import time
974
-
975
- if isinstance(obj, np.generic):
976
- return obj.item()
977
- if isinstance(obj, np.ndarray):
978
- return obj.tolist()
979
- except Exception:
980
- pass
981
-
982
- if isinstance(obj, bytes):
983
- # For binary data, return a placeholder (don't expose raw bytes to LLM)
984
- return f"<binary data: {len(obj)} bytes>"
985
- if hasattr(obj, "isoformat"):
986
- # Handle datetime objects
987
- return obj.isoformat()
988
- if hasattr(obj, "__dict__"):
989
- # Handle objects with __dict__
990
- return obj.__dict__
991
-
992
- for caster in (float, int, str):
993
- try:
994
- return caster(obj)
995
- except Exception:
996
- continue
997
-
998
- return "<non-serializable>"
999
-
1000
- def _truncate_large_content(self, content: Any, max_chars: int = 2000) -> str:
1001
- """
1002
- Truncate large content to prevent overwhelming the LLM.
1003
- Defaults to 20000 chars which is appropriate for 32K token context window.
1004
- """
1005
-
1006
- # If we have test_results in the output we don't want to
1007
- # truncate as this can contain important information on
1008
- # how to fix the tests
1009
- if isinstance(content, dict) and (
1010
- "test_results" in content or "run_tests" in content
1011
- ):
1012
- return json.dumps(content, default=self._json_serialize_fallback)
1013
-
1014
- # Convert to string (use compact JSON first to check size)
1015
- if isinstance(content, (dict, list)):
1016
- compact_str = json.dumps(content, default=self._json_serialize_fallback)
1017
- # Only use indented format if we need to truncate anyway
1018
- content_str = (
1019
- json.dumps(content, indent=2, default=self._json_serialize_fallback)
1020
- if len(compact_str) > max_chars
1021
- else compact_str
1022
- )
1023
- else:
1024
- content_str = str(content)
1025
-
1026
- # Return as-is if within limits
1027
- if len(content_str) <= max_chars:
1028
- return content_str
1029
-
1030
- # For responses with chunks (e.g., search results, document retrieval)
1031
- if (
1032
- isinstance(content, dict)
1033
- and "chunks" in content
1034
- and isinstance(content["chunks"], list)
1035
- ):
1036
- truncated = content.copy()
1037
-
1038
- # Keep all chunks but truncate individual chunk content if needed
1039
- if "chunks" in truncated:
1040
- for chunk in truncated["chunks"]:
1041
- if isinstance(chunk, dict) and "content" in chunk:
1042
- # Keep full content for chunks (they're the actual data)
1043
- # Only truncate if a single chunk is massive
1044
- if len(chunk["content"]) > CHUNK_TRUNCATION_THRESHOLD:
1045
- chunk["content"] = (
1046
- chunk["content"][:CHUNK_TRUNCATION_SIZE]
1047
- + "\n...[chunk truncated]...\n"
1048
- + chunk["content"][-CHUNK_TRUNCATION_SIZE:]
1049
- )
1050
-
1051
- result_str = json.dumps(
1052
- truncated, indent=2, default=self._json_serialize_fallback
1053
- )
1054
- # Use larger limit for chunked responses since chunks are the actual data
1055
- if len(result_str) <= max_chars * 3: # Allow up to 60KB for chunked data
1056
- return result_str
1057
- # If still too large, keep first 3 chunks only
1058
- truncated["chunks"] = truncated["chunks"][:3]
1059
- return json.dumps(
1060
- truncated, indent=2, default=self._json_serialize_fallback
1061
- )
1062
-
1063
- # For Jira responses, keep first 3 issues
1064
- if (
1065
- isinstance(content, dict)
1066
- and "issues" in content
1067
- and isinstance(content["issues"], list)
1068
- ):
1069
- truncated = {
1070
- **content,
1071
- "issues": content["issues"][:3],
1072
- "truncated": True,
1073
- "total": len(content["issues"]),
1074
- }
1075
- return json.dumps(
1076
- truncated, indent=2, default=self._json_serialize_fallback
1077
- )[:max_chars]
1078
-
1079
- # For lists, keep first 3 items
1080
- if isinstance(content, list):
1081
- truncated = (
1082
- content[:3] + [{"truncated": f"{len(content) - 3} more"}]
1083
- if len(content) > 3
1084
- else content
1085
- )
1086
- return json.dumps(
1087
- truncated, indent=2, default=self._json_serialize_fallback
1088
- )[:max_chars]
1089
-
1090
- # Simple truncation
1091
- half = max_chars // 2 - 20
1092
- return f"{content_str[:half]}\n...[truncated]...\n{content_str[-half:]}"
1093
-
1094
- def process_query(
1095
- self,
1096
- user_input: str,
1097
- max_steps: int = None,
1098
- trace: bool = False,
1099
- filename: str = None,
1100
- ) -> Dict[str, Any]:
1101
- """
1102
- Process a user query and execute the necessary tools.
1103
- Displays each step as it's being generated in real-time.
1104
-
1105
- Args:
1106
- user_input: User's query or request
1107
- max_steps: Maximum number of steps to take in the conversation (overrides class default if provided)
1108
- trace: If True, write detailed JSON trace to file
1109
- filename: Optional filename for trace output, if None a timestamped name will be generated
1110
-
1111
- Returns:
1112
- Dict containing the final result and operation details
1113
- """
1114
- import time
1115
-
1116
- start_time = time.time() # Track query processing start time
1117
-
1118
- # Store query for error context (used in _execute_tool for error formatting)
1119
- self._current_query = user_input
1120
-
1121
- logger.debug(f"Processing query: {user_input}")
1122
- conversation = []
1123
- # Build messages array for chat completions
1124
- messages = []
1125
-
1126
- # Prepopulate with conversation history if available (for session persistence)
1127
- if hasattr(self, "conversation_history") and self.conversation_history:
1128
- messages.extend(self.conversation_history)
1129
- logger.debug(
1130
- f"Loaded {len(self.conversation_history)} messages from conversation history"
1131
- )
1132
-
1133
- steps_taken = 0
1134
- final_answer = None
1135
- error_count = 0
1136
- last_tool_call = None # Track the last tool call to prevent loops
1137
- last_error = None # Track the last error to handle it properly
1138
- previous_outputs = [] # Track previous tool outputs
1139
-
1140
- # Reset state management
1141
- self.execution_state = self.STATE_PLANNING
1142
- self.current_plan = None
1143
- self.current_step = 0
1144
- self.total_plan_steps = 0
1145
- self.plan_iterations = 0 # Reset plan iteration counter
1146
-
1147
- # Add user query to the conversation history
1148
- conversation.append({"role": "user", "content": user_input})
1149
- messages.append({"role": "user", "content": user_input})
1150
-
1151
- # Use provided max_steps or fall back to class default
1152
- steps_limit = max_steps if max_steps is not None else self.max_steps
1153
-
1154
- # Print initial message with max steps info
1155
- self.console.print_processing_start(user_input, steps_limit)
1156
- logger.debug(f"Using max_steps: {steps_limit}")
1157
-
1158
- prompt = f"User request: {user_input}\n\n"
1159
-
1160
- # Only add planning reminder in PLANNING state
1161
- if self.execution_state == self.STATE_PLANNING:
1162
- prompt += (
1163
- "IMPORTANT: ALWAYS BEGIN WITH A PLAN before executing any tools.\n"
1164
- "First create a detailed plan with all necessary steps, then execute the first step.\n"
1165
- "When creating a plan with multiple steps:\n"
1166
- " 1. ALWAYS follow the plan in the correct order, starting with the FIRST step.\n"
1167
- " 2. Include both a plan and a 'tool' field, the 'tool' field MUST match the tool in the first step of the plan.\n"
1168
- " 3. Create plans with clear, executable steps that include both the tool name and the exact arguments for each step.\n"
1169
- )
1170
-
1171
- logger.debug(f"Input prompt: {prompt[:200]}...")
1172
-
1173
- # Process the query in steps, allowing for multiple tool usages
1174
- while steps_taken < steps_limit and final_answer is None:
1175
- # Build the next prompt based on current state (this is for fallback mode only)
1176
- # In chat mode, we'll just add to messages array
1177
- steps_taken += 1
1178
- logger.debug(f"Step {steps_taken}/{steps_limit}")
1179
-
1180
- # Check if we're at the limit and ask user if they want to continue
1181
- if steps_taken == steps_limit and final_answer is None:
1182
- # Show what was accomplished
1183
- max_steps_msg = self._generate_max_steps_message(
1184
- conversation, steps_taken, steps_limit
1185
- )
1186
- self.console.print_warning(max_steps_msg)
1187
-
1188
- # Ask user if they want to continue (skip in silent mode OR if stdin is not available)
1189
- # IMPORTANT: Never call input() in API/CI contexts to avoid blocking threads
1190
- import sys
1191
-
1192
- has_stdin = sys.stdin and sys.stdin.isatty()
1193
- if has_stdin and not (
1194
- hasattr(self, "silent_mode") and self.silent_mode
1195
- ):
1196
- try:
1197
- response = (
1198
- input("\nContinue with 50 more steps? (y/n): ")
1199
- .strip()
1200
- .lower()
1201
- )
1202
- if response in ["y", "yes"]:
1203
- steps_limit += 50
1204
- self.console.print_info(
1205
- f"✓ Continuing with {steps_limit} total steps...\n"
1206
- )
1207
- else:
1208
- self.console.print_info("Stopping at user request.")
1209
- break
1210
- except (EOFError, KeyboardInterrupt):
1211
- self.console.print_info("\nStopping at user request.")
1212
- break
1213
- else:
1214
- # Silent mode - just stop
1215
- break
1216
-
1217
- # Display current step
1218
- self.console.print_step_header(steps_taken, steps_limit)
1219
-
1220
- # Skip automatic finalization for single-step plans - always request proper final answer
1221
-
1222
- # If we're executing a plan, we might not need to query the LLM again
1223
- if (
1224
- self.execution_state == self.STATE_EXECUTING_PLAN
1225
- and self.current_step < self.total_plan_steps
1226
- ):
1227
- logger.debug(
1228
- f"Executing plan step {self.current_step + 1}/{self.total_plan_steps}"
1229
- )
1230
- self.console.print_state_info(
1231
- f"EXECUTING PLAN: Step {self.current_step + 1}/{self.total_plan_steps}"
1232
- )
1233
-
1234
- # Display the current plan with the current step highlighted
1235
- if self.current_plan:
1236
- self.console.print_plan(self.current_plan, self.current_step)
1237
-
1238
- # Extract next step from plan
1239
- next_step = self.current_plan[self.current_step]
1240
-
1241
- if (
1242
- isinstance(next_step, dict)
1243
- and "tool" in next_step
1244
- and "tool_args" in next_step
1245
- ):
1246
- # We have a properly formatted step with tool and args
1247
- tool_name = next_step["tool"]
1248
- tool_args = next_step["tool_args"]
1249
-
1250
- # Create a parsed response structure as if it came from the LLM
1251
- parsed = {
1252
- "thought": f"Executing step {self.current_step + 1} of the plan",
1253
- "goal": f"Following the plan to {user_input}",
1254
- "tool": tool_name,
1255
- "tool_args": tool_args,
1256
- }
1257
-
1258
- # Add to conversation
1259
- conversation.append({"role": "assistant", "content": parsed})
1260
-
1261
- # Display the agent's reasoning for the step
1262
- self.console.print_thought(
1263
- parsed.get("thought", "Executing plan step")
1264
- )
1265
- self.console.print_goal(parsed.get("goal", "Following the plan"))
1266
-
1267
- # Display the tool call in real-time
1268
- self.console.print_tool_usage(tool_name)
1269
-
1270
- # Start progress indicator for tool execution
1271
- self.console.start_progress(f"Executing {tool_name}")
1272
-
1273
- # Execute the tool
1274
- tool_result = self._execute_tool(tool_name, tool_args)
1275
-
1276
- # Stop progress indicator
1277
- self.console.stop_progress()
1278
-
1279
- # Handle domain-specific post-processing
1280
- self._post_process_tool_result(tool_name, tool_args, tool_result)
1281
-
1282
- # Handle large tool results
1283
- truncated_result = self._handle_large_tool_result(
1284
- tool_name, tool_result, conversation, tool_args
1285
- )
1286
-
1287
- # Display the tool result in real-time (show full result to user)
1288
- self.console.print_tool_complete()
1289
-
1290
- self.console.pretty_print_json(tool_result, "Tool Result")
1291
-
1292
- # Store the truncated output for future context
1293
- previous_outputs.append(
1294
- {
1295
- "tool": tool_name,
1296
- "args": tool_args,
1297
- "result": truncated_result,
1298
- }
1299
- )
1300
-
1301
- # Share tool output with subsequent LLM calls
1302
- messages.append(
1303
- self._create_tool_message(tool_name, truncated_result)
1304
- )
1305
-
1306
- # Check for error (support multiple error formats)
1307
- is_error = isinstance(tool_result, dict) and (
1308
- tool_result.get("status") == "error" # Standard format
1309
- or tool_result.get("success")
1310
- is False # Tools returning success: false
1311
- or tool_result.get("has_errors") is True # CLI tools
1312
- or tool_result.get("return_code", 0) != 0 # Build failures
1313
- )
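For illustration, a sketch of how this multi-format predicate classifies a few hypothetical tool results (the dict shapes below are examples of the formats named in the comments above, not outputs of any specific GAIA tool):

def is_error_result(tool_result):
    # Same predicate as the inline check above, extracted for demonstration
    return isinstance(tool_result, dict) and (
        tool_result.get("status") == "error"
        or tool_result.get("success") is False
        or tool_result.get("has_errors") is True
        or tool_result.get("return_code", 0) != 0
    )

assert is_error_result({"status": "error", "error": "file not found"})
assert is_error_result({"success": False, "hint": "check the path"})
assert is_error_result({"has_errors": True, "stderr": "lint failed"})
assert is_error_result({"return_code": 2, "stderr": "build failed"})
assert not is_error_result({"status": "success", "return_code": 0})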
1314
-
1315
- if is_error:
1316
- error_count += 1
1317
- # Extract error message from various formats
1318
- # Prefer error_brief for logging (avoids duplicate formatted output)
1319
- last_error = (
1320
- tool_result.get("error_brief")
1321
- or tool_result.get("error")
1322
- or tool_result.get("stderr")
1323
- or tool_result.get("hint") # Many tools provide hints
1324
- or tool_result.get(
1325
- "suggested_fix"
1326
- ) # Some tools provide fix suggestions
1327
- or f"Command failed with return code {tool_result.get('return_code')}"
1328
- )
1329
- logger.warning(
1330
- f"Tool execution error in plan (count: {error_count}): {last_error}"
1331
- )
1332
- # Only print if error wasn't already displayed by _execute_tool
1333
- if not tool_result.get("error_displayed"):
1334
- self.console.print_error(last_error)
1335
-
1336
- # Switch to error recovery state
1337
- self.execution_state = self.STATE_ERROR_RECOVERY
1338
- self.console.print_state_info(
1339
- "ERROR RECOVERY: Handling tool execution failure"
1340
- )
1341
-
1342
- # Break out of plan execution to trigger error recovery prompt
1343
- continue
1344
- else:
1345
- # Success - move to next step in plan
1346
- self.current_step += 1
1347
-
1348
- # Check if we've completed the plan
1349
- if self.current_step >= self.total_plan_steps:
1350
- logger.debug("Plan execution completed")
1351
- self.execution_state = self.STATE_COMPLETION
1352
- self.console.print_state_info(
1353
- "COMPLETION: Plan fully executed"
1354
- )
1355
-
1356
- # Increment plan iteration counter
1357
- self.plan_iterations += 1
1358
- logger.debug(
1359
- f"Plan iteration {self.plan_iterations} completed"
1360
- )
1361
-
1362
- # Check if we've reached max plan iterations
1363
- reached_max_iterations = (
1364
- self.max_plan_iterations > 0
1365
- and self.plan_iterations >= self.max_plan_iterations
1366
- )
1367
-
1368
- # Prepare message for final answer with the completed plan context
1369
- plan_context = {
1370
- "completed_plan": self.current_plan,
1371
- "total_steps": self.total_plan_steps,
1372
- }
1373
- plan_context_raw = json.dumps(
1374
- plan_context, default=self._json_serialize_fallback
1375
- )
1376
- if len(plan_context_raw) > 20000:
1377
- plan_context_str = self._truncate_large_content(
1378
- plan_context, max_chars=20000
1379
- )
1380
- else:
1381
- plan_context_str = plan_context_raw
1382
-
1383
- if reached_max_iterations:
1384
- # Force final answer after max iterations
1385
- completion_message = (
1386
- f"Maximum plan iterations ({self.max_plan_iterations}) reached for task: {user_input}\n"
1387
- f"Task: {user_input}\n"
1388
- f"Plan information:\n{plan_context_str}\n\n"
1389
- f"IMPORTANT: You MUST now provide a final answer with an honest assessment:\n"
1390
- f"- Summarize what was successfully accomplished\n"
1391
- f"- Clearly state if anything remains incomplete or if errors occurred\n"
1392
- f"- If the task is fully complete, state that clearly\n\n"
1393
- f'Provide {{"thought": "...", "goal": "...", "answer": "..."}}'
1394
- )
1395
- else:
1396
- completion_message = (
1397
- "You have successfully completed all steps in the plan.\n"
1398
- f"Task: {user_input}\n"
1399
- f"Plan information:\n{plan_context_str}\n\n"
1400
- f"Plan iteration: {self.plan_iterations}/{self.max_plan_iterations if self.max_plan_iterations > 0 else 'unlimited'}\n"
1401
- "Check if more work is needed:\n"
1402
- "- If the task is complete and verified, provide a final answer\n"
1403
- "- If critical validation/testing is needed, you may create ONE more plan\n"
1404
- "- Only create additional plans if absolutely necessary\n\n"
1405
- 'If more work needed: Provide a NEW plan with {{"thought": "...", "goal": "...", "plan": [...]}}\n'
1406
- 'If everything is complete: Provide {{"thought": "...", "goal": "...", "answer": "..."}}'
1407
- )
1408
-
1409
- # Debug logging - only show if truncation happened
1410
- if self.debug and len(plan_context_raw) > 2000:
1411
- print(
1412
- "\n[DEBUG] Plan context truncated for completion message"
1413
- )
1414
-
1415
- # Add completion request to messages
1416
- messages.append(
1417
- {"role": "user", "content": completion_message}
1418
- )
1419
-
1420
- # Send the completion prompt to get final answer
1421
- self.console.print_state_info(
1422
- "COMPLETION: Requesting final answer"
1423
- )
1424
-
1425
- # Continue to next iteration to get final answer
1426
- continue
1427
- else:
1428
- # Continue with next step - no need to query LLM again
1429
- continue
1430
- else:
1431
- # Plan step doesn't have proper format, fall back to LLM
1432
- logger.warning(
1433
- f"Plan step {self.current_step + 1} doesn't have proper format: {next_step}"
1434
- )
1435
- self.console.print_warning(
1436
- f"Plan step {self.current_step + 1} format incorrect, asking LLM for guidance"
1437
- )
1438
- prompt = (
1439
- f"You are following a plan but step {self.current_step + 1} doesn't have proper format: {next_step}\n"
1440
- "Please interpret this step and decide what tool to use next.\n\n"
1441
- f"Task: {user_input}\n\n"
1442
- )
1443
- else:
1444
- # Normal execution flow - query the LLM
1445
- if self.execution_state == self.STATE_DIRECT_EXECUTION:
1446
- self.console.print_state_info("DIRECT EXECUTION: Analyzing task")
1447
- elif self.execution_state == self.STATE_PLANNING:
1448
- self.console.print_state_info("PLANNING: Creating or refining plan")
1449
- elif self.execution_state == self.STATE_ERROR_RECOVERY:
1450
- self.console.print_state_info(
1451
- "ERROR RECOVERY: Handling previous error"
1452
- )
1453
-
1454
- # Truncate previous outputs if too large to avoid overwhelming the LLM
1455
- truncated_outputs = (
1456
- self._truncate_large_content(previous_outputs, max_chars=500)
1457
- if previous_outputs
1458
- else "None"
1459
- )
1460
-
1461
- # Create a specific error recovery prompt
1462
- prompt = (
1463
- "TOOL EXECUTION FAILED!\n\n"
1464
- f"You were trying to execute: {last_tool_call[0] if last_tool_call else 'unknown tool'}\n"
1465
- f"Error: {last_error}\n\n"
1466
- f"Original task: {user_input}\n\n"
1467
- f"Current plan step {self.current_step + 1}/{self.total_plan_steps} failed.\n"
1468
- f"Current plan: {self.current_plan}\n\n"
1469
- f"Previous successful outputs: {truncated_outputs}\n\n"
1470
- "INSTRUCTIONS:\n"
1471
- "1. Analyze the error and understand what went wrong\n"
1472
- "2. Create a NEW corrected plan that fixes the error\n"
1473
- "3. Make sure to use correct tool parameters (check the available tools)\n"
1474
- "4. Start executing the corrected plan\n\n"
1475
- "Respond with your analysis, a corrected plan, and the first tool to execute."
1476
- )
1477
-
1478
- # Add the error recovery prompt to the messages array so it gets sent to LLM
1479
- messages.append({"role": "user", "content": prompt})
1480
-
1481
- # Reset state to planning after creating recovery prompt
1482
- self.execution_state = self.STATE_PLANNING
1483
- self.current_plan = None
1484
- self.current_step = 0
1485
- self.total_plan_steps = 0
1486
-
1487
- elif self.execution_state == self.STATE_COMPLETION:
1488
- self.console.print_state_info("COMPLETION: Finalizing response")
1489
-
1490
- # Print the prompt if show_prompts is enabled (separate from debug_prompts)
1491
- if self.show_prompts:
1492
- # Build context from system prompt and messages
1493
- context_parts = [
1494
- (
1495
- f"SYSTEM: {self.system_prompt[:200]}..."
1496
- if len(self.system_prompt) > 200
1497
- else f"SYSTEM: {self.system_prompt}"
1498
- )
1499
- ]
1500
-
1501
- for msg in messages:
1502
- role = msg.get("role", "user").upper()
1503
- content = str(msg.get("content", ""))[:150]
1504
- context_parts.append(
1505
- f"{role}: {content}{'...' if len(str(msg.get('content', ''))) > 150 else ''}"
1506
- )
1507
-
1508
- if not messages and prompt:
1509
- context_parts.append(
1510
- f"USER: {prompt[:150]}{'...' if len(prompt) > 150 else ''}"
1511
- )
1512
-
1513
- self.console.print_prompt("\n".join(context_parts), "LLM Context")
1514
-
1515
- # Handle streaming or non-streaming LLM response
1516
- # Initialize response_stats so it's always in scope
1517
- response_stats = None
1518
-
1519
- if self.streaming:
1520
- # Streaming mode - raw response will be streamed
1521
- # (SilentConsole will suppress this, AgentConsole will show it)
1522
-
1523
- # Add prompt to conversation if debug is enabled
1524
- if self.debug_prompts:
1525
- conversation.append(
1526
- {"role": "system", "content": {"prompt": prompt}}
1527
- )
1528
- # Print the prompt if show_prompts is enabled
1529
- if self.show_prompts:
1530
- self.console.print_prompt(
1531
- prompt, f"Prompt (Step {steps_taken})"
1532
- )
1533
-
1534
- # Get streaming response from ChatSDK with proper conversation history
1535
- try:
1536
- response_stream = self.chat.send_messages_stream(
1537
- messages=messages, system_prompt=self.system_prompt
1538
- )
1539
-
1540
- # Process the streaming response chunks as they arrive
1541
- full_response = ""
1542
- for chunk_response in response_stream:
1543
- if chunk_response.is_complete:
1544
- response_stats = chunk_response.stats
1545
- else:
1546
- self.console.print_streaming_text(chunk_response.text)
1547
- full_response += chunk_response.text
1548
-
1549
- self.console.print_streaming_text("", end_of_stream=True)
1550
- response = full_response
1551
- except ConnectionError as e:
1552
- # Handle LLM server connection errors specifically
1553
- error_msg = f"LLM Server Connection Failed (streaming): {str(e)}"
1554
- logger.error(error_msg)
1555
- self.console.print_error(error_msg)
1556
-
1557
- # Add error to history
1558
- self.error_history.append(
1559
- {
1560
- "step": steps_taken,
1561
- "error": error_msg,
1562
- "type": "llm_connection_error",
1563
- }
1564
- )
1565
-
1566
- # Return error response
1567
- final_answer = (
1568
- f"Unable to complete task due to LLM server error: {str(e)}"
1569
- )
1570
- break
1571
- except Exception as e:
1572
- logger.error(f"Unexpected error during streaming: {e}")
1573
-
1574
- # Add to error history
1575
- self.error_history.append(
1576
- {
1577
- "step": steps_taken,
1578
- "error": str(e),
1579
- "type": "llm_streaming_error",
1580
- }
1581
- )
1582
-
1583
- # Return error response
1584
- final_answer = (
1585
- f"Unable to complete task due to streaming error: {str(e)}"
1586
- )
1587
- break
1588
- else:
1589
- # Use progress indicator for non-streaming mode
1590
- self.console.start_progress("Thinking")
1591
-
1592
- # Debug logging before LLM call
1593
- if self.debug:
1594
- print(f"\n[DEBUG] About to call LLM with {len(messages)} messages")
1596
- print(
1597
- f"[DEBUG] Last message role: {messages[-1]['role'] if messages else 'No messages'}"
1598
- )
1599
- if messages and len(messages[-1].get("content", "")) < 500:
1600
- print(
1601
- f"[DEBUG] Last message content: {messages[-1]['content']}"
1602
- )
1603
- else:
1604
- print(
1605
- f"[DEBUG] Last message content length: {len(messages[-1].get('content', ''))}"
1606
- )
1607
- print(f"[DEBUG] Execution state: {self.execution_state}")
1608
- if self.execution_state == "PLANNING":
1609
- print("[DEBUG] Current step: Planning (no active plan yet)")
1610
- else:
1611
- print(
1612
- f"[DEBUG] Current step: {self.current_step}/{self.total_plan_steps}"
1613
- )
1614
-
1615
- # Get complete response from ChatSDK
1616
- try:
1617
- chat_response = self.chat.send_messages(
1618
- messages=messages, system_prompt=self.system_prompt
1619
- )
1620
- response = chat_response.text
1621
- response_stats = chat_response.stats
1622
- except ConnectionError as e:
1623
- error_msg = f"LLM Server Connection Failed: {str(e)}"
1624
- logger.error(error_msg)
1625
- self.console.print_error(error_msg)
1626
-
1627
- # Add error to history and update state
1628
- self.error_history.append(
1629
- {
1630
- "step": steps_taken,
1631
- "error": error_msg,
1632
- "type": "llm_connection_error",
1633
- }
1634
- )
1635
-
1636
- # Return error response
1637
- final_answer = (
1638
- f"Unable to complete task due to LLM server error: {str(e)}"
1639
- )
1640
- break
1641
- except Exception as e:
1642
- if self.debug:
1643
- print(f"[DEBUG] Error calling LLM: {e}")
1644
- logger.error(f"Unexpected error calling LLM: {e}")
1645
-
1646
- # Add to error history
1647
- self.error_history.append(
1648
- {"step": steps_taken, "error": str(e), "type": "llm_error"}
1649
- )
1650
-
1651
- # Return error response
1652
- final_answer = f"Unable to complete task due to error: {str(e)}"
1653
- break
1654
-
1655
- # Stop the progress indicator
1656
- self.console.stop_progress()
1657
-
1658
- # Print the LLM response to the console
1659
- logger.debug(f"LLM response: {response[:200]}...")
1660
- if self.show_prompts:
1661
- self.console.print_response(response, "LLM Response")
1662
-
1663
- # Parse the response
1664
- parsed = self._parse_llm_response(response)
1665
- logger.debug(f"Parsed response: {parsed}")
1666
- conversation.append({"role": "assistant", "content": parsed})
1667
-
1668
- # Add assistant response to messages for chat history
1669
- messages.append({"role": "assistant", "content": response})
1670
-
1671
- # Validate the response has a plan if required
1672
- self._validate_plan_required(parsed, steps_taken)
1673
-
1674
- # If the LLM needs to create a plan first, re-prompt it specifically for that
1675
- if "needs_plan" in parsed and parsed["needs_plan"]:
1676
- # Prepare a special prompt that specifically requests a plan
1677
- deferred_tool = parsed.get("deferred_tool", None)
1678
- deferred_args = parsed.get("deferred_tool_args", {})
1679
-
1680
- plan_prompt = (
1681
- "You MUST create a detailed plan first before taking any action.\n\n"
1682
- f"User request: {user_input}\n\n"
1683
- )
1684
-
1685
- if deferred_tool:
1686
- plan_prompt += (
1687
- f"You initially wanted to use the {deferred_tool} tool with these arguments:\n"
1688
- f"{json.dumps(deferred_args, indent=2, default=self._json_serialize_fallback)}\n\n"
1689
- "However, you MUST first create a plan. Please create a plan that includes this tool usage as a step.\n\n"
1690
- )
1691
-
1692
- plan_prompt += (
1693
- "Create a detailed plan with all necessary steps in JSON format, including exact tool names and arguments.\n"
1694
- "Respond with your reasoning, plan, and the first tool to use."
1695
- )
1696
-
1697
- # Store the plan prompt in conversation if debug is enabled
1698
- if self.debug_prompts:
1699
- conversation.append(
1700
- {"role": "system", "content": {"prompt": plan_prompt}}
1701
- )
1702
- if self.show_prompts:
1703
- self.console.print_prompt(plan_prompt, "Plan Request Prompt")
1704
-
1705
- # Notify the user we're asking for a plan
1706
- self.console.print_info("Requesting a detailed plan before proceeding")
1707
-
1708
- # Get the planning response
1709
- if self.streaming:
1710
- # Add prompt to conversation if debug is enabled
1711
- if self.debug_prompts:
1712
- conversation.append(
1713
- {"role": "system", "content": {"prompt": plan_prompt}}
1714
- )
1715
- # Print the prompt if show_prompts is enabled
1716
- if self.show_prompts:
1717
- self.console.print_prompt(
1718
- plan_prompt, f"Prompt (Step {steps_taken})"
1719
- )
1720
-
1721
- # Handle streaming as before
1722
- full_response = ""
1723
- # Add plan request to messages
1724
- messages.append({"role": "user", "content": plan_prompt})
1725
-
1726
- # Use ChatSDK for streaming plan response
1727
- stream_gen = self.chat.send_messages_stream(
1728
- messages=messages, system_prompt=self.system_prompt
1729
- )
1730
-
1731
- for chunk_response in stream_gen:
1732
- if not chunk_response.is_complete:
1733
- chunk = chunk_response.text
1734
- if hasattr(self.console, "print_streaming_text"):
1735
- self.console.print_streaming_text(chunk)
1736
- else:
1737
- print(chunk, end="", flush=True)
1738
- full_response += chunk
1739
-
1740
- if hasattr(self.console, "print_streaming_text"):
1741
- self.console.print_streaming_text("", end_of_stream=True)
1742
- else:
1743
- print("", flush=True)
1744
-
1745
- plan_response = full_response
1746
- else:
1747
- # Use progress indicator for non-streaming mode
1748
- self.console.start_progress("Creating plan")
1749
-
1750
- # Store the plan prompt in conversation if debug is enabled
1751
- if self.debug_prompts:
1752
- conversation.append(
1753
- {"role": "system", "content": {"prompt": plan_prompt}}
1754
- )
1755
- if self.show_prompts:
1756
- self.console.print_prompt(
1757
- plan_prompt, "Plan Request Prompt"
1758
- )
1759
-
1760
- # Add plan request to messages
1761
- messages.append({"role": "user", "content": plan_prompt})
1762
-
1763
- # Use ChatSDK for non-streaming plan response
1764
- chat_response = self.chat.send_messages(
1765
- messages=messages, system_prompt=self.system_prompt
1766
- )
1767
- plan_response = chat_response.text
1768
- self.console.stop_progress()
1769
-
1770
- # Parse the plan response
1771
- parsed_plan = self._parse_llm_response(plan_response)
1772
- logger.debug(f"Parsed plan response: {parsed_plan}")
1773
- conversation.append({"role": "assistant", "content": parsed_plan})
1774
-
1775
- # Add plan response to messages for chat history
1776
- messages.append({"role": "assistant", "content": plan_response})
1777
-
1778
- # Display the agent's reasoning for the plan
1779
- self.console.print_thought(parsed_plan.get("thought", "Creating plan"))
1780
- self.console.print_goal(parsed_plan.get("goal", "Planning for task"))
1781
-
1782
- # Set the parsed response to the new plan for further processing
1783
- parsed = parsed_plan
1784
- else:
1785
- # Display the agent's reasoning in real-time (only if provided)
1786
- # Skip if we just displayed thought/goal for a plan request above
1787
- thought = parsed.get("thought", "").strip()
1788
- goal = parsed.get("goal", "").strip()
1789
-
1790
- if thought and thought != "No explicit reasoning provided":
1791
- self.console.print_thought(thought)
1792
-
1793
- if goal and goal != "No explicit goal provided":
1794
- self.console.print_goal(goal)
1795
-
1796
- # Process plan if available
1797
- if "plan" in parsed:
1798
- # Validate that plan is actually a list, not a string or other type
1799
- if not isinstance(parsed["plan"], list):
1800
- logger.error(
1801
- f"Invalid plan format: expected list, got {type(parsed['plan']).__name__}. "
1802
- f"Plan content: {parsed['plan']}"
1803
- )
1804
- self.console.print_error(
1805
- f"LLM returned invalid plan format (expected array, got {type(parsed['plan']).__name__}). "
1806
- "Asking for correction..."
1807
- )
1808
-
1809
- # Create error recovery prompt
1810
- error_msg = (
1811
- "ERROR: You provided a plan in the wrong format.\n"
1812
- "Expected: an array of step objects\n"
1813
- f"You provided: {type(parsed['plan']).__name__}\n\n"
1814
- "The correct format is:\n"
1815
- f'{{"plan": [{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}]}}\n\n'
1816
- f"Please create a proper plan as an array of step objects for: {user_input}"
1817
- )
1818
- messages.append({"role": "user", "content": error_msg})
1819
-
1820
- # Continue to next iteration to get corrected plan
1821
- continue
1822
-
1823
- # Validate that plan items are dictionaries with required fields
1824
- invalid_steps = []
1825
- for i, step in enumerate(parsed["plan"]):
1826
- if not isinstance(step, dict):
1827
- invalid_steps.append((i, type(step).__name__, step))
1828
- elif "tool" not in step or "tool_args" not in step:
1829
- invalid_steps.append((i, "missing fields", step))
1830
-
1831
- if invalid_steps:
1832
- logger.error(f"Invalid plan steps found: {invalid_steps}")
1833
- self.console.print_error(
1834
- f"Plan contains {len(invalid_steps)} invalid step(s). Asking for correction..."
1835
- )
1836
-
1837
- # Create detailed error message
1838
- error_details = "\n".join(
1839
- [
1840
- f"Step {i+1}: {issue} - {step}"
1841
- for i, issue, step in invalid_steps[
1842
- :3
1843
- ] # Show first 3 errors
1844
- ]
1845
- )
1846
-
1847
- error_msg = (
1848
- f"ERROR: Your plan contains invalid steps:\n{error_details}\n\n"
1849
- f"Each step must be a dictionary with 'tool' and 'tool_args' fields:\n"
1850
- f'{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}\n\n'
1851
- f"Please create a corrected plan for: {user_input}"
1852
- )
1853
- messages.append({"role": "user", "content": error_msg})
1854
-
1855
- # Continue to next iteration to get corrected plan
1856
- continue
1857
-
1858
- # Plan is valid - proceed with execution
1859
- self.current_plan = parsed["plan"]
1860
- self.current_step = 0
1861
- self.total_plan_steps = len(self.current_plan)
1862
- self.execution_state = self.STATE_EXECUTING_PLAN
1863
- logger.debug(
1864
- f"New plan created with {self.total_plan_steps} steps: {self.current_plan}"
1865
- )
1866
-
1867
- # If the response contains a tool call, execute it
1868
- if "tool" in parsed and "tool_args" in parsed:
1869
- # Display the current plan with the current step highlighted
1871
- if self.current_plan:
1872
- self.console.print_plan(self.current_plan, self.current_step)
1873
-
1874
- # When both plan and tool are present, prioritize the plan execution
1875
- # If we have a plan, we should execute from the plan, not the standalone tool call
1876
- if "plan" in parsed and self.current_plan and self.total_plan_steps > 0:
1877
- # Skip the standalone tool execution and let the plan execution handle it
1878
- # The plan execution logic will handle this in the next iteration
1879
- logger.debug(
1880
- "Plan and tool both present - deferring to plan execution logic"
1881
- )
1882
- continue # Skip tool execution, let plan execution handle it
1883
-
1884
- # If this was a single-step plan, mark as completed after tool execution
1885
- if self.total_plan_steps == 1:
1886
- logger.debug(
1887
- "Single-step plan will be marked completed after tool execution"
1888
- )
1889
- self.execution_state = self.STATE_COMPLETION
1890
-
1891
- tool_name = parsed["tool"]
1892
- tool_args = parsed["tool_args"]
1893
- logger.debug(f"Tool call detected: {tool_name} with args {tool_args}")
1894
-
1895
- # Display the tool call in real-time
1896
- self.console.print_tool_usage(tool_name)
1897
-
1898
- if tool_args:
1899
- self.console.pretty_print_json(tool_args, "Arguments")
1900
-
1901
- # Start progress indicator for tool execution
1902
- self.console.start_progress(f"Executing {tool_name}")
1903
-
1904
- # Check for repeated tool calls
1905
- if last_tool_call == (tool_name, str(tool_args)):
1906
- # Stop progress indicator
1907
- self.console.stop_progress()
1908
-
1909
- logger.warning(f"Detected repeated tool call: {tool_name}")
1910
- # Force a final answer if the same tool is called repeatedly
1911
- final_answer = (
1912
- f"Task completed with {tool_name}. No further action needed."
1913
- )
1914
-
1915
- self.console.print_repeated_tool_warning()
1916
- break
1917
-
1918
- # Execute the tool
1919
- tool_result = self._execute_tool(tool_name, tool_args)
1920
-
1921
- # Stop progress indicator
1922
- self.console.stop_progress()
1923
-
1924
- # Handle domain-specific post-processing
1925
- self._post_process_tool_result(tool_name, tool_args, tool_result)
1926
-
1927
- # Handle large tool results
1928
- truncated_result = self._handle_large_tool_result(
1929
- tool_name, tool_result, conversation, tool_args
1930
- )
1931
-
1932
- # Display the tool result in real-time (show full result to user)
1933
- self.console.print_tool_complete()
1934
-
1935
- self.console.pretty_print_json(tool_result, "Result")
1936
-
1937
- # Store the truncated output for future context
1938
- previous_outputs.append(
1939
- {"tool": tool_name, "args": tool_args, "result": truncated_result}
1940
- )
1941
-
1942
- # Share tool output with subsequent LLM calls
1943
- messages.append(self._create_tool_message(tool_name, truncated_result))
1944
-
1945
- # Update last tool call
1946
- last_tool_call = (tool_name, str(tool_args))
1947
-
1948
- # For single-step plans, we still need to let the LLM process the result
1949
- # This is especially important for RAG queries where the LLM needs to
1950
- # synthesize the retrieved information into a coherent answer
1951
- if (
1952
- self.execution_state == self.STATE_COMPLETION
1953
- and self.total_plan_steps == 1
1954
- ):
1955
- logger.debug(
1956
- "Single-step plan execution completed, requesting final answer from LLM"
1957
- )
1958
- # Don't break here - let the loop continue so the LLM can process the tool result
1959
- # The tool result has already been added to messages, so the next iteration
1960
- # will call the LLM with that result
1961
-
1962
- # Check if tool execution resulted in an error (support multiple error formats)
1963
- is_error = isinstance(tool_result, dict) and (
1964
- tool_result.get("status") == "error"
1965
- or tool_result.get("success") is False
1966
- or tool_result.get("has_errors") is True
1967
- or tool_result.get("return_code", 0) != 0
1968
- )
1969
- if is_error:
1970
- error_count += 1
1971
- # Prefer error_brief for logging (avoids duplicate formatted output)
1972
- last_error = (
1973
- tool_result.get("error_brief")
1974
- or tool_result.get("error")
1975
- or tool_result.get("stderr")
1976
- or tool_result.get("hint")
1977
- or tool_result.get("suggested_fix")
1978
- or f"Command failed with return code {tool_result.get('return_code')}"
1979
- )
1980
- logger.warning(
1981
- f"Tool execution error in plan (count: {error_count}): {last_error}"
1982
- )
1983
- # Only print if error wasn't already displayed by _execute_tool
1984
- if not tool_result.get("error_displayed"):
1985
- self.console.print_error(last_error)
1986
-
1987
- # Switch to error recovery state
1988
- self.execution_state = self.STATE_ERROR_RECOVERY
1989
- self.console.print_state_info(
1990
- "ERROR RECOVERY: Handling tool execution failure"
1991
- )
1992
-
1993
- # Break out of tool execution to trigger error recovery prompt
1994
- continue
1995
-
1996
- # Collect and store performance stats for token tracking
1997
- # Do this BEFORE checking for final answer so stats are always collected
1998
- perf_stats = response_stats or self.chat.get_stats()
1999
- if perf_stats:
2000
- conversation.append(
2001
- {
2002
- "role": "system",
2003
- "content": {
2004
- "type": "stats",
2005
- "step": steps_taken,
2006
- "performance_stats": perf_stats,
2007
- },
2008
- }
2009
- )
2010
-
2011
- # Check for final answer (after collecting stats)
2012
- if "answer" in parsed:
2013
- final_answer = parsed["answer"]
2014
- self.execution_state = self.STATE_COMPLETION
2015
- self.console.print_final_answer(final_answer, streaming=self.streaming)
2016
- break
2017
-
2018
- # Validate plan required
2019
- self._validate_plan_required(parsed, steps_taken)
2020
-
2021
- # Print completion message
2022
- self.console.print_completion(steps_taken, steps_limit)
2023
-
2024
- # Calculate total duration
2025
- total_duration = time.time() - start_time
2026
-
2027
- # Aggregate token counts from conversation stats
2028
- total_input_tokens = 0
2029
- total_output_tokens = 0
2030
- for entry in conversation:
2031
- if entry.get("role") == "system" and isinstance(entry.get("content"), dict):
2032
- content = entry["content"]
2033
- if content.get("type") == "stats" and "performance_stats" in content:
2034
- stats = content["performance_stats"]
2035
- if stats.get("input_tokens") is not None:
2036
- total_input_tokens += stats["input_tokens"]
2037
- if stats.get("output_tokens") is not None:
2038
- total_output_tokens += stats["output_tokens"]
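For reference, a hypothetical per-step stats entry of the shape this aggregation loop consumes; only the input_tokens and output_tokens fields are read, and the counts here are invented:

stats_entry = {
    "role": "system",
    "content": {
        "type": "stats",
        "step": 3,
        "performance_stats": {"input_tokens": 1200, "output_tokens": 250},
    },
}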
2039
-
2040
- # Return the result
2041
- has_errors = len(self.error_history) > 0
2042
- has_valid_answer = (
2043
- final_answer and final_answer.strip()
2044
- ) # Check for non-empty answer
2045
- result = {
2046
- "status": (
2047
- "success"
2048
- if has_valid_answer and not has_errors
2049
- else ("failed" if has_errors else "incomplete")
2050
- ),
2051
- "result": (
2052
- final_answer
2053
- if final_answer
2054
- else self._generate_max_steps_message(
2055
- conversation, steps_taken, steps_limit
2056
- )
2057
- ),
2058
- "system_prompt": self.system_prompt, # Include system prompt in the result
2059
- "conversation": conversation,
2060
- "steps_taken": steps_taken,
2061
- "duration": total_duration, # Total query processing time in seconds
2062
- "input_tokens": total_input_tokens, # Total input tokens across all steps
2063
- "output_tokens": total_output_tokens, # Total output tokens across all steps
2064
- "total_tokens": total_input_tokens
2065
- + total_output_tokens, # Combined token count
2066
- "error_count": len(self.error_history),
2067
- "error_history": self.error_history, # Include the full error history
2068
- }
2069
-
2070
- # Write trace to file if requested
2071
- if trace:
2072
- file_path = self._write_json_to_file(result, filename)
2073
- result["output_file"] = file_path
2074
-
2075
- logger.debug(f"Query processing complete: {result}")
2076
-
2077
- # Store the result internally
2078
- self.last_result = result
2079
-
2080
- return result
2081
-
2082
- def _post_process_tool_result(
2083
- self, _tool_name: str, _tool_args: Dict[str, Any], _tool_result: Dict[str, Any]
2084
- ) -> None:
2085
- """
2086
- Post-process the tool result for domain-specific handling.
2087
- Override this in subclasses to provide domain-specific behavior.
2088
-
2089
- Args:
2090
- _tool_name: Name of the tool that was executed
2091
- _tool_args: Arguments that were passed to the tool
2092
- _tool_result: Result returned by the tool
2093
- """
2094
- ...
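A minimal override sketch for this hook in a subclass; the tool name and result keys are hypothetical:

def _post_process_tool_result(self, _tool_name, _tool_args, _tool_result):
    # React to one domain tool's successful result (illustrative names)
    if _tool_name == "render_scene" and _tool_result.get("status") == "success":
        self.console.print_info(f"Render written to {_tool_result.get('path')}")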
2095
-
2096
- def display_result(
2097
- self,
2098
- title: str = "Result",
2099
- result: Dict[str, Any] = None,
2100
- print_result: bool = False,
2101
- ) -> None:
2102
- """
2103
- Display the result and output file path information.
2104
-
2105
- Args:
2106
- title: Optional title for the result panel
2107
- result: Optional result dictionary to display. If None, uses the last stored result.
2108
- print_result: If True, print the result to the console
2109
- """
2110
- # Use the provided result or fall back to the last stored result
2111
- display_result = result if result is not None else self.last_result
2112
-
2113
- if display_result is None:
2114
- self.console.print_warning("No result available to display.")
2115
- return
2116
-
2117
- # Print the full result with syntax highlighting
2118
- if print_result:
2119
- self.console.pretty_print_json(display_result, title)
2120
-
2121
- # If there's an output file, display its path after the result
2122
- if "output_file" in display_result:
2123
- self.console.print_info(
2124
- f"Output written to: {display_result['output_file']}"
2125
- )
2126
-
2127
- def get_error_history(self) -> List[Any]:
2128
- """
2129
- Get the history of errors encountered by the agent.
2130
-
2131
- Returns:
2132
- List of error entries; entries may be plain message strings or structured dicts with "step", "error", and "type" keys
2133
- """
2134
- return self.error_history
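Callers should handle both entry shapes this class appends (plain strings from the response parser, dicts from the query loop); a consumption sketch, assuming agent is an already-constructed subclass instance:

for entry in agent.get_error_history():
    message = entry["error"] if isinstance(entry, dict) else str(entry)
    print(message)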
2135
-
2136
- def _validate_plan_required(self, parsed: Dict[str, Any], step: int) -> None:
2137
- """
2138
- Validate that the response includes a plan when required by the agent.
2139
-
2140
- Args:
2141
- parsed: The parsed response from the LLM
2142
- step: The current step number
2143
- """
2144
- # Skip validation if we're not in planning mode or if we're already executing a plan
2145
- if self.execution_state != self.STATE_PLANNING or self.current_plan is not None:
2146
- return
2147
-
2148
- # Allow simple single-tool operations without requiring a plan
2149
- if "tool" in parsed and step == 1:
2150
- tool_name = parsed.get("tool", "")
2151
- # List of tools that can execute directly without a plan
2152
- simple_tools = self.SIMPLE_TOOLS
2153
- if tool_name in simple_tools:
2154
- logger.debug(f"Allowing direct execution of simple tool: {tool_name}")
2155
- return
2156
-
2157
- # Check if plan is missing on the first step
2158
- # BUT: Allow direct answers without plans (for simple conversational queries)
2159
- if "plan" not in parsed and "answer" not in parsed and step == 1:
2160
- warning_msg = f"No plan found in step {step} response. The agent should create a plan for all tasks."
2161
- logger.warning(warning_msg)
2162
- self.console.print_warning(warning_msg)
2163
-
2164
- # For the first step, we'll add a flag to indicate we need to re-prompt for a plan
2165
- parsed["needs_plan"] = True
2166
-
2167
- # If there's a tool in the response, store it but don't execute it yet
2168
- if "tool" in parsed:
2169
- parsed["deferred_tool"] = parsed["tool"]
2170
- parsed["deferred_tool_args"] = parsed.get("tool_args", {})
2171
- # Remove the tool so it won't be executed
2172
- del parsed["tool"]
2173
- if "tool_args" in parsed:
2174
- del parsed["tool_args"]
2175
-
2176
- # Set state to indicate we need planning
2177
- self.execution_state = self.STATE_PLANNING
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ Generic Agent class for building domain-specific agents.
5
+ """
6
+
7
+ # Standard library imports
8
+ import abc
9
+ import datetime
10
+ import inspect
11
+ import json
12
+ import logging
13
+ import os
14
+ import re
15
+ import subprocess
16
+ import uuid
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from gaia.agents.base.console import AgentConsole, SilentConsole
20
+ from gaia.agents.base.errors import format_execution_trace
21
+ from gaia.agents.base.tools import _TOOL_REGISTRY
22
+
23
+ # First-party imports
24
+ from gaia.chat.sdk import ChatConfig, ChatSDK
25
+
26
+ # Set up logging
27
+ logging.basicConfig(level=logging.INFO)
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Content truncation thresholds
31
+ CHUNK_TRUNCATION_THRESHOLD = 5000
32
+ CHUNK_TRUNCATION_SIZE = 2500
33
+
34
+
35
+ class Agent(abc.ABC):
36
+ """
37
+ Base Agent class that provides core functionality for domain-specific agents.
38
+
39
+ The Agent class handles the core conversation loop, tool execution, and LLM
40
+ interaction patterns. It provides:
41
+ - Conversation management with an LLM
42
+ - Tool registration and execution framework
43
+ - JSON response parsing and validation
44
+ - Error handling and recovery
45
+ - State management for multi-step plans
46
+ - Output formatting and file writing
47
+ - Configurable prompt display for debugging
48
+
49
+ Key Parameters:
50
+ debug: Enable general debug output and logging
51
+ show_prompts: Display prompts sent to LLM (useful for debugging prompts)
52
+ debug_prompts: Include prompts in conversation history for analysis
53
+ streaming: Enable real-time streaming of LLM responses
54
+ silent_mode: Suppress all console output for JSON-only usage
55
+ """
56
+
57
+ # Define state constants
58
+ STATE_PLANNING = "PLANNING"
59
+ STATE_EXECUTING_PLAN = "EXECUTING_PLAN"
60
+ STATE_DIRECT_EXECUTION = "DIRECT_EXECUTION"
61
+ STATE_ERROR_RECOVERY = "ERROR_RECOVERY"
62
+ STATE_COMPLETION = "COMPLETION"
63
+
64
+ # Define tools that can execute directly without requiring a plan
65
+ # Subclasses can override this to specify domain-specific simple tools
66
+ SIMPLE_TOOLS = []
67
+
68
+ def __init__(
69
+ self,
70
+ use_claude: bool = False,
71
+ use_chatgpt: bool = False,
72
+ claude_model: str = "claude-sonnet-4-20250514",
73
+ base_url: Optional[str] = None,
74
+ model_id: str = None,
75
+ max_steps: int = 5,
76
+ debug_prompts: bool = False,
77
+ show_prompts: bool = False,
78
+ output_dir: str = None,
79
+ streaming: bool = False,
80
+ show_stats: bool = False,
81
+ silent_mode: bool = False,
82
+ debug: bool = False,
83
+ output_handler=None,
84
+ max_plan_iterations: int = 3,
85
+ min_context_size: int = 32768,
86
+ skip_lemonade: bool = False,
87
+ ):
88
+ """
89
+ Initialize the Agent with LLM client.
90
+
91
+ Args:
92
+ use_claude: If True, uses Claude API (default: False)
93
+ use_chatgpt: If True, uses ChatGPT/OpenAI API (default: False)
94
+ claude_model: Claude model to use when use_claude=True (default: "claude-sonnet-4-20250514")
95
+ base_url: Base URL for local LLM server (default: reads from LEMONADE_BASE_URL env var, falls back to http://localhost:8000/api/v1)
96
+ model_id: The ID of the model to use with LLM server (default for local)
97
+ max_steps: Maximum number of steps the agent can take before terminating
98
+ debug_prompts: If True, includes prompts in the conversation history
99
+ show_prompts: If True, displays prompts sent to LLM in console (default: False)
100
+ output_dir: Directory for storing JSON output files (default: current directory)
101
+ streaming: If True, enables real-time streaming of LLM responses (default: False)
102
+ show_stats: If True, displays LLM performance stats after each response (default: False)
103
+ silent_mode: If True, suppresses all console output for JSON-only usage (default: False)
104
+ debug: If True, enables debug output for troubleshooting (default: False)
105
+ output_handler: Custom OutputHandler for displaying agent output (default: None, creates console based on silent_mode)
106
+ max_plan_iterations: Maximum number of plan-execute-replan cycles (default: 3, 0 = unlimited)
107
+ min_context_size: Minimum context size required for this agent (default: 32768).
108
+ skip_lemonade: If True, skip Lemonade server initialization (default: False).
109
+ Use this when connecting to a different OpenAI-compatible backend.
110
+
111
+ Note: Uses local LLM server by default unless use_claude or use_chatgpt is True.
112
+ """
113
+ self.error_history = [] # Store error history for learning
114
+ self.conversation_history = (
115
+ []
116
+ ) # Store conversation history for session persistence
117
+ self.max_steps = max_steps
118
+ self.debug_prompts = debug_prompts
119
+ self.show_prompts = show_prompts # Separate flag for displaying prompts
120
+ self.output_dir = output_dir if output_dir else os.getcwd()
121
+ self.streaming = streaming
122
+ self.show_stats = show_stats
123
+ self.silent_mode = silent_mode
124
+ self.debug = debug
125
+ self.last_result = None # Store the most recent result
126
+ self.max_plan_iterations = max_plan_iterations
127
+ self._current_query: Optional[str] = (
128
+ None # Store current query for error context
129
+ )
130
+
131
+ # Read base_url from environment if not provided
132
+ if base_url is None:
133
+ base_url = os.getenv("LEMONADE_BASE_URL", "http://localhost:8000/api/v1")
134
+
135
+ # Lazy Lemonade initialization for local LLM users
136
+ # This ensures Lemonade server is running before we try to use it
137
+ if not (use_claude or use_chatgpt or skip_lemonade):
138
+ from gaia.llm.lemonade_manager import LemonadeManager
139
+
140
+ LemonadeManager.ensure_ready(
141
+ min_context_size=min_context_size,
142
+ quiet=silent_mode,
143
+ base_url=base_url,
144
+ )
145
+
146
+ # Initialize state management
147
+ self.execution_state = self.STATE_PLANNING
148
+ self.current_plan = None
149
+ self.current_step = 0
150
+ self.total_plan_steps = 0
151
+ self.plan_iterations = 0 # Track number of plan cycles
152
+
153
+ # Initialize the console/output handler for display
154
+ # If output_handler is provided, use it; otherwise create based on silent_mode
155
+ if output_handler is not None:
156
+ self.console = output_handler
157
+ else:
158
+ self.console = self._create_console()
159
+
160
+ # Initialize LLM client for local model
161
+ self.system_prompt = self._get_system_prompt()
162
+
163
+ # Register tools for this agent
164
+ self._register_tools()
165
+
166
+ # Update system prompt with available tools and response format
167
+ tools_description = self._format_tools_for_prompt()
168
+ self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n"
169
+
170
+ # Add JSON response format instructions (shared across all agents)
171
+ self.system_prompt += """
172
+ ==== RESPONSE FORMAT ====
173
+ You must respond ONLY in valid JSON. No text before { or after }.
174
+
175
+ **To call a tool:**
176
+ {"thought": "reasoning", "goal": "objective", "tool": "tool_name", "tool_args": {"arg1": "value1"}}
177
+
178
+ **To create a multi-step plan:**
179
+ {
180
+ "thought": "reasoning",
181
+ "goal": "objective",
182
+ "plan": [
183
+ {"tool": "tool1", "tool_args": {"arg": "val"}},
184
+ {"tool": "tool2", "tool_args": {"arg": "val"}}
185
+ ],
186
+ "tool": "tool1",
187
+ "tool_args": {"arg": "val"}
188
+ }
189
+
190
+ **To provide a final answer:**
191
+ {"thought": "reasoning", "goal": "achieved", "answer": "response to user"}
192
+
193
+ **RULES:**
194
+ 1. ALWAYS use tools for real data - NEVER hallucinate
195
+ 2. Plan steps MUST be objects like {"tool": "x", "tool_args": {}}, NOT strings
196
+ 3. After tool results, provide an "answer" summarizing them
197
+ """
198
+
199
+ # Initialize ChatSDK with proper configuration
200
+ # Note: We don't set system_prompt in config, we pass it per request
201
+ # Note: Context size is configured when starting Lemonade server, not here
202
+ # Use Qwen3-Coder-30B by default for better reasoning and JSON formatting
203
+ # The 0.5B model is too small for complex agent tasks
204
+ chat_config = ChatConfig(
205
+ model=model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF",
206
+ use_claude=use_claude,
207
+ use_chatgpt=use_chatgpt,
208
+ claude_model=claude_model,
209
+ base_url=base_url,
210
+ show_stats=True, # Always collect stats for token tracking
211
+ max_history_length=20, # Keep more history for agent conversations
212
+ max_tokens=4096, # Increased for complex code generation
213
+ )
214
+ self.chat = ChatSDK(chat_config)
215
+ self.model_id = model_id
216
+
217
+ # Print system prompt if show_prompts is enabled
218
+ # Debug: Check the actual value of show_prompts
219
+ if self.debug:
220
+ logger.debug(
221
+ f"show_prompts={self.show_prompts}, debug={self.debug}, will show prompt: {self.show_prompts}"
222
+ )
223
+
224
+ if self.show_prompts:
225
+ self.console.print_prompt(self.system_prompt, "Initial System Prompt")
226
+
227
+ @abc.abstractmethod
228
+ def _get_system_prompt(self) -> str:
229
+ """
230
+ Generate the system prompt for the agent.
231
+ Subclasses must implement this to provide domain-specific prompts.
232
+ """
233
+ raise NotImplementedError("Subclasses must implement _get_system_prompt")
234
+
235
+ def _create_console(self):
236
+ """
237
+ Create and return a console output handler.
238
+ Returns SilentConsole if in silent_mode, otherwise AgentConsole.
239
+ Subclasses can override this to provide domain-specific console output.
240
+ """
241
+ if self.silent_mode:
242
+ # Check if we should completely silence everything (including final answer)
243
+ # This would be true for JSON-only output or when output_dir is set
244
+ silence_final_answer = getattr(self, "output_dir", None) is not None
245
+ return SilentConsole(silence_final_answer=silence_final_answer)
246
+ return AgentConsole()
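Because the handler is consumed duck-typed through self.console, callers can also pass output_handler directly; a sketch that subclasses AgentConsole to redirect only the final answer (the file path is arbitrary, and the method signature matches the call site in the query loop):

class FileAnswerConsole(AgentConsole):
    def print_final_answer(self, answer, streaming=False):
        # Persist the answer instead of printing it
        with open("answer.txt", "w", encoding="utf-8") as fh:
            fh.write(str(answer))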
247
+
248
+ @abc.abstractmethod
249
+ def _register_tools(self):
250
+ """
251
+ Register all domain-specific tools for the agent.
252
+ Subclasses must implement this method.
253
+ """
254
+ raise NotImplementedError("Subclasses must implement _register_tools")
255
+
256
+ def _format_tools_for_prompt(self) -> str:
257
+ """Format the registered tools into a string for the prompt."""
258
+ tool_descriptions = []
259
+
260
+ for name, tool_info in _TOOL_REGISTRY.items():
261
+ params_str = ", ".join(
262
+ [
263
+ f"{param_name}{'' if param_info['required'] else '?'}: {param_info['type']}"
264
+ for param_name, param_info in tool_info["parameters"].items()
265
+ ]
266
+ )
267
+
268
+ description = tool_info["description"].strip()
269
+ tool_descriptions.append(f"- {name}({params_str}): {description}")
270
+
271
+ return "\n".join(tool_descriptions)
272
+
273
+ def list_tools(self, verbose: bool = True) -> None:
274
+ """
275
+ Display all tools registered for this agent with their parameters and descriptions.
276
+
277
+ Args:
278
+ verbose: If True, displays full descriptions and parameter details. If False, shows a compact list.
279
+ """
280
+ self.console.print_header(f"🛠️ Registered Tools for {self.__class__.__name__}")
281
+ self.console.print_separator()
282
+
283
+ for name, tool_info in _TOOL_REGISTRY.items():
284
+ # Format parameters
285
+ params = []
286
+ for param_name, param_info in tool_info["parameters"].items():
287
+ required = param_info.get("required", False)
288
+ param_type = param_info.get("type", "Any")
289
+ default = param_info.get("default", None)
290
+
291
+ if required:
292
+ params.append(f"{param_name}: {param_type}")
293
+ else:
294
+ default_str = f"={default}" if default is not None else "=None"
295
+ params.append(f"{param_name}: {param_type}{default_str}")
296
+
297
+ params_str = ", ".join(params)
298
+
299
+ # Get description
300
+ if verbose:
301
+ description = tool_info["description"]
302
+ else:
303
+ description = (
304
+ tool_info["description"].split("\n")[0]
305
+ if tool_info["description"]
306
+ else "No description"
307
+ )
308
+
309
+ # Print tool information
310
+ self.console.print_tool_info(name, params_str, description)
311
+
312
+ self.console.print_separator()
313
+
314
+ return None
315
+
316
+ def _extract_json_from_response(self, response: str) -> Optional[Dict[str, Any]]:
317
+ """
318
+ Apply multiple extraction strategies to find valid JSON in the response.
319
+
320
+ Args:
321
+ response: The raw response from the LLM
322
+
323
+ Returns:
324
+ Extracted JSON dictionary or None if extraction failed
325
+ """
326
+ # Strategy 1: Extract JSON from code blocks with various patterns
327
+ json_patterns = [
328
+ r"```(?:json)?\s*(.*?)\s*```", # Standard code block
329
+ r"`json\s*(.*?)\s*`", # Single backtick with json tag
330
+ r"<json>\s*(.*?)\s*</json>", # XML-style tags
331
+ ]
332
+
333
+ for pattern in json_patterns:
334
+ matches = re.findall(pattern, response, re.DOTALL)
335
+ for match in matches:
336
+ try:
337
+ result = json.loads(match)
338
+ # Ensure tool_args exists if tool is present
339
+ if "tool" in result and "tool_args" not in result:
340
+ result["tool_args"] = {}
341
+ logger.debug(f"Successfully extracted JSON with pattern {pattern}")
342
+ return result
343
+ except json.JSONDecodeError:
344
+ continue
345
+
346
+ start_idx = response.find("{")
347
+ if start_idx >= 0:
348
+ bracket_count = 0
349
+ in_string = False
350
+ escape_next = False
351
+
352
+ for i, char in enumerate(response[start_idx:], start_idx):
353
+ if escape_next:
354
+ escape_next = False
355
+ continue
356
+ if char == "\\":
357
+ escape_next = True
358
+ continue
359
+ if char == '"' and not escape_next:
360
+ in_string = not in_string
361
+ if not in_string:
362
+ if char == "{":
363
+ bracket_count += 1
364
+ elif char == "}":
365
+ bracket_count -= 1
366
+ if bracket_count == 0:
367
+ # Found complete JSON object
368
+ try:
369
+ extracted = response[start_idx : i + 1]
370
+ # Fix common issues before parsing
371
+ fixed = re.sub(r",\s*}", "}", extracted)
372
+ fixed = re.sub(r",\s*]", "]", fixed)
373
+ result = json.loads(fixed)
374
+ # Ensure tool_args exists if tool is present
375
+ if "tool" in result and "tool_args" not in result:
376
+ result["tool_args"] = {}
377
+ logger.debug(
378
+ "Successfully extracted JSON using bracket-matching"
379
+ )
380
+ return result
381
+ except json.JSONDecodeError as e:
382
+ logger.debug(f"Bracket-matched JSON parse failed: {e}")
383
+ break
384
+
385
+ return None
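A behaviour sketch for these extraction strategies; inputs are illustrative and agent is any constructed subclass instance:

assert agent._extract_json_from_response('```json\n{"tool": "x"}\n```') == {"tool": "x", "tool_args": {}}
assert agent._extract_json_from_response('Sure! {"answer": "hi"}') == {"answer": "hi"}
assert agent._extract_json_from_response('{"answer": "hi",}') == {"answer": "hi"}
assert agent._extract_json_from_response("plain prose") is None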
386
+
387
+ def validate_json_response(self, response_text: str) -> Dict[str, Any]:
388
+ """
389
+ Validates and attempts to fix JSON responses from the LLM.
390
+
391
+ Attempts the following fixes in order:
392
+ 1. Parse as-is if valid JSON
393
+ 2. Extract JSON from code blocks
394
+ 3. Truncate after first complete JSON object
395
+ 4. Fix common JSON syntax errors
396
+ 5. Extract JSON-like content using regex
397
+
398
+ Args:
399
+ response_text: The response string from the LLM
400
+
401
+ Returns:
402
+ A dictionary containing the parsed JSON if valid
403
+
404
+ Raises:
405
+ ValueError: If the response cannot be parsed as JSON or is missing required fields
406
+ """
407
+ original_response = response_text
408
+ json_was_modified = False
409
+
410
+ # Step 0: Sanitize control characters to ensure proper JSON format
411
+ def sanitize_json_string(text: str) -> str:
412
+ """
413
+ Ensure JSON strings have properly escaped control characters.
414
+
415
+ Args:
416
+ text: JSON text that may contain unescaped control characters
417
+
418
+ Returns:
419
+ Sanitized JSON text with properly escaped control characters
420
+ """
421
+
422
+ def escape_string_content(match):
423
+ """Ensure control characters are properly escaped in JSON string values."""
424
+ quote = match.group(1)
425
+ content = match.group(2)
426
+ closing_quote = match.group(3)
427
+
428
+ # Ensure proper escaping of control characters
429
+ content = content.replace("\n", "\\n")
430
+ content = content.replace("\r", "\\r")
431
+ content = content.replace("\t", "\\t")
432
+ content = content.replace("\b", "\\b")
433
+ content = content.replace("\f", "\\f")
434
+
435
+ return f"{quote}{content}{closing_quote}"
436
+
437
+ # Match JSON strings: "..." handling escaped quotes
438
+ pattern = r'(")([^"\\]*(?:\\.[^"\\]*)*)(")'
439
+
440
+ try:
441
+ return re.sub(pattern, escape_string_content, text)
442
+ except Exception as e:
443
+ logger.debug(
444
+ f"[JSON] String sanitization encountered issue: {e}, using original"
445
+ )
446
+ return text
447
+
448
+ response_text = sanitize_json_string(response_text)
449
+
450
+ # Step 1: Try to parse as-is
451
+ try:
452
+ json_response = json.loads(response_text)
453
+ logger.debug("[JSON] Successfully parsed response without modifications")
454
+ except json.JSONDecodeError as initial_error:
455
+ # Step 2: Try to extract from code blocks
456
+ json_match = re.search(
457
+ r"```(?:json)?\s*({.*?})\s*```", response_text, re.DOTALL
458
+ )
459
+ if json_match:
460
+ try:
461
+ response_text = json_match.group(1)
462
+ json_response = json.loads(response_text)
463
+ json_was_modified = True
464
+ logger.warning("[JSON] Extracted JSON from code block")
465
+ except json.JSONDecodeError as e:
466
+ logger.debug(f"[JSON] Code block extraction failed: {e}")
467
+
468
+ # Step 3: Try to find and extract first complete JSON object
469
+ if not json_was_modified:
470
+ # Find the first '{' and try to match brackets
471
+ start_idx = response_text.find("{")
472
+ if start_idx >= 0:
473
+ bracket_count = 0
474
+ in_string = False
475
+ escape_next = False
476
+
477
+ for i, char in enumerate(response_text[start_idx:], start_idx):
478
+ if escape_next:
479
+ escape_next = False
480
+ continue
481
+ if char == "\\":
482
+ escape_next = True
483
+ continue
484
+ if char == '"' and not escape_next:
485
+ in_string = not in_string
486
+ if not in_string:
487
+ if char == "{":
488
+ bracket_count += 1
489
+ elif char == "}":
490
+ bracket_count -= 1
491
+ if bracket_count == 0:
492
+ # Found complete JSON object
493
+ try:
494
+ truncated = response_text[start_idx : i + 1]
495
+ json_response = json.loads(truncated)
496
+ json_was_modified = True
497
+ logger.warning(
498
+ f"[JSON] Truncated response after first complete JSON object (removed {len(response_text) - i - 1} chars)"
499
+ )
500
+ response_text = truncated
501
+ break
502
+ except json.JSONDecodeError:
503
+ logger.debug(
504
+ "[JSON] Truncated text is not valid JSON, trying next bracket pair"
505
+ )
506
+ continue
507
+
508
+ # Step 4: Try to fix common JSON errors
509
+ if not json_was_modified:
510
+ fixed_text = response_text
511
+
512
+ # Remove trailing commas
513
+ fixed_text = re.sub(r",\s*}", "}", fixed_text)
514
+ fixed_text = re.sub(r",\s*]", "]", fixed_text)
515
+
516
+ # Fix single quotes to double quotes (carefully)
517
+ if "'" in fixed_text and '"' not in fixed_text:
518
+ fixed_text = fixed_text.replace("'", '"')
519
+
520
+ # Remove any text before first '{' or '['
521
+ json_start = min(
522
+ fixed_text.find("{") if "{" in fixed_text else len(fixed_text),
523
+ fixed_text.find("[") if "[" in fixed_text else len(fixed_text),
524
+ )
525
+ if json_start > 0 and json_start < len(fixed_text):
526
+ fixed_text = fixed_text[json_start:]
527
+
528
+ # Try to parse the fixed text
529
+ if fixed_text != response_text:
530
+ try:
531
+ json_response = json.loads(fixed_text)
532
+ json_was_modified = True
533
+ logger.warning("[JSON] Applied automatic JSON fixes")
534
+ response_text = fixed_text
535
+ except json.JSONDecodeError as e:
536
+ logger.debug(f"[JSON] Auto-fix failed: {e}")
537
+
538
+ # If still no valid JSON, raise the original error
539
+ if not json_was_modified:
540
+ raise ValueError(
541
+ f"Failed to parse response as JSON: {str(initial_error)}"
542
+ )
543
+
544
+ # Log warning if JSON was modified
545
+ if json_was_modified:
546
+ logger.warning(
547
+ f"[JSON] Response was modified to extract valid JSON. Original length: {len(original_response)}, Fixed length: {len(response_text)}"
548
+ )
549
+
550
+ # Validate required fields
551
+ # Note: 'goal' is optional for simple answer responses
552
+ if "answer" in json_response:
553
+ required_fields = ["thought", "answer"] # goal is optional
554
+ elif "tool" in json_response:
555
+ required_fields = ["thought", "tool", "tool_args"] # goal is optional
556
+ else:
557
+ required_fields = ["thought", "plan"] # goal is optional
558
+
559
+ missing_fields = [
560
+ field for field in required_fields if field not in json_response
561
+ ]
562
+ if missing_fields:
563
+ raise ValueError(
564
+ f"Response is missing required fields: {', '.join(missing_fields)}"
565
+ )
566
+
567
+ return json_response
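The repair ladder in action; inputs are illustrative, each call exercises one fallback step, and the last raises ValueError:

agent.validate_json_response('{"thought": "t", "answer": "ok"}')                   # step 1: parses as-is
agent.validate_json_response('```json\n{"thought": "t", "answer": "ok"}\n```')     # step 2: code-block extraction
agent.validate_json_response('{"thought": "t", "answer": "ok"} trailing chatter')  # step 3: truncate after object
agent.validate_json_response("{'thought': 't', 'answer': 'ok'}")                   # step 4: single-quote fix
agent.validate_json_response("no json here")                                       # raises ValueError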
568
+
569
+ def _parse_llm_response(self, response: str) -> Dict[str, Any]:
+ """
+ Parse the LLM response to extract tool calls or conversational answers.
+
+ ARCHITECTURE: Supports two response modes
+ - Plain text for conversation (no JSON required)
+ - JSON for tool invocations
+
+ Args:
+ response: The raw response from the LLM
+
+ Returns:
+ Parsed response as a dictionary
+ """
+ # Check for empty responses
+ if not response or not response.strip():
+ logger.warning("Empty LLM response received")
+ self.error_history.append("Empty LLM response")
+
+ # Provide more helpful error message based on context
+ if hasattr(self, "api_mode") and self.api_mode: # pylint: disable=no-member
+ answer = "I encountered an issue processing your request. This might be due to a connection problem with the language model. Please try again."
+ else:
+ answer = "I apologize, but I received an empty response from the language model. Please try again."
+
+ return {
+ "thought": "LLM returned empty response",
+ "goal": "Handle empty response error",
+ "answer": answer,
+ }
+
+ response = response.strip()
+
+ # Log what we received for debugging (show more to see full JSON)
+ if len(response) > 500:
+ logger.debug(
+ f"📥 LLM Response ({len(response)} chars): {response[:500]}..."
+ )
+ else:
+ logger.debug(f"📥 LLM Response: {response}")
+
+ # STEP 1: Fast path - detect plain text conversational responses
+ # If response doesn't start with '{', it's likely plain text
+ # Accept it immediately without logging errors
+ if not response.startswith("{"):
+ logger.debug(
+ f"[PARSE] Plain text conversational response (length: {len(response)})"
+ )
+ return {"thought": "", "goal": "", "answer": response}
+
+ # STEP 2: Response starts with '{' - looks like JSON
+ # Try direct JSON parsing first (fastest path)
+ try:
+ result = json.loads(response)
+ # Ensure tool_args exists if tool is present
+ if "tool" in result and "tool_args" not in result:
+ result["tool_args"] = {}
+ logger.debug("[PARSE] Valid JSON response")
+ return result
+ except json.JSONDecodeError:
+ # JSON parsing failed - continue to extraction methods
+ logger.debug("[PARSE] Malformed JSON, trying extraction")
+
+ # STEP 3: Try JSON extraction methods (handles code blocks, mixed text, etc.)
+ extracted_json = self._extract_json_from_response(response)
+ if extracted_json:
+ logger.debug("[PARSE] Extracted JSON successfully")
+ return extracted_json
+
+ # STEP 4: JSON was expected (starts with '{') but all parsing failed
+ # Log error ONLY for JSON that couldn't be parsed
+ logger.debug("Attempting to extract fields using regex")
+ thought_match = re.search(r'"thought":\s*"([^"]*)"', response)
+ tool_match = re.search(r'"tool":\s*"([^"]*)"', response)
+ answer_match = re.search(r'"answer":\s*"([^"]*)"', response)
+ plan_match = re.search(r'"plan":\s*(\[.*?\])', response, re.DOTALL)
+
+ if answer_match:
+ result = {
+ "thought": thought_match.group(1) if thought_match else "",
+ "goal": "what was achieved",
+ "answer": answer_match.group(1),
+ }
+ logger.debug(f"Extracted answer using regex: {result}")
+ return result
+
+ if tool_match:
+ tool_args = {}
+
+ tool_args_start = response.find('"tool_args"')
+
+ if tool_args_start >= 0:
+ # Find the opening brace after "tool_args":
+ brace_start = response.find("{", tool_args_start)
+ if brace_start >= 0:
+ # Use bracket-matching to find the complete object
+ bracket_count = 0
+ in_string = False
+ escape_next = False
+ for i, char in enumerate(response[brace_start:], brace_start):
+ if escape_next:
+ escape_next = False
+ continue
+ if char == "\\":
+ escape_next = True
+ continue
+ if char == '"' and not escape_next:
+ in_string = not in_string
+ if not in_string:
+ if char == "{":
+ bracket_count += 1
+ elif char == "}":
+ bracket_count -= 1
+ if bracket_count == 0:
+ # Found complete tool_args object
+ tool_args_str = response[brace_start : i + 1]
+ try:
+ tool_args = json.loads(tool_args_str)
+ except json.JSONDecodeError as e:
+ error_msg = f"Failed to parse tool_args JSON: {str(e)}, content: {tool_args_str[:100]}..."
+ logger.error(error_msg)
+ self.error_history.append(error_msg)
+ break
+
+ result = {
+ "thought": thought_match.group(1) if thought_match else "",
+ "goal": "clear statement of what you're trying to achieve",
+ "tool": tool_match.group(1),
+ "tool_args": tool_args,
+ }
+
+ # Add plan if found
+ if plan_match:
+ try:
+ result["plan"] = json.loads(plan_match.group(1))
+ logger.debug(f"Extracted plan using regex: {result['plan']}")
+ except json.JSONDecodeError as e:
+ error_msg = f"Failed to parse plan JSON: {str(e)}, content: {plan_match.group(1)[:100]}..."
+ logger.error(error_msg)
+ self.error_history.append(error_msg)
+
+ logger.debug(f"Extracted tool call using regex: {result}")
+ return result
+
+ # Try to match simple key-value patterns for object names (like ': "my_cube"')
+ obj_name_match = re.search(
+ r'["\':]?\s*["\'"]?([a-zA-Z0-9_\.]+)["\'"]?', response
+ )
+ if obj_name_match:
+ object_name = obj_name_match.group(1)
+ # If it looks like an object name and not just a random word
+ if "." in object_name or "_" in object_name:
+ logger.debug(f"Found potential object name: {object_name}")
+ return {
+ "thought": "Extracted object name",
+ "goal": "Use the object name",
+ "answer": object_name,
+ }
+
+ # CONVERSATIONAL MODE: No JSON found - treat as plain conversational response
+ # This is normal and expected for chat agents responding to greetings, explanations, etc.
+ logger.debug(
+ f"[PARSE] No JSON structure found, treating as conversational response. Length: {len(response)}, preview: {response[:100]}..."
+ )
+
+ # If response is empty, provide a meaningful fallback
+ if not response.strip():
+ logger.warning("[PARSE] Empty response received from LLM")
+ return {
+ "thought": "",
+ "goal": "",
+ "answer": "I apologize, but I received an empty response. Please try again.",
+ }
+
+ # Valid conversational response - wrap it in expected format
+ return {"thought": "", "goal": "", "answer": response.strip()}
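
For orientation, a minimal sketch (not part of the diff) of the two response shapes this parser accepts; the method comes from the code above, while the payloads and the `agent` instance are invented:

    import json
    plain = "Hello! How can I help?"  # conversational mode: wrapped as-is into "answer"
    tooled = json.dumps({"thought": "inspect the scene", "goal": "list objects",
                         "tool": "get_scene_info", "tool_args": {}})
    # agent._parse_llm_response(plain)  -> {"thought": "", "goal": "", "answer": plain}
    # agent._parse_llm_response(tooled) -> the parsed dict, with "tool_args" defaulted to {}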
+
+ def _execute_tool(self, tool_name: str, tool_args: Dict[str, Any]) -> Any:
+ """
+ Execute a tool by name with the provided arguments.
+
+ Args:
+ tool_name: Name of the tool to execute
+ tool_args: Arguments to pass to the tool
+
+ Returns:
+ Result of the tool execution
+ """
+ logger.debug(f"Executing tool {tool_name} with args: {tool_args}")
+
+ if tool_name not in _TOOL_REGISTRY:
+ logger.error(f"Tool '{tool_name}' not found in registry")
+ return {"status": "error", "error": f"Tool '{tool_name}' not found"}
+
+ tool = _TOOL_REGISTRY[tool_name]["function"]
+ sig = inspect.signature(tool)
+
+ # Get required parameters (those without defaults)
+ required_args = {
+ name: param
+ for name, param in sig.parameters.items()
+ if param.default == inspect.Parameter.empty and name != "return"
+ }
+
+ # Check for missing required arguments
+ missing_args = [arg for arg in required_args if arg not in tool_args]
+ if missing_args:
+ error_msg = (
+ f"Missing required arguments for {tool_name}: {', '.join(missing_args)}"
+ )
+ logger.error(error_msg)
+ return {"status": "error", "error": error_msg}
+
+ try:
+ result = tool(**tool_args)
+ logger.debug(f"Tool execution result: {result}")
+ return result
+ except subprocess.TimeoutExpired as e:
+ # Handle subprocess timeout specifically
+ error_msg = f"Tool {tool_name} timed out: {str(e)}"
+ logger.error(error_msg)
+ self.error_history.append(error_msg)
+ return {"status": "error", "error": error_msg, "timeout": True}
+ except Exception as e:
+ # Format error with full execution trace for debugging
+ formatted_error = format_execution_trace(
+ exception=e,
+ query=getattr(self, "_current_query", None),
+ plan_step=self.current_step + 1 if self.current_plan else None,
+ total_steps=self.total_plan_steps if self.current_plan else None,
+ tool_name=tool_name,
+ tool_args=tool_args,
+ )
+ logger.error(f"Error executing tool {tool_name}: {e}")
+ self.error_history.append(str(e)) # Store brief error, not formatted
+
+ # Print to console immediately so user sees it
+ self.console.print_error(formatted_error)
+
+ return {
+ "status": "error",
+ "error_brief": str(e), # Brief error message for quick reference
+ "error_displayed": True, # Flag to prevent duplicate display
+ "tool_name": tool_name,
+ "tool_args": tool_args,
+ "plan_step": self.current_step + 1 if self.current_plan else None,
+ }
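
A sketch of the registry shape `_execute_tool` expects: names map to dicts with a "function" entry, and required parameters are those without defaults. The decorator below is illustrative, not the package's actual registration API:

    _TOOL_REGISTRY = {}

    def register_tool(fn):
        _TOOL_REGISTRY[fn.__name__] = {"function": fn}
        return fn

    @register_tool
    def echo(text: str, repeat: int = 1):
        return {"status": "success", "result": text * repeat}

    # agent._execute_tool("echo", {"text": "hi"}) -> {"status": "success", "result": "hi"}
    # agent._execute_tool("echo", {}) -> missing-required-arguments error dict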
+
+ def _generate_max_steps_message(
+ self, conversation: List[Dict], steps_taken: int, steps_limit: int
+ ) -> str:
+ """Generate informative message when max steps is reached.
+
+ Args:
+ conversation: The conversation history
+ steps_taken: Number of steps actually taken
+ steps_limit: Maximum steps allowed
+
+ Returns:
+ Informative message about what was accomplished
+ """
+ # Analyze what was done
+ tool_calls = [
+ msg
+ for msg in conversation
+ if msg.get("role") == "assistant" and "tool_calls" in msg
+ ]
+
+ tools_used = []
+ for msg in tool_calls:
+ for tool_call in msg.get("tool_calls", []):
+ if "function" in tool_call:
+ tools_used.append(tool_call["function"]["name"])
+
+ message = f"⚠️ Reached maximum steps limit ({steps_limit} steps)\n\n"
+ message += f"Completed {steps_taken} steps using these tools:\n"
+
+ # Count tool usage
+ from collections import Counter
+
+ tool_counts = Counter(tools_used)
+ for tool, count in tool_counts.most_common(10):
+ message += f" - {tool}: {count}x\n"
+
+ message += "\nTo continue or complete this task:\n"
+ message += "1. Review the generated files and progress so far\n"
+ message += f"2. Run with --max-steps {steps_limit + 50} to allow more steps\n"
+ message += "3. Or complete remaining tasks manually\n"
+
+ return message
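
The tool tally above relies on collections.Counter; a standalone illustration of that idiom:

    from collections import Counter
    tools_used = ["read_file", "read_file", "write_file"]
    for tool, count in Counter(tools_used).most_common(10):
        print(f" - {tool}: {count}x")  # "- read_file: 2x", then "- write_file: 1x"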
+
+ def _write_json_to_file(self, data: Dict[str, Any], filename: str = None) -> str:
+ """
+ Write JSON data to a file and return the absolute path.
+
+ Args:
+ data: Dictionary data to write as JSON
+ filename: Optional filename, if None a timestamped name will be generated
+
+ Returns:
+ Absolute path to the saved file
+ """
+ # Ensure output directory exists
+ os.makedirs(self.output_dir, exist_ok=True)
+
+ # Generate filename if not provided
+ if not filename:
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+ filename = f"agent_output_{timestamp}.json"
+
+ # Ensure filename has .json extension
+ if not filename.endswith(".json"):
+ filename += ".json"
+
+ # Create absolute path
+ file_path = os.path.join(self.output_dir, filename)
+
+ # Write JSON data to file
+ with open(file_path, "w", encoding="utf-8") as f:
+ json.dump(data, f, indent=2)
+
+ return os.path.abspath(file_path)
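
Hypothetical usage, assuming an agent instance whose output_dir attribute is set:

    path = agent._write_json_to_file({"status": "success"}, "run1")
    # -> absolute path to <output_dir>/run1.json; the .json suffix and the
    #    output directory are supplied automatically, per the code above.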
+
+ def _handle_large_tool_result(
+ self,
+ tool_name: str,
+ tool_result: Any,
+ conversation: List[Dict[str, Any]],
+ tool_args: Optional[Dict[str, Any]] = None,
+ ) -> Any:
+ """
+ Handle large tool results by truncating them if necessary.
+
+ Args:
+ tool_name: Name of the executed tool
+ tool_result: The result from tool execution
+ conversation: The conversation list to append to
+ tool_args: Arguments passed to the tool (optional)
+
+ Returns:
+ The truncated result or original if within limits
+ """
+ truncated_result = tool_result
+ if isinstance(tool_result, (dict, list)):
+ # Use custom encoder to handle bytes and other non-serializable types
+ result_str = json.dumps(tool_result, default=self._json_serialize_fallback)
+ if (
+ len(result_str) > 30000
+ ): # Threshold for truncation (appropriate for 32K context)
+ # Truncate large results to prevent overwhelming the LLM
+ truncated_str = self._truncate_large_content(
+ tool_result, max_chars=20000 # Increased for 32K context
+ )
+ try:
+ truncated_result = json.loads(truncated_str)
+ except json.JSONDecodeError:
+ # If truncated string isn't valid JSON, use it as-is
+ truncated_result = truncated_str
+ # Notify user about truncation
+ self.console.print_info(
+ f"Note: Large result ({len(result_str)} chars) truncated for LLM context"
+ )
+ if self.debug:
+ print(f"[DEBUG] Tool result truncated from {len(result_str)} chars")
+
+ # Add to conversation
+ tool_entry: Dict[str, Any] = {
+ "role": "tool",
+ "name": tool_name,
+ "content": truncated_result,
+ }
+ if tool_args is not None:
+ tool_entry["tool_args"] = tool_args
+ conversation.append(tool_entry)
+ return truncated_result
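
The thresholds above mean a serialized result over 30,000 characters is cut down to roughly 20,000 before it reaches the LLM; a quick check of when that path triggers:

    import json
    big = {"rows": ["x" * 100] * 400}       # ~40k chars when serialized
    needs_truncation = len(json.dumps(big)) > 30000
    print(needs_truncation)                 # True -> would be truncated to ~20000 chars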
+
+ def _create_tool_message(self, tool_name: str, tool_output: Any) -> Dict[str, Any]:
+ """
+ Build a message structure representing a tool output for downstream LLM calls.
+ """
+ if isinstance(tool_output, str):
+ text_content = tool_output
+ else:
+ text_content = self._truncate_large_content(tool_output, max_chars=2000)
+
+ if not isinstance(text_content, str):
+ text_content = json.dumps(
+ tool_output, default=self._json_serialize_fallback
+ )
+
+ return {
+ "role": "tool",
+ "name": tool_name,
+ "tool_call_id": uuid.uuid4().hex,
+ "content": [{"type": "text", "text": text_content}],
+ }
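
The returned structure resembles a chat-completions tool message; a sketch of the shape with invented content:

    import uuid
    msg = {
        "role": "tool",
        "name": "read_file",
        "tool_call_id": uuid.uuid4().hex,
        "content": [{"type": "text", "text": "...file contents..."}],
    }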
+
+ def _json_serialize_fallback(self, obj: Any) -> Any:
+ """
+ Fallback serializer for JSON encoding non-standard types.
+
+ Handles bytes, datetime, and other common non-serializable types.
+ """
+ try:
+ import numpy as np # Local import to avoid hard dependency at module import time
+
+ if isinstance(obj, np.generic):
+ return obj.item()
+ if isinstance(obj, np.ndarray):
+ return obj.tolist()
+ except Exception:
+ pass
+
+ if isinstance(obj, bytes):
+ # For binary data, return a placeholder (don't expose raw bytes to LLM)
+ return f"<binary data: {len(obj)} bytes>"
+ if hasattr(obj, "isoformat"):
+ # Handle datetime objects
+ return obj.isoformat()
+ if hasattr(obj, "__dict__"):
+ # Handle objects with __dict__
+ return obj.__dict__
+
+ for caster in (float, int, str):
+ try:
+ return caster(obj)
+ except Exception:
+ continue
+
+ return "<non-serializable>"
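
This fallback is meant to be passed as json.dumps(..., default=...); a small demonstration (agent is a hypothetical instance):

    import datetime, json
    payload = {"raw": b"\x00\x01", "when": datetime.datetime(2025, 1, 1)}
    print(json.dumps(payload, default=agent._json_serialize_fallback))
    # -> {"raw": "<binary data: 2 bytes>", "when": "2025-01-01T00:00:00"}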
+
+ def _truncate_large_content(self, content: Any, max_chars: int = 2000) -> str:
+ """
+ Truncate large content to prevent overwhelming the LLM.
+ Defaults to 2000 chars; callers pass larger limits (e.g. 20000 for a 32K token context window).
+ """
+
+ # If the output contains test_results, don't truncate: it can carry
+ # important information on how to fix the tests
+ if isinstance(content, dict) and (
+ "test_results" in content or "run_tests" in content
+ ):
+ return json.dumps(content, default=self._json_serialize_fallback)
+
+ # Convert to string (use compact JSON first to check size)
+ if isinstance(content, (dict, list)):
+ compact_str = json.dumps(content, default=self._json_serialize_fallback)
+ # Only use indented format if we need to truncate anyway
+ content_str = (
+ json.dumps(content, indent=2, default=self._json_serialize_fallback)
+ if len(compact_str) > max_chars
+ else compact_str
+ )
+ else:
+ content_str = str(content)
+
+ # Return as-is if within limits
+ if len(content_str) <= max_chars:
+ return content_str
+
+ # For responses with chunks (e.g., search results, document retrieval)
+ if (
+ isinstance(content, dict)
+ and "chunks" in content
+ and isinstance(content["chunks"], list)
+ ):
+ truncated = content.copy()
+
+ # Keep all chunks but truncate individual chunk content if needed
+ if "chunks" in truncated:
+ for chunk in truncated["chunks"]:
+ if isinstance(chunk, dict) and "content" in chunk:
+ # Keep full content for chunks (they're the actual data)
+ # Only truncate if a single chunk is massive
+ if len(chunk["content"]) > CHUNK_TRUNCATION_THRESHOLD:
+ chunk["content"] = (
+ chunk["content"][:CHUNK_TRUNCATION_SIZE]
+ + "\n...[chunk truncated]...\n"
+ + chunk["content"][-CHUNK_TRUNCATION_SIZE:]
+ )
+
+ result_str = json.dumps(
+ truncated, indent=2, default=self._json_serialize_fallback
+ )
+ # Use larger limit for chunked responses since chunks are the actual data
+ if len(result_str) <= max_chars * 3: # Allow up to 60KB for chunked data
+ return result_str
+ # If still too large, keep first 3 chunks only
+ truncated["chunks"] = truncated["chunks"][:3]
+ return json.dumps(
+ truncated, indent=2, default=self._json_serialize_fallback
+ )
+
+ # For Jira responses, keep first 3 issues
+ if (
+ isinstance(content, dict)
+ and "issues" in content
+ and isinstance(content["issues"], list)
+ ):
+ truncated = {
+ **content,
+ "issues": content["issues"][:3],
+ "truncated": True,
+ "total": len(content["issues"]),
+ }
+ return json.dumps(
+ truncated, indent=2, default=self._json_serialize_fallback
+ )[:max_chars]
+
+ # For lists, keep first 3 items
+ if isinstance(content, list):
+ truncated = (
+ content[:3] + [{"truncated": f"{len(content) - 3} more"}]
+ if len(content) > 3
+ else content
+ )
+ return json.dumps(
+ truncated, indent=2, default=self._json_serialize_fallback
+ )[:max_chars]
+
+ # Simple truncation
+ half = max_chars // 2 - 20
+ return f"{content_str[:half]}\n...[truncated]...\n{content_str[-half:]}"
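
The final head/tail fallback keeps roughly half the budget from each end; a worked check of the arithmetic:

    max_chars = 2000
    half = max_chars // 2 - 20                   # 980 characters from each end
    text = "A" * 5000
    truncated = f"{text[:half]}\n...[truncated]...\n{text[-half:]}"
    print(len(truncated))                        # 1979, just under the 2000 budget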
+
+ def process_query(
+ self,
+ user_input: str,
+ max_steps: int = None,
+ trace: bool = False,
+ filename: str = None,
+ ) -> Dict[str, Any]:
+ """
+ Process a user query and execute the necessary tools.
+ Displays each step as it's being generated in real-time.
+
+ Args:
+ user_input: User's query or request
+ max_steps: Maximum number of steps to take in the conversation (overrides class default if provided)
+ trace: If True, write detailed JSON trace to file
+ filename: Optional filename for trace output, if None a timestamped name will be generated
+
+ Returns:
+ Dict containing the final result and operation details
+ """
+ import time
+
+ start_time = time.time() # Track query processing start time
+
+ # Store query for error context (used in _execute_tool for error formatting)
+ self._current_query = user_input
+
+ logger.debug(f"Processing query: {user_input}")
+ conversation = []
+ # Build messages array for chat completions
+ messages = []
+
+ # Prepopulate with conversation history if available (for session persistence)
+ if hasattr(self, "conversation_history") and self.conversation_history:
+ messages.extend(self.conversation_history)
+ logger.debug(
+ f"Loaded {len(self.conversation_history)} messages from conversation history"
+ )
+
+ steps_taken = 0
+ final_answer = None
+ error_count = 0
+ last_tool_call = None # Track the last tool call to prevent loops
+ last_error = None # Track the last error to handle it properly
+ previous_outputs = [] # Track previous tool outputs
+
+ # Reset state management
+ self.execution_state = self.STATE_PLANNING
+ self.current_plan = None
+ self.current_step = 0
+ self.total_plan_steps = 0
+ self.plan_iterations = 0 # Reset plan iteration counter
+
+ # Add user query to the conversation history
+ conversation.append({"role": "user", "content": user_input})
+ messages.append({"role": "user", "content": user_input})
+
+ # Use provided max_steps or fall back to class default
+ steps_limit = max_steps if max_steps is not None else self.max_steps
+
+ # Print initial message with max steps info
+ self.console.print_processing_start(user_input, steps_limit)
+ logger.debug(f"Using max_steps: {steps_limit}")
+
+ prompt = f"User request: {user_input}\n\n"
+
+ # Only add planning reminder in PLANNING state
+ if self.execution_state == self.STATE_PLANNING:
+ prompt += (
+ "IMPORTANT: ALWAYS BEGIN WITH A PLAN before executing any tools.\n"
+ "First create a detailed plan with all necessary steps, then execute the first step.\n"
+ "When creating a plan with multiple steps:\n"
+ " 1. ALWAYS follow the plan in the correct order, starting with the FIRST step.\n"
+ " 2. Include both a plan and a 'tool' field; the 'tool' field MUST match the tool in the first step of the plan.\n"
+ " 3. Create plans with clear, executable steps that include both the tool name and the exact arguments for each step.\n"
+ )
+
+ logger.debug(f"Input prompt: {prompt[:200]}...")
+
+ # Process the query in steps, allowing for multiple tool usages
+ while steps_taken < steps_limit and final_answer is None:
+ # Build the next prompt based on current state (this is for fallback mode only)
+ # In chat mode, we'll just add to messages array
+ steps_taken += 1
+ logger.debug(f"Step {steps_taken}/{steps_limit}")
+
+ # Check if we're at the limit and ask user if they want to continue
+ if steps_taken == steps_limit and final_answer is None:
+ # Show what was accomplished
+ max_steps_msg = self._generate_max_steps_message(
+ conversation, steps_taken, steps_limit
+ )
+ self.console.print_warning(max_steps_msg)
+
+ # Ask user if they want to continue (skip in silent mode OR if stdin is not available)
+ # IMPORTANT: Never call input() in API/CI contexts to avoid blocking threads
+ import sys
+
+ has_stdin = sys.stdin and sys.stdin.isatty()
+ if has_stdin and not (
+ hasattr(self, "silent_mode") and self.silent_mode
+ ):
+ try:
+ response = (
+ input("\nContinue with 50 more steps? (y/n): ")
+ .strip()
+ .lower()
+ )
+ if response in ["y", "yes"]:
+ steps_limit += 50
+ self.console.print_info(
+ f"✓ Continuing with {steps_limit} total steps...\n"
+ )
+ else:
+ self.console.print_info("Stopping at user request.")
+ break
+ except (EOFError, KeyboardInterrupt):
+ self.console.print_info("\nStopping at user request.")
+ break
+ else:
+ # Silent mode - just stop
+ break
+
+ # Display current step
+ self.console.print_step_header(steps_taken, steps_limit)
+
+ # Skip automatic finalization for single-step plans - always request proper final answer
+
+ # If we're executing a plan, we might not need to query the LLM again
+ if (
+ self.execution_state == self.STATE_EXECUTING_PLAN
+ and self.current_step < self.total_plan_steps
+ ):
+ logger.debug(
+ f"Executing plan step {self.current_step + 1}/{self.total_plan_steps}"
+ )
+ self.console.print_state_info(
+ f"EXECUTING PLAN: Step {self.current_step + 1}/{self.total_plan_steps}"
+ )
+
+ # Display the current plan with the current step highlighted
+ if self.current_plan:
+ self.console.print_plan(self.current_plan, self.current_step)
+
+ # Extract next step from plan
+ next_step = self.current_plan[self.current_step]
+
+ if (
+ isinstance(next_step, dict)
+ and "tool" in next_step
+ and "tool_args" in next_step
+ ):
+ # We have a properly formatted step with tool and args
+ tool_name = next_step["tool"]
+ tool_args = next_step["tool_args"]
+
+ # Create a parsed response structure as if it came from the LLM
+ parsed = {
+ "thought": f"Executing step {self.current_step + 1} of the plan",
+ "goal": f"Following the plan to {user_input}",
+ "tool": tool_name,
+ "tool_args": tool_args,
+ }
+
+ # Add to conversation
+ conversation.append({"role": "assistant", "content": parsed})
+
+ # Display the agent's reasoning for the step
+ self.console.print_thought(
+ parsed.get("thought", "Executing plan step")
+ )
+ self.console.print_goal(parsed.get("goal", "Following the plan"))
+
+ # Display the tool call in real-time
+ self.console.print_tool_usage(tool_name)
+
+ # Start progress indicator for tool execution
+ self.console.start_progress(f"Executing {tool_name}")
+
+ # Execute the tool
+ tool_result = self._execute_tool(tool_name, tool_args)
+
+ # Stop progress indicator
+ self.console.stop_progress()
+
+ # Handle domain-specific post-processing
+ self._post_process_tool_result(tool_name, tool_args, tool_result)
+
+ # Handle large tool results
+ truncated_result = self._handle_large_tool_result(
+ tool_name, tool_result, conversation, tool_args
+ )
+
+ # Display the tool result in real-time (show full result to user)
+ self.console.print_tool_complete()
+
+ self.console.pretty_print_json(tool_result, "Tool Result")
+
+ # Store the truncated output for future context
+ previous_outputs.append(
+ {
+ "tool": tool_name,
+ "args": tool_args,
+ "result": truncated_result,
+ }
+ )
+
+ # Share tool output with subsequent LLM calls
+ messages.append(
+ self._create_tool_message(tool_name, truncated_result)
+ )
+
+ # Check for error (support multiple error formats)
+ is_error = isinstance(tool_result, dict) and (
+ tool_result.get("status") == "error" # Standard format
+ or tool_result.get("success")
+ is False # Tools returning success: false
+ or tool_result.get("has_errors") is True # CLI tools
+ or tool_result.get("return_code", 0) != 0 # Build failures
+ )
+
+ if is_error:
+ error_count += 1
+ # Extract error message from various formats
+ # Prefer error_brief for logging (avoids duplicate formatted output)
+ last_error = (
+ tool_result.get("error_brief")
+ or tool_result.get("error")
+ or tool_result.get("stderr")
+ or tool_result.get("hint") # Many tools provide hints
+ or tool_result.get(
+ "suggested_fix"
+ ) # Some tools provide fix suggestions
+ or f"Command failed with return code {tool_result.get('return_code')}"
+ )
+ logger.warning(
+ f"Tool execution error in plan (count: {error_count}): {last_error}"
+ )
+ # Only print if error wasn't already displayed by _execute_tool
+ if not tool_result.get("error_displayed"):
+ self.console.print_error(last_error)
+
+ # Switch to error recovery state
+ self.execution_state = self.STATE_ERROR_RECOVERY
+ self.console.print_state_info(
+ "ERROR RECOVERY: Handling tool execution failure"
+ )
+
+ # Break out of plan execution to trigger error recovery prompt
+ continue
+ else:
+ # Success - move to next step in plan
+ self.current_step += 1
+
+ # Check if we've completed the plan
+ if self.current_step >= self.total_plan_steps:
+ logger.debug("Plan execution completed")
+ self.execution_state = self.STATE_COMPLETION
+ self.console.print_state_info(
+ "COMPLETION: Plan fully executed"
+ )
+
+ # Increment plan iteration counter
+ self.plan_iterations += 1
+ logger.debug(
+ f"Plan iteration {self.plan_iterations} completed"
+ )
+
+ # Check if we've reached max plan iterations
+ reached_max_iterations = (
+ self.max_plan_iterations > 0
+ and self.plan_iterations >= self.max_plan_iterations
+ )
+
+ # Prepare message for final answer with the completed plan context
+ plan_context = {
+ "completed_plan": self.current_plan,
+ "total_steps": self.total_plan_steps,
+ }
+ plan_context_raw = json.dumps(
+ plan_context, default=self._json_serialize_fallback
+ )
+ if len(plan_context_raw) > 20000:
+ plan_context_str = self._truncate_large_content(
+ plan_context, max_chars=20000
+ )
+ else:
+ plan_context_str = plan_context_raw
+
+ if reached_max_iterations:
+ # Force final answer after max iterations
+ completion_message = (
+ f"Maximum plan iterations ({self.max_plan_iterations}) reached for task: {user_input}\n"
+ f"Task: {user_input}\n"
+ f"Plan information:\n{plan_context_str}\n\n"
+ f"IMPORTANT: You MUST now provide a final answer with an honest assessment:\n"
+ f"- Summarize what was successfully accomplished\n"
+ f"- Clearly state if anything remains incomplete or if errors occurred\n"
+ f"- If the task is fully complete, state that clearly\n\n"
+ f'Provide {{"thought": "...", "goal": "...", "answer": "..."}}'
+ )
+ else:
+ completion_message = (
+ "You have successfully completed all steps in the plan.\n"
+ f"Task: {user_input}\n"
+ f"Plan information:\n{plan_context_str}\n\n"
+ f"Plan iteration: {self.plan_iterations}/{self.max_plan_iterations if self.max_plan_iterations > 0 else 'unlimited'}\n"
+ "Check if more work is needed:\n"
+ "- If the task is complete and verified, provide a final answer\n"
+ "- If critical validation/testing is needed, you may create ONE more plan\n"
+ "- Only create additional plans if absolutely necessary\n\n"
+ 'If more work needed: Provide a NEW plan with {"thought": "...", "goal": "...", "plan": [...]}\n'
+ 'If everything is complete: Provide {"thought": "...", "goal": "...", "answer": "..."}'
+ )
+
+ # Debug logging - only show if truncation happened
+ if self.debug and len(plan_context_raw) > 2000:
+ print(
+ "\n[DEBUG] Plan context truncated for completion message"
+ )
+
+ # Add completion request to messages
+ messages.append(
+ {"role": "user", "content": completion_message}
+ )
+
+ # Send the completion prompt to get final answer
+ self.console.print_state_info(
+ "COMPLETION: Requesting final answer"
+ )
+
+ # Continue to next iteration to get final answer
+ continue
+ else:
+ # Continue with next step - no need to query LLM again
+ continue
+ else:
+ # Plan step doesn't have proper format, fall back to LLM
+ logger.warning(
+ f"Plan step {self.current_step + 1} doesn't have proper format: {next_step}"
+ )
+ self.console.print_warning(
+ f"Plan step {self.current_step + 1} format incorrect, asking LLM for guidance"
+ )
+ prompt = (
+ f"You are following a plan but step {self.current_step + 1} doesn't have proper format: {next_step}\n"
+ "Please interpret this step and decide what tool to use next.\n\n"
+ f"Task: {user_input}\n\n"
+ )
+ else:
+ # Normal execution flow - query the LLM
+ if self.execution_state == self.STATE_DIRECT_EXECUTION:
+ self.console.print_state_info("DIRECT EXECUTION: Analyzing task")
+ elif self.execution_state == self.STATE_PLANNING:
+ self.console.print_state_info("PLANNING: Creating or refining plan")
+ elif self.execution_state == self.STATE_ERROR_RECOVERY:
+ self.console.print_state_info(
+ "ERROR RECOVERY: Handling previous error"
+ )
+
+ # Truncate previous outputs if too large to avoid overwhelming the LLM
+ truncated_outputs = (
+ self._truncate_large_content(previous_outputs, max_chars=500)
+ if previous_outputs
+ else "None"
+ )
+
+ # Create a specific error recovery prompt
+ prompt = (
+ "TOOL EXECUTION FAILED!\n\n"
+ f"You were trying to execute: {last_tool_call[0] if last_tool_call else 'unknown tool'}\n"
+ f"Error: {last_error}\n\n"
+ f"Original task: {user_input}\n\n"
+ f"Current plan step {self.current_step + 1}/{self.total_plan_steps} failed.\n"
+ f"Current plan: {self.current_plan}\n\n"
+ f"Previous successful outputs: {truncated_outputs}\n\n"
+ "INSTRUCTIONS:\n"
+ "1. Analyze the error and understand what went wrong\n"
+ "2. Create a NEW corrected plan that fixes the error\n"
+ "3. Make sure to use correct tool parameters (check the available tools)\n"
+ "4. Start executing the corrected plan\n\n"
+ "Respond with your analysis, a corrected plan, and the first tool to execute."
+ )
+
+ # Add the error recovery prompt to the messages array so it gets sent to LLM
+ messages.append({"role": "user", "content": prompt})
+
+ # Reset state to planning after creating recovery prompt
+ self.execution_state = self.STATE_PLANNING
+ self.current_plan = None
+ self.current_step = 0
+ self.total_plan_steps = 0
+
+ elif self.execution_state == self.STATE_COMPLETION:
+ self.console.print_state_info("COMPLETION: Finalizing response")
+
+ # Print the prompt if show_prompts is enabled (separate from debug_prompts)
+ if self.show_prompts:
+ # Build context from system prompt and messages
+ context_parts = [
+ (
+ f"SYSTEM: {self.system_prompt[:200]}..."
+ if len(self.system_prompt) > 200
+ else f"SYSTEM: {self.system_prompt}"
+ )
+ ]
+
+ for msg in messages:
+ role = msg.get("role", "user").upper()
+ content = str(msg.get("content", ""))[:150]
+ context_parts.append(
+ f"{role}: {content}{'...' if len(str(msg.get('content', ''))) > 150 else ''}"
+ )
+
+ if not messages and prompt:
+ context_parts.append(
+ f"USER: {prompt[:150]}{'...' if len(prompt) > 150 else ''}"
+ )
+
+ self.console.print_prompt("\n".join(context_parts), "LLM Context")
+
+ # Handle streaming or non-streaming LLM response
+ # Initialize response_stats so it's always in scope
+ response_stats = None
+
+ if self.streaming:
+ # Streaming mode - raw response will be streamed
+ # (SilentConsole will suppress this, AgentConsole will show it)
+
+ # Add prompt to conversation if debug is enabled
+ if self.debug_prompts:
+ conversation.append(
+ {"role": "system", "content": {"prompt": prompt}}
+ )
+ # Print the prompt if show_prompts is enabled
+ if self.show_prompts:
+ self.console.print_prompt(
+ prompt, f"Prompt (Step {steps_taken})"
+ )
+
+ # Get streaming response from ChatSDK with proper conversation history
+ try:
+ response_stream = self.chat.send_messages_stream(
+ messages=messages, system_prompt=self.system_prompt
+ )
+
+ # Process the streaming response chunks as they arrive
+ full_response = ""
+ for chunk_response in response_stream:
+ if chunk_response.is_complete:
+ response_stats = chunk_response.stats
+ else:
+ self.console.print_streaming_text(chunk_response.text)
+ full_response += chunk_response.text
+
+ self.console.print_streaming_text("", end_of_stream=True)
+ response = full_response
+ except ConnectionError as e:
+ # Handle LLM server connection errors specifically
+ error_msg = f"LLM Server Connection Failed (streaming): {str(e)}"
+ logger.error(error_msg)
+ self.console.print_error(error_msg)
+
+ # Add error to history
+ self.error_history.append(
+ {
+ "step": steps_taken,
+ "error": error_msg,
+ "type": "llm_connection_error",
+ }
+ )
+
+ # Return error response
+ final_answer = (
+ f"Unable to complete task due to LLM server error: {str(e)}"
+ )
+ break
+ except Exception as e:
+ logger.error(f"Unexpected error during streaming: {e}")
+
+ # Add to error history
+ self.error_history.append(
+ {
+ "step": steps_taken,
+ "error": str(e),
+ "type": "llm_streaming_error",
+ }
+ )
+
+ # Return error response
+ final_answer = (
+ f"Unable to complete task due to streaming error: {str(e)}"
+ )
+ break
+ else:
+ # Use progress indicator for non-streaming mode
+ self.console.start_progress("Thinking")
+
+ # Debug logging before LLM call
+ if self.debug:
+
+ print(f"\n[DEBUG] About to call LLM with {len(messages)} messages")
+ print(
+ f"[DEBUG] Last message role: {messages[-1]['role'] if messages else 'No messages'}"
+ )
+ if messages and len(messages[-1].get("content", "")) < 500:
+ print(
+ f"[DEBUG] Last message content: {messages[-1]['content']}"
+ )
+ else:
+ print(
+ f"[DEBUG] Last message content length: {len(messages[-1].get('content', ''))}"
+ )
+ print(f"[DEBUG] Execution state: {self.execution_state}")
+ if self.execution_state == "PLANNING":
+ print("[DEBUG] Current step: Planning (no active plan yet)")
+ else:
+ print(
+ f"[DEBUG] Current step: {self.current_step}/{self.total_plan_steps}"
+ )
+
+ # Get complete response from ChatSDK
+ try:
+ chat_response = self.chat.send_messages(
+ messages=messages, system_prompt=self.system_prompt
+ )
+ response = chat_response.text
+ response_stats = chat_response.stats
+ except ConnectionError as e:
+ error_msg = f"LLM Server Connection Failed: {str(e)}"
+ logger.error(error_msg)
+ self.console.print_error(error_msg)
+
+ # Add error to history and update state
+ self.error_history.append(
+ {
+ "step": steps_taken,
+ "error": error_msg,
+ "type": "llm_connection_error",
+ }
+ )
+
+ # Return error response
+ final_answer = (
+ f"Unable to complete task due to LLM server error: {str(e)}"
+ )
+ break
+ except Exception as e:
+ if self.debug:
+ print(f"[DEBUG] Error calling LLM: {e}")
+ logger.error(f"Unexpected error calling LLM: {e}")
+
+ # Add to error history
+ self.error_history.append(
+ {"step": steps_taken, "error": str(e), "type": "llm_error"}
+ )
+
+ # Return error response
+ final_answer = f"Unable to complete task due to error: {str(e)}"
+ break
+
+ # Stop the progress indicator
+ self.console.stop_progress()
+
+ # Print the LLM response to the console
+ logger.debug(f"LLM response: {response[:200]}...")
+ if self.show_prompts:
+ self.console.print_response(response, "LLM Response")
+
+ # Parse the response
+ parsed = self._parse_llm_response(response)
+ logger.debug(f"Parsed response: {parsed}")
+ conversation.append({"role": "assistant", "content": parsed})
+
+ # Add assistant response to messages for chat history
+ messages.append({"role": "assistant", "content": response})
+
+ # Validate the response has a plan if required
+ self._validate_plan_required(parsed, steps_taken)
+
+ # If the LLM needs to create a plan first, re-prompt it specifically for that
+ if "needs_plan" in parsed and parsed["needs_plan"]:
+ # Prepare a special prompt that specifically requests a plan
+ deferred_tool = parsed.get("deferred_tool", None)
+ deferred_args = parsed.get("deferred_tool_args", {})
+
+ plan_prompt = (
+ "You MUST create a detailed plan first before taking any action.\n\n"
+ f"User request: {user_input}\n\n"
+ )
+
+ if deferred_tool:
+ plan_prompt += (
+ f"You initially wanted to use the {deferred_tool} tool with these arguments:\n"
+ f"{json.dumps(deferred_args, indent=2, default=self._json_serialize_fallback)}\n\n"
+ "However, you MUST first create a plan. Please create a plan that includes this tool usage as a step.\n\n"
+ )
+
+ plan_prompt += (
+ "Create a detailed plan with all necessary steps in JSON format, including exact tool names and arguments.\n"
+ "Respond with your reasoning, plan, and the first tool to use."
+ )
+
+ # Store the plan prompt in conversation if debug is enabled
+ if self.debug_prompts:
+ conversation.append(
+ {"role": "system", "content": {"prompt": plan_prompt}}
+ )
+ if self.show_prompts:
+ self.console.print_prompt(plan_prompt, "Plan Request Prompt")
+
+ # Notify the user we're asking for a plan
+ self.console.print_info("Requesting a detailed plan before proceeding")
+
+ # Get the planning response
+ if self.streaming:
+ # Add prompt to conversation if debug is enabled
+ if self.debug_prompts:
+ conversation.append(
+ {"role": "system", "content": {"prompt": plan_prompt}}
+ )
+ # Print the prompt if show_prompts is enabled
+ if self.show_prompts:
+ self.console.print_prompt(
+ plan_prompt, f"Prompt (Step {steps_taken})"
+ )
+
+ # Handle streaming as before
+ full_response = ""
+ # Add plan request to messages
+ messages.append({"role": "user", "content": plan_prompt})
+
+ # Use ChatSDK for streaming plan response
+ stream_gen = self.chat.send_messages_stream(
+ messages=messages, system_prompt=self.system_prompt
+ )
+
+ for chunk_response in stream_gen:
+ if not chunk_response.is_complete:
+ chunk = chunk_response.text
+ if hasattr(self.console, "print_streaming_text"):
+ self.console.print_streaming_text(chunk)
+ else:
+ print(chunk, end="", flush=True)
+ full_response += chunk
+
+ if hasattr(self.console, "print_streaming_text"):
+ self.console.print_streaming_text("", end_of_stream=True)
+ else:
+ print("", flush=True)
+
+ plan_response = full_response
+ else:
+ # Use progress indicator for non-streaming mode
+ self.console.start_progress("Creating plan")
+
+ # Store the plan prompt in conversation if debug is enabled
+ if self.debug_prompts:
+ conversation.append(
+ {"role": "system", "content": {"prompt": plan_prompt}}
+ )
+ if self.show_prompts:
+ self.console.print_prompt(
+ plan_prompt, "Plan Request Prompt"
+ )
+
+ # Add plan request to messages
+ messages.append({"role": "user", "content": plan_prompt})
+
+ # Use ChatSDK for non-streaming plan response
+ chat_response = self.chat.send_messages(
+ messages=messages, system_prompt=self.system_prompt
+ )
+ plan_response = chat_response.text
+ self.console.stop_progress()
+
+ # Parse the plan response
+ parsed_plan = self._parse_llm_response(plan_response)
+ logger.debug(f"Parsed plan response: {parsed_plan}")
+ conversation.append({"role": "assistant", "content": parsed_plan})
+
+ # Add plan response to messages for chat history
+ messages.append({"role": "assistant", "content": plan_response})
+
+ # Display the agent's reasoning for the plan
+ self.console.print_thought(parsed_plan.get("thought", "Creating plan"))
+ self.console.print_goal(parsed_plan.get("goal", "Planning for task"))
+
+ # Set the parsed response to the new plan for further processing
+ parsed = parsed_plan
+ else:
+ # Display the agent's reasoning in real-time (only if provided)
+ # Skip if we just displayed thought/goal for a plan request above
+ thought = parsed.get("thought", "").strip()
+ goal = parsed.get("goal", "").strip()
+
+ if thought and thought != "No explicit reasoning provided":
+ self.console.print_thought(thought)
+
+ if goal and goal != "No explicit goal provided":
+ self.console.print_goal(goal)
+
+ # Process plan if available
+ if "plan" in parsed:
+ # Validate that plan is actually a list, not a string or other type
+ if not isinstance(parsed["plan"], list):
+ logger.error(
+ f"Invalid plan format: expected list, got {type(parsed['plan']).__name__}. "
+ f"Plan content: {parsed['plan']}"
+ )
+ self.console.print_error(
+ f"LLM returned invalid plan format (expected array, got {type(parsed['plan']).__name__}). "
+ "Asking for correction..."
+ )
+
+ # Create error recovery prompt
+ error_msg = (
+ "ERROR: You provided a plan in the wrong format.\n"
+ "Expected: an array of step objects\n"
+ f"You provided: {type(parsed['plan']).__name__}\n\n"
+ "The correct format is:\n"
+ f'{{"plan": [{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}]}}\n\n'
+ f"Please create a proper plan as an array of step objects for: {user_input}"
+ )
+ messages.append({"role": "user", "content": error_msg})
+
+ # Continue to next iteration to get corrected plan
+ continue
+
+ # Validate that plan items are dictionaries with required fields
+ invalid_steps = []
+ for i, step in enumerate(parsed["plan"]):
+ if not isinstance(step, dict):
+ invalid_steps.append((i, type(step).__name__, step))
+ elif "tool" not in step or "tool_args" not in step:
+ invalid_steps.append((i, "missing fields", step))
+
+ if invalid_steps:
+ logger.error(f"Invalid plan steps found: {invalid_steps}")
+ self.console.print_error(
+ f"Plan contains {len(invalid_steps)} invalid step(s). Asking for correction..."
+ )
+
+ # Create detailed error message
+ error_details = "\n".join(
+ [
+ f"Step {i+1}: {issue} - {step}"
+ for i, issue, step in invalid_steps[
+ :3
+ ] # Show first 3 errors
+ ]
+ )
+
+ error_msg = (
+ f"ERROR: Your plan contains invalid steps:\n{error_details}\n\n"
+ f"Each step must be a dictionary with 'tool' and 'tool_args' fields:\n"
+ f'{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}\n\n'
+ f"Please create a corrected plan for: {user_input}"
+ )
+ messages.append({"role": "user", "content": error_msg})
+
+ # Continue to next iteration to get corrected plan
+ continue
+
+ # Plan is valid - proceed with execution
+ self.current_plan = parsed["plan"]
+ self.current_step = 0
+ self.total_plan_steps = len(self.current_plan)
+ self.execution_state = self.STATE_EXECUTING_PLAN
+ logger.debug(
+ f"New plan created with {self.total_plan_steps} steps: {self.current_plan}"
+ )
+
+ # If the response contains a tool call, execute it
+ if "tool" in parsed and "tool_args" in parsed:
+
+ # Display the current plan with the current step highlighted
+ if self.current_plan:
+ self.console.print_plan(self.current_plan, self.current_step)
+
+ # When both plan and tool are present, prioritize the plan execution
+ # If we have a plan, we should execute from the plan, not the standalone tool call
+ if "plan" in parsed and self.current_plan and self.total_plan_steps > 0:
+ # Skip the standalone tool execution and let the plan execution handle it
+ # The plan execution logic will handle this in the next iteration
+ logger.debug(
+ "Plan and tool both present - deferring to plan execution logic"
+ )
+ continue # Skip tool execution, let plan execution handle it
+
+ # If this was a single-step plan, mark as completed after tool execution
+ if self.total_plan_steps == 1:
+ logger.debug(
+ "Single-step plan will be marked completed after tool execution"
+ )
+ self.execution_state = self.STATE_COMPLETION
+
+ tool_name = parsed["tool"]
+ tool_args = parsed["tool_args"]
+ logger.debug(f"Tool call detected: {tool_name} with args {tool_args}")
+
+ # Display the tool call in real-time
+ self.console.print_tool_usage(tool_name)
+
+ if tool_args:
+ self.console.pretty_print_json(tool_args, "Arguments")
+
+ # Start progress indicator for tool execution
+ self.console.start_progress(f"Executing {tool_name}")
+
+ # Check for repeated tool calls
+ if last_tool_call == (tool_name, str(tool_args)):
+ # Stop progress indicator
+ self.console.stop_progress()
+
+ logger.warning(f"Detected repeated tool call: {tool_name}")
+ # Force a final answer if the same tool is called repeatedly
+ final_answer = (
+ f"Task completed with {tool_name}. No further action needed."
+ )
+
+ self.console.print_repeated_tool_warning()
+ break
+
+ # Execute the tool
+ tool_result = self._execute_tool(tool_name, tool_args)
+
+ # Stop progress indicator
+ self.console.stop_progress()
+
+ # Handle domain-specific post-processing
+ self._post_process_tool_result(tool_name, tool_args, tool_result)
+
+ # Handle large tool results
+ truncated_result = self._handle_large_tool_result(
+ tool_name, tool_result, conversation, tool_args
+ )
+
+ # Display the tool result in real-time (show full result to user)
+ self.console.print_tool_complete()
+
+ self.console.pretty_print_json(tool_result, "Result")
+
+ # Store the truncated output for future context
+ previous_outputs.append(
+ {"tool": tool_name, "args": tool_args, "result": truncated_result}
+ )
+
+ # Share tool output with subsequent LLM calls
+ messages.append(self._create_tool_message(tool_name, truncated_result))
+
+ # Update last tool call
+ last_tool_call = (tool_name, str(tool_args))
+
+ # For single-step plans, we still need to let the LLM process the result
+ # This is especially important for RAG queries where the LLM needs to
+ # synthesize the retrieved information into a coherent answer
+ if (
+ self.execution_state == self.STATE_COMPLETION
+ and self.total_plan_steps == 1
+ ):
+ logger.debug(
+ "Single-step plan execution completed, requesting final answer from LLM"
+ )
+ # Don't break here - let the loop continue so the LLM can process the tool result
+ # The tool result has already been added to messages, so the next iteration
+ # will call the LLM with that result
+
+ # Check if tool execution resulted in an error (support multiple error formats)
+ is_error = isinstance(tool_result, dict) and (
+ tool_result.get("status") == "error"
+ or tool_result.get("success") is False
+ or tool_result.get("has_errors") is True
+ or tool_result.get("return_code", 0) != 0
+ )
+ if is_error:
+ error_count += 1
+ # Prefer error_brief for logging (avoids duplicate formatted output)
+ last_error = (
+ tool_result.get("error_brief")
+ or tool_result.get("error")
+ or tool_result.get("stderr")
+ or tool_result.get("hint")
+ or tool_result.get("suggested_fix")
+ or f"Command failed with return code {tool_result.get('return_code')}"
+ )
+ logger.warning(
+ f"Tool execution error in plan (count: {error_count}): {last_error}"
+ )
+ # Only print if error wasn't already displayed by _execute_tool
+ if not tool_result.get("error_displayed"):
+ self.console.print_error(last_error)
+
+ # Switch to error recovery state
+ self.execution_state = self.STATE_ERROR_RECOVERY
+ self.console.print_state_info(
+ "ERROR RECOVERY: Handling tool execution failure"
+ )
+
+ # Break out of tool execution to trigger error recovery prompt
+ continue
+
+ # Collect and store performance stats for token tracking
+ # Do this BEFORE checking for final answer so stats are always collected
+ perf_stats = response_stats or self.chat.get_stats()
+ if perf_stats:
+ conversation.append(
+ {
+ "role": "system",
+ "content": {
+ "type": "stats",
+ "step": steps_taken,
+ "performance_stats": perf_stats,
+ },
+ }
+ )
+
+ # Check for final answer (after collecting stats)
+ if "answer" in parsed:
+ final_answer = parsed["answer"]
+ self.execution_state = self.STATE_COMPLETION
+ self.console.print_final_answer(final_answer, streaming=self.streaming)
+ break
+
+ # Validate plan required
+ self._validate_plan_required(parsed, steps_taken)
+
+ # Print completion message
+ self.console.print_completion(steps_taken, steps_limit)
+
+ # Calculate total duration
+ total_duration = time.time() - start_time
+
+ # Aggregate token counts from conversation stats
+ total_input_tokens = 0
+ total_output_tokens = 0
+ for entry in conversation:
+ if entry.get("role") == "system" and isinstance(entry.get("content"), dict):
+ content = entry["content"]
+ if content.get("type") == "stats" and "performance_stats" in content:
+ stats = content["performance_stats"]
+ if stats.get("input_tokens") is not None:
+ total_input_tokens += stats["input_tokens"]
+ if stats.get("output_tokens") is not None:
+ total_output_tokens += stats["output_tokens"]
+
+ # Return the result
+ has_errors = len(self.error_history) > 0
+ has_valid_answer = (
+ final_answer and final_answer.strip()
+ ) # Check for non-empty answer
+ result = {
+ "status": (
+ "success"
+ if has_valid_answer and not has_errors
+ else ("failed" if has_errors else "incomplete")
+ ),
+ "result": (
+ final_answer
+ if final_answer
+ else self._generate_max_steps_message(
+ conversation, steps_taken, steps_limit
+ )
+ ),
+ "system_prompt": self.system_prompt, # Include system prompt in the result
+ "conversation": conversation,
+ "steps_taken": steps_taken,
+ "duration": total_duration, # Total query processing time in seconds
+ "input_tokens": total_input_tokens, # Total input tokens across all steps
+ "output_tokens": total_output_tokens, # Total output tokens across all steps
+ "total_tokens": total_input_tokens
+ + total_output_tokens, # Combined token count
+ "error_count": len(self.error_history),
+ "error_history": self.error_history, # Include the full error history
+ }
+
+ # Write trace to file if requested
+ if trace:
+ file_path = self._write_json_to_file(result, filename)
+ result["output_file"] = file_path
+
+ logger.debug(f"Query processing complete: {result}")
+
+ # Store the result internally
+ self.last_result = result
+
+ return result
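
A hypothetical driver for the loop above, assuming an Agent subclass with tools registered:

    result = agent.process_query("Summarize README.md", max_steps=20, trace=True)
    print(result["status"], result["steps_taken"], result["total_tokens"])
    if "output_file" in result:
        print("Trace written to", result["output_file"])  # only set when trace=True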
+
+ def _post_process_tool_result(
+ self, _tool_name: str, _tool_args: Dict[str, Any], _tool_result: Dict[str, Any]
+ ) -> None:
+ """
+ Post-process the tool result for domain-specific handling.
+ Override this in subclasses to provide domain-specific behavior.
+
+ Args:
+ _tool_name: Name of the tool that was executed
+ _tool_args: Arguments that were passed to the tool
+ _tool_result: Result returned by the tool
+ """
+ ...
+
+ def display_result(
+ self,
+ title: str = "Result",
+ result: Dict[str, Any] = None,
+ print_result: bool = False,
+ ) -> None:
+ """
+ Display the result and output file path information.
+
+ Args:
+ title: Optional title for the result panel
+ result: Optional result dictionary to display. If None, uses the last stored result.
+ print_result: If True, print the result to the console
+ """
+ # Use the provided result or fall back to the last stored result
+ display_result = result if result is not None else self.last_result
+
+ if display_result is None:
+ self.console.print_warning("No result available to display.")
+ return
+
+ # Print the full result with syntax highlighting
+ if print_result:
+ self.console.pretty_print_json(display_result, title)
+
+ # If there's an output file, display its path after the result
+ if "output_file" in display_result:
+ self.console.print_info(
+ f"Output written to: {display_result['output_file']}"
+ )
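
A typical follow-up call after process_query, since the last result is stored on the agent (instance name invented):

    agent.display_result(title="Run summary", print_result=True)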
+
+ def get_error_history(self) -> List[str]:
+ """
+ Get the history of errors encountered by the agent.
+
+ Returns:
+ List of error messages
+ """
+ return self.error_history
+
+ def _validate_plan_required(self, parsed: Dict[str, Any], step: int) -> None:
+ """
+ Validate that the response includes a plan when required by the agent.
+
+ Args:
+ parsed: The parsed response from the LLM
+ step: The current step number
+ """
+ # Skip validation if we're not in planning mode or if we're already executing a plan
+ if self.execution_state != self.STATE_PLANNING or self.current_plan is not None:
+ return
+
+ # Allow simple single-tool operations without requiring a plan
+ if "tool" in parsed and step == 1:
+ tool_name = parsed.get("tool", "")
+ # List of tools that can execute directly without a plan
+ simple_tools = self.SIMPLE_TOOLS
+ if tool_name in simple_tools:
+ logger.debug(f"Allowing direct execution of simple tool: {tool_name}")
+ return
+
+ # Check if plan is missing on the first step
+ # BUT: Allow direct answers without plans (for simple conversational queries)
+ if "plan" not in parsed and "answer" not in parsed and step == 1:
+ warning_msg = f"No plan found in step {step} response. The agent should create a plan for all tasks."
+ logger.warning(warning_msg)
+ self.console.print_warning(warning_msg)
+
+ # For the first step, we'll add a flag to indicate we need to re-prompt for a plan
+ parsed["needs_plan"] = True
+
+ # If there's a tool in the response, store it but don't execute it yet
+ if "tool" in parsed:
+ parsed["deferred_tool"] = parsed["tool"]
+ parsed["deferred_tool_args"] = parsed.get("tool_args", {})
+ # Remove the tool so it won't be executed
+ del parsed["tool"]
+ if "tool_args" in parsed:
+ del parsed["tool_args"]
+
+ # Set state to indicate we need planning
+ self.execution_state = self.STATE_PLANNING
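
The deferral this method performs can be pictured as a dict transformation (values invented):

    parsed = {"thought": "t", "tool": "create_object", "tool_args": {"name": "cube"}}
    # after _validate_plan_required(parsed, step=1), for a tool not in SIMPLE_TOOLS:
    # parsed == {"thought": "t", "needs_plan": True,
    #            "deferred_tool": "create_object",
    #            "deferred_tool_args": {"name": "cube"}}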