amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/agents/base/agent.py CHANGED
@@ -1,2177 +1,2132 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """
4
- Generic Agent class for building domain-specific agents.
5
- """
6
-
7
- # Standard library imports
8
- import abc
9
- import datetime
10
- import inspect
11
- import json
12
- import logging
13
- import os
14
- import re
15
- import subprocess
16
- import uuid
17
- from typing import Any, Dict, List, Optional
18
-
19
- from gaia.agents.base.console import AgentConsole, SilentConsole
20
- from gaia.agents.base.errors import format_execution_trace
21
- from gaia.agents.base.tools import _TOOL_REGISTRY
22
-
23
- # First-party imports
24
- from gaia.chat.sdk import ChatConfig, ChatSDK
25
-
26
- # Set up logging
27
- logging.basicConfig(level=logging.INFO)
28
- logger = logging.getLogger(__name__)
29
-
30
- # Content truncation thresholds
31
- CHUNK_TRUNCATION_THRESHOLD = 5000
32
- CHUNK_TRUNCATION_SIZE = 2500
33
-
34
-
35
- class Agent(abc.ABC):
36
- """
37
- Base Agent class that provides core functionality for domain-specific agents.
38
-
39
- The Agent class handles the core conversation loop, tool execution, and LLM
40
- interaction patterns. It provides:
41
- - Conversation management with an LLM
42
- - Tool registration and execution framework
43
- - JSON response parsing and validation
44
- - Error handling and recovery
45
- - State management for multi-step plans
46
- - Output formatting and file writing
47
- - Configurable prompt display for debugging
48
-
49
- Key Parameters:
50
- debug: Enable general debug output and logging
51
- show_prompts: Display prompts sent to LLM (useful for debugging prompts)
52
- debug_prompts: Include prompts in conversation history for analysis
53
- streaming: Enable real-time streaming of LLM responses
54
- silent_mode: Suppress all console output for JSON-only usage
55
- """
56
-
57
- # Define state constants
58
- STATE_PLANNING = "PLANNING"
59
- STATE_EXECUTING_PLAN = "EXECUTING_PLAN"
60
- STATE_DIRECT_EXECUTION = "DIRECT_EXECUTION"
61
- STATE_ERROR_RECOVERY = "ERROR_RECOVERY"
62
- STATE_COMPLETION = "COMPLETION"
63
-
64
- # Define tools that can execute directly without requiring a plan
65
- # Subclasses can override this to specify domain-specific simple tools
66
- SIMPLE_TOOLS = []
67
-
68
- def __init__(
69
- self,
70
- use_claude: bool = False,
71
- use_chatgpt: bool = False,
72
- claude_model: str = "claude-sonnet-4-20250514",
73
- base_url: Optional[str] = None,
74
- model_id: str = None,
75
- max_steps: int = 5,
76
- debug_prompts: bool = False,
77
- show_prompts: bool = False,
78
- output_dir: str = None,
79
- streaming: bool = False,
80
- show_stats: bool = False,
81
- silent_mode: bool = False,
82
- debug: bool = False,
83
- output_handler=None,
84
- max_plan_iterations: int = 3,
85
- min_context_size: int = 32768,
86
- skip_lemonade: bool = False,
87
- ):
88
- """
89
- Initialize the Agent with LLM client.
90
-
91
- Args:
92
- use_claude: If True, uses Claude API (default: False)
93
- use_chatgpt: If True, uses ChatGPT/OpenAI API (default: False)
94
- claude_model: Claude model to use when use_claude=True (default: "claude-sonnet-4-20250514")
95
- base_url: Base URL for local LLM server (default: reads from LEMONADE_BASE_URL env var, falls back to http://localhost:8000/api/v1)
96
- model_id: The ID of the model to use with LLM server (default for local)
97
- max_steps: Maximum number of steps the agent can take before terminating
98
- debug_prompts: If True, includes prompts in the conversation history
99
- show_prompts: If True, displays prompts sent to LLM in console (default: False)
100
- output_dir: Directory for storing JSON output files (default: current directory)
101
- streaming: If True, enables real-time streaming of LLM responses (default: False)
102
- show_stats: If True, displays LLM performance stats after each response (default: False)
103
- silent_mode: If True, suppresses all console output for JSON-only usage (default: False)
104
- debug: If True, enables debug output for troubleshooting (default: False)
105
- output_handler: Custom OutputHandler for displaying agent output (default: None, creates console based on silent_mode)
106
- max_plan_iterations: Maximum number of plan-execute-replan cycles (default: 3, 0 = unlimited)
107
- min_context_size: Minimum context size required for this agent (default: 32768).
108
- skip_lemonade: If True, skip Lemonade server initialization (default: False).
109
- Use this when connecting to a different OpenAI-compatible backend.
110
-
111
- Note: Uses local LLM server by default unless use_claude or use_chatgpt is True.
112
- """
113
- self.error_history = [] # Store error history for learning
114
- self.conversation_history = (
115
- []
116
- ) # Store conversation history for session persistence
117
- self.max_steps = max_steps
118
- self.debug_prompts = debug_prompts
119
- self.show_prompts = show_prompts # Separate flag for displaying prompts
120
- self.output_dir = output_dir if output_dir else os.getcwd()
121
- self.streaming = streaming
122
- self.show_stats = show_stats
123
- self.silent_mode = silent_mode
124
- self.debug = debug
125
- self.last_result = None # Store the most recent result
126
- self.max_plan_iterations = max_plan_iterations
127
- self._current_query: Optional[str] = (
128
- None # Store current query for error context
129
- )
130
-
131
- # Read base_url from environment if not provided
132
- if base_url is None:
133
- base_url = os.getenv("LEMONADE_BASE_URL", "http://localhost:8000/api/v1")
134
-
135
- # Lazy Lemonade initialization for local LLM users
136
- # This ensures Lemonade server is running before we try to use it
137
- if not (use_claude or use_chatgpt or skip_lemonade):
138
- from gaia.llm.lemonade_manager import LemonadeManager
139
-
140
- LemonadeManager.ensure_ready(
141
- min_context_size=min_context_size,
142
- quiet=silent_mode,
143
- base_url=base_url,
144
- )
145
-
146
- # Initialize state management
147
- self.execution_state = self.STATE_PLANNING
148
- self.current_plan = None
149
- self.current_step = 0
150
- self.total_plan_steps = 0
151
- self.plan_iterations = 0 # Track number of plan cycles
152
-
153
- # Initialize the console/output handler for display
154
- # If output_handler is provided, use it; otherwise create based on silent_mode
155
- if output_handler is not None:
156
- self.console = output_handler
157
- else:
158
- self.console = self._create_console()
159
-
160
- # Initialize LLM client for local model
161
- self.system_prompt = self._get_system_prompt()
162
-
163
- # Register tools for this agent
164
- self._register_tools()
165
-
166
- # Update system prompt with available tools and response format
167
- tools_description = self._format_tools_for_prompt()
168
- self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n"
169
-
170
- # Add JSON response format instructions (shared across all agents)
171
- self.system_prompt += """
172
- ==== RESPONSE FORMAT ====
173
- You must respond ONLY in valid JSON. No text before { or after }.
174
-
175
- **To call a tool:**
176
- {"thought": "reasoning", "goal": "objective", "tool": "tool_name", "tool_args": {"arg1": "value1"}}
177
-
178
- **To create a multi-step plan:**
179
- {
180
- "thought": "reasoning",
181
- "goal": "objective",
182
- "plan": [
183
- {"tool": "tool1", "tool_args": {"arg": "val"}},
184
- {"tool": "tool2", "tool_args": {"arg": "val"}}
185
- ],
186
- "tool": "tool1",
187
- "tool_args": {"arg": "val"}
188
- }
189
-
190
- **To provide a final answer:**
191
- {"thought": "reasoning", "goal": "achieved", "answer": "response to user"}
192
-
193
- **RULES:**
194
- 1. ALWAYS use tools for real data - NEVER hallucinate
195
- 2. Plan steps MUST be objects like {"tool": "x", "tool_args": {}}, NOT strings
196
- 3. After tool results, provide an "answer" summarizing them
197
- """
198
-
199
- # Initialize ChatSDK with proper configuration
200
- # Note: We don't set system_prompt in config, we pass it per request
201
- # Note: Context size is configured when starting Lemonade server, not here
202
- # Use Qwen3-Coder-30B by default for better reasoning and JSON formatting
203
- # The 0.5B model is too small for complex agent tasks
204
- chat_config = ChatConfig(
205
- model=model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF",
206
- use_claude=use_claude,
207
- use_chatgpt=use_chatgpt,
208
- claude_model=claude_model,
209
- base_url=base_url,
210
- show_stats=True, # Always collect stats for token tracking
211
- max_history_length=20, # Keep more history for agent conversations
212
- max_tokens=4096, # Increased for complex code generation
213
- )
214
- self.chat = ChatSDK(chat_config)
215
- self.model_id = model_id
216
-
217
- # Print system prompt if show_prompts is enabled
218
- # Debug: Check the actual value of show_prompts
219
- if self.debug:
220
- logger.debug(
221
- f"show_prompts={self.show_prompts}, debug={self.debug}, will show prompt: {self.show_prompts}"
222
- )
223
-
224
- if self.show_prompts:
225
- self.console.print_prompt(self.system_prompt, "Initial System Prompt")
226
-
227
- @abc.abstractmethod
228
- def _get_system_prompt(self) -> str:
229
- """
230
- Generate the system prompt for the agent.
231
- Subclasses must implement this to provide domain-specific prompts.
232
- """
233
- raise NotImplementedError("Subclasses must implement _get_system_prompt")
234
-
235
- def _create_console(self):
236
- """
237
- Create and return a console output handler.
238
- Returns SilentConsole if in silent_mode, otherwise AgentConsole.
239
- Subclasses can override this to provide domain-specific console output.
240
- """
241
- if self.silent_mode:
242
- # Check if we should completely silence everything (including final answer)
243
- # This would be true for JSON-only output or when output_dir is set
244
- silence_final_answer = getattr(self, "output_dir", None) is not None
245
- return SilentConsole(silence_final_answer=silence_final_answer)
246
- return AgentConsole()
247
-
248
- @abc.abstractmethod
249
- def _register_tools(self):
250
- """
251
- Register all domain-specific tools for the agent.
252
- Subclasses must implement this method.
253
- """
254
- raise NotImplementedError("Subclasses must implement _register_tools")
255
-
256
- def _format_tools_for_prompt(self) -> str:
257
- """Format the registered tools into a string for the prompt."""
258
- tool_descriptions = []
259
-
260
- for name, tool_info in _TOOL_REGISTRY.items():
261
- params_str = ", ".join(
262
- [
263
- f"{param_name}{'' if param_info['required'] else '?'}: {param_info['type']}"
264
- for param_name, param_info in tool_info["parameters"].items()
265
- ]
266
- )
267
-
268
- description = tool_info["description"].strip()
269
- tool_descriptions.append(f"- {name}({params_str}): {description}")
270
-
271
- return "\n".join(tool_descriptions)
272
-
273
- def list_tools(self, verbose: bool = True) -> None:
274
- """
275
- Display all tools registered for this agent with their parameters and descriptions.
276
-
277
- Args:
278
- verbose: If True, displays full descriptions and parameter details. If False, shows a compact list.
279
- """
280
- self.console.print_header(f"🛠️ Registered Tools for {self.__class__.__name__}")
281
- self.console.print_separator()
282
-
283
- for name, tool_info in _TOOL_REGISTRY.items():
284
- # Format parameters
285
- params = []
286
- for param_name, param_info in tool_info["parameters"].items():
287
- required = param_info.get("required", False)
288
- param_type = param_info.get("type", "Any")
289
- default = param_info.get("default", None)
290
-
291
- if required:
292
- params.append(f"{param_name}: {param_type}")
293
- else:
294
- default_str = f"={default}" if default is not None else "=None"
295
- params.append(f"{param_name}: {param_type}{default_str}")
296
-
297
- params_str = ", ".join(params)
298
-
299
- # Get description
300
- if verbose:
301
- description = tool_info["description"]
302
- else:
303
- description = (
304
- tool_info["description"].split("\n")[0]
305
- if tool_info["description"]
306
- else "No description"
307
- )
308
-
309
- # Print tool information
310
- self.console.print_tool_info(name, params_str, description)
311
-
312
- self.console.print_separator()
313
-
314
- return None
315
-
316
- def _extract_json_from_response(self, response: str) -> Optional[Dict[str, Any]]:
317
- """
318
- Apply multiple extraction strategies to find valid JSON in the response.
319
-
320
- Args:
321
- response: The raw response from the LLM
322
-
323
- Returns:
324
- Extracted JSON dictionary or None if extraction failed
325
- """
326
- # Strategy 1: Extract JSON from code blocks with various patterns
327
- json_patterns = [
328
- r"```(?:json)?\s*(.*?)\s*```", # Standard code block
329
- r"`json\s*(.*?)\s*`", # Single backtick with json tag
330
- r"<json>\s*(.*?)\s*</json>", # XML-style tags
331
- ]
332
-
333
- for pattern in json_patterns:
334
- matches = re.findall(pattern, response, re.DOTALL)
335
- for match in matches:
336
- try:
337
- result = json.loads(match)
338
- # Ensure tool_args exists if tool is present
339
- if "tool" in result and "tool_args" not in result:
340
- result["tool_args"] = {}
341
- logger.debug(f"Successfully extracted JSON with pattern {pattern}")
342
- return result
343
- except json.JSONDecodeError:
344
- continue
345
-
346
- start_idx = response.find("{")
347
- if start_idx >= 0:
348
- bracket_count = 0
349
- in_string = False
350
- escape_next = False
351
-
352
- for i, char in enumerate(response[start_idx:], start_idx):
353
- if escape_next:
354
- escape_next = False
355
- continue
356
- if char == "\\":
357
- escape_next = True
358
- continue
359
- if char == '"' and not escape_next:
360
- in_string = not in_string
361
- if not in_string:
362
- if char == "{":
363
- bracket_count += 1
364
- elif char == "}":
365
- bracket_count -= 1
366
- if bracket_count == 0:
367
- # Found complete JSON object
368
- try:
369
- extracted = response[start_idx : i + 1]
370
- # Fix common issues before parsing
371
- fixed = re.sub(r",\s*}", "}", extracted)
372
- fixed = re.sub(r",\s*]", "]", fixed)
373
- result = json.loads(fixed)
374
- # Ensure tool_args exists if tool is present
375
- if "tool" in result and "tool_args" not in result:
376
- result["tool_args"] = {}
377
- logger.debug(
378
- "Successfully extracted JSON using bracket-matching"
379
- )
380
- return result
381
- except json.JSONDecodeError as e:
382
- logger.debug(f"Bracket-matched JSON parse failed: {e}")
383
- break
384
-
385
- return None
386
-
387
- def validate_json_response(self, response_text: str) -> Dict[str, Any]:
388
- """
389
- Validates and attempts to fix JSON responses from the LLM.
390
-
391
- Attempts the following fixes in order:
392
- 1. Parse as-is if valid JSON
393
- 2. Extract JSON from code blocks
394
- 3. Truncate after first complete JSON object
395
- 4. Fix common JSON syntax errors
396
- 5. Extract JSON-like content using regex
397
-
398
- Args:
399
- response_text: The response string from the LLM
400
-
401
- Returns:
402
- A dictionary containing the parsed JSON if valid
403
-
404
- Raises:
405
- ValueError: If the response cannot be parsed as JSON or is missing required fields
406
- """
407
- original_response = response_text
408
- json_was_modified = False
409
-
410
- # Step 0: Sanitize control characters to ensure proper JSON format
411
- def sanitize_json_string(text: str) -> str:
412
- """
413
- Ensure JSON strings have properly escaped control characters.
414
-
415
- Args:
416
- text: JSON text that may contain unescaped control characters
417
-
418
- Returns:
419
- Sanitized JSON text with properly escaped control characters
420
- """
421
-
422
- def escape_string_content(match):
423
- """Ensure control characters are properly escaped in JSON string values."""
424
- quote = match.group(1)
425
- content = match.group(2)
426
- closing_quote = match.group(3)
427
-
428
- # Ensure proper escaping of control characters
429
- content = content.replace("\n", "\\n")
430
- content = content.replace("\r", "\\r")
431
- content = content.replace("\t", "\\t")
432
- content = content.replace("\b", "\\b")
433
- content = content.replace("\f", "\\f")
434
-
435
- return f"{quote}{content}{closing_quote}"
436
-
437
- # Match JSON strings: "..." handling escaped quotes
438
- pattern = r'(")([^"\\]*(?:\\.[^"\\]*)*)(")'
439
-
440
- try:
441
- return re.sub(pattern, escape_string_content, text)
442
- except Exception as e:
443
- logger.debug(
444
- f"[JSON] String sanitization encountered issue: {e}, using original"
445
- )
446
- return text
447
-
448
- response_text = sanitize_json_string(response_text)
449
-
450
- # Step 1: Try to parse as-is
451
- try:
452
- json_response = json.loads(response_text)
453
- logger.debug("[JSON] Successfully parsed response without modifications")
454
- except json.JSONDecodeError as initial_error:
455
- # Step 2: Try to extract from code blocks
456
- json_match = re.search(
457
- r"```(?:json)?\s*({.*?})\s*```", response_text, re.DOTALL
458
- )
459
- if json_match:
460
- try:
461
- response_text = json_match.group(1)
462
- json_response = json.loads(response_text)
463
- json_was_modified = True
464
- logger.warning("[JSON] Extracted JSON from code block")
465
- except json.JSONDecodeError as e:
466
- logger.debug(f"[JSON] Code block extraction failed: {e}")
467
-
468
- # Step 3: Try to find and extract first complete JSON object
469
- if not json_was_modified:
470
- # Find the first '{' and try to match brackets
471
- start_idx = response_text.find("{")
472
- if start_idx >= 0:
473
- bracket_count = 0
474
- in_string = False
475
- escape_next = False
476
-
477
- for i, char in enumerate(response_text[start_idx:], start_idx):
478
- if escape_next:
479
- escape_next = False
480
- continue
481
- if char == "\\":
482
- escape_next = True
483
- continue
484
- if char == '"' and not escape_next:
485
- in_string = not in_string
486
- if not in_string:
487
- if char == "{":
488
- bracket_count += 1
489
- elif char == "}":
490
- bracket_count -= 1
491
- if bracket_count == 0:
492
- # Found complete JSON object
493
- try:
494
- truncated = response_text[start_idx : i + 1]
495
- json_response = json.loads(truncated)
496
- json_was_modified = True
497
- logger.warning(
498
- f"[JSON] Truncated response after first complete JSON object (removed {len(response_text) - i - 1} chars)"
499
- )
500
- response_text = truncated
501
- break
502
- except json.JSONDecodeError:
503
- logger.debug(
504
- "[JSON] Truncated text is not valid JSON, trying next bracket pair"
505
- )
506
- continue
507
-
508
- # Step 4: Try to fix common JSON errors
509
- if not json_was_modified:
510
- fixed_text = response_text
511
-
512
- # Remove trailing commas
513
- fixed_text = re.sub(r",\s*}", "}", fixed_text)
514
- fixed_text = re.sub(r",\s*]", "]", fixed_text)
515
-
516
- # Fix single quotes to double quotes (carefully)
517
- if "'" in fixed_text and '"' not in fixed_text:
518
- fixed_text = fixed_text.replace("'", '"')
519
-
520
- # Remove any text before first '{' or '['
521
- json_start = min(
522
- fixed_text.find("{") if "{" in fixed_text else len(fixed_text),
523
- fixed_text.find("[") if "[" in fixed_text else len(fixed_text),
524
- )
525
- if json_start > 0 and json_start < len(fixed_text):
526
- fixed_text = fixed_text[json_start:]
527
-
528
- # Try to parse the fixed text
529
- if fixed_text != response_text:
530
- try:
531
- json_response = json.loads(fixed_text)
532
- json_was_modified = True
533
- logger.warning("[JSON] Applied automatic JSON fixes")
534
- response_text = fixed_text
535
- except json.JSONDecodeError as e:
536
- logger.debug(f"[JSON] Auto-fix failed: {e}")
537
-
538
- # If still no valid JSON, raise the original error
539
- if not json_was_modified:
540
- raise ValueError(
541
- f"Failed to parse response as JSON: {str(initial_error)}"
542
- )
543
-
544
- # Log warning if JSON was modified
545
- if json_was_modified:
546
- logger.warning(
547
- f"[JSON] Response was modified to extract valid JSON. Original length: {len(original_response)}, Fixed length: {len(response_text)}"
548
- )
549
-
550
- # Validate required fields
551
- # Note: 'goal' is optional for simple answer responses
552
- if "answer" in json_response:
553
- required_fields = ["thought", "answer"] # goal is optional
554
- elif "tool" in json_response:
555
- required_fields = ["thought", "tool", "tool_args"] # goal is optional
556
- else:
557
- required_fields = ["thought", "plan"] # goal is optional
558
-
559
- missing_fields = [
560
- field for field in required_fields if field not in json_response
561
- ]
562
- if missing_fields:
563
- raise ValueError(
564
- f"Response is missing required fields: {', '.join(missing_fields)}"
565
- )
566
-
567
- return json_response
568
-
569
- def _parse_llm_response(self, response: str) -> Dict[str, Any]:
570
- """
571
- Parse the LLM response to extract tool calls or conversational answers.
572
-
573
- ARCHITECTURE: Supports two response modes
574
- - Plain text for conversation (no JSON required)
575
- - JSON for tool invocations
576
-
577
- Args:
578
- response: The raw response from the LLM
579
-
580
- Returns:
581
- Parsed response as a dictionary
582
- """
583
- # Check for empty responses
584
- if not response or not response.strip():
585
- logger.warning("Empty LLM response received")
586
- self.error_history.append("Empty LLM response")
587
-
588
- # Provide more helpful error message based on context
589
- if hasattr(self, "api_mode") and self.api_mode: # pylint: disable=no-member
590
- answer = "I encountered an issue processing your request. This might be due to a connection problem with the language model. Please try again."
591
- else:
592
- answer = "I apologize, but I received an empty response from the language model. Please try again."
593
-
594
- return {
595
- "thought": "LLM returned empty response",
596
- "goal": "Handle empty response error",
597
- "answer": answer,
598
- }
599
-
600
- response = response.strip()
601
-
602
- # Log what we received for debugging (show more to see full JSON)
603
- if len(response) > 500:
604
- logger.debug(
605
- f"📥 LLM Response ({len(response)} chars): {response[:500]}..."
606
- )
607
- else:
608
- logger.debug(f"📥 LLM Response: {response}")
609
-
610
- # STEP 1: Fast path - detect plain text conversational responses
611
- # If response doesn't start with '{', it's likely plain text
612
- # Accept it immediately without logging errors
613
- if not response.startswith("{"):
614
- logger.debug(
615
- f"[PARSE] Plain text conversational response (length: {len(response)})"
616
- )
617
- return {"thought": "", "goal": "", "answer": response}
618
-
619
- # STEP 2: Response starts with '{' - looks like JSON
620
- # Try direct JSON parsing first (fastest path)
621
- try:
622
- result = json.loads(response)
623
- # Ensure tool_args exists if tool is present
624
- if "tool" in result and "tool_args" not in result:
625
- result["tool_args"] = {}
626
- logger.debug("[PARSE] Valid JSON response")
627
- return result
628
- except json.JSONDecodeError:
629
- # JSON parsing failed - continue to extraction methods
630
- logger.debug("[PARSE] Malformed JSON, trying extraction")
631
-
632
- # STEP 3: Try JSON extraction methods (handles code blocks, mixed text, etc.)
633
- extracted_json = self._extract_json_from_response(response)
634
- if extracted_json:
635
- logger.debug("[PARSE] Extracted JSON successfully")
636
- return extracted_json
637
-
638
- # STEP 4: JSON was expected (starts with '{') but all parsing failed
639
- # Log error ONLY for JSON that couldn't be parsed
640
- logger.debug("Attempting to extract fields using regex")
641
- thought_match = re.search(r'"thought":\s*"([^"]*)"', response)
642
- tool_match = re.search(r'"tool":\s*"([^"]*)"', response)
643
- answer_match = re.search(r'"answer":\s*"([^"]*)"', response)
644
- plan_match = re.search(r'"plan":\s*(\[.*?\])', response, re.DOTALL)
645
-
646
- if answer_match:
647
- result = {
648
- "thought": thought_match.group(1) if thought_match else "",
649
- "goal": "what was achieved",
650
- "answer": answer_match.group(1),
651
- }
652
- logger.debug(f"Extracted answer using regex: {result}")
653
- return result
654
-
655
- if tool_match:
656
- tool_args = {}
657
-
658
- tool_args_start = response.find('"tool_args"')
659
-
660
- if tool_args_start >= 0:
661
- # Find the opening brace after "tool_args":
662
- brace_start = response.find("{", tool_args_start)
663
- if brace_start >= 0:
664
- # Use bracket-matching to find the complete object
665
- bracket_count = 0
666
- in_string = False
667
- escape_next = False
668
- for i, char in enumerate(response[brace_start:], brace_start):
669
- if escape_next:
670
- escape_next = False
671
- continue
672
- if char == "\\":
673
- escape_next = True
674
- continue
675
- if char == '"' and not escape_next:
676
- in_string = not in_string
677
- if not in_string:
678
- if char == "{":
679
- bracket_count += 1
680
- elif char == "}":
681
- bracket_count -= 1
682
- if bracket_count == 0:
683
- # Found complete tool_args object
684
- tool_args_str = response[brace_start : i + 1]
685
- try:
686
- tool_args = json.loads(tool_args_str)
687
- except json.JSONDecodeError as e:
688
- error_msg = f"Failed to parse tool_args JSON: {str(e)}, content: {tool_args_str[:100]}..."
689
- logger.error(error_msg)
690
- self.error_history.append(error_msg)
691
- break
692
-
693
- result = {
694
- "thought": thought_match.group(1) if thought_match else "",
695
- "goal": "clear statement of what you're trying to achieve",
696
- "tool": tool_match.group(1),
697
- "tool_args": tool_args,
698
- }
699
-
700
- # Add plan if found
701
- if plan_match:
702
- try:
703
- result["plan"] = json.loads(plan_match.group(1))
704
- logger.debug(f"Extracted plan using regex: {result['plan']}")
705
- except json.JSONDecodeError as e:
706
- error_msg = f"Failed to parse plan JSON: {str(e)}, content: {plan_match.group(1)[:100]}..."
707
- logger.error(error_msg)
708
- self.error_history.append(error_msg)
709
-
710
- logger.debug(f"Extracted tool call using regex: {result}")
711
- return result
712
-
713
- # Try to match simple key-value patterns for object names (like ': "my_cube"')
714
- obj_name_match = re.search(
715
- r'["\':]?\s*["\'"]?([a-zA-Z0-9_\.]+)["\'"]?', response
716
- )
717
- if obj_name_match:
718
- object_name = obj_name_match.group(1)
719
- # If it looks like an object name and not just a random word
720
- if "." in object_name or "_" in object_name:
721
- logger.debug(f"Found potential object name: {object_name}")
722
- return {
723
- "thought": "Extracted object name",
724
- "goal": "Use the object name",
725
- "answer": object_name,
726
- }
727
-
728
- # CONVERSATIONAL MODE: No JSON found - treat as plain conversational response
729
- # This is normal and expected for chat agents responding to greetings, explanations, etc.
730
- logger.debug(
731
- f"[PARSE] No JSON structure found, treating as conversational response. Length: {len(response)}, preview: {response[:100]}..."
732
- )
733
-
734
- # If response is empty, provide a meaningful fallback
735
- if not response.strip():
736
- logger.warning("[PARSE] Empty response received from LLM")
737
- return {
738
- "thought": "",
739
- "goal": "",
740
- "answer": "I apologize, but I received an empty response. Please try again.",
741
- }
742
-
743
- # Valid conversational response - wrap it in expected format
744
- return {"thought": "", "goal": "", "answer": response.strip()}
745
-
746
- def _execute_tool(self, tool_name: str, tool_args: Dict[str, Any]) -> Any:
747
- """
748
- Execute a tool by name with the provided arguments.
749
-
750
- Args:
751
- tool_name: Name of the tool to execute
752
- tool_args: Arguments to pass to the tool
753
-
754
- Returns:
755
- Result of the tool execution
756
- """
757
- logger.debug(f"Executing tool {tool_name} with args: {tool_args}")
758
-
759
- if tool_name not in _TOOL_REGISTRY:
760
- logger.error(f"Tool '{tool_name}' not found in registry")
761
- return {"status": "error", "error": f"Tool '{tool_name}' not found"}
762
-
763
- tool = _TOOL_REGISTRY[tool_name]["function"]
764
- sig = inspect.signature(tool)
765
-
766
- # Get required parameters (those without defaults)
767
- required_args = {
768
- name: param
769
- for name, param in sig.parameters.items()
770
- if param.default == inspect.Parameter.empty and name != "return"
771
- }
772
-
773
- # Check for missing required arguments
774
- missing_args = [arg for arg in required_args if arg not in tool_args]
775
- if missing_args:
776
- error_msg = (
777
- f"Missing required arguments for {tool_name}: {', '.join(missing_args)}"
778
- )
779
- logger.error(error_msg)
780
- return {"status": "error", "error": error_msg}
781
-
782
- try:
783
- result = tool(**tool_args)
784
- logger.debug(f"Tool execution result: {result}")
785
- return result
786
- except subprocess.TimeoutExpired as e:
787
- # Handle subprocess timeout specifically
788
- error_msg = f"Tool {tool_name} timed out: {str(e)}"
789
- logger.error(error_msg)
790
- self.error_history.append(error_msg)
791
- return {"status": "error", "error": error_msg, "timeout": True}
792
- except Exception as e:
793
- # Format error with full execution trace for debugging
794
- formatted_error = format_execution_trace(
795
- exception=e,
796
- query=getattr(self, "_current_query", None),
797
- plan_step=self.current_step + 1 if self.current_plan else None,
798
- total_steps=self.total_plan_steps if self.current_plan else None,
799
- tool_name=tool_name,
800
- tool_args=tool_args,
801
- )
802
- logger.error(f"Error executing tool {tool_name}: {e}")
803
- self.error_history.append(str(e)) # Store brief error, not formatted
804
-
805
- # Print to console immediately so user sees it
806
- self.console.print_error(formatted_error)
807
-
808
- return {
809
- "status": "error",
810
- "error_brief": str(e), # Brief error message for quick reference
811
- "error_displayed": True, # Flag to prevent duplicate display
812
- "tool_name": tool_name,
813
- "tool_args": tool_args,
814
- "plan_step": self.current_step + 1 if self.current_plan else None,
815
- }
816
-
817
- def _generate_max_steps_message(
818
- self, conversation: List[Dict], steps_taken: int, steps_limit: int
819
- ) -> str:
820
- """Generate informative message when max steps is reached.
821
-
822
- Args:
823
- conversation: The conversation history
824
- steps_taken: Number of steps actually taken
825
- steps_limit: Maximum steps allowed
826
-
827
- Returns:
828
- Informative message about what was accomplished
829
- """
830
- # Analyze what was done
831
- tool_calls = [
832
- msg
833
- for msg in conversation
834
- if msg.get("role") == "assistant" and "tool_calls" in msg
835
- ]
836
-
837
- tools_used = []
838
- for msg in tool_calls:
839
- for tool_call in msg.get("tool_calls", []):
840
- if "function" in tool_call:
841
- tools_used.append(tool_call["function"]["name"])
842
-
843
- message = f"⚠️ Reached maximum steps limit ({steps_limit} steps)\n\n"
844
- message += f"Completed {steps_taken} steps using these tools:\n"
845
-
846
- # Count tool usage
847
- from collections import Counter
848
-
849
- tool_counts = Counter(tools_used)
850
- for tool, count in tool_counts.most_common(10):
851
- message += f" - {tool}: {count}x\n"
852
-
853
- message += "\nTo continue or complete this task:\n"
854
- message += "1. Review the generated files and progress so far\n"
855
- message += f"2. Run with --max-steps {steps_limit + 50} to allow more steps\n"
856
- message += "3. Or complete remaining tasks manually\n"
857
-
858
- return message
859
-
860
- def _write_json_to_file(self, data: Dict[str, Any], filename: str = None) -> str:
861
- """
862
- Write JSON data to a file and return the absolute path.
863
-
864
- Args:
865
- data: Dictionary data to write as JSON
866
- filename: Optional filename, if None a timestamped name will be generated
867
-
868
- Returns:
869
- Absolute path to the saved file
870
- """
871
- # Ensure output directory exists
872
- os.makedirs(self.output_dir, exist_ok=True)
873
-
874
- # Generate filename if not provided
875
- if not filename:
876
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
877
- filename = f"agent_output_{timestamp}.json"
878
-
879
- # Ensure filename has .json extension
880
- if not filename.endswith(".json"):
881
- filename += ".json"
882
-
883
- # Create absolute path
884
- file_path = os.path.join(self.output_dir, filename)
885
-
886
- # Write JSON data to file
887
- with open(file_path, "w", encoding="utf-8") as f:
888
- json.dump(data, f, indent=2)
889
-
890
- return os.path.abspath(file_path)
891
-
892
- def _handle_large_tool_result(
893
- self,
894
- tool_name: str,
895
- tool_result: Any,
896
- conversation: List[Dict[str, Any]],
897
- tool_args: Optional[Dict[str, Any]] = None,
898
- ) -> Any:
899
- """
900
- Handle large tool results by truncating them if necessary.
901
-
902
- Args:
903
- tool_name: Name of the executed tool
904
- tool_result: The result from tool execution
905
- conversation: The conversation list to append to
906
- tool_args: Arguments passed to the tool (optional)
907
-
908
- Returns:
909
- The truncated result or original if within limits
910
- """
911
- truncated_result = tool_result
912
- if isinstance(tool_result, (dict, list)):
913
- # Use custom encoder to handle bytes and other non-serializable types
914
- result_str = json.dumps(tool_result, default=self._json_serialize_fallback)
915
- if (
916
- len(result_str) > 30000
917
- ): # Threshold for truncation (appropriate for 32K context)
918
- # Truncate large results to prevent overwhelming the LLM
919
- truncated_str = self._truncate_large_content(
920
- tool_result, max_chars=20000 # Increased for 32K context
921
- )
922
- try:
923
- truncated_result = json.loads(truncated_str)
924
- except json.JSONDecodeError:
925
- # If truncated string isn't valid JSON, use it as-is
926
- truncated_result = truncated_str
927
- # Notify user about truncation
928
- self.console.print_info(
929
- f"Note: Large result ({len(result_str)} chars) truncated for LLM context"
930
- )
931
- if self.debug:
932
- print(f"[DEBUG] Tool result truncated from {len(result_str)} chars")
933
-
934
- # Add to conversation
935
- tool_entry: Dict[str, Any] = {
936
- "role": "tool",
937
- "name": tool_name,
938
- "content": truncated_result,
939
- }
940
- if tool_args is not None:
941
- tool_entry["tool_args"] = tool_args
942
- conversation.append(tool_entry)
943
- return truncated_result
944
-
945
- def _create_tool_message(self, tool_name: str, tool_output: Any) -> Dict[str, Any]:
946
- """
947
- Build a message structure representing a tool output for downstream LLM calls.
948
- """
949
- if isinstance(tool_output, str):
950
- text_content = tool_output
951
- else:
952
- text_content = self._truncate_large_content(tool_output, max_chars=2000)
953
-
954
- if not isinstance(text_content, str):
955
- text_content = json.dumps(
956
- tool_output, default=self._json_serialize_fallback
957
- )
958
-
959
- return {
960
- "role": "tool",
961
- "name": tool_name,
962
- "tool_call_id": uuid.uuid4().hex,
963
- "content": [{"type": "text", "text": text_content}],
964
- }
965
-
966
- def _json_serialize_fallback(self, obj: Any) -> Any:
967
- """
968
- Fallback serializer for JSON encoding non-standard types.
969
-
970
- Handles bytes, datetime, and other common non-serializable types.
971
- """
972
- try:
973
- import numpy as np # Local import to avoid hard dependency at module import time
974
-
975
- if isinstance(obj, np.generic):
976
- return obj.item()
977
- if isinstance(obj, np.ndarray):
978
- return obj.tolist()
979
- except Exception:
980
- pass
981
-
982
- if isinstance(obj, bytes):
983
- # For binary data, return a placeholder (don't expose raw bytes to LLM)
984
- return f"<binary data: {len(obj)} bytes>"
985
- if hasattr(obj, "isoformat"):
986
- # Handle datetime objects
987
- return obj.isoformat()
988
- if hasattr(obj, "__dict__"):
989
- # Handle objects with __dict__
990
- return obj.__dict__
991
-
992
- for caster in (float, int, str):
993
- try:
994
- return caster(obj)
995
- except Exception:
996
- continue
997
-
998
- return "<non-serializable>"
999
-
1000
- def _truncate_large_content(self, content: Any, max_chars: int = 2000) -> str:
1001
- """
1002
- Truncate large content to prevent overwhelming the LLM.
1003
- Defaults to 20000 chars which is appropriate for 32K token context window.
1004
- """
1005
-
1006
- # If we have test_results in the output we don't want to
1007
- # truncate as this can contain important information on
1008
- # how to fix the tests
1009
- if isinstance(content, dict) and (
1010
- "test_results" in content or "run_tests" in content
1011
- ):
1012
- return json.dumps(content, default=self._json_serialize_fallback)
1013
-
1014
- # Convert to string (use compact JSON first to check size)
1015
- if isinstance(content, (dict, list)):
1016
- compact_str = json.dumps(content, default=self._json_serialize_fallback)
1017
- # Only use indented format if we need to truncate anyway
1018
- content_str = (
1019
- json.dumps(content, indent=2, default=self._json_serialize_fallback)
1020
- if len(compact_str) > max_chars
1021
- else compact_str
1022
- )
1023
- else:
1024
- content_str = str(content)
1025
-
1026
- # Return as-is if within limits
1027
- if len(content_str) <= max_chars:
1028
- return content_str
1029
-
1030
- # For responses with chunks (e.g., search results, document retrieval)
1031
- if (
1032
- isinstance(content, dict)
1033
- and "chunks" in content
1034
- and isinstance(content["chunks"], list)
1035
- ):
1036
- truncated = content.copy()
1037
-
1038
- # Keep all chunks but truncate individual chunk content if needed
1039
- if "chunks" in truncated:
1040
- for chunk in truncated["chunks"]:
1041
- if isinstance(chunk, dict) and "content" in chunk:
1042
- # Keep full content for chunks (they're the actual data)
1043
- # Only truncate if a single chunk is massive
1044
- if len(chunk["content"]) > CHUNK_TRUNCATION_THRESHOLD:
1045
- chunk["content"] = (
1046
- chunk["content"][:CHUNK_TRUNCATION_SIZE]
1047
- + "\n...[chunk truncated]...\n"
1048
- + chunk["content"][-CHUNK_TRUNCATION_SIZE:]
1049
- )
1050
-
1051
- result_str = json.dumps(
1052
- truncated, indent=2, default=self._json_serialize_fallback
1053
- )
1054
- # Use larger limit for chunked responses since chunks are the actual data
1055
- if len(result_str) <= max_chars * 3: # Allow up to 60KB for chunked data
1056
- return result_str
1057
- # If still too large, keep first 3 chunks only
1058
- truncated["chunks"] = truncated["chunks"][:3]
1059
- return json.dumps(
1060
- truncated, indent=2, default=self._json_serialize_fallback
1061
- )
1062
-
1063
- # For Jira responses, keep first 3 issues
1064
- if (
1065
- isinstance(content, dict)
1066
- and "issues" in content
1067
- and isinstance(content["issues"], list)
1068
- ):
1069
- truncated = {
1070
- **content,
1071
- "issues": content["issues"][:3],
1072
- "truncated": True,
1073
- "total": len(content["issues"]),
1074
- }
1075
- return json.dumps(
1076
- truncated, indent=2, default=self._json_serialize_fallback
1077
- )[:max_chars]
1078
-
1079
- # For lists, keep first 3 items
1080
- if isinstance(content, list):
1081
- truncated = (
1082
- content[:3] + [{"truncated": f"{len(content) - 3} more"}]
1083
- if len(content) > 3
1084
- else content
1085
- )
1086
- return json.dumps(
1087
- truncated, indent=2, default=self._json_serialize_fallback
1088
- )[:max_chars]
1089
-
1090
- # Simple truncation
1091
- half = max_chars // 2 - 20
1092
- return f"{content_str[:half]}\n...[truncated]...\n{content_str[-half:]}"
1093
-
1094
- def process_query(
1095
- self,
1096
- user_input: str,
1097
- max_steps: int = None,
1098
- trace: bool = False,
1099
- filename: str = None,
1100
- ) -> Dict[str, Any]:
1101
- """
1102
- Process a user query and execute the necessary tools.
1103
- Displays each step as it's being generated in real-time.
1104
-
1105
- Args:
1106
- user_input: User's query or request
1107
- max_steps: Maximum number of steps to take in the conversation (overrides class default if provided)
1108
- trace: If True, write detailed JSON trace to file
1109
- filename: Optional filename for trace output, if None a timestamped name will be generated
1110
-
1111
- Returns:
1112
- Dict containing the final result and operation details
1113
- """
1114
- import time
1115
-
1116
- start_time = time.time() # Track query processing start time
1117
-
1118
- # Store query for error context (used in _execute_tool for error formatting)
1119
- self._current_query = user_input
1120
-
1121
- logger.debug(f"Processing query: {user_input}")
1122
- conversation = []
1123
- # Build messages array for chat completions
1124
- messages = []
1125
-
1126
- # Prepopulate with conversation history if available (for session persistence)
1127
- if hasattr(self, "conversation_history") and self.conversation_history:
1128
- messages.extend(self.conversation_history)
1129
- logger.debug(
1130
- f"Loaded {len(self.conversation_history)} messages from conversation history"
1131
- )
1132
-
1133
- steps_taken = 0
1134
- final_answer = None
1135
- error_count = 0
1136
- last_tool_call = None # Track the last tool call to prevent loops
1137
- last_error = None # Track the last error to handle it properly
1138
- previous_outputs = [] # Track previous tool outputs
1139
-
1140
- # Reset state management
1141
- self.execution_state = self.STATE_PLANNING
1142
- self.current_plan = None
1143
- self.current_step = 0
1144
- self.total_plan_steps = 0
1145
- self.plan_iterations = 0 # Reset plan iteration counter
1146
-
1147
- # Add user query to the conversation history
1148
- conversation.append({"role": "user", "content": user_input})
1149
- messages.append({"role": "user", "content": user_input})
1150
-
1151
- # Use provided max_steps or fall back to class default
1152
- steps_limit = max_steps if max_steps is not None else self.max_steps
1153
-
1154
- # Print initial message with max steps info
1155
- self.console.print_processing_start(user_input, steps_limit)
1156
- logger.debug(f"Using max_steps: {steps_limit}")
1157
-
1158
- prompt = f"User request: {user_input}\n\n"
1159
-
1160
- # Only add planning reminder in PLANNING state
1161
- if self.execution_state == self.STATE_PLANNING:
1162
- prompt += (
1163
- "IMPORTANT: ALWAYS BEGIN WITH A PLAN before executing any tools.\n"
1164
- "First create a detailed plan with all necessary steps, then execute the first step.\n"
1165
- "When creating a plan with multiple steps:\n"
1166
- " 1. ALWAYS follow the plan in the correct order, starting with the FIRST step.\n"
1167
- " 2. Include both a plan and a 'tool' field, the 'tool' field MUST match the tool in the first step of the plan.\n"
1168
- " 3. Create plans with clear, executable steps that include both the tool name and the exact arguments for each step.\n"
1169
- )
1170
-
1171
- logger.debug(f"Input prompt: {prompt[:200]}...")
1172
-
1173
- # Process the query in steps, allowing for multiple tool usages
1174
- while steps_taken < steps_limit and final_answer is None:
1175
- # Build the next prompt based on current state (this is for fallback mode only)
1176
- # In chat mode, we'll just add to messages array
1177
- steps_taken += 1
1178
- logger.debug(f"Step {steps_taken}/{steps_limit}")
1179
-
1180
- # Check if we're at the limit and ask user if they want to continue
1181
- if steps_taken == steps_limit and final_answer is None:
1182
- # Show what was accomplished
1183
- max_steps_msg = self._generate_max_steps_message(
1184
- conversation, steps_taken, steps_limit
1185
- )
1186
- self.console.print_warning(max_steps_msg)
1187
-
1188
- # Ask user if they want to continue (skip in silent mode OR if stdin is not available)
1189
- # IMPORTANT: Never call input() in API/CI contexts to avoid blocking threads
1190
- import sys
1191
-
1192
- has_stdin = sys.stdin and sys.stdin.isatty()
1193
- if has_stdin and not (
1194
- hasattr(self, "silent_mode") and self.silent_mode
1195
- ):
1196
- try:
1197
- response = (
1198
- input("\nContinue with 50 more steps? (y/n): ")
1199
- .strip()
1200
- .lower()
1201
- )
1202
- if response in ["y", "yes"]:
1203
- steps_limit += 50
1204
- self.console.print_info(
1205
- f"✓ Continuing with {steps_limit} total steps...\n"
1206
- )
1207
- else:
1208
- self.console.print_info("Stopping at user request.")
1209
- break
1210
- except (EOFError, KeyboardInterrupt):
1211
- self.console.print_info("\nStopping at user request.")
1212
- break
1213
- else:
1214
- # Silent mode - just stop
1215
- break
1216
-
1217
- # Display current step
1218
- self.console.print_step_header(steps_taken, steps_limit)
1219
-
1220
- # Skip automatic finalization for single-step plans - always request proper final answer
1221
-
1222
- # If we're executing a plan, we might not need to query the LLM again
1223
- if (
1224
- self.execution_state == self.STATE_EXECUTING_PLAN
1225
- and self.current_step < self.total_plan_steps
1226
- ):
1227
- logger.debug(
1228
- f"Executing plan step {self.current_step + 1}/{self.total_plan_steps}"
1229
- )
1230
- self.console.print_state_info(
1231
- f"EXECUTING PLAN: Step {self.current_step + 1}/{self.total_plan_steps}"
1232
- )
1233
-
1234
- # Display the current plan with the current step highlighted
1235
- if self.current_plan:
1236
- self.console.print_plan(self.current_plan, self.current_step)
1237
-
1238
- # Extract next step from plan
1239
- next_step = self.current_plan[self.current_step]
1240
-
1241
- if (
1242
- isinstance(next_step, dict)
1243
- and "tool" in next_step
1244
- and "tool_args" in next_step
1245
- ):
1246
- # We have a properly formatted step with tool and args
1247
- tool_name = next_step["tool"]
1248
- tool_args = next_step["tool_args"]
1249
-
1250
- # Create a parsed response structure as if it came from the LLM
1251
- parsed = {
1252
- "thought": f"Executing step {self.current_step + 1} of the plan",
1253
- "goal": f"Following the plan to {user_input}",
1254
- "tool": tool_name,
1255
- "tool_args": tool_args,
1256
- }
1257
-
1258
- # Add to conversation
1259
- conversation.append({"role": "assistant", "content": parsed})
1260
-
1261
- # Display the agent's reasoning for the step
1262
- self.console.print_thought(
1263
- parsed.get("thought", "Executing plan step")
1264
- )
1265
- self.console.print_goal(parsed.get("goal", "Following the plan"))
1266
-
1267
- # Display the tool call in real-time
1268
- self.console.print_tool_usage(tool_name)
1269
-
1270
- # Start progress indicator for tool execution
1271
- self.console.start_progress(f"Executing {tool_name}")
1272
-
1273
- # Execute the tool
1274
- tool_result = self._execute_tool(tool_name, tool_args)
1275
-
1276
- # Stop progress indicator
1277
- self.console.stop_progress()
1278
-
1279
- # Handle domain-specific post-processing
1280
- self._post_process_tool_result(tool_name, tool_args, tool_result)
1281
-
1282
- # Handle large tool results
1283
- truncated_result = self._handle_large_tool_result(
1284
- tool_name, tool_result, conversation, tool_args
1285
- )
1286
-
1287
- # Display the tool result in real-time (show full result to user)
1288
- self.console.print_tool_complete()
1289
-
1290
- self.console.pretty_print_json(tool_result, "Tool Result")
1291
-
1292
- # Store the truncated output for future context
1293
- previous_outputs.append(
1294
- {
1295
- "tool": tool_name,
1296
- "args": tool_args,
1297
- "result": truncated_result,
1298
- }
1299
- )
1300
-
1301
- # Share tool output with subsequent LLM calls
1302
- messages.append(
1303
- self._create_tool_message(tool_name, truncated_result)
1304
- )
1305
-
1306
- # Check for error (support multiple error formats)
1307
- is_error = isinstance(tool_result, dict) and (
1308
- tool_result.get("status") == "error" # Standard format
1309
- or tool_result.get("success")
1310
- is False # Tools returning success: false
1311
- or tool_result.get("has_errors") is True # CLI tools
1312
- or tool_result.get("return_code", 0) != 0 # Build failures
1313
- )
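The same four-way check recurs later in the loop for standalone tool calls. A hedged sketch of an equivalent predicate (the helper name is illustrative):

    def looks_like_error(tool_result) -> bool:
        # Mirrors the inline check: a dict is treated as failed if any of
        # the error conventions used by different tools is present.
        if not isinstance(tool_result, dict):
            return False
        return (
            tool_result.get("status") == "error"
            or tool_result.get("success") is False
            or tool_result.get("has_errors") is True
            or tool_result.get("return_code", 0) != 0
        )

    assert looks_like_error({"status": "error"})
    assert looks_like_error({"return_code": 2, "stderr": "build failed"})
    assert not looks_like_error({"return_code": 0, "stdout": "ok"})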
1314
-
1315
- if is_error:
1316
- error_count += 1
1317
- # Extract error message from various formats
1318
- # Prefer error_brief for logging (avoids duplicate formatted output)
1319
- last_error = (
1320
- tool_result.get("error_brief")
1321
- or tool_result.get("error")
1322
- or tool_result.get("stderr")
1323
- or tool_result.get("hint") # Many tools provide hints
1324
- or tool_result.get(
1325
- "suggested_fix"
1326
- ) # Some tools provide fix suggestions
1327
- or f"Command failed with return code {tool_result.get('return_code')}"
1328
- )
1329
- logger.warning(
1330
- f"Tool execution error in plan (count: {error_count}): {last_error}"
1331
- )
1332
- # Only print if error wasn't already displayed by _execute_tool
1333
- if not tool_result.get("error_displayed"):
1334
- self.console.print_error(last_error)
1335
-
1336
- # Switch to error recovery state
1337
- self.execution_state = self.STATE_ERROR_RECOVERY
1338
- self.console.print_state_info(
1339
- "ERROR RECOVERY: Handling tool execution failure"
1340
- )
1341
-
1342
- # Restart the loop so the next iteration builds the error recovery prompt
1343
- continue
1344
- else:
1345
- # Success - move to next step in plan
1346
- self.current_step += 1
1347
-
1348
- # Check if we've completed the plan
1349
- if self.current_step >= self.total_plan_steps:
1350
- logger.debug("Plan execution completed")
1351
- self.execution_state = self.STATE_COMPLETION
1352
- self.console.print_state_info(
1353
- "COMPLETION: Plan fully executed"
1354
- )
1355
-
1356
- # Increment plan iteration counter
1357
- self.plan_iterations += 1
1358
- logger.debug(
1359
- f"Plan iteration {self.plan_iterations} completed"
1360
- )
1361
-
1362
- # Check if we've reached max plan iterations
1363
- reached_max_iterations = (
1364
- self.max_plan_iterations > 0
1365
- and self.plan_iterations >= self.max_plan_iterations
1366
- )
1367
-
1368
- # Prepare message for final answer with the completed plan context
1369
- plan_context = {
1370
- "completed_plan": self.current_plan,
1371
- "total_steps": self.total_plan_steps,
1372
- }
1373
- plan_context_raw = json.dumps(
1374
- plan_context, default=self._json_serialize_fallback
1375
- )
1376
- if len(plan_context_raw) > 20000:
1377
- plan_context_str = self._truncate_large_content(
1378
- plan_context, max_chars=20000
1379
- )
1380
- else:
1381
- plan_context_str = plan_context_raw
1382
-
1383
- if reached_max_iterations:
1384
- # Force final answer after max iterations
1385
- completion_message = (
1386
- f"Maximum plan iterations ({self.max_plan_iterations}) reached for task: {user_input}\n"
1387
- f"Task: {user_input}\n"
1388
- f"Plan information:\n{plan_context_str}\n\n"
1389
- f"IMPORTANT: You MUST now provide a final answer with an honest assessment:\n"
1390
- f"- Summarize what was successfully accomplished\n"
1391
- f"- Clearly state if anything remains incomplete or if errors occurred\n"
1392
- f"- If the task is fully complete, state that clearly\n\n"
1393
- f'Provide {{"thought": "...", "goal": "...", "answer": "..."}}'
1394
- )
1395
- else:
1396
- completion_message = (
1397
- "You have successfully completed all steps in the plan.\n"
1398
- f"Task: {user_input}\n"
1399
- f"Plan information:\n{plan_context_str}\n\n"
1400
- f"Plan iteration: {self.plan_iterations}/{self.max_plan_iterations if self.max_plan_iterations > 0 else 'unlimited'}\n"
1401
- "Check if more work is needed:\n"
1402
- "- If the task is complete and verified, provide a final answer\n"
1403
- "- If critical validation/testing is needed, you may create ONE more plan\n"
1404
- "- Only create additional plans if absolutely necessary\n\n"
1405
- 'If more work needed: Provide a NEW plan with {"thought": "...", "goal": "...", "plan": [...]}\n'
1406
- 'If everything is complete: Provide {"thought": "...", "goal": "...", "answer": "..."}'
1407
- )
1408
-
1409
- # Debug logging - only show if truncation happened
1410
- if self.debug and len(plan_context_raw) > 2000:
1411
- print(
1412
- "\n[DEBUG] Plan context truncated for completion message"
1413
- )
1414
-
1415
- # Add completion request to messages
1416
- messages.append(
1417
- {"role": "user", "content": completion_message}
1418
- )
1419
-
1420
- # Send the completion prompt to get final answer
1421
- self.console.print_state_info(
1422
- "COMPLETION: Requesting final answer"
1423
- )
1424
-
1425
- # Continue to next iteration to get final answer
1426
- continue
1427
- else:
1428
- # Continue with next step - no need to query LLM again
1429
- continue
1430
- else:
1431
- # Plan step is not properly formatted; fall back to the LLM
1432
- logger.warning(
1433
- f"Plan step {self.current_step + 1} doesn't have proper format: {next_step}"
1434
- )
1435
- self.console.print_warning(
1436
- f"Plan step {self.current_step + 1} format incorrect, asking LLM for guidance"
1437
- )
1438
- prompt = (
1439
- f"You are following a plan but step {self.current_step + 1} doesn't have proper format: {next_step}\n"
1440
- "Please interpret this step and decide what tool to use next.\n\n"
1441
- f"Task: {user_input}\n\n"
1442
- )
1443
- else:
1444
- # Normal execution flow - query the LLM
1445
- if self.execution_state == self.STATE_DIRECT_EXECUTION:
1446
- self.console.print_state_info("DIRECT EXECUTION: Analyzing task")
1447
- elif self.execution_state == self.STATE_PLANNING:
1448
- self.console.print_state_info("PLANNING: Creating or refining plan")
1449
- elif self.execution_state == self.STATE_ERROR_RECOVERY:
1450
- self.console.print_state_info(
1451
- "ERROR RECOVERY: Handling previous error"
1452
- )
1453
-
1454
- # Truncate previous outputs if too large to avoid overwhelming the LLM
1455
- truncated_outputs = (
1456
- self._truncate_large_content(previous_outputs, max_chars=500)
1457
- if previous_outputs
1458
- else "None"
1459
- )
1460
-
1461
- # Create a specific error recovery prompt
1462
- prompt = (
1463
- "TOOL EXECUTION FAILED!\n\n"
1464
- f"You were trying to execute: {last_tool_call[0] if last_tool_call else 'unknown tool'}\n"
1465
- f"Error: {last_error}\n\n"
1466
- f"Original task: {user_input}\n\n"
1467
- f"Current plan step {self.current_step + 1}/{self.total_plan_steps} failed.\n"
1468
- f"Current plan: {self.current_plan}\n\n"
1469
- f"Previous successful outputs: {truncated_outputs}\n\n"
1470
- "INSTRUCTIONS:\n"
1471
- "1. Analyze the error and understand what went wrong\n"
1472
- "2. Create a NEW corrected plan that fixes the error\n"
1473
- "3. Make sure to use correct tool parameters (check the available tools)\n"
1474
- "4. Start executing the corrected plan\n\n"
1475
- "Respond with your analysis, a corrected plan, and the first tool to execute."
1476
- )
1477
-
1478
- # Add the error recovery prompt to the messages array so it gets sent to LLM
1479
- messages.append({"role": "user", "content": prompt})
1480
-
1481
- # Reset state to planning after creating recovery prompt
1482
- self.execution_state = self.STATE_PLANNING
1483
- self.current_plan = None
1484
- self.current_step = 0
1485
- self.total_plan_steps = 0
1486
-
1487
- elif self.execution_state == self.STATE_COMPLETION:
1488
- self.console.print_state_info("COMPLETION: Finalizing response")
1489
-
1490
- # Print the prompt if show_prompts is enabled (separate from debug_prompts)
1491
- if self.show_prompts:
1492
- # Build context from system prompt and messages
1493
- context_parts = [
1494
- (
1495
- f"SYSTEM: {self.system_prompt[:200]}..."
1496
- if len(self.system_prompt) > 200
1497
- else f"SYSTEM: {self.system_prompt}"
1498
- )
1499
- ]
1500
-
1501
- for msg in messages:
1502
- role = msg.get("role", "user").upper()
1503
- content = str(msg.get("content", ""))[:150]
1504
- context_parts.append(
1505
- f"{role}: {content}{'...' if len(str(msg.get('content', ''))) > 150 else ''}"
1506
- )
1507
-
1508
- if not messages and prompt:
1509
- context_parts.append(
1510
- f"USER: {prompt[:150]}{'...' if len(prompt) > 150 else ''}"
1511
- )
1512
-
1513
- self.console.print_prompt("\n".join(context_parts), "LLM Context")
1514
-
1515
- # Handle streaming or non-streaming LLM response
1516
- # Initialize response_stats so it's always in scope
1517
- response_stats = None
1518
-
1519
- if self.streaming:
1520
- # Streaming mode - raw response will be streamed
1521
- # (SilentConsole will suppress this, AgentConsole will show it)
1522
-
1523
- # Add prompt to conversation if debug is enabled
1524
- if self.debug_prompts:
1525
- conversation.append(
1526
- {"role": "system", "content": {"prompt": prompt}}
1527
- )
1528
- # Print the prompt if show_prompts is enabled
1529
- if self.show_prompts:
1530
- self.console.print_prompt(
1531
- prompt, f"Prompt (Step {steps_taken})"
1532
- )
1533
-
1534
- # Get streaming response from ChatSDK with proper conversation history
1535
- try:
1536
- response_stream = self.chat.send_messages_stream(
1537
- messages=messages, system_prompt=self.system_prompt
1538
- )
1539
-
1540
- # Process the streaming response chunks as they arrive
1541
- full_response = ""
1542
- for chunk_response in response_stream:
1543
- if chunk_response.is_complete:
1544
- response_stats = chunk_response.stats
1545
- else:
1546
- self.console.print_streaming_text(chunk_response.text)
1547
- full_response += chunk_response.text
1548
-
1549
- self.console.print_streaming_text("", end_of_stream=True)
1550
- response = full_response
1551
- except ConnectionError as e:
1552
- # Handle LLM server connection errors specifically
1553
- error_msg = f"LLM Server Connection Failed (streaming): {str(e)}"
1554
- logger.error(error_msg)
1555
- self.console.print_error(error_msg)
1556
-
1557
- # Add error to history
1558
- self.error_history.append(
1559
- {
1560
- "step": steps_taken,
1561
- "error": error_msg,
1562
- "type": "llm_connection_error",
1563
- }
1564
- )
1565
-
1566
- # Return error response
1567
- final_answer = (
1568
- f"Unable to complete task due to LLM server error: {str(e)}"
1569
- )
1570
- break
1571
- except Exception as e:
1572
- logger.error(f"Unexpected error during streaming: {e}")
1573
-
1574
- # Add to error history
1575
- self.error_history.append(
1576
- {
1577
- "step": steps_taken,
1578
- "error": str(e),
1579
- "type": "llm_streaming_error",
1580
- }
1581
- )
1582
-
1583
- # Return error response
1584
- final_answer = (
1585
- f"Unable to complete task due to streaming error: {str(e)}"
1586
- )
1587
- break
1588
- else:
1589
- # Use progress indicator for non-streaming mode
1590
- self.console.start_progress("Thinking")
1591
-
1592
- # Debug logging before LLM call
1593
- if self.debug:
1594
-
1595
- print(f"\n[DEBUG] About to call LLM with {len(messages)} messages")
1596
- print(
1597
- f"[DEBUG] Last message role: {messages[-1]['role'] if messages else 'No messages'}"
1598
- )
1599
- if messages and len(messages[-1].get("content", "")) < 500:
1600
- print(
1601
- f"[DEBUG] Last message content: {messages[-1]['content']}"
1602
- )
1603
- else:
1604
- print(
1605
- f"[DEBUG] Last message content length: {len(messages[-1].get('content', ''))}"
1606
- )
1607
- print(f"[DEBUG] Execution state: {self.execution_state}")
1608
- if self.execution_state == "PLANNING":
1609
- print("[DEBUG] Current step: Planning (no active plan yet)")
1610
- else:
1611
- print(
1612
- f"[DEBUG] Current step: {self.current_step}/{self.total_plan_steps}"
1613
- )
1614
-
1615
- # Get complete response from ChatSDK
1616
- try:
1617
- chat_response = self.chat.send_messages(
1618
- messages=messages, system_prompt=self.system_prompt
1619
- )
1620
- response = chat_response.text
1621
- response_stats = chat_response.stats
1622
- except ConnectionError as e:
1623
- error_msg = f"LLM Server Connection Failed: {str(e)}"
1624
- logger.error(error_msg)
1625
- self.console.print_error(error_msg)
1626
-
1627
- # Add error to history and update state
1628
- self.error_history.append(
1629
- {
1630
- "step": steps_taken,
1631
- "error": error_msg,
1632
- "type": "llm_connection_error",
1633
- }
1634
- )
1635
-
1636
- # Return error response
1637
- final_answer = (
1638
- f"Unable to complete task due to LLM server error: {str(e)}"
1639
- )
1640
- break
1641
- except Exception as e:
1642
- if self.debug:
1643
- print(f"[DEBUG] Error calling LLM: {e}")
1644
- logger.error(f"Unexpected error calling LLM: {e}")
1645
-
1646
- # Add to error history
1647
- self.error_history.append(
1648
- {"step": steps_taken, "error": str(e), "type": "llm_error"}
1649
- )
1650
-
1651
- # Return error response
1652
- final_answer = f"Unable to complete task due to error: {str(e)}"
1653
- break
1654
-
1655
- # Stop the progress indicator
1656
- self.console.stop_progress()
1657
-
1658
- # Print the LLM response to the console
1659
- logger.debug(f"LLM response: {response[:200]}...")
1660
- if self.show_prompts:
1661
- self.console.print_response(response, "LLM Response")
1662
-
1663
- # Parse the response
1664
- parsed = self._parse_llm_response(response)
1665
- logger.debug(f"Parsed response: {parsed}")
1666
- conversation.append({"role": "assistant", "content": parsed})
1667
-
1668
- # Add assistant response to messages for chat history
1669
- messages.append({"role": "assistant", "content": response})
1670
-
1671
- # Validate the response has a plan if required
1672
- self._validate_plan_required(parsed, steps_taken)
1673
-
1674
- # If the LLM needs to create a plan first, re-prompt it specifically for that
1675
- if "needs_plan" in parsed and parsed["needs_plan"]:
1676
- # Prepare a special prompt that specifically requests a plan
1677
- deferred_tool = parsed.get("deferred_tool", None)
1678
- deferred_args = parsed.get("deferred_tool_args", {})
1679
-
1680
- plan_prompt = (
1681
- "You MUST create a detailed plan first before taking any action.\n\n"
1682
- f"User request: {user_input}\n\n"
1683
- )
1684
-
1685
- if deferred_tool:
1686
- plan_prompt += (
1687
- f"You initially wanted to use the {deferred_tool} tool with these arguments:\n"
1688
- f"{json.dumps(deferred_args, indent=2, default=self._json_serialize_fallback)}\n\n"
1689
- "However, you MUST first create a plan. Please create a plan that includes this tool usage as a step.\n\n"
1690
- )
1691
-
1692
- plan_prompt += (
1693
- "Create a detailed plan with all necessary steps in JSON format, including exact tool names and arguments.\n"
1694
- "Respond with your reasoning, plan, and the first tool to use."
1695
- )
1696
-
1697
- # Store the plan prompt in conversation if debug is enabled
1698
- if self.debug_prompts:
1699
- conversation.append(
1700
- {"role": "system", "content": {"prompt": plan_prompt}}
1701
- )
1702
- if self.show_prompts:
1703
- self.console.print_prompt(plan_prompt, "Plan Request Prompt")
1704
-
1705
- # Notify the user we're asking for a plan
1706
- self.console.print_info("Requesting a detailed plan before proceeding")
1707
-
1708
- # Get the planning response
1709
- if self.streaming:
1710
- # Add prompt to conversation if debug is enabled
1711
- if self.debug_prompts:
1712
- conversation.append(
1713
- {"role": "system", "content": {"prompt": plan_prompt}}
1714
- )
1715
- # Print the prompt if show_prompts is enabled
1716
- if self.show_prompts:
1717
- self.console.print_prompt(
1718
- plan_prompt, f"Prompt (Step {steps_taken})"
1719
- )
1720
-
1721
- # Handle streaming as before
1722
- full_response = ""
1723
- # Add plan request to messages
1724
- messages.append({"role": "user", "content": plan_prompt})
1725
-
1726
- # Use ChatSDK for streaming plan response
1727
- stream_gen = self.chat.send_messages_stream(
1728
- messages=messages, system_prompt=self.system_prompt
1729
- )
1730
-
1731
- for chunk_response in stream_gen:
1732
- if not chunk_response.is_complete:
1733
- chunk = chunk_response.text
1734
- if hasattr(self.console, "print_streaming_text"):
1735
- self.console.print_streaming_text(chunk)
1736
- else:
1737
- print(chunk, end="", flush=True)
1738
- full_response += chunk
1739
-
1740
- if hasattr(self.console, "print_streaming_text"):
1741
- self.console.print_streaming_text("", end_of_stream=True)
1742
- else:
1743
- print("", flush=True)
1744
-
1745
- plan_response = full_response
1746
- else:
1747
- # Use progress indicator for non-streaming mode
1748
- self.console.start_progress("Creating plan")
1749
-
1750
- # Store the plan prompt in conversation if debug is enabled
1751
- if self.debug_prompts:
1752
- conversation.append(
1753
- {"role": "system", "content": {"prompt": plan_prompt}}
1754
- )
1755
- if self.show_prompts:
1756
- self.console.print_prompt(
1757
- plan_prompt, "Plan Request Prompt"
1758
- )
1759
-
1760
- # Add plan request to messages
1761
- messages.append({"role": "user", "content": plan_prompt})
1762
-
1763
- # Use ChatSDK for non-streaming plan response
1764
- chat_response = self.chat.send_messages(
1765
- messages=messages, system_prompt=self.system_prompt
1766
- )
1767
- plan_response = chat_response.text
1768
- self.console.stop_progress()
1769
-
1770
- # Parse the plan response
1771
- parsed_plan = self._parse_llm_response(plan_response)
1772
- logger.debug(f"Parsed plan response: {parsed_plan}")
1773
- conversation.append({"role": "assistant", "content": parsed_plan})
1774
-
1775
- # Add plan response to messages for chat history
1776
- messages.append({"role": "assistant", "content": plan_response})
1777
-
1778
- # Display the agent's reasoning for the plan
1779
- self.console.print_thought(parsed_plan.get("thought", "Creating plan"))
1780
- self.console.print_goal(parsed_plan.get("goal", "Planning for task"))
1781
-
1782
- # Set the parsed response to the new plan for further processing
1783
- parsed = parsed_plan
1784
- else:
1785
- # Display the agent's reasoning in real-time (only if provided)
1786
- # Skip if we just displayed thought/goal for a plan request above
1787
- thought = parsed.get("thought", "").strip()
1788
- goal = parsed.get("goal", "").strip()
1789
-
1790
- if thought and thought != "No explicit reasoning provided":
1791
- self.console.print_thought(thought)
1792
-
1793
- if goal and goal != "No explicit goal provided":
1794
- self.console.print_goal(goal)
1795
-
1796
- # Process plan if available
1797
- if "plan" in parsed:
1798
- # Validate that plan is actually a list, not a string or other type
1799
- if not isinstance(parsed["plan"], list):
1800
- logger.error(
1801
- f"Invalid plan format: expected list, got {type(parsed['plan']).__name__}. "
1802
- f"Plan content: {parsed['plan']}"
1803
- )
1804
- self.console.print_error(
1805
- f"LLM returned invalid plan format (expected array, got {type(parsed['plan']).__name__}). "
1806
- "Asking for correction..."
1807
- )
1808
-
1809
- # Create error recovery prompt
1810
- error_msg = (
1811
- "ERROR: You provided a plan in the wrong format.\n"
1812
- "Expected: an array of step objects\n"
1813
- f"You provided: {type(parsed['plan']).__name__}\n\n"
1814
- "The correct format is:\n"
1815
- f'{{"plan": [{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}]}}\n\n'
1816
- f"Please create a proper plan as an array of step objects for: {user_input}"
1817
- )
1818
- messages.append({"role": "user", "content": error_msg})
1819
-
1820
- # Continue to next iteration to get corrected plan
1821
- continue
1822
-
1823
- # Validate that plan items are dictionaries with required fields
1824
- invalid_steps = []
1825
- for i, step in enumerate(parsed["plan"]):
1826
- if not isinstance(step, dict):
1827
- invalid_steps.append((i, type(step).__name__, step))
1828
- elif "tool" not in step or "tool_args" not in step:
1829
- invalid_steps.append((i, "missing fields", step))
1830
-
1831
- if invalid_steps:
1832
- logger.error(f"Invalid plan steps found: {invalid_steps}")
1833
- self.console.print_error(
1834
- f"Plan contains {len(invalid_steps)} invalid step(s). Asking for correction..."
1835
- )
1836
-
1837
- # Create detailed error message
1838
- error_details = "\n".join(
1839
- [
1840
- f"Step {i+1}: {issue} - {step}"
1841
- for i, issue, step in invalid_steps[
1842
- :3
1843
- ] # Show first 3 errors
1844
- ]
1845
- )
1846
-
1847
- error_msg = (
1848
- f"ERROR: Your plan contains invalid steps:\n{error_details}\n\n"
1849
- f"Each step must be a dictionary with 'tool' and 'tool_args' fields:\n"
1850
- f'{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}\n\n'
1851
- f"Please create a corrected plan for: {user_input}"
1852
- )
1853
- messages.append({"role": "user", "content": error_msg})
1854
-
1855
- # Continue to next iteration to get corrected plan
1856
- continue
1857
-
1858
- # Plan is valid - proceed with execution
1859
- self.current_plan = parsed["plan"]
1860
- self.current_step = 0
1861
- self.total_plan_steps = len(self.current_plan)
1862
- self.execution_state = self.STATE_EXECUTING_PLAN
1863
- logger.debug(
1864
- f"New plan created with {self.total_plan_steps} steps: {self.current_plan}"
1865
- )
1866
-
1867
- # If the response contains a tool call, execute it
1868
- if "tool" in parsed and "tool_args" in parsed:
1869
-
1870
- # Display the current plan with the current step highlighted
1871
- if self.current_plan:
1872
- self.console.print_plan(self.current_plan, self.current_step)
1873
-
1874
- # When both plan and tool are present, prioritize the plan execution
1875
- # If we have a plan, we should execute from the plan, not the standalone tool call
1876
- if "plan" in parsed and self.current_plan and self.total_plan_steps > 0:
1877
- # Skip the standalone tool execution and let the plan execution handle it
1878
- # The plan execution logic will handle this in the next iteration
1879
- logger.debug(
1880
- "Plan and tool both present - deferring to plan execution logic"
1881
- )
1882
- continue # Skip tool execution, let plan execution handle it
1883
-
1884
- # If this was a single-step plan, mark as completed after tool execution
1885
- if self.total_plan_steps == 1:
1886
- logger.debug(
1887
- "Single-step plan will be marked completed after tool execution"
1888
- )
1889
- self.execution_state = self.STATE_COMPLETION
1890
-
1891
- tool_name = parsed["tool"]
1892
- tool_args = parsed["tool_args"]
1893
- logger.debug(f"Tool call detected: {tool_name} with args {tool_args}")
1894
-
1895
- # Display the tool call in real-time
1896
- self.console.print_tool_usage(tool_name)
1897
-
1898
- if tool_args:
1899
- self.console.pretty_print_json(tool_args, "Arguments")
1900
-
1901
- # Start progress indicator for tool execution
1902
- self.console.start_progress(f"Executing {tool_name}")
1903
-
1904
- # Check for repeated tool calls
1905
- if last_tool_call == (tool_name, str(tool_args)):
1906
- # Stop progress indicator
1907
- self.console.stop_progress()
1908
-
1909
- logger.warning(f"Detected repeated tool call: {tool_name}")
1910
- # Force a final answer if the same tool is called repeatedly
1911
- final_answer = (
1912
- f"Task completed with {tool_name}. No further action needed."
1913
- )
1914
-
1915
- self.console.print_repeated_tool_warning()
1916
- break
1917
-
1918
- # Execute the tool
1919
- tool_result = self._execute_tool(tool_name, tool_args)
1920
-
1921
- # Stop progress indicator
1922
- self.console.stop_progress()
1923
-
1924
- # Handle domain-specific post-processing
1925
- self._post_process_tool_result(tool_name, tool_args, tool_result)
1926
-
1927
- # Handle large tool results
1928
- truncated_result = self._handle_large_tool_result(
1929
- tool_name, tool_result, conversation, tool_args
1930
- )
1931
-
1932
- # Display the tool result in real-time (show full result to user)
1933
- self.console.print_tool_complete()
1934
-
1935
- self.console.pretty_print_json(tool_result, "Result")
1936
-
1937
- # Store the truncated output for future context
1938
- previous_outputs.append(
1939
- {"tool": tool_name, "args": tool_args, "result": truncated_result}
1940
- )
1941
-
1942
- # Share tool output with subsequent LLM calls
1943
- messages.append(self._create_tool_message(tool_name, truncated_result))
1944
-
1945
- # Update last tool call
1946
- last_tool_call = (tool_name, str(tool_args))
1947
-
1948
- # For single-step plans, we still need to let the LLM process the result
1949
- # This is especially important for RAG queries where the LLM needs to
1950
- # synthesize the retrieved information into a coherent answer
1951
- if (
1952
- self.execution_state == self.STATE_COMPLETION
1953
- and self.total_plan_steps == 1
1954
- ):
1955
- logger.debug(
1956
- "Single-step plan execution completed, requesting final answer from LLM"
1957
- )
1958
- # Don't break here - let the loop continue so the LLM can process the tool result
1959
- # The tool result has already been added to messages, so the next iteration
1960
- # will call the LLM with that result
1961
-
1962
- # Check if tool execution resulted in an error (support multiple error formats)
1963
- is_error = isinstance(tool_result, dict) and (
1964
- tool_result.get("status") == "error"
1965
- or tool_result.get("success") is False
1966
- or tool_result.get("has_errors") is True
1967
- or tool_result.get("return_code", 0) != 0
1968
- )
1969
- if is_error:
1970
- error_count += 1
1971
- # Prefer error_brief for logging (avoids duplicate formatted output)
1972
- last_error = (
1973
- tool_result.get("error_brief")
1974
- or tool_result.get("error")
1975
- or tool_result.get("stderr")
1976
- or tool_result.get("hint")
1977
- or tool_result.get("suggested_fix")
1978
- or f"Command failed with return code {tool_result.get('return_code')}"
1979
- )
1980
- logger.warning(
1981
- f"Tool execution error in plan (count: {error_count}): {last_error}"
1982
- )
1983
- # Only print if error wasn't already displayed by _execute_tool
1984
- if not tool_result.get("error_displayed"):
1985
- self.console.print_error(last_error)
1986
-
1987
- # Switch to error recovery state
1988
- self.execution_state = self.STATE_ERROR_RECOVERY
1989
- self.console.print_state_info(
1990
- "ERROR RECOVERY: Handling tool execution failure"
1991
- )
1992
-
1993
- # Break out of tool execution to trigger error recovery prompt
1994
- continue
1995
-
1996
- # Collect and store performance stats for token tracking
1997
- # Do this BEFORE checking for final answer so stats are always collected
1998
- perf_stats = response_stats or self.chat.get_stats()
1999
- if perf_stats:
2000
- conversation.append(
2001
- {
2002
- "role": "system",
2003
- "content": {
2004
- "type": "stats",
2005
- "step": steps_taken,
2006
- "performance_stats": perf_stats,
2007
- },
2008
- }
2009
- )
2010
-
2011
- # Check for final answer (after collecting stats)
2012
- if "answer" in parsed:
2013
- final_answer = parsed["answer"]
2014
- self.execution_state = self.STATE_COMPLETION
2015
- self.console.print_final_answer(final_answer, streaming=self.streaming)
2016
- break
2017
-
2018
- # Validate plan required
2019
- self._validate_plan_required(parsed, steps_taken)
2020
-
2021
- # Print completion message
2022
- self.console.print_completion(steps_taken, steps_limit)
2023
-
2024
- # Calculate total duration
2025
- total_duration = time.time() - start_time
2026
-
2027
- # Aggregate token counts from conversation stats
2028
- total_input_tokens = 0
2029
- total_output_tokens = 0
2030
- for entry in conversation:
2031
- if entry.get("role") == "system" and isinstance(entry.get("content"), dict):
2032
- content = entry["content"]
2033
- if content.get("type") == "stats" and "performance_stats" in content:
2034
- stats = content["performance_stats"]
2035
- if stats.get("input_tokens") is not None:
2036
- total_input_tokens += stats["input_tokens"]
2037
- if stats.get("output_tokens") is not None:
2038
- total_output_tokens += stats["output_tokens"]
2039
-
2040
- # Return the result
2041
- has_errors = len(self.error_history) > 0
2042
- has_valid_answer = (
2043
- final_answer and final_answer.strip()
2044
- ) # Check for non-empty answer
2045
- result = {
2046
- "status": (
2047
- "success"
2048
- if has_valid_answer and not has_errors
2049
- else ("failed" if has_errors else "incomplete")
2050
- ),
2051
- "result": (
2052
- final_answer
2053
- if final_answer
2054
- else self._generate_max_steps_message(
2055
- conversation, steps_taken, steps_limit
2056
- )
2057
- ),
2058
- "system_prompt": self.system_prompt, # Include system prompt in the result
2059
- "conversation": conversation,
2060
- "steps_taken": steps_taken,
2061
- "duration": total_duration, # Total query processing time in seconds
2062
- "input_tokens": total_input_tokens, # Total input tokens across all steps
2063
- "output_tokens": total_output_tokens, # Total output tokens across all steps
2064
- "total_tokens": total_input_tokens
2065
- + total_output_tokens, # Combined token count
2066
- "error_count": len(self.error_history),
2067
- "error_history": self.error_history, # Include the full error history
2068
- }
2069
-
2070
- # Write trace to file if requested
2071
- if trace:
2072
- file_path = self._write_json_to_file(result, filename)
2073
- result["output_file"] = file_path
2074
-
2075
- logger.debug(f"Query processing complete: {result}")
2076
-
2077
- # Store the result internally
2078
- self.last_result = result
2079
-
2080
- return result
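Assuming the surrounding method is the agent's main query entry point (its name is not visible in this hunk, so process_query below is a stand-in), a caller might consume the result dict like this:

    result = agent.process_query("Summarize the repository", trace=True)  # name hypothetical
    if result["status"] == "success":
        print(result["result"])
    print(f"steps={result['steps_taken']} tokens={result['total_tokens']} "
          f"duration={result['duration']:.1f}s errors={result['error_count']}")
    if "output_file" in result:
        print("Trace written to", result["output_file"])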
2081
-
2082
- def _post_process_tool_result(
2083
- self, _tool_name: str, _tool_args: Dict[str, Any], _tool_result: Dict[str, Any]
2084
- ) -> None:
2085
- """
2086
- Post-process the tool result for domain-specific handling.
2087
- Override this in subclasses to provide domain-specific behavior.
2088
-
2089
- Args:
2090
- _tool_name: Name of the tool that was executed
2091
- _tool_args: Arguments that were passed to the tool
2092
- _tool_result: Result returned by the tool
2093
- """
2094
- ...
2095
-
2096
- def display_result(
2097
- self,
2098
- title: str = "Result",
2099
- result: Dict[str, Any] = None,
2100
- print_result: bool = False,
2101
- ) -> None:
2102
- """
2103
- Display the result and output file path information.
2104
-
2105
- Args:
2106
- title: Optional title for the result panel
2107
- result: Optional result dictionary to display. If None, uses the last stored result.
2108
- print_result: If True, print the result to the console
2109
- """
2110
- # Use the provided result or fall back to the last stored result
2111
- display_result = result if result is not None else self.last_result
2112
-
2113
- if display_result is None:
2114
- self.console.print_warning("No result available to display.")
2115
- return
2116
-
2117
- # Print the full result with syntax highlighting
2118
- if print_result:
2119
- self.console.pretty_print_json(display_result, title)
2120
-
2121
- # If there's an output file, display its path after the result
2122
- if "output_file" in display_result:
2123
- self.console.print_info(
2124
- f"Output written to: {display_result['output_file']}"
2125
- )
2126
-
2127
- def get_error_history(self) -> List[str]:
2128
- """
2129
- Get the history of errors encountered by the agent.
2130
-
2131
- Returns:
2132
- List of error messages
2133
- """
2134
- return self.error_history
2135
-
2136
- def _validate_plan_required(self, parsed: Dict[str, Any], step: int) -> None:
2137
- """
2138
- Validate that the response includes a plan when required by the agent.
2139
-
2140
- Args:
2141
- parsed: The parsed response from the LLM
2142
- step: The current step number
2143
- """
2144
- # Skip validation if we're not in planning mode or if we're already executing a plan
2145
- if self.execution_state != self.STATE_PLANNING or self.current_plan is not None:
2146
- return
2147
-
2148
- # Allow simple single-tool operations without requiring a plan
2149
- if "tool" in parsed and step == 1:
2150
- tool_name = parsed.get("tool", "")
2151
- # List of tools that can execute directly without a plan
2152
- simple_tools = self.SIMPLE_TOOLS
2153
- if tool_name in simple_tools:
2154
- logger.debug(f"Allowing direct execution of simple tool: {tool_name}")
2155
- return
2156
-
2157
- # Check if plan is missing on the first step
2158
- # BUT: Allow direct answers without plans (for simple conversational queries)
2159
- if "plan" not in parsed and "answer" not in parsed and step == 1:
2160
- warning_msg = f"No plan found in step {step} response. The agent should create a plan for all tasks."
2161
- logger.warning(warning_msg)
2162
- self.console.print_warning(warning_msg)
2163
-
2164
- # For the first step, we'll add a flag to indicate we need to re-prompt for a plan
2165
- parsed["needs_plan"] = True
2166
-
2167
- # If there's a tool in the response, store it but don't execute it yet
2168
- if "tool" in parsed:
2169
- parsed["deferred_tool"] = parsed["tool"]
2170
- parsed["deferred_tool_args"] = parsed.get("tool_args", {})
2171
- # Remove the tool so it won't be executed
2172
- del parsed["tool"]
2173
- if "tool_args" in parsed:
2174
- del parsed["tool_args"]
2175
-
2176
- # Set state to indicate we need planning
2177
- self.execution_state = self.STATE_PLANNING
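A worked illustration of the deferral above, assuming the tool is not in self.SIMPLE_TOOLS (names and values hypothetical):

    parsed = {"thought": "list files first", "tool": "list_files", "tool_args": {"path": "."}}
    # After agent._validate_plan_required(parsed, step=1), the tool call
    # is parked until a plan exists:
    # {
    #     "thought": "list files first",
    #     "needs_plan": True,
    #     "deferred_tool": "list_files",
    #     "deferred_tool_args": {"path": "."},
    # }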
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ Generic Agent class for building domain-specific agents.
5
+ """
6
+
7
+ # Standard library imports
8
+ import abc
9
+ import datetime
10
+ import inspect
11
+ import json
12
+ import logging
13
+ import os
14
+ import re
15
+ import subprocess
16
+ import uuid
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from gaia.agents.base.console import AgentConsole, SilentConsole
20
+ from gaia.agents.base.errors import format_execution_trace
21
+ from gaia.agents.base.tools import _TOOL_REGISTRY
22
+
23
+ # First-party imports
24
+ from gaia.chat.sdk import ChatConfig, ChatSDK
25
+
26
+ # Set up logging
27
+ logging.basicConfig(level=logging.INFO)
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Content truncation thresholds
31
+ CHUNK_TRUNCATION_THRESHOLD = 5000
32
+ CHUNK_TRUNCATION_SIZE = 2500
33
+
34
+
35
+ class Agent(abc.ABC):
36
+ """
37
+ Base Agent class that provides core functionality for domain-specific agents.
38
+
39
+ The Agent class handles the core conversation loop, tool execution, and LLM
40
+ interaction patterns. It provides:
41
+ - Conversation management with an LLM
42
+ - Tool registration and execution framework
43
+ - JSON response parsing and validation
44
+ - Error handling and recovery
45
+ - State management for multi-step plans
46
+ - Output formatting and file writing
47
+ - Configurable prompt display for debugging
48
+
49
+ Key Parameters:
50
+ debug: Enable general debug output and logging
51
+ show_prompts: Display prompts sent to LLM (useful for debugging prompts)
52
+ debug_prompts: Include prompts in conversation history for analysis
53
+ streaming: Enable real-time streaming of LLM responses
54
+ silent_mode: Suppress all console output for JSON-only usage
55
+ """
56
+
57
+ # Define state constants
58
+ STATE_PLANNING = "PLANNING"
59
+ STATE_EXECUTING_PLAN = "EXECUTING_PLAN"
60
+ STATE_DIRECT_EXECUTION = "DIRECT_EXECUTION"
61
+ STATE_ERROR_RECOVERY = "ERROR_RECOVERY"
62
+ STATE_COMPLETION = "COMPLETION"
63
+
64
+ def __init__(
65
+ self,
66
+ use_claude: bool = False,
67
+ use_chatgpt: bool = False,
68
+ claude_model: str = "claude-sonnet-4-20250514",
69
+ base_url: Optional[str] = None,
70
+ model_id: Optional[str] = None,
71
+ max_steps: int = 5,
72
+ debug_prompts: bool = False,
73
+ show_prompts: bool = False,
74
+ output_dir: Optional[str] = None,
75
+ streaming: bool = False,
76
+ show_stats: bool = False,
77
+ silent_mode: bool = False,
78
+ debug: bool = False,
79
+ output_handler=None,
80
+ max_plan_iterations: int = 3,
81
+ min_context_size: int = 32768,
82
+ skip_lemonade: bool = False,
83
+ ):
84
+ """
85
+ Initialize the Agent with LLM client.
86
+
87
+ Args:
88
+ use_claude: If True, uses Claude API (default: False)
89
+ use_chatgpt: If True, uses ChatGPT/OpenAI API (default: False)
90
+ claude_model: Claude model to use when use_claude=True (default: "claude-sonnet-4-20250514")
91
+ base_url: Base URL for local LLM server (default: reads from LEMONADE_BASE_URL env var, falls back to http://localhost:8000/api/v1)
92
+ model_id: The ID of the model to use with the LLM server (defaults to Qwen3-Coder-30B-A3B-Instruct-GGUF for local use)
93
+ max_steps: Maximum number of steps the agent can take before terminating
94
+ debug_prompts: If True, includes prompts in the conversation history
95
+ show_prompts: If True, displays prompts sent to LLM in console (default: False)
96
+ output_dir: Directory for storing JSON output files (default: current directory)
97
+ streaming: If True, enables real-time streaming of LLM responses (default: False)
98
+ show_stats: If True, displays LLM performance stats after each response (default: False)
99
+ silent_mode: If True, suppresses all console output for JSON-only usage (default: False)
100
+ debug: If True, enables debug output for troubleshooting (default: False)
101
+ output_handler: Custom OutputHandler for displaying agent output (default: None, creates console based on silent_mode)
102
+ max_plan_iterations: Maximum number of plan-execute-replan cycles (default: 3, 0 = unlimited)
103
+ min_context_size: Minimum context size required for this agent (default: 32768).
104
+ skip_lemonade: If True, skip Lemonade server initialization (default: False).
105
+ Use this when connecting to a different OpenAI-compatible backend.
106
+
107
+ Note: Uses local LLM server by default unless use_claude or use_chatgpt is True.
108
+ """
109
+ self.error_history = [] # Store error history for learning
110
+ self.conversation_history = (
111
+ []
112
+ ) # Store conversation history for session persistence
113
+ self.max_steps = max_steps
114
+ self.debug_prompts = debug_prompts
115
+ self.show_prompts = show_prompts # Separate flag for displaying prompts
116
+ self.output_dir = output_dir if output_dir else os.getcwd()
117
+ self.streaming = streaming
118
+ self.show_stats = show_stats
119
+ self.silent_mode = silent_mode
120
+ self.debug = debug
121
+ self.last_result = None # Store the most recent result
122
+ self.max_plan_iterations = max_plan_iterations
123
+ self._current_query: Optional[str] = (
124
+ None # Store current query for error context
125
+ )
126
+
127
+ # Read base_url from environment if not provided
128
+ if base_url is None:
129
+ base_url = os.getenv("LEMONADE_BASE_URL", "http://localhost:8000/api/v1")
130
+
131
+ # Lazy Lemonade initialization for local LLM users
132
+ # This ensures Lemonade server is running before we try to use it
133
+ if not (use_claude or use_chatgpt or skip_lemonade):
134
+ from gaia.llm.lemonade_manager import LemonadeManager
135
+
136
+ LemonadeManager.ensure_ready(
137
+ min_context_size=min_context_size,
138
+ quiet=silent_mode,
139
+ base_url=base_url,
140
+ )
141
+
142
+ # Initialize state management
143
+ self.execution_state = self.STATE_PLANNING
144
+ self.current_plan = None
145
+ self.current_step = 0
146
+ self.total_plan_steps = 0
147
+ self.plan_iterations = 0 # Track number of plan cycles
148
+
149
+ # Initialize the console/output handler for display
150
+ # If output_handler is provided, use it; otherwise create based on silent_mode
151
+ if output_handler is not None:
152
+ self.console = output_handler
153
+ else:
154
+ self.console = self._create_console()
155
+
156
+ # Initialize LLM client for local model
157
+ self.system_prompt = self._get_system_prompt()
158
+
159
+ # Register tools for this agent
160
+ self._register_tools()
161
+
162
+ # Update system prompt with available tools and response format
163
+ tools_description = self._format_tools_for_prompt()
164
+ self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n"
165
+
166
+ # Add JSON response format instructions (shared across all agents)
167
+ self.system_prompt += """
168
+ ==== RESPONSE FORMAT ====
169
+ You must respond ONLY in valid JSON. No text before { or after }.
170
+
171
+ **To call a tool:**
172
+ {"thought": "reasoning", "goal": "objective", "tool": "tool_name", "tool_args": {"arg1": "value1"}}
173
+
174
+ **To create a multi-step plan:**
175
+ {
176
+ "thought": "reasoning",
177
+ "goal": "objective",
178
+ "plan": [
179
+ {"tool": "tool1", "tool_args": {"arg": "val"}},
180
+ {"tool": "tool2", "tool_args": {"arg": "val"}}
181
+ ],
182
+ "tool": "tool1",
183
+ "tool_args": {"arg": "val"}
184
+ }
185
+
186
+ **To provide a final answer:**
187
+ {"thought": "reasoning", "goal": "achieved", "answer": "response to user"}
188
+
189
+ **RULES:**
190
+ 1. ALWAYS use tools for real data - NEVER hallucinate
191
+ 2. Plan steps MUST be objects like {"tool": "x", "tool_args": {}}, NOT strings
192
+ 3. After tool results, provide an "answer" summarizing them
193
+ """
194
+
195
+ # Initialize ChatSDK with proper configuration
196
+ # Note: We don't set system_prompt in config, we pass it per request
197
+ # Note: Context size is configured when starting Lemonade server, not here
198
+ # Use Qwen3-Coder-30B by default for better reasoning and JSON formatting
199
+ # The 0.5B model is too small for complex agent tasks
200
+ chat_config = ChatConfig(
201
+ model=model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF",
202
+ use_claude=use_claude,
203
+ use_chatgpt=use_chatgpt,
204
+ claude_model=claude_model,
205
+ base_url=base_url,
206
+ show_stats=True, # Always collect stats for token tracking
207
+ max_history_length=20, # Keep more history for agent conversations
208
+ max_tokens=4096, # Increased for complex code generation
209
+ )
210
+ self.chat = ChatSDK(chat_config)
211
+ self.model_id = model_id
212
+
213
+ # Print system prompt if show_prompts is enabled
214
+ # Debug: Check the actual value of show_prompts
215
+ if self.debug:
216
+ logger.debug(
217
+ f"show_prompts={self.show_prompts}, debug={self.debug}, will show prompt: {self.show_prompts}"
218
+ )
219
+
220
+ if self.show_prompts:
221
+ self.console.print_prompt(self.system_prompt, "Initial System Prompt")
222
+
223
+ @abc.abstractmethod
224
+ def _get_system_prompt(self) -> str:
225
+ """
226
+ Generate the system prompt for the agent.
227
+ Subclasses must implement this to provide domain-specific prompts.
228
+ """
229
+ raise NotImplementedError("Subclasses must implement _get_system_prompt")
230
+
231
+ def _create_console(self):
232
+ """
233
+ Create and return a console output handler.
234
+ Returns SilentConsole if in silent_mode, otherwise AgentConsole.
235
+ Subclasses can override this to provide domain-specific console output.
236
+ """
237
+ if self.silent_mode:
238
+ # Check if we should completely silence everything (including final answer)
239
+ # This would be true for JSON-only output or when output_dir is set
240
+ silence_final_answer = getattr(self, "output_dir", None) is not None
241
+ return SilentConsole(silence_final_answer=silence_final_answer)
242
+ return AgentConsole()
243
+
244
+ @abc.abstractmethod
245
+ def _register_tools(self):
246
+ """
247
+ Register all domain-specific tools for the agent.
248
+ Subclasses must implement this method.
249
+ """
250
+ raise NotImplementedError("Subclasses must implement _register_tools")
251
+
252
+ def _format_tools_for_prompt(self) -> str:
253
+ """Format the registered tools into a string for the prompt."""
254
+ tool_descriptions = []
255
+
256
+ for name, tool_info in _TOOL_REGISTRY.items():
257
+ params_str = ", ".join(
258
+ [
259
+ f"{param_name}{'' if param_info['required'] else '?'}: {param_info['type']}"
260
+ for param_name, param_info in tool_info["parameters"].items()
261
+ ]
262
+ )
263
+
264
+ description = tool_info["description"].strip()
265
+ tool_descriptions.append(f"- {name}({params_str}): {description}")
266
+
267
+ return "\n".join(tool_descriptions)
268
+
269
+ def list_tools(self, verbose: bool = True) -> None:
270
+ """
271
+ Display all tools registered for this agent with their parameters and descriptions.
272
+
273
+ Args:
274
+ verbose: If True, displays full descriptions and parameter details. If False, shows a compact list.
275
+ """
276
+ self.console.print_header(f"🛠️ Registered Tools for {self.__class__.__name__}")
277
+ self.console.print_separator()
278
+
279
+ for name, tool_info in self.get_tools_info().items():
280
+ # Format parameters
281
+ params = []
282
+ for param_name, param_info in tool_info["parameters"].items():
283
+ required = param_info.get("required", False)
284
+ param_type = param_info.get("type", "Any")
285
+ default = param_info.get("default", None)
286
+
287
+ if required:
288
+ params.append(f"{param_name}: {param_type}")
289
+ else:
290
+ default_str = f"={default}" if default is not None else "=None"
291
+ params.append(f"{param_name}: {param_type}{default_str}")
292
+
293
+ params_str = ", ".join(params)
294
+
295
+ # Get description
296
+ if verbose:
297
+ description = tool_info["description"]
298
+ else:
299
+ description = (
300
+ tool_info["description"].split("\n")[0]
301
+ if tool_info["description"]
302
+ else "No description"
303
+ )
304
+
305
+ # Print tool information
306
+ self.console.print_tool_info(name, params_str, description)
307
+
308
+ self.console.print_separator()
309
+
310
+ return None
311
+
312
+ def get_tools_info(self) -> Dict[str, Any]:
313
+ """Get information about all registered tools."""
314
+ return _TOOL_REGISTRY
315
+
316
+ def get_tools(self) -> List[Dict[str, Any]]:
317
+ """Get a list of registered tools for the agent."""
318
+ return list(_TOOL_REGISTRY.values())
319
+
320
+ def _extract_json_from_response(self, response: str) -> Optional[Dict[str, Any]]:
321
+ """
322
+ Apply multiple extraction strategies to find valid JSON in the response.
323
+
324
+ Args:
325
+ response: The raw response from the LLM
326
+
327
+ Returns:
328
+ Extracted JSON dictionary or None if extraction failed
329
+ """
330
+ # Strategy 1: Extract JSON from code blocks with various patterns
331
+ json_patterns = [
332
+ r"```(?:json)?\s*(.*?)\s*```", # Standard code block
333
+ r"`json\s*(.*?)\s*`", # Single backtick with json tag
334
+ r"<json>\s*(.*?)\s*</json>", # XML-style tags
335
+ ]
336
+
337
+ for pattern in json_patterns:
338
+ matches = re.findall(pattern, response, re.DOTALL)
339
+ for match in matches:
340
+ try:
341
+ result = json.loads(match)
342
+ # Ensure tool_args exists if tool is present
343
+ if "tool" in result and "tool_args" not in result:
344
+ result["tool_args"] = {}
345
+ logger.debug(f"Successfully extracted JSON with pattern {pattern}")
346
+ return result
347
+ except json.JSONDecodeError:
348
+ continue
349
+
350
+ # Strategy 2: Bracket-matching scan for the first complete JSON object
+ start_idx = response.find("{")
351
+ if start_idx >= 0:
352
+ bracket_count = 0
353
+ in_string = False
354
+ escape_next = False
355
+
356
+ for i, char in enumerate(response[start_idx:], start_idx):
357
+ if escape_next:
358
+ escape_next = False
359
+ continue
360
+ if char == "\\":
361
+ escape_next = True
362
+ continue
363
+ if char == '"' and not escape_next:
364
+ in_string = not in_string
365
+ if not in_string:
366
+ if char == "{":
367
+ bracket_count += 1
368
+ elif char == "}":
369
+ bracket_count -= 1
370
+ if bracket_count == 0:
371
+ # Found complete JSON object
372
+ try:
373
+ extracted = response[start_idx : i + 1]
374
+ # Fix common issues before parsing
375
+ fixed = re.sub(r",\s*}", "}", extracted)
376
+ fixed = re.sub(r",\s*]", "]", fixed)
377
+ result = json.loads(fixed)
378
+ # Ensure tool_args exists if tool is present
379
+ if "tool" in result and "tool_args" not in result:
380
+ result["tool_args"] = {}
381
+ logger.debug(
382
+ "Successfully extracted JSON using bracket-matching"
383
+ )
384
+ return result
385
+ except json.JSONDecodeError as e:
386
+ logger.debug(f"Bracket-matched JSON parse failed: {e}")
387
+ break
388
+
389
+ return None
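A quick illustration of the two strategies (inputs hypothetical; agent is any concrete subclass instance):

    raw = 'Sure, calling it now:\n```json\n{"tool": "echo"}\n```'
    agent._extract_json_from_response(raw)
    # -> {"tool": "echo", "tool_args": {}}   (tool_args back-filled)

    raw = 'I will answer directly. {"answer": "42",}'
    agent._extract_json_from_response(raw)
    # -> {"answer": "42"}   (bracket matching plus trailing-comma fix)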
390
+
391
+ def validate_json_response(self, response_text: str) -> Dict[str, Any]:
392
+ """
393
+ Validates and attempts to fix JSON responses from the LLM.
394
+
395
+ Attempts the following fixes in order:
396
+ 1. Parse as-is if valid JSON
397
+ 2. Extract JSON from code blocks
398
+ 3. Truncate after first complete JSON object
399
+ 4. Fix common JSON syntax errors
400
+ 5. Extract JSON-like content using regex
401
+
402
+ Args:
403
+ response_text: The response string from the LLM
404
+
405
+ Returns:
406
+ A dictionary containing the parsed JSON if valid
407
+
408
+ Raises:
409
+ ValueError: If the response cannot be parsed as JSON or is missing required fields
410
+ """
411
+ original_response = response_text
412
+ json_was_modified = False
413
+
414
+ # Step 0: Sanitize control characters to ensure proper JSON format
415
+ def sanitize_json_string(text: str) -> str:
416
+ """
417
+ Ensure JSON strings have properly escaped control characters.
418
+
419
+ Args:
420
+ text: JSON text that may contain unescaped control characters
421
+
422
+ Returns:
423
+ Sanitized JSON text with properly escaped control characters
424
+ """
425
+
426
+ def escape_string_content(match):
427
+ """Ensure control characters are properly escaped in JSON string values."""
428
+ quote = match.group(1)
429
+ content = match.group(2)
430
+ closing_quote = match.group(3)
431
+
432
+ # Ensure proper escaping of control characters
433
+ content = content.replace("\n", "\\n")
434
+ content = content.replace("\r", "\\r")
435
+ content = content.replace("\t", "\\t")
436
+ content = content.replace("\b", "\\b")
437
+ content = content.replace("\f", "\\f")
438
+
439
+ return f"{quote}{content}{closing_quote}"
440
+
441
+ # Match JSON strings: "..." handling escaped quotes
442
+ pattern = r'(")([^"\\]*(?:\\.[^"\\]*)*)(")'
443
+
444
+ try:
445
+ return re.sub(pattern, escape_string_content, text)
446
+ except Exception as e:
447
+ logger.debug(
448
+ f"[JSON] String sanitization encountered issue: {e}, using original"
449
+ )
450
+ return text
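For example (shown standalone for illustration; in the method the helper is applied to the raw response before any parse attempt):

    before = '{"answer": "line one\nline two"}'   # real newline inside the string value
    after = sanitize_json_string(before)          # -> '{"answer": "line one\\nline two"}'
    json.loads(after)                             # now parses cleanly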
451
+
452
+ response_text = sanitize_json_string(response_text)
453
+
454
+ # Step 1: Try to parse as-is
455
+ try:
456
+ json_response = json.loads(response_text)
457
+ logger.debug("[JSON] Successfully parsed response without modifications")
458
+ except json.JSONDecodeError as initial_error:
459
+ # Step 2: Try to extract from code blocks
460
+ json_match = re.search(
461
+ r"```(?:json)?\s*({.*?})\s*```", response_text, re.DOTALL
462
+ )
463
+ if json_match:
464
+ try:
465
+ response_text = json_match.group(1)
466
+ json_response = json.loads(response_text)
467
+ json_was_modified = True
468
+ logger.warning("[JSON] Extracted JSON from code block")
469
+ except json.JSONDecodeError as e:
470
+ logger.debug(f"[JSON] Code block extraction failed: {e}")
471
+
472
+ # Step 3: Try to find and extract first complete JSON object
473
+ if not json_was_modified:
474
+ # Find the first '{' and try to match brackets
475
+ start_idx = response_text.find("{")
476
+ if start_idx >= 0:
477
+ bracket_count = 0
478
+ in_string = False
479
+ escape_next = False
480
+
481
+ for i, char in enumerate(response_text[start_idx:], start_idx):
482
+ if escape_next:
483
+ escape_next = False
484
+ continue
485
+ if char == "\\":
486
+ escape_next = True
487
+ continue
488
+ if char == '"' and not escape_next:
489
+ in_string = not in_string
490
+ if not in_string:
491
+ if char == "{":
492
+ bracket_count += 1
493
+ elif char == "}":
494
+ bracket_count -= 1
495
+ if bracket_count == 0:
496
+ # Found complete JSON object
497
+ try:
498
+ truncated = response_text[start_idx : i + 1]
499
+ json_response = json.loads(truncated)
500
+ json_was_modified = True
501
+ logger.warning(
502
+ f"[JSON] Truncated response after first complete JSON object (removed {len(response_text) - i - 1} chars)"
503
+ )
504
+ response_text = truncated
505
+ break
506
+ except json.JSONDecodeError:
507
+ logger.debug(
508
+ "[JSON] Truncated text is not valid JSON, trying next bracket pair"
509
+ )
510
+ continue
511
+
512
+ # Step 4: Try to fix common JSON errors
513
+ if not json_was_modified:
514
+ fixed_text = response_text
515
+
516
+ # Remove trailing commas
517
+ fixed_text = re.sub(r",\s*}", "}", fixed_text)
518
+ fixed_text = re.sub(r",\s*]", "]", fixed_text)
519
+
520
+ # Fix single quotes to double quotes (carefully)
521
+ if "'" in fixed_text and '"' not in fixed_text:
522
+ fixed_text = fixed_text.replace("'", '"')
523
+
524
+ # Remove any text before first '{' or '['
525
+ json_start = min(
526
+ fixed_text.find("{") if "{" in fixed_text else len(fixed_text),
527
+ fixed_text.find("[") if "[" in fixed_text else len(fixed_text),
528
+ )
529
+ if json_start > 0 and json_start < len(fixed_text):
530
+ fixed_text = fixed_text[json_start:]
531
+
532
+ # Try to parse the fixed text
533
+ if fixed_text != response_text:
534
+ try:
535
+ json_response = json.loads(fixed_text)
536
+ json_was_modified = True
537
+ logger.warning("[JSON] Applied automatic JSON fixes")
538
+ response_text = fixed_text
539
+ except json.JSONDecodeError as e:
540
+ logger.debug(f"[JSON] Auto-fix failed: {e}")
541
+
542
+ # If still no valid JSON, raise the original error
543
+ if not json_was_modified:
544
+ raise ValueError(
545
+ f"Failed to parse response as JSON: {str(initial_error)}"
546
+ )
547
+
548
+ # Log warning if JSON was modified
549
+ if json_was_modified:
550
+ logger.warning(
551
+ f"[JSON] Response was modified to extract valid JSON. Original length: {len(original_response)}, Fixed length: {len(response_text)}"
552
+ )
553
+
554
+ # Validate required fields
555
+ # Note: 'goal' is optional for simple answer responses
556
+ if "answer" in json_response:
557
+ required_fields = ["thought", "answer"] # goal is optional
558
+ elif "tool" in json_response:
559
+ required_fields = ["thought", "tool", "tool_args"] # goal is optional
560
+ else:
561
+ required_fields = ["thought", "plan"] # goal is optional
562
+
563
+ missing_fields = [
564
+ field for field in required_fields if field not in json_response
565
+ ]
566
+ if missing_fields:
567
+ raise ValueError(
568
+ f"Response is missing required fields: {', '.join(missing_fields)}"
569
+ )
570
+
571
+ return json_response
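An end-to-end illustration of the repair ladder (input hypothetical):

    messy = '{"thought": "t", "tool": "echo", "tool_args": {"text": "hi"},}'
    agent.validate_json_response(messy)
    # Step 4 strips the trailing comma and the required-field check passes:
    # {"thought": "t", "tool": "echo", "tool_args": {"text": "hi"}}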
572
+
573
+ def _parse_llm_response(self, response: str) -> Dict[str, Any]:
574
+ """
575
+ Parse the LLM response to extract tool calls or conversational answers.
576
+
577
+ ARCHITECTURE: Supports two response modes
578
+ - Plain text for conversation (no JSON required)
579
+ - JSON for tool invocations
580
+
581
+ Args:
582
+ response: The raw response from the LLM
583
+
584
+ Returns:
585
+ Parsed response as a dictionary
586
+ """
587
+ # Check for empty responses
588
+ if not response or not response.strip():
589
+ logger.warning("Empty LLM response received")
590
+ self.error_history.append("Empty LLM response")
591
+
592
+ # Provide more helpful error message based on context
593
+ if hasattr(self, "api_mode") and self.api_mode: # pylint: disable=no-member
594
+ answer = "I encountered an issue processing your request. This might be due to a connection problem with the language model. Please try again."
595
+ else:
596
+ answer = "I apologize, but I received an empty response from the language model. Please try again."
597
+
598
+ return {
599
+ "thought": "LLM returned empty response",
600
+ "goal": "Handle empty response error",
601
+ "answer": answer,
602
+ }
603
+
604
+ response = response.strip()
605
+
606
+ # Log what we received for debugging (show more to see full JSON)
607
+ if len(response) > 500:
608
+ logger.debug(
609
+ f"📥 LLM Response ({len(response)} chars): {response[:500]}..."
610
+ )
611
+ else:
612
+ logger.debug(f"📥 LLM Response: {response}")
613
+
614
+ # STEP 1: Fast path - detect plain text conversational responses
615
+ # If response doesn't start with '{', it's likely plain text
616
+ # Accept it immediately without logging errors
617
+ if not response.startswith("{"):
618
+ logger.debug(
619
+ f"[PARSE] Plain text conversational response (length: {len(response)})"
620
+ )
621
+ return {"thought": "", "goal": "", "answer": response}
622
+
623
+ # STEP 2: Response starts with '{' - looks like JSON
624
+ # Try direct JSON parsing first (fastest path)
625
+ try:
626
+ result = json.loads(response)
627
+ # Ensure tool_args exists if tool is present
628
+ if "tool" in result and "tool_args" not in result:
629
+ result["tool_args"] = {}
630
+ logger.debug("[PARSE] Valid JSON response")
631
+ return result
632
+ except json.JSONDecodeError:
633
+ # JSON parsing failed - continue to extraction methods
634
+ logger.debug("[PARSE] Malformed JSON, trying extraction")
635
+
636
+ # STEP 3: Try JSON extraction methods (handles code blocks, mixed text, etc.)
637
+ extracted_json = self._extract_json_from_response(response)
638
+ if extracted_json:
639
+ logger.debug("[PARSE] Extracted JSON successfully")
640
+ return extracted_json
641
+
642
+ # STEP 4: JSON was expected (starts with '{') but all parsing failed
643
+ # Log error ONLY for JSON that couldn't be parsed
644
+ logger.debug("Attempting to extract fields using regex")
645
+ thought_match = re.search(r'"thought":\s*"([^"]*)"', response)
646
+ tool_match = re.search(r'"tool":\s*"([^"]*)"', response)
647
+ answer_match = re.search(r'"answer":\s*"([^"]*)"', response)
648
+ plan_match = re.search(r'"plan":\s*(\[.*?\])', response, re.DOTALL)
649
+
650
+ if answer_match:
651
+ result = {
652
+ "thought": thought_match.group(1) if thought_match else "",
653
+ "goal": "",
654
+ "answer": answer_match.group(1),
655
+ }
656
+ logger.debug(f"Extracted answer using regex: {result}")
657
+ return result
658
+
659
+ if tool_match:
660
+ tool_args = {}
661
+
662
+ tool_args_start = response.find('"tool_args"')
663
+
664
+ if tool_args_start >= 0:
665
+ # Find the opening brace after "tool_args":
666
+ brace_start = response.find("{", tool_args_start)
667
+ if brace_start >= 0:
668
+ # Use bracket-matching to find the complete object
669
+ bracket_count = 0
670
+ in_string = False
671
+ escape_next = False
672
+ for i, char in enumerate(response[brace_start:], brace_start):
673
+ if escape_next:
674
+ escape_next = False
675
+ continue
676
+ if char == "\\":
677
+ escape_next = True
678
+ continue
679
+ if char == '"' and not escape_next:
680
+ in_string = not in_string
681
+ if not in_string:
682
+ if char == "{":
683
+ bracket_count += 1
684
+ elif char == "}":
685
+ bracket_count -= 1
686
+ if bracket_count == 0:
687
+ # Found complete tool_args object
688
+ tool_args_str = response[brace_start : i + 1]
689
+ try:
690
+ tool_args = json.loads(tool_args_str)
691
+ except json.JSONDecodeError as e:
692
+ error_msg = f"Failed to parse tool_args JSON: {str(e)}, content: {tool_args_str[:100]}..."
693
+ logger.error(error_msg)
694
+ self.error_history.append(error_msg)
695
+ break
696
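+ # (The scan above makes a single pass over the text, tracking
+ # string/escape state so braces inside quoted values do not
+ # skew the nesting depth.)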
+
697
+ result = {
698
+ "thought": thought_match.group(1) if thought_match else "",
699
+ "goal": "",
700
+ "tool": tool_match.group(1),
701
+ "tool_args": tool_args,
702
+ }
703
+
704
+ # Add plan if found
705
+ if plan_match:
706
+ try:
707
+ result["plan"] = json.loads(plan_match.group(1))
708
+ logger.debug(f"Extracted plan using regex: {result['plan']}")
709
+ except json.JSONDecodeError as e:
710
+ error_msg = f"Failed to parse plan JSON: {str(e)}, content: {plan_match.group(1)[:100]}..."
711
+ logger.error(error_msg)
712
+ self.error_history.append(error_msg)
713
+
714
+ logger.debug(f"Extracted tool call using regex: {result}")
715
+ return result
716
+
717
+ # Try to match simple key-value patterns for object names (like ': "my_cube"')
718
+ obj_name_match = re.search(
719
+ r'["\':]?\s*["\']?([a-zA-Z0-9_\.]+)["\']?', response
720
+ )
721
+ if obj_name_match:
722
+ object_name = obj_name_match.group(1)
723
+ # If it looks like an object name and not just a random word
724
+ if "." in object_name or "_" in object_name:
725
+ logger.debug(f"Found potential object name: {object_name}")
726
+ return {
727
+ "thought": "Extracted object name",
728
+ "goal": "Use the object name",
729
+ "answer": object_name,
730
+ }
731
+
732
+ # FALLBACK: Response began with '{' but could not be parsed or repaired as
732
+ # JSON; treat the raw text as a conversational answer instead of failing.
734
+ logger.debug(
735
+ f"[PARSE] Unparseable JSON-like response, treating as conversational. Length: {len(response)}, preview: {response[:100]}..."
736
+ )
737
+
738
+ # If response is empty, provide a meaningful fallback
739
+ if not response.strip():
740
+ logger.warning("[PARSE] Empty response received from LLM")
741
+ return {
742
+ "thought": "",
743
+ "goal": "",
744
+ "answer": "I apologize, but I received an empty response. Please try again.",
745
+ }
746
+
747
+ # Valid conversational response - wrap it in expected format
748
+ return {"thought": "", "goal": "", "answer": response.strip()}
749
+
750
+ def _execute_tool(self, tool_name: str, tool_args: Dict[str, Any]) -> Any:
751
+ """
752
+ Execute a tool by name with the provided arguments.
753
+
754
+ Args:
755
+ tool_name: Name of the tool to execute
756
+ tool_args: Arguments to pass to the tool
757
+
758
+ Returns:
759
+ Result of the tool execution
760
+ """
761
+ logger.debug(f"Executing tool {tool_name} with args: {tool_args}")
762
+
763
+ if tool_name not in _TOOL_REGISTRY:
764
+ logger.error(f"Tool '{tool_name}' not found in registry")
765
+ return {"status": "error", "error": f"Tool '{tool_name}' not found"}
766
+
767
+ tool = _TOOL_REGISTRY[tool_name]["function"]
768
+ sig = inspect.signature(tool)
769
+
770
+ # Get required parameters (those without defaults)
771
+ required_args = {
772
+ name: param
773
+ for name, param in sig.parameters.items()
774
+ if param.default == inspect.Parameter.empty and name != "return"
775
+ }
776
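+ # Illustrative (hypothetical tool): for a registered
+ # def read_file(path: str, encoding: str = "utf-8") -> str
+ # only "path" lands in required_args, so a call omitting it is
+ # rejected below before the tool ever runs.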
+
777
+ # Check for missing required arguments
778
+ missing_args = [arg for arg in required_args if arg not in tool_args]
779
+ if missing_args:
780
+ error_msg = (
781
+ f"Missing required arguments for {tool_name}: {', '.join(missing_args)}"
782
+ )
783
+ logger.error(error_msg)
784
+ return {"status": "error", "error": error_msg}
785
+
786
+ try:
787
+ result = tool(**tool_args)
788
+ logger.debug(f"Tool execution result: {result}")
789
+ return result
790
+ except subprocess.TimeoutExpired as e:
791
+ # Handle subprocess timeout specifically
792
+ error_msg = f"Tool {tool_name} timed out: {str(e)}"
793
+ logger.error(error_msg)
794
+ self.error_history.append(error_msg)
795
+ return {"status": "error", "error": error_msg, "timeout": True}
796
+ except Exception as e:
797
+ # Format error with full execution trace for debugging
798
+ formatted_error = format_execution_trace(
799
+ exception=e,
800
+ query=getattr(self, "_current_query", None),
801
+ plan_step=self.current_step + 1 if self.current_plan else None,
802
+ total_steps=self.total_plan_steps if self.current_plan else None,
803
+ tool_name=tool_name,
804
+ tool_args=tool_args,
805
+ )
806
+ logger.error(f"Error executing tool {tool_name}: {e}")
807
+ self.error_history.append(str(e)) # Store brief error, not formatted
808
+
809
+ # Print to console immediately so user sees it
810
+ self.console.print_error(formatted_error)
811
+
812
+ return {
813
+ "status": "error",
814
+ "error_brief": str(e), # Brief error message for quick reference
815
+ "error_displayed": True, # Flag to prevent duplicate display
816
+ "tool_name": tool_name,
817
+ "tool_args": tool_args,
818
+ "plan_step": self.current_step + 1 if self.current_plan else None,
819
+ }
820
+
821
+ def _generate_max_steps_message(
822
+ self, conversation: List[Dict], steps_taken: int, steps_limit: int
823
+ ) -> str:
824
+ """Generate informative message when max steps is reached.
825
+
826
+ Args:
827
+ conversation: The conversation history
828
+ steps_taken: Number of steps actually taken
829
+ steps_limit: Maximum steps allowed
830
+
831
+ Returns:
832
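+
+ Example (illustrative; tool name and counts are hypothetical):
+ ⚠️ Reached maximum steps limit (50 steps)
+ Completed 50 steps using these tools:
+ - read_file: 12x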
+ Informative message about what was accomplished
833
+ """
834
+ # Analyze what was done
835
+ tool_calls = [
836
+ msg
837
+ for msg in conversation
838
+ if msg.get("role") == "assistant" and "tool_calls" in msg
839
+ ]
840
+
841
+ tools_used = []
842
+ for msg in tool_calls:
843
+ for tool_call in msg.get("tool_calls", []):
844
+ if "function" in tool_call:
845
+ tools_used.append(tool_call["function"]["name"])
846
+
847
+ message = f"⚠️ Reached maximum steps limit ({steps_limit} steps)\n\n"
848
+ message += f"Completed {steps_taken} steps using these tools:\n"
849
+
850
+ # Count tool usage
851
+ from collections import Counter
852
+
853
+ tool_counts = Counter(tools_used)
854
+ for tool, count in tool_counts.most_common(10):
855
+ message += f" - {tool}: {count}x\n"
856
+
857
+ message += "\nTo continue or complete this task:\n"
858
+ message += "1. Review the generated files and progress so far\n"
859
+ message += f"2. Run with --max-steps {steps_limit + 50} to allow more steps\n"
860
+ message += "3. Or complete remaining tasks manually\n"
861
+
862
+ return message
863
+
864
+ def _write_json_to_file(self, data: Dict[str, Any], filename: Optional[str] = None) -> str:
865
+ """
866
+ Write JSON data to a file and return the absolute path.
867
+
868
+ Args:
869
+ data: Dictionary data to write as JSON
870
+ filename: Optional filename, if None a timestamped name will be generated
871
+
872
+ Returns:
873
+ Absolute path to the saved file
874
+ """
875
+ # Ensure output directory exists
876
+ os.makedirs(self.output_dir, exist_ok=True)
877
+
878
+ # Generate filename if not provided
879
+ if not filename:
880
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
881
+ filename = f"agent_output_{timestamp}.json"
882
+
883
+ # Ensure filename has .json extension
884
+ if not filename.endswith(".json"):
885
+ filename += ".json"
886
+
887
+ # Create absolute path
888
+ file_path = os.path.join(self.output_dir, filename)
889
+
890
+ # Write JSON data to file
891
+ with open(file_path, "w", encoding="utf-8") as f:
892
+ json.dump(data, f, indent=2)
893
+
894
+ return os.path.abspath(file_path)
895
+
896
+ def _handle_large_tool_result(
897
+ self,
898
+ tool_name: str,
899
+ tool_result: Any,
900
+ conversation: List[Dict[str, Any]],
901
+ tool_args: Optional[Dict[str, Any]] = None,
902
+ ) -> Any:
903
+ """
904
+ Handle large tool results by truncating them if necessary.
905
+
906
+ Args:
907
+ tool_name: Name of the executed tool
908
+ tool_result: The result from tool execution
909
+ conversation: The conversation list to append to
910
+ tool_args: Arguments passed to the tool (optional)
911
+
912
+ Returns:
913
+ The truncated result or original if within limits
914
+ """
915
+ truncated_result = tool_result
916
+ if isinstance(tool_result, (dict, list)):
917
+ # Use custom encoder to handle bytes and other non-serializable types
918
+ result_str = json.dumps(tool_result, default=self._json_serialize_fallback)
919
+ if (
920
+ len(result_str) > 30000
921
+ ): # Threshold for truncation (appropriate for 32K context)
922
+ # Truncate large results to prevent overwhelming the LLM
923
+ truncated_str = self._truncate_large_content(
924
+ tool_result, max_chars=20000 # Increased for 32K context
925
+ )
926
+ try:
927
+ truncated_result = json.loads(truncated_str)
928
+ except json.JSONDecodeError:
929
+ # If truncated string isn't valid JSON, use it as-is
930
+ truncated_result = truncated_str
931
+ # Notify user about truncation
932
+ self.console.print_info(
933
+ f"Note: Large result ({len(result_str)} chars) truncated for LLM context"
934
+ )
935
+ if self.debug:
936
+ print(f"[DEBUG] Tool result truncated from {len(result_str)} chars")
937
+
938
+ # Add to conversation
939
+ tool_entry: Dict[str, Any] = {
940
+ "role": "tool",
941
+ "name": tool_name,
942
+ "content": truncated_result,
943
+ }
944
+ if tool_args is not None:
945
+ tool_entry["tool_args"] = tool_args
946
+ conversation.append(tool_entry)
947
+ return truncated_result
948
+
949
+ def _create_tool_message(self, tool_name: str, tool_output: Any) -> Dict[str, Any]:
950
+ """
951
+ Build a message structure representing a tool output for downstream LLM calls.
952
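+
+ Illustrative shape of the returned message (tool name hypothetical):
+ {"role": "tool", "name": "read_file", "tool_call_id": "<hex>",
+ "content": [{"type": "text", "text": "..."}]}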
+ """
953
+ if isinstance(tool_output, str):
954
+ text_content = tool_output
955
+ else:
956
+ text_content = self._truncate_large_content(tool_output, max_chars=2000)
957
+
958
+ if not isinstance(text_content, str):
959
+ text_content = json.dumps(
960
+ tool_output, default=self._json_serialize_fallback
961
+ )
962
+
963
+ return {
964
+ "role": "tool",
965
+ "name": tool_name,
966
+ "tool_call_id": uuid.uuid4().hex,
967
+ "content": [{"type": "text", "text": text_content}],
968
+ }
969
+
970
+ def _json_serialize_fallback(self, obj: Any) -> Any:
971
+ """
972
+ Fallback serializer for JSON encoding non-standard types.
973
+
974
+ Handles bytes, datetime, and other common non-serializable types.
975
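+
+ Example (illustrative):
+ json.dumps({"raw": b"abc"}, default=self._json_serialize_fallback)
+ # -> '{"raw": "<binary data: 3 bytes>"}'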
+ """
976
+ try:
977
+ import numpy as np # Local import to avoid hard dependency at module import time
978
+
979
+ if isinstance(obj, np.generic):
980
+ return obj.item()
981
+ if isinstance(obj, np.ndarray):
982
+ return obj.tolist()
983
+ except Exception:
984
+ pass
985
+
986
+ if isinstance(obj, bytes):
987
+ # For binary data, return a placeholder (don't expose raw bytes to LLM)
988
+ return f"<binary data: {len(obj)} bytes>"
989
+ if hasattr(obj, "isoformat"):
990
+ # Handle datetime objects
991
+ return obj.isoformat()
992
+ if hasattr(obj, "__dict__"):
993
+ # Handle objects with __dict__
994
+ return obj.__dict__
995
+
996
+ for caster in (float, int, str):
997
+ try:
998
+ return caster(obj)
999
+ except Exception:
1000
+ continue
1001
+
1002
+ return "<non-serializable>"
1003
+
1004
+ def _truncate_large_content(self, content: Any, max_chars: int = 2000) -> str:
1005
+ """
1006
+ Truncate large content to prevent overwhelming the LLM.
1007
+ Defaults to 2000 chars; callers pass larger limits (e.g. 20000 for a 32K-token context window).
1008
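+
+ Illustrative: with max_chars=2000 a 10,000-char string comes back as
+ roughly the first and last ~980 chars joined by "...[truncated]...";
+ dicts with "chunks" or "issues" keys get the structured handling below.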
+ """
1009
+
1010
+ # Never truncate output that contains test results, since those
1011
+ # results can carry the details needed to diagnose and
1012
+ # fix failing tests.
1013
+ if isinstance(content, dict) and (
1014
+ "test_results" in content or "run_tests" in content
1015
+ ):
1016
+ return json.dumps(content, default=self._json_serialize_fallback)
1017
+
1018
+ # Convert to string (use compact JSON first to check size)
1019
+ if isinstance(content, (dict, list)):
1020
+ compact_str = json.dumps(content, default=self._json_serialize_fallback)
1021
+ # Only use indented format if we need to truncate anyway
1022
+ content_str = (
1023
+ json.dumps(content, indent=2, default=self._json_serialize_fallback)
1024
+ if len(compact_str) > max_chars
1025
+ else compact_str
1026
+ )
1027
+ else:
1028
+ content_str = str(content)
1029
+
1030
+ # Return as-is if within limits
1031
+ if len(content_str) <= max_chars:
1032
+ return content_str
1033
+
1034
+ # For responses with chunks (e.g., search results, document retrieval)
1035
+ if (
1036
+ isinstance(content, dict)
1037
+ and "chunks" in content
1038
+ and isinstance(content["chunks"], list)
1039
+ ):
1040
+ truncated = content.copy()
1041
+
1042
+ # Keep all chunks but truncate individual chunk content if needed
1043
+ if "chunks" in truncated:
1044
+ for chunk in truncated["chunks"]:
1045
+ if isinstance(chunk, dict) and "content" in chunk:
1046
+ # Keep full content for chunks (they're the actual data)
1047
+ # Only truncate if a single chunk is massive
1048
+ if len(chunk["content"]) > CHUNK_TRUNCATION_THRESHOLD:
1049
+ chunk["content"] = (
1050
+ chunk["content"][:CHUNK_TRUNCATION_SIZE]
1051
+ + "\n...[chunk truncated]...\n"
1052
+ + chunk["content"][-CHUNK_TRUNCATION_SIZE:]
1053
+ )
1054
+
1055
+ result_str = json.dumps(
1056
+ truncated, indent=2, default=self._json_serialize_fallback
1057
+ )
1058
+ # Use larger limit for chunked responses since chunks are the actual data
1059
+ if len(result_str) <= max_chars * 3: # Allow 3x the limit (~60KB when max_chars=20000)
1060
+ return result_str
1061
+ # If still too large, keep first 3 chunks only
1062
+ truncated["chunks"] = truncated["chunks"][:3]
1063
+ return json.dumps(
1064
+ truncated, indent=2, default=self._json_serialize_fallback
1065
+ )
1066
+
1067
+ # For Jira responses, keep first 3 issues
1068
+ if (
1069
+ isinstance(content, dict)
1070
+ and "issues" in content
1071
+ and isinstance(content["issues"], list)
1072
+ ):
1073
+ truncated = {
1074
+ **content,
1075
+ "issues": content["issues"][:3],
1076
+ "truncated": True,
1077
+ "total": len(content["issues"]),
1078
+ }
1079
+ return json.dumps(
1080
+ truncated, indent=2, default=self._json_serialize_fallback
1081
+ )[:max_chars]
1082
+
1083
+ # For lists, keep first 3 items
1084
+ if isinstance(content, list):
1085
+ truncated = (
1086
+ content[:3] + [{"truncated": f"{len(content) - 3} more"}]
1087
+ if len(content) > 3
1088
+ else content
1089
+ )
1090
+ return json.dumps(
1091
+ truncated, indent=2, default=self._json_serialize_fallback
1092
+ )[:max_chars]
1093
+
1094
+ # Simple truncation
1095
+ half = max_chars // 2 - 20
1096
+ return f"{content_str[:half]}\n...[truncated]...\n{content_str[-half:]}"
1097
+
1098
+ def process_query(
1099
+ self,
1100
+ user_input: str,
1101
+ max_steps: Optional[int] = None,
1102
+ trace: bool = False,
1103
+ filename: Optional[str] = None,
1104
+ ) -> Dict[str, Any]:
1105
+ """
1106
+ Process a user query and execute the necessary tools.
1107
+ Displays each step as it's being generated in real-time.
1108
+
1109
+ Args:
1110
+ user_input: User's query or request
1111
+ max_steps: Maximum number of steps to take in the conversation (overrides class default if provided)
1112
+ trace: If True, write detailed JSON trace to file
1113
+ filename: Optional filename for trace output, if None a timestamped name will be generated
1114
+
1115
+ Returns:
1116
+ Dict containing the final result and operation details
1117
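+
+ Example (illustrative; agent construction elided):
+ result = agent.process_query("Summarize the repo README", max_steps=10)
+ if result["status"] == "success":
+ print(result["result"], result["total_tokens"])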
+ """
1118
+ import time
1119
+
1120
+ start_time = time.time() # Track query processing start time
1121
+
1122
+ # Store query for error context (used in _execute_tool for error formatting)
1123
+ self._current_query = user_input
1124
+
1125
+ logger.debug(f"Processing query: {user_input}")
1126
+ conversation = []
1127
+ # Build messages array for chat completions
1128
+ messages = []
1129
+
1130
+ # Prepopulate with conversation history if available (for session persistence)
1131
+ if hasattr(self, "conversation_history") and self.conversation_history:
1132
+ messages.extend(self.conversation_history)
1133
+ logger.debug(
1134
+ f"Loaded {len(self.conversation_history)} messages from conversation history"
1135
+ )
1136
+
1137
+ steps_taken = 0
1138
+ final_answer = None
1139
+ error_count = 0
1140
+ last_tool_call = None # Track the last tool call to prevent loops
1141
+ last_error = None # Track the last error to handle it properly
1142
+ previous_outputs = [] # Track previous tool outputs
1143
+
1144
+ # Reset state management
1145
+ self.execution_state = self.STATE_PLANNING
1146
+ self.current_plan = None
1147
+ self.current_step = 0
1148
+ self.total_plan_steps = 0
1149
+ self.plan_iterations = 0 # Reset plan iteration counter
1150
+
1151
+ # Add user query to the conversation history
1152
+ conversation.append({"role": "user", "content": user_input})
1153
+ messages.append({"role": "user", "content": user_input})
1154
+
1155
+ # Use provided max_steps or fall back to class default
1156
+ steps_limit = max_steps if max_steps is not None else self.max_steps
1157
+
1158
+ # Print initial message with max steps info
1159
+ self.console.print_processing_start(user_input, steps_limit)
1160
+ logger.debug(f"Using max_steps: {steps_limit}")
1161
+
1162
+ prompt = f"User request: {user_input}\n\n"
1163
+
1164
+ # Only add planning reminder in PLANNING state
1165
+ if self.execution_state == self.STATE_PLANNING:
1166
+ prompt += (
1167
+ "IMPORTANT: ALWAYS BEGIN WITH A PLAN before executing any tools.\n"
1168
+ "First create a detailed plan with all necessary steps, then execute the first step.\n"
1169
+ "When creating a plan with multiple steps:\n"
1170
+ " 1. ALWAYS follow the plan in the correct order, starting with the FIRST step.\n"
1171
+ " 2. Include both a plan and a 'tool' field; the 'tool' field MUST match the tool in the first step of the plan.\n"
1172
+ " 3. Create plans with clear, executable steps that include both the tool name and the exact arguments for each step.\n"
1173
+ )
1174
+
1175
+ logger.debug(f"Input prompt: {prompt[:200]}...")
1176
+
1177
+ # Process the query in steps, allowing for multiple tool usages
1178
+ while steps_taken < steps_limit and final_answer is None:
1179
+ # Build the next prompt based on current state (this is for fallback mode only)
1180
+ # In chat mode, we'll just add to messages array
1181
+ steps_taken += 1
1182
+ logger.debug(f"Step {steps_taken}/{steps_limit}")
1183
+
1184
+ # Display current step
1185
+ self.console.print_step_header(steps_taken, steps_limit)
1186
+
1187
+ # Skip automatic finalization for single-step plans - always request proper final answer
1188
+
1189
+ # If we're executing a plan, we might not need to query the LLM again
1190
+ if (
1191
+ self.execution_state == self.STATE_EXECUTING_PLAN
1192
+ and self.current_step < self.total_plan_steps
1193
+ ):
1194
+ logger.debug(
1195
+ f"Executing plan step {self.current_step + 1}/{self.total_plan_steps}"
1196
+ )
1197
+ self.console.print_state_info(
1198
+ f"EXECUTING PLAN: Step {self.current_step + 1}/{self.total_plan_steps}"
1199
+ )
1200
+
1201
+ # Display the current plan with the current step highlighted
1202
+ if self.current_plan:
1203
+ self.console.print_plan(self.current_plan, self.current_step)
1204
+
1205
+ # Extract next step from plan
1206
+ next_step = self.current_plan[self.current_step]
1207
+
1208
+ if (
1209
+ isinstance(next_step, dict)
1210
+ and "tool" in next_step
1211
+ and "tool_args" in next_step
1212
+ ):
1213
+ # We have a properly formatted step with tool and args
1214
+ tool_name = next_step["tool"]
1215
+ tool_args = next_step["tool_args"]
1216
+
1217
+ # Create a parsed response structure as if it came from the LLM
1218
+ parsed = {
1219
+ "thought": f"Executing step {self.current_step + 1} of the plan",
1220
+ "goal": f"Following the plan to {user_input}",
1221
+ "tool": tool_name,
1222
+ "tool_args": tool_args,
1223
+ }
1224
+
1225
+ # Add to conversation
1226
+ conversation.append({"role": "assistant", "content": parsed})
1227
+
1228
+ # Display the agent's reasoning for the step
1229
+ self.console.print_thought(
1230
+ parsed.get("thought", "Executing plan step")
1231
+ )
1232
+ self.console.print_goal(parsed.get("goal", "Following the plan"))
1233
+
1234
+ # Display the tool call in real-time
1235
+ self.console.print_tool_usage(tool_name)
1236
+
1237
+ # Start progress indicator for tool execution
1238
+ self.console.start_progress(f"Executing {tool_name}")
1239
+
1240
+ # Execute the tool
1241
+ tool_result = self._execute_tool(tool_name, tool_args)
1242
+
1243
+ # Stop progress indicator
1244
+ self.console.stop_progress()
1245
+
1246
+ # Handle domain-specific post-processing
1247
+ self._post_process_tool_result(tool_name, tool_args, tool_result)
1248
+
1249
+ # Handle large tool results
1250
+ truncated_result = self._handle_large_tool_result(
1251
+ tool_name, tool_result, conversation, tool_args
1252
+ )
1253
+
1254
+ # Display the tool result in real-time (show full result to user)
1255
+ self.console.print_tool_complete()
1256
+
1257
+ self.console.pretty_print_json(tool_result, "Tool Result")
1258
+
1259
+ # Store the truncated output for future context
1260
+ previous_outputs.append(
1261
+ {
1262
+ "tool": tool_name,
1263
+ "args": tool_args,
1264
+ "result": truncated_result,
1265
+ }
1266
+ )
1267
+
1268
+ # Share tool output with subsequent LLM calls
1269
+ messages.append(
1270
+ self._create_tool_message(tool_name, truncated_result)
1271
+ )
1272
+
1273
+ # Check for error (support multiple error formats)
1274
+ is_error = isinstance(tool_result, dict) and (
1275
+ tool_result.get("status") == "error" # Standard format
1276
+ or tool_result.get("success")
1277
+ is False # Tools returning success: false
1278
+ or tool_result.get("has_errors") is True # CLI tools
1279
+ or tool_result.get("return_code", 0) != 0 # Build failures
1280
+ )
1281
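+ # e.g. {"status": "error", ...}, {"success": false, ...}, or a
+ # CLI-style {"return_code": 1, ...} all count as failures here.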
+
1282
+ if is_error:
1283
+ error_count += 1
1284
+ # Extract error message from various formats
1285
+ # Prefer error_brief for logging (avoids duplicate formatted output)
1286
+ last_error = (
1287
+ tool_result.get("error_brief")
1288
+ or tool_result.get("error")
1289
+ or tool_result.get("stderr")
1290
+ or tool_result.get("hint") # Many tools provide hints
1291
+ or tool_result.get(
1292
+ "suggested_fix"
1293
+ ) # Some tools provide fix suggestions
1294
+ or f"Command failed with return code {tool_result.get('return_code')}"
1295
+ )
1296
+ logger.warning(
1297
+ f"Tool execution error in plan (count: {error_count}): {last_error}"
1298
+ )
1299
+ # Only print if error wasn't already displayed by _execute_tool
1300
+ if not tool_result.get("error_displayed"):
1301
+ self.console.print_error(last_error)
1302
+
1303
+ # Switch to error recovery state
1304
+ self.execution_state = self.STATE_ERROR_RECOVERY
1305
+ self.console.print_state_info(
1306
+ "ERROR RECOVERY: Handling tool execution failure"
1307
+ )
1308
+
1309
+ # Break out of plan execution to trigger error recovery prompt
1310
+ continue
1311
+ else:
1312
+ # Success - move to next step in plan
1313
+ self.current_step += 1
1314
+
1315
+ # Check if we've completed the plan
1316
+ if self.current_step >= self.total_plan_steps:
1317
+ logger.debug("Plan execution completed")
1318
+ self.execution_state = self.STATE_COMPLETION
1319
+ self.console.print_state_info(
1320
+ "COMPLETION: Plan fully executed"
1321
+ )
1322
+
1323
+ # Increment plan iteration counter
1324
+ self.plan_iterations += 1
1325
+ logger.debug(
1326
+ f"Plan iteration {self.plan_iterations} completed"
1327
+ )
1328
+
1329
+ # Check if we've reached max plan iterations
1330
+ reached_max_iterations = (
1331
+ self.max_plan_iterations > 0
1332
+ and self.plan_iterations >= self.max_plan_iterations
1333
+ )
1334
+
1335
+ # Prepare message for final answer with the completed plan context
1336
+ plan_context = {
1337
+ "completed_plan": self.current_plan,
1338
+ "total_steps": self.total_plan_steps,
1339
+ }
1340
+ plan_context_raw = json.dumps(
1341
+ plan_context, default=self._json_serialize_fallback
1342
+ )
1343
+ if len(plan_context_raw) > 20000:
1344
+ plan_context_str = self._truncate_large_content(
1345
+ plan_context, max_chars=20000
1346
+ )
1347
+ else:
1348
+ plan_context_str = plan_context_raw
1349
+
1350
+ if reached_max_iterations:
1351
+ # Force final answer after max iterations
1352
+ completion_message = (
1353
+ f"Maximum plan iterations ({self.max_plan_iterations}) reached.\n"
1354
+ f"Task: {user_input}\n"
1355
+ f"Plan information:\n{plan_context_str}\n\n"
1356
+ f"IMPORTANT: You MUST now provide a final answer with an honest assessment:\n"
1357
+ f"- Summarize what was successfully accomplished\n"
1358
+ f"- Clearly state if anything remains incomplete or if errors occurred\n"
1359
+ f"- If the task is fully complete, state that clearly\n\n"
1360
+ f'Provide {{"thought": "...", "goal": "...", "answer": "..."}}'
1361
+ )
1362
+ else:
1363
+ completion_message = (
1364
+ "You have successfully completed all steps in the plan.\n"
1365
+ f"Task: {user_input}\n"
1366
+ f"Plan information:\n{plan_context_str}\n\n"
1367
+ f"Plan iteration: {self.plan_iterations}/{self.max_plan_iterations if self.max_plan_iterations > 0 else 'unlimited'}\n"
1368
+ "Check if more work is needed:\n"
1369
+ "- If the task is complete and verified, provide a final answer\n"
1370
+ "- If critical validation/testing is needed, you may create ONE more plan\n"
1371
+ "- Only create additional plans if absolutely necessary\n\n"
1372
+ 'If more work needed: Provide a NEW plan with {"thought": "...", "goal": "...", "plan": [...]}\n'
1373
+ 'If everything is complete: Provide {"thought": "...", "goal": "...", "answer": "..."}'
1374
+ )
1375
+
1376
+ # Debug logging - only show if truncation happened
1377
+ if self.debug and len(plan_context_raw) > 2000:
1378
+ print(
1379
+ "\n[DEBUG] Plan context truncated for completion message"
1380
+ )
1381
+
1382
+ # Add completion request to messages
1383
+ messages.append(
1384
+ {"role": "user", "content": completion_message}
1385
+ )
1386
+
1387
+ # Send the completion prompt to get final answer
1388
+ self.console.print_state_info(
1389
+ "COMPLETION: Requesting final answer"
1390
+ )
1391
+
1392
+ # Continue to next iteration to get final answer
1393
+ continue
1394
+ else:
1395
+ # Continue with next step - no need to query LLM again
1396
+ continue
1397
+ else:
1398
+ # Plan step doesn't have proper format, fall back to LLM
1399
+ logger.warning(
1400
+ f"Plan step {self.current_step + 1} doesn't have proper format: {next_step}"
1401
+ )
1402
+ self.console.print_warning(
1403
+ f"Plan step {self.current_step + 1} format incorrect, asking LLM for guidance"
1404
+ )
1405
+ prompt = (
1406
+ f"You are following a plan but step {self.current_step + 1} doesn't have proper format: {next_step}\n"
1407
+ "Please interpret this step and decide what tool to use next.\n\n"
1408
+ f"Task: {user_input}\n\n"
1409
+ )
1410
+ else:
1411
+ # Normal execution flow - query the LLM
1412
+ if self.execution_state == self.STATE_DIRECT_EXECUTION:
1413
+ self.console.print_state_info("DIRECT EXECUTION: Analyzing task")
1414
+ elif self.execution_state == self.STATE_PLANNING:
1415
+ self.console.print_state_info("PLANNING: Creating or refining plan")
1416
+ elif self.execution_state == self.STATE_ERROR_RECOVERY:
1417
+ self.console.print_state_info(
1418
+ "ERROR RECOVERY: Handling previous error"
1419
+ )
1420
+
1421
+ # Truncate previous outputs if too large to avoid overwhelming the LLM
1422
+ truncated_outputs = (
1423
+ self._truncate_large_content(previous_outputs, max_chars=500)
1424
+ if previous_outputs
1425
+ else "None"
1426
+ )
1427
+
1428
+ # Create a specific error recovery prompt
1429
+ prompt = (
1430
+ "TOOL EXECUTION FAILED!\n\n"
1431
+ f"You were trying to execute: {last_tool_call[0] if last_tool_call else 'unknown tool'}\n"
1432
+ f"Error: {last_error}\n\n"
1433
+ f"Original task: {user_input}\n\n"
1434
+ f"Current plan step {self.current_step + 1}/{self.total_plan_steps} failed.\n"
1435
+ f"Current plan: {self.current_plan}\n\n"
1436
+ f"Previous successful outputs: {truncated_outputs}\n\n"
1437
+ "INSTRUCTIONS:\n"
1438
+ "1. Analyze the error and understand what went wrong\n"
1439
+ "2. Create a NEW corrected plan that fixes the error\n"
1440
+ "3. Make sure to use correct tool parameters (check the available tools)\n"
1441
+ "4. Start executing the corrected plan\n\n"
1442
+ "Respond with your analysis, a corrected plan, and the first tool to execute."
1443
+ )
1444
+
1445
+ # Add the error recovery prompt to the messages array so it gets sent to LLM
1446
+ messages.append({"role": "user", "content": prompt})
1447
+
1448
+ # Reset state to planning after creating recovery prompt
1449
+ self.execution_state = self.STATE_PLANNING
1450
+ self.current_plan = None
1451
+ self.current_step = 0
1452
+ self.total_plan_steps = 0
1453
+
1454
+ elif self.execution_state == self.STATE_COMPLETION:
1455
+ self.console.print_state_info("COMPLETION: Finalizing response")
1456
+
1457
+ # Print the prompt if show_prompts is enabled (separate from debug_prompts)
1458
+ if self.show_prompts:
1459
+ # Build context from system prompt and messages
1460
+ context_parts = [
1461
+ (
1462
+ f"SYSTEM: {self.system_prompt[:200]}..."
1463
+ if len(self.system_prompt) > 200
1464
+ else f"SYSTEM: {self.system_prompt}"
1465
+ )
1466
+ ]
1467
+
1468
+ for msg in messages:
1469
+ role = msg.get("role", "user").upper()
1470
+ content = str(msg.get("content", ""))[:150]
1471
+ context_parts.append(
1472
+ f"{role}: {content}{'...' if len(str(msg.get('content', ''))) > 150 else ''}"
1473
+ )
1474
+
1475
+ if not messages and prompt:
1476
+ context_parts.append(
1477
+ f"USER: {prompt[:150]}{'...' if len(prompt) > 150 else ''}"
1478
+ )
1479
+
1480
+ self.console.print_prompt("\n".join(context_parts), "LLM Context")
1481
+
1482
+ # Handle streaming or non-streaming LLM response
1483
+ # Initialize response_stats so it's always in scope
1484
+ response_stats = None
1485
+
1486
+ if self.streaming:
1487
+ # Streaming mode - raw response will be streamed
1488
+ # (SilentConsole will suppress this, AgentConsole will show it)
1489
+
1490
+ # Add prompt to conversation if debug is enabled
1491
+ if self.debug_prompts:
1492
+ conversation.append(
1493
+ {"role": "system", "content": {"prompt": prompt}}
1494
+ )
1495
+ # Print the prompt if show_prompts is enabled
1496
+ if self.show_prompts:
1497
+ self.console.print_prompt(
1498
+ prompt, f"Prompt (Step {steps_taken})"
1499
+ )
1500
+
1501
+ # Get streaming response from ChatSDK with proper conversation history
1502
+ try:
1503
+ response_stream = self.chat.send_messages_stream(
1504
+ messages=messages, system_prompt=self.system_prompt
1505
+ )
1506
+
1507
+ # Process the streaming response chunks as they arrive
1508
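+ # Each chunk carries incremental .text; the final chunk reports
+ # .is_complete=True and carries aggregate .stats instead of text.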
+ full_response = ""
1509
+ for chunk_response in response_stream:
1510
+ if chunk_response.is_complete:
1511
+ response_stats = chunk_response.stats
1512
+ else:
1513
+ self.console.print_streaming_text(chunk_response.text)
1514
+ full_response += chunk_response.text
1515
+
1516
+ self.console.print_streaming_text("", end_of_stream=True)
1517
+ response = full_response
1518
+ except ConnectionError as e:
1519
+ # Handle LLM server connection errors specifically
1520
+ error_msg = f"LLM Server Connection Failed (streaming): {str(e)}"
1521
+ logger.error(error_msg)
1522
+ self.console.print_error(error_msg)
1523
+
1524
+ # Add error to history
1525
+ self.error_history.append(
1526
+ {
1527
+ "step": steps_taken,
1528
+ "error": error_msg,
1529
+ "type": "llm_connection_error",
1530
+ }
1531
+ )
1532
+
1533
+ # Return error response
1534
+ final_answer = (
1535
+ f"Unable to complete task due to LLM server error: {str(e)}"
1536
+ )
1537
+ break
1538
+ except Exception as e:
1539
+ logger.error(f"Unexpected error during streaming: {e}")
1540
+
1541
+ # Add to error history
1542
+ self.error_history.append(
1543
+ {
1544
+ "step": steps_taken,
1545
+ "error": str(e),
1546
+ "type": "llm_streaming_error",
1547
+ }
1548
+ )
1549
+
1550
+ # Return error response
1551
+ final_answer = (
1552
+ f"Unable to complete task due to streaming error: {str(e)}"
1553
+ )
1554
+ break
1555
+ else:
1556
+ # Use progress indicator for non-streaming mode
1557
+ self.console.start_progress("Thinking")
1558
+
1559
+ # Debug logging before LLM call
1560
+ if self.debug:
1561
+
1562
+ print(f"\n[DEBUG] About to call LLM with {len(messages)} messages")
1563
+ print(
1564
+ f"[DEBUG] Last message role: {messages[-1]['role'] if messages else 'No messages'}"
1565
+ )
1566
+ if messages and len(messages[-1].get("content", "")) < 500:
1567
+ print(
1568
+ f"[DEBUG] Last message content: {messages[-1]['content']}"
1569
+ )
1570
+ else:
1571
+ print(
1572
+ f"[DEBUG] Last message content length: {len(messages[-1].get('content', ''))}"
1573
+ )
1574
+ print(f"[DEBUG] Execution state: {self.execution_state}")
1575
+ if self.execution_state == "PLANNING":
1576
+ print("[DEBUG] Current step: Planning (no active plan yet)")
1577
+ else:
1578
+ print(
1579
+ f"[DEBUG] Current step: {self.current_step}/{self.total_plan_steps}"
1580
+ )
1581
+
1582
+ # Get complete response from ChatSDK
1583
+ try:
1584
+ chat_response = self.chat.send_messages(
1585
+ messages=messages, system_prompt=self.system_prompt
1586
+ )
1587
+ response = chat_response.text
1588
+ response_stats = chat_response.stats
1589
+ except ConnectionError as e:
1590
+ error_msg = f"LLM Server Connection Failed: {str(e)}"
1591
+ logger.error(error_msg)
1592
+ self.console.print_error(error_msg)
1593
+
1594
+ # Add error to history
1595
+ self.error_history.append(
1596
+ {
1597
+ "step": steps_taken,
1598
+ "error": error_msg,
1599
+ "type": "llm_connection_error",
1600
+ }
1601
+ )
1602
+
1603
+ # Return error response
1604
+ final_answer = (
1605
+ f"Unable to complete task due to LLM server error: {str(e)}"
1606
+ )
1607
+ break
1608
+ except Exception as e:
1609
+ if self.debug:
1610
+ print(f"[DEBUG] Error calling LLM: {e}")
1611
+ logger.error(f"Unexpected error calling LLM: {e}")
1612
+
1613
+ # Add to error history
1614
+ self.error_history.append(
1615
+ {"step": steps_taken, "error": str(e), "type": "llm_error"}
1616
+ )
1617
+
1618
+ # Return error response
1619
+ final_answer = f"Unable to complete task due to error: {str(e)}"
1620
+ break
1621
+
1622
+ # Stop the progress indicator
1623
+ self.console.stop_progress()
1624
+
1625
+ # Print the LLM response to the console
1626
+ logger.debug(f"LLM response: {response[:200]}...")
1627
+ if self.show_prompts:
1628
+ self.console.print_response(response, "LLM Response")
1629
+
1630
+ # Parse the response
1631
+ parsed = self._parse_llm_response(response)
1632
+ logger.debug(f"Parsed response: {parsed}")
1633
+ conversation.append({"role": "assistant", "content": parsed})
1634
+
1635
+ # Add assistant response to messages for chat history
1636
+ messages.append({"role": "assistant", "content": response})
1637
+
1638
+ # If the LLM needs to create a plan first, re-prompt it specifically for that
1639
+ if "needs_plan" in parsed and parsed["needs_plan"]:
1640
+ # Prepare a special prompt that specifically requests a plan
1641
+ deferred_tool = parsed.get("deferred_tool", None)
1642
+ deferred_args = parsed.get("deferred_tool_args", {})
1643
+
1644
+ plan_prompt = (
1645
+ "You MUST create a detailed plan first before taking any action.\n\n"
1646
+ f"User request: {user_input}\n\n"
1647
+ )
1648
+
1649
+ if deferred_tool:
1650
+ plan_prompt += (
1651
+ f"You initially wanted to use the {deferred_tool} tool with these arguments:\n"
1652
+ f"{json.dumps(deferred_args, indent=2, default=self._json_serialize_fallback)}\n\n"
1653
+ "However, you MUST first create a plan. Please create a plan that includes this tool usage as a step.\n\n"
1654
+ )
1655
+
1656
+ plan_prompt += (
1657
+ "Create a detailed plan with all necessary steps in JSON format, including exact tool names and arguments.\n"
1658
+ "Respond with your reasoning, plan, and the first tool to use."
1659
+ )
1660
+
1661
+ # Store the plan prompt in conversation if debug is enabled
1662
+ if self.debug_prompts:
1663
+ conversation.append(
1664
+ {"role": "system", "content": {"prompt": plan_prompt}}
1665
+ )
1666
+ if self.show_prompts:
1667
+ self.console.print_prompt(plan_prompt, "Plan Request Prompt")
1668
+
1669
+ # Notify the user we're asking for a plan
1670
+ self.console.print_info("Requesting a detailed plan before proceeding")
1671
+
1672
+ # Get the planning response
1673
+ if self.streaming:
1674
+ # (Plan prompt already stored/printed above for debug modes; avoid duplicates.)
1675
+
1685
+ # Handle streaming as before
1686
+ full_response = ""
1687
+ # Add plan request to messages
1688
+ messages.append({"role": "user", "content": plan_prompt})
1689
+
1690
+ # Use ChatSDK for streaming plan response
1691
+ stream_gen = self.chat.send_messages_stream(
1692
+ messages=messages, system_prompt=self.system_prompt
1693
+ )
1694
+
1695
+ for chunk_response in stream_gen:
1696
+ if not chunk_response.is_complete:
1697
+ chunk = chunk_response.text
1698
+ if hasattr(self.console, "print_streaming_text"):
1699
+ self.console.print_streaming_text(chunk)
1700
+ else:
1701
+ print(chunk, end="", flush=True)
1702
+ full_response += chunk
1703
+
1704
+ if hasattr(self.console, "print_streaming_text"):
1705
+ self.console.print_streaming_text("", end_of_stream=True)
1706
+ else:
1707
+ print("", flush=True)
1708
+
1709
+ plan_response = full_response
1710
+ else:
1711
+ # Use progress indicator for non-streaming mode
1712
+ self.console.start_progress("Creating plan")
1713
+
1714
+ # (Plan prompt already stored/printed above for debug modes; avoid duplicates.)
1716
+
1724
+ # Add plan request to messages
1725
+ messages.append({"role": "user", "content": plan_prompt})
1726
+
1727
+ # Use ChatSDK for non-streaming plan response
1728
+ chat_response = self.chat.send_messages(
1729
+ messages=messages, system_prompt=self.system_prompt
1730
+ )
1731
+ plan_response = chat_response.text
1732
+ self.console.stop_progress()
1733
+
1734
+ # Parse the plan response
1735
+ parsed_plan = self._parse_llm_response(plan_response)
1736
+ logger.debug(f"Parsed plan response: {parsed_plan}")
1737
+ conversation.append({"role": "assistant", "content": parsed_plan})
1738
+
1739
+ # Add plan response to messages for chat history
1740
+ messages.append({"role": "assistant", "content": plan_response})
1741
+
1742
+ # Display the agent's reasoning for the plan
1743
+ self.console.print_thought(parsed_plan.get("thought", "Creating plan"))
1744
+ self.console.print_goal(parsed_plan.get("goal", "Planning for task"))
1745
+
1746
+ # Set the parsed response to the new plan for further processing
1747
+ parsed = parsed_plan
1748
+ else:
1749
+ # Display the agent's reasoning in real-time (only if provided)
1750
+ # Skip if we just displayed thought/goal for a plan request above
1751
+ thought = parsed.get("thought", "").strip()
1752
+ goal = parsed.get("goal", "").strip()
1753
+
1754
+ if thought and thought != "No explicit reasoning provided":
1755
+ self.console.print_thought(thought)
1756
+
1757
+ if goal and goal != "No explicit goal provided":
1758
+ self.console.print_goal(goal)
1759
+
1760
+ # Process plan if available
1761
+ if "plan" in parsed:
1762
+ # Validate that plan is actually a list, not a string or other type
1763
+ if not isinstance(parsed["plan"], list):
1764
+ logger.error(
1765
+ f"Invalid plan format: expected list, got {type(parsed['plan']).__name__}. "
1766
+ f"Plan content: {parsed['plan']}"
1767
+ )
1768
+ self.console.print_error(
1769
+ f"LLM returned invalid plan format (expected array, got {type(parsed['plan']).__name__}). "
1770
+ "Asking for correction..."
1771
+ )
1772
+
1773
+ # Create error recovery prompt
1774
+ error_msg = (
1775
+ "ERROR: You provided a plan in the wrong format.\n"
1776
+ "Expected: an array of step objects\n"
1777
+ f"You provided: {type(parsed['plan']).__name__}\n\n"
1778
+ "The correct format is:\n"
1779
+ f'{{"plan": [{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}]}}\n\n'
1780
+ f"Please create a proper plan as an array of step objects for: {user_input}"
1781
+ )
1782
+ messages.append({"role": "user", "content": error_msg})
1783
+
1784
+ # Continue to next iteration to get corrected plan
1785
+ continue
1786
+
1787
+ # Validate that plan items are dictionaries with required fields
1788
+ invalid_steps = []
1789
+ for i, step in enumerate(parsed["plan"]):
1790
+ if not isinstance(step, dict):
1791
+ invalid_steps.append((i, type(step).__name__, step))
1792
+ elif "tool" not in step or "tool_args" not in step:
1793
+ invalid_steps.append((i, "missing fields", step))
1794
+
1795
+ if invalid_steps:
1796
+ logger.error(f"Invalid plan steps found: {invalid_steps}")
1797
+ self.console.print_error(
1798
+ f"Plan contains {len(invalid_steps)} invalid step(s). Asking for correction..."
1799
+ )
1800
+
1801
+ # Create detailed error message
1802
+ error_details = "\n".join(
1803
+ [
1804
+ f"Step {i+1}: {issue} - {step}"
1805
+ for i, issue, step in invalid_steps[
1806
+ :3
1807
+ ] # Show first 3 errors
1808
+ ]
1809
+ )
1810
+
1811
+ error_msg = (
1812
+ f"ERROR: Your plan contains invalid steps:\n{error_details}\n\n"
1813
+ f"Each step must be a dictionary with 'tool' and 'tool_args' fields:\n"
1814
+ f'{{"tool": "tool_name", "tool_args": {{...}}, "description": "..."}}\n\n'
1815
+ f"Please create a corrected plan for: {user_input}"
1816
+ )
1817
+ messages.append({"role": "user", "content": error_msg})
1818
+
1819
+ # Continue to next iteration to get corrected plan
1820
+ continue
1821
+
1822
+ # Plan is valid - proceed with execution
1823
+ self.current_plan = parsed["plan"]
1824
+ self.current_step = 0
1825
+ self.total_plan_steps = len(self.current_plan)
1826
+ self.execution_state = self.STATE_EXECUTING_PLAN
1827
+ logger.debug(
1828
+ f"New plan created with {self.total_plan_steps} steps: {self.current_plan}"
1829
+ )
1830
+
1831
+ # If the response contains a tool call, execute it
1832
+ if "tool" in parsed and "tool_args" in parsed:
1833
+
1834
+ # Display the current plan with the current step highlighted
1835
+ if self.current_plan:
1836
+ self.console.print_plan(self.current_plan, self.current_step)
1837
+
1838
+ # When both plan and tool are present, prioritize the plan execution
1839
+ # If we have a plan, we should execute from the plan, not the standalone tool call
1840
+ if "plan" in parsed and self.current_plan and self.total_plan_steps > 0:
1841
+ # Skip the standalone tool execution and let the plan execution handle it
1842
+ # The plan execution logic will handle this in the next iteration
1843
+ logger.debug(
1844
+ "Plan and tool both present - deferring to plan execution logic"
1845
+ )
1846
+ continue # Skip tool execution, let plan execution handle it
1847
+
1848
+ # If this was a single-step plan, mark as completed after tool execution
1849
+ if self.total_plan_steps == 1:
1850
+ logger.debug(
1851
+ "Single-step plan will be marked completed after tool execution"
1852
+ )
1853
+ self.execution_state = self.STATE_COMPLETION
1854
+
1855
+ tool_name = parsed["tool"]
1856
+ tool_args = parsed["tool_args"]
1857
+ logger.debug(f"Tool call detected: {tool_name} with args {tool_args}")
1858
+
1859
+ # Display the tool call in real-time
1860
+ self.console.print_tool_usage(tool_name)
1861
+
1862
+ if tool_args:
1863
+ self.console.pretty_print_json(tool_args, "Arguments")
1864
+
1865
+ # Start progress indicator for tool execution
1866
+ self.console.start_progress(f"Executing {tool_name}")
1867
+
1868
+ # Check for repeated tool calls
1869
+ if last_tool_call == (tool_name, str(tool_args)):
1870
+ # Stop progress indicator
1871
+ self.console.stop_progress()
1872
+
1873
+ logger.warning(f"Detected repeated tool call: {tool_name}")
1874
+ # Force a final answer if the same tool is called repeatedly
1875
+ final_answer = (
1876
+ f"Task completed with {tool_name}. No further action needed."
1877
+ )
1878
+
1879
+ self.console.print_repeated_tool_warning()
1880
+ break
1881
+
1882
+ # Execute the tool
1883
+ tool_result = self._execute_tool(tool_name, tool_args)
1884
+
1885
+ # Stop progress indicator
1886
+ self.console.stop_progress()
1887
+
1888
+ # Handle domain-specific post-processing
1889
+ self._post_process_tool_result(tool_name, tool_args, tool_result)
1890
+
1891
+ # Handle large tool results
1892
+ truncated_result = self._handle_large_tool_result(
1893
+ tool_name, tool_result, conversation, tool_args
1894
+ )
1895
+
1896
+ # Display the tool result in real-time (show full result to user)
1897
+ self.console.print_tool_complete()
1898
+
1899
+ self.console.pretty_print_json(tool_result, "Result")
1900
+
1901
+ # Store the truncated output for future context
1902
+ previous_outputs.append(
1903
+ {"tool": tool_name, "args": tool_args, "result": truncated_result}
1904
+ )
1905
+
1906
+ # Share tool output with subsequent LLM calls
1907
+ messages.append(self._create_tool_message(tool_name, truncated_result))
1908
+
1909
+ # Update last tool call
1910
+ last_tool_call = (tool_name, str(tool_args))
1911
+
1912
+ # For single-step plans, we still need to let the LLM process the result
1913
+ # This is especially important for RAG queries where the LLM needs to
1914
+ # synthesize the retrieved information into a coherent answer
1915
+ if (
1916
+ self.execution_state == self.STATE_COMPLETION
1917
+ and self.total_plan_steps == 1
1918
+ ):
1919
+ logger.debug(
1920
+ "Single-step plan execution completed, requesting final answer from LLM"
1921
+ )
1922
+ # Don't break here - let the loop continue so the LLM can process the tool result
1923
+ # The tool result has already been added to messages, so the next iteration
1924
+ # will call the LLM with that result
1925
+
1926
+ # Check if tool execution resulted in an error (support multiple error formats)
1927
+ is_error = isinstance(tool_result, dict) and (
1928
+ tool_result.get("status") == "error"
1929
+ or tool_result.get("success") is False
1930
+ or tool_result.get("has_errors") is True
1931
+ or tool_result.get("return_code", 0) != 0
1932
+ )
1933
+ if is_error:
1934
+ error_count += 1
1935
+ # Prefer error_brief for logging (avoids duplicate formatted output)
1936
+ last_error = (
1937
+ tool_result.get("error_brief")
1938
+ or tool_result.get("error")
1939
+ or tool_result.get("stderr")
1940
+ or tool_result.get("hint")
1941
+ or tool_result.get("suggested_fix")
1942
+ or f"Command failed with return code {tool_result.get('return_code')}"
1943
+ )
1944
+ logger.warning(
1945
+ f"Tool execution error in plan (count: {error_count}): {last_error}"
1946
+ )
1947
+ # Only print if error wasn't already displayed by _execute_tool
1948
+ if not tool_result.get("error_displayed"):
1949
+ self.console.print_error(last_error)
1950
+
1951
+ # Switch to error recovery state
1952
+ self.execution_state = self.STATE_ERROR_RECOVERY
1953
+ self.console.print_state_info(
1954
+ "ERROR RECOVERY: Handling tool execution failure"
1955
+ )
1956
+
1957
+ # Break out of tool execution to trigger error recovery prompt
1958
+ continue
1959
+
1960
+ # Collect and store performance stats for token tracking
1961
+ # Do this BEFORE checking for final answer so stats are always collected
1962
+ perf_stats = response_stats or self.chat.get_stats()
1963
+ if perf_stats:
1964
+ conversation.append(
1965
+ {
1966
+ "role": "system",
1967
+ "content": {
1968
+ "type": "stats",
1969
+ "step": steps_taken,
1970
+ "performance_stats": perf_stats,
1971
+ },
1972
+ }
1973
+ )
1974
+
1975
+ # Check for final answer (after collecting stats)
1976
+ if "answer" in parsed:
1977
+ final_answer = parsed["answer"]
1978
+ self.execution_state = self.STATE_COMPLETION
1979
+ self.console.print_final_answer(final_answer, streaming=self.streaming)
1980
+ break
1981
+
1982
+ # Check if we're at the limit and ask user if they want to continue
1983
+ if steps_taken == steps_limit and final_answer is None:
1984
+ # Show what was accomplished
1985
+ max_steps_msg = self._generate_max_steps_message(
1986
+ conversation, steps_taken, steps_limit
1987
+ )
1988
+ self.console.print_warning(max_steps_msg)
1989
+
1990
+ # Ask user if they want to continue (skip in silent mode OR if stdin is not available)
1991
+ # IMPORTANT: Never call input() in API/CI contexts to avoid blocking threads
1992
+ import sys
1993
+
1994
+ has_stdin = sys.stdin and sys.stdin.isatty()
1995
+ if has_stdin and not (
1996
+ hasattr(self, "silent_mode") and self.silent_mode
1997
+ ):
1998
+ try:
1999
+ response = (
2000
+ input("\nContinue with 50 more steps? (y/n): ")
2001
+ .strip()
2002
+ .lower()
2003
+ )
2004
+ if response in ["y", "yes"]:
2005
+ steps_limit += 50
2006
+ self.console.print_info(
2007
+ f"✓ Continuing with {steps_limit} total steps...\n"
2008
+ )
2009
+ else:
2010
+ self.console.print_info("Stopping at user request.")
2011
+ break
2012
+ except (EOFError, KeyboardInterrupt):
2013
+ self.console.print_info("\nStopping at user request.")
2014
+ break
2015
+ else:
2016
+ # Silent mode - just stop
2017
+ break
2018
+
2019
+ # Print completion message
2020
+ self.console.print_completion(steps_taken, steps_limit)
2021
+
2022
+ # Calculate total duration
2023
+ total_duration = time.time() - start_time
2024
+
2025
+ # Aggregate token counts from conversation stats
2026
+ total_input_tokens = 0
2027
+ total_output_tokens = 0
2028
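+ # Each step appended {"role": "system", "content": {"type": "stats",
+ # "performance_stats": {...}}}; sum token counts across those entries.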
+ for entry in conversation:
2029
+ if entry.get("role") == "system" and isinstance(entry.get("content"), dict):
2030
+ content = entry["content"]
2031
+ if content.get("type") == "stats" and "performance_stats" in content:
2032
+ stats = content["performance_stats"]
2033
+ if stats.get("input_tokens") is not None:
2034
+ total_input_tokens += stats["input_tokens"]
2035
+ if stats.get("output_tokens") is not None:
2036
+ total_output_tokens += stats["output_tokens"]
2037
+
2038
+ # Return the result
2039
+ has_errors = len(self.error_history) > 0
2040
+ has_valid_answer = (
2041
+ final_answer and final_answer.strip()
2042
+ ) # Check for non-empty answer
2043
+ result = {
2044
+ "status": (
2045
+ "success"
2046
+ if has_valid_answer and not has_errors
2047
+ else ("failed" if has_errors else "incomplete")
2048
+ ),
2049
+ "result": (
2050
+ final_answer
2051
+ if final_answer
2052
+ else self._generate_max_steps_message(
2053
+ conversation, steps_taken, steps_limit
2054
+ )
2055
+ ),
2056
+ "system_prompt": self.system_prompt, # Include system prompt in the result
2057
+ "conversation": conversation,
2058
+ "steps_taken": steps_taken,
2059
+ "duration": total_duration, # Total query processing time in seconds
2060
+ "input_tokens": total_input_tokens, # Total input tokens across all steps
2061
+ "output_tokens": total_output_tokens, # Total output tokens across all steps
2062
+ "total_tokens": total_input_tokens
2063
+ + total_output_tokens, # Combined token count
2064
+ "error_count": len(self.error_history),
2065
+ "error_history": self.error_history, # Include the full error history
2066
+ }
2067
+
2068
+ # Write trace to file if requested
2069
+ if trace:
2070
+ file_path = self._write_json_to_file(result, filename)
2071
+ result["output_file"] = file_path
2072
+
2073
+ logger.debug(f"Query processing complete: {result}")
2074
+
2075
+ # Store the result internally
2076
+ self.last_result = result
2077
+
2078
+ return result
2079
+
2080
+ def _post_process_tool_result(
2081
+ self, _tool_name: str, _tool_args: Dict[str, Any], _tool_result: Dict[str, Any]
2082
+ ) -> None:
2083
+ """
2084
+ Post-process the tool result for domain-specific handling.
2085
+ Override this in subclasses to provide domain-specific behavior.
2086
+
2087
+ Args:
2088
+ _tool_name: Name of the tool that was executed
2089
+ _tool_args: Arguments that were passed to the tool
2090
+ _tool_result: Result returned by the tool
2091
+ """
2092
+ ...
2093
+
2094
+ def display_result(
2095
+ self,
2096
+ title: str = "Result",
2097
+ result: Dict[str, Any] = None,
2098
+ print_result: bool = False,
2099
+ ) -> None:
2100
+ """
2101
+ Display the result and output file path information.
2102
+
2103
+ Args:
2104
+ title: Optional title for the result panel
2105
+ result: Optional result dictionary to display. If None, uses the last stored result.
2106
+ print_result: If True, print the result to the console
2107
+ """
2108
+ # Use the provided result or fall back to the last stored result
2109
+ display_result = result if result is not None else self.last_result
2110
+
2111
+ if display_result is None:
2112
+ self.console.print_warning("No result available to display.")
2113
+ return
2114
+
2115
+ # Print the full result with syntax highlighting
2116
+ if print_result:
2117
+ self.console.pretty_print_json(display_result, title)
2118
+
2119
+ # If there's an output file, display its path after the result
2120
+ if "output_file" in display_result:
2121
+ self.console.print_info(
2122
+ f"Output written to: {display_result['output_file']}"
2123
+ )
2124
+
2125
+ def get_error_history(self) -> List[str]:
2126
+ """
2127
+ Get the history of errors encountered by the agent.
2128
+
2129
+ Returns:
2130
+ List of error messages
2131
+ """
2132
+ return self.error_history