emdash-core 0.1.25 → 0.1.37 (py3-none-any wheel)
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- emdash_core/agent/__init__.py +4 -0
- emdash_core/agent/agents.py +84 -23
- emdash_core/agent/events.py +42 -20
- emdash_core/agent/hooks.py +419 -0
- emdash_core/agent/inprocess_subagent.py +166 -18
- emdash_core/agent/prompts/__init__.py +4 -3
- emdash_core/agent/prompts/main_agent.py +67 -2
- emdash_core/agent/prompts/plan_mode.py +236 -107
- emdash_core/agent/prompts/subagents.py +103 -23
- emdash_core/agent/prompts/workflow.py +159 -26
- emdash_core/agent/providers/factory.py +2 -2
- emdash_core/agent/providers/openai_provider.py +67 -15
- emdash_core/agent/runner/__init__.py +49 -0
- emdash_core/agent/runner/agent_runner.py +765 -0
- emdash_core/agent/runner/context.py +470 -0
- emdash_core/agent/runner/factory.py +108 -0
- emdash_core/agent/runner/plan.py +217 -0
- emdash_core/agent/runner/sdk_runner.py +324 -0
- emdash_core/agent/runner/utils.py +67 -0
- emdash_core/agent/skills.py +47 -8
- emdash_core/agent/toolkit.py +46 -14
- emdash_core/agent/toolkits/__init__.py +117 -18
- emdash_core/agent/toolkits/base.py +87 -2
- emdash_core/agent/toolkits/explore.py +18 -0
- emdash_core/agent/toolkits/plan.py +27 -11
- emdash_core/agent/tools/__init__.py +2 -2
- emdash_core/agent/tools/coding.py +48 -4
- emdash_core/agent/tools/modes.py +151 -143
- emdash_core/agent/tools/task.py +52 -6
- emdash_core/api/agent.py +706 -1
- emdash_core/ingestion/repository.py +17 -198
- emdash_core/models/agent.py +4 -0
- emdash_core/skills/frontend-design/SKILL.md +56 -0
- emdash_core/sse/stream.py +4 -0
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.37.dist-info}/METADATA +4 -1
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.37.dist-info}/RECORD +38 -30
- emdash_core/agent/runner.py +0 -1123
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.37.dist-info}/WHEEL +0 -0
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.37.dist-info}/entry_points.txt +0 -0
emdash_core/agent/runner/agent_runner.py (new file)

@@ -0,0 +1,765 @@

```python
"""Agent runner for LLM-powered exploration.

This module contains the main AgentRunner class that orchestrates
the agent loop, tool execution, and conversation management.
"""

import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Optional

from ...utils.logger import log
from ...core.exceptions import ContextLengthError
from ..toolkit import AgentToolkit
from ..events import AgentEventEmitter, NullEmitter
from ..providers import get_provider
from ..providers.factory import DEFAULT_MODEL
from ..context_manager import (
    truncate_tool_output,
    reduce_context_for_retry,
    is_context_overflow_error,
)
from ..prompts import build_system_prompt
from ..tools.tasks import TaskState
from ...checkpoint import CheckpointManager

from .utils import SafeJSONEncoder, summarize_tool_result
from .context import (
    estimate_context_tokens,
    get_context_breakdown,
    maybe_compact_context,
    emit_context_frame,
)
from .plan import PlanMixin


class AgentRunner(PlanMixin):
    """Runs an LLM agent with tool access for code exploration.

    Example:
        runner = AgentRunner()
        response = runner.run("How does authentication work in this codebase?")
        print(response)
    """

    def __init__(
        self,
        toolkit: Optional[AgentToolkit] = None,
        model: str = DEFAULT_MODEL,
        system_prompt: Optional[str] = None,
        emitter: Optional[AgentEventEmitter] = None,
        max_iterations: int = int(os.getenv("EMDASH_MAX_ITERATIONS", "100")),
        verbose: bool = False,
        show_tool_results: bool = False,
        enable_thinking: Optional[bool] = None,
        checkpoint_manager: Optional[CheckpointManager] = None,
    ):
        """Initialize the agent runner.

        Args:
            toolkit: AgentToolkit instance. If None, creates default.
            model: LLM model to use.
            system_prompt: Custom system prompt. If None, uses default.
            emitter: Event emitter for streaming output.
            max_iterations: Maximum tool call iterations.
            verbose: Whether to print verbose output.
            show_tool_results: Whether to show detailed tool results.
            enable_thinking: Enable extended thinking. If None, auto-detect from model.
            checkpoint_manager: Optional checkpoint manager for git-based checkpoints.
        """
        self.toolkit = toolkit or AgentToolkit()
        self.provider = get_provider(model)
        self.model = model
        # Build system prompt
        if system_prompt:
            self.system_prompt = system_prompt
        else:
            self.system_prompt = build_system_prompt(self.toolkit)
        self.emitter = emitter or NullEmitter()
        # Inject emitter into tools that need it (e.g., TaskTool for sub-agent streaming)
        self.toolkit.set_emitter(self.emitter)
        self.max_iterations = max_iterations
        self.verbose = verbose
        self.show_tool_results = show_tool_results
        # Extended thinking support
        if enable_thinking is None:
            # Auto-detect from provider capabilities
            self.enable_thinking = (
                hasattr(self.provider, "supports_thinking")
                and self.provider.supports_thinking()
            )
        else:
            self.enable_thinking = enable_thinking
        # Conversation history for multi-turn support
        self._messages: list[dict] = []
        # Token usage tracking
        self._total_input_tokens: int = 0
        self._total_output_tokens: int = 0
        self._total_thinking_tokens: int = 0
        # Store query for reranking
        self._current_query: str = ""
        # Todo state tracking for injection
        self._last_todo_snapshot: str = ""
        # Checkpoint manager for git-based checkpoints
        self._checkpoint_manager = checkpoint_manager
        # Track tools used during current run (for checkpoint metadata)
        self._tools_used_this_run: set[str] = set()
        # Plan approval state (from PlanMixin)
        self._pending_plan: Optional[dict] = None
        # Callback for autosave after each iteration (set by API layer)
        self._on_iteration_callback: Optional[callable] = None

    def _get_default_plan_file_path(self) -> str:
        """Get the default plan file path based on repo root.

        Returns:
            Path to the plan file (e.g., .emdash/plan.md)
        """
        repo_root = self.toolkit._repo_root
        return str(repo_root / ".emdash" / "plan.md")

    def _get_todo_snapshot(self) -> str:
        """Get current todo state as string for comparison."""
        state = TaskState.get_instance()
        return json.dumps(state.get_all_tasks(), sort_keys=True)

    def _format_todo_reminder(self) -> str:
        """Format current todos as XML reminder for injection into context."""
        state = TaskState.get_instance()
        tasks = state.get_all_tasks()
        if not tasks:
            return ""

        counts = {"pending": 0, "in_progress": 0, "completed": 0}
        lines = []
        for t in tasks:
            status = t.get("status", "pending")
            counts[status] = counts.get(status, 0) + 1
            status_icon = {"pending": "⬚", "in_progress": "🔄", "completed": "✅"}.get(status, "?")
            lines.append(f' {t["id"]}. {status_icon} {t["title"]}')

        header = f'Tasks: {counts["completed"]} completed, {counts["in_progress"]} in progress, {counts["pending"]} pending'
        task_list = "\n".join(lines)
        return f"<todo-state>\n{header}\n{task_list}\n</todo-state>"

    def _execute_tools_parallel(self, parsed_calls: list) -> list:
        """Execute multiple tool calls in parallel using a thread pool.

        Args:
            parsed_calls: List of (tool_call, args) tuples

        Returns:
            List of (tool_call, args, result) tuples in original order
        """
        # Emit tool start events for all calls
        for tool_call, args in parsed_calls:
            self.emitter.emit_tool_start(tool_call.name, args, tool_id=tool_call.id)

        def execute_one(item):
            tool_call, args = item
            try:
                result = self.toolkit.execute(tool_call.name, **args)
                return (tool_call, args, result)
            except Exception as e:
                log.exception(f"Tool {tool_call.name} failed")
                from ..tools.base import ToolResult
                return (tool_call, args, ToolResult.error_result(str(e)))

        # Execute in parallel with up to 3 workers
        results: list = [None] * len(parsed_calls)
        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = {executor.submit(execute_one, item): i for i, item in enumerate(parsed_calls)}
            # Collect results maintaining order
            for future in as_completed(futures):
                idx = futures[future]
                results[idx] = future.result()

        # Emit tool result events for all calls
        for tool_call, args, result in results:
            self.emitter.emit_tool_result(
                tool_call.name,
                result.success,
                summarize_tool_result(result),
                tool_id=tool_call.id,
            )

        return results

    def run(
        self,
        query: str,
        context: Optional[str] = None,
        images: Optional[list] = None,
    ) -> str:
        """Run the agent to answer a query.

        Args:
            query: User's question or request
            context: Optional additional context
            images: Optional list of images to include

        Returns:
            Agent's final response
        """
        # Store query for reranking context frame
        self._current_query = query

        # Reset per-cycle mode state (allows exit_plan to be called again)
        from ..tools.modes import ModeState
        ModeState.get_instance().reset_cycle()

        # Build user message
        if context:
            user_message = {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {query}",
            }
        else:
            user_message = {
                "role": "user",
                "content": query,
            }

        # Save user message to history BEFORE running (so it's preserved even if interrupted)
        self._messages.append(user_message)
        messages = list(self._messages)  # Copy for the loop

        # TODO: Handle images if provided

        # Get tool schemas
        tools = self.toolkit.get_all_schemas()

        try:
            response, final_messages = self._run_loop(messages, tools)
            # Update conversation history with full exchange
            self._messages = final_messages
            self.emitter.emit_end(success=True)
            # Create checkpoint if manager is configured
            self._create_checkpoint()
            return response

        except Exception as e:
            log.exception("Agent run failed")
            self.emitter.emit_error(str(e))
            # Keep user message in history even on error (already appended above)
            return f"Error: {str(e)}"

    def _run_loop(
        self,
        messages: list[dict],
        tools: list[dict],
    ) -> tuple[str, list[dict]]:
        """Run the agent loop until completion.

        Args:
            messages: Initial messages
            tools: Tool schemas

        Returns:
            Tuple of (final response text, conversation messages)
        """
        max_retries = 3

        for iteration in range(self.max_iterations):
            # When approaching max iterations, ask agent to wrap up
            if iteration == self.max_iterations - 2:
                messages.append({
                    "role": "user",
                    "content": "[SYSTEM: You are approaching your iteration limit. Please provide your findings and conclusions now, even if incomplete. Summarize what you've learned and any recommendations.]",
                })

            # Try API call with retry on context overflow
            retry_count = 0
            response = None

            while retry_count < max_retries:
                try:
                    # Proactively compact context if approaching limit
                    messages = maybe_compact_context(
                        messages, self.provider, self.emitter, self.system_prompt
                    )

                    log.debug(
                        "Calling LLM iteration={} messages={} tools={}",
                        iteration,
                        len(messages),
                        len(tools) if tools else 0,
                    )

                    response = self.provider.chat(
                        messages=messages,
                        system=self.system_prompt,
                        tools=tools,
                        thinking=self.enable_thinking,
                    )

                    log.debug("LLM response received iteration={}", iteration)
                    break  # Success

                except Exception as exc:
                    if is_context_overflow_error(exc):
                        retry_count += 1
                        log.warning(
                            "Context overflow on attempt {}/{}, reducing context...",
                            retry_count,
                            max_retries,
                        )

                        if retry_count >= max_retries:
                            raise ContextLengthError(
                                f"Failed to reduce context after {max_retries} attempts: {exc}",
                            )

                        # Reduce context by removing old messages
                        messages = reduce_context_for_retry(
                            messages,
                            keep_recent=max(2, 6 - retry_count * 2),  # Fewer messages each retry
                        )
                    else:
                        raise  # Re-raise non-context errors

            if response is None:
                raise RuntimeError("Failed to get response from provider")

            # Accumulate token usage
            self._total_input_tokens += response.input_tokens
            self._total_output_tokens += response.output_tokens
            self._total_thinking_tokens += getattr(response, "thinking_tokens", 0)

            # Emit thinking if present
            if response.thinking:
                self.emitter.emit_thinking(response.thinking)

            # Check for tool calls
            if response.tool_calls:
                # Emit assistant text if present (shown as bullets between tool calls)
                if response.content and response.content.strip():
                    self.emitter.emit_assistant_text(response.content)

                # Track if we need to pause for user input
                needs_user_input = False

                # Parse all tool call arguments first
                parsed_calls = []
                for tool_call in response.tool_calls:
                    args = tool_call.arguments
                    if isinstance(args, str):
                        args = json.loads(args)
                    parsed_calls.append((tool_call, args))

                # CRITICAL: Check if exit_plan is in the batch - if so, execute it FIRST
                # and skip all other tools. This prevents the agent from continuing to
                # work after submitting a plan.
                exit_plan_idx = None
                for i, (tc, _) in enumerate(parsed_calls):
                    if tc.name == "exit_plan":
                        exit_plan_idx = i
                        break

                if exit_plan_idx is not None:
                    # Execute ONLY exit_plan, skip everything else
                    exit_call, exit_args = parsed_calls[exit_plan_idx]
                    self.emitter.emit_tool_start(exit_call.name, exit_args, tool_id=exit_call.id)
                    exit_result = self.toolkit.execute(exit_call.name, **exit_args)
                    self.emitter.emit_tool_result(
                        exit_call.name,
                        exit_result.success,
                        summarize_tool_result(exit_result),
                        tool_id=exit_call.id,
                    )

                    # Build results list with exit_plan result and skipped placeholders
                    results = []
                    from ..tools.base import ToolResult
                    for i, (tc, args) in enumerate(parsed_calls):
                        if i == exit_plan_idx:
                            results.append((tc, args, exit_result))
                        else:
                            # Skip this tool - don't execute it
                            log.warning(f"Skipping tool {tc.name} - exit_plan takes priority")
                            skip_result = ToolResult.error_result(
                                "Tool skipped: exit_plan was called. Agent must stop and wait for user approval."
                            )
                            results.append((tc, args, skip_result))

                elif len(parsed_calls) > 1:
                    # No exit_plan - execute tools in parallel
                    results = self._execute_tools_parallel(parsed_calls)
                else:
                    # Single tool - execute directly
                    tool_call, args = parsed_calls[0]
                    self.emitter.emit_tool_start(tool_call.name, args, tool_id=tool_call.id)
                    result = self.toolkit.execute(tool_call.name, **args)
                    self.emitter.emit_tool_result(
                        tool_call.name,
                        result.success,
                        summarize_tool_result(result),
                        tool_id=tool_call.id,
                    )
                    results = [(tool_call, args, result)]

                # Track if we need to rebuild toolkit for mode change
                mode_changed = False

                # CRITICAL FIX: Add ONE assistant message with ALL tool calls
                # This prevents the LLM from seeing multiple assistant messages
                # which causes it to loop repeating the same tools
                all_tool_calls = []
                for tool_call, args in parsed_calls:
                    all_tool_calls.append({
                        "id": tool_call.id,
                        "type": "function",
                        "function": {
                            "name": tool_call.name,
                            "arguments": json.dumps(args),
                        },
                    })

                messages.append({
                    "role": "assistant",
                    "content": response.content or "",
                    "tool_calls": all_tool_calls,
                })

                # Now process results and add tool result messages
                for tool_call, args, result in results:
                    # Track tool for checkpoint metadata
                    self._tools_used_this_run.add(tool_call.name)

                    # Check if tool is asking a clarification question
                    if (result.success and
                        result.data and
                        result.data.get("status") == "awaiting_response" and
                        "question" in result.data):
                        self.emitter.emit_clarification(
                            question=result.data["question"],
                            context="",
                            options=result.data.get("options", []),
                        )
                        needs_user_input = True

                    # Check if agent entered plan mode
                    if (result.success and
                        result.data and
                        result.data.get("status") == "entered_plan_mode"):
                        mode_changed = True
                        # Get plan file path
                        plan_file_path = self._get_default_plan_file_path()
                        from ..tools.modes import ModeState
                        ModeState.get_instance().set_plan_file_path(plan_file_path)
                        # Rebuild toolkit with plan_mode=True
                        self.toolkit = AgentToolkit(
                            connection=self.toolkit.connection,
                            repo_root=self.toolkit._repo_root,
                            plan_mode=True,
                            plan_file_path=plan_file_path,
                        )
                        self.toolkit.set_emitter(self.emitter)
                        # Main agent uses normal prompt - delegates to Plan subagent
                        self.system_prompt = build_system_prompt(self.toolkit)
                        # Update tools for LLM
                        tools = self.toolkit.get_all_schemas()

                    # Check if agent requested to enter plan mode (enter_plan_mode)
                    if (result.success and
                        result.data and
                        result.data.get("status") == "plan_mode_requested"):
                        # Emit event for UI to show approval dialog
                        self.emitter.emit_plan_mode_requested(
                            reason=result.data.get("reason", ""),
                        )
                        # Pause and wait for user approval
                        needs_user_input = True

                    # Check if tool is submitting a plan for approval (exit_plan)
                    if (result.success and
                        result.data and
                        result.data.get("status") == "plan_submitted"):
                        # Store the pending plan (simple string)
                        self._pending_plan = {
                            "plan": result.data.get("plan", ""),
                        }
                        self.emitter.emit_plan_submitted(
                            plan=self._pending_plan["plan"],
                        )
                        # Pause and wait for approval (similar to clarification flow)
                        needs_user_input = True

                    # Serialize and truncate tool result to prevent context overflow
                    result_json = json.dumps(result.to_dict(), cls=SafeJSONEncoder)
                    result_json = truncate_tool_output(result_json)

                    # Check if todos changed and inject reminder
                    if tool_call.name in ("write_todo", "update_todo_list"):
                        new_snapshot = self._get_todo_snapshot()
                        if new_snapshot != self._last_todo_snapshot:
                            self._last_todo_snapshot = new_snapshot
                            reminder = self._format_todo_reminder()
                            if reminder:
                                result_json += f"\n\n{reminder}"

                    # Add tool result
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": result_json,
                    })

                # Emit context frame after each iteration (for autosave and UI updates)
                self._emit_context_frame(messages)

                # If a clarification question was asked, pause and wait for user input
                if needs_user_input:
                    log.debug("Pausing agent loop - waiting for user input")
                    return "", messages

            else:
                # No tool calls - check if response was truncated
                if response.stop_reason in ("max_tokens", "length"):
                    # Response was truncated, request continuation
                    log.debug("Response truncated ({}), requesting continuation", response.stop_reason)
                    if response.content:
                        messages.append({
                            "role": "assistant",
                            "content": response.content,
                        })
                    messages.append({
                        "role": "user",
                        "content": "Your response was cut off. Please continue.",
                    })
                    continue

                # PLAN MODE ENFORCEMENT: In plan mode, reject text-only responses
                # Force the model to use tools (task, ask_followup_question, exit_plan)
                if self.toolkit.plan_mode:
                    log.warning("Plan mode: Agent output text without tools, forcing tool usage")
                    if response.content:
                        messages.append({
                            "role": "assistant",
                            "content": response.content,
                        })
                    messages.append({
                        "role": "user",
                        "content": """[SYSTEM ERROR] You are in plan mode but did not use any tools.

In plan mode you MUST use tools - text-only responses are not allowed.

YOUR REQUIRED ACTION NOW:
Use your exploration tools directly to investigate the codebase:

- glob(pattern="**/*.py") - Find files by pattern
- grep(pattern="class User", path="src/") - Search file contents
- read_file(path="path/to/file.py") - Read specific files
- semantic_search(query="authentication") - Find conceptually related code

You ARE the planner. Use tools to explore, then write your plan and call exit_plan.

DO NOT output more text. Use a tool NOW.""",
                    })
                    continue  # Force another iteration with tool usage

                # Agent is done - emit final response
                if response.content:
                    self.emitter.emit_message_start()
                    self.emitter.emit_message_delta(response.content)
                    self.emitter.emit_message_end()
                    # Add final assistant message to history
                    messages.append({
                        "role": "assistant",
                        "content": response.content,
                    })
                    # Emit final context frame summary
                    self._emit_context_frame(messages)
                    return response.content, messages

                # Agent finished without providing a summary - request one
                log.debug("Agent finished without summary, requesting completion message")
                try:
                    summary_response = self.provider.chat(
                        messages=messages + [{
                            "role": "user",
                            "content": "[SYSTEM: You have completed your task. Please provide a brief summary of what you accomplished.]",
                        }],
                        system=self.system_prompt,
                        tools=None,  # No tools - force text response
                        thinking=self.enable_thinking,
                    )
                    if summary_response.content:
                        self.emitter.emit_message_start()
                        self.emitter.emit_message_delta(summary_response.content)
                        self.emitter.emit_message_end()
                        self._emit_context_frame(messages)
                        return summary_response.content, messages
                except Exception as e:
                    log.warning(f"Failed to get completion summary: {e}")

                # Fallback if summary request fails
                fallback_message = "Task completed."
                self.emitter.emit_message_start()
                self.emitter.emit_message_delta(fallback_message)
                self.emitter.emit_message_end()
                self._emit_context_frame(messages)
                return fallback_message, messages

        # Hit max iterations - try one final request without tools to force a response
        try:
            final_response = self.provider.chat(
                messages=messages + [{
                    "role": "user",
                    "content": "[SYSTEM: Maximum iterations reached. Provide your final response now with whatever information you have gathered. Do not use any tools.]",
                }],
                system=self.system_prompt,
                tools=None,  # No tools - force text response
                thinking=self.enable_thinking,
            )
            # Emit thinking if present
            if final_response.thinking:
                self.emitter.emit_thinking(final_response.thinking)
            if final_response.content:
                self.emitter.emit_message_start()
                self.emitter.emit_message_delta(final_response.content)
                self.emitter.emit_message_end()
                self._emit_context_frame(messages)
                return final_response.content, messages
        except Exception as e:
            log.warning(f"Failed to get final response: {e}")

        # Fallback message if final response fails
        final_message = "Reached maximum iterations. The agent was unable to complete the task within the allowed iterations."
        self.emitter.emit_message_start()
        self.emitter.emit_message_delta(final_message)
        self.emitter.emit_message_end()
        self._emit_context_frame(messages)
        return final_message, messages

    def _emit_context_frame(self, messages: list[dict] | None = None) -> None:
        """Emit a context frame event with current exploration state.

        Args:
            messages: Current conversation messages to estimate context size
        """
        emit_context_frame(
            toolkit=self.toolkit,
            emitter=self.emitter,
            messages=messages or [],
            system_prompt=self.system_prompt,
            current_query=self._current_query,
            total_input_tokens=self._total_input_tokens,
            total_output_tokens=self._total_output_tokens,
        )

        # Call iteration callback for autosave if set
        if self._on_iteration_callback and messages:
            try:
                self._on_iteration_callback(messages)
            except Exception as e:
                log.debug(f"Iteration callback failed: {e}")

    def chat(self, message: str, images: Optional[list] = None) -> str:
        """Continue a conversation with a new message.

        This method maintains conversation history for multi-turn interactions.
        Call run() first to start a conversation, then chat() for follow-ups.

        Args:
            message: User's follow-up message
            images: Optional list of images to include

        Returns:
            Agent's response
        """
        if not self._messages:
            # No history, just run fresh
            return self.run(message, images=images)

        # Store query for reranking context frame
        self._current_query = message

        # Add new user message to history
        self._messages.append({
            "role": "user",
            "content": message,
        })

        # Get tool schemas
        tools = self.toolkit.get_all_schemas()

        try:
            response, final_messages = self._run_loop(self._messages, tools)
            # Update conversation history
            self._messages = final_messages
            self.emitter.emit_end(success=True)
            # Create checkpoint if manager is configured
            self._create_checkpoint()
            return response

        except Exception as e:
            log.exception("Agent chat failed")
            self.emitter.emit_error(str(e))
            return f"Error: {str(e)}"

    def _create_checkpoint(self) -> None:
        """Create a git checkpoint after successful run.

        Only creates a checkpoint if:
        - A checkpoint manager is configured
        - There are file changes to commit
        """
        if not self._checkpoint_manager:
            return

        try:
            self._checkpoint_manager.create_checkpoint(
                messages=self._messages,
                model=self.model,
                system_prompt=self.system_prompt,
                tools_used=list(self._tools_used_this_run),
                token_usage={
                    "input": self._total_input_tokens,
                    "output": self._total_output_tokens,
                    "thinking": self._total_thinking_tokens,
                },
            )
        except Exception as e:
            log.warning(f"Failed to create checkpoint: {e}")
        finally:
            # Clear tools for next run
            self._tools_used_this_run.clear()

    def reset(self) -> None:
        """Reset the agent state."""
        self.toolkit.reset_session()
        self._total_input_tokens = 0
        self._total_output_tokens = 0
        self._current_query = ""

    def answer_clarification(self, answer: str) -> str:
        """Answer a pending clarification question and resume the agent.

        This method is called when the user responds to a clarification question
        asked via ask_followup_question tool. It clears the pending question state
        and resumes the agent loop with the user's answer.

        Args:
            answer: The user's answer to the clarification question

        Returns:
            Agent's response after processing the answer
        """
        # Get current task state and clear the pending question
        state = TaskState.get_instance()
        pending_question = state.pending_question

        # Clear the pending question state
        state.pending_question = None
        state.user_response = answer

        # Build a context message that indicates this is the answer to the question
        if pending_question:
            context_message = f"[User answered the clarification question]\nQuestion: {pending_question}\nAnswer: {answer}"
        else:
            context_message = f"[User response]: {answer}"

        # Continue the conversation with the answer
        return self.chat(context_message)
```
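For orientation, here is a minimal usage sketch of the relocated runner, following the docstrings above: `run()` starts a conversation, `chat()` continues it with stored history, and `answer_clarification()` resumes a loop paused on an ask_followup_question tool call. The import path is an assumption: it presumes `AgentRunner` is re-exported from the new `emdash_core/agent/runner/__init__.py` (+49 lines in this diff); check that file before relying on it.

```python
# Sketch only - import path assumes AgentRunner is re-exported by the
# new emdash_core.agent.runner package (__init__.py added in this release).
from emdash_core.agent.runner import AgentRunner

runner = AgentRunner(verbose=True)

# First turn: run() appends the user message to history before the loop starts.
answer = runner.run("How does authentication work in this codebase?")
print(answer)

# Follow-up turns reuse the stored conversation history via chat().
follow_up = runner.chat("Which file defines the session middleware?")
print(follow_up)

# If the loop paused on a clarification question, resume it with the reply.
resumed = runner.answer_clarification("Use the OAuth flow, not API keys.")

# reset() clears toolkit session state, token counters, and the stored query.
runner.reset()
```

Note that `run()` returns an empty string when the loop pauses for user input (a clarification question, a plan-mode request, or a submitted plan); the partial exchange is kept in the runner's message history so the next `chat()` or `answer_clarification()` call picks up where it stopped.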