emdash-core 0.1.25__py3-none-any.whl → 0.1.33__py3-none-any.whl
This diff compares the contents of publicly released package versions from a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- emdash_core/agent/__init__.py +4 -0
- emdash_core/agent/events.py +42 -20
- emdash_core/agent/inprocess_subagent.py +123 -10
- emdash_core/agent/prompts/__init__.py +4 -3
- emdash_core/agent/prompts/main_agent.py +32 -2
- emdash_core/agent/prompts/plan_mode.py +236 -107
- emdash_core/agent/prompts/subagents.py +79 -15
- emdash_core/agent/prompts/workflow.py +145 -26
- emdash_core/agent/providers/factory.py +2 -2
- emdash_core/agent/providers/openai_provider.py +67 -15
- emdash_core/agent/runner/__init__.py +49 -0
- emdash_core/agent/runner/agent_runner.py +753 -0
- emdash_core/agent/runner/context.py +451 -0
- emdash_core/agent/runner/factory.py +108 -0
- emdash_core/agent/runner/plan.py +217 -0
- emdash_core/agent/runner/sdk_runner.py +324 -0
- emdash_core/agent/runner/utils.py +67 -0
- emdash_core/agent/skills.py +47 -8
- emdash_core/agent/toolkit.py +46 -14
- emdash_core/agent/toolkits/plan.py +9 -11
- emdash_core/agent/tools/__init__.py +2 -2
- emdash_core/agent/tools/coding.py +48 -4
- emdash_core/agent/tools/modes.py +151 -143
- emdash_core/agent/tools/task.py +41 -2
- emdash_core/api/agent.py +555 -1
- emdash_core/skills/frontend-design/SKILL.md +56 -0
- emdash_core/sse/stream.py +4 -0
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/METADATA +2 -1
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/RECORD +31 -24
- emdash_core/agent/runner.py +0 -1123
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/WHEEL +0 -0
- {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/entry_points.txt +0 -0
emdash_core/agent/runner.py
DELETED
@@ -1,1123 +0,0 @@
"""Agent runner for LLM-powered exploration."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
|
-
from datetime import datetime, date
|
|
7
|
-
from typing import Any, Optional
|
|
8
|
-
|
|
9
|
-
from ..utils.logger import log
|
|
10
|
-
from ..core.config import get_config
|
|
11
|
-
from ..core.exceptions import ContextLengthError
|
|
12
|
-
from .toolkit import AgentToolkit
|
|
13
|
-
from .events import AgentEventEmitter, NullEmitter
|
|
14
|
-
from .providers import get_provider
|
|
15
|
-
from .providers.factory import DEFAULT_MODEL
|
|
16
|
-
from .context_manager import (
|
|
17
|
-
truncate_tool_output,
|
|
18
|
-
reduce_context_for_retry,
|
|
19
|
-
is_context_overflow_error,
|
|
20
|
-
)
|
|
21
|
-
from .prompts import BASE_SYSTEM_PROMPT, build_system_prompt
|
|
22
|
-
from .tools.tasks import TaskState
|
|
23
|
-
from ..checkpoint import CheckpointManager

class SafeJSONEncoder(json.JSONEncoder):
    """JSON encoder that handles Neo4j types and other non-serializable objects."""

    def default(self, obj: Any) -> Any:
        # Handle datetime objects
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()

        # Handle Neo4j DateTime
        if hasattr(obj, 'isoformat'):
            return obj.isoformat()

        # Handle Neo4j Date, Time, etc.
        if hasattr(obj, 'to_native'):
            return str(obj.to_native())

        # Handle sets
        if isinstance(obj, set):
            return list(obj)

        # Handle bytes
        if isinstance(obj, bytes):
            return obj.decode('utf-8', errors='replace')

        # Fallback to string representation
        try:
            return str(obj)
        except Exception:
            return f"<non-serializable: {type(obj).__name__}>"
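Note: SafeJSONEncoder is used later in the loop to serialize tool results that may contain Neo4j temporal types, sets, or bytes. A minimal usage sketch (the payload values are illustrative, not from the package):

    import json
    from datetime import datetime

    payload = {"ran_at": datetime(2024, 1, 1), "files": {"a.py", "b.py"}}
    # Falls back to isoformat()/list()/str() instead of raising TypeError
    print(json.dumps(payload, cls=SafeJSONEncoder))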

class AgentRunner:
    """Runs an LLM agent with tool access for code exploration.

    Example:
        runner = AgentRunner()
        response = runner.run("How does authentication work in this codebase?")
        print(response)
    """

    def __init__(
        self,
        toolkit: Optional[AgentToolkit] = None,
        model: str = DEFAULT_MODEL,
        system_prompt: Optional[str] = None,
        emitter: Optional[AgentEventEmitter] = None,
        max_iterations: int = int(os.getenv("EMDASH_MAX_ITERATIONS", "100")),
        verbose: bool = False,
        show_tool_results: bool = False,
        enable_thinking: Optional[bool] = None,
        checkpoint_manager: Optional[CheckpointManager] = None,
    ):
        """Initialize the agent runner.

        Args:
            toolkit: AgentToolkit instance. If None, creates default.
            model: LLM model to use.
            system_prompt: Custom system prompt. If None, uses default.
            emitter: Event emitter for streaming output.
            max_iterations: Maximum tool call iterations.
            verbose: Whether to print verbose output.
            show_tool_results: Whether to show detailed tool results.
            enable_thinking: Enable extended thinking. If None, auto-detect from model.
            checkpoint_manager: Optional checkpoint manager for git-based checkpoints.
        """
        self.toolkit = toolkit or AgentToolkit()
        self.provider = get_provider(model)
        self.model = model
        # Build system prompt, prepending plan mode prompt if in plan mode
        if system_prompt:
            self.system_prompt = system_prompt
        elif self.toolkit.plan_mode:
            from .prompts.plan_mode import PLAN_MODE_PROMPT
            self.system_prompt = PLAN_MODE_PROMPT + "\n\n" + build_system_prompt(self.toolkit)
        else:
            self.system_prompt = build_system_prompt(self.toolkit)
        self.emitter = emitter or NullEmitter()
        # Inject emitter into tools that need it (e.g., TaskTool for sub-agent streaming)
        self.toolkit.set_emitter(self.emitter)
        self.max_iterations = max_iterations
        self.verbose = verbose
        self.show_tool_results = show_tool_results
        # Extended thinking support
        if enable_thinking is None:
            # Auto-detect from provider capabilities
            self.enable_thinking = (
                hasattr(self.provider, "supports_thinking")
                and self.provider.supports_thinking()
            )
        else:
            self.enable_thinking = enable_thinking
        # Conversation history for multi-turn support
        self._messages: list[dict] = []
        # Token usage tracking
        self._total_input_tokens: int = 0
        self._total_output_tokens: int = 0
        self._total_thinking_tokens: int = 0
        # Store query for reranking
        self._current_query: str = ""
        # Todo state tracking for injection
        self._last_todo_snapshot: str = ""
        # Checkpoint manager for git-based checkpoints
        self._checkpoint_manager = checkpoint_manager
        # Track tools used during current run (for checkpoint metadata)
        self._tools_used_this_run: set[str] = set()
        # Plan approval state
        self._pending_plan: Optional[dict] = None  # Stores submitted plan awaiting approval

    def _get_todo_snapshot(self) -> str:
        """Get current todo state as string for comparison."""
        state = TaskState.get_instance()
        return json.dumps(state.get_all_tasks(), sort_keys=True)

    def _format_todo_reminder(self) -> str:
        """Format current todos as XML reminder for injection into context."""
        state = TaskState.get_instance()
        tasks = state.get_all_tasks()
        if not tasks:
            return ""

        counts = {"pending": 0, "in_progress": 0, "completed": 0}
        lines = []
        for t in tasks:
            status = t.get("status", "pending")
            counts[status] = counts.get(status, 0) + 1
            status_icon = {"pending": "⬚", "in_progress": "🔄", "completed": "✅"}.get(status, "?")
            lines.append(f' {t["id"]}. {status_icon} {t["title"]}')

        header = f'Tasks: {counts["completed"]} completed, {counts["in_progress"]} in progress, {counts["pending"]} pending'
        task_list = "\n".join(lines)
        return f"<todo-state>\n{header}\n{task_list}\n</todo-state>"
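Note: for a todo list with one completed and one pending item, the reminder appended to the tool result would look roughly like this (illustrative titles):

    <todo-state>
    Tasks: 1 completed, 0 in progress, 1 pending
     1. ✅ Locate auth middleware
     2. ⬚ Trace session refresh
    </todo-state>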

    def _execute_tools_parallel(self, parsed_calls: list) -> list:
        """Execute multiple tool calls in parallel using a thread pool.

        Args:
            parsed_calls: List of (tool_call, args) tuples

        Returns:
            List of (tool_call, args, result) tuples in original order
        """
        # Emit tool start events for all calls
        for tool_call, args in parsed_calls:
            self.emitter.emit_tool_start(tool_call.name, args)

        def execute_one(item):
            tool_call, args = item
            try:
                result = self.toolkit.execute(tool_call.name, **args)
                return (tool_call, args, result)
            except Exception as e:
                log.exception(f"Tool {tool_call.name} failed")
                from .tools.base import ToolResult
                return (tool_call, args, ToolResult.error_result(str(e)))

        # Execute in parallel with up to 3 workers
        results: list = [None] * len(parsed_calls)
        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = {executor.submit(execute_one, item): i for i, item in enumerate(parsed_calls)}
            # Collect results maintaining order
            for future in as_completed(futures):
                idx = futures[future]
                results[idx] = future.result()

        # Emit tool result events for all calls
        for tool_call, args, result in results:
            self.emitter.emit_tool_result(
                tool_call.name,
                result.success,
                self._summarize_result(result),
            )

        return results
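Note: the future-to-index map above is the standard trick for running work concurrently while returning results in submission order. A self-contained sketch of the same pattern (slow_tool is a hypothetical stand-in for toolkit.execute):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def slow_tool(name: str) -> str:
        return f"ran {name}"

    calls = ["grep", "read_file", "glob"]
    results = [None] * len(calls)
    with ThreadPoolExecutor(max_workers=3) as executor:
        # Map each future back to its original position
        futures = {executor.submit(slow_tool, c): i for i, c in enumerate(calls)}
        for future in as_completed(futures):
            results[futures[future]] = future.result()
    print(results)  # order matches `calls`, not completion order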

    def run(
        self,
        query: str,
        context: Optional[str] = None,
        images: Optional[list] = None,
    ) -> str:
        """Run the agent to answer a query.

        Args:
            query: User's question or request
            context: Optional additional context
            images: Optional list of images to include

        Returns:
            Agent's final response
        """
        # Store query for reranking context frame
        self._current_query = query

        # Build user message
        if context:
            user_message = {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {query}",
            }
        else:
            user_message = {
                "role": "user",
                "content": query,
            }

        # Save user message to history BEFORE running (so it's preserved even if interrupted)
        self._messages.append(user_message)
        messages = list(self._messages)  # Copy for the loop

        # TODO: Handle images if provided

        # Get tool schemas
        tools = self.toolkit.get_all_schemas()

        try:
            response, final_messages = self._run_loop(messages, tools)
            # Update conversation history with full exchange
            self._messages = final_messages
            self.emitter.emit_end(success=True)
            # Create checkpoint if manager is configured
            self._create_checkpoint()
            return response

        except Exception as e:
            log.exception("Agent run failed")
            self.emitter.emit_error(str(e))
            # Keep user message in history even on error (already appended above)
            return f"Error: {str(e)}"
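Note: a minimal single-shot invocation, mirroring the class docstring (the query and context strings are illustrative):

    runner = AgentRunner(verbose=True)
    answer = runner.run(
        "Where is rate limiting enforced?",
        context="Focus on the API layer under emdash_core/api/",
    )
    print(answer)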

    def has_pending_plan(self) -> bool:
        """Check if there's a plan awaiting approval.

        Returns:
            True if a plan has been submitted and is awaiting approval.
        """
        return self._pending_plan is not None

    def get_pending_plan(self) -> Optional[dict]:
        """Get the pending plan if one exists.

        Returns:
            The pending plan dict, or None if no plan is pending.
        """
        return self._pending_plan

    def approve_plan(self) -> str:
        """Approve the pending plan and transition back to code mode.

        This method should be called after the user approves a submitted plan.
        It transitions the agent from plan mode back to code mode, allowing
        it to implement the approved plan.

        Returns:
            The agent's response after transitioning to code mode.
        """
        if not self._pending_plan:
            return "No pending plan to approve."

        plan = self._pending_plan
        self._pending_plan = None  # Clear pending plan

        # Reset ModeState singleton to code mode
        from .tools.modes import ModeState, AgentMode
        state = ModeState.get_instance()
        state.current_mode = AgentMode.CODE
        state.plan_content = plan.get("summary", "")

        # Rebuild toolkit with plan_mode=False (code mode)
        self.toolkit = AgentToolkit(
            connection=self.toolkit.connection,
            repo_root=self.toolkit._repo_root,
            plan_mode=False,
        )
        self.toolkit.set_emitter(self.emitter)

        # Update system prompt back to code mode
        self.system_prompt = build_system_prompt(self.toolkit)

        # Resume execution with approval message
        approval_message = f"""Your plan "{plan.get('title', 'Untitled')}" has been APPROVED.

You are now in code mode. Please implement the plan:

## Summary
{plan.get('summary', '')}

## Files to Modify
{self._format_files_to_modify(plan.get('files_to_modify', []))}

Proceed with implementation using the available tools (write_to_file, apply_diff, execute_command, etc.)."""

        return self.run(approval_message)

    def reject_plan(self, feedback: str = "") -> str:
        """Reject the pending plan and provide feedback.

        The agent remains in plan mode to revise the plan based on feedback.

        Args:
            feedback: Optional feedback explaining why the plan was rejected.

        Returns:
            The agent's response after receiving the rejection.
        """
        if not self._pending_plan:
            return "No pending plan to reject."

        plan_title = self._pending_plan.get("title", "Untitled")
        self._pending_plan = None  # Clear pending plan (but stay in plan mode)

        rejection_message = f"""Your plan "{plan_title}" was REJECTED.

{f"Feedback: {feedback}" if feedback else "Please revise the plan."}

You are still in plan mode. Please address the feedback and submit a revised plan using exit_plan."""

        return self.run(rejection_message)

    def _format_files_to_modify(self, files: list[dict]) -> str:
        """Format files_to_modify list for display."""
        if not files:
            return "No files specified"
        lines = []
        for f in files:
            path = f.get("path", "unknown")
            lines_info = f.get("lines", "")
            changes = f.get("changes", "")
            lines.append(f"- {path} ({lines_info}): {changes}")
        return "\n".join(lines)
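Note: a driver loop wiring these methods together might look like the sketch below; ask_user is a hypothetical stand-in for whatever UI collects the approve/reject decision:

    runner = AgentRunner(toolkit=AgentToolkit(plan_mode=True))
    runner.run("Add retry logic to the HTTP client")
    if runner.has_pending_plan():
        plan = runner.get_pending_plan()
        if ask_user(f"Approve plan '{plan['title']}'?"):  # hypothetical helper
            print(runner.approve_plan())  # switches to code mode and implements
        else:
            print(runner.reject_plan("Too invasive; keep the change smaller."))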

    def _run_loop(
        self,
        messages: list[dict],
        tools: list[dict],
    ) -> tuple[str, list[dict]]:
        """Run the agent loop until completion.

        Args:
            messages: Initial messages
            tools: Tool schemas

        Returns:
            Tuple of (final response text, conversation messages)
        """
        max_retries = 3

        for iteration in range(self.max_iterations):
            # When approaching max iterations, ask agent to wrap up
            if iteration == self.max_iterations - 2:
                messages.append({
                    "role": "user",
                    "content": "[SYSTEM: You are approaching your iteration limit. Please provide your findings and conclusions now, even if incomplete. Summarize what you've learned and any recommendations.]",
                })

            # Try API call with retry on context overflow
            retry_count = 0
            response = None

            while retry_count < max_retries:
                try:
                    # Proactively compact context if approaching limit
                    messages = self._maybe_compact_context(messages)

                    response = self.provider.chat(
                        messages=messages,
                        system=self.system_prompt,
                        tools=tools,
                        thinking=self.enable_thinking,
                    )
                    break  # Success

                except Exception as exc:
                    if is_context_overflow_error(exc):
                        retry_count += 1
                        log.warning(
                            "Context overflow on attempt {}/{}, reducing context...",
                            retry_count,
                            max_retries,
                        )

                        if retry_count >= max_retries:
                            raise ContextLengthError(
                                f"Failed to reduce context after {max_retries} attempts: {exc}",
                            )

                        # Reduce context by removing old messages
                        messages = reduce_context_for_retry(
                            messages,
                            keep_recent=max(2, 6 - retry_count * 2),  # Fewer messages each retry
                        )
                    else:
                        raise  # Re-raise non-context errors

            if response is None:
                raise RuntimeError("Failed to get response from provider")

            # Accumulate token usage
            self._total_input_tokens += response.input_tokens
            self._total_output_tokens += response.output_tokens
            self._total_thinking_tokens += getattr(response, "thinking_tokens", 0)

            # Emit thinking if present
            if response.thinking:
                self.emitter.emit_thinking(response.thinking)

            # Check for tool calls
            if response.tool_calls:
                # Don't emit thinking text when there are tool calls - it clutters the output
                # The thinking is still in the conversation history for context

                # Track if we need to pause for user input
                needs_user_input = False

                # Parse all tool call arguments first
                parsed_calls = []
                for tool_call in response.tool_calls:
                    args = tool_call.arguments
                    if isinstance(args, str):
                        args = json.loads(args)
                    parsed_calls.append((tool_call, args))

                # Execute tools in parallel if multiple calls
                if len(parsed_calls) > 1:
                    results = self._execute_tools_parallel(parsed_calls)
                else:
                    # Single tool - execute directly
                    tool_call, args = parsed_calls[0]
                    self.emitter.emit_tool_start(tool_call.name, args)
                    result = self.toolkit.execute(tool_call.name, **args)
                    self.emitter.emit_tool_result(
                        tool_call.name,
                        result.success,
                        self._summarize_result(result),
                    )
                    results = [(tool_call, args, result)]

                # Track if we need to rebuild toolkit for mode change
                mode_changed = False

                # Process results and build messages
                for tool_call, args, result in results:
                    # Track tool for checkpoint metadata
                    self._tools_used_this_run.add(tool_call.name)
                    # Check if tool is asking a clarification question
                    if (result.success and
                            result.data and
                            result.data.get("status") == "awaiting_response" and
                            "question" in result.data):
                        self.emitter.emit_clarification(
                            question=result.data["question"],
                            context="",
                            options=result.data.get("options", []),
                        )
                        needs_user_input = True

                    # Check if agent entered plan mode
                    if (result.success and
                            result.data and
                            result.data.get("status") == "entered_plan_mode"):
                        mode_changed = True
                        # Rebuild toolkit with plan_mode=True
                        self.toolkit = AgentToolkit(
                            connection=self.toolkit.connection,
                            repo_root=self.toolkit._repo_root,
                            plan_mode=True,
                        )
                        self.toolkit.set_emitter(self.emitter)
                        # Update system prompt with plan mode instructions
                        from .prompts.plan_mode import PLAN_MODE_PROMPT
                        self.system_prompt = PLAN_MODE_PROMPT + "\n\n" + build_system_prompt(self.toolkit)
                        # Update tools for LLM
                        tools = self.toolkit.get_all_schemas()

                    # Check if tool is submitting a plan for approval (exit_plan)
                    if (result.success and
                            result.data and
                            result.data.get("status") == "plan_submitted"):
                        # Store the pending plan
                        self._pending_plan = {
                            "title": result.data.get("title", ""),
                            "summary": result.data.get("summary", ""),
                            "files_to_modify": result.data.get("files_to_modify", []),
                            "implementation_steps": result.data.get("implementation_steps", []),
                            "risks": result.data.get("risks", []),
                            "testing_strategy": result.data.get("testing_strategy", ""),
                        }
                        self.emitter.emit_plan_submitted(
                            title=self._pending_plan["title"],
                            summary=self._pending_plan["summary"],
                            files_to_modify=self._pending_plan["files_to_modify"],
                            implementation_steps=self._pending_plan["implementation_steps"],
                            risks=self._pending_plan["risks"],
                            testing_strategy=self._pending_plan["testing_strategy"],
                        )
                        # Pause and wait for approval (similar to clarification flow)
                        needs_user_input = True

                    # Add assistant message with tool call
                    messages.append({
                        "role": "assistant",
                        "content": response.content or "",
                        "tool_calls": [{
                            "id": tool_call.id,
                            "type": "function",
                            "function": {
                                "name": tool_call.name,
                                "arguments": json.dumps(args),
                            },
                        }],
                    })

                    # Serialize and truncate tool result to prevent context overflow
                    result_json = json.dumps(result.to_dict(), cls=SafeJSONEncoder)
                    result_json = truncate_tool_output(result_json)

                    # Check if todos changed and inject reminder
                    if tool_call.name in ("write_todo", "update_todo_list"):
                        new_snapshot = self._get_todo_snapshot()
                        if new_snapshot != self._last_todo_snapshot:
                            self._last_todo_snapshot = new_snapshot
                            reminder = self._format_todo_reminder()
                            if reminder:
                                result_json += f"\n\n{reminder}"

                    # Add tool result
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": result_json,
                    })

                # If a clarification question was asked, pause and wait for user input
                if needs_user_input:
                    log.debug("Pausing agent loop - waiting for user input")
                    return "", messages

            else:
                # No tool calls - check if response was truncated
                if response.stop_reason in ("max_tokens", "length"):
                    # Response was truncated, request continuation
                    log.debug("Response truncated ({}), requesting continuation", response.stop_reason)
                    if response.content:
                        messages.append({
                            "role": "assistant",
                            "content": response.content,
                        })
                    messages.append({
                        "role": "user",
                        "content": "Your response was cut off. Please continue.",
                    })
                    continue

                # Agent is done - emit final response
                if response.content:
                    self.emitter.emit_message_start()
                    self.emitter.emit_message_delta(response.content)
                    self.emitter.emit_message_end()
                # Add final assistant message to history
                messages.append({
                    "role": "assistant",
                    "content": response.content,
                })

                # Emit final context frame summary
                self._emit_context_frame(messages)

                return response.content or "", messages

        # Hit max iterations - try one final request without tools to force a response
        try:
            final_response = self.provider.chat(
                messages=messages + [{
                    "role": "user",
                    "content": "[SYSTEM: Maximum iterations reached. Provide your final response now with whatever information you have gathered. Do not use any tools.]",
                }],
                system=self.system_prompt,
                tools=None,  # No tools - force text response
                thinking=self.enable_thinking,
            )
            # Emit thinking if present
            if final_response.thinking:
                self.emitter.emit_thinking(final_response.thinking)
            if final_response.content:
                self.emitter.emit_message_start()
                self.emitter.emit_message_delta(final_response.content)
                self.emitter.emit_message_end()
                self._emit_context_frame(messages)
                return final_response.content, messages
        except Exception as e:
            log.warning(f"Failed to get final response: {e}")

        # Fallback message if final response fails
        final_message = "Reached maximum iterations. The agent was unable to complete the task within the allowed iterations."
        self.emitter.emit_message_start()
        self.emitter.emit_message_delta(final_message)
        self.emitter.emit_message_end()
        self._emit_context_frame(messages)
        return final_message, messages
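Note: the overflow handling above follows a shrink-and-retry shape that is easy to test in isolation. A minimal sketch of the same control flow, with call_model and is_overflow as hypothetical stand-ins and a plain list slice standing in for reduce_context_for_retry:

    def call_with_shrinking_context(messages, call_model, is_overflow, max_retries=3):
        for attempt in range(1, max_retries + 1):
            try:
                return call_model(messages)
            except Exception as exc:
                if not is_overflow(exc) or attempt == max_retries:
                    raise
                # Keep fewer recent messages on each successive retry,
                # mirroring keep_recent=max(2, 6 - retry_count * 2)
                keep = max(2, 6 - attempt * 2)
                messages = messages[:1] + messages[-keep:]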

    def _summarize_result(self, result: Any) -> str:
        """Create a brief summary of a tool result."""
        if not result.success:
            return f"Error: {result.error}"

        if not result.data:
            return "Empty result"

        data = result.data

        if "results" in data:
            return f"{len(data['results'])} results"
        elif "root_node" in data:
            node = data["root_node"]
            name = node.get("qualified_name") or node.get("file_path", "unknown")
            return f"Expanded: {name}"
        elif "callers" in data:
            return f"{len(data['callers'])} callers"
        elif "callees" in data:
            return f"{len(data['callees'])} callees"

        return "Completed"

    def _emit_context_frame(self, messages: list[dict] | None = None) -> None:
        """Emit a context frame event with current exploration state.

        Args:
            messages: Current conversation messages to estimate context size
        """
        # Get exploration steps from toolkit session
        steps = self.toolkit.get_exploration_steps()

        # Estimate current context window tokens and get breakdown
        context_tokens = 0
        context_breakdown = {}
        largest_messages = []
        if messages:
            context_tokens = self._estimate_context_tokens(messages)
            context_breakdown, largest_messages = self._get_context_breakdown(messages)

        # Summarize exploration by tool
        tool_counts: dict[str, int] = {}
        entities_found = 0
        step_details: list[dict] = []

        for step in steps:
            tool_name = getattr(step, 'tool', 'unknown')
            tool_counts[tool_name] = tool_counts.get(tool_name, 0) + 1

            # Count entities from the step
            step_entities = getattr(step, 'entities_found', [])
            entities_found += len(step_entities)

            # Collect step details
            params = getattr(step, 'params', {})
            summary = getattr(step, 'result_summary', '')

            # Extract meaningful info based on tool type
            detail = {
                "tool": tool_name,
                "summary": summary,
            }

            # Add relevant params based on tool
            if tool_name == 'read_file' and 'file_path' in params:
                detail["file"] = params['file_path']
            elif tool_name == 'read_file' and 'path' in params:
                detail["file"] = params['path']
            elif tool_name in ('grep', 'semantic_search') and 'query' in params:
                detail["query"] = params['query']
            elif tool_name == 'glob' and 'pattern' in params:
                detail["pattern"] = params['pattern']
            elif tool_name == 'list_files' and 'path' in params:
                detail["path"] = params['path']

            # Add content preview if available
            content_preview = getattr(step, 'content_preview', None)
            if content_preview:
                detail["content_preview"] = content_preview

            # Add token count if available
            token_count = getattr(step, 'token_count', 0)
            if token_count > 0:
                detail["tokens"] = token_count

            # Add entities if any
            if step_entities:
                detail["entities"] = step_entities[:5]  # Limit to 5

            step_details.append(detail)

        exploration_steps = [
            {"tool": tool, "count": count}
            for tool, count in tool_counts.items()
        ]

        # Build context frame data
        adding = {
            "exploration_steps": exploration_steps,
            "entities_found": entities_found,
            "step_count": len(steps),
            "details": step_details[-20:],  # Last 20 steps
            "input_tokens": self._total_input_tokens,
            "output_tokens": self._total_output_tokens,
            "context_tokens": context_tokens,  # Current context window size
            "context_breakdown": context_breakdown,  # Tokens by message type
            "largest_messages": largest_messages,  # Top 5 biggest messages
        }

        # Get reranked context items
        reading = self._get_reranked_context()

        # Emit the context frame
        self.emitter.emit_context_frame(adding=adding, reading=reading)

    def _estimate_context_tokens(self, messages: list[dict]) -> int:
        """Estimate the current context window size in tokens.

        Args:
            messages: Conversation messages

        Returns:
            Estimated token count for the context
        """
        total_chars = 0

        # Count characters in all messages
        for msg in messages:
            content = msg.get("content", "")
            if isinstance(content, str):
                total_chars += len(content)
            elif isinstance(content, list):
                # Handle multi-part messages (e.g., with images)
                for part in content:
                    if isinstance(part, dict) and "text" in part:
                        total_chars += len(part["text"])

            # Add role overhead (~4 tokens per message for role/structure)
            total_chars += 16

        # Also count system prompt
        if self.system_prompt:
            total_chars += len(self.system_prompt)

        # Estimate: ~4 characters per token
        return total_chars // 4
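Note: the heuristic is chars/4, plus 16 characters (~4 tokens) of structural overhead per message. For example, two messages of 1,000 and 3,000 characters with a 2,000-character system prompt estimate to (1000 + 16 + 3000 + 16 + 2000) // 4 = 1,508 tokens; a real tokenizer would differ, but this is cheap and close enough for threshold checks.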

    def _get_context_breakdown(self, messages: list[dict]) -> tuple[dict, list[dict]]:
        """Get breakdown of context usage by message type.

        Args:
            messages: Conversation messages

        Returns:
            Tuple of (breakdown dict, list of largest messages)
        """
        breakdown = {
            "system_prompt": len(self.system_prompt) // 4 if self.system_prompt else 0,
            "user": 0,
            "assistant": 0,
            "tool_results": 0,
        }

        # Track individual message sizes for finding largest
        message_sizes = []

        for i, msg in enumerate(messages):
            role = msg.get("role", "unknown")
            content = msg.get("content", "")

            # Calculate content size
            if isinstance(content, str):
                size = len(content)
            elif isinstance(content, list):
                size = sum(len(p.get("text", "")) for p in content if isinstance(p, dict))
            else:
                size = 0

            tokens = size // 4

            # Categorize
            if role == "user":
                breakdown["user"] += tokens
            elif role == "assistant":
                breakdown["assistant"] += tokens
            elif role == "tool":
                breakdown["tool_results"] += tokens

            # Track for largest messages
            if tokens > 100:  # Only track substantial messages
                # Try to get a label for this message
                label = f"{role}[{i}]"
                if role == "tool":
                    tool_call_id = msg.get("tool_call_id", "")
                    # Try to find the tool name from previous assistant message
                    for prev_msg in reversed(messages[:i]):
                        if prev_msg.get("role") == "assistant" and "tool_calls" in prev_msg:
                            for tc in prev_msg.get("tool_calls", []):
                                if tc.get("id") == tool_call_id:
                                    label = tc.get("function", {}).get("name", "tool")
                                    break
                            break

                message_sizes.append({
                    "index": i,
                    "role": role,
                    "label": label,
                    "tokens": tokens,
                    "preview": content[:100] if isinstance(content, str) else str(content)[:100],
                })

        # Sort by size and get top 5
        message_sizes.sort(key=lambda x: x["tokens"], reverse=True)
        largest = message_sizes[:5]

        return breakdown, largest

    def _maybe_compact_context(
        self,
        messages: list[dict],
        threshold: float = 0.8,
    ) -> list[dict]:
        """Proactively compact context if approaching limit.

        Args:
            messages: Current conversation messages
            threshold: Trigger compaction at this % of context limit (default 80%)

        Returns:
            Original or compacted messages
        """
        context_tokens = self._estimate_context_tokens(messages)
        context_limit = self.provider.get_context_limit()

        # Check if we need to compact
        if context_tokens < context_limit * threshold:
            return messages  # No compaction needed

        log.info(
            f"Context at {context_tokens:,}/{context_limit:,} tokens "
            f"({context_tokens/context_limit:.0%}), compacting..."
        )

        return self._compact_messages_with_llm(
            messages, target_tokens=int(context_limit * 0.5)
        )

    def _compact_messages_with_llm(
        self,
        messages: list[dict],
        target_tokens: int,
    ) -> list[dict]:
        """Use fast LLM to summarize middle messages.

        Preserves:
        - First message (original user request)
        - Last 4 messages (recent context)
        - Summarizes everything in between

        Args:
            messages: Current conversation messages
            target_tokens: Target token count after compaction

        Returns:
            Compacted messages list
        """
        from .subagent import get_model_for_tier
        from .providers import get_provider

        if len(messages) <= 5:
            return messages  # Too few to compact

        # Split messages
        first_msg = messages[0]
        recent_msgs = messages[-4:]
        middle_msgs = messages[1:-4]

        if not middle_msgs:
            return messages

        # Build summary prompt
        middle_content = self._format_messages_for_summary(middle_msgs)

        prompt = f"""Summarize this conversation history concisely.

PRESERVE (include verbatim if present):
- Code snippets and file paths
- Error messages
- Key decisions made
- Important tool results (file contents, search results)

CONDENSE:
- Repetitive searches
- Verbose tool outputs
- Intermediate reasoning

CONVERSATION HISTORY:
{middle_content}

OUTPUT FORMAT:
Provide a concise summary (max 2000 tokens) that captures the essential context needed to continue this task."""

        # Use fast model for summarization
        fast_model = get_model_for_tier("fast")
        fast_provider = get_provider(fast_model)

        try:
            self.emitter.emit_thinking("Compacting context with fast model...")

            response = fast_provider.chat(
                messages=[{"role": "user", "content": prompt}],
                system="You are a context summarizer. Be concise but preserve code and technical details.",
            )

            summary = response.content or ""

            log.info(
                f"Compacted {len(middle_msgs)} messages into summary "
                f"({len(summary)} chars)"
            )

            # Build compacted messages
            return [
                first_msg,
                {
                    "role": "assistant",
                    "content": f"[Context Summary]\n{summary}\n[End Summary]",
                },
                *recent_msgs,
            ]
        except Exception as e:
            log.warning(f"LLM compaction failed: {e}, falling back to truncation")
            return [first_msg] + recent_msgs
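Note: the compacted list always has the shape [first, summary, *last four]. A sketch of the same splitting logic on plain data (summarize is a hypothetical stand-in for the fast-model call):

    def compact(messages: list[dict], summarize) -> list[dict]:
        if len(messages) <= 5:
            return messages  # nothing worth compacting
        first, middle, recent = messages[0], messages[1:-4], messages[-4:]
        summary = summarize(middle)
        return [
            first,
            {"role": "assistant", "content": f"[Context Summary]\n{summary}\n[End Summary]"},
            *recent,
        ]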

    def _format_messages_for_summary(self, messages: list[dict]) -> str:
        """Format messages for summarization prompt.

        Args:
            messages: Messages to format

        Returns:
            Formatted string for summarization
        """
        parts = []
        for msg in messages:
            role = msg.get("role", "unknown")
            content = msg.get("content", "")

            # Handle tool calls in assistant messages
            if role == "assistant" and "tool_calls" in msg:
                tool_calls = msg.get("tool_calls", [])
                tool_info = [
                    f"Called: {tc.get('function', {}).get('name', 'unknown')}"
                    for tc in tool_calls
                ]
                content = f"{content}\n[Tools: {', '.join(tool_info)}]" if content else f"[Tools: {', '.join(tool_info)}]"

            # Truncate very long content
            if len(content) > 4000:
                content = content[:4000] + "\n[...truncated...]"

            parts.append(f"[{role.upper()}]\n{content}")

        return "\n\n---\n\n".join(parts)

    def _get_reranked_context(self) -> dict:
        """Get reranked context items based on the current query.

        Returns:
            Dict with item_count and items list
        """
        try:
            from ..context.service import ContextService
            from ..context.reranker import rerank_context_items

            # Get exploration steps for context extraction
            steps = self.toolkit.get_exploration_steps()
            if not steps:
                return {"item_count": 0, "items": []}

            # Use context service to extract context items from exploration
            service = ContextService(connection=self.toolkit.connection)
            terminal_id = service.get_terminal_id()

            # Update context with exploration steps
            service.update_context(
                terminal_id=terminal_id,
                exploration_steps=steps,
            )

            # Get context items
            items = service.get_context_items(terminal_id)
            if not items:
                return {"item_count": 0, "items": []}

            # Rerank by query relevance
            if self._current_query:
                items = rerank_context_items(
                    items,
                    self._current_query,
                    top_k=20,
                )

            # Convert to serializable format
            result_items = []
            for item in items[:20]:  # Limit to 20 items
                result_items.append({
                    "name": item.qualified_name,
                    "type": item.entity_type,
                    "file": item.file_path,
                    "score": round(item.score, 3) if hasattr(item, 'score') else None,
                })

            return {
                "item_count": len(result_items),
                "items": result_items,
            }

        except Exception as e:
            log.debug(f"Failed to get reranked context: {e}")
            return {"item_count": 0, "items": []}

    def chat(self, message: str, images: Optional[list] = None) -> str:
        """Continue a conversation with a new message.

        This method maintains conversation history for multi-turn interactions.
        Call run() first to start a conversation, then chat() for follow-ups.

        Args:
            message: User's follow-up message
            images: Optional list of images to include

        Returns:
            Agent's response
        """
        if not self._messages:
            # No history, just run fresh
            return self.run(message, images=images)

        # Store query for reranking context frame
        self._current_query = message

        # Add new user message to history
        self._messages.append({
            "role": "user",
            "content": message,
        })

        # Get tool schemas
        tools = self.toolkit.get_all_schemas()

        try:
            response, final_messages = self._run_loop(self._messages, tools)
            # Update conversation history
            self._messages = final_messages
            self.emitter.emit_end(success=True)
            # Create checkpoint if manager is configured
            self._create_checkpoint()
            return response

        except Exception as e:
            log.exception("Agent chat failed")
            self.emitter.emit_error(str(e))
            return f"Error: {str(e)}"
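Note: run() starts the conversation and chat() continues it against the same history; the queries below are illustrative:

    runner = AgentRunner()
    runner.run("Summarize the checkpoint subsystem.")
    follow_up = runner.chat("Which of those functions write to disk?")
    print(follow_up)
    runner.reset()  # clear session state and token counters before a new task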

    def _create_checkpoint(self) -> None:
        """Create a git checkpoint after successful run.

        Only creates a checkpoint if:
        - A checkpoint manager is configured
        - There are file changes to commit
        """
        if not self._checkpoint_manager:
            return

        try:
            self._checkpoint_manager.create_checkpoint(
                messages=self._messages,
                model=self.model,
                system_prompt=self.system_prompt,
                tools_used=list(self._tools_used_this_run),
                token_usage={
                    "input": self._total_input_tokens,
                    "output": self._total_output_tokens,
                    "thinking": self._total_thinking_tokens,
                },
            )
        except Exception as e:
            log.warning(f"Failed to create checkpoint: {e}")
        finally:
            # Clear tools for next run
            self._tools_used_this_run.clear()

    def reset(self) -> None:
        """Reset the agent state."""
        self.toolkit.reset_session()
        self._total_input_tokens = 0
        self._total_output_tokens = 0
        self._current_query = ""