emdash-core 0.1.25__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. emdash_core/agent/__init__.py +4 -0
  2. emdash_core/agent/events.py +42 -20
  3. emdash_core/agent/inprocess_subagent.py +123 -10
  4. emdash_core/agent/prompts/__init__.py +4 -3
  5. emdash_core/agent/prompts/main_agent.py +32 -2
  6. emdash_core/agent/prompts/plan_mode.py +236 -107
  7. emdash_core/agent/prompts/subagents.py +79 -15
  8. emdash_core/agent/prompts/workflow.py +145 -26
  9. emdash_core/agent/providers/factory.py +2 -2
  10. emdash_core/agent/providers/openai_provider.py +67 -15
  11. emdash_core/agent/runner/__init__.py +49 -0
  12. emdash_core/agent/runner/agent_runner.py +753 -0
  13. emdash_core/agent/runner/context.py +451 -0
  14. emdash_core/agent/runner/factory.py +108 -0
  15. emdash_core/agent/runner/plan.py +217 -0
  16. emdash_core/agent/runner/sdk_runner.py +324 -0
  17. emdash_core/agent/runner/utils.py +67 -0
  18. emdash_core/agent/skills.py +47 -8
  19. emdash_core/agent/toolkit.py +46 -14
  20. emdash_core/agent/toolkits/plan.py +9 -11
  21. emdash_core/agent/tools/__init__.py +2 -2
  22. emdash_core/agent/tools/coding.py +48 -4
  23. emdash_core/agent/tools/modes.py +151 -143
  24. emdash_core/agent/tools/task.py +41 -2
  25. emdash_core/api/agent.py +555 -1
  26. emdash_core/skills/frontend-design/SKILL.md +56 -0
  27. emdash_core/sse/stream.py +4 -0
  28. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/METADATA +2 -1
  29. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/RECORD +31 -24
  30. emdash_core/agent/runner.py +0 -1123
  31. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/WHEEL +0 -0
  32. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/entry_points.txt +0 -0
@@ -1,1123 +0,0 @@
1
- """Agent runner for LLM-powered exploration."""
2
-
3
- import json
4
- import os
5
- from concurrent.futures import ThreadPoolExecutor, as_completed
6
- from datetime import datetime, date
7
- from typing import Any, Optional
8
-
9
- from ..utils.logger import log
10
- from ..core.config import get_config
11
- from ..core.exceptions import ContextLengthError
12
- from .toolkit import AgentToolkit
13
- from .events import AgentEventEmitter, NullEmitter
14
- from .providers import get_provider
15
- from .providers.factory import DEFAULT_MODEL
16
- from .context_manager import (
17
- truncate_tool_output,
18
- reduce_context_for_retry,
19
- is_context_overflow_error,
20
- )
21
- from .prompts import BASE_SYSTEM_PROMPT, build_system_prompt
22
- from .tools.tasks import TaskState
23
- from ..checkpoint import CheckpointManager
24
-
25
-
26
- class SafeJSONEncoder(json.JSONEncoder):
27
- """JSON encoder that handles Neo4j types and other non-serializable objects."""
28
-
29
- def default(self, obj: Any) -> Any:
30
- # Handle datetime objects
31
- if isinstance(obj, (datetime, date)):
32
- return obj.isoformat()
33
-
34
- # Handle Neo4j DateTime
35
- if hasattr(obj, 'isoformat'):
36
- return obj.isoformat()
37
-
38
- # Handle Neo4j Date, Time, etc.
39
- if hasattr(obj, 'to_native'):
40
- return str(obj.to_native())
41
-
42
- # Handle sets
43
- if isinstance(obj, set):
44
- return list(obj)
45
-
46
- # Handle bytes
47
- if isinstance(obj, bytes):
48
- return obj.decode('utf-8', errors='replace')
49
-
50
- # Fallback to string representation
51
- try:
52
- return str(obj)
53
- except Exception:
54
- return f"<non-serializable: {type(obj).__name__}>"
55
-
56
-
57
- class AgentRunner:
58
- """Runs an LLM agent with tool access for code exploration.
59
-
60
- Example:
61
- runner = AgentRunner()
62
- response = runner.run("How does authentication work in this codebase?")
63
- print(response)
64
- """
65
-
66
- def __init__(
67
- self,
68
- toolkit: Optional[AgentToolkit] = None,
69
- model: str = DEFAULT_MODEL,
70
- system_prompt: Optional[str] = None,
71
- emitter: Optional[AgentEventEmitter] = None,
72
- max_iterations: int = int(os.getenv("EMDASH_MAX_ITERATIONS", "100")),
73
- verbose: bool = False,
74
- show_tool_results: bool = False,
75
- enable_thinking: Optional[bool] = None,
76
- checkpoint_manager: Optional[CheckpointManager] = None,
77
- ):
78
- """Initialize the agent runner.
79
-
80
- Args:
81
- toolkit: AgentToolkit instance. If None, creates default.
82
- model: LLM model to use.
83
- system_prompt: Custom system prompt. If None, uses default.
84
- emitter: Event emitter for streaming output.
85
- max_iterations: Maximum tool call iterations.
86
- verbose: Whether to print verbose output.
87
- show_tool_results: Whether to show detailed tool results.
88
- enable_thinking: Enable extended thinking. If None, auto-detect from model.
89
- checkpoint_manager: Optional checkpoint manager for git-based checkpoints.
90
- """
91
- self.toolkit = toolkit or AgentToolkit()
92
- self.provider = get_provider(model)
93
- self.model = model
94
- # Build system prompt, prepending plan mode prompt if in plan mode
95
- if system_prompt:
96
- self.system_prompt = system_prompt
97
- elif self.toolkit.plan_mode:
98
- from .prompts.plan_mode import PLAN_MODE_PROMPT
99
- self.system_prompt = PLAN_MODE_PROMPT + "\n\n" + build_system_prompt(self.toolkit)
100
- else:
101
- self.system_prompt = build_system_prompt(self.toolkit)
102
- self.emitter = emitter or NullEmitter()
103
- # Inject emitter into tools that need it (e.g., TaskTool for sub-agent streaming)
104
- self.toolkit.set_emitter(self.emitter)
105
- self.max_iterations = max_iterations
106
- self.verbose = verbose
107
- self.show_tool_results = show_tool_results
108
- # Extended thinking support
109
- if enable_thinking is None:
110
- # Auto-detect from provider capabilities
111
- self.enable_thinking = (
112
- hasattr(self.provider, "supports_thinking")
113
- and self.provider.supports_thinking()
114
- )
115
- else:
116
- self.enable_thinking = enable_thinking
117
- # Conversation history for multi-turn support
118
- self._messages: list[dict] = []
119
- # Token usage tracking
120
- self._total_input_tokens: int = 0
121
- self._total_output_tokens: int = 0
122
- self._total_thinking_tokens: int = 0
123
- # Store query for reranking
124
- self._current_query: str = ""
125
- # Todo state tracking for injection
126
- self._last_todo_snapshot: str = ""
127
- # Checkpoint manager for git-based checkpoints
128
- self._checkpoint_manager = checkpoint_manager
129
- # Track tools used during current run (for checkpoint metadata)
130
- self._tools_used_this_run: set[str] = set()
131
- # Plan approval state
132
- self._pending_plan: Optional[dict] = None # Stores submitted plan awaiting approval
133
-
134
- def _get_todo_snapshot(self) -> str:
135
- """Get current todo state as string for comparison."""
136
- state = TaskState.get_instance()
137
- return json.dumps(state.get_all_tasks(), sort_keys=True)
138
-
139
- def _format_todo_reminder(self) -> str:
140
- """Format current todos as XML reminder for injection into context."""
141
- state = TaskState.get_instance()
142
- tasks = state.get_all_tasks()
143
- if not tasks:
144
- return ""
145
-
146
- counts = {"pending": 0, "in_progress": 0, "completed": 0}
147
- lines = []
148
- for t in tasks:
149
- status = t.get("status", "pending")
150
- counts[status] = counts.get(status, 0) + 1
151
- status_icon = {"pending": "⬚", "in_progress": "🔄", "completed": "✅"}.get(status, "?")
152
- lines.append(f' {t["id"]}. {status_icon} {t["title"]}')
153
-
154
- header = f'Tasks: {counts["completed"]} completed, {counts["in_progress"]} in progress, {counts["pending"]} pending'
155
- task_list = "\n".join(lines)
156
- return f"<todo-state>\n{header}\n{task_list}\n</todo-state>"
157
-
158
- def _execute_tools_parallel(self, parsed_calls: list) -> list:
159
- """Execute multiple tool calls in parallel using a thread pool.
160
-
161
- Args:
162
- parsed_calls: List of (tool_call, args) tuples
163
-
164
- Returns:
165
- List of (tool_call, args, result) tuples in original order
166
- """
167
- # Emit tool start events for all calls
168
- for tool_call, args in parsed_calls:
169
- self.emitter.emit_tool_start(tool_call.name, args)
170
-
171
- def execute_one(item):
172
- tool_call, args = item
173
- try:
174
- result = self.toolkit.execute(tool_call.name, **args)
175
- return (tool_call, args, result)
176
- except Exception as e:
177
- log.exception(f"Tool {tool_call.name} failed")
178
- from .tools.base import ToolResult
179
- return (tool_call, args, ToolResult.error_result(str(e)))
180
-
181
- # Execute in parallel with up to 3 workers
182
- results: list = [None] * len(parsed_calls)
183
- with ThreadPoolExecutor(max_workers=3) as executor:
184
- futures = {executor.submit(execute_one, item): i for i, item in enumerate(parsed_calls)}
185
- # Collect results maintaining order
186
- for future in as_completed(futures):
187
- idx = futures[future]
188
- results[idx] = future.result()
189
-
190
- # Emit tool result events for all calls
191
- for tool_call, args, result in results:
192
- self.emitter.emit_tool_result(
193
- tool_call.name,
194
- result.success,
195
- self._summarize_result(result),
196
- )
197
-
198
- return results
199
-
200
- def run(
201
- self,
202
- query: str,
203
- context: Optional[str] = None,
204
- images: Optional[list] = None,
205
- ) -> str:
206
- """Run the agent to answer a query.
207
-
208
- Args:
209
- query: User's question or request
210
- context: Optional additional context
211
- images: Optional list of images to include
212
-
213
- Returns:
214
- Agent's final response
215
- """
216
- # Store query for reranking context frame
217
- self._current_query = query
218
-
219
- # Build user message
220
- if context:
221
- user_message = {
222
- "role": "user",
223
- "content": f"Context:\n{context}\n\nQuestion: {query}",
224
- }
225
- else:
226
- user_message = {
227
- "role": "user",
228
- "content": query,
229
- }
230
-
231
- # Save user message to history BEFORE running (so it's preserved even if interrupted)
232
- self._messages.append(user_message)
233
- messages = list(self._messages) # Copy for the loop
234
-
235
- # TODO: Handle images if provided
236
-
237
- # Get tool schemas
238
- tools = self.toolkit.get_all_schemas()
239
-
240
- try:
241
- response, final_messages = self._run_loop(messages, tools)
242
- # Update conversation history with full exchange
243
- self._messages = final_messages
244
- self.emitter.emit_end(success=True)
245
- # Create checkpoint if manager is configured
246
- self._create_checkpoint()
247
- return response
248
-
249
- except Exception as e:
250
- log.exception("Agent run failed")
251
- self.emitter.emit_error(str(e))
252
- # Keep user message in history even on error (already appended above)
253
- return f"Error: {str(e)}"
254
-
255
- def has_pending_plan(self) -> bool:
256
- """Check if there's a plan awaiting approval.
257
-
258
- Returns:
259
- True if a plan has been submitted and is awaiting approval.
260
- """
261
- return self._pending_plan is not None
262
-
263
- def get_pending_plan(self) -> Optional[dict]:
264
- """Get the pending plan if one exists.
265
-
266
- Returns:
267
- The pending plan dict, or None if no plan is pending.
268
- """
269
- return self._pending_plan
270
-
271
- def approve_plan(self) -> str:
272
- """Approve the pending plan and transition back to code mode.
273
-
274
- This method should be called after the user approves a submitted plan.
275
- It transitions the agent from plan mode back to code mode, allowing
276
- it to implement the approved plan.
277
-
278
- Returns:
279
- The agent's response after transitioning to code mode.
280
- """
281
- if not self._pending_plan:
282
- return "No pending plan to approve."
283
-
284
- plan = self._pending_plan
285
- self._pending_plan = None # Clear pending plan
286
-
287
- # Reset ModeState singleton to code mode
288
- from .tools.modes import ModeState, AgentMode
289
- state = ModeState.get_instance()
290
- state.current_mode = AgentMode.CODE
291
- state.plan_content = plan.get("summary", "")
292
-
293
- # Rebuild toolkit with plan_mode=False (code mode)
294
- self.toolkit = AgentToolkit(
295
- connection=self.toolkit.connection,
296
- repo_root=self.toolkit._repo_root,
297
- plan_mode=False,
298
- )
299
- self.toolkit.set_emitter(self.emitter)
300
-
301
- # Update system prompt back to code mode
302
- self.system_prompt = build_system_prompt(self.toolkit)
303
-
304
- # Resume execution with approval message
305
- approval_message = f"""Your plan "{plan.get('title', 'Untitled')}" has been APPROVED.
306
-
307
- You are now in code mode. Please implement the plan:
308
-
309
- ## Summary
310
- {plan.get('summary', '')}
311
-
312
- ## Files to Modify
313
- {self._format_files_to_modify(plan.get('files_to_modify', []))}
314
-
315
- Proceed with implementation using the available tools (write_to_file, apply_diff, execute_command, etc.)."""
316
-
317
- return self.run(approval_message)
318
-
319
- def reject_plan(self, feedback: str = "") -> str:
320
- """Reject the pending plan and provide feedback.
321
-
322
- The agent remains in plan mode to revise the plan based on feedback.
323
-
324
- Args:
325
- feedback: Optional feedback explaining why the plan was rejected.
326
-
327
- Returns:
328
- The agent's response after receiving the rejection.
329
- """
330
- if not self._pending_plan:
331
- return "No pending plan to reject."
332
-
333
- plan_title = self._pending_plan.get("title", "Untitled")
334
- self._pending_plan = None # Clear pending plan (but stay in plan mode)
335
-
336
- rejection_message = f"""Your plan "{plan_title}" was REJECTED.
337
-
338
- {f"Feedback: {feedback}" if feedback else "Please revise the plan."}
339
-
340
- You are still in plan mode. Please address the feedback and submit a revised plan using exit_plan."""
341
-
342
- return self.run(rejection_message)
343
-
344
- def _format_files_to_modify(self, files: list[dict]) -> str:
345
- """Format files_to_modify list for display."""
346
- if not files:
347
- return "No files specified"
348
- lines = []
349
- for f in files:
350
- path = f.get("path", "unknown")
351
- lines_info = f.get("lines", "")
352
- changes = f.get("changes", "")
353
- lines.append(f"- {path} ({lines_info}): {changes}")
354
- return "\n".join(lines)
355
-
356
- def _run_loop(
357
- self,
358
- messages: list[dict],
359
- tools: list[dict],
360
- ) -> tuple[str, list[dict]]:
361
- """Run the agent loop until completion.
362
-
363
- Args:
364
- messages: Initial messages
365
- tools: Tool schemas
366
-
367
- Returns:
368
- Tuple of (final response text, conversation messages)
369
- """
370
- max_retries = 3
371
-
372
- for iteration in range(self.max_iterations):
373
- # When approaching max iterations, ask agent to wrap up
374
- if iteration == self.max_iterations - 2:
375
- messages.append({
376
- "role": "user",
377
- "content": "[SYSTEM: You are approaching your iteration limit. Please provide your findings and conclusions now, even if incomplete. Summarize what you've learned and any recommendations.]",
378
- })
379
-
380
- # Try API call with retry on context overflow
381
- retry_count = 0
382
- response = None
383
-
384
- while retry_count < max_retries:
385
- try:
386
- # Proactively compact context if approaching limit
387
- messages = self._maybe_compact_context(messages)
388
-
389
- response = self.provider.chat(
390
- messages=messages,
391
- system=self.system_prompt,
392
- tools=tools,
393
- thinking=self.enable_thinking,
394
- )
395
- break # Success
396
-
397
- except Exception as exc:
398
- if is_context_overflow_error(exc):
399
- retry_count += 1
400
- log.warning(
401
- "Context overflow on attempt {}/{}, reducing context...",
402
- retry_count,
403
- max_retries,
404
- )
405
-
406
- if retry_count >= max_retries:
407
- raise ContextLengthError(
408
- f"Failed to reduce context after {max_retries} attempts: {exc}",
409
- )
410
-
411
- # Reduce context by removing old messages
412
- messages = reduce_context_for_retry(
413
- messages,
414
- keep_recent=max(2, 6 - retry_count * 2), # Fewer messages each retry
415
- )
416
- else:
417
- raise # Re-raise non-context errors
418
-
419
- if response is None:
420
- raise RuntimeError("Failed to get response from provider")
421
-
422
- # Accumulate token usage
423
- self._total_input_tokens += response.input_tokens
424
- self._total_output_tokens += response.output_tokens
425
- self._total_thinking_tokens += getattr(response, "thinking_tokens", 0)
426
-
427
- # Emit thinking if present
428
- if response.thinking:
429
- self.emitter.emit_thinking(response.thinking)
430
-
431
- # Check for tool calls
432
- if response.tool_calls:
433
- # Don't emit thinking text when there are tool calls - it clutters the output
434
- # The thinking is still in the conversation history for context
435
-
436
- # Track if we need to pause for user input
437
- needs_user_input = False
438
-
439
- # Parse all tool call arguments first
440
- parsed_calls = []
441
- for tool_call in response.tool_calls:
442
- args = tool_call.arguments
443
- if isinstance(args, str):
444
- args = json.loads(args)
445
- parsed_calls.append((tool_call, args))
446
-
447
- # Execute tools in parallel if multiple calls
448
- if len(parsed_calls) > 1:
449
- results = self._execute_tools_parallel(parsed_calls)
450
- else:
451
- # Single tool - execute directly
452
- tool_call, args = parsed_calls[0]
453
- self.emitter.emit_tool_start(tool_call.name, args)
454
- result = self.toolkit.execute(tool_call.name, **args)
455
- self.emitter.emit_tool_result(
456
- tool_call.name,
457
- result.success,
458
- self._summarize_result(result),
459
- )
460
- results = [(tool_call, args, result)]
461
-
462
- # Track if we need to rebuild toolkit for mode change
463
- mode_changed = False
464
-
465
- # Process results and build messages
466
- for tool_call, args, result in results:
467
- # Track tool for checkpoint metadata
468
- self._tools_used_this_run.add(tool_call.name)
469
- # Check if tool is asking a clarification question
470
- if (result.success and
471
- result.data and
472
- result.data.get("status") == "awaiting_response" and
473
- "question" in result.data):
474
- self.emitter.emit_clarification(
475
- question=result.data["question"],
476
- context="",
477
- options=result.data.get("options", []),
478
- )
479
- needs_user_input = True
480
-
481
- # Check if agent entered plan mode
482
- if (result.success and
483
- result.data and
484
- result.data.get("status") == "entered_plan_mode"):
485
- mode_changed = True
486
- # Rebuild toolkit with plan_mode=True
487
- self.toolkit = AgentToolkit(
488
- connection=self.toolkit.connection,
489
- repo_root=self.toolkit._repo_root,
490
- plan_mode=True,
491
- )
492
- self.toolkit.set_emitter(self.emitter)
493
- # Update system prompt with plan mode instructions
494
- from .prompts.plan_mode import PLAN_MODE_PROMPT
495
- self.system_prompt = PLAN_MODE_PROMPT + "\n\n" + build_system_prompt(self.toolkit)
496
- # Update tools for LLM
497
- tools = self.toolkit.get_all_schemas()
498
-
499
- # Check if tool is submitting a plan for approval (exit_plan)
500
- if (result.success and
501
- result.data and
502
- result.data.get("status") == "plan_submitted"):
503
- # Store the pending plan
504
- self._pending_plan = {
505
- "title": result.data.get("title", ""),
506
- "summary": result.data.get("summary", ""),
507
- "files_to_modify": result.data.get("files_to_modify", []),
508
- "implementation_steps": result.data.get("implementation_steps", []),
509
- "risks": result.data.get("risks", []),
510
- "testing_strategy": result.data.get("testing_strategy", ""),
511
- }
512
- self.emitter.emit_plan_submitted(
513
- title=self._pending_plan["title"],
514
- summary=self._pending_plan["summary"],
515
- files_to_modify=self._pending_plan["files_to_modify"],
516
- implementation_steps=self._pending_plan["implementation_steps"],
517
- risks=self._pending_plan["risks"],
518
- testing_strategy=self._pending_plan["testing_strategy"],
519
- )
520
- # Pause and wait for approval (similar to clarification flow)
521
- needs_user_input = True
522
-
523
- # Add assistant message with tool call
524
- messages.append({
525
- "role": "assistant",
526
- "content": response.content or "",
527
- "tool_calls": [{
528
- "id": tool_call.id,
529
- "type": "function",
530
- "function": {
531
- "name": tool_call.name,
532
- "arguments": json.dumps(args),
533
- },
534
- }],
535
- })
536
-
537
- # Serialize and truncate tool result to prevent context overflow
538
- result_json = json.dumps(result.to_dict(), cls=SafeJSONEncoder)
539
- result_json = truncate_tool_output(result_json)
540
-
541
- # Check if todos changed and inject reminder
542
- if tool_call.name in ("write_todo", "update_todo_list"):
543
- new_snapshot = self._get_todo_snapshot()
544
- if new_snapshot != self._last_todo_snapshot:
545
- self._last_todo_snapshot = new_snapshot
546
- reminder = self._format_todo_reminder()
547
- if reminder:
548
- result_json += f"\n\n{reminder}"
549
-
550
- # Add tool result
551
- messages.append({
552
- "role": "tool",
553
- "tool_call_id": tool_call.id,
554
- "content": result_json,
555
- })
556
-
557
- # If a clarification question was asked, pause and wait for user input
558
- if needs_user_input:
559
- log.debug("Pausing agent loop - waiting for user input")
560
- return "", messages
561
-
562
- else:
563
- # No tool calls - check if response was truncated
564
- if response.stop_reason in ("max_tokens", "length"):
565
- # Response was truncated, request continuation
566
- log.debug("Response truncated ({}), requesting continuation", response.stop_reason)
567
- if response.content:
568
- messages.append({
569
- "role": "assistant",
570
- "content": response.content,
571
- })
572
- messages.append({
573
- "role": "user",
574
- "content": "Your response was cut off. Please continue.",
575
- })
576
- continue
577
-
578
- # Agent is done - emit final response
579
- if response.content:
580
- self.emitter.emit_message_start()
581
- self.emitter.emit_message_delta(response.content)
582
- self.emitter.emit_message_end()
583
- # Add final assistant message to history
584
- messages.append({
585
- "role": "assistant",
586
- "content": response.content,
587
- })
588
-
589
- # Emit final context frame summary
590
- self._emit_context_frame(messages)
591
-
592
- return response.content or "", messages
593
-
594
- # Hit max iterations - try one final request without tools to force a response
595
- try:
596
- final_response = self.provider.chat(
597
- messages=messages + [{
598
- "role": "user",
599
- "content": "[SYSTEM: Maximum iterations reached. Provide your final response now with whatever information you have gathered. Do not use any tools.]",
600
- }],
601
- system=self.system_prompt,
602
- tools=None, # No tools - force text response
603
- thinking=self.enable_thinking,
604
- )
605
- # Emit thinking if present
606
- if final_response.thinking:
607
- self.emitter.emit_thinking(final_response.thinking)
608
- if final_response.content:
609
- self.emitter.emit_message_start()
610
- self.emitter.emit_message_delta(final_response.content)
611
- self.emitter.emit_message_end()
612
- self._emit_context_frame(messages)
613
- return final_response.content, messages
614
- except Exception as e:
615
- log.warning(f"Failed to get final response: {e}")
616
-
617
- # Fallback message if final response fails
618
- final_message = "Reached maximum iterations. The agent was unable to complete the task within the allowed iterations."
619
- self.emitter.emit_message_start()
620
- self.emitter.emit_message_delta(final_message)
621
- self.emitter.emit_message_end()
622
- self._emit_context_frame(messages)
623
- return final_message, messages
624
-
625
- def _summarize_result(self, result: Any) -> str:
626
- """Create a brief summary of a tool result."""
627
- if not result.success:
628
- return f"Error: {result.error}"
629
-
630
- if not result.data:
631
- return "Empty result"
632
-
633
- data = result.data
634
-
635
- if "results" in data:
636
- return f"{len(data['results'])} results"
637
- elif "root_node" in data:
638
- node = data["root_node"]
639
- name = node.get("qualified_name") or node.get("file_path", "unknown")
640
- return f"Expanded: {name}"
641
- elif "callers" in data:
642
- return f"{len(data['callers'])} callers"
643
- elif "callees" in data:
644
- return f"{len(data['callees'])} callees"
645
-
646
- return "Completed"
647
-
648
- def _emit_context_frame(self, messages: list[dict] | None = None) -> None:
649
- """Emit a context frame event with current exploration state.
650
-
651
- Args:
652
- messages: Current conversation messages to estimate context size
653
- """
654
- # Get exploration steps from toolkit session
655
- steps = self.toolkit.get_exploration_steps()
656
-
657
- # Estimate current context window tokens and get breakdown
658
- context_tokens = 0
659
- context_breakdown = {}
660
- largest_messages = []
661
- if messages:
662
- context_tokens = self._estimate_context_tokens(messages)
663
- context_breakdown, largest_messages = self._get_context_breakdown(messages)
664
-
665
- # Summarize exploration by tool
666
- tool_counts: dict[str, int] = {}
667
- entities_found = 0
668
- step_details: list[dict] = []
669
-
670
- for step in steps:
671
- tool_name = getattr(step, 'tool', 'unknown')
672
- tool_counts[tool_name] = tool_counts.get(tool_name, 0) + 1
673
-
674
- # Count entities from the step
675
- step_entities = getattr(step, 'entities_found', [])
676
- entities_found += len(step_entities)
677
-
678
- # Collect step details
679
- params = getattr(step, 'params', {})
680
- summary = getattr(step, 'result_summary', '')
681
-
682
- # Extract meaningful info based on tool type
683
- detail = {
684
- "tool": tool_name,
685
- "summary": summary,
686
- }
687
-
688
- # Add relevant params based on tool
689
- if tool_name == 'read_file' and 'file_path' in params:
690
- detail["file"] = params['file_path']
691
- elif tool_name == 'read_file' and 'path' in params:
692
- detail["file"] = params['path']
693
- elif tool_name in ('grep', 'semantic_search') and 'query' in params:
694
- detail["query"] = params['query']
695
- elif tool_name == 'glob' and 'pattern' in params:
696
- detail["pattern"] = params['pattern']
697
- elif tool_name == 'list_files' and 'path' in params:
698
- detail["path"] = params['path']
699
-
700
- # Add content preview if available
701
- content_preview = getattr(step, 'content_preview', None)
702
- if content_preview:
703
- detail["content_preview"] = content_preview
704
-
705
- # Add token count if available
706
- token_count = getattr(step, 'token_count', 0)
707
- if token_count > 0:
708
- detail["tokens"] = token_count
709
-
710
- # Add entities if any
711
- if step_entities:
712
- detail["entities"] = step_entities[:5] # Limit to 5
713
-
714
- step_details.append(detail)
715
-
716
- exploration_steps = [
717
- {"tool": tool, "count": count}
718
- for tool, count in tool_counts.items()
719
- ]
720
-
721
- # Build context frame data
722
- adding = {
723
- "exploration_steps": exploration_steps,
724
- "entities_found": entities_found,
725
- "step_count": len(steps),
726
- "details": step_details[-20:], # Last 20 steps
727
- "input_tokens": self._total_input_tokens,
728
- "output_tokens": self._total_output_tokens,
729
- "context_tokens": context_tokens, # Current context window size
730
- "context_breakdown": context_breakdown, # Tokens by message type
731
- "largest_messages": largest_messages, # Top 5 biggest messages
732
- }
733
-
734
- # Get reranked context items
735
- reading = self._get_reranked_context()
736
-
737
- # Emit the context frame
738
- self.emitter.emit_context_frame(adding=adding, reading=reading)
739
-
740
- def _estimate_context_tokens(self, messages: list[dict]) -> int:
741
- """Estimate the current context window size in tokens.
742
-
743
- Args:
744
- messages: Conversation messages
745
-
746
- Returns:
747
- Estimated token count for the context
748
- """
749
- total_chars = 0
750
-
751
- # Count characters in all messages
752
- for msg in messages:
753
- content = msg.get("content", "")
754
- if isinstance(content, str):
755
- total_chars += len(content)
756
- elif isinstance(content, list):
757
- # Handle multi-part messages (e.g., with images)
758
- for part in content:
759
- if isinstance(part, dict) and "text" in part:
760
- total_chars += len(part["text"])
761
-
762
- # Add role overhead (~4 tokens per message for role/structure)
763
- total_chars += 16
764
-
765
- # Also count system prompt
766
- if self.system_prompt:
767
- total_chars += len(self.system_prompt)
768
-
769
- # Estimate: ~4 characters per token
770
- return total_chars // 4
771
-
772
- def _get_context_breakdown(self, messages: list[dict]) -> tuple[dict, list[dict]]:
773
- """Get breakdown of context usage by message type.
774
-
775
- Args:
776
- messages: Conversation messages
777
-
778
- Returns:
779
- Tuple of (breakdown dict, list of largest messages)
780
- """
781
- breakdown = {
782
- "system_prompt": len(self.system_prompt) // 4 if self.system_prompt else 0,
783
- "user": 0,
784
- "assistant": 0,
785
- "tool_results": 0,
786
- }
787
-
788
- # Track individual message sizes for finding largest
789
- message_sizes = []
790
-
791
- for i, msg in enumerate(messages):
792
- role = msg.get("role", "unknown")
793
- content = msg.get("content", "")
794
-
795
- # Calculate content size
796
- if isinstance(content, str):
797
- size = len(content)
798
- elif isinstance(content, list):
799
- size = sum(len(p.get("text", "")) for p in content if isinstance(p, dict))
800
- else:
801
- size = 0
802
-
803
- tokens = size // 4
804
-
805
- # Categorize
806
- if role == "user":
807
- breakdown["user"] += tokens
808
- elif role == "assistant":
809
- breakdown["assistant"] += tokens
810
- elif role == "tool":
811
- breakdown["tool_results"] += tokens
812
-
813
- # Track for largest messages
814
- if tokens > 100: # Only track substantial messages
815
- # Try to get a label for this message
816
- label = f"{role}[{i}]"
817
- if role == "tool":
818
- tool_call_id = msg.get("tool_call_id", "")
819
- # Try to find the tool name from previous assistant message
820
- for prev_msg in reversed(messages[:i]):
821
- if prev_msg.get("role") == "assistant" and "tool_calls" in prev_msg:
822
- for tc in prev_msg.get("tool_calls", []):
823
- if tc.get("id") == tool_call_id:
824
- label = tc.get("function", {}).get("name", "tool")
825
- break
826
- break
827
-
828
- message_sizes.append({
829
- "index": i,
830
- "role": role,
831
- "label": label,
832
- "tokens": tokens,
833
- "preview": content[:100] if isinstance(content, str) else str(content)[:100],
834
- })
835
-
836
- # Sort by size and get top 5
837
- message_sizes.sort(key=lambda x: x["tokens"], reverse=True)
838
- largest = message_sizes[:5]
839
-
840
- return breakdown, largest
841
-
842
- def _maybe_compact_context(
843
- self,
844
- messages: list[dict],
845
- threshold: float = 0.8,
846
- ) -> list[dict]:
847
- """Proactively compact context if approaching limit.
848
-
849
- Args:
850
- messages: Current conversation messages
851
- threshold: Trigger compaction at this % of context limit (default 80%)
852
-
853
- Returns:
854
- Original or compacted messages
855
- """
856
- context_tokens = self._estimate_context_tokens(messages)
857
- context_limit = self.provider.get_context_limit()
858
-
859
- # Check if we need to compact
860
- if context_tokens < context_limit * threshold:
861
- return messages # No compaction needed
862
-
863
- log.info(
864
- f"Context at {context_tokens:,}/{context_limit:,} tokens "
865
- f"({context_tokens/context_limit:.0%}), compacting..."
866
- )
867
-
868
- return self._compact_messages_with_llm(
869
- messages, target_tokens=int(context_limit * 0.5)
870
- )
871
-
872
- def _compact_messages_with_llm(
873
- self,
874
- messages: list[dict],
875
- target_tokens: int,
876
- ) -> list[dict]:
877
- """Use fast LLM to summarize middle messages.
878
-
879
- Preserves:
880
- - First message (original user request)
881
- - Last 4 messages (recent context)
882
- - Summarizes everything in between
883
-
884
- Args:
885
- messages: Current conversation messages
886
- target_tokens: Target token count after compaction
887
-
888
- Returns:
889
- Compacted messages list
890
- """
891
- from .subagent import get_model_for_tier
892
- from .providers import get_provider
893
-
894
- if len(messages) <= 5:
895
- return messages # Too few to compact
896
-
897
- # Split messages
898
- first_msg = messages[0]
899
- recent_msgs = messages[-4:]
900
- middle_msgs = messages[1:-4]
901
-
902
- if not middle_msgs:
903
- return messages
904
-
905
- # Build summary prompt
906
- middle_content = self._format_messages_for_summary(middle_msgs)
907
-
908
- prompt = f"""Summarize this conversation history concisely.
909
-
910
- PRESERVE (include verbatim if present):
911
- - Code snippets and file paths
912
- - Error messages
913
- - Key decisions made
914
- - Important tool results (file contents, search results)
915
-
916
- CONDENSE:
917
- - Repetitive searches
918
- - Verbose tool outputs
919
- - Intermediate reasoning
920
-
921
- CONVERSATION HISTORY:
922
- {middle_content}
923
-
924
- OUTPUT FORMAT:
925
- Provide a concise summary (max 2000 tokens) that captures the essential context needed to continue this task."""
926
-
927
- # Use fast model for summarization
928
- fast_model = get_model_for_tier("fast")
929
- fast_provider = get_provider(fast_model)
930
-
931
- try:
932
- self.emitter.emit_thinking("Compacting context with fast model...")
933
-
934
- response = fast_provider.chat(
935
- messages=[{"role": "user", "content": prompt}],
936
- system="You are a context summarizer. Be concise but preserve code and technical details.",
937
- )
938
-
939
- summary = response.content or ""
940
-
941
- log.info(
942
- f"Compacted {len(middle_msgs)} messages into summary "
943
- f"({len(summary)} chars)"
944
- )
945
-
946
- # Build compacted messages
947
- return [
948
- first_msg,
949
- {
950
- "role": "assistant",
951
- "content": f"[Context Summary]\n{summary}\n[End Summary]",
952
- },
953
- *recent_msgs,
954
- ]
955
- except Exception as e:
956
- log.warning(f"LLM compaction failed: {e}, falling back to truncation")
957
- return [first_msg] + recent_msgs
958
-
959
- def _format_messages_for_summary(self, messages: list[dict]) -> str:
960
- """Format messages for summarization prompt.
961
-
962
- Args:
963
- messages: Messages to format
964
-
965
- Returns:
966
- Formatted string for summarization
967
- """
968
- parts = []
969
- for msg in messages:
970
- role = msg.get("role", "unknown")
971
- content = msg.get("content", "")
972
-
973
- # Handle tool calls in assistant messages
974
- if role == "assistant" and "tool_calls" in msg:
975
- tool_calls = msg.get("tool_calls", [])
976
- tool_info = [
977
- f"Called: {tc.get('function', {}).get('name', 'unknown')}"
978
- for tc in tool_calls
979
- ]
980
- content = f"{content}\n[Tools: {', '.join(tool_info)}]" if content else f"[Tools: {', '.join(tool_info)}]"
981
-
982
- # Truncate very long content
983
- if len(content) > 4000:
984
- content = content[:4000] + "\n[...truncated...]"
985
-
986
- parts.append(f"[{role.upper()}]\n{content}")
987
-
988
- return "\n\n---\n\n".join(parts)
989
-
990
- def _get_reranked_context(self) -> dict:
991
- """Get reranked context items based on the current query.
992
-
993
- Returns:
994
- Dict with item_count and items list
995
- """
996
- try:
997
- from ..context.service import ContextService
998
- from ..context.reranker import rerank_context_items
999
-
1000
- # Get exploration steps for context extraction
1001
- steps = self.toolkit.get_exploration_steps()
1002
- if not steps:
1003
- return {"item_count": 0, "items": []}
1004
-
1005
- # Use context service to extract context items from exploration
1006
- service = ContextService(connection=self.toolkit.connection)
1007
- terminal_id = service.get_terminal_id()
1008
-
1009
- # Update context with exploration steps
1010
- service.update_context(
1011
- terminal_id=terminal_id,
1012
- exploration_steps=steps,
1013
- )
1014
-
1015
- # Get context items
1016
- items = service.get_context_items(terminal_id)
1017
- if not items:
1018
- return {"item_count": 0, "items": []}
1019
-
1020
- # Rerank by query relevance
1021
- if self._current_query:
1022
- items = rerank_context_items(
1023
- items,
1024
- self._current_query,
1025
- top_k=20,
1026
- )
1027
-
1028
- # Convert to serializable format
1029
- result_items = []
1030
- for item in items[:20]: # Limit to 20 items
1031
- result_items.append({
1032
- "name": item.qualified_name,
1033
- "type": item.entity_type,
1034
- "file": item.file_path,
1035
- "score": round(item.score, 3) if hasattr(item, 'score') else None,
1036
- })
1037
-
1038
- return {
1039
- "item_count": len(result_items),
1040
- "items": result_items,
1041
- }
1042
-
1043
- except Exception as e:
1044
- log.debug(f"Failed to get reranked context: {e}")
1045
- return {"item_count": 0, "items": []}
1046
-
1047
- def chat(self, message: str, images: Optional[list] = None) -> str:
1048
- """Continue a conversation with a new message.
1049
-
1050
- This method maintains conversation history for multi-turn interactions.
1051
- Call run() first to start a conversation, then chat() for follow-ups.
1052
-
1053
- Args:
1054
- message: User's follow-up message
1055
- images: Optional list of images to include
1056
-
1057
- Returns:
1058
- Agent's response
1059
- """
1060
- if not self._messages:
1061
- # No history, just run fresh
1062
- return self.run(message, images=images)
1063
-
1064
- # Store query for reranking context frame
1065
- self._current_query = message
1066
-
1067
- # Add new user message to history
1068
- self._messages.append({
1069
- "role": "user",
1070
- "content": message,
1071
- })
1072
-
1073
- # Get tool schemas
1074
- tools = self.toolkit.get_all_schemas()
1075
-
1076
- try:
1077
- response, final_messages = self._run_loop(self._messages, tools)
1078
- # Update conversation history
1079
- self._messages = final_messages
1080
- self.emitter.emit_end(success=True)
1081
- # Create checkpoint if manager is configured
1082
- self._create_checkpoint()
1083
- return response
1084
-
1085
- except Exception as e:
1086
- log.exception("Agent chat failed")
1087
- self.emitter.emit_error(str(e))
1088
- return f"Error: {str(e)}"
1089
-
1090
- def _create_checkpoint(self) -> None:
1091
- """Create a git checkpoint after successful run.
1092
-
1093
- Only creates a checkpoint if:
1094
- - A checkpoint manager is configured
1095
- - There are file changes to commit
1096
- """
1097
- if not self._checkpoint_manager:
1098
- return
1099
-
1100
- try:
1101
- self._checkpoint_manager.create_checkpoint(
1102
- messages=self._messages,
1103
- model=self.model,
1104
- system_prompt=self.system_prompt,
1105
- tools_used=list(self._tools_used_this_run),
1106
- token_usage={
1107
- "input": self._total_input_tokens,
1108
- "output": self._total_output_tokens,
1109
- "thinking": self._total_thinking_tokens,
1110
- },
1111
- )
1112
- except Exception as e:
1113
- log.warning(f"Failed to create checkpoint: {e}")
1114
- finally:
1115
- # Clear tools for next run
1116
- self._tools_used_this_run.clear()
1117
-
1118
- def reset(self) -> None:
1119
- """Reset the agent state."""
1120
- self.toolkit.reset_session()
1121
- self._total_input_tokens = 0
1122
- self._total_output_tokens = 0
1123
- self._current_query = ""