emdash-core 0.1.7__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. emdash_core/__init__.py +6 -1
  2. emdash_core/agent/events.py +29 -0
  3. emdash_core/agent/prompts/__init__.py +5 -0
  4. emdash_core/agent/prompts/main_agent.py +22 -2
  5. emdash_core/agent/prompts/plan_mode.py +126 -0
  6. emdash_core/agent/prompts/subagents.py +11 -7
  7. emdash_core/agent/prompts/workflow.py +138 -43
  8. emdash_core/agent/providers/base.py +4 -0
  9. emdash_core/agent/providers/models.py +7 -0
  10. emdash_core/agent/providers/openai_provider.py +74 -2
  11. emdash_core/agent/runner.py +556 -34
  12. emdash_core/agent/skills.py +319 -0
  13. emdash_core/agent/toolkit.py +48 -0
  14. emdash_core/agent/tools/__init__.py +3 -2
  15. emdash_core/agent/tools/modes.py +197 -53
  16. emdash_core/agent/tools/search.py +4 -0
  17. emdash_core/agent/tools/skill.py +193 -0
  18. emdash_core/agent/tools/spec.py +61 -94
  19. emdash_core/agent/tools/tasks.py +15 -78
  20. emdash_core/api/agent.py +7 -7
  21. emdash_core/api/index.py +1 -1
  22. emdash_core/api/projectmd.py +4 -2
  23. emdash_core/api/router.py +2 -0
  24. emdash_core/api/skills.py +241 -0
  25. emdash_core/checkpoint/__init__.py +40 -0
  26. emdash_core/checkpoint/cli.py +175 -0
  27. emdash_core/checkpoint/git_operations.py +250 -0
  28. emdash_core/checkpoint/manager.py +231 -0
  29. emdash_core/checkpoint/models.py +107 -0
  30. emdash_core/checkpoint/storage.py +201 -0
  31. emdash_core/config.py +1 -1
  32. emdash_core/core/config.py +18 -2
  33. emdash_core/graph/schema.py +5 -5
  34. emdash_core/ingestion/orchestrator.py +19 -10
  35. emdash_core/models/agent.py +1 -1
  36. emdash_core/server.py +42 -0
  37. emdash_core/sse/stream.py +1 -0
  38. {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/METADATA +1 -2
  39. {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/RECORD +41 -31
  40. {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/entry_points.txt +1 -0
  41. {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/WHEEL +0 -0
@@ -1,6 +1,8 @@
1
1
  """Agent runner for LLM-powered exploration."""
2
2
 
3
3
  import json
4
+ import os
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
4
6
  from datetime import datetime, date
5
7
  from typing import Any, Optional
6
8
 
@@ -18,6 +20,7 @@ from .context_manager import (
18
20
  )
19
21
  from .prompts import BASE_SYSTEM_PROMPT, build_system_prompt
20
22
  from .tools.tasks import TaskState
23
+ from ..checkpoint import CheckpointManager
21
24
 
22
25
 
23
26
  class SafeJSONEncoder(json.JSONEncoder):
@@ -66,9 +69,11 @@ class AgentRunner:
66
69
  model: str = DEFAULT_MODEL,
67
70
  system_prompt: Optional[str] = None,
68
71
  emitter: Optional[AgentEventEmitter] = None,
69
- max_iterations: int = 50,
72
+ max_iterations: int = int(os.getenv("EMDASH_MAX_ITERATIONS", "100")),
70
73
  verbose: bool = False,
71
74
  show_tool_results: bool = False,
75
+ enable_thinking: Optional[bool] = None,
76
+ checkpoint_manager: Optional[CheckpointManager] = None,
72
77
  ):
73
78
  """Initialize the agent runner.
74
79
 
@@ -80,26 +85,51 @@ class AgentRunner:
80
85
  max_iterations: Maximum tool call iterations.
81
86
  verbose: Whether to print verbose output.
82
87
  show_tool_results: Whether to show detailed tool results.
88
+ enable_thinking: Enable extended thinking. If None, auto-detect from model.
89
+ checkpoint_manager: Optional checkpoint manager for git-based checkpoints.
83
90
  """
84
91
  self.toolkit = toolkit or AgentToolkit()
85
92
  self.provider = get_provider(model)
86
93
  self.model = model
87
- self.system_prompt = system_prompt or build_system_prompt(self.toolkit)
94
+ # Build system prompt, prepending plan mode prompt if in plan mode
95
+ if system_prompt:
96
+ self.system_prompt = system_prompt
97
+ elif self.toolkit.plan_mode:
98
+ from .prompts.plan_mode import PLAN_MODE_PROMPT
99
+ self.system_prompt = PLAN_MODE_PROMPT + "\n\n" + build_system_prompt(self.toolkit)
100
+ else:
101
+ self.system_prompt = build_system_prompt(self.toolkit)
88
102
  self.emitter = emitter or NullEmitter()
89
103
  # Inject emitter into tools that need it (e.g., TaskTool for sub-agent streaming)
90
104
  self.toolkit.set_emitter(self.emitter)
91
105
  self.max_iterations = max_iterations
92
106
  self.verbose = verbose
93
107
  self.show_tool_results = show_tool_results
108
+ # Extended thinking support
109
+ if enable_thinking is None:
110
+ # Auto-detect from provider capabilities
111
+ self.enable_thinking = (
112
+ hasattr(self.provider, "supports_thinking")
113
+ and self.provider.supports_thinking()
114
+ )
115
+ else:
116
+ self.enable_thinking = enable_thinking
94
117
  # Conversation history for multi-turn support
95
118
  self._messages: list[dict] = []
96
119
  # Token usage tracking
97
120
  self._total_input_tokens: int = 0
98
121
  self._total_output_tokens: int = 0
122
+ self._total_thinking_tokens: int = 0
99
123
  # Store query for reranking
100
124
  self._current_query: str = ""
101
125
  # Todo state tracking for injection
102
126
  self._last_todo_snapshot: str = ""
127
+ # Checkpoint manager for git-based checkpoints
128
+ self._checkpoint_manager = checkpoint_manager
129
+ # Track tools used during current run (for checkpoint metadata)
130
+ self._tools_used_this_run: set[str] = set()
131
+ # Plan approval state
132
+ self._pending_plan: Optional[dict] = None # Stores submitted plan awaiting approval
103
133
 
104
134
  def _get_todo_snapshot(self) -> str:
105
135
  """Get current todo state as string for comparison."""
@@ -125,6 +155,48 @@ class AgentRunner:
125
155
  task_list = "\n".join(lines)
126
156
  return f"<todo-state>\n{header}\n{task_list}\n</todo-state>"
127
157
 
158
+ def _execute_tools_parallel(self, parsed_calls: list) -> list:
159
+ """Execute multiple tool calls in parallel using a thread pool.
160
+
161
+ Args:
162
+ parsed_calls: List of (tool_call, args) tuples
163
+
164
+ Returns:
165
+ List of (tool_call, args, result) tuples in original order
166
+ """
167
+ # Emit tool start events for all calls
168
+ for tool_call, args in parsed_calls:
169
+ self.emitter.emit_tool_start(tool_call.name, args)
170
+
171
+ def execute_one(item):
172
+ tool_call, args = item
173
+ try:
174
+ result = self.toolkit.execute(tool_call.name, **args)
175
+ return (tool_call, args, result)
176
+ except Exception as e:
177
+ log.exception(f"Tool {tool_call.name} failed")
178
+ from .tools.base import ToolResult
179
+ return (tool_call, args, ToolResult.error_result(str(e)))
180
+
181
+ # Execute in parallel with up to 3 workers
182
+ results: list = [None] * len(parsed_calls)
183
+ with ThreadPoolExecutor(max_workers=3) as executor:
184
+ futures = {executor.submit(execute_one, item): i for i, item in enumerate(parsed_calls)}
185
+ # Collect results maintaining order
186
+ for future in as_completed(futures):
187
+ idx = futures[future]
188
+ results[idx] = future.result()
189
+
190
+ # Emit tool result events for all calls
191
+ for tool_call, args, result in results:
192
+ self.emitter.emit_tool_result(
193
+ tool_call.name,
194
+ result.success,
195
+ self._summarize_result(result),
196
+ )
197
+
198
+ return results
199
+
128
200
  def run(
129
201
  self,
130
202
  query: str,
@@ -143,21 +215,22 @@ class AgentRunner:
143
215
  """
144
216
  # Store query for reranking context frame
145
217
  self._current_query = query
146
- self.emitter.emit_start(goal=query)
147
-
148
- # Build messages
149
- messages = []
150
218
 
219
+ # Build user message
151
220
  if context:
152
- messages.append({
221
+ user_message = {
153
222
  "role": "user",
154
223
  "content": f"Context:\n{context}\n\nQuestion: {query}",
155
- })
224
+ }
156
225
  else:
157
- messages.append({
226
+ user_message = {
158
227
  "role": "user",
159
228
  "content": query,
160
- })
229
+ }
230
+
231
+ # Save user message to history BEFORE running (so it's preserved even if interrupted)
232
+ self._messages.append(user_message)
233
+ messages = list(self._messages) # Copy for the loop
161
234
 
162
235
  # TODO: Handle images if provided
163
236
 
@@ -166,16 +239,120 @@ class AgentRunner:
166
239
 
167
240
  try:
168
241
  response, final_messages = self._run_loop(messages, tools)
169
- # Save conversation history for multi-turn support
242
+ # Update conversation history with full exchange
170
243
  self._messages = final_messages
171
244
  self.emitter.emit_end(success=True)
245
+ # Create checkpoint if manager is configured
246
+ self._create_checkpoint()
172
247
  return response
173
248
 
174
249
  except Exception as e:
175
250
  log.exception("Agent run failed")
176
251
  self.emitter.emit_error(str(e))
252
+ # Keep user message in history even on error (already appended above)
177
253
  return f"Error: {str(e)}"
178
254
 
255
def has_pending_plan(self) -> bool:
    """Report whether a submitted plan is still waiting for a decision.

    Returns:
        True when a plan is stored on the runner (even an empty one),
        False when nothing is stored.
    """
    pending = self._pending_plan
    if pending is None:
        return False
    return True
262
+
263
def get_pending_plan(self) -> Optional[dict]:
    """Return the plan currently awaiting approval.

    Returns:
        The stored plan dict itself (not a copy), or None when no plan
        is pending.
    """
    pending: Optional[dict] = self._pending_plan
    return pending
270
+
271
def approve_plan(self) -> str:
    """Approve the pending plan and switch the agent back to code mode.

    Call this once the user has accepted a submitted plan. The pending
    plan is cleared, the shared mode state is set to code mode, the
    toolkit and system prompt are rebuilt without plan mode, and a new
    run is started with an approval message describing the plan.

    Returns:
        The agent's response to the approval message, or a short notice
        when there is no pending plan.
    """
    approved = self._pending_plan
    if not approved:
        return "No pending plan to approve."

    # Clear first so the follow-up run starts with no plan pending.
    self._pending_plan = None

    # Reset ModeState singleton to code mode
    from .tools.modes import ModeState, AgentMode
    mode_state = ModeState.get_instance()
    mode_state.current_mode = AgentMode.CODE
    mode_state.plan_content = approved.get("summary", "")

    # Rebuild toolkit with plan_mode=False, reusing the old toolkit's
    # connection and repository root.
    previous_toolkit = self.toolkit
    self.toolkit = AgentToolkit(
        connection=previous_toolkit.connection,
        repo_root=previous_toolkit._repo_root,
        plan_mode=False,
    )
    self.toolkit.set_emitter(self.emitter)

    # The system prompt no longer carries the plan-mode preamble.
    self.system_prompt = build_system_prompt(self.toolkit)

    # Resume execution with approval message
    title = approved.get('title', 'Untitled')
    summary = approved.get('summary', '')
    files_section = self._format_files_to_modify(approved.get('files_to_modify', []))
    approval_message = f"""Your plan "{title}" has been APPROVED.

You are now in code mode. Please implement the plan:

## Summary
{summary}

## Files to Modify
{files_section}

Proceed with implementation using the available tools (write_to_file, apply_diff, execute_command, etc.)."""

    return self.run(approval_message)
318
+
319
def reject_plan(self, feedback: str = "") -> str:
    """Reject the pending plan and ask the agent to revise it.

    The pending plan is cleared and a new run is started with a rejection
    message; no mode or toolkit change is made here, so the agent stays
    in whatever mode it was in.

    Args:
        feedback: Optional explanation of why the plan was rejected.

    Returns:
        The agent's response to the rejection message, or a short notice
        when there is no pending plan.
    """
    rejected = self._pending_plan
    if not rejected:
        return "No pending plan to reject."

    plan_title = rejected.get("title", "Untitled")
    self._pending_plan = None  # Clear pending plan (but stay in plan mode)

    if feedback:
        guidance = f"Feedback: {feedback}"
    else:
        guidance = "Please revise the plan."

    rejection_message = f"""Your plan "{plan_title}" was REJECTED.

{guidance}

You are still in plan mode. Please address the feedback and submit a revised plan using exit_plan."""

    return self.run(rejection_message)
343
+
344
+ def _format_files_to_modify(self, files: list[dict]) -> str:
345
+ """Format files_to_modify list for display."""
346
+ if not files:
347
+ return "No files specified"
348
+ lines = []
349
+ for f in files:
350
+ path = f.get("path", "unknown")
351
+ lines_info = f.get("lines", "")
352
+ changes = f.get("changes", "")
353
+ lines.append(f"- {path} ({lines_info}): {changes}")
354
+ return "\n".join(lines)
355
+
179
356
  def _run_loop(
180
357
  self,
181
358
  messages: list[dict],
@@ -193,16 +370,27 @@ class AgentRunner:
193
370
  max_retries = 3
194
371
 
195
372
  for iteration in range(self.max_iterations):
373
+ # When approaching max iterations, ask agent to wrap up
374
+ if iteration == self.max_iterations - 2:
375
+ messages.append({
376
+ "role": "user",
377
+ "content": "[SYSTEM: You are approaching your iteration limit. Please provide your findings and conclusions now, even if incomplete. Summarize what you've learned and any recommendations.]",
378
+ })
379
+
196
380
  # Try API call with retry on context overflow
197
381
  retry_count = 0
198
382
  response = None
199
383
 
200
384
  while retry_count < max_retries:
201
385
  try:
386
+ # Proactively compact context if approaching limit
387
+ messages = self._maybe_compact_context(messages)
388
+
202
389
  response = self.provider.chat(
203
390
  messages=messages,
204
391
  system=self.system_prompt,
205
392
  tools=tools,
393
+ thinking=self.enable_thinking,
206
394
  )
207
395
  break # Success
208
396
 
@@ -234,38 +422,50 @@ class AgentRunner:
234
422
  # Accumulate token usage
235
423
  self._total_input_tokens += response.input_tokens
236
424
  self._total_output_tokens += response.output_tokens
425
+ self._total_thinking_tokens += getattr(response, "thinking_tokens", 0)
426
+
427
+ # Emit thinking if present
428
+ if response.thinking:
429
+ self.emitter.emit_thinking(response.thinking)
237
430
 
238
431
  # Check for tool calls
239
432
  if response.tool_calls:
240
- # Emit any content that accompanies tool calls (e.g., thinking text)
241
- if response.content:
242
- self.emitter.emit_message_start()
243
- self.emitter.emit_message_delta(response.content)
244
- self.emitter.emit_message_end()
433
+ # Don't emit thinking text when there are tool calls - it clutters the output
434
+ # The thinking is still in the conversation history for context
435
+
436
+ # Track if we need to pause for user input
437
+ needs_user_input = False
245
438
 
246
- # Execute tools and add results
439
+ # Parse all tool call arguments first
440
+ parsed_calls = []
247
441
  for tool_call in response.tool_calls:
248
- # Parse arguments if they're a JSON string
249
442
  args = tool_call.arguments
250
443
  if isinstance(args, str):
251
444
  args = json.loads(args)
252
-
253
- self.emitter.emit_tool_start(
254
- tool_call.name,
255
- args,
256
- )
257
-
258
- result = self.toolkit.execute(
259
- tool_call.name,
260
- **args,
261
- )
262
-
445
+ parsed_calls.append((tool_call, args))
446
+
447
+ # Execute tools in parallel if multiple calls
448
+ if len(parsed_calls) > 1:
449
+ results = self._execute_tools_parallel(parsed_calls)
450
+ else:
451
+ # Single tool - execute directly
452
+ tool_call, args = parsed_calls[0]
453
+ self.emitter.emit_tool_start(tool_call.name, args)
454
+ result = self.toolkit.execute(tool_call.name, **args)
263
455
  self.emitter.emit_tool_result(
264
456
  tool_call.name,
265
457
  result.success,
266
458
  self._summarize_result(result),
267
459
  )
460
+ results = [(tool_call, args, result)]
461
+
462
+ # Track if we need to rebuild toolkit for mode change
463
+ mode_changed = False
268
464
 
465
+ # Process results and build messages
466
+ for tool_call, args, result in results:
467
+ # Track tool for checkpoint metadata
468
+ self._tools_used_this_run.add(tool_call.name)
269
469
  # Check if tool is asking a clarification question
270
470
  if (result.success and
271
471
  result.data and
@@ -276,6 +476,49 @@ class AgentRunner:
276
476
  context="",
277
477
  options=result.data.get("options", []),
278
478
  )
479
+ needs_user_input = True
480
+
481
+ # Check if agent entered plan mode
482
+ if (result.success and
483
+ result.data and
484
+ result.data.get("status") == "entered_plan_mode"):
485
+ mode_changed = True
486
+ # Rebuild toolkit with plan_mode=True
487
+ self.toolkit = AgentToolkit(
488
+ connection=self.toolkit.connection,
489
+ repo_root=self.toolkit._repo_root,
490
+ plan_mode=True,
491
+ )
492
+ self.toolkit.set_emitter(self.emitter)
493
+ # Update system prompt with plan mode instructions
494
+ from .prompts.plan_mode import PLAN_MODE_PROMPT
495
+ self.system_prompt = PLAN_MODE_PROMPT + "\n\n" + build_system_prompt(self.toolkit)
496
+ # Update tools for LLM
497
+ tools = self.toolkit.get_all_schemas()
498
+
499
+ # Check if tool is submitting a plan for approval (exit_plan)
500
+ if (result.success and
501
+ result.data and
502
+ result.data.get("status") == "plan_submitted"):
503
+ # Store the pending plan
504
+ self._pending_plan = {
505
+ "title": result.data.get("title", ""),
506
+ "summary": result.data.get("summary", ""),
507
+ "files_to_modify": result.data.get("files_to_modify", []),
508
+ "implementation_steps": result.data.get("implementation_steps", []),
509
+ "risks": result.data.get("risks", []),
510
+ "testing_strategy": result.data.get("testing_strategy", ""),
511
+ }
512
+ self.emitter.emit_plan_submitted(
513
+ title=self._pending_plan["title"],
514
+ summary=self._pending_plan["summary"],
515
+ files_to_modify=self._pending_plan["files_to_modify"],
516
+ implementation_steps=self._pending_plan["implementation_steps"],
517
+ risks=self._pending_plan["risks"],
518
+ testing_strategy=self._pending_plan["testing_strategy"],
519
+ )
520
+ # Pause and wait for approval (similar to clarification flow)
521
+ needs_user_input = True
279
522
 
280
523
  # Add assistant message with tool call
281
524
  messages.append({
@@ -311,6 +554,10 @@ class AgentRunner:
311
554
  "content": result_json,
312
555
  })
313
556
 
557
+ # If a clarification question was asked, pause and wait for user input
558
+ if needs_user_input:
559
+ log.debug("Pausing agent loop - waiting for user input")
560
+ return "", messages
314
561
 
315
562
  else:
316
563
  # No tool calls - check if response was truncated
@@ -344,8 +591,31 @@ class AgentRunner:
344
591
 
345
592
  return response.content or "", messages
346
593
 
347
- # Hit max iterations - emit final message and context frame
348
- final_message = "Reached maximum iterations without completing."
594
+ # Hit max iterations - try one final request without tools to force a response
595
+ try:
596
+ final_response = self.provider.chat(
597
+ messages=messages + [{
598
+ "role": "user",
599
+ "content": "[SYSTEM: Maximum iterations reached. Provide your final response now with whatever information you have gathered. Do not use any tools.]",
600
+ }],
601
+ system=self.system_prompt,
602
+ tools=None, # No tools - force text response
603
+ thinking=self.enable_thinking,
604
+ )
605
+ # Emit thinking if present
606
+ if final_response.thinking:
607
+ self.emitter.emit_thinking(final_response.thinking)
608
+ if final_response.content:
609
+ self.emitter.emit_message_start()
610
+ self.emitter.emit_message_delta(final_response.content)
611
+ self.emitter.emit_message_end()
612
+ self._emit_context_frame(messages)
613
+ return final_response.content, messages
614
+ except Exception as e:
615
+ log.warning(f"Failed to get final response: {e}")
616
+
617
+ # Fallback message if final response fails
618
+ final_message = "Reached maximum iterations. The agent was unable to complete the task within the allowed iterations."
349
619
  self.emitter.emit_message_start()
350
620
  self.emitter.emit_message_delta(final_message)
351
621
  self.emitter.emit_message_end()
@@ -375,7 +645,7 @@ class AgentRunner:
375
645
 
376
646
  return "Completed"
377
647
 
378
- def _emit_context_frame(self, messages: list[dict] = None) -> None:
648
+ def _emit_context_frame(self, messages: list[dict] | None = None) -> None:
379
649
  """Emit a context frame event with current exploration state.
380
650
 
381
651
  Args:
@@ -384,10 +654,13 @@ class AgentRunner:
384
654
  # Get exploration steps from toolkit session
385
655
  steps = self.toolkit.get_exploration_steps()
386
656
 
387
- # Estimate current context window tokens
657
+ # Estimate current context window tokens and get breakdown
388
658
  context_tokens = 0
659
+ context_breakdown = {}
660
+ largest_messages = []
389
661
  if messages:
390
662
  context_tokens = self._estimate_context_tokens(messages)
663
+ context_breakdown, largest_messages = self._get_context_breakdown(messages)
391
664
 
392
665
  # Summarize exploration by tool
393
666
  tool_counts: dict[str, int] = {}
@@ -454,6 +727,8 @@ class AgentRunner:
454
727
  "input_tokens": self._total_input_tokens,
455
728
  "output_tokens": self._total_output_tokens,
456
729
  "context_tokens": context_tokens, # Current context window size
730
+ "context_breakdown": context_breakdown, # Tokens by message type
731
+ "largest_messages": largest_messages, # Top 5 biggest messages
457
732
  }
458
733
 
459
734
  # Get reranked context items
@@ -494,6 +769,224 @@ class AgentRunner:
494
769
  # Estimate: ~4 characters per token
495
770
  return total_chars // 4
496
771
 
772
+ def _get_context_breakdown(self, messages: list[dict]) -> tuple[dict, list[dict]]:
773
+ """Get breakdown of context usage by message type.
774
+
775
+ Args:
776
+ messages: Conversation messages
777
+
778
+ Returns:
779
+ Tuple of (breakdown dict, list of largest messages)
780
+ """
781
+ breakdown = {
782
+ "system_prompt": len(self.system_prompt) // 4 if self.system_prompt else 0,
783
+ "user": 0,
784
+ "assistant": 0,
785
+ "tool_results": 0,
786
+ }
787
+
788
+ # Track individual message sizes for finding largest
789
+ message_sizes = []
790
+
791
+ for i, msg in enumerate(messages):
792
+ role = msg.get("role", "unknown")
793
+ content = msg.get("content", "")
794
+
795
+ # Calculate content size
796
+ if isinstance(content, str):
797
+ size = len(content)
798
+ elif isinstance(content, list):
799
+ size = sum(len(p.get("text", "")) for p in content if isinstance(p, dict))
800
+ else:
801
+ size = 0
802
+
803
+ tokens = size // 4
804
+
805
+ # Categorize
806
+ if role == "user":
807
+ breakdown["user"] += tokens
808
+ elif role == "assistant":
809
+ breakdown["assistant"] += tokens
810
+ elif role == "tool":
811
+ breakdown["tool_results"] += tokens
812
+
813
+ # Track for largest messages
814
+ if tokens > 100: # Only track substantial messages
815
+ # Try to get a label for this message
816
+ label = f"{role}[{i}]"
817
+ if role == "tool":
818
+ tool_call_id = msg.get("tool_call_id", "")
819
+ # Try to find the tool name from previous assistant message
820
+ for prev_msg in reversed(messages[:i]):
821
+ if prev_msg.get("role") == "assistant" and "tool_calls" in prev_msg:
822
+ for tc in prev_msg.get("tool_calls", []):
823
+ if tc.get("id") == tool_call_id:
824
+ label = tc.get("function", {}).get("name", "tool")
825
+ break
826
+ break
827
+
828
+ message_sizes.append({
829
+ "index": i,
830
+ "role": role,
831
+ "label": label,
832
+ "tokens": tokens,
833
+ "preview": content[:100] if isinstance(content, str) else str(content)[:100],
834
+ })
835
+
836
+ # Sort by size and get top 5
837
+ message_sizes.sort(key=lambda x: x["tokens"], reverse=True)
838
+ largest = message_sizes[:5]
839
+
840
+ return breakdown, largest
841
+
842
+ def _maybe_compact_context(
843
+ self,
844
+ messages: list[dict],
845
+ threshold: float = 0.8,
846
+ ) -> list[dict]:
847
+ """Proactively compact context if approaching limit.
848
+
849
+ Args:
850
+ messages: Current conversation messages
851
+ threshold: Trigger compaction at this % of context limit (default 80%)
852
+
853
+ Returns:
854
+ Original or compacted messages
855
+ """
856
+ context_tokens = self._estimate_context_tokens(messages)
857
+ context_limit = self.provider.get_context_limit()
858
+
859
+ # Check if we need to compact
860
+ if context_tokens < context_limit * threshold:
861
+ return messages # No compaction needed
862
+
863
+ log.info(
864
+ f"Context at {context_tokens:,}/{context_limit:,} tokens "
865
+ f"({context_tokens/context_limit:.0%}), compacting..."
866
+ )
867
+
868
+ return self._compact_messages_with_llm(
869
+ messages, target_tokens=int(context_limit * 0.5)
870
+ )
871
+
872
+ def _compact_messages_with_llm(
873
+ self,
874
+ messages: list[dict],
875
+ target_tokens: int,
876
+ ) -> list[dict]:
877
+ """Use fast LLM to summarize middle messages.
878
+
879
+ Preserves:
880
+ - First message (original user request)
881
+ - Last 4 messages (recent context)
882
+ - Summarizes everything in between
883
+
884
+ Args:
885
+ messages: Current conversation messages
886
+ target_tokens: Target token count after compaction
887
+
888
+ Returns:
889
+ Compacted messages list
890
+ """
891
+ from .subagent import get_model_for_tier
892
+ from .providers import get_provider
893
+
894
+ if len(messages) <= 5:
895
+ return messages # Too few to compact
896
+
897
+ # Split messages
898
+ first_msg = messages[0]
899
+ recent_msgs = messages[-4:]
900
+ middle_msgs = messages[1:-4]
901
+
902
+ if not middle_msgs:
903
+ return messages
904
+
905
+ # Build summary prompt
906
+ middle_content = self._format_messages_for_summary(middle_msgs)
907
+
908
+ prompt = f"""Summarize this conversation history concisely.
909
+
910
+ PRESERVE (include verbatim if present):
911
+ - Code snippets and file paths
912
+ - Error messages
913
+ - Key decisions made
914
+ - Important tool results (file contents, search results)
915
+
916
+ CONDENSE:
917
+ - Repetitive searches
918
+ - Verbose tool outputs
919
+ - Intermediate reasoning
920
+
921
+ CONVERSATION HISTORY:
922
+ {middle_content}
923
+
924
+ OUTPUT FORMAT:
925
+ Provide a concise summary (max 2000 tokens) that captures the essential context needed to continue this task."""
926
+
927
+ # Use fast model for summarization
928
+ fast_model = get_model_for_tier("fast")
929
+ fast_provider = get_provider(fast_model)
930
+
931
+ try:
932
+ self.emitter.emit_thinking("Compacting context with fast model...")
933
+
934
+ response = fast_provider.chat(
935
+ messages=[{"role": "user", "content": prompt}],
936
+ system="You are a context summarizer. Be concise but preserve code and technical details.",
937
+ )
938
+
939
+ summary = response.content or ""
940
+
941
+ log.info(
942
+ f"Compacted {len(middle_msgs)} messages into summary "
943
+ f"({len(summary)} chars)"
944
+ )
945
+
946
+ # Build compacted messages
947
+ return [
948
+ first_msg,
949
+ {
950
+ "role": "assistant",
951
+ "content": f"[Context Summary]\n{summary}\n[End Summary]",
952
+ },
953
+ *recent_msgs,
954
+ ]
955
+ except Exception as e:
956
+ log.warning(f"LLM compaction failed: {e}, falling back to truncation")
957
+ return [first_msg] + recent_msgs
958
+
959
+ def _format_messages_for_summary(self, messages: list[dict]) -> str:
960
+ """Format messages for summarization prompt.
961
+
962
+ Args:
963
+ messages: Messages to format
964
+
965
+ Returns:
966
+ Formatted string for summarization
967
+ """
968
+ parts = []
969
+ for msg in messages:
970
+ role = msg.get("role", "unknown")
971
+ content = msg.get("content", "")
972
+
973
+ # Handle tool calls in assistant messages
974
+ if role == "assistant" and "tool_calls" in msg:
975
+ tool_calls = msg.get("tool_calls", [])
976
+ tool_info = [
977
+ f"Called: {tc.get('function', {}).get('name', 'unknown')}"
978
+ for tc in tool_calls
979
+ ]
980
+ content = f"{content}\n[Tools: {', '.join(tool_info)}]" if content else f"[Tools: {', '.join(tool_info)}]"
981
+
982
+ # Truncate very long content
983
+ if len(content) > 4000:
984
+ content = content[:4000] + "\n[...truncated...]"
985
+
986
+ parts.append(f"[{role.upper()}]\n{content}")
987
+
988
+ return "\n\n---\n\n".join(parts)
989
+
497
990
  def _get_reranked_context(self) -> dict:
498
991
  """Get reranked context items based on the current query.
499
992
 
@@ -570,7 +1063,6 @@ class AgentRunner:
570
1063
 
571
1064
  # Store query for reranking context frame
572
1065
  self._current_query = message
573
- self.emitter.emit_start(goal=message)
574
1066
 
575
1067
  # Add new user message to history
576
1068
  self._messages.append({
@@ -586,6 +1078,8 @@ class AgentRunner:
586
1078
  # Update conversation history
587
1079
  self._messages = final_messages
588
1080
  self.emitter.emit_end(success=True)
1081
+ # Create checkpoint if manager is configured
1082
+ self._create_checkpoint()
589
1083
  return response
590
1084
 
591
1085
  except Exception as e:
@@ -593,6 +1087,34 @@ class AgentRunner:
593
1087
  self.emitter.emit_error(str(e))
594
1088
  return f"Error: {str(e)}"
595
1089
 
1090
+ def _create_checkpoint(self) -> None:
1091
+ """Create a git checkpoint after successful run.
1092
+
1093
+ Only creates a checkpoint if:
1094
+ - A checkpoint manager is configured
1095
+ - There are file changes to commit
1096
+ """
1097
+ if not self._checkpoint_manager:
1098
+ return
1099
+
1100
+ try:
1101
+ self._checkpoint_manager.create_checkpoint(
1102
+ messages=self._messages,
1103
+ model=self.model,
1104
+ system_prompt=self.system_prompt,
1105
+ tools_used=list(self._tools_used_this_run),
1106
+ token_usage={
1107
+ "input": self._total_input_tokens,
1108
+ "output": self._total_output_tokens,
1109
+ "thinking": self._total_thinking_tokens,
1110
+ },
1111
+ )
1112
+ except Exception as e:
1113
+ log.warning(f"Failed to create checkpoint: {e}")
1114
+ finally:
1115
+ # Clear tools for next run
1116
+ self._tools_used_this_run.clear()
1117
+
596
1118
  def reset(self) -> None:
597
1119
  """Reset the agent state."""
598
1120
  self.toolkit.reset_session()