bone-agent 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. package/LICENSE +21 -0
  2. package/README.md +184 -0
  3. package/bin/npm-wrapper.js +235 -0
  4. package/bin/rg +0 -0
  5. package/bin/rg.exe +0 -0
  6. package/config.yaml.example +133 -0
  7. package/package.json +53 -0
  8. package/requirements.txt +9 -0
  9. package/src/__init__.py +11 -0
  10. package/src/core/__init__.py +1 -0
  11. package/src/core/agentic.py +1054 -0
  12. package/src/core/chat_manager.py +1552 -0
  13. package/src/core/config_manager.py +247 -0
  14. package/src/core/cron.py +527 -0
  15. package/src/core/cron_allowlist.py +118 -0
  16. package/src/core/memory.py +232 -0
  17. package/src/core/retry.py +71 -0
  18. package/src/core/sub_agent.py +326 -0
  19. package/src/core/tool_approval.py +220 -0
  20. package/src/core/tool_feedback.py +778 -0
  21. package/src/exceptions.py +79 -0
  22. package/src/llm/__init__.py +1 -0
  23. package/src/llm/client.py +171 -0
  24. package/src/llm/config.py +466 -0
  25. package/src/llm/prompts.py +735 -0
  26. package/src/llm/providers.py +417 -0
  27. package/src/llm/streaming.py +163 -0
  28. package/src/llm/token_tracker.py +368 -0
  29. package/src/tools/__init__.py +212 -0
  30. package/src/tools/constants.py +59 -0
  31. package/src/tools/create_file.py +136 -0
  32. package/src/tools/directory.py +389 -0
  33. package/src/tools/edit.py +543 -0
  34. package/src/tools/file_reader.py +322 -0
  35. package/src/tools/helpers/__init__.py +105 -0
  36. package/src/tools/helpers/base.py +550 -0
  37. package/src/tools/helpers/converters.py +44 -0
  38. package/src/tools/helpers/file_helpers.py +189 -0
  39. package/src/tools/helpers/formatters.py +411 -0
  40. package/src/tools/helpers/loader.py +231 -0
  41. package/src/tools/helpers/parallel_executor.py +231 -0
  42. package/src/tools/helpers/path_resolver.py +226 -0
  43. package/src/tools/helpers/plugin_manifest.py +156 -0
  44. package/src/tools/obsidian.py +96 -0
  45. package/src/tools/review_sub_agent.py +189 -0
  46. package/src/tools/rg_search.py +393 -0
  47. package/src/tools/search_plugins.py +109 -0
  48. package/src/tools/select_option.py +593 -0
  49. package/src/tools/shell.py +302 -0
  50. package/src/tools/sub_agent.py +139 -0
  51. package/src/tools/task_list.py +269 -0
  52. package/src/tools/web_search.py +61 -0
  53. package/src/ui/__init__.py +1 -0
  54. package/src/ui/banner.py +87 -0
  55. package/src/ui/commands.py +2694 -0
  56. package/src/ui/displays.py +213 -0
  57. package/src/ui/loader.py +284 -0
  58. package/src/ui/main.py +646 -0
  59. package/src/ui/prompt_utils.py +113 -0
  60. package/src/ui/setting_selector.py +590 -0
  61. package/src/ui/setup_wizard.py +294 -0
  62. package/src/ui/sub_agent_panel.py +234 -0
  63. package/src/ui/tool_confirmation.py +215 -0
  64. package/src/utils/__init__.py +1 -0
  65. package/src/utils/citation_parser.py +199 -0
  66. package/src/utils/editor.py +158 -0
  67. package/src/utils/gitignore_filter.py +149 -0
  68. package/src/utils/logger.py +254 -0
  69. package/src/utils/paths.py +30 -0
  70. package/src/utils/result_parsers.py +108 -0
  71. package/src/utils/safe_commands.py +243 -0
  72. package/src/utils/settings.py +174 -0
  73. package/src/utils/validation.py +191 -0
  74. package/src/utils/web_search.py +173 -0
@@ -0,0 +1,1552 @@
1
+ """Chat state and server lifecycle management."""
2
+
3
+ import os
4
+ import json
5
+ import logging
6
+ import subprocess
7
+ import time
8
+ import requests
9
+ from typing import Optional, IO
10
+
11
+ from llm.client import LLMClient
12
+ from llm.config import get_providers, get_provider_config, reload_config
13
+ from llm.prompts import build_system_prompt
14
+ from pathlib import Path
15
+ from llm.token_tracker import TokenTracker
16
+ from utils.settings import server_settings, context_settings
17
+ from utils.logger import MarkdownConversationLogger
18
+ from utils.result_parsers import extract_exit_code, extract_metadata_from_result
19
+
20
# Token-estimation constants.
# Approximate per-message token cost of the JSON envelope (braces, quotes, colons, commas).
MESSAGE_OVERHEAD_TOKENS = 4
# Per-message character padding for the JSON envelope when estimating from character counts.
CHAR_BASED_OVERHEAD = 20

# Human-readable labels for context-management notifications (used by ensure_context_fits).
_ACTION_LABELS = dict(
    tool_compaction="compacted tool results",
    history_compaction="compacted history",
    emergency_truncation="emergency truncation (oldest messages dropped)",
)
30
+
31
+ class ChatManager:
32
+ """Manages chat state, messages, and provider switching."""
33
+
34
def __init__(self, compact_trigger_tokens: Optional[int] = None):
    """Create a chat session and seed its message history.

    Args:
        compact_trigger_tokens: Optional per-instance compaction threshold.
            When set, it overrides the global context_settings value.
    """
    # LLM client; the provider comes from the global configuration.
    self.client = LLMClient()
    self.messages = []

    # Local server process and the log file handle opened for it.
    self.server_process: Optional[subprocess.Popen] = None
    self._log_file: Optional[IO] = None

    self.approve_mode = "safe"
    self.token_tracker = TokenTracker()
    self.context_token_estimate = 0

    # Memory-only task list for the current session (used in EDIT workflows).
    self.task_list, self.task_list_title = [], None

    # Cached .gitignore filtering state.
    self._gitignore_spec = self._gitignore_mtime = self._repo_root = None

    # Per-instance compaction threshold (None means "use global settings").
    self._compact_trigger_tokens = compact_trigger_tokens

    # When True, every form of compaction is skipped (sub-agents use this
    # to keep their findings intact).
    self._compaction_disabled = False

    # Optional markdown conversation log.
    self.markdown_logger: Optional[MarkdownConversationLogger] = (
        MarkdownConversationLogger(
            conversations_dir=context_settings.conversations_dir
        )
        if context_settings.log_conversations
        else None
    )

    # Compaction lock: agentic.py sets it before running tools and clears it
    # once every tool result has been appended.
    self._compaction_locked = False

    self._init_messages(reset_totals=True)
70
+
71
def set_compaction_lock(self, locked):
    """Engage or release the guard that suspends compaction.

    While the lock is held, no messages are removed, summarized or
    truncated. It is meant to be held during tool execution so that
    tool_call_ids are never orphaned.

    Args:
        locked: Truthy to hold the lock, falsy to release it.
    """
    self._compaction_locked = locked
79
+
80
+ def _init_messages(self, reset_totals: bool = True, reset_costs: bool = False):
81
+ """Initialize message history with system prompt and agents.md as initial exchange.
82
+
83
+ Args:
84
+ reset_totals: Reset cumulative token counts (default True).
85
+ reset_costs: Reset cost accumulators (default False).
86
+ Set True on provider switch to clear stale billing state.
87
+ Kept False on /clear to preserve cumulative session costs.
88
+ """
89
+ # Start new conversation logging session
90
+ if self.markdown_logger:
91
+ self.markdown_logger.start_session()
92
+
93
+ # Start with system prompt only
94
+ self.messages = [{"role": "system", "content": self._build_system_prompt()}]
95
+
96
+ # Add agents.md as initial user/assistant exchange (only if it exists in cwd)
97
+ user_msg, assistant_msg = self._load_agents_md()
98
+ if user_msg and assistant_msg:
99
+ self.messages.append({"role": "user", "content": user_msg})
100
+ self.messages.append({"role": "assistant", "content": assistant_msg})
101
+
102
+ # Log initial messages
103
+ if self.markdown_logger:
104
+ for msg in self.messages:
105
+ self.markdown_logger.log_message(msg)
106
+
107
+ # Reset session totals if requested (keep totals across /clear)
108
+ # For a fresh conversation, cumulative totals start at 0 (no API calls made yet)
109
+ if reset_totals:
110
+ if reset_costs:
111
+ self.token_tracker.reset_all()
112
+ else:
113
+ self.token_tracker.reset(prompt_tokens=0, completion_tokens=0)
114
+
115
+ # Always reset conversation tokens (resets on /new and fresh starts)
116
+ self.token_tracker.reset_conversation()
117
+
118
+ # Initialize context tokens with actual message count (including tools if enabled)
119
+ self._update_context_tokens()
120
+ self.context_token_estimate = self.token_tracker.current_context_tokens
121
+
122
def _build_system_prompt(self) -> str:
    """Return the system prompt text produced by build_system_prompt()."""
    prompt = build_system_prompt()
    return prompt
125
+
126
def update_system_prompt(self):
    """Regenerate the system prompt in place and recount context tokens.

    Raises:
        RuntimeError: If the history is empty or its first message is not
            the system message.
    """
    history = self.messages
    if not history:
        raise RuntimeError("Cannot update system prompt: messages array is empty")

    head_role = history[0]["role"]
    if head_role != "system":
        raise RuntimeError(f"Cannot update system prompt: messages[0] has role '{head_role}', expected 'system'")

    # Swap in the freshly built prompt, then refresh the token estimate.
    fresh_prompt = self._build_system_prompt()
    self.messages[0]["content"] = fresh_prompt
    self._update_context_tokens()
137
+
138
+ def _load_agents_md(self) -> tuple[str, str]:
139
+ """Load agents.md content and prepare user/assistant exchange.
140
+
141
+ Returns:
142
+ tuple: (user_message, assistant_message)
143
+ """
144
+ # Check for agents.md in current working directory (user's project)
145
+ agents_path = Path.cwd() / "agents.md"
146
+
147
+ if agents_path.exists():
148
+ map_content = agents_path.read_text(encoding="utf-8").strip()
149
+ user_msg = (
150
+ "Here is the codebase map for this project. "
151
+ "This provides an overview of the repository structure and file purposes. "
152
+ "Use this as a reference when exploring the codebase.\n\n"
153
+ f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
154
+ )
155
+ assistant_msg = (
156
+ "I've received the codebase map. I'll use this as a reference when "
157
+ "exploring the repository, but I'll always verify current state by "
158
+ "reading files and searching the codebase before making changes."
159
+ )
160
+ else:
161
+ # No codebase map available - skip entirely
162
+ user_msg = ""
163
+ assistant_msg = ""
164
+
165
+ return user_msg, assistant_msg
166
+
167
+ def _update_context_tokens(self, tools=None):
168
+ """Recount and update current_context_tokens after message changes.
169
+
170
+ Args:
171
+ tools: Optional list of tool definitions to include in token count.
172
+ If None, uses current mode's tools (if enabled).
173
+ """
174
+ message_tokens = self._count_tokens(self.messages)
175
+
176
+ # Count tool tokens if tools are provided or enabled
177
+ if tools is None:
178
+ from llm.config import TOOLS_ENABLED
179
+ if not TOOLS_ENABLED:
180
+ self.token_tracker.set_context_tokens(message_tokens)
181
+ self.context_token_estimate = message_tokens
182
+ return
183
+ else:
184
+ from tools import TOOLS
185
+ tools = TOOLS()
186
+
187
+ if tools:
188
+ # Use character-based approximation for Anthropic (tiktoken doesn't support Claude)
189
+ if self.client.provider == "anthropic":
190
+ tools_json = json.dumps(tools)
191
+ tool_tokens = len(tools_json) // 4
192
+ else:
193
+ try:
194
+ import tiktoken
195
+ model = getattr(self.client, "model", "") or ""
196
+ try:
197
+ enc = tiktoken.encoding_for_model(model)
198
+ except Exception:
199
+ enc = tiktoken.get_encoding("cl100k_base")
200
+
201
+ # Encode tools list as JSON (which is how it's sent to the API)
202
+ tools_json = json.dumps(tools)
203
+ tool_tokens = len(enc.encode(tools_json))
204
+ except Exception:
205
+ # Fallback: character-based approximation
206
+ tools_json = json.dumps(tools)
207
+ tool_tokens = len(tools_json) // 4
208
+
209
+ total_tokens = message_tokens + tool_tokens
210
+ else:
211
+ total_tokens = message_tokens
212
+
213
+ self.token_tracker.set_context_tokens(total_tokens)
214
+ self.context_token_estimate = total_tokens
215
+
216
+ def _collect_message_text(self, msg) -> str:
217
+ """Extract all text fields from a message as a single string.
218
+
219
+ Collects role, content, tool_calls (id, type, function name/args),
220
+ and tool_call_id fields. Used by token counting methods.
221
+
222
+ Args:
223
+ msg: Message dict
224
+
225
+ Returns:
226
+ Concatenated string of all message text fields
227
+ """
228
+ parts = []
229
+
230
+ # Role field
231
+ role = msg.get('role', '')
232
+ if role:
233
+ parts.append(role)
234
+
235
+ # Content
236
+ content = msg.get('content', '')
237
+ if content:
238
+ parts.append(str(content))
239
+
240
+ # Tool calls (assistant messages)
241
+ if msg.get('tool_calls'):
242
+ for tc in msg['tool_calls']:
243
+ # id field (e.g., "call_abc123")
244
+ tc_id = tc.get('id', '')
245
+ if tc_id:
246
+ parts.append(tc_id)
247
+
248
+ # type field (usually "function")
249
+ tc_type = tc.get('type', 'function')
250
+ parts.append(tc_type)
251
+
252
+ # function object
253
+ fn = tc.get('function', {})
254
+ if fn:
255
+ fn_name = fn.get('name', '')
256
+ if fn_name:
257
+ parts.append(fn_name)
258
+
259
+ fn_args = fn.get('arguments', '{}')
260
+ parts.append(fn_args)
261
+
262
+ # Tool call ID (tool messages)
263
+ if msg.get('role') == 'tool' and msg.get('tool_call_id'):
264
+ parts.append(msg['tool_call_id'])
265
+
266
+ return ''.join(p or '' for p in parts)
267
+
268
+ def _count_tokens(self, messages) -> int:
269
+ """Count tokens accurately using tiktoken for OpenAI, character-based for Anthropic.
270
+
271
+ Counts everything the AI receives:
272
+ - All message types: user, assistant, system, tool
273
+ - All fields: role, content, tool_calls (id, type, function, name, arguments)
274
+ - Tool messages: tool_call_id + content
275
+
276
+ Args:
277
+ messages: List of messages to count tokens for
278
+
279
+ Returns:
280
+ int: Estimated token count
281
+ """
282
+ # Use character-based approximation for Anthropic (tiktoken doesn't support Claude)
283
+ if self.client.provider == "anthropic":
284
+ return self._count_tokens_char_based(messages)
285
+
286
+ try:
287
+ import tiktoken
288
+ model = getattr(self.client, "model", "") or ""
289
+ try:
290
+ enc = tiktoken.encoding_for_model(model)
291
+ except Exception:
292
+ enc = tiktoken.get_encoding("cl100k_base")
293
+
294
+ # Collect text from all messages and encode
295
+ total = 0
296
+ for msg in messages:
297
+ text = self._collect_message_text(msg)
298
+ total += len(enc.encode(text))
299
+ total += MESSAGE_OVERHEAD_TOKENS
300
+
301
+ return total
302
+
303
+ except Exception:
304
+ # Fallback to character-based estimation
305
+ return self._count_tokens_char_based(messages)
306
+
307
def _count_tokens_char_based(self, messages) -> int:
    """Estimate tokens from character counts (~4 characters per token).

    Each message contributes its collected text length plus
    CHAR_BASED_OVERHEAD characters, integer-divided by 4.

    Args:
        messages: List of messages to count tokens for

    Returns:
        int: Estimated token count
    """
    return sum(
        (len(self._collect_message_text(entry)) + CHAR_BASED_OVERHEAD) // 4
        for entry in messages
    )
324
+
325
+
326
+ def _build_summary_prompt(self, messages) -> str:
327
+ """Generate a comprehensive summary of messages.
328
+
329
+ Captures:
330
+ - User questions asked
331
+ - Tool calls performed (files read, edits, searches)
332
+ - Key decisions and changes
333
+
334
+ Args:
335
+ messages: List of messages to summarize
336
+
337
+ Returns:
338
+ str: Structured summary preserving context
339
+ """
340
+ # Extract user questions
341
+ user_queries = []
342
+ for m in messages:
343
+ if m.get('role') == 'user':
344
+ content = m.get('content', '')
345
+ if content and not content.startswith("The codebase map"):
346
+ user_queries.append(content)
347
+
348
+ # Extract tool calls
349
+ tool_calls = []
350
+ for m in messages:
351
+ if m.get('tool_calls'):
352
+ for tc in m['tool_calls']:
353
+ fn = tc['function']
354
+ name = fn.get('name', '')
355
+ args = fn.get('arguments', '')
356
+ tool_calls.append(f"- {name}: {args[:100]}")
357
+ elif m.get('role') == 'tool':
358
+ # Extract tool result metadata
359
+ content = m.get('content', '')
360
+ if 'exit_code=' in content:
361
+ lines = content.split('\n')[:5] # First 5 lines for context
362
+ tool_calls.append(f"Result: {'; '.join(lines[:2])}")
363
+
364
+ # Build summary prompt
365
+ summary_prompt = f"""Summarize the following conversation context.
366
+
367
+ User questions:
368
+ {chr(10).join(f'- {q}' for q in user_queries) if user_queries else 'None'}
369
+
370
+ Tool operations performed:
371
+ {chr(10).join(tool_calls) if tool_calls else 'None'}
372
+
373
+ Focus on:
374
+ 1. What problem was being solved
375
+ 2. What files were read or modified
376
+ 3. What searches were performed
377
+ 4. Key code changes or decisions made
378
+ 5. Current state/progress
379
+
380
+ Provide a concise summary (2-4 paragraphs) that captures all essential context for continuing the work."""
381
+
382
+ return summary_prompt
383
+
384
+ # ===== Tool Result Compaction =====
385
+
386
+ def _find_tool_blocks(self):
387
+ """Find all tool-result blocks in message history.
388
+
389
+ Handles both single-turn and multi-turn tool chains:
390
+ Single: user → assistant(tc) → tool_results → assistant(answer)
391
+ Multi: user → assistant(tc1) → tools → assistant(tc2) → tools → assistant(answer)
392
+
393
+ In multi-turn chains, all tool_calls and tool_results are merged into
394
+ a single block spanning from the first assistant(tool_calls) to the
395
+ final assistant(answer).
396
+
397
+ Returns:
398
+ list: List of block dicts with keys: user_idx, start, end, tool_calls, tool_results
399
+ """
400
+ blocks = []
401
+ i = 0
402
+
403
+ while i < len(self.messages):
404
+ msg = self.messages[i]
405
+
406
+ # Look for assistant message with tool_calls
407
+ if msg.get('role') == 'assistant' and msg.get('tool_calls'):
408
+
409
+ # Find user question before this
410
+ user_idx = i - 1
411
+ while user_idx >= 0 and self.messages[user_idx].get('role') != 'user':
412
+ user_idx -= 1
413
+
414
+ if user_idx < 0:
415
+ i += 1
416
+ continue
417
+
418
+ # Follow consecutive assistant(tool_calls) → tool_results pairs
419
+ # until we reach a final answer (assistant without tool_calls)
420
+ block_start = i
421
+ all_tool_calls = []
422
+ all_tool_results = []
423
+ j = i
424
+ found_end = False
425
+
426
+ while j < len(self.messages):
427
+ if self.messages[j].get('role') == 'assistant' and self.messages[j].get('tool_calls'):
428
+ # Accumulate tool calls from this assistant message
429
+ all_tool_calls.extend(self.messages[j].get('tool_calls', []))
430
+ # Collect immediately following tool results
431
+ k = j + 1
432
+ while k < len(self.messages) and self.messages[k].get('role') == 'tool':
433
+ all_tool_results.append(self.messages[k].get('content', ''))
434
+ k += 1
435
+ j = k
436
+ elif self.messages[j].get('role') == 'assistant' and not self.messages[j].get('tool_calls'):
437
+ # Final answer — this completes the block
438
+ found_end = True
439
+ break
440
+ else:
441
+ # Non-tool, non-assistant message breaks the chain
442
+ break
443
+
444
+ if found_end and all_tool_calls:
445
+ blocks.append({
446
+ 'user_idx': user_idx,
447
+ 'start': block_start,
448
+ 'end': j,
449
+ 'tool_calls': all_tool_calls,
450
+ 'tool_results': all_tool_results
451
+ })
452
+
453
+ # Continue scanning from after the final answer (or after the chain)
454
+ # Guard: always advance at least one position to prevent infinite loops
455
+ i = max(i + 1, j + 1 if found_end else j)
456
+ else:
457
+ i += 1
458
+
459
+ return blocks
460
+
461
+ def _get_tool_result_messages(self, start_idx, end_idx):
462
+ """Extract only tool result messages between two indices.
463
+
464
+ Args:
465
+ start_idx: Starting index (exclusive)
466
+ end_idx: Ending index (exclusive)
467
+
468
+ Returns:
469
+ list: Tool result messages (role='tool') between start_idx and end_idx
470
+ """
471
+ tool_results = []
472
+ for i in range(start_idx + 1, end_idx):
473
+ if self.messages[i].get('role') == 'tool':
474
+ tool_results.append(self.messages[i])
475
+ return tool_results
476
+
477
+ def _summarize_tool_call(self, tool_call, tool_result):
478
+ """Extract key info from a single tool call.
479
+
480
+ Args:
481
+ tool_call: Tool call dict from message
482
+ tool_result: Tool result content string
483
+
484
+ Returns:
485
+ str: Summary string for this tool
486
+ """
487
+ try:
488
+ import json
489
+ fn_name = tool_call['function']['name']
490
+ args = json.loads(tool_call['function']['arguments'])
491
+ except (json.JSONDecodeError, KeyError):
492
+ return "Used a tool"
493
+
494
+ if fn_name == "execute_command":
495
+ cmd = args.get('command', '')
496
+ exit_code = extract_exit_code(tool_result)
497
+ matches = extract_metadata_from_result(tool_result, 'matches_found')
498
+
499
+ if exit_code == 0:
500
+ if matches is not None:
501
+ return f"Searched for '{cmd[:50]}...' (found {matches} matches)"
502
+ else:
503
+ return f"Searched: '{cmd[:50]}...'"
504
+ else:
505
+ return f"Search failed: '{cmd[:30]}...'"
506
+
507
+ elif fn_name == "read_file":
508
+ path = args.get('path_str', '')
509
+ lines = extract_metadata_from_result(tool_result, 'lines_read')
510
+ start_line = extract_metadata_from_result(tool_result, 'start_line')
511
+
512
+ if lines is not None:
513
+ if start_line is not None and start_line > 1:
514
+ end_line = start_line + lines - 1
515
+ return f"Read {path} (lines {start_line}-{end_line})"
516
+ else:
517
+ return f"Read {path} ({lines} lines)"
518
+ else:
519
+ return f"Read {path}"
520
+
521
+ elif fn_name == "list_directory":
522
+ path = args.get('path_str', '.')
523
+ items = extract_metadata_from_result(tool_result, 'items_count')
524
+ recursive = args.get('recursive', False)
525
+
526
+ action = "Listed recursively" if recursive else "Listed"
527
+ if items is not None:
528
+ return f"{action} {path} ({items} items)"
529
+ return f"{action} {path}"
530
+
531
+ elif fn_name == "edit_file":
532
+ path = args.get('path', '')
533
+ search = args.get('search', '')
534
+ search_preview = search[:30] + "..." if len(search) > 30 else search
535
+ return f"Edited {path} (replaced '{search_preview}')"
536
+
537
+ elif fn_name == "web_search":
538
+ query = args.get('query', '')
539
+ results = extract_metadata_from_result(tool_result, 'results_found')
540
+ if results is not None:
541
+ return f"Searched web for '{query[:40]}...' ({results} results)"
542
+ return f"Searched web: '{query[:40]}...'"
543
+
544
+ return f"Used {fn_name}"
545
+
546
+ def _generate_tool_block_summary(self, tool_calls, tool_results):
547
+ """Generate a single summary line for all tools in a block.
548
+
549
+ Args:
550
+ tool_calls: List of tool call dicts
551
+ tool_results: List of tool result strings
552
+
553
+ Returns:
554
+ str: Human-readable summary
555
+ """
556
+ # Group tools by type for better readability
557
+ searches = []
558
+ reads = []
559
+ lists = []
560
+ edits = []
561
+ web = []
562
+ failed = []
563
+
564
+ for i, tool_call in enumerate(tool_calls):
565
+ result = tool_results[i] if i < len(tool_results) else ""
566
+ summary = self._summarize_tool_call(tool_call, result)
567
+
568
+ if "failed" in summary.lower():
569
+ failed.append(summary)
570
+ elif "searched" in summary.lower() and "web" not in summary.lower():
571
+ searches.append(summary)
572
+ elif "read" in summary.lower():
573
+ reads.append(summary)
574
+ elif "listed" in summary.lower():
575
+ lists.append(summary)
576
+ elif "edited" in summary.lower():
577
+ edits.append(summary)
578
+ elif "web" in summary.lower():
579
+ web.append(summary)
580
+
581
+ # Build human-readable summary
582
+ parts = []
583
+
584
+ if searches:
585
+ count = len(searches)
586
+ if count == 1:
587
+ parts.append(searches[0])
588
+ else:
589
+ parts.append(f"performed {count} searches")
590
+
591
+ if reads:
592
+ if len(reads) == 1:
593
+ parts.append(reads[0])
594
+ else:
595
+ parts.append(f"read {len(reads)} files")
596
+
597
+ if lists:
598
+ parts.append(lists[0] if len(lists) == 1 else "listed directories")
599
+
600
+ if edits:
601
+ parts.append(edits[0] if len(edits) == 1 else f"made {len(edits)} edits")
602
+
603
+ if web:
604
+ parts.append(web[0] if len(web) == 1 else "performed web searches")
605
+
606
+ if failed:
607
+ parts.append(f"{len(failed)} tool(s) failed")
608
+
609
+ if not parts:
610
+ return "Used tools for exploration"
611
+
612
+ # Join with natural language
613
+ if len(parts) <= 2:
614
+ return " and ".join(parts) + "."
615
+ else:
616
+ first = ", ".join(parts[:-1])
617
+ return f"{first}, and {parts[-1]}."
618
+
619
def _estimate_message_tokens(self, msg) -> int:
    """Cheap token estimate for one message, used for boundary calculation.

    Applies the ~4 characters per token rule to the message's collected
    text plus CHAR_BASED_OVERHEAD, avoiding a full tiktoken encoding while
    walking the history to decide where the uncompacted tail starts.

    Args:
        msg: Message dict

    Returns:
        Estimated token count for this message
    """
    char_count = len(self._collect_message_text(msg))
    return (char_count + CHAR_BASED_OVERHEAD) // 4
634
+
635
+ def _find_in_flight_boundary(self):
636
+ """Find the index where in-flight tool blocks begin.
637
+
638
+ Scans from the end of messages for any assistant message with tool_calls
639
+ that does NOT have a corresponding final assistant answer after it.
640
+ These messages must never be included in the compactable region.
641
+
642
+ Returns:
643
+ int: Index of the first in-flight message, or len(messages) if none.
644
+ """
645
+ n = len(self.messages)
646
+ i = n - 1
647
+
648
+ # Walk backward looking for the pattern: ...assistant(tool_calls) tool_results...
649
+ # without a final assistant(answer) after the tool results.
650
+ while i >= 0:
651
+ msg = self.messages[i]
652
+ if msg.get('role') == 'assistant' and msg.get('tool_calls'):
653
+ # Found an assistant with tool_calls. Check if there's a final
654
+ # answer (assistant without tool_calls) after it.
655
+ has_final_answer = False
656
+ j = i + 1
657
+ while j < n:
658
+ if self.messages[j].get('role') == 'assistant' and not self.messages[j].get('tool_calls'):
659
+ has_final_answer = True
660
+ break
661
+ elif self.messages[j].get('role') == 'assistant' and self.messages[j].get('tool_calls'):
662
+ # Another tool-calling assistant — skip over its tool results
663
+ j += 1
664
+ while j < n and self.messages[j].get('role') == 'tool':
665
+ j += 1
666
+ continue
667
+ elif self.messages[j].get('role') == 'tool':
668
+ j += 1
669
+ continue
670
+ else:
671
+ break
672
+
673
+ if not has_final_answer:
674
+ # This is an in-flight tool block. Find its user question.
675
+ user_idx = i - 1
676
+ while user_idx >= 0 and self.messages[user_idx].get('role') != 'user':
677
+ user_idx -= 1
678
+ return max(0, user_idx)
679
+ else:
680
+ # Completed block — continue scanning backward
681
+ # Skip past all the tool messages associated with this block
682
+ j = i + 1
683
+ while j < n:
684
+ if self.messages[j].get('role') == 'tool':
685
+ j += 1
686
+ elif self.messages[j].get('role') == 'assistant' and self.messages[j].get('tool_calls'):
687
+ j += 1
688
+ while j < n and self.messages[j].get('role') == 'tool':
689
+ j += 1
690
+ continue
691
+ else:
692
+ break
693
+ i = j - 1
694
+ else:
695
+ i -= 1
696
+
697
+ return n
698
+
699
def _compute_split_boundary(self, blocks, in_flight_start):
    """Compute the message index where the uncompacted tail begins.

    Four constraints determine the boundary (the most conservative, i.e.
    earliest, index wins):
    1. Token budget: accumulate from the end until uncompacted_tail_tokens
    2. Minimum tool blocks: preserve at least min_tool_blocks completed blocks
    3. Tool-call integrity: never split inside a tool block
    4. In-flight boundary: never include in-flight tool messages

    The minimum-blocks constraint only moves the boundary when the tail
    holds fewer than min_tool_blocks completed blocks. The previous code
    seeded that constraint with index 1 and applied it unconditionally,
    which forced every result to be <= 1 and kept any block from being
    eligible for compaction.

    Args:
        blocks: List of tool block dicts from _find_tool_blocks()
        in_flight_start: Index of first in-flight message (from _find_in_flight_boundary)

    Returns:
        int: Message index where the uncompacted tail starts
    """
    tc = context_settings.tool_compaction
    token_budget = tc.uncompacted_tail_tokens
    min_blocks = tc.min_tool_blocks
    n = len(self.messages)

    # The verbatim region ends at the first in-flight message (exclusive)
    verbatim_end = min(in_flight_start, n)

    # Constraint 1: Token budget — walk backward from verbatim_end. Index 0
    # (the first message) is never part of the walk, so the boundary is at
    # least 1; it stays 1 when everything fits within the budget.
    token_boundary = 1
    tokens_accumulated = 0
    for i in range(verbatim_end - 1, 0, -1):
        tokens_accumulated += self._estimate_message_tokens(self.messages[i])
        if tokens_accumulated >= token_budget:
            token_boundary = i
            break

    # Constraint 3: Tool-call integrity — if the token boundary lands inside
    # a block (between its user question and its final answer), pull the
    # boundary back so the whole block stays in the tail.
    boundary = token_boundary
    for block in blocks:
        if block['user_idx'] < token_boundary <= block['end']:
            boundary = min(boundary, block['user_idx'])

    # Constraint 2: Minimum tool blocks — count completed blocks that lie
    # entirely inside the tail [boundary, verbatim_end); if there are too
    # few, extend the tail backward over the most recent earlier blocks.
    completed = [b for b in blocks if b['end'] < verbatim_end]
    blocks_in_tail = sum(1 for b in completed if b['user_idx'] >= boundary)
    blocks_needed = min_blocks - blocks_in_tail
    if blocks_needed > 0:
        earlier_blocks = sorted(
            (b for b in completed if b['user_idx'] < boundary),
            key=lambda b: b['end'],
            reverse=True,
        )
        for b in earlier_blocks[:blocks_needed]:
            boundary = min(boundary, b['user_idx'])

    return boundary
771
+
772
def compact_tool_results(self):
    """Collapse older completed tool blocks into one-line summaries.

    The split point between "old" and "recent" messages is delegated to
    ``_compute_split_boundary``; every completed block that ends before
    that point is replaced by its user message (with a ``[Context: ...]``
    summary appended) followed by the block's final message. Blocks
    containing a failed tool (non-zero exit code) are kept verbatim unless
    ``compact_failed_tools`` is enabled.

    Does nothing when compaction is disabled on this instance, when
    per-message compaction is turned off in settings, when there are fewer
    than six messages, or when no block qualifies.
    """
    # Sub-agents can switch compaction off to keep their findings intact.
    if self._compaction_disabled:
        return

    compaction_cfg = context_settings.tool_compaction
    if not compaction_cfg.enable_per_message_compaction:
        return

    # Minimum useful history: user+assistant+tool+assistant+user+assistant.
    if len(self.messages) < 6:
        return

    blocks = self._find_tool_blocks()
    if not blocks:
        return

    in_flight_start = self._find_in_flight_boundary()
    split_boundary = self._compute_split_boundary(blocks, in_flight_start)

    # Index the compactable blocks by start position; setdefault keeps the
    # first block seen for a given start, like a linear first-match search.
    compactable_by_start = {}
    for candidate in blocks:
        if candidate['end'] < split_boundary:
            compactable_by_start.setdefault(candidate['start'], candidate)
    if not compactable_by_start:
        return

    rebuilt = []
    consumed = set()

    for position, message in enumerate(self.messages):
        if position in consumed:
            continue

        block = compactable_by_start.get(position)
        if block is None:
            rebuilt.append(message)
            continue

        # NOTE(review): this relies on block['user_idx'] not being emitted
        # before block['start'] is reached — confirm against
        # _find_tool_blocks, otherwise the user message appears twice.
        keep_verbatim = (not compaction_cfg.compact_failed_tools) and any(
            code is not None and code != 0
            for code in map(extract_exit_code, block['tool_results'])
        )

        if keep_verbatim:
            span = range(block['user_idx'], block['end'] + 1)
            rebuilt.extend(self.messages[idx] for idx in span)
            consumed.update(span)
            continue

        summary = self._generate_tool_block_summary(
            block['tool_calls'],
            block['tool_results'],
        )

        # User question, annotated with what the tools found.
        question = self.messages[block['user_idx']].copy()
        question['content'] = question['content'] + f"\n\n[Context: {summary}]"
        rebuilt.append(question)

        # The block's last message is kept unchanged.
        rebuilt.append(self.messages[block['end']])

        consumed.add(block['user_idx'])
        consumed.update(range(block['start'], block['end'] + 1))

    self.messages = rebuilt
    self._update_context_tokens()
868
+
869
+ # ===== AI-Based History Compaction =====
870
+
871
def compact_history(self, console=None, trigger="manual"):
    """Summarize older history with the LLM, keeping the latest exchange verbatim.

    Everything from the last real user message onward (including any tool
    calls, tool results and final answer that follow it) is kept as-is.
    The messages between the system prompt (index 0) and that user message
    are replaced by a single system message holding an LLM-written summary.

    Args:
        console: Console used to report failures and stats; only used when
            trigger is "manual" (None for silent operation).
        trigger: "manual" or "auto".

    Returns:
        dict with "trigger", "before_tokens", "after_tokens" and "summary",
        or None when nothing was compacted (fewer than 10 messages, no
        suitable user message, no final assistant answer, an LLM failure,
        or an empty summary).
    """
    if len(self.messages) < 10:  # Need enough history
        return None

    # Walk backwards to the most recent real user message, ignoring the
    # injected codebase-map message.
    last_user_idx = None
    for i in range(len(self.messages) - 1, -1, -1):
        candidate = self.messages[i]
        if candidate.get('role') == 'user' and not candidate.get('tool_calls'):
            content = candidate.get('content', '')
            if content and not content.startswith("The codebase map"):
                last_user_idx = i
                break

    if last_user_idx is None or last_user_idx < 3:
        return None  # Not enough history to compact

    # Require at least one assistant message without tool calls (a final answer).
    has_final_answer = any(
        msg.get('role') == 'assistant' and not msg.get('tool_calls')
        for msg in self.messages
    )
    if not has_final_answer:
        return None

    # The tail starting at the last user message is kept intact whether or
    # not tools were called after it: splitting a
    # user -> assistant(tool_calls) -> tool results -> assistant(answer)
    # sequence would orphan tool_call_ids.
    messages_to_keep = self.messages[last_user_idx:]
    messages_to_summarize = self.messages[1:last_user_idx]

    if not messages_to_summarize:
        return None

    summary_prompt_content = self._build_summary_prompt(messages_to_summarize)

    # Token count before compaction.
    self._update_context_tokens()
    tokens_before = self.token_tracker.current_context_tokens

    summary_prompt = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant that summarizes conversation context. "
                "Provide clear, concise summaries that capture essential information for continuing work."
            ),
        },
        {
            "role": "user",
            "content": summary_prompt_content,
        },
    ]

    try:
        response = self.client.chat_completion(summary_prompt, stream=False, tools=None)
    except Exception as e:
        if console and trigger == "manual":
            console.print(f"Compaction failed: {e}", style="red")
        return None

    if response is None:
        return None

    # A plain string response is treated as an error message from the client.
    if isinstance(response, str):
        if console and trigger == "manual":
            console.print(f"Compaction failed: {response}", style="red")
        return None

    try:
        # "content" may be present but null; treat that like an empty
        # summary instead of crashing on None.strip().
        summary_text = (response["choices"][0]["message"].get("content") or "").strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        summary_text = ""

    if not summary_text:
        if console and trigger == "manual":
            console.print("Compaction failed: empty summary.", style="red")
        return None

    # New history: system prompt + summary + recent messages.
    summary_message = {
        "role": "system",
        "content": f"Previous conversation context (summarized):\n\n{summary_text}"
    }

    self.messages = [self.messages[0]] + [summary_message] + messages_to_keep

    # Recount the context, then record the summarization call's own usage.
    self._update_context_tokens()
    tokens_after = self.token_tracker.current_context_tokens
    provider_cfg = get_provider_config(self.client.provider)
    self.token_tracker.add_usage(
        response,
        model_name=provider_cfg.get("model", ""),
    )

    # Update context estimate (keeps cumulative API usage intact)
    self.context_token_estimate = tokens_after

    # Notify only for manual trigger
    if console and trigger == "manual":
        reduction = tokens_before - tokens_after
        console.print(
            f"[dim]Compacted history: {tokens_before:,} → {tokens_after:,} tokens "
            f"(-{reduction:,} / {-100 * reduction // (tokens_before or 1)}%)[/dim]"
        )

    return {
        "trigger": trigger,
        "before_tokens": tokens_before,
        "after_tokens": tokens_after,
        "summary": summary_text,
    }
1022
+
1023
def maybe_auto_compact(self, console=None):
    """Run history compaction when the context has reached the trigger size.

    Args:
        console: Console for the optional notification; None keeps the
            operation silent.
    """
    # Measure the whole context first.
    self._update_context_tokens()
    total_tokens = self.token_tracker.current_context_tokens

    # Never compact while tools are executing (locked) or when compaction
    # has been switched off for this instance (e.g. sub-agents).
    if self._compaction_locked or self._compaction_disabled:
        return

    # A per-instance threshold overrides the global setting.
    trigger_threshold = self._compact_trigger_tokens
    if trigger_threshold is None:
        trigger_threshold = context_settings.compact_trigger_tokens

    if total_tokens < trigger_threshold:
        return

    # compact_history itself stays silent; the notification is sent here.
    result = self.compact_history(console=None, trigger="auto")
    if not (result and context_settings.notify_auto_compaction and console):
        return

    self._notify_compaction(
        console,
        result["before_tokens"],
        result["after_tokens"],
        "compacted history",
    )
1058
+
1059
def ensure_context_fits(self, console=None):
    """Bring the context under hard_limit_tokens before an LLM call.

    Escalation order:
        0. Already under the limit: nothing to do.
        1. Aggressive tool-result compaction (non-LLM, fast).
        2. AI-based history compaction.
        3. Emergency truncation (drop oldest messages).

    Layers 1-3 are all skipped while ``_compaction_locked`` is set (the
    message list is in an intermediate state during tool execution); the
    result then has action "locked". When compaction is disabled on this
    instance, a warning is logged and action "none" is returned.

    Args:
        console: Optional Rich console for notifications.

    Returns:
        dict describing what happened, e.g.:
            {"action": "none", "tokens": 120000}
            {"action": "tool_compaction", "tokens": 90000, "reduction": 30000}
            {"action": "history_compaction", "tokens": 70000, "reduction": 50000}
            {"action": "emergency_truncation", "tokens": 150000, "reduction": 20000}
            {"action": "locked", "tokens": 210000, "reduction": 0}
    """
    def _measure():
        # Refresh the tracker and read the current context size.
        self._update_context_tokens()
        return self.token_tracker.current_context_tokens

    current_tokens = _measure()
    hard_limit = context_settings.hard_limit_tokens

    # Layer 0: under the limit.
    if current_tokens < hard_limit:
        return {"action": "none", "tokens": current_tokens}

    # Compaction disabled (e.g. sub-agents preserving findings): warn only.
    if self._compaction_disabled:
        logging.getLogger(__name__).warning(
            "Context (%d tokens) exceeds hard limit (%d) but compaction is disabled — "
            "API call may fail with context-length error",
            current_tokens, hard_limit,
        )
        return {"action": "none", "tokens": current_tokens}

    tokens_before = current_tokens

    def _finish(action, tokens_now):
        # Build the result dict, then emit the optional notification.
        outcome = {
            "action": action,
            "tokens": tokens_now,
            "reduction": tokens_before - tokens_now,
        }
        self._notify_compaction(console, tokens_before, tokens_now, _ACTION_LABELS[action])
        return outcome

    if not self._compaction_locked:
        # Layer 1: temporarily shrink the uncompacted tail and the minimum
        # block count so tool compaction is as aggressive as possible; the
        # shared settings are always restored afterwards.
        tool_cfg = context_settings.tool_compaction
        saved_tail_tokens = tool_cfg.uncompacted_tail_tokens
        saved_min_blocks = tool_cfg.min_tool_blocks
        try:
            tool_cfg.uncompacted_tail_tokens = 10_000
            tool_cfg.min_tool_blocks = 1
            self.compact_tool_results()
        finally:
            tool_cfg.uncompacted_tail_tokens = saved_tail_tokens
            tool_cfg.min_tool_blocks = saved_min_blocks

        current_tokens = _measure()
        if current_tokens < hard_limit:
            return _finish("tool_compaction", current_tokens)

        # Layer 2: LLM summary; any failure falls through to truncation.
        try:
            history_result = self.compact_history(console=None, trigger="auto")
        except Exception:
            history_result = None

        if history_result is not None:
            current_tokens = _measure()
            if current_tokens < hard_limit:
                return _finish("history_compaction", current_tokens)

    # Layer 3 is unsafe while locked: truncating an incomplete message
    # state could break tool_call_id pairing.
    if self._compaction_locked:
        current_tokens = _measure()
        return {
            "action": "locked",
            "tokens": current_tokens,
            "reduction": tokens_before - current_tokens,
        }

    self._emergency_truncate(hard_limit)
    current_tokens = _measure()
    return _finish("emergency_truncation", current_tokens)
1167
+
1168
+ def _emergency_truncate(self, target_tokens):
1169
+ """Drop oldest non-system messages until context is under target.
1170
+
1171
+ Preservation rules:
1172
+ - Index 0: system prompt (always kept)
1173
+ - Any "Previous conversation context" system messages (compaction summaries)
1174
+ - Last 6 messages minimum (recent context)
1175
+ - Tool-call integrity: if an assistant message with tool_calls is in the
1176
+ protected tail, all its corresponding tool result messages must also be
1177
+ in the tail (and vice versa). The protected region is expanded to
1178
+ include complete tool blocks.
1179
+
1180
+ Args:
1181
+ target_tokens: Target token count to get under.
1182
+ """
1183
+ MIN_TAIL = 6 # Minimum recent messages to preserve
1184
+
1185
+ def _is_protected(msg):
1186
+ """Check if a message should never be dropped."""
1187
+ return msg.get("role", "") == "system"
1188
+
1189
+ def _compute_protected_tail(messages):
1190
+ """Compute the minimum protected tail index that preserves tool_call pairs.
1191
+
1192
+ Start from MIN_TAIL from the end and expand backward if a tool block
1193
+ straddles the boundary.
1194
+ """
1195
+ n = len(messages)
1196
+ if n <= MIN_TAIL + 1:
1197
+ return 1 # Nothing to drop anyway
1198
+
1199
+ tail_start = n - MIN_TAIL
1200
+
1201
+ # Scan backward from tail_start to find tool blocks that straddle
1202
+ # the boundary and expand to include them.
1203
+ changed = True
1204
+ while changed:
1205
+ changed = False
1206
+ # Build set of tool_call_ids that appear in tool messages within
1207
+ # the protected tail region
1208
+ tool_ids_in_tail = set()
1209
+ for i in range(tail_start, n):
1210
+ msg = messages[i]
1211
+ if msg.get("role") == "tool":
1212
+ tcid = msg.get("tool_call_id")
1213
+ if tcid:
1214
+ tool_ids_in_tail.add(tcid)
1215
+
1216
+ # Check if any message just before tail_start has tool_calls
1217
+ # that reference those tool_call_ids
1218
+ scan = tail_start - 1
1219
+ while scan > 0:
1220
+ msg = messages[scan]
1221
+ if msg.get("role") == "assistant" and msg.get("tool_calls"):
1222
+ msg_tool_ids = {
1223
+ tc.get("id") for tc in msg["tool_calls"] if tc.get("id")
1224
+ }
1225
+ if msg_tool_ids & tool_ids_in_tail:
1226
+ # This assistant message must be in the protected tail
1227
+ tail_start = scan
1228
+ changed = True
1229
+ # Also add any of its tool_call_ids to the set
1230
+ tool_ids_in_tail |= msg_tool_ids
1231
+ else:
1232
+ break # No overlap, stop scanning backward
1233
+ elif msg.get("role") == "tool":
1234
+ # A tool message before the assistant — check if its
1235
+ # tool_call_id belongs to an assistant in the tail
1236
+ tcid = msg.get("tool_call_id")
1237
+ if tcid and tcid in tool_ids_in_tail:
1238
+ tail_start = scan
1239
+ changed = True
1240
+ else:
1241
+ break
1242
+ else:
1243
+ break
1244
+ scan -= 1
1245
+
1246
+ return tail_start
1247
+
1248
+ # Drop oldest non-protected messages until under target
1249
+ while True:
1250
+ self._update_context_tokens()
1251
+ if self.token_tracker.current_context_tokens < target_tokens:
1252
+ break
1253
+
1254
+ tail_start = _compute_protected_tail(self.messages)
1255
+ if tail_start <= 1:
1256
+ break # Nothing droppable remains
1257
+
1258
+ # Find the oldest droppable message (skip index 0 and protected tail)
1259
+ dropped = False
1260
+ for i in range(1, tail_start):
1261
+ if not _is_protected(self.messages[i]):
1262
+ self.messages.pop(i)
1263
+ dropped = True
1264
+ break
1265
+
1266
+ if not dropped:
1267
+ break # Only protected messages remain in droppable zone
1268
+
1269
+ self.sync_log()
1270
+
1271
+ def _notify_compaction(self, console, tokens_before, tokens_after, action_label):
1272
+ """Show dim notification when auto-compaction takes action.
1273
+
1274
+ Args:
1275
+ console: Rich console (or None to suppress)
1276
+ tokens_before: Token count before compaction
1277
+ tokens_after: Token count after compaction
1278
+ action_label: Human-readable description of the action taken
1279
+ """
1280
+ if not context_settings.notify_auto_compaction or not console:
1281
+ return
1282
+ reduction = tokens_before - tokens_after
1283
+ console.print(
1284
+ f"[dim]Auto-compacted: {tokens_before:,} → {tokens_after:,} tokens "
1285
+ f"({action_label})[/dim]"
1286
+ )
1287
+
1288
def get_gitignore_spec(self, repo_root: Path):
    """Return the .gitignore spec for repo_root, reloading it when stale.

    The cached spec is reused only if one is cached, it was loaded for the
    same repo_root, and the .gitignore modification time (None when the
    file does not exist) is unchanged.

    Args:
        repo_root: Repository root directory

    Returns:
        Whatever ``load_gitignore_spec`` produced for this root
        (presumably a pathspec.PathSpec, or None without a .gitignore —
        verify against utils.gitignore_filter).
    """
    gitignore_path = repo_root / ".gitignore"
    current_mtime = gitignore_path.stat().st_mtime if gitignore_path.exists() else None

    cache_is_fresh = (
        self._gitignore_spec is not None
        and self._repo_root == repo_root
        and current_mtime == self._gitignore_mtime
    )
    if cache_is_fresh:
        return self._gitignore_spec

    # Imported lazily, only when a (re)load is actually needed.
    from utils.gitignore_filter import load_gitignore_spec

    self._repo_root = repo_root
    self._gitignore_mtime = current_mtime
    self._gitignore_spec = load_gitignore_spec(repo_root)
    return self._gitignore_spec
1319
+
1320
def switch_provider(self, provider_name):
    """Switch LLM provider.

    Switching away from "local" shuts the local server down; switching to
    "local" starts it unless a server process is already attached. If the
    server cannot be started, the previous provider is restored.

    Args:
        provider_name: Name of a provider returned by ``get_providers()``.

    Returns:
        str: Result message
    """
    providers = get_providers()
    if provider_name not in providers:
        return f"Invalid provider. Use /provider to list. Available: {', '.join(providers)}"

    previous_provider = self.client.provider

    # Terminate server if switching away from local
    if previous_provider == "local" and provider_name != "local":
        self.cleanup()
        # Forget the terminated process; a stale handle would make
        # start_server_if_needed() skip the restart on a later switch back.
        self.server_process = None

    if not self.client.switch_provider(provider_name):
        return "Provider switch failed."

    self._init_messages(reset_costs=True)

    if provider_name != "local":
        return f"Switched to {provider_name} provider."

    # Only start a server when none is attached; re-selecting "local"
    # with a running server must not be reported as a start failure.
    if not self.server_process:
        server = self.start_server_if_needed()
        if not server:
            # Failed to start server - revert
            self.client.switch_provider(previous_provider)
            self._init_messages(reset_costs=True)
            return f"Failed to start local server. Reverted to {previous_provider} provider."
        self.server_process = server

    return f"Switched to {provider_name} provider (server ready)."
1352
+
1353
def reload_config(self):
    """Re-read configuration and push the result into the LLM client.

    Call this after any config change (provider, model, api key).
    """
    # The bare call resolves to the module-level reload_config function,
    # not to this method.
    reload_config()

    llm_client = self.client
    llm_client.sync_provider_from_config()
1360
+
1361
+ # ===== Config Methods (for agent use) =====
1362
+
1363
def set_provider(self, provider_name: str) -> str:
    """Agent-accessible alias for switch_provider.

    Args:
        provider_name: Provider name to switch to.

    Returns:
        str: The result message produced by switch_provider.
    """
    outcome = self.switch_provider(provider_name)
    return outcome
1373
+
1374
def start_server_if_needed(self):
    """Launch the local server when the local provider has none attached.

    Returns:
        The result of _start_local_server() when a launch was attempted;
        None when the provider is not "local" or a server process is
        already attached.
    """
    if self.client.provider != "local" or self.server_process:
        return None
    return self._start_local_server()
1383
+
1384
def _start_local_server(self):
    """Start llama-server process and wait for health check.

    The server's stderr is redirected to ``llama_server.log``; the open
    handle is kept on ``self._log_file`` while the server runs. Polls
    ``/health`` until it answers ``{"status": "ok"}``, the child process
    exits, or the configured number of attempts is used up.

    Returns:
        subprocess.Popen: Server process, or None if it never became
        healthy (the process is then terminated and the log closed).

    Raises:
        Whatever ``subprocess.Popen`` raises when the executable cannot be
        launched; the log handle is closed before re-raising.
    """
    from llm.config import get_provider_config, _CONFIG

    local_config = get_provider_config("local")
    server_path = _CONFIG.get("LOCAL_SERVER_PATH", local_config["config_keys"]["LOCAL_SERVER_PATH"])
    model_path = local_config.get("model", "")
    host = local_config["extra"]["host"]
    port = local_config["extra"]["port"]

    args = [
        server_path,
        "-m", model_path,
        "-ngl", str(server_settings.ngl_layers),
        "--split-mode", "none",
        "--ctx-size", str(server_settings.ctx_size),
        "--n-predict", str(server_settings.n_predict),
        "--rope-scale", str(server_settings.rope_scale),
        "--host", host,
        "--port", str(port),
        "--jinja",
    ]

    # Pin the server to GPU 0 only.
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = "0"

    # Close a handle left over from an earlier launch before replacing it.
    stale_log = getattr(self, "_log_file", None)
    if stale_log:
        stale_log.close()

    # Log stderr to file for debugging
    log_path = Path(__file__).resolve().parents[2] / "llama_server.log"
    self._log_file = open(log_path, "w")

    try:
        process = subprocess.Popen(
            args,
            stdout=subprocess.DEVNULL,
            stderr=self._log_file,
            env=env,
        )
    except Exception:
        # Launch failed (e.g. missing executable): don't leak the log handle.
        self._log_file.close()
        self._log_file = None
        raise

    health_url = f"http://{host}:{port}/health"
    # health_check_timeout_sec is used as the number of polling attempts.
    for _ in range(server_settings.health_check_timeout_sec):
        if process.poll() is not None:
            break  # Server already exited; further polling is pointless
        try:
            r = requests.get(health_url, timeout=2)
            if r.status_code == 200:
                data = r.json()
                if data.get("status") == "ok":
                    return process
        except Exception:
            # Best effort: the server may simply not be listening yet.
            pass
        time.sleep(server_settings.health_check_interval_sec)

    # Server failed health check - clean up resources
    process.terminate()
    process.wait()
    if self._log_file:
        self._log_file.close()
        self._log_file = None
    return None
1446
+
1447
def cycle_approve_mode(self) -> str:
    """Advance approve_mode to the next entry of CYCLEABLE_APPROVE_MODES.

    Wraps around after the last entry; a current mode that is not in the
    cycle restarts at the first entry.

    Returns:
        str: The new approval mode.
    """
    from llm.config import CYCLEABLE_APPROVE_MODES

    cycle = CYCLEABLE_APPROVE_MODES
    if self.approve_mode in cycle:
        successor = (cycle.index(self.approve_mode) + 1) % len(cycle)
    else:
        successor = 0

    self.approve_mode = cycle[successor]
    return self.approve_mode
1461
+
1462
def reset_session(self):
    """Start over: fresh messages and an empty in-session task list.

    Public wrapper around _init_messages (called with reset_totals=False)
    that also clears the task list and its title.
    """
    # Close the current conversation log session before the reset.
    active_logger = self.markdown_logger
    if active_logger:
        active_logger.end_session()

    self._init_messages(reset_totals=False)

    self.task_list.clear()
    self.task_list_title = None
1475
+
1476
def log_message(self, message: dict):
    """Forward one message to the conversation logger, if logging is on.

    Args:
        message: Message dict to log
    """
    sink = self.markdown_logger
    if not sink:
        return
    sink.log_message(message)
1484
+
1485
def sync_log(self):
    """Rewrite the conversation log from the current message list.

    Intended to be called after anything that changes the messages array:
    - new messages added
    - compaction
    - mode changes (which modify system prompts)

    No-op when logging is off.
    """
    sink = self.markdown_logger
    if not sink:
        return
    sink.rewrite_log(self.messages)
1495
+
1496
def end_conversation(self):
    """Close the current conversation logging session, if logging is on."""
    sink = self.markdown_logger
    if not sink:
        return
    sink.end_session()
1500
+
1501
def toggle_logging(self):
    """Flip conversation logging between on and off.

    Turning logging off ends the current session and discards the logger.
    Turning it on creates a logger for context_settings.conversations_dir,
    starts a session and logs every message currently in the history.

    Returns:
        bool: New logging state (True if enabled, False if disabled)
    """
    from utils.logger import MarkdownConversationLogger

    active_logger = self.markdown_logger
    if active_logger:
        # Currently on: switch off.
        active_logger.end_session()
        self.markdown_logger = None
        return False

    # Currently off: switch on and backfill the existing history.
    self.markdown_logger = MarkdownConversationLogger(
        conversations_dir=context_settings.conversations_dir
    )
    self.markdown_logger.start_session()
    for past_message in self.messages:
        self.markdown_logger.log_message(past_message)
    return True
1524
+
1525
def set_logging(self, enabled: bool) -> bool:
    """Force conversation logging on or off.

    Args:
        enabled: True to enable logging, False to disable.

    Returns:
        bool: The logging state after the call (the current state if it
        already matched, otherwise the result of toggle_logging()).
    """
    is_logging = self.markdown_logger is not None
    return is_logging if enabled == is_logging else self.toggle_logging()
1538
+
1539
def cleanup(self):
    """Release session resources: log session, server process, server log.

    Ends the conversation logging session (if logging is on), terminates
    the server process (if one is attached) and closes the server log
    file handle. Safe to call more than once.
    """
    # End conversation session on cleanup
    if self.markdown_logger:
        self.markdown_logger.end_session()

    if self.server_process:
        self.server_process.terminate()
        self.server_process.wait()
        # Drop the dead handle so a later start_server_if_needed() does not
        # mistake it for a running server and refuse to start a new one.
        self.server_process = None

    # Close log file handle if open
    if self._log_file:
        self._log_file.close()
        self._log_file = None