bone-agent 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/bin/bone.js +39 -0
  2. package/package.json +25 -39
  3. package/LICENSE +0 -21
  4. package/README.md +0 -201
  5. package/bin/npm-wrapper.js +0 -235
  6. package/bin/rg +0 -0
  7. package/bin/rg.exe +0 -0
  8. package/config.yaml.example +0 -144
  9. package/prompts/main/ask_questions.md +0 -31
  10. package/prompts/main/batch_independent_calls.md +0 -5
  11. package/prompts/main/casual_interactions.md +0 -11
  12. package/prompts/main/code_references.md +0 -8
  13. package/prompts/main/communication_style.md +0 -12
  14. package/prompts/main/context_reliability.md +0 -12
  15. package/prompts/main/conversational_tool_calling.md +0 -15
  16. package/prompts/main/dream.md +0 -50
  17. package/prompts/main/editing_pattern.md +0 -13
  18. package/prompts/main/error_handling.md +0 -6
  19. package/prompts/main/exploration_pattern.md +0 -21
  20. package/prompts/main/intro.md +0 -1
  21. package/prompts/main/obsidian.md +0 -16
  22. package/prompts/main/obsidian_project.md +0 -79
  23. package/prompts/main/professional_objectivity.md +0 -3
  24. package/prompts/main/skills.md +0 -3
  25. package/prompts/main/targeted_searching.md +0 -10
  26. package/prompts/main/task_lists_pattern.md +0 -8
  27. package/prompts/main/temp_folder.md +0 -9
  28. package/prompts/main/think_before_acting.md +0 -10
  29. package/prompts/main/tone_and_style.md +0 -4
  30. package/prompts/main/tool_preferences.md +0 -24
  31. package/prompts/main/trust_subagent_context.md +0 -21
  32. package/prompts/main/when_to_use_sub_agent.md +0 -7
  33. package/prompts/micro/ask_questions.md +0 -1
  34. package/prompts/micro/batch_independent_calls.md +0 -1
  35. package/prompts/micro/casual_interactions.md +0 -1
  36. package/prompts/micro/code_references.md +0 -1
  37. package/prompts/micro/communication_style.md +0 -1
  38. package/prompts/micro/context_reliability.md +0 -1
  39. package/prompts/micro/conversational_tool_calling.md +0 -1
  40. package/prompts/micro/editing_pattern.md +0 -1
  41. package/prompts/micro/error_handling.md +0 -1
  42. package/prompts/micro/exploration_pattern.md +0 -1
  43. package/prompts/micro/intro.md +0 -1
  44. package/prompts/micro/obsidian.md +0 -4
  45. package/prompts/micro/obsidian_project.md +0 -5
  46. package/prompts/micro/professional_objectivity.md +0 -1
  47. package/prompts/micro/skills.md +0 -1
  48. package/prompts/micro/targeted_searching.md +0 -1
  49. package/prompts/micro/task_lists_pattern.md +0 -1
  50. package/prompts/micro/temp_folder.md +0 -1
  51. package/prompts/micro/think_before_acting.md +0 -5
  52. package/prompts/micro/tone_and_style.md +0 -1
  53. package/prompts/micro/tool_preferences.md +0 -1
  54. package/prompts/micro/trust_subagent_context.md +0 -1
  55. package/prompts/micro/when_to_use_sub_agent.md +0 -1
  56. package/requirements.txt +0 -9
  57. package/src/__init__.py +0 -11
  58. package/src/core/__init__.py +0 -1
  59. package/src/core/agentic.py +0 -1085
  60. package/src/core/chat_manager.py +0 -1577
  61. package/src/core/config_manager.py +0 -260
  62. package/src/core/cron.py +0 -578
  63. package/src/core/cron_allowlist.py +0 -118
  64. package/src/core/memory.py +0 -145
  65. package/src/core/metadata.py +0 -75
  66. package/src/core/retry.py +0 -71
  67. package/src/core/skills.py +0 -463
  68. package/src/core/sub_agent.py +0 -376
  69. package/src/core/tool_approval.py +0 -220
  70. package/src/core/tool_feedback.py +0 -789
  71. package/src/exceptions.py +0 -79
  72. package/src/llm/__init__.py +0 -1
  73. package/src/llm/client.py +0 -176
  74. package/src/llm/codex_provider.py +0 -350
  75. package/src/llm/config.py +0 -536
  76. package/src/llm/prompts.py +0 -494
  77. package/src/llm/providers.py +0 -438
  78. package/src/llm/streaming.py +0 -163
  79. package/src/llm/token_tracker.py +0 -399
  80. package/src/tools/__init__.py +0 -151
  81. package/src/tools/constants.py +0 -59
  82. package/src/tools/create_file.py +0 -136
  83. package/src/tools/directory.py +0 -389
  84. package/src/tools/edit.py +0 -549
  85. package/src/tools/file_reader.py +0 -322
  86. package/src/tools/helpers/__init__.py +0 -99
  87. package/src/tools/helpers/base.py +0 -599
  88. package/src/tools/helpers/converters.py +0 -44
  89. package/src/tools/helpers/file_helpers.py +0 -189
  90. package/src/tools/helpers/formatters.py +0 -411
  91. package/src/tools/helpers/loader.py +0 -145
  92. package/src/tools/helpers/parallel_executor.py +0 -231
  93. package/src/tools/helpers/path_resolver.py +0 -283
  94. package/src/tools/helpers/plugin_manifest.py +0 -185
  95. package/src/tools/obsidian.py +0 -96
  96. package/src/tools/review_sub_agent.py +0 -190
  97. package/src/tools/rg_search.py +0 -477
  98. package/src/tools/search_plugins.py +0 -177
  99. package/src/tools/select_option.py +0 -600
  100. package/src/tools/shell.py +0 -302
  101. package/src/tools/sub_agent.py +0 -139
  102. package/src/tools/task_list.py +0 -269
  103. package/src/tools/web_search.py +0 -61
  104. package/src/ui/__init__.py +0 -1
  105. package/src/ui/banner.py +0 -87
  106. package/src/ui/commands.py +0 -3131
  107. package/src/ui/displays.py +0 -239
  108. package/src/ui/loader.py +0 -284
  109. package/src/ui/main.py +0 -643
  110. package/src/ui/prompt_utils.py +0 -113
  111. package/src/ui/setting_selector.py +0 -590
  112. package/src/ui/setup_wizard.py +0 -294
  113. package/src/ui/sub_agent_panel.py +0 -234
  114. package/src/ui/tool_confirmation.py +0 -226
  115. package/src/utils/__init__.py +0 -1
  116. package/src/utils/citation_parser.py +0 -199
  117. package/src/utils/editor.py +0 -207
  118. package/src/utils/gitignore_filter.py +0 -149
  119. package/src/utils/logger.py +0 -254
  120. package/src/utils/paths.py +0 -30
  121. package/src/utils/result_parsers.py +0 -108
  122. package/src/utils/safe_commands.py +0 -243
  123. package/src/utils/settings.py +0 -195
  124. package/src/utils/user_message_logger.py +0 -120
  125. package/src/utils/validation.py +0 -201
  126. package/src/utils/web_search.py +0 -173
@@ -1,1577 +0,0 @@
1
- """Chat state and server lifecycle management."""
2
-
3
- import os
4
- import json
5
- import logging
6
- import subprocess
7
- import time
8
- import requests
9
- from typing import Optional, IO
10
-
11
- from llm.client import LLMClient
12
- from llm.config import get_providers, get_provider_config, get_provider_display_name, reload_config
13
- from llm.prompts import build_system_prompt
14
- from core.skills import render_active_skills_section
15
- from pathlib import Path
16
- from llm.token_tracker import TokenTracker
17
- from utils.settings import server_settings, context_settings
18
- from utils.logger import MarkdownConversationLogger
19
- from utils.user_message_logger import UserMessageLogger
20
- from utils.result_parsers import extract_exit_code, extract_metadata_from_result
21
-
22
- # Token counting constants
23
- MESSAGE_OVERHEAD_TOKENS = 4 # Approximate tokens for JSON structure: braces, quotes, colons, commas
24
- CHAR_BASED_OVERHEAD = 20 # Character overhead for JSON structure in character-based estimation
25
-
26
- # Action labels for context management notifications (used by ensure_context_fits)
27
- _ACTION_LABELS = {
28
- "tool_compaction": "compacted tool results",
29
- "history_compaction": "compacted history",
30
- "emergency_truncation": "emergency truncation (oldest messages dropped)",
31
- }
32
-
33
- class ChatManager:
34
- """Manages chat state, messages, and provider switching."""
35
-
36
def __init__(self, compact_trigger_tokens: Optional[int] = None):
    """Create the chat state for one session and seed its message history.

    Args:
        compact_trigger_tokens: Per-instance compaction threshold. When
            None, the global context_settings value applies instead.
    """
    # LLM client; its provider comes from the global config.
    self.client = LLMClient()

    # Conversation state.
    self.messages = []
    self.approve_mode = "safe"
    self.context_token_estimate = 0

    # Local server process and the handle of its log file (llama_server).
    self.server_process: Optional[subprocess.Popen] = None
    self._log_file: Optional[IO] = None

    self.token_tracker = TokenTracker()

    # Memory-only task list for the current session (used in EDIT workflows).
    self.task_list = []
    self.task_list_title = None

    # Skills active in this chat; they are rendered into the system prompt.
    self.loaded_skills = set()

    # Cached .gitignore filtering state.
    self._gitignore_spec = None
    self._gitignore_mtime = None
    self._repo_root = None

    # Compaction controls:
    #   _compact_trigger_tokens - custom threshold (None = global setting)
    #   _compaction_disabled    - True turns all compaction off (used by
    #                             sub-agents to preserve findings)
    #   _compaction_locked      - set by agentic.py around tool execution,
    #                             cleared once all results are appended
    self._compact_trigger_tokens = compact_trigger_tokens
    self._compaction_disabled = False
    self._compaction_locked = False

    # Markdown conversation log, only when enabled in settings.
    self.markdown_logger: Optional[MarkdownConversationLogger] = (
        MarkdownConversationLogger(
            conversations_dir=context_settings.conversations_dir
        )
        if context_settings.log_conversations
        else None
    )

    # User message log (always on, for the dream memory system).
    self.user_message_logger = UserMessageLogger()

    self._init_messages(reset_totals=True)
79
-
80
- def set_compaction_lock(self, locked):
81
- """Set or release the compaction lock.
82
-
83
- When locked, compaction is skipped entirely (no message removal,
84
- no summarization, no truncation). Used during tool execution to
85
- prevent orphaning tool_call_ids.
86
- """
87
- self._compaction_locked = locked
88
-
89
def _init_messages(self, reset_totals: bool = True, reset_costs: bool = False):
    """Rebuild the message history: system prompt plus optional agents.md exchange.

    Args:
        reset_totals: Reset cumulative token counts (default True).
        reset_costs: Also reset cost accumulators (default False). Only
            consulted when reset_totals is True. Set True on a provider
            switch to clear stale billing state; kept False on /clear to
            preserve cumulative session costs.
    """
    # A new history means a new conversation-log session.
    if self.markdown_logger:
        self.markdown_logger.start_session()

    # Active skills belong to the current history, so they start empty.
    self.loaded_skills = set()

    # The system prompt is always the first message.
    self.messages = [{"role": "system", "content": self._build_system_prompt()}]

    # Seed with the agents.md exchange only when both halves are non-empty.
    user_msg, assistant_msg = self._load_agents_md()
    if user_msg and assistant_msg:
        self.messages.extend([
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg},
        ])

    # Mirror the initial history into the conversation log.
    if self.markdown_logger:
        for initial_msg in self.messages:
            self.markdown_logger.log_message(initial_msg)

    # Session totals: full reset (including costs) or token counts only.
    if reset_totals and reset_costs:
        self.token_tracker.reset_all()
    elif reset_totals:
        self.token_tracker.reset(prompt_tokens=0, completion_tokens=0)

    # Per-conversation tokens are reset unconditionally.
    self.token_tracker.reset_conversation()

    # Recount the context from the fresh history (tools included if enabled).
    self._update_context_tokens()
    self.context_token_estimate = self.token_tracker.current_context_tokens
133
-
134
def _build_system_prompt(self, variant: str | None = None) -> str:
    """Render the system prompt text for this chat.

    Args:
        variant: Prompt variant name (e.g. 'main', 'micro'). When None,
            the variant is read from prompt_settings.

    Returns:
        The prompt built for the variant, including the section rendered
        from the currently loaded skills.
    """
    if variant is None:
        # Imported lazily, as in the rest of this class.
        from utils.settings import prompt_settings
        variant = prompt_settings.variant

    skills_section = render_active_skills_section(self.loaded_skills)
    return build_system_prompt(variant, active_skills_section=skills_section)
146
-
147
- def update_system_prompt(self, variant: str | None = None):
148
- """Rebuild system prompt in-place (e.g. after hotswap or session reset).
149
-
150
- Args:
151
- variant: Prompt variant to use. If None, keeps current variant.
152
- Updates token_tracker.current_variant.
153
- """
154
- if not self.messages:
155
- raise RuntimeError("Cannot update system prompt: messages array is empty")
156
-
157
- if self.messages[0]["role"] != "system":
158
- raise RuntimeError(f"Cannot update system prompt: messages[0] has role '{self.messages[0]['role']}', expected 'system'")
159
-
160
- if variant is None:
161
- from utils.settings import prompt_settings
162
- variant = prompt_settings.variant
163
-
164
- self.messages[0]["content"] = self._build_system_prompt(variant)
165
- self.token_tracker.current_variant = variant
166
- self._update_context_tokens()
167
-
168
- def _load_agents_md(self) -> tuple[str, str]:
169
- """Load agents.md content and prepare user/assistant exchange.
170
-
171
- Returns:
172
- tuple: (user_message, assistant_message)
173
- """
174
- # Check for agents.md in current working directory (user's project)
175
- agents_path = Path.cwd() / "agents.md"
176
-
177
- if agents_path.exists():
178
- map_content = agents_path.read_text(encoding="utf-8").strip()
179
- user_msg = (
180
- "Here is the codebase map for this project. "
181
- "This provides an overview of the repository structure and file purposes. "
182
- "Use this as a reference when exploring the codebase.\n\n"
183
- f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
184
- )
185
- assistant_msg = (
186
- "I've received the codebase map. I'll use this as a reference when "
187
- "exploring the repository, but I'll always verify current state by "
188
- "reading files and searching the codebase before making changes."
189
- )
190
- else:
191
- # No codebase map available - skip entirely
192
- user_msg = ""
193
- assistant_msg = ""
194
-
195
- return user_msg, assistant_msg
196
-
197
def _update_context_tokens(self, tools=None):
    """Recount the context size and publish it after the history changes.

    The total is the token count of ``self.messages`` plus, when tools
    apply, the token count of the JSON-serialized tool definitions. The
    result is written to the token tracker and to
    ``self.context_token_estimate``.

    Args:
        tools: Optional list of tool definitions to include in the count.
            When None, the list comes from ``tools.TOOLS()`` if
            ``llm.config.TOOLS_ENABLED`` is set, otherwise no tools are
            counted.
    """
    message_tokens = self._count_tokens(self.messages)

    if tools is None:
        from llm.config import TOOLS_ENABLED
        if TOOLS_ENABLED:
            from tools import TOOLS
            tools = TOOLS()

    tool_tokens = 0
    if tools:
        # Tools are counted in their JSON form (which is how they are sent to the API).
        serialized = json.dumps(tools)
        approx_tokens = len(serialized) // 4  # ~4 characters per token

        if self.client.provider == "anthropic":
            # Character-based approximation (tiktoken doesn't support Claude).
            tool_tokens = approx_tokens
        else:
            try:
                import tiktoken
                model_name = getattr(self.client, "model", "") or ""
                try:
                    encoder = tiktoken.encoding_for_model(model_name)
                except Exception:
                    encoder = tiktoken.get_encoding("cl100k_base")
                tool_tokens = len(encoder.encode(serialized))
            except Exception:
                # tiktoken unavailable or failed: use the character estimate.
                tool_tokens = approx_tokens

    total_tokens = message_tokens + tool_tokens
    self.token_tracker.set_context_tokens(total_tokens)
    self.context_token_estimate = total_tokens
245
-
246
- def _collect_message_text(self, msg) -> str:
247
- """Extract all text fields from a message as a single string.
248
-
249
- Collects role, content, tool_calls (id, type, function name/args),
250
- and tool_call_id fields. Used by token counting methods.
251
-
252
- Args:
253
- msg: Message dict
254
-
255
- Returns:
256
- Concatenated string of all message text fields
257
- """
258
- parts = []
259
-
260
- # Role field
261
- role = msg.get('role', '')
262
- if role:
263
- parts.append(role)
264
-
265
- # Content
266
- content = msg.get('content', '')
267
- if content:
268
- parts.append(str(content))
269
-
270
- # Tool calls (assistant messages)
271
- if msg.get('tool_calls'):
272
- for tc in msg['tool_calls']:
273
- # id field (e.g., "call_abc123")
274
- tc_id = tc.get('id', '')
275
- if tc_id:
276
- parts.append(tc_id)
277
-
278
- # type field (usually "function")
279
- tc_type = tc.get('type', 'function')
280
- parts.append(tc_type)
281
-
282
- # function object
283
- fn = tc.get('function', {})
284
- if fn:
285
- fn_name = fn.get('name', '')
286
- if fn_name:
287
- parts.append(fn_name)
288
-
289
- fn_args = fn.get('arguments', '{}')
290
- parts.append(fn_args)
291
-
292
- # Tool call ID (tool messages)
293
- if msg.get('role') == 'tool' and msg.get('tool_call_id'):
294
- parts.append(msg['tool_call_id'])
295
-
296
- return ''.join(p or '' for p in parts)
297
-
298
def _count_tokens(self, messages) -> int:
    """Count the tokens of a message list.

    Every message contributes the text gathered by
    ``_collect_message_text`` (role, content, tool_calls fields and
    tool_call_id) plus a fixed per-message overhead. tiktoken is used
    when possible; the character-based estimate is used for the
    "anthropic" provider (tiktoken doesn't support Claude) and whenever
    tiktoken cannot be used.

    Args:
        messages: List of messages to count tokens for

    Returns:
        int: Estimated token count
    """
    if self.client.provider == "anthropic":
        return self._count_tokens_char_based(messages)

    try:
        import tiktoken
        model_name = getattr(self.client, "model", "") or ""
        try:
            encoder = tiktoken.encoding_for_model(model_name)
        except Exception:
            # Unknown model name: fall back to a general-purpose encoding.
            encoder = tiktoken.get_encoding("cl100k_base")

        return sum(
            len(encoder.encode(self._collect_message_text(message))) + MESSAGE_OVERHEAD_TOKENS
            for message in messages
        )
    except Exception:
        # tiktoken missing or failing: use the character-based estimate.
        return self._count_tokens_char_based(messages)
336
-
337
def _count_tokens_char_based(self, messages) -> int:
    """Estimate tokens from character counts (used for Anthropic).

    Each message is costed as its collected text length plus
    CHAR_BASED_OVERHEAD, integer-divided by 4 (~4 characters per token).

    Args:
        messages: List of messages to count tokens for

    Returns:
        int: Estimated token count
    """
    return sum(
        (len(self._collect_message_text(message)) + CHAR_BASED_OVERHEAD) // 4
        for message in messages
    )
354
-
355
-
356
- def _build_summary_prompt(self, messages) -> str:
357
- """Generate a comprehensive summary of messages.
358
-
359
- Captures:
360
- - User questions asked
361
- - Tool calls performed (files read, edits, searches)
362
- - Key decisions and changes
363
-
364
- Args:
365
- messages: List of messages to summarize
366
-
367
- Returns:
368
- str: Structured summary preserving context
369
- """
370
- # Extract user questions
371
- user_queries = []
372
- for m in messages:
373
- if m.get('role') == 'user':
374
- content = m.get('content', '')
375
- if content and not content.startswith("The codebase map"):
376
- user_queries.append(content)
377
-
378
- # Extract tool calls
379
- tool_calls = []
380
- for m in messages:
381
- if m.get('tool_calls'):
382
- for tc in m['tool_calls']:
383
- fn = tc['function']
384
- name = fn.get('name', '')
385
- args = fn.get('arguments', '')
386
- tool_calls.append(f"- {name}: {args[:100]}")
387
- elif m.get('role') == 'tool':
388
- # Extract tool result metadata
389
- content = m.get('content', '')
390
- if 'exit_code=' in content:
391
- lines = content.split('\n')[:5] # First 5 lines for context
392
- tool_calls.append(f"Result: {'; '.join(lines[:2])}")
393
-
394
- # Build summary prompt
395
- summary_prompt = f"""Summarize the following conversation context.
396
-
397
- User questions:
398
- {chr(10).join(f'- {q}' for q in user_queries) if user_queries else 'None'}
399
-
400
- Tool operations performed:
401
- {chr(10).join(tool_calls) if tool_calls else 'None'}
402
-
403
- Focus on:
404
- 1. What problem was being solved
405
- 2. What files were read or modified
406
- 3. What searches were performed
407
- 4. Key code changes or decisions made
408
- 5. Current state/progress
409
-
410
- Provide a concise summary (2-4 paragraphs) that captures all essential context for continuing the work."""
411
-
412
- return summary_prompt
413
-
414
- # ===== Tool Result Compaction =====
415
-
416
- def _find_tool_blocks(self, include_in_flight=False):
417
- """Find all tool-result blocks in message history.
418
-
419
- Handles both single-turn and multi-turn tool chains:
420
- Single: user → assistant(tc) → tool_results → assistant(answer)
421
- Multi: user → assistant(tc1) → tools → assistant(tc2) → tools → assistant(answer)
422
-
423
- In multi-turn chains, all tool_calls and tool_results are merged into
424
- a single block spanning from the first assistant(tool_calls) to the
425
- final assistant(answer).
426
-
427
- Args:
428
- include_in_flight: If True, also return blocks that lack a final
429
- assistant answer (in-flight tool chains). The 'end' field points
430
- to the index after the last message in the chain (or the breaking
431
- message index if the chain was interrupted).
432
-
433
- Returns:
434
- list: List of block dicts with keys: user_idx, start, end, tool_calls, tool_results
435
- """
436
- blocks = []
437
- i = 0
438
-
439
- while i < len(self.messages):
440
- msg = self.messages[i]
441
-
442
- # Look for assistant message with tool_calls
443
- if msg.get('role') == 'assistant' and msg.get('tool_calls'):
444
-
445
- # Find user question before this
446
- user_idx = i - 1
447
- while user_idx >= 0 and self.messages[user_idx].get('role') != 'user':
448
- user_idx -= 1
449
-
450
- if user_idx < 0:
451
- i += 1
452
- continue
453
-
454
- # Follow consecutive assistant(tool_calls) → tool_results pairs
455
- # until we reach a final answer (assistant without tool_calls)
456
- block_start = i
457
- all_tool_calls = []
458
- all_tool_results = []
459
- j = i
460
- found_end = False
461
-
462
- while j < len(self.messages):
463
- if self.messages[j].get('role') == 'assistant' and self.messages[j].get('tool_calls'):
464
- # Accumulate tool calls from this assistant message
465
- all_tool_calls.extend(self.messages[j].get('tool_calls', []))
466
- # Collect immediately following tool results
467
- k = j + 1
468
- while k < len(self.messages) and self.messages[k].get('role') == 'tool':
469
- all_tool_results.append(self.messages[k].get('content', ''))
470
- k += 1
471
- j = k
472
- elif self.messages[j].get('role') == 'assistant' and not self.messages[j].get('tool_calls'):
473
- # Final answer — this completes the block
474
- found_end = True
475
- break
476
- else:
477
- # Non-tool, non-assistant message breaks the chain
478
- break
479
-
480
- if include_in_flight:
481
- if all_tool_calls:
482
- blocks.append({
483
- 'user_idx': user_idx,
484
- 'start': block_start,
485
- 'end': j,
486
- 'tool_calls': all_tool_calls,
487
- 'tool_results': all_tool_results,
488
- 'in_flight': not found_end,
489
- })
490
- else:
491
- if found_end and all_tool_calls:
492
- blocks.append({
493
- 'user_idx': user_idx,
494
- 'start': block_start,
495
- 'end': j,
496
- 'tool_calls': all_tool_calls,
497
- 'tool_results': all_tool_results,
498
- })
499
-
500
- # Continue scanning from after the final answer (or after the chain)
501
- # Guard: always advance at least one position to prevent infinite loops
502
- i = max(i + 1, j + 1 if found_end else j)
503
- else:
504
- i += 1
505
-
506
- return blocks
507
-
508
- def _get_tool_result_messages(self, start_idx, end_idx):
509
- """Extract only tool result messages between two indices.
510
-
511
- Args:
512
- start_idx: Starting index (exclusive)
513
- end_idx: Ending index (exclusive)
514
-
515
- Returns:
516
- list: Tool result messages (role='tool') between start_idx and end_idx
517
- """
518
- tool_results = []
519
- for i in range(start_idx + 1, end_idx):
520
- if self.messages[i].get('role') == 'tool':
521
- tool_results.append(self.messages[i])
522
- return tool_results
523
-
524
- def _summarize_tool_call(self, tool_call, tool_result):
525
- """Extract key info from a single tool call.
526
-
527
- Args:
528
- tool_call: Tool call dict from message
529
- tool_result: Tool result content string
530
-
531
- Returns:
532
- str: Summary string for this tool
533
- """
534
- try:
535
- import json
536
- fn_name = tool_call['function']['name']
537
- args = json.loads(tool_call['function']['arguments'])
538
- except (json.JSONDecodeError, KeyError):
539
- return "Used a tool"
540
-
541
- if fn_name == "execute_command":
542
- cmd = args.get('command', '')
543
- exit_code = extract_exit_code(tool_result)
544
- matches = extract_metadata_from_result(tool_result, 'matches_found')
545
-
546
- if exit_code == 0:
547
- if matches is not None:
548
- return f"Searched for '{cmd[:50]}...' (found {matches} matches)"
549
- else:
550
- return f"Searched: '{cmd[:50]}...'"
551
- else:
552
- return f"Search failed: '{cmd[:30]}...'"
553
-
554
- elif fn_name == "read_file":
555
- path = args.get('path_str', '')
556
- lines = extract_metadata_from_result(tool_result, 'lines_read')
557
- start_line = extract_metadata_from_result(tool_result, 'start_line')
558
-
559
- if lines is not None:
560
- if start_line is not None and start_line > 1:
561
- end_line = start_line + lines - 1
562
- return f"Read {path} (lines {start_line}-{end_line})"
563
- else:
564
- return f"Read {path} ({lines} lines)"
565
- else:
566
- return f"Read {path}"
567
-
568
- elif fn_name == "list_directory":
569
- path = args.get('path_str', '.')
570
- items = extract_metadata_from_result(tool_result, 'items_count')
571
- recursive = args.get('recursive', False)
572
-
573
- action = "Listed recursively" if recursive else "Listed"
574
- if items is not None:
575
- return f"{action} {path} ({items} items)"
576
- return f"{action} {path}"
577
-
578
- elif fn_name == "edit_file":
579
- path = args.get('path', '')
580
- search = args.get('search', '')
581
- search_preview = search[:30] + "..." if len(search) > 30 else search
582
- return f"Edited {path} (replaced '{search_preview}')"
583
-
584
- elif fn_name == "web_search":
585
- query = args.get('query', '')
586
- results = extract_metadata_from_result(tool_result, 'results_found')
587
- if results is not None:
588
- return f"Searched web for '{query[:40]}...' ({results} results)"
589
- return f"Searched web: '{query[:40]}...'"
590
-
591
- return f"Used {fn_name}"
592
-
593
- def _generate_tool_block_summary(self, tool_calls, tool_results):
594
- """Generate a single summary line for all tools in a block.
595
-
596
- Args:
597
- tool_calls: List of tool call dicts
598
- tool_results: List of tool result strings
599
-
600
- Returns:
601
- str: Human-readable summary
602
- """
603
- # Group tools by type for better readability
604
- searches = []
605
- reads = []
606
- lists = []
607
- edits = []
608
- web = []
609
- failed = []
610
-
611
- for i, tool_call in enumerate(tool_calls):
612
- result = tool_results[i] if i < len(tool_results) else ""
613
- summary = self._summarize_tool_call(tool_call, result)
614
-
615
- if "failed" in summary.lower():
616
- failed.append(summary)
617
- elif "searched" in summary.lower() and "web" not in summary.lower():
618
- searches.append(summary)
619
- elif "read" in summary.lower():
620
- reads.append(summary)
621
- elif "listed" in summary.lower():
622
- lists.append(summary)
623
- elif "edited" in summary.lower():
624
- edits.append(summary)
625
- elif "web" in summary.lower():
626
- web.append(summary)
627
-
628
- # Build human-readable summary
629
- parts = []
630
-
631
- if searches:
632
- count = len(searches)
633
- if count == 1:
634
- parts.append(searches[0])
635
- else:
636
- parts.append(f"performed {count} searches")
637
-
638
- if reads:
639
- if len(reads) == 1:
640
- parts.append(reads[0])
641
- else:
642
- parts.append(f"read {len(reads)} files")
643
-
644
- if lists:
645
- parts.append(lists[0] if len(lists) == 1 else "listed directories")
646
-
647
- if edits:
648
- parts.append(edits[0] if len(edits) == 1 else f"made {len(edits)} edits")
649
-
650
- if web:
651
- parts.append(web[0] if len(web) == 1 else "performed web searches")
652
-
653
- if failed:
654
- parts.append(f"{len(failed)} tool(s) failed")
655
-
656
- if not parts:
657
- return "Used tools for exploration"
658
-
659
- # Join with natural language
660
- if len(parts) <= 2:
661
- return " and ".join(parts) + "."
662
- else:
663
- first = ", ".join(parts[:-1])
664
- return f"{first}, and {parts[-1]}."
665
-
666
def _estimate_message_tokens(self, msg) -> int:
    """Cheap token estimate for one message, used for boundary calculation.

    The estimate is character-based (~4 chars/token) so that boundary
    walks do not pay for full tiktoken encoding; it is precise enough to
    decide where the uncompacted tail is split.

    Args:
        msg: Message dict

    Returns:
        Estimated token count for this message
    """
    char_count = len(self._collect_message_text(msg))
    return (char_count + CHAR_BASED_OVERHEAD) // 4
681
-
682
- def _find_in_flight_boundary(self):
683
- """Find the index where in-flight tool blocks begin.
684
-
685
- Delegates to _find_tool_blocks(include_in_flight=True) to find all
686
- blocks, then returns the earliest start of any in-flight block.
687
- These messages must never be included in the compactable region.
688
-
689
- Returns:
690
- int: Index of the first in-flight message, or len(messages) if none.
691
- """
692
- all_blocks = self._find_tool_blocks(include_in_flight=True)
693
- in_flight = [b for b in all_blocks if b.get('in_flight')]
694
- if in_flight:
695
- return min(b['user_idx'] for b in in_flight)
696
- return len(self.messages)
697
-
698
def _compute_split_boundary(self, blocks, in_flight_start,
                            uncompacted_tail_tokens=None, min_tool_blocks=None):
    """Return the message index at which the uncompacted tail starts.

    Four constraints are combined and the earliest (most conservative)
    index wins:
      1. Token budget: walk backward from the end of the verbatim region
         until the accumulated estimate reaches the budget.
      2. Minimum tool blocks: the ``min_tool_blocks`` most recent completed
         blocks must lie entirely in the tail.
      3. Tool-call integrity: the boundary never lands inside a tool block.
      4. In-flight boundary: the walk starts before ``in_flight_start`` so
         in-flight messages are never counted.

    Args:
        blocks: Tool block dicts (``user_idx`` / ``end`` keys are read here).
        in_flight_start: Index of the first in-flight message.
        uncompacted_tail_tokens: Token budget override (None = use settings).
        min_tool_blocks: Preserved-block-count override (None = use settings).

    Returns:
        int: Message index where the uncompacted tail starts.
    """
    settings = context_settings.tool_compaction
    if uncompacted_tail_tokens is None:
        budget = settings.uncompacted_tail_tokens
    else:
        budget = uncompacted_tail_tokens
    if min_tool_blocks is None:
        required_blocks = settings.min_tool_blocks
    else:
        required_blocks = min_tool_blocks

    # The verbatim region ends (exclusive) at the first in-flight message.
    tail_end = min(in_flight_start, len(self.messages))

    # Constraint 1. Index 0 (the system prompt) is never visited, so it does
    # not count toward the budget; if the budget is never reached the whole
    # history from index 1 onward fits.
    budget_cut = 1
    running_total = 0
    idx = tail_end - 1
    while idx > 0:
        running_total += self._estimate_message_tokens(self.messages[idx])
        if running_total >= budget:
            budget_cut = idx
            break
        idx -= 1

    # Constraint 2. With fewer blocks than required the cut stays at 1,
    # which keeps everything uncompacted.
    keep_cut = 1
    if required_blocks > 0 and len(blocks) >= required_blocks:
        newest_first = sorted(blocks, key=lambda b: b['end'], reverse=True)
        keep_cut = min(b['user_idx'] for b in newest_first[:required_blocks])

    # Constraint 3. Any block the budget cut would slice through is pulled
    # into the tail in full.
    straddled_starts = [
        b['user_idx'] for b in blocks
        if b['user_idx'] < budget_cut <= b['end']
    ]
    intact_cut = min([budget_cut, *straddled_starts])

    return min(intact_cut, keep_cut)
767
-
768
def compact_tool_results(self, skip_token_update=False,
                         uncompacted_tail_tokens=None, min_tool_blocks=None):
    """Collapse completed tool blocks that lie before the uncompacted tail.

    The split point comes from ``_compute_split_boundary``. Each completed
    block ending before it is replaced by its user message (with a
    ``[Context: ...]`` summary appended) followed by the block's final
    message. In-flight blocks are never considered, so this can run while
    tools are executing.

    Args:
        skip_token_update: When True, do not call
            ``_update_context_tokens()`` afterwards (the caller will).
        uncompacted_tail_tokens: Token budget override (None = use settings).
        min_tool_blocks: Preserved-block-count override (None = use settings).
    """
    if self._compaction_disabled:
        return
    if not context_settings.tool_compaction.enable_per_message_compaction:
        return
    # Too little history to be worth compacting.
    if len(self.messages) < 6:
        return

    blocks = self._find_tool_blocks()
    if not blocks:
        return

    split_boundary = self._compute_split_boundary(
        blocks,
        self._find_in_flight_boundary(),
        uncompacted_tail_tokens=uncompacted_tail_tokens,
        min_tool_blocks=min_tool_blocks,
    )

    # Index compactable blocks by start position; the first block wins on a
    # duplicate start, matching a first-match linear search.
    compactable = {}
    for candidate in blocks:
        if candidate['end'] < split_boundary:
            compactable.setdefault(candidate['start'], candidate)
    if not compactable:
        return

    keep_failed_verbatim = not context_settings.tool_compaction.compact_failed_tools
    rebuilt = []
    consumed = set()

    for position, message in enumerate(self.messages):
        if position in consumed:
            continue

        block = compactable.get(position)
        if block is None:
            rebuilt.append(message)
            continue

        # A block with any non-zero exit code is kept as-is when failed
        # tools are excluded from compaction.
        has_failure = keep_failed_verbatim and any(
            code is not None and code != 0
            for code in (extract_exit_code(r) for r in block['tool_results'])
        )
        if has_failure:
            for idx in range(block['user_idx'], block['end'] + 1):
                rebuilt.append(self.messages[idx])
                consumed.add(idx)
            continue

        summary = self._generate_tool_block_summary(
            block['tool_calls'],
            block['tool_results'],
        )

        question = self.messages[block['user_idx']].copy()
        question['content'] = question['content'] + f"\n\n[Context: {summary}]"
        rebuilt.append(question)
        rebuilt.append(self.messages[block['end']])

        consumed.add(block['user_idx'])
        consumed.update(range(block['start'], block['end'] + 1))

    self.messages = rebuilt
    if not skip_token_update:
        self._update_context_tokens()
879
-
880
- # ===== AI-Based History Compaction =====
881
-
882
def compact_history(self, console=None, trigger="manual"):
    """Summarize older history with the LLM while keeping the recent tail.

    Everything from the last real user message onward is kept verbatim.
    Messages between the system prompt (index 0) and that user message are
    sent to the model for summarization, and the summary is inserted as a
    second system message.

    Args:
        console: Console used for failure/success notices. Notices are only
            printed when ``trigger == "manual"``.
        trigger: "manual" or "auto".

    Returns:
        dict | None: ``{"trigger", "before_tokens", "after_tokens",
        "summary"}`` on success; None when there is too little history, no
        final assistant answer exists, or the summary call fails or returns
        nothing usable.
    """
    if len(self.messages) < 10:  # Need enough history
        return None

    # Last genuine user message, scanning from the end. The codebase-map
    # message is injected context, not a user turn.
    last_user_idx = None
    for i in range(len(self.messages) - 1, -1, -1):
        candidate = self.messages[i]
        if candidate.get('role') != 'user' or candidate.get('tool_calls'):
            continue
        content = candidate.get('content', '')
        if not content:
            continue
        # Non-string content (e.g. a list of parts) has no startswith();
        # treat it as a regular user turn instead of crashing.
        if isinstance(content, str) and content.startswith("The codebase map"):
            continue
        last_user_idx = i
        break

    if last_user_idx is None or last_user_idx < 3:
        return None  # Not enough history to compact

    # A final answer (assistant message without tool calls) must exist.
    has_final_answer = any(
        msg.get('role') == 'assistant' and not msg.get('tool_calls')
        for msg in self.messages
    )
    if not has_final_answer:
        return None

    # The tail from the last user message onward stays intact whether or not
    # tools were called in it; splitting it could orphan tool_call_ids.
    messages_to_keep = self.messages[last_user_idx:]
    messages_to_summarize = self.messages[1:last_user_idx]
    if not messages_to_summarize:
        return None

    summary_prompt_content = self._build_summary_prompt(messages_to_summarize)

    # Total context size before compaction.
    self._update_context_tokens()
    tokens_before = self.token_tracker.current_context_tokens

    summary_prompt = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant that summarizes conversation context. "
                "Provide clear, concise summaries that capture essential information for continuing work."
            ),
        },
        {
            "role": "user",
            "content": summary_prompt_content,
        },
    ]

    manual_console = console if trigger == "manual" else None

    try:
        response = self.client.chat_completion(summary_prompt, stream=False, tools=None)
    except Exception as e:
        if manual_console:
            manual_console.print(f"Compaction failed: {e}", style="red")
        return None

    if response is None:
        return None

    # A string response carries an error description rather than a result.
    if isinstance(response, str):
        if manual_console:
            manual_console.print(f"Compaction failed: {response}", style="red")
        return None

    try:
        # ``content`` may be present but None; ``or ""`` keeps that from
        # raising on .strip().
        summary_text = (response["choices"][0]["message"].get("content") or "").strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        summary_text = ""

    if not summary_text:
        if manual_console:
            manual_console.print("Compaction failed: empty summary.", style="red")
        return None

    # New history: system prompt + summary + recent messages.
    summary_message = {
        "role": "system",
        "content": f"Previous conversation context (summarized):\n\n{summary_text}"
    }
    self.messages = [self.messages[0]] + [summary_message] + messages_to_keep

    self._update_context_tokens()
    tokens_after = self.token_tracker.current_context_tokens
    provider_cfg = get_provider_config(self.client.provider)
    # Record the cost of the summarization call itself.
    self.token_tracker.add_usage(
        response,
        model_name=provider_cfg.get("model", ""),
    )

    self.context_token_estimate = tokens_after

    if manual_console:
        reduction = tokens_before - tokens_after
        manual_console.print(
            f"[dim]Compacted history: {tokens_before:,} → {tokens_after:,} tokens "
            f"(-{reduction:,} / {-100 * reduction // (tokens_before or 1)}%)[/dim]"
        )

    return {
        "trigger": trigger,
        "before_tokens": tokens_before,
        "after_tokens": tokens_after,
        "summary": summary_text,
    }
1033
-
1034
def maybe_auto_compact(self, console=None):
    """Run history compaction when the context has reached the trigger size.

    The token count is refreshed first. Nothing further happens while
    compaction is locked (tools executing) or disabled. The per-instance
    ``_compact_trigger_tokens`` takes precedence over the global setting.

    Args:
        console: Console for the optional notification; None keeps the
            operation silent.
    """
    self._update_context_tokens()
    total_tokens = self.token_tracker.current_context_tokens

    if self._compaction_locked or self._compaction_disabled:
        return

    threshold = self._compact_trigger_tokens
    if threshold is None:
        threshold = context_settings.compact_trigger_tokens
    if total_tokens < threshold:
        return

    # compact_history itself stays silent; notification is handled here.
    outcome = self.compact_history(console=None, trigger="auto")
    if not outcome:
        return
    if context_settings.notify_auto_compaction and console:
        self._notify_compaction(
            console,
            outcome["before_tokens"],
            outcome["after_tokens"],
            "compacted history",
        )
1069
-
1070
def ensure_context_fits(self, console=None):
    """Bring the context under ``hard_limit_tokens`` before an LLM call.

    Escalation order:
      0. Already under the limit: nothing to do.
      1. Aggressive tool-result compaction (no LLM call).
      2. AI-based history compaction.
      3. Emergency truncation of the oldest messages.

    With compaction disabled a warning is logged and nothing is changed.
    While compaction is locked, layers 1-3 are all skipped because the
    message list is in an intermediate state during tool execution.

    Args:
        console: Optional console passed to ``_notify_compaction``.

    Returns:
        dict: ``{"action": "none", "tokens": ...}`` when nothing was done;
        otherwise ``{"action": ..., "tokens": ..., "reduction": ...}`` with
        action one of "tool_compaction", "history_compaction", "locked" or
        "emergency_truncation".
    """
    self._update_context_tokens()
    tokens_before = self.token_tracker.current_context_tokens
    hard_limit = context_settings.hard_limit_tokens

    if tokens_before < hard_limit:
        return {"action": "none", "tokens": tokens_before}

    if self._compaction_disabled:
        logging.getLogger(__name__).warning(
            "Context (%d tokens) exceeds hard limit (%d) but compaction is disabled — "
            "API call may fail with context-length error",
            tokens_before, hard_limit,
        )
        return {"action": "none", "tokens": tokens_before}

    def _recount():
        """Refresh the tracker and return the current context size."""
        self._update_context_tokens()
        return self.token_tracker.current_context_tokens

    def _finish(action, tokens_now):
        """Build the result dict for ``action`` and emit the notification."""
        outcome = {
            "action": action,
            "tokens": tokens_now,
            "reduction": tokens_before - tokens_now,
        }
        self._notify_compaction(console, tokens_before, tokens_now, _ACTION_LABELS[action])
        return outcome

    if not self._compaction_locked:
        # Layer 1: small tail budget and a single preserved block.
        self.compact_tool_results(
            skip_token_update=True,
            uncompacted_tail_tokens=10_000,
            min_tool_blocks=1,
        )
        tokens_now = _recount()
        if tokens_now < hard_limit:
            return _finish("tool_compaction", tokens_now)

        # Layer 2: any failure here falls through to truncation.
        try:
            summarized = self.compact_history(console=None, trigger="auto")
        except Exception:
            summarized = None

        if summarized is not None:
            tokens_now = _recount()
            if tokens_now < hard_limit:
                return _finish("history_compaction", tokens_now)

    # Layer 3 is unsafe while locked: truncating an incomplete tool exchange
    # could break tool_call_id pairing.
    if self._compaction_locked:
        tokens_now = _recount()
        return {
            "action": "locked",
            "tokens": tokens_now,
            "reduction": tokens_before - tokens_now,
        }

    self._emergency_truncate(hard_limit)
    return _finish("emergency_truncation", _recount())
1174
-
1175
- def _emergency_truncate(self, target_tokens):
1176
- """Drop oldest non-system messages until context is under target.
1177
-
1178
- Preservation rules:
1179
- - Index 0: system prompt (always kept)
1180
- - Any "Previous conversation context" system messages (compaction summaries)
1181
- - Last 6 messages minimum (recent context)
1182
- - Tool-call integrity: if an assistant message with tool_calls is in the
1183
- protected tail, all its corresponding tool result messages must also be
1184
- in the tail (and vice versa). The protected region is expanded to
1185
- include complete tool blocks.
1186
-
1187
- Args:
1188
- target_tokens: Target token count to get under.
1189
- """
1190
- MIN_TAIL = 6 # Minimum recent messages to preserve
1191
-
1192
- def _is_protected(msg):
1193
- """Check if a message should never be dropped."""
1194
- return msg.get("role", "") == "system"
1195
-
1196
- def _compute_protected_tail(messages):
1197
- """Compute the minimum protected tail index that preserves tool_call pairs.
1198
-
1199
- Start from MIN_TAIL from the end and expand backward if a tool block
1200
- straddles the boundary.
1201
- """
1202
- n = len(messages)
1203
- if n <= MIN_TAIL + 1:
1204
- return 1 # Nothing to drop anyway
1205
-
1206
- tail_start = n - MIN_TAIL
1207
-
1208
- # Scan backward from tail_start to find tool blocks that straddle
1209
- # the boundary and expand to include them.
1210
- changed = True
1211
- while changed:
1212
- changed = False
1213
- # Build set of tool_call_ids that appear in tool messages within
1214
- # the protected tail region
1215
- tool_ids_in_tail = set()
1216
- for i in range(tail_start, n):
1217
- msg = messages[i]
1218
- if msg.get("role") == "tool":
1219
- tcid = msg.get("tool_call_id")
1220
- if tcid:
1221
- tool_ids_in_tail.add(tcid)
1222
-
1223
- # Check if any message just before tail_start has tool_calls
1224
- # that reference those tool_call_ids
1225
- scan = tail_start - 1
1226
- while scan > 0:
1227
- msg = messages[scan]
1228
- if msg.get("role") == "assistant" and msg.get("tool_calls"):
1229
- msg_tool_ids = {
1230
- tc.get("id") for tc in msg["tool_calls"] if tc.get("id")
1231
- }
1232
- if msg_tool_ids & tool_ids_in_tail:
1233
- # This assistant message must be in the protected tail
1234
- tail_start = scan
1235
- changed = True
1236
- # Also add any of its tool_call_ids to the set
1237
- tool_ids_in_tail |= msg_tool_ids
1238
- else:
1239
- break # No overlap, stop scanning backward
1240
- elif msg.get("role") == "tool":
1241
- # A tool message before the assistant — check if its
1242
- # tool_call_id belongs to an assistant in the tail
1243
- tcid = msg.get("tool_call_id")
1244
- if tcid and tcid in tool_ids_in_tail:
1245
- tail_start = scan
1246
- changed = True
1247
- else:
1248
- break
1249
- else:
1250
- break
1251
- scan -= 1
1252
-
1253
- return tail_start
1254
-
1255
- # Drop oldest non-protected messages until under target
1256
- while True:
1257
- self._update_context_tokens()
1258
- if self.token_tracker.current_context_tokens < target_tokens:
1259
- break
1260
-
1261
- tail_start = _compute_protected_tail(self.messages)
1262
- if tail_start <= 1:
1263
- break # Nothing droppable remains
1264
-
1265
- # Find the oldest droppable message (skip index 0 and protected tail)
1266
- dropped = False
1267
- for i in range(1, tail_start):
1268
- if not _is_protected(self.messages[i]):
1269
- self.messages.pop(i)
1270
- dropped = True
1271
- break
1272
-
1273
- if not dropped:
1274
- break # Only protected messages remain in droppable zone
1275
-
1276
- self.sync_log()
1277
-
1278
def _notify_compaction(self, console, tokens_before, tokens_after, action_label):
    """Print a dim one-line notice after an automatic compaction.

    Nothing is printed when ``context_settings.notify_auto_compaction`` is
    falsy or no console is supplied.

    Args:
        console: Console with a ``print`` method (or None to suppress).
        tokens_before: Token count before compaction.
        tokens_after: Token count after compaction.
        action_label: Human-readable description of the action taken.
    """
    if not context_settings.notify_auto_compaction or not console:
        return
    console.print(
        f"[dim]Auto-compacted: {tokens_before:,} → {tokens_after:,} tokens "
        f"({action_label})[/dim]"
    )
1294
-
1295
def get_gitignore_spec(self, repo_root: Path):
    """Return the gitignore spec for ``repo_root``, reloading when stale.

    The spec is cached on the instance together with the repository root
    and the ``.gitignore`` modification time. It is reloaded when nothing
    is cached yet, the root differs, or the modification time changed
    (including the file appearing or disappearing).

    Args:
        repo_root: Repository root directory.

    Returns:
        Whatever ``load_gitignore_spec(repo_root)`` returned for the cached
        state (the original documentation describes this as a
        ``pathspec.PathSpec`` or None when ``.gitignore`` doesn't exist).
    """
    gitignore_path = repo_root / ".gitignore"

    # A single stat() call avoids the window between exists() and stat()
    # in which the file could be deleted.
    try:
        current_mtime = gitignore_path.stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        current_mtime = None

    stale = (
        self._gitignore_spec is None
        or self._repo_root != repo_root
        or current_mtime != self._gitignore_mtime
    )
    if stale:
        from utils.gitignore_filter import load_gitignore_spec

        self._repo_root = repo_root
        self._gitignore_mtime = current_mtime
        self._gitignore_spec = load_gitignore_spec(repo_root)

    return self._gitignore_spec
1326
-
1327
def switch_provider(self, provider_name):
    """Switch the LLM provider and reset the conversation.

    Leaving the local provider shuts the local server down via
    ``cleanup()``. Entering the local provider starts the server when none
    is running; if that fails the previous provider is restored.

    Args:
        provider_name: A key of ``get_providers()``.

    Returns:
        str: Human-readable result message.
    """
    providers = get_providers()
    if provider_name not in providers:
        available = ', '.join(get_provider_display_name(provider) for provider in providers)
        return f"Invalid provider. Use /provider to list. Available: {available}"

    previous_provider = self.client.provider

    if previous_provider == "local" and provider_name != "local":
        self.cleanup()
        # The visible part of cleanup() terminates the process but keeps the
        # handle. Forget it here, otherwise start_server_if_needed() would
        # see a "running" server on the next switch back to local and never
        # restart it.
        self.server_process = None

    if not self.client.switch_provider(provider_name):
        return "Provider switch failed."

    self._init_messages(reset_costs=True)
    provider_label = get_provider_display_name(provider_name)
    if provider_name != "local":
        return f"Switched to {provider_label} provider."

    # An already-running local server is reused; start_server_if_needed()
    # returns None in that case, which must not be read as a start failure.
    if not self.server_process:
        server = self.start_server_if_needed()
        if not server:
            # Failed to start server - revert
            self.client.switch_provider(previous_provider)
            self._init_messages(reset_costs=True)
            previous_label = get_provider_display_name(previous_provider)
            return f"Failed to start local server. Reverted to {previous_label} provider."
        self.server_process = server
    return f"Switched to {provider_label} provider (server ready)."
1363
-
1364
def reload_config(self):
    """Re-read the configuration and push it to the client.

    Calls the module-level ``reload_config()`` and then asks the client to
    re-sync its provider from the refreshed config. Intended to run after
    any config change (provider, model, API key).
    """
    reload_config()
    client = self.client
    client.sync_provider_from_config()
1371
-
1372
- # ===== Config Methods (for agent use) =====
1373
-
1374
def set_provider(self, provider_name: str) -> str:
    """Agent-accessible alias for :meth:`switch_provider`.

    Args:
        provider_name: Provider name to switch to.

    Returns:
        str: The result message produced by ``switch_provider``.
    """
    outcome = self.switch_provider(provider_name)
    return outcome
1384
-
1385
def start_server_if_needed(self):
    """Launch the local server when the local provider has none running.

    Returns:
        The value of ``_start_local_server()`` when the provider is
        ``"local"`` and ``self.server_process`` is falsy; None otherwise.
    """
    if self.client.provider != "local":
        return None
    if self.server_process:
        return None
    return self._start_local_server()
1394
-
1395
def _start_local_server(self):
    """Start the llama-server process and wait for its health check.

    The server's stderr is written to ``llama_server.log``; the open handle
    is kept on ``self._log_file`` while the server is starting or running.
    The health endpoint is polled up to
    ``server_settings.health_check_timeout_sec`` times, sleeping
    ``health_check_interval_sec`` between attempts.

    Returns:
        subprocess.Popen: The server process once ``/health`` reports
        ``{"status": "ok"}``; None if the server exits early or never
        becomes healthy (the process is terminated and the log closed).

    Raises:
        Whatever ``subprocess.Popen`` raises when the server cannot be
        launched; the log file is closed before re-raising.
    """
    from llm.config import get_provider_config, _CONFIG

    local_config = get_provider_config("local")
    server_path = _CONFIG.get("LOCAL_SERVER_PATH", local_config["config_keys"]["LOCAL_SERVER_PATH"])
    model_path = local_config.get("model", "")
    host = local_config["extra"]["host"]
    port = local_config["extra"]["port"]

    args = [
        server_path,
        "-m", model_path,
        "-ngl", str(server_settings.ngl_layers),
        "--threads", str(server_settings.threads),
        "--batch-size", str(server_settings.batch_size),
        "--ubatch-size", str(server_settings.ubatch_size),
        "--flash-attn" if server_settings.flash_attn else "--no-flash-attn",
        "--split-mode", "none",
        "--ctx-size", str(server_settings.ctx_size),
        "--n-predict", str(server_settings.n_predict),
        "--rope-scale", str(server_settings.rope_scale),
        "--host", host,
        "--port", str(port),
        "--jinja",
        "--reasoning", "off",
    ]

    # Restrict the server to GPU 0.
    # NOTE(review): this overrides any CUDA_VISIBLE_DEVICES the user set.
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = "0"

    # Log stderr to file for debugging
    log_path = Path(__file__).resolve().parents[2] / "llama_server.log"
    self._log_file = open(log_path, "w")

    try:
        process = subprocess.Popen(
            args,
            stdout=subprocess.DEVNULL,
            stderr=self._log_file,
            env=env,
        )
    except Exception:
        # Launch failed (e.g. missing binary): do not leak the log handle.
        self._log_file.close()
        self._log_file = None
        raise

    health_url = f"http://{host}:{port}/health"
    for _ in range(server_settings.health_check_timeout_sec):
        if process.poll() is not None:
            break  # Server already exited; further polling cannot succeed
        try:
            r = requests.get(health_url, timeout=2)
            if r.status_code == 200 and r.json().get("status") == "ok":
                return process
        except Exception:
            # Deliberate best-effort: connection errors and malformed
            # bodies are expected while the server is still loading.
            pass
        time.sleep(server_settings.health_check_interval_sec)

    # Server failed health check - clean up resources
    process.terminate()
    process.wait()
    if self._log_file:
        self._log_file.close()
        self._log_file = None
    return None
1462
-
1463
def cycle_approve_mode(self) -> str:
    """Advance ``self.approve_mode`` to the next cycleable mode.

    The order comes from ``CYCLEABLE_APPROVE_MODES`` and wraps around. A
    current mode that is not in that sequence restarts at the first entry.

    Returns:
        str: The new approval mode.
    """
    from llm.config import CYCLEABLE_APPROVE_MODES

    if self.approve_mode in CYCLEABLE_APPROVE_MODES:
        current = CYCLEABLE_APPROVE_MODES.index(self.approve_mode)
        next_index = (current + 1) % len(CYCLEABLE_APPROVE_MODES)
    else:
        next_index = 0
    self.approve_mode = CYCLEABLE_APPROVE_MODES[next_index]
    return self.approve_mode
1477
-
1478
def reset_session(self):
    """Start a fresh chat session.

    Ends the active conversation log session (if logging is on),
    re-initializes the messages with ``reset_totals=False``, and clears the
    in-session task list together with its title.
    """
    active_logger = self.markdown_logger
    if active_logger:
        active_logger.end_session()

    self._init_messages(reset_totals=False)
    self.task_list_title = None
    self.task_list.clear()
1491
-
1492
def log_message(self, message: dict):
    """Record a message in the conversation log.

    The message goes to the markdown logger when logging is on. User
    messages with non-empty content are additionally passed to the user
    message logger (with the resolved current directory as ``project_dir``)
    unless ``MEMORY_SETTINGS["enabled"]`` is falsy.

    Args:
        message: Message dict to log.
    """
    if self.markdown_logger:
        self.markdown_logger.log_message(message)

    is_user_text = message.get("role") == "user" and message.get("content")
    if not is_user_text:
        return

    from llm.config import MEMORY_SETTINGS
    if not MEMORY_SETTINGS.get("enabled", True):
        return
    self.user_message_logger.log_user_message(
        message["content"],
        project_dir=Path.cwd().resolve(),
    )
1509
-
1510
def sync_log(self):
    """Rewrite the conversation log from the current message list.

    Meant to be called after anything that changes ``self.messages``
    (new messages, compaction, mode changes that alter system prompts).
    Does nothing when logging is off.
    """
    active_logger = self.markdown_logger
    if not active_logger:
        return
    active_logger.rewrite_log(self.messages)
1520
-
1521
def end_conversation(self):
    """Close the active conversation log session, if logging is on."""
    active_logger = self.markdown_logger
    if not active_logger:
        return
    active_logger.end_session()
1525
-
1526
def toggle_logging(self):
    """Flip conversation logging on or off.

    Turning logging off ends the current session and discards the logger.
    Turning it on creates a logger for ``context_settings.conversations_dir``,
    starts a session, and replays every current message into it.

    Returns:
        bool: New logging state (True if enabled, False if disabled).
    """
    from utils.logger import MarkdownConversationLogger

    active_logger = self.markdown_logger
    if active_logger:
        active_logger.end_session()
        self.markdown_logger = None
        return False

    self.markdown_logger = MarkdownConversationLogger(
        conversations_dir=context_settings.conversations_dir
    )
    self.markdown_logger.start_session()
    # Bring the new log up to date with the conversation so far.
    for existing in self.messages:
        self.markdown_logger.log_message(existing)
    return True
1549
-
1550
def set_logging(self, enabled: bool) -> bool:
    """Force conversation logging into the requested state.

    Args:
        enabled: True to enable logging, False to disable.

    Returns:
        bool: The current state when it already matches ``enabled``;
        otherwise the result of ``toggle_logging()``.
    """
    is_logging = self.markdown_logger is not None
    if enabled != is_logging:
        return self.toggle_logging()
    return is_logging
1563
-
1564
- def cleanup(self):
1565
- """Terminate server process if running."""
1566
- # End conversation session on cleanup
1567
- if self.markdown_logger:
1568
- self.markdown_logger.end_session()
1569
-
1570
- if self.server_process:
1571
- self.server_process.terminate()
1572
- self.server_process.wait()
1573
-
1574
- # Close log file handle if open
1575
- if self._log_file:
1576
- self._log_file.close()
1577
- self._log_file = None