bone-agent 1.3.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/bin/bone.js +39 -0
  2. package/package.json +25 -39
  3. package/LICENSE +0 -21
  4. package/README.md +0 -184
  5. package/bin/npm-wrapper.js +0 -235
  6. package/bin/rg +0 -0
  7. package/bin/rg.exe +0 -0
  8. package/config.yaml.example +0 -141
  9. package/prompts/main/ask_questions.md +0 -31
  10. package/prompts/main/batch_independent_calls.md +0 -5
  11. package/prompts/main/casual_interactions.md +0 -11
  12. package/prompts/main/code_references.md +0 -8
  13. package/prompts/main/communication_style.md +0 -12
  14. package/prompts/main/context_reliability.md +0 -12
  15. package/prompts/main/conversational_tool_calling.md +0 -15
  16. package/prompts/main/dream.md +0 -36
  17. package/prompts/main/editing_pattern.md +0 -13
  18. package/prompts/main/error_handling.md +0 -6
  19. package/prompts/main/exploration_pattern.md +0 -21
  20. package/prompts/main/intro.md +0 -1
  21. package/prompts/main/obsidian.md +0 -16
  22. package/prompts/main/obsidian_project.md +0 -79
  23. package/prompts/main/professional_objectivity.md +0 -3
  24. package/prompts/main/targeted_searching.md +0 -10
  25. package/prompts/main/task_lists_pattern.md +0 -8
  26. package/prompts/main/temp_folder.md +0 -9
  27. package/prompts/main/think_before_acting.md +0 -10
  28. package/prompts/main/tone_and_style.md +0 -4
  29. package/prompts/main/tool_preferences.md +0 -24
  30. package/prompts/main/trust_subagent_context.md +0 -21
  31. package/prompts/main/when_to_use_sub_agent.md +0 -7
  32. package/prompts/micro/ask_questions.md +0 -1
  33. package/prompts/micro/batch_independent_calls.md +0 -1
  34. package/prompts/micro/casual_interactions.md +0 -1
  35. package/prompts/micro/code_references.md +0 -1
  36. package/prompts/micro/communication_style.md +0 -1
  37. package/prompts/micro/context_reliability.md +0 -1
  38. package/prompts/micro/conversational_tool_calling.md +0 -1
  39. package/prompts/micro/editing_pattern.md +0 -1
  40. package/prompts/micro/error_handling.md +0 -1
  41. package/prompts/micro/exploration_pattern.md +0 -1
  42. package/prompts/micro/intro.md +0 -1
  43. package/prompts/micro/obsidian.md +0 -4
  44. package/prompts/micro/obsidian_project.md +0 -5
  45. package/prompts/micro/professional_objectivity.md +0 -1
  46. package/prompts/micro/targeted_searching.md +0 -1
  47. package/prompts/micro/task_lists_pattern.md +0 -1
  48. package/prompts/micro/temp_folder.md +0 -1
  49. package/prompts/micro/think_before_acting.md +0 -5
  50. package/prompts/micro/tone_and_style.md +0 -1
  51. package/prompts/micro/tool_preferences.md +0 -1
  52. package/prompts/micro/trust_subagent_context.md +0 -1
  53. package/prompts/micro/when_to_use_sub_agent.md +0 -1
  54. package/requirements.txt +0 -9
  55. package/src/__init__.py +0 -11
  56. package/src/core/__init__.py +0 -1
  57. package/src/core/agentic.py +0 -985
  58. package/src/core/chat_manager.py +0 -1564
  59. package/src/core/config_manager.py +0 -253
  60. package/src/core/cron.py +0 -582
  61. package/src/core/cron_allowlist.py +0 -118
  62. package/src/core/memory.py +0 -145
  63. package/src/core/retry.py +0 -71
  64. package/src/core/sub_agent.py +0 -326
  65. package/src/core/tool_approval.py +0 -220
  66. package/src/core/tool_feedback.py +0 -778
  67. package/src/exceptions.py +0 -79
  68. package/src/llm/__init__.py +0 -1
  69. package/src/llm/client.py +0 -171
  70. package/src/llm/config.py +0 -492
  71. package/src/llm/prompts.py +0 -489
  72. package/src/llm/providers.py +0 -436
  73. package/src/llm/streaming.py +0 -163
  74. package/src/llm/token_tracker.py +0 -384
  75. package/src/tools/__init__.py +0 -212
  76. package/src/tools/constants.py +0 -59
  77. package/src/tools/create_file.py +0 -136
  78. package/src/tools/directory.py +0 -389
  79. package/src/tools/edit.py +0 -545
  80. package/src/tools/file_reader.py +0 -322
  81. package/src/tools/helpers/__init__.py +0 -105
  82. package/src/tools/helpers/base.py +0 -550
  83. package/src/tools/helpers/converters.py +0 -44
  84. package/src/tools/helpers/file_helpers.py +0 -189
  85. package/src/tools/helpers/formatters.py +0 -411
  86. package/src/tools/helpers/loader.py +0 -231
  87. package/src/tools/helpers/parallel_executor.py +0 -231
  88. package/src/tools/helpers/path_resolver.py +0 -232
  89. package/src/tools/helpers/plugin_manifest.py +0 -156
  90. package/src/tools/obsidian.py +0 -96
  91. package/src/tools/review_sub_agent.py +0 -189
  92. package/src/tools/rg_search.py +0 -460
  93. package/src/tools/search_plugins.py +0 -109
  94. package/src/tools/select_option.py +0 -600
  95. package/src/tools/shell.py +0 -302
  96. package/src/tools/sub_agent.py +0 -139
  97. package/src/tools/task_list.py +0 -269
  98. package/src/tools/web_search.py +0 -61
  99. package/src/ui/__init__.py +0 -1
  100. package/src/ui/banner.py +0 -87
  101. package/src/ui/commands.py +0 -2809
  102. package/src/ui/displays.py +0 -214
  103. package/src/ui/loader.py +0 -284
  104. package/src/ui/main.py +0 -647
  105. package/src/ui/prompt_utils.py +0 -113
  106. package/src/ui/setting_selector.py +0 -590
  107. package/src/ui/setup_wizard.py +0 -294
  108. package/src/ui/sub_agent_panel.py +0 -234
  109. package/src/ui/tool_confirmation.py +0 -215
  110. package/src/utils/__init__.py +0 -1
  111. package/src/utils/citation_parser.py +0 -199
  112. package/src/utils/editor.py +0 -158
  113. package/src/utils/gitignore_filter.py +0 -149
  114. package/src/utils/logger.py +0 -254
  115. package/src/utils/paths.py +0 -30
  116. package/src/utils/result_parsers.py +0 -108
  117. package/src/utils/safe_commands.py +0 -243
  118. package/src/utils/settings.py +0 -191
  119. package/src/utils/user_message_logger.py +0 -120
  120. package/src/utils/validation.py +0 -191
  121. package/src/utils/web_search.py +0 -173
@@ -1,1564 +0,0 @@
1
- """Chat state and server lifecycle management."""
2
-
3
- import os
4
- import json
5
- import logging
6
- import subprocess
7
- import time
8
- import requests
9
- from typing import Optional, IO
10
-
11
- from llm.client import LLMClient
12
- from llm.config import get_providers, get_provider_config, reload_config
13
- from llm.prompts import build_system_prompt
14
- from pathlib import Path
15
- from llm.token_tracker import TokenTracker
16
- from utils.settings import server_settings, context_settings
17
- from utils.logger import MarkdownConversationLogger
18
- from utils.user_message_logger import UserMessageLogger
19
- from utils.result_parsers import extract_exit_code, extract_metadata_from_result
20
-
21
# --- Token estimation constants ---

# Flat per-message allowance, in tokens, for the JSON envelope
# (braces, quotes, colons, commas).
MESSAGE_OVERHEAD_TOKENS = 4

# Per-message allowance, in characters, for the JSON envelope when the
# character-based estimator is used.
CHAR_BASED_OVERHEAD = 20

# Human-readable labels for context-management notifications
# (used by ensure_context_fits).
_ACTION_LABELS = dict(
    tool_compaction="compacted tool results",
    history_compaction="compacted history",
    emergency_truncation="emergency truncation (oldest messages dropped)",
)
31
-
32
- class ChatManager:
33
- """Manages chat state, messages, and provider switching."""
34
-
35
def __init__(self, compact_trigger_tokens: Optional[int] = None):
    """Set up a fresh chat session.

    Args:
        compact_trigger_tokens: Custom compaction threshold. When set it
            overrides the global context_settings value.
    """
    # The client picks its provider up from the global config.
    self.client = LLMClient()
    self.messages = []

    # Server subprocess and the handle of the llama_server log file.
    self.server_process: Optional[subprocess.Popen] = None
    self._log_file: Optional[IO] = None

    self.approve_mode = "safe"
    self.token_tracker = TokenTracker()
    self.context_token_estimate = 0

    # Memory-only task list for the current session (used in EDIT workflows).
    self.task_list = []
    self.task_list_title = None

    # State for .gitignore filtering.
    self._gitignore_spec = None
    self._gitignore_mtime = None
    self._repo_root = None

    # Compaction controls:
    #   _compact_trigger_tokens - per-instance threshold override
    #   _compaction_disabled    - True switches all compaction off
    #                             (used by sub-agents to preserve findings)
    #   _compaction_locked      - set by agentic.py around tool execution so
    #                             compaction cannot run mid-flight
    self._compact_trigger_tokens = compact_trigger_tokens
    self._compaction_disabled = False

    # Markdown conversation log, only when enabled in settings.
    self.markdown_logger: Optional[MarkdownConversationLogger] = (
        MarkdownConversationLogger(
            conversations_dir=context_settings.conversations_dir
        )
        if context_settings.log_conversations
        else None
    )

    # User messages are always logged (for the dream memory system).
    self.user_message_logger = UserMessageLogger()

    self._compaction_locked = False

    self._init_messages(reset_totals=True)
74
-
75
- def set_compaction_lock(self, locked):
76
- """Set or release the compaction lock.
77
-
78
- When locked, compaction is skipped entirely (no message removal,
79
- no summarization, no truncation). Used during tool execution to
80
- prevent orphaning tool_call_ids.
81
- """
82
- self._compaction_locked = locked
83
-
84
def _init_messages(self, reset_totals: bool = True, reset_costs: bool = False):
    """Rebuild the message history from the system prompt and agents.md.

    Args:
        reset_totals: Reset cumulative token counts (default True).
        reset_costs: Reset cost accumulators as well (default False).
            Only consulted when reset_totals is True. Set True on a
            provider switch to clear stale billing state; left False on
            /clear so cumulative session costs survive.
    """
    md_log = self.markdown_logger

    # A new history means a new logging session.
    if md_log:
        md_log.start_session()

    # The history always opens with the system prompt.
    self.messages = [{"role": "system", "content": self._build_system_prompt()}]

    # agents.md, when present in the cwd, is injected as an opening
    # user/assistant exchange.
    user_msg, assistant_msg = self._load_agents_md()
    if user_msg and assistant_msg:
        self.messages.extend(
            [
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": assistant_msg},
            ]
        )

    if md_log:
        for entry in self.messages:
            md_log.log_message(entry)

    # Session totals: either wipe everything (including costs) or just
    # zero the cumulative prompt/completion counters.
    if reset_totals and reset_costs:
        self.token_tracker.reset_all()
    elif reset_totals:
        self.token_tracker.reset(prompt_tokens=0, completion_tokens=0)

    # Per-conversation counters are reset unconditionally.
    self.token_tracker.reset_conversation()

    # Recount the context from the messages just built (plus tools if enabled).
    self._update_context_tokens()
    self.context_token_estimate = self.token_tracker.current_context_tokens
125
-
126
def _build_system_prompt(self, variant: str | None = None) -> str:
    """Return the system prompt text for a prompt variant.

    Args:
        variant: Prompt variant name (e.g. 'main', 'micro'). When None,
            the variant is read from prompt_settings.
    """
    if variant is not None:
        return build_system_prompt(variant)

    # Imported lazily, only when the caller did not choose a variant.
    from utils.settings import prompt_settings
    return build_system_prompt(prompt_settings.variant)
137
-
138
- def update_system_prompt(self, variant: str | None = None):
139
- """Rebuild system prompt in-place (e.g. after hotswap or session reset).
140
-
141
- Args:
142
- variant: Prompt variant to use. If None, keeps current variant.
143
- Updates token_tracker.current_variant.
144
- """
145
- if not self.messages:
146
- raise RuntimeError("Cannot update system prompt: messages array is empty")
147
-
148
- if self.messages[0]["role"] != "system":
149
- raise RuntimeError(f"Cannot update system prompt: messages[0] has role '{self.messages[0]['role']}', expected 'system'")
150
-
151
- if variant is None:
152
- from utils.settings import prompt_settings
153
- variant = prompt_settings.variant
154
-
155
- self.messages[0]["content"] = self._build_system_prompt(variant)
156
- self.token_tracker.current_variant = variant
157
- self._update_context_tokens()
158
-
159
- def _load_agents_md(self) -> tuple[str, str]:
160
- """Load agents.md content and prepare user/assistant exchange.
161
-
162
- Returns:
163
- tuple: (user_message, assistant_message)
164
- """
165
- # Check for agents.md in current working directory (user's project)
166
- agents_path = Path.cwd() / "agents.md"
167
-
168
- if agents_path.exists():
169
- map_content = agents_path.read_text(encoding="utf-8").strip()
170
- user_msg = (
171
- "Here is the codebase map for this project. "
172
- "This provides an overview of the repository structure and file purposes. "
173
- "Use this as a reference when exploring the codebase.\n\n"
174
- f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
175
- )
176
- assistant_msg = (
177
- "I've received the codebase map. I'll use this as a reference when "
178
- "exploring the repository, but I'll always verify current state by "
179
- "reading files and searching the codebase before making changes."
180
- )
181
- else:
182
- # No codebase map available - skip entirely
183
- user_msg = ""
184
- assistant_msg = ""
185
-
186
- return user_msg, assistant_msg
187
-
188
def _update_context_tokens(self, tools=None):
    """Recount the context size after the message history changed.

    The result is stored both on the token tracker (set_context_tokens)
    and in self.context_token_estimate.

    Args:
        tools: Optional list of tool definitions to include in the count.
            When None, the tool list is loaded via TOOLS() if tools are
            enabled; otherwise only messages are counted.
    """
    message_tokens = self._count_tokens(self.messages)

    if tools is None:
        from llm.config import TOOLS_ENABLED
        if TOOLS_ENABLED:
            from tools import TOOLS
            tools = TOOLS()

    total_tokens = message_tokens

    if tools:
        # tiktoken doesn't support Claude, so Anthropic always uses the
        # ~4 characters per token approximation.
        use_char_estimate = self.client.provider == "anthropic"

        # The tool list is sent to the API as JSON, so count its JSON form.
        tools_json = json.dumps(tools)
        char_estimate = len(tools_json) // 4

        if use_char_estimate:
            tool_tokens = char_estimate
        else:
            try:
                import tiktoken
                model_name = getattr(self.client, "model", "") or ""
                try:
                    encoder = tiktoken.encoding_for_model(model_name)
                except Exception:
                    # Unknown model name: fall back to a generic encoding.
                    encoder = tiktoken.get_encoding("cl100k_base")
                tool_tokens = len(encoder.encode(tools_json))
            except Exception:
                # tiktoken missing or failing: use the character estimate.
                tool_tokens = char_estimate

        total_tokens = message_tokens + tool_tokens

    self.token_tracker.set_context_tokens(total_tokens)
    self.context_token_estimate = total_tokens
236
-
237
- def _collect_message_text(self, msg) -> str:
238
- """Extract all text fields from a message as a single string.
239
-
240
- Collects role, content, tool_calls (id, type, function name/args),
241
- and tool_call_id fields. Used by token counting methods.
242
-
243
- Args:
244
- msg: Message dict
245
-
246
- Returns:
247
- Concatenated string of all message text fields
248
- """
249
- parts = []
250
-
251
- # Role field
252
- role = msg.get('role', '')
253
- if role:
254
- parts.append(role)
255
-
256
- # Content
257
- content = msg.get('content', '')
258
- if content:
259
- parts.append(str(content))
260
-
261
- # Tool calls (assistant messages)
262
- if msg.get('tool_calls'):
263
- for tc in msg['tool_calls']:
264
- # id field (e.g., "call_abc123")
265
- tc_id = tc.get('id', '')
266
- if tc_id:
267
- parts.append(tc_id)
268
-
269
- # type field (usually "function")
270
- tc_type = tc.get('type', 'function')
271
- parts.append(tc_type)
272
-
273
- # function object
274
- fn = tc.get('function', {})
275
- if fn:
276
- fn_name = fn.get('name', '')
277
- if fn_name:
278
- parts.append(fn_name)
279
-
280
- fn_args = fn.get('arguments', '{}')
281
- parts.append(fn_args)
282
-
283
- # Tool call ID (tool messages)
284
- if msg.get('role') == 'tool' and msg.get('tool_call_id'):
285
- parts.append(msg['tool_call_id'])
286
-
287
- return ''.join(p or '' for p in parts)
288
-
289
- def _count_tokens(self, messages) -> int:
290
- """Count tokens accurately using tiktoken for OpenAI, character-based for Anthropic.
291
-
292
- Counts everything the AI receives:
293
- - All message types: user, assistant, system, tool
294
- - All fields: role, content, tool_calls (id, type, function, name, arguments)
295
- - Tool messages: tool_call_id + content
296
-
297
- Args:
298
- messages: List of messages to count tokens for
299
-
300
- Returns:
301
- int: Estimated token count
302
- """
303
- # Use character-based approximation for Anthropic (tiktoken doesn't support Claude)
304
- if self.client.provider == "anthropic":
305
- return self._count_tokens_char_based(messages)
306
-
307
- try:
308
- import tiktoken
309
- model = getattr(self.client, "model", "") or ""
310
- try:
311
- enc = tiktoken.encoding_for_model(model)
312
- except Exception:
313
- enc = tiktoken.get_encoding("cl100k_base")
314
-
315
- # Collect text from all messages and encode
316
- total = 0
317
- for msg in messages:
318
- text = self._collect_message_text(msg)
319
- total += len(enc.encode(text))
320
- total += MESSAGE_OVERHEAD_TOKENS
321
-
322
- return total
323
-
324
- except Exception:
325
- # Fallback to character-based estimation
326
- return self._count_tokens_char_based(messages)
327
-
328
- def _count_tokens_char_based(self, messages) -> int:
329
- """Count tokens using character-based approximation (for Anthropic).
330
-
331
- Uses ~4 characters per token as a rough estimate.
332
-
333
- Args:
334
- messages: List of messages to count tokens for
335
-
336
- Returns:
337
- int: Estimated token count
338
- """
339
- total = 0
340
- for msg in messages:
341
- text = self._collect_message_text(msg)
342
- total += (len(text) + CHAR_BASED_OVERHEAD) // 4
343
-
344
- return total
345
-
346
-
347
- def _build_summary_prompt(self, messages) -> str:
348
- """Generate a comprehensive summary of messages.
349
-
350
- Captures:
351
- - User questions asked
352
- - Tool calls performed (files read, edits, searches)
353
- - Key decisions and changes
354
-
355
- Args:
356
- messages: List of messages to summarize
357
-
358
- Returns:
359
- str: Structured summary preserving context
360
- """
361
- # Extract user questions
362
- user_queries = []
363
- for m in messages:
364
- if m.get('role') == 'user':
365
- content = m.get('content', '')
366
- if content and not content.startswith("The codebase map"):
367
- user_queries.append(content)
368
-
369
- # Extract tool calls
370
- tool_calls = []
371
- for m in messages:
372
- if m.get('tool_calls'):
373
- for tc in m['tool_calls']:
374
- fn = tc['function']
375
- name = fn.get('name', '')
376
- args = fn.get('arguments', '')
377
- tool_calls.append(f"- {name}: {args[:100]}")
378
- elif m.get('role') == 'tool':
379
- # Extract tool result metadata
380
- content = m.get('content', '')
381
- if 'exit_code=' in content:
382
- lines = content.split('\n')[:5] # First 5 lines for context
383
- tool_calls.append(f"Result: {'; '.join(lines[:2])}")
384
-
385
- # Build summary prompt
386
- summary_prompt = f"""Summarize the following conversation context.
387
-
388
- User questions:
389
- {chr(10).join(f'- {q}' for q in user_queries) if user_queries else 'None'}
390
-
391
- Tool operations performed:
392
- {chr(10).join(tool_calls) if tool_calls else 'None'}
393
-
394
- Focus on:
395
- 1. What problem was being solved
396
- 2. What files were read or modified
397
- 3. What searches were performed
398
- 4. Key code changes or decisions made
399
- 5. Current state/progress
400
-
401
- Provide a concise summary (2-4 paragraphs) that captures all essential context for continuing the work."""
402
-
403
- return summary_prompt
404
-
405
- # ===== Tool Result Compaction =====
406
-
407
- def _find_tool_blocks(self, include_in_flight=False):
408
- """Find all tool-result blocks in message history.
409
-
410
- Handles both single-turn and multi-turn tool chains:
411
- Single: user → assistant(tc) → tool_results → assistant(answer)
412
- Multi: user → assistant(tc1) → tools → assistant(tc2) → tools → assistant(answer)
413
-
414
- In multi-turn chains, all tool_calls and tool_results are merged into
415
- a single block spanning from the first assistant(tool_calls) to the
416
- final assistant(answer).
417
-
418
- Args:
419
- include_in_flight: If True, also return blocks that lack a final
420
- assistant answer (in-flight tool chains). The 'end' field points
421
- to the index after the last message in the chain (or the breaking
422
- message index if the chain was interrupted).
423
-
424
- Returns:
425
- list: List of block dicts with keys: user_idx, start, end, tool_calls, tool_results
426
- """
427
- blocks = []
428
- i = 0
429
-
430
- while i < len(self.messages):
431
- msg = self.messages[i]
432
-
433
- # Look for assistant message with tool_calls
434
- if msg.get('role') == 'assistant' and msg.get('tool_calls'):
435
-
436
- # Find user question before this
437
- user_idx = i - 1
438
- while user_idx >= 0 and self.messages[user_idx].get('role') != 'user':
439
- user_idx -= 1
440
-
441
- if user_idx < 0:
442
- i += 1
443
- continue
444
-
445
- # Follow consecutive assistant(tool_calls) → tool_results pairs
446
- # until we reach a final answer (assistant without tool_calls)
447
- block_start = i
448
- all_tool_calls = []
449
- all_tool_results = []
450
- j = i
451
- found_end = False
452
-
453
- while j < len(self.messages):
454
- if self.messages[j].get('role') == 'assistant' and self.messages[j].get('tool_calls'):
455
- # Accumulate tool calls from this assistant message
456
- all_tool_calls.extend(self.messages[j].get('tool_calls', []))
457
- # Collect immediately following tool results
458
- k = j + 1
459
- while k < len(self.messages) and self.messages[k].get('role') == 'tool':
460
- all_tool_results.append(self.messages[k].get('content', ''))
461
- k += 1
462
- j = k
463
- elif self.messages[j].get('role') == 'assistant' and not self.messages[j].get('tool_calls'):
464
- # Final answer — this completes the block
465
- found_end = True
466
- break
467
- else:
468
- # Non-tool, non-assistant message breaks the chain
469
- break
470
-
471
- if include_in_flight:
472
- if all_tool_calls:
473
- blocks.append({
474
- 'user_idx': user_idx,
475
- 'start': block_start,
476
- 'end': j,
477
- 'tool_calls': all_tool_calls,
478
- 'tool_results': all_tool_results,
479
- 'in_flight': not found_end,
480
- })
481
- else:
482
- if found_end and all_tool_calls:
483
- blocks.append({
484
- 'user_idx': user_idx,
485
- 'start': block_start,
486
- 'end': j,
487
- 'tool_calls': all_tool_calls,
488
- 'tool_results': all_tool_results,
489
- })
490
-
491
- # Continue scanning from after the final answer (or after the chain)
492
- # Guard: always advance at least one position to prevent infinite loops
493
- i = max(i + 1, j + 1 if found_end else j)
494
- else:
495
- i += 1
496
-
497
- return blocks
498
-
499
- def _get_tool_result_messages(self, start_idx, end_idx):
500
- """Extract only tool result messages between two indices.
501
-
502
- Args:
503
- start_idx: Starting index (exclusive)
504
- end_idx: Ending index (exclusive)
505
-
506
- Returns:
507
- list: Tool result messages (role='tool') between start_idx and end_idx
508
- """
509
- tool_results = []
510
- for i in range(start_idx + 1, end_idx):
511
- if self.messages[i].get('role') == 'tool':
512
- tool_results.append(self.messages[i])
513
- return tool_results
514
-
515
- def _summarize_tool_call(self, tool_call, tool_result):
516
- """Extract key info from a single tool call.
517
-
518
- Args:
519
- tool_call: Tool call dict from message
520
- tool_result: Tool result content string
521
-
522
- Returns:
523
- str: Summary string for this tool
524
- """
525
- try:
526
- import json
527
- fn_name = tool_call['function']['name']
528
- args = json.loads(tool_call['function']['arguments'])
529
- except (json.JSONDecodeError, KeyError):
530
- return "Used a tool"
531
-
532
- if fn_name == "execute_command":
533
- cmd = args.get('command', '')
534
- exit_code = extract_exit_code(tool_result)
535
- matches = extract_metadata_from_result(tool_result, 'matches_found')
536
-
537
- if exit_code == 0:
538
- if matches is not None:
539
- return f"Searched for '{cmd[:50]}...' (found {matches} matches)"
540
- else:
541
- return f"Searched: '{cmd[:50]}...'"
542
- else:
543
- return f"Search failed: '{cmd[:30]}...'"
544
-
545
- elif fn_name == "read_file":
546
- path = args.get('path_str', '')
547
- lines = extract_metadata_from_result(tool_result, 'lines_read')
548
- start_line = extract_metadata_from_result(tool_result, 'start_line')
549
-
550
- if lines is not None:
551
- if start_line is not None and start_line > 1:
552
- end_line = start_line + lines - 1
553
- return f"Read {path} (lines {start_line}-{end_line})"
554
- else:
555
- return f"Read {path} ({lines} lines)"
556
- else:
557
- return f"Read {path}"
558
-
559
- elif fn_name == "list_directory":
560
- path = args.get('path_str', '.')
561
- items = extract_metadata_from_result(tool_result, 'items_count')
562
- recursive = args.get('recursive', False)
563
-
564
- action = "Listed recursively" if recursive else "Listed"
565
- if items is not None:
566
- return f"{action} {path} ({items} items)"
567
- return f"{action} {path}"
568
-
569
- elif fn_name == "edit_file":
570
- path = args.get('path', '')
571
- search = args.get('search', '')
572
- search_preview = search[:30] + "..." if len(search) > 30 else search
573
- return f"Edited {path} (replaced '{search_preview}')"
574
-
575
- elif fn_name == "web_search":
576
- query = args.get('query', '')
577
- results = extract_metadata_from_result(tool_result, 'results_found')
578
- if results is not None:
579
- return f"Searched web for '{query[:40]}...' ({results} results)"
580
- return f"Searched web: '{query[:40]}...'"
581
-
582
- return f"Used {fn_name}"
583
-
584
- def _generate_tool_block_summary(self, tool_calls, tool_results):
585
- """Generate a single summary line for all tools in a block.
586
-
587
- Args:
588
- tool_calls: List of tool call dicts
589
- tool_results: List of tool result strings
590
-
591
- Returns:
592
- str: Human-readable summary
593
- """
594
- # Group tools by type for better readability
595
- searches = []
596
- reads = []
597
- lists = []
598
- edits = []
599
- web = []
600
- failed = []
601
-
602
- for i, tool_call in enumerate(tool_calls):
603
- result = tool_results[i] if i < len(tool_results) else ""
604
- summary = self._summarize_tool_call(tool_call, result)
605
-
606
- if "failed" in summary.lower():
607
- failed.append(summary)
608
- elif "searched" in summary.lower() and "web" not in summary.lower():
609
- searches.append(summary)
610
- elif "read" in summary.lower():
611
- reads.append(summary)
612
- elif "listed" in summary.lower():
613
- lists.append(summary)
614
- elif "edited" in summary.lower():
615
- edits.append(summary)
616
- elif "web" in summary.lower():
617
- web.append(summary)
618
-
619
- # Build human-readable summary
620
- parts = []
621
-
622
- if searches:
623
- count = len(searches)
624
- if count == 1:
625
- parts.append(searches[0])
626
- else:
627
- parts.append(f"performed {count} searches")
628
-
629
- if reads:
630
- if len(reads) == 1:
631
- parts.append(reads[0])
632
- else:
633
- parts.append(f"read {len(reads)} files")
634
-
635
- if lists:
636
- parts.append(lists[0] if len(lists) == 1 else "listed directories")
637
-
638
- if edits:
639
- parts.append(edits[0] if len(edits) == 1 else f"made {len(edits)} edits")
640
-
641
- if web:
642
- parts.append(web[0] if len(web) == 1 else "performed web searches")
643
-
644
- if failed:
645
- parts.append(f"{len(failed)} tool(s) failed")
646
-
647
- if not parts:
648
- return "Used tools for exploration"
649
-
650
- # Join with natural language
651
- if len(parts) <= 2:
652
- return " and ".join(parts) + "."
653
- else:
654
- first = ", ".join(parts[:-1])
655
- return f"{first}, and {parts[-1]}."
656
-
657
- def _estimate_message_tokens(self, msg) -> int:
658
- """Lightweight per-message token estimate for boundary calculation.
659
-
660
- Uses character-based estimation (~4 chars/token) to avoid the overhead
661
- of full tiktoken encoding during boundary walks. Good enough for
662
- determining where to split the uncompacted tail.
663
-
664
- Args:
665
- msg: Message dict
666
-
667
- Returns:
668
- Estimated token count for this message
669
- """
670
- text = self._collect_message_text(msg)
671
- return (len(text) + CHAR_BASED_OVERHEAD) // 4
672
-
673
- def _find_in_flight_boundary(self):
674
- """Find the index where in-flight tool blocks begin.
675
-
676
- Delegates to _find_tool_blocks(include_in_flight=True) to find all
677
- blocks, then returns the earliest start of any in-flight block.
678
- These messages must never be included in the compactable region.
679
-
680
- Returns:
681
- int: Index of the first in-flight message, or len(messages) if none.
682
- """
683
- all_blocks = self._find_tool_blocks(include_in_flight=True)
684
- in_flight = [b for b in all_blocks if b.get('in_flight')]
685
- if in_flight:
686
- return min(b['user_idx'] for b in in_flight)
687
- return len(self.messages)
688
-
689
def _compute_split_boundary(self, blocks, in_flight_start,
                            uncompacted_tail_tokens=None, min_tool_blocks=None):
    """Compute the message index where the uncompacted tail begins.

    Four constraints shape the result; the earliest (most conservative)
    index wins:
        1. Token budget: walk back from the end until the budget is reached
        2. Minimum tool blocks: keep at least min_tool_blocks completed blocks
        3. Tool-call integrity: never split inside a tool block
        4. In-flight boundary: the walk starts before any in-flight messages

    Args:
        blocks: List of tool block dicts from _find_tool_blocks()
        in_flight_start: Index of first in-flight message (from _find_in_flight_boundary)
        uncompacted_tail_tokens: Override for the token budget (None = use settings)
        min_tool_blocks: Override for minimum tool blocks to preserve (None = use settings)

    Returns:
        int: Message index where the uncompacted tail starts
    """
    settings = context_settings.tool_compaction
    token_budget = (
        settings.uncompacted_tail_tokens
        if uncompacted_tail_tokens is None
        else uncompacted_tail_tokens
    )
    min_blocks = settings.min_tool_blocks if min_tool_blocks is None else min_tool_blocks

    # The region considered here stops at the first in-flight message (exclusive).
    verbatim_end = min(in_flight_start, len(self.messages))

    # Constraint 1: token budget. Walk backward from verbatim_end down to
    # index 1; index 0 (the system prompt) is never counted because it is
    # always preserved. If the budget is never reached everything fits,
    # and the boundary stays at 1.
    token_boundary = 1
    running_total = 0
    for idx in range(verbatim_end - 1, 0, -1):
        running_total += self._estimate_message_tokens(self.messages[idx])
        if running_total >= token_budget:
            token_boundary = idx
            break

    # Constraint 2: minimum tool blocks. Take the min_blocks most recent
    # blocks (by end index) and place the boundary at or before the
    # earliest of their user messages so they all sit in the tail.
    min_block_boundary = 1
    if min_blocks > 0 and len(blocks) >= min_blocks:
        newest_first = sorted(blocks, key=lambda b: b['end'], reverse=True)
        min_block_boundary = min(b['user_idx'] for b in newest_first[:min_blocks])

    # Constraint 3: integrity. If the token boundary falls inside a block,
    # pull it back to that block's user message.
    straddled_starts = [
        block['user_idx']
        for block in blocks
        if block['user_idx'] < token_boundary <= block['end']
    ]
    integrity_boundary = min([token_boundary, *straddled_starts])

    # The earliest candidate is the most conservative one.
    return min(integrity_boundary, min_block_boundary)
758
-
759
def compact_tool_results(self, skip_token_update=False,
                         uncompacted_tail_tokens=None, min_tool_blocks=None):
    """Replace older completed tool blocks with one-line summaries.

    A split boundary is obtained from ``_compute_split_boundary``; every
    block that ends before it is collapsed into two messages: the block's
    user message with ``[Context: <summary>]`` appended, and the block's
    last message. Blocks at or after the boundary are left untouched.

    Args:
        skip_token_update: If True, do not call ``_update_context_tokens()``
            after rewriting the message list.
        uncompacted_tail_tokens: Token budget override forwarded to the
            boundary computation (None = use settings).
        min_tool_blocks: Minimum preserved blocks override forwarded to the
            boundary computation (None = use settings).
    """
    if self._compaction_disabled:
        return
    if not context_settings.tool_compaction.enable_per_message_compaction:
        return
    # Too short to contain anything worth compacting.
    if len(self.messages) < 6:
        return

    blocks = self._find_tool_blocks()
    if not blocks:
        return

    in_flight_start = self._find_in_flight_boundary()
    split_boundary = self._compute_split_boundary(
        blocks, in_flight_start,
        uncompacted_tail_tokens=uncompacted_tail_tokens,
        min_tool_blocks=min_tool_blocks,
    )

    # Index compactable blocks by their start position; setdefault keeps
    # the first block for a given start, like a first-match linear search.
    compactable_by_start = {}
    for candidate in blocks:
        if candidate['end'] < split_boundary:
            compactable_by_start.setdefault(candidate['start'], candidate)
    if not compactable_by_start:
        return

    # NOTE(review): a block is recognised when the scan reaches its 'start'
    # index, yet both branches below emit the message at 'user_idx'. If
    # 'user_idx' can be smaller than 'start', that user message has already
    # been copied by the time the block is handled — confirm against
    # _find_tool_blocks().
    rebuilt = []
    consumed = set()
    for position, message in enumerate(self.messages):
        if position in consumed:
            continue

        block = compactable_by_start.get(position)
        if not block:
            rebuilt.append(message)
            continue

        # Optionally keep blocks containing a non-zero exit code verbatim.
        has_failure = False
        if not context_settings.tool_compaction.compact_failed_tools:
            for tool_result in block['tool_results']:
                code = extract_exit_code(tool_result)
                if code is not None and code != 0:
                    has_failure = True
                    break

        if has_failure:
            span = range(block['user_idx'], block['end'] + 1)
            rebuilt.extend(self.messages[j] for j in span)
            consumed.update(span)
            continue

        summary = self._generate_tool_block_summary(
            block['tool_calls'],
            block['tool_results'],
        )

        # Shallow-copy the user message so the original dict is not mutated.
        annotated_user = self.messages[block['user_idx']].copy()
        annotated_user['content'] = (
            annotated_user['content'] + f"\n\n[Context: {summary}]"
        )
        rebuilt.append(annotated_user)
        rebuilt.append(self.messages[block['end']])

        consumed.add(block['user_idx'])
        consumed.update(range(block['start'], block['end'] + 1))

    self.messages = rebuilt
    if not skip_token_update:
        self._update_context_tokens()
870
-
871
- # ===== AI-Based History Compaction =====
872
-
873
def compact_history(self, console=None, trigger="manual"):
    """Summarize older history with the LLM while keeping the recent tail.

    The message list is rebuilt as: the first message (index 0), one
    ``system`` message holding the generated summary, then every message
    from the last real user message onward (kept verbatim so any tool
    exchange following it stays intact).

    Args:
        console: Console used for notifications; failure and success
            messages are only printed when ``trigger == "manual"``.
        trigger: "manual" or "auto".

    Returns:
        dict with keys ``trigger``, ``before_tokens``, ``after_tokens`` and
        ``summary``; or None when there is not enough history, no final
        assistant answer exists, or the summary request fails or is empty.
    """
    def _report_failure(reason):
        """Print a failure notice for manual runs; always yields None."""
        if console and trigger == "manual":
            console.print(f"Compaction failed: {reason}", style="red")
        return None

    if len(self.messages) < 10:  # Need enough history
        return None

    # Newest user message with non-empty content that is not the codebase map.
    last_user_idx = None
    for i in range(len(self.messages) - 1, -1, -1):
        candidate = self.messages[i]
        if candidate.get('role') == 'user' and not candidate.get('tool_calls'):
            content = candidate.get('content', '')
            if content and not content.startswith("The codebase map"):
                last_user_idx = i
                break

    if last_user_idx is None or last_user_idx < 3:
        return None  # Not enough history to compact

    # Require at least one assistant message without tool calls (a final answer).
    has_final_answer = any(
        msg.get('role') == 'assistant' and not msg.get('tool_calls')
        for msg in self.messages
    )
    if not has_final_answer:
        return None

    # Whether or not tools ran after the last user message, the tail from
    # that message onward is kept whole; splitting it could orphan
    # tool_call_ids or produce consecutive assistant messages.
    messages_to_keep = self.messages[last_user_idx:]
    messages_to_summarize = self.messages[1:last_user_idx]
    if not messages_to_summarize:
        return None

    summary_prompt_content = self._build_summary_prompt(messages_to_summarize)

    # Snapshot the total context size before anything is rewritten.
    self._update_context_tokens()
    tokens_before = self.token_tracker.current_context_tokens

    summary_prompt = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant that summarizes conversation context. "
                "Provide clear, concise summaries that capture essential information for continuing work."
            ),
        },
        {
            "role": "user",
            "content": summary_prompt_content,
        },
    ]

    try:
        response = self.client.chat_completion(summary_prompt, stream=False, tools=None)
    except Exception as e:
        return _report_failure(e)

    if response is None:
        return None

    # A plain string response is treated as an error message from the client.
    if isinstance(response, str):
        return _report_failure(response)

    try:
        # "content" may be present but null; `or ""` keeps .strip() safe.
        message = response["choices"][0]["message"]
        summary_text = (message.get("content") or "").strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        summary_text = ""

    if not summary_text:
        return _report_failure("empty summary.")

    summary_message = {
        "role": "system",
        "content": f"Previous conversation context (summarized):\n\n{summary_text}"
    }
    self.messages = [self.messages[0]] + [summary_message] + messages_to_keep

    # Re-measure, then record the cost of the summarization call itself.
    self._update_context_tokens()
    tokens_after = self.token_tracker.current_context_tokens
    provider_cfg = get_provider_config(self.client.provider)
    self.token_tracker.add_usage(
        response,
        model_name=provider_cfg.get("model", ""),
    )

    self.context_token_estimate = tokens_after

    if console and trigger == "manual":
        reduction = tokens_before - tokens_after
        console.print(
            f"[dim]Compacted history: {tokens_before:,} → {tokens_after:,} tokens "
            f"(-{reduction:,} / {-100 * reduction // (tokens_before or 1)}%)[/dim]"
        )

    return {
        "trigger": trigger,
        "before_tokens": tokens_before,
        "after_tokens": tokens_after,
        "summary": summary_text,
    }
1024
-
1025
def maybe_auto_compact(self, console=None):
    """Run history compaction when the context reaches the trigger size.

    The token count is refreshed first. Nothing further happens while
    compaction is locked or disabled. The trigger is the instance override
    ``_compact_trigger_tokens`` when set, otherwise the global setting.

    Args:
        console: Console for the optional notification; None keeps the
            operation silent.
    """
    self._update_context_tokens()
    total_tokens = self.token_tracker.current_context_tokens

    # Locked: tools are executing. Disabled: e.g. sub-agents keeping findings.
    if self._compaction_locked or self._compaction_disabled:
        return

    if self._compact_trigger_tokens is None:
        threshold = context_settings.compact_trigger_tokens
    else:
        threshold = self._compact_trigger_tokens

    if total_tokens < threshold:
        return

    # compact_history itself runs silently; notification is handled here.
    outcome = self.compact_history(console=None, trigger="auto")
    if not outcome:
        return
    if context_settings.notify_auto_compaction and console:
        self._notify_compaction(
            console,
            outcome["before_tokens"],
            outcome["after_tokens"],
            "compacted history",
        )
1060
-
1061
def ensure_context_fits(self, console=None):
    """Bring the context under ``hard_limit_tokens`` before an LLM call.

    Escalation order:
      0. Already under the limit: return without acting.
      1. Aggressive tool-result compaction (no LLM call).
      2. AI-based history compaction.
      3. Emergency truncation via ``_emergency_truncate``.

    When compaction is disabled a warning is logged and nothing is changed.
    When compaction is locked, layers 1-3 are all skipped and the action is
    reported as "locked".

    Args:
        console: Optional console passed to ``_notify_compaction``.

    Returns:
        dict describing the outcome, e.g.:
            {"action": "none", "tokens": 120000}
            {"action": "tool_compaction", "tokens": 90000, "reduction": 30000}
            {"action": "history_compaction", "tokens": 70000, "reduction": 50000}
            {"action": "locked", "tokens": 150000, "reduction": 0}
            {"action": "emergency_truncation", "tokens": 95000, "reduction": 55000}
    """
    self._update_context_tokens()
    current_tokens = self.token_tracker.current_context_tokens
    hard_limit = context_settings.hard_limit_tokens

    if current_tokens < hard_limit:
        return {"action": "none", "tokens": current_tokens}

    if self._compaction_disabled:
        logging.getLogger(__name__).warning(
            "Context (%d tokens) exceeds hard limit (%d) but compaction is disabled — "
            "API call may fail with context-length error",
            current_tokens, hard_limit,
        )
        return {"action": "none", "tokens": current_tokens}

    tokens_before = current_tokens

    def remeasure():
        """Refresh the tracker and return the current context size."""
        self._update_context_tokens()
        return self.token_tracker.current_context_tokens

    def outcome(action, tokens):
        """Build the result dict for a layer that changed (or kept) the size."""
        return {
            "action": action,
            "tokens": tokens,
            "reduction": tokens_before - tokens,
        }

    if not self._compaction_locked:
        # Layer 1: small tail budget and a single preserved block.
        self.compact_tool_results(
            skip_token_update=True,
            uncompacted_tail_tokens=10_000,
            min_tool_blocks=1,
        )
        current_tokens = remeasure()
        if current_tokens < hard_limit:
            result = outcome("tool_compaction", current_tokens)
            self._notify_compaction(
                console, tokens_before, current_tokens,
                _ACTION_LABELS["tool_compaction"],
            )
            return result

        # Layer 2: any error here simply falls through to truncation.
        try:
            history_result = self.compact_history(console=None, trigger="auto")
        except Exception:
            history_result = None

        if history_result is not None:
            current_tokens = remeasure()
            if current_tokens < hard_limit:
                result = outcome("history_compaction", current_tokens)
                self._notify_compaction(
                    console, tokens_before, current_tokens,
                    _ACTION_LABELS["history_compaction"],
                )
                return result

    # Layer 3 is skipped while locked: dropping messages mid tool execution
    # could break tool_call_id pairing.
    if self._compaction_locked:
        return outcome("locked", remeasure())

    self._emergency_truncate(hard_limit)
    current_tokens = remeasure()

    result = outcome("emergency_truncation", current_tokens)
    self._notify_compaction(
        console, tokens_before, current_tokens,
        _ACTION_LABELS["emergency_truncation"],
    )
    return result
1165
-
1166
def _emergency_truncate(self, target_tokens):
    """Drop the oldest non-system messages until the context is under target.

    What is never dropped:
      - index 0,
      - any message whose role is "system",
      - the last ``MIN_TAIL`` messages, widened backward so that an
        assistant ``tool_calls`` message and the tool results answering it
        are not separated by the tail boundary.

    One message is removed per iteration and the token count is refreshed
    before each check. ``sync_log()`` is called once at the end.

    Args:
        target_tokens: Token count the context must drop below.
    """
    MIN_TAIL = 6  # Minimum number of trailing messages to keep

    def is_system(msg):
        """System messages are never removed."""
        return msg.get("role", "") == "system"

    def protected_tail_start(messages):
        """Return the first index of the protected tail.

        Starts at ``len(messages) - MIN_TAIL`` and moves backward over
        adjacent tool-call messages that pair with tool ids in the tail.
        """
        total = len(messages)
        if total <= MIN_TAIL + 1:
            return 1  # Nothing outside the tail to drop

        tail_start = total - MIN_TAIL

        expanded = True
        while expanded:
            expanded = False

            # tool_call_ids answered by tool messages inside the tail.
            tail_tool_ids = {
                m.get("tool_call_id")
                for m in messages[tail_start:total]
                if m.get("role") == "tool" and m.get("tool_call_id")
            }

            # Walk backward from just outside the tail, absorbing each
            # adjacent message that belongs to a block reaching into it.
            cursor = tail_start - 1
            while cursor > 0:
                msg = messages[cursor]
                role = msg.get("role")
                if role == "assistant" and msg.get("tool_calls"):
                    call_ids = {
                        tc.get("id") for tc in msg["tool_calls"] if tc.get("id")
                    }
                    if not (call_ids & tail_tool_ids):
                        break  # Unrelated tool call: stop widening
                    tail_tool_ids |= call_ids
                elif role == "tool":
                    tcid = msg.get("tool_call_id")
                    if not (tcid and tcid in tail_tool_ids):
                        break
                else:
                    break
                tail_start = cursor
                expanded = True
                cursor -= 1

        return tail_start

    while True:
        self._update_context_tokens()
        if self.token_tracker.current_context_tokens < target_tokens:
            break

        tail_start = protected_tail_start(self.messages)
        if tail_start <= 1:
            break  # Nothing droppable remains

        # Oldest message between index 0 and the tail that is not a system message.
        victim = next(
            (i for i in range(1, tail_start) if not is_system(self.messages[i])),
            None,
        )
        if victim is None:
            break  # Only system messages are left in the droppable zone
        self.messages.pop(victim)

    self.sync_log()
1268
-
1269
def _notify_compaction(self, console, tokens_before, tokens_after, action_label):
    """Print a dim one-line notice describing an automatic compaction.

    Nothing is printed when ``notify_auto_compaction`` is off in the
    context settings or when no console is supplied.

    Args:
        console: Console to print to (or None to suppress)
        tokens_before: Token count before compaction
        tokens_after: Token count after compaction
        action_label: Human-readable description of the action taken
    """
    if not context_settings.notify_auto_compaction or not console:
        return
    # The message shows both absolute counts; no separate reduction value
    # is needed.
    console.print(
        f"[dim]Auto-compacted: {tokens_before:,} → {tokens_after:,} tokens "
        f"({action_label})[/dim]"
    )
1285
-
1286
def get_gitignore_spec(self, repo_root: Path):
    """Return the gitignore spec for ``repo_root``, reloading when stale.

    The cached spec is reused only if one is stored, it was loaded for the
    same ``repo_root``, and the ``.gitignore`` modification time (None when
    the file is absent) is unchanged.

    Args:
        repo_root: Repository root directory

    Returns:
        Whatever ``load_gitignore_spec(repo_root)`` produced on the most
        recent load.
    """
    gitignore_path = repo_root / ".gitignore"
    current_mtime = (
        gitignore_path.stat().st_mtime if gitignore_path.exists() else None
    )

    cache_is_fresh = (
        self._gitignore_spec is not None
        and self._repo_root == repo_root
        and current_mtime == self._gitignore_mtime
    )
    if cache_is_fresh:
        return self._gitignore_spec

    from utils.gitignore_filter import load_gitignore_spec

    # Record what the spec was loaded for so the next call can compare.
    self._repo_root = repo_root
    self._gitignore_mtime = current_mtime
    self._gitignore_spec = load_gitignore_spec(repo_root)
    return self._gitignore_spec
1317
-
1318
def switch_provider(self, provider_name):
    """Switch LLM provider, managing the local server as needed.

    Leaving the local provider shuts the server down via ``cleanup()``.
    Switching to the local provider starts the server unless one is already
    tracked in ``self.server_process``; if it cannot be started the client
    is switched back to the previous provider.

    Args:
        provider_name: Provider name; must be one of ``get_providers()``.

    Returns:
        str: Result message
    """
    providers = get_providers()
    if provider_name not in providers:
        return f"Invalid provider. Use /provider to list. Available: {', '.join(providers)}"

    previous_provider = self.client.provider

    if previous_provider == "local" and provider_name != "local":
        self.cleanup()
        # cleanup() terminates the process but leaves the handle in place.
        # Clear it so a later switch back to local starts a fresh server
        # rather than being mistaken for "already running".
        self.server_process = None

    if not self.client.switch_provider(provider_name):
        return "Provider switch failed."

    self._init_messages(reset_costs=True)
    if provider_name != "local":
        return f"Switched to {provider_name} provider."

    # start_server_if_needed() returns None both on failure and when a
    # server is already tracked, so only call it when none is tracked.
    if not self.server_process:
        server = self.start_server_if_needed()
        if not server:
            # Failed to start server - revert
            self.client.switch_provider(previous_provider)
            self._init_messages(reset_costs=True)
            return f"Failed to start local server. Reverted to {previous_provider} provider."
        self.server_process = server
    return f"Switched to {provider_name} provider (server ready)."
1350
-
1351
def reload_config(self):
    """Re-read configuration and push the result into the client.

    Intended to be called after any config change (provider, model, api key).
    """
    # Inside a method a bare name is looked up in module scope, so this is
    # the module-level reload_config(), not a recursive call to this method.
    reload_config()
    self.client.sync_provider_from_config()
1358
-
1359
- # ===== Config Methods (for agent use) =====
1360
-
1361
def set_provider(self, provider_name: str) -> str:
    """Agent-accessible alias for ``switch_provider``.

    Args:
        provider_name: Provider name to switch to.

    Returns:
        str: The message returned by ``switch_provider``.
    """
    outcome = self.switch_provider(provider_name)
    return outcome
1371
-
1372
def start_server_if_needed(self):
    """Start the local server when the local provider has none tracked.

    Returns:
        The result of ``_start_local_server()`` when the provider is
        "local" and ``self.server_process`` is falsy; otherwise None.
    """
    needs_server = self.client.provider == "local" and not self.server_process
    return self._start_local_server() if needs_server else None
1381
-
1382
def _start_local_server(self):
    """Start the llama-server process and wait for its health endpoint.

    The server's stderr is written to ``llama_server.log``; the open handle
    is kept in ``self._log_file`` while the server is running. The health
    endpoint is polled up to ``health_check_timeout_sec`` times, sleeping
    ``health_check_interval_sec`` between attempts.

    Returns:
        subprocess.Popen: Server process, or None if it never reported
        healthy (the process is terminated and the log handle closed).

    Raises:
        Whatever ``subprocess.Popen`` raises when the executable cannot be
        launched; the log handle is closed before re-raising.
    """
    from llm.config import get_provider_config, _CONFIG

    local_config = get_provider_config("local")
    server_path = _CONFIG.get("LOCAL_SERVER_PATH", local_config["config_keys"]["LOCAL_SERVER_PATH"])
    model_path = local_config.get("model", "")
    host = local_config["extra"]["host"]
    port = local_config["extra"]["port"]

    args = [
        server_path,
        "-m", model_path,
        "-ngl", str(server_settings.ngl_layers),
        "--threads", str(server_settings.threads),
        "--batch-size", str(server_settings.batch_size),
        "--ubatch-size", str(server_settings.ubatch_size),
        "--flash-attn" if server_settings.flash_attn else "--no-flash-attn",
        "--split-mode", "none",
        "--ctx-size", str(server_settings.ctx_size),
        "--n-predict", str(server_settings.n_predict),
        "--rope-scale", str(server_settings.rope_scale),
        "--host", host,
        "--port", str(port),
        "--jinja",
        "--reasoning", "off",
    ]

    # Expose only GPU 0 to the server process.
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = "0"

    # Close a handle left over from an earlier start so it is not leaked
    # when the attribute is overwritten below.
    stale_log = getattr(self, "_log_file", None)
    if stale_log:
        stale_log.close()

    # Log stderr to file for debugging
    log_path = Path(__file__).resolve().parents[2] / "llama_server.log"
    self._log_file = open(log_path, "w")

    try:
        process = subprocess.Popen(
            args,
            stdout=subprocess.DEVNULL,
            stderr=self._log_file,
            env=env,
        )
    except Exception:
        # Launch failed (e.g. missing executable): release the log handle,
        # then let the caller see the original error.
        self._log_file.close()
        self._log_file = None
        raise

    health_url = f"http://{host}:{port}/health"
    for _ in range(server_settings.health_check_timeout_sec):
        if process.poll() is not None:
            break  # Server already exited; further polling cannot succeed
        try:
            r = requests.get(health_url, timeout=2)
            if r.status_code == 200:
                data = r.json()
                if data.get("status") == "ok":
                    return process
        except Exception:
            # Deliberate best-effort: connection errors and malformed
            # responses are expected while the server is still loading.
            pass
        time.sleep(server_settings.health_check_interval_sec)

    # Server failed health check - clean up resources
    process.terminate()
    process.wait()
    if self._log_file:
        self._log_file.close()
        self._log_file = None
    return None
1449
-
1450
def cycle_approve_mode(self) -> str:
    """Advance ``approve_mode`` to the next cycleable mode, wrapping around.

    If the current mode is not in the cycleable list, the first mode in the
    list is selected.

    Returns:
        str: The new approval mode.
    """
    from llm.config import CYCLEABLE_APPROVE_MODES

    try:
        position = CYCLEABLE_APPROVE_MODES.index(self.approve_mode)
    except ValueError:
        successor = 0
    else:
        successor = (position + 1) % len(CYCLEABLE_APPROVE_MODES)

    self.approve_mode = CYCLEABLE_APPROVE_MODES[successor]
    return self.approve_mode
1464
-
1465
def reset_session(self):
    """Start a fresh chat session.

    Ends the active conversation log session (if logging is on),
    re-initializes the messages with ``reset_totals=False``, and empties
    the in-session task list and its title.
    """
    logger = self.markdown_logger
    if logger:
        # Close the current log session before the messages are replaced.
        logger.end_session()

    self._init_messages(reset_totals=False)
    self.task_list.clear()
    self.task_list_title = None
1478
-
1479
def log_message(self, message: dict):
    """Record a message in the conversation log(s).

    Every message goes to the markdown logger when one is active. User
    messages with non-empty content are additionally passed to the user
    message logger, unless ``MEMORY_SETTINGS["enabled"]`` is false.

    Args:
        message: Message dict to log
    """
    if self.markdown_logger:
        self.markdown_logger.log_message(message)

    # Only user messages that carry content are recorded for memory.
    if message.get("role") != "user" or not message.get("content"):
        return

    from llm.config import MEMORY_SETTINGS
    if not MEMORY_SETTINGS.get("enabled", True):
        return

    self.user_message_logger.log_user_message(
        message["content"],
        project_dir=Path.cwd().resolve(),
    )
1496
-
1497
def sync_log(self):
    """Rewrite the conversation log from the current message list.

    Meant to be called after anything that changes ``self.messages``
    (new messages, compaction, system-prompt changes). Does nothing when
    logging is off.
    """
    logger = self.markdown_logger
    if not logger:
        return
    logger.rewrite_log(self.messages)
1507
-
1508
def end_conversation(self):
    """Close the active conversation log session, if logging is on."""
    logger = self.markdown_logger
    if not logger:
        return
    logger.end_session()
1512
-
1513
def toggle_logging(self):
    """Flip conversation logging between on and off.

    Turning off ends the current session and discards the logger. Turning
    on creates a logger for ``context_settings.conversations_dir``, starts
    a session, and logs every message currently in ``self.messages``.

    Returns:
        bool: New logging state (True if enabled, False if disabled)
    """
    from utils.logger import MarkdownConversationLogger

    active = self.markdown_logger
    if active:
        active.end_session()
        self.markdown_logger = None
        return False

    fresh = MarkdownConversationLogger(
        conversations_dir=context_settings.conversations_dir
    )
    self.markdown_logger = fresh
    # Replay the existing conversation so the new log starts complete.
    fresh.start_session()
    for msg in self.messages:
        fresh.log_message(msg)
    return True
1536
-
1537
def set_logging(self, enabled: bool) -> bool:
    """Force conversation logging on or off.

    Args:
        enabled: True to enable logging, False to disable.

    Returns:
        bool: The logging state after the call; when a change is needed
        this is whatever ``toggle_logging()`` returns.
    """
    currently_on = self.markdown_logger is not None
    # Already in the requested state: nothing to toggle.
    return currently_on if enabled == currently_on else self.toggle_logging()
1550
-
1551
- def cleanup(self):
1552
- """Terminate server process if running."""
1553
- # End conversation session on cleanup
1554
- if self.markdown_logger:
1555
- self.markdown_logger.end_session()
1556
-
1557
- if self.server_process:
1558
- self.server_process.terminate()
1559
- self.server_process.wait()
1560
-
1561
- # Close log file handle if open
1562
- if self._log_file:
1563
- self._log_file.close()
1564
- self._log_file = None