bone-agent 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +184 -0
  3. package/bin/npm-wrapper.js +235 -0
  4. package/bin/rg +0 -0
  5. package/bin/rg.exe +0 -0
  6. package/config.yaml.example +133 -0
  7. package/package.json +53 -0
  8. package/requirements.txt +9 -0
  9. package/src/__init__.py +11 -0
  10. package/src/core/__init__.py +1 -0
  11. package/src/core/agentic.py +1054 -0
  12. package/src/core/chat_manager.py +1552 -0
  13. package/src/core/config_manager.py +247 -0
  14. package/src/core/cron.py +527 -0
  15. package/src/core/cron_allowlist.py +118 -0
  16. package/src/core/memory.py +232 -0
  17. package/src/core/retry.py +71 -0
  18. package/src/core/sub_agent.py +326 -0
  19. package/src/core/tool_approval.py +220 -0
  20. package/src/core/tool_feedback.py +778 -0
  21. package/src/exceptions.py +79 -0
  22. package/src/llm/__init__.py +1 -0
  23. package/src/llm/client.py +171 -0
  24. package/src/llm/config.py +466 -0
  25. package/src/llm/prompts.py +735 -0
  26. package/src/llm/providers.py +417 -0
  27. package/src/llm/streaming.py +163 -0
  28. package/src/llm/token_tracker.py +368 -0
  29. package/src/tools/__init__.py +212 -0
  30. package/src/tools/constants.py +59 -0
  31. package/src/tools/create_file.py +136 -0
  32. package/src/tools/directory.py +389 -0
  33. package/src/tools/edit.py +543 -0
  34. package/src/tools/file_reader.py +322 -0
  35. package/src/tools/helpers/__init__.py +105 -0
  36. package/src/tools/helpers/base.py +550 -0
  37. package/src/tools/helpers/converters.py +44 -0
  38. package/src/tools/helpers/file_helpers.py +189 -0
  39. package/src/tools/helpers/formatters.py +411 -0
  40. package/src/tools/helpers/loader.py +231 -0
  41. package/src/tools/helpers/parallel_executor.py +231 -0
  42. package/src/tools/helpers/path_resolver.py +226 -0
  43. package/src/tools/helpers/plugin_manifest.py +156 -0
  44. package/src/tools/obsidian.py +96 -0
  45. package/src/tools/review_sub_agent.py +189 -0
  46. package/src/tools/rg_search.py +393 -0
  47. package/src/tools/search_plugins.py +109 -0
  48. package/src/tools/select_option.py +593 -0
  49. package/src/tools/shell.py +302 -0
  50. package/src/tools/sub_agent.py +139 -0
  51. package/src/tools/task_list.py +269 -0
  52. package/src/tools/web_search.py +61 -0
  53. package/src/ui/__init__.py +1 -0
  54. package/src/ui/banner.py +87 -0
  55. package/src/ui/commands.py +2694 -0
  56. package/src/ui/displays.py +213 -0
  57. package/src/ui/loader.py +284 -0
  58. package/src/ui/main.py +646 -0
  59. package/src/ui/prompt_utils.py +113 -0
  60. package/src/ui/setting_selector.py +590 -0
  61. package/src/ui/setup_wizard.py +294 -0
  62. package/src/ui/sub_agent_panel.py +234 -0
  63. package/src/ui/tool_confirmation.py +215 -0
  64. package/src/utils/__init__.py +1 -0
  65. package/src/utils/citation_parser.py +199 -0
  66. package/src/utils/editor.py +158 -0
  67. package/src/utils/gitignore_filter.py +149 -0
  68. package/src/utils/logger.py +254 -0
  69. package/src/utils/paths.py +30 -0
  70. package/src/utils/result_parsers.py +108 -0
  71. package/src/utils/safe_commands.py +243 -0
  72. package/src/utils/settings.py +174 -0
  73. package/src/utils/validation.py +191 -0
  74. package/src/utils/web_search.py +173 -0
@@ -0,0 +1,232 @@
1
+ """Multi-layer memory system for the agent.
2
+
3
+ Two-layer persistent memory:
4
+ - User memory (global): ~/.bone/user_memory.md
5
+ - Project memory (per-repo): {repo_root}/.bone/agents.md
6
+
7
+ The agent writes to these files via edit_file (auto-approved, fire-and-forget).
8
+ Memory content is injected into the system prompt on every conversation start.
9
+ """
10
+
11
+ import logging
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # Capacity constants (prompt-enforced, no code enforcement)
18
+ CHAR_LIMIT = 1500 # suggested chars per layer (~500 tokens)
19
+ SECTION_LIMIT = 8 # suggested max sections per layer
20
+ ENTRY_LIMIT = 20 # suggested max entries per section
21
+
22
+
23
class MemoryManager:
    """Manage the two persistent memory layers: user-level and project-level.

    - User memory (global): ``~/.bone/user_memory.md``
    - Project memory (per-repo): ``{repo_root}/.bone/agents.md``

    Uses a lazy singleton pattern: the first ``get_instance`` call that
    supplies a ``repo_root`` creates the instance and later calls reuse it.
    Call ``reset()`` when switching repositories.
    """

    _instance: Optional["MemoryManager"] = None

    def __init__(self, repo_root: Path):
        self.repo_root = repo_root
        self.user_memory_path = Path.home() / ".bone" / "user_memory.md"
        self.project_memory_path = repo_root / ".bone" / "agents.md"

    @classmethod
    def get_instance(cls, repo_root: Optional[Path] = None) -> Optional["MemoryManager"]:
        """Return the shared instance, creating it on first use.

        Args:
            repo_root: Path to repository root. Required on first call,
                ignored on subsequent calls (until reset()).

        Returns:
            MemoryManager instance, or None if no repo_root provided and
            no instance has been initialized yet.
        """
        if cls._instance is not None:
            return cls._instance
        if repo_root is None:
            return None
        cls._instance = cls(repo_root)
        return cls._instance

    @classmethod
    def reset(cls) -> None:
        """Clear singleton. Called when switching repos via /cd."""
        cls._instance = None

    def ensure_exists(self) -> None:
        """Create the user-level memory file and make sure .bone/ is git-ignored.

        Project-level .bone/agents.md is created lazily on first write,
        not at startup. This prevents creating .bone/ directories in
        non-project locations (e.g. when running from ~/.bone/ itself).
        The .gitignore update only happens when repo_root is a git repo.
        """
        self._ensure_dir_and_file(
            self.user_memory_path,
            "# User Memory\n\n",
        )
        self._ensure_gitignore()

    def load_user_memory(self) -> str:
        """Read and return user memory file content. Returns empty string if missing."""
        return self._read_file(self.user_memory_path)

    def load_project_memory(self) -> str:
        """Read and return project memory file content. Returns empty string if missing."""
        return self._read_file(self.project_memory_path)

    def load_all(self) -> str:
        """Load both layers, combined for prompt injection.

        Layers that are empty (or whitespace only) are omitted; the rest are
        stripped and joined by a blank line, user memory first.
        """
        layers = (self.load_user_memory(), self.load_project_memory())
        return "\n\n".join(layer.strip() for layer in layers if layer.strip())

    def get_user_usage(self) -> dict:
        """Return {chars_used, chars_limit} for user memory."""
        content = self.load_user_memory()
        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}

    def get_project_usage(self) -> dict:
        """Return {chars_used, chars_limit} for project memory."""
        content = self.load_project_memory()
        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}

    def get_prompt_section(self) -> str:
        """Build the full memory system prompt section.

        Includes:
        - Guidelines text with resolved file paths
        - Capacity headers and memory content (if files have entries beyond headers)

        Returns:
            Complete prompt section string. The guidelines are always
            included, even when the memory files hold nothing but headers.
        """
        user_path = str(self.user_memory_path)
        project_path = str(self.project_memory_path)

        lines = [
            "## Memory System",
            "",
            "You have a two-layer memory system that persists across conversations:",
            f"- User memory (global): {user_path} — preferences, identity, work patterns",
            f"- Project memory (per-repo): {project_path} — context, conventions, decisions, current work",
            "",
            "Both memory layers are loaded into this prompt at conversation start. "
            "You can already see all memories below.",
            "",
            "To save information, use `edit_file` to write directly to the memory files. "
            "These edits are auto-approved and run silently.",
            "Add a timestamp in parentheses: `*(YYYY-MM-DD)*`",
            "",
            "### Save these (proactively):",
            "- User preferences: \"I prefer TypeScript over JavaScript\" → user memory",
            "- Environment facts: \"This project uses Python 3.11 with pytest\" → project memory",
            "- Corrections: \"Don't use sudo for docker, user is in docker group\" → project memory",
            "- Conventions: \"Project uses tabs, 120-char line width\" → project memory",
            "- Completed work: \"Migrated database schema on 2026-04-20\" → project memory",
            "- Explicit requests: \"Remember that my API key rotation happens monthly\" → user memory",
            "",
            "### Skip these:",
            "- Trivial/obvious info: \"User asked about Python\" — too vague to be useful",
            "- Easily re-discovered facts: \"Python 3.12 supports f-string nesting\" — can web search this",
            "- Raw data dumps: Large code blocks, log files, data tables — too big for memory",
            "- Session-specific ephemera: Temporary file paths, one-off debugging context",
            "- Information already in agents.md or other context files",
            "",
            "Keep memories concise and information-dense. Use the section that best fits the information.",
            "To update a memory, edit the entry in place with a new timestamp.",
            "To remove a memory, delete the line.",
            f"Stay under {CHAR_LIMIT} chars per file (~500 tokens). "
            f"When above 80% ({int(CHAR_LIMIT * 0.8)} chars), consolidate older entries before adding new ones.",
        ]

        # Each file is read exactly once so the capacity header always
        # describes the very content that is printed beneath it.
        lines.extend(self._render_memory_block("USER MEMORY", self.load_user_memory()))
        lines.extend(self._render_memory_block("PROJECT MEMORY", self.load_project_memory()))

        return "\n".join(lines)

    # ---- Private helpers ----

    @staticmethod
    def _render_memory_block(title: str, content: str) -> list:
        """Return prompt lines (blank line, capacity header, content) for one layer.

        Returns an empty list when the file holds nothing beyond headers.
        Usage is measured on the raw (unstripped) content, matching
        get_user_usage() / get_project_usage().
        """
        if not MemoryManager._has_entries(content):
            return []
        used = len(content)
        pct = used * 100 // CHAR_LIMIT
        return [
            "",
            f"{title} [{pct}% — {used}/{CHAR_LIMIT} chars]",
            content.strip(),
        ]

    @staticmethod
    def _has_entries(content: str) -> bool:
        """Check if memory file has entries beyond just the header.

        A file with only "# User Memory\\n\\n" is considered empty: blank
        lines and lines starting with "#" (after stripping) do not count.
        """
        return any(
            line and not line.startswith("#")
            for line in (raw.strip() for raw in content.strip().split("\n"))
        )

    @staticmethod
    def _ensure_dir_and_file(path: Path, default_content: str) -> None:
        """Create parent directory and file with default content if missing.

        Best-effort: any failure is logged as a warning and swallowed.
        """
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            if not path.exists():
                path.write_text(default_content, encoding="utf-8")
                logger.debug("Created memory file: %s", path)
        except Exception as e:
            logger.warning("Failed to create memory file %s: %s", path, e)

    @staticmethod
    def _read_file(path: Path) -> str:
        """Read file content, return empty string on any error."""
        try:
            if path.exists():
                return path.read_text(encoding="utf-8")
        except Exception as e:
            logger.warning("Failed to read memory file %s: %s", path, e)
        return ""

    def _ensure_gitignore(self) -> None:
        """Add .bone/ to .gitignore if no existing rule already mentions it.

        Does nothing unless repo_root is a directory containing a .git
        directory. Any non-comment rule that mentions ".bone" counts as
        already handled, so rules the user wrote by hand are respected.
        Best-effort: failures are logged as warnings and swallowed.
        """
        gitignore = self.repo_root / ".gitignore"
        if not self.repo_root.is_dir() or not (self.repo_root / ".git").is_dir():
            return  # Not a git repo
        try:
            if not gitignore.exists():
                gitignore.write_text(".bone/\n", encoding="utf-8")
                return
            content = gitignore.read_text(encoding="utf-8")
            rules = (line.strip() for line in content.splitlines())
            # Comments are not rules: a "# .bone ..." remark must not
            # prevent the real ignore entry from being added.
            if any(rule and not rule.startswith("#") and ".bone" in rule for rule in rules):
                return
            # Only insert a newline when the last line is unterminated,
            # so no stray blank line is left behind.
            separator = "" if not content or content.endswith("\n") else "\n"
            with open(gitignore, "a", encoding="utf-8") as f:
                f.write(f"{separator}.bone/\n")
        except Exception as e:
            logger.warning("Failed to update .gitignore: %s", e)
@@ -0,0 +1,71 @@
1
+ """Retry logic for LLM connection and timeout errors."""
2
+
3
+ import time
4
+
5
+ from exceptions import LLMResponseError
6
+
7
+ # Timeout retry constants
8
+ RETRY_MAX_ATTEMPTS = 3
9
+ RETRY_DELAYS = (2, 4) # exponential backoff per attempt
10
+ RETRYABLE_STATUS_CODES = {429, 502, 503, 504}
11
+ RETRYABLE_ERROR_KEYWORDS = (
12
+ "timeout", "timed out", "connectionerror", "connection refused",
13
+ "connection reset", "connection aborted", "name or service not known",
14
+ "network unreachable", "no route to host", "eof occurred",
15
+ )
16
+ NON_RETRYABLE_STATUS_CODES = {400, 401, 403, 405, 422}
17
+
18
+
19
def is_retryable_error(error):
    """Check if an LLMConnectionError is retryable.

    Retryable conditions:
    - Timeout or connection-level errors (network unreachable, DNS failure, etc.)
    - HTTP 429 (rate limited), 502, 503, 504 (server errors)

    Non-retryable conditions:
    - HTTP 400, 401, 403, 405, 422 (client/auth errors)
    - LLMResponseError (malformed response data)

    Args:
        error: Exception instance (typically LLMConnectionError). Its
            optional ``details`` mapping is inspected for ``status_code``
            and ``original_error``.

    Returns:
        bool: True if the error is retryable
    """
    # Never retry response parsing errors
    if isinstance(error, LLMResponseError):
        return False

    # Check HTTP status code first (most reliable signal)
    details = getattr(error, 'details', {}) or {}
    status_code = details.get("status_code")
    if status_code is not None:
        if status_code in NON_RETRYABLE_STATUS_CODES:
            return False
        if status_code in RETRYABLE_STATUS_CODES:
            return True

    # For network-level errors, check the original error message.
    # The value may be an exception object or None rather than a string,
    # so coerce it instead of calling .lower() on it directly.
    original_error = details.get("original_error")
    original_lower = "" if original_error is None else str(original_error).lower()
    return any(keyword in original_lower for keyword in RETRYABLE_ERROR_KEYWORDS)
53
+
54
+
55
def wait_with_cancel_message(console, delay_seconds):
    """Announce an upcoming retry, then sleep for the given delay.

    Args:
        console: Rich console for output
        delay_seconds: Seconds to wait

    Returns:
        bool: True if the full delay elapsed, False if the user pressed
        Ctrl+C (KeyboardInterrupt) while waiting.
    """
    notice = f"[dim]Connection issue, retrying in {delay_seconds}s... (Ctrl+C to cancel)[/dim]"
    console.print(notice)
    try:
        time.sleep(delay_seconds)
    except KeyboardInterrupt:
        console.print("[dim]Retry cancelled.[/dim]")
        completed = False
    else:
        completed = True
    return completed
@@ -0,0 +1,326 @@
1
+ """Sub-agent for delegated tasks.
2
+
3
+ Uses existing AgenticOrchestrator with isolated message context
4
+ and read-only tools to execute generic delegated tasks.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ from core.chat_manager import ChatManager
10
+ from llm.prompts import build_sub_agent_prompt
11
+ from utils.settings import sub_agent_settings
12
+
13
+
14
class HardLimitExceeded(Exception):
    """Signal that the sub-agent's context reached its hard token limit."""
17
+
18
+
19
+ def _format_messages_dump(messages) -> str:
20
+ """Format sub-agent message history as a markdown dump.
21
+
22
+ Args:
23
+ messages: List of message dicts from the sub-agent ChatManager.
24
+
25
+ Returns:
26
+ Markdown string with the full conversation context.
27
+ """
28
+ lines = [
29
+ "## Sub-Agent Context Dump (Hard Limit Reached)",
30
+ "",
31
+ "The sub-agent exceeded its hard token limit. Below is the full, unabridged context of its investigation. No summary was produced.",
32
+ "",
33
+ "---",
34
+ "",
35
+ ]
36
+ for i, msg in enumerate(messages):
37
+ role = msg.get("role", "unknown")
38
+ content = msg.get("content", "")
39
+ tool_calls = msg.get("tool_calls")
40
+ tool_call_id = msg.get("tool_call_id")
41
+
42
+ if tool_call_id:
43
+ lines.append(f"### Message {i} — tool result ({tool_call_id})")
44
+ elif tool_calls:
45
+ lines.append(f"### Message {i} — assistant tool calls")
46
+ for tc in tool_calls:
47
+ fn = tc.get("function", {})
48
+ lines.append(f"- `{fn.get('name', '?')}` — `{fn.get('arguments', '')}`")
49
+ else:
50
+ lines.append(f"### Message {i} — {role}")
51
+
52
+ if content:
53
+ # Truncate large content to avoid blowing out the main agent's context
54
+ max_chars = 4000
55
+ if len(content) > max_chars:
56
+ content = content[:max_chars] + f"\n\n... (truncated, {len(content) - max_chars:,} chars omitted)"
57
+ lines.append(content)
58
+ lines.append("")
59
+ return "\n".join(lines)
60
+
61
+
62
def _configure_compaction():
    """Build a ChatManager whose compaction trigger follows the sub-agent settings.

    When compaction is enabled in the settings, the configured trigger
    token count is passed through; otherwise the trigger is None.

    Returns:
        ChatManager: A new ChatManager instance with compaction configured
    """
    trigger_tokens = (
        sub_agent_settings.compact_trigger_tokens
        if sub_agent_settings.enable_compaction
        else None
    )
    return ChatManager(compact_trigger_tokens=trigger_tokens)
72
+
73
+
74
def _inject_system_prompt(chat_manager, sub_agent_type: str = "research"):
    """Replace the chat history with a single sub-agent system prompt.

    The prompt is built from the sub-agent type and the soft/hard token
    limits in the sub-agent settings. Live token usage is reported
    separately by the wrapper in run_sub_agent(), so nothing else is
    added to the system prompt here.

    Args:
        chat_manager: ChatManager instance to configure
        sub_agent_type: Type of sub-agent ('research' or 'review').
    """
    token_limits = {
        "soft_limit_tokens": sub_agent_settings.soft_limit_tokens,
        "hard_limit_tokens": sub_agent_settings.hard_limit_tokens,
    }
    system_message = {
        "role": "system",
        "content": build_sub_agent_prompt(sub_agent_type=sub_agent_type, **token_limits),
    }
    chat_manager.messages = [system_message]
90
+
91
+
92
+ def _load_codebase_map(chat_manager):
93
+ """Load agents.md codebase map into sub-agent context if available.
94
+
95
+ Args:
96
+ chat_manager: ChatManager instance to add context to
97
+ """
98
+ agents_path = Path.cwd() / "agents.md"
99
+ if agents_path.exists():
100
+ map_content = agents_path.read_text(encoding="utf-8").strip()
101
+ user_msg = (
102
+ "Here is the codebase map for this project. "
103
+ "This provides an overview of the repository structure and file purposes. "
104
+ "Use this as a reference when exploring the codebase.\n\n"
105
+ f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
106
+ )
107
+ assistant_msg = (
108
+ "I've received the codebase map. I'll use this as a reference when "
109
+ "exploring the repository, but I'll always verify current state by "
110
+ "reading files and searching the codebase before making changes."
111
+ )
112
+ chat_manager.messages.append({"role": "user", "content": user_msg})
113
+ chat_manager.messages.append({"role": "assistant", "content": assistant_msg})
114
+
115
+
116
+ def _configure_isolation(chat_manager):
117
+ """Apply isolation settings for sub-agent context.
118
+
119
+ Disables conversation logging.
120
+
121
+ Args:
122
+ chat_manager: ChatManager instance to configure
123
+ """
124
+ chat_manager.markdown_logger = None
125
+
126
+
127
def _create_chat_manager(sub_agent_type: str = "research"):
    """Assemble a fresh, fully prepared ChatManager for a sub-agent run.

    Steps, in order: create the manager with the configured compaction
    trigger, set its ``_compaction_disabled`` flag, install the sub-agent
    system prompt, append the codebase map (when available), and apply
    the isolation settings.

    Args:
        sub_agent_type: Type of sub-agent ('research' or 'review').

    Returns:
        ChatManager: A new ChatManager instance with pre-configured system prompt
    """
    manager = _configure_compaction()
    manager._compaction_disabled = True

    _inject_system_prompt(manager, sub_agent_type=sub_agent_type)
    _load_codebase_map(manager)
    _configure_isolation(manager)

    return manager
145
+
146
+
147
def _usage_from_tracker(token_tracker) -> dict:
    """Snapshot cumulative token usage (plus cost, when non-zero) from a token tracker."""
    usage = {
        "prompt_tokens": token_tracker.total_prompt_tokens,
        "completion_tokens": token_tracker.total_completion_tokens,
        "total_tokens": token_tracker.total_tokens,
        "context_tokens": token_tracker.current_context_tokens,
    }
    cost = token_tracker.total_actual_cost + token_tracker.total_estimated_cost
    if cost > 0:
        usage["cost"] = cost
    return usage


def run_sub_agent(
    task_query: str,
    repo_root: Path,
    rg_exe_path: str,
    console=None,
    panel_updater=None,
    sub_agent_type: str = "research",
    initial_context: "str | None" = None,
) -> dict:
    """Run sub-agent using existing AgenticOrchestrator for delegated tasks.

    A fresh ChatManager is created for the run. Two wrappers are installed
    for its duration: one around the orchestrator's LLM call that enforces
    the hard token limit and refreshes the panel's token display, and one
    around the client's chat_completion that prepends a token-budget hint
    (plus a one-time soft-limit warning). The client's chat_completion is
    restored when the run ends, whatever the outcome.

    Args:
        task_query: Generic task query to execute (e.g., "Read file config.json")
        repo_root: Repository root path
        rg_exe_path: Path to rg executable
        console: Optional Rich console for output
        panel_updater: Optional SubAgentPanel for live panel updates.
            Objects without an ``append`` attribute are ignored and
            replaced by a SimplePanelUpdater.
        sub_agent_type: Type of sub-agent ('research' or 'review').
        initial_context: Optional string injected as context before the task query
            (e.g., a git diff for review mode).

    Returns:
        Dict with:
        - 'result': Formatted markdown string (goes into chat history);
          empty string on failure
        - 'usage': Usage data for billing. Reflects tokens actually
          consumed, including on failure
        - 'model': Model name reported by the client
        - 'error': Error message with traceback if failed (None if success)
        - 'hard_limit_exceeded': True when the run stopped at the hard
          token limit
    """
    # Validate panel_updater type if provided
    if panel_updater is not None and not hasattr(panel_updater, 'append'):
        panel_updater = None

    # If no panel_updater provided, create a simple no-op one
    if panel_updater is None:
        from tools.sub_agent import SimplePanelUpdater
        panel_updater = SimplePanelUpdater(console)

    # Create fresh ChatManager for sub-agent
    temp_chat_manager = _create_chat_manager(sub_agent_type=sub_agent_type)

    # Inject initial context as a user/assistant exchange if provided
    if initial_context:
        temp_chat_manager.messages.append(
            {"role": "user", "content": initial_context}
        )
        temp_chat_manager.messages.append(
            {"role": "assistant", "content": "I've received the context. I'll analyze it and use the available tools to gather additional information as needed."}
        )

    # Import here to avoid circular import with core.agentic
    from core.agentic import AgenticOrchestrator

    # Create orchestrator (reuses existing implementation)
    orchestrator = AgenticOrchestrator(
        chat_manager=temp_chat_manager,
        repo_root=repo_root,
        rg_exe_path=rg_exe_path,
        console=console,
        debug_mode=False,
        suppress_result_display=True,
        is_sub_agent=True,
        panel_updater=panel_updater,
        force_parallel_execution=True  # Enable parallel execution for read-only tools
    )

    # Both wrappers are installed once, before the orchestrator loop
    # starts, rather than being re-applied on every call.
    original_get_llm_response = orchestrator._get_llm_response
    original_chat_completion = temp_chat_manager.client.chat_completion

    _soft_limit_warned = False

    def _chat_completion_with_token_hint(messages, **kwargs):
        """Prepend a system-level token budget hint (and soft-limit warning once) to every LLM call."""
        nonlocal _soft_limit_warned
        tt = temp_chat_manager.token_tracker
        hint = f"[Token budget: {tt.current_context_tokens:,} curr / {tt.conv_total_tokens:,} total]"

        if not _soft_limit_warned and tt.current_context_tokens >= sub_agent_settings.soft_limit_tokens:
            _soft_limit_warned = True
            hint = (
                f"WARNING: You have exceeded the soft token limit "
                f"({tt.current_context_tokens:,} / {sub_agent_settings.soft_limit_tokens:,}). "
                "STOP exploring and return your findings immediately. Do NOT call any more tools. "
                + hint
            )

        token_msg = {"role": "system", "content": hint}
        return original_chat_completion([token_msg, *messages], **kwargs)

    def _get_llm_response_with_hard_limit(allowed_tools=None):
        """Wrapper to check hard token limit and update panel with live token counts."""
        tt = temp_chat_manager.token_tracker

        # Check hard token limit before making LLM call.
        # Use current_context_tokens (prompt size) not total_tokens (cumulative billing)
        # to catch prompt-length-over-limit errors before they hit the API.
        if tt.current_context_tokens >= sub_agent_settings.hard_limit_tokens:
            raise HardLimitExceeded(
                f"Sub-agent hard token limit exceeded: "
                f"{tt.current_context_tokens:,} / {sub_agent_settings.hard_limit_tokens:,} tokens."
            )

        # Update panel with live token counts.
        # Order: conversation length (current context) first, total tokens billed second
        conv_length = tt.current_context_tokens
        total_billed = tt.conv_total_tokens
        if hasattr(panel_updater, 'token_info'):
            panel_updater.token_info = f"{conv_length:,} curr | {total_billed:,} total"
            panel_updater.append("")  # Refresh panel title

        return original_get_llm_response(allowed_tools=allowed_tools)

    orchestrator._get_llm_response = _get_llm_response_with_hard_limit
    temp_chat_manager.client.chat_completion = _chat_completion_with_token_hint

    hard_limit_exceeded = False

    try:
        # Run sub-agent task
        orchestrator.run(
            task_query,
            thinking_indicator=None,
            allowed_tools=sub_agent_settings.allowed_tools
        )
    except HardLimitExceeded:
        hard_limit_exceeded = True
    except Exception as e:
        import traceback
        error_details = f"{e}\n\nTraceback:\n{traceback.format_exc()}"
        # LLM calls made before the failure still consumed tokens, so
        # report what the tracker recorded instead of zeros. The error
        # path must never raise itself, hence the guarded fallback.
        try:
            failed_usage = _usage_from_tracker(temp_chat_manager.token_tracker)
        except Exception:
            failed_usage = {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            }
        return {
            "result": "",
            "usage": failed_usage,
            "model": getattr(temp_chat_manager.client, "model", "") or "",
            "error": error_details,
            "hard_limit_exceeded": False,
        }
    finally:
        # Restore the client's original chat_completion. The orchestrator
        # is local to this call, so its wrapper needs no restoring.
        temp_chat_manager.client.chat_completion = original_chat_completion

    # Totals on a fresh tracker are this run's usage; no delta needed.
    usage = _usage_from_tracker(temp_chat_manager.token_tracker)

    if hard_limit_exceeded and sub_agent_settings.dump_context_on_hard_limit:
        result = _format_messages_dump(temp_chat_manager.messages)
    else:
        # Extract final response (last assistant message with content)
        final_content = ""
        for msg in reversed(temp_chat_manager.messages):
            if msg.get("role") == "assistant" and msg.get("content"):
                final_content = msg["content"].strip()
                break
        result = final_content

    return {
        "result": result,
        "usage": usage,
        "model": temp_chat_manager.client.model,
        "error": None,
        "hard_limit_exceeded": hard_limit_exceeded,
    }