bone-agent 1.3.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/bin/bone.js +39 -0
  2. package/package.json +25 -39
  3. package/LICENSE +0 -21
  4. package/README.md +0 -184
  5. package/bin/npm-wrapper.js +0 -235
  6. package/bin/rg +0 -0
  7. package/bin/rg.exe +0 -0
  8. package/config.yaml.example +0 -141
  9. package/prompts/main/ask_questions.md +0 -31
  10. package/prompts/main/batch_independent_calls.md +0 -5
  11. package/prompts/main/casual_interactions.md +0 -11
  12. package/prompts/main/code_references.md +0 -8
  13. package/prompts/main/communication_style.md +0 -12
  14. package/prompts/main/context_reliability.md +0 -12
  15. package/prompts/main/conversational_tool_calling.md +0 -15
  16. package/prompts/main/dream.md +0 -36
  17. package/prompts/main/editing_pattern.md +0 -13
  18. package/prompts/main/error_handling.md +0 -6
  19. package/prompts/main/exploration_pattern.md +0 -21
  20. package/prompts/main/intro.md +0 -1
  21. package/prompts/main/obsidian.md +0 -16
  22. package/prompts/main/obsidian_project.md +0 -79
  23. package/prompts/main/professional_objectivity.md +0 -3
  24. package/prompts/main/targeted_searching.md +0 -10
  25. package/prompts/main/task_lists_pattern.md +0 -8
  26. package/prompts/main/temp_folder.md +0 -9
  27. package/prompts/main/think_before_acting.md +0 -10
  28. package/prompts/main/tone_and_style.md +0 -4
  29. package/prompts/main/tool_preferences.md +0 -24
  30. package/prompts/main/trust_subagent_context.md +0 -21
  31. package/prompts/main/when_to_use_sub_agent.md +0 -7
  32. package/prompts/micro/ask_questions.md +0 -1
  33. package/prompts/micro/batch_independent_calls.md +0 -1
  34. package/prompts/micro/casual_interactions.md +0 -1
  35. package/prompts/micro/code_references.md +0 -1
  36. package/prompts/micro/communication_style.md +0 -1
  37. package/prompts/micro/context_reliability.md +0 -1
  38. package/prompts/micro/conversational_tool_calling.md +0 -1
  39. package/prompts/micro/editing_pattern.md +0 -1
  40. package/prompts/micro/error_handling.md +0 -1
  41. package/prompts/micro/exploration_pattern.md +0 -1
  42. package/prompts/micro/intro.md +0 -1
  43. package/prompts/micro/obsidian.md +0 -4
  44. package/prompts/micro/obsidian_project.md +0 -5
  45. package/prompts/micro/professional_objectivity.md +0 -1
  46. package/prompts/micro/targeted_searching.md +0 -1
  47. package/prompts/micro/task_lists_pattern.md +0 -1
  48. package/prompts/micro/temp_folder.md +0 -1
  49. package/prompts/micro/think_before_acting.md +0 -5
  50. package/prompts/micro/tone_and_style.md +0 -1
  51. package/prompts/micro/tool_preferences.md +0 -1
  52. package/prompts/micro/trust_subagent_context.md +0 -1
  53. package/prompts/micro/when_to_use_sub_agent.md +0 -1
  54. package/requirements.txt +0 -9
  55. package/src/__init__.py +0 -11
  56. package/src/core/__init__.py +0 -1
  57. package/src/core/agentic.py +0 -985
  58. package/src/core/chat_manager.py +0 -1564
  59. package/src/core/config_manager.py +0 -253
  60. package/src/core/cron.py +0 -582
  61. package/src/core/cron_allowlist.py +0 -118
  62. package/src/core/memory.py +0 -145
  63. package/src/core/retry.py +0 -71
  64. package/src/core/sub_agent.py +0 -326
  65. package/src/core/tool_approval.py +0 -220
  66. package/src/core/tool_feedback.py +0 -778
  67. package/src/exceptions.py +0 -79
  68. package/src/llm/__init__.py +0 -1
  69. package/src/llm/client.py +0 -171
  70. package/src/llm/config.py +0 -492
  71. package/src/llm/prompts.py +0 -489
  72. package/src/llm/providers.py +0 -436
  73. package/src/llm/streaming.py +0 -163
  74. package/src/llm/token_tracker.py +0 -384
  75. package/src/tools/__init__.py +0 -212
  76. package/src/tools/constants.py +0 -59
  77. package/src/tools/create_file.py +0 -136
  78. package/src/tools/directory.py +0 -389
  79. package/src/tools/edit.py +0 -545
  80. package/src/tools/file_reader.py +0 -322
  81. package/src/tools/helpers/__init__.py +0 -105
  82. package/src/tools/helpers/base.py +0 -550
  83. package/src/tools/helpers/converters.py +0 -44
  84. package/src/tools/helpers/file_helpers.py +0 -189
  85. package/src/tools/helpers/formatters.py +0 -411
  86. package/src/tools/helpers/loader.py +0 -231
  87. package/src/tools/helpers/parallel_executor.py +0 -231
  88. package/src/tools/helpers/path_resolver.py +0 -232
  89. package/src/tools/helpers/plugin_manifest.py +0 -156
  90. package/src/tools/obsidian.py +0 -96
  91. package/src/tools/review_sub_agent.py +0 -189
  92. package/src/tools/rg_search.py +0 -460
  93. package/src/tools/search_plugins.py +0 -109
  94. package/src/tools/select_option.py +0 -600
  95. package/src/tools/shell.py +0 -302
  96. package/src/tools/sub_agent.py +0 -139
  97. package/src/tools/task_list.py +0 -269
  98. package/src/tools/web_search.py +0 -61
  99. package/src/ui/__init__.py +0 -1
  100. package/src/ui/banner.py +0 -87
  101. package/src/ui/commands.py +0 -2809
  102. package/src/ui/displays.py +0 -214
  103. package/src/ui/loader.py +0 -284
  104. package/src/ui/main.py +0 -647
  105. package/src/ui/prompt_utils.py +0 -113
  106. package/src/ui/setting_selector.py +0 -590
  107. package/src/ui/setup_wizard.py +0 -294
  108. package/src/ui/sub_agent_panel.py +0 -234
  109. package/src/ui/tool_confirmation.py +0 -215
  110. package/src/utils/__init__.py +0 -1
  111. package/src/utils/citation_parser.py +0 -199
  112. package/src/utils/editor.py +0 -158
  113. package/src/utils/gitignore_filter.py +0 -149
  114. package/src/utils/logger.py +0 -254
  115. package/src/utils/paths.py +0 -30
  116. package/src/utils/result_parsers.py +0 -108
  117. package/src/utils/safe_commands.py +0 -243
  118. package/src/utils/settings.py +0 -191
  119. package/src/utils/user_message_logger.py +0 -120
  120. package/src/utils/validation.py +0 -191
  121. package/src/utils/web_search.py +0 -173
@@ -1,145 +0,0 @@
1
- """Multi-layer memory system for the agent.
2
-
3
- Two-layer persistent memory:
4
- - User memory (global): ~/.bone/user_memory.md
5
- - Project memory (per-repo): {repo_root}/.bone/agents.md
6
-
7
- Memory files are read-only during conversations — loaded into the system prompt
8
- for context but never written inline. All writes happen through the dream cron job,
9
- which consolidates user messages into focused memories nightly.
10
- """
11
-
12
- import logging
13
- from pathlib import Path
14
- from typing import Optional
15
-
16
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Capacity constants (prompt-enforced, no code enforcement)
# Advisory budget only: this module reports it in the usage dicts but never
# truncates or rejects memory content based on it.
CHAR_LIMIT = 1500  # suggested chars per layer (~500 tokens)
20
-
21
-
22
class MemoryManager:
    """Manage two-layer memory: user-level (global) and project-level (per-repo).

    Uses a lazy singleton pattern: the first ``get_instance()`` call that
    passes a ``repo_root`` bootstraps the instance, subsequent calls reuse it.
    Call ``reset()`` when switching repos.

    Attributes are plain paths; nothing is read or created until one of the
    methods below is called.
    """

    _instance: Optional["MemoryManager"] = None

    def __init__(self, repo_root: Path):
        self.repo_root = repo_root
        self.user_memory_path = Path.home() / ".bone" / "user_memory.md"
        self.project_memory_path = repo_root / ".bone" / "agents.md"

    @classmethod
    def get_instance(cls, repo_root: Optional[Path] = None) -> Optional["MemoryManager"]:
        """Return the shared instance, creating it on first use.

        Args:
            repo_root: Path to repository root. Required on first call,
                ignored on subsequent calls (until reset()).

        Returns:
            MemoryManager instance, or None if no repo_root was provided and
            no instance has been initialized yet.
        """
        if cls._instance is not None:
            return cls._instance
        if repo_root is None:
            return None
        cls._instance = cls(repo_root)
        return cls._instance

    @classmethod
    def reset(cls) -> None:
        """Clear the singleton so the next get_instance() call can rebind to a new repo."""
        cls._instance = None

    def ensure_exists(self) -> None:
        """Create the user-level directory and memory file, and git-ignore ``.bone/``.

        The project-level .bone/agents.md is intentionally NOT created here,
        which avoids creating .bone/ directories in non-project locations
        (e.g. when running from ~/.bone/ itself).
        """
        self._ensure_dir_and_file(
            self.user_memory_path,
            "# User Memory\n\n",
        )
        # Add .bone/ to .gitignore if repo_root has a git repo
        self._ensure_gitignore()

    def load_user_memory(self) -> str:
        """Read and return user memory file content. Returns empty string if missing."""
        return self._read_file(self.user_memory_path)

    def load_project_memory(self) -> str:
        """Read and return project memory file content. Returns empty string if missing."""
        return self._read_file(self.project_memory_path)

    def get_user_usage(self) -> dict:
        """Return {chars_used, chars_limit} for user memory."""
        content = self.load_user_memory()
        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}

    def get_project_usage(self) -> dict:
        """Return {chars_used, chars_limit} for project memory."""
        content = self.load_project_memory()
        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}

    # ---- Private helpers ----

    @staticmethod
    def _has_entries(content: str) -> bool:
        """Check if memory file has entries beyond just headers.

        A file with only "# User Memory\\n\\n" is considered empty: blank
        lines and any line starting with "#" are ignored.
        """
        for line in content.strip().split("\n"):
            line = line.strip()
            if line and not line.startswith("#"):
                # Found a non-header, non-blank line, so the file has entries
                return True
        return False

    @staticmethod
    def _ensure_dir_and_file(path: Path, default_content: str) -> None:
        """Create parent directory and file with default content if missing.

        Best-effort: failures are logged as warnings, never raised.
        """
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            if not path.exists():
                path.write_text(default_content, encoding="utf-8")
                logger.debug("Created memory file: %s", path)
        except Exception as e:
            logger.warning("Failed to create memory file %s: %s", path, e)

    @staticmethod
    def _read_file(path: Path) -> str:
        """Read file content, return empty string on any error or if missing."""
        try:
            if path.exists():
                return path.read_text(encoding="utf-8")
        except Exception as e:
            logger.warning("Failed to read memory file %s: %s", path, e)
        return ""

    @staticmethod
    def _ignores_bone_dir(gitignore_text: str) -> bool:
        """Return True if some .gitignore line already ignores the .bone directory.

        Matches whole entries (``.bone``, ``.bone/``, ``/.bone/``, ``.bone/*`` ...)
        rather than substrings, so unrelated entries such as ``.bonefish`` or a
        comment that merely mentions ``.bone`` do not count.
        """
        for raw_line in gitignore_text.splitlines():
            entry = raw_line.strip()
            # Comments and negated patterns never ignore anything.
            if not entry or entry.startswith(("#", "!")):
                continue
            if entry.lstrip("/").rstrip("/*") == ".bone":
                return True
        return False

    def _ensure_gitignore(self) -> None:
        """Add .bone/ to the repo's .gitignore if not already present.

        Does nothing unless repo_root is a directory containing a ``.git``
        directory. Best-effort: failures are logged as warnings, never raised.
        """
        if not self.repo_root.is_dir() or not (self.repo_root / ".git").is_dir():
            return  # Not a git repo
        gitignore = self.repo_root / ".gitignore"
        try:
            if not gitignore.exists():
                gitignore.write_text(".bone/\n", encoding="utf-8")
                return
            content = gitignore.read_text(encoding="utf-8")
            if self._ignores_bone_dir(content):
                return
            # Only terminate the previous line when it lacks a newline, so
            # repeated tooling does not accumulate blank lines.
            separator = "" if not content or content.endswith("\n") else "\n"
            with open(gitignore, "a", encoding="utf-8") as f:
                f.write(f"{separator}.bone/\n")
        except Exception as e:
            logger.warning("Failed to update .gitignore: %s", e)
package/src/core/retry.py DELETED
@@ -1,71 +0,0 @@
1
- """Retry logic for LLM connection and timeout errors."""
2
-
3
- import time
4
-
5
- from exceptions import LLMResponseError
6
-
7
# Timeout retry constants
RETRY_MAX_ATTEMPTS = 3
# One delay per gap between attempts (3 attempts -> 2 waits), in seconds.
RETRY_DELAYS = (2, 4)  # exponential backoff per attempt
# HTTP statuses treated as transient by is_retryable_error().
RETRYABLE_STATUS_CODES = {429, 502, 503, 504}
# Lower-case substrings searched for in the lower-cased original error text.
RETRYABLE_ERROR_KEYWORDS = (
    "timeout", "timed out", "connectionerror", "connection refused",
    "connection reset", "connection aborted", "name or service not known",
    "network unreachable", "no route to host", "eof occurred",
)
# HTTP statuses that is_retryable_error() rejects before any keyword matching.
NON_RETRYABLE_STATUS_CODES = {400, 401, 403, 405, 422}
17
-
18
-
19
def is_retryable_error(error):
    """Check if an LLMConnectionError is retryable.

    Retryable conditions:
    - Timeout or connection-level errors (network unreachable, DNS failure, etc.)
    - HTTP 429 (rate limited), 502, 503, 504 (server errors)

    Non-retryable conditions:
    - HTTP 400, 401, 403, 405, 422 (client/auth errors)
    - LLMResponseError (malformed response data)

    A status code that appears in neither set (e.g. 500 or 404) does not
    decide the outcome by itself; the keyword check on the original error
    text is used instead.

    Args:
        error: Exception instance (typically LLMConnectionError). Its optional
            ``details`` mapping may carry ``status_code`` and ``original_error``.

    Returns:
        bool: True if the error is retryable
    """
    # Never retry response parsing errors
    if isinstance(error, LLMResponseError):
        return False

    # Check HTTP status code first (most reliable signal)
    details = getattr(error, "details", None) or {}
    status_code = details.get("status_code")
    if status_code is not None:
        if status_code in NON_RETRYABLE_STATUS_CODES:
            return False
        if status_code in RETRYABLE_STATUS_CODES:
            return True

    # For network-level errors, check the original error message.
    # original_error may be None or an exception object rather than a string,
    # so coerce before lower-casing instead of raising AttributeError here.
    original_lower = str(details.get("original_error") or "").lower()
    return any(keyword in original_lower for keyword in RETRYABLE_ERROR_KEYWORDS)
53
-
54
-
55
def wait_with_cancel_message(console, delay_seconds):
    """Announce an upcoming retry, then sleep for the given delay.

    Args:
        console: Rich console (anything with a ``print`` method) for output
        delay_seconds: Seconds to wait

    Returns:
        bool: True if the full wait elapsed, False if the user pressed Ctrl+C
        during the wait (a cancellation notice is printed in that case).
    """
    notice = f"[dim]Connection issue, retrying in {delay_seconds}s... (Ctrl+C to cancel)[/dim]"
    console.print(notice)
    try:
        time.sleep(delay_seconds)
    except KeyboardInterrupt:
        console.print("[dim]Retry cancelled.[/dim]")
        completed = False
    else:
        completed = True
    return completed
@@ -1,326 +0,0 @@
1
- """Sub-agent for delegated tasks.
2
-
3
- Uses existing AgenticOrchestrator with isolated message context
4
- and read-only tools to execute generic delegated tasks.
5
- """
6
-
7
- from pathlib import Path
8
-
9
- from core.chat_manager import ChatManager
10
- from llm.prompts import build_sub_agent_prompt
11
- from utils.settings import sub_agent_settings
12
-
13
-
14
class HardLimitExceeded(Exception):
    """Signal that the sub-agent's context reached its hard token limit.

    Raised by the LLM-call wrapper inside run_sub_agent() and caught there;
    carries only a human-readable message.
    """
17
-
18
-
19
- def _format_messages_dump(messages) -> str:
20
- """Format sub-agent message history as a markdown dump.
21
-
22
- Args:
23
- messages: List of message dicts from the sub-agent ChatManager.
24
-
25
- Returns:
26
- Markdown string with the full conversation context.
27
- """
28
- lines = [
29
- "## Sub-Agent Context Dump (Hard Limit Reached)",
30
- "",
31
- "The sub-agent exceeded its hard token limit. Below is the full, unabridged context of its investigation. No summary was produced.",
32
- "",
33
- "---",
34
- "",
35
- ]
36
- for i, msg in enumerate(messages):
37
- role = msg.get("role", "unknown")
38
- content = msg.get("content", "")
39
- tool_calls = msg.get("tool_calls")
40
- tool_call_id = msg.get("tool_call_id")
41
-
42
- if tool_call_id:
43
- lines.append(f"### Message {i} — tool result ({tool_call_id})")
44
- elif tool_calls:
45
- lines.append(f"### Message {i} — assistant tool calls")
46
- for tc in tool_calls:
47
- fn = tc.get("function", {})
48
- lines.append(f"- `{fn.get('name', '?')}` — `{fn.get('arguments', '')}`")
49
- else:
50
- lines.append(f"### Message {i} — {role}")
51
-
52
- if content:
53
- # Truncate large content to avoid blowing out the main agent's context
54
- max_chars = 4000
55
- if len(content) > max_chars:
56
- content = content[:max_chars] + f"\n\n... (truncated, {len(content) - max_chars:,} chars omitted)"
57
- lines.append(content)
58
- lines.append("")
59
- return "\n".join(lines)
60
-
61
-
62
def _configure_compaction():
    """Build a ChatManager whose compaction trigger follows the sub-agent settings.

    The configured ``compact_trigger_tokens`` is passed through only when
    ``enable_compaction`` is set; otherwise the trigger is ``None``.

    Returns:
        ChatManager: A new ChatManager instance with compaction configured
    """
    trigger = (
        sub_agent_settings.compact_trigger_tokens
        if sub_agent_settings.enable_compaction
        else None
    )
    return ChatManager(compact_trigger_tokens=trigger)
72
-
73
-
74
def _inject_system_prompt(chat_manager, sub_agent_type: str = "research"):
    """Replace the manager's history with a single sub-agent system prompt.

    The prompt is built from the sub-agent type and the configured soft/hard
    token limits. Live token usage is reported separately by the wrapper in
    run_sub_agent(), so the system prompt itself stays static.

    Args:
        chat_manager: ChatManager instance to configure; its ``messages`` list
            is overwritten, not appended to.
        sub_agent_type: Type of sub-agent ('research' or 'review').
    """
    token_limits = {
        "soft_limit_tokens": sub_agent_settings.soft_limit_tokens,
        "hard_limit_tokens": sub_agent_settings.hard_limit_tokens,
    }
    system_message = {
        "role": "system",
        "content": build_sub_agent_prompt(sub_agent_type=sub_agent_type, **token_limits),
    }
    chat_manager.messages = [system_message]
90
-
91
-
92
- def _load_codebase_map(chat_manager):
93
- """Load agents.md codebase map into sub-agent context if available.
94
-
95
- Args:
96
- chat_manager: ChatManager instance to add context to
97
- """
98
- agents_path = Path.cwd() / "agents.md"
99
- if agents_path.exists():
100
- map_content = agents_path.read_text(encoding="utf-8").strip()
101
- user_msg = (
102
- "Here is the codebase map for this project. "
103
- "This provides an overview of the repository structure and file purposes. "
104
- "Use this as a reference when exploring the codebase.\n\n"
105
- f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
106
- )
107
- assistant_msg = (
108
- "I've received the codebase map. I'll use this as a reference when "
109
- "exploring the repository, but I'll always verify current state by "
110
- "reading files and searching the codebase before making changes."
111
- )
112
- chat_manager.messages.append({"role": "user", "content": user_msg})
113
- chat_manager.messages.append({"role": "assistant", "content": assistant_msg})
114
-
115
-
116
- def _configure_isolation(chat_manager):
117
- """Apply isolation settings for sub-agent context.
118
-
119
- Disables conversation logging.
120
-
121
- Args:
122
- chat_manager: ChatManager instance to configure
123
- """
124
- chat_manager.markdown_logger = None
125
-
126
-
127
def _create_chat_manager(sub_agent_type: str = "research"):
    """Create a fresh ChatManager instance for sub-agent use.

    Orchestrates compaction, prompt injection, codebase map loading,
    and isolation configuration. The order matters: the system prompt step
    replaces the message list, so the codebase map must be appended after it.

    Args:
        sub_agent_type: Type of sub-agent ('research' or 'review').

    Returns:
        ChatManager: A new ChatManager instance with pre-configured system prompt
    """
    chat_manager = _configure_compaction()
    # NOTE(review): the manager is built from the compaction settings and then
    # flagged as compaction-disabled; presumably this flag overrides the
    # trigger configured above. Confirm against ChatManager.
    chat_manager._compaction_disabled = True
    _inject_system_prompt(chat_manager, sub_agent_type=sub_agent_type)
    _load_codebase_map(chat_manager)
    _configure_isolation(chat_manager)
    return chat_manager
145
-
146
-
147
def run_sub_agent(
    task_query: str,
    repo_root: Path,
    rg_exe_path: str,
    console=None,
    panel_updater=None,
    sub_agent_type: str = "research",
    initial_context: str = None,
) -> dict:
    """Run sub-agent using existing AgenticOrchestrator for delegated tasks.

    A fresh ChatManager is created and seeded (system prompt, optional
    codebase map, optional initial context), then an orchestrator runs the
    task with two temporary wrappers installed: one that prepends a token
    budget hint to every LLM call, and one that enforces the hard token limit
    before each call.

    Args:
        task_query: Generic task query to execute (e.g., "Read file config.json")
        repo_root: Repository root path
        rg_exe_path: Path to rg executable
        console: Optional Rich console for output
        panel_updater: Optional SubAgentPanel for live panel updates. Objects
            without an ``append`` attribute are ignored and replaced by a
            SimplePanelUpdater.
        sub_agent_type: Type of sub-agent ('research' or 'review').
        initial_context: Optional string injected as context before the task query
            (e.g., a git diff for review mode).

    Returns:
        Dict with:
            - 'result': Formatted markdown string (goes into chat history)
            - 'usage': Usage data for billing
            - 'model': Model name reported by the client ("" on failure)
            - 'error': Error message with traceback if failed (None if success)
            - 'hard_limit_exceeded': True if the run stopped at the hard token limit
    """
    # Validate panel_updater type if provided
    if panel_updater is not None and not hasattr(panel_updater, 'append'):
        panel_updater = None

    # If no panel_updater provided, create a simple no-op one
    if panel_updater is None:
        from tools.sub_agent import SimplePanelUpdater
        panel_updater = SimplePanelUpdater(console)

    # Create fresh ChatManager for sub-agent
    temp_chat_manager = _create_chat_manager(sub_agent_type=sub_agent_type)

    # Inject initial context as a user/assistant exchange if provided
    if initial_context:
        temp_chat_manager.messages.append(
            {"role": "user", "content": initial_context}
        )
        temp_chat_manager.messages.append(
            {"role": "assistant", "content": "I've received the context. I'll analyze it and use the available tools to gather additional information as needed."}
        )

    # Remember the pre-seeded messages (by identity) so a canned assistant
    # acknowledgement is never mistaken for the sub-agent's final answer.
    seed_messages = list(temp_chat_manager.messages)

    # Import here to avoid circular import with core.agentic
    from core.agentic import AgenticOrchestrator

    # Create orchestrator (reuses existing implementation)
    orchestrator = AgenticOrchestrator(
        chat_manager=temp_chat_manager,
        repo_root=repo_root,
        rg_exe_path=rg_exe_path,
        console=console,
        debug_mode=False,
        suppress_result_display=True,
        is_sub_agent=True,
        panel_updater=panel_updater,
        force_parallel_execution=True  # Enable parallel execution for read-only tools
    )

    # Wrap orchestrator._get_llm_response to check hard token limit and
    # wrap client.chat_completion once (outside the loop) to inject live
    # token feedback as a system message. This avoids per-call monkey-patching
    # and eliminates any re-entrancy risk.
    original_get_llm_response = orchestrator._get_llm_response
    original_chat_completion = temp_chat_manager.client.chat_completion

    _soft_limit_warned = False

    def _chat_completion_with_token_hint(messages, **kwargs):
        """Prepend a system-level token budget hint (and soft-limit warning once) to every LLM call."""
        nonlocal _soft_limit_warned
        tt = temp_chat_manager.token_tracker
        hint = f"[Token budget: {tt.current_context_tokens:,} curr / {tt.conv_total_tokens:,} total]"

        if not _soft_limit_warned and tt.current_context_tokens >= sub_agent_settings.soft_limit_tokens:
            _soft_limit_warned = True
            hint = (
                f"WARNING: You have exceeded the soft token limit "
                f"({tt.current_context_tokens:,} / {sub_agent_settings.soft_limit_tokens:,}). "
                "STOP exploring and return your findings immediately. Do NOT call any more tools. "
                + hint
            )

        # The hint is passed only to this call; it is not stored in the history.
        token_msg = {"role": "system", "content": hint}
        return original_chat_completion([token_msg, *messages], **kwargs)

    def _get_llm_response_with_hard_limit(allowed_tools=None):
        """Wrapper to check hard token limit and update panel with live token counts."""
        tt = temp_chat_manager.token_tracker

        # Check hard token limit before making LLM call
        # Use current_context_tokens (prompt size) not total_tokens (cumulative billing)
        # to catch prompt-length-over-limit errors before they hit the API.
        if tt.current_context_tokens >= sub_agent_settings.hard_limit_tokens:
            raise HardLimitExceeded(
                f"Sub-agent hard token limit exceeded: "
                f"{tt.current_context_tokens:,} / {sub_agent_settings.hard_limit_tokens:,} tokens."
            )

        # Update panel with live token counts
        # Order: conversation length (current context) first, total tokens billed second
        conv_length = tt.current_context_tokens
        total_billed = tt.conv_total_tokens
        if hasattr(panel_updater, 'token_info'):
            panel_updater.token_info = f"{conv_length:,} curr | {total_billed:,} total"
            panel_updater.append("")  # Refresh panel title

        return original_get_llm_response(allowed_tools=allowed_tools)

    # Apply both patches once, before the orchestrator loop starts
    orchestrator._get_llm_response = _get_llm_response_with_hard_limit
    temp_chat_manager.client.chat_completion = _chat_completion_with_token_hint

    hard_limit_exceeded = False

    try:
        # Run sub-agent task
        orchestrator.run(
            task_query,
            thinking_indicator=None,
            allowed_tools=sub_agent_settings.allowed_tools
        )
    except HardLimitExceeded:
        hard_limit_exceeded = True
    except Exception as e:
        import traceback
        error_details = f"{e}\n\nTraceback:\n{traceback.format_exc()}"
        return {
            "result": "",
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            },
            "model": "",
            "error": error_details,
            # Same key set as the success path so callers can index uniformly.
            "hard_limit_exceeded": False,
        }
    finally:
        # Restore the client method; the orchestrator is local and is simply
        # discarded, so its patched method needs no restoring.
        temp_chat_manager.client.chat_completion = original_chat_completion

    # Get final token usage (no need for delta calculation on fresh instance)
    tt = temp_chat_manager.token_tracker
    delta_prompt = tt.total_prompt_tokens
    delta_completion = tt.total_completion_tokens
    delta_total = tt.total_tokens
    delta_cost = tt.total_actual_cost + tt.total_estimated_cost

    if hard_limit_exceeded and sub_agent_settings.dump_context_on_hard_limit:
        result = _format_messages_dump(temp_chat_manager.messages)
    else:
        # Extract final response: the last assistant message with content that
        # the sub-agent itself produced (pre-seeded messages are skipped).
        final_content = ""
        for msg in reversed(temp_chat_manager.messages):
            if any(msg is seeded for seeded in seed_messages):
                continue
            if msg.get("role") == "assistant" and msg.get("content"):
                final_content = msg["content"].strip()
                break
        result = final_content

    usage = {
        "prompt_tokens": delta_prompt,
        "completion_tokens": delta_completion,
        "total_tokens": delta_total,
        "context_tokens": tt.current_context_tokens,
    }
    if delta_cost > 0:
        usage["cost"] = delta_cost

    return {
        "result": result,
        "usage": usage,
        "model": temp_chat_manager.client.model,
        "error": None,
        "hard_limit_exceeded": hard_limit_exceeded,
    }