npm - bone-agent - Versions diffs - 1.3.0 - Mend

bone-agent 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/LICENSE +21 -0
package/README.md +184 -0
package/bin/npm-wrapper.js +235 -0
package/bin/rg +0 -0
package/bin/rg.exe +0 -0
package/config.yaml.example +133 -0
package/package.json +53 -0
package/requirements.txt +9 -0
package/src/__init__.py +11 -0
package/src/core/__init__.py +1 -0
package/src/core/agentic.py +1054 -0
package/src/core/chat_manager.py +1552 -0
package/src/core/config_manager.py +247 -0
package/src/core/cron.py +527 -0
package/src/core/cron_allowlist.py +118 -0
package/src/core/memory.py +232 -0
package/src/core/retry.py +71 -0
package/src/core/sub_agent.py +326 -0
package/src/core/tool_approval.py +220 -0
package/src/core/tool_feedback.py +778 -0
package/src/exceptions.py +79 -0
package/src/llm/__init__.py +1 -0
package/src/llm/client.py +171 -0
package/src/llm/config.py +466 -0
package/src/llm/prompts.py +735 -0
package/src/llm/providers.py +417 -0
package/src/llm/streaming.py +163 -0
package/src/llm/token_tracker.py +368 -0
package/src/tools/__init__.py +212 -0
package/src/tools/constants.py +59 -0
package/src/tools/create_file.py +136 -0
package/src/tools/directory.py +389 -0
package/src/tools/edit.py +543 -0
package/src/tools/file_reader.py +322 -0
package/src/tools/helpers/__init__.py +105 -0
package/src/tools/helpers/base.py +550 -0
package/src/tools/helpers/converters.py +44 -0
package/src/tools/helpers/file_helpers.py +189 -0
package/src/tools/helpers/formatters.py +411 -0
package/src/tools/helpers/loader.py +231 -0
package/src/tools/helpers/parallel_executor.py +231 -0
package/src/tools/helpers/path_resolver.py +226 -0
package/src/tools/helpers/plugin_manifest.py +156 -0
package/src/tools/obsidian.py +96 -0
package/src/tools/review_sub_agent.py +189 -0
package/src/tools/rg_search.py +393 -0
package/src/tools/search_plugins.py +109 -0
package/src/tools/select_option.py +593 -0
package/src/tools/shell.py +302 -0
package/src/tools/sub_agent.py +139 -0
package/src/tools/task_list.py +269 -0
package/src/tools/web_search.py +61 -0
package/src/ui/__init__.py +1 -0
package/src/ui/banner.py +87 -0
package/src/ui/commands.py +2694 -0
package/src/ui/displays.py +213 -0
package/src/ui/loader.py +284 -0
package/src/ui/main.py +646 -0
package/src/ui/prompt_utils.py +113 -0
package/src/ui/setting_selector.py +590 -0
package/src/ui/setup_wizard.py +294 -0
package/src/ui/sub_agent_panel.py +234 -0
package/src/ui/tool_confirmation.py +215 -0
package/src/utils/__init__.py +1 -0
package/src/utils/citation_parser.py +199 -0
package/src/utils/editor.py +158 -0
package/src/utils/gitignore_filter.py +149 -0
package/src/utils/logger.py +254 -0
package/src/utils/paths.py +30 -0
package/src/utils/result_parsers.py +108 -0
package/src/utils/safe_commands.py +243 -0
package/src/utils/settings.py +174 -0
package/src/utils/validation.py +191 -0
package/src/utils/web_search.py +173 -0

package/src/core/memory.py ADDED Viewed

@@ -0,0 +1,232 @@
+"""Multi-layer memory system for the agent.
+Two-layer persistent memory:
+- User memory (global): ~/.bone/user_memory.md
+- Project memory (per-repo): {repo_root}/.bone/agents.md
+The agent writes to these files via edit_file (auto-approved, fire-and-forget).
+Memory content is injected into the system prompt on every conversation start.
+"""
+import logging
+from pathlib import Path
+from typing import Optional
+logger = logging.getLogger(__name__)
+# Capacity constants (prompt-enforced, no code enforcement)
+# CHAR_LIMIT is interpolated into the prompt guidelines and reported as
+# "chars_limit" by the usage helpers below; nothing in this module truncates
+# or rejects content that exceeds it.
+CHAR_LIMIT = 1500  # suggested chars per layer (~500 tokens)
+# NOTE(review): SECTION_LIMIT and ENTRY_LIMIT are not referenced anywhere else
+# in this module as shown — presumably consumed by other modules; confirm.
+SECTION_LIMIT = 8   # suggested max sections per layer
+ENTRY_LIMIT = 20    # suggested max entries per section
+class MemoryManager:
+    """Manages two-layer memory: user-level (global) and project-level (per-repo).
+    Uses a lazy singleton pattern — first call with repo_root bootstraps the
+    instance, subsequent calls reuse it. Call reset() when switching repos.
+    """
+    _instance: Optional["MemoryManager"] = None
+    def __init__(self, repo_root: Path):
+        self.repo_root = repo_root
+        self.user_memory_path = Path.home() / ".bone" / "user_memory.md"
+        self.project_memory_path = repo_root / ".bone" / "agents.md"
+    @classmethod
+    def get_instance(cls, repo_root: Path = None) -> Optional["MemoryManager"]:
+        """Lazy singleton. First call sets repo_root, subsequent calls reuse instance.
+        Args:
+            repo_root: Path to repository root. Required on first call,
+                       ignored on subsequent calls (until reset()).
+        Returns:
+            MemoryManager instance, or None if no repo_root provided and
+            no instance has been initialized yet.
+        """
+        if cls._instance is not None:
+            return cls._instance
+        if repo_root is None:
+            return None
+        cls._instance = cls(repo_root)
+        return cls._instance
+    @classmethod
+    def reset(cls) -> None:
+        """Clear singleton. Called when switching repos via /cd."""
+        cls._instance = None
+    def ensure_exists(self) -> None:
+        """Create user-level directory and memory file only.
+        Project-level .bone/agents.md is created lazily on first write,
+        not at startup. This prevents creating .bone/ directories in
+        non-project locations (e.g. when running from ~/.bone/ itself).
+        """
+        self._ensure_dir_and_file(
+            self.user_memory_path,
+            "# User Memory\n\n",
+        )
+        # Add .bone/ to .gitignore if repo_root has a git repo
+        self._ensure_gitignore()
+    def load_user_memory(self) -> str:
+        """Read and return user memory file content. Returns empty string if missing."""
+        return self._read_file(self.user_memory_path)
+    def load_project_memory(self) -> str:
+        """Read and return project memory file content. Returns empty string if missing."""
+        return self._read_file(self.project_memory_path)
+    def load_all(self) -> str:
+        """Load both layers, combined for prompt injection."""
+        parts = []
+        user = self.load_user_memory()
+        project = self.load_project_memory()
+        if user.strip():
+            parts.append(user.strip())
+        if project.strip():
+            parts.append(project.strip())
+        return "\n\n".join(parts)
+    def get_user_usage(self) -> dict:
+        """Return {chars_used, chars_limit} for user memory."""
+        content = self.load_user_memory()
+        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}
+    def get_project_usage(self) -> dict:
+        """Return {chars_used, chars_limit} for project memory."""
+        content = self.load_project_memory()
+        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}
+    def get_prompt_section(self) -> str:
+        """Build the full memory system prompt section.
+        Includes:
+        - Guidelines text with resolved file paths
+        - Capacity headers and memory content (if files have entries beyond headers)
+        Returns:
+            Complete prompt section string. Includes guidelines even when
+            memory files are empty (just headers). Returns guidelines with
+            placeholder paths if no MemoryManager instance exists.
+        """
+        user_path = str(self.user_memory_path)
+        project_path = str(self.project_memory_path)
+        lines = [
+            "## Memory System",
+            "",
+            "You have a two-layer memory system that persists across conversations:",
+            f"- User memory (global): {user_path} — preferences, identity, work patterns",
+            f"- Project memory (per-repo): {project_path} — context, conventions, decisions, current work",
+            "",
+            "Both memory layers are loaded into this prompt at conversation start. "
+            "You can already see all memories below.",
+            "",
+            "To save information, use `edit_file` to write directly to the memory files. "
+            "These edits are auto-approved and run silently.",
+            "Add a timestamp in parentheses: `*(YYYY-MM-DD)*`",
+            "",
+            "### Save these (proactively):",
+            "- User preferences: \"I prefer TypeScript over JavaScript\" → user memory",
+            "- Environment facts: \"This project uses Python 3.11 with pytest\" → project memory",
+            "- Corrections: \"Don't use sudo for docker, user is in docker group\" → project memory",
+            "- Conventions: \"Project uses tabs, 120-char line width\" → project memory",
+            "- Completed work: \"Migrated database schema on 2026-04-20\" → project memory",
+            "- Explicit requests: \"Remember that my API key rotation happens monthly\" → user memory",
+            "",
+            "### Skip these:",
+            "- Trivial/obvious info: \"User asked about Python\" — too vague to be useful",
+            "- Easily re-discovered facts: \"Python 3.12 supports f-string nesting\" — can web search this",
+            "- Raw data dumps: Large code blocks, log files, data tables — too big for memory",
+            "- Session-specific ephemera: Temporary file paths, one-off debugging context",
+            "- Information already in agents.md or other context files",
+            "",
+            "Keep memories concise and information-dense. Use the section that best fits the information.",
+            "To update a memory, edit the entry in place with a new timestamp.",
+            "To remove a memory, delete the line.",
+            f"Stay under {CHAR_LIMIT} chars per file (~500 tokens). "
+            f"When above 80% ({int(CHAR_LIMIT * 0.8)} chars), consolidate older entries before adding new ones.",
+        ]
+        # Add capacity headers and memory content if files have real content
+        user_content = self.load_user_memory()
+        user_usage = self.get_user_usage()
+        # Only show block if file has more than just the header
+        if self._has_entries(user_content):
+            pct = user_usage["chars_used"] * 100 // user_usage["chars_limit"]
+            lines.extend([
+                "",
+                f"USER MEMORY [{pct}% — {user_usage['chars_used']}/{user_usage['chars_limit']} chars]",
+                user_content.strip(),
+            ])
+        project_content = self.load_project_memory()
+        project_usage = self.get_project_usage()
+        if self._has_entries(project_content):
+            pct = project_usage["chars_used"] * 100 // project_usage["chars_limit"]
+            lines.extend([
+                "",
+                f"PROJECT MEMORY [{pct}% — {project_usage['chars_used']}/{project_usage['chars_limit']} chars]",
+                project_content.strip(),
+            ])
+        return "\n".join(lines)
+    # ---- Private helpers ----
+    @staticmethod
+    def _has_entries(content: str) -> bool:
+        """Check if memory file has entries beyond just the header.
+        A file with only "# User Memory\\n\\n" is considered empty.
+        """
+        stripped = content.strip()
+        # Remove the H1 header line and blank lines
+        for line in stripped.split("\n"):
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            # Found a non-header, non-blank line — has entries
+            return True
+        return False
+    @staticmethod
+    def _ensure_dir_and_file(path: Path, default_content: str) -> None:
+        """Create parent directory and file with default content if missing."""
+        try:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            if not path.exists():
+                path.write_text(default_content, encoding="utf-8")
+                logger.debug("Created memory file: %s", path)
+        except Exception as e:
+            logger.warning("Failed to create memory file %s: %s", path, e)
+    @staticmethod
+    def _read_file(path: Path) -> str:
+        """Read file content, return empty string on any error."""
+        try:
+            if path.exists():
+                return path.read_text(encoding="utf-8")
+        except Exception as e:
+            logger.warning("Failed to read memory file %s: %s", path, e)
+        return ""
+    def _ensure_gitignore(self) -> None:
+        """Add .bone/ to .gitignore if not already present."""
+        gitignore = self.repo_root / ".gitignore"
+        if not self.repo_root.is_dir() or not (self.repo_root / ".git").is_dir():
+            return  # Not a git repo
+        try:
+            if not gitignore.exists():
+                gitignore.write_text(".bone/\n", encoding="utf-8")
+                return
+            content = gitignore.read_text(encoding="utf-8")
+            if ".bone" not in content:
+                with open(gitignore, "a", encoding="utf-8") as f:
+                    f.write("\n.bone/\n")
+        except Exception as e:
+            logger.warning("Failed to update .gitignore: %s", e)

package/src/core/retry.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Retry logic for LLM connection and timeout errors."""
+import time
+from exceptions import LLMResponseError
+# Timeout retry constants
+# NOTE(review): RETRY_MAX_ATTEMPTS and RETRY_DELAYS are not referenced by the
+# functions shown in this module; presumably the calling retry loop imports
+# them (two delays for the waits between three attempts) — confirm.
+RETRY_MAX_ATTEMPTS = 3
+RETRY_DELAYS = (2, 4)  # exponential backoff per attempt
+# HTTP status codes that is_retryable_error() treats as retryable.
+RETRYABLE_STATUS_CODES = {429, 502, 503, 504}
+# Lowercase substrings; is_retryable_error() looks for them in the lowercased
+# original error message when the status code does not decide the outcome.
+RETRYABLE_ERROR_KEYWORDS = (
+    "timeout", "timed out", "connectionerror", "connection refused",
+    "connection reset", "connection aborted", "name or service not known",
+    "network unreachable", "no route to host", "eof occurred",
+)
+# HTTP status codes that is_retryable_error() never retries.
+NON_RETRYABLE_STATUS_CODES = {400, 401, 403, 405, 422}
+def is_retryable_error(error):
+    """Check if an LLMConnectionError is retryable.
+    Retryable conditions:
+    - Timeout or connection-level errors (network unreachable, DNS failure, etc.)
+    - HTTP 429 (rate limited), 502, 503, 504 (server errors)
+    Non-retryable conditions:
+    - HTTP 400, 401, 403, 405, 422 (client/auth errors)
+    - LLMResponseError (malformed response data)
+    The function never raises on oddly shaped errors: a missing or
+    non-mapping ``details`` attribute is treated as empty, and a non-string
+    ``original_error`` (None, an exception object, ...) is converted to text
+    before keyword matching.
+    Args:
+        error: Exception instance (typically LLMConnectionError)
+    Returns:
+        bool: True if the error is retryable
+    """
+    # Never retry response parsing errors
+    if isinstance(error, LLMResponseError):
+        return False
+    details = getattr(error, 'details', None)
+    if not hasattr(details, "get"):
+        # Missing, None, or not dict-like: nothing to inspect.
+        details = {}
+    # Check HTTP status code first (most reliable signal)
+    status_code = details.get("status_code")
+    if status_code is not None:
+        if status_code in NON_RETRYABLE_STATUS_CODES:
+            return False
+        if status_code in RETRYABLE_STATUS_CODES:
+            return True
+    # For network-level errors, check the original error message.
+    # str() guards against the value being an exception object; "or ''"
+    # guards against an explicit None stored under the key.
+    original_lower = str(details.get("original_error") or "").lower()
+    return any(keyword in original_lower for keyword in RETRYABLE_ERROR_KEYWORDS)
+def wait_with_cancel_message(console, delay_seconds):
+    """Wait briefly before retrying, showing a dim status line.
+    Args:
+        console: Rich console for output
+        delay_seconds: Seconds to wait
+    Returns:
+        bool: True if wait completed, False if interrupted by KeyboardInterrupt
+    """
+    console.print(f"[dim]Connection issue, retrying in {delay_seconds}s... (Ctrl+C to cancel)[/dim]")
+    try:
+        time.sleep(delay_seconds)
+    except KeyboardInterrupt:
+        console.print("[dim]Retry cancelled.[/dim]")
+        return False
+    return True

package/src/core/sub_agent.py ADDED Viewed

@@ -0,0 +1,326 @@
+"""Sub-agent for delegated tasks.
+Uses existing AgenticOrchestrator with isolated message context
+and read-only tools to execute generic delegated tasks.
+"""
+from pathlib import Path
+from core.chat_manager import ChatManager
+from llm.prompts import build_sub_agent_prompt
+from utils.settings import sub_agent_settings
+class HardLimitExceeded(Exception):
+    """Raised when the sub-agent hits its hard token limit."""
+    pass
+def _format_messages_dump(messages) -> str:
+    """Format sub-agent message history as a markdown dump.
+    Args:
+        messages: List of message dicts from the sub-agent ChatManager.
+    Returns:
+        Markdown string with the full conversation context.
+    """
+    lines = [
+        "## Sub-Agent Context Dump (Hard Limit Reached)",
+        "",
+        "The sub-agent exceeded its hard token limit. Below is the full, unabridged context of its investigation. No summary was produced.",
+        "",
+        "---",
+        "",
+    ]
+    for i, msg in enumerate(messages):
+        role = msg.get("role", "unknown")
+        content = msg.get("content", "")
+        tool_calls = msg.get("tool_calls")
+        tool_call_id = msg.get("tool_call_id")
+        if tool_call_id:
+            lines.append(f"### Message {i} — tool result ({tool_call_id})")
+        elif tool_calls:
+            lines.append(f"### Message {i} — assistant tool calls")
+            for tc in tool_calls:
+                fn = tc.get("function", {})
+                lines.append(f"- `{fn.get('name', '?')}` — `{fn.get('arguments', '')}`")
+        else:
+            lines.append(f"### Message {i} — {role}")
+        if content:
+            # Truncate large content to avoid blowing out the main agent's context
+            max_chars = 4000
+            if len(content) > max_chars:
+                content = content[:max_chars] + f"\n\n... (truncated, {len(content) - max_chars:,} chars omitted)"
+            lines.append(content)
+        lines.append("")
+    return "\n".join(lines)
+def _configure_compaction():
+    """Create a ChatManager with compaction settings from config.
+    Returns:
+        ChatManager: A new ChatManager instance with compaction configured
+    """
+    if sub_agent_settings.enable_compaction:
+        return ChatManager(compact_trigger_tokens=sub_agent_settings.compact_trigger_tokens)
+    else:
+        return ChatManager(compact_trigger_tokens=None)
+def _inject_system_prompt(chat_manager, sub_agent_type: str = "research"):
+    """Build sub-agent prompt and inject it.
+    Token usage is reported live by the wrapper in run_sub_agent(),
+    so the system prompt is kept clean.
+    Args:
+        chat_manager: ChatManager instance to configure
+        sub_agent_type: Type of sub-agent ('research' or 'review').
+    """
+    base_prompt = build_sub_agent_prompt(
+        sub_agent_type=sub_agent_type,
+        soft_limit_tokens=sub_agent_settings.soft_limit_tokens,
+        hard_limit_tokens=sub_agent_settings.hard_limit_tokens,
+    )
+    chat_manager.messages = [{"role": "system", "content": base_prompt}]
+def _load_codebase_map(chat_manager):
+    """Load agents.md codebase map into sub-agent context if available.
+    Args:
+        chat_manager: ChatManager instance to add context to
+    """
+    agents_path = Path.cwd() / "agents.md"
+    if agents_path.exists():
+        map_content = agents_path.read_text(encoding="utf-8").strip()
+        user_msg = (
+            "Here is the codebase map for this project. "
+            "This provides an overview of the repository structure and file purposes. "
+            "Use this as a reference when exploring the codebase.\n\n"
+            f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
+        )
+        assistant_msg = (
+            "I've received the codebase map. I'll use this as a reference when "
+            "exploring the repository, but I'll always verify current state by "
+            "reading files and searching the codebase before making changes."
+        )
+        chat_manager.messages.append({"role": "user", "content": user_msg})
+        chat_manager.messages.append({"role": "assistant", "content": assistant_msg})
+def _configure_isolation(chat_manager):
+    """Apply isolation settings for sub-agent context.
+    Disables conversation logging.
+    Args:
+        chat_manager: ChatManager instance to configure
+    """
+    chat_manager.markdown_logger = None
+def _create_chat_manager(sub_agent_type: str = "research"):
+    """Create a fresh ChatManager instance for sub-agent use.
+    Orchestrates compaction, prompt injection, codebase map loading,
+    and isolation configuration.
+    Args:
+        sub_agent_type: Type of sub-agent ('research' or 'review').
+    Returns:
+        ChatManager: A new ChatManager instance with pre-configured system prompt
+    """
+    chat_manager = _configure_compaction()
+    chat_manager._compaction_disabled = True
+    _inject_system_prompt(chat_manager, sub_agent_type=sub_agent_type)
+    _load_codebase_map(chat_manager)
+    _configure_isolation(chat_manager)
+    return chat_manager
+def run_sub_agent(
+    task_query: str,
+    repo_root: Path,
+    rg_exe_path: str,
+    console=None,
+    panel_updater=None,
+    sub_agent_type: str = "research",
+    initial_context: str = None,
+) -> dict:
+    """Run sub-agent using existing AgenticOrchestrator for delegated tasks.
+    Args:
+        task_query: Generic task query to execute (e.g., "Read file config.json")
+        repo_root: Repository root path
+        rg_exe_path: Path to rg executable
+        console: Optional Rich console for output
+        panel_updater: Optional SubAgentPanel for live panel updates
+        sub_agent_type: Type of sub-agent ('research' or 'review').
+        initial_context: Optional string injected as context before the task query
+            (e.g., a git diff for review mode).
+    Returns:
+        Dict with:
+            - 'result': Formatted markdown string (goes into chat history)
+            - 'usage': Usage data for billing
+            - 'error': Error message if failed (None if success)
+    """
+    # Validate panel_updater type if provided
+    if panel_updater is not None and not hasattr(panel_updater, 'append'):
+        panel_updater = None
+    # If no panel_updater provided, create a simple no-op one
+    if panel_updater is None:
+        from tools.sub_agent import SimplePanelUpdater
+        panel_updater = SimplePanelUpdater(console)
+    # Create fresh ChatManager for sub-agent
+    temp_chat_manager = _create_chat_manager(sub_agent_type=sub_agent_type)
+    # Inject initial context as a user/assistant exchange if provided
+    if initial_context:
+        temp_chat_manager.messages.append(
+            {"role": "user", "content": initial_context}
+        )
+        temp_chat_manager.messages.append(
+            {"role": "assistant", "content": "I've received the context. I'll analyze it and use the available tools to gather additional information as needed."}
+        )
+    # Import here to avoid circular import with core.agentic
+    from core.agentic import AgenticOrchestrator
+    # Create orchestrator (reuses existing implementation)
+    orchestrator = AgenticOrchestrator(
+        chat_manager=temp_chat_manager,
+        repo_root=repo_root,
+        rg_exe_path=rg_exe_path,
+        console=console,
+        debug_mode=False,
+        suppress_result_display=True,
+        is_sub_agent=True,
+        panel_updater=panel_updater,
+        force_parallel_execution=True  # Enable parallel execution for read-only tools
+    )
+    # Wrap orchestrator._get_llm_response to check hard token limit and
+    # wrap client.chat_completion once (outside the loop) to inject live
+    # token feedback as a system message — avoids per-call monkey-patching
+    # and eliminates any re-entrancy risk.
+    original_get_llm_response = orchestrator._get_llm_response
+    original_chat_completion = temp_chat_manager.client.chat_completion
+    _soft_limit_warned = False
+    def _chat_completion_with_token_hint(messages, **kwargs):
+        """Prepend a system-level token budget hint (and soft-limit warning once) to every LLM call."""
+        nonlocal _soft_limit_warned
+        tt = temp_chat_manager.token_tracker
+        hint = f"[Token budget: {tt.current_context_tokens:,} curr / {tt.conv_total_tokens:,} total]"
+        if not _soft_limit_warned and tt.current_context_tokens >= sub_agent_settings.soft_limit_tokens:
+            _soft_limit_warned = True
+            hint = (
+                f"WARNING: You have exceeded the soft token limit "
+                f"({tt.current_context_tokens:,} / {sub_agent_settings.soft_limit_tokens:,}). "
+                "STOP exploring and return your findings immediately. Do NOT call any more tools. "
+                + hint
+            )
+        token_msg = {"role": "system", "content": hint}
+        return original_chat_completion([token_msg, *messages], **kwargs)
+    def _get_llm_response_with_hard_limit(allowed_tools=None):
+        """Wrapper to check hard token limit and update panel with live token counts."""
+        tt = temp_chat_manager.token_tracker
+        # Check hard token limit before making LLM call
+        # Use current_context_tokens (prompt size) not total_tokens (cumulative billing)
+        # to catch prompt-length-over-limit errors before they hit the API.
+        if tt.current_context_tokens >= sub_agent_settings.hard_limit_tokens:
+            raise HardLimitExceeded(
+                f"Sub-agent hard token limit exceeded: "
+                f"{tt.current_context_tokens:,} / {sub_agent_settings.hard_limit_tokens:,} tokens."
+            )
+        # Update panel with live token counts
+        # Order: conversation length (current context) first, total tokens billed second
+        conv_length = tt.current_context_tokens
+        total_billed = tt.conv_total_tokens
+        if hasattr(panel_updater, 'token_info'):
+            panel_updater.token_info = f"{conv_length:,} curr | {total_billed:,} total"
+            panel_updater.append("")  # Refresh panel title
+        return original_get_llm_response(allowed_tools=allowed_tools)
+    # Apply both patches once, before the orchestrator loop starts
+    orchestrator._get_llm_response = _get_llm_response_with_hard_limit
+    temp_chat_manager.client.chat_completion = _chat_completion_with_token_hint
+    hard_limit_exceeded = False
+    try:
+        # Run sub-agent task
+        orchestrator.run(
+            task_query,
+            thinking_indicator=None,
+            allowed_tools=sub_agent_settings.allowed_tools
+        )
+    except HardLimitExceeded:
+        hard_limit_exceeded = True
+    except Exception as e:
+        import traceback
+        error_details = f"{e}\n\nTraceback:\n{traceback.format_exc()}"
+        return {
+            "result": "",
+            "usage": {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0
+            },
+            "model": "",
+            "error": error_details
+        }
+    finally:
+        # Restore originals
+        temp_chat_manager.client.chat_completion = original_chat_completion
+    # Get final token usage (no need for delta calculation on fresh instance)
+    delta_prompt = temp_chat_manager.token_tracker.total_prompt_tokens
+    delta_completion = temp_chat_manager.token_tracker.total_completion_tokens
+    delta_total = temp_chat_manager.token_tracker.total_tokens
+    tt = temp_chat_manager.token_tracker
+    delta_cost = tt.total_actual_cost + tt.total_estimated_cost
+    if hard_limit_exceeded and sub_agent_settings.dump_context_on_hard_limit:
+        result = _format_messages_dump(temp_chat_manager.messages)
+    else:
+        # Extract final response (last assistant message with content)
+        final_content = ""
+        for msg in reversed(temp_chat_manager.messages):
+            if msg.get("role") == "assistant" and msg.get("content"):
+                final_content = msg["content"].strip()
+                break
+        result = final_content
+    usage = {
+        "prompt_tokens": delta_prompt,
+        "completion_tokens": delta_completion,
+        "total_tokens": delta_total,
+        "context_tokens": tt.current_context_tokens,
+    }
+    if delta_cost > 0:
+        usage["cost"] = delta_cost
+    return {
+        "result": result,
+        "usage": usage,
+        "model": temp_chat_manager.client.model,
+        "error": None,
+        "hard_limit_exceeded": hard_limit_exceeded,
+    }