npm - bone-agent - Versions diffs - 1.3.3 → 2.0.0 - Mend

bone-agent 1.3.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121) hide show

package/bin/bone.js +39 -0
package/package.json +25 -39
package/LICENSE +0 -21
package/README.md +0 -184
package/bin/npm-wrapper.js +0 -235
package/bin/rg +0 -0
package/bin/rg.exe +0 -0
package/config.yaml.example +0 -141
package/prompts/main/ask_questions.md +0 -31
package/prompts/main/batch_independent_calls.md +0 -5
package/prompts/main/casual_interactions.md +0 -11
package/prompts/main/code_references.md +0 -8
package/prompts/main/communication_style.md +0 -12
package/prompts/main/context_reliability.md +0 -12
package/prompts/main/conversational_tool_calling.md +0 -15
package/prompts/main/dream.md +0 -36
package/prompts/main/editing_pattern.md +0 -13
package/prompts/main/error_handling.md +0 -6
package/prompts/main/exploration_pattern.md +0 -21
package/prompts/main/intro.md +0 -1
package/prompts/main/obsidian.md +0 -16
package/prompts/main/obsidian_project.md +0 -79
package/prompts/main/professional_objectivity.md +0 -3
package/prompts/main/targeted_searching.md +0 -10
package/prompts/main/task_lists_pattern.md +0 -8
package/prompts/main/temp_folder.md +0 -9
package/prompts/main/think_before_acting.md +0 -10
package/prompts/main/tone_and_style.md +0 -4
package/prompts/main/tool_preferences.md +0 -24
package/prompts/main/trust_subagent_context.md +0 -21
package/prompts/main/when_to_use_sub_agent.md +0 -7
package/prompts/micro/ask_questions.md +0 -1
package/prompts/micro/batch_independent_calls.md +0 -1
package/prompts/micro/casual_interactions.md +0 -1
package/prompts/micro/code_references.md +0 -1
package/prompts/micro/communication_style.md +0 -1
package/prompts/micro/context_reliability.md +0 -1
package/prompts/micro/conversational_tool_calling.md +0 -1
package/prompts/micro/editing_pattern.md +0 -1
package/prompts/micro/error_handling.md +0 -1
package/prompts/micro/exploration_pattern.md +0 -1
package/prompts/micro/intro.md +0 -1
package/prompts/micro/obsidian.md +0 -4
package/prompts/micro/obsidian_project.md +0 -5
package/prompts/micro/professional_objectivity.md +0 -1
package/prompts/micro/targeted_searching.md +0 -1
package/prompts/micro/task_lists_pattern.md +0 -1
package/prompts/micro/temp_folder.md +0 -1
package/prompts/micro/think_before_acting.md +0 -5
package/prompts/micro/tone_and_style.md +0 -1
package/prompts/micro/tool_preferences.md +0 -1
package/prompts/micro/trust_subagent_context.md +0 -1
package/prompts/micro/when_to_use_sub_agent.md +0 -1
package/requirements.txt +0 -9
package/src/__init__.py +0 -11
package/src/core/__init__.py +0 -1
package/src/core/agentic.py +0 -985
package/src/core/chat_manager.py +0 -1564
package/src/core/config_manager.py +0 -253
package/src/core/cron.py +0 -582
package/src/core/cron_allowlist.py +0 -118
package/src/core/memory.py +0 -145
package/src/core/retry.py +0 -71
package/src/core/sub_agent.py +0 -326
package/src/core/tool_approval.py +0 -220
package/src/core/tool_feedback.py +0 -778
package/src/exceptions.py +0 -79
package/src/llm/__init__.py +0 -1
package/src/llm/client.py +0 -171
package/src/llm/config.py +0 -492
package/src/llm/prompts.py +0 -489
package/src/llm/providers.py +0 -436
package/src/llm/streaming.py +0 -163
package/src/llm/token_tracker.py +0 -384
package/src/tools/__init__.py +0 -212
package/src/tools/constants.py +0 -59
package/src/tools/create_file.py +0 -136
package/src/tools/directory.py +0 -389
package/src/tools/edit.py +0 -545
package/src/tools/file_reader.py +0 -322
package/src/tools/helpers/__init__.py +0 -105
package/src/tools/helpers/base.py +0 -550
package/src/tools/helpers/converters.py +0 -44
package/src/tools/helpers/file_helpers.py +0 -189
package/src/tools/helpers/formatters.py +0 -411
package/src/tools/helpers/loader.py +0 -231
package/src/tools/helpers/parallel_executor.py +0 -231
package/src/tools/helpers/path_resolver.py +0 -232
package/src/tools/helpers/plugin_manifest.py +0 -156
package/src/tools/obsidian.py +0 -96
package/src/tools/review_sub_agent.py +0 -189
package/src/tools/rg_search.py +0 -460
package/src/tools/search_plugins.py +0 -109
package/src/tools/select_option.py +0 -600
package/src/tools/shell.py +0 -302
package/src/tools/sub_agent.py +0 -139
package/src/tools/task_list.py +0 -269
package/src/tools/web_search.py +0 -61
package/src/ui/__init__.py +0 -1
package/src/ui/banner.py +0 -87
package/src/ui/commands.py +0 -2809
package/src/ui/displays.py +0 -214
package/src/ui/loader.py +0 -284
package/src/ui/main.py +0 -647
package/src/ui/prompt_utils.py +0 -113
package/src/ui/setting_selector.py +0 -590
package/src/ui/setup_wizard.py +0 -294
package/src/ui/sub_agent_panel.py +0 -234
package/src/ui/tool_confirmation.py +0 -215
package/src/utils/__init__.py +0 -1
package/src/utils/citation_parser.py +0 -199
package/src/utils/editor.py +0 -158
package/src/utils/gitignore_filter.py +0 -149
package/src/utils/logger.py +0 -254
package/src/utils/paths.py +0 -30
package/src/utils/result_parsers.py +0 -108
package/src/utils/safe_commands.py +0 -243
package/src/utils/settings.py +0 -191
package/src/utils/user_message_logger.py +0 -120
package/src/utils/validation.py +0 -191
package/src/utils/web_search.py +0 -173

package/src/core/memory.py DELETED Viewed

@@ -1,145 +0,0 @@
-"""Multi-layer memory system for the agent.
-Two-layer persistent memory:
-- User memory (global): ~/.bone/user_memory.md
-- Project memory (per-repo): {repo_root}/.bone/agents.md
-Memory files are read-only during conversations — loaded into the system prompt
-for context but never written inline. All writes happen through the dream cron job,
-which consolidates user messages into focused memories nightly.
-"""
-import logging
-from pathlib import Path
-from typing import Optional
-# Module-level logger named after this module; used for the warning/debug messages below.
-logger = logging.getLogger(__name__)
-# Capacity constants (prompt-enforced, no code enforcement)
-# In the code shown here CHAR_LIMIT is only reported back by the get_*_usage() helpers;
-# nothing truncates or rejects memory content that exceeds it.
-CHAR_LIMIT = 1500  # suggested chars per layer (~500 tokens)
-class MemoryManager:
-    """Manage the two memory layers: user-level (global) and project-level (per-repo).
-    The class is used as a lazy singleton: the first get_instance() call that
-    is given a repo_root creates the instance and later calls return it as-is.
-    Call reset() when switching repos so the next get_instance() can rebind.
-    """
-    _instance: Optional["MemoryManager"] = None
-    # Exact .gitignore lines that already cover the .bone directory.
-    _GITIGNORE_ENTRIES = frozenset({".bone", ".bone/", "/.bone", "/.bone/"})
-    def __init__(self, repo_root: Path):
-        """Store the repo root and derive both memory file paths (performs no I/O)."""
-        self.repo_root = repo_root
-        self.user_memory_path = Path.home() / ".bone" / "user_memory.md"
-        self.project_memory_path = repo_root / ".bone" / "agents.md"
-    @classmethod
-    def get_instance(cls, repo_root: Optional[Path] = None) -> Optional["MemoryManager"]:
-        """Lazy singleton. First call sets repo_root, subsequent calls reuse instance.
-        Args:
-            repo_root: Path to repository root. Required on first call,
-                       ignored on subsequent calls (until reset()).
-        Returns:
-            MemoryManager instance, or None if no repo_root provided and
-            no instance has been initialized yet.
-        """
-        if cls._instance is None and repo_root is not None:
-            cls._instance = cls(repo_root)
-        return cls._instance
-    @classmethod
-    def reset(cls) -> None:
-        """Clear singleton. Called when switching repos via /cd."""
-        cls._instance = None
-    def ensure_exists(self) -> None:
-        """Create user-level directory and memory file only.
-        Project-level .bone/agents.md is created lazily on first write,
-        not at startup. This prevents creating .bone/ directories in
-        non-project locations (e.g. when running from ~/.bone/ itself).
-        """
-        self._ensure_dir_and_file(
-            self.user_memory_path,
-            "# User Memory\n\n",
-        )
-        # Add .bone/ to .gitignore if repo_root has a git repo
-        self._ensure_gitignore()
-    def load_user_memory(self) -> str:
-        """Read and return user memory file content. Returns empty string if missing."""
-        return self._read_file(self.user_memory_path)
-    def load_project_memory(self) -> str:
-        """Read and return project memory file content. Returns empty string if missing."""
-        return self._read_file(self.project_memory_path)
-    def get_user_usage(self) -> dict:
-        """Return {chars_used, chars_limit} for user memory."""
-        content = self.load_user_memory()
-        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}
-    def get_project_usage(self) -> dict:
-        """Return {chars_used, chars_limit} for project memory."""
-        content = self.load_project_memory()
-        return {"chars_used": len(content), "chars_limit": CHAR_LIMIT}
-    # ---- Private helpers ----
-    @staticmethod
-    def _has_entries(content: str) -> bool:
-        """Check if memory file has entries beyond just the header.
-        A file with only "# User Memory\\n\\n" is considered empty: blank lines
-        and lines starting with "#" do not count as entries.
-        """
-        return any(
-            line.strip() and not line.strip().startswith("#")
-            for line in content.split("\n")
-        )
-    @staticmethod
-    def _ensure_dir_and_file(path: Path, default_content: str) -> None:
-        """Create parent directory and file with default content if missing.
-        Best effort: any failure is logged as a warning and swallowed.
-        """
-        try:
-            path.parent.mkdir(parents=True, exist_ok=True)
-            if not path.exists():
-                path.write_text(default_content, encoding="utf-8")
-                logger.debug("Created memory file: %s", path)
-        except Exception as e:
-            logger.warning("Failed to create memory file %s: %s", path, e)
-    @staticmethod
-    def _read_file(path: Path) -> str:
-        """Read file content, return empty string on any error."""
-        try:
-            if path.exists():
-                return path.read_text(encoding="utf-8")
-        except Exception as e:
-            logger.warning("Failed to read memory file %s: %s", path, e)
-        return ""
-    def _ensure_gitignore(self) -> None:
-        """Add .bone/ to .gitignore if not already present.
-        Does nothing unless repo_root is a directory containing a ``.git`` entry.
-        Best effort: any failure is logged as a warning and swallowed.
-        """
-        gitignore = self.repo_root / ".gitignore"
-        # .git is a directory in a normal clone but a plain file ("gitfile") in
-        # worktrees and submodules, so test for existence rather than is_dir().
-        if not self.repo_root.is_dir() or not (self.repo_root / ".git").exists():
-            return  # Not a git repo
-        try:
-            if not gitignore.exists():
-                gitignore.write_text(".bone/\n", encoding="utf-8")
-                return
-            content = gitignore.read_text(encoding="utf-8")
-            # Compare whole lines: a substring test would treat ".bonescript/"
-            # or a comment mentioning .bone as if the directory were ignored.
-            existing = {line.strip() for line in content.splitlines()}
-            if existing & self._GITIGNORE_ENTRIES:
-                return
-            # Only insert a separating newline when the file does not end with one.
-            separator = "" if not content or content.endswith("\n") else "\n"
-            with open(gitignore, "a", encoding="utf-8") as f:
-                f.write(f"{separator}.bone/\n")
-        except Exception as e:
-            logger.warning("Failed to update .gitignore: %s", e)

package/src/core/retry.py DELETED Viewed

@@ -1,71 +0,0 @@
-"""Retry logic for LLM connection and timeout errors."""
-import time
-from exceptions import LLMResponseError
-# Timeout retry constants
-# NOTE(review): RETRY_MAX_ATTEMPTS and RETRY_DELAYS are not referenced in the code shown
-# here; presumably the caller's retry loop consumes them — confirm.
-RETRY_MAX_ATTEMPTS = 3
-RETRY_DELAYS = (2, 4)  # exponential backoff per attempt
-# HTTP status codes that is_retryable_error() reports as retryable.
-RETRYABLE_STATUS_CODES = {429, 502, 503, 504}
-# Substrings that is_retryable_error() looks for in the lowercased
-# details["original_error"] text; entries must therefore be lowercase.
-RETRYABLE_ERROR_KEYWORDS = (
-    "timeout", "timed out", "connectionerror", "connection refused",
-    "connection reset", "connection aborted", "name or service not known",
-    "network unreachable", "no route to host", "eof occurred",
-)
-# HTTP status codes that is_retryable_error() reports as not retryable.
-NON_RETRYABLE_STATUS_CODES = {400, 401, 403, 405, 422}
-def is_retryable_error(error):
-    """Check if an LLMConnectionError is retryable.
-    Retryable conditions:
-    - Timeout or connection-level errors (network unreachable, DNS failure, etc.)
-    - HTTP 429 (rate limited), 502, 503, 504 (server errors)
-    Non-retryable conditions:
-    - HTTP 400, 401, 403, 405, 422 (client/auth errors)
-    - LLMResponseError (malformed response data)
-    Status codes in neither set fall through to the keyword check.
-    Args:
-        error: Exception instance (typically LLMConnectionError). Its optional
-            ``details`` mapping is read for "status_code" and "original_error".
-    Returns:
-        bool: True if the error is retryable
-    """
-    # Never retry response parsing errors
-    if isinstance(error, LLMResponseError):
-        return False
-    # Check HTTP status code first (most reliable signal)
-    details = getattr(error, 'details', {}) or {}
-    status_code = details.get("status_code")
-    if status_code is not None:
-        if status_code in NON_RETRYABLE_STATUS_CODES:
-            return False
-        if status_code in RETRYABLE_STATUS_CODES:
-            return True
-    # For network-level errors, check the original error message.
-    # The value may be None or an exception object rather than a string,
-    # so coerce it before lowercasing instead of assuming str.
-    original_lower = str(details.get("original_error") or "").lower()
-    return any(keyword in original_lower for keyword in RETRYABLE_ERROR_KEYWORDS)
-def wait_with_cancel_message(console, delay_seconds):
-    """Announce a retry, then sleep for the given delay unless the user cancels.
-    Args:
-        console: Object with a ``print`` method (a Rich console) used for the status lines
-        delay_seconds: Seconds to sleep before the retry
-    Returns:
-        bool: True if the full delay elapsed, False if Ctrl+C interrupted the sleep
-    """
-    console.print(f"[dim]Connection issue, retrying in {delay_seconds}s... (Ctrl+C to cancel)[/dim]")
-    completed = True
-    try:
-        time.sleep(delay_seconds)
-    except KeyboardInterrupt:
-        # Ctrl+C during the wait cancels the retry instead of propagating.
-        completed = False
-        console.print("[dim]Retry cancelled.[/dim]")
-    return completed

package/src/core/sub_agent.py DELETED Viewed

@@ -1,326 +0,0 @@
-"""Sub-agent for delegated tasks.
-Uses existing AgenticOrchestrator with isolated message context
-and read-only tools to execute generic delegated tasks.
-"""
-from pathlib import Path
-from core.chat_manager import ChatManager
-from llm.prompts import build_sub_agent_prompt
-from utils.settings import sub_agent_settings
-class HardLimitExceeded(Exception):
-    """Signals that the sub-agent's context reached its configured hard token limit."""
-def _format_messages_dump(messages) -> str:
-    """Format sub-agent message history as a markdown dump.
-    Each message becomes a "### Message N" section: tool results are labelled
-    with their tool_call_id, assistant tool calls list each function name and
-    arguments, and anything else is labelled with its role. Message content
-    longer than 4000 characters is truncated with a note of how much was cut.
-    Args:
-        messages: List of message dicts from the sub-agent ChatManager.
-    Returns:
-        Markdown string with the full conversation context.
-    """
-    # Per-message cap: truncate large content to avoid blowing out the main agent's context
-    max_chars = 4000
-    lines = [
-        "## Sub-Agent Context Dump (Hard Limit Reached)",
-        "",
-        "The sub-agent exceeded its hard token limit. Below is the full, unabridged context of its investigation. No summary was produced.",
-        "",
-        "---",
-        "",
-    ]
-    for i, msg in enumerate(messages):
-        role = msg.get("role", "unknown")
-        content = msg.get("content", "")
-        tool_calls = msg.get("tool_calls")
-        tool_call_id = msg.get("tool_call_id")
-        if tool_call_id:
-            lines.append(f"### Message {i} — tool result ({tool_call_id})")
-        elif tool_calls:
-            lines.append(f"### Message {i} — assistant tool calls")
-            for tc in tool_calls:
-                # Tolerate an explicit "function": None as well as a missing key.
-                fn = tc.get("function") or {}
-                lines.append(f"- `{fn.get('name', '?')}` — `{fn.get('arguments', '')}`")
-        else:
-            lines.append(f"### Message {i} — {role}")
-        if content:
-            # Content is not guaranteed to be a plain string (it could be e.g. a
-            # list of parts); stringify it so the truncation below and the final
-            # join cannot raise TypeError.
-            if not isinstance(content, str):
-                content = str(content)
-            if len(content) > max_chars:
-                omitted = len(content) - max_chars
-                content = content[:max_chars] + f"\n\n... (truncated, {omitted:,} chars omitted)"
-            lines.append(content)
-        lines.append("")
-    return "\n".join(lines)
-def _configure_compaction():
-    """Build a ChatManager whose compaction trigger follows the sub-agent settings.
-    When ``sub_agent_settings.enable_compaction`` is set, the configured
-    ``compact_trigger_tokens`` is passed through; otherwise None is passed.
-    Returns:
-        ChatManager: A new ChatManager instance with compaction configured
-    """
-    trigger_tokens = (
-        sub_agent_settings.compact_trigger_tokens
-        if sub_agent_settings.enable_compaction
-        else None
-    )
-    return ChatManager(compact_trigger_tokens=trigger_tokens)
-def _inject_system_prompt(chat_manager, sub_agent_type: str = "research"):
-    """Replace the manager's message list with a single sub-agent system prompt.
-    The prompt is built from the sub-agent type and the configured soft/hard
-    token limits. Live token usage is injected separately by the wrapper in
-    run_sub_agent(), which keeps this prompt static.
-    Args:
-        chat_manager: ChatManager instance to configure
-        sub_agent_type: Type of sub-agent ('research' or 'review').
-    """
-    system_message = {
-        "role": "system",
-        "content": build_sub_agent_prompt(
-            sub_agent_type=sub_agent_type,
-            soft_limit_tokens=sub_agent_settings.soft_limit_tokens,
-            hard_limit_tokens=sub_agent_settings.hard_limit_tokens,
-        ),
-    }
-    # Assign a fresh list: any messages already on the manager are discarded.
-    chat_manager.messages = [system_message]
-def _load_codebase_map(chat_manager):
-    """Seed the sub-agent conversation with the agents.md codebase map, if present.
-    Looks for ``agents.md`` in the current working directory. When it exists,
-    its stripped text is appended as a user message followed by a canned
-    assistant acknowledgement; otherwise the manager is left untouched.
-    Args:
-        chat_manager: ChatManager instance to add context to
-    """
-    map_file = Path.cwd() / "agents.md"
-    if not map_file.exists():
-        return
-    map_content = map_file.read_text(encoding="utf-8").strip()
-    intro = (
-        "Here is the codebase map for this project. "
-        "This provides an overview of the repository structure and file purposes. "
-        "Use this as a reference when exploring the codebase.\n\n"
-    )
-    acknowledgement = (
-        "I've received the codebase map. I'll use this as a reference when "
-        "exploring the repository, but I'll always verify current state by "
-        "reading files and searching the codebase before making changes."
-    )
-    chat_manager.messages.extend(
-        [
-            {
-                "role": "user",
-                "content": intro + f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}",
-            },
-            {"role": "assistant", "content": acknowledgement},
-        ]
-    )
-def _configure_isolation(chat_manager):
-    """Isolate a sub-agent ChatManager by switching off its conversation logging.
-    Args:
-        chat_manager: ChatManager instance whose ``markdown_logger`` is cleared
-    """
-    # Clearing the logger is the only isolation step applied here.
-    chat_manager.markdown_logger = None
-def _create_chat_manager(sub_agent_type: str = "research"):
-    """Assemble a ready-to-use ChatManager for a sub-agent run.
-    Steps, in order: build the manager with the configured compaction trigger,
-    set its ``_compaction_disabled`` flag, install the system prompt, add the
-    codebase map (when one exists) and switch off conversation logging.
-    Args:
-        sub_agent_type: Type of sub-agent ('research' or 'review').
-    Returns:
-        ChatManager: A new ChatManager instance with pre-configured system prompt
-    """
-    manager = _configure_compaction()
-    # NOTE(review): this flag is set regardless of sub_agent_settings.enable_compaction;
-    # how it interacts with compact_trigger_tokens depends on ChatManager — confirm.
-    manager._compaction_disabled = True
-    _inject_system_prompt(manager, sub_agent_type=sub_agent_type)
-    _load_codebase_map(manager)
-    _configure_isolation(manager)
-    return manager
-def run_sub_agent(
-    task_query: str,
-    repo_root: Path,
-    rg_exe_path: str,
-    console=None,
-    panel_updater=None,
-    sub_agent_type: str = "research",
-    initial_context: str = None,
-) -> dict:
-    """Run sub-agent using existing AgenticOrchestrator for delegated tasks.
-    A fresh ChatManager is created for the run, so the sub-agent's messages and
-    token counters start from zero. Two wrappers are installed for the duration
-    of the run: one prepends a token-budget system message to every LLM call,
-    the other raises HardLimitExceeded before an LLM call once the context
-    reaches the hard token limit.
-    Args:
-        task_query: Generic task query to execute (e.g., "Read file config.json")
-        repo_root: Repository root path
-        rg_exe_path: Path to rg executable
-        console: Optional Rich console for output
-        panel_updater: Optional SubAgentPanel for live panel updates. Objects
-            without an ``append`` attribute are ignored and replaced by a
-            SimplePanelUpdater.
-        sub_agent_type: Type of sub-agent ('research' or 'review').
-        initial_context: Optional string injected as context before the task query
-            (e.g., a git diff for review mode).
-    Returns:
-        Dict with:
-            - 'result': Formatted markdown string (goes into chat history).
-              Empty on error; a full context dump when the hard limit was hit
-              and dump_context_on_hard_limit is enabled.
-            - 'usage': Usage data for billing
-            - 'model': Model name of the sub-agent client ("" on error)
-            - 'error': Error message if failed (None if success)
-            - 'hard_limit_exceeded': True when the run stopped on the hard token limit
-    """
-    # Validate panel_updater type if provided
-    if panel_updater is not None and not hasattr(panel_updater, 'append'):
-        panel_updater = None
-    # If no panel_updater provided, create a simple no-op one
-    if panel_updater is None:
-        from tools.sub_agent import SimplePanelUpdater
-        panel_updater = SimplePanelUpdater(console)
-    # Create fresh ChatManager for sub-agent
-    temp_chat_manager = _create_chat_manager(sub_agent_type=sub_agent_type)
-    # Inject initial context as a user/assistant exchange if provided
-    if initial_context:
-        temp_chat_manager.messages.append(
-            {"role": "user", "content": initial_context}
-        )
-        temp_chat_manager.messages.append(
-            {"role": "assistant", "content": "I've received the context. I'll analyze it and use the available tools to gather additional information as needed."}
-        )
-    # Import here to avoid circular import with core.agentic
-    from core.agentic import AgenticOrchestrator
-    # Create orchestrator (reuses existing implementation)
-    orchestrator = AgenticOrchestrator(
-        chat_manager=temp_chat_manager,
-        repo_root=repo_root,
-        rg_exe_path=rg_exe_path,
-        console=console,
-        debug_mode=False,
-        suppress_result_display=True,
-        is_sub_agent=True,
-        panel_updater=panel_updater,
-        force_parallel_execution=True  # Enable parallel execution for read-only tools
-    )
-    # Wrap orchestrator._get_llm_response to check hard token limit and
-    # wrap client.chat_completion once (outside the loop) to inject live
-    # token feedback as a system message — avoids per-call monkey-patching
-    # and eliminates any re-entrancy risk.
-    original_get_llm_response = orchestrator._get_llm_response
-    original_chat_completion = temp_chat_manager.client.chat_completion
-    _soft_limit_warned = False
-    def _chat_completion_with_token_hint(messages, **kwargs):
-        """Prepend a system-level token budget hint (and soft-limit warning once) to every LLM call."""
-        nonlocal _soft_limit_warned
-        tt = temp_chat_manager.token_tracker
-        hint = f"[Token budget: {tt.current_context_tokens:,} curr / {tt.conv_total_tokens:,} total]"
-        if not _soft_limit_warned and tt.current_context_tokens >= sub_agent_settings.soft_limit_tokens:
-            _soft_limit_warned = True
-            hint = (
-                f"WARNING: You have exceeded the soft token limit "
-                f"({tt.current_context_tokens:,} / {sub_agent_settings.soft_limit_tokens:,}). "
-                "STOP exploring and return your findings immediately. Do NOT call any more tools. "
-                + hint
-            )
-        token_msg = {"role": "system", "content": hint}
-        # The hint is passed in a new list, so it is not stored in the caller's message list.
-        return original_chat_completion([token_msg, *messages], **kwargs)
-    def _get_llm_response_with_hard_limit(allowed_tools=None):
-        """Wrapper to check hard token limit and update panel with live token counts."""
-        tt = temp_chat_manager.token_tracker
-        # Check hard token limit before making LLM call
-        # Use current_context_tokens (prompt size) not total_tokens (cumulative billing)
-        # to catch prompt-length-over-limit errors before they hit the API.
-        if tt.current_context_tokens >= sub_agent_settings.hard_limit_tokens:
-            raise HardLimitExceeded(
-                f"Sub-agent hard token limit exceeded: "
-                f"{tt.current_context_tokens:,} / {sub_agent_settings.hard_limit_tokens:,} tokens."
-            )
-        # Update panel with live token counts
-        # Order: conversation length (current context) first, total tokens billed second
-        conv_length = tt.current_context_tokens
-        total_billed = tt.conv_total_tokens
-        if hasattr(panel_updater, 'token_info'):
-            panel_updater.token_info = f"{conv_length:,} curr | {total_billed:,} total"
-            panel_updater.append("")  # Refresh panel title
-        return original_get_llm_response(allowed_tools=allowed_tools)
-    # Apply both patches once, before the orchestrator loop starts
-    orchestrator._get_llm_response = _get_llm_response_with_hard_limit
-    temp_chat_manager.client.chat_completion = _chat_completion_with_token_hint
-    hard_limit_exceeded = False
-    try:
-        # Run sub-agent task
-        orchestrator.run(
-            task_query,
-            thinking_indicator=None,
-            allowed_tools=sub_agent_settings.allowed_tools
-        )
-    except HardLimitExceeded:
-        hard_limit_exceeded = True
-    except Exception as e:
-        import traceback
-        error_details = f"{e}\n\nTraceback:\n{traceback.format_exc()}"
-        # NOTE(review): usage is reported as zero here even if LLM calls were
-        # made before the failure — confirm that is intended for billing.
-        return {
-            "result": "",
-            "usage": {
-                "prompt_tokens": 0,
-                "completion_tokens": 0,
-                "total_tokens": 0
-            },
-            "model": "",
-            "error": error_details,
-            # Same key set as the success path, so callers can index it unconditionally.
-            "hard_limit_exceeded": False,
-        }
-    finally:
-        # Restore originals (both patches, on every exit path)
-        temp_chat_manager.client.chat_completion = original_chat_completion
-        orchestrator._get_llm_response = original_get_llm_response
-    # Get final token usage (no need for delta calculation on fresh instance)
-    tt = temp_chat_manager.token_tracker
-    delta_prompt = tt.total_prompt_tokens
-    delta_completion = tt.total_completion_tokens
-    delta_total = tt.total_tokens
-    delta_cost = tt.total_actual_cost + tt.total_estimated_cost
-    if hard_limit_exceeded and sub_agent_settings.dump_context_on_hard_limit:
-        result = _format_messages_dump(temp_chat_manager.messages)
-    else:
-        # Extract final response (last assistant message with content)
-        final_content = ""
-        for msg in reversed(temp_chat_manager.messages):
-            if msg.get("role") == "assistant" and msg.get("content"):
-                final_content = msg["content"].strip()
-                break
-        result = final_content
-    usage = {
-        "prompt_tokens": delta_prompt,
-        "completion_tokens": delta_completion,
-        "total_tokens": delta_total,
-        "context_tokens": tt.current_context_tokens,
-    }
-    # Cost is only reported when something was actually charged or estimated.
-    if delta_cost > 0:
-        usage["cost"] = delta_cost
-    return {
-        "result": result,
-        "usage": usage,
-        "model": temp_chat_manager.client.model,
-        "error": None,
-        "hard_limit_exceeded": hard_limit_exceeded,
-    }