PyPI - aru-code - Versions diffs - 0.15.0__tar.gz → 0.16.0__tar.gz - Mend

aru-code 0.15.0.tar.gz → 0.16.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

{aru_code-0.15.0/aru_code.egg-info → aru_code-0.16.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.15.0
+Version: 0.16.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT
@@ -56,7 +56,7 @@ An intelligent coding assistant for the terminal, powered by LLMs and [Agno](htt
 - **Multi-Agent Architecture** — Specialized agents for planning, execution, and conversation
 - **Interactive CLI** — Streaming responses, multi-line paste, session management
 - **Image Support** — Attach images via `@` mentions for multimodal analysis (Claude, GPT-4o, Gemini)
-- **16 Integrated Tools** — File operations, code search, shell, web search, task delegation
+- **11 Integrated Tools** — File operations, code search, shell, web search, task delegation
 - **Task Planning** — Break down complex tasks into steps with automatic execution
 - **Multi-Provider** — Anthropic, OpenAI, Ollama, Groq, OpenRouter, DeepSeek, and others via custom configuration
 - **Custom Commands, Skills, and Agents** — Extend aru via the `.agents/` directory
@@ -479,15 +479,14 @@ Aru can load tools from MCP servers. Configure in `.aru/mcp_config.json`:
 ### File Operations
 - `read_file` — Reads files with line range support and binary detection
-- `read_file_smart` — Smart file reading focused on relevant snippets for the query
-- `write_file` — Writes files
-- `edit_file` — Find-replace edits
+- `read_file_smart` — Answers specific questions about a file without returning raw content
+- `write_file` — Writes content to files, creating directories as needed
+- `edit_file` — Find-and-replace edits on files
 ### Search & Discovery
 - `glob_search` — Find files by pattern (respects .gitignore)
 - `grep_search` — Content search with regex and file filtering
 - `list_directory` — Directory listing with gitignore filtering
-- `rank_files` — Multi-factor file relevance ranking (name, structure, recency)
 ### Shell & Web
 - `bash` — Executes shell commands with permission gates
@@ -517,7 +516,7 @@ aru-code/
 │   │   ├── planner.py      # Planning agent
 │   │   └── executor.py     # Execution agent
 │   └── tools/
-│       ├── codebase.py     # 16 core tools
+│       ├── codebase.py     # 11 core tools
 │       ├── ast_tools.py    # Tree-sitter code analysis
 │       ├── ranker.py       # File relevance ranking
 │       ├── mcp_client.py   # MCP client

{aru_code-0.15.0 → aru_code-0.16.0}/README.md RENAMED Viewed

@@ -9,7 +9,7 @@ An intelligent coding assistant for the terminal, powered by LLMs and [Agno](htt
 - **Multi-Agent Architecture** — Specialized agents for planning, execution, and conversation
 - **Interactive CLI** — Streaming responses, multi-line paste, session management
 - **Image Support** — Attach images via `@` mentions for multimodal analysis (Claude, GPT-4o, Gemini)
-- **16 Integrated Tools** — File operations, code search, shell, web search, task delegation
+- **11 Integrated Tools** — File operations, code search, shell, web search, task delegation
 - **Task Planning** — Break down complex tasks into steps with automatic execution
 - **Multi-Provider** — Anthropic, OpenAI, Ollama, Groq, OpenRouter, DeepSeek, and others via custom configuration
 - **Custom Commands, Skills, and Agents** — Extend aru via the `.agents/` directory
@@ -432,15 +432,14 @@ Aru can load tools from MCP servers. Configure in `.aru/mcp_config.json`:
 ### File Operations
 - `read_file` — Reads files with line range support and binary detection
-- `read_file_smart` — Smart file reading focused on relevant snippets for the query
-- `write_file` — Writes files
-- `edit_file` — Find-replace edits
+- `read_file_smart` — Answers specific questions about a file without returning raw content
+- `write_file` — Writes content to files, creating directories as needed
+- `edit_file` — Find-and-replace edits on files
 ### Search & Discovery
 - `glob_search` — Find files by pattern (respects .gitignore)
 - `grep_search` — Content search with regex and file filtering
 - `list_directory` — Directory listing with gitignore filtering
-- `rank_files` — Multi-factor file relevance ranking (name, structure, recency)
 ### Shell & Web
 - `bash` — Executes shell commands with permission gates
@@ -470,7 +469,7 @@ aru-code/
 │   │   ├── planner.py      # Planning agent
 │   │   └── executor.py     # Execution agent
 │   └── tools/
-│       ├── codebase.py     # 16 core tools
+│       ├── codebase.py     # 11 core tools
 │       ├── ast_tools.py    # Tree-sitter code analysis
 │       ├── ranker.py       # File relevance ranking
 │       ├── mcp_client.py   # MCP client

aru_code-0.16.0/aru/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.16.0"

{aru_code-0.15.0 → aru_code-0.16.0}/aru/agent_factory.py RENAMED Viewed

@@ -21,12 +21,16 @@ def create_general_agent(
             in the system prompt. Placed in instructions so it's cacheable.
     """
     from agno.agent import Agent
-    from agno.compression.manager import CompressionManager
     from aru.tools.codebase import GENERAL_TOOLS
-    from aru.runtime import get_ctx
+    tools = GENERAL_TOOLS
-    extra = config.get_extra_instructions() if config else ""
+    # Only include AGENTS.md/project instructions on first turn to save ~1.6K tokens/turn
+    if config and not session.extra_instructions_sent:
+        extra = config.get_extra_instructions()
+        session.extra_instructions_sent = True
+    else:
+        extra = ""
     if env_context:
         extra = f"{extra}\n\n{env_context}" if extra else env_context
     model_ref = model_override or session.model_ref
@@ -34,15 +38,9 @@ def create_general_agent(
     return Agent(
         name="Aru",
         model=create_model(model_ref, max_tokens=8192),
-        tools=GENERAL_TOOLS,
+        tools=tools,
         instructions=_build_instructions("general", extra),
         markdown=True,
-        compress_tool_results=True,
-        compression_manager=CompressionManager(
-            model=create_model(get_ctx().small_model_ref, max_tokens=1024),
-            compress_tool_results=True,
-            compress_tool_results_limit=25,
-        ),
         tool_call_limit=20,
     )
@@ -52,10 +50,8 @@ def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
                                   env_context: str = ""):
     """Create an Agno Agent from a CustomAgent definition."""
     from agno.agent import Agent
-    from agno.compression.manager import CompressionManager
     from aru.agents.base import BASE_INSTRUCTIONS
     from aru.tools.codebase import resolve_tools
-    from aru.runtime import get_ctx
     model_ref = agent_def.model or session.model_ref
     tools = resolve_tools(agent_def.tools)
@@ -74,11 +70,5 @@ def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
         tools=tools,
         instructions=instructions,
         markdown=True,
-        compress_tool_results=True,
-        compression_manager=CompressionManager(
-            model=create_model(get_ctx().small_model_ref, max_tokens=1024),
-            compress_tool_results=True,
-            compress_tool_results_limit=25,
-        ),
         tool_call_limit=agent_def.max_turns or 20,
     )

aru_code-0.16.0/aru/cache_patch.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""Monkey-patch Agno's model layer to reduce token consumption.
+Two optimizations:
+1. **Tool result pruning** (ALL providers): After each tool execution, old tool
+   results in the message list are truncated to a short summary. This prevents
+   O(n²) token growth where each API call re-sends all previous tool results.
+2. **Cache breakpoints** (Anthropic only): Marks the last 2 messages with
+   cache_control for Anthropic's prompt caching.
+These patches intercept Agno's internal loop so they work transparently
+regardless of which provider is used.
+"""
+from __future__ import annotations
+# Max chars to keep from old tool results
+_TOOL_RESULT_KEEP_CHARS = 200
+# Number of recent tool results to keep in full
+_KEEP_RECENT_RESULTS = 1
+def _prune_tool_messages(messages):
+    """Truncate old tool result content in the message list.
+    Keeps only the last N tool results in full. Older ones are truncated
+    to a short preview. This runs BEFORE each API call, so accumulated
+    tool results don't bloat the context on every re-send.
+    """
+    # Find all tool message indices
+    tool_indices = [
+        i for i, msg in enumerate(messages)
+        if getattr(msg, "role", None) == "tool"
+    ]
+    if len(tool_indices) <= _KEEP_RECENT_RESULTS:
+        return
+    # Prune all except the last N
+    for idx in tool_indices[:-_KEEP_RECENT_RESULTS]:
+        msg = messages[idx]
+        content = getattr(msg, "content", None)
+        if content is None:
+            continue
+        content_str = str(content)
+        if len(content_str) <= _TOOL_RESULT_KEEP_CHARS:
+            continue
+        truncated = content_str[:_TOOL_RESULT_KEEP_CHARS] + "\n[...truncated]"
+        try:
+            msg.content = truncated
+            if hasattr(msg, "compressed_content"):
+                msg.compressed_content = None
+        except (AttributeError, TypeError):
+            pass
+def apply_cache_patch():
+    """Apply all patches to reduce Agno's token consumption."""
+    _patch_tool_result_pruning()
+    _patch_claude_cache_breakpoints()
+def _patch_tool_result_pruning():
+    """Patch format_function_call_results to prune old tool results.
+    This is called after each tool execution, right before the next API call.
+    Works for ALL providers (Claude, OpenAI, Qwen, etc.) since it patches
+    the base Model class.
+    """
+    from agno.models.base import Model
+    _original_format_results = Model.format_function_call_results
+    def _patched_format_results(self, messages, function_call_results, **kwargs):
+        # First: prune old tool results already in messages
+        _prune_tool_messages(messages)
+        # Then: add new results normally
+        return _original_format_results(self, messages, function_call_results, **kwargs)
+    Model.format_function_call_results = _patched_format_results
+def _patch_claude_cache_breakpoints():
+    """Patch Claude's format_messages to add cache breakpoints.
+    Marks the last 2 messages with cache_control for Anthropic's prompt
+    caching. Non-Anthropic providers ignore these fields.
+    """
+    try:
+        import agno.utils.models.claude as claude_utils
+    except ImportError:
+        return
+    _original_format = claude_utils.format_messages
+    def _patched_format_messages(messages, compress_tool_results=False):
+        chat_messages, system_message = _original_format(
+            messages, compress_tool_results=compress_tool_results
+        )
+        if not chat_messages:
+            return chat_messages, system_message
+        # Add cache_control to last 2 messages
+        cache_marker = {"type": "ephemeral"}
+        marked = 0
+        for msg in reversed(chat_messages):
+            if marked >= 2:
+                break
+            content = msg.get("content")
+            if isinstance(content, list) and content:
+                last_item = content[-1]
+                if isinstance(last_item, dict):
+                    last_item["cache_control"] = cache_marker
+                    marked += 1
+                elif hasattr(last_item, "type"):
+                    try:
+                        as_dict = last_item.model_dump() if hasattr(last_item, "model_dump") else dict(last_item)
+                        as_dict["cache_control"] = cache_marker
+                        content[-1] = as_dict
+                        marked += 1
+                    except Exception:
+                        pass
+            elif isinstance(content, str):
+                msg["content"] = [{"type": "text", "text": content, "cache_control": cache_marker}]
+                marked += 1
+        return chat_messages, system_message
+    claude_utils.format_messages = _patched_format_messages

{aru_code-0.15.0 → aru_code-0.16.0}/aru/cli.py RENAMED Viewed

@@ -50,6 +50,7 @@ from aru.display import (  # noqa: F401
 from aru.completers import (  # noqa: F401
     AruCompleter,
     FileMentionCompleter,
+    MentionResult,
     PasteState,
     SlashCommandCompleter,
     TIPS,
@@ -110,6 +111,11 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
     from aru.permissions import parse_permission_config, reset_session as perm_reset_session
     from aru.tools.codebase import cleanup_processes
+    # Inject cache breakpoints into Agno's Claude API calls — reduces token
+    # consumption by ~40% on multi-tool-call interactions via prompt caching.
+    from aru.cache_patch import apply_cache_patch
+    apply_cache_patch()
     ctx = init_ctx(console=console, skip_permissions=skip_permissions)
     store = SessionStore()
@@ -253,16 +259,19 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
         # Resolve @file mentions (skip known agent names)
         _agent_names = set(config.custom_agents.keys()) if config.custom_agents else set()
-        resolved, injected, attached_images = _resolve_mentions(user_input, os.getcwd(), _agent_names)
-        if injected > 0:
+        mention_result = _resolve_mentions(user_input, os.getcwd(), _agent_names)
+        attached_images = mention_result.images
+        # File contents go into history as separate prunable messages (not inline)
+        mention_file_msgs = mention_result.file_messages
+        if mention_result.count > 0:
             parts = []
-            text_count = injected - len(attached_images)
+            text_count = mention_result.count - len(attached_images)
             if text_count > 0:
                 parts.append(f"{text_count} file(s)")
             if attached_images:
                 parts.append(f"{len(attached_images)} image(s)")
             console.print(f"[dim]Attached {', '.join(parts)} from @ mentions[/dim]")
-            user_input = resolved
+            user_input = mention_result.text
         if paste_state.pasted_content and user_text:
             console.print(
@@ -276,6 +285,14 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
         if not user_input:
             continue
+        # Inject @file contents as prunable history entries BEFORE the user message.
+        # These look like simulated read_file tool calls and can be pruned/compacted
+        # normally, unlike inline content which bloats the user message permanently.
+        if mention_file_msgs:
+            for msg in mention_file_msgs:
+                session.add_message(msg["role"], msg["content"])
+            mention_file_msgs = []  # consumed
         # Reset "allow all" approvals for each new user message
         perm_reset_session()

{aru_code-0.15.0 → aru_code-0.16.0}/aru/completers.py RENAMED Viewed

@@ -4,6 +4,7 @@ from __future__ import annotations
 import os
 import re
+from dataclasses import dataclass
 from prompt_toolkit import PromptSession
 from prompt_toolkit.completion import Completer, Completion
@@ -18,24 +19,36 @@ from aru.commands import SLASH_COMMANDS
 from aru.config import AgentConfig
 _MENTION_RE = re.compile(r'(?<!\S)@([a-zA-Z0-9_./\\:-]+)')
-_MENTION_MAX_SIZE = 30_000  # bytes, same limit as read_file
+_MENTION_MAX_SIZE = 10_000  # bytes — smaller to protect context (model uses read_file for large files)
 _IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
 _IMAGE_MAX_SIZE = 20 * 1024 * 1024  # 20MB
-def _resolve_mentions(text: str, cwd: str, agent_names: set[str] | None = None) -> tuple[str, int, list[Image]]:
-    """Resolve @file mentions by appending file contents to the message.
+@dataclass
+class MentionResult:
+    """Result of resolving @file mentions."""
+    text: str                          # User text (without file contents)
+    file_messages: list[dict[str, str]]  # Simulated tool-call pairs for history
+    images: list[Image]
+    count: int                         # Total attached (files + images)
-    Image files (png, jpg, etc.) are returned as Image objects instead of text.
+def _resolve_mentions(text: str, cwd: str, agent_names: set[str] | None = None) -> MentionResult:
+    """Resolve @file mentions as simulated read_file tool calls.
+    Instead of inlining file contents into the user message (which bloats
+    history and can't be pruned), we return separate assistant+tool_result
+    message pairs that the session can prune/compact like normal tool outputs.
+    Image files are returned as Image objects.
     Skips @mentions that match known agent names.
-    Returns (resolved_text, number_of_files_attached, images).
     """
     agent_names = agent_names or set()
     matches = list(_MENTION_RE.finditer(text))
     if not matches:
-        return text, 0, []
+        return MentionResult(text=text, file_messages=[], images=[], count=0)
-    appendix_parts = []
+    file_messages: list[dict[str, str]] = []
     images: list[Image] = []
     seen = set()
     for m in matches:
@@ -64,21 +77,18 @@ def _resolve_mentions(text: str, cwd: str, agent_names: set[str] | None = None)
             size = os.path.getsize(abs_path)
             with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
                 content = f.read(_MENTION_MAX_SIZE)
-            if size > _MENTION_MAX_SIZE:
-                appendix_parts.append(
-                    f"\n\n---\nContents of {rel_path} (truncated to {_MENTION_MAX_SIZE // 1000}KB):\n```\n{content}\n```"
-                )
-            else:
-                appendix_parts.append(
-                    f"\n\n---\nContents of {rel_path}:\n```\n{content}\n```"
-                )
+            truncated = size > _MENTION_MAX_SIZE
+            label = f"[read_file: {rel_path}]"
+            if truncated:
+                label += f" (truncated to {_MENTION_MAX_SIZE // 1000}KB of {size // 1000}KB — use read_file for the rest)"
+            # Simulated tool call pair — can be pruned like normal tool outputs
+            file_messages.append({"role": "assistant", "content": label})
+            file_messages.append({"role": "user", "content": content})
         except OSError:
             continue
-    attached = len(appendix_parts) + len(images)
-    if appendix_parts:
-        return text + "".join(appendix_parts), attached, images
-    return text, attached, images
+    count = len(file_messages) // 2 + len(images)
+    return MentionResult(text=text, file_messages=file_messages, images=images, count=count)
 def _extract_agent_mention(

{aru_code-0.15.0 → aru_code-0.16.0}/aru/context.py RENAMED Viewed

@@ -11,15 +11,15 @@ from __future__ import annotations
 # ── Constants ──────────────────────────────────────────────────────
 # Pruning: minimum chars that must be freeable to justify a prune pass
-PRUNE_MINIMUM_CHARS = 12_000  # ~3K tokens (lower = prune sooner)
+PRUNE_MINIMUM_CHARS = 8_000  # ~2K tokens (was 12K — prune sooner)
 # Placeholder that replaces evicted content
-PRUNED_PLACEHOLDER = "[previous output cleared to save context]"
+PRUNED_PLACEHOLDER = "[cleared]"
 # User messages larger than this threshold are truncated when outside protection window
-PRUNE_USER_MSG_THRESHOLD = 2_000  # ~570 tokens — catches @file mentions
+PRUNE_USER_MSG_THRESHOLD = 1_200  # ~340 tokens (was 2K — catch file contents earlier)
 # How many chars to keep from the start of a pruned user message
-PRUNE_USER_MSG_KEEP = 500  # ~140 tokens — enough to understand the request
+PRUNE_USER_MSG_KEEP = 300  # ~85 tokens (was 500 — enough for the request intent)
 # Minimum number of recent user turns always protected (regardless of char budget)
-PRUNE_PROTECT_TURNS = 2
+PRUNE_PROTECT_TURNS = 1  # was 2 — only protect the very last turn
 # Tool result markers that should never be pruned (critical context)
 PRUNE_PROTECTED_MARKERS = {"[SubAgent-", "delegate_task"}
 # Tool names whose outputs should never be pruned (like OpenCode's PRUNE_PROTECTED_TOOLS)
@@ -27,16 +27,20 @@ PRUNE_PROTECTED_MARKERS = {"[SubAgent-", "delegate_task"}
 PRUNE_PROTECTED_TOOLS = {"delegate_task"}
 # Truncation: universal limits for any tool output
-TRUNCATE_MAX_LINES = 300
-TRUNCATE_MAX_BYTES = 15 * 1024  # 15 KB (was 20KB — tighter to prevent context bloat)
-TRUNCATE_KEEP_START = 200  # lines to keep from the start
-TRUNCATE_KEEP_END = 60  # lines to keep from the end
-TRUNCATE_MAX_LINE_LENGTH = 2000  # chars per individual line (prevents minified files)
+TRUNCATE_MAX_LINES = 200  # was 300 — tighter to save context
+TRUNCATE_MAX_BYTES = 10 * 1024  # 10 KB (was 15KB — save full to disk instead)
+TRUNCATE_KEEP_START = 150  # lines to keep from the start
+TRUNCATE_KEEP_END = 30  # lines to keep from the end (was 60)
+TRUNCATE_MAX_LINE_LENGTH = 1500  # chars per individual line (prevents minified files)
+# Directory for saving full truncated outputs (like OpenCode pattern)
+TRUNCATE_SAVE_DIR = ".aru/truncated"
 # Compaction: trigger when per-run input tokens exceed this fraction of model limit
-COMPACTION_THRESHOLD_RATIO = 0.70  # was 0.85 — compact earlier to avoid hitting limits
+COMPACTION_THRESHOLD_RATIO = 0.50  # was 0.70 — compact much earlier to stay lean
 # Compaction: target post-compaction size as fraction of model context limit
-COMPACTION_TARGET_RATIO = 0.15
+COMPACTION_TARGET_RATIO = 0.10  # was 0.15 — more aggressive compaction target
+# Compaction: also trigger after this many user turns (regardless of token count)
+COMPACTION_MAX_TURNS = 8
 # Compaction: reserve buffer for the compaction process itself (like OpenCode's 20K)
 COMPACTION_BUFFER_TOKENS = 20_000
 # Default model context limits (input tokens)
@@ -111,10 +115,10 @@ def _get_prune_protect_chars(model_id: str = "default") -> int:
     to prevent context overflow. Returns ~7% of the model's context in chars.
     """
     limit = MODEL_CONTEXT_LIMITS.get(model_id, MODEL_CONTEXT_LIMITS["default"])
-    # ~4 chars per token, protect ~7% of context (was 10% — tighter budget)
-    protect = int(limit * 0.07 * 4)
-    # Clamp between 15K (minimum usable) and 60K (diminishing returns)
-    return max(15_000, min(protect, 60_000))
+    # ~4 chars per token, protect ~5% of context (was 7% — tighter budget)
+    protect = int(limit * 0.05 * 4)
+    # Clamp between 10K (minimum usable) and 40K (diminishing returns)
+    return max(10_000, min(protect, 40_000))
 def prune_history(
@@ -214,42 +218,50 @@ def _truncate_long_lines(lines: list[str]) -> list[str]:
     return result
+def _save_truncated_output(text: str) -> str | None:
+    """Save full truncated output to disk and return the file path.
+    Returns None if saving fails (non-fatal — hint will omit path).
+    """
+    import os
+    import time
+    save_dir = os.path.join(os.getcwd(), TRUNCATE_SAVE_DIR)
+    try:
+        os.makedirs(save_dir, exist_ok=True)
+        filename = f"output_{int(time.time() * 1000)}.txt"
+        filepath = os.path.join(save_dir, filename)
+        with open(filepath, "w", encoding="utf-8") as f:
+            f.write(text)
+        return filepath
+    except OSError:
+        return None
 def _build_truncation_hint(
     source_file: str = "",
     source_tool: str = "",
     lines_shown: int = 0,
+    saved_path: str | None = None,
 ) -> str:
-    """Build a context-aware truncation hint that guides the LLM to save tokens.
+    """Build a context-aware truncation hint.
-    When the source file is known, provides a direct read_file reference with
-    the next offset. Otherwise falls back to generic tool suggestions.
-    Always suggests delegate_task for large exploration work.
+    When output was saved to disk, points to the saved file.
+    When the source file is known, provides a direct read_file reference.
     """
-    parts = ["\n[Hint: Output was truncated."]
+    parts = ["[Truncated."]
-    if source_file:
-        # File-specific: tell the LLM exactly how to access the rest
+    if saved_path:
+        parts.append(f" Full output saved to: {saved_path}")
+        parts.append(" Use grep_search or read_file with start_line/end_line to inspect.")
+    elif source_file:
         next_line = lines_shown + 1 if lines_shown else 1
-        parts.append(
-            f' To see more: read_file("{source_file}", start_line={next_line}).'
-            f" Use grep_search to find specific content instead of reading everything."
-        )
-    elif source_tool == "bash":
-        parts.append(
-            " Use grep_search to find specific content in project files."
-            " Do NOT re-run the command to get full output."
-        )
+        parts.append(f' read_file("{source_file}", start_line={next_line}) for more.')
     else:
-        parts.append(
-            " Use grep_search to find specific content, or read_file with"
-            " start_line/end_line for incremental reading."
-        )
+        parts.append(" Use grep_search to find specific content.")
-    # Always suggest delegation for large outputs
-    parts.append(
-        " For large exploration tasks, use delegate_task to keep your context clean.]"
-    )
-    return "".join(parts)
+    parts.append("]")
+    return " ".join(parts)
 def truncate_output(
@@ -282,17 +294,18 @@ def truncate_output(
     if byte_len <= TRUNCATE_MAX_BYTES and line_count <= TRUNCATE_MAX_LINES:
         return "".join(lines)
+    # Save full output to disk before truncating (like OpenCode)
+    saved_path = _save_truncated_output(text)
     # Truncate by lines
     if line_count > TRUNCATE_MAX_LINES:
         head = lines[:TRUNCATE_KEEP_START]
-        tail = lines[-TRUNCATE_KEEP_END:]
-        omitted = line_count - TRUNCATE_KEEP_START - TRUNCATE_KEEP_END
-        hint = _build_truncation_hint(source_file, source_tool, TRUNCATE_KEEP_START)
+        omitted = line_count - TRUNCATE_KEEP_START
+        hint = _build_truncation_hint(source_file, source_tool, TRUNCATE_KEEP_START, saved_path)
         return (
             "".join(head)
-            + f"\n\n[... {omitted:,} lines omitted ({line_count:,} total)]"
-            + hint + "\n\n"
-            + "".join(tail)
+            + f"\n\n[... {omitted:,} lines omitted ({line_count:,} total)]\n"
+            + hint + "\n"
         )
     # Truncate by bytes (lines fit but total bytes too large)
@@ -306,11 +319,11 @@ def truncate_output(
         total += line_bytes
     remaining = line_count - len(kept_lines)
-    hint = _build_truncation_hint(source_file, source_tool, len(kept_lines))
+    hint = _build_truncation_hint(source_file, source_tool, len(kept_lines), saved_path)
     return (
         "".join(kept_lines)
         + f"\n\n[... truncated at ~{TRUNCATE_MAX_BYTES // 1024}KB — "
-        f"{remaining:,} more lines]"
+        f"{remaining:,} more lines]\n"
         + hint + "\n"
     )
@@ -329,16 +342,22 @@ def should_compact(
 ) -> bool:
     """Check if the conversation should be compacted.
-    Uses OpenCode's approach: usable = model_limit - buffer, then
-    trigger when tokens >= usable * threshold_ratio.
+    Triggers on EITHER condition:
+    1. Token-based: tokens >= usable_context * threshold_ratio
+    2. Turn-based: user turns >= COMPACTION_MAX_TURNS (prevents slow token creep)
-    Accepts either an estimated token count (int) or the history list
-    (from which tokens are estimated via char count).
+    Accepts either an estimated token count (int) or the history list.
     """
     if isinstance(history_or_tokens, list):
-        tokens = estimate_history_tokens(history_or_tokens)
+        history = history_or_tokens
+        tokens = estimate_history_tokens(history)
+        # Turn-based trigger: count user messages
+        user_turns = sum(1 for m in history if m["role"] == "user")
+        if user_turns >= COMPACTION_MAX_TURNS:
+            return True
     else:
         tokens = history_or_tokens
     limit = MODEL_CONTEXT_LIMITS.get(model_id, MODEL_CONTEXT_LIMITS["default"])
     usable = limit - COMPACTION_BUFFER_TOKENS
     threshold = int(usable * COMPACTION_THRESHOLD_RATIO)

{aru_code-0.15.0 → aru_code-0.16.0}/aru/session.py RENAMED Viewed

@@ -145,6 +145,8 @@ class Session:
         self._cached_tree: str | None = None
         self._cached_git_status: str | None = None
         self._context_dirty: bool = True
+        # Track whether AGENTS.md/extra instructions were already sent (skip on subsequent turns)
+        self.extra_instructions_sent: bool = False
         # Tree depth for env context (configurable via aru.json "tree_depth")
         self._tree_max_depth: int = 2
         # Token budget (0 = unlimited)

{aru_code-0.15.0 → aru_code-0.16.0}/aru/tools/codebase.py RENAMED Viewed

@@ -54,23 +54,23 @@ def _format_diff(old_string: str, new_string: str) -> Group:
-# Hard ceiling per tool result (~10K tokens). Even max_size=0 respects this per chunk.
-_READ_HARD_CAP = 40_000  # bytes (was 60K — tighter to protect context)
+# Hard ceiling per tool result (~7K tokens). Even max_size=0 respects this per chunk.
+_READ_HARD_CAP = 25_000  # bytes (was 40K — each tool result re-sent on next API call)
 def clear_read_cache():
     """Clear the read cache. Call after file mutations to avoid stale data."""
     get_ctx().read_cache.clear()
-def read_file(file_path: str, start_line: int = 0, end_line: int = 0, max_size: int = 12_000) -> str:
+def read_file(file_path: str, start_line: int = 0, end_line: int = 0, max_size: int = 8_000) -> str:
     """Read file contents. Returns chunked output for large files.
     Args:
         file_path: Path to the file (absolute or relative).
         start_line: First line (1-indexed, inclusive). 0 = beginning.
         end_line: Last line (1-indexed, inclusive). 0 = end.
-        max_size: Max bytes before truncation. Default 12KB.
-            Set to 0 to read the full file in chunks — each chunk up to ~40KB.
+        max_size: Max bytes before truncation. Default 8KB.
+            Set to 0 to read the full file in chunks — each chunk up to ~25KB.
             The first chunk includes a continuation hint so you can call again
             with start_line to get the next chunk.
     """
@@ -505,15 +505,15 @@ def glob_search(pattern: str, directory: str = ".") -> str:
     return "\n".join(matches)
-def grep_search(pattern: str, directory: str = ".", file_glob: str = "", context_lines: int = 10) -> str:
+def grep_search(pattern: str, directory: str = ".", file_glob: str = "", context_lines: int = 5) -> str:
     """Search for a regex pattern in file contents.
     Args:
         pattern: Regular expression pattern to search for.
         directory: Directory to search in. Defaults to current directory.
         file_glob: Optional glob to filter which files to search (e.g. '*.py').
-        context_lines: Lines of context before and after each match (like grep -C). Default 10.
-            Use 0 for file-level matches only. Use 30+ for full function bodies.
+        context_lines: Lines of context before and after each match (like grep -C). Default 5.
+            Use 0 for file-level matches only. Use 20+ for full function bodies.
     """
     import re
@@ -1158,7 +1158,7 @@ EXECUTOR_TOOLS = [
     delegate_task,
 ]
-# General-purpose tools
+# General-purpose tools (full set — used as fallback)
 GENERAL_TOOLS = [
     read_file,
     read_file_smart,

{aru_code-0.15.0 → aru_code-0.16.0/aru_code.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.15.0
+Version: 0.16.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT
@@ -56,7 +56,7 @@ An intelligent coding assistant for the terminal, powered by LLMs and [Agno](htt
 - **Multi-Agent Architecture** — Specialized agents for planning, execution, and conversation
 - **Interactive CLI** — Streaming responses, multi-line paste, session management
 - **Image Support** — Attach images via `@` mentions for multimodal analysis (Claude, GPT-4o, Gemini)
-- **16 Integrated Tools** — File operations, code search, shell, web search, task delegation
+- **11 Integrated Tools** — File operations, code search, shell, web search, task delegation
 - **Task Planning** — Break down complex tasks into steps with automatic execution
 - **Multi-Provider** — Anthropic, OpenAI, Ollama, Groq, OpenRouter, DeepSeek, and others via custom configuration
 - **Custom Commands, Skills, and Agents** — Extend aru via the `.agents/` directory
@@ -479,15 +479,14 @@ Aru can load tools from MCP servers. Configure in `.aru/mcp_config.json`:
 ### File Operations
 - `read_file` — Reads files with line range support and binary detection
-- `read_file_smart` — Smart file reading focused on relevant snippets for the query
-- `write_file` — Writes files
-- `edit_file` — Find-replace edits
+- `read_file_smart` — Answers specific questions about a file without returning raw content
+- `write_file` — Writes content to files, creating directories as needed
+- `edit_file` — Find-and-replace edits on files
 ### Search & Discovery
 - `glob_search` — Find files by pattern (respects .gitignore)
 - `grep_search` — Content search with regex and file filtering
 - `list_directory` — Directory listing with gitignore filtering
-- `rank_files` — Multi-factor file relevance ranking (name, structure, recency)
 ### Shell & Web
 - `bash` — Executes shell commands with permission gates
@@ -517,7 +516,7 @@ aru-code/
 │   │   ├── planner.py      # Planning agent
 │   │   └── executor.py     # Execution agent
 │   └── tools/
-│       ├── codebase.py     # 16 core tools
+│       ├── codebase.py     # 11 core tools
 │       ├── ast_tools.py    # Tree-sitter code analysis
 │       ├── ranker.py       # File relevance ranking
 │       ├── mcp_client.py   # MCP client

{aru_code-0.15.0 → aru_code-0.16.0}/aru_code.egg-info/SOURCES.txt RENAMED Viewed

@@ -3,6 +3,7 @@ README.md
 pyproject.toml
 aru/__init__.py
 aru/agent_factory.py
+aru/cache_patch.py
 aru/cli.py
 aru/commands.py
 aru/completers.py

{aru_code-0.15.0 → aru_code-0.16.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "aru-code"
-version = "0.15.0"
+version = "0.16.0"
 description = "A Claude Code clone built with Agno agents"
 readme = "README.md"
 license = "MIT"

{aru_code-0.15.0 → aru_code-0.16.0}/tests/test_cli.py RENAMED Viewed

@@ -46,35 +46,40 @@ class TestSanitizeInput:
 class TestResolveMentions:
     def test_no_mentions(self, tmp_path):
-        result, count, _imgs = _resolve_mentions("hello world", str(tmp_path))
-        assert result == "hello world"
-        assert count == 0
+        mr = _resolve_mentions("hello world", str(tmp_path))
+        assert mr.text == "hello world"
+        assert mr.count == 0
+        assert mr.file_messages == []
     def test_resolves_file_mention(self, tmp_path):
         (tmp_path / "config.py").write_text("DEBUG = True")
-        result, count, _imgs = _resolve_mentions("check @config.py", str(tmp_path))
-        assert "DEBUG = True" in result
-        assert "Contents of config.py" in result
-        assert count == 1
+        mr = _resolve_mentions("check @config.py", str(tmp_path))
+        # File content now goes into file_messages, not inline text
+        assert mr.count == 1
+        assert len(mr.file_messages) == 2  # assistant label + user content
+        assert "read_file: config.py" in mr.file_messages[0]["content"]
+        assert "DEBUG = True" in mr.file_messages[1]["content"]
     def test_nonexistent_file_ignored(self, tmp_path):
-        result, count, _imgs = _resolve_mentions("check @missing.py", str(tmp_path))
-        assert result == "check @missing.py"
-        assert count == 0
+        mr = _resolve_mentions("check @missing.py", str(tmp_path))
+        assert mr.text == "check @missing.py"
+        assert mr.count == 0
     def test_deduplicates_mentions(self, tmp_path):
         (tmp_path / "file.py").write_text("code")
-        result, count, _imgs = _resolve_mentions("@file.py and @file.py", str(tmp_path))
-        assert result.count("Contents of file.py") == 1
-        assert count == 1
+        mr = _resolve_mentions("@file.py and @file.py", str(tmp_path))
+        assert mr.count == 1
+        assert len(mr.file_messages) == 2  # one pair
     def test_multiple_files(self, tmp_path):
         (tmp_path / "a.py").write_text("aaa")
         (tmp_path / "b.py").write_text("bbb")
-        result, count, _imgs = _resolve_mentions("@a.py and @b.py", str(tmp_path))
-        assert "Contents of a.py" in result
-        assert "Contents of b.py" in result
-        assert count == 2
+        mr = _resolve_mentions("@a.py and @b.py", str(tmp_path))
+        assert mr.count == 2
+        assert len(mr.file_messages) == 4  # two pairs
+        all_content = " ".join(m["content"] for m in mr.file_messages)
+        assert "aaa" in all_content
+        assert "bbb" in all_content
     def test_mention_regex_pattern(self):
         matches = _MENTION_RE.findall("check @file.py now")

{aru_code-0.15.0 → aru_code-0.16.0}/tests/test_cli_completers.py RENAMED Viewed

@@ -606,61 +606,53 @@ class TestExtractAgentMention:
 class TestImageMentions:
     """Tests for image file detection in @mentions."""
-    def test_resolve_mentions_returns_three_tuple(self, tmp_path):
-        result = _resolve_mentions("hello", str(tmp_path))
-        assert len(result) == 3
-        text, count, images = result
-        assert text == "hello"
-        assert count == 0
-        assert images == []
+    def test_resolve_mentions_returns_mention_result(self, tmp_path):
+        mr = _resolve_mentions("hello", str(tmp_path))
+        assert mr.text == "hello"
+        assert mr.count == 0
+        assert mr.images == []
+        assert mr.file_messages == []
     def test_resolve_mentions_image_file(self, tmp_path):
         img = tmp_path / "screenshot.png"
         img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100)
-        text, count, images = _resolve_mentions(
-            "analyze @screenshot.png", str(tmp_path)
-        )
-        assert count == 1
-        assert len(images) == 1
-        assert isinstance(images[0], Image)
-        assert images[0].id == "screenshot.png"
-        # Image content should NOT be appended as text
-        assert "```" not in text
+        mr = _resolve_mentions("analyze @screenshot.png", str(tmp_path))
+        assert mr.count == 1
+        assert len(mr.images) == 1
+        assert isinstance(mr.images[0], Image)
+        assert mr.images[0].id == "screenshot.png"
+        # Image content should NOT be in file_messages
+        assert len(mr.file_messages) == 0
     def test_resolve_mentions_mixed_files_and_images(self, tmp_path):
         (tmp_path / "code.py").write_text("print('hello')", encoding="utf-8")
         (tmp_path / "diagram.jpg").write_bytes(b"\xff\xd8\xff" + b"\x00" * 100)
-        text, count, images = _resolve_mentions(
-            "review @code.py and @diagram.jpg", str(tmp_path)
-        )
-        assert count == 2
-        assert len(images) == 1
-        assert images[0].id == "diagram.jpg"
-        # Text file content should be appended
-        assert "print('hello')" in text
+        mr = _resolve_mentions("review @code.py and @diagram.jpg", str(tmp_path))
+        assert mr.count == 2
+        assert len(mr.images) == 1
+        assert mr.images[0].id == "diagram.jpg"
+        # Text file content goes into file_messages
+        all_content = " ".join(m["content"] for m in mr.file_messages)
+        assert "print('hello')" in all_content
     def test_resolve_mentions_multiple_images(self, tmp_path):
         (tmp_path / "a.png").write_bytes(b"\x89PNG" + b"\x00" * 100)
         (tmp_path / "b.webp").write_bytes(b"RIFF" + b"\x00" * 100)
-        text, count, images = _resolve_mentions(
-            "compare @a.png @b.webp", str(tmp_path)
-        )
-        assert count == 2
-        assert len(images) == 2
+        mr = _resolve_mentions("compare @a.png @b.webp", str(tmp_path))
+        assert mr.count == 2
+        assert len(mr.images) == 2
     def test_resolve_mentions_image_too_large(self, tmp_path):
         img = tmp_path / "huge.png"
         # Write just over the 20MB limit header
         img.write_bytes(b"\x89PNG" + b"\x00" * (20 * 1024 * 1024 + 1))
-        text, count, images = _resolve_mentions(
-            "analyze @huge.png", str(tmp_path)
-        )
-        assert count == 0
-        assert len(images) == 0
+        mr = _resolve_mentions("analyze @huge.png", str(tmp_path))
+        assert mr.count == 0
+        assert len(mr.images) == 0
     def test_resolve_mentions_all_image_extensions(self, tmp_path):
         for ext in _IMAGE_EXTENSIONS:
@@ -668,8 +660,8 @@ class TestImageMentions:
             (tmp_path / fname).write_bytes(b"\x00" * 100)
         mentions = " ".join(f"@test{ext}" for ext in _IMAGE_EXTENSIONS)
-        text, count, images = _resolve_mentions(mentions, str(tmp_path))
-        assert len(images) == len(_IMAGE_EXTENSIONS)
+        mr = _resolve_mentions(mentions, str(tmp_path))
+        assert len(mr.images) == len(_IMAGE_EXTENSIONS)
     def test_image_completer_shows_image_metadata(self, tmp_path):
         (tmp_path / "photo.png").touch()