aru-code 0.13.1__tar.gz → 0.13.3__tar.gz
This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
- {aru_code-0.13.1/aru_code.egg-info → aru_code-0.13.3}/PKG-INFO +1 -1
- aru_code-0.13.3/aru/__init__.py +1 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/cli.py +18 -7
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/context.py +106 -36
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/runner.py +24 -5
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/runtime.py +1 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/codebase.py +3 -3
- {aru_code-0.13.1 → aru_code-0.13.3/aru_code.egg-info}/PKG-INFO +1 -1
- {aru_code-0.13.1 → aru_code-0.13.3}/pyproject.toml +1 -1
- aru_code-0.13.1/aru/__init__.py +0 -1
- {aru_code-0.13.1 → aru_code-0.13.3}/LICENSE +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/README.md +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/agent_factory.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/agents/__init__.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/agents/base.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/agents/executor.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/agents/planner.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/commands.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/completers.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/config.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/display.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/permissions.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/providers.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/session.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/__init__.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/gitignore.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/ranker.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/tasklist.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru_code.egg-info/SOURCES.txt +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/setup.cfg +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_agents_base.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_ast_tools.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_base.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_completers.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_new.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_run_cli.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_session.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_cli_shell.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_codebase.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_config.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_context.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_executor.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_gitignore.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_main.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_mcp_client.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_permissions.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_planner.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_providers.py +0 -0
- {aru_code-0.13.1 → aru_code-0.13.3}/tests/test_ranker.py +0 -0
aru_code-0.13.3/aru/__init__.py
ADDED

```diff
@@ -0,0 +1 @@
+__version__ = "0.13.3"
```
{aru_code-0.13.1 → aru_code-0.13.3}/aru/cli.py
CHANGED

```diff
@@ -214,18 +214,29 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
         _render_input_separator()
         model_tb = session.model_display
         from prompt_toolkit.formatted_text import HTML
+
+        def _bottom_toolbar():
+            mcp_part = ""
+            if ctx.mcp_loaded_msg:
+                mcp_part = (
+                    f' <style fg="ansigray">│</style>'
+                    f' <style fg="ansigray">{ctx.mcp_loaded_msg}</style>'
+                )
+            return HTML(
+                f' <style fg="ansigray">{model_tb}</style>'
+                f' <style fg="ansigray">│</style>'
+                f' <style fg="ansigray">/help</style>'
+                f' <style fg="ansigray">│</style>'
+                f' <style fg="ansigray">Esc+Enter newline</style>'
+                f'{mcp_part}'
+            )
+
         user_text = (
             await asyncio.to_thread(
                 prompt_session.prompt,
                 HTML('<b><ansigreen>❯</ansigreen></b> '),
                 multiline=False,
-                bottom_toolbar=HTML(
-                    f' <style fg="ansigray">{model_tb}</style>'
-                    f' <style fg="ansigray">│</style>'
-                    f' <style fg="ansigray">/help</style>'
-                    f' <style fg="ansigray">│</style>'
-                    f' <style fg="ansigray">Esc+Enter newline</style>'
-                ),
+                bottom_toolbar=_bottom_toolbar,
             )
         ).strip()
         _render_input_separator()
```
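The switch from a static `bottom_toolbar` value to a callable is the behavioral core of this hunk: prompt_toolkit re-invokes a callable toolbar on every redraw, so the MCP status appears as soon as background loading sets `ctx.mcp_loaded_msg`, without rebuilding the prompt session. A minimal standalone sketch of that mechanism (the `status` dict and toolbar text are illustrative, not from the package):

```python
# Minimal sketch: prompt_toolkit calls a callable bottom_toolbar on every
# redraw, so state mutated after the prompt starts still shows up.
from prompt_toolkit import PromptSession
from prompt_toolkit.formatted_text import HTML

status = {"mcp": ""}  # imagine a background task fills this in later

def bottom_toolbar():
    extra = f' │ {status["mcp"]}' if status["mcp"] else ""
    return HTML(f'<style fg="ansigray"> my-model │ /help{extra}</style>')

session = PromptSession()
text = session.prompt("❯ ", bottom_toolbar=bottom_toolbar)
```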
{aru_code-0.13.1 → aru_code-0.13.3}/aru/context.py
CHANGED

```diff
@@ -10,8 +10,6 @@ from __future__ import annotations
 
 # ── Constants ──────────────────────────────────────────────────────
 
-# Pruning: protect the most recent N chars of content from eviction
-PRUNE_PROTECT_CHARS = 50_000  # ~14K tokens
 # Pruning: minimum chars that must be freeable to justify a prune pass
 PRUNE_MINIMUM_CHARS = 20_000  # ~5.7K tokens
 # Placeholder that replaces evicted content
```
```diff
@@ -27,8 +25,10 @@ TRUNCATE_MAX_BYTES = 20 * 1024  # 20 KB
 TRUNCATE_KEEP_START = 350  # lines to keep from the start
 TRUNCATE_KEEP_END = 100  # lines to keep from the end
 
-# Compaction: trigger when
+# Compaction: trigger when per-run input tokens exceed this fraction of model limit
 COMPACTION_THRESHOLD_RATIO = 0.85
+# Compaction: target post-compaction size as fraction of model context limit
+COMPACTION_TARGET_RATIO = 0.15
 # Default model context limits (input tokens)
 MODEL_CONTEXT_LIMITS: dict[str, int] = {
     # Anthropic
```
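With the ratios above, a hypothetical model with a 200,000-token input limit triggers compaction at 170,000 per-run input tokens, and the new `COMPACTION_TARGET_RATIO` implies a post-compaction target of about 30,000 tokens. Note the target constant is only introduced here; nothing else in this diff references it yet.

```python
# Quick arithmetic with the constants above (the 200K limit is illustrative;
# the actual MODEL_CONTEXT_LIMITS entries are not shown in this diff).
COMPACTION_THRESHOLD_RATIO = 0.85
COMPACTION_TARGET_RATIO = 0.15
limit = 200_000
print(int(limit * COMPACTION_THRESHOLD_RATIO))  # 170000 -> compaction triggers
print(int(limit * COMPACTION_TARGET_RATIO))     # 30000  -> post-compaction target
```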
```diff
@@ -83,12 +83,27 @@ Be concise but complete. This summary replaces the full conversation history."""
 
 # ── Layer 1: Pruning ──────────────────────────────────────────────
 
-def prune_history(history: list[dict[str, str]]) -> list[dict[str, str]]:
+def _get_prune_protect_chars(model_id: str = "default") -> int:
+    """Scale protection window based on model context size.
+
+    Larger models get more protection; smaller models prune more aggressively
+    to delay compaction. Returns ~10% of the model's context in chars (~3.5 chars/token).
+    """
+    limit = MODEL_CONTEXT_LIMITS.get(model_id, MODEL_CONTEXT_LIMITS["default"])
+    # ~3.5 chars per token, protect ~10% of context
+    protect = int(limit * 0.10 * 3.5)
+    # Clamp between 20K (minimum usable) and 80K (diminishing returns)
+    return max(20_000, min(protect, 80_000))
+
+
+def prune_history(
+    history: list[dict[str, str]], model_id: str = "default"
+) -> list[dict[str, str]]:
     """Replace old messages with a short placeholder to reduce tokens.
 
     Walks backward through history, protecting the most recent content
-    (
-
+    (scaled to the model's context size). Older messages beyond that
+    budget are pruned:
     - Assistant messages: replaced entirely with placeholder
     - User messages over PRUNE_USER_MSG_THRESHOLD: truncated to first N chars
 
```
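The replacement of the fixed `PRUNE_PROTECT_CHARS = 50_000` with `_get_prune_protect_chars` is easiest to sanity-check numerically. Spot checks of the formula at a few context sizes (the limits are hypothetical, since the `MODEL_CONTEXT_LIMITS` table itself is not part of this diff):

```python
# protect = limit * 0.10 * 3.5 chars, clamped to [20_000, 80_000].
for limit in (8_000, 32_000, 128_000, 200_000, 1_000_000):
    protect = max(20_000, min(int(limit * 0.10 * 3.5), 80_000))
    print(f"{limit:>9} tokens -> protect {protect:>6} chars")
# 8_000     -> 20_000  (2_800 clamped up to the floor)
# 32_000    -> 20_000  (11_200 clamped up)
# 128_000   -> 44_800
# 200_000   -> 70_000
# 1_000_000 -> 80_000  (350_000 clamped down to the ceiling)
```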
```diff
@@ -97,11 +112,13 @@ def prune_history(history: list[dict[str, str]]) -> list[dict[str, str]]:
     if len(history) <= 2:
         return list(history)
 
+    protect_chars = _get_prune_protect_chars(model_id)
+
     # Calculate total prunable chars (both roles)
     total_chars = sum(len(msg["content"]) for msg in history)
 
     # Not enough to prune
-    if total_chars < PRUNE_PROTECT_CHARS + PRUNE_MINIMUM_CHARS:
+    if total_chars < protect_chars + PRUNE_MINIMUM_CHARS:
         return list(history)
 
     # Walk backward, protecting recent content
```
```diff
@@ -112,7 +129,7 @@ def prune_history(history: list[dict[str, str]]) -> list[dict[str, str]]:
         msg = result[i]
         msg_len = len(msg["content"])
 
-        if protected + msg_len <= PRUNE_PROTECT_CHARS:
+        if protected + msg_len <= protect_chars:
             # Still within protection window
             protected += msg_len
         else:
```
```diff
@@ -121,8 +138,6 @@ def prune_history(history: list[dict[str, str]]) -> list[dict[str, str]]:
                 if msg["content"] != PRUNED_PLACEHOLDER:
                     result[i] = {"role": "assistant", "content": PRUNED_PLACEHOLDER}
             elif msg["role"] == "user" and msg_len > PRUNE_USER_MSG_THRESHOLD:
-                # Large user messages (e.g. @file mentions with file contents)
-                # are truncated to keep a brief summary of the original request
                 truncated = msg["content"][:PRUNE_USER_MSG_KEEP] + \
                     f"\n\n[... {msg_len - PRUNE_USER_MSG_KEEP:,} chars pruned to save context ...]"
                 result[i] = {"role": "user", "content": truncated}
```
```diff
@@ -181,21 +196,79 @@ def truncate_output(text: str) -> str:
 
 # ── Layer 3: Compaction ───────────────────────────────────────────
 
-def
-    """
+def estimate_history_tokens(history: list[dict[str, str]]) -> int:
+    """Estimate token count from conversation history chars (~3.5 chars/token)."""
+    total_chars = sum(len(msg["content"]) for msg in history)
+    return int(total_chars / 3.5)
+
+
+def should_compact(
+    history_or_tokens: int | list[dict[str, str]],
+    model_id: str = "default",
+) -> bool:
+    """Check if the conversation should be compacted (reactive, post-run).
+
+    Accepts either an estimated token count (int) or the history list
+    (from which tokens are estimated via char count).
+    """
+    if isinstance(history_or_tokens, list):
+        tokens = estimate_history_tokens(history_or_tokens)
+    else:
+        tokens = history_or_tokens
     limit = MODEL_CONTEXT_LIMITS.get(model_id, MODEL_CONTEXT_LIMITS["default"])
     threshold = int(limit * COMPACTION_THRESHOLD_RATIO)
-    return
+    return tokens >= threshold
+
+
+def would_prune(history: list[dict[str, str]], model_id: str = "default") -> bool:
+    """Check if prune_history would discard content from this history.
+
+    Uses the exact same criteria as prune_history: total chars exceed
+    the protection window + minimum prunable threshold.
+    """
+    if len(history) <= 2:
+        return False
+    total_chars = sum(len(msg["content"]) for msg in history)
+    protect_chars = _get_prune_protect_chars(model_id)
+    return total_chars >= protect_chars + PRUNE_MINIMUM_CHARS
+
 
+def _split_history(history: list[dict[str, str]], model_id: str = "default") -> tuple[list[dict[str, str]], list[dict[str, str]]]:
+    """Split history into old (to summarize) and recent (to keep intact).
+
+    Uses the same protection window as pruning.
+    """
+    protect_chars = _get_prune_protect_chars(model_id)
+    protected = 0
+    split_idx = len(history)
+    for i in range(len(history) - 1, -1, -1):
+        msg_len = len(history[i]["content"])
+        if protected + msg_len <= protect_chars:
+            protected += msg_len
+            split_idx = i
+        else:
+            break
+    return history[:split_idx], history[split_idx:]
+
+
+def build_compaction_prompt(
+    history: list[dict[str, str]],
+    plan_task: str | None = None,
+    model_id: str = "default",
+) -> str:
+    """Build the prompt sent to the compaction agent.
+
+    Only includes OLD messages (outside the protection window) for
+    summarization. Recent messages are kept intact by apply_compaction.
+    """
+    old_msgs, _ = _split_history(history, model_id)
 
-def build_compaction_prompt(history: list[dict[str, str]], plan_task: str | None = None) -> str:
-    """Build the prompt sent to the compaction agent to summarize the conversation."""
     parts = [COMPACTION_TEMPLATE, "\n\n---\n\n## Conversation to summarize:\n"]
 
     if plan_task:
         parts.append(f"**Active task:** {plan_task}\n\n")
 
-    for msg in history:
+    for msg in old_msgs:
         role = msg["role"].upper()
         content = msg["content"]
         # Cap individual messages in the compaction input to avoid blowing up
```
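The new helpers share one notion of "recent": `would_prune` and `should_compact` gate the two layers, while `_split_history` draws the old/recent boundary that both `build_compaction_prompt` and `apply_compaction` respect. A toy walk-through, assuming the protection window clamps to its 20,000-char floor (the default entry in `MODEL_CONTEXT_LIMITS` is not shown in this diff, so the exact window is an assumption):

```python
# Toy walk-through; message sizes chosen so a 20_000-char protection window
# covers exactly the last two messages. Assumes the default model's window
# clamps to the 20_000 floor.
from aru.context import would_prune, _split_history

history = [
    {"role": "user", "content": "a" * 15_000},
    {"role": "assistant", "content": "b" * 15_000},
    {"role": "user", "content": "c" * 12_000},
    {"role": "assistant", "content": "d" * 6_000},
]
# total = 48_000 chars >= 20_000 (window) + 20_000 (PRUNE_MINIMUM_CHARS)
print(would_prune(history))  # True under the assumed window

old, recent = _split_history(history)
# Backward walk: 6_000 + 12_000 = 18_000 fits; adding 15_000 would not.
print(len(old), len(recent))  # 2 2
```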
```diff
@@ -206,26 +279,22 @@ def build_compaction_prompt(history: list[dict[str, str]], plan_task: str | None
     return "".join(parts)
 
 
-
-
+
+def apply_compaction(
+    history: list[dict[str, str]], summary: str, model_id: str = "default"
+) -> list[dict[str, str]]:
+    """Replace OLD messages with a summary, keep RECENT messages intact.
+
+    Uses the same protection window as pruning: recent messages within
+    the window are preserved as-is, older messages are replaced by a
+    compaction summary. This preserves the natural conversation flow.
+    """
+    _, recent = _split_history(history, model_id)
+
     compacted = [
         {"role": "user", "content": f"[Conversation compacted]\n\n{summary}"}
     ]
-
-    last_user = None
-    last_assistant = None
-    for msg in reversed(history):
-        if msg["role"] == "user" and last_user is None:
-            last_user = msg
-        elif msg["role"] == "assistant" and last_assistant is None:
-            last_assistant = msg
-        if last_user and last_assistant:
-            break
-
-    if last_assistant:
-        compacted.append(last_assistant)
-    if last_user and last_user != compacted[0]:
-        compacted.append(last_user)
+    compacted.extend(recent)
 
     return compacted
 
```
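The behavioral change in `apply_compaction` is clearest by shape: the old code appended only the last user/assistant pair after the summary message, while the new code keeps every message inside the protection window, in order. Continuing the toy history above (same caveat about the assumed window):

```python
# Before: [u1, a1, u2, a2]
# After:  [summary-as-user-message, u2, a2]
from aru.context import apply_compaction

compacted = apply_compaction(history, summary="what happened so far")
print(compacted[0]["content"].splitlines()[0])  # -> [Conversation compacted]
print(len(compacted))                           # -> 3 under the assumed window
```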
```diff
@@ -234,6 +303,7 @@ async def compact_conversation(
     history: list[dict[str, str]],
     model_ref: str,
     plan_task: str | None = None,
+    model_id: str = "default",
 ) -> list[dict[str, str]]:
     """Run the compaction agent to summarize and replace history.
 
```
```diff
@@ -243,7 +313,7 @@ async def compact_conversation(
    from aru.runtime import get_ctx
    from aru.providers import create_model
 
-    prompt = build_compaction_prompt(history, plan_task)
+    prompt = build_compaction_prompt(history, plan_task, model_id=model_id)
 
    try:
        from agno.agent import Agent
```
```diff
@@ -263,12 +333,12 @@ async def compact_conversation(
         # Fallback: simple mechanical summary
         summary = _fallback_summary(history, plan_task)
 
-        return apply_compaction(history, summary)
+        return apply_compaction(history, summary, model_id=model_id)
 
     except Exception:
         # Fallback if agent fails
         summary = _fallback_summary(history, plan_task)
-        return apply_compaction(history, summary)
+        return apply_compaction(history, summary, model_id=model_id)
 
 
 def _fallback_summary(history: list[dict[str, str]], plan_task: str | None = None) -> str:
```
{aru_code-0.13.1 → aru_code-0.13.3}/aru/runner.py
CHANGED

```diff
@@ -115,11 +115,25 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
     run_message = message
 
     # Build conversation history as real messages for the LLM
-
+    # Compact BEFORE pruning: if the history is large enough that pruning
+    # would discard content, compact first to preserve context via summary
+    # instead of losing it to placeholders.
+    from aru.context import prune_history, should_compact, compact_conversation, would_prune
+    if session and session.history and not lightweight:
+        if would_prune(session.history, model_id=session.model_id):
+            try:
+                session.history = await compact_conversation(
+                    session.history, session.model_ref, session.plan_task,
+                    model_id=session.model_id,
+                )
+                console.print("[dim]Context compacted to save tokens.[/dim]")
+            except Exception:
+                pass
+
     history_messages: list[Message] = []
     if session and session.history and not lightweight:
         prior_history = session.history[:-1]
-        pruned = prune_history(prior_history)
+        pruned = prune_history(prior_history, model_id=session.model_id)
         for msg in pruned:
             history_messages.append(Message(role=msg["role"], content=msg["content"], from_history=True))
 
```
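Between this hunk and the next, 0.13.3 ends up with two compaction triggers per run: a proactive one before history is built (summarize rather than let pruning discard content) and a reactive one after the run, driven by real token counts. A condensed, illustrative view of the flow (not the package's actual code; `call_model` is a hypothetical stand-in and error handling is omitted):

```python
# Illustrative run-loop skeleton combining the proactive and reactive paths.
async def run_once(session, message):
    # 1. Proactive: if pruning would drop content, compact to a summary first.
    if would_prune(session.history, model_id=session.model_id):
        session.history = await compact_conversation(
            session.history, session.model_ref, session.plan_task,
            model_id=session.model_id,
        )
    # 2. Pruning still runs as a cheap backstop on what remains.
    pruned = prune_history(session.history[:-1], model_id=session.model_id)
    run_output = await call_model(pruned, message)  # hypothetical helper
    # 3. Reactive: per-run input tokens decide a second compaction.
    tokens = getattr(run_output.metrics, "input_tokens", 0) or 0
    if should_compact(tokens, session.model_id):
        session.history = await compact_conversation(
            session.history, session.model_ref, session.plan_task,
            model_id=session.model_id,
        )
```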
```diff
@@ -228,11 +242,16 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
     if run_output and session and hasattr(run_output, "metrics"):
         session.track_tokens(run_output.metrics)
 
-
-
+    # Reactive compaction: use per-run input_tokens (sum of all API
+    # calls within this arun) as a conservative proxy for context pressure.
+    # session.history doesn't include tool results, so char-based estimates
+    # would miss the bulk of the context sent to the model.
+    run_input_tokens = getattr(run_output.metrics, "input_tokens", 0) or 0
+    if should_compact(run_input_tokens, session.model_id):
         try:
             session.history = await compact_conversation(
-                session.history, session.model_ref, session.plan_task
+                session.history, session.model_ref, session.plan_task,
+                model_id=session.model_id,
             )
             console.print("[dim]Context compacted to save tokens.[/dim]")
         except Exception:
```
{aru_code-0.13.1 → aru_code-0.13.3}/aru/tools/codebase.py
CHANGED

```diff
@@ -1232,7 +1232,7 @@ async def load_mcp_tools(eager: bool = False):
     if eager:
         # Legacy: each MCP tool = one Agno Function (expensive)
         mcp_tools = manager.get_eager_tools()
-        get_ctx().
+        get_ctx().mcp_loaded_msg = f"Loaded {tool_count} tools from MCP servers (eager mode)."
         for t in mcp_tools:
             ALL_TOOLS.append(t)
             EXECUTOR_TOOLS.append(t)
```
```diff
@@ -1245,10 +1245,10 @@ async def load_mcp_tools(eager: bool = False):
         GENERAL_TOOLS.append(gateway)
         # Store catalog text for injection into system prompt
         get_ctx().mcp_catalog_text = manager.get_catalog_text()
-        get_ctx().
+        get_ctx().mcp_loaded_msg = f"Loaded {tool_count} tools from MCP servers."
 
     except Exception as e:
-        get_ctx().
+        get_ctx().mcp_loaded_msg = f"Failed to load MCP tools: {e}"
 
 
 def _build_mcp_gateway(manager):
```
aru_code-0.13.1/aru/__init__.py
DELETED

```diff
@@ -1 +0,0 @@
-__version__ = "0.13.1"
```
All remaining files are unchanged between 0.13.1 and 0.13.3.