aru-code 0.13.3__tar.gz → 0.14.1__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- {aru_code-0.13.3/aru_code.egg-info → aru_code-0.14.1}/PKG-INFO +1 -1
- aru_code-0.14.1/aru/__init__.py +1 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/agents/base.py +18 -1
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/context.py +120 -27
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/runner.py +3 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/codebase.py +51 -8
- {aru_code-0.13.3 → aru_code-0.14.1/aru_code.egg-info}/PKG-INFO +1 -1
- {aru_code-0.13.3 → aru_code-0.14.1}/pyproject.toml +1 -1
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_codebase.py +5 -5
- aru_code-0.13.3/aru/__init__.py +0 -1
- {aru_code-0.13.3 → aru_code-0.14.1}/LICENSE +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/README.md +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/agent_factory.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/agents/__init__.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/agents/executor.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/agents/planner.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/cli.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/commands.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/completers.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/config.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/display.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/permissions.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/providers.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/runtime.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/session.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/__init__.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/gitignore.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/ranker.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/tasklist.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru_code.egg-info/SOURCES.txt +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/setup.cfg +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_agents_base.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_ast_tools.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_base.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_completers.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_new.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_run_cli.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_session.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_cli_shell.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_config.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_context.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_executor.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_gitignore.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_main.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_mcp_client.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_permissions.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_planner.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_providers.py +0 -0
- {aru_code-0.13.3 → aru_code-0.14.1}/tests/test_ranker.py +0 -0
aru_code-0.14.1/aru/__init__.py  ADDED
@@ -0,0 +1 @@
+__version__ = "0.14.1"
{aru_code-0.13.3 → aru_code-0.14.1}/aru/agents/base.py
@@ -3,9 +3,26 @@
 # Common rules shared across all agents (planner, executor, general).
 # Each agent appends its role-specific instructions to this base.
 BASE_INSTRUCTIONS = """\
-
+## Output rules — CRITICAL for token efficiency
+
+Minimize output tokens. Your responses should be fewer than 4 lines unless the user \
+asks for detail or you are writing code. One word answers are best when they suffice.
+
+Do NOT add unnecessary preamble or postamble. Avoid introductions, conclusions, \
+and explanations of what you will do or just did. Do not add code explanation \
+summaries unless the user requests them. Only address the specific query or task at hand.
+
 NEVER write narration before calling tools. Do NOT say "I will analyze...", "Let me check...", \
 "Now I will...", or any similar preamble. Call the tool immediately and silently.
+
+Examples of ideal responses:
+- user: "2 + 2" → assistant: "4"
+- user: "is 11 prime?" → assistant: "Yes"
+- user: "what command lists files?" → assistant: "ls"
+- user: "fix the typo in line 5" → [call edit_file immediately, no narration]
+
+## Scope rules
+
 NEVER create documentation files (*.md) unless the user explicitly asks for them.
 Focus on writing working code, not documentation.
 Deliver EXACTLY what was asked — no more, no less. \
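The module comment says each agent appends its role-specific instructions to this shared base. A minimal sketch of that composition pattern (the import path is taken from the file list above; the planner text is invented for illustration):

```python
# Hypothetical illustration of how a role prompt builds on the shared base.
from aru.agents.base import BASE_INSTRUCTIONS  # path per the file list; not verified

PLANNER_INSTRUCTIONS = BASE_INSTRUCTIONS + """
## Planner role

Break the request into small, verifiable steps and hand them to the executor.
"""
```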
{aru_code-0.13.3 → aru_code-0.14.1}/aru/context.py
@@ -11,24 +11,31 @@ from __future__ import annotations
 # ── Constants ──────────────────────────────────────────────────────
 
 # Pruning: minimum chars that must be freeable to justify a prune pass
-PRUNE_MINIMUM_CHARS =
+PRUNE_MINIMUM_CHARS = 12_000  # ~3K tokens (lower = prune sooner)
 # Placeholder that replaces evicted content
 PRUNED_PLACEHOLDER = "[previous output cleared to save context]"
 # User messages larger than this threshold are truncated when outside protection window
 PRUNE_USER_MSG_THRESHOLD = 2_000  # ~570 tokens — catches @file mentions
 # How many chars to keep from the start of a pruned user message
 PRUNE_USER_MSG_KEEP = 500  # ~140 tokens — enough to understand the request
+# Minimum number of recent user turns always protected (regardless of char budget)
+PRUNE_PROTECT_TURNS = 2
+# Tool result markers that should never be pruned (critical context)
+PRUNE_PROTECTED_MARKERS = {"[SubAgent-", "delegate_task"}
 
 # Truncation: universal limits for any tool output
-TRUNCATE_MAX_LINES =
-TRUNCATE_MAX_BYTES =
-TRUNCATE_KEEP_START =
-TRUNCATE_KEEP_END =
+TRUNCATE_MAX_LINES = 300
+TRUNCATE_MAX_BYTES = 15 * 1024  # 15 KB (was 20KB — tighter to prevent context bloat)
+TRUNCATE_KEEP_START = 200  # lines to keep from the start
+TRUNCATE_KEEP_END = 60  # lines to keep from the end
+TRUNCATE_MAX_LINE_LENGTH = 2000  # chars per individual line (prevents minified files)
 
 # Compaction: trigger when per-run input tokens exceed this fraction of model limit
-COMPACTION_THRESHOLD_RATIO = 0.85
+COMPACTION_THRESHOLD_RATIO = 0.70  # was 0.85 — compact earlier to avoid hitting limits
 # Compaction: target post-compaction size as fraction of model context limit
 COMPACTION_TARGET_RATIO = 0.15
+# Compaction: reserve buffer for the compaction process itself (like OpenCode's 20K)
+COMPACTION_BUFFER_TOKENS = 20_000
 # Default model context limits (input tokens)
 MODEL_CONTEXT_LIMITS: dict[str, int] = {
     # Anthropic
@@ -70,15 +77,26 @@ MODEL_CONTEXT_LIMITS: dict[str, int] = {
 }
 
 COMPACTION_TEMPLATE = """\
-Summarize this conversation
-
-2. **Key decisions**: Important choices made during the conversation
-3. **Discoveries**: What was learned about the codebase or problem
-4. **Accomplished**: What has been done so far (be specific about files changed)
-5. **Relevant files**: File paths that are important for continuing the work
-6. **Next steps**: What remains to be done
+Summarize this conversation into the EXACT sections below. Be concise but complete — \
+this summary replaces the full conversation history. Output ONLY these sections:
 
-
+## Goal
+What the user is trying to accomplish (1-2 sentences).
+
+## Instructions
+Important instructions or preferences the user stated (bullet list). \
+If none, write "None stated."
+
+## Discoveries
+Notable things learned about the codebase, bugs, or architecture (bullet list). \
+If none, write "None."
+
+## Accomplished
+What was done so far — be specific about files created/changed and functions added/modified. \
+List what is in progress and what remains (bullet list).
+
+## Relevant files / directories
+Structured list of file paths relevant to continuing the work (one per line)."""
 
 
 # ── Layer 1: Pruning ──────────────────────────────────────────────
@@ -87,13 +105,13 @@ def _get_prune_protect_chars(model_id: str = "default") -> int:
     """Scale protection window based on model context size.
 
     Larger models get more protection; smaller models prune more aggressively
-    to
+    to prevent context overflow. Returns ~7% of the model's context in chars.
     """
     limit = MODEL_CONTEXT_LIMITS.get(model_id, MODEL_CONTEXT_LIMITS["default"])
-    # ~
-    protect = int(limit * 0.
-    # Clamp between
-    return max(
+    # ~4 chars per token, protect ~7% of context (was 10% — tighter budget)
+    protect = int(limit * 0.07 * 4)
+    # Clamp between 15K (minimum usable) and 60K (diminishing returns)
+    return max(15_000, min(protect, 60_000))
 
 
 def prune_history(
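For a sense of scale, the new formula gives the following protection windows. This is a worked sketch of the arithmetic above, using hypothetical 200K-token and 32K-token model limits:

```python
# Worked example of the new protection-window formula (hypothetical model limits).
def protect_chars(limit_tokens: int) -> int:
    # ~4 chars per token, protect ~7% of context, clamped to [15K, 60K] chars
    return max(15_000, min(int(limit_tokens * 0.07 * 4), 60_000))

print(protect_chars(200_000))  # 56000 chars (~14K tokens protected)
print(protect_chars(32_000))   # 15000 chars: small models hit the floor and prune sooner
```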
@@ -104,9 +122,14 @@ def prune_history(
     Walks backward through history, protecting the most recent content
     (scaled to the model's context size). Older messages beyond that
     budget are pruned:
-    - Assistant messages: replaced entirely with placeholder
+    - Assistant messages: replaced entirely with placeholder (unless protected)
     - User messages over PRUNE_USER_MSG_THRESHOLD: truncated to first N chars
 
+    Protection layers:
+    1. Turn-based: last PRUNE_PROTECT_TURNS user turns always kept
+    2. Char-based: recent content within the protection window
+    3. Content-based: messages containing PRUNE_PROTECTED_MARKERS never pruned
+
     Returns a new list (does not mutate the input).
     """
     if len(history) <= 2:
@@ -121,6 +144,18 @@ def prune_history(
     if total_chars < protect_chars + PRUNE_MINIMUM_CHARS:
         return list(history)
 
+    # Identify indices of last N user turns (always protected)
+    turn_protected: set[int] = set()
+    user_turns_seen = 0
+    for i in range(len(history) - 1, -1, -1):
+        if history[i]["role"] == "user":
+            user_turns_seen += 1
+            if user_turns_seen <= PRUNE_PROTECT_TURNS:
+                turn_protected.add(i)
+                # Also protect the assistant response right after this user turn
+                if i + 1 < len(history):
+                    turn_protected.add(i + 1)
+
     # Walk backward, protecting recent content
     result = list(history)
     protected = 0
@@ -129,10 +164,20 @@ def prune_history(
         msg = result[i]
         msg_len = len(msg["content"])
 
+        # Turn-based protection: never prune last N user turns
+        if i in turn_protected:
+            protected += msg_len
+            continue
+
         if protected + msg_len <= protect_chars:
             # Still within protection window
             protected += msg_len
         else:
+            # Check protected markers before pruning
+            if any(marker in msg["content"] for marker in PRUNE_PROTECTED_MARKERS):
+                protected += msg_len
+                continue
+
             # Beyond protection window — prune
             if msg["role"] == "assistant":
                 if msg["content"] != PRUNED_PLACEHOLDER:
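To see the three protection layers together, here is a toy history run through the function. This is an illustrative sketch only: the `aru.context` import path comes from the file list, and whether the large message actually gets pruned depends on the model's protection window:

```python
# Toy illustration of the pruning layers described in the docstring above.
from aru.context import prune_history, PRUNED_PLACEHOLDER

history = [
    {"role": "user", "content": "explore the repo"},
    {"role": "assistant", "content": "x" * 200_000},               # old, oversized output
    {"role": "assistant", "content": "[SubAgent-1] key findings"},  # protected marker
    {"role": "user", "content": "now fix the bug in runner.py"},    # recent turn
    {"role": "assistant", "content": "done"},
]

pruned = prune_history(history)
# Expected per the code above: the 200K-char assistant message falls outside the
# protection window and is replaced with PRUNED_PLACEHOLDER, while the marker
# message and the last user/assistant turns survive untouched.
print(len(pruned[1]["content"]), pruned[-2]["content"])
```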
@@ -147,11 +192,36 @@ def prune_history(
 
 # ── Layer 2: Truncation ───────────────────────────────────────────
 
+def _truncate_long_lines(lines: list[str]) -> list[str]:
+    """Truncate individual lines that exceed MAX_LINE_LENGTH.
+
+    Prevents minified JS/CSS or log lines from consuming massive tokens.
+    """
+    result = []
+    for line in lines:
+        if len(line) > TRUNCATE_MAX_LINE_LENGTH:
+            result.append(
+                line[:TRUNCATE_MAX_LINE_LENGTH]
+                + f"... (line truncated to {TRUNCATE_MAX_LINE_LENGTH} chars)\n"
+            )
+        else:
+            result.append(line)
+    return result
+
+
+_TRUNCATION_HINT = (
+    "\n[Hint: Use grep_search to find specific content, or read_file with "
+    "start_line/end_line for incremental reading. "
+    "For large exploration tasks, use delegate_task to keep your context clean.]"
+)
+
+
 def truncate_output(text: str) -> str:
     """Universal truncation for tool outputs.
 
     Caps output at TRUNCATE_MAX_BYTES / TRUNCATE_MAX_LINES, keeping the
     start and end with a middle marker showing what was cut.
+    Also truncates individual lines exceeding TRUNCATE_MAX_LINE_LENGTH.
     """
     if not text:
         return text
@@ -161,8 +231,11 @@ def truncate_output(text: str) -> str:
     lines = text.splitlines(keepends=True)
     line_count = len(lines)
 
+    # Truncate individual long lines first
+    lines = _truncate_long_lines(lines)
+
     if byte_len <= TRUNCATE_MAX_BYTES and line_count <= TRUNCATE_MAX_LINES:
-        return
+        return "".join(lines)
 
     # Truncate by lines
     if line_count > TRUNCATE_MAX_LINES:
@@ -171,8 +244,8 @@ def truncate_output(text: str) -> str:
         omitted = line_count - TRUNCATE_KEEP_START - TRUNCATE_KEEP_END
         return (
             "".join(head)
-            + f"\n\n[... {omitted:,} lines omitted ({line_count:,} total)
-
+            + f"\n\n[... {omitted:,} lines omitted ({line_count:,} total)]"
+            + _TRUNCATION_HINT + "\n\n"
             + "".join(tail)
         )
 
@@ -190,7 +263,8 @@ def truncate_output(text: str) -> str:
     return (
         "".join(kept_lines)
         + f"\n\n[... truncated at ~{TRUNCATE_MAX_BYTES // 1024}KB — "
-        f"{remaining:,} more lines
+        f"{remaining:,} more lines]"
+        + _TRUNCATION_HINT + "\n"
     )
 
 
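Two quick checks of the new truncation behavior, as a sketch with hypothetical inputs (assumes `truncate_output` is importable from `aru.context` as listed above):

```python
# Sanity checks for the truncation limits added above (illustrative only).
from aru.context import truncate_output

# A single 5,000-char "minified" line is cut at TRUNCATE_MAX_LINE_LENGTH (2000)
print("(line truncated" in truncate_output("x" * 5_000))   # expected: True

# 1,000 short lines exceed TRUNCATE_MAX_LINES (300): head and tail are kept,
# an omission marker and the new _TRUNCATION_HINT are inserted in the middle
big = "\n".join(f"line {i}" for i in range(1_000))
out = truncate_output(big)
print("lines omitted" in out, "[Hint:" in out)              # expected: True True
```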
@@ -206,7 +280,10 @@ def should_compact(
     history_or_tokens: int | list[dict[str, str]],
     model_id: str = "default",
 ) -> bool:
-    """Check if the conversation should be compacted
+    """Check if the conversation should be compacted.
+
+    Uses OpenCode's approach: usable = model_limit - buffer, then
+    trigger when tokens >= usable * threshold_ratio.
 
     Accepts either an estimated token count (int) or the history list
     (from which tokens are estimated via char count).
@@ -216,7 +293,8 @@ def should_compact(
     else:
         tokens = history_or_tokens
     limit = MODEL_CONTEXT_LIMITS.get(model_id, MODEL_CONTEXT_LIMITS["default"])
-
+    usable = limit - COMPACTION_BUFFER_TOKENS
+    threshold = int(usable * COMPACTION_THRESHOLD_RATIO)
     return tokens >= threshold
 
 
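Plugging in a hypothetical 200K-token model shows how much earlier compaction now triggers; this is a sketch of the same formula, not output from the package:

```python
# Compaction-trigger arithmetic from the code above (hypothetical 200K-token limit).
limit = 200_000
usable = limit - 20_000            # COMPACTION_BUFFER_TOKENS
threshold = int(usable * 0.70)     # COMPACTION_THRESHOLD_RATIO
print(threshold)                   # 126000 input tokens
# With the previous 0.85 ratio and no buffer, the trigger would have been around 170000.
```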
@@ -287,7 +365,7 @@ def apply_compaction(
 
     Uses the same protection window as pruning: recent messages within
     the window are preserved as-is, older messages are replaced by a
-    compaction summary.
+    compaction summary. Replays the last user message to maintain continuity.
     """
     _, recent = _split_history(history, model_id)
 
@@ -296,6 +374,21 @@ def apply_compaction(
     ]
     compacted.extend(recent)
 
+    # Replay: ensure the last message is from the user so the LLM continues naturally
+    if not compacted or compacted[-1]["role"] != "user":
+        # Find last user message in original history for replay
+        last_user = None
+        for msg in reversed(history):
+            if msg["role"] == "user":
+                last_user = msg["content"]
+                break
+        if last_user:
+            # Truncate replayed message to avoid re-bloating context
+            replay = last_user[:1000] if len(last_user) > 1000 else last_user
+            compacted.append({"role": "user", "content": replay})
+        else:
+            compacted.append({"role": "user", "content": "Continue if you have next steps, or stop and ask for clarification."})
+
     return compacted
 
 
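The replay step guarantees the compacted history ends on a user turn. Roughly, the result looks like the shape below; the roles and contents are invented for illustration and are not captured output:

```python
# Illustrative post-compaction shape (contents and roles invented).
compacted = [
    {"role": "user", "content": "<summary: Goal / Instructions / Discoveries / Accomplished / Relevant files>"},
    {"role": "assistant", "content": "...recent protected messages..."},
    {"role": "user", "content": "fix the failing test"},  # replayed, capped at 1000 chars
]
assert compacted[-1]["role"] == "user"  # the invariant the new block enforces
```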
{aru_code-0.13.3 → aru_code-0.14.1}/aru/runner.py
@@ -249,12 +249,15 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
     run_input_tokens = getattr(run_output.metrics, "input_tokens", 0) or 0
     if should_compact(run_input_tokens, session.model_id):
         try:
+            # Always prune first to shrink history before compaction
+            session.history = prune_history(session.history, model_id=session.model_id)
             session.history = await compact_conversation(
                 session.history, session.model_ref, session.plan_task,
                 model_id=session.model_id,
            )
             console.print("[dim]Context compacted to save tokens.[/dim]")
         except Exception:
+            # Even if compaction fails, keep the pruned history
             pass
 
     final_content = accumulated or final_content
{aru_code-0.13.3 → aru_code-0.14.1}/aru/tools/codebase.py
@@ -54,23 +54,23 @@ def _format_diff(old_string: str, new_string: str) -> Group:
 
 
 
-# Hard ceiling per tool result (~
-_READ_HARD_CAP =
+# Hard ceiling per tool result (~10K tokens). Even max_size=0 respects this per chunk.
+_READ_HARD_CAP = 40_000  # bytes (was 60K — tighter to protect context)
 
 def clear_read_cache():
     """Clear the read cache. Call after file mutations to avoid stale data."""
     get_ctx().read_cache.clear()
 
 
-def read_file(file_path: str, start_line: int = 0, end_line: int = 0, max_size: int =
+def read_file(file_path: str, start_line: int = 0, end_line: int = 0, max_size: int = 12_000) -> str:
     """Read file contents. Returns chunked output for large files.
 
     Args:
         file_path: Path to the file (absolute or relative).
         start_line: First line (1-indexed, inclusive). 0 = beginning.
         end_line: Last line (1-indexed, inclusive). 0 = end.
-        max_size: Max bytes before truncation. Default
-            Set to 0 to read the full file in chunks — each chunk up to ~
+        max_size: Max bytes before truncation. Default 12KB.
+            Set to 0 to read the full file in chunks — each chunk up to ~40KB.
             The first chunk includes a continuation hint so you can call again
             with start_line to get the next chunk.
     """
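Usage implied by the new defaults, as a sketch: the file path and `start_line` value below are invented, and the import path is taken from the file list above:

```python
# Sketch of the new read_file defaults: 12KB default cap, ~40KB hard cap per chunk.
from aru.tools.codebase import read_file

first = read_file("src/big_module.py")                # truncated at ~12KB by default
chunk1 = read_file("src/big_module.py", max_size=0)   # chunked mode, up to ~40KB per call
# The first chunk ends with a continuation hint; pass the suggested start_line
# (the value here is hypothetical) to fetch the next chunk:
chunk2 = read_file("src/big_module.py", start_line=801, max_size=0)
```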
@@ -204,7 +204,7 @@ async def read_file_smart(file_path: str, query: str) -> str:
         query: The specific question you want answered about this file.
     """
     # Read raw content first (reuse existing read_file logic)
-    raw = read_file(file_path, max_size=
+    raw = read_file(file_path, max_size=15_000)
 
     if raw.startswith("Error:"):
         return raw
@@ -321,6 +321,35 @@ def write_files(file_list: list[dict]) -> str:
     return "\n".join(parts) or "No files to write."
 
 
+def _compact_diff(old_string: str, new_string: str, file_path: str = "") -> str:
+    """Generate a compact unified diff string for the LLM context.
+
+    Returns only the changed lines (not the full file), saving tokens while
+    giving the LLM enough context to continue working.
+    """
+    old_lines = old_string.splitlines(keepends=True)
+    new_lines = new_string.splitlines(keepends=True)
+    # Ensure trailing newlines for clean diff
+    if old_lines and not old_lines[-1].endswith("\n"):
+        old_lines[-1] += "\n"
+    if new_lines and not new_lines[-1].endswith("\n"):
+        new_lines[-1] += "\n"
+
+    import difflib
+    diff_lines = list(difflib.unified_diff(
+        old_lines, new_lines,
+        fromfile=file_path, tofile=file_path,
+        lineterm="",
+    ))
+    if not diff_lines:
+        return ""
+    # Cap diff output to avoid huge diffs bloating context
+    MAX_DIFF_LINES = 40
+    if len(diff_lines) > MAX_DIFF_LINES:
+        return "\n".join(diff_lines[:MAX_DIFF_LINES]) + f"\n... ({len(diff_lines) - MAX_DIFF_LINES} more diff lines)"
+    return "\n".join(diff_lines)
+
+
 def edit_file(file_path: str, old_string: str, new_string: str) -> str:
     """Replace an exact string in a file. The old_string must appear exactly once.
 
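Since `_compact_diff` is just `difflib.unified_diff` with a line cap, a one-line edit produces output along these lines. The values are invented, the private helper is imported here only for illustration, and the exact spacing of the output depends on difflib:

```python
# Illustration of what edit_file now returns for a small change (values invented).
from aru.tools.codebase import _compact_diff  # private helper; import shown for illustration only

old = "DB_PORT = 5432\n"
new = "DB_PORT = 5433\n"
print(_compact_diff(old, new, "config.py"))
# Prints a unified-diff header (--- / +++ config.py), the @@ -1 +1 @@ hunk line,
# and the -/+ lines for the changed setting; edit_file prepends "Edited config.py".
```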
@@ -347,7 +376,12 @@ def edit_file(file_path: str, old_string: str, new_string: str) -> str:
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(new_content)
         _notify_file_mutation()
-
+
+        # Return compact diff instead of just success message
+        diff_text = _compact_diff(old_string, new_string, file_path)
+        if diff_text:
+            return f"Edited {file_path}\n{diff_text}"
+        return f"Edited {file_path}"
     except FileNotFoundError:
         return f"Error: File not found: {file_path}"
     except Exception as e:
@@ -423,7 +457,16 @@ def edit_files(edits: list[dict]) -> str:
     if results:
         _notify_file_mutation()
         unique = list(dict.fromkeys(results))  # preserve order, dedupe
-        parts.append(f"
+        parts.append(f"Applied {len(results)} edits across {len(unique)} files: {', '.join(unique)}")
+        # Append compact diffs for each edit
+        for entry in edits:
+            old = entry.get("old_string", "")
+            new = entry.get("new_string", "")
+            path = entry.get("path", "")
+            if old and path in written:
+                diff_text = _compact_diff(old, new, path)
+                if diff_text:
+                    parts.append(diff_text)
     if errors:
         parts.append("\n".join(errors))
     return "\n".join(parts) or "No edits to apply."
{aru_code-0.13.3 → aru_code-0.14.1}/tests/test_codebase.py
@@ -209,7 +209,7 @@ def test_edit_file_basic(tmp_path):
     finally:
         set_skip_permissions(False)
 
-    assert "
+    assert "Edited" in result
     assert f.read_text() == "def hello():\n return 'earth'\n"
 
 
@@ -234,7 +234,7 @@ def test_edit_file_search_replace(tmp_path):
     finally:
         set_skip_permissions(False)
 
-    assert "
+    assert "Edited" in result
     updated = f.read_text()
     assert "DB_HOST = 'production.example.com'" in updated
     assert "DB_PORT = 5433" in updated
@@ -427,7 +427,7 @@ class TestEditFiles:
         finally:
             set_skip_permissions(False)
 
-        assert "
+        assert "Applied" in result or "Edited" in result
         assert f1.read_text() == "alpha = 10"
         assert f2.read_text() == "beta = 20"
 
@@ -446,7 +446,7 @@ class TestEditFiles:
         finally:
             set_skip_permissions(False)
 
-        assert "
+        assert "Applied" in result or "Edited" in result
         content = f.read_text()
         assert "HOST = 'prod.example.com'" in content
         assert "PORT = 8080" in content
@@ -501,7 +501,7 @@ class TestEditFiles:
         finally:
             set_skip_permissions(False)
 
-        assert "
+        assert "Applied" in result or "Edited" in result
 
         content_a = f1.read_text()
         assert "import logging" in content_a
aru_code-0.13.3/aru/__init__.py  DELETED
@@ -1 +0,0 @@
-__version__ = "0.13.3"