PyPI - aru-code - Versions diffs - 0.53.0__tar.gz → 0.54.0__tar.gz - Mend

aru-code 0.53.0__tar.gz → 0.54.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (211) hide show

{aru_code-0.53.0/aru_code.egg-info → aru_code-0.54.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.53.0
+Version: 0.54.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT

aru_code-0.54.0/aru/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.54.0"

{aru_code-0.53.0 → aru_code-0.54.0}/aru/cache_patch.py RENAMED Viewed

@@ -23,6 +23,9 @@ regardless of which provider is used.
 from __future__ import annotations
+import os as _os
+import time as _time
 # Token-budget pruning (aligned with OpenCode's strategy):
 # - Protect recent tool results within a token budget
 # - Only prune if there's enough to free (avoid churn)
@@ -43,6 +46,22 @@ _last_call_cache_write: int = 0
 # We normalize "length" → "max_tokens" so callers can check a single value.
 _last_call_stop_reason: str | None = None
+# Per-call observability ring buffer. Each accumulate_model_metrics fire
+# appends one record; the ring caps at _CALL_HISTORY_MAX so a long-running
+# session doesn't grow unbounded. Surfaced via /calls so users can see
+# *which* models / model_types / call sites produced each request — the
+# canonical "why are there N api_calls?" diagnosis surface.
+_CALL_HISTORY_MAX = 200
+_call_history: list[dict] = []
+# Pending request metadata captured by the request-side patch right before
+# the provider call goes out. Read by ``_patched_accumulate`` after the
+# response lands and merged into the matching call_history record so /calls
+# shows both the response usage AND a summary of what was sent. Single-
+# slot global is OK: aru runs requests sequentially per ctx, and the patch
+# captures-then-clears synchronously around each invocation.
+_pending_request_meta: dict | None = None
 # Micro-compaction metrics (process-wide, reset by tests via
 # reset_microcompact_stats()). Recorded by _prune_tool_messages every time it
 # fires from the format_function_call_results patch. Surfaced in /cost so
@@ -105,6 +124,92 @@ def reset_last_stop_reason() -> None:
     _last_call_stop_reason = None
+def _summarize_request(messages, tools=None) -> dict:
+    """Build a compact summary of an outgoing request for /calls.
+    We deliberately don't store full message bodies — a single tool result
+    can be tens of KB and a long session would balloon memory. We keep:
+      * count of messages and per-role tally
+      * total chars across messages (proxy for prompt size)
+      * snippet of the first message (usually system prompt) and the last
+        message (usually the freshest user/tool turn — what the model is
+        responding to)
+      * snippet of the most recent ``user`` message specifically
+      * tool count
+    Snippets are capped at 240 chars. Enough to identify the call without
+    storing PII-heavy or token-heavy bodies.
+    """
+    out = {
+        "n_messages": 0,
+        "roles": {},
+        "total_chars": 0,
+        "first_snippet": "",
+        "last_snippet": "",
+        "last_user_snippet": "",
+        "n_tools": 0,
+    }
+    try:
+        msgs = list(messages or [])
+        out["n_messages"] = len(msgs)
+        out["n_tools"] = len(tools or [])
+        last_user = ""
+        for i, m in enumerate(msgs):
+            role = (getattr(m, "role", None) or "?")
+            out["roles"][role] = out["roles"].get(role, 0) + 1
+            content = getattr(m, "content", None)
+            if content is None:
+                content = getattr(m, "text", "")
+            if not isinstance(content, str):
+                try:
+                    content = str(content)
+                except Exception:
+                    content = ""
+            out["total_chars"] += len(content)
+            if i == 0:
+                out["first_snippet"] = content[:240]
+            if role == "user":
+                last_user = content[:240]
+        if msgs:
+            last = msgs[-1]
+            lc = getattr(last, "content", None) or getattr(last, "text", "")
+            if not isinstance(lc, str):
+                try:
+                    lc = str(lc)
+                except Exception:
+                    lc = ""
+            out["last_snippet"] = lc[:240]
+        out["last_user_snippet"] = last_user
+    except Exception:
+        pass
+    return out
+def _capture_request_meta(messages, tools=None) -> None:
+    """Stash a request summary into the pending slot for the next accumulate."""
+    global _pending_request_meta
+    _pending_request_meta = _summarize_request(messages, tools)
+def get_call_history() -> list[dict]:
+    """Return a copy of the per-API-call ring buffer.
+    Each entry: ``{n, model_type, model_id, provider, input_tokens,
+    output_tokens, cache_read, cache_write, stop_reason, caller, ts}``.
+    ``input_tokens`` is the *normalized* value (cache stripped for OpenAI-
+    style providers). ``caller`` is the agno file:line that invoked
+    accumulate_model_metrics — useful for distinguishing main-model calls
+    from parser/output-model/memory/recovery calls.
+    """
+    return list(_call_history)
+def reset_call_history() -> None:
+    """Clear the call ring buffer. Useful at session start or in tests."""
+    _call_history.clear()
 def get_microcompact_stats() -> dict:
     """Return process-wide micro-compaction metrics.
@@ -317,6 +422,72 @@ def _prune_tool_messages(messages):
 _PATCH_APPLIED = False
+def _patch_request_capture():
+    """Wrap the agno methods that receive ``messages`` right before the
+    provider HTTP call so /calls can show what was actually sent.
+    We hook the four ``Model._{a,}invoke{_stream,}_with_retry`` methods
+    on ``agno.models.base.Model`` — these are the chokepoint each subclass
+    flows through (sync/async × stream/non-stream). Each wrapper takes a
+    cheap snapshot of ``kwargs["messages"]`` into ``_pending_request_meta``
+    immediately before delegating to the original. ``_patched_accumulate``
+    then reads-and-clears that slot when the matching response lands.
+    The wrappers are best-effort: any exception during snapshotting is
+    swallowed so we never break the actual model call. Stream wrappers
+    must remain async generators (``async for ... yield``) — collecting
+    the stream first would defeat streaming.
+    """
+    try:
+        from agno.models.base import Model
+    except ImportError:
+        return
+    _orig_invoke = Model._invoke_with_retry
+    _orig_ainvoke = Model._ainvoke_with_retry
+    _orig_invoke_stream = Model._invoke_stream_with_retry
+    _orig_ainvoke_stream = Model._ainvoke_stream_with_retry
+    def _wrap_invoke(self, **kwargs):
+        try:
+            _capture_request_meta(kwargs.get("messages"), kwargs.get("tools"))
+        except Exception:
+            pass
+        return _orig_invoke(self, **kwargs)
+    async def _wrap_ainvoke(self, **kwargs):
+        try:
+            _capture_request_meta(kwargs.get("messages"), kwargs.get("tools"))
+        except Exception:
+            pass
+        return await _orig_ainvoke(self, **kwargs)
+    def _wrap_invoke_stream(self, **kwargs):
+        try:
+            _capture_request_meta(kwargs.get("messages"), kwargs.get("tools"))
+        except Exception:
+            pass
+        # _invoke_stream_with_retry returns an Iterator (sync generator)
+        return _orig_invoke_stream(self, **kwargs)
+    async def _wrap_ainvoke_stream(self, **kwargs):
+        try:
+            _capture_request_meta(kwargs.get("messages"), kwargs.get("tools"))
+        except Exception:
+            pass
+        # _ainvoke_stream_with_retry is an async generator — we must
+        # re-yield rather than return it (returning an async generator
+        # from an async def function wraps it in a coroutine that yields
+        # the generator object, which the caller would not iterate).
+        async for chunk in _orig_ainvoke_stream(self, **kwargs):
+            yield chunk
+    Model._invoke_with_retry = _wrap_invoke
+    Model._ainvoke_with_retry = _wrap_ainvoke
+    Model._invoke_stream_with_retry = _wrap_invoke_stream
+    Model._ainvoke_stream_with_retry = _wrap_ainvoke_stream
 def apply_cache_patch():
     """Apply all patches to reduce Agno's token consumption.
@@ -334,6 +505,7 @@ def apply_cache_patch():
     _patch_per_call_metrics()
     _patch_stop_reason_capture()
     _patch_overflow_recovery()
+    _patch_request_capture()
     _PATCH_APPLIED = True
@@ -515,6 +687,10 @@ def _publish_live_metrics(
         session.total_output_tokens += output_tokens
         session.total_cache_read_tokens += cache_read
         session.total_cache_write_tokens += cache_write
+        # Count real API requests (one per accumulate call). track_tokens
+        # used to do this at turn-end (++1), which collapsed multi-tool
+        # turns — a turn with N tool calls = N+1 requests but counted as 1.
+        session.api_calls = (getattr(session, "api_calls", 0) or 0) + 1
         session._live_input_added = (
             getattr(session, "_live_input_added", 0) + input_tokens
         )
@@ -587,6 +763,16 @@ def _patch_per_call_metrics():
         global _last_call_input_tokens, _last_call_output_tokens
         global _last_call_cache_read, _last_call_cache_write
         usage = getattr(model_response, "response_usage", None)
+        # Capture the call site (agno file:line that invoked accumulate)
+        # cheaply — only when there's a usage object worth recording.
+        _caller_str = ""
+        if usage is not None:
+            try:
+                import sys as _sys
+                _frame = _sys._getframe(1)
+                _caller_str = f"{_os.path.basename(_frame.f_code.co_filename)}:{_frame.f_lineno}"
+            except Exception:
+                _caller_str = "?"
         if usage is not None:
             input_tokens = getattr(usage, "input_tokens", 0) or 0
             output_tokens = getattr(usage, "output_tokens", 0) or 0
@@ -603,12 +789,59 @@ def _patch_per_call_metrics():
             is_anthropic = "anthropic" in (provider_name or "").lower()
             if not is_anthropic and cache_read and input_tokens >= cache_read:
                 input_tokens -= cache_read
+                # Mutate the shared usage object so the downstream
+                # ``_original_accumulate`` writes the *normalized* value
+                # into Agno's RunMetrics. Without this, RunMetrics keeps
+                # the raw (cache-inclusive) input while ``_last_call_*``
+                # and the live publish hold the normalized one, and
+                # ``Session.track_tokens`` reconciliation re-adds the
+                # cached portion as a fake "missing delta" — exactly the
+                # cumulative-vs-last asymmetry users see in /cost.
+                try:
+                    usage.input_tokens = input_tokens
+                except (AttributeError, TypeError):
+                    pass
             _last_call_input_tokens = input_tokens
             _last_call_output_tokens = output_tokens
             _last_call_cache_read = cache_read
             _last_call_cache_write = cache_write
+            # Per-call observability: append to the ring buffer so /calls
+            # can show breakdown by model_type (MODEL vs PARSER_MODEL vs
+            # MEMORY_MODEL etc.) and call site. Bounded to _CALL_HISTORY_MAX
+            # so a long session doesn't grow unbounded.
+            _model_id = ""
+            try:
+                _model_id = getattr(model, "id", "") or ""
+            except Exception:
+                pass
+            _mt_str = (
+                model_type.value
+                if hasattr(model_type, "value")
+                else str(model_type)
+            )
+            global _pending_request_meta
+            _req_meta = _pending_request_meta or {}
+            _pending_request_meta = None
+            _call_history.append({
+                "n": len(_call_history) + 1,
+                "model_type": _mt_str,
+                "model_id": _model_id,
+                "provider": provider_name or "",
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "cache_read": cache_read,
+                "cache_write": cache_write,
+                "stop_reason": _last_call_stop_reason,
+                "caller": _caller_str,
+                "ts": _time.time(),
+                "request": _req_meta,
+            })
+            if len(_call_history) > _CALL_HISTORY_MAX:
+                # Keep the most recent N — drop from the front.
+                del _call_history[: len(_call_history) - _CALL_HISTORY_MAX]
             # Intra-turn live session update + bus publish. Gated to the
             # primary agent (subagent_depth == 0) so subagent API calls
             # don't double-count — delegate_task adds subagent totals in

{aru_code-0.53.0 → aru_code-0.54.0}/aru/cli.py RENAMED Viewed

@@ -761,6 +761,15 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
             ))
             continue
+        if user_input.lower() == "/calls":
+            console.print(Panel(
+                session.calls_summary,
+                title="[bold]Per-API-Call Breakdown[/bold]",
+                border_style="cyan",
+                padding=(1, 2),
+            ))
+            continue
         if user_input.lower() == "/subagents":
             from aru.commands import handle_subagents_command
             handle_subagents_command(session)

{aru_code-0.53.0 → aru_code-0.54.0}/aru/commands.py RENAMED Viewed

@@ -31,6 +31,7 @@ SLASH_COMMANDS = [
     ("/debug", "Debug utilities (plugin-errors)", "/debug <subcommand>"),
     ("/undo", "Undo last turn — restore files and/or conversation", "/undo"),
     ("/cost", "Show detailed token usage and cost", "/cost"),
+    ("/calls", "Show per-API-call breakdown (model_type, tokens, stop_reason, caller)", "/calls"),
     ("/yolo", "Toggle DANGEROUSLY skip all permissions (YOLO mode)", "/yolo"),
     ("/quit", "Exit aru", "/quit"),
 ]

{aru_code-0.53.0 → aru_code-0.54.0}/aru/runner.py RENAMED Viewed

@@ -9,6 +9,7 @@ from dataclasses import dataclass, field
 from rich.markdown import Markdown
 from aru.display import console
+from aru.session import Session
 # Categories of tools that modify files (for highlighting in history)
@@ -640,18 +641,30 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
         })
         # Tier 2 #4: auto-memory extraction (opt-in, fire-and-forget).
+        # ``turn_tokens`` here is the size of the *exchange* (user message +
+        # assistant reply) — NOT the API call's prompt size. Earlier this
+        # used ``last_input_tokens + last_output_tokens``, but
+        # ``last_input_tokens`` includes the entire system prompt (~8K on
+        # aru with 30+ tools), so ``min_turn_tokens=500`` always tripped
+        # even on "Olá"/"ok"-style turns and the extractor fired every
+        # turn — burning the curator budget on nothing. Estimating from
+        # user+assistant char length matches the docstring intent
+        # ("trivial turns 'ok'/'thanks' don't trigger").
         try:
             from aru.memory.extractor import schedule_extraction_task
             from aru.runtime import get_ctx as _get_ctx
             _cfg = getattr(_get_ctx(), "config", None)
             _cfg_memory = getattr(_cfg, "memory", None) or {}
             _project_root = getattr(session, "project_root", None) or os.getcwd()
+            _exchange_tokens = Session.estimate_tokens(
+                (run_message or "") + (final_content or "")
+            )
             schedule_extraction_task(
                 project_root=_project_root,
                 user_msg=run_message or "",
                 assistant_msg=final_content or "",
                 config_memory=_cfg_memory,
-                turn_tokens=_turn_tokens_in + _turn_tokens_out,
+                turn_tokens=_exchange_tokens,
             )
         except Exception:
             pass  # extractor guards internally; swallow any unexpected raise

{aru_code-0.53.0 → aru_code-0.54.0}/aru/session.py RENAMED Viewed

@@ -540,7 +540,11 @@ class Session:
         self.total_cache_write_tokens += max(
             0, agno_cw - self._live_cache_write_added
         )
-        self.api_calls += 1
+        # api_calls is no longer bumped here — it's incremented per real
+        # API request inside cache_patch._publish_live_metrics, which fires
+        # once per accumulate_model_metrics call. Bumping again here would
+        # double-count. Subagent runs (which skip live publish) bump
+        # api_calls themselves in delegate_task at sub-run completion.
         self.reset_live_token_counters()
         # Capture last API call's context window (set by cache_patch)
         try:
@@ -626,8 +630,20 @@ class Session:
     @property
     def cost_summary(self) -> str:
-        """Detailed cost breakdown for /cost command."""
-        total = self.total_input_tokens + self.total_output_tokens
+        """Detailed cost breakdown for /cost command.
+        Mirrors OpenCode: a single running session total — input, output,
+        and cache buckets shown side-by-side and summed into one ``total``.
+        Cache is normalized non-overlapping with input by ``cache_patch``,
+        so ``input + output + cache_read + cache_write`` is the true
+        chargeable token volume.
+        """
+        total = (
+            self.total_input_tokens
+            + self.total_output_tokens
+            + self.total_cache_read_tokens
+            + self.total_cache_write_tokens
+        )
         if total == 0:
             return "No token usage yet."
         cost = self.estimated_cost
@@ -635,7 +651,7 @@ class Session:
         lines = [
             f"Session cost: {cost_str}",
             f"",
-            f"Cumulative tokens:",
+            f"Session tokens:",
             f"  input:       {self.total_input_tokens:,}",
             f"  output:      {self.total_output_tokens:,}",
         ]
@@ -645,8 +661,13 @@ class Session:
             lines.append(f"  cache_write: {self.total_cache_write_tokens:,}")
         lines.append(f"  total:       {total:,}")
         lines.append(f"  api calls:   {self.api_calls}")
-        if self.last_input_tokens > 0:
-            ctx_total = self.last_input_tokens + self.last_output_tokens + self.last_cache_read + self.last_cache_write
+        if self.last_input_tokens > 0 or self.last_cache_read > 0:
+            ctx_total = (
+                self.last_input_tokens
+                + self.last_output_tokens
+                + self.last_cache_read
+                + self.last_cache_write
+            )
             lines.append(f"")
             lines.append(f"Last context window: {ctx_total:,}")
             lines.append(f"  input:       {self.last_input_tokens:,}")
@@ -679,6 +700,84 @@ class Session:
             pass
         return "\n".join(lines)
+    @property
+    def calls_summary(self) -> str:
+        """Per-API-call breakdown — answers "why are there N api_calls?".
+        Pulls from ``cache_patch._call_history`` (the ring buffer that
+        records every fire of ``accumulate_model_metrics``). Each row
+        shows: ``model_type`` (MODEL vs PARSER_MODEL vs MEMORY_MODEL vs
+        recovery), ``model_id``, normalized input, output, cache hits,
+        stop_reason, and the agno call site that triggered it.
+        Use to distinguish: a ``stop_reason=max_tokens`` row followed by a
+        smaller row = the streaming recovery loop fired. Two
+        ``MODEL`` rows = the agent did a tool call round. Mixed model_types
+        = optional features (memory/parser/output models) are active.
+        """
+        try:
+            from aru.cache_patch import get_call_history
+        except ImportError:
+            return "Call history not available."
+        history = get_call_history()
+        if not history:
+            return "No API calls yet."
+        lines = [f"Total recorded calls: {len(history)}", ""]
+        for c in history:
+            mt = c.get("model_type", "")
+            if mt.startswith("ModelType."):
+                mt = mt[len("ModelType."):]
+            req = c.get("request") or {}
+            n_msgs = req.get("n_messages", 0)
+            roles = req.get("roles") or {}
+            roles_str = ", ".join(f"{r}={n}" for r, n in sorted(roles.items()))
+            n_tools = req.get("n_tools", 0)
+            total_chars = req.get("total_chars", 0)
+            est_prompt_tokens = total_chars // 4 if total_chars else 0
+            lines.append(
+                f"── Call #{c.get('n', 0)} "
+                f"[{mt} / {c.get('model_id', '')}] ──"
+            )
+            lines.append(
+                f"  request:  {n_msgs} msgs ({roles_str}), "
+                f"{n_tools} tools, ~{est_prompt_tokens:,} tok ({total_chars:,} chars)"
+            )
+            first = (req.get("first_snippet") or "").replace("\n", " ⏎ ")
+            last_user = (req.get("last_user_snippet") or "").replace("\n", " ⏎ ")
+            last = (req.get("last_snippet") or "").replace("\n", " ⏎ ")
+            if first:
+                lines.append(f"    first msg:     {first[:200]!r}")
+            if last_user and last_user != first:
+                lines.append(f"    last user msg: {last_user[:200]!r}")
+            if last and last != last_user and last != first:
+                lines.append(f"    last msg:      {last[:200]!r}")
+            lines.append(
+                f"  response: input={c.get('input_tokens', 0):,} "
+                f"output={c.get('output_tokens', 0):,} "
+                f"cache_read={c.get('cache_read', 0):,} "
+                f"cache_write={c.get('cache_write', 0):,} "
+                f"stop={c.get('stop_reason') or '-'}"
+            )
+            lines.append(
+                f"  source:   provider={c.get('provider', '?')}, "
+                f"caller={c.get('caller', '?')}"
+            )
+            lines.append("")
+        # Aggregate by model_type
+        by_type: dict[str, int] = {}
+        for c in history:
+            mt = c.get("model_type", "")
+            if mt.startswith("ModelType."):
+                mt = mt[len("ModelType."):]
+            by_type[mt] = by_type.get(mt, 0) + 1
+        lines.append("By model_type:")
+        for mt, n in sorted(by_type.items(), key=lambda x: -x[1]):
+            lines.append(f"  {mt}: {n}")
+        return "\n".join(lines)
     def invalidate_context_cache(self):
         """Mark cached tree/git status as stale. Call after file mutations."""
         self._context_dirty = True
@@ -716,10 +815,20 @@ class Session:
         return int(len(text) / Session._CHARS_PER_TOKEN)
     def check_budget_warning(self) -> str | None:
-        """Return a warning string if token usage is approaching the budget."""
+        """Return a warning string if token usage is approaching the budget.
+        Total mirrors OpenCode's context indicator —
+        ``input + output + cache_read + cache_write`` — so the warning
+        triggers on the same volume the user sees in /cost.
+        """
         if self.token_budget <= 0:
             return None
-        total = self.total_input_tokens + self.total_output_tokens
+        total = (
+            self.total_input_tokens
+            + self.total_output_tokens
+            + self.total_cache_read_tokens
+            + self.total_cache_write_tokens
+        )
         pct = total / self.token_budget * 100
         if pct >= 95:
             return f"[bold red]Token budget nearly exhausted ({pct:.0f}%)[/bold red]"

{aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/app.py RENAMED Viewed

@@ -236,7 +236,7 @@ class AruApp(App):
     # Extending this map is the cheapest way to add a new local command.
     _LOCAL_SLASH = {
         "clear", "quit", "exit", "help", "plan",
-        "cost", "compact", "sessions", "model", "undo",
+        "cost", "calls", "compact", "sessions", "model", "undo",
         "skills", "agents", "commands", "mcp", "yolo",
         "theme",
     }
@@ -840,6 +840,8 @@ class AruApp(App):
                 self.action_toggle_plan()
             elif name == "cost":
                 self._slash_cost()
+            elif name == "calls":
+                self._slash_calls()
             elif name == "compact":
                 self._slash_compact()
             elif name == "sessions":
@@ -904,6 +906,18 @@ class AruApp(App):
             text = f"cost failed: {exc}"
         self._push_chat(text, "cost")
+    def _slash_calls(self) -> None:
+        session = self.session
+        if session is None:
+            self._push_chat("No session.", "calls")
+            return
+        try:
+            summary = getattr(session, "calls_summary", None)
+            text = summary if isinstance(summary, str) else str(summary)
+        except Exception as exc:
+            text = f"calls failed: {exc}"
+        self._push_chat(text, "calls")
     def _slash_compact(self) -> None:
         session = self.session
         if session is None:

{aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/completer.py RENAMED Viewed

@@ -38,6 +38,7 @@ SLASH_COMMANDS: list[tuple[str, str]] = [
     ("quit",       "Save and quit"),
     ("exit",       "Save and quit"),
     ("cost",       "Show token usage & cost"),
+    ("calls",      "Per-API-call breakdown"),
     ("model",      "Switch model"),
     ("compact",    "Compact conversation"),
     ("memory",     "Auto-extracted project memories"),

{aru_code-0.53.0 → aru_code-0.54.0/aru_code.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.53.0
+Version: 0.54.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT

{aru_code-0.53.0 → aru_code-0.54.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "aru-code"
-version = "0.53.0"
+version = "0.54.0"
 description = "A Claude Code clone built with Agno agents"
 readme = "README.md"
 license = "MIT"

{aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cache_patch_metrics.py RENAMED Viewed

@@ -276,7 +276,10 @@ class TestLiveMetricsAccumulation:
         s = ctx.session
         assert s.total_input_tokens == 1_500, "track_tokens must not re-add"
         assert s.total_output_tokens == 75
-        assert s.api_calls == 1
+        # api_calls is now incremented per real API request (inside
+        # _publish_live_metrics), not per turn — so the two live calls
+        # above produce api_calls == 2, not 1.
+        assert s.api_calls == 2
         # Live counters reset so the next turn starts clean.
         assert s._live_input_added == 0
         assert s._live_output_added == 0

{aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli.py RENAMED Viewed

@@ -348,7 +348,11 @@ class TestSession:
         assert session.total_input_tokens == 100
         assert session.total_output_tokens == 50
         assert session.total_cache_read_tokens == 30
-        assert session.api_calls == 1
+        # api_calls is no longer incremented by track_tokens — it's bumped
+        # per real API request inside cache_patch._publish_live_metrics.
+        # This unit test exercises track_tokens in isolation (no patch),
+        # so api_calls stays at 0.
+        assert session.api_calls == 0
     def test_track_tokens_none_metrics(self):
         session = Session()

aru-code 0.53.0__tar.gz → 0.54.0__tar.gz

aru-code 0.53.0__tar.gz → 0.54.0__tar.gz