PyPI - aru-code - Versions diffs - 0.30.0__tar.gz → 0.31.0__tar.gz - Mend

aru-code 0.30.0.tar.gz → 0.31.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

{aru_code-0.30.0/aru_code.egg-info → aru_code-0.31.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.30.0
+Version: 0.31.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT

aru_code-0.31.0/aru/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.31.0"

{aru_code-0.30.0 → aru_code-0.31.0}/aru/agent_factory.py RENAMED Viewed

@@ -29,32 +29,26 @@ async def _fire_hook(event_name: str, data: dict) -> dict:
     return data
-# Tools blocked while the session is in plan mode. Read-only tools (read,
-# glob, grep, list_directory, web_search, web_fetch, etc.) are NOT in this
-# set — the agent needs them to research and write the plan. Mutating or
-# execution-capable tools are gated: the agent must call exit_plan_mode and
-# get user approval before running any of these.
-_PLAN_MODE_BLOCKED_TOOLS: frozenset[str] = frozenset({
-    "edit_file",
-    "edit_files",
-    "write_file",
-    "write_files",
-    "bash",
-    "delegate_task",
-})
+# Backward-compat re-export. The canonical list now lives in
+# aru.tool_policy.PLAN_MODE_BLOCKED_TOOLS; external callers (tests,
+# docs) that import it from here keep working.
+from aru.tool_policy import PLAN_MODE_BLOCKED_TOOLS as _PLAN_MODE_BLOCKED_TOOLS
 def _wrap_tools_with_hooks(tools: list) -> list:
-    """Wrap tool functions to fire tool.execute.before/after plugin hooks.
-    Before hook can mutate args; after hook can mutate the result.
-    If a before hook raises, the tool is not executed and the error is returned.
-    Also enforces the plan-mode gate: when `session.plan_mode` is True,
-    any tool in `_PLAN_MODE_BLOCKED_TOOLS` short-circuits with a structured
-    BLOCKED message telling the agent to call `exit_plan_mode` first. The
-    gate runs BEFORE plugin hooks so plan mode is the highest-priority
-    enforcement; plugins cannot accidentally bypass it.
+    """Wrap tool functions with a single tool-policy gate and plugin hooks.
+    The policy gate (plan mode + active-skill disallowed_tools) is
+    evaluated by `aru.tool_policy.evaluate_tool_policy` — a single
+    decision function shared with `aru.permissions.resolve_permission`,
+    so both the wrapper and per-tool permission checks see the same
+    answer. When a tool is denied by multiple rules at once, the policy
+    layer returns one combined BLOCKED message rather than two
+    sequential contradictory ones (this is the scenario-1 fix of the
+    combinatorial gate audit).
+    Plugin hooks run AFTER the policy gate so a plugin's
+    tool.execute.before hook cannot bypass plan-mode / skill rules.
     """
     def _wrap_one(fn):
@@ -64,49 +58,13 @@ def _wrap_tools_with_hooks(tools: list) -> list:
         @functools.wraps(fn)
         async def wrapper(**kwargs):
             tool_name = fn.__name__
-            # Plan-mode gate — fires before any other logic so a mutating
-            # tool never reaches the permission layer or the actual executor.
-            if tool_name in _PLAN_MODE_BLOCKED_TOOLS:
-                try:
-                    from aru.runtime import get_ctx
-                    session = getattr(get_ctx(), "session", None)
-                except (LookupError, AttributeError):
-                    session = None
-                if session is not None and getattr(session, "plan_mode", False):
-                    return (
-                        f"BLOCKED: plan mode is active. Mutating tools "
-                        f"(edit/write/bash/delegate_task) are blocked until the "
-                        f"user approves the plan. Finish writing the plan as "
-                        f"your next assistant message, then call "
-                        f"exit_plan_mode(plan=<full plan text>) to request "
-                        f"approval. Do NOT retry {tool_name}."
-                    )
-            # Active-skill disallowed-tools gate — honors the `disallowed-tools`
-            # frontmatter field of the currently active skill. Mirrors the
-            # plan-mode gate pattern above; runs before plugin hooks so a skill
-            # can hard-block a tool regardless of permission/plugin state.
-            try:
-                from aru.runtime import get_ctx
-                ctx = get_ctx()
-                session = getattr(ctx, "session", None)
-                config = getattr(ctx, "config", None)
-            except (LookupError, AttributeError):
-                session = None
-                config = None
-            if session is not None and config is not None:
-                active = getattr(session, "active_skill", None)
-                skills = getattr(config, "skills", None) or {}
-                active_skill_obj = skills.get(active) if active else None
-                disallowed = getattr(active_skill_obj, "disallowed_tools", None) or []
-                if tool_name in disallowed:
-                    return (
-                        f"BLOCKED: tool `{tool_name}` is disallowed by the "
-                        f"currently active skill `{active}`. Read the skill's "
-                        f"SKILL.md for the prescribed path. Do NOT retry "
-                        f"`{tool_name}`; use the alternative the skill specifies "
-                        f"(commonly: write the output to a `.md` file via "
-                        f"`write_file` instead of using in-session state)."
-                    )
+            # Unified policy gate — one function, one decision, one
+            # message on denial (combines plan-mode + skill rules when
+            # both apply).
+            from aru.tool_policy import evaluate_tool_policy
+            decision = evaluate_tool_policy(tool_name)
+            if not decision.allowed:
+                return decision.message
             # Before hook — plugins can mutate args or raise PermissionError to block
             try:
                 before_data = await _fire_hook("tool.execute.before", {
@@ -151,14 +109,16 @@ async def _apply_chat_hooks(instructions: str, model_ref: str, agent_name: str,
     })
     instructions = data.get("system_prompt", instructions)
-    # chat.params — plugins can modify LLM parameters
+    # chat.params — plugins can modify LLM parameters. max_tokens is
+    # deliberately NOT exposed: it is coupled with the recovery loop in
+    # runner.py and mutating it from a plugin can break mid-thought
+    # recovery. Plugins that need to bound output should do so via model
+    # selection or temperature, not raw token limits.
     data = await _fire_hook("chat.params", {
         "model": model_ref,
-        "max_tokens": max_tokens,
         "temperature": None,  # let plugin set if desired
     })
     model_ref = data.get("model", model_ref)
-    max_tokens = data.get("max_tokens", max_tokens)
     return instructions, model_ref, max_tokens

{aru_code-0.30.0 → aru_code-0.31.0}/aru/cli.py RENAMED Viewed

@@ -711,13 +711,16 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
                 if not skill.user_invocable:
                     console.print(f"[yellow]Skill '{cmd_name}' is not user-invocable[/yellow]")
                 else:
-                    session.active_skill = cmd_name
+                    # Slash-invoked skills always run under the primary agent
+                    # scope (agent_id=None). Subagents reach skills via the
+                    # invoke_skill tool, which keys by ctx.agent_id instead.
+                    session.set_active_skill(None, cmd_name)
                     prompt = render_skill_template(skill.content, cmd_args)
                     # Record so the skill body survives compaction — mirror of
                     # claude-code's addInvokedSkill. Store the rendered content
                     # (post-argument substitution) so post-compact restoration
                     # matches what the model initially read.
-                    session.record_invoked_skill(cmd_name, prompt, skill.source_path)
+                    session.record_invoked_skill(cmd_name, prompt, skill.source_path, agent_id=None)
                     console.print(f"[bold magenta]Running skill /{cmd_name}...[/bold magenta]")
                     agent = await create_general_agent(session, config, env_context=_build_env_ctx())

{aru_code-0.30.0 → aru_code-0.31.0}/aru/context.py RENAMED Viewed

@@ -525,6 +525,58 @@ def would_prune(history: list[dict], model_id: str = "default") -> bool:
     return total_tool_chars >= protect_chars + PRUNE_MINIMUM_CHARS
+def _advance_split_past_tool_pairs(history: list[dict], split_idx: int) -> int:
+    """Move split_idx backward until no tool_result in recent is orphaned.
+    A tool_result block must travel with its matching tool_use block in
+    the same API request — Anthropic rejects a tool_result whose
+    tool_use_id is not declared by any tool_use in the conversation. If
+    the initial budget-based split falls between an assistant turn
+    (carrying tool_use) and the subsequent user turn (carrying
+    tool_result), the pair breaks: tool_use goes into `old` (and is
+    discarded when replaced by the summary), leaving tool_result
+    orphaned in `recent`.
+    This helper walks `split_idx` backward one index at a time until the
+    slice `history[split_idx:]` contains the matching tool_use for every
+    tool_result it holds. Matches opencode's invariant that pair
+    structure is never cut (compaction.ts does a mark-and-replace on the
+    tool output, but never removes either block).
+    O(n²) in the worst case, but n is bounded by history length and the
+    inner scan is only over the tail; on real sessions the loop ends in
+    1-2 iterations or not at all.
+    """
+    from aru.history_blocks import is_tool_result, tool_use_ids_in_item
+    while split_idx > 0:
+        # All tool_use ids present in the recent slice
+        declared: set[str] = set()
+        for msg in history[split_idx:]:
+            if msg.get("role") == "assistant":
+                declared.update(tool_use_ids_in_item(msg))
+        orphaned = False
+        for msg in history[split_idx:]:
+            if msg.get("role") != "user":
+                continue
+            for block in (msg.get("content") or []):
+                if not is_tool_result(block):
+                    continue
+                tid = block.get("tool_use_id")
+                if tid and tid not in declared:
+                    orphaned = True
+                    break
+            if orphaned:
+                break
+        if not orphaned:
+            return split_idx
+        split_idx -= 1
+    return split_idx
 def _split_history(history: list[dict], model_id: str = "default") -> tuple[list[dict], list[dict]]:
     """Split history into old (to summarize) and recent (to keep intact).
@@ -541,6 +593,12 @@ def _split_history(history: list[dict], model_id: str = "default") -> tuple[list
     of the summary, but keeping it in recent too means the agent can
     quote it verbatim afterward.
+    The split point is then walked backward (via
+    `_advance_split_past_tool_pairs`) to guarantee every tool_result in
+    `recent` has its matching tool_use in `recent`. Without that
+    invariant, a naive budget split can orphan a tool_result whose
+    tool_use landed in `old` — the API rejects such requests.
     The `model_id` parameter is retained for signature compatibility;
     the recent budget is a flat value not scaled by model context.
     """
@@ -556,6 +614,10 @@ def _split_history(history: list[dict], model_id: str = "default") -> tuple[list
         else:
             break
+    # Pair-safety: never let a tool_result in `recent` reference a tool_use
+    # that was discarded into `old`. Walks split_idx backward as needed.
+    split_idx = _advance_split_past_tool_pairs(history, split_idx)
     # Defensive: force the first user turn into `recent` even if the
     # protect budget would have sent it to `old`. The original ask is
     # the session anchor and must stay literal.
@@ -792,15 +854,22 @@ async def compact_conversation(
         pass  # no plugin manager available — proceed without hooks
     # Best-effort: if caller didn't pass invoked_skills but there's a session
-    # in the current runtime context, use its record. Keeps legacy call sites
-    # (subagent compaction, tests) covered without forcing every caller to
-    # plumb the session through.
+    # in the current runtime context, pull just this agent's slice. Keeps
+    # legacy call sites (subagent compaction, tests) covered without forcing
+    # every caller to plumb the session through. Filtering by agent_id means
+    # a subagent's compaction doesn't replay primary-scope skills and vice
+    # versa.
     if invoked_skills is None:
         try:
             from aru.runtime import get_ctx
-            session = getattr(get_ctx(), "session", None)
+            ctx = get_ctx()
+            session = getattr(ctx, "session", None)
             if session is not None:
-                invoked_skills = getattr(session, "invoked_skills", None)
+                getter = getattr(session, "get_invoked_skills_for_agent", None)
+                if callable(getter):
+                    invoked_skills = getter(getattr(ctx, "agent_id", None))
+                else:
+                    invoked_skills = getattr(session, "invoked_skills", None)
         except (LookupError, AttributeError, ImportError):
             pass

{aru_code-0.30.0 → aru_code-0.31.0}/aru/permissions.py RENAMED Viewed

@@ -416,6 +416,21 @@ def _most_restrictive(
     return worst
+# Mapping from permission category (what resolve_permission takes) to the
+# tool_name used by the unified tool-policy gate (what evaluate_tool_policy
+# takes). The permission system asks about *categories* (edit, write, bash),
+# while the tool-policy layer reasons about tool *names* (edit_file, bash,
+# ...). This mapping lets resolve_permission consult the tool-policy layer
+# consistently so that, e.g., a bash check in plan mode denies at the
+# permission level too — not only at the wrapper level.
+_CATEGORY_TO_REPRESENTATIVE_TOOL: dict[str, str] = {
+    "edit": "edit_file",
+    "write": "write_file",
+    "bash": "bash",
+    "delegate_task": "delegate_task",
+}
 def resolve_permission(
     category: str, subject: str = ""
 ) -> tuple[PermissionAction, str]:
@@ -425,15 +440,32 @@ def resolve_permission(
     Algorithm:
     1. If skip_permissions -> ("allow", "*")
-    2. Check session_allowed for matching (category, pattern) -> ("allow", pattern)
-    3. For bash: handle compound commands, then walk rules
-    4. For others: walk rules (defaults + user config), last-match-wins
-    5. Fallback: category default, then global default
+    2. Consult unified tool-policy gate (plan_mode / skill disallowed).
+       If policy denies this category's representative tool, return
+       ("deny", "tool-policy"). This is how claude-code / opencode fold
+       mode-based gates into the same decision function that handles
+       user rules, instead of stacking independent short-circuits.
+    3. Check session_allowed for matching (category, pattern)
+       -> ("allow", pattern)
+    4. For bash: handle compound commands, then walk rules
+    5. For others: walk rules (defaults + user config), last-match-wins
+    6. Fallback: category default, then global default
     """
     ctx = get_ctx()
     if ctx.skip_permissions:
         return ("allow", "*")
+    # Unified tool-policy gate — shared with the agent_factory wrapper so
+    # both paths agree. A tool denied by plan_mode / skill rules is denied
+    # here too; the wrapper renders the combined message for the model,
+    # and this call returns a plain "deny" for the user-prompt codepath.
+    rep_tool = _CATEGORY_TO_REPRESENTATIVE_TOOL.get(category)
+    if rep_tool:
+        from aru.tool_policy import evaluate_tool_policy
+        decision = evaluate_tool_policy(rep_tool)
+        if not decision.allowed:
+            return ("deny", "tool-policy")
     # "Accept edits" mode auto-allows edit/write categories for the session.
     if ctx.permission_mode == "acceptEdits" and category in ("edit", "write"):
         return ("allow", "*")
@@ -469,8 +501,15 @@ def _fire_permission_hook(mgr, category: str, subject: str) -> bool | None:
     Supports both sync and async handlers. Returns True/False if a handler
     sets event.data["allow"], or None if no handler overrode the decision.
+    Async handlers dispatched in a worker thread carry a copied
+    contextvars.Context so plugin code can still call `get_ctx()` and
+    other contextvar-backed helpers — without the copy, the new
+    `asyncio.run` loop would see an empty context and break handlers
+    that rely on the runtime.
     """
     import asyncio
+    import contextvars
     from aru.plugins.hooks import HookEvent
     evt = HookEvent(hook="permission.ask", data={"category": category, "subject": subject})
@@ -480,15 +519,21 @@ def _fire_permission_hook(mgr, category: str, subject: str) -> bool | None:
             try:
                 if asyncio.iscoroutinefunction(handler):
                     # Async handler — run via the event loop
-                    loop = asyncio.get_event_loop()
-                    if loop.is_running():
-                        # Schedule as a task and wait with run_until_complete
-                        # won't work, so use a new loop in a thread
+                    try:
+                        loop = asyncio.get_running_loop()
+                    except RuntimeError:
+                        loop = None
+                    if loop is not None:
+                        # A loop is running in this thread; we cannot call
+                        # run_until_complete. Dispatch to a worker thread
+                        # with the current contextvars snapshot so the
+                        # handler sees the same RuntimeContext.
                         import concurrent.futures
+                        snapshot = contextvars.copy_context()
                         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-                            pool.submit(asyncio.run, handler(evt)).result(timeout=5)
+                            pool.submit(snapshot.run, asyncio.run, handler(evt)).result(timeout=5)
                     else:
-                        loop.run_until_complete(handler(evt))
+                        asyncio.run(handler(evt))
                 else:
                     handler(evt)
             except Exception:

{aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/hooks.py RENAMED Viewed

@@ -40,7 +40,7 @@ VALID_HOOKS = frozenset({
     # Chat lifecycle
     "chat.message",              # Before user message is sent to LLM (can modify)
-    "chat.params",               # Before LLM call (can modify temperature, max_tokens)
+    "chat.params",               # Before LLM call (can modify model, temperature). NOT max_tokens — coupled with recovery loop.
     "chat.system.transform",     # Before LLM call (can modify system prompt)
     "chat.messages.transform",   # Before LLM call (can modify message history)

{aru_code-0.30.0 → aru_code-0.31.0}/aru/runner.py RENAMED Viewed

@@ -755,10 +755,13 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
                 with Status("[dim]Compacting context...[/dim]", console=console, spinner="dots"):
                     try:
                         session.history = prune_history(session.history, model_id=session.model_id)
+                        # Only the primary agent's invoked skills get replayed
+                        # after compaction here — subagents have their own
+                        # compaction flow and carry their own agent_id.
                         session.history = await compact_conversation(
                             session.history, session.model_ref, session.plan_task,
                             model_id=session.model_id,
-                            invoked_skills=session.invoked_skills,
+                            invoked_skills=session.get_invoked_skills_for_agent(None),
                         )
                         console.print("[dim]Context compacted to save tokens.[/dim]")
                     except Exception:

{aru_code-0.30.0 → aru_code-0.31.0}/aru/runtime.py RENAMED Viewed

@@ -22,6 +22,7 @@ from __future__ import annotations
 import contextvars
 import copy
 import threading
+import uuid
 from dataclasses import dataclass, field
 from typing import Any, Callable
@@ -104,6 +105,14 @@ class RuntimeContext:
     # -- Custom agents --
     custom_agent_defs: dict = field(default_factory=dict)
+    # -- Agent scope --
+    # Stable identifier for the current agent's execution scope. None means
+    # "primary agent" (the top-level conversation). Subagents forked via
+    # fork_ctx() receive a unique identifier here, used to key per-scope
+    # state such as active skills (so a subagent does not inherit the
+    # parent's skill-active state).
+    agent_id: str | None = None
     # -- Permissions --
     perm_config: Any = field(default_factory=_default_perm_config)
     session_allowed: set[tuple[str, str]] = field(default_factory=set)
@@ -165,6 +174,11 @@ def fork_ctx() -> RuntimeContext:
     Permission state is deep-copied to prevent interleaving when multiple
     sub-agents run concurrently via ``asyncio.gather``.  Shared resources
     (console, locks, tracked_processes) are kept by reference.
+    The fork receives a fresh, unique ``agent_id`` so per-scope state
+    (e.g. active skills) keyed by agent_id is isolated from the parent.
+    Callers may overwrite ``agent_id`` afterwards if they prefer a more
+    descriptive label.
     """
     original = get_ctx()
     forked = copy.copy(original)
@@ -176,4 +190,9 @@ def fork_ctx() -> RuntimeContext:
     forked.read_cache = {}
     # Fresh task store per sub-agent
     forked.task_store = TaskStore()
+    # Assign a unique agent_id so skill scope is isolated from the parent.
+    # A uuid is used rather than an incrementing counter so nested forks
+    # (fork-of-a-fork) still get distinct ids even though the counter on
+    # the intermediate ctx was shallow-copied from the root.
+    forked.agent_id = f"subagent-{uuid.uuid4().hex[:8]}"
     return forked

aru-code 0.30.0__tar.gz → 0.31.0__tar.gz

aru-code 0.30.0.tar.gz → 0.31.0.tar.gz