PyPI - aru-code - Versions diffs - 0.30.0__tar.gz → 0.32.0__tar.gz - Mend

aru-code 0.30.0__tar.gz → 0.32.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

{aru_code-0.30.0/aru_code.egg-info → aru_code-0.32.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.30.0
+Version: 0.32.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT

aru_code-0.32.0/aru/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.32.0"

{aru_code-0.30.0 → aru_code-0.32.0}/aru/agent_factory.py RENAMED Viewed

@@ -29,32 +29,26 @@ async def _fire_hook(event_name: str, data: dict) -> dict:
     return data
-# Tools blocked while the session is in plan mode. Read-only tools (read,
-# glob, grep, list_directory, web_search, web_fetch, etc.) are NOT in this
-# set — the agent needs them to research and write the plan. Mutating or
-# execution-capable tools are gated: the agent must call exit_plan_mode and
-# get user approval before running any of these.
-_PLAN_MODE_BLOCKED_TOOLS: frozenset[str] = frozenset({
-    "edit_file",
-    "edit_files",
-    "write_file",
-    "write_files",
-    "bash",
-    "delegate_task",
-})
+# Backward-compat re-export. The canonical list now lives in
+# aru.tool_policy.PLAN_MODE_BLOCKED_TOOLS; external callers (tests,
+# docs) that import it from here keep working.
+from aru.tool_policy import PLAN_MODE_BLOCKED_TOOLS as _PLAN_MODE_BLOCKED_TOOLS
 def _wrap_tools_with_hooks(tools: list) -> list:
-    """Wrap tool functions to fire tool.execute.before/after plugin hooks.
-    Before hook can mutate args; after hook can mutate the result.
-    If a before hook raises, the tool is not executed and the error is returned.
-    Also enforces the plan-mode gate: when `session.plan_mode` is True,
-    any tool in `_PLAN_MODE_BLOCKED_TOOLS` short-circuits with a structured
-    BLOCKED message telling the agent to call `exit_plan_mode` first. The
-    gate runs BEFORE plugin hooks so plan mode is the highest-priority
-    enforcement; plugins cannot accidentally bypass it.
+    """Wrap tool functions with a single tool-policy gate and plugin hooks.
+    The policy gate (plan mode + active-skill disallowed_tools) is
+    evaluated by `aru.tool_policy.evaluate_tool_policy` — a single
+    decision function shared with `aru.permissions.resolve_permission`,
+    so both the wrapper and per-tool permission checks see the same
+    answer. When a tool is denied by multiple rules at once, the policy
+    layer returns one combined BLOCKED message rather than two
+    sequential contradictory ones (this is the scenario-1 fix of the
+    combinatorial gate audit).
+    Plugin hooks run AFTER the policy gate so a plugin's
+    tool.execute.before hook cannot bypass plan-mode / skill rules.
     """
     def _wrap_one(fn):
@@ -64,49 +58,13 @@ def _wrap_tools_with_hooks(tools: list) -> list:
         @functools.wraps(fn)
         async def wrapper(**kwargs):
             tool_name = fn.__name__
-            # Plan-mode gate — fires before any other logic so a mutating
-            # tool never reaches the permission layer or the actual executor.
-            if tool_name in _PLAN_MODE_BLOCKED_TOOLS:
-                try:
-                    from aru.runtime import get_ctx
-                    session = getattr(get_ctx(), "session", None)
-                except (LookupError, AttributeError):
-                    session = None
-                if session is not None and getattr(session, "plan_mode", False):
-                    return (
-                        f"BLOCKED: plan mode is active. Mutating tools "
-                        f"(edit/write/bash/delegate_task) are blocked until the "
-                        f"user approves the plan. Finish writing the plan as "
-                        f"your next assistant message, then call "
-                        f"exit_plan_mode(plan=<full plan text>) to request "
-                        f"approval. Do NOT retry {tool_name}."
-                    )
-            # Active-skill disallowed-tools gate — honors the `disallowed-tools`
-            # frontmatter field of the currently active skill. Mirrors the
-            # plan-mode gate pattern above; runs before plugin hooks so a skill
-            # can hard-block a tool regardless of permission/plugin state.
-            try:
-                from aru.runtime import get_ctx
-                ctx = get_ctx()
-                session = getattr(ctx, "session", None)
-                config = getattr(ctx, "config", None)
-            except (LookupError, AttributeError):
-                session = None
-                config = None
-            if session is not None and config is not None:
-                active = getattr(session, "active_skill", None)
-                skills = getattr(config, "skills", None) or {}
-                active_skill_obj = skills.get(active) if active else None
-                disallowed = getattr(active_skill_obj, "disallowed_tools", None) or []
-                if tool_name in disallowed:
-                    return (
-                        f"BLOCKED: tool `{tool_name}` is disallowed by the "
-                        f"currently active skill `{active}`. Read the skill's "
-                        f"SKILL.md for the prescribed path. Do NOT retry "
-                        f"`{tool_name}`; use the alternative the skill specifies "
-                        f"(commonly: write the output to a `.md` file via "
-                        f"`write_file` instead of using in-session state)."
-                    )
+            # Unified policy gate — one function, one decision, one
+            # message on denial (combines plan-mode + skill rules when
+            # both apply).
+            from aru.tool_policy import evaluate_tool_policy
+            decision = evaluate_tool_policy(tool_name)
+            if not decision.allowed:
+                return decision.message
             # Before hook — plugins can mutate args or raise PermissionError to block
             try:
                 before_data = await _fire_hook("tool.execute.before", {
@@ -151,14 +109,16 @@ async def _apply_chat_hooks(instructions: str, model_ref: str, agent_name: str,
     })
     instructions = data.get("system_prompt", instructions)
-    # chat.params — plugins can modify LLM parameters
+    # chat.params — plugins can modify LLM parameters. max_tokens is
+    # deliberately NOT exposed: it is coupled with the recovery loop in
+    # runner.py and mutating it from a plugin can break mid-thought
+    # recovery. Plugins that need to bound output should do so via model
+    # selection or temperature, not raw token limits.
     data = await _fire_hook("chat.params", {
         "model": model_ref,
-        "max_tokens": max_tokens,
         "temperature": None,  # let plugin set if desired
     })
     model_ref = data.get("model", model_ref)
-    max_tokens = data.get("max_tokens", max_tokens)
     return instructions, model_ref, max_tokens
@@ -196,9 +156,16 @@ async def create_agent_from_spec(
         instructions, resolved_model, spec.name, max_tokens=spec.max_tokens,
     )
+    reasoning_override = session.reasoning_override if session is not None else None
     return Agent(
         name=spec.name,
-        model=create_model(resolved_model, max_tokens=max_tokens),
+        model=create_model(
+            resolved_model,
+            max_tokens=max_tokens,
+            use_reasoning=spec.use_reasoning,
+            reasoning_override=reasoning_override,
+        ),
         tools=tools,
         instructions=instructions,
         markdown=True,
@@ -250,7 +217,11 @@ async def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
     return Agent(
         name=agent_def.name,
-        model=create_model(model_ref, max_tokens=max_tokens),
+        model=create_model(
+            model_ref,
+            max_tokens=max_tokens,
+            reasoning_override=session.reasoning_override,
+        ),
         tools=tools,
         instructions=instructions,
         markdown=True,

{aru_code-0.30.0 → aru_code-0.32.0}/aru/agents/catalog.py RENAMED Viewed

@@ -34,6 +34,7 @@ class AgentSpec:
     tools_factory: Callable[[], list]    # lazy resolver — invoked at agent creation
     max_tokens: int | None
     small_model: bool = False            # if True, factory uses ctx.small_model_ref
+    use_reasoning: bool = True           # False skips thinking params (e.g. explorer)
 def _build_tools() -> list:
@@ -88,5 +89,6 @@ AGENTS: dict[str, AgentSpec] = {
         tools_factory=_explore_tools,
         max_tokens=8192,
         small_model=True,
+        use_reasoning=False,  # fast read-only subagent — no thinking overhead
     ),
 }

{aru_code-0.30.0 → aru_code-0.32.0}/aru/cache_patch.py RENAMED Viewed

@@ -43,6 +43,43 @@ _last_call_cache_write: int = 0
 # We normalize "length" → "max_tokens" so callers can check a single value.
 _last_call_stop_reason: str | None = None
+# Micro-compaction metrics (process-wide, reset by tests via
+# reset_microcompact_stats()). Recorded by _prune_tool_messages every time it
+# fires from the format_function_call_results patch. Surfaced in /cost so
+# users can see what the pre-API-call prune is actually doing — the basis
+# for any future calibration of count/time-based triggers (Passos 3/4 of the
+# plan, deferred until we have data here to justify them).
+_microcompact_invocations: int = 0   # times _prune_tool_messages was called
+_microcompact_clear_passes: int = 0  # times the prune actually cleared anything
+_microcompact_results_cleared: int = 0  # cumulative tool_result blocks cleared
+# Reactive overflow recovery: counts API calls where the provider rejected the
+# request as too long and we wiped older tool_results then retried. Surfaced
+# in /cost so users can tell when the recovery path is masking a chronically
+# oversized context (suggests prune thresholds or model choice need attention).
+_microcompact_overflow_recoveries: int = 0
+# Aggressive prune keeps only the last N compactable tool_results, no matter
+# the budget. Picked low because by definition we got here AFTER the regular
+# prune (160K protect) failed to keep the context within model limits.
+_OVERFLOW_RECOVERY_KEEP_RECENT = 3
+# Substrings (case-insensitive) that mark a provider error as a context-too-long
+# rejection. Anthropic / OpenAI / DashScope / DeepSeek / Groq all phrase it
+# slightly differently; the union below covers the seen variants. Match is
+# substring against str(exc) — wider than ideal, but the fallback path (no
+# recovery) only kicks in when wrong, and a false positive at worst replays
+# the same call after a no-op prune.
+_OVERFLOW_ERROR_SIGNATURES = (
+    "prompt is too long",
+    "context length",
+    "context_length_exceeded",
+    "maximum context",
+    "exceeds the maximum",
+    "exceeds context",
+    "input is too long",
+    "too many tokens",
+    "request too large",
+)
 def get_last_call_metrics() -> tuple[int, int, int, int]:
     """Return (input, output, cache_read, cache_write) from the most recent API call."""
@@ -68,6 +105,130 @@ def reset_last_stop_reason() -> None:
     _last_call_stop_reason = None
+def get_microcompact_stats() -> dict:
+    """Return process-wide micro-compaction metrics.
+    Keys:
+      - invocations: total times _prune_tool_messages ran
+      - clear_passes: subset that actually cleared something
+      - results_cleared: cumulative tool_result blocks wiped
+    Used by /cost and tests. The ratio results_cleared/invocations is the
+    natural calibration signal for whether the budget-based trigger fires
+    often enough — if it's near zero across long sessions, the threshold
+    is too lax (or the protect window too generous).
+    """
+    return {
+        "invocations": _microcompact_invocations,
+        "clear_passes": _microcompact_clear_passes,
+        "results_cleared": _microcompact_results_cleared,
+        "overflow_recoveries": _microcompact_overflow_recoveries,
+    }
+def reset_microcompact_stats() -> None:
+    """Zero the micro-compaction counters. Test-only helper."""
+    global _microcompact_invocations, _microcompact_clear_passes, _microcompact_results_cleared
+    global _microcompact_overflow_recoveries
+    _microcompact_invocations = 0
+    _microcompact_clear_passes = 0
+    _microcompact_results_cleared = 0
+    _microcompact_overflow_recoveries = 0
+def _is_context_overflow_error(exc) -> bool:
+    """Return True iff `exc` looks like a provider context-too-long rejection.
+    Substring match (case-insensitive) against the str of the exception and any
+    nested `original_error` attribute. Wider than ideal but cheap; the recovery
+    path that consumes this is itself idempotent (re-running with no changes
+    after a no-op prune just hits the same error again and propagates).
+    """
+    msgs: list[str] = []
+    try:
+        msgs.append(str(exc))
+    except Exception:
+        pass
+    inner = getattr(exc, "original_error", None) or getattr(exc, "__cause__", None)
+    if inner is not None:
+        try:
+            msgs.append(str(inner))
+        except Exception:
+            pass
+    blob = " ".join(m.lower() for m in msgs if m)
+    return any(sig in blob for sig in _OVERFLOW_ERROR_SIGNATURES)
+def _aggressive_prune(messages, keep_recent: int = _OVERFLOW_RECOVERY_KEEP_RECENT) -> int:
+    """Wipe content of all but the last `keep_recent` compactable tool_results.
+    Used reactively after a provider rejects a request as too long. Ignores the
+    budget walk entirely — by the time we get here, the budget-based prune
+    already failed to keep us under the model's context limit, so its answer
+    is wrong for this request.
+    Non-compactable tool_results (delegate_task etc.) are still preserved.
+    Returns the number of results actually cleared.
+    """
+    from aru.context import COMPACTABLE_TOOLS
+    id_to_name = _build_tool_id_to_name_map(messages)
+    # Collect compactable tool_result indices in encounter order.
+    compactable_indices: list[int] = []
+    for i, msg in enumerate(messages):
+        if getattr(msg, "role", None) != "tool":
+            continue
+        tc_id = getattr(msg, "tool_call_id", None)
+        tool_name = id_to_name.get(tc_id) if tc_id else None
+        if tool_name in COMPACTABLE_TOOLS:
+            compactable_indices.append(i)
+    if len(compactable_indices) <= keep_recent:
+        return 0
+    to_clear = compactable_indices[:-keep_recent] if keep_recent > 0 else compactable_indices
+    cleared = 0
+    for idx in to_clear:
+        msg = messages[idx]
+        content = getattr(msg, "content", None)
+        if content is None or str(content) == _PRUNED_PLACEHOLDER:
+            continue
+        try:
+            msg.content = _PRUNED_PLACEHOLDER
+            if hasattr(msg, "compressed_content"):
+                msg.compressed_content = None
+            cleared += 1
+        except (AttributeError, TypeError):
+            pass
+    return cleared
+def _build_tool_id_to_name_map(messages) -> dict:
+    """Walk assistant messages forward, building tool_call_id → tool_name map.
+    Required because Agno's `role="tool"` Message carries `tool_call_id` but
+    not the originating tool name — the name lives on the matching
+    `assistant.tool_calls[i].function.name` in a previous message.
+    """
+    id_to_name: dict = {}
+    for msg in messages:
+        if getattr(msg, "role", None) != "assistant":
+            continue
+        tool_calls = getattr(msg, "tool_calls", None)
+        if not tool_calls:
+            continue
+        for tc in tool_calls:
+            tc_id = tc.get("id") if isinstance(tc, dict) else None
+            if not tc_id:
+                continue
+            fn = tc.get("function") if isinstance(tc, dict) else None
+            tc_name = fn.get("name") if isinstance(fn, dict) else None
+            if tc_name:
+                id_to_name[tc_id] = tc_name
+    return id_to_name
 def _prune_tool_messages(messages):
     """Clear old tool result content using a token-budget approach.
@@ -77,49 +238,81 @@ def _prune_tool_messages(messages):
     PRUNE_MINIMUM_CHARS (avoids unnecessary churn on small conversations).
     Aligned with OpenCode's strategy: budget-based, not fixed-N.
+    **Tool allowlist**: only outputs of tools in `COMPACTABLE_TOOLS` are
+    eligible for clearing. Non-compactable tools (delegate_task, invoke_skill,
+    tasklist mutators) still consume the protection budget but are never
+    pruned — their content is semantically load-bearing. The id→name map is
+    built from prior assistant `tool_calls` since `role="tool"` Messages carry
+    only the call id, not the tool name. Single source of truth lives in
+    `aru.context.COMPACTABLE_TOOLS`.
+    Returns the number of tool results actually cleared (0 if none) for
+    metrics consumption by `_microcompact_stats`.
     """
-    # Collect tool message indices and their content sizes
-    tool_indices = []
-    for i, msg in enumerate(messages):
-        if getattr(msg, "role", None) == "tool":
-            content = getattr(msg, "content", None)
-            content_len = len(str(content)) if content is not None else 0
-            tool_indices.append((i, content_len))
+    from aru.context import COMPACTABLE_TOOLS
-    if not tool_indices:
-        return
+    global _microcompact_invocations, _microcompact_clear_passes, _microcompact_results_cleared
+    _microcompact_invocations += 1
-    # Walk backwards, accumulating protected chars
-    protected_chars = 0
-    prune_candidates = []  # (index, content_len) of messages outside protection
+    id_to_name = _build_tool_id_to_name_map(messages)
-    for idx, content_len in reversed(tool_indices):
-        if protected_chars + content_len <= _PRUNE_PROTECT_CHARS:
-            protected_chars += content_len
-        else:
+    # Collect tool message indices, their content sizes, and compactability.
+    tool_entries = []  # (index, content_len, is_compactable)
+    for i, msg in enumerate(messages):
+        if getattr(msg, "role", None) != "tool":
+            continue
+        content = getattr(msg, "content", None)
+        content_len = len(str(content)) if content is not None else 0
+        tc_id = getattr(msg, "tool_call_id", None)
+        tool_name = id_to_name.get(tc_id) if tc_id else None
+        # Defensive: if we can't resolve the name, treat as non-compactable.
+        # Better to leak budget than wipe a delegate_task result by mistake.
+        is_compactable = tool_name in COMPACTABLE_TOOLS if tool_name else False
+        tool_entries.append((i, content_len, is_compactable))
+    if not tool_entries:
+        return 0
+    # Walk backwards. ALL tool content (compactable or not) consumes the
+    # protection budget — the prompt carries it either way. Once the budget
+    # is exhausted, older entries are prune candidates ONLY if compactable;
+    # non-compactable old entries (delegate_task etc.) stay untouched.
+    running_total = 0
+    prune_candidates = []  # (index, content_len) of compactable messages outside protection
+    for idx, content_len, is_compactable in reversed(tool_entries):
+        in_recent_window = (running_total + content_len) <= _PRUNE_PROTECT_CHARS
+        running_total += content_len
+        if not in_recent_window and is_compactable:
             prune_candidates.append((idx, content_len))
     # Only prune if there's enough to free
     freeable = sum(cl for _, cl in prune_candidates)
     if freeable < _PRUNE_MINIMUM_CHARS:
-        return
+        return 0
-    # Replace old tool results with placeholder
+    cleared = 0
     for idx, _ in prune_candidates:
         msg = messages[idx]
         content = getattr(msg, "content", None)
         if content is None:
             continue
-        # Skip if already pruned
         if str(content) == _PRUNED_PLACEHOLDER:
             continue
         try:
             msg.content = _PRUNED_PLACEHOLDER
             if hasattr(msg, "compressed_content"):
                 msg.compressed_content = None
+            cleared += 1
         except (AttributeError, TypeError):
             pass
+    if cleared:
+        _microcompact_clear_passes += 1
+        _microcompact_results_cleared += cleared
+    return cleared
 def apply_cache_patch():
     """Apply all patches to reduce Agno's token consumption."""
@@ -127,6 +320,73 @@ def apply_cache_patch():
     _patch_claude_cache_breakpoints()
     _patch_per_call_metrics()
     _patch_stop_reason_capture()
+    _patch_overflow_recovery()
+def _patch_overflow_recovery():
+    """Wrap Agno's retry loops to handle context-overflow rejections.
+    When the provider rejects a request as too long (after the regular pre-call
+    prune was insufficient), wipe content of all but the last
+    `_OVERFLOW_RECOVERY_KEEP_RECENT` compactable tool_results in the message
+    list and re-raise. Agno's existing retry loop in `_a*invoke_with_retry`
+    will retry once with the now-shorter messages.
+    Patches both `_ainvoke_with_retry` (non-stream) and
+    `_ainvoke_stream_with_retry` (stream — what Aru's runner uses). Each is
+    wrapped to call `_aggressive_prune` once per turn before the underlying
+    retry fires; subsequent overflow errors propagate normally so we never
+    loop forever wiping the same messages.
+    A turn-scoped flag (`_overflow_recovery_done` set on the Model instance)
+    ensures we only attempt recovery once per call site — if even the
+    aggressive prune doesn't shrink the prompt enough, the error propagates
+    and the user sees it instead of a silent retry storm.
+    """
+    from agno.models.base import Model
+    from agno.exceptions import ModelProviderError
+    _orig_ainvoke = Model._ainvoke_with_retry
+    _orig_ainvoke_stream = Model._ainvoke_stream_with_retry
+    async def _patched_ainvoke_with_retry(self, **kwargs):
+        global _microcompact_overflow_recoveries
+        try:
+            return await _orig_ainvoke(self, **kwargs)
+        except ModelProviderError as e:
+            if not _is_context_overflow_error(e):
+                raise
+            messages = kwargs.get("messages")
+            if messages is None:
+                raise
+            cleared = _aggressive_prune(messages)
+            if cleared == 0:
+                raise
+            _microcompact_overflow_recoveries += 1
+            return await _orig_ainvoke(self, **kwargs)
+    async def _patched_ainvoke_stream_with_retry(self, **kwargs):
+        global _microcompact_overflow_recoveries
+        try:
+            async for response in _orig_ainvoke_stream(self, **kwargs):
+                yield response
+            return
+        except ModelProviderError as e:
+            if not _is_context_overflow_error(e):
+                raise
+            messages = kwargs.get("messages")
+            if messages is None:
+                raise
+            cleared = _aggressive_prune(messages)
+            if cleared == 0:
+                raise
+            _microcompact_overflow_recoveries += 1
+        # Retry once with the now-pruned messages. A second overflow propagates.
+        async for response in _orig_ainvoke_stream(self, **kwargs):
+            yield response
+    Model._ainvoke_with_retry = _patched_ainvoke_with_retry
+    Model._ainvoke_stream_with_retry = _patched_ainvoke_stream_with_retry
 def _patch_tool_result_pruning():

{aru_code-0.30.0 → aru_code-0.32.0}/aru/cli.py RENAMED Viewed

@@ -529,6 +529,30 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
                     console.print(f"[yellow]Error: {e}[/yellow]")
             continue
+        if user_input == "/reasoning" or user_input.startswith("/reasoning "):
+            arg = user_input[len("/reasoning"):].strip().lower()
+            valid_efforts = {"low", "medium", "high", "max"}
+            if not arg:
+                current = session.reasoning_override or "[dim](config default)[/dim]"
+                console.print(f"[bold]Reasoning effort:[/bold] {current}")
+                console.print()
+                console.print("[dim]Usage:[/dim]")
+                console.print("[dim]  /reasoning <low|medium|high|max>  — override effort for this session[/dim]")
+                console.print("[dim]  /reasoning off                    — disable thinking entirely[/dim]")
+                console.print("[dim]  /reasoning clear                  — revert to provider/model config[/dim]")
+            elif arg in ("clear", "default", "none"):
+                session.reasoning_override = None
+                console.print("[bold green]Reasoning override cleared[/bold green] — using provider/model config.")
+            elif arg == "off":
+                session.reasoning_override = "off"
+                console.print("[bold yellow]Reasoning disabled[/bold yellow] for this session.")
+            elif arg in valid_efforts:
+                session.reasoning_override = arg
+                console.print(f"[bold green]Reasoning effort set to '{arg}'[/bold green] for this session.")
+            else:
+                console.print(f"[yellow]Unknown value '{arg}'. Use low/medium/high/max/off/clear.[/yellow]")
+            continue
         if user_input.lower() in ("/sessions", "/list"):
             sessions = store.list_sessions()
             if not sessions:
@@ -711,13 +735,16 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
                 if not skill.user_invocable:
                     console.print(f"[yellow]Skill '{cmd_name}' is not user-invocable[/yellow]")
                 else:
-                    session.active_skill = cmd_name
+                    # Slash-invoked skills always run under the primary agent
+                    # scope (agent_id=None). Subagents reach skills via the
+                    # invoke_skill tool, which keys by ctx.agent_id instead.
+                    session.set_active_skill(None, cmd_name)
                     prompt = render_skill_template(skill.content, cmd_args)
                     # Record so the skill body survives compaction — mirror of
                     # claude-code's addInvokedSkill. Store the rendered content
                     # (post-argument substitution) so post-compact restoration
                     # matches what the model initially read.
-                    session.record_invoked_skill(cmd_name, prompt, skill.source_path)
+                    session.record_invoked_skill(cmd_name, prompt, skill.source_path, agent_id=None)
                     console.print(f"[bold magenta]Running skill /{cmd_name}...[/bold magenta]")
                     agent = await create_general_agent(session, config, env_context=_build_env_ctx())
@@ -841,7 +868,7 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
         agent = Agent(
             name="Aru",
-            model=create_model(session.model_ref),  # None → provider cap
+            model=create_model(session.model_ref, reasoning_override=session.reasoning_override),
             tools=[],
             instructions=build_instructions("general", extra_instructions),
             markdown=True,

{aru_code-0.30.0 → aru_code-0.32.0}/aru/commands.py RENAMED Viewed

@@ -16,6 +16,7 @@ SLASH_COMMANDS = [
     ("/help", "Show help and available commands", "/help"),
     ("/plan", "Create an implementation plan", "/plan <task>"),
     ("/model", "Switch model/provider", "/model [provider/model]"),
+    ("/reasoning", "Set reasoning effort for this session", "/reasoning [low|medium|high|max|off|clear]"),
     ("/sessions", "List recent sessions", "/sessions"),
     ("/commands", "List custom commands", "/commands"),
     ("/skills", "List available skills", "/skills"),

aru-code 0.30.0__tar.gz → 0.32.0__tar.gz

aru-code 0.30.0__tar.gz → 0.32.0__tar.gz