PyPI - aru-code - Versions diffs - 0.32.0__tar.gz → 0.36.0__tar.gz - Mend

aru-code 0.32.0 (tar.gz) → 0.36.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

{aru_code-0.32.0 → aru_code-0.36.0}/PKG-INFO +32 -2
{aru_code-0.32.0 → aru_code-0.36.0}/README.md +30 -1
aru_code-0.36.0/aru/__init__.py +1 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/agent_factory.py +9 -1
{aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/base.py +178 -28
{aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/catalog.py +63 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/cache_patch.py +102 -1
{aru_code-0.32.0 → aru_code-0.36.0}/aru/cli.py +203 -26
aru_code-0.36.0/aru/commands.py +712 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/config.py +29 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/context.py +97 -23
{aru_code-0.32.0 → aru_code-0.36.0}/aru/display.py +9 -1
aru_code-0.36.0/aru/events.py +218 -0
aru_code-0.36.0/aru/format/__init__.py +24 -0
aru_code-0.36.0/aru/format/manager.py +158 -0
aru_code-0.36.0/aru/format/runner.py +70 -0
aru_code-0.36.0/aru/lsp/__init__.py +29 -0
aru_code-0.36.0/aru/lsp/client.py +186 -0
aru_code-0.36.0/aru/lsp/manager.py +152 -0
aru_code-0.36.0/aru/lsp/protocol.py +117 -0
aru_code-0.36.0/aru/memory/__init__.py +50 -0
aru_code-0.36.0/aru/memory/extractor.py +195 -0
aru_code-0.36.0/aru/memory/loader.py +42 -0
aru_code-0.36.0/aru/memory/store.py +281 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/permissions.py +371 -36
{aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/hooks.py +27 -2
{aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/manager.py +73 -6
{aru_code-0.32.0 → aru_code-0.36.0}/aru/runner.py +161 -243
aru_code-0.36.0/aru/runtime.py +577 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/session.py +150 -7
aru_code-0.36.0/aru/sinks.py +244 -0
aru_code-0.36.0/aru/streaming.py +346 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tool_policy.py +75 -49
aru_code-0.36.0/aru/tools/_shared.py +94 -0
aru_code-0.36.0/aru/tools/apply_patch.py +513 -0
aru_code-0.36.0/aru/tools/apply_patch_prompt.txt +65 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/codebase.py +1 -1
aru_code-0.36.0/aru/tools/delegate.py +662 -0
aru_code-0.36.0/aru/tools/delegate_prompt.txt +34 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/file_ops.py +26 -15
aru_code-0.36.0/aru/tools/lsp.py +398 -0
aru_code-0.36.0/aru/tools/mcp_client.py +482 -0
aru_code-0.36.0/aru/tools/memory_tool.py +108 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/plan_mode.py +16 -8
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/ranker.py +4 -1
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/registry.py +29 -6
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/search.py +35 -4
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/shell.py +37 -8
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/skill.py +1 -1
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/tasklist.py +29 -4
aru_code-0.36.0/aru/tools/worktree.py +230 -0
aru_code-0.36.0/aru/tui/__init__.py +15 -0
aru_code-0.36.0/aru/tui/app.py +1627 -0
aru_code-0.36.0/aru/tui/screens/__init__.py +8 -0
aru_code-0.36.0/aru/tui/screens/choice.py +103 -0
aru_code-0.36.0/aru/tui/screens/confirm.py +77 -0
aru_code-0.36.0/aru/tui/screens/search.py +106 -0
aru_code-0.36.0/aru/tui/screens/text_input.py +66 -0
aru_code-0.36.0/aru/tui/sinks.py +153 -0
aru_code-0.36.0/aru/tui/slash_bridge.py +133 -0
aru_code-0.36.0/aru/tui/ui.py +239 -0
aru_code-0.36.0/aru/tui/widgets/__init__.py +20 -0
aru_code-0.36.0/aru/tui/widgets/chat.py +340 -0
aru_code-0.36.0/aru/tui/widgets/completer.py +262 -0
aru_code-0.36.0/aru/tui/widgets/context_pane.py +171 -0
aru_code-0.36.0/aru/tui/widgets/header.py +42 -0
aru_code-0.36.0/aru/tui/widgets/inline_choice.py +155 -0
aru_code-0.36.0/aru/tui/widgets/loaded_pane.py +144 -0
aru_code-0.36.0/aru/tui/widgets/status.py +233 -0
aru_code-0.36.0/aru/tui/widgets/thinking.py +99 -0
aru_code-0.36.0/aru/tui/widgets/tools.py +172 -0
aru_code-0.36.0/aru/ui.py +158 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/PKG-INFO +32 -2
aru_code-0.36.0/aru_code.egg-info/SOURCES.txt +179 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/requires.txt +1 -0
{aru_code-0.32.0 → aru_code-0.36.0}/pyproject.toml +5 -1
aru_code-0.36.0/tests/test_apply_patch.py +275 -0
aru_code-0.36.0/tests/test_async_tool_permission.py +50 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cache_patch_metrics.py +132 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_catalog.py +8 -1
aru_code-0.36.0/tests/test_chat_scrollable.py +87 -0
aru_code-0.36.0/tests/test_context_pane.py +63 -0
aru_code-0.36.0/tests/test_cwd_awareness.py +213 -0
aru_code-0.36.0/tests/test_delegate.py +1063 -0
aru_code-0.36.0/tests/test_events_backward_compat.py +90 -0
aru_code-0.36.0/tests/test_events_schema.py +140 -0
aru_code-0.36.0/tests/test_fork_ctx_concurrency.py +173 -0
aru_code-0.36.0/tests/test_format.py +185 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_invoke_skill.py +4 -4
aru_code-0.36.0/tests/test_loaded_pane_path.py +67 -0
aru_code-0.36.0/tests/test_lsp.py +239 -0
aru_code-0.36.0/tests/test_lsp_rename.py +303 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_main.py +67 -55
aru_code-0.36.0/tests/test_markdown_to_text.py +116 -0
aru_code-0.36.0/tests/test_mcp_health.py +237 -0
aru_code-0.36.0/tests/test_memory.py +215 -0
aru_code-0.36.0/tests/test_memory_tool.py +175 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_permissions.py +508 -7
aru_code-0.36.0/tests/test_plugin_errors.py +206 -0
aru_code-0.36.0/tests/test_plugin_hooks_v2.py +172 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_ranker.py +26 -0
aru_code-0.36.0/tests/test_status_breakdown.py +103 -0
aru_code-0.36.0/tests/test_status_cost.py +55 -0
aru_code-0.36.0/tests/test_streaming_sink.py +219 -0
aru_code-0.36.0/tests/test_thread_tool_timeout.py +92 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_tool_policy.py +88 -0
aru_code-0.36.0/tests/test_truncation_marker.py +122 -0
aru_code-0.36.0/tests/test_tui_app_boot.py +298 -0
aru_code-0.36.0/tests/test_tui_bindings.py +104 -0
aru_code-0.36.0/tests/test_tui_bus_flow.py +77 -0
aru_code-0.36.0/tests/test_tui_chat.py +110 -0
aru_code-0.36.0/tests/test_tui_completer.py +240 -0
aru_code-0.36.0/tests/test_tui_completer_dynamic.py +152 -0
aru_code-0.36.0/tests/test_tui_copy.py +189 -0
aru_code-0.36.0/tests/test_tui_input_behaviour.py +190 -0
aru_code-0.36.0/tests/test_tui_mention_expand.py +46 -0
aru_code-0.36.0/tests/test_tui_modals.py +113 -0
aru_code-0.36.0/tests/test_tui_mode_cycle.py +53 -0
aru_code-0.36.0/tests/test_tui_native_selection.py +54 -0
aru_code-0.36.0/tests/test_tui_permission_flow.py +299 -0
aru_code-0.36.0/tests/test_tui_plan_task_render.py +95 -0
aru_code-0.36.0/tests/test_tui_sidebar_toggle.py +74 -0
aru_code-0.36.0/tests/test_tui_slash_bridge.py +120 -0
aru_code-0.36.0/tests/test_tui_snapshot_smoke.py +164 -0
aru_code-0.36.0/tests/test_tui_thinking_and_boot.py +108 -0
aru_code-0.36.0/tests/test_tui_widgets_visual.py +186 -0
aru_code-0.36.0/tests/test_ui_adapter.py +57 -0
aru_code-0.36.0/tests/test_worktree.py +155 -0
aru_code-0.36.0/tests/test_worktree_session_restore.py +129 -0
aru_code-0.32.0/aru/__init__.py +0 -1
aru_code-0.32.0/aru/commands.py +0 -246
aru_code-0.32.0/aru/runtime.py +0 -198
aru_code-0.32.0/aru/tools/_shared.py +0 -63
aru_code-0.32.0/aru/tools/delegate.py +0 -236
aru_code-0.32.0/aru/tools/mcp_client.py +0 -283
aru_code-0.32.0/aru_code.egg-info/SOURCES.txt +0 -92
{aru_code-0.32.0 → aru_code-0.36.0}/LICENSE +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/__init__.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/planner.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/checkpoints.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/completers.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/history_blocks.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/plugin_cache.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/__init__.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/custom_tools.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/tool_api.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/providers.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/select.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/__init__.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/_diff.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/ast_tools.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/gitignore.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/web.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/dependency_links.txt +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/entry_points.txt +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/top_level.txt +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/setup.cfg +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_agents_base.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_agents_md_coverage.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cache_patch_stop_reason.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_checkpoints.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_advanced.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_base.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_completers.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_new.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_run_cli.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_session.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_shell.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_codebase.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_confabulation_regression.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_config.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_context.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_gitignore.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_guardrails_scenarios.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_invoked_skills.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_mcp_client.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_microcompact.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_plan_mode_refactor.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_plugin_cache.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_plugins.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_providers.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_reasoning.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_runner_recovery.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_runtime.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_select.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_skill_disallowed_tools.py +0 -0
{aru_code-0.32.0 → aru_code-0.36.0}/tests/test_tasklist.py +0 -0

{aru_code-0.32.0 → aru_code-0.36.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aru-code
-Version: 0.32.0
+Version: 0.36.0
 Summary: A Claude Code clone built with Agno agents
 Author-email: Estevao <estevaofon@gmail.com>
 License-Expression: MIT
@@ -25,6 +25,7 @@ Requires-Dist: pathspec>=0.12
 Requires-Dist: python-dotenv>=1.2.2
 Requires-Dist: prompt-toolkit>=3.0
 Requires-Dist: rich
+Requires-Dist: textual>=0.86
 Requires-Dist: tree-sitter>=0.23
 Requires-Dist: tree-sitter-python>=0.23
 Requires-Dist: mcp>=1.0
@@ -95,7 +96,36 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
 aru
 ```
-That's it — `aru` is available globally after install.
+That's it — `aru` is available globally after install. Running `aru`
+with no arguments drops straight into the full-screen Textual TUI:
+persistent chat pane, live tools sidebar, reactive status bar (session
+· model · tokens · cost · mode), branded header, and modal permission
+prompts.
+Key bindings (TUI):
+| Key          | Action                   |
+|--------------|--------------------------|
+| Ctrl+Q       | Quit (saves session)     |
+| Ctrl+L       | Clear chat pane          |
+| Ctrl+A       | Cycle permission mode    |
+| Ctrl+P       | Toggle plan mode         |
+| Ctrl+F       | Search chat history      |
+| Up / Down    | Recall prior inputs      |
+Local slash commands inside the TUI: `/help`, `/clear`, `/quit`,
+`/plan`. Any other input is sent to the agent.
+### Classic REPL (opt-in)
+Prefer the single-line prompt? Pass `--repl` to use the classic REPL:
+```bash
+aru --repl
+```
+Both modes share the same sessions, plugins, permissions, and tools —
+only presentation differs.
 ## Usage

{aru_code-0.32.0 → aru_code-0.36.0}/README.md RENAMED Viewed

@@ -48,7 +48,36 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
 aru
 ```
-That's it — `aru` is available globally after install.
+That's it — `aru` is available globally after install. Running `aru`
+with no arguments drops straight into the full-screen Textual TUI:
+persistent chat pane, live tools sidebar, reactive status bar (session
+· model · tokens · cost · mode), branded header, and modal permission
+prompts.
+Key bindings (TUI):
+| Key          | Action                   |
+|--------------|--------------------------|
+| Ctrl+Q       | Quit (saves session)     |
+| Ctrl+L       | Clear chat pane          |
+| Ctrl+A       | Cycle permission mode    |
+| Ctrl+P       | Toggle plan mode         |
+| Ctrl+F       | Search chat history      |
+| Up / Down    | Recall prior inputs      |
+Local slash commands inside the TUI: `/help`, `/clear`, `/quit`,
+`/plan`. Any other input is sent to the agent.
+### Classic REPL (opt-in)
+Prefer the single-line prompt? Pass `--repl` to use the classic REPL:
+```bash
+aru --repl
+```
+Both modes share the same sessions, plugins, permissions, and tools —
+only presentation differs.
 ## Usage

aru_code-0.36.0/aru/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.36.0"

{aru_code-0.32.0 → aru_code-0.36.0}/aru/agent_factory.py RENAMED Viewed

@@ -150,7 +150,15 @@ async def create_agent_from_spec(
         resolved_model = model_ref or session.model_ref
     tools = _wrap_tools_with_hooks(spec.tools_factory())
-    instructions = _build_instructions(spec.role, extra_instructions)
+    # Merge spec-level extra instructions (static, agent-specific policy like
+    # "you are read-only, never call write tools") with caller-provided extras
+    # (dynamic, session-specific context like cwd or AGENTS.md). Spec text
+    # comes first so the agent's baseline policy is established before any
+    # session-specific text that might try to override it.
+    combined_extra = "\n\n".join(
+        part for part in (spec.extra_instructions, extra_instructions) if part
+    )
+    instructions = _build_instructions(spec.role, combined_extra)
     instructions, resolved_model, max_tokens = await _apply_chat_hooks(
         instructions, resolved_model, spec.name, max_tokens=spec.max_tokens,

{aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/base.py RENAMED Viewed

@@ -35,6 +35,22 @@ Deliver EXACTLY what was asked — no more, no less. \
 One function requested = one function written. Helper functions, tests, utilities, and "while I'm here" \
 improvements are out of scope unless the user names them explicitly.
+## Truncated tool output
+Large tool results are truncated head+tail with a structured marker you can parse:
+```
+<truncation source_tool="bash" original_lines="2000" shown_head_lines="300"
+  shown_tail_lines="200" saved_at="/abs/path/output_xxx.txt" />
+```
+Attributes are optional; common ones: ``source_tool``, ``source_file``, \
+``original_bytes``, ``original_lines``, ``shown_head_lines``, ``shown_tail_lines``, \
+``saved_at``. When ``saved_at`` is present, the full output is on disk — use \
+``read_file(saved_at, start_line=..., end_line=...)`` or ``grep_search`` to \
+retrieve omitted rows. When ``source_file`` is present, read from the original \
+file instead. Do NOT re-run the same tool hoping for different output.
 ## Reasoning rules
 **Verify before asserting.** If you describe what a function, module, or system does, \
@@ -182,10 +198,16 @@ Use `context_lines=30` for full function bodies.
 **Batch independent tool calls**: emit ALL independent tool calls in a single response.
-Use delegate_task to split work into independent subtasks for parallel execution. \
-For broad codebase exploration (searching many files, finding patterns, understanding code), \
-break the research into focused questions and spawn multiple \
-`delegate_task(task="<specific search>", agent_name="explorer")` calls in parallel.
+Use delegate_task for parallel research only when the questions are truly \
+independent — no sub-question needs another's answer. For write-path execution, \
+default to sequential: parallel writes require disjoint files AND no inter-task \
+dependencies (task B never imports/reads what task A just produced). When in \
+doubt, sequential is correct.
+For broad codebase exploration — searching many files, finding patterns, \
+understanding code — fan out: spawn multiple \
+`delegate_task(task="<specific search>", agent_name="explorer")` calls in one \
+response. Read-only fan-out has no write-path hazards.
 When given a plan, execute it step by step. When given a direct task, figure out what needs to be done and do it.
 **ZERO narration between tool calls.** No "Now I have enough context...", \
@@ -255,31 +277,44 @@ inline probe is a bug the user never has to report.
 ## Delegation strategy — CRITICAL for context efficiency
 For simple, directed lookups (one known file, one specific symbol) use \
-`grep_search` / `glob_search` / `read_file` directly.
-For **anything broader** — understanding a system, researching before implementing, \
-analyzing multiple files, writing specs or documentation — **always use explorer agents**. \
-Every `read_file` / `read_files` / `grep_search` result you call directly accumulates \
-in YOUR context window and stays there forever. Explorer agents read files in their own \
-isolated context and return only a concise summary. This is critical: \
-**3 explorer summaries < 8 raw file reads** in context cost.
-**Rule of thumb**: If you'd need to read or search more than 2-3 files, use explorers instead.
-**Decompose, don't dump.** Never throw one vague task at one explorer. \
-Break the work into **focused, independent search questions** and spawn one explorer \
-per question — all in a single response so they run in parallel. Each explorer prompt \
-should be specific enough that it can search and answer on its own.
-Example — user asks "explain the authentication system":
+`grep_search` / `glob_search` / `read_file` directly — do not delegate.
+For broader work — understanding a system, researching before implementing, \
+analyzing multiple files — prefer explorer subagents so raw output does not \
+accumulate in your context. An explorer reads in isolation and returns a concise \
+summary; **3 summaries < 8 raw file reads** in context cost.
+**When 1 explorer is enough** (do NOT fan out):
+- Task is isolated to file(s) the user named
+- Small, targeted change and you already have enough context to act
+- You only need to confirm one thing (one pattern, one symbol, one file shape)
+**When to fan out into parallel explorers:**
+- Scope is uncertain — several areas of the codebase may be involved
+- Multiple truly independent questions — disjoint search terms, no question \
+  depends on another's answer
+- Writing a spec or doc covering distinct subsystems
+**Parallelism rule — dependency is the discriminator, not "always":**
+If question B needs A's answer, they are sequential: do A first, synthesize, \
+then launch B. If A / B / C are genuinely independent, emit ALL `delegate_task` \
+calls in **one assistant response** so `asyncio.gather` runs them concurrently. \
+Minimum agents necessary — usually just 1.
+Example (uncertain scope, independent questions) — user asks "explain the \
+authentication system":
 ```
-delegate_task(task="Find auth middleware: search for login/logout handlers, session management, token validation", agent_name="explorer")
-delegate_task(task="Find auth configuration: search for auth-related config files, env vars, secrets setup", agent_name="explorer")
-delegate_task(task="Find auth tests: search for test files covering authentication flows", agent_name="explorer")
+delegate_task(task="Find auth middleware: login/logout handlers, session validation", agent_name="explorer")
+delegate_task(task="Find auth configuration: env vars, secrets setup", agent_name="explorer")
+delegate_task(task="Find auth tests: files covering authentication flows", agent_name="explorer")
 ```
-After all explorers return, **synthesize their findings yourself** — the user sees \
-your summary, not the raw explorer output.
+Counter-example (localized, known file) — user asks "fix the typo in auth.py:42": \
+just `read_file` and `edit_file`. Do not delegate.
+After explorers return, **synthesize their findings yourself** before acting — \
+never write "based on your findings". Include file paths and exact changes in \
+your synthesis so the next step proves you understood.
 ## Planning
@@ -330,7 +365,29 @@ When you see a `<system-reminder>` listing PLAN ACTIVE steps, work through them
 Each plan step is independent context; after marking it done, the reminder updates and shows \
 the next one. Do NOT call `enter_plan_mode` if a plan is already active — execute the existing \
-plan instead.\
+plan instead.
+## Plan execution — sequential by default
+When executing a multi-task plan (loaded via a skill like /executing-plans or \
+/subagent-driven-development, or surfaced via a plan reminder), each task runs \
+**sequentially** unless the plan explicitly marks tasks as independent AND they \
+touch disjoint files.
+Write-path concurrency hazards to respect:
+- Two parallel subagents editing the same file → last-write-wins, silent loss.
+- Subagent B importing a symbol subagent A was supposed to create → B fails \
+  because A has not finished yet.
+Safe parallel-write pattern (only when ALL three hold):
+1. The plan declares the tasks as independent.
+2. The tasks touch disjoint file sets.
+3. No task's output is another task's input inside the same batch.
+If any of the three fails, run tasks sequentially — one `delegate_task` per \
+response, or stay in-session and execute the step yourself. Parallel fan-out \
+for read-only research (explorer) follows the Delegation strategy rules above; \
+it does not carry these write-path hazards.\
 """
 # Explorer-specific additions (read-only fast search subagent)
@@ -374,11 +431,101 @@ Complete the search request efficiently and report your findings clearly.\
 """
+VERIFIER_ROLE = """\
+You are a verification sub-agent. Your sole job is to review a recent batch
+of edits for correctness and report issues.
+=== CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
+You are STRICTLY PROHIBITED from creating, editing, deleting, or moving
+files. You do not have access to edit tools; attempts will fail. No
+state-changing bash commands (no git add/commit, no npm/pip install, no
+mkdir/touch/rm/cp/mv).
+Your workflow:
+1. Read each file mentioned in the task using `read_file` or `read_files`
+2. Search for call sites / references to changed APIs using `grep_search`
+3. Skim related tests using `glob_search` + `read_file`
+4. Report findings in this structure:
+   - Inconsistencies found (with file:line refs)
+   - Missing follow-up edits (call sites not updated, etc.)
+   - Suspicious patterns worth the caller's attention (even if uncertain)
+   - What looks correct (brief — don't pad the report)
+Be concise. Skip nitpicks (formatting, naming preferences). Focus on
+bugs, broken contracts, or outdated call sites the caller likely missed.
+Return ONE final message. The caller is not able to ask follow-ups
+without a resume — include everything they need to act.\
+"""
+REVIEWER_ROLE = """\
+You are a code-review sub-agent. Review the files mentioned in the task
+against common quality heuristics and produce actionable findings.
+=== CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
+You may only read and search. No edit/write/delete/move operations. No
+state-changing bash.
+For each file covered:
+- Naming: are identifiers clear and consistent with the surrounding code?
+- Error handling: are edge cases covered? Any swallowed exceptions?
+- Testing: is there test coverage for the new/modified code paths?
+- Security: obvious injection, path traversal, secret exposure, unchecked
+  user input, missing auth checks?
+- Complexity: functions that should be split, duplicated logic, over-
+  engineered abstractions for simple cases?
+Report format:
+- One bullet per finding
+- Include file:line
+- Classify severity: (blocker) / (important) / (nit) — omit (nit) unless
+  asked for a thorough review
+- If nothing is wrong, say so plainly — do not fabricate issues
+Return ONE final message covering every file you looked at.\
+"""
+GUIDE_ROLE = """\
+You are the Aru user-guide sub-agent. You answer questions about how to
+use and configure Aru itself — slash commands, permission config, skills,
+plugins, tool catalog, session management.
+The questions are about Aru, NOT about the user's own codebase. When in
+doubt, treat the task as "explain how to do X with Aru" rather than "do X
+in the user's project".
+=== CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
+You may only read and search. No edit/write/delete/move operations.
+Authoritative sources, in priority order:
+1. `AGENTS.md` at the project root — architectural reference
+2. `docs/*.md` — user-facing documentation
+3. `aru.json` examples in the codebase — config shape
+4. Reading the code under `aru/` directly (last resort — prefer docs)
+Workflow:
+1. `read_file` AGENTS.md first
+2. `glob_search` + `read_file` relevant docs/*.md
+3. Search `aru.json` or permission config examples if the question is
+   configuration-related
+Never invent features. If the docs do not cover the topic, say so and
+suggest the closest available alternative. Cite file paths in your
+response so the user can verify.
+Return ONE final message.\
+"""
 def build_instructions(role: str, extra: str = "") -> str:
     """Build complete instructions for an agent role.
     Args:
-        role: One of 'planner', 'executor', 'general', 'explorer'.
+        role: One of 'planner', 'executor', 'general', 'explorer', 'verifier',
+            'reviewer', 'guide'.
         extra: Additional project-specific instructions (README, AGENTS.md, skills).
     """
     role_text = {
@@ -386,6 +533,9 @@ def build_instructions(role: str, extra: str = "") -> str:
         "executor": EXECUTOR_ROLE,
         "general": GENERAL_ROLE,
         "explorer": EXPLORER_ROLE,
+        "verifier": VERIFIER_ROLE,
+        "reviewer": REVIEWER_ROLE,
+        "guide": GUIDE_ROLE,
     }[role]
     parts = [role_text, BASE_INSTRUCTIONS]

{aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/catalog.py RENAMED Viewed

@@ -26,6 +26,15 @@ class AgentSpec:
     An explicit int caps the agent below that ceiling — providers.py always
     clamps the final value to min(requested, model_cap) so specs can never
     ask for more than the model supports.
+    `description` is the LLM-facing summary rendered into `delegate_task`'s
+    docstring. Only subagent specs need a meaningful description (primary
+    agents are never picked via `agent_name`). Keep it short (1-3 sentences)
+    and directive — the model uses it to decide when this agent fits.
+    `extra_instructions` is appended to the base role instructions when the
+    agent is built. Use it for agent-specific policy ("you are read-only,
+    never call write tools") that shouldn't leak into other roles.
     """
     name: str                            # display name passed to Agno
@@ -35,6 +44,8 @@ class AgentSpec:
     max_tokens: int | None
     small_model: bool = False            # if True, factory uses ctx.small_model_ref
     use_reasoning: bool = True           # False skips thinking params (e.g. explorer)
+    description: str = ""                # LLM-facing summary for `delegate_task` docstring
+    extra_instructions: str = ""         # appended to base role instructions on build
 def _build_tools() -> list:
@@ -90,5 +101,57 @@ AGENTS: dict[str, AgentSpec] = {
         max_tokens=8192,
         small_model=True,
         use_reasoning=False,  # fast read-only subagent — no thinking overhead
+        description=(
+            "Fast read-only codebase exploration agent. Use for searching "
+            "files, finding patterns, reading code, and understanding "
+            "structure. Specify thoroughness in the task text: \"quick\" "
+            "(basic searches), \"medium\" (moderate exploration), or "
+            "\"very thorough\" (comprehensive analysis)."
+        ),
+    ),
+    "verification": AgentSpec(
+        name="Verifier",
+        role="verifier",
+        mode="subagent",
+        tools_factory=_explore_tools,  # read-only
+        max_tokens=4096,
+        small_model=True,
+        use_reasoning=False,
+        description=(
+            "Double-check a recent batch of edits for correctness. Reads "
+            "changed files, searches for call sites, reports inconsistencies "
+            "and missing follow-up edits. Read-only — never edits. Use after "
+            "non-trivial multi-file edits to catch issues before the user sees them."
+        ),
+    ),
+    "reviewer": AgentSpec(
+        name="Reviewer",
+        role="reviewer",
+        mode="subagent",
+        tools_factory=_explore_tools,  # read-only
+        max_tokens=4096,
+        small_model=True,
+        use_reasoning=False,
+        description=(
+            "Code review against naming, error handling, test coverage, and "
+            "security heuristics. Read-only; produces bulleted findings with "
+            "file:line refs and severity tags. Use when you want a second "
+            "pair of eyes before finalising changes."
+        ),
+    ),
+    "guide": AgentSpec(
+        name="Guide",
+        role="guide",
+        mode="subagent",
+        tools_factory=_explore_tools,  # read-only
+        max_tokens=4096,
+        small_model=True,
+        use_reasoning=False,
+        description=(
+            "Answer questions about using Aru itself — slash commands, "
+            "permission config, skills, plugins, tool catalog. Reads "
+            "AGENTS.md and docs/ to ground answers. Use when the user's "
+            "question is about Aru's features, not their own codebase."
+        ),
     ),
 }

{aru_code-0.32.0 → aru_code-0.36.0}/aru/cache_patch.py RENAMED Viewed

@@ -314,13 +314,27 @@ def _prune_tool_messages(messages):
     return cleared
+_PATCH_APPLIED = False
 def apply_cache_patch():
-    """Apply all patches to reduce Agno's token consumption."""
+    """Apply all patches to reduce Agno's token consumption.
+    Idempotent: wrapping Agno's base Model methods is additive, so
+    calling this repeatedly (e.g. across a test suite's fixtures) would
+    nest the wrappers and multiply every side effect — including the
+    new per-call session token accumulation, which caused totals to
+    grow by the wrap-depth instead of by the real per-call delta.
+    """
+    global _PATCH_APPLIED
+    if _PATCH_APPLIED:
+        return
     _patch_tool_result_pruning()
     _patch_claude_cache_breakpoints()
     _patch_per_call_metrics()
     _patch_stop_reason_capture()
     _patch_overflow_recovery()
+    _PATCH_APPLIED = True
 def _patch_overflow_recovery():
@@ -459,6 +473,85 @@ def _patch_claude_cache_breakpoints():
     claude_utils.format_messages = _patched_format_messages
+def _publish_live_metrics(
+    input_tokens: int,
+    output_tokens: int,
+    cache_read: int,
+    cache_write: int,
+) -> None:
+    """Apply this call's tokens to the primary session and publish ``metrics.updated``.
+    Fires from inside ``_patched_accumulate`` after every internal LLM
+    API call. Scoped to ``subagent_depth == 0`` so subagent calls are
+    ignored here — their tokens are added in one shot by ``delegate_task``
+    when the sub-run completes (doing both would double-count).
+    On the primary session:
+      * bumps ``total_*`` counters so ``estimated_cost`` climbs live;
+      * updates ``last_*`` so the Last-context-window breakdown refreshes
+        (these are overwritten with this call's values, not accumulated);
+      * records the added delta in ``_live_*_added`` so ``track_tokens``
+        at turn-end reconciles and never double-counts (a missing
+        ``_live_*_added`` attribute is treated as 0).
+    The publish falls back silently when no plugin manager / no session
+    is installed (tests, raw SDK use).
+    Args:
+        input_tokens: Input tokens for this single API call.
+        output_tokens: Output tokens for this single API call.
+        cache_read: Cache-read tokens for this single API call.
+        cache_write: Cache-write tokens for this single API call.
+    Returns:
+        None. The result is the mutated session plus one
+        ``metrics.updated`` event handed to ``_schedule_publish``; the
+        payload carries the per-call values, the running ``total_*``
+        values and ``estimated_cost``. Its ``session_id`` falls back to
+        ``session.id`` when ``session.session_id`` is missing or falsy.
+    Early exits (each returns silently, without publishing):
+      * importing ``aru.runtime`` fails;
+      * ``get_ctx()`` raises ``LookupError``;
+      * ``ctx.subagent_depth`` is non-zero or ``ctx.session`` is ``None``;
+      * any session attribute update raises — increments already applied
+        before the failure are not rolled back.
+    If ``session.estimated_cost`` cannot be read as a float the event is
+    still published with ``estimated_cost`` set to ``0.0``. The final
+    ``_schedule_publish`` call itself is not wrapped in a try block.
+    """
+    try:
+        from aru.runtime import get_ctx, _schedule_publish
+    except Exception:
+        return
+    try:
+        ctx = get_ctx()
+    except LookupError:
+        return
+    # Only the primary scope accumulates live — subagent tokens are
+    # added wholesale by delegate_task at sub-run completion.
+    if getattr(ctx, "subagent_depth", 0) != 0:
+        return
+    session = getattr(ctx, "session", None)
+    if session is None:
+        return
+    try:
+        session.total_input_tokens += input_tokens
+        session.total_output_tokens += output_tokens
+        session.total_cache_read_tokens += cache_read
+        session.total_cache_write_tokens += cache_write
+        session._live_input_added = (
+            getattr(session, "_live_input_added", 0) + input_tokens
+        )
+        session._live_output_added = (
+            getattr(session, "_live_output_added", 0) + output_tokens
+        )
+        session._live_cache_read_added = (
+            getattr(session, "_live_cache_read_added", 0) + cache_read
+        )
+        session._live_cache_write_added = (
+            getattr(session, "_live_cache_write_added", 0) + cache_write
+        )
+        session.last_input_tokens = input_tokens
+        session.last_output_tokens = output_tokens
+        session.last_cache_read = cache_read
+        session.last_cache_write = cache_write
+    except Exception:
+        return
+    try:
+        cost = float(session.estimated_cost)
+    except Exception:
+        cost = 0.0
+    _schedule_publish("metrics.updated", {
+        "session_id": getattr(session, "session_id", None)
+            or getattr(session, "id", None),
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "cache_read_tokens": cache_read,
+        "cache_write_tokens": cache_write,
+        "total_input_tokens": session.total_input_tokens,
+        "total_output_tokens": session.total_output_tokens,
+        "total_cache_read_tokens": session.total_cache_read_tokens,
+        "total_cache_write_tokens": session.total_cache_write_tokens,
+        "estimated_cost": cost,
+    })
 def _patch_per_call_metrics():
     """Patch accumulate_model_metrics to capture per-API-call token counts.
@@ -515,6 +608,14 @@ def _patch_per_call_metrics():
             _last_call_output_tokens = output_tokens
             _last_call_cache_read = cache_read
             _last_call_cache_write = cache_write
+            # Intra-turn live session update + bus publish. Gated to the
+            # primary agent (subagent_depth == 0) so subagent API calls
+            # don't double-count — delegate_task adds subagent totals in
+            # one shot when the sub-run completes.
+            _publish_live_metrics(
+                input_tokens, output_tokens, cache_read, cache_write
+            )
         return _original_accumulate(model_response, model, model_type, run_metrics)
     _metrics_module.accumulate_model_metrics = _patched_accumulate

aru-code 0.32.0__tar.gz → 0.36.0__tar.gz

aru-code 0.32.0__tar.gz → 0.36.0__tar.gz