aru-code 0.32.0__tar.gz → 0.36.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. {aru_code-0.32.0 → aru_code-0.36.0}/PKG-INFO +32 -2
  2. {aru_code-0.32.0 → aru_code-0.36.0}/README.md +30 -1
  3. aru_code-0.36.0/aru/__init__.py +1 -0
  4. {aru_code-0.32.0 → aru_code-0.36.0}/aru/agent_factory.py +9 -1
  5. {aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/base.py +178 -28
  6. {aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/catalog.py +63 -0
  7. {aru_code-0.32.0 → aru_code-0.36.0}/aru/cache_patch.py +102 -1
  8. {aru_code-0.32.0 → aru_code-0.36.0}/aru/cli.py +203 -26
  9. aru_code-0.36.0/aru/commands.py +712 -0
  10. {aru_code-0.32.0 → aru_code-0.36.0}/aru/config.py +29 -0
  11. {aru_code-0.32.0 → aru_code-0.36.0}/aru/context.py +97 -23
  12. {aru_code-0.32.0 → aru_code-0.36.0}/aru/display.py +9 -1
  13. aru_code-0.36.0/aru/events.py +218 -0
  14. aru_code-0.36.0/aru/format/__init__.py +24 -0
  15. aru_code-0.36.0/aru/format/manager.py +158 -0
  16. aru_code-0.36.0/aru/format/runner.py +70 -0
  17. aru_code-0.36.0/aru/lsp/__init__.py +29 -0
  18. aru_code-0.36.0/aru/lsp/client.py +186 -0
  19. aru_code-0.36.0/aru/lsp/manager.py +152 -0
  20. aru_code-0.36.0/aru/lsp/protocol.py +117 -0
  21. aru_code-0.36.0/aru/memory/__init__.py +50 -0
  22. aru_code-0.36.0/aru/memory/extractor.py +195 -0
  23. aru_code-0.36.0/aru/memory/loader.py +42 -0
  24. aru_code-0.36.0/aru/memory/store.py +281 -0
  25. {aru_code-0.32.0 → aru_code-0.36.0}/aru/permissions.py +371 -36
  26. {aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/hooks.py +27 -2
  27. {aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/manager.py +73 -6
  28. {aru_code-0.32.0 → aru_code-0.36.0}/aru/runner.py +161 -243
  29. aru_code-0.36.0/aru/runtime.py +577 -0
  30. {aru_code-0.32.0 → aru_code-0.36.0}/aru/session.py +150 -7
  31. aru_code-0.36.0/aru/sinks.py +244 -0
  32. aru_code-0.36.0/aru/streaming.py +346 -0
  33. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tool_policy.py +75 -49
  34. aru_code-0.36.0/aru/tools/_shared.py +94 -0
  35. aru_code-0.36.0/aru/tools/apply_patch.py +513 -0
  36. aru_code-0.36.0/aru/tools/apply_patch_prompt.txt +65 -0
  37. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/codebase.py +1 -1
  38. aru_code-0.36.0/aru/tools/delegate.py +662 -0
  39. aru_code-0.36.0/aru/tools/delegate_prompt.txt +34 -0
  40. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/file_ops.py +26 -15
  41. aru_code-0.36.0/aru/tools/lsp.py +398 -0
  42. aru_code-0.36.0/aru/tools/mcp_client.py +482 -0
  43. aru_code-0.36.0/aru/tools/memory_tool.py +108 -0
  44. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/plan_mode.py +16 -8
  45. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/ranker.py +4 -1
  46. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/registry.py +29 -6
  47. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/search.py +35 -4
  48. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/shell.py +37 -8
  49. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/skill.py +1 -1
  50. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/tasklist.py +29 -4
  51. aru_code-0.36.0/aru/tools/worktree.py +230 -0
  52. aru_code-0.36.0/aru/tui/__init__.py +15 -0
  53. aru_code-0.36.0/aru/tui/app.py +1627 -0
  54. aru_code-0.36.0/aru/tui/screens/__init__.py +8 -0
  55. aru_code-0.36.0/aru/tui/screens/choice.py +103 -0
  56. aru_code-0.36.0/aru/tui/screens/confirm.py +77 -0
  57. aru_code-0.36.0/aru/tui/screens/search.py +106 -0
  58. aru_code-0.36.0/aru/tui/screens/text_input.py +66 -0
  59. aru_code-0.36.0/aru/tui/sinks.py +153 -0
  60. aru_code-0.36.0/aru/tui/slash_bridge.py +133 -0
  61. aru_code-0.36.0/aru/tui/ui.py +239 -0
  62. aru_code-0.36.0/aru/tui/widgets/__init__.py +20 -0
  63. aru_code-0.36.0/aru/tui/widgets/chat.py +340 -0
  64. aru_code-0.36.0/aru/tui/widgets/completer.py +262 -0
  65. aru_code-0.36.0/aru/tui/widgets/context_pane.py +171 -0
  66. aru_code-0.36.0/aru/tui/widgets/header.py +42 -0
  67. aru_code-0.36.0/aru/tui/widgets/inline_choice.py +155 -0
  68. aru_code-0.36.0/aru/tui/widgets/loaded_pane.py +144 -0
  69. aru_code-0.36.0/aru/tui/widgets/status.py +233 -0
  70. aru_code-0.36.0/aru/tui/widgets/thinking.py +99 -0
  71. aru_code-0.36.0/aru/tui/widgets/tools.py +172 -0
  72. aru_code-0.36.0/aru/ui.py +158 -0
  73. {aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/PKG-INFO +32 -2
  74. aru_code-0.36.0/aru_code.egg-info/SOURCES.txt +179 -0
  75. {aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/requires.txt +1 -0
  76. {aru_code-0.32.0 → aru_code-0.36.0}/pyproject.toml +5 -1
  77. aru_code-0.36.0/tests/test_apply_patch.py +275 -0
  78. aru_code-0.36.0/tests/test_async_tool_permission.py +50 -0
  79. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cache_patch_metrics.py +132 -0
  80. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_catalog.py +8 -1
  81. aru_code-0.36.0/tests/test_chat_scrollable.py +87 -0
  82. aru_code-0.36.0/tests/test_context_pane.py +63 -0
  83. aru_code-0.36.0/tests/test_cwd_awareness.py +213 -0
  84. aru_code-0.36.0/tests/test_delegate.py +1063 -0
  85. aru_code-0.36.0/tests/test_events_backward_compat.py +90 -0
  86. aru_code-0.36.0/tests/test_events_schema.py +140 -0
  87. aru_code-0.36.0/tests/test_fork_ctx_concurrency.py +173 -0
  88. aru_code-0.36.0/tests/test_format.py +185 -0
  89. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_invoke_skill.py +4 -4
  90. aru_code-0.36.0/tests/test_loaded_pane_path.py +67 -0
  91. aru_code-0.36.0/tests/test_lsp.py +239 -0
  92. aru_code-0.36.0/tests/test_lsp_rename.py +303 -0
  93. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_main.py +67 -55
  94. aru_code-0.36.0/tests/test_markdown_to_text.py +116 -0
  95. aru_code-0.36.0/tests/test_mcp_health.py +237 -0
  96. aru_code-0.36.0/tests/test_memory.py +215 -0
  97. aru_code-0.36.0/tests/test_memory_tool.py +175 -0
  98. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_permissions.py +508 -7
  99. aru_code-0.36.0/tests/test_plugin_errors.py +206 -0
  100. aru_code-0.36.0/tests/test_plugin_hooks_v2.py +172 -0
  101. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_ranker.py +26 -0
  102. aru_code-0.36.0/tests/test_status_breakdown.py +103 -0
  103. aru_code-0.36.0/tests/test_status_cost.py +55 -0
  104. aru_code-0.36.0/tests/test_streaming_sink.py +219 -0
  105. aru_code-0.36.0/tests/test_thread_tool_timeout.py +92 -0
  106. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_tool_policy.py +88 -0
  107. aru_code-0.36.0/tests/test_truncation_marker.py +122 -0
  108. aru_code-0.36.0/tests/test_tui_app_boot.py +298 -0
  109. aru_code-0.36.0/tests/test_tui_bindings.py +104 -0
  110. aru_code-0.36.0/tests/test_tui_bus_flow.py +77 -0
  111. aru_code-0.36.0/tests/test_tui_chat.py +110 -0
  112. aru_code-0.36.0/tests/test_tui_completer.py +240 -0
  113. aru_code-0.36.0/tests/test_tui_completer_dynamic.py +152 -0
  114. aru_code-0.36.0/tests/test_tui_copy.py +189 -0
  115. aru_code-0.36.0/tests/test_tui_input_behaviour.py +190 -0
  116. aru_code-0.36.0/tests/test_tui_mention_expand.py +46 -0
  117. aru_code-0.36.0/tests/test_tui_modals.py +113 -0
  118. aru_code-0.36.0/tests/test_tui_mode_cycle.py +53 -0
  119. aru_code-0.36.0/tests/test_tui_native_selection.py +54 -0
  120. aru_code-0.36.0/tests/test_tui_permission_flow.py +299 -0
  121. aru_code-0.36.0/tests/test_tui_plan_task_render.py +95 -0
  122. aru_code-0.36.0/tests/test_tui_sidebar_toggle.py +74 -0
  123. aru_code-0.36.0/tests/test_tui_slash_bridge.py +120 -0
  124. aru_code-0.36.0/tests/test_tui_snapshot_smoke.py +164 -0
  125. aru_code-0.36.0/tests/test_tui_thinking_and_boot.py +108 -0
  126. aru_code-0.36.0/tests/test_tui_widgets_visual.py +186 -0
  127. aru_code-0.36.0/tests/test_ui_adapter.py +57 -0
  128. aru_code-0.36.0/tests/test_worktree.py +155 -0
  129. aru_code-0.36.0/tests/test_worktree_session_restore.py +129 -0
  130. aru_code-0.32.0/aru/__init__.py +0 -1
  131. aru_code-0.32.0/aru/commands.py +0 -246
  132. aru_code-0.32.0/aru/runtime.py +0 -198
  133. aru_code-0.32.0/aru/tools/_shared.py +0 -63
  134. aru_code-0.32.0/aru/tools/delegate.py +0 -236
  135. aru_code-0.32.0/aru/tools/mcp_client.py +0 -283
  136. aru_code-0.32.0/aru_code.egg-info/SOURCES.txt +0 -92
  137. {aru_code-0.32.0 → aru_code-0.36.0}/LICENSE +0 -0
  138. {aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/__init__.py +0 -0
  139. {aru_code-0.32.0 → aru_code-0.36.0}/aru/agents/planner.py +0 -0
  140. {aru_code-0.32.0 → aru_code-0.36.0}/aru/checkpoints.py +0 -0
  141. {aru_code-0.32.0 → aru_code-0.36.0}/aru/completers.py +0 -0
  142. {aru_code-0.32.0 → aru_code-0.36.0}/aru/history_blocks.py +0 -0
  143. {aru_code-0.32.0 → aru_code-0.36.0}/aru/plugin_cache.py +0 -0
  144. {aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/__init__.py +0 -0
  145. {aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/custom_tools.py +0 -0
  146. {aru_code-0.32.0 → aru_code-0.36.0}/aru/plugins/tool_api.py +0 -0
  147. {aru_code-0.32.0 → aru_code-0.36.0}/aru/providers.py +0 -0
  148. {aru_code-0.32.0 → aru_code-0.36.0}/aru/select.py +0 -0
  149. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/__init__.py +0 -0
  150. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/_diff.py +0 -0
  151. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/ast_tools.py +0 -0
  152. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/gitignore.py +0 -0
  153. {aru_code-0.32.0 → aru_code-0.36.0}/aru/tools/web.py +0 -0
  154. {aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/dependency_links.txt +0 -0
  155. {aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/entry_points.txt +0 -0
  156. {aru_code-0.32.0 → aru_code-0.36.0}/aru_code.egg-info/top_level.txt +0 -0
  157. {aru_code-0.32.0 → aru_code-0.36.0}/setup.cfg +0 -0
  158. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_agents_base.py +0 -0
  159. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_agents_md_coverage.py +0 -0
  160. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cache_patch_stop_reason.py +0 -0
  161. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_checkpoints.py +0 -0
  162. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli.py +0 -0
  163. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_advanced.py +0 -0
  164. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_base.py +0 -0
  165. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_completers.py +0 -0
  166. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_new.py +0 -0
  167. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_run_cli.py +0 -0
  168. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_session.py +0 -0
  169. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_cli_shell.py +0 -0
  170. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_codebase.py +0 -0
  171. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_confabulation_regression.py +0 -0
  172. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_config.py +0 -0
  173. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_context.py +0 -0
  174. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_gitignore.py +0 -0
  175. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_guardrails_scenarios.py +0 -0
  176. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_invoked_skills.py +0 -0
  177. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_mcp_client.py +0 -0
  178. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_microcompact.py +0 -0
  179. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_plan_mode_refactor.py +0 -0
  180. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_plugin_cache.py +0 -0
  181. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_plugins.py +0 -0
  182. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_providers.py +0 -0
  183. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_reasoning.py +0 -0
  184. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_runner_recovery.py +0 -0
  185. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_runtime.py +0 -0
  186. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_select.py +0 -0
  187. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_skill_disallowed_tools.py +0 -0
  188. {aru_code-0.32.0 → aru_code-0.36.0}/tests/test_tasklist.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.32.0
3
+ Version: 0.36.0
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -25,6 +25,7 @@ Requires-Dist: pathspec>=0.12
25
25
  Requires-Dist: python-dotenv>=1.2.2
26
26
  Requires-Dist: prompt-toolkit>=3.0
27
27
  Requires-Dist: rich
28
+ Requires-Dist: textual>=0.86
28
29
  Requires-Dist: tree-sitter>=0.23
29
30
  Requires-Dist: tree-sitter-python>=0.23
30
31
  Requires-Dist: mcp>=1.0
@@ -95,7 +96,36 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
95
96
  aru
96
97
  ```
97
98
 
98
- That's it — `aru` is available globally after install.
99
+ That's it — `aru` is available globally after install. Running `aru`
100
+ with no arguments drops straight into the full-screen Textual TUI:
101
+ persistent chat pane, live tools sidebar, reactive status bar (session
102
+ · model · tokens · cost · mode), branded header, and modal permission
103
+ prompts.
104
+
105
+ Key bindings (TUI):
106
+
107
+ | Key | Action |
108
+ |--------------|--------------------------|
109
+ | Ctrl+Q | Quit (saves session) |
110
+ | Ctrl+L | Clear chat pane |
111
+ | Ctrl+A | Cycle permission mode |
112
+ | Ctrl+P | Toggle plan mode |
113
+ | Ctrl+F | Search chat history |
114
+ | Up / Down | Recall prior inputs |
115
+
116
+ Local slash commands inside the TUI: `/help`, `/clear`, `/quit`,
117
+ `/plan`. Any other input is sent to the agent.
118
+
119
+ ### Classic REPL (opt-in)
120
+
121
+ Prefer the single-line prompt? Pass `--repl` to use the classic REPL:
122
+
123
+ ```bash
124
+ aru --repl
125
+ ```
126
+
127
+ Both modes share the same sessions, plugins, permissions, and tools —
128
+ only presentation differs.
99
129
 
100
130
  ## Usage
101
131
 
@@ -48,7 +48,36 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
48
48
  aru
49
49
  ```
50
50
 
51
- That's it — `aru` is available globally after install.
51
+ That's it — `aru` is available globally after install. Running `aru`
52
+ with no arguments drops straight into the full-screen Textual TUI:
53
+ persistent chat pane, live tools sidebar, reactive status bar (session
54
+ · model · tokens · cost · mode), branded header, and modal permission
55
+ prompts.
56
+
57
+ Key bindings (TUI):
58
+
59
+ | Key | Action |
60
+ |--------------|--------------------------|
61
+ | Ctrl+Q | Quit (saves session) |
62
+ | Ctrl+L | Clear chat pane |
63
+ | Ctrl+A | Cycle permission mode |
64
+ | Ctrl+P | Toggle plan mode |
65
+ | Ctrl+F | Search chat history |
66
+ | Up / Down | Recall prior inputs |
67
+
68
+ Local slash commands inside the TUI: `/help`, `/clear`, `/quit`,
69
+ `/plan`. Any other input is sent to the agent.
70
+
71
+ ### Classic REPL (opt-in)
72
+
73
+ Prefer the single-line prompt? Pass `--repl` to use the classic REPL:
74
+
75
+ ```bash
76
+ aru --repl
77
+ ```
78
+
79
+ Both modes share the same sessions, plugins, permissions, and tools —
80
+ only presentation differs.
52
81
 
53
82
  ## Usage
54
83
 
@@ -0,0 +1 @@
1
+ __version__ = "0.36.0"
@@ -150,7 +150,15 @@ async def create_agent_from_spec(
150
150
  resolved_model = model_ref or session.model_ref
151
151
 
152
152
  tools = _wrap_tools_with_hooks(spec.tools_factory())
153
- instructions = _build_instructions(spec.role, extra_instructions)
153
+ # Merge spec-level extra instructions (static, agent-specific policy like
154
+ # "you are read-only, never call write tools") with caller-provided extras
155
+ # (dynamic, session-specific context like cwd or AGENTS.md). Spec text
156
+ # comes first so the agent's baseline policy is established before any
157
+ # session-specific text that might try to override it.
158
+ combined_extra = "\n\n".join(
159
+ part for part in (spec.extra_instructions, extra_instructions) if part
160
+ )
161
+ instructions = _build_instructions(spec.role, combined_extra)
154
162
 
155
163
  instructions, resolved_model, max_tokens = await _apply_chat_hooks(
156
164
  instructions, resolved_model, spec.name, max_tokens=spec.max_tokens,
@@ -35,6 +35,22 @@ Deliver EXACTLY what was asked — no more, no less. \
35
35
  One function requested = one function written. Helper functions, tests, utilities, and "while I'm here" \
36
36
  improvements are out of scope unless the user names them explicitly.
37
37
 
38
+ ## Truncated tool output
39
+
40
+ Large tool results are truncated head+tail with a structured marker you can parse:
41
+
42
+ ```
43
+ <truncation source_tool="bash" original_lines="2000" shown_head_lines="300"
44
+ shown_tail_lines="200" saved_at="/abs/path/output_xxx.txt" />
45
+ ```
46
+
47
+ Attributes are optional; common ones: ``source_tool``, ``source_file``, \
48
+ ``original_bytes``, ``original_lines``, ``shown_head_lines``, ``shown_tail_lines``, \
49
+ ``saved_at``. When ``saved_at`` is present, the full output is on disk — use \
50
+ ``read_file(saved_at, start_line=..., end_line=...)`` or ``grep_search`` to \
51
+ retrieve omitted rows. When ``source_file`` is present, read from the original \
52
+ file instead. Do NOT re-run the same tool hoping for different output.
53
+
38
54
  ## Reasoning rules
39
55
 
40
56
  **Verify before asserting.** If you describe what a function, module, or system does, \
@@ -182,10 +198,16 @@ Use `context_lines=30` for full function bodies.
182
198
 
183
199
  **Batch independent tool calls**: emit ALL independent tool calls in a single response.
184
200
 
185
- Use delegate_task to split work into independent subtasks for parallel execution. \
186
- For broad codebase exploration (searching many files, finding patterns, understanding code), \
187
- break the research into focused questions and spawn multiple \
188
- `delegate_task(task="<specific search>", agent_name="explorer")` calls in parallel.
201
+ Use delegate_task for parallel research only when the questions are truly \
202
+ independent — no sub-question needs another's answer. For write-path execution, \
203
+ default to sequential: parallel writes require disjoint files AND no inter-task \
204
+ dependencies (task B never imports/reads what task A just produced). When in \
205
+ doubt, sequential is correct.
206
+
207
+ For broad codebase exploration — searching many files, finding patterns, \
208
+ understanding code — fan out: spawn multiple \
209
+ `delegate_task(task="<specific search>", agent_name="explorer")` calls in one \
210
+ response. Read-only fan-out has no write-path hazards.
189
211
 
190
212
  When given a plan, execute it step by step. When given a direct task, figure out what needs to be done and do it.
191
213
  **ZERO narration between tool calls.** No "Now I have enough context...", \
@@ -255,31 +277,44 @@ inline probe is a bug the user never has to report.
255
277
  ## Delegation strategy — CRITICAL for context efficiency
256
278
 
257
279
  For simple, directed lookups (one known file, one specific symbol) use \
258
- `grep_search` / `glob_search` / `read_file` directly.
259
-
260
- For **anything broader** — understanding a system, researching before implementing, \
261
- analyzing multiple files, writing specs or documentation — **always use explorer agents**. \
262
- Every `read_file` / `read_files` / `grep_search` result you call directly accumulates \
263
- in YOUR context window and stays there forever. Explorer agents read files in their own \
264
- isolated context and return only a concise summary. This is critical: \
265
- **3 explorer summaries < 8 raw file reads** in context cost.
266
-
267
- **Rule of thumb**: If you'd need to read or search more than 2-3 files, use explorers instead.
268
-
269
- **Decompose, don't dump.** Never throw one vague task at one explorer. \
270
- Break the work into **focused, independent search questions** and spawn one explorer \
271
- per question all in a single response so they run in parallel. Each explorer prompt \
272
- should be specific enough that it can search and answer on its own.
273
-
274
- Example user asks "explain the authentication system":
280
+ `grep_search` / `glob_search` / `read_file` directly — do not delegate.
281
+
282
+ For broader work — understanding a system, researching before implementing, \
283
+ analyzing multiple files — prefer explorer subagents so raw output does not \
284
+ accumulate in your context. An explorer reads in isolation and returns a concise \
285
+ summary; **3 summaries < 8 raw file reads** in context cost.
286
+
287
+ **When 1 explorer is enough** (do NOT fan out):
288
+ - Task is isolated to file(s) the user named
289
+ - Small, targeted change and you already have enough context to act
290
+ - You only need to confirm one thing (one pattern, one symbol, one file shape)
291
+
292
+ **When to fan out into parallel explorers:**
293
+ - Scope is uncertain — several areas of the codebase may be involved
294
+ - Multiple truly independent questions — disjoint search terms, no question \
295
+ depends on another's answer
296
+ - Writing a spec or doc covering distinct subsystems
297
+
298
+ **Parallelism rule — dependency is the discriminator, not "always":**
299
+ If question B needs A's answer, they are sequential: do A first, synthesize, \
300
+ then launch B. If A / B / C are genuinely independent, emit ALL `delegate_task` \
301
+ calls in **one assistant response** so `asyncio.gather` runs them concurrently. \
302
+ Minimum agents necessary — usually just 1.
303
+
304
+ Example (uncertain scope, independent questions) — user asks "explain the \
305
+ authentication system":
275
306
  ```
276
- delegate_task(task="Find auth middleware: search for login/logout handlers, session management, token validation", agent_name="explorer")
277
- delegate_task(task="Find auth configuration: search for auth-related config files, env vars, secrets setup", agent_name="explorer")
278
- delegate_task(task="Find auth tests: search for test files covering authentication flows", agent_name="explorer")
307
+ delegate_task(task="Find auth middleware: login/logout handlers, session validation", agent_name="explorer")
308
+ delegate_task(task="Find auth configuration: env vars, secrets setup", agent_name="explorer")
309
+ delegate_task(task="Find auth tests: files covering authentication flows", agent_name="explorer")
279
310
  ```
280
311
 
281
- After all explorers return, **synthesize their findings yourself** — the user sees \
282
- your summary, not the raw explorer output.
312
+ Counter-example (localized, known file) — user asks "fix the typo in auth.py:42": \
313
+ just `read_file` and `edit_file`. Do not delegate.
314
+
315
+ After explorers return, **synthesize their findings yourself** before acting — \
316
+ never write "based on your findings". Include file paths and exact changes in \
317
+ your synthesis so the next step proves you understood.
283
318
 
284
319
  ## Planning
285
320
 
@@ -330,7 +365,29 @@ When you see a `<system-reminder>` listing PLAN ACTIVE steps, work through them
330
365
 
331
366
  Each plan step is independent context; after marking it done, the reminder updates and shows \
332
367
  the next one. Do NOT call `enter_plan_mode` if a plan is already active — execute the existing \
333
- plan instead.\
368
+ plan instead.
369
+
370
+ ## Plan execution — sequential by default
371
+
372
+ When executing a multi-task plan (loaded via a skill like /executing-plans or \
373
+ /subagent-driven-development, or surfaced via a plan reminder), each task runs \
374
+ **sequentially** unless the plan explicitly marks tasks as independent AND they \
375
+ touch disjoint files.
376
+
377
+ Write-path concurrency hazards to respect:
378
+ - Two parallel subagents editing the same file → last-write-wins, silent loss.
379
+ - Subagent B importing a symbol subagent A was supposed to create → B fails \
380
+ because A has not finished yet.
381
+
382
+ Safe parallel-write pattern (only when ALL three hold):
383
+ 1. The plan declares the tasks as independent.
384
+ 2. The tasks touch disjoint file sets.
385
+ 3. No task's output is another task's input inside the same batch.
386
+
387
+ If any of the three fails, run tasks sequentially — one `delegate_task` per \
388
+ response, or stay in-session and execute the step yourself. Parallel fan-out \
389
+ for read-only research (explorer) follows the Delegation strategy rules above; \
390
+ it does not carry these write-path hazards.\
334
391
  """
335
392
 
336
393
  # Explorer-specific additions (read-only fast search subagent)
@@ -374,11 +431,101 @@ Complete the search request efficiently and report your findings clearly.\
374
431
  """
375
432
 
376
433
 
434
+ VERIFIER_ROLE = """\
435
+ You are a verification sub-agent. Your sole job is to review a recent batch
436
+ of edits for correctness and report issues.
437
+
438
+ === CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
439
+ You are STRICTLY PROHIBITED from creating, editing, deleting, or moving
440
+ files. You do not have access to edit tools; attempts will fail. No
441
+ state-changing bash commands (no git add/commit, no npm/pip install, no
442
+ mkdir/touch/rm/cp/mv).
443
+
444
+ Your workflow:
445
+ 1. Read each file mentioned in the task using `read_file` or `read_files`
446
+ 2. Search for call sites / references to changed APIs using `grep_search`
447
+ 3. Skim related tests using `glob_search` + `read_file`
448
+ 4. Report findings in this structure:
449
+ - Inconsistencies found (with file:line refs)
450
+ - Missing follow-up edits (call sites not updated, etc.)
451
+ - Suspicious patterns worth the caller's attention (even if uncertain)
452
+ - What looks correct (brief — don't pad the report)
453
+
454
+ Be concise. Skip nitpicks (formatting, naming preferences). Focus on
455
+ bugs, broken contracts, or outdated call sites the caller likely missed.
456
+
457
+ Return ONE final message. The caller is not able to ask follow-ups
458
+ without a resume — include everything they need to act.\
459
+ """
460
+
461
+
462
+ REVIEWER_ROLE = """\
463
+ You are a code-review sub-agent. Review the files mentioned in the task
464
+ against common quality heuristics and produce actionable findings.
465
+
466
+ === CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
467
+ You may only read and search. No edit/write/delete/move operations. No
468
+ state-changing bash.
469
+
470
+ For each file covered:
471
+
472
+ - Naming: are identifiers clear and consistent with the surrounding code?
473
+ - Error handling: are edge cases covered? Any swallowed exceptions?
474
+ - Testing: is there test coverage for the new/modified code paths?
475
+ - Security: obvious injection, path traversal, secret exposure, unchecked
476
+ user input, missing auth checks?
477
+ - Complexity: functions that should be split, duplicated logic, over-
478
+ engineered abstractions for simple cases?
479
+
480
+ Report format:
481
+ - One bullet per finding
482
+ - Include file:line
483
+ - Classify severity: (blocker) / (important) / (nit) — omit (nit) unless
484
+ asked for a thorough review
485
+ - If nothing is wrong, say so plainly — do not fabricate issues
486
+
487
+ Return ONE final message covering every file you looked at.\
488
+ """
489
+
490
+
491
+ GUIDE_ROLE = """\
492
+ You are the Aru user-guide sub-agent. You answer questions about how to
493
+ use and configure Aru itself — slash commands, permission config, skills,
494
+ plugins, tool catalog, session management.
495
+
496
+ The questions are about Aru, NOT about the user's own codebase. When in
497
+ doubt, treat the task as "explain how to do X with Aru" rather than "do X
498
+ in the user's project".
499
+
500
+ === CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
501
+ You may only read and search. No edit/write/delete/move operations.
502
+
503
+ Authoritative sources, in priority order:
504
+ 1. `AGENTS.md` at the project root — architectural reference
505
+ 2. `docs/*.md` — user-facing documentation
506
+ 3. `aru.json` examples in the codebase — config shape
507
+ 4. Reading the code under `aru/` directly (last resort — prefer docs)
508
+
509
+ Workflow:
510
+ 1. `read_file` AGENTS.md first
511
+ 2. `glob_search` + `read_file` relevant docs/*.md
512
+ 3. Search `aru.json` or permission config examples if the question is
513
+ configuration-related
514
+
515
+ Never invent features. If the docs do not cover the topic, say so and
516
+ suggest the closest available alternative. Cite file paths in your
517
+ response so the user can verify.
518
+
519
+ Return ONE final message.\
520
+ """
521
+
522
+
377
523
  def build_instructions(role: str, extra: str = "") -> str:
378
524
  """Build complete instructions for an agent role.
379
525
 
380
526
  Args:
381
- role: One of 'planner', 'executor', 'general', 'explorer'.
527
+ role: One of 'planner', 'executor', 'general', 'explorer', 'verifier',
528
+ 'reviewer', 'guide'.
382
529
  extra: Additional project-specific instructions (README, AGENTS.md, skills).
383
530
  """
384
531
  role_text = {
@@ -386,6 +533,9 @@ def build_instructions(role: str, extra: str = "") -> str:
386
533
  "executor": EXECUTOR_ROLE,
387
534
  "general": GENERAL_ROLE,
388
535
  "explorer": EXPLORER_ROLE,
536
+ "verifier": VERIFIER_ROLE,
537
+ "reviewer": REVIEWER_ROLE,
538
+ "guide": GUIDE_ROLE,
389
539
  }[role]
390
540
 
391
541
  parts = [role_text, BASE_INSTRUCTIONS]
@@ -26,6 +26,15 @@ class AgentSpec:
26
26
  An explicit int caps the agent below that ceiling — providers.py always
27
27
  clamps the final value to min(requested, model_cap) so specs can never
28
28
  ask for more than the model supports.
29
+
30
+ `description` is the LLM-facing summary rendered into `delegate_task`'s
31
+ docstring. Only subagent specs need a meaningful description (primary
32
+ agents are never picked via `agent_name`). Keep it short (1-3 sentences)
33
+ and directive — the model uses it to decide when this agent fits.
34
+
35
+ `extra_instructions` is appended to the base role instructions when the
36
+ agent is built. Use it for agent-specific policy ("you are read-only,
37
+ never call write tools") that shouldn't leak into other roles.
29
38
  """
30
39
 
31
40
  name: str # display name passed to Agno
@@ -35,6 +44,8 @@ class AgentSpec:
35
44
  max_tokens: int | None
36
45
  small_model: bool = False # if True, factory uses ctx.small_model_ref
37
46
  use_reasoning: bool = True # False skips thinking params (e.g. explorer)
47
+ description: str = "" # LLM-facing summary for `delegate_task` docstring
48
+ extra_instructions: str = "" # appended to base role instructions on build
38
49
 
39
50
 
40
51
  def _build_tools() -> list:
@@ -90,5 +101,57 @@ AGENTS: dict[str, AgentSpec] = {
90
101
  max_tokens=8192,
91
102
  small_model=True,
92
103
  use_reasoning=False, # fast read-only subagent — no thinking overhead
104
+ description=(
105
+ "Fast read-only codebase exploration agent. Use for searching "
106
+ "files, finding patterns, reading code, and understanding "
107
+ "structure. Specify thoroughness in the task text: \"quick\" "
108
+ "(basic searches), \"medium\" (moderate exploration), or "
109
+ "\"very thorough\" (comprehensive analysis)."
110
+ ),
111
+ ),
112
+ "verification": AgentSpec(
113
+ name="Verifier",
114
+ role="verifier",
115
+ mode="subagent",
116
+ tools_factory=_explore_tools, # read-only
117
+ max_tokens=4096,
118
+ small_model=True,
119
+ use_reasoning=False,
120
+ description=(
121
+ "Double-check a recent batch of edits for correctness. Reads "
122
+ "changed files, searches for call sites, reports inconsistencies "
123
+ "and missing follow-up edits. Read-only — never edits. Use after "
124
+ "non-trivial multi-file edits to catch issues before the user sees them."
125
+ ),
126
+ ),
127
+ "reviewer": AgentSpec(
128
+ name="Reviewer",
129
+ role="reviewer",
130
+ mode="subagent",
131
+ tools_factory=_explore_tools, # read-only
132
+ max_tokens=4096,
133
+ small_model=True,
134
+ use_reasoning=False,
135
+ description=(
136
+ "Code review against naming, error handling, test coverage, and "
137
+ "security heuristics. Read-only; produces bulleted findings with "
138
+ "file:line refs and severity tags. Use when you want a second "
139
+ "pair of eyes before finalising changes."
140
+ ),
141
+ ),
142
+ "guide": AgentSpec(
143
+ name="Guide",
144
+ role="guide",
145
+ mode="subagent",
146
+ tools_factory=_explore_tools, # read-only
147
+ max_tokens=4096,
148
+ small_model=True,
149
+ use_reasoning=False,
150
+ description=(
151
+ "Answer questions about using Aru itself — slash commands, "
152
+ "permission config, skills, plugins, tool catalog. Reads "
153
+ "AGENTS.md and docs/ to ground answers. Use when the user's "
154
+ "question is about Aru's features, not their own codebase."
155
+ ),
93
156
  ),
94
157
  }
@@ -314,13 +314,27 @@ def _prune_tool_messages(messages):
314
314
  return cleared
315
315
 
316
316
 
317
+ _PATCH_APPLIED = False
318
+
319
+
317
320
  def apply_cache_patch():
318
- """Apply all patches to reduce Agno's token consumption."""
321
+ """Apply all patches to reduce Agno's token consumption.
322
+
323
+ Idempotent: wrapping Agno's base Model methods is additive, so
324
+ calling this repeatedly (e.g. across a test suite's fixtures) would
325
+ nest the wrappers and multiply every side effect — including the
326
+ new per-call session token accumulation, which caused totals to
327
+ grow by the wrap-depth instead of by the real per-call delta.
328
+ """
329
+ global _PATCH_APPLIED
330
+ if _PATCH_APPLIED:
331
+ return
319
332
  _patch_tool_result_pruning()
320
333
  _patch_claude_cache_breakpoints()
321
334
  _patch_per_call_metrics()
322
335
  _patch_stop_reason_capture()
323
336
  _patch_overflow_recovery()
337
+ _PATCH_APPLIED = True
324
338
 
325
339
 
326
340
  def _patch_overflow_recovery():
@@ -459,6 +473,85 @@ def _patch_claude_cache_breakpoints():
459
473
  claude_utils.format_messages = _patched_format_messages
460
474
 
461
475
 
476
+ def _publish_live_metrics(
477
+ input_tokens: int,
478
+ output_tokens: int,
479
+ cache_read: int,
480
+ cache_write: int,
481
+ ) -> None:
482
+ """Apply this call's tokens to the primary session and publish ``metrics.updated``.
483
+
484
+ Fires from inside ``_patched_accumulate`` after every internal LLM
485
+ API call. Scoped to ``subagent_depth == 0`` so subagent calls are
486
+ ignored here — their tokens are added in one shot by ``delegate_task``
487
+ when the sub-run completes (doing both would double-count).
488
+
489
+ On the primary session:
490
+ * bumps ``total_*`` counters so ``estimated_cost`` climbs live;
491
+ * updates ``last_*`` so the Last-context-window breakdown refreshes;
492
+ * records the added delta in ``_live_*_added`` so ``track_tokens``
493
+ at turn-end reconciles and never double-counts.
494
+
495
+ The publish falls back silently when no plugin manager / no session
496
+ is installed (tests, raw SDK use).
497
+ """
498
+ try:
499
+ from aru.runtime import get_ctx, _schedule_publish
500
+ except Exception:
501
+ return
502
+ try:
503
+ ctx = get_ctx()
504
+ except LookupError:
505
+ return
506
+ # Only the primary scope accumulates live — subagent tokens are
507
+ # added wholesale by delegate_task at sub-run completion.
508
+ if getattr(ctx, "subagent_depth", 0) != 0:
509
+ return
510
+ session = getattr(ctx, "session", None)
511
+ if session is None:
512
+ return
513
+ try:
514
+ session.total_input_tokens += input_tokens
515
+ session.total_output_tokens += output_tokens
516
+ session.total_cache_read_tokens += cache_read
517
+ session.total_cache_write_tokens += cache_write
518
+ session._live_input_added = (
519
+ getattr(session, "_live_input_added", 0) + input_tokens
520
+ )
521
+ session._live_output_added = (
522
+ getattr(session, "_live_output_added", 0) + output_tokens
523
+ )
524
+ session._live_cache_read_added = (
525
+ getattr(session, "_live_cache_read_added", 0) + cache_read
526
+ )
527
+ session._live_cache_write_added = (
528
+ getattr(session, "_live_cache_write_added", 0) + cache_write
529
+ )
530
+ session.last_input_tokens = input_tokens
531
+ session.last_output_tokens = output_tokens
532
+ session.last_cache_read = cache_read
533
+ session.last_cache_write = cache_write
534
+ except Exception:
535
+ return
536
+ try:
537
+ cost = float(session.estimated_cost)
538
+ except Exception:
539
+ cost = 0.0
540
+ _schedule_publish("metrics.updated", {
541
+ "session_id": getattr(session, "session_id", None)
542
+ or getattr(session, "id", None),
543
+ "input_tokens": input_tokens,
544
+ "output_tokens": output_tokens,
545
+ "cache_read_tokens": cache_read,
546
+ "cache_write_tokens": cache_write,
547
+ "total_input_tokens": session.total_input_tokens,
548
+ "total_output_tokens": session.total_output_tokens,
549
+ "total_cache_read_tokens": session.total_cache_read_tokens,
550
+ "total_cache_write_tokens": session.total_cache_write_tokens,
551
+ "estimated_cost": cost,
552
+ })
553
+
554
+
462
555
  def _patch_per_call_metrics():
463
556
  """Patch accumulate_model_metrics to capture per-API-call token counts.
464
557
 
@@ -515,6 +608,14 @@ def _patch_per_call_metrics():
515
608
  _last_call_output_tokens = output_tokens
516
609
  _last_call_cache_read = cache_read
517
610
  _last_call_cache_write = cache_write
611
+
612
+ # Intra-turn live session update + bus publish. Gated to the
613
+ # primary agent (subagent_depth == 0) so subagent API calls
614
+ # don't double-count — delegate_task adds subagent totals in
615
+ # one shot when the sub-run completes.
616
+ _publish_live_metrics(
617
+ input_tokens, output_tokens, cache_read, cache_write
618
+ )
518
619
  return _original_accumulate(model_response, model, model_type, run_metrics)
519
620
 
520
621
  _metrics_module.accumulate_model_metrics = _patched_accumulate