aru-code 0.60.0__tar.gz → 0.61.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. {aru_code-0.60.0/aru_code.egg-info → aru_code-0.61.0}/PKG-INFO +1 -1
  2. aru_code-0.61.0/aru/__init__.py +1 -0
  3. {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/base.py +60 -5
  4. {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/planner.py +4 -3
  5. {aru_code-0.60.0 → aru_code-0.61.0}/aru/cli.py +6 -13
  6. {aru_code-0.60.0 → aru_code-0.61.0}/aru/context.py +2 -2
  7. {aru_code-0.60.0 → aru_code-0.61.0}/aru/history_blocks.py +66 -5
  8. {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/extractor.py +2 -2
  9. {aru_code-0.60.0 → aru_code-0.61.0}/aru/permissions.py +12 -4
  10. {aru_code-0.60.0 → aru_code-0.61.0}/aru/providers.py +16 -0
  11. {aru_code-0.60.0 → aru_code-0.61.0}/aru/runner.py +84 -0
  12. {aru_code-0.60.0 → aru_code-0.61.0}/aru/session.py +15 -1
  13. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/delegate.py +10 -1
  14. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/file_ops.py +46 -0
  15. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/registry.py +12 -2
  16. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/tasklist.py +17 -2
  17. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/app.py +16 -43
  18. {aru_code-0.60.0 → aru_code-0.61.0/aru_code.egg-info}/PKG-INFO +1 -1
  19. {aru_code-0.60.0 → aru_code-0.61.0}/pyproject.toml +1 -1
  20. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_confabulation_regression.py +74 -13
  21. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_runner_recovery.py +11 -1
  22. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_slash_model.py +3 -3
  23. aru_code-0.60.0/aru/__init__.py +0 -1
  24. {aru_code-0.60.0 → aru_code-0.61.0}/LICENSE +0 -0
  25. {aru_code-0.60.0 → aru_code-0.61.0}/README.md +0 -0
  26. {aru_code-0.60.0 → aru_code-0.61.0}/aru/_debug/__init__.py +0 -0
  27. {aru_code-0.60.0 → aru_code-0.61.0}/aru/_debug/analyze_trace.py +0 -0
  28. {aru_code-0.60.0 → aru_code-0.61.0}/aru/_debug/loop_tracer.py +0 -0
  29. {aru_code-0.60.0 → aru_code-0.61.0}/aru/agent_factory.py +0 -0
  30. {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/__init__.py +0 -0
  31. {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/catalog.py +0 -0
  32. {aru_code-0.60.0 → aru_code-0.61.0}/aru/auth.py +0 -0
  33. {aru_code-0.60.0 → aru_code-0.61.0}/aru/cache_patch.py +0 -0
  34. {aru_code-0.60.0 → aru_code-0.61.0}/aru/checkpoints.py +0 -0
  35. {aru_code-0.60.0 → aru_code-0.61.0}/aru/codex_oauth.py +0 -0
  36. {aru_code-0.60.0 → aru_code-0.61.0}/aru/commands.py +0 -0
  37. {aru_code-0.60.0 → aru_code-0.61.0}/aru/config.py +0 -0
  38. {aru_code-0.60.0 → aru_code-0.61.0}/aru/display.py +0 -0
  39. {aru_code-0.60.0 → aru_code-0.61.0}/aru/doom_loop.py +0 -0
  40. {aru_code-0.60.0 → aru_code-0.61.0}/aru/events.py +0 -0
  41. {aru_code-0.60.0 → aru_code-0.61.0}/aru/format/__init__.py +0 -0
  42. {aru_code-0.60.0 → aru_code-0.61.0}/aru/format/manager.py +0 -0
  43. {aru_code-0.60.0 → aru_code-0.61.0}/aru/format/runner.py +0 -0
  44. {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/__init__.py +0 -0
  45. {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/client.py +0 -0
  46. {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/manager.py +0 -0
  47. {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/protocol.py +0 -0
  48. {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/__init__.py +0 -0
  49. {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/loader.py +0 -0
  50. {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/store.py +0 -0
  51. {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugin_cache.py +0 -0
  52. {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/__init__.py +0 -0
  53. {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/custom_tools.py +0 -0
  54. {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/hooks.py +0 -0
  55. {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/manager.py +0 -0
  56. {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/tool_api.py +0 -0
  57. {aru_code-0.60.0 → aru_code-0.61.0}/aru/runtime.py +0 -0
  58. {aru_code-0.60.0 → aru_code-0.61.0}/aru/select.py +0 -0
  59. {aru_code-0.60.0 → aru_code-0.61.0}/aru/sinks.py +0 -0
  60. {aru_code-0.60.0 → aru_code-0.61.0}/aru/state.py +0 -0
  61. {aru_code-0.60.0 → aru_code-0.61.0}/aru/streaming.py +0 -0
  62. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tool_policy.py +0 -0
  63. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/__init__.py +0 -0
  64. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/_diff.py +0 -0
  65. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/_shared.py +0 -0
  66. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/apply_patch.py +0 -0
  67. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/apply_patch_prompt.txt +0 -0
  68. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/ast_tools.py +0 -0
  69. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/codebase.py +0 -0
  70. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/delegate_prompt.txt +0 -0
  71. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/gitignore.py +0 -0
  72. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/lsp.py +0 -0
  73. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/mcp_client.py +0 -0
  74. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/memory_tool.py +0 -0
  75. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/plan_mode.py +0 -0
  76. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/ranker.py +0 -0
  77. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/search.py +0 -0
  78. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/shell.py +0 -0
  79. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/skill.py +0 -0
  80. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/web.py +0 -0
  81. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/worktree.py +0 -0
  82. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/__init__.py +0 -0
  83. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/log_bridge.py +0 -0
  84. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/notifications.py +0 -0
  85. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/sanitize.py +0 -0
  86. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/__init__.py +0 -0
  87. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/choice.py +0 -0
  88. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/confirm.py +0 -0
  89. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/keymap.py +0 -0
  90. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/search.py +0 -0
  91. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/session_picker.py +0 -0
  92. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/text_input.py +0 -0
  93. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/sinks.py +0 -0
  94. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/slash_bridge.py +0 -0
  95. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/themes.py +0 -0
  96. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/ui.py +0 -0
  97. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/__init__.py +0 -0
  98. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/chat.py +0 -0
  99. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/completer.py +0 -0
  100. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/context_pane.py +0 -0
  101. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/file_link.py +0 -0
  102. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/header.py +0 -0
  103. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/inline_choice.py +0 -0
  104. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/loaded_pane.py +0 -0
  105. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/prompt_area.py +0 -0
  106. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/prompt_queue.py +0 -0
  107. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/status.py +0 -0
  108. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/subagent_panel.py +0 -0
  109. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/tasklist_panel.py +0 -0
  110. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/thinking.py +0 -0
  111. {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/tools.py +0 -0
  112. {aru_code-0.60.0 → aru_code-0.61.0}/aru/ui.py +0 -0
  113. {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/SOURCES.txt +0 -0
  114. {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/dependency_links.txt +0 -0
  115. {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/entry_points.txt +0 -0
  116. {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/requires.txt +0 -0
  117. {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/top_level.txt +0 -0
  118. {aru_code-0.60.0 → aru_code-0.61.0}/setup.cfg +0 -0
  119. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_agents_base.py +0 -0
  120. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_agents_md_coverage.py +0 -0
  121. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_apply_patch.py +0 -0
  122. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_async_tool_permission.py +0 -0
  123. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_auth_store.py +0 -0
  124. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cache_patch_metrics.py +0 -0
  125. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cache_patch_stop_reason.py +0 -0
  126. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_catalog.py +0 -0
  127. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_chat_scrollable.py +0 -0
  128. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_checkpoints.py +0 -0
  129. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli.py +0 -0
  130. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_advanced.py +0 -0
  131. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_base.py +0 -0
  132. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_new.py +0 -0
  133. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_session.py +0 -0
  134. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_shell.py +0 -0
  135. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_codebase.py +0 -0
  136. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_codex_oauth.py +0 -0
  137. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_config.py +0 -0
  138. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_connect_command.py +0 -0
  139. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_connect_oauth.py +0 -0
  140. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_context.py +0 -0
  141. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_context_pane.py +0 -0
  142. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cwd_awareness.py +0 -0
  143. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_delegate.py +0 -0
  144. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_doom_loop.py +0 -0
  145. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_events_backward_compat.py +0 -0
  146. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_events_schema.py +0 -0
  147. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_fork_ctx_concurrency.py +0 -0
  148. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_format.py +0 -0
  149. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_gitignore.py +0 -0
  150. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_guardrails_scenarios.py +0 -0
  151. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_invoke_skill.py +0 -0
  152. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_invoked_skills.py +0 -0
  153. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_loaded_pane_path.py +0 -0
  154. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_lsp.py +0 -0
  155. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_lsp_rename.py +0 -0
  156. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_main.py +0 -0
  157. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_markdown_to_text.py +0 -0
  158. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_mcp_client.py +0 -0
  159. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_mcp_health.py +0 -0
  160. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_memory.py +0 -0
  161. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_memory_tool.py +0 -0
  162. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_microcompact.py +0 -0
  163. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_permission_timeout_suspension.py +0 -0
  164. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_permissions.py +0 -0
  165. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plan_mode_refactor.py +0 -0
  166. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugin_cache.py +0 -0
  167. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugin_errors.py +0 -0
  168. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugin_hooks_v2.py +0 -0
  169. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugins.py +0 -0
  170. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_providers.py +0 -0
  171. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_ranker.py +0 -0
  172. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_reasoning.py +0 -0
  173. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_runner_interrupt.py +0 -0
  174. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_runtime.py +0 -0
  175. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_select.py +0 -0
  176. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_session_free_cost.py +0 -0
  177. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_skill_disallowed_tools.py +0 -0
  178. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_state_recent_models.py +0 -0
  179. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_status_breakdown.py +0 -0
  180. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_status_cost.py +0 -0
  181. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_streaming_sink.py +0 -0
  182. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_subagent_tool_events.py +0 -0
  183. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tasklist.py +0 -0
  184. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_thread_tool_timeout.py +0 -0
  185. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tool_policy.py +0 -0
  186. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_truncation_marker.py +0 -0
  187. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_app_boot.py +0 -0
  188. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_bindings.py +0 -0
  189. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_bus_flow.py +0 -0
  190. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_chat.py +0 -0
  191. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_chat_adversarial.py +0 -0
  192. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_completer.py +0 -0
  193. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_completer_dynamic.py +0 -0
  194. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_connect_wiring.py +0 -0
  195. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_copy.py +0 -0
  196. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_error_display.py +0 -0
  197. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_file_link.py +0 -0
  198. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_input_behaviour.py +0 -0
  199. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_layer12_recovery.py +0 -0
  200. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_layer13_recovery.py +0 -0
  201. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_mention_expand.py +0 -0
  202. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_modals.py +0 -0
  203. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_mode_cycle.py +0 -0
  204. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_native_selection.py +0 -0
  205. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_permission_flow.py +0 -0
  206. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_plan_task_render.py +0 -0
  207. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_prompt_queue.py +0 -0
  208. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_shell_bang.py +0 -0
  209. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_sidebar_toggle.py +0 -0
  210. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_slash_bridge.py +0 -0
  211. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_snapshot_smoke.py +0 -0
  212. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_subagent_panel.py +0 -0
  213. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_theme.py +0 -0
  214. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_thinking_and_boot.py +0 -0
  215. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_widgets_visual.py +0 -0
  216. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_ui_adapter.py +0 -0
  217. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_worktree.py +0 -0
  218. {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_worktree_session_restore.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.60.0
3
+ Version: 0.61.0
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ __version__ = "0.61.0"
@@ -3,6 +3,33 @@
3
3
  # Common rules shared across all agents (planner, executor, general).
4
4
  # Each agent appends its role-specific instructions to this base.
5
5
  BASE_INSTRUCTIONS = """\
6
+ ## Autonomy and Persistence
7
+
8
+ Persist until the task is fully handled end-to-end within the current turn whenever feasible: \
9
+ do not stop at analysis or partial fixes; carry changes through implementation, verification, \
10
+ and a clear explanation of outcomes unless the user explicitly pauses or redirects you. \
11
+ Assume the user wants you to make code changes or run tools to solve the problem — \
12
+ it is bad to output your proposed solution in a message and stop; go ahead and actually \
13
+ implement the change. If you encounter challenges or blockers, attempt to resolve them yourself.
14
+
15
+ ## Task execution
16
+
17
+ You are a coding agent. Please keep going until the query is completely resolved, before \
18
+ ending your turn and yielding back to the user. Only terminate your turn when you are sure \
19
+ that the problem is solved. Autonomously resolve the query to the best of your ability, \
20
+ using the tools available to you, before coming back to the user. Do NOT guess or make up \
21
+ an answer.
22
+
23
+ If a review, test run, plan step, or check surfaces concrete follow-up work that is clearly \
24
+ in scope, resolve it in the same turn. "More work I identified" is NOT a blocker — it is the \
25
+ next thing to do. The turn ends only when (a) the task is completely resolved and verified, \
26
+ (b) you hit a real blocker that needs information only the user has, or (c) the plan / task \
27
+ list is exhausted with every item terminal (completed / skipped / failed).
28
+
29
+ End your turn by reporting what you DID, not by previewing what should happen next. Phrases \
30
+ like "Próximo passo objetivo é…", "Next step is…", "I will now…" are forbidden as turn-end \
31
+ content — if you write them you must execute them in the same turn.
32
+
6
33
  ## Output rules — CRITICAL for token efficiency
7
34
 
8
35
  Minimize output tokens. Your responses should be fewer than 4 lines unless the user \
@@ -144,7 +171,10 @@ You are a software engineer agent. Your job is to implement code changes.
144
171
  You MUST call `create_task_list` as your FIRST action before any other tool call. \
145
172
  Define 1-10 concrete subtasks for the current step. Then execute them in order, \
146
173
  calling `update_task` to mark each as "completed" or "failed" as you go. \
147
- When all subtasks are done, STOP. Do not add extra actions beyond the task list.
174
+ When all subtasks finish, output a brief summary of what changed. The turn ends \
175
+ only when the macro plan / multi-task workflow is also exhausted; if there are \
176
+ more plan steps or skill-driven tasks pending, continue executing them in the \
177
+ same turn — finishing a subtask list is not finishing the user's request.
148
178
 
149
179
  ## Subtask granularity — CRITICAL
150
180
  Each subtask should touch at most **3-4 files**. If the step involves many files, \
@@ -212,8 +242,10 @@ response. Read-only fan-out has no write-path hazards.
212
242
  When given a plan, execute it step by step. When given a direct task, figure out what needs to be done and do it.
213
243
  **ZERO narration between tool calls.** No "Now I have enough context...", \
214
244
  "Let me check...", "Now I understand...", "I need to...". Just call the next tool silently. \
215
- Only output text AFTER all subtasks are finished a brief summary of what was done. \
216
- Text output is ONLY for the final result or when you hit a blocker that needs user input.
245
+ Output text only when (a) the user's full request is resolved including all macro plan \
246
+ steps and skill-driven tasks or (b) you hit a blocker that needs user input. Completing \
247
+ a single subtask list or a single delegated task is NOT a turn boundary; continue with the \
248
+ next pending item in the same turn.
217
249
 
218
250
  **Never retry failed shell commands with alternative syntax.** If a command fails, diagnose \
219
251
  the error — do not try `cmd /c`, absolute paths, or other wrappers hoping one works.
@@ -352,6 +384,23 @@ those tools — finish the plan and call exit_plan_mode instead.
352
384
  For simple tasks (1-2 file changes) where the user did NOT ask for a plan, \
353
385
  execute directly without entering plan mode.
354
386
 
387
+ ## Subtask lists vs the user's request — CRITICAL
388
+
389
+ `create_task_list` / `update_task` track subtasks for ONE unit of work — \
390
+ typically a single plan step, a single delegated task, or a single Task in a \
391
+ multi-task skill workflow (e.g. /subagent-driven-development). Finishing a \
392
+ subtask list is NOT finishing the user's request. When the `update_task` \
393
+ tool_result says "All subtasks finished. Output a brief summary", that summary \
394
+ is the summary of THAT unit only — not the whole turn.
395
+
396
+ Before yielding, check: is there a pending plan step? A skill workflow that \
397
+ declares more Tasks (Task 1..N)? A check that surfaced more work? If yes, \
398
+ keep going in the same turn — call `create_task_list` again for the next \
399
+ unit, or dispatch the next subagent, or call `update_plan_step` and move on. \
400
+ Phrases like "Se quiser, continuo direto para a Task N", "Próximo passo \
401
+ objetivo é…", "Next step is…" are forbidden as turn-end content. The turn \
402
+ ends only when the user's full request is exhausted.
403
+
355
404
  ## Plan execution
356
405
 
357
406
  When you see a `<system-reminder>` listing PLAN ACTIVE steps, work through them in order:
@@ -384,8 +433,14 @@ Safe parallel-write pattern (only when ALL three hold):
384
433
  2. The tasks touch disjoint file sets.
385
434
  3. No task's output is another task's input inside the same batch.
386
435
 
387
- If any of the three fails, run tasks sequentially — one `delegate_task` per \
388
- response, or stay in-session and execute the step yourself. Parallel fan-out \
436
+ If any of the three fails, run tasks sequentially — dispatch one \
437
+ `delegate_task` per assistant response (so the next one only starts after the \
438
+ previous returns), but keep doing this within the same turn until the multi-task \
439
+ plan/skill workflow is exhausted. "Sequential" means "not in parallel"; it does \
440
+ NOT mean "one task per turn" — finishing a single delegated task and then \
441
+ yielding to the user defeats skills like /subagent-driven-development that \
442
+ dispatch a fresh implementer per task. After each subagent returns, immediately \
443
+ dispatch the next pending task in the same turn. Parallel fan-out \
389
444
  for read-only research (explorer) follows the Delegation strategy rules above; \
390
445
  it does not carry these write-path hazards.\
391
446
  """
@@ -42,9 +42,10 @@ async def review_plan(request: str, plan: str) -> str:
42
42
  )
43
43
  prompt = f"## User Request\n{request}\n\n## Generated Plan\n{plan}"
44
44
  try:
45
- response = await reviewer.arun(prompt)
46
- if response and response.content and response.content.strip():
47
- return response.content.strip()
45
+ from aru.runner import arun_text_only
46
+ content = await arun_text_only(reviewer, prompt)
47
+ if content and content.strip():
48
+ return content.strip()
48
49
  except Exception:
49
50
  pass
50
51
  return plan
@@ -198,16 +198,8 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
198
198
  ctx.model_id = session.model_id
199
199
  small_ref = config.model_aliases.get("small") if config else None
200
200
  if not small_ref:
201
- from aru.providers import resolve_model_ref
202
- provider_key, _ = resolve_model_ref(session.model_ref)
203
- _small_defaults = {
204
- "anthropic": "anthropic/claude-haiku-4-5",
205
- "openai": "openai/gpt-4o-mini",
206
- "groq": "groq/llama-3.1-8b-instant",
207
- "deepseek": "deepseek/deepseek-chat",
208
- "ollama": "ollama/llama3.1",
209
- }
210
- small_ref = _small_defaults.get(provider_key, session.model_ref)
201
+ from aru.providers import default_small_model_ref
202
+ small_ref = default_small_model_ref(session.model_ref)
211
203
  ctx.small_model_ref = small_ref
212
204
 
213
205
  extra_instructions = config.get_extra_instructions()
@@ -225,10 +217,11 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
225
217
  instructions=build_instructions("general", extra_instructions),
226
218
  markdown=True,
227
219
  )
228
- response = await agent.arun(prompt)
229
- if response and response.content:
220
+ from aru.runner import arun_text_only
221
+ content = await arun_text_only(agent, prompt)
222
+ if content:
230
223
  # Print raw text to stdout for piping
231
- print(response.content)
224
+ print(content)
232
225
  else:
233
226
  # Full mode with tools
234
227
  from aru.runner import build_env_context
@@ -974,8 +974,8 @@ async def compact_conversation(
974
974
  markdown=True,
975
975
  )
976
976
 
977
- result = await compactor.arun(prompt, stream=False)
978
- summary = result.content if result and result.content else ""
977
+ from aru.runner import arun_text_only
978
+ summary = await arun_text_only(compactor, prompt)
979
979
 
980
980
  if not summary:
981
981
  # Fallback: simple mechanical summary
@@ -206,9 +206,49 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
206
206
  This function is the single translation layer between Aru's storage
207
207
  format and the runtime format Agno's Claude adapter expects (see
208
208
  `.venv/Lib/site-packages/agno/utils/models/claude.py:334-358`).
209
+
210
+ Defensive orphan filtering: both directions of the tool_use / tool_result
211
+ pair are checked, because each one breaks the API in its own way.
212
+
213
+ * ``tool_result`` whose ``tool_use_id`` has no matching ``tool_use``
214
+ anywhere in the history is dropped. Anthropic rejects with
215
+ ``404 tool_use_id not found``; the OpenAI Responses backend (Codex)
216
+ rejects with ``400 No tool call found for function call output``.
217
+ * ``tool_use`` whose ``id`` has no matching ``tool_result`` anywhere
218
+ after it is dropped (from the assistant message's ``tool_calls``
219
+ list). Anthropic accepts trailing unmatched tool_use only if it's
220
+ the very last turn (because the *next* user turn is expected to
221
+ include the tool_result); but for any older assistant turn an
222
+ unmatched tool_use leaves the conversation in an "awaiting tool
223
+ output" state and the Responses API rejects with ``400 No tool
224
+ output found for function call``. This typically happens when a
225
+ tool wrapper raised before producing a result (timeout, schema
226
+ error, Ctrl+C mid-batch) or when a delegated subagent crashed and
227
+ its tool_result was never recorded.
228
+
229
+ Filtering here keeps the API contract intact regardless of how the
230
+ history got unbalanced upstream (compaction, prune, crash recovery).
209
231
  """
210
232
  from agno.models.message import Message # local import to avoid cycles
211
233
 
234
+ declared_tool_use_ids: set[str] = set()
235
+ answered_tool_use_ids: set[str] = set()
236
+ for item in history:
237
+ role = item.get("role")
238
+ blocks = item.get("content") or []
239
+ if role == "assistant":
240
+ for block in blocks:
241
+ if is_tool_use(block):
242
+ tid = block.get("id")
243
+ if tid:
244
+ declared_tool_use_ids.add(tid)
245
+ elif role in ("user", "tool"):
246
+ for block in blocks:
247
+ if is_tool_result(block):
248
+ tid = block.get("tool_use_id")
249
+ if tid:
250
+ answered_tool_use_ids.add(tid)
251
+
212
252
  out: list[Message] = []
213
253
  for item in history:
214
254
  role = item.get("role", "user")
@@ -220,13 +260,18 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
220
260
  text_parts = [b.get("text", "") for b in blocks if is_text(b)]
221
261
  tool_result_blocks = [b for b in blocks if is_tool_result(b)]
222
262
 
223
- # Tool results must be emitted as separate `role="tool"` Messages
263
+ # Tool results must be emitted as separate `role="tool"` Messages.
264
+ # Skip orphans — see docstring; both Anthropic and Codex reject
265
+ # tool_results whose tool_use_id has no declaring tool_use.
224
266
  for tr in tool_result_blocks:
267
+ tid = tr.get("tool_use_id", "")
268
+ if tid and tid not in declared_tool_use_ids:
269
+ continue
225
270
  out.append(
226
271
  Message(
227
272
  role="tool",
228
273
  content=str(tr.get("content", "")),
229
- tool_call_id=tr.get("tool_use_id", ""),
274
+ tool_call_id=tid,
230
275
  from_history=True,
231
276
  )
232
277
  )
@@ -245,9 +290,19 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
245
290
  for b in blocks:
246
291
  if not is_tool_use(b):
247
292
  continue
293
+ tid = b.get("id", "")
294
+ # Drop tool_calls that never produced a tool_result. Without
295
+ # this, the next API call carries an unanswered function_call
296
+ # from a prior turn and the Responses backend errors out
297
+ # ("No tool output found for function call <id>"). The tool
298
+ # wrapper *should* always produce a result, but defensive
299
+ # filtering here recovers a stuck history even when the
300
+ # wrapper failed (timeout/crash/abort).
301
+ if tid and tid not in answered_tool_use_ids:
302
+ continue
248
303
  tool_calls.append(
249
304
  {
250
- "id": b.get("id", ""),
305
+ "id": tid,
251
306
  "type": "function",
252
307
  "function": {
253
308
  "name": b.get("name", ""),
@@ -265,15 +320,21 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
265
320
 
266
321
  elif role == "tool":
267
322
  # Explicit tool-role items (we don't produce these ourselves but
268
- # support them for forward compat with loaded sessions).
323
+ # support them for forward compat with loaded sessions). Same
324
+ # orphan filter as the user-role branch — this is actually the
325
+ # branch that catches the loaded-session case where a prior
326
+ # compaction summarised the matching assistant turn away.
269
327
  for tr in blocks:
270
328
  if not is_tool_result(tr):
271
329
  continue
330
+ tid = tr.get("tool_use_id", "")
331
+ if tid and tid not in declared_tool_use_ids:
332
+ continue
272
333
  out.append(
273
334
  Message(
274
335
  role="tool",
275
336
  content=str(tr.get("content", "")),
276
- tool_call_id=tr.get("tool_use_id", ""),
337
+ tool_call_id=tid,
277
338
  from_history=True,
278
339
  )
279
340
  )
@@ -135,8 +135,8 @@ async def _run_extractor_agent(prompt: str, model_ref: str) -> str:
135
135
  instructions="You curate durable memories. Output only the requested JSON.",
136
136
  markdown=False,
137
137
  )
138
- result = await agent.arun(prompt, stream=False)
139
- return (result.content or "") if result else ""
138
+ from aru.runner import arun_text_only
139
+ return await arun_text_only(agent, prompt)
140
140
 
141
141
 
142
142
  def _parse_json_array(content: str) -> list[dict]:
@@ -444,16 +444,24 @@ def set_permission_mode(mode: str) -> str:
444
444
 
445
445
 
446
446
def cycle_permission_mode() -> str:
    """Step the active permission mode forward by one and return the result.

    The mode change itself is performed by ``set_permission_mode`` rather
    than by mutating the context here. That keeps the Ctrl+A path (this
    function) and the ``/yolo`` slash command (which calls
    ``set_permission_mode`` directly) on a single code path: same mutation,
    same ``permission.mode.changed`` publish, same UI refresh trigger. An
    earlier near-duplicate implementation skipped the bus publish, so the
    StatusPane advanced visually while downstream subscribers kept seeing
    stale state.

    Returns:
        Whatever ``set_permission_mode`` returns for the newly selected mode.
    """
    ctx = get_ctx()
    try:
        position = _MODE_CYCLE.index(ctx.permission_mode)
    except ValueError:
        # Current mode is not part of the cycle: treat it as the first entry.
        position = 0
    # Wrap around to the start once the end of the cycle is reached.
    successor = _MODE_CYCLE[(position + 1) % len(_MODE_CYCLE)]
    return set_permission_mode(successor)
457
465
 
458
466
 
459
467
  def consume_rejection_feedback() -> str:
@@ -465,6 +465,22 @@ def resolve_model_ref(model_ref: str) -> tuple[str, str]:
465
465
  return provider_key, model_name
466
466
 
467
467
 
468
def default_small_model_ref(session_model_ref: str) -> str:
    """Pick the model ref a sub-agent uses when no ``small`` alias exists.

    The sub-agent simply inherits the parent's effective model, mirroring
    Codex's ``build_agent_shared_config`` (multi_agents_common.rs). Staying
    on the parent's provider preserves credentials and cache lineage, and
    sidesteps the cross-provider failure where a hard-coded "small model" is
    refused by the parent's backend — e.g. ``gpt-4o-mini`` on a ChatGPT
    Plus/Pro OAuth credential whose Codex endpoint only accepts ``gpt-5*``
    ids. A cheaper sub-agent model can still be configured through
    ``model_aliases.small`` in aru.json; that override is applied at every
    call site before this helper is consulted.

    Args:
        session_model_ref: Model reference of the parent session.

    Returns:
        ``session_model_ref`` unchanged.
    """
    inherited_ref = session_model_ref
    return inherited_ref
482
+
483
+
468
484
  def _get_actual_model_id(provider: ProviderConfig, model_name: str) -> str:
469
485
  """Get the actual model ID to send to the API.
470
486
 
@@ -27,6 +27,33 @@ _MAX_TOKENS_RECOVERY_PROMPT = (
27
27
  )
28
28
 
29
29
 
30
async def arun_text_only(agent, prompt: str) -> str:
    """Run a tools-less helper agent and return its final text output.

    Always streams, because the Codex Responses backend rejects non-streaming
    calls with ``400 'Stream must be set to true'``. The official OpenAI
    Responses API and every other provider also accept ``stream=True``, so a
    single code path covers both. Used by the compaction summarizer, memory
    extractor, and plan reviewer — all of which previously called
    ``agent.arun(prompt, stream=False)`` and broke for any user on a ChatGPT
    Plus/Pro OAuth credential.

    Args:
        agent: Object whose ``arun(prompt, stream=True, yield_run_output=True)``
            returns an async iterable of run events.
        prompt: Text sent to the agent as the single user input.

    Returns:
        The content of the first ``RunOutput`` event, as a string. The empty
        string when no ``RunOutput`` arrives or its content is empty (the
        caller decides what that means — e.g. compaction has its own
        mechanical fallback).
    """
    from agno.run.agent import RunOutput

    final_output = None
    stream = agent.arun(prompt, stream=True, yield_run_output=True)
    try:
        async for event in stream:
            if isinstance(event, RunOutput):
                final_output = event
                break
    finally:
        # Breaking out early leaves an async generator suspended; close it
        # explicitly so its cleanup runs now instead of at garbage collection.
        # ``getattr`` keeps this safe for plain async iterators without aclose.
        aclose = getattr(stream, "aclose", None)
        if aclose is not None:
            await aclose()

    if final_output and final_output.content:
        content = final_output.content
        # Honour the ``-> str`` contract even if the run produced a
        # non-string payload.
        return content if isinstance(content, str) else str(content)
    return ""
55
+
56
+
30
57
  def _prepare_recovery_input(
31
58
  *,
32
59
  agent,
@@ -185,6 +212,59 @@ def _build_plan_reminder(session) -> str | None:
185
212
  return "\n".join(lines)
186
213
 
187
214
 
215
def _build_permission_mode_reminder() -> str | None:
    """Return a ``<system-reminder>`` describing a non-default permission mode.

    GPT-5 / Codex-trained models tend to ask "should I commit?" or "want me
    to run X?" before mutating actions. That matches the Codex CLI's
    default-approval gate but is the wrong posture in Aru's YOLO mode, where
    every gate is already pre-approved. The harness can read
    ``ctx.permission_mode``; the model only learns it if told. Without this
    nudge the user must re-type "go" / "do it" after each proposal, which is
    exactly what YOLO is meant to skip. The output has the same shape as
    ``_build_plan_reminder`` so the model treats it as authoritative.

    Returns:
        The reminder text for ``yolo`` or ``acceptEdits``. ``None`` for any
        other mode (including ``default``, where the model's built-in caution
        is correct and a reminder would only consume cache space each turn),
        and ``None`` when reading the context raises ``LookupError``.
    """
    try:
        from aru.runtime import get_ctx

        active_mode = get_ctx().permission_mode
    except LookupError:
        return None

    if active_mode == "acceptEdits":
        edits_notice = (
            "<system-reminder>\n"
            "AUTO-ACCEPT EDITS ACTIVE — file edits are pre-approved. Do NOT "
            "ask before writing/editing files. Bash and other non-edit "
            "actions still gate normally; for those you may pause if the "
            "command is destructive or ambiguous. For routine edits, "
            "execute without confirmation.\n"
            "</system-reminder>"
        )
        return edits_notice

    if active_mode != "yolo":
        return None

    # The persistence / autonomy posture belongs to BASE_INSTRUCTIONS, which
    # is always part of the system prompt and therefore covers default,
    # acceptEdits and yolo alike (mirroring Codex, where "Task execution" /
    # "Autonomy and Persistence" live in the cached base instructions, not in
    # a per-turn reminder). This text is limited to the one mode-specific
    # concern, permission gating. Do NOT repeat the persistence rules here:
    # it would bloat the per-turn cache with no behavioural gain.
    yolo_notice = (
        "<system-reminder>\n"
        "YOLO MODE ACTIVE — equivalent to Codex `approval-policy: "
        "never`. Every tool call is pre-approved. Do NOT ask permission "
        "before running tools (\"posso fazer o commit?\", \"want me to "
        "run the tests?\", \"shall I edit X?\", \"Se quiser, faço…\"). "
        "Just execute. The autonomy and task-execution rules from your "
        "system prompt still apply.\n"
        "</system-reminder>"
    )
    return yolo_notice
266
+
267
+
188
268
  def _consume_plan_rejection_feedback(session) -> str | None:
189
269
  """Read-and-clear plan rejection feedback stored on the session.
190
270
 
@@ -405,6 +485,10 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
405
485
  if reminder:
406
486
  msg_parts.append(reminder)
407
487
 
488
+ mode_reminder = _build_permission_mode_reminder()
489
+ if mode_reminder:
490
+ msg_parts.append(mode_reminder)
491
+
408
492
  warning = session.check_budget_warning()
409
493
  if warning:
410
494
  console.print(warning)
@@ -13,7 +13,7 @@ from dataclasses import dataclass, field
13
13
  from datetime import datetime
14
14
  from typing import Literal
15
15
 
16
- from aru.providers import MODEL_ALIASES, get_model_display, resolve_model_ref
16
+ from aru.providers import MODEL_ALIASES, get_model_display, get_provider, resolve_model_ref
17
17
 
18
18
  # Default model reference (provider/model format)
19
19
  DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
@@ -577,11 +577,25 @@ class Session:
577
577
  suffix convention plus any future provider that adopts the same
578
578
  naming. None of the major paid models contain "free" in their id,
579
579
  so false positives are negligible.
580
+
581
+ ChatGPT Plus/Pro via Codex OAuth is a flat-rate subscription — usage
582
+ is bounded by the plan's session quotas, not per-token charges — so
583
+ all four prices are zero whenever the active openai credential is an
584
+ OAuth token (``provider.codex_oauth``). The user disconnecting via
585
+ ``/connect logout`` clears the flag and the regular gpt-5 pricing
586
+ kicks back in for any subsequent turns.
580
587
  """
581
588
  ref = (self.model_ref or "").lower()
582
589
  mid = (self.model_id or "").lower()
583
590
  if "free" in ref or "free" in mid:
584
591
  return (0.0, 0.0, 0.0, 0.0)
592
+ try:
593
+ provider_key, _ = resolve_model_ref(self.model_ref or "")
594
+ provider = get_provider(provider_key)
595
+ if provider is not None and getattr(provider, "codex_oauth", False):
596
+ return (0.0, 0.0, 0.0, 0.0)
597
+ except Exception:
598
+ pass
585
599
  model_id = self.model_id
586
600
  # Try exact match, then prefix match, then fallback
587
601
  for prefix, pricing in MODEL_PRICING.items():
@@ -422,8 +422,17 @@ Do not create documentation files unless explicitly asked.
422
422
  })
423
423
 
424
424
  from aru.runtime import _schedule_publish as _sched_t
425
+ # Prepend the permission-mode reminder to the subagent's prompt so
426
+ # YOLO mode reaches the spawned agent too — delegate runs through
427
+ # ``agent_instance.arun`` directly, bypassing run_agent_capture's
428
+ # reminder injection. The persistence / task-execution posture
429
+ # is in BASE_INSTRUCTIONS (subagent's system prompt) so it
430
+ # propagates without needing a per-spawn reminder.
431
+ from aru.runner import _build_permission_mode_reminder
432
+ _mode_reminder = _build_permission_mode_reminder()
433
+ sub_task = f"{_mode_reminder}\n\n{task}" if _mode_reminder else task
425
434
  try:
426
- async for event in agent_instance.arun(task, stream=True, stream_events=True, yield_run_output=True):
435
+ async for event in agent_instance.arun(sub_task, stream=True, stream_events=True, yield_run_output=True):
427
436
  if is_aborted():
428
437
  _trace.status = "cancelled"
429
438
  _trace.ended_at = _time.monotonic()
@@ -199,6 +199,39 @@ def write_files(file_list: list[dict]) -> str:
199
199
  Example: [{"path": "src/main.py", "content": "print('hello')"}, {"path": "src/utils.py", "content": "..."}]
200
200
  """
201
201
  from aru.runtime import resolve_path as _resolve_path
202
+ # Defensive schema validation — return a string error instead of raising.
203
+ # An uncaught TypeError / AttributeError here would propagate through the
204
+ # async tool wrapper without producing a tool_result, leaving the next
205
+ # turn with a function_call lacking its function_call_output (Codex
206
+ # rejects with ``400 No tool output found for function call``).
207
+ if not isinstance(file_list, list):
208
+ return (
209
+ "Error: write_files expects ``file_list`` to be a JSON array of "
210
+ "objects with 'path' and 'content' keys. Got "
211
+ f"{type(file_list).__name__!r}."
212
+ )
213
+ cleaned: list[dict] = []
214
+ schema_errors: list[str] = []
215
+ for i, e in enumerate(file_list):
216
+ if not isinstance(e, dict):
217
+ schema_errors.append(
218
+ f"entry {i}: expected object with 'path' and 'content', got {type(e).__name__}"
219
+ )
220
+ continue
221
+ if "path" not in e or "content" not in e:
222
+ schema_errors.append(
223
+ f"entry {i}: missing required key(s) — needs both 'path' and 'content'"
224
+ )
225
+ continue
226
+ cleaned.append(e)
227
+ if not cleaned:
228
+ return (
229
+ "Error: write_files received no valid entries. "
230
+ + "; ".join(schema_errors)
231
+ if schema_errors
232
+ else "Error: write_files received an empty list."
233
+ )
234
+ file_list = cleaned
202
235
  parts = [Text(f"Write {len(file_list)} files:", style="bold"), Text()]
203
236
  for e in file_list:
204
237
  p = _resolve_path(e.get("path", "<missing>"))
@@ -311,6 +344,19 @@ def edit_files(edits: list[dict]) -> str:
311
344
  Example: [{"path": "src/main.py", "old_string": "foo", "new_string": "bar"}]
312
345
  """
313
346
  from aru.runtime import resolve_path as _resolve_path
347
+ # Defensive schema validation — same rationale as write_files: a TypeError
348
+ # raised here would propagate through the async wrapper without producing
349
+ # a tool_result, leaving the assistant message with an unanswered tool_use
350
+ # that the Responses backend rejects on the next turn.
351
+ if not isinstance(edits, list):
352
+ return (
353
+ "Error: edit_files expects ``edits`` to be a JSON array of "
354
+ "objects with 'path', 'old_string', 'new_string'. Got "
355
+ f"{type(edits).__name__!r}."
356
+ )
357
+ edits = [e for e in edits if isinstance(e, dict)]
358
+ if not edits:
359
+ return "Error: edit_files received no valid edit entries."
314
360
  original: dict[str, str] = {}
315
361
  preview: dict[str, str] = {}
316
362
  preview_errors: list[str] = []
@@ -18,7 +18,6 @@ from aru.tools.file_ops import (
18
18
  _list_directory_tool,
19
19
  _read_file_tool,
20
20
  _write_file_tool,
21
- _write_files_tool,
22
21
  read_files,
23
22
  )
24
23
  from aru.tools.plan_mode import enter_plan_mode, exit_plan_mode
@@ -69,7 +68,18 @@ _READ_ONLY_TOOLS = [
69
68
 
70
69
  _WRITE_TOOLS = [
71
70
  _write_file_tool,
72
- _write_files_tool,
71
+ # ``_write_files_tool`` (batch write) intentionally NOT exposed: the
72
+ # nested ``[{"path", "content"}]`` schema is consistently mis-called by
73
+ # every model family we tested (GPT-5 included) — it passes a plain dict,
74
+ # a list of strings, or forgets one of the required keys. The wrapper
75
+ # used to raise on the malformed input, which left the assistant message
76
+ # with an unanswered tool_call and broke the next turn on the Codex
77
+ # backend (``400 No tool output found for function call``). Adding
78
+ # schema-validation didn't fix the underlying ergonomics — models still
79
+ # waste calls fighting the schema. Single ``write_file`` works
80
+ # reliably; batches can be expressed as N sequential calls. The
81
+ # function and its async wrapper are kept in ``file_ops.py`` for any
82
+ # custom tool or plugin that imports them directly.
73
83
  _edit_file_tool,
74
84
  _edit_files_tool,
75
85
  _apply_patch_tool,
@@ -237,10 +237,25 @@ def update_task(index: int, status: str) -> str:
237
237
  failed_count = sum(1 for t in all_tasks if t["status"] == "failed")
238
238
  total = len(all_tasks)
239
239
 
240
+ # Tool result kept minimal — Codex's update_plan returns the constant
241
+ # ``"Plan updated"`` (plan.rs:22) and relies on the base-instructions
242
+ # "Task execution" section (always in the system prompt) to keep GPT-5
243
+ # going across subtasks. We follow the same shape so the persistence
244
+ # signal lives in one place (BASE_INSTRUCTIONS) instead of being
245
+ # duplicated in every tool result. The "all finished" branch is
246
+ # deliberately worded as "this subtask list is done" (not "the work is
247
+ # done") so the model doesn't read it as a turn-end signal when there
248
+ # are still pending plan steps or skill-driven macro Tasks.
240
249
  if completed_count + failed_count == total:
241
- return f"All subtasks finished ({completed_count} completed, {failed_count} failed). Step done. Output a brief summary of what was created/changed."
250
+ return (
251
+ f"This subtask list is done ({completed_count} completed, "
252
+ f"{failed_count} failed). If more plan steps or skill-driven "
253
+ "Tasks remain in your request, continue with the next one in "
254
+ "the same turn (call create_task_list again, dispatch the next "
255
+ "subagent, or call update_plan_step). Only yield to the user "
256
+ "when the full request is exhausted."
257
+ )
242
258
 
243
- # Find next pending subtask
244
259
  next_task = next((t for t in all_tasks if t["status"] == "pending"), None)
245
260
  if next_task:
246
261
  return f"Subtask {index} → {status}. Next: subtask {next_task['index']} — {next_task['description']}"