aru-code 0.60.0__tar.gz → 0.61.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aru_code-0.60.0/aru_code.egg-info → aru_code-0.61.0}/PKG-INFO +1 -1
- aru_code-0.61.0/aru/__init__.py +1 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/base.py +60 -5
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/planner.py +4 -3
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/cli.py +6 -13
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/context.py +2 -2
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/history_blocks.py +66 -5
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/extractor.py +2 -2
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/permissions.py +12 -4
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/providers.py +16 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/runner.py +84 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/session.py +15 -1
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/delegate.py +10 -1
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/file_ops.py +46 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/registry.py +12 -2
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/tasklist.py +17 -2
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/app.py +16 -43
- {aru_code-0.60.0 → aru_code-0.61.0/aru_code.egg-info}/PKG-INFO +1 -1
- {aru_code-0.60.0 → aru_code-0.61.0}/pyproject.toml +1 -1
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_confabulation_regression.py +74 -13
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_runner_recovery.py +11 -1
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_slash_model.py +3 -3
- aru_code-0.60.0/aru/__init__.py +0 -1
- {aru_code-0.60.0 → aru_code-0.61.0}/LICENSE +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/README.md +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/_debug/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/_debug/analyze_trace.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/_debug/loop_tracer.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/agent_factory.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/agents/catalog.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/auth.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/cache_patch.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/checkpoints.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/codex_oauth.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/commands.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/config.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/display.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/doom_loop.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/events.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/format/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/format/manager.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/format/runner.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/client.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/manager.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/lsp/protocol.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/loader.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/memory/store.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugin_cache.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/custom_tools.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/hooks.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/manager.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/plugins/tool_api.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/runtime.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/select.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/sinks.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/state.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/streaming.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tool_policy.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/_diff.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/_shared.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/apply_patch.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/apply_patch_prompt.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/codebase.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/delegate_prompt.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/gitignore.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/lsp.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/memory_tool.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/plan_mode.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/ranker.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/search.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/shell.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/skill.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/web.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tools/worktree.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/log_bridge.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/notifications.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/sanitize.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/choice.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/confirm.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/keymap.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/search.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/session_picker.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/screens/text_input.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/sinks.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/slash_bridge.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/themes.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/ui.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/__init__.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/chat.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/completer.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/context_pane.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/file_link.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/header.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/inline_choice.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/loaded_pane.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/prompt_area.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/prompt_queue.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/status.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/subagent_panel.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/tasklist_panel.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/thinking.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/tui/widgets/tools.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru/ui.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/SOURCES.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/setup.cfg +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_agents_base.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_agents_md_coverage.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_apply_patch.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_async_tool_permission.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_auth_store.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cache_patch_metrics.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cache_patch_stop_reason.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_catalog.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_chat_scrollable.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_checkpoints.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_base.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_new.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_session.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cli_shell.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_codebase.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_codex_oauth.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_config.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_connect_command.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_connect_oauth.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_context.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_context_pane.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_cwd_awareness.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_delegate.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_doom_loop.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_events_backward_compat.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_events_schema.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_fork_ctx_concurrency.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_format.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_gitignore.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_guardrails_scenarios.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_invoke_skill.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_invoked_skills.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_loaded_pane_path.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_lsp.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_lsp_rename.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_main.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_markdown_to_text.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_mcp_client.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_mcp_health.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_memory.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_memory_tool.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_microcompact.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_permission_timeout_suspension.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_permissions.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plan_mode_refactor.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugin_cache.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugin_errors.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugin_hooks_v2.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_plugins.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_providers.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_ranker.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_reasoning.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_runner_interrupt.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_runtime.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_select.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_session_free_cost.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_skill_disallowed_tools.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_state_recent_models.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_status_breakdown.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_status_cost.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_streaming_sink.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_subagent_tool_events.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tasklist.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_thread_tool_timeout.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tool_policy.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_truncation_marker.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_app_boot.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_bindings.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_bus_flow.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_chat.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_chat_adversarial.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_completer.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_completer_dynamic.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_connect_wiring.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_copy.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_error_display.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_file_link.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_input_behaviour.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_layer12_recovery.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_layer13_recovery.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_mention_expand.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_modals.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_mode_cycle.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_native_selection.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_permission_flow.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_plan_task_render.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_prompt_queue.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_shell_bang.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_sidebar_toggle.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_slash_bridge.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_snapshot_smoke.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_subagent_panel.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_theme.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_thinking_and_boot.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_tui_widgets_visual.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_ui_adapter.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_worktree.py +0 -0
- {aru_code-0.60.0 → aru_code-0.61.0}/tests/test_worktree_session_restore.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.61.0"
|
|
@@ -3,6 +3,33 @@
|
|
|
3
3
|
# Common rules shared across all agents (planner, executor, general).
|
|
4
4
|
# Each agent appends its role-specific instructions to this base.
|
|
5
5
|
BASE_INSTRUCTIONS = """\
|
|
6
|
+
## Autonomy and Persistence
|
|
7
|
+
|
|
8
|
+
Persist until the task is fully handled end-to-end within the current turn whenever feasible: \
|
|
9
|
+
do not stop at analysis or partial fixes; carry changes through implementation, verification, \
|
|
10
|
+
and a clear explanation of outcomes unless the user explicitly pauses or redirects you. \
|
|
11
|
+
Assume the user wants you to make code changes or run tools to solve the problem — \
|
|
12
|
+
it is bad to output your proposed solution in a message and stop; go ahead and actually \
|
|
13
|
+
implement the change. If you encounter challenges or blockers, attempt to resolve them yourself.
|
|
14
|
+
|
|
15
|
+
## Task execution
|
|
16
|
+
|
|
17
|
+
You are a coding agent. Please keep going until the query is completely resolved, before \
|
|
18
|
+
ending your turn and yielding back to the user. Only terminate your turn when you are sure \
|
|
19
|
+
that the problem is solved. Autonomously resolve the query to the best of your ability, \
|
|
20
|
+
using the tools available to you, before coming back to the user. Do NOT guess or make up \
|
|
21
|
+
an answer.
|
|
22
|
+
|
|
23
|
+
If a review, test run, plan step, or check surfaces concrete follow-up work that is clearly \
|
|
24
|
+
in scope, resolve it in the same turn. "More work I identified" is NOT a blocker — it is the \
|
|
25
|
+
next thing to do. The turn ends only when (a) the task is completely resolved and verified, \
|
|
26
|
+
(b) you hit a real blocker that needs information only the user has, or (c) the plan / task \
|
|
27
|
+
list is exhausted with every item terminal (completed / skipped / failed).
|
|
28
|
+
|
|
29
|
+
End your turn by reporting what you DID, not by previewing what should happen next. Phrases \
|
|
30
|
+
like "Próximo passo objetivo é…", "Next step is…", "I will now…" are forbidden as turn-end \
|
|
31
|
+
content — if you write them you must execute them in the same turn.
|
|
32
|
+
|
|
6
33
|
## Output rules — CRITICAL for token efficiency
|
|
7
34
|
|
|
8
35
|
Minimize output tokens. Your responses should be fewer than 4 lines unless the user \
|
|
@@ -144,7 +171,10 @@ You are a software engineer agent. Your job is to implement code changes.
|
|
|
144
171
|
You MUST call `create_task_list` as your FIRST action before any other tool call. \
|
|
145
172
|
Define 1-10 concrete subtasks for the current step. Then execute them in order, \
|
|
146
173
|
calling `update_task` to mark each as "completed" or "failed" as you go. \
|
|
147
|
-
When all subtasks
|
|
174
|
+
When all subtasks finish, output a brief summary of what changed. The turn ends \
|
|
175
|
+
only when the macro plan / multi-task workflow is also exhausted; if there are \
|
|
176
|
+
more plan steps or skill-driven tasks pending, continue executing them in the \
|
|
177
|
+
same turn — finishing a subtask list is not finishing the user's request.
|
|
148
178
|
|
|
149
179
|
## Subtask granularity — CRITICAL
|
|
150
180
|
Each subtask should touch at most **3-4 files**. If the step involves many files, \
|
|
@@ -212,8 +242,10 @@ response. Read-only fan-out has no write-path hazards.
|
|
|
212
242
|
When given a plan, execute it step by step. When given a direct task, figure out what needs to be done and do it.
|
|
213
243
|
**ZERO narration between tool calls.** No "Now I have enough context...", \
|
|
214
244
|
"Let me check...", "Now I understand...", "I need to...". Just call the next tool silently. \
|
|
215
|
-
|
|
216
|
-
|
|
245
|
+
Output text only when (a) the user's full request is resolved — including all macro plan \
|
|
246
|
+
steps and skill-driven tasks — or (b) you hit a blocker that needs user input. Completing \
|
|
247
|
+
a single subtask list or a single delegated task is NOT a turn boundary; continue with the \
|
|
248
|
+
next pending item in the same turn.
|
|
217
249
|
|
|
218
250
|
**Never retry failed shell commands with alternative syntax.** If a command fails, diagnose \
|
|
219
251
|
the error — do not try `cmd /c`, absolute paths, or other wrappers hoping one works.
|
|
@@ -352,6 +384,23 @@ those tools — finish the plan and call exit_plan_mode instead.
|
|
|
352
384
|
For simple tasks (1-2 file changes) where the user did NOT ask for a plan, \
|
|
353
385
|
execute directly without entering plan mode.
|
|
354
386
|
|
|
387
|
+
## Subtask lists vs the user's request — CRITICAL
|
|
388
|
+
|
|
389
|
+
`create_task_list` / `update_task` track subtasks for ONE unit of work — \
|
|
390
|
+
typically a single plan step, a single delegated task, or a single Task in a \
|
|
391
|
+
multi-task skill workflow (e.g. /subagent-driven-development). Finishing a \
|
|
392
|
+
subtask list is NOT finishing the user's request. When the `update_task` \
|
|
393
|
+
tool_result says "All subtasks finished. Output a brief summary", that summary \
|
|
394
|
+
is the summary of THAT unit only — not the whole turn.
|
|
395
|
+
|
|
396
|
+
Before yielding, check: is there a pending plan step? A skill workflow that \
|
|
397
|
+
declares more Tasks (Task 1..N)? A check that surfaced more work? If yes, \
|
|
398
|
+
keep going in the same turn — call `create_task_list` again for the next \
|
|
399
|
+
unit, or dispatch the next subagent, or call `update_plan_step` and move on. \
|
|
400
|
+
Phrases like "Se quiser, continuo direto para a Task N", "Próximo passo \
|
|
401
|
+
objetivo é…", "Next step is…" are forbidden as turn-end content. The turn \
|
|
402
|
+
ends only when the user's full request is exhausted.
|
|
403
|
+
|
|
355
404
|
## Plan execution
|
|
356
405
|
|
|
357
406
|
When you see a `<system-reminder>` listing PLAN ACTIVE steps, work through them in order:
|
|
@@ -384,8 +433,14 @@ Safe parallel-write pattern (only when ALL three hold):
|
|
|
384
433
|
2. The tasks touch disjoint file sets.
|
|
385
434
|
3. No task's output is another task's input inside the same batch.
|
|
386
435
|
|
|
387
|
-
If any of the three fails, run tasks sequentially — one
|
|
388
|
-
|
|
436
|
+
If any of the three fails, run tasks sequentially — dispatch one \
|
|
437
|
+
`delegate_task` per assistant response (so the next one only starts after the \
|
|
438
|
+
previous returns), but keep doing this within the same turn until the multi-task \
|
|
439
|
+
plan/skill workflow is exhausted. "Sequential" means "not in parallel"; it does \
|
|
440
|
+
NOT mean "one task per turn" — finishing a single delegated task and then \
|
|
441
|
+
yielding to the user defeats skills like /subagent-driven-development that \
|
|
442
|
+
dispatch a fresh implementer per task. After each subagent returns, immediately \
|
|
443
|
+
dispatch the next pending task in the same turn. Parallel fan-out \
|
|
389
444
|
for read-only research (explorer) follows the Delegation strategy rules above; \
|
|
390
445
|
it does not carry these write-path hazards.\
|
|
391
446
|
"""
|
|
@@ -42,9 +42,10 @@ async def review_plan(request: str, plan: str) -> str:
|
|
|
42
42
|
)
|
|
43
43
|
prompt = f"## User Request\n{request}\n\n## Generated Plan\n{plan}"
|
|
44
44
|
try:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
from aru.runner import arun_text_only
|
|
46
|
+
content = await arun_text_only(reviewer, prompt)
|
|
47
|
+
if content and content.strip():
|
|
48
|
+
return content.strip()
|
|
48
49
|
except Exception:
|
|
49
50
|
pass
|
|
50
51
|
return plan
|
|
@@ -198,16 +198,8 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
|
|
|
198
198
|
ctx.model_id = session.model_id
|
|
199
199
|
small_ref = config.model_aliases.get("small") if config else None
|
|
200
200
|
if not small_ref:
|
|
201
|
-
from aru.providers import
|
|
202
|
-
|
|
203
|
-
_small_defaults = {
|
|
204
|
-
"anthropic": "anthropic/claude-haiku-4-5",
|
|
205
|
-
"openai": "openai/gpt-4o-mini",
|
|
206
|
-
"groq": "groq/llama-3.1-8b-instant",
|
|
207
|
-
"deepseek": "deepseek/deepseek-chat",
|
|
208
|
-
"ollama": "ollama/llama3.1",
|
|
209
|
-
}
|
|
210
|
-
small_ref = _small_defaults.get(provider_key, session.model_ref)
|
|
201
|
+
from aru.providers import default_small_model_ref
|
|
202
|
+
small_ref = default_small_model_ref(session.model_ref)
|
|
211
203
|
ctx.small_model_ref = small_ref
|
|
212
204
|
|
|
213
205
|
extra_instructions = config.get_extra_instructions()
|
|
@@ -225,10 +217,11 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
|
|
|
225
217
|
instructions=build_instructions("general", extra_instructions),
|
|
226
218
|
markdown=True,
|
|
227
219
|
)
|
|
228
|
-
|
|
229
|
-
|
|
220
|
+
from aru.runner import arun_text_only
|
|
221
|
+
content = await arun_text_only(agent, prompt)
|
|
222
|
+
if content:
|
|
230
223
|
# Print raw text to stdout for piping
|
|
231
|
-
print(
|
|
224
|
+
print(content)
|
|
232
225
|
else:
|
|
233
226
|
# Full mode with tools
|
|
234
227
|
from aru.runner import build_env_context
|
|
@@ -974,8 +974,8 @@ async def compact_conversation(
|
|
|
974
974
|
markdown=True,
|
|
975
975
|
)
|
|
976
976
|
|
|
977
|
-
|
|
978
|
-
summary =
|
|
977
|
+
from aru.runner import arun_text_only
|
|
978
|
+
summary = await arun_text_only(compactor, prompt)
|
|
979
979
|
|
|
980
980
|
if not summary:
|
|
981
981
|
# Fallback: simple mechanical summary
|
|
@@ -206,9 +206,49 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
|
|
|
206
206
|
This function is the single translation layer between Aru's storage
|
|
207
207
|
format and the runtime format Agno's Claude adapter expects (see
|
|
208
208
|
`.venv/Lib/site-packages/agno/utils/models/claude.py:334-358`).
|
|
209
|
+
|
|
210
|
+
Defensive orphan filtering: both directions of the tool_use / tool_result
|
|
211
|
+
pair are checked, because each one breaks the API in its own way.
|
|
212
|
+
|
|
213
|
+
* ``tool_result`` whose ``tool_use_id`` has no matching ``tool_use``
|
|
214
|
+
anywhere in the history is dropped. Anthropic rejects with
|
|
215
|
+
``404 tool_use_id not found``; the OpenAI Responses backend (Codex)
|
|
216
|
+
rejects with ``400 No tool call found for function call output``.
|
|
217
|
+
* ``tool_use`` whose ``id`` has no matching ``tool_result`` anywhere
|
|
218
|
+
after it is dropped (from the assistant message's ``tool_calls``
|
|
219
|
+
list). Anthropic accepts trailing unmatched tool_use only if it's
|
|
220
|
+
the very last turn (because the *next* user turn is expected to
|
|
221
|
+
include the tool_result); but for any older assistant turn an
|
|
222
|
+
unmatched tool_use leaves the conversation in an "awaiting tool
|
|
223
|
+
output" state and the Responses API rejects with ``400 No tool
|
|
224
|
+
output found for function call``. This typically happens when a
|
|
225
|
+
tool wrapper raised before producing a result (timeout, schema
|
|
226
|
+
error, Ctrl+C mid-batch) or when a delegated subagent crashed and
|
|
227
|
+
its tool_result was never recorded.
|
|
228
|
+
|
|
229
|
+
Filtering here keeps the API contract intact regardless of how the
|
|
230
|
+
history got unbalanced upstream (compaction, prune, crash recovery).
|
|
209
231
|
"""
|
|
210
232
|
from agno.models.message import Message # local import to avoid cycles
|
|
211
233
|
|
|
234
|
+
declared_tool_use_ids: set[str] = set()
|
|
235
|
+
answered_tool_use_ids: set[str] = set()
|
|
236
|
+
for item in history:
|
|
237
|
+
role = item.get("role")
|
|
238
|
+
blocks = item.get("content") or []
|
|
239
|
+
if role == "assistant":
|
|
240
|
+
for block in blocks:
|
|
241
|
+
if is_tool_use(block):
|
|
242
|
+
tid = block.get("id")
|
|
243
|
+
if tid:
|
|
244
|
+
declared_tool_use_ids.add(tid)
|
|
245
|
+
elif role in ("user", "tool"):
|
|
246
|
+
for block in blocks:
|
|
247
|
+
if is_tool_result(block):
|
|
248
|
+
tid = block.get("tool_use_id")
|
|
249
|
+
if tid:
|
|
250
|
+
answered_tool_use_ids.add(tid)
|
|
251
|
+
|
|
212
252
|
out: list[Message] = []
|
|
213
253
|
for item in history:
|
|
214
254
|
role = item.get("role", "user")
|
|
@@ -220,13 +260,18 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
|
|
|
220
260
|
text_parts = [b.get("text", "") for b in blocks if is_text(b)]
|
|
221
261
|
tool_result_blocks = [b for b in blocks if is_tool_result(b)]
|
|
222
262
|
|
|
223
|
-
# Tool results must be emitted as separate `role="tool"` Messages
|
|
263
|
+
# Tool results must be emitted as separate `role="tool"` Messages.
|
|
264
|
+
# Skip orphans — see docstring; both Anthropic and Codex reject
|
|
265
|
+
# tool_results whose tool_use_id has no declaring tool_use.
|
|
224
266
|
for tr in tool_result_blocks:
|
|
267
|
+
tid = tr.get("tool_use_id", "")
|
|
268
|
+
if tid and tid not in declared_tool_use_ids:
|
|
269
|
+
continue
|
|
225
270
|
out.append(
|
|
226
271
|
Message(
|
|
227
272
|
role="tool",
|
|
228
273
|
content=str(tr.get("content", "")),
|
|
229
|
-
tool_call_id=
|
|
274
|
+
tool_call_id=tid,
|
|
230
275
|
from_history=True,
|
|
231
276
|
)
|
|
232
277
|
)
|
|
@@ -245,9 +290,19 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
|
|
|
245
290
|
for b in blocks:
|
|
246
291
|
if not is_tool_use(b):
|
|
247
292
|
continue
|
|
293
|
+
tid = b.get("id", "")
|
|
294
|
+
# Drop tool_calls that never produced a tool_result. Without
|
|
295
|
+
# this, the next API call carries an unanswered function_call
|
|
296
|
+
# from a prior turn and the Responses backend errors out
|
|
297
|
+
# ("No tool output found for function call <id>"). The tool
|
|
298
|
+
# wrapper *should* always produce a result, but defensive
|
|
299
|
+
# filtering here recovers a stuck history even when the
|
|
300
|
+
# wrapper failed (timeout/crash/abort).
|
|
301
|
+
if tid and tid not in answered_tool_use_ids:
|
|
302
|
+
continue
|
|
248
303
|
tool_calls.append(
|
|
249
304
|
{
|
|
250
|
-
"id":
|
|
305
|
+
"id": tid,
|
|
251
306
|
"type": "function",
|
|
252
307
|
"function": {
|
|
253
308
|
"name": b.get("name", ""),
|
|
@@ -265,15 +320,21 @@ def to_agno_messages(history: list[HistoryItem]) -> list:
|
|
|
265
320
|
|
|
266
321
|
elif role == "tool":
|
|
267
322
|
# Explicit tool-role items (we don't produce these ourselves but
|
|
268
|
-
# support them for forward compat with loaded sessions).
|
|
323
|
+
# support them for forward compat with loaded sessions). Same
|
|
324
|
+
# orphan filter as the user-role branch — this is actually the
|
|
325
|
+
# branch that catches the loaded-session case where a prior
|
|
326
|
+
# compaction summarised the matching assistant turn away.
|
|
269
327
|
for tr in blocks:
|
|
270
328
|
if not is_tool_result(tr):
|
|
271
329
|
continue
|
|
330
|
+
tid = tr.get("tool_use_id", "")
|
|
331
|
+
if tid and tid not in declared_tool_use_ids:
|
|
332
|
+
continue
|
|
272
333
|
out.append(
|
|
273
334
|
Message(
|
|
274
335
|
role="tool",
|
|
275
336
|
content=str(tr.get("content", "")),
|
|
276
|
-
tool_call_id=
|
|
337
|
+
tool_call_id=tid,
|
|
277
338
|
from_history=True,
|
|
278
339
|
)
|
|
279
340
|
)
|
|
@@ -135,8 +135,8 @@ async def _run_extractor_agent(prompt: str, model_ref: str) -> str:
|
|
|
135
135
|
instructions="You curate durable memories. Output only the requested JSON.",
|
|
136
136
|
markdown=False,
|
|
137
137
|
)
|
|
138
|
-
|
|
139
|
-
return (
|
|
138
|
+
from aru.runner import arun_text_only
|
|
139
|
+
return await arun_text_only(agent, prompt)
|
|
140
140
|
|
|
141
141
|
|
|
142
142
|
def _parse_json_array(content: str) -> list[dict]:
|
|
@@ -444,16 +444,24 @@ def set_permission_mode(mode: str) -> str:
|
|
|
444
444
|
|
|
445
445
|
|
|
446
446
|
def cycle_permission_mode() -> str:
|
|
447
|
-
"""Advance to the next mode and return it.
|
|
447
|
+
"""Advance to the next mode and return it.
|
|
448
|
+
|
|
449
|
+
Delegates the actual mutation to ``set_permission_mode`` so the Ctrl+A
|
|
450
|
+
path (this function) and the ``/yolo`` slash command path (which calls
|
|
451
|
+
``set_permission_mode`` directly) follow the exact same code path —
|
|
452
|
+
same mutation, same ``permission.mode.changed`` publish, same UI
|
|
453
|
+
refresh trigger. Historically these two were near-duplicate and the
|
|
454
|
+
Ctrl+A version skipped the bus publish; this caused subtle drift
|
|
455
|
+
where the StatusPane visually advanced but downstream subscribers
|
|
456
|
+
saw stale state.
|
|
457
|
+
"""
|
|
448
458
|
ctx = get_ctx()
|
|
449
459
|
try:
|
|
450
460
|
idx = _MODE_CYCLE.index(ctx.permission_mode)
|
|
451
461
|
except ValueError:
|
|
452
462
|
idx = 0
|
|
453
463
|
next_mode = _MODE_CYCLE[(idx + 1) % len(_MODE_CYCLE)]
|
|
454
|
-
|
|
455
|
-
ctx.skip_permissions = (next_mode == "yolo")
|
|
456
|
-
return next_mode
|
|
464
|
+
return set_permission_mode(next_mode)
|
|
457
465
|
|
|
458
466
|
|
|
459
467
|
def consume_rejection_feedback() -> str:
|
|
@@ -465,6 +465,22 @@ def resolve_model_ref(model_ref: str) -> tuple[str, str]:
|
|
|
465
465
|
return provider_key, model_name
|
|
466
466
|
|
|
467
467
|
|
|
468
|
+
def default_small_model_ref(session_model_ref: str) -> str:
    """Pick the model ref a sub-agent uses when no ``small`` alias exists.

    The sub-agent simply inherits the parent's effective model, mirroring
    Codex's ``build_agent_shared_config`` (multi_agents_common.rs). Staying
    on the parent's provider preserves credentials and cache lineage, and
    sidesteps the cross-provider failure where a hard-coded "small model"
    is rejected by the parent's backend — e.g. ``gpt-4o-mini`` on a ChatGPT
    Plus/Pro OAuth credential whose Codex endpoint only accepts ``gpt-5*``
    ids.

    Users who prefer a cheaper sub-agent model can still set
    ``model_aliases.small`` in aru.json; that override is applied at every
    call site before this helper is consulted.

    Args:
        session_model_ref: Model reference of the parent session.

    Returns:
        The same model reference, unchanged.
    """
    inherited_ref = session_model_ref
    return inherited_ref
|
|
482
|
+
|
|
483
|
+
|
|
468
484
|
def _get_actual_model_id(provider: ProviderConfig, model_name: str) -> str:
|
|
469
485
|
"""Get the actual model ID to send to the API.
|
|
470
486
|
|
|
@@ -27,6 +27,33 @@ _MAX_TOKENS_RECOVERY_PROMPT = (
|
|
|
27
27
|
)
|
|
28
28
|
|
|
29
29
|
|
|
30
|
+
async def arun_text_only(agent, prompt: str) -> str:
    """Run a tools-less helper agent and return its final text output.

    Always streams, because the Codex Responses backend rejects
    non-streaming calls with ``400 'Stream must be set to true'``. The
    official OpenAI Responses API and every other provider also accept
    ``stream=True``, so a single code path covers both. Used by the
    compaction summarizer, memory extractor, and plan reviewer — all of
    which previously called ``agent.arun(prompt, stream=False)`` and broke
    for any user on a ChatGPT Plus/Pro OAuth credential.

    Args:
        agent: Object whose ``arun(prompt, stream=True,
            yield_run_output=True)`` returns an async iterator of events.
        prompt: Text sent to the agent.

    Returns:
        The content of the first ``RunOutput`` event seen on the stream,
        or the empty string when no such event arrives or its content is
        empty (the caller decides what that means — e.g. compaction has
        its own mechanical fallback). Non-string content is converted with
        ``str()`` so the declared return type always holds.
    """
    from agno.run.agent import RunOutput

    final_output = None
    stream = agent.arun(prompt, stream=True, yield_run_output=True)
    try:
        async for event in stream:
            if isinstance(event, RunOutput):
                final_output = event
                break
    finally:
        # Leaving the loop early keeps an async generator suspended; close it
        # here so its cleanup runs now rather than at garbage-collection
        # time. Iterators without ``aclose`` are left untouched.
        close_stream = getattr(stream, "aclose", None)
        if close_stream is not None:
            await close_stream()

    if final_output is None or not final_output.content:
        return ""
    content = final_output.content
    # Callers treat the result as text (e.g. JSON parsing), so honour ``-> str``.
    return content if isinstance(content, str) else str(content)
|
|
55
|
+
|
|
56
|
+
|
|
30
57
|
def _prepare_recovery_input(
|
|
31
58
|
*,
|
|
32
59
|
agent,
|
|
@@ -185,6 +212,59 @@ def _build_plan_reminder(session) -> str | None:
|
|
|
185
212
|
return "\n".join(lines)
|
|
186
213
|
|
|
187
214
|
|
|
215
|
+
def _build_permission_mode_reminder() -> str | None:
    """Build the per-turn reminder describing a non-default permission mode.

    The harness can read ``ctx.permission_mode``; the model cannot unless it
    is told. GPT-5 / Codex-trained models tend to ask "should I commit?" or
    "want me to run X?" before mutating actions, which suits the Codex CLI's
    default-approval gate but defeats Aru's YOLO mode, where every gate is
    already pre-approved. The reminder uses the same ``<system-reminder>``
    shape as ``_build_plan_reminder`` so the model treats it as
    authoritative.

    Returns:
        A ``<system-reminder>`` block for the ``yolo`` and ``acceptEdits``
        modes. ``None`` for any other mode (the model's built-in caution is
        correct in ``default`` mode, and an extra reminder would only
        consume cache space on every turn), and ``None`` when looking up
        the runtime context raises ``LookupError``.
    """
    try:
        from aru.runtime import get_ctx

        active_mode = get_ctx().permission_mode
    except LookupError:
        return None

    if active_mode == "yolo":
        # Persistence / autonomy posture belongs to BASE_INSTRUCTIONS (always
        # part of the system prompt, so it covers default / acceptEdits /
        # yolo alike), mirroring Codex's design where the "Task execution" /
        # "Autonomy and Persistence" sections live in the cached base
        # instructions. This reminder covers only the mode-specific part:
        # permission gating. Do NOT re-state the persistence rules here;
        # that would bloat the per-turn cache for no behavioural gain.
        notice = (
            'YOLO MODE ACTIVE — equivalent to Codex `approval-policy: never`. '
            'Every tool call is pre-approved. Do NOT ask permission before '
            'running tools ("posso fazer o commit?", "want me to run the '
            'tests?", "shall I edit X?", "Se quiser, faço…"). Just execute. '
            'The autonomy and task-execution rules from your system prompt '
            'still apply.'
        )
    elif active_mode == "acceptEdits":
        notice = (
            'AUTO-ACCEPT EDITS ACTIVE — file edits are pre-approved. Do NOT '
            'ask before writing/editing files. Bash and other non-edit '
            'actions still gate normally; for those you may pause if the '
            'command is destructive or ambiguous. For routine edits, execute '
            'without confirmation.'
        )
    else:
        return None

    return f"<system-reminder>\n{notice}\n</system-reminder>"
|
|
266
|
+
|
|
267
|
+
|
|
188
268
|
def _consume_plan_rejection_feedback(session) -> str | None:
|
|
189
269
|
"""Read-and-clear plan rejection feedback stored on the session.
|
|
190
270
|
|
|
@@ -405,6 +485,10 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
|
|
|
405
485
|
if reminder:
|
|
406
486
|
msg_parts.append(reminder)
|
|
407
487
|
|
|
488
|
+
mode_reminder = _build_permission_mode_reminder()
|
|
489
|
+
if mode_reminder:
|
|
490
|
+
msg_parts.append(mode_reminder)
|
|
491
|
+
|
|
408
492
|
warning = session.check_budget_warning()
|
|
409
493
|
if warning:
|
|
410
494
|
console.print(warning)
|
|
@@ -13,7 +13,7 @@ from dataclasses import dataclass, field
|
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
from typing import Literal
|
|
15
15
|
|
|
16
|
-
from aru.providers import MODEL_ALIASES, get_model_display, resolve_model_ref
|
|
16
|
+
from aru.providers import MODEL_ALIASES, get_model_display, get_provider, resolve_model_ref
|
|
17
17
|
|
|
18
18
|
# Default model reference (provider/model format)
|
|
19
19
|
DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
|
|
@@ -577,11 +577,25 @@ class Session:
|
|
|
577
577
|
suffix convention plus any future provider that adopts the same
|
|
578
578
|
naming. None of the major paid models contain "free" in their id,
|
|
579
579
|
so false positives are negligible.
|
|
580
|
+
|
|
581
|
+
ChatGPT Plus/Pro via Codex OAuth is a flat-rate subscription — usage
|
|
582
|
+
is bounded by the plan's session quotas, not per-token charges — so
|
|
583
|
+
all four prices are zero whenever the active openai credential is an
|
|
584
|
+
OAuth token (``provider.codex_oauth``). The user disconnecting via
|
|
585
|
+
``/connect logout`` clears the flag and the regular gpt-5 pricing
|
|
586
|
+
kicks back in for any subsequent turns.
|
|
580
587
|
"""
|
|
581
588
|
ref = (self.model_ref or "").lower()
|
|
582
589
|
mid = (self.model_id or "").lower()
|
|
583
590
|
if "free" in ref or "free" in mid:
|
|
584
591
|
return (0.0, 0.0, 0.0, 0.0)
|
|
592
|
+
try:
|
|
593
|
+
provider_key, _ = resolve_model_ref(self.model_ref or "")
|
|
594
|
+
provider = get_provider(provider_key)
|
|
595
|
+
if provider is not None and getattr(provider, "codex_oauth", False):
|
|
596
|
+
return (0.0, 0.0, 0.0, 0.0)
|
|
597
|
+
except Exception:
|
|
598
|
+
pass
|
|
585
599
|
model_id = self.model_id
|
|
586
600
|
# Try exact match, then prefix match, then fallback
|
|
587
601
|
for prefix, pricing in MODEL_PRICING.items():
|
|
@@ -422,8 +422,17 @@ Do not create documentation files unless explicitly asked.
|
|
|
422
422
|
})
|
|
423
423
|
|
|
424
424
|
from aru.runtime import _schedule_publish as _sched_t
|
|
425
|
+
# Prepend the permission-mode reminder to the subagent's prompt so
|
|
426
|
+
# YOLO mode reaches the spawned agent too — delegate runs through
|
|
427
|
+
# ``agent_instance.arun`` directly, bypassing run_agent_capture's
|
|
428
|
+
# reminder injection. The persistence / task-execution posture
|
|
429
|
+
# is in BASE_INSTRUCTIONS (subagent's system prompt) so it
|
|
430
|
+
# propagates without needing a per-spawn reminder.
|
|
431
|
+
from aru.runner import _build_permission_mode_reminder
|
|
432
|
+
_mode_reminder = _build_permission_mode_reminder()
|
|
433
|
+
sub_task = f"{_mode_reminder}\n\n{task}" if _mode_reminder else task
|
|
425
434
|
try:
|
|
426
|
-
async for event in agent_instance.arun(
|
|
435
|
+
async for event in agent_instance.arun(sub_task, stream=True, stream_events=True, yield_run_output=True):
|
|
427
436
|
if is_aborted():
|
|
428
437
|
_trace.status = "cancelled"
|
|
429
438
|
_trace.ended_at = _time.monotonic()
|
|
@@ -199,6 +199,39 @@ def write_files(file_list: list[dict]) -> str:
|
|
|
199
199
|
Example: [{"path": "src/main.py", "content": "print('hello')"}, {"path": "src/utils.py", "content": "..."}]
|
|
200
200
|
"""
|
|
201
201
|
from aru.runtime import resolve_path as _resolve_path
|
|
202
|
+
# Defensive schema validation — return a string error instead of raising.
|
|
203
|
+
# An uncaught TypeError / AttributeError here would propagate through the
|
|
204
|
+
# async tool wrapper without producing a tool_result, leaving the next
|
|
205
|
+
# turn with a function_call lacking its function_call_output (Codex
|
|
206
|
+
# rejects with ``400 No tool output found for function call``).
|
|
207
|
+
if not isinstance(file_list, list):
|
|
208
|
+
return (
|
|
209
|
+
"Error: write_files expects ``file_list`` to be a JSON array of "
|
|
210
|
+
"objects with 'path' and 'content' keys. Got "
|
|
211
|
+
f"{type(file_list).__name__!r}."
|
|
212
|
+
)
|
|
213
|
+
cleaned: list[dict] = []
|
|
214
|
+
schema_errors: list[str] = []
|
|
215
|
+
for i, e in enumerate(file_list):
|
|
216
|
+
if not isinstance(e, dict):
|
|
217
|
+
schema_errors.append(
|
|
218
|
+
f"entry {i}: expected object with 'path' and 'content', got {type(e).__name__}"
|
|
219
|
+
)
|
|
220
|
+
continue
|
|
221
|
+
if "path" not in e or "content" not in e:
|
|
222
|
+
schema_errors.append(
|
|
223
|
+
f"entry {i}: missing required key(s) — needs both 'path' and 'content'"
|
|
224
|
+
)
|
|
225
|
+
continue
|
|
226
|
+
cleaned.append(e)
|
|
227
|
+
if not cleaned:
|
|
228
|
+
return (
|
|
229
|
+
"Error: write_files received no valid entries. "
|
|
230
|
+
+ "; ".join(schema_errors)
|
|
231
|
+
if schema_errors
|
|
232
|
+
else "Error: write_files received an empty list."
|
|
233
|
+
)
|
|
234
|
+
file_list = cleaned
|
|
202
235
|
parts = [Text(f"Write {len(file_list)} files:", style="bold"), Text()]
|
|
203
236
|
for e in file_list:
|
|
204
237
|
p = _resolve_path(e.get("path", "<missing>"))
|
|
@@ -311,6 +344,19 @@ def edit_files(edits: list[dict]) -> str:
|
|
|
311
344
|
Example: [{"path": "src/main.py", "old_string": "foo", "new_string": "bar"}]
|
|
312
345
|
"""
|
|
313
346
|
from aru.runtime import resolve_path as _resolve_path
|
|
347
|
+
# Defensive schema validation — same rationale as write_files: a TypeError
|
|
348
|
+
# raised here would propagate through the async wrapper without producing
|
|
349
|
+
# a tool_result, leaving the assistant message with an unanswered tool_use
|
|
350
|
+
# that the Responses backend rejects on the next turn.
|
|
351
|
+
if not isinstance(edits, list):
|
|
352
|
+
return (
|
|
353
|
+
"Error: edit_files expects ``edits`` to be a JSON array of "
|
|
354
|
+
"objects with 'path', 'old_string', 'new_string'. Got "
|
|
355
|
+
f"{type(edits).__name__!r}."
|
|
356
|
+
)
|
|
357
|
+
edits = [e for e in edits if isinstance(e, dict)]
|
|
358
|
+
if not edits:
|
|
359
|
+
return "Error: edit_files received no valid edit entries."
|
|
314
360
|
original: dict[str, str] = {}
|
|
315
361
|
preview: dict[str, str] = {}
|
|
316
362
|
preview_errors: list[str] = []
|
|
@@ -18,7 +18,6 @@ from aru.tools.file_ops import (
|
|
|
18
18
|
_list_directory_tool,
|
|
19
19
|
_read_file_tool,
|
|
20
20
|
_write_file_tool,
|
|
21
|
-
_write_files_tool,
|
|
22
21
|
read_files,
|
|
23
22
|
)
|
|
24
23
|
from aru.tools.plan_mode import enter_plan_mode, exit_plan_mode
|
|
@@ -69,7 +68,18 @@ _READ_ONLY_TOOLS = [
|
|
|
69
68
|
|
|
70
69
|
_WRITE_TOOLS = [
|
|
71
70
|
_write_file_tool,
|
|
72
|
-
_write_files_tool
|
|
71
|
+
# ``_write_files_tool`` (batch write) intentionally NOT exposed: the
|
|
72
|
+
# nested ``[{"path", "content"}]`` schema is consistently mis-called by
|
|
73
|
+
# every model family we tested (GPT-5 included) — it passes a plain dict,
|
|
74
|
+
# a list of strings, or forgets one of the required keys. The wrapper
|
|
75
|
+
# used to raise on the malformed input, which left the assistant message
|
|
76
|
+
# with an unanswered tool_call and broke the next turn on the Codex
|
|
77
|
+
# backend (``400 No tool output found for function call``). Adding
|
|
78
|
+
# schema-validation didn't fix the underlying ergonomics — models still
|
|
79
|
+
# waste calls fighting the schema. Single ``write_file`` works
|
|
80
|
+
# reliably; batches can be expressed as N sequential calls. The
|
|
81
|
+
# function and its async wrapper are kept in ``file_ops.py`` for any
|
|
82
|
+
# custom tool or plugin that imports them directly.
|
|
73
83
|
_edit_file_tool,
|
|
74
84
|
_edit_files_tool,
|
|
75
85
|
_apply_patch_tool,
|
|
@@ -237,10 +237,25 @@ def update_task(index: int, status: str) -> str:
|
|
|
237
237
|
failed_count = sum(1 for t in all_tasks if t["status"] == "failed")
|
|
238
238
|
total = len(all_tasks)
|
|
239
239
|
|
|
240
|
+
# Tool result kept minimal — Codex's update_plan returns the constant
|
|
241
|
+
# ``"Plan updated"`` (plan.rs:22) and relies on the base-instructions
|
|
242
|
+
# "Task execution" section (always in the system prompt) to keep GPT-5
|
|
243
|
+
# going across subtasks. We follow the same shape so the persistence
|
|
244
|
+
# signal lives in one place (BASE_INSTRUCTIONS) instead of being
|
|
245
|
+
# duplicated in every tool result. The "all finished" branch is
|
|
246
|
+
# deliberately worded as "this subtask list is done" (not "the work is
|
|
247
|
+
# done") so the model doesn't read it as a turn-end signal when there
|
|
248
|
+
# are still pending plan steps or skill-driven macro Tasks.
|
|
240
249
|
if completed_count + failed_count == total:
|
|
241
|
-
return
|
|
250
|
+
return (
|
|
251
|
+
f"This subtask list is done ({completed_count} completed, "
|
|
252
|
+
f"{failed_count} failed). If more plan steps or skill-driven "
|
|
253
|
+
"Tasks remain in your request, continue with the next one in "
|
|
254
|
+
"the same turn (call create_task_list again, dispatch the next "
|
|
255
|
+
"subagent, or call update_plan_step). Only yield to the user "
|
|
256
|
+
"when the full request is exhausted."
|
|
257
|
+
)
|
|
242
258
|
|
|
243
|
-
# Find next pending subtask
|
|
244
259
|
next_task = next((t for t in all_tasks if t["status"] == "pending"), None)
|
|
245
260
|
if next_task:
|
|
246
261
|
return f"Subtask {index} → {status}. Next: subtask {next_task['index']} — {next_task['description']}"
|