aru-code 0.53.0__tar.gz → 0.54.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aru_code-0.53.0/aru_code.egg-info → aru_code-0.54.0}/PKG-INFO +1 -1
- aru_code-0.54.0/aru/__init__.py +1 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/cache_patch.py +233 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/cli.py +9 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/commands.py +1 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/runner.py +14 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/session.py +117 -8
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/app.py +15 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/completer.py +1 -0
- {aru_code-0.53.0 → aru_code-0.54.0/aru_code.egg-info}/PKG-INFO +1 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/pyproject.toml +1 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cache_patch_metrics.py +4 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli.py +5 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_context.py +15 -6
- aru_code-0.53.0/aru/__init__.py +0 -1
- {aru_code-0.53.0 → aru_code-0.54.0}/LICENSE +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/README.md +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/_debug/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/_debug/analyze_trace.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/_debug/loop_tracer.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/agent_factory.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/agents/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/agents/base.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/agents/catalog.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/agents/planner.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/checkpoints.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/completers.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/config.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/context.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/display.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/doom_loop.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/events.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/format/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/format/manager.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/format/runner.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/history_blocks.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/lsp/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/lsp/client.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/lsp/manager.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/lsp/protocol.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/memory/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/memory/extractor.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/memory/loader.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/memory/store.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/permissions.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/plugin_cache.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/plugins/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/plugins/custom_tools.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/plugins/hooks.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/plugins/manager.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/plugins/tool_api.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/providers.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/runtime.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/select.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/sinks.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/streaming.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tool_policy.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/_diff.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/_shared.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/apply_patch.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/apply_patch_prompt.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/codebase.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/delegate.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/delegate_prompt.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/file_ops.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/gitignore.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/lsp.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/memory_tool.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/plan_mode.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/ranker.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/registry.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/search.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/shell.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/skill.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/tasklist.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/web.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tools/worktree.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/log_bridge.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/notifications.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/sanitize.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/choice.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/confirm.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/keymap.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/search.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/session_picker.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/screens/text_input.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/sinks.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/slash_bridge.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/themes.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/ui.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/__init__.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/chat.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/context_pane.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/file_link.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/header.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/inline_choice.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/loaded_pane.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/prompt_area.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/prompt_queue.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/status.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/subagent_panel.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/tasklist_panel.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/thinking.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/tui/widgets/tools.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru/ui.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru_code.egg-info/SOURCES.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/setup.cfg +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_agents_base.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_agents_md_coverage.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_apply_patch.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_async_tool_permission.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cache_patch_stop_reason.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_catalog.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_chat_scrollable.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_checkpoints.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_base.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_completers.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_new.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_run_cli.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_session.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cli_shell.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_codebase.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_confabulation_regression.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_config.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_context_pane.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_cwd_awareness.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_delegate.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_doom_loop.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_events_backward_compat.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_events_schema.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_fork_ctx_concurrency.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_format.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_gitignore.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_guardrails_scenarios.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_invoke_skill.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_invoked_skills.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_loaded_pane_path.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_lsp.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_lsp_rename.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_main.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_markdown_to_text.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_mcp_client.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_mcp_health.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_memory.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_memory_tool.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_microcompact.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_permissions.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_plan_mode_refactor.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_plugin_cache.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_plugin_errors.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_plugin_hooks_v2.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_plugins.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_providers.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_ranker.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_reasoning.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_runner_interrupt.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_runner_recovery.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_runtime.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_select.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_session_free_cost.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_skill_disallowed_tools.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_status_breakdown.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_status_cost.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_streaming_sink.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_subagent_tool_events.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tasklist.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_thread_tool_timeout.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tool_policy.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_truncation_marker.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_app_boot.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_bindings.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_bus_flow.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_chat.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_chat_adversarial.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_completer.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_completer_dynamic.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_copy.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_error_display.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_file_link.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_input_behaviour.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_layer12_recovery.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_layer13_recovery.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_mention_expand.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_modals.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_mode_cycle.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_native_selection.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_permission_flow.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_plan_task_render.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_prompt_queue.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_shell_bang.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_sidebar_toggle.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_slash_bridge.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_slash_model.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_snapshot_smoke.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_subagent_panel.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_theme.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_thinking_and_boot.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_tui_widgets_visual.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_ui_adapter.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_worktree.py +0 -0
- {aru_code-0.53.0 → aru_code-0.54.0}/tests/test_worktree_session_restore.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.54.0"
|
|
@@ -23,6 +23,9 @@ regardless of which provider is used.
|
|
|
23
23
|
|
|
24
24
|
from __future__ import annotations
|
|
25
25
|
|
|
26
|
+
import os as _os
|
|
27
|
+
import time as _time
|
|
28
|
+
|
|
26
29
|
# Token-budget pruning (aligned with OpenCode's strategy):
|
|
27
30
|
# - Protect recent tool results within a token budget
|
|
28
31
|
# - Only prune if there's enough to free (avoid churn)
|
|
@@ -43,6 +46,22 @@ _last_call_cache_write: int = 0
|
|
|
43
46
|
# We normalize "length" → "max_tokens" so callers can check a single value.
|
|
44
47
|
_last_call_stop_reason: str | None = None
|
|
45
48
|
|
|
49
|
+
# Per-call observability ring buffer. Each accumulate_model_metrics fire
|
|
50
|
+
# appends one record; the ring caps at _CALL_HISTORY_MAX so a long-running
|
|
51
|
+
# session doesn't grow unbounded. Surfaced via /calls so users can see
|
|
52
|
+
# *which* models / model_types / call sites produced each request — the
|
|
53
|
+
# canonical "why are there N api_calls?" diagnosis surface.
|
|
54
|
+
_CALL_HISTORY_MAX = 200
|
|
55
|
+
_call_history: list[dict] = []
|
|
56
|
+
|
|
57
|
+
# Pending request metadata captured by the request-side patch right before
|
|
58
|
+
# the provider call goes out. Read by ``_patched_accumulate`` after the
|
|
59
|
+
# response lands and merged into the matching call_history record so /calls
|
|
60
|
+
# shows both the response usage AND a summary of what was sent. Single-
|
|
61
|
+
# slot global is OK: aru runs requests sequentially per ctx, and the patch
|
|
62
|
+
# captures-then-clears synchronously around each invocation.
|
|
63
|
+
_pending_request_meta: dict | None = None
|
|
64
|
+
|
|
46
65
|
# Micro-compaction metrics (process-wide, reset by tests via
|
|
47
66
|
# reset_microcompact_stats()). Recorded by _prune_tool_messages every time it
|
|
48
67
|
# fires from the format_function_call_results patch. Surfaced in /cost so
|
|
@@ -105,6 +124,92 @@ def reset_last_stop_reason() -> None:
|
|
|
105
124
|
_last_call_stop_reason = None
|
|
106
125
|
|
|
107
126
|
|
|
127
|
+
def _summarize_request(messages, tools=None) -> dict:
    """Build a compact summary of an outgoing request for /calls.

    Full message bodies are deliberately not stored: a single tool result
    can be tens of KB and a long session would balloon memory. Only
    counts and short snippets are kept.

    Args:
        messages: Iterable of message objects read via their ``role`` and
            ``content`` attributes (falling back to ``text`` when
            ``content`` is ``None``). ``None`` is treated as no messages.
        tools: Optional sized collection of tools; only its length is
            recorded.

    Returns:
        A dict with these keys:

        * ``n_messages`` — number of messages
        * ``roles`` — per-role tally (``"?"`` when a role is missing)
        * ``total_chars`` — total characters across all message bodies
        * ``first_snippet`` — first 240 chars of the first message
        * ``last_snippet`` — first 240 chars of the last message
        * ``last_user_snippet`` — first 240 chars of the most recent
          message whose role is ``"user"``
        * ``n_tools`` — number of tools

        Summarizing is best-effort: on any unexpected error the fields
        filled so far are returned and the rest keep their defaults.
    """
    snippet_max = 240

    def _text_of(message) -> str:
        """Return the message body as a string, or "" when it has none."""
        body = getattr(message, "content", None)
        if body is None:
            body = getattr(message, "text", None)
        if body is None:
            # Never stringify None: "None" would be counted as 4 chars
            # and leak into the snippets.
            return ""
        if isinstance(body, str):
            return body
        try:
            return str(body)
        except Exception:
            return ""

    out = {
        "n_messages": 0,
        "roles": {},
        "total_chars": 0,
        "first_snippet": "",
        "last_snippet": "",
        "last_user_snippet": "",
        "n_tools": 0,
    }
    try:
        msgs = list(messages or [])
        out["n_messages"] = len(msgs)
        out["n_tools"] = len(tools or [])
        text = ""
        for index, message in enumerate(msgs):
            role = getattr(message, "role", None) or "?"
            out["roles"][role] = out["roles"].get(role, 0) + 1
            text = _text_of(message)
            out["total_chars"] += len(text)
            if index == 0:
                out["first_snippet"] = text[:snippet_max]
            if role == "user":
                out["last_user_snippet"] = text[:snippet_max]
        # ``text`` now holds the body of the final message (or "" when
        # there were none), extracted with the same rules as the tally,
        # so the snippet and the character count always agree.
        out["last_snippet"] = text[:snippet_max]
    except Exception:
        # Observability must never break the real model call.
        pass
    return out
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _capture_request_meta(messages, tools=None) -> None:
    """Summarize the outgoing request and park it in the pending slot.

    The summary produced by ``_summarize_request`` overwrites whatever was
    previously held in the module-level ``_pending_request_meta``.
    """
    global _pending_request_meta
    summary = _summarize_request(messages, tools)
    _pending_request_meta = summary
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def get_call_history() -> list[dict]:
    """Return a snapshot list of the per-API-call ring buffer.

    Each entry has the keys ``n``, ``model_type``, ``model_id``,
    ``provider``, ``input_tokens``, ``output_tokens``, ``cache_read``,
    ``cache_write``, ``stop_reason``, ``caller``, ``ts`` and ``request``.

    ``input_tokens`` is the *normalized* value (cache reads subtracted for
    non-Anthropic providers). ``caller`` is the ``file:line`` of the frame
    that invoked accumulate_model_metrics, which helps tell main-model
    calls apart from parser/output-model/memory/recovery calls.
    ``request`` is the request summary captured before the call went out
    (an empty dict when none was pending).

    The returned list is a new object, but the entry dicts are the same
    objects held by the buffer (shallow copy).
    """
    return _call_history.copy()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def reset_call_history() -> None:
    """Empty the call ring buffer in place (session start, tests)."""
    del _call_history[:]
|
|
211
|
+
|
|
212
|
+
|
|
108
213
|
def get_microcompact_stats() -> dict:
|
|
109
214
|
"""Return process-wide micro-compaction metrics.
|
|
110
215
|
|
|
@@ -317,6 +422,72 @@ def _prune_tool_messages(messages):
|
|
|
317
422
|
_PATCH_APPLIED = False
|
|
318
423
|
|
|
319
424
|
|
|
425
|
+
def _patch_request_capture():
    """Hook agno's model invocation methods to record what was sent.

    Replaces the four ``_invoke_with_retry`` / ``_ainvoke_with_retry`` /
    ``_invoke_stream_with_retry`` / ``_ainvoke_stream_with_retry`` methods
    on ``agno.models.base.Model`` (sync/async x stream/non-stream). Each
    replacement snapshots ``kwargs["messages"]`` and ``kwargs["tools"]``
    through ``_capture_request_meta`` and then delegates to the original
    method with the same keyword arguments.

    Snapshotting is best-effort: any exception it raises is swallowed so
    the actual model call is never affected. If agno cannot be imported
    the function returns without patching anything.
    """
    try:
        from agno.models.base import Model
    except ImportError:
        return

    original_invoke = Model._invoke_with_retry
    original_ainvoke = Model._ainvoke_with_retry
    original_invoke_stream = Model._invoke_stream_with_retry
    original_ainvoke_stream = Model._ainvoke_stream_with_retry

    def _snapshot(call_kwargs):
        # Shared by all four wrappers; must never raise into the caller.
        try:
            _capture_request_meta(
                call_kwargs.get("messages"), call_kwargs.get("tools")
            )
        except Exception:
            pass

    def _wrap_invoke(self, **kwargs):
        _snapshot(kwargs)
        return original_invoke(self, **kwargs)

    async def _wrap_ainvoke(self, **kwargs):
        _snapshot(kwargs)
        return await original_ainvoke(self, **kwargs)

    def _wrap_invoke_stream(self, **kwargs):
        _snapshot(kwargs)
        # The original's return value (an iterator) is handed back
        # untouched so the caller consumes the stream directly.
        return original_invoke_stream(self, **kwargs)

    async def _wrap_ainvoke_stream(self, **kwargs):
        _snapshot(kwargs)
        # This wrapper has to be an async generator itself: chunks are
        # re-yielded one by one instead of returning the inner generator,
        # which the caller would otherwise receive wrapped in a coroutine.
        async for chunk in original_ainvoke_stream(self, **kwargs):
            yield chunk

    Model._invoke_with_retry = _wrap_invoke
    Model._ainvoke_with_retry = _wrap_ainvoke
    Model._invoke_stream_with_retry = _wrap_invoke_stream
    Model._ainvoke_stream_with_retry = _wrap_ainvoke_stream
|
|
489
|
+
|
|
490
|
+
|
|
320
491
|
def apply_cache_patch():
|
|
321
492
|
"""Apply all patches to reduce Agno's token consumption.
|
|
322
493
|
|
|
@@ -334,6 +505,7 @@ def apply_cache_patch():
|
|
|
334
505
|
_patch_per_call_metrics()
|
|
335
506
|
_patch_stop_reason_capture()
|
|
336
507
|
_patch_overflow_recovery()
|
|
508
|
+
_patch_request_capture()
|
|
337
509
|
_PATCH_APPLIED = True
|
|
338
510
|
|
|
339
511
|
|
|
@@ -515,6 +687,10 @@ def _publish_live_metrics(
|
|
|
515
687
|
session.total_output_tokens += output_tokens
|
|
516
688
|
session.total_cache_read_tokens += cache_read
|
|
517
689
|
session.total_cache_write_tokens += cache_write
|
|
690
|
+
# Count real API requests (one per accumulate call). track_tokens
|
|
691
|
+
# used to do this at turn-end (++1), which collapsed multi-tool
|
|
692
|
+
# turns — a turn with N tool calls = N+1 requests but counted as 1.
|
|
693
|
+
session.api_calls = (getattr(session, "api_calls", 0) or 0) + 1
|
|
518
694
|
session._live_input_added = (
|
|
519
695
|
getattr(session, "_live_input_added", 0) + input_tokens
|
|
520
696
|
)
|
|
@@ -587,6 +763,16 @@ def _patch_per_call_metrics():
|
|
|
587
763
|
global _last_call_input_tokens, _last_call_output_tokens
|
|
588
764
|
global _last_call_cache_read, _last_call_cache_write
|
|
589
765
|
usage = getattr(model_response, "response_usage", None)
|
|
766
|
+
# Capture the call site (agno file:line that invoked accumulate)
|
|
767
|
+
# cheaply — only when there's a usage object worth recording.
|
|
768
|
+
_caller_str = ""
|
|
769
|
+
if usage is not None:
|
|
770
|
+
try:
|
|
771
|
+
import sys as _sys
|
|
772
|
+
_frame = _sys._getframe(1)
|
|
773
|
+
_caller_str = f"{_os.path.basename(_frame.f_code.co_filename)}:{_frame.f_lineno}"
|
|
774
|
+
except Exception:
|
|
775
|
+
_caller_str = "?"
|
|
590
776
|
if usage is not None:
|
|
591
777
|
input_tokens = getattr(usage, "input_tokens", 0) or 0
|
|
592
778
|
output_tokens = getattr(usage, "output_tokens", 0) or 0
|
|
@@ -603,12 +789,59 @@ def _patch_per_call_metrics():
|
|
|
603
789
|
is_anthropic = "anthropic" in (provider_name or "").lower()
|
|
604
790
|
if not is_anthropic and cache_read and input_tokens >= cache_read:
|
|
605
791
|
input_tokens -= cache_read
|
|
792
|
+
# Mutate the shared usage object so the downstream
|
|
793
|
+
# ``_original_accumulate`` writes the *normalized* value
|
|
794
|
+
# into Agno's RunMetrics. Without this, RunMetrics keeps
|
|
795
|
+
# the raw (cache-inclusive) input while ``_last_call_*``
|
|
796
|
+
# and the live publish hold the normalized one, and
|
|
797
|
+
# ``Session.track_tokens`` reconciliation re-adds the
|
|
798
|
+
# cached portion as a fake "missing delta" — exactly the
|
|
799
|
+
# cumulative-vs-last asymmetry users see in /cost.
|
|
800
|
+
try:
|
|
801
|
+
usage.input_tokens = input_tokens
|
|
802
|
+
except (AttributeError, TypeError):
|
|
803
|
+
pass
|
|
606
804
|
|
|
607
805
|
_last_call_input_tokens = input_tokens
|
|
608
806
|
_last_call_output_tokens = output_tokens
|
|
609
807
|
_last_call_cache_read = cache_read
|
|
610
808
|
_last_call_cache_write = cache_write
|
|
611
809
|
|
|
810
|
+
# Per-call observability: append to the ring buffer so /calls
|
|
811
|
+
# can show breakdown by model_type (MODEL vs PARSER_MODEL vs
|
|
812
|
+
# MEMORY_MODEL etc.) and call site. Bounded to _CALL_HISTORY_MAX
|
|
813
|
+
# so a long session doesn't grow unbounded.
|
|
814
|
+
_model_id = ""
|
|
815
|
+
try:
|
|
816
|
+
_model_id = getattr(model, "id", "") or ""
|
|
817
|
+
except Exception:
|
|
818
|
+
pass
|
|
819
|
+
_mt_str = (
|
|
820
|
+
model_type.value
|
|
821
|
+
if hasattr(model_type, "value")
|
|
822
|
+
else str(model_type)
|
|
823
|
+
)
|
|
824
|
+
global _pending_request_meta
|
|
825
|
+
_req_meta = _pending_request_meta or {}
|
|
826
|
+
_pending_request_meta = None
|
|
827
|
+
_call_history.append({
|
|
828
|
+
"n": len(_call_history) + 1,
|
|
829
|
+
"model_type": _mt_str,
|
|
830
|
+
"model_id": _model_id,
|
|
831
|
+
"provider": provider_name or "",
|
|
832
|
+
"input_tokens": input_tokens,
|
|
833
|
+
"output_tokens": output_tokens,
|
|
834
|
+
"cache_read": cache_read,
|
|
835
|
+
"cache_write": cache_write,
|
|
836
|
+
"stop_reason": _last_call_stop_reason,
|
|
837
|
+
"caller": _caller_str,
|
|
838
|
+
"ts": _time.time(),
|
|
839
|
+
"request": _req_meta,
|
|
840
|
+
})
|
|
841
|
+
if len(_call_history) > _CALL_HISTORY_MAX:
|
|
842
|
+
# Keep the most recent N — drop from the front.
|
|
843
|
+
del _call_history[: len(_call_history) - _CALL_HISTORY_MAX]
|
|
844
|
+
|
|
612
845
|
# Intra-turn live session update + bus publish. Gated to the
|
|
613
846
|
# primary agent (subagent_depth == 0) so subagent API calls
|
|
614
847
|
# don't double-count — delegate_task adds subagent totals in
|
|
@@ -761,6 +761,15 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
|
|
|
761
761
|
))
|
|
762
762
|
continue
|
|
763
763
|
|
|
764
|
+
if user_input.lower() == "/calls":
|
|
765
|
+
console.print(Panel(
|
|
766
|
+
session.calls_summary,
|
|
767
|
+
title="[bold]Per-API-Call Breakdown[/bold]",
|
|
768
|
+
border_style="cyan",
|
|
769
|
+
padding=(1, 2),
|
|
770
|
+
))
|
|
771
|
+
continue
|
|
772
|
+
|
|
764
773
|
if user_input.lower() == "/subagents":
|
|
765
774
|
from aru.commands import handle_subagents_command
|
|
766
775
|
handle_subagents_command(session)
|
|
@@ -31,6 +31,7 @@ SLASH_COMMANDS = [
|
|
|
31
31
|
("/debug", "Debug utilities (plugin-errors)", "/debug <subcommand>"),
|
|
32
32
|
("/undo", "Undo last turn — restore files and/or conversation", "/undo"),
|
|
33
33
|
("/cost", "Show detailed token usage and cost", "/cost"),
|
|
34
|
+
("/calls", "Show per-API-call breakdown (model_type, tokens, stop_reason, caller)", "/calls"),
|
|
34
35
|
("/yolo", "Toggle DANGEROUSLY skip all permissions (YOLO mode)", "/yolo"),
|
|
35
36
|
("/quit", "Exit aru", "/quit"),
|
|
36
37
|
]
|
|
@@ -9,6 +9,7 @@ from dataclasses import dataclass, field
|
|
|
9
9
|
from rich.markdown import Markdown
|
|
10
10
|
|
|
11
11
|
from aru.display import console
|
|
12
|
+
from aru.session import Session
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
# Categories of tools that modify files (for highlighting in history)
|
|
@@ -640,18 +641,30 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
|
|
|
640
641
|
})
|
|
641
642
|
|
|
642
643
|
# Tier 2 #4: auto-memory extraction (opt-in, fire-and-forget).
|
|
644
|
+
# ``turn_tokens`` here is the size of the *exchange* (user message +
|
|
645
|
+
# assistant reply) — NOT the API call's prompt size. Earlier this
|
|
646
|
+
# used ``last_input_tokens + last_output_tokens``, but
|
|
647
|
+
# ``last_input_tokens`` includes the entire system prompt (~8K on
|
|
648
|
+
# aru with 30+ tools), so ``min_turn_tokens=500`` always tripped
|
|
649
|
+
# even on "Olá"/"ok"-style turns and the extractor fired every
|
|
650
|
+
# turn — burning the curator budget on nothing. Estimating from
|
|
651
|
+
# user+assistant char length matches the docstring intent
|
|
652
|
+
# ("trivial turns 'ok'/'thanks' don't trigger").
|
|
643
653
|
try:
|
|
644
654
|
from aru.memory.extractor import schedule_extraction_task
|
|
645
655
|
from aru.runtime import get_ctx as _get_ctx
|
|
646
656
|
_cfg = getattr(_get_ctx(), "config", None)
|
|
647
657
|
_cfg_memory = getattr(_cfg, "memory", None) or {}
|
|
648
658
|
_project_root = getattr(session, "project_root", None) or os.getcwd()
|
|
659
|
+
_exchange_tokens = Session.estimate_tokens(
|
|
660
|
+
(run_message or "") + (final_content or "")
|
|
661
|
+
)
|
|
649
662
|
schedule_extraction_task(
|
|
650
663
|
project_root=_project_root,
|
|
651
664
|
user_msg=run_message or "",
|
|
652
665
|
assistant_msg=final_content or "",
|
|
653
666
|
config_memory=_cfg_memory,
|
|
654
|
-
turn_tokens=
|
|
667
|
+
turn_tokens=_exchange_tokens,
|
|
655
668
|
)
|
|
656
669
|
except Exception:
|
|
657
670
|
pass # extractor guards internally; swallow any unexpected raise
|
|
@@ -540,7 +540,11 @@ class Session:
|
|
|
540
540
|
self.total_cache_write_tokens += max(
|
|
541
541
|
0, agno_cw - self._live_cache_write_added
|
|
542
542
|
)
|
|
543
|
-
|
|
543
|
+
# api_calls is no longer bumped here — it's incremented per real
|
|
544
|
+
# API request inside cache_patch._publish_live_metrics, which fires
|
|
545
|
+
# once per accumulate_model_metrics call. Bumping again here would
|
|
546
|
+
# double-count. Subagent runs (which skip live publish) bump
|
|
547
|
+
# api_calls themselves in delegate_task at sub-run completion.
|
|
544
548
|
self.reset_live_token_counters()
|
|
545
549
|
# Capture last API call's context window (set by cache_patch)
|
|
546
550
|
try:
|
|
@@ -626,8 +630,20 @@ class Session:
|
|
|
626
630
|
|
|
627
631
|
@property
|
|
628
632
|
def cost_summary(self) -> str:
|
|
629
|
-
"""Detailed cost breakdown for /cost command.
|
|
630
|
-
|
|
633
|
+
"""Detailed cost breakdown for /cost command.
|
|
634
|
+
|
|
635
|
+
Mirrors OpenCode: a single running session total — input, output,
|
|
636
|
+
and cache buckets shown side-by-side and summed into one ``total``.
|
|
637
|
+
Cache is normalized non-overlapping with input by ``cache_patch``,
|
|
638
|
+
so ``input + output + cache_read + cache_write`` is the true
|
|
639
|
+
chargeable token volume.
|
|
640
|
+
"""
|
|
641
|
+
total = (
|
|
642
|
+
self.total_input_tokens
|
|
643
|
+
+ self.total_output_tokens
|
|
644
|
+
+ self.total_cache_read_tokens
|
|
645
|
+
+ self.total_cache_write_tokens
|
|
646
|
+
)
|
|
631
647
|
if total == 0:
|
|
632
648
|
return "No token usage yet."
|
|
633
649
|
cost = self.estimated_cost
|
|
@@ -635,7 +651,7 @@ class Session:
|
|
|
635
651
|
lines = [
|
|
636
652
|
f"Session cost: {cost_str}",
|
|
637
653
|
f"",
|
|
638
|
-
f"
|
|
654
|
+
f"Session tokens:",
|
|
639
655
|
f" input: {self.total_input_tokens:,}",
|
|
640
656
|
f" output: {self.total_output_tokens:,}",
|
|
641
657
|
]
|
|
@@ -645,8 +661,13 @@ class Session:
|
|
|
645
661
|
lines.append(f" cache_write: {self.total_cache_write_tokens:,}")
|
|
646
662
|
lines.append(f" total: {total:,}")
|
|
647
663
|
lines.append(f" api calls: {self.api_calls}")
|
|
648
|
-
if self.last_input_tokens > 0:
|
|
649
|
-
ctx_total =
|
|
664
|
+
if self.last_input_tokens > 0 or self.last_cache_read > 0:
|
|
665
|
+
ctx_total = (
|
|
666
|
+
self.last_input_tokens
|
|
667
|
+
+ self.last_output_tokens
|
|
668
|
+
+ self.last_cache_read
|
|
669
|
+
+ self.last_cache_write
|
|
670
|
+
)
|
|
650
671
|
lines.append(f"")
|
|
651
672
|
lines.append(f"Last context window: {ctx_total:,}")
|
|
652
673
|
lines.append(f" input: {self.last_input_tokens:,}")
|
|
@@ -679,6 +700,84 @@ class Session:
|
|
|
679
700
|
pass
|
|
680
701
|
return "\n".join(lines)
|
|
681
702
|
|
|
703
|
+
@property
def calls_summary(self) -> str:
    """Per-API-call breakdown — answers "why are there N api_calls?".

    Pulls from ``cache_patch._call_history`` (the ring buffer that
    records every fire of ``accumulate_model_metrics``). Each row
    shows: ``model_type`` (MODEL vs PARSER_MODEL vs MEMORY_MODEL vs
    recovery), ``model_id``, normalized input, output, cache hits,
    stop_reason, and the agno call site that triggered it.

    Use to distinguish: a ``stop_reason=max_tokens`` row followed by a
    smaller row = the streaming recovery loop fired. Two
    ``MODEL`` rows = the agent did a tool call round. Mixed model_types
    = optional features (memory/parser/output models) are active.

    Every field is read defensively: a key that is missing *or* present
    with a ``None`` value falls back to a neutral default, so a single
    partially-filled record cannot abort the whole report.

    Returns:
        The newline-joined report, or a short notice when the history
        helper cannot be imported or no call has been recorded yet.
    """
    try:
        from aru.cache_patch import get_call_history
    except ImportError:
        return "Call history not available."
    history = get_call_history()
    if not history:
        return "No API calls yet."

    def _model_type(call) -> str:
        # Records carry the enum's string form ("ModelType.MODEL"); show
        # only the member name. ``str(... or "")`` tolerates None/non-str.
        label = str(call.get("model_type") or "")
        prefix = "ModelType."
        return label[len(prefix):] if label.startswith(prefix) else label

    def _snippet(request, key) -> str:
        # Keep each snippet on one report line by making newlines visible.
        return str(request.get(key) or "").replace("\n", " ⏎ ")

    lines = [f"Total recorded calls: {len(history)}", ""]
    # Tally per model_type in the same pass; dict insertion order follows
    # history order, which the stable sort below relies on for ties.
    by_type: dict[str, int] = {}
    for c in history:
        mt = _model_type(c)
        by_type[mt] = by_type.get(mt, 0) + 1

        req = c.get("request") or {}
        n_msgs = req.get("n_messages") or 0
        roles = req.get("roles") or {}
        roles_str = ", ".join(f"{r}={n}" for r, n in sorted(roles.items()))
        n_tools = req.get("n_tools") or 0
        total_chars = req.get("total_chars") or 0
        # Rough prompt-size estimate: ~4 characters per token.
        est_prompt_tokens = total_chars // 4

        lines.append(
            f"── Call #{c.get('n') or 0} "
            f"[{mt} / {c.get('model_id') or ''}] ──"
        )
        lines.append(
            f" request: {n_msgs} msgs ({roles_str}), "
            f"{n_tools} tools, ~{est_prompt_tokens:,} tok ({total_chars:,} chars)"
        )

        first = _snippet(req, "first_snippet")
        last_user = _snippet(req, "last_user_snippet")
        last = _snippet(req, "last_snippet")
        # Skip snippets that merely repeat one already printed.
        if first:
            lines.append(f" first msg: {first[:200]!r}")
        if last_user and last_user != first:
            lines.append(f" last user msg: {last_user[:200]!r}")
        if last and last != last_user and last != first:
            lines.append(f" last msg: {last[:200]!r}")

        input_tokens = c.get("input_tokens") or 0
        output_tokens = c.get("output_tokens") or 0
        cache_read = c.get("cache_read") or 0
        cache_write = c.get("cache_write") or 0
        lines.append(
            f" response: input={input_tokens:,} "
            f"output={output_tokens:,} "
            f"cache_read={cache_read:,} "
            f"cache_write={cache_write:,} "
            f"stop={c.get('stop_reason') or '-'}"
        )
        lines.append(
            f" source: provider={c.get('provider') or '?'}, "
            f"caller={c.get('caller') or '?'}"
        )
        lines.append("")

    # Aggregate by model_type, most frequent first.
    lines.append("By model_type:")
    for mt, n in sorted(by_type.items(), key=lambda x: -x[1]):
        lines.append(f" {mt}: {n}")
    return "\n".join(lines)
|
|
780
|
+
|
|
682
781
|
def invalidate_context_cache(self):
|
|
683
782
|
"""Mark cached tree/git status as stale. Call after file mutations."""
|
|
684
783
|
self._context_dirty = True
|
|
@@ -716,10 +815,20 @@ class Session:
|
|
|
716
815
|
return int(len(text) / Session._CHARS_PER_TOKEN)
|
|
717
816
|
|
|
718
817
|
def check_budget_warning(self) -> str | None:
|
|
719
|
-
"""Return a warning string if token usage is approaching the budget.
|
|
818
|
+
"""Return a warning string if token usage is approaching the budget.
|
|
819
|
+
|
|
820
|
+
Total mirrors OpenCode's context indicator —
|
|
821
|
+
``input + output + cache_read + cache_write`` — so the warning
|
|
822
|
+
triggers on the same volume the user sees in /cost.
|
|
823
|
+
"""
|
|
720
824
|
if self.token_budget <= 0:
|
|
721
825
|
return None
|
|
722
|
-
total =
|
|
826
|
+
total = (
|
|
827
|
+
self.total_input_tokens
|
|
828
|
+
+ self.total_output_tokens
|
|
829
|
+
+ self.total_cache_read_tokens
|
|
830
|
+
+ self.total_cache_write_tokens
|
|
831
|
+
)
|
|
723
832
|
pct = total / self.token_budget * 100
|
|
724
833
|
if pct >= 95:
|
|
725
834
|
return f"[bold red]Token budget nearly exhausted ({pct:.0f}%)[/bold red]"
|
|
@@ -236,7 +236,7 @@ class AruApp(App):
|
|
|
236
236
|
# Extending this map is the cheapest way to add a new local command.
|
|
237
237
|
_LOCAL_SLASH = {
|
|
238
238
|
"clear", "quit", "exit", "help", "plan",
|
|
239
|
-
"cost", "compact", "sessions", "model", "undo",
|
|
239
|
+
"cost", "calls", "compact", "sessions", "model", "undo",
|
|
240
240
|
"skills", "agents", "commands", "mcp", "yolo",
|
|
241
241
|
"theme",
|
|
242
242
|
}
|
|
@@ -840,6 +840,8 @@ class AruApp(App):
|
|
|
840
840
|
self.action_toggle_plan()
|
|
841
841
|
elif name == "cost":
|
|
842
842
|
self._slash_cost()
|
|
843
|
+
elif name == "calls":
|
|
844
|
+
self._slash_calls()
|
|
843
845
|
elif name == "compact":
|
|
844
846
|
self._slash_compact()
|
|
845
847
|
elif name == "sessions":
|
|
@@ -904,6 +906,18 @@ class AruApp(App):
|
|
|
904
906
|
text = f"cost failed: {exc}"
|
|
905
907
|
self._push_chat(text, "cost")
|
|
906
908
|
|
|
909
|
+
def _slash_calls(self) -> None:
    """Handle the ``/calls`` slash command.

    Reads ``calls_summary`` from the current session and pushes it to the
    chat under the ``"calls"`` tag. Pushes ``"No session."`` when there is
    no session, and ``"calls failed: <error>"`` when producing the summary
    raises.
    """
    session = self.session
    if session is None:
        self._push_chat("No session.", "calls")
        return
    try:
        summary = getattr(session, "calls_summary", None)
        if summary is None:
            # ``getattr`` with a default yields None both when the session
            # has no ``calls_summary`` and when the property raises
            # AttributeError internally; say so instead of printing "None".
            text = "Call history not available."
        else:
            text = summary if isinstance(summary, str) else str(summary)
    except Exception as exc:
        # Best-effort UI command: report the failure in chat, never crash.
        text = f"calls failed: {exc}"
    self._push_chat(text, "calls")
|
|
920
|
+
|
|
907
921
|
def _slash_compact(self) -> None:
|
|
908
922
|
session = self.session
|
|
909
923
|
if session is None:
|
|
@@ -38,6 +38,7 @@ SLASH_COMMANDS: list[tuple[str, str]] = [
|
|
|
38
38
|
("quit", "Save and quit"),
|
|
39
39
|
("exit", "Save and quit"),
|
|
40
40
|
("cost", "Show token usage & cost"),
|
|
41
|
+
("calls", "Per-API-call breakdown"),
|
|
41
42
|
("model", "Switch model"),
|
|
42
43
|
("compact", "Compact conversation"),
|
|
43
44
|
("memory", "Auto-extracted project memories"),
|
|
@@ -276,7 +276,10 @@ class TestLiveMetricsAccumulation:
|
|
|
276
276
|
s = ctx.session
|
|
277
277
|
assert s.total_input_tokens == 1_500, "track_tokens must not re-add"
|
|
278
278
|
assert s.total_output_tokens == 75
|
|
279
|
-
|
|
279
|
+
# api_calls is now incremented per real API request (inside
|
|
280
|
+
# _publish_live_metrics), not per turn — so the two live calls
|
|
281
|
+
# above produce api_calls == 2, not 1.
|
|
282
|
+
assert s.api_calls == 2
|
|
280
283
|
# Live counters reset so the next turn starts clean.
|
|
281
284
|
assert s._live_input_added == 0
|
|
282
285
|
assert s._live_output_added == 0
|
|
@@ -348,7 +348,11 @@ class TestSession:
|
|
|
348
348
|
assert session.total_input_tokens == 100
|
|
349
349
|
assert session.total_output_tokens == 50
|
|
350
350
|
assert session.total_cache_read_tokens == 30
|
|
351
|
-
|
|
351
|
+
# api_calls is no longer incremented by track_tokens — it's bumped
|
|
352
|
+
# per real API request inside cache_patch._publish_live_metrics.
|
|
353
|
+
# This unit test exercises track_tokens in isolation (no patch),
|
|
354
|
+
# so api_calls stays at 0.
|
|
355
|
+
assert session.api_calls == 0
|
|
352
356
|
|
|
353
357
|
def test_track_tokens_none_metrics(self):
|
|
354
358
|
session = Session()
|