aru-code 0.31.0__tar.gz → 0.33.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aru_code-0.31.0 → aru_code-0.33.0}/PKG-INFO +1 -1
- aru_code-0.33.0/aru/__init__.py +1 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/agent_factory.py +22 -3
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/agents/base.py +94 -1
- aru_code-0.33.0/aru/agents/catalog.py +157 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/cache_patch.py +279 -19
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/cli.py +57 -2
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/commands.py +133 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/context.py +24 -1
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/permissions.py +318 -21
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/providers.py +214 -3
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/runtime.py +78 -1
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/session.py +115 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tool_policy.py +75 -49
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/codebase.py +1 -1
- aru_code-0.33.0/aru/tools/delegate.py +602 -0
- aru_code-0.33.0/aru/tools/delegate_prompt.txt +34 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/file_ops.py +2 -2
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/registry.py +10 -5
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/skill.py +1 -1
- {aru_code-0.31.0 → aru_code-0.33.0}/aru_code.egg-info/PKG-INFO +1 -1
- {aru_code-0.31.0 → aru_code-0.33.0}/aru_code.egg-info/SOURCES.txt +4 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/pyproject.toml +4 -1
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_catalog.py +8 -1
- aru_code-0.33.0/tests/test_delegate.py +1063 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_invoke_skill.py +4 -4
- aru_code-0.33.0/tests/test_microcompact.py +277 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_permissions.py +501 -0
- aru_code-0.33.0/tests/test_reasoning.py +455 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_tool_policy.py +88 -0
- aru_code-0.31.0/aru/__init__.py +0 -1
- aru_code-0.31.0/aru/agents/catalog.py +0 -92
- aru_code-0.31.0/aru/tools/delegate.py +0 -236
- {aru_code-0.31.0 → aru_code-0.33.0}/LICENSE +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/README.md +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/agents/__init__.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/agents/planner.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/checkpoints.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/completers.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/config.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/display.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/history_blocks.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/plugin_cache.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/plugins/__init__.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/plugins/custom_tools.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/plugins/hooks.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/plugins/manager.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/plugins/tool_api.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/runner.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/select.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/__init__.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/_diff.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/_shared.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/gitignore.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/plan_mode.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/ranker.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/search.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/shell.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/tasklist.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru/tools/web.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/setup.cfg +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_agents_base.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_agents_md_coverage.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cache_patch_metrics.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cache_patch_stop_reason.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_checkpoints.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_base.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_completers.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_new.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_run_cli.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_session.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_cli_shell.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_codebase.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_confabulation_regression.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_config.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_context.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_gitignore.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_guardrails_scenarios.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_invoked_skills.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_main.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_mcp_client.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_plan_mode_refactor.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_plugin_cache.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_plugins.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_providers.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_ranker.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_runner_recovery.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_runtime.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_select.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_skill_disallowed_tools.py +0 -0
- {aru_code-0.31.0 → aru_code-0.33.0}/tests/test_tasklist.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.33.0"
|
|
@@ -150,15 +150,30 @@ async def create_agent_from_spec(
|
|
|
150
150
|
resolved_model = model_ref or session.model_ref
|
|
151
151
|
|
|
152
152
|
tools = _wrap_tools_with_hooks(spec.tools_factory())
|
|
153
|
-
instructions
|
|
153
|
+
# Merge spec-level extra instructions (static, agent-specific policy like
|
|
154
|
+
# "you are read-only, never call write tools") with caller-provided extras
|
|
155
|
+
# (dynamic, session-specific context like cwd or AGENTS.md). Spec text
|
|
156
|
+
# comes first so the agent's baseline policy is established before any
|
|
157
|
+
# session-specific text that might try to override it.
|
|
158
|
+
combined_extra = "\n\n".join(
|
|
159
|
+
part for part in (spec.extra_instructions, extra_instructions) if part
|
|
160
|
+
)
|
|
161
|
+
instructions = _build_instructions(spec.role, combined_extra)
|
|
154
162
|
|
|
155
163
|
instructions, resolved_model, max_tokens = await _apply_chat_hooks(
|
|
156
164
|
instructions, resolved_model, spec.name, max_tokens=spec.max_tokens,
|
|
157
165
|
)
|
|
158
166
|
|
|
167
|
+
reasoning_override = session.reasoning_override if session is not None else None
|
|
168
|
+
|
|
159
169
|
return Agent(
|
|
160
170
|
name=spec.name,
|
|
161
|
-
model=create_model(
|
|
171
|
+
model=create_model(
|
|
172
|
+
resolved_model,
|
|
173
|
+
max_tokens=max_tokens,
|
|
174
|
+
use_reasoning=spec.use_reasoning,
|
|
175
|
+
reasoning_override=reasoning_override,
|
|
176
|
+
),
|
|
162
177
|
tools=tools,
|
|
163
178
|
instructions=instructions,
|
|
164
179
|
markdown=True,
|
|
@@ -210,7 +225,11 @@ async def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
|
|
|
210
225
|
|
|
211
226
|
return Agent(
|
|
212
227
|
name=agent_def.name,
|
|
213
|
-
model=create_model(
|
|
228
|
+
model=create_model(
|
|
229
|
+
model_ref,
|
|
230
|
+
max_tokens=max_tokens,
|
|
231
|
+
reasoning_override=session.reasoning_override,
|
|
232
|
+
),
|
|
214
233
|
tools=tools,
|
|
215
234
|
instructions=instructions,
|
|
216
235
|
markdown=True,
|
|
@@ -374,11 +374,101 @@ Complete the search request efficiently and report your findings clearly.\
|
|
|
374
374
|
"""
|
|
375
375
|
|
|
376
376
|
|
|
377
|
+
VERIFIER_ROLE = """\
|
|
378
|
+
You are a verification sub-agent. Your sole job is to review a recent batch
|
|
379
|
+
of edits for correctness and report issues.
|
|
380
|
+
|
|
381
|
+
=== CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
|
|
382
|
+
You are STRICTLY PROHIBITED from creating, editing, deleting, or moving
|
|
383
|
+
files. You do not have access to edit tools; attempts will fail. No
|
|
384
|
+
state-changing bash commands (no git add/commit, no npm/pip install, no
|
|
385
|
+
mkdir/touch/rm/cp/mv).
|
|
386
|
+
|
|
387
|
+
Your workflow:
|
|
388
|
+
1. Read each file mentioned in the task using `read_file` or `read_files`
|
|
389
|
+
2. Search for call sites / references to changed APIs using `grep_search`
|
|
390
|
+
3. Skim related tests using `glob_search` + `read_file`
|
|
391
|
+
4. Report findings in this structure:
|
|
392
|
+
- Inconsistencies found (with file:line refs)
|
|
393
|
+
- Missing follow-up edits (call sites not updated, etc.)
|
|
394
|
+
- Suspicious patterns worth the caller's attention (even if uncertain)
|
|
395
|
+
- What looks correct (brief — don't pad the report)
|
|
396
|
+
|
|
397
|
+
Be concise. Skip nitpicks (formatting, naming preferences). Focus on
|
|
398
|
+
bugs, broken contracts, or outdated call sites the caller likely missed.
|
|
399
|
+
|
|
400
|
+
Return ONE final message. The caller is not able to ask follow-ups
|
|
401
|
+
without a resume — include everything they need to act.\
|
|
402
|
+
"""
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
REVIEWER_ROLE = """\
|
|
406
|
+
You are a code-review sub-agent. Review the files mentioned in the task
|
|
407
|
+
against common quality heuristics and produce actionable findings.
|
|
408
|
+
|
|
409
|
+
=== CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
|
|
410
|
+
You may only read and search. No edit/write/delete/move operations. No
|
|
411
|
+
state-changing bash.
|
|
412
|
+
|
|
413
|
+
For each file covered:
|
|
414
|
+
|
|
415
|
+
- Naming: are identifiers clear and consistent with the surrounding code?
|
|
416
|
+
- Error handling: are edge cases covered? Any swallowed exceptions?
|
|
417
|
+
- Testing: is there test coverage for the new/modified code paths?
|
|
418
|
+
- Security: obvious injection, path traversal, secret exposure, unchecked
|
|
419
|
+
user input, missing auth checks?
|
|
420
|
+
- Complexity: functions that should be split, duplicated logic, over-
|
|
421
|
+
engineered abstractions for simple cases?
|
|
422
|
+
|
|
423
|
+
Report format:
|
|
424
|
+
- One bullet per finding
|
|
425
|
+
- Include file:line
|
|
426
|
+
- Classify severity: (blocker) / (important) / (nit) — omit (nit) unless
|
|
427
|
+
asked for a thorough review
|
|
428
|
+
- If nothing is wrong, say so plainly — do not fabricate issues
|
|
429
|
+
|
|
430
|
+
Return ONE final message covering every file you looked at.\
|
|
431
|
+
"""
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
GUIDE_ROLE = """\
|
|
435
|
+
You are the Aru user-guide sub-agent. You answer questions about how to
|
|
436
|
+
use and configure Aru itself — slash commands, permission config, skills,
|
|
437
|
+
plugins, tool catalog, session management.
|
|
438
|
+
|
|
439
|
+
The questions are about Aru, NOT about the user's own codebase. When in
|
|
440
|
+
doubt, treat the task as "explain how to do X with Aru" rather than "do X
|
|
441
|
+
in the user's project".
|
|
442
|
+
|
|
443
|
+
=== CRITICAL: READ-ONLY MODE — NO FILE MODIFICATIONS ===
|
|
444
|
+
You may only read and search. No edit/write/delete/move operations.
|
|
445
|
+
|
|
446
|
+
Authoritative sources, in priority order:
|
|
447
|
+
1. `AGENTS.md` at the project root — architectural reference
|
|
448
|
+
2. `docs/*.md` — user-facing documentation
|
|
449
|
+
3. `aru.json` examples in the codebase — config shape
|
|
450
|
+
4. Reading the code under `aru/` directly (last resort — prefer docs)
|
|
451
|
+
|
|
452
|
+
Workflow:
|
|
453
|
+
1. `read_file` AGENTS.md first
|
|
454
|
+
2. `glob_search` + `read_file` relevant docs/*.md
|
|
455
|
+
3. Search `aru.json` or permission config examples if the question is
|
|
456
|
+
configuration-related
|
|
457
|
+
|
|
458
|
+
Never invent features. If the docs do not cover the topic, say so and
|
|
459
|
+
suggest the closest available alternative. Cite file paths in your
|
|
460
|
+
response so the user can verify.
|
|
461
|
+
|
|
462
|
+
Return ONE final message.\
|
|
463
|
+
"""
|
|
464
|
+
|
|
465
|
+
|
|
377
466
|
def build_instructions(role: str, extra: str = "") -> str:
|
|
378
467
|
"""Build complete instructions for an agent role.
|
|
379
468
|
|
|
380
469
|
Args:
|
|
381
|
-
role: One of 'planner', 'executor', 'general', 'explorer'
|
|
470
|
+
role: One of 'planner', 'executor', 'general', 'explorer', 'verifier',
|
|
471
|
+
'reviewer', 'guide'.
|
|
382
472
|
extra: Additional project-specific instructions (README, AGENTS.md, skills).
|
|
383
473
|
"""
|
|
384
474
|
role_text = {
|
|
@@ -386,6 +476,9 @@ def build_instructions(role: str, extra: str = "") -> str:
|
|
|
386
476
|
"executor": EXECUTOR_ROLE,
|
|
387
477
|
"general": GENERAL_ROLE,
|
|
388
478
|
"explorer": EXPLORER_ROLE,
|
|
479
|
+
"verifier": VERIFIER_ROLE,
|
|
480
|
+
"reviewer": REVIEWER_ROLE,
|
|
481
|
+
"guide": GUIDE_ROLE,
|
|
389
482
|
}[role]
|
|
390
483
|
|
|
391
484
|
parts = [role_text, BASE_INSTRUCTIONS]
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Native agent catalog — single source of truth for built-in agent specs.
|
|
2
|
+
|
|
3
|
+
Each AgentSpec describes a runtime-parameterized agent: prompt role, tool list,
|
|
4
|
+
mode (primary/subagent), and model sizing. The factory in agent_factory.py
|
|
5
|
+
consumes specs and builds Agno Agent instances. The runner in runner.py looks
|
|
6
|
+
up specs by name when handling runner.prompt(PromptInput).
|
|
7
|
+
|
|
8
|
+
Custom agents (defined via .agents/agents/*.md) follow a separate path through
|
|
9
|
+
create_custom_agent_instance and are NOT listed here.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from typing import Callable, Literal
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class AgentSpec:
    """Immutable, static description of one built-in (native) agent.

    Field notes:

    * ``tools_factory`` is a zero-argument callable rather than a list so the
      tool modules are only resolved when an agent is actually created, not
      when this module is imported.
    * ``max_tokens`` set to ``None`` means "use the model's full cap"; an
      explicit int requests a lower ceiling. Per the original author's note,
      providers.py clamps the final value to ``min(requested, model_cap)``.
    * ``description`` is the LLM-facing summary rendered into
      ``delegate_task``'s docstring. Only subagent specs need a meaningful
      one; keep it short (1-3 sentences) and directive.
    * ``extra_instructions`` is appended to the base role instructions when
      the agent is built. Use it for agent-specific policy that should not
      leak into other roles.
    """

    # Display name passed to Agno.
    name: str
    # Key into build_instructions(role, ...).
    role: str
    mode: Literal["primary", "subagent"]
    # Lazy resolver, invoked at agent creation.
    tools_factory: Callable[[], list]
    max_tokens: int | None
    # When True the factory uses ctx.small_model_ref.
    small_model: bool = False
    # False skips thinking params (e.g. explorer).
    use_reasoning: bool = True
    # LLM-facing summary for the `delegate_task` docstring.
    description: str = ""
    # Appended to the base role instructions on build.
    extra_instructions: str = ""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _build_tools() -> list:
    """Return ``GENERAL_TOOLS`` from ``aru.tools.registry``.

    The import happens at call time, so the registry module is only loaded
    when an agent that uses this factory is actually created.
    """
    from aru.tools.registry import GENERAL_TOOLS
    return GENERAL_TOOLS
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _plan_tools() -> list:
    """Return ``PLANNER_TOOLS`` from ``aru.tools.registry``.

    The import happens at call time, so the registry module is only loaded
    when an agent that uses this factory is actually created.
    """
    from aru.tools.registry import PLANNER_TOOLS
    return PLANNER_TOOLS
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _exec_tools() -> list:
    """Return ``EXECUTOR_TOOLS`` from ``aru.tools.registry``.

    The import happens at call time, so the registry module is only loaded
    when an agent that uses this factory is actually created.
    """
    from aru.tools.registry import EXECUTOR_TOOLS
    return EXECUTOR_TOOLS
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _explore_tools() -> list:
    """Return ``EXPLORER_TOOLS`` from ``aru.tools.registry``.

    The import happens at call time, so the registry module is only loaded
    when an agent that uses this factory is actually created.
    """
    from aru.tools.registry import EXPLORER_TOOLS
    return EXPLORER_TOOLS
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Catalog of built-in agent specs, keyed by the name used to look a spec up.
# Four entries are primary agents and four are subagents; every subagent
# here uses the explorer tool list, the small model, and no reasoning.
AGENTS: dict[str, AgentSpec] = {
    # Primary agents default to the model's full output cap (clamped by
    # providers.create_model). Subagents keep a tight budget so a runaway
    # explorer can't blow through the whole turn.
    "build": AgentSpec(
        name="Aru",
        role="general",
        mode="primary",
        tools_factory=_build_tools,
        max_tokens=None,
    ),
    # NOTE(review): unlike the other primary agents, the planner carries an
    # explicit 4096 cap rather than None — confirm this is intentional.
    "plan": AgentSpec(
        name="Planner",
        role="planner",
        mode="primary",
        tools_factory=_plan_tools,
        max_tokens=4096,
    ),
    "executor": AgentSpec(
        name="Executor",
        role="executor",
        mode="primary",
        tools_factory=_exec_tools,
        max_tokens=None,
    ),
    "explorer": AgentSpec(
        name="Explorer",
        role="explorer",
        mode="subagent",
        tools_factory=_explore_tools,
        max_tokens=8192,
        small_model=True,
        use_reasoning=False,  # fast read-only subagent — no thinking overhead
        description=(
            "Fast read-only codebase exploration agent. Use for searching "
            "files, finding patterns, reading code, and understanding "
            "structure. Specify thoroughness in the task text: \"quick\" "
            "(basic searches), \"medium\" (moderate exploration), or "
            "\"very thorough\" (comprehensive analysis)."
        ),
    ),
    # The catalog key is "verification" while the display name is "Verifier"
    # and the role key is "verifier"; the three are distinct on purpose or by
    # accident — TODO confirm which name callers are expected to pass.
    "verification": AgentSpec(
        name="Verifier",
        role="verifier",
        mode="subagent",
        tools_factory=_explore_tools,  # read-only
        max_tokens=4096,
        small_model=True,
        use_reasoning=False,
        description=(
            "Double-check a recent batch of edits for correctness. Reads "
            "changed files, searches for call sites, reports inconsistencies "
            "and missing follow-up edits. Read-only — never edits. Use after "
            "non-trivial multi-file edits to catch issues before the user sees them."
        ),
    ),
    "reviewer": AgentSpec(
        name="Reviewer",
        role="reviewer",
        mode="subagent",
        tools_factory=_explore_tools,  # read-only
        max_tokens=4096,
        small_model=True,
        use_reasoning=False,
        description=(
            "Code review against naming, error handling, test coverage, and "
            "security heuristics. Read-only; produces bulleted findings with "
            "file:line refs and severity tags. Use when you want a second "
            "pair of eyes before finalising changes."
        ),
    ),
    "guide": AgentSpec(
        name="Guide",
        role="guide",
        mode="subagent",
        tools_factory=_explore_tools,  # read-only
        max_tokens=4096,
        small_model=True,
        use_reasoning=False,
        description=(
            "Answer questions about using Aru itself — slash commands, "
            "permission config, skills, plugins, tool catalog. Reads "
            "AGENTS.md and docs/ to ground answers. Use when the user's "
            "question is about Aru's features, not their own codebase."
        ),
    ),
}
|
|
@@ -43,6 +43,43 @@ _last_call_cache_write: int = 0
|
|
|
43
43
|
# We normalize "length" → "max_tokens" so callers can check a single value.
|
|
44
44
|
_last_call_stop_reason: str | None = None
|
|
45
45
|
|
|
46
|
+
# Micro-compaction metrics (process-wide, reset by tests via
# reset_microcompact_stats()). Recorded by _prune_tool_messages every time it
# fires from the format_function_call_results patch. Surfaced in /cost so
# users can see what the pre-API-call prune is actually doing — the basis
# for any future calibration of count/time-based triggers (Steps 3/4 of the
# plan, deferred until we have data here to justify them).
_microcompact_invocations: int = 0  # times _prune_tool_messages was called
_microcompact_clear_passes: int = 0  # times the prune actually cleared anything
_microcompact_results_cleared: int = 0  # cumulative tool_result blocks cleared

# Reactive overflow recovery: counts API calls where the provider rejected the
# request as too long and we wiped older tool_results then retried. Surfaced
# in /cost so users can tell when the recovery path is masking a chronically
# oversized context (suggests prune thresholds or model choice need attention).
_microcompact_overflow_recoveries: int = 0
# Aggressive prune keeps only the last N compactable tool_results, no matter
# the budget. Picked low because by definition we got here AFTER the regular
# prune (160K protect) failed to keep the context within model limits.
_OVERFLOW_RECOVERY_KEEP_RECENT = 3
# Substrings (case-insensitive) that mark a provider error as a context-too-long
# rejection. Anthropic / OpenAI / DashScope / DeepSeek / Groq all phrase it
# slightly differently; the union below covers the seen variants. Match is
# substring against str(exc) — wider than ideal, but the fallback path (no
# recovery) only kicks in when wrong, and a false positive at worst replays
# the same call after a no-op prune.
# Entries must be lowercase: the matcher lowercases the error text before
# testing for membership.
_OVERFLOW_ERROR_SIGNATURES = (
    "prompt is too long",
    "context length",
    "context_length_exceeded",
    "maximum context",
    "exceeds the maximum",
    "exceeds context",
    "input is too long",
    "too many tokens",
    "request too large",
)
|
|
82
|
+
|
|
46
83
|
|
|
47
84
|
def get_last_call_metrics() -> tuple[int, int, int, int]:
|
|
48
85
|
"""Return (input, output, cache_read, cache_write) from the most recent API call."""
|
|
@@ -68,6 +105,130 @@ def reset_last_stop_reason() -> None:
|
|
|
68
105
|
_last_call_stop_reason = None
|
|
69
106
|
|
|
70
107
|
|
|
108
|
+
def get_microcompact_stats() -> dict:
    """Snapshot the process-wide micro-compaction counters.

    A fresh dict is built on every call with these keys:
        - invocations: total times _prune_tool_messages ran
        - clear_passes: subset of those runs that actually cleared something
        - results_cleared: cumulative tool_result blocks wiped
        - overflow_recoveries: current value of the reactive
          overflow-recovery counter

    Used by /cost and tests. The ratio results_cleared/invocations is the
    natural calibration signal for whether the budget-based trigger fires
    often enough — if it stays near zero across long sessions, the threshold
    is too lax (or the protect window too generous).
    """
    return dict(
        invocations=_microcompact_invocations,
        clear_passes=_microcompact_clear_passes,
        results_cleared=_microcompact_results_cleared,
        overflow_recoveries=_microcompact_overflow_recoveries,
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def reset_microcompact_stats() -> None:
    """Set all four micro-compaction counters back to zero (test-only helper)."""
    global _microcompact_invocations, _microcompact_clear_passes
    global _microcompact_results_cleared, _microcompact_overflow_recoveries
    _microcompact_invocations = _microcompact_clear_passes = 0
    _microcompact_results_cleared = _microcompact_overflow_recoveries = 0
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _is_context_overflow_error(exc) -> bool:
    """Return True iff `exc` looks like a provider context-too-long rejection.

    The exception and every error reachable from it through the
    `original_error` attribute or `__cause__` are stringified, lowercased and
    joined; the result is tested for any substring in
    `_OVERFLOW_ERROR_SIGNATURES`.

    Both links are followed at every level. Previously only one of the two
    was inspected (`__cause__` was ignored whenever `original_error` was set)
    and nothing deeper than one level was read, so a provider error wrapped
    twice went undetected.

    Args:
        exc: The exception (or any object) to classify. `None` yields False.

    Returns:
        True when at least one signature substring occurs in the collected
        error text, False otherwise.
    """
    from collections import deque

    texts: list[str] = []
    seen: set[int] = set()  # ids already visited — guards against cyclic chains
    pending = deque([exc])
    while pending:
        current = pending.popleft()
        if current is None or id(current) in seen:
            continue
        seen.add(id(current))
        try:
            texts.append(str(current).lower())
        except Exception:
            # Deliberate best-effort: an error whose __str__ itself raises
            # must not break classification of the rest of the chain.
            pass
        # Queue both links; FIFO order keeps the outer message first.
        pending.append(getattr(current, "original_error", None))
        pending.append(getattr(current, "__cause__", None))

    blob = " ".join(text for text in texts if text)
    return any(sig in blob for sig in _OVERFLOW_ERROR_SIGNATURES)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _aggressive_prune(messages, keep_recent: int = _OVERFLOW_RECOVERY_KEEP_RECENT) -> int:
    """Blank out every compactable tool_result except the newest `keep_recent`.

    This is the reactive path taken after a provider rejects a request as too
    long. No budget walk is performed: by the time this runs, the
    budget-based prune has already failed to keep the request under the
    model's context limit.

    Tool results whose tool name is not in `COMPACTABLE_TOOLS` (or cannot be
    resolved from the preceding assistant tool calls) are never touched.

    Args:
        messages: Sequence of message objects; the affected ones are mutated
            in place.
        keep_recent: How many of the most recent compactable results to leave
            intact. A value of zero or less clears all of them.

    Returns:
        The number of results actually cleared.
    """
    from aru.context import COMPACTABLE_TOOLS

    names_by_call_id = _build_tool_id_to_name_map(messages)

    def _resolved_name(message):
        # A tool message only carries the call id; map it back to a name.
        call_id = getattr(message, "tool_call_id", None)
        return names_by_call_id.get(call_id) if call_id else None

    # Positions of compactable tool results, oldest first.
    compactable = [
        position
        for position, message in enumerate(messages)
        if getattr(message, "role", None) == "tool"
        and _resolved_name(message) in COMPACTABLE_TOOLS
    ]

    if len(compactable) <= keep_recent:
        return 0

    stale = compactable[:-keep_recent] if keep_recent > 0 else compactable

    wiped = 0
    for position in stale:
        message = messages[position]
        current = getattr(message, "content", None)
        # Nothing to do for empty or already-pruned results.
        if current is None or str(current) == _PRUNED_PLACEHOLDER:
            continue
        try:
            message.content = _PRUNED_PLACEHOLDER
            if hasattr(message, "compressed_content"):
                message.compressed_content = None
            wiped += 1
        except (AttributeError, TypeError):
            # Read-only or oddly-typed message: leave it and move on.
            continue
    return wiped
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _build_tool_id_to_name_map(messages) -> dict:
|
|
208
|
+
"""Walk assistant messages forward, building tool_call_id → tool_name map.
|
|
209
|
+
|
|
210
|
+
Required because Agno's `role="tool"` Message carries `tool_call_id` but
|
|
211
|
+
not the originating tool name — the name lives on the matching
|
|
212
|
+
`assistant.tool_calls[i].function.name` in a previous message.
|
|
213
|
+
"""
|
|
214
|
+
id_to_name: dict = {}
|
|
215
|
+
for msg in messages:
|
|
216
|
+
if getattr(msg, "role", None) != "assistant":
|
|
217
|
+
continue
|
|
218
|
+
tool_calls = getattr(msg, "tool_calls", None)
|
|
219
|
+
if not tool_calls:
|
|
220
|
+
continue
|
|
221
|
+
for tc in tool_calls:
|
|
222
|
+
tc_id = tc.get("id") if isinstance(tc, dict) else None
|
|
223
|
+
if not tc_id:
|
|
224
|
+
continue
|
|
225
|
+
fn = tc.get("function") if isinstance(tc, dict) else None
|
|
226
|
+
tc_name = fn.get("name") if isinstance(fn, dict) else None
|
|
227
|
+
if tc_name:
|
|
228
|
+
id_to_name[tc_id] = tc_name
|
|
229
|
+
return id_to_name
|
|
230
|
+
|
|
231
|
+
|
|
71
232
|
def _prune_tool_messages(messages):
|
|
72
233
|
"""Clear old tool result content using a token-budget approach.
|
|
73
234
|
|
|
@@ -77,49 +238,81 @@ def _prune_tool_messages(messages):
|
|
|
77
238
|
PRUNE_MINIMUM_CHARS (avoids unnecessary churn on small conversations).
|
|
78
239
|
|
|
79
240
|
Aligned with OpenCode's strategy: budget-based, not fixed-N.
|
|
241
|
+
|
|
242
|
+
**Tool allowlist**: only outputs of tools in `COMPACTABLE_TOOLS` are
|
|
243
|
+
eligible for clearing. Non-compactable tools (delegate_task, invoke_skill,
|
|
244
|
+
tasklist mutators) still consume the protection budget but are never
|
|
245
|
+
pruned — their content is semantically load-bearing. The id→name map is
|
|
246
|
+
built from prior assistant `tool_calls` since `role="tool"` Messages carry
|
|
247
|
+
only the call id, not the tool name. Single source of truth lives in
|
|
248
|
+
`aru.context.COMPACTABLE_TOOLS`.
|
|
249
|
+
|
|
250
|
+
Returns the number of tool results actually cleared (0 if none) for
|
|
251
|
+
metrics consumption by `_microcompact_stats`.
|
|
80
252
|
"""
|
|
81
|
-
|
|
82
|
-
tool_indices = []
|
|
83
|
-
for i, msg in enumerate(messages):
|
|
84
|
-
if getattr(msg, "role", None) == "tool":
|
|
85
|
-
content = getattr(msg, "content", None)
|
|
86
|
-
content_len = len(str(content)) if content is not None else 0
|
|
87
|
-
tool_indices.append((i, content_len))
|
|
253
|
+
from aru.context import COMPACTABLE_TOOLS
|
|
88
254
|
|
|
89
|
-
|
|
90
|
-
|
|
255
|
+
global _microcompact_invocations, _microcompact_clear_passes, _microcompact_results_cleared
|
|
256
|
+
_microcompact_invocations += 1
|
|
91
257
|
|
|
92
|
-
|
|
93
|
-
protected_chars = 0
|
|
94
|
-
prune_candidates = [] # (index, content_len) of messages outside protection
|
|
258
|
+
id_to_name = _build_tool_id_to_name_map(messages)
|
|
95
259
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
260
|
+
# Collect tool message indices, their content sizes, and compactability.
|
|
261
|
+
tool_entries = [] # (index, content_len, is_compactable)
|
|
262
|
+
for i, msg in enumerate(messages):
|
|
263
|
+
if getattr(msg, "role", None) != "tool":
|
|
264
|
+
continue
|
|
265
|
+
content = getattr(msg, "content", None)
|
|
266
|
+
content_len = len(str(content)) if content is not None else 0
|
|
267
|
+
tc_id = getattr(msg, "tool_call_id", None)
|
|
268
|
+
tool_name = id_to_name.get(tc_id) if tc_id else None
|
|
269
|
+
# Defensive: if we can't resolve the name, treat as non-compactable.
|
|
270
|
+
# Better to leak budget than wipe a delegate_task result by mistake.
|
|
271
|
+
is_compactable = tool_name in COMPACTABLE_TOOLS if tool_name else False
|
|
272
|
+
tool_entries.append((i, content_len, is_compactable))
|
|
273
|
+
|
|
274
|
+
if not tool_entries:
|
|
275
|
+
return 0
|
|
276
|
+
|
|
277
|
+
# Walk backwards. ALL tool content (compactable or not) consumes the
|
|
278
|
+
# protection budget — the prompt carries it either way. Once the budget
|
|
279
|
+
# is exhausted, older entries are prune candidates ONLY if compactable;
|
|
280
|
+
# non-compactable old entries (delegate_task etc.) stay untouched.
|
|
281
|
+
running_total = 0
|
|
282
|
+
prune_candidates = [] # (index, content_len) of compactable messages outside protection
|
|
283
|
+
|
|
284
|
+
for idx, content_len, is_compactable in reversed(tool_entries):
|
|
285
|
+
in_recent_window = (running_total + content_len) <= _PRUNE_PROTECT_CHARS
|
|
286
|
+
running_total += content_len
|
|
287
|
+
if not in_recent_window and is_compactable:
|
|
100
288
|
prune_candidates.append((idx, content_len))
|
|
101
289
|
|
|
102
290
|
# Only prune if there's enough to free
|
|
103
291
|
freeable = sum(cl for _, cl in prune_candidates)
|
|
104
292
|
if freeable < _PRUNE_MINIMUM_CHARS:
|
|
105
|
-
return
|
|
293
|
+
return 0
|
|
106
294
|
|
|
107
|
-
|
|
295
|
+
cleared = 0
|
|
108
296
|
for idx, _ in prune_candidates:
|
|
109
297
|
msg = messages[idx]
|
|
110
298
|
content = getattr(msg, "content", None)
|
|
111
299
|
if content is None:
|
|
112
300
|
continue
|
|
113
|
-
# Skip if already pruned
|
|
114
301
|
if str(content) == _PRUNED_PLACEHOLDER:
|
|
115
302
|
continue
|
|
116
303
|
try:
|
|
117
304
|
msg.content = _PRUNED_PLACEHOLDER
|
|
118
305
|
if hasattr(msg, "compressed_content"):
|
|
119
306
|
msg.compressed_content = None
|
|
307
|
+
cleared += 1
|
|
120
308
|
except (AttributeError, TypeError):
|
|
121
309
|
pass
|
|
122
310
|
|
|
311
|
+
if cleared:
|
|
312
|
+
_microcompact_clear_passes += 1
|
|
313
|
+
_microcompact_results_cleared += cleared
|
|
314
|
+
return cleared
|
|
315
|
+
|
|
123
316
|
|
|
124
317
|
def apply_cache_patch():
|
|
125
318
|
"""Apply all patches to reduce Agno's token consumption."""
|
|
@@ -127,6 +320,73 @@ def apply_cache_patch():
|
|
|
127
320
|
_patch_claude_cache_breakpoints()
|
|
128
321
|
_patch_per_call_metrics()
|
|
129
322
|
_patch_stop_reason_capture()
|
|
323
|
+
_patch_overflow_recovery()
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _patch_overflow_recovery():
    """Give Agno's async retry entry points one context-overflow recovery attempt.

    Wraps both `Model._ainvoke_with_retry` (non-stream) and
    `Model._ainvoke_stream_with_retry` (stream). Each wrapper first delegates
    to the original method. If that raises a `ModelProviderError` which
    `_is_context_overflow_error` classifies as a context overflow, the wrapper:

    1. calls `_aggressive_prune` on the `messages` keyword argument, which
       mutates the list in place (it is expected to wipe all but the most
       recent compactable tool results -- see `_aggressive_prune` and
       `_OVERFLOW_RECOVERY_KEEP_RECENT` for the exact policy);
    2. bumps the `_microcompact_overflow_recoveries` counter;
    3. invokes the original method exactly once more with the same kwargs.

    The original error is re-raised unchanged when it is not an overflow,
    when no `messages` kwarg was passed, or when the prune cleared nothing
    (a retry would send the same prompt again).

    The second invocation runs outside the `try`, so an overflow that
    survives the prune propagates to the caller; there is no retry loop in
    this wrapper and no per-instance state is kept.

    Calling this function more than once is safe: methods that already carry
    this module's wrapper are left untouched instead of being wrapped again.
    """
    from agno.exceptions import ModelProviderError
    from agno.models.base import Model

    # Attribute stamped on our wrappers so a repeated call can recognise them.
    wrapped_marker = "_aru_overflow_recovery_wrapper"

    _orig_ainvoke = Model._ainvoke_with_retry
    _orig_ainvoke_stream = Model._ainvoke_stream_with_retry

    def _recovered(err, kwargs):
        """Prune in place and report whether a single retry is worthwhile."""
        global _microcompact_overflow_recoveries
        if not _is_context_overflow_error(err):
            return False
        messages = kwargs.get("messages")
        if messages is None:
            return False
        if _aggressive_prune(messages) == 0:
            # Nothing was freed; retrying would resend the same prompt.
            return False
        _microcompact_overflow_recoveries += 1
        return True

    async def _patched_ainvoke_with_retry(self, **kwargs):
        try:
            return await _orig_ainvoke(self, **kwargs)
        except ModelProviderError as e:
            if not _recovered(e, kwargs):
                raise
        # Retry once with the now-pruned messages. A second overflow propagates.
        return await _orig_ainvoke(self, **kwargs)

    async def _patched_ainvoke_stream_with_retry(self, **kwargs):
        # NOTE(review): assumes an overflow rejection is raised before any
        # chunk is yielded; otherwise the retry would replay earlier chunks.
        try:
            async for response in _orig_ainvoke_stream(self, **kwargs):
                yield response
            return
        except ModelProviderError as e:
            if not _recovered(e, kwargs):
                raise
        # Retry once with the now-pruned messages. A second overflow propagates.
        async for response in _orig_ainvoke_stream(self, **kwargs):
            yield response

    # Install each wrapper only if that method is not already one of ours.
    if not getattr(_orig_ainvoke, wrapped_marker, False):
        setattr(_patched_ainvoke_with_retry, wrapped_marker, True)
        Model._ainvoke_with_retry = _patched_ainvoke_with_retry
    if not getattr(_orig_ainvoke_stream, wrapped_marker, False):
        setattr(_patched_ainvoke_stream_with_retry, wrapped_marker, True)
        Model._ainvoke_stream_with_retry = _patched_ainvoke_stream_with_retry
|
|
130
390
|
|
|
131
391
|
|
|
132
392
|
def _patch_tool_result_pruning():
|