aru-code 0.28.0__tar.gz → 0.30.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aru_code-0.28.0/aru_code.egg-info → aru_code-0.30.0}/PKG-INFO +1 -1
- aru_code-0.30.0/aru/__init__.py +1 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/agent_factory.py +30 -3
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/catalog.py +12 -4
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/cache_patch.py +122 -1
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/cli.py +53 -3
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/commands.py +1 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/config.py +20 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/context.py +130 -3
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/display.py +1 -1
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/permissions.py +7 -3
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/providers.py +47 -12
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/runner.py +258 -126
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/runtime.py +2 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/session.py +82 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/skill.py +13 -0
- {aru_code-0.28.0 → aru_code-0.30.0/aru_code.egg-info}/PKG-INFO +1 -1
- {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/SOURCES.txt +4 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/pyproject.toml +1 -1
- aru_code-0.30.0/tests/test_cache_patch_stop_reason.py +108 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_catalog.py +6 -3
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_config.py +16 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_invoke_skill.py +46 -0
- aru_code-0.30.0/tests/test_invoked_skills.py +321 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_permissions.py +74 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_providers.py +19 -1
- aru_code-0.30.0/tests/test_runner_recovery.py +132 -0
- aru_code-0.30.0/tests/test_skill_disallowed_tools.py +78 -0
- aru_code-0.28.0/aru/__init__.py +0 -1
- {aru_code-0.28.0 → aru_code-0.30.0}/LICENSE +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/README.md +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/__init__.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/base.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/planner.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/checkpoints.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/completers.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/history_blocks.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugin_cache.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/__init__.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/custom_tools.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/hooks.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/manager.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/tool_api.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/select.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/__init__.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/_diff.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/_shared.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/codebase.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/delegate.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/file_ops.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/gitignore.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/plan_mode.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/ranker.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/registry.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/search.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/shell.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/tasklist.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/web.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/setup.cfg +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_agents_base.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_agents_md_coverage.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cache_patch_metrics.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_checkpoints.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_base.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_completers.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_new.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_run_cli.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_session.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_shell.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_codebase.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_confabulation_regression.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_context.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_gitignore.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_guardrails_scenarios.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_main.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_mcp_client.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_plan_mode_refactor.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_plugin_cache.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_plugins.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_ranker.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_select.py +0 -0
- {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_tasklist.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.30.0"
|
|
@@ -81,6 +81,32 @@ def _wrap_tools_with_hooks(tools: list) -> list:
|
|
|
81
81
|
f"exit_plan_mode(plan=<full plan text>) to request "
|
|
82
82
|
f"approval. Do NOT retry {tool_name}."
|
|
83
83
|
)
|
|
84
|
+
# Active-skill disallowed-tools gate — honors the `disallowed-tools`
|
|
85
|
+
# frontmatter field of the currently active skill. Mirrors the
|
|
86
|
+
# plan-mode gate pattern above; runs before plugin hooks so a skill
|
|
87
|
+
# can hard-block a tool regardless of permission/plugin state.
|
|
88
|
+
try:
|
|
89
|
+
from aru.runtime import get_ctx
|
|
90
|
+
ctx = get_ctx()
|
|
91
|
+
session = getattr(ctx, "session", None)
|
|
92
|
+
config = getattr(ctx, "config", None)
|
|
93
|
+
except (LookupError, AttributeError):
|
|
94
|
+
session = None
|
|
95
|
+
config = None
|
|
96
|
+
if session is not None and config is not None:
|
|
97
|
+
active = getattr(session, "active_skill", None)
|
|
98
|
+
skills = getattr(config, "skills", None) or {}
|
|
99
|
+
active_skill_obj = skills.get(active) if active else None
|
|
100
|
+
disallowed = getattr(active_skill_obj, "disallowed_tools", None) or []
|
|
101
|
+
if tool_name in disallowed:
|
|
102
|
+
return (
|
|
103
|
+
f"BLOCKED: tool `{tool_name}` is disallowed by the "
|
|
104
|
+
f"currently active skill `{active}`. Read the skill's "
|
|
105
|
+
f"SKILL.md for the prescribed path. Do NOT retry "
|
|
106
|
+
f"`{tool_name}`; use the alternative the skill specifies "
|
|
107
|
+
f"(commonly: write the output to a `.md` file via "
|
|
108
|
+
f"`write_file` instead of using in-session state)."
|
|
109
|
+
)
|
|
84
110
|
# Before hook — plugins can mutate args or raise PermissionError to block
|
|
85
111
|
try:
|
|
86
112
|
before_data = await _fire_hook("tool.execute.before", {
|
|
@@ -112,10 +138,11 @@ def _wrap_tools_with_hooks(tools: list) -> list:
|
|
|
112
138
|
|
|
113
139
|
|
|
114
140
|
async def _apply_chat_hooks(instructions: str, model_ref: str, agent_name: str,
|
|
115
|
-
max_tokens: int =
|
|
141
|
+
max_tokens: int | None = None) -> tuple[str, str, int | None]:
|
|
116
142
|
"""Apply chat.system.transform and chat.params hooks to agent creation params.
|
|
117
143
|
|
|
118
144
|
Returns (instructions, model_ref, max_tokens) — possibly modified by plugins.
|
|
145
|
+
When max_tokens is None, providers.create_model will use the model's full cap.
|
|
119
146
|
"""
|
|
120
147
|
# chat.system.transform — plugins can modify the system prompt
|
|
121
148
|
data = await _fire_hook("chat.system.transform", {
|
|
@@ -216,9 +243,9 @@ async def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
|
|
|
216
243
|
parts.append(extra)
|
|
217
244
|
instructions = "\n\n".join(parts)
|
|
218
245
|
|
|
219
|
-
# Apply chat hooks (system.transform + params)
|
|
246
|
+
# Apply chat hooks (system.transform + params). max_tokens=None → provider cap.
|
|
220
247
|
instructions, model_ref, max_tokens = await _apply_chat_hooks(
|
|
221
|
-
instructions, model_ref, agent_def.name, max_tokens=
|
|
248
|
+
instructions, model_ref, agent_def.name, max_tokens=None,
|
|
222
249
|
)
|
|
223
250
|
|
|
224
251
|
return Agent(
|
|
@@ -21,13 +21,18 @@ class AgentSpec:
|
|
|
21
21
|
|
|
22
22
|
The tools_factory is a lazy callable so module load order does not force
|
|
23
23
|
aru.tools.codebase to be imported before this module.
|
|
24
|
+
|
|
25
|
+
`max_tokens=None` means "use the model's full cap" (see providers.py).
|
|
26
|
+
An explicit int caps the agent below that ceiling — providers.py always
|
|
27
|
+
clamps the final value to min(requested, model_cap) so specs can never
|
|
28
|
+
ask for more than the model supports.
|
|
24
29
|
"""
|
|
25
30
|
|
|
26
31
|
name: str # display name passed to Agno
|
|
27
32
|
role: str # key into build_instructions(role, ...)
|
|
28
33
|
mode: Literal["primary", "subagent"]
|
|
29
34
|
tools_factory: Callable[[], list] # lazy resolver — invoked at agent creation
|
|
30
|
-
max_tokens: int
|
|
35
|
+
max_tokens: int | None
|
|
31
36
|
small_model: bool = False # if True, factory uses ctx.small_model_ref
|
|
32
37
|
|
|
33
38
|
|
|
@@ -52,12 +57,15 @@ def _explore_tools() -> list:
|
|
|
52
57
|
|
|
53
58
|
|
|
54
59
|
AGENTS: dict[str, AgentSpec] = {
|
|
60
|
+
# Primary agents default to the model's full output cap (clamped by
|
|
61
|
+
# providers.create_model). Subagents keep a tight budget so a runaway
|
|
62
|
+
# explorer can't blow through the whole turn.
|
|
55
63
|
"build": AgentSpec(
|
|
56
64
|
name="Aru",
|
|
57
65
|
role="general",
|
|
58
66
|
mode="primary",
|
|
59
67
|
tools_factory=_build_tools,
|
|
60
|
-
max_tokens=
|
|
68
|
+
max_tokens=None,
|
|
61
69
|
),
|
|
62
70
|
"plan": AgentSpec(
|
|
63
71
|
name="Planner",
|
|
@@ -71,14 +79,14 @@ AGENTS: dict[str, AgentSpec] = {
|
|
|
71
79
|
role="executor",
|
|
72
80
|
mode="primary",
|
|
73
81
|
tools_factory=_exec_tools,
|
|
74
|
-
max_tokens=
|
|
82
|
+
max_tokens=None,
|
|
75
83
|
),
|
|
76
84
|
"explorer": AgentSpec(
|
|
77
85
|
name="Explorer",
|
|
78
86
|
role="explorer",
|
|
79
87
|
mode="subagent",
|
|
80
88
|
tools_factory=_explore_tools,
|
|
81
|
-
max_tokens=
|
|
89
|
+
max_tokens=8192,
|
|
82
90
|
small_model=True,
|
|
83
91
|
),
|
|
84
92
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Monkey-patch Agno's model layer to reduce token consumption.
|
|
2
2
|
|
|
3
|
-
Three optimizations:
|
|
3
|
+
Four optimizations:
|
|
4
4
|
|
|
5
5
|
1. **Tool result pruning** (ALL providers): After each tool execution, old tool
|
|
6
6
|
results in the message list are truncated to a short summary. This prevents
|
|
@@ -12,6 +12,11 @@ Three optimizations:
|
|
|
12
12
|
3. **Per-call metrics** (ALL providers): Captures input/output tokens of the
|
|
13
13
|
last API call (context window size), exposed via get_last_call_metrics().
|
|
14
14
|
|
|
15
|
+
4. **Stop-reason capture** (Anthropic + OpenAI-compatible): Captures the
|
|
16
|
+
`stop_reason` / `finish_reason` from the final message of the last API call,
|
|
17
|
+
exposed via get_last_stop_reason(). Lets the runner detect `max_tokens`
|
|
18
|
+
truncation and trigger the recovery loop.
|
|
19
|
+
|
|
15
20
|
These patches intercept Agno's internal loop so they work transparently
|
|
16
21
|
regardless of which provider is used.
|
|
17
22
|
"""
|
|
@@ -33,12 +38,36 @@ _last_call_output_tokens: int = 0
|
|
|
33
38
|
_last_call_cache_read: int = 0
|
|
34
39
|
_last_call_cache_write: int = 0
|
|
35
40
|
|
|
41
|
+
# Last API call stop reason (Anthropic uses "end_turn"/"tool_use"/"max_tokens"/
|
|
42
|
+
# "stop_sequence"/"pause_turn"; OpenAI uses "stop"/"length"/"tool_calls").
|
|
43
|
+
# We normalize "length" → "max_tokens" so callers can check a single value.
|
|
44
|
+
_last_call_stop_reason: str | None = None
|
|
45
|
+
|
|
36
46
|
|
|
37
47
|
def get_last_call_metrics() -> tuple[int, int, int, int]:
|
|
38
48
|
"""Return (input, output, cache_read, cache_write) from the most recent API call."""
|
|
39
49
|
return _last_call_input_tokens, _last_call_output_tokens, _last_call_cache_read, _last_call_cache_write
|
|
40
50
|
|
|
41
51
|
|
|
52
|
+
def get_last_stop_reason() -> str | None:
|
|
53
|
+
"""Return the stop reason from the most recent API call, normalized.
|
|
54
|
+
|
|
55
|
+
Returns one of: `end_turn`, `tool_use`, `max_tokens`, `stop_sequence`,
|
|
56
|
+
`pause_turn`, or None if no call has happened yet / the provider did not
|
|
57
|
+
expose one. OpenAI's `length` is mapped to `max_tokens` and `stop` to
|
|
58
|
+
`end_turn` so callers have a single vocabulary.
|
|
59
|
+
"""
|
|
60
|
+
return _last_call_stop_reason
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def reset_last_stop_reason() -> None:
|
|
64
|
+
"""Clear the cached stop reason — call before starting a new turn so a
|
|
65
|
+
stale value from a prior turn never leaks into the next one.
|
|
66
|
+
"""
|
|
67
|
+
global _last_call_stop_reason
|
|
68
|
+
_last_call_stop_reason = None
|
|
69
|
+
|
|
70
|
+
|
|
42
71
|
def _prune_tool_messages(messages):
|
|
43
72
|
"""Clear old tool result content using a token-budget approach.
|
|
44
73
|
|
|
@@ -97,6 +126,7 @@ def apply_cache_patch():
|
|
|
97
126
|
_patch_tool_result_pruning()
|
|
98
127
|
_patch_claude_cache_breakpoints()
|
|
99
128
|
_patch_per_call_metrics()
|
|
129
|
+
_patch_stop_reason_capture()
|
|
100
130
|
|
|
101
131
|
|
|
102
132
|
def _patch_tool_result_pruning():
|
|
@@ -235,3 +265,94 @@ def _patch_per_call_metrics():
|
|
|
235
265
|
_base_module.accumulate_model_metrics = _patched_accumulate
|
|
236
266
|
except (ImportError, AttributeError):
|
|
237
267
|
pass
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# OpenAI "length" and Anthropic "max_tokens" mean the same thing; normalize so
|
|
271
|
+
# runner logic can check a single value.
|
|
272
|
+
_STOP_REASON_NORMALIZE = {
|
|
273
|
+
"length": "max_tokens", # OpenAI
|
|
274
|
+
"stop": "end_turn", # OpenAI
|
|
275
|
+
"tool_calls": "tool_use", # OpenAI
|
|
276
|
+
"function_call": "tool_use", # legacy OpenAI
|
|
277
|
+
"MAX_TOKENS": "max_tokens", # Gemini (all-caps)
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _record_stop_reason(raw: str | None) -> None:
|
|
282
|
+
"""Normalize and cache the provider's stop reason."""
|
|
283
|
+
global _last_call_stop_reason
|
|
284
|
+
if raw is None or raw == "":
|
|
285
|
+
return
|
|
286
|
+
_last_call_stop_reason = _STOP_REASON_NORMALIZE.get(raw, raw)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _patch_stop_reason_capture():
|
|
290
|
+
"""Forward `stop_reason` from Agno's provider parsers into a module-level
|
|
291
|
+
slot readable via `get_last_stop_reason()`.
|
|
292
|
+
|
|
293
|
+
Agno's Anthropic adapter sees `response.stop_reason` (non-streaming) and
|
|
294
|
+
`response.message.stop_reason` (streaming MessageStopEvent), but discards
|
|
295
|
+
both before anything downstream can observe them. We wrap the two parsers
|
|
296
|
+
and record the value as a side effect. The OpenAI-compatible adapter
|
|
297
|
+
already exposes `response.choices[0].finish_reason`, so we hook that too
|
|
298
|
+
for completeness (Qwen, DeepSeek, Groq, OpenRouter).
|
|
299
|
+
"""
|
|
300
|
+
# Anthropic (native + streaming)
|
|
301
|
+
try:
|
|
302
|
+
from agno.models.anthropic import claude as _claude_mod
|
|
303
|
+
|
|
304
|
+
_original_parse = _claude_mod.Claude._parse_provider_response
|
|
305
|
+
_original_parse_delta = _claude_mod.Claude._parse_provider_response_delta
|
|
306
|
+
|
|
307
|
+
def _patched_parse(self, response, *args, **kwargs):
|
|
308
|
+
result = _original_parse(self, response, *args, **kwargs)
|
|
309
|
+
_record_stop_reason(getattr(response, "stop_reason", None))
|
|
310
|
+
return result
|
|
311
|
+
|
|
312
|
+
def _patched_parse_delta(self, response, *args, **kwargs):
|
|
313
|
+
result = _original_parse_delta(self, response, *args, **kwargs)
|
|
314
|
+
# MessageStopEvent / ParsedBetaMessageStopEvent carry the final
|
|
315
|
+
# stop_reason on their nested `message` object.
|
|
316
|
+
msg = getattr(response, "message", None)
|
|
317
|
+
if msg is not None:
|
|
318
|
+
_record_stop_reason(getattr(msg, "stop_reason", None))
|
|
319
|
+
return result
|
|
320
|
+
|
|
321
|
+
_claude_mod.Claude._parse_provider_response = _patched_parse
|
|
322
|
+
_claude_mod.Claude._parse_provider_response_delta = _patched_parse_delta
|
|
323
|
+
except (ImportError, AttributeError):
|
|
324
|
+
pass
|
|
325
|
+
|
|
326
|
+
# OpenAI-compatible (OpenAI, Qwen/DashScope, DeepSeek, Groq, OpenRouter)
|
|
327
|
+
try:
|
|
328
|
+
from agno.models.openai import chat as _openai_chat
|
|
329
|
+
|
|
330
|
+
_original_openai_parse = _openai_chat.OpenAIChat._parse_provider_response
|
|
331
|
+
|
|
332
|
+
def _patched_openai_parse(self, response, *args, **kwargs):
|
|
333
|
+
result = _original_openai_parse(self, response, *args, **kwargs)
|
|
334
|
+
try:
|
|
335
|
+
choice = response.choices[0]
|
|
336
|
+
_record_stop_reason(getattr(choice, "finish_reason", None))
|
|
337
|
+
except (AttributeError, IndexError, TypeError):
|
|
338
|
+
pass
|
|
339
|
+
return result
|
|
340
|
+
|
|
341
|
+
_openai_chat.OpenAIChat._parse_provider_response = _patched_openai_parse
|
|
342
|
+
|
|
343
|
+
if hasattr(_openai_chat.OpenAIChat, "_parse_provider_response_delta"):
|
|
344
|
+
_original_openai_delta = _openai_chat.OpenAIChat._parse_provider_response_delta
|
|
345
|
+
|
|
346
|
+
def _patched_openai_delta(self, response, *args, **kwargs):
|
|
347
|
+
result = _original_openai_delta(self, response, *args, **kwargs)
|
|
348
|
+
try:
|
|
349
|
+
choice = response.choices[0]
|
|
350
|
+
# Only the final chunk sets finish_reason.
|
|
351
|
+
_record_stop_reason(getattr(choice, "finish_reason", None))
|
|
352
|
+
except (AttributeError, IndexError, TypeError):
|
|
353
|
+
pass
|
|
354
|
+
return result
|
|
355
|
+
|
|
356
|
+
_openai_chat.OpenAIChat._parse_provider_response_delta = _patched_openai_delta
|
|
357
|
+
except (ImportError, AttributeError):
|
|
358
|
+
pass
|
|
@@ -15,6 +15,7 @@ import sys
|
|
|
15
15
|
|
|
16
16
|
from rich.markdown import Markdown
|
|
17
17
|
from rich.panel import Panel
|
|
18
|
+
from rich.text import Text
|
|
18
19
|
|
|
19
20
|
# ── Re-exports for backward compatibility ─────────────────────────────
|
|
20
21
|
# Tests and external code import these from aru.cli; keep them accessible.
|
|
@@ -92,7 +93,7 @@ _logging.getLogger("agno").setLevel(_logging.WARNING)
|
|
|
92
93
|
|
|
93
94
|
from aru.agents.planner import review_plan
|
|
94
95
|
from aru.config import load_config, render_command_template, render_skill_template
|
|
95
|
-
from aru.permissions import get_skip_permissions
|
|
96
|
+
from aru.permissions import get_skip_permissions, set_permission_mode
|
|
96
97
|
from aru.providers import (
|
|
97
98
|
MODEL_ALIASES,
|
|
98
99
|
list_providers,
|
|
@@ -100,6 +101,39 @@ from aru.providers import (
|
|
|
100
101
|
)
|
|
101
102
|
|
|
102
103
|
|
|
104
|
+
def _toggle_yolo_mode(ctx) -> None:
|
|
105
|
+
"""Toggle YOLO (dangerously-skip-permissions) mode from the REPL.
|
|
106
|
+
|
|
107
|
+
Turning YOLO *off* is unconditional — safety is not at risk.
|
|
108
|
+
Turning YOLO *on* requires an explicit y/n confirmation with a red warning panel.
|
|
109
|
+
"""
|
|
110
|
+
if ctx.permission_mode == "yolo":
|
|
111
|
+
set_permission_mode("default")
|
|
112
|
+
console.print("[bold green]✔ YOLO disabled — safe mode restored.[/bold green]")
|
|
113
|
+
return
|
|
114
|
+
|
|
115
|
+
warning = Text.from_markup(
|
|
116
|
+
"[bold red]⚠ DANGEROUSLY SKIP PERMISSIONS (YOLO)[/bold red]\n\n"
|
|
117
|
+
"[red]All permission prompts will be bypassed for this session, including:[/red]\n"
|
|
118
|
+
" • Reading/writing [bold].env[/bold] files and other sensitive paths\n"
|
|
119
|
+
" • Arbitrary shell commands ([bold]rm -rf[/bold], package installs, network calls)\n"
|
|
120
|
+
" • Edits outside the working directory\n"
|
|
121
|
+
" • All sub-agents delegated during this session\n\n"
|
|
122
|
+
"[dim]Toggle off anytime with /yolo or shift+tab.[/dim]"
|
|
123
|
+
)
|
|
124
|
+
console.print(Panel(
|
|
125
|
+
warning,
|
|
126
|
+
title="[bold red]Enable YOLO mode?[/bold red]",
|
|
127
|
+
border_style="red",
|
|
128
|
+
padding=(1, 2),
|
|
129
|
+
))
|
|
130
|
+
if ask_yes_no("Confirm enabling YOLO mode"):
|
|
131
|
+
set_permission_mode("yolo")
|
|
132
|
+
console.print("[bold red]🔥 YOLO MODE ACTIVE — all permissions bypassed.[/bold red]")
|
|
133
|
+
else:
|
|
134
|
+
console.print("[dim]Cancelled. Remaining in safe mode.[/dim]")
|
|
135
|
+
|
|
136
|
+
|
|
103
137
|
# ── Main REPL ──────────────────────────────────────────────────────────
|
|
104
138
|
|
|
105
139
|
async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
|
|
@@ -288,7 +322,13 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
|
|
|
288
322
|
f' <style fg="ansigray">│</style>'
|
|
289
323
|
f' <style fg="ansigray">{ctx.mcp_loaded_msg}</style>'
|
|
290
324
|
)
|
|
291
|
-
if ctx.permission_mode == "acceptEdits":
|
|
325
|
+
if ctx.permission_mode == "yolo":
|
|
326
|
+
mode_part = (
|
|
327
|
+
f' <style fg="ansigray">│</style>'
|
|
328
|
+
f' <b><style fg="ansired">🔥 YOLO — permissions bypassed</style></b>'
|
|
329
|
+
f' <style fg="ansigray">(/yolo to toggle)</style>'
|
|
330
|
+
)
|
|
331
|
+
elif ctx.permission_mode == "acceptEdits":
|
|
292
332
|
mode_part = (
|
|
293
333
|
f' <style fg="ansigray">│</style>'
|
|
294
334
|
f' <b><style fg="ansigreen">⏵⏵ auto-accept edits on</style></b>'
|
|
@@ -570,6 +610,10 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
|
|
|
570
610
|
))
|
|
571
611
|
continue
|
|
572
612
|
|
|
613
|
+
if user_input.lower() in ("/yolo", "/unsafe"):
|
|
614
|
+
_toggle_yolo_mode(ctx)
|
|
615
|
+
continue
|
|
616
|
+
|
|
573
617
|
# Begin a new checkpoint turn for undo support
|
|
574
618
|
_turn_counter += 1
|
|
575
619
|
ctx.checkpoint_manager.begin_turn(_turn_counter)
|
|
@@ -667,7 +711,13 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
|
|
|
667
711
|
if not skill.user_invocable:
|
|
668
712
|
console.print(f"[yellow]Skill '{cmd_name}' is not user-invocable[/yellow]")
|
|
669
713
|
else:
|
|
714
|
+
session.active_skill = cmd_name
|
|
670
715
|
prompt = render_skill_template(skill.content, cmd_args)
|
|
716
|
+
# Record so the skill body survives compaction — mirror of
|
|
717
|
+
# claude-code's addInvokedSkill. Store the rendered content
|
|
718
|
+
# (post-argument substitution) so post-compact restoration
|
|
719
|
+
# matches what the model initially read.
|
|
720
|
+
session.record_invoked_skill(cmd_name, prompt, skill.source_path)
|
|
671
721
|
console.print(f"[bold magenta]Running skill /{cmd_name}...[/bold magenta]")
|
|
672
722
|
|
|
673
723
|
agent = await create_general_agent(session, config, env_context=_build_env_ctx())
|
|
@@ -791,7 +841,7 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
|
|
|
791
841
|
|
|
792
842
|
agent = Agent(
|
|
793
843
|
name="Aru",
|
|
794
|
-
model=create_model(session.model_ref,
|
|
844
|
+
model=create_model(session.model_ref), # None → provider cap
|
|
795
845
|
tools=[],
|
|
796
846
|
instructions=build_instructions("general", extra_instructions),
|
|
797
847
|
markdown=True,
|
|
@@ -24,6 +24,7 @@ SLASH_COMMANDS = [
|
|
|
24
24
|
("/plugin", "Manage cached plugins (install/list/remove/update)", "/plugin <subcommand>"),
|
|
25
25
|
("/undo", "Undo last turn — restore files and/or conversation", "/undo"),
|
|
26
26
|
("/cost", "Show detailed token usage and cost", "/cost"),
|
|
27
|
+
("/yolo", "Toggle DANGEROUSLY skip all permissions (YOLO mode)", "/yolo"),
|
|
27
28
|
("/quit", "Exit aru", "/quit"),
|
|
28
29
|
]
|
|
29
30
|
|
|
@@ -39,9 +39,16 @@ class Skill:
|
|
|
39
39
|
content: str
|
|
40
40
|
source_path: str
|
|
41
41
|
allowed_tools: list[str] = field(default_factory=list)
|
|
42
|
+
disallowed_tools: list[str] = field(default_factory=list)
|
|
42
43
|
disable_model_invocation: bool = False
|
|
43
44
|
user_invocable: bool = True
|
|
44
45
|
argument_hint: str = ""
|
|
46
|
+
# Short (~1-2 sentences) reminder used by the core to reinforce the
|
|
47
|
+
# skill's critical gates during compaction. Not re-injected per turn —
|
|
48
|
+
# it only appears wrapped in `<system-reminder>` when a compaction
|
|
49
|
+
# would otherwise drop the skill body from history. When absent, the
|
|
50
|
+
# core derives a default from `description`.
|
|
51
|
+
reminder: str = ""
|
|
45
52
|
|
|
46
53
|
|
|
47
54
|
@dataclass
|
|
@@ -266,6 +273,17 @@ def _parse_skill_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
|
|
|
266
273
|
else:
|
|
267
274
|
result["allowed_tools"] = []
|
|
268
275
|
|
|
276
|
+
disallowed_raw = metadata.get("disallowed-tools", "")
|
|
277
|
+
if isinstance(disallowed_raw, list):
|
|
278
|
+
result["disallowed_tools"] = [str(t).strip() for t in disallowed_raw]
|
|
279
|
+
elif disallowed_raw:
|
|
280
|
+
result["disallowed_tools"] = [t.strip() for t in str(disallowed_raw).split(",") if t.strip()]
|
|
281
|
+
else:
|
|
282
|
+
result["disallowed_tools"] = []
|
|
283
|
+
|
|
284
|
+
reminder_raw = metadata.get("reminder", "")
|
|
285
|
+
result["reminder"] = str(reminder_raw).strip() if reminder_raw else ""
|
|
286
|
+
|
|
269
287
|
return result
|
|
270
288
|
|
|
271
289
|
|
|
@@ -382,9 +400,11 @@ def _discover_skills(search_roots: list[Path]) -> dict[str, Skill]:
|
|
|
382
400
|
content=body,
|
|
383
401
|
source_path=str(skill_file),
|
|
384
402
|
allowed_tools=meta["allowed_tools"],
|
|
403
|
+
disallowed_tools=meta["disallowed_tools"],
|
|
385
404
|
disable_model_invocation=meta["disable_model_invocation"],
|
|
386
405
|
user_invocable=meta["user_invocable"],
|
|
387
406
|
argument_hint=meta["argument_hint"],
|
|
407
|
+
reminder=meta["reminder"],
|
|
388
408
|
)
|
|
389
409
|
|
|
390
410
|
return skills
|
|
@@ -52,6 +52,20 @@ TRUNCATE_SAVE_DIR = ".aru/truncated"
|
|
|
52
52
|
# opencode's approach where the split point mirrors the prune window.
|
|
53
53
|
COMPACT_RECENT_CHARS = 160_000
|
|
54
54
|
|
|
55
|
+
# Per-skill head-keep cap when preserving invoked SKILL.md bodies through a
|
|
56
|
+
# compaction. 20K chars ≈ 5K tokens (claude-code POST_COMPACT_MAX_TOKENS_PER_SKILL).
|
|
57
|
+
# The head is kept because SKILL.md files put gates and checklists at the top.
|
|
58
|
+
POST_COMPACT_MAX_CHARS_PER_SKILL = 20_000
|
|
59
|
+
# Total budget across all invoked skills in the preservation block.
|
|
60
|
+
# 100K chars ≈ 25K tokens (claude-code POST_COMPACT_SKILLS_TOKEN_BUDGET).
|
|
61
|
+
# Skills are sorted most-recent-first; older ones are dropped when the budget
|
|
62
|
+
# fills, so stale mid-session invocations don't crowd out the skill you're
|
|
63
|
+
# currently following.
|
|
64
|
+
POST_COMPACT_SKILLS_BUDGET_CHARS = 100_000
|
|
65
|
+
# Marker appended to a per-skill body when it was truncated — tells the model
|
|
66
|
+
# it can re-read the full SKILL.md at the source path if needed.
|
|
67
|
+
SKILL_TRUNCATION_MARKER = "\n\n[... SKILL.md truncated for budget. Re-read the file if details below the cutoff are needed.]"
|
|
68
|
+
|
|
55
69
|
# Compaction: trigger when per-call input tokens approach real overflow.
|
|
56
70
|
# Matches opencode's philosophy: only fire near the model's actual context
|
|
57
71
|
# limit, not routinely. Routine context reduction is handled by prune_history
|
|
@@ -607,8 +621,88 @@ def build_compaction_prompt(
|
|
|
607
621
|
|
|
608
622
|
|
|
609
623
|
|
|
624
|
+
def _build_skills_preservation_item(invoked_skills: dict | None) -> dict | None:
|
|
625
|
+
"""Build a user message preserving invoked SKILL.md bodies through compaction.
|
|
626
|
+
|
|
627
|
+
Mirrors claude-code's `createSkillAttachmentIfNeeded` (compact.ts:1494) +
|
|
628
|
+
the `'invoked_skills'` branch of the attachment renderer (messages.ts:3644).
|
|
629
|
+
Skills are sorted most-recent-first; each body is head-capped at
|
|
630
|
+
`POST_COMPACT_MAX_CHARS_PER_SKILL`; once the cumulative total crosses
|
|
631
|
+
`POST_COMPACT_SKILLS_BUDGET_CHARS` subsequent (older) skills are dropped.
|
|
632
|
+
|
|
633
|
+
The rendered item is a **user meta-message** whose content is wrapped in
|
|
634
|
+
`<system-reminder>` tags — this signals to the model that the block is a
|
|
635
|
+
system-level reminder (elevated attention) rather than fresh user intent.
|
|
636
|
+
Mirrors `wrapMessagesInSystemReminder` from claude-code.
|
|
637
|
+
|
|
638
|
+
Returns None when there is nothing to preserve (no invoked skills, or all
|
|
639
|
+
bodies empty), so the caller can skip injection cleanly.
|
|
640
|
+
|
|
641
|
+
Args:
|
|
642
|
+
invoked_skills: session.invoked_skills — dict keyed by skill name with
|
|
643
|
+
`.content`, `.source_path`, `.invoked_at` attributes (InvokedSkill
|
|
644
|
+
instances; dict-shaped values are tolerated for from-JSON callers).
|
|
645
|
+
"""
|
|
646
|
+
if not invoked_skills:
|
|
647
|
+
return None
|
|
648
|
+
|
|
649
|
+
def _attr(obj, name, default=""):
|
|
650
|
+
if isinstance(obj, dict):
|
|
651
|
+
return obj.get(name, default)
|
|
652
|
+
return getattr(obj, name, default)
|
|
653
|
+
|
|
654
|
+
# Sort most-recent-first so budget pressure drops the oldest skills first.
|
|
655
|
+
entries = sorted(
|
|
656
|
+
invoked_skills.values(),
|
|
657
|
+
key=lambda s: float(_attr(s, "invoked_at", 0.0)) or 0.0,
|
|
658
|
+
reverse=True,
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
rendered_blocks: list[str] = []
|
|
662
|
+
used_chars = 0
|
|
663
|
+
for skill in entries:
|
|
664
|
+
body = str(_attr(skill, "content", "") or "")
|
|
665
|
+
if not body.strip():
|
|
666
|
+
continue
|
|
667
|
+
# Head-keep cap per skill
|
|
668
|
+
if len(body) > POST_COMPACT_MAX_CHARS_PER_SKILL:
|
|
669
|
+
body = body[: POST_COMPACT_MAX_CHARS_PER_SKILL - len(SKILL_TRUNCATION_MARKER)] + SKILL_TRUNCATION_MARKER
|
|
670
|
+
if used_chars + len(body) > POST_COMPACT_SKILLS_BUDGET_CHARS:
|
|
671
|
+
break
|
|
672
|
+
used_chars += len(body)
|
|
673
|
+
|
|
674
|
+
name = str(_attr(skill, "name", "") or "?")
|
|
675
|
+
path = str(_attr(skill, "source_path", "") or "")
|
|
676
|
+
header = f"### Skill: /{name}"
|
|
677
|
+
if path:
|
|
678
|
+
header += f"\nPath: {path}"
|
|
679
|
+
rendered_blocks.append(f"{header}\n\n{body}")
|
|
680
|
+
|
|
681
|
+
if not rendered_blocks:
|
|
682
|
+
return None
|
|
683
|
+
|
|
684
|
+
skills_text = "\n\n---\n\n".join(rendered_blocks)
|
|
685
|
+
preserved = (
|
|
686
|
+
"<system-reminder>\n"
|
|
687
|
+
"The following skills were invoked earlier in this session and their full SKILL.md bodies "
|
|
688
|
+
"were summarized out during compaction. Continue to follow these guidelines exactly — all "
|
|
689
|
+
"gates, checklists, and forbidden shortcuts still apply.\n\n"
|
|
690
|
+
f"{skills_text}\n"
|
|
691
|
+
"</system-reminder>"
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
from aru.history_blocks import text_block
|
|
695
|
+
return {
|
|
696
|
+
"role": "user",
|
|
697
|
+
"content": [text_block(preserved)],
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
|
|
610
701
|
def apply_compaction(
|
|
611
|
-
history: list[dict],
|
|
702
|
+
history: list[dict],
|
|
703
|
+
summary: str,
|
|
704
|
+
model_id: str = "default",
|
|
705
|
+
invoked_skills: dict | None = None,
|
|
612
706
|
) -> list[dict]:
|
|
613
707
|
"""Replace OLD messages with a summary, keep RECENT messages intact.
|
|
614
708
|
|
|
@@ -616,12 +710,19 @@ def apply_compaction(
|
|
|
616
710
|
role alternation stays natural:
|
|
617
711
|
[user: "Please summarize..."]
|
|
618
712
|
[assistant: "<summary>", summary=True]
|
|
713
|
+
[user: "<system-reminder>...invoked skills...</system-reminder>"] ← when invoked_skills
|
|
619
714
|
+ recent messages as-is
|
|
620
715
|
|
|
621
716
|
The assistant summary is marked with `summary: True` as a checkpoint.
|
|
622
717
|
`prune_history` walks backward and stops at this marker, so content
|
|
623
718
|
already consolidated into the summary is never re-processed. Mirrors
|
|
624
719
|
opencode's `msg.info.summary` flag (see message-v2.ts:914).
|
|
720
|
+
|
|
721
|
+
`invoked_skills` (claude-code parity — `STATE.invokedSkills`) is the
|
|
722
|
+
session's record of every SKILL.md invoked so far. When provided, a
|
|
723
|
+
skills-preservation `<system-reminder>` is injected right after the
|
|
724
|
+
summary so the model continues following skill gates even though the
|
|
725
|
+
original SKILL.md body was folded into the summary.
|
|
625
726
|
"""
|
|
626
727
|
from aru.history_blocks import text_block, coerce_history_item
|
|
627
728
|
_, recent = _split_history(history, model_id)
|
|
@@ -637,6 +738,11 @@ def apply_compaction(
|
|
|
637
738
|
"summary": True,
|
|
638
739
|
},
|
|
639
740
|
]
|
|
741
|
+
|
|
742
|
+
skills_item = _build_skills_preservation_item(invoked_skills)
|
|
743
|
+
if skills_item is not None:
|
|
744
|
+
compacted.append(skills_item)
|
|
745
|
+
|
|
640
746
|
compacted.extend(coerce_history_item(m) for m in recent)
|
|
641
747
|
|
|
642
748
|
return compacted
|
|
@@ -647,6 +753,7 @@ async def compact_conversation(
|
|
|
647
753
|
model_ref: str,
|
|
648
754
|
plan_task: str | None = None,
|
|
649
755
|
model_id: str = "default",
|
|
756
|
+
invoked_skills: dict | None = None,
|
|
650
757
|
) -> list[dict[str, str]]:
|
|
651
758
|
"""Run the compaction agent to summarize and replace history.
|
|
652
759
|
|
|
@@ -663,6 +770,13 @@ async def compact_conversation(
|
|
|
663
770
|
step.
|
|
664
771
|
|
|
665
772
|
Falls back to a mechanical summary if the agent call fails.
|
|
773
|
+
|
|
774
|
+
`invoked_skills` is `session.invoked_skills` — passed through to
|
|
775
|
+
`apply_compaction` so SKILL.md bodies survive the summary via a
|
|
776
|
+
`<system-reminder>` preservation block. When None (caller didn't pass
|
|
777
|
+
it, e.g. tests, subagent flows), `get_ctx().session.invoked_skills` is
|
|
778
|
+
consulted as a best-effort fallback. See claude-code
|
|
779
|
+
`createSkillAttachmentIfNeeded` for the pattern.
|
|
666
780
|
"""
|
|
667
781
|
from aru.providers import create_model
|
|
668
782
|
|
|
@@ -677,6 +791,19 @@ async def compact_conversation(
|
|
|
677
791
|
except (LookupError, AttributeError, ImportError):
|
|
678
792
|
pass # no plugin manager available — proceed without hooks
|
|
679
793
|
|
|
794
|
+
# Best-effort: if caller didn't pass invoked_skills but there's a session
|
|
795
|
+
# in the current runtime context, use its record. Keeps legacy call sites
|
|
796
|
+
# (subagent compaction, tests) covered without forcing every caller to
|
|
797
|
+
# plumb the session through.
|
|
798
|
+
if invoked_skills is None:
|
|
799
|
+
try:
|
|
800
|
+
from aru.runtime import get_ctx
|
|
801
|
+
session = getattr(get_ctx(), "session", None)
|
|
802
|
+
if session is not None:
|
|
803
|
+
invoked_skills = getattr(session, "invoked_skills", None)
|
|
804
|
+
except (LookupError, AttributeError, ImportError):
|
|
805
|
+
pass
|
|
806
|
+
|
|
680
807
|
prompt = build_compaction_prompt(history, plan_task, model_id=model_id)
|
|
681
808
|
|
|
682
809
|
try:
|
|
@@ -705,12 +832,12 @@ async def compact_conversation(
|
|
|
705
832
|
# Fallback: simple mechanical summary
|
|
706
833
|
summary = _fallback_summary(history, plan_task)
|
|
707
834
|
|
|
708
|
-
return apply_compaction(history, summary, model_id=model_id)
|
|
835
|
+
return apply_compaction(history, summary, model_id=model_id, invoked_skills=invoked_skills)
|
|
709
836
|
|
|
710
837
|
except Exception:
|
|
711
838
|
# Fallback if agent fails
|
|
712
839
|
summary = _fallback_summary(history, plan_task)
|
|
713
|
-
return apply_compaction(history, summary, model_id=model_id)
|
|
840
|
+
return apply_compaction(history, summary, model_id=model_id, invoked_skills=invoked_skills)
|
|
714
841
|
|
|
715
842
|
|
|
716
843
|
def _fallback_summary(history: list[dict], plan_task: str | None = None) -> str:
|
|
@@ -119,7 +119,7 @@ def _render_home(session, skip_permissions: bool) -> None:
|
|
|
119
119
|
console.print(cmds)
|
|
120
120
|
console.print()
|
|
121
121
|
|
|
122
|
-
mode_label = "[red]
|
|
122
|
+
mode_label = "[bold red]🔥 YOLO mode — permissions bypassed[/bold red]" if skip_permissions else "[green]safe mode[/green]"
|
|
123
123
|
console.print(
|
|
124
124
|
Text.from_markup(
|
|
125
125
|
f" [dim]model:[/dim] [bold]{session.model_display}[/bold] [dim]({session.model_id})[/dim]"
|