aru-code 0.28.0__tar.gz → 0.30.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {aru_code-0.28.0/aru_code.egg-info → aru_code-0.30.0}/PKG-INFO +1 -1
  2. aru_code-0.30.0/aru/__init__.py +1 -0
  3. {aru_code-0.28.0 → aru_code-0.30.0}/aru/agent_factory.py +30 -3
  4. {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/catalog.py +12 -4
  5. {aru_code-0.28.0 → aru_code-0.30.0}/aru/cache_patch.py +122 -1
  6. {aru_code-0.28.0 → aru_code-0.30.0}/aru/cli.py +53 -3
  7. {aru_code-0.28.0 → aru_code-0.30.0}/aru/commands.py +1 -0
  8. {aru_code-0.28.0 → aru_code-0.30.0}/aru/config.py +20 -0
  9. {aru_code-0.28.0 → aru_code-0.30.0}/aru/context.py +130 -3
  10. {aru_code-0.28.0 → aru_code-0.30.0}/aru/display.py +1 -1
  11. {aru_code-0.28.0 → aru_code-0.30.0}/aru/permissions.py +7 -3
  12. {aru_code-0.28.0 → aru_code-0.30.0}/aru/providers.py +47 -12
  13. {aru_code-0.28.0 → aru_code-0.30.0}/aru/runner.py +258 -126
  14. {aru_code-0.28.0 → aru_code-0.30.0}/aru/runtime.py +2 -0
  15. {aru_code-0.28.0 → aru_code-0.30.0}/aru/session.py +82 -0
  16. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/skill.py +13 -0
  17. {aru_code-0.28.0 → aru_code-0.30.0/aru_code.egg-info}/PKG-INFO +1 -1
  18. {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/SOURCES.txt +4 -0
  19. {aru_code-0.28.0 → aru_code-0.30.0}/pyproject.toml +1 -1
  20. aru_code-0.30.0/tests/test_cache_patch_stop_reason.py +108 -0
  21. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_catalog.py +6 -3
  22. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_config.py +16 -0
  23. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_invoke_skill.py +46 -0
  24. aru_code-0.30.0/tests/test_invoked_skills.py +321 -0
  25. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_permissions.py +74 -0
  26. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_providers.py +19 -1
  27. aru_code-0.30.0/tests/test_runner_recovery.py +132 -0
  28. aru_code-0.30.0/tests/test_skill_disallowed_tools.py +78 -0
  29. aru_code-0.28.0/aru/__init__.py +0 -1
  30. {aru_code-0.28.0 → aru_code-0.30.0}/LICENSE +0 -0
  31. {aru_code-0.28.0 → aru_code-0.30.0}/README.md +0 -0
  32. {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/__init__.py +0 -0
  33. {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/base.py +0 -0
  34. {aru_code-0.28.0 → aru_code-0.30.0}/aru/agents/planner.py +0 -0
  35. {aru_code-0.28.0 → aru_code-0.30.0}/aru/checkpoints.py +0 -0
  36. {aru_code-0.28.0 → aru_code-0.30.0}/aru/completers.py +0 -0
  37. {aru_code-0.28.0 → aru_code-0.30.0}/aru/history_blocks.py +0 -0
  38. {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugin_cache.py +0 -0
  39. {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/__init__.py +0 -0
  40. {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/custom_tools.py +0 -0
  41. {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/hooks.py +0 -0
  42. {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/manager.py +0 -0
  43. {aru_code-0.28.0 → aru_code-0.30.0}/aru/plugins/tool_api.py +0 -0
  44. {aru_code-0.28.0 → aru_code-0.30.0}/aru/select.py +0 -0
  45. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/__init__.py +0 -0
  46. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/_diff.py +0 -0
  47. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/_shared.py +0 -0
  48. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/ast_tools.py +0 -0
  49. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/codebase.py +0 -0
  50. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/delegate.py +0 -0
  51. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/file_ops.py +0 -0
  52. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/gitignore.py +0 -0
  53. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/mcp_client.py +0 -0
  54. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/plan_mode.py +0 -0
  55. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/ranker.py +0 -0
  56. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/registry.py +0 -0
  57. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/search.py +0 -0
  58. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/shell.py +0 -0
  59. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/tasklist.py +0 -0
  60. {aru_code-0.28.0 → aru_code-0.30.0}/aru/tools/web.py +0 -0
  61. {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/dependency_links.txt +0 -0
  62. {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/entry_points.txt +0 -0
  63. {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/requires.txt +0 -0
  64. {aru_code-0.28.0 → aru_code-0.30.0}/aru_code.egg-info/top_level.txt +0 -0
  65. {aru_code-0.28.0 → aru_code-0.30.0}/setup.cfg +0 -0
  66. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_agents_base.py +0 -0
  67. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_agents_md_coverage.py +0 -0
  68. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cache_patch_metrics.py +0 -0
  69. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_checkpoints.py +0 -0
  70. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli.py +0 -0
  71. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_advanced.py +0 -0
  72. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_base.py +0 -0
  73. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_completers.py +0 -0
  74. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_new.py +0 -0
  75. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_run_cli.py +0 -0
  76. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_session.py +0 -0
  77. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_cli_shell.py +0 -0
  78. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_codebase.py +0 -0
  79. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_confabulation_regression.py +0 -0
  80. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_context.py +0 -0
  81. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_gitignore.py +0 -0
  82. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_guardrails_scenarios.py +0 -0
  83. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_main.py +0 -0
  84. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_mcp_client.py +0 -0
  85. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_plan_mode_refactor.py +0 -0
  86. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_plugin_cache.py +0 -0
  87. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_plugins.py +0 -0
  88. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_ranker.py +0 -0
  89. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_select.py +0 -0
  90. {aru_code-0.28.0 → aru_code-0.30.0}/tests/test_tasklist.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.28.0
3
+ Version: 0.30.0
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ __version__ = "0.30.0"
@@ -81,6 +81,32 @@ def _wrap_tools_with_hooks(tools: list) -> list:
81
81
  f"exit_plan_mode(plan=<full plan text>) to request "
82
82
  f"approval. Do NOT retry {tool_name}."
83
83
  )
84
+ # Active-skill disallowed-tools gate — honors the `disallowed-tools`
85
+ # frontmatter field of the currently active skill. Mirrors the
86
+ # plan-mode gate pattern above; runs before plugin hooks so a skill
87
+ # can hard-block a tool regardless of permission/plugin state.
88
+ try:
89
+ from aru.runtime import get_ctx
90
+ ctx = get_ctx()
91
+ session = getattr(ctx, "session", None)
92
+ config = getattr(ctx, "config", None)
93
+ except (LookupError, AttributeError):
94
+ session = None
95
+ config = None
96
+ if session is not None and config is not None:
97
+ active = getattr(session, "active_skill", None)
98
+ skills = getattr(config, "skills", None) or {}
99
+ active_skill_obj = skills.get(active) if active else None
100
+ disallowed = getattr(active_skill_obj, "disallowed_tools", None) or []
101
+ if tool_name in disallowed:
102
+ return (
103
+ f"BLOCKED: tool `{tool_name}` is disallowed by the "
104
+ f"currently active skill `{active}`. Read the skill's "
105
+ f"SKILL.md for the prescribed path. Do NOT retry "
106
+ f"`{tool_name}`; use the alternative the skill specifies "
107
+ f"(commonly: write the output to a `.md` file via "
108
+ f"`write_file` instead of using in-session state)."
109
+ )
84
110
  # Before hook — plugins can mutate args or raise PermissionError to block
85
111
  try:
86
112
  before_data = await _fire_hook("tool.execute.before", {
@@ -112,10 +138,11 @@ def _wrap_tools_with_hooks(tools: list) -> list:
112
138
 
113
139
 
114
140
  async def _apply_chat_hooks(instructions: str, model_ref: str, agent_name: str,
115
- max_tokens: int = 8192) -> tuple[str, str, int]:
141
+ max_tokens: int | None = None) -> tuple[str, str, int | None]:
116
142
  """Apply chat.system.transform and chat.params hooks to agent creation params.
117
143
 
118
144
  Returns (instructions, model_ref, max_tokens) — possibly modified by plugins.
145
+ When max_tokens is None, providers.create_model will use the model's full cap.
119
146
  """
120
147
  # chat.system.transform — plugins can modify the system prompt
121
148
  data = await _fire_hook("chat.system.transform", {
@@ -216,9 +243,9 @@ async def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
216
243
  parts.append(extra)
217
244
  instructions = "\n\n".join(parts)
218
245
 
219
- # Apply chat hooks (system.transform + params)
246
+ # Apply chat hooks (system.transform + params). max_tokens=None → provider cap.
220
247
  instructions, model_ref, max_tokens = await _apply_chat_hooks(
221
- instructions, model_ref, agent_def.name, max_tokens=8192,
248
+ instructions, model_ref, agent_def.name, max_tokens=None,
222
249
  )
223
250
 
224
251
  return Agent(
@@ -21,13 +21,18 @@ class AgentSpec:
21
21
 
22
22
  The tools_factory is a lazy callable so module load order does not force
23
23
  aru.tools.codebase to be imported before this module.
24
+
25
+ `max_tokens=None` means "use the model's full cap" (see providers.py).
26
+ An explicit int caps the agent below that ceiling — providers.py always
27
+ clamps the final value to min(requested, model_cap) so specs can never
28
+ ask for more than the model supports.
24
29
  """
25
30
 
26
31
  name: str # display name passed to Agno
27
32
  role: str # key into build_instructions(role, ...)
28
33
  mode: Literal["primary", "subagent"]
29
34
  tools_factory: Callable[[], list] # lazy resolver — invoked at agent creation
30
- max_tokens: int
35
+ max_tokens: int | None
31
36
  small_model: bool = False # if True, factory uses ctx.small_model_ref
32
37
 
33
38
 
@@ -52,12 +57,15 @@ def _explore_tools() -> list:
52
57
 
53
58
 
54
59
  AGENTS: dict[str, AgentSpec] = {
60
+ # Primary agents default to the model's full output cap (clamped by
61
+ # providers.create_model). Subagents keep a tight budget so a runaway
62
+ # explorer can't blow through the whole turn.
55
63
  "build": AgentSpec(
56
64
  name="Aru",
57
65
  role="general",
58
66
  mode="primary",
59
67
  tools_factory=_build_tools,
60
- max_tokens=8192,
68
+ max_tokens=None,
61
69
  ),
62
70
  "plan": AgentSpec(
63
71
  name="Planner",
@@ -71,14 +79,14 @@ AGENTS: dict[str, AgentSpec] = {
71
79
  role="executor",
72
80
  mode="primary",
73
81
  tools_factory=_exec_tools,
74
- max_tokens=8192,
82
+ max_tokens=None,
75
83
  ),
76
84
  "explorer": AgentSpec(
77
85
  name="Explorer",
78
86
  role="explorer",
79
87
  mode="subagent",
80
88
  tools_factory=_explore_tools,
81
- max_tokens=4096,
89
+ max_tokens=8192,
82
90
  small_model=True,
83
91
  ),
84
92
  }
@@ -1,6 +1,6 @@
1
1
  """Monkey-patch Agno's model layer to reduce token consumption.
2
2
 
3
- Three optimizations:
3
+ Four optimizations:
4
4
 
5
5
  1. **Tool result pruning** (ALL providers): After each tool execution, old tool
6
6
  results in the message list are truncated to a short summary. This prevents
@@ -12,6 +12,11 @@ Three optimizations:
12
12
  3. **Per-call metrics** (ALL providers): Captures input/output tokens of the
13
13
  last API call (context window size), exposed via get_last_call_metrics().
14
14
 
15
+ 4. **Stop-reason capture** (Anthropic + OpenAI-compatible): Captures the
16
+ `stop_reason` / `finish_reason` from the final message of the last API call,
17
+ exposed via get_last_stop_reason(). Lets the runner detect `max_tokens`
18
+ truncation and trigger the recovery loop.
19
+
15
20
  These patches intercept Agno's internal loop so they work transparently
16
21
  regardless of which provider is used.
17
22
  """
@@ -33,12 +38,36 @@ _last_call_output_tokens: int = 0
33
38
  _last_call_cache_read: int = 0
34
39
  _last_call_cache_write: int = 0
35
40
 
41
+ # Last API call stop reason (Anthropic uses "end_turn"/"tool_use"/"max_tokens"/
42
+ # "stop_sequence"/"pause_turn"; OpenAI uses "stop"/"length"/"tool_calls").
43
+ # We normalize "length" → "max_tokens" so callers can check a single value.
44
+ _last_call_stop_reason: str | None = None
45
+
36
46
 
37
47
  def get_last_call_metrics() -> tuple[int, int, int, int]:
38
48
  """Return (input, output, cache_read, cache_write) from the most recent API call."""
39
49
  return _last_call_input_tokens, _last_call_output_tokens, _last_call_cache_read, _last_call_cache_write
40
50
 
41
51
 
52
+ def get_last_stop_reason() -> str | None:
53
+ """Return the stop reason from the most recent API call, normalized.
54
+
55
+ Returns one of: `end_turn`, `tool_use`, `max_tokens`, `stop_sequence`,
56
+ `pause_turn`, or None if no call has happened yet / the provider did not
57
+ expose one. OpenAI's `length` is mapped to `max_tokens` and `stop` to
58
+ `end_turn` so callers have a single vocabulary.
59
+ """
60
+ return _last_call_stop_reason
61
+
62
+
63
+ def reset_last_stop_reason() -> None:
64
+ """Clear the cached stop reason — call before starting a new turn so a
65
+ stale value from a prior turn never leaks into the next one.
66
+ """
67
+ global _last_call_stop_reason
68
+ _last_call_stop_reason = None
69
+
70
+
42
71
  def _prune_tool_messages(messages):
43
72
  """Clear old tool result content using a token-budget approach.
44
73
 
@@ -97,6 +126,7 @@ def apply_cache_patch():
97
126
  _patch_tool_result_pruning()
98
127
  _patch_claude_cache_breakpoints()
99
128
  _patch_per_call_metrics()
129
+ _patch_stop_reason_capture()
100
130
 
101
131
 
102
132
  def _patch_tool_result_pruning():
@@ -235,3 +265,94 @@ def _patch_per_call_metrics():
235
265
  _base_module.accumulate_model_metrics = _patched_accumulate
236
266
  except (ImportError, AttributeError):
237
267
  pass
268
+
269
+
270
+ # OpenAI "length" and Anthropic "max_tokens" mean the same thing; normalize so
271
+ # runner logic can check a single value.
272
+ _STOP_REASON_NORMALIZE = {
273
+ "length": "max_tokens", # OpenAI
274
+ "stop": "end_turn", # OpenAI
275
+ "tool_calls": "tool_use", # OpenAI
276
+ "function_call": "tool_use", # legacy OpenAI
277
+ "MAX_TOKENS": "max_tokens", # Gemini (all-caps)
278
+ }
279
+
280
+
281
+ def _record_stop_reason(raw: str | None) -> None:
282
+ """Normalize and cache the provider's stop reason."""
283
+ global _last_call_stop_reason
284
+ if raw is None or raw == "":
285
+ return
286
+ _last_call_stop_reason = _STOP_REASON_NORMALIZE.get(raw, raw)
287
+
288
+
289
+ def _patch_stop_reason_capture():
290
+ """Forward `stop_reason` from Agno's provider parsers into a module-level
291
+ slot readable via `get_last_stop_reason()`.
292
+
293
+ Agno's Anthropic adapter sees `response.stop_reason` (non-streaming) and
294
+ `response.message.stop_reason` (streaming MessageStopEvent), but discards
295
+ both before anything downstream can observe them. We wrap the two parsers
296
+ and record the value as a side effect. The OpenAI-compatible adapter
297
+ already exposes `response.choices[0].finish_reason`, so we hook that too
298
+ for completeness (Qwen, DeepSeek, Groq, OpenRouter).
299
+ """
300
+ # Anthropic (native + streaming)
301
+ try:
302
+ from agno.models.anthropic import claude as _claude_mod
303
+
304
+ _original_parse = _claude_mod.Claude._parse_provider_response
305
+ _original_parse_delta = _claude_mod.Claude._parse_provider_response_delta
306
+
307
+ def _patched_parse(self, response, *args, **kwargs):
308
+ result = _original_parse(self, response, *args, **kwargs)
309
+ _record_stop_reason(getattr(response, "stop_reason", None))
310
+ return result
311
+
312
+ def _patched_parse_delta(self, response, *args, **kwargs):
313
+ result = _original_parse_delta(self, response, *args, **kwargs)
314
+ # MessageStopEvent / ParsedBetaMessageStopEvent carry the final
315
+ # stop_reason on their nested `message` object.
316
+ msg = getattr(response, "message", None)
317
+ if msg is not None:
318
+ _record_stop_reason(getattr(msg, "stop_reason", None))
319
+ return result
320
+
321
+ _claude_mod.Claude._parse_provider_response = _patched_parse
322
+ _claude_mod.Claude._parse_provider_response_delta = _patched_parse_delta
323
+ except (ImportError, AttributeError):
324
+ pass
325
+
326
+ # OpenAI-compatible (OpenAI, Qwen/DashScope, DeepSeek, Groq, OpenRouter)
327
+ try:
328
+ from agno.models.openai import chat as _openai_chat
329
+
330
+ _original_openai_parse = _openai_chat.OpenAIChat._parse_provider_response
331
+
332
+ def _patched_openai_parse(self, response, *args, **kwargs):
333
+ result = _original_openai_parse(self, response, *args, **kwargs)
334
+ try:
335
+ choice = response.choices[0]
336
+ _record_stop_reason(getattr(choice, "finish_reason", None))
337
+ except (AttributeError, IndexError, TypeError):
338
+ pass
339
+ return result
340
+
341
+ _openai_chat.OpenAIChat._parse_provider_response = _patched_openai_parse
342
+
343
+ if hasattr(_openai_chat.OpenAIChat, "_parse_provider_response_delta"):
344
+ _original_openai_delta = _openai_chat.OpenAIChat._parse_provider_response_delta
345
+
346
+ def _patched_openai_delta(self, response, *args, **kwargs):
347
+ result = _original_openai_delta(self, response, *args, **kwargs)
348
+ try:
349
+ choice = response.choices[0]
350
+ # Only the final chunk sets finish_reason.
351
+ _record_stop_reason(getattr(choice, "finish_reason", None))
352
+ except (AttributeError, IndexError, TypeError):
353
+ pass
354
+ return result
355
+
356
+ _openai_chat.OpenAIChat._parse_provider_response_delta = _patched_openai_delta
357
+ except (ImportError, AttributeError):
358
+ pass
@@ -15,6 +15,7 @@ import sys
15
15
 
16
16
  from rich.markdown import Markdown
17
17
  from rich.panel import Panel
18
+ from rich.text import Text
18
19
 
19
20
  # ── Re-exports for backward compatibility ─────────────────────────────
20
21
  # Tests and external code import these from aru.cli; keep them accessible.
@@ -92,7 +93,7 @@ _logging.getLogger("agno").setLevel(_logging.WARNING)
92
93
 
93
94
  from aru.agents.planner import review_plan
94
95
  from aru.config import load_config, render_command_template, render_skill_template
95
- from aru.permissions import get_skip_permissions
96
+ from aru.permissions import get_skip_permissions, set_permission_mode
96
97
  from aru.providers import (
97
98
  MODEL_ALIASES,
98
99
  list_providers,
@@ -100,6 +101,39 @@ from aru.providers import (
100
101
  )
101
102
 
102
103
 
104
+ def _toggle_yolo_mode(ctx) -> None:
105
+ """Toggle YOLO (dangerously-skip-permissions) mode from the REPL.
106
+
107
+ Turning YOLO *off* is unconditional — safety is not at risk.
108
+ Turning YOLO *on* requires an explicit y/n confirmation with a red warning panel.
109
+ """
110
+ if ctx.permission_mode == "yolo":
111
+ set_permission_mode("default")
112
+ console.print("[bold green]✔ YOLO disabled — safe mode restored.[/bold green]")
113
+ return
114
+
115
+ warning = Text.from_markup(
116
+ "[bold red]⚠ DANGEROUSLY SKIP PERMISSIONS (YOLO)[/bold red]\n\n"
117
+ "[red]All permission prompts will be bypassed for this session, including:[/red]\n"
118
+ " • Reading/writing [bold].env[/bold] files and other sensitive paths\n"
119
+ " • Arbitrary shell commands ([bold]rm -rf[/bold], package installs, network calls)\n"
120
+ " • Edits outside the working directory\n"
121
+ " • All sub-agents delegated during this session\n\n"
122
+ "[dim]Toggle off anytime with /yolo or shift+tab.[/dim]"
123
+ )
124
+ console.print(Panel(
125
+ warning,
126
+ title="[bold red]Enable YOLO mode?[/bold red]",
127
+ border_style="red",
128
+ padding=(1, 2),
129
+ ))
130
+ if ask_yes_no("Confirm enabling YOLO mode"):
131
+ set_permission_mode("yolo")
132
+ console.print("[bold red]🔥 YOLO MODE ACTIVE — all permissions bypassed.[/bold red]")
133
+ else:
134
+ console.print("[dim]Cancelled. Remaining in safe mode.[/dim]")
135
+
136
+
103
137
  # ── Main REPL ──────────────────────────────────────────────────────────
104
138
 
105
139
  async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
@@ -288,7 +322,13 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
288
322
  f' <style fg="ansigray">│</style>'
289
323
  f' <style fg="ansigray">{ctx.mcp_loaded_msg}</style>'
290
324
  )
291
- if ctx.permission_mode == "acceptEdits":
325
+ if ctx.permission_mode == "yolo":
326
+ mode_part = (
327
+ f' <style fg="ansigray">│</style>'
328
+ f' <b><style fg="ansired">🔥 YOLO — permissions bypassed</style></b>'
329
+ f' <style fg="ansigray">(/yolo to toggle)</style>'
330
+ )
331
+ elif ctx.permission_mode == "acceptEdits":
292
332
  mode_part = (
293
333
  f' <style fg="ansigray">│</style>'
294
334
  f' <b><style fg="ansigreen">⏵⏵ auto-accept edits on</style></b>'
@@ -570,6 +610,10 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
570
610
  ))
571
611
  continue
572
612
 
613
+ if user_input.lower() in ("/yolo", "/unsafe"):
614
+ _toggle_yolo_mode(ctx)
615
+ continue
616
+
573
617
  # Begin a new checkpoint turn for undo support
574
618
  _turn_counter += 1
575
619
  ctx.checkpoint_manager.begin_turn(_turn_counter)
@@ -667,7 +711,13 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
667
711
  if not skill.user_invocable:
668
712
  console.print(f"[yellow]Skill '{cmd_name}' is not user-invocable[/yellow]")
669
713
  else:
714
+ session.active_skill = cmd_name
670
715
  prompt = render_skill_template(skill.content, cmd_args)
716
+ # Record so the skill body survives compaction — mirror of
717
+ # claude-code's addInvokedSkill. Store the rendered content
718
+ # (post-argument substitution) so post-compact restoration
719
+ # matches what the model initially read.
720
+ session.record_invoked_skill(cmd_name, prompt, skill.source_path)
671
721
  console.print(f"[bold magenta]Running skill /{cmd_name}...[/bold magenta]")
672
722
 
673
723
  agent = await create_general_agent(session, config, env_context=_build_env_ctx())
@@ -791,7 +841,7 @@ async def run_oneshot(prompt: str, print_only: bool = False, skip_permissions: b
791
841
 
792
842
  agent = Agent(
793
843
  name="Aru",
794
- model=create_model(session.model_ref, max_tokens=8192),
844
+ model=create_model(session.model_ref), # None → provider cap
795
845
  tools=[],
796
846
  instructions=build_instructions("general", extra_instructions),
797
847
  markdown=True,
@@ -24,6 +24,7 @@ SLASH_COMMANDS = [
24
24
  ("/plugin", "Manage cached plugins (install/list/remove/update)", "/plugin <subcommand>"),
25
25
  ("/undo", "Undo last turn — restore files and/or conversation", "/undo"),
26
26
  ("/cost", "Show detailed token usage and cost", "/cost"),
27
+ ("/yolo", "Toggle DANGEROUSLY skip all permissions (YOLO mode)", "/yolo"),
27
28
  ("/quit", "Exit aru", "/quit"),
28
29
  ]
29
30
 
@@ -39,9 +39,16 @@ class Skill:
39
39
  content: str
40
40
  source_path: str
41
41
  allowed_tools: list[str] = field(default_factory=list)
42
+ disallowed_tools: list[str] = field(default_factory=list)
42
43
  disable_model_invocation: bool = False
43
44
  user_invocable: bool = True
44
45
  argument_hint: str = ""
46
+ # Short (~1-2 sentences) reminder used by the core to reinforce the
47
+ # skill's critical gates during compaction. Not re-injected per turn —
48
+ # it only appears wrapped in `<system-reminder>` when a compaction
49
+ # would otherwise drop the skill body from history. When absent, the
50
+ # core derives a default from `description`.
51
+ reminder: str = ""
45
52
 
46
53
 
47
54
  @dataclass
@@ -266,6 +273,17 @@ def _parse_skill_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
266
273
  else:
267
274
  result["allowed_tools"] = []
268
275
 
276
+ disallowed_raw = metadata.get("disallowed-tools", "")
277
+ if isinstance(disallowed_raw, list):
278
+ result["disallowed_tools"] = [str(t).strip() for t in disallowed_raw]
279
+ elif disallowed_raw:
280
+ result["disallowed_tools"] = [t.strip() for t in str(disallowed_raw).split(",") if t.strip()]
281
+ else:
282
+ result["disallowed_tools"] = []
283
+
284
+ reminder_raw = metadata.get("reminder", "")
285
+ result["reminder"] = str(reminder_raw).strip() if reminder_raw else ""
286
+
269
287
  return result
270
288
 
271
289
 
@@ -382,9 +400,11 @@ def _discover_skills(search_roots: list[Path]) -> dict[str, Skill]:
382
400
  content=body,
383
401
  source_path=str(skill_file),
384
402
  allowed_tools=meta["allowed_tools"],
403
+ disallowed_tools=meta["disallowed_tools"],
385
404
  disable_model_invocation=meta["disable_model_invocation"],
386
405
  user_invocable=meta["user_invocable"],
387
406
  argument_hint=meta["argument_hint"],
407
+ reminder=meta["reminder"],
388
408
  )
389
409
 
390
410
  return skills
@@ -52,6 +52,20 @@ TRUNCATE_SAVE_DIR = ".aru/truncated"
52
52
  # opencode's approach where the split point mirrors the prune window.
53
53
  COMPACT_RECENT_CHARS = 160_000
54
54
 
55
+ # Per-skill head-keep cap when preserving invoked SKILL.md bodies through a
56
+ # compaction. 20K chars ≈ 5K tokens (claude-code POST_COMPACT_MAX_TOKENS_PER_SKILL).
57
+ # The head is kept because SKILL.md files put gates and checklists at the top.
58
+ POST_COMPACT_MAX_CHARS_PER_SKILL = 20_000
59
+ # Total budget across all invoked skills in the preservation block.
60
+ # 100K chars ≈ 25K tokens (claude-code POST_COMPACT_SKILLS_TOKEN_BUDGET).
61
+ # Skills are sorted most-recent-first; older ones are dropped when the budget
62
+ # fills, so stale mid-session invocations don't crowd out the skill you're
63
+ # currently following.
64
+ POST_COMPACT_SKILLS_BUDGET_CHARS = 100_000
65
+ # Marker appended to a per-skill body when it was truncated — tells the model
66
+ # it can re-read the full SKILL.md at the source path if needed.
67
+ SKILL_TRUNCATION_MARKER = "\n\n[... SKILL.md truncated for budget. Re-read the file if details below the cutoff are needed.]"
68
+
55
69
  # Compaction: trigger when per-call input tokens approach real overflow.
56
70
  # Matches opencode's philosophy: only fire near the model's actual context
57
71
  # limit, not routinely. Routine context reduction is handled by prune_history
@@ -607,8 +621,88 @@ def build_compaction_prompt(
607
621
 
608
622
 
609
623
 
624
+ def _build_skills_preservation_item(invoked_skills: dict | None) -> dict | None:
625
+ """Build a user message preserving invoked SKILL.md bodies through compaction.
626
+
627
+ Mirrors claude-code's `createSkillAttachmentIfNeeded` (compact.ts:1494) +
628
+ the `'invoked_skills'` branch of the attachment renderer (messages.ts:3644).
629
+ Skills are sorted most-recent-first; each body is head-capped at
630
+ `POST_COMPACT_MAX_CHARS_PER_SKILL`; once the cumulative total crosses
631
+ `POST_COMPACT_SKILLS_BUDGET_CHARS` subsequent (older) skills are dropped.
632
+
633
+ The rendered item is a **user meta-message** whose content is wrapped in
634
+ `<system-reminder>` tags — this signals to the model that the block is a
635
+ system-level reminder (elevated attention) rather than fresh user intent.
636
+ Mirrors `wrapMessagesInSystemReminder` from claude-code.
637
+
638
+ Returns None when there is nothing to preserve (no invoked skills, or all
639
+ bodies empty), so the caller can skip injection cleanly.
640
+
641
+ Args:
642
+ invoked_skills: session.invoked_skills — dict keyed by skill name with
643
+ `.content`, `.source_path`, `.invoked_at` attributes (InvokedSkill
644
+ instances; dict-shaped values are tolerated for from-JSON callers).
645
+ """
646
+ if not invoked_skills:
647
+ return None
648
+
649
+ def _attr(obj, name, default=""):
650
+ if isinstance(obj, dict):
651
+ return obj.get(name, default)
652
+ return getattr(obj, name, default)
653
+
654
+ # Sort most-recent-first so budget pressure drops the oldest skills first.
655
+ entries = sorted(
656
+ invoked_skills.values(),
657
+ key=lambda s: float(_attr(s, "invoked_at", 0.0)) or 0.0,
658
+ reverse=True,
659
+ )
660
+
661
+ rendered_blocks: list[str] = []
662
+ used_chars = 0
663
+ for skill in entries:
664
+ body = str(_attr(skill, "content", "") or "")
665
+ if not body.strip():
666
+ continue
667
+ # Head-keep cap per skill
668
+ if len(body) > POST_COMPACT_MAX_CHARS_PER_SKILL:
669
+ body = body[: POST_COMPACT_MAX_CHARS_PER_SKILL - len(SKILL_TRUNCATION_MARKER)] + SKILL_TRUNCATION_MARKER
670
+ if used_chars + len(body) > POST_COMPACT_SKILLS_BUDGET_CHARS:
671
+ break
672
+ used_chars += len(body)
673
+
674
+ name = str(_attr(skill, "name", "") or "?")
675
+ path = str(_attr(skill, "source_path", "") or "")
676
+ header = f"### Skill: /{name}"
677
+ if path:
678
+ header += f"\nPath: {path}"
679
+ rendered_blocks.append(f"{header}\n\n{body}")
680
+
681
+ if not rendered_blocks:
682
+ return None
683
+
684
+ skills_text = "\n\n---\n\n".join(rendered_blocks)
685
+ preserved = (
686
+ "<system-reminder>\n"
687
+ "The following skills were invoked earlier in this session and their full SKILL.md bodies "
688
+ "were summarized out during compaction. Continue to follow these guidelines exactly — all "
689
+ "gates, checklists, and forbidden shortcuts still apply.\n\n"
690
+ f"{skills_text}\n"
691
+ "</system-reminder>"
692
+ )
693
+
694
+ from aru.history_blocks import text_block
695
+ return {
696
+ "role": "user",
697
+ "content": [text_block(preserved)],
698
+ }
699
+
700
+
610
701
  def apply_compaction(
611
- history: list[dict], summary: str, model_id: str = "default"
702
+ history: list[dict],
703
+ summary: str,
704
+ model_id: str = "default",
705
+ invoked_skills: dict | None = None,
612
706
  ) -> list[dict]:
613
707
  """Replace OLD messages with a summary, keep RECENT messages intact.
614
708
 
@@ -616,12 +710,19 @@ def apply_compaction(
616
710
  role alternation stays natural:
617
711
  [user: "Please summarize..."]
618
712
  [assistant: "<summary>", summary=True]
713
+ [user: "<system-reminder>...invoked skills...</system-reminder>"] ← when invoked_skills
619
714
  + recent messages as-is
620
715
 
621
716
  The assistant summary is marked with `summary: True` as a checkpoint.
622
717
  `prune_history` walks backward and stops at this marker, so content
623
718
  already consolidated into the summary is never re-processed. Mirrors
624
719
  opencode's `msg.info.summary` flag (see message-v2.ts:914).
720
+
721
+ `invoked_skills` (claude-code parity — `STATE.invokedSkills`) is the
722
+ session's record of every SKILL.md invoked so far. When provided, a
723
+ skills-preservation `<system-reminder>` is injected right after the
724
+ summary so the model continues following skill gates even though the
725
+ original SKILL.md body was folded into the summary.
625
726
  """
626
727
  from aru.history_blocks import text_block, coerce_history_item
627
728
  _, recent = _split_history(history, model_id)
@@ -637,6 +738,11 @@ def apply_compaction(
637
738
  "summary": True,
638
739
  },
639
740
  ]
741
+
742
+ skills_item = _build_skills_preservation_item(invoked_skills)
743
+ if skills_item is not None:
744
+ compacted.append(skills_item)
745
+
640
746
  compacted.extend(coerce_history_item(m) for m in recent)
641
747
 
642
748
  return compacted
@@ -647,6 +753,7 @@ async def compact_conversation(
647
753
  model_ref: str,
648
754
  plan_task: str | None = None,
649
755
  model_id: str = "default",
756
+ invoked_skills: dict | None = None,
650
757
  ) -> list[dict[str, str]]:
651
758
  """Run the compaction agent to summarize and replace history.
652
759
 
@@ -663,6 +770,13 @@ async def compact_conversation(
663
770
  step.
664
771
 
665
772
  Falls back to a mechanical summary if the agent call fails.
773
+
774
+ `invoked_skills` is `session.invoked_skills` — passed through to
775
+ `apply_compaction` so SKILL.md bodies survive the summary via a
776
+ `<system-reminder>` preservation block. When None (caller didn't pass
777
+ it, e.g. tests, subagent flows), `get_ctx().session.invoked_skills` is
778
+ consulted as a best-effort fallback. See claude-code
779
+ `createSkillAttachmentIfNeeded` for the pattern.
666
780
  """
667
781
  from aru.providers import create_model
668
782
 
@@ -677,6 +791,19 @@ async def compact_conversation(
677
791
  except (LookupError, AttributeError, ImportError):
678
792
  pass # no plugin manager available — proceed without hooks
679
793
 
794
+ # Best-effort: if caller didn't pass invoked_skills but there's a session
795
+ # in the current runtime context, use its record. Keeps legacy call sites
796
+ # (subagent compaction, tests) covered without forcing every caller to
797
+ # plumb the session through.
798
+ if invoked_skills is None:
799
+ try:
800
+ from aru.runtime import get_ctx
801
+ session = getattr(get_ctx(), "session", None)
802
+ if session is not None:
803
+ invoked_skills = getattr(session, "invoked_skills", None)
804
+ except (LookupError, AttributeError, ImportError):
805
+ pass
806
+
680
807
  prompt = build_compaction_prompt(history, plan_task, model_id=model_id)
681
808
 
682
809
  try:
@@ -705,12 +832,12 @@ async def compact_conversation(
705
832
  # Fallback: simple mechanical summary
706
833
  summary = _fallback_summary(history, plan_task)
707
834
 
708
- return apply_compaction(history, summary, model_id=model_id)
835
+ return apply_compaction(history, summary, model_id=model_id, invoked_skills=invoked_skills)
709
836
 
710
837
  except Exception:
711
838
  # Fallback if agent fails
712
839
  summary = _fallback_summary(history, plan_task)
713
- return apply_compaction(history, summary, model_id=model_id)
840
+ return apply_compaction(history, summary, model_id=model_id, invoked_skills=invoked_skills)
714
841
 
715
842
 
716
843
  def _fallback_summary(history: list[dict], plan_task: str | None = None) -> str:
@@ -119,7 +119,7 @@ def _render_home(session, skip_permissions: bool) -> None:
119
119
  console.print(cmds)
120
120
  console.print()
121
121
 
122
- mode_label = "[red]skip permissions[/red]" if skip_permissions else "[green]safe mode[/green]"
122
+ mode_label = "[bold red]🔥 YOLO mode — permissions bypassed[/bold red]" if skip_permissions else "[green]safe mode[/green]"
123
123
  console.print(
124
124
  Text.from_markup(
125
125
  f" [dim]model:[/dim] [bold]{session.model_display}[/bold] [dim]({session.model_id})[/dim]"