aru-code 0.30.0__tar.gz → 0.31.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aru_code-0.30.0/aru_code.egg-info → aru_code-0.31.0}/PKG-INFO +1 -1
- aru_code-0.31.0/aru/__init__.py +1 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/agent_factory.py +29 -69
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/cli.py +5 -2
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/context.py +74 -5
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/permissions.py +55 -10
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/hooks.py +1 -1
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/runner.py +4 -1
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/runtime.py +19 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/session.py +119 -25
- aru_code-0.31.0/aru/tool_policy.py +196 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/skill.py +10 -4
- {aru_code-0.30.0 → aru_code-0.31.0/aru_code.egg-info}/PKG-INFO +1 -1
- {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/SOURCES.txt +4 -1
- {aru_code-0.30.0 → aru_code-0.31.0}/pyproject.toml +1 -1
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_context.py +49 -2
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_invoked_skills.py +60 -8
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_permissions.py +52 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_plugins.py +48 -0
- aru_code-0.31.0/tests/test_runtime.py +40 -0
- aru_code-0.31.0/tests/test_skill_disallowed_tools.py +150 -0
- aru_code-0.31.0/tests/test_tool_policy.py +146 -0
- aru_code-0.30.0/aru/__init__.py +0 -1
- aru_code-0.30.0/tests/test_skill_disallowed_tools.py +0 -78
- {aru_code-0.30.0 → aru_code-0.31.0}/LICENSE +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/README.md +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/__init__.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/base.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/catalog.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/planner.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/cache_patch.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/checkpoints.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/commands.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/completers.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/config.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/display.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/history_blocks.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugin_cache.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/__init__.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/custom_tools.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/manager.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/tool_api.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/providers.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/select.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/__init__.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/_diff.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/_shared.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/ast_tools.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/codebase.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/delegate.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/file_ops.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/gitignore.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/mcp_client.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/plan_mode.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/ranker.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/registry.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/search.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/shell.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/tasklist.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/web.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/dependency_links.txt +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/entry_points.txt +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/requires.txt +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/top_level.txt +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/setup.cfg +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_agents_base.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_agents_md_coverage.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cache_patch_metrics.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cache_patch_stop_reason.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_catalog.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_checkpoints.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_advanced.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_base.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_completers.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_new.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_run_cli.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_session.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_shell.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_codebase.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_confabulation_regression.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_config.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_gitignore.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_guardrails_scenarios.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_invoke_skill.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_main.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_mcp_client.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_plan_mode_refactor.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_plugin_cache.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_providers.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_ranker.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_runner_recovery.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_select.py +0 -0
- {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_tasklist.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.31.0"
|
|
@@ -29,32 +29,26 @@ async def _fire_hook(event_name: str, data: dict) -> dict:
|
|
|
29
29
|
return data
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
#
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
|
|
36
|
-
# get user approval before running any of these.
|
|
37
|
-
_PLAN_MODE_BLOCKED_TOOLS: frozenset[str] = frozenset({
|
|
38
|
-
"edit_file",
|
|
39
|
-
"edit_files",
|
|
40
|
-
"write_file",
|
|
41
|
-
"write_files",
|
|
42
|
-
"bash",
|
|
43
|
-
"delegate_task",
|
|
44
|
-
})
|
|
32
|
+
# Backward-compat re-export. The canonical list now lives in
|
|
33
|
+
# aru.tool_policy.PLAN_MODE_BLOCKED_TOOLS; external callers (tests,
|
|
34
|
+
# docs) that import it from here keep working.
|
|
35
|
+
from aru.tool_policy import PLAN_MODE_BLOCKED_TOOLS as _PLAN_MODE_BLOCKED_TOOLS
|
|
45
36
|
|
|
46
37
|
|
|
47
38
|
def _wrap_tools_with_hooks(tools: list) -> list:
|
|
48
|
-
"""Wrap tool functions
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
39
|
+
"""Wrap tool functions with a single tool-policy gate and plugin hooks.
|
|
40
|
+
|
|
41
|
+
The policy gate (plan mode + active-skill disallowed_tools) is
|
|
42
|
+
evaluated by `aru.tool_policy.evaluate_tool_policy` — a single
|
|
43
|
+
decision function shared with `aru.permissions.resolve_permission`,
|
|
44
|
+
so both the wrapper and per-tool permission checks see the same
|
|
45
|
+
answer. When a tool is denied by multiple rules at once, the policy
|
|
46
|
+
layer returns one combined BLOCKED message rather than two
|
|
47
|
+
sequential contradictory ones (this is the scenario-1 fix of the
|
|
48
|
+
combinatorial gate audit).
|
|
49
|
+
|
|
50
|
+
Plugin hooks run AFTER the policy gate so a plugin's
|
|
51
|
+
tool.execute.before hook cannot bypass plan-mode / skill rules.
|
|
58
52
|
"""
|
|
59
53
|
|
|
60
54
|
def _wrap_one(fn):
|
|
@@ -64,49 +58,13 @@ def _wrap_tools_with_hooks(tools: list) -> list:
|
|
|
64
58
|
@functools.wraps(fn)
|
|
65
59
|
async def wrapper(**kwargs):
|
|
66
60
|
tool_name = fn.__name__
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
session = None
|
|
75
|
-
if session is not None and getattr(session, "plan_mode", False):
|
|
76
|
-
return (
|
|
77
|
-
f"BLOCKED: plan mode is active. Mutating tools "
|
|
78
|
-
f"(edit/write/bash/delegate_task) are blocked until the "
|
|
79
|
-
f"user approves the plan. Finish writing the plan as "
|
|
80
|
-
f"your next assistant message, then call "
|
|
81
|
-
f"exit_plan_mode(plan=<full plan text>) to request "
|
|
82
|
-
f"approval. Do NOT retry {tool_name}."
|
|
83
|
-
)
|
|
84
|
-
# Active-skill disallowed-tools gate — honors the `disallowed-tools`
|
|
85
|
-
# frontmatter field of the currently active skill. Mirrors the
|
|
86
|
-
# plan-mode gate pattern above; runs before plugin hooks so a skill
|
|
87
|
-
# can hard-block a tool regardless of permission/plugin state.
|
|
88
|
-
try:
|
|
89
|
-
from aru.runtime import get_ctx
|
|
90
|
-
ctx = get_ctx()
|
|
91
|
-
session = getattr(ctx, "session", None)
|
|
92
|
-
config = getattr(ctx, "config", None)
|
|
93
|
-
except (LookupError, AttributeError):
|
|
94
|
-
session = None
|
|
95
|
-
config = None
|
|
96
|
-
if session is not None and config is not None:
|
|
97
|
-
active = getattr(session, "active_skill", None)
|
|
98
|
-
skills = getattr(config, "skills", None) or {}
|
|
99
|
-
active_skill_obj = skills.get(active) if active else None
|
|
100
|
-
disallowed = getattr(active_skill_obj, "disallowed_tools", None) or []
|
|
101
|
-
if tool_name in disallowed:
|
|
102
|
-
return (
|
|
103
|
-
f"BLOCKED: tool `{tool_name}` is disallowed by the "
|
|
104
|
-
f"currently active skill `{active}`. Read the skill's "
|
|
105
|
-
f"SKILL.md for the prescribed path. Do NOT retry "
|
|
106
|
-
f"`{tool_name}`; use the alternative the skill specifies "
|
|
107
|
-
f"(commonly: write the output to a `.md` file via "
|
|
108
|
-
f"`write_file` instead of using in-session state)."
|
|
109
|
-
)
|
|
61
|
+
# Unified policy gate — one function, one decision, one
|
|
62
|
+
# message on denial (combines plan-mode + skill rules when
|
|
63
|
+
# both apply).
|
|
64
|
+
from aru.tool_policy import evaluate_tool_policy
|
|
65
|
+
decision = evaluate_tool_policy(tool_name)
|
|
66
|
+
if not decision.allowed:
|
|
67
|
+
return decision.message
|
|
110
68
|
# Before hook — plugins can mutate args or raise PermissionError to block
|
|
111
69
|
try:
|
|
112
70
|
before_data = await _fire_hook("tool.execute.before", {
|
|
@@ -151,14 +109,16 @@ async def _apply_chat_hooks(instructions: str, model_ref: str, agent_name: str,
|
|
|
151
109
|
})
|
|
152
110
|
instructions = data.get("system_prompt", instructions)
|
|
153
111
|
|
|
154
|
-
# chat.params — plugins can modify LLM parameters
|
|
112
|
+
# chat.params — plugins can modify LLM parameters. max_tokens is
|
|
113
|
+
# deliberately NOT exposed: it is coupled with the recovery loop in
|
|
114
|
+
# runner.py and mutating it from a plugin can break mid-thought
|
|
115
|
+
# recovery. Plugins that need to bound output should do so via model
|
|
116
|
+
# selection or temperature, not raw token limits.
|
|
155
117
|
data = await _fire_hook("chat.params", {
|
|
156
118
|
"model": model_ref,
|
|
157
|
-
"max_tokens": max_tokens,
|
|
158
119
|
"temperature": None, # let plugin set if desired
|
|
159
120
|
})
|
|
160
121
|
model_ref = data.get("model", model_ref)
|
|
161
|
-
max_tokens = data.get("max_tokens", max_tokens)
|
|
162
122
|
|
|
163
123
|
return instructions, model_ref, max_tokens
|
|
164
124
|
|
|
@@ -711,13 +711,16 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
|
|
|
711
711
|
if not skill.user_invocable:
|
|
712
712
|
console.print(f"[yellow]Skill '{cmd_name}' is not user-invocable[/yellow]")
|
|
713
713
|
else:
|
|
714
|
-
|
|
714
|
+
# Slash-invoked skills always run under the primary agent
|
|
715
|
+
# scope (agent_id=None). Subagents reach skills via the
|
|
716
|
+
# invoke_skill tool, which keys by ctx.agent_id instead.
|
|
717
|
+
session.set_active_skill(None, cmd_name)
|
|
715
718
|
prompt = render_skill_template(skill.content, cmd_args)
|
|
716
719
|
# Record so the skill body survives compaction — mirror of
|
|
717
720
|
# claude-code's addInvokedSkill. Store the rendered content
|
|
718
721
|
# (post-argument substitution) so post-compact restoration
|
|
719
722
|
# matches what the model initially read.
|
|
720
|
-
session.record_invoked_skill(cmd_name, prompt, skill.source_path)
|
|
723
|
+
session.record_invoked_skill(cmd_name, prompt, skill.source_path, agent_id=None)
|
|
721
724
|
console.print(f"[bold magenta]Running skill /{cmd_name}...[/bold magenta]")
|
|
722
725
|
|
|
723
726
|
agent = await create_general_agent(session, config, env_context=_build_env_ctx())
|
|
@@ -525,6 +525,58 @@ def would_prune(history: list[dict], model_id: str = "default") -> bool:
|
|
|
525
525
|
return total_tool_chars >= protect_chars + PRUNE_MINIMUM_CHARS
|
|
526
526
|
|
|
527
527
|
|
|
528
|
+
def _advance_split_past_tool_pairs(history: list[dict], split_idx: int) -> int:
|
|
529
|
+
"""Move split_idx backward until no tool_result in recent is orphaned.
|
|
530
|
+
|
|
531
|
+
A tool_result block must travel with its matching tool_use block in
|
|
532
|
+
the same API request — Anthropic rejects a tool_result whose
|
|
533
|
+
tool_use_id is not declared by any tool_use in the conversation. If
|
|
534
|
+
the initial budget-based split falls between an assistant turn
|
|
535
|
+
(carrying tool_use) and the subsequent user turn (carrying
|
|
536
|
+
tool_result), the pair breaks: tool_use goes into `old` (and is
|
|
537
|
+
discarded when replaced by the summary), leaving tool_result
|
|
538
|
+
orphaned in `recent`.
|
|
539
|
+
|
|
540
|
+
This helper walks `split_idx` backward one index at a time until the
|
|
541
|
+
slice `history[split_idx:]` contains the matching tool_use for every
|
|
542
|
+
tool_result it holds. Matches opencode's invariant that pair
|
|
543
|
+
structure is never cut (compaction.ts does a mark-and-replace on the
|
|
544
|
+
tool output, but never removes either block).
|
|
545
|
+
|
|
546
|
+
O(n²) in the worst case, but n is bounded by history length and the
|
|
547
|
+
inner scan is only over the tail; on real sessions the loop ends in
|
|
548
|
+
1-2 iterations or not at all.
|
|
549
|
+
"""
|
|
550
|
+
from aru.history_blocks import is_tool_result, tool_use_ids_in_item
|
|
551
|
+
|
|
552
|
+
while split_idx > 0:
|
|
553
|
+
# All tool_use ids present in the recent slice
|
|
554
|
+
declared: set[str] = set()
|
|
555
|
+
for msg in history[split_idx:]:
|
|
556
|
+
if msg.get("role") == "assistant":
|
|
557
|
+
declared.update(tool_use_ids_in_item(msg))
|
|
558
|
+
|
|
559
|
+
orphaned = False
|
|
560
|
+
for msg in history[split_idx:]:
|
|
561
|
+
if msg.get("role") != "user":
|
|
562
|
+
continue
|
|
563
|
+
for block in (msg.get("content") or []):
|
|
564
|
+
if not is_tool_result(block):
|
|
565
|
+
continue
|
|
566
|
+
tid = block.get("tool_use_id")
|
|
567
|
+
if tid and tid not in declared:
|
|
568
|
+
orphaned = True
|
|
569
|
+
break
|
|
570
|
+
if orphaned:
|
|
571
|
+
break
|
|
572
|
+
|
|
573
|
+
if not orphaned:
|
|
574
|
+
return split_idx
|
|
575
|
+
split_idx -= 1
|
|
576
|
+
|
|
577
|
+
return split_idx
|
|
578
|
+
|
|
579
|
+
|
|
528
580
|
def _split_history(history: list[dict], model_id: str = "default") -> tuple[list[dict], list[dict]]:
|
|
529
581
|
"""Split history into old (to summarize) and recent (to keep intact).
|
|
530
582
|
|
|
@@ -541,6 +593,12 @@ def _split_history(history: list[dict], model_id: str = "default") -> tuple[list
|
|
|
541
593
|
of the summary, but keeping it in recent too means the agent can
|
|
542
594
|
quote it verbatim afterward.
|
|
543
595
|
|
|
596
|
+
The split point is then walked backward (via
|
|
597
|
+
`_advance_split_past_tool_pairs`) to guarantee every tool_result in
|
|
598
|
+
`recent` has its matching tool_use in `recent`. Without that
|
|
599
|
+
invariant, a naive budget split can orphan a tool_result whose
|
|
600
|
+
tool_use landed in `old` — the API rejects such requests.
|
|
601
|
+
|
|
544
602
|
The `model_id` parameter is retained for signature compatibility;
|
|
545
603
|
the recent budget is a flat value not scaled by model context.
|
|
546
604
|
"""
|
|
@@ -556,6 +614,10 @@ def _split_history(history: list[dict], model_id: str = "default") -> tuple[list
|
|
|
556
614
|
else:
|
|
557
615
|
break
|
|
558
616
|
|
|
617
|
+
# Pair-safety: never let a tool_result in `recent` reference a tool_use
|
|
618
|
+
# that was discarded into `old`. Walks split_idx backward as needed.
|
|
619
|
+
split_idx = _advance_split_past_tool_pairs(history, split_idx)
|
|
620
|
+
|
|
559
621
|
# Defensive: force the first user turn into `recent` even if the
|
|
560
622
|
# protect budget would have sent it to `old`. The original ask is
|
|
561
623
|
# the session anchor and must stay literal.
|
|
@@ -792,15 +854,22 @@ async def compact_conversation(
|
|
|
792
854
|
pass # no plugin manager available — proceed without hooks
|
|
793
855
|
|
|
794
856
|
# Best-effort: if caller didn't pass invoked_skills but there's a session
|
|
795
|
-
# in the current runtime context,
|
|
796
|
-
# (subagent compaction, tests) covered without forcing
|
|
797
|
-
# plumb the session through.
|
|
857
|
+
# in the current runtime context, pull just this agent's slice. Keeps
|
|
858
|
+
# legacy call sites (subagent compaction, tests) covered without forcing
|
|
859
|
+
# every caller to plumb the session through. Filtering by agent_id means
|
|
860
|
+
# a subagent's compaction doesn't replay primary-scope skills and vice
|
|
861
|
+
# versa.
|
|
798
862
|
if invoked_skills is None:
|
|
799
863
|
try:
|
|
800
864
|
from aru.runtime import get_ctx
|
|
801
|
-
|
|
865
|
+
ctx = get_ctx()
|
|
866
|
+
session = getattr(ctx, "session", None)
|
|
802
867
|
if session is not None:
|
|
803
|
-
|
|
868
|
+
getter = getattr(session, "get_invoked_skills_for_agent", None)
|
|
869
|
+
if callable(getter):
|
|
870
|
+
invoked_skills = getter(getattr(ctx, "agent_id", None))
|
|
871
|
+
else:
|
|
872
|
+
invoked_skills = getattr(session, "invoked_skills", None)
|
|
804
873
|
except (LookupError, AttributeError, ImportError):
|
|
805
874
|
pass
|
|
806
875
|
|
|
@@ -416,6 +416,21 @@ def _most_restrictive(
|
|
|
416
416
|
return worst
|
|
417
417
|
|
|
418
418
|
|
|
419
|
+
# Mapping from permission category (what resolve_permission takes) to the
|
|
420
|
+
# tool_name used by the unified tool-policy gate (what evaluate_tool_policy
|
|
421
|
+
# takes). The permission system asks about *categories* (edit, write, bash),
|
|
422
|
+
# while the tool-policy layer reasons about tool *names* (edit_file, bash,
|
|
423
|
+
# ...). This mapping lets resolve_permission consult the tool-policy layer
|
|
424
|
+
# consistently so that, e.g., a bash check in plan mode denies at the
|
|
425
|
+
# permission level too — not only at the wrapper level.
|
|
426
|
+
_CATEGORY_TO_REPRESENTATIVE_TOOL: dict[str, str] = {
|
|
427
|
+
"edit": "edit_file",
|
|
428
|
+
"write": "write_file",
|
|
429
|
+
"bash": "bash",
|
|
430
|
+
"delegate_task": "delegate_task",
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
|
|
419
434
|
def resolve_permission(
|
|
420
435
|
category: str, subject: str = ""
|
|
421
436
|
) -> tuple[PermissionAction, str]:
|
|
@@ -425,15 +440,32 @@ def resolve_permission(
|
|
|
425
440
|
|
|
426
441
|
Algorithm:
|
|
427
442
|
1. If skip_permissions -> ("allow", "*")
|
|
428
|
-
2.
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
443
|
+
2. Consult unified tool-policy gate (plan_mode / skill disallowed).
|
|
444
|
+
If policy denies this category's representative tool, return
|
|
445
|
+
("deny", "tool-policy"). This is how claude-code / opencode fold
|
|
446
|
+
mode-based gates into the same decision function that handles
|
|
447
|
+
user rules, instead of stacking independent short-circuits.
|
|
448
|
+
3. Check session_allowed for matching (category, pattern)
|
|
449
|
+
-> ("allow", pattern)
|
|
450
|
+
4. For bash: handle compound commands, then walk rules
|
|
451
|
+
5. For others: walk rules (defaults + user config), last-match-wins
|
|
452
|
+
6. Fallback: category default, then global default
|
|
432
453
|
"""
|
|
433
454
|
ctx = get_ctx()
|
|
434
455
|
if ctx.skip_permissions:
|
|
435
456
|
return ("allow", "*")
|
|
436
457
|
|
|
458
|
+
# Unified tool-policy gate — shared with the agent_factory wrapper so
|
|
459
|
+
# both paths agree. A tool denied by plan_mode / skill rules is denied
|
|
460
|
+
# here too; the wrapper renders the combined message for the model,
|
|
461
|
+
# and this call returns a plain "deny" for the user-prompt codepath.
|
|
462
|
+
rep_tool = _CATEGORY_TO_REPRESENTATIVE_TOOL.get(category)
|
|
463
|
+
if rep_tool:
|
|
464
|
+
from aru.tool_policy import evaluate_tool_policy
|
|
465
|
+
decision = evaluate_tool_policy(rep_tool)
|
|
466
|
+
if not decision.allowed:
|
|
467
|
+
return ("deny", "tool-policy")
|
|
468
|
+
|
|
437
469
|
# "Accept edits" mode auto-allows edit/write categories for the session.
|
|
438
470
|
if ctx.permission_mode == "acceptEdits" and category in ("edit", "write"):
|
|
439
471
|
return ("allow", "*")
|
|
@@ -469,8 +501,15 @@ def _fire_permission_hook(mgr, category: str, subject: str) -> bool | None:
|
|
|
469
501
|
|
|
470
502
|
Supports both sync and async handlers. Returns True/False if a handler
|
|
471
503
|
sets event.data["allow"], or None if no handler overrode the decision.
|
|
504
|
+
|
|
505
|
+
Async handlers dispatched in a worker thread carry a copied
|
|
506
|
+
contextvars.Context so plugin code can still call `get_ctx()` and
|
|
507
|
+
other contextvar-backed helpers — without the copy, the new
|
|
508
|
+
`asyncio.run` loop would see an empty context and break handlers
|
|
509
|
+
that rely on the runtime.
|
|
472
510
|
"""
|
|
473
511
|
import asyncio
|
|
512
|
+
import contextvars
|
|
474
513
|
from aru.plugins.hooks import HookEvent
|
|
475
514
|
|
|
476
515
|
evt = HookEvent(hook="permission.ask", data={"category": category, "subject": subject})
|
|
@@ -480,15 +519,21 @@ def _fire_permission_hook(mgr, category: str, subject: str) -> bool | None:
|
|
|
480
519
|
try:
|
|
481
520
|
if asyncio.iscoroutinefunction(handler):
|
|
482
521
|
# Async handler — run via the event loop
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
522
|
+
try:
|
|
523
|
+
loop = asyncio.get_running_loop()
|
|
524
|
+
except RuntimeError:
|
|
525
|
+
loop = None
|
|
526
|
+
if loop is not None:
|
|
527
|
+
# A loop is running in this thread; we cannot call
|
|
528
|
+
# run_until_complete. Dispatch to a worker thread
|
|
529
|
+
# with the current contextvars snapshot so the
|
|
530
|
+
# handler sees the same RuntimeContext.
|
|
487
531
|
import concurrent.futures
|
|
532
|
+
snapshot = contextvars.copy_context()
|
|
488
533
|
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
|
489
|
-
pool.submit(asyncio.run, handler(evt)).result(timeout=5)
|
|
534
|
+
pool.submit(snapshot.run, asyncio.run, handler(evt)).result(timeout=5)
|
|
490
535
|
else:
|
|
491
|
-
|
|
536
|
+
asyncio.run(handler(evt))
|
|
492
537
|
else:
|
|
493
538
|
handler(evt)
|
|
494
539
|
except Exception:
|
|
@@ -40,7 +40,7 @@ VALID_HOOKS = frozenset({
|
|
|
40
40
|
|
|
41
41
|
# Chat lifecycle
|
|
42
42
|
"chat.message", # Before user message is sent to LLM (can modify)
|
|
43
|
-
"chat.params", # Before LLM call (can modify
|
|
43
|
+
"chat.params", # Before LLM call (can modify model, temperature). NOT max_tokens — coupled with recovery loop.
|
|
44
44
|
"chat.system.transform", # Before LLM call (can modify system prompt)
|
|
45
45
|
"chat.messages.transform", # Before LLM call (can modify message history)
|
|
46
46
|
|
|
@@ -755,10 +755,13 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
|
|
|
755
755
|
with Status("[dim]Compacting context...[/dim]", console=console, spinner="dots"):
|
|
756
756
|
try:
|
|
757
757
|
session.history = prune_history(session.history, model_id=session.model_id)
|
|
758
|
+
# Only the primary agent's invoked skills get replayed
|
|
759
|
+
# after compaction here — subagents have their own
|
|
760
|
+
# compaction flow and carry their own agent_id.
|
|
758
761
|
session.history = await compact_conversation(
|
|
759
762
|
session.history, session.model_ref, session.plan_task,
|
|
760
763
|
model_id=session.model_id,
|
|
761
|
-
invoked_skills=session.
|
|
764
|
+
invoked_skills=session.get_invoked_skills_for_agent(None),
|
|
762
765
|
)
|
|
763
766
|
console.print("[dim]Context compacted to save tokens.[/dim]")
|
|
764
767
|
except Exception:
|
|
@@ -22,6 +22,7 @@ from __future__ import annotations
|
|
|
22
22
|
import contextvars
|
|
23
23
|
import copy
|
|
24
24
|
import threading
|
|
25
|
+
import uuid
|
|
25
26
|
from dataclasses import dataclass, field
|
|
26
27
|
from typing import Any, Callable
|
|
27
28
|
|
|
@@ -104,6 +105,14 @@ class RuntimeContext:
|
|
|
104
105
|
# -- Custom agents --
|
|
105
106
|
custom_agent_defs: dict = field(default_factory=dict)
|
|
106
107
|
|
|
108
|
+
# -- Agent scope --
|
|
109
|
+
# Stable identifier for the current agent's execution scope. None means
|
|
110
|
+
# "primary agent" (the top-level conversation). Subagents forked via
|
|
111
|
+
# fork_ctx() receive a unique identifier here, used to key per-scope
|
|
112
|
+
# state such as active skills (so a subagent does not inherit the
|
|
113
|
+
# parent's skill-active state).
|
|
114
|
+
agent_id: str | None = None
|
|
115
|
+
|
|
107
116
|
# -- Permissions --
|
|
108
117
|
perm_config: Any = field(default_factory=_default_perm_config)
|
|
109
118
|
session_allowed: set[tuple[str, str]] = field(default_factory=set)
|
|
@@ -165,6 +174,11 @@ def fork_ctx() -> RuntimeContext:
|
|
|
165
174
|
Permission state is deep-copied to prevent interleaving when multiple
|
|
166
175
|
sub-agents run concurrently via ``asyncio.gather``. Shared resources
|
|
167
176
|
(console, locks, tracked_processes) are kept by reference.
|
|
177
|
+
|
|
178
|
+
The fork receives a fresh, unique ``agent_id`` so per-scope state
|
|
179
|
+
(e.g. active skills) keyed by agent_id is isolated from the parent.
|
|
180
|
+
Callers may overwrite ``agent_id`` afterwards if they prefer a more
|
|
181
|
+
descriptive label.
|
|
168
182
|
"""
|
|
169
183
|
original = get_ctx()
|
|
170
184
|
forked = copy.copy(original)
|
|
@@ -176,4 +190,9 @@ def fork_ctx() -> RuntimeContext:
|
|
|
176
190
|
forked.read_cache = {}
|
|
177
191
|
# Fresh task store per sub-agent
|
|
178
192
|
forked.task_store = TaskStore()
|
|
193
|
+
# Assign a unique agent_id so skill scope is isolated from the parent.
|
|
194
|
+
# A uuid is used rather than an incrementing counter so nested forks
|
|
195
|
+
# (fork-of-a-fork) still get distinct ids even though the counter on
|
|
196
|
+
# the intermediate ctx was shallow-copied from the root.
|
|
197
|
+
forked.agent_id = f"subagent-{uuid.uuid4().hex[:8]}"
|
|
179
198
|
return forked
|