aru-code 0.30.0__tar.gz → 0.31.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {aru_code-0.30.0/aru_code.egg-info → aru_code-0.31.0}/PKG-INFO +1 -1
  2. aru_code-0.31.0/aru/__init__.py +1 -0
  3. {aru_code-0.30.0 → aru_code-0.31.0}/aru/agent_factory.py +29 -69
  4. {aru_code-0.30.0 → aru_code-0.31.0}/aru/cli.py +5 -2
  5. {aru_code-0.30.0 → aru_code-0.31.0}/aru/context.py +74 -5
  6. {aru_code-0.30.0 → aru_code-0.31.0}/aru/permissions.py +55 -10
  7. {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/hooks.py +1 -1
  8. {aru_code-0.30.0 → aru_code-0.31.0}/aru/runner.py +4 -1
  9. {aru_code-0.30.0 → aru_code-0.31.0}/aru/runtime.py +19 -0
  10. {aru_code-0.30.0 → aru_code-0.31.0}/aru/session.py +119 -25
  11. aru_code-0.31.0/aru/tool_policy.py +196 -0
  12. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/skill.py +10 -4
  13. {aru_code-0.30.0 → aru_code-0.31.0/aru_code.egg-info}/PKG-INFO +1 -1
  14. {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/SOURCES.txt +4 -1
  15. {aru_code-0.30.0 → aru_code-0.31.0}/pyproject.toml +1 -1
  16. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_context.py +49 -2
  17. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_invoked_skills.py +60 -8
  18. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_permissions.py +52 -0
  19. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_plugins.py +48 -0
  20. aru_code-0.31.0/tests/test_runtime.py +40 -0
  21. aru_code-0.31.0/tests/test_skill_disallowed_tools.py +150 -0
  22. aru_code-0.31.0/tests/test_tool_policy.py +146 -0
  23. aru_code-0.30.0/aru/__init__.py +0 -1
  24. aru_code-0.30.0/tests/test_skill_disallowed_tools.py +0 -78
  25. {aru_code-0.30.0 → aru_code-0.31.0}/LICENSE +0 -0
  26. {aru_code-0.30.0 → aru_code-0.31.0}/README.md +0 -0
  27. {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/__init__.py +0 -0
  28. {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/base.py +0 -0
  29. {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/catalog.py +0 -0
  30. {aru_code-0.30.0 → aru_code-0.31.0}/aru/agents/planner.py +0 -0
  31. {aru_code-0.30.0 → aru_code-0.31.0}/aru/cache_patch.py +0 -0
  32. {aru_code-0.30.0 → aru_code-0.31.0}/aru/checkpoints.py +0 -0
  33. {aru_code-0.30.0 → aru_code-0.31.0}/aru/commands.py +0 -0
  34. {aru_code-0.30.0 → aru_code-0.31.0}/aru/completers.py +0 -0
  35. {aru_code-0.30.0 → aru_code-0.31.0}/aru/config.py +0 -0
  36. {aru_code-0.30.0 → aru_code-0.31.0}/aru/display.py +0 -0
  37. {aru_code-0.30.0 → aru_code-0.31.0}/aru/history_blocks.py +0 -0
  38. {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugin_cache.py +0 -0
  39. {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/__init__.py +0 -0
  40. {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/custom_tools.py +0 -0
  41. {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/manager.py +0 -0
  42. {aru_code-0.30.0 → aru_code-0.31.0}/aru/plugins/tool_api.py +0 -0
  43. {aru_code-0.30.0 → aru_code-0.31.0}/aru/providers.py +0 -0
  44. {aru_code-0.30.0 → aru_code-0.31.0}/aru/select.py +0 -0
  45. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/__init__.py +0 -0
  46. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/_diff.py +0 -0
  47. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/_shared.py +0 -0
  48. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/ast_tools.py +0 -0
  49. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/codebase.py +0 -0
  50. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/delegate.py +0 -0
  51. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/file_ops.py +0 -0
  52. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/gitignore.py +0 -0
  53. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/mcp_client.py +0 -0
  54. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/plan_mode.py +0 -0
  55. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/ranker.py +0 -0
  56. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/registry.py +0 -0
  57. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/search.py +0 -0
  58. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/shell.py +0 -0
  59. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/tasklist.py +0 -0
  60. {aru_code-0.30.0 → aru_code-0.31.0}/aru/tools/web.py +0 -0
  61. {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/dependency_links.txt +0 -0
  62. {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/entry_points.txt +0 -0
  63. {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/requires.txt +0 -0
  64. {aru_code-0.30.0 → aru_code-0.31.0}/aru_code.egg-info/top_level.txt +0 -0
  65. {aru_code-0.30.0 → aru_code-0.31.0}/setup.cfg +0 -0
  66. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_agents_base.py +0 -0
  67. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_agents_md_coverage.py +0 -0
  68. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cache_patch_metrics.py +0 -0
  69. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cache_patch_stop_reason.py +0 -0
  70. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_catalog.py +0 -0
  71. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_checkpoints.py +0 -0
  72. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli.py +0 -0
  73. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_advanced.py +0 -0
  74. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_base.py +0 -0
  75. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_completers.py +0 -0
  76. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_new.py +0 -0
  77. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_run_cli.py +0 -0
  78. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_session.py +0 -0
  79. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_cli_shell.py +0 -0
  80. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_codebase.py +0 -0
  81. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_confabulation_regression.py +0 -0
  82. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_config.py +0 -0
  83. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_gitignore.py +0 -0
  84. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_guardrails_scenarios.py +0 -0
  85. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_invoke_skill.py +0 -0
  86. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_main.py +0 -0
  87. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_mcp_client.py +0 -0
  88. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_plan_mode_refactor.py +0 -0
  89. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_plugin_cache.py +0 -0
  90. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_providers.py +0 -0
  91. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_ranker.py +0 -0
  92. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_runner_recovery.py +0 -0
  93. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_select.py +0 -0
  94. {aru_code-0.30.0 → aru_code-0.31.0}/tests/test_tasklist.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.30.0
3
+ Version: 0.31.0
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ __version__ = "0.31.0"
@@ -29,32 +29,26 @@ async def _fire_hook(event_name: str, data: dict) -> dict:
29
29
  return data
30
30
 
31
31
 
32
- # Tools blocked while the session is in plan mode. Read-only tools (read,
33
- # glob, grep, list_directory, web_search, web_fetch, etc.) are NOT in this
34
- # set — the agent needs them to research and write the plan. Mutating or
35
- # execution-capable tools are gated: the agent must call exit_plan_mode and
36
- # get user approval before running any of these.
37
- _PLAN_MODE_BLOCKED_TOOLS: frozenset[str] = frozenset({
38
- "edit_file",
39
- "edit_files",
40
- "write_file",
41
- "write_files",
42
- "bash",
43
- "delegate_task",
44
- })
32
+ # Backward-compat re-export. The canonical list now lives in
33
+ # aru.tool_policy.PLAN_MODE_BLOCKED_TOOLS; external callers (tests,
34
+ # docs) that import it from here keep working.
35
+ from aru.tool_policy import PLAN_MODE_BLOCKED_TOOLS as _PLAN_MODE_BLOCKED_TOOLS
45
36
 
46
37
 
47
38
  def _wrap_tools_with_hooks(tools: list) -> list:
48
- """Wrap tool functions to fire tool.execute.before/after plugin hooks.
49
-
50
- Before hook can mutate args; after hook can mutate the result.
51
- If a before hook raises, the tool is not executed and the error is returned.
52
-
53
- Also enforces the plan-mode gate: when `session.plan_mode` is True,
54
- any tool in `_PLAN_MODE_BLOCKED_TOOLS` short-circuits with a structured
55
- BLOCKED message telling the agent to call `exit_plan_mode` first. The
56
- gate runs BEFORE plugin hooks so plan mode is the highest-priority
57
- enforcement; plugins cannot accidentally bypass it.
39
+ """Wrap tool functions with a single tool-policy gate and plugin hooks.
40
+
41
+ The policy gate (plan mode + active-skill disallowed_tools) is
42
+ evaluated by `aru.tool_policy.evaluate_tool_policy` a single
43
+ decision function shared with `aru.permissions.resolve_permission`,
44
+ so both the wrapper and per-tool permission checks see the same
45
+ answer. When a tool is denied by multiple rules at once, the policy
46
+ layer returns one combined BLOCKED message rather than two
47
+ sequential contradictory ones (this is the scenario-1 fix of the
48
+ combinatorial gate audit).
49
+
50
+ Plugin hooks run AFTER the policy gate so a plugin's
51
+ tool.execute.before hook cannot bypass plan-mode / skill rules.
58
52
  """
59
53
 
60
54
  def _wrap_one(fn):
@@ -64,49 +58,13 @@ def _wrap_tools_with_hooks(tools: list) -> list:
64
58
  @functools.wraps(fn)
65
59
  async def wrapper(**kwargs):
66
60
  tool_name = fn.__name__
67
- # Plan-mode gate — fires before any other logic so a mutating
68
- # tool never reaches the permission layer or the actual executor.
69
- if tool_name in _PLAN_MODE_BLOCKED_TOOLS:
70
- try:
71
- from aru.runtime import get_ctx
72
- session = getattr(get_ctx(), "session", None)
73
- except (LookupError, AttributeError):
74
- session = None
75
- if session is not None and getattr(session, "plan_mode", False):
76
- return (
77
- f"BLOCKED: plan mode is active. Mutating tools "
78
- f"(edit/write/bash/delegate_task) are blocked until the "
79
- f"user approves the plan. Finish writing the plan as "
80
- f"your next assistant message, then call "
81
- f"exit_plan_mode(plan=<full plan text>) to request "
82
- f"approval. Do NOT retry {tool_name}."
83
- )
84
- # Active-skill disallowed-tools gate — honors the `disallowed-tools`
85
- # frontmatter field of the currently active skill. Mirrors the
86
- # plan-mode gate pattern above; runs before plugin hooks so a skill
87
- # can hard-block a tool regardless of permission/plugin state.
88
- try:
89
- from aru.runtime import get_ctx
90
- ctx = get_ctx()
91
- session = getattr(ctx, "session", None)
92
- config = getattr(ctx, "config", None)
93
- except (LookupError, AttributeError):
94
- session = None
95
- config = None
96
- if session is not None and config is not None:
97
- active = getattr(session, "active_skill", None)
98
- skills = getattr(config, "skills", None) or {}
99
- active_skill_obj = skills.get(active) if active else None
100
- disallowed = getattr(active_skill_obj, "disallowed_tools", None) or []
101
- if tool_name in disallowed:
102
- return (
103
- f"BLOCKED: tool `{tool_name}` is disallowed by the "
104
- f"currently active skill `{active}`. Read the skill's "
105
- f"SKILL.md for the prescribed path. Do NOT retry "
106
- f"`{tool_name}`; use the alternative the skill specifies "
107
- f"(commonly: write the output to a `.md` file via "
108
- f"`write_file` instead of using in-session state)."
109
- )
61
+ # Unified policy gate — one function, one decision, one
62
+ # message on denial (combines plan-mode + skill rules when
63
+ # both apply).
64
+ from aru.tool_policy import evaluate_tool_policy
65
+ decision = evaluate_tool_policy(tool_name)
66
+ if not decision.allowed:
67
+ return decision.message
110
68
  # Before hook — plugins can mutate args or raise PermissionError to block
111
69
  try:
112
70
  before_data = await _fire_hook("tool.execute.before", {
@@ -151,14 +109,16 @@ async def _apply_chat_hooks(instructions: str, model_ref: str, agent_name: str,
151
109
  })
152
110
  instructions = data.get("system_prompt", instructions)
153
111
 
154
- # chat.params — plugins can modify LLM parameters
112
+ # chat.params — plugins can modify LLM parameters. max_tokens is
113
+ # deliberately NOT exposed: it is coupled with the recovery loop in
114
+ # runner.py and mutating it from a plugin can break mid-thought
115
+ # recovery. Plugins that need to bound output should do so via model
116
+ # selection or temperature, not raw token limits.
155
117
  data = await _fire_hook("chat.params", {
156
118
  "model": model_ref,
157
- "max_tokens": max_tokens,
158
119
  "temperature": None, # let plugin set if desired
159
120
  })
160
121
  model_ref = data.get("model", model_ref)
161
- max_tokens = data.get("max_tokens", max_tokens)
162
122
 
163
123
  return instructions, model_ref, max_tokens
164
124
 
@@ -711,13 +711,16 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
711
711
  if not skill.user_invocable:
712
712
  console.print(f"[yellow]Skill '{cmd_name}' is not user-invocable[/yellow]")
713
713
  else:
714
- session.active_skill = cmd_name
714
+ # Slash-invoked skills always run under the primary agent
715
+ # scope (agent_id=None). Subagents reach skills via the
716
+ # invoke_skill tool, which keys by ctx.agent_id instead.
717
+ session.set_active_skill(None, cmd_name)
715
718
  prompt = render_skill_template(skill.content, cmd_args)
716
719
  # Record so the skill body survives compaction — mirror of
717
720
  # claude-code's addInvokedSkill. Store the rendered content
718
721
  # (post-argument substitution) so post-compact restoration
719
722
  # matches what the model initially read.
720
- session.record_invoked_skill(cmd_name, prompt, skill.source_path)
723
+ session.record_invoked_skill(cmd_name, prompt, skill.source_path, agent_id=None)
721
724
  console.print(f"[bold magenta]Running skill /{cmd_name}...[/bold magenta]")
722
725
 
723
726
  agent = await create_general_agent(session, config, env_context=_build_env_ctx())
@@ -525,6 +525,58 @@ def would_prune(history: list[dict], model_id: str = "default") -> bool:
525
525
  return total_tool_chars >= protect_chars + PRUNE_MINIMUM_CHARS
526
526
 
527
527
 
528
+ def _advance_split_past_tool_pairs(history: list[dict], split_idx: int) -> int:
529
+ """Move split_idx backward until no tool_result in recent is orphaned.
530
+
531
+ A tool_result block must travel with its matching tool_use block in
532
+ the same API request — Anthropic rejects a tool_result whose
533
+ tool_use_id is not declared by any tool_use in the conversation. If
534
+ the initial budget-based split falls between an assistant turn
535
+ (carrying tool_use) and the subsequent user turn (carrying
536
+ tool_result), the pair breaks: tool_use goes into `old` (and is
537
+ discarded when replaced by the summary), leaving tool_result
538
+ orphaned in `recent`.
539
+
540
+ This helper walks `split_idx` backward one index at a time until the
541
+ slice `history[split_idx:]` contains the matching tool_use for every
542
+ tool_result it holds. Matches opencode's invariant that pair
543
+ structure is never cut (compaction.ts does a mark-and-replace on the
544
+ tool output, but never removes either block).
545
+
546
+ O(n²) in the worst case, but n is bounded by history length and the
547
+ inner scan is only over the tail; on real sessions the loop ends in
548
+ 1-2 iterations or not at all.
549
+ """
550
+ from aru.history_blocks import is_tool_result, tool_use_ids_in_item
551
+
552
+ while split_idx > 0:
553
+ # All tool_use ids present in the recent slice
554
+ declared: set[str] = set()
555
+ for msg in history[split_idx:]:
556
+ if msg.get("role") == "assistant":
557
+ declared.update(tool_use_ids_in_item(msg))
558
+
559
+ orphaned = False
560
+ for msg in history[split_idx:]:
561
+ if msg.get("role") != "user":
562
+ continue
563
+ for block in (msg.get("content") or []):
564
+ if not is_tool_result(block):
565
+ continue
566
+ tid = block.get("tool_use_id")
567
+ if tid and tid not in declared:
568
+ orphaned = True
569
+ break
570
+ if orphaned:
571
+ break
572
+
573
+ if not orphaned:
574
+ return split_idx
575
+ split_idx -= 1
576
+
577
+ return split_idx
578
+
579
+
528
580
  def _split_history(history: list[dict], model_id: str = "default") -> tuple[list[dict], list[dict]]:
529
581
  """Split history into old (to summarize) and recent (to keep intact).
530
582
 
@@ -541,6 +593,12 @@ def _split_history(history: list[dict], model_id: str = "default") -> tuple[list
541
593
  of the summary, but keeping it in recent too means the agent can
542
594
  quote it verbatim afterward.
543
595
 
596
+ The split point is then walked backward (via
597
+ `_advance_split_past_tool_pairs`) to guarantee every tool_result in
598
+ `recent` has its matching tool_use in `recent`. Without that
599
+ invariant, a naive budget split can orphan a tool_result whose
600
+ tool_use landed in `old` — the API rejects such requests.
601
+
544
602
  The `model_id` parameter is retained for signature compatibility;
545
603
  the recent budget is a flat value not scaled by model context.
546
604
  """
@@ -556,6 +614,10 @@ def _split_history(history: list[dict], model_id: str = "default") -> tuple[list
556
614
  else:
557
615
  break
558
616
 
617
+ # Pair-safety: never let a tool_result in `recent` reference a tool_use
618
+ # that was discarded into `old`. Walks split_idx backward as needed.
619
+ split_idx = _advance_split_past_tool_pairs(history, split_idx)
620
+
559
621
  # Defensive: force the first user turn into `recent` even if the
560
622
  # protect budget would have sent it to `old`. The original ask is
561
623
  # the session anchor and must stay literal.
@@ -792,15 +854,22 @@ async def compact_conversation(
792
854
  pass # no plugin manager available — proceed without hooks
793
855
 
794
856
  # Best-effort: if caller didn't pass invoked_skills but there's a session
795
- # in the current runtime context, use its record. Keeps legacy call sites
796
- # (subagent compaction, tests) covered without forcing every caller to
797
- # plumb the session through.
857
+ # in the current runtime context, pull just this agent's slice. Keeps
858
+ # legacy call sites (subagent compaction, tests) covered without forcing
859
+ # every caller to plumb the session through. Filtering by agent_id means
860
+ # a subagent's compaction doesn't replay primary-scope skills and vice
861
+ # versa.
798
862
  if invoked_skills is None:
799
863
  try:
800
864
  from aru.runtime import get_ctx
801
- session = getattr(get_ctx(), "session", None)
865
+ ctx = get_ctx()
866
+ session = getattr(ctx, "session", None)
802
867
  if session is not None:
803
- invoked_skills = getattr(session, "invoked_skills", None)
868
+ getter = getattr(session, "get_invoked_skills_for_agent", None)
869
+ if callable(getter):
870
+ invoked_skills = getter(getattr(ctx, "agent_id", None))
871
+ else:
872
+ invoked_skills = getattr(session, "invoked_skills", None)
804
873
  except (LookupError, AttributeError, ImportError):
805
874
  pass
806
875
 
@@ -416,6 +416,21 @@ def _most_restrictive(
416
416
  return worst
417
417
 
418
418
 
419
+ # Mapping from permission category (what resolve_permission takes) to the
420
+ # tool_name used by the unified tool-policy gate (what evaluate_tool_policy
421
+ # takes). The permission system asks about *categories* (edit, write, bash),
422
+ # while the tool-policy layer reasons about tool *names* (edit_file, bash,
423
+ # ...). This mapping lets resolve_permission consult the tool-policy layer
424
+ # consistently so that, e.g., a bash check in plan mode denies at the
425
+ # permission level too — not only at the wrapper level.
426
+ _CATEGORY_TO_REPRESENTATIVE_TOOL: dict[str, str] = {
427
+ "edit": "edit_file",
428
+ "write": "write_file",
429
+ "bash": "bash",
430
+ "delegate_task": "delegate_task",
431
+ }
432
+
433
+
419
434
  def resolve_permission(
420
435
  category: str, subject: str = ""
421
436
  ) -> tuple[PermissionAction, str]:
@@ -425,15 +440,32 @@ def resolve_permission(
425
440
 
426
441
  Algorithm:
427
442
  1. If skip_permissions -> ("allow", "*")
428
- 2. Check session_allowed for matching (category, pattern) -> ("allow", pattern)
429
- 3. For bash: handle compound commands, then walk rules
430
- 4. For others: walk rules (defaults + user config), last-match-wins
431
- 5. Fallback: category default, then global default
443
+ 2. Consult unified tool-policy gate (plan_mode / skill disallowed).
444
+ If policy denies this category's representative tool, return
445
+ ("deny", "tool-policy"). This is how claude-code / opencode fold
446
+ mode-based gates into the same decision function that handles
447
+ user rules, instead of stacking independent short-circuits.
448
+ 3. Check session_allowed for matching (category, pattern)
449
+ -> ("allow", pattern)
450
+ 4. For bash: handle compound commands, then walk rules
451
+ 5. For others: walk rules (defaults + user config), last-match-wins
452
+ 6. Fallback: category default, then global default
432
453
  """
433
454
  ctx = get_ctx()
434
455
  if ctx.skip_permissions:
435
456
  return ("allow", "*")
436
457
 
458
+ # Unified tool-policy gate — shared with the agent_factory wrapper so
459
+ # both paths agree. A tool denied by plan_mode / skill rules is denied
460
+ # here too; the wrapper renders the combined message for the model,
461
+ # and this call returns a plain "deny" for the user-prompt codepath.
462
+ rep_tool = _CATEGORY_TO_REPRESENTATIVE_TOOL.get(category)
463
+ if rep_tool:
464
+ from aru.tool_policy import evaluate_tool_policy
465
+ decision = evaluate_tool_policy(rep_tool)
466
+ if not decision.allowed:
467
+ return ("deny", "tool-policy")
468
+
437
469
  # "Accept edits" mode auto-allows edit/write categories for the session.
438
470
  if ctx.permission_mode == "acceptEdits" and category in ("edit", "write"):
439
471
  return ("allow", "*")
@@ -469,8 +501,15 @@ def _fire_permission_hook(mgr, category: str, subject: str) -> bool | None:
469
501
 
470
502
  Supports both sync and async handlers. Returns True/False if a handler
471
503
  sets event.data["allow"], or None if no handler overrode the decision.
504
+
505
+ Async handlers dispatched in a worker thread carry a copied
506
+ contextvars.Context so plugin code can still call `get_ctx()` and
507
+ other contextvar-backed helpers — without the copy, the new
508
+ `asyncio.run` loop would see an empty context and break handlers
509
+ that rely on the runtime.
472
510
  """
473
511
  import asyncio
512
+ import contextvars
474
513
  from aru.plugins.hooks import HookEvent
475
514
 
476
515
  evt = HookEvent(hook="permission.ask", data={"category": category, "subject": subject})
@@ -480,15 +519,21 @@ def _fire_permission_hook(mgr, category: str, subject: str) -> bool | None:
480
519
  try:
481
520
  if asyncio.iscoroutinefunction(handler):
482
521
  # Async handler — run via the event loop
483
- loop = asyncio.get_event_loop()
484
- if loop.is_running():
485
- # Schedule as a task and wait with run_until_complete
486
- # won't work, so use a new loop in a thread
522
+ try:
523
+ loop = asyncio.get_running_loop()
524
+ except RuntimeError:
525
+ loop = None
526
+ if loop is not None:
527
+ # A loop is running in this thread; we cannot call
528
+ # run_until_complete. Dispatch to a worker thread
529
+ # with the current contextvars snapshot so the
530
+ # handler sees the same RuntimeContext.
487
531
  import concurrent.futures
532
+ snapshot = contextvars.copy_context()
488
533
  with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
489
- pool.submit(asyncio.run, handler(evt)).result(timeout=5)
534
+ pool.submit(snapshot.run, asyncio.run, handler(evt)).result(timeout=5)
490
535
  else:
491
- loop.run_until_complete(handler(evt))
536
+ asyncio.run(handler(evt))
492
537
  else:
493
538
  handler(evt)
494
539
  except Exception:
@@ -40,7 +40,7 @@ VALID_HOOKS = frozenset({
40
40
 
41
41
  # Chat lifecycle
42
42
  "chat.message", # Before user message is sent to LLM (can modify)
43
- "chat.params", # Before LLM call (can modify temperature, max_tokens)
43
+ "chat.params", # Before LLM call (can modify model, temperature). NOT max_tokens — coupled with recovery loop.
44
44
  "chat.system.transform", # Before LLM call (can modify system prompt)
45
45
  "chat.messages.transform", # Before LLM call (can modify message history)
46
46
 
@@ -755,10 +755,13 @@ async def run_agent_capture(agent, message: str, session=None, lightweight: bool
755
755
  with Status("[dim]Compacting context...[/dim]", console=console, spinner="dots"):
756
756
  try:
757
757
  session.history = prune_history(session.history, model_id=session.model_id)
758
+ # Only the primary agent's invoked skills get replayed
759
+ # after compaction here — subagents have their own
760
+ # compaction flow and carry their own agent_id.
758
761
  session.history = await compact_conversation(
759
762
  session.history, session.model_ref, session.plan_task,
760
763
  model_id=session.model_id,
761
- invoked_skills=session.invoked_skills,
764
+ invoked_skills=session.get_invoked_skills_for_agent(None),
762
765
  )
763
766
  console.print("[dim]Context compacted to save tokens.[/dim]")
764
767
  except Exception:
@@ -22,6 +22,7 @@ from __future__ import annotations
22
22
  import contextvars
23
23
  import copy
24
24
  import threading
25
+ import uuid
25
26
  from dataclasses import dataclass, field
26
27
  from typing import Any, Callable
27
28
 
@@ -104,6 +105,14 @@ class RuntimeContext:
104
105
  # -- Custom agents --
105
106
  custom_agent_defs: dict = field(default_factory=dict)
106
107
 
108
+ # -- Agent scope --
109
+ # Stable identifier for the current agent's execution scope. None means
110
+ # "primary agent" (the top-level conversation). Subagents forked via
111
+ # fork_ctx() receive a unique identifier here, used to key per-scope
112
+ # state such as active skills (so a subagent does not inherit the
113
+ # parent's skill-active state).
114
+ agent_id: str | None = None
115
+
107
116
  # -- Permissions --
108
117
  perm_config: Any = field(default_factory=_default_perm_config)
109
118
  session_allowed: set[tuple[str, str]] = field(default_factory=set)
@@ -165,6 +174,11 @@ def fork_ctx() -> RuntimeContext:
165
174
  Permission state is deep-copied to prevent interleaving when multiple
166
175
  sub-agents run concurrently via ``asyncio.gather``. Shared resources
167
176
  (console, locks, tracked_processes) are kept by reference.
177
+
178
+ The fork receives a fresh, unique ``agent_id`` so per-scope state
179
+ (e.g. active skills) keyed by agent_id is isolated from the parent.
180
+ Callers may overwrite ``agent_id`` afterwards if they prefer a more
181
+ descriptive label.
168
182
  """
169
183
  original = get_ctx()
170
184
  forked = copy.copy(original)
@@ -176,4 +190,9 @@ def fork_ctx() -> RuntimeContext:
176
190
  forked.read_cache = {}
177
191
  # Fresh task store per sub-agent
178
192
  forked.task_store = TaskStore()
193
+ # Assign a unique agent_id so skill scope is isolated from the parent.
194
+ # A uuid is used rather than an incrementing counter so nested forks
195
+ # (fork-of-a-fork) still get distinct ids even though the counter on
196
+ # the intermediate ctx was shallow-copied from the root.
197
+ forked.agent_id = f"subagent-{uuid.uuid4().hex[:8]}"
179
198
  return forked