@event4u/agent-config 2.9.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/rules/no-roadmap-references.md +19 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +32 -3
- package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +147 -1
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +28 -0
- package/README.md +31 -11
- package/config/agent-settings.template.yml +28 -0
- package/docs/contracts/decision-trace-v1.md +30 -0
- package/docs/contracts/hook-architecture-v1.md +46 -0
- package/docs/contracts/memory-visibility-v1.md +33 -0
- package/docs/contracts/settings-sync-yaml-subset.md +138 -0
- package/docs/readme-split-plan.md +102 -0
- package/package.json +1 -1
- package/scripts/_cli/cmd_settings_check.py +171 -0
- package/scripts/agent-config +40 -0
- package/scripts/chat_history.py +19 -0
- package/scripts/check_council_references.py +46 -5
- package/scripts/hooks/dispatch_hook.py +5 -1
- package/scripts/hooks/replay_hook.py +144 -0
- package/scripts/hooks/state_io.py +24 -1
- package/scripts/hooks_doctor.py +184 -0
- package/scripts/lint_hook_concern_budget.py +203 -0
- package/scripts/roadmap_progress_hook.py +11 -0
|
@@ -83,6 +83,25 @@ fail the build on any new violation.
|
|
|
83
83
|
descriptions — transient by construction, not part of the package
|
|
84
84
|
surface
|
|
85
85
|
|
|
86
|
+
## Structural carve-outs (immutable inputs / decision provenance)
|
|
87
|
+
|
|
88
|
+
Two source/target shapes are exempt from the council-link ban
|
|
89
|
+
because the target is **immutable input** or **decision provenance**,
|
|
90
|
+
not transient drafting state. The linter implements these directly
|
|
91
|
+
(`STRUCTURAL_CARVEOUTS` in `scripts/check_council_references.py`);
|
|
92
|
+
they do **not** need an inline `<!-- council-ref-allowed: ... -->`
|
|
93
|
+
pragma.
|
|
94
|
+
|
|
95
|
+
| Source | Target | Why |
|
|
96
|
+
| ---------------------------------------------- | ------------------------------------------------ | ------------------------------------------------------------------------------------ |
|
|
97
|
+
| `agents/contexts/evaluation-*.md` | `agents/council-questions/*.md` | Question file is a frozen function-parameter / spend-gate input, not documentation. |
|
|
98
|
+
| `docs/contracts/*.md` | `agents/council-sessions/*/synthesis.md` | Synthesis is the audit-trail receipt; contract inlines the decision body itself. |
|
|
99
|
+
|
|
100
|
+
Driven by the 2026-05-14 P3.4 council round (claude-sonnet-4-5 +
|
|
101
|
+
gpt-4o, converged on rule refactor over escape-hatch overuse). Any
|
|
102
|
+
other source/target combination still needs an inline pragma or
|
|
103
|
+
inline-summary rewrite.
|
|
104
|
+
|
|
86
105
|
## What to do instead
|
|
87
106
|
|
|
88
107
|
When a stable artifact needs to cite a transient finding:
|
|
@@ -23,8 +23,11 @@ from __future__ import annotations
|
|
|
23
23
|
|
|
24
24
|
from typing import Any, Iterable
|
|
25
25
|
|
|
26
|
+
from ...scoring.decision_trace import summarise_memory, summarise_verify
|
|
26
27
|
from ...scoring.memory_visibility import (
|
|
27
28
|
DEFAULT_ASKED_TYPES,
|
|
29
|
+
compute_affected,
|
|
30
|
+
format_changed_decisions_block,
|
|
28
31
|
format_line,
|
|
29
32
|
should_emit,
|
|
30
33
|
summarise_visibility,
|
|
@@ -82,20 +85,46 @@ class MemoryVisibilityHook:
|
|
|
82
85
|
visibility_off=self._visibility_off,
|
|
83
86
|
):
|
|
84
87
|
return
|
|
85
|
-
|
|
88
|
+
affected = self._derive_affected(work, memory)
|
|
89
|
+
line = format_line(summary, affected=affected)
|
|
86
90
|
if not line:
|
|
87
91
|
return
|
|
92
|
+
block = format_changed_decisions_block(
|
|
93
|
+
summary.get("ids") or [], affected,
|
|
94
|
+
)
|
|
88
95
|
existing = getattr(work, "report", "") or ""
|
|
89
|
-
|
|
96
|
+
rendered = line if block is None else f"{line}\n\n{block}"
|
|
97
|
+
if line in existing and (block is None or block in existing):
|
|
90
98
|
return
|
|
91
99
|
sep = "\n\n" if existing else ""
|
|
92
100
|
try:
|
|
93
|
-
work.report = f"{existing}{sep}{
|
|
101
|
+
work.report = f"{existing}{sep}{rendered}"
|
|
94
102
|
except AttributeError as exc:
|
|
95
103
|
raise HookError(
|
|
96
104
|
"memory-visibility: state.report not writable",
|
|
97
105
|
) from exc
|
|
98
106
|
|
|
107
|
+
def _derive_affected(self, work: Any, memory: Any) -> list[str] | None:
|
|
108
|
+
"""Compute the closed-list ``affected`` keys for this work step.
|
|
109
|
+
|
|
110
|
+
Reuses the decision-trace summarisers so the counterfactual
|
|
111
|
+
matches the trace hook's view of the same WorkState. Returns
|
|
112
|
+
``None`` when memory was not consulted (hits == 0); callers
|
|
113
|
+
then omit the ``· affected: …`` segment per the contract.
|
|
114
|
+
"""
|
|
115
|
+
memory_summary = summarise_memory(memory)
|
|
116
|
+
verify_summary = summarise_verify(getattr(work, "verify", None))
|
|
117
|
+
ambiguity = bool(getattr(work, "questions", None))
|
|
118
|
+
return compute_affected(
|
|
119
|
+
memory_hits=memory_summary["hits"],
|
|
120
|
+
verify_claims=verify_summary["claims"],
|
|
121
|
+
verify_first_try_passes=verify_summary["first_try_passes"],
|
|
122
|
+
ambiguity_flag=ambiguity,
|
|
123
|
+
changes=getattr(work, "changes", None),
|
|
124
|
+
applied_rules=getattr(work, "applied_rules", None),
|
|
125
|
+
test_plan=getattr(work, "test_plan", None),
|
|
126
|
+
)
|
|
127
|
+
|
|
99
128
|
|
|
100
129
|
def derive_visibility(memory: Any) -> str | None:
|
|
101
130
|
"""Convenience helper: render the line directly from a memory list.
|
|
@@ -4,6 +4,12 @@ Implements the v1 line shape from
|
|
|
4
4
|
``docs/contracts/memory-visibility-v1.md``:
|
|
5
5
|
|
|
6
6
|
🧠 Memory: <hits>/<asks> · ids=[<comma-separated-ids>]
|
|
7
|
+
🧠 Memory: <hits>/<asks> · ids=[<...>] · affected: <keys>
|
|
8
|
+
|
|
9
|
+
The optional ``· affected: <keys>`` trailing segment surfaces which
|
|
10
|
+
closed-list decision-trace keys diverged because memory was
|
|
11
|
+
consulted — see ``docs/contracts/decision-trace-v1.md`` "Memory
|
|
12
|
+
consequence keys".
|
|
7
13
|
|
|
8
14
|
The semantics matched to the work-engine model:
|
|
9
15
|
|
|
@@ -23,6 +29,8 @@ from __future__ import annotations
|
|
|
23
29
|
|
|
24
30
|
from typing import Any, Iterable
|
|
25
31
|
|
|
32
|
+
from .decision_trace import derive_confidence_band, derive_risk_class
|
|
33
|
+
|
|
26
34
|
ICON = "\U0001F9E0" # 🧠
|
|
27
35
|
DEFAULT_MAX_INLINE_IDS = 5
|
|
28
36
|
DEFAULT_ASKED_TYPES = (
|
|
@@ -32,6 +40,13 @@ DEFAULT_ASKED_TYPES = (
|
|
|
32
40
|
"historical-patterns",
|
|
33
41
|
)
|
|
34
42
|
|
|
43
|
+
CONSEQUENCE_KEYS: tuple[str, ...] = (
|
|
44
|
+
"confidence_band",
|
|
45
|
+
"risk_class",
|
|
46
|
+
"applied_rules",
|
|
47
|
+
"test_plan",
|
|
48
|
+
)
|
|
49
|
+
|
|
35
50
|
|
|
36
51
|
def summarise_visibility(
|
|
37
52
|
memory: Any,
|
|
@@ -69,16 +84,111 @@ def summarise_visibility(
|
|
|
69
84
|
return {"asks": asks, "hits": hits, "ids": ids}
|
|
70
85
|
|
|
71
86
|
|
|
87
|
+
def _normalise_key_value(value: Any) -> Any:
|
|
88
|
+
"""Return a comparable shape for a consequence-key value.
|
|
89
|
+
|
|
90
|
+
List-shaped keys (``applied_rules``, ``test_plan``) compare as
|
|
91
|
+
sorted tuples so order is not a divergence; scalar keys
|
|
92
|
+
(``confidence_band``, ``risk_class``) compare as-is.
|
|
93
|
+
"""
|
|
94
|
+
if isinstance(value, list):
|
|
95
|
+
return tuple(sorted(str(item) for item in value))
|
|
96
|
+
return value
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def diff_consequence_keys(
|
|
100
|
+
trace_with: dict[str, Any], trace_without: dict[str, Any],
|
|
101
|
+
) -> list[str]:
|
|
102
|
+
"""Return sorted keys whose values diverge between two traces.
|
|
103
|
+
|
|
104
|
+
Iterates the closed ``CONSEQUENCE_KEYS`` list defined in
|
|
105
|
+
``docs/contracts/decision-trace-v1.md``. A key is considered
|
|
106
|
+
*diverged* when its normalised value differs between the two
|
|
107
|
+
traces. Per the contract, when both sides are ``None`` the key
|
|
108
|
+
is suppressed from the diff entirely.
|
|
109
|
+
"""
|
|
110
|
+
affected: list[str] = []
|
|
111
|
+
for key in CONSEQUENCE_KEYS:
|
|
112
|
+
a = trace_with.get(key)
|
|
113
|
+
b = trace_without.get(key)
|
|
114
|
+
if a is None and b is None:
|
|
115
|
+
continue
|
|
116
|
+
if _normalise_key_value(a) != _normalise_key_value(b):
|
|
117
|
+
affected.append(key)
|
|
118
|
+
return sorted(affected)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def compute_affected(
|
|
122
|
+
*,
|
|
123
|
+
memory_hits: int,
|
|
124
|
+
verify_claims: int = 0,
|
|
125
|
+
verify_first_try_passes: int = 0,
|
|
126
|
+
ambiguity_flag: bool = False,
|
|
127
|
+
changes: Any = None,
|
|
128
|
+
applied_rules: list[str] | None = None,
|
|
129
|
+
test_plan: list[str] | None = None,
|
|
130
|
+
) -> list[str] | None:
|
|
131
|
+
"""Compute the ``affected`` consequence keys for the visibility line.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
* ``None`` when no memory was consulted (``memory_hits <= 0``)
|
|
135
|
+
— caller MUST omit the ``· affected: …`` segment.
|
|
136
|
+
* ``[]`` when memory was consulted but no closed-list key
|
|
137
|
+
diverged — caller MUST render ``· affected: none``.
|
|
138
|
+
* sorted list of keys otherwise.
|
|
139
|
+
|
|
140
|
+
The counterfactual trace is "what the heuristics would have
|
|
141
|
+
emitted if ``memory_hits`` had been ``0``". v1 covers
|
|
142
|
+
``confidence_band`` and ``risk_class`` via the existing scoring
|
|
143
|
+
helpers; ``applied_rules`` and ``test_plan`` pass through
|
|
144
|
+
unchanged because they are not yet memory-derived in the
|
|
145
|
+
engine — the keys stay in the closed list so the diff
|
|
146
|
+
infrastructure is in place when they wire in.
|
|
147
|
+
"""
|
|
148
|
+
if memory_hits <= 0:
|
|
149
|
+
return None
|
|
150
|
+
trace_with = {
|
|
151
|
+
"confidence_band": derive_confidence_band(
|
|
152
|
+
memory_hits=memory_hits,
|
|
153
|
+
verify_claims=verify_claims,
|
|
154
|
+
verify_first_try_passes=verify_first_try_passes,
|
|
155
|
+
ambiguity_flag=ambiguity_flag,
|
|
156
|
+
),
|
|
157
|
+
"risk_class": derive_risk_class(changes),
|
|
158
|
+
"applied_rules": list(applied_rules) if applied_rules else None,
|
|
159
|
+
"test_plan": list(test_plan) if test_plan else None,
|
|
160
|
+
}
|
|
161
|
+
trace_without = {
|
|
162
|
+
"confidence_band": derive_confidence_band(
|
|
163
|
+
memory_hits=0,
|
|
164
|
+
verify_claims=verify_claims,
|
|
165
|
+
verify_first_try_passes=verify_first_try_passes,
|
|
166
|
+
ambiguity_flag=ambiguity_flag,
|
|
167
|
+
),
|
|
168
|
+
"risk_class": derive_risk_class(changes),
|
|
169
|
+
"applied_rules": list(applied_rules) if applied_rules else None,
|
|
170
|
+
"test_plan": list(test_plan) if test_plan else None,
|
|
171
|
+
}
|
|
172
|
+
return diff_consequence_keys(trace_with, trace_without)
|
|
173
|
+
|
|
174
|
+
|
|
72
175
|
def format_line(
|
|
73
176
|
summary: dict[str, Any],
|
|
74
177
|
*,
|
|
75
178
|
max_inline_ids: int = DEFAULT_MAX_INLINE_IDS,
|
|
179
|
+
affected: list[str] | None = None,
|
|
76
180
|
) -> str | None:
|
|
77
181
|
"""Render the visibility line; return ``None`` when ``asks == 0``.
|
|
78
182
|
|
|
79
183
|
Cap inline ids at ``max_inline_ids`` and append ``…+N`` when the
|
|
80
184
|
list is longer. Returning ``None`` enforces the contract clause
|
|
81
185
|
"If ``asks == 0``, the engine MUST suppress the line entirely".
|
|
186
|
+
|
|
187
|
+
When ``affected`` is not ``None``, append the
|
|
188
|
+
``· affected: <keys>`` trailing segment from
|
|
189
|
+
``docs/contracts/memory-visibility-v1.md``: empty list renders as
|
|
190
|
+
``affected: none`` (consulted but no key diverged);
|
|
191
|
+
non-empty list renders the comma-separated keys.
|
|
82
192
|
"""
|
|
83
193
|
asks = int(summary.get("asks", 0) or 0)
|
|
84
194
|
if asks <= 0:
|
|
@@ -94,7 +204,39 @@ def format_line(
|
|
|
94
204
|
if overflow > 0:
|
|
95
205
|
suffix = ", " if rendered_ids else ""
|
|
96
206
|
rendered_ids = f"{rendered_ids}{suffix}\u2026+{overflow}"
|
|
97
|
-
|
|
207
|
+
line = f"{ICON} Memory: {hits}/{asks} \u00b7 ids=[{rendered_ids}]"
|
|
208
|
+
if affected is not None:
|
|
209
|
+
rendered_affected = ",".join(affected) if affected else "none"
|
|
210
|
+
line = f"{line} \u00b7 affected: {rendered_affected}"
|
|
211
|
+
return line
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def format_changed_decisions_block(
|
|
215
|
+
ids: Iterable[str], affected: Iterable[str] | None,
|
|
216
|
+
) -> str | None:
|
|
217
|
+
"""Render the end-of-run "Memory changed decisions" report block.
|
|
218
|
+
|
|
219
|
+
Per ``docs/contracts/memory-visibility-v1.md``: lists
|
|
220
|
+
``<id> → <key>`` rows derived from the same diff source as the
|
|
221
|
+
visibility line's ``affected`` segment. Returns ``None`` when
|
|
222
|
+
no key diverged (``affected`` empty / ``None``) so the caller
|
|
223
|
+
suppresses the block entirely.
|
|
224
|
+
|
|
225
|
+
Attribution in v1 is aggregate: each consulted id pairs with
|
|
226
|
+
each affected key. Per-id attribution is captured as a
|
|
227
|
+
follow-up risk in the roadmap Risk register.
|
|
228
|
+
"""
|
|
229
|
+
if not affected:
|
|
230
|
+
return None
|
|
231
|
+
affected_list = sorted(affected)
|
|
232
|
+
id_list = [str(i) for i in ids if isinstance(i, (str, int))]
|
|
233
|
+
if not id_list:
|
|
234
|
+
return None
|
|
235
|
+
lines = ["Memory changed decisions:"]
|
|
236
|
+
for entry_id in id_list:
|
|
237
|
+
for key in affected_list:
|
|
238
|
+
lines.append(f"- {entry_id} \u2192 {key}")
|
|
239
|
+
return "\n".join(lines)
|
|
98
240
|
|
|
99
241
|
|
|
100
242
|
def should_emit(
|
|
@@ -116,9 +258,13 @@ def should_emit(
|
|
|
116
258
|
|
|
117
259
|
|
|
118
260
|
__all__ = [
|
|
261
|
+
"CONSEQUENCE_KEYS",
|
|
119
262
|
"DEFAULT_ASKED_TYPES",
|
|
120
263
|
"DEFAULT_MAX_INLINE_IDS",
|
|
121
264
|
"ICON",
|
|
265
|
+
"compute_affected",
|
|
266
|
+
"diff_consequence_keys",
|
|
267
|
+
"format_changed_decisions_block",
|
|
122
268
|
"format_line",
|
|
123
269
|
"should_emit",
|
|
124
270
|
"summarise_visibility",
|
package/CHANGELOG.md
CHANGED
|
@@ -429,6 +429,34 @@ our recommendation order, not its support status.
|
|
|
429
429
|
> that forces a new era split (`# Era: 2.8.x`, etc.) — see
|
|
430
430
|
> [`docs/contracts/CHANGELOG-conventions.md § Era splits`](docs/contracts/CHANGELOG-conventions.md).
|
|
431
431
|
|
|
432
|
+
## [2.10.0](https://github.com/event4u-app/agent-config/compare/2.9.0...2.10.0) (2026-05-14)
|
|
433
|
+
|
|
434
|
+
### Features
|
|
435
|
+
|
|
436
|
+
* **ci:** lint-hook-concern-budget Tier-1 fail-closed gate ([8d60b8a](https://github.com/event4u-app/agent-config/commit/8d60b8ab464c4c5bdc6d072bb2a9b0123942e13b))
|
|
437
|
+
* **cli:** settings:check command + YAML subset contract ([638e740](https://github.com/event4u-app/agent-config/commit/638e74017ffea8d7c08073a949e86fde047db109))
|
|
438
|
+
* **hooks:** hooks:doctor + hooks:replay subcommands + fixture corpus ([3156e25](https://github.com/event4u-app/agent-config/commit/3156e25fd3253fc926a57b92e14b407a9ed54b58))
|
|
439
|
+
* **work-engine:** add decision-trace memory_visibility hook + scoring ([bf056ac](https://github.com/event4u-app/agent-config/commit/bf056ace877a696afa5fe758053ea1eb350e5dff))
|
|
440
|
+
|
|
441
|
+
### Bug Fixes
|
|
442
|
+
|
|
443
|
+
* **roadmap:** point productization P6 gate at archived proof-not-features path ([35a1009](https://github.com/event4u-app/agent-config/commit/35a1009cffdfde28c7b0384c890c31a7e62b70cf))
|
|
444
|
+
|
|
445
|
+
### Documentation
|
|
446
|
+
|
|
447
|
+
* **roadmap:** complete road-to-proof-not-features 16/16 + sync dashboard ([89af72d](https://github.com/event4u-app/agent-config/commit/89af72d5514b74c19d533b5a2e69ff7ddf16ecbc))
|
|
448
|
+
* **readme:** split README by audience + audience-order contract ([60a87c0](https://github.com/event4u-app/agent-config/commit/60a87c056555a80585d25555f3d5b87d54c7283a))
|
|
449
|
+
|
|
450
|
+
### Refactoring
|
|
451
|
+
|
|
452
|
+
* **check-council-references:** structural carve-outs for immutable inputs and decision provenance ([3ed7784](https://github.com/event4u-app/agent-config/commit/3ed77841c42c5e3ebf4191611bb7fa4a52ed2fa0))
|
|
453
|
+
|
|
454
|
+
### Chores
|
|
455
|
+
|
|
456
|
+
* **roadmap:** archive road-to-proof-not-features (16/16 done, Phase 1 deferred) ([9d05aed](https://github.com/event4u-app/agent-config/commit/9d05aed79a46023b1e95c5488a1e3d5e78748e67))
|
|
457
|
+
|
|
458
|
+
Tests: 3663 (+60 since 2.9.0)
|
|
459
|
+
|
|
432
460
|
## [2.9.0](https://github.com/event4u-app/agent-config/compare/2.8.0...2.9.0) (2026-05-13)
|
|
433
461
|
|
|
434
462
|
### Features
|
package/README.md
CHANGED
|
@@ -12,17 +12,37 @@ Give your AI agents an audit-disciplined orchestration contract — testing, Git
|
|
|
12
12
|
|
|
13
13
|
---
|
|
14
14
|
|
|
15
|
-
##
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
15
|
+
## Use it in your project
|
|
16
|
+
|
|
17
|
+
You run the package from a consumer repo — bootstrap via `npx`, let the
|
|
18
|
+
agent pick up your stack, and ship work end-to-end. New install? Start
|
|
19
|
+
with the [Quickstart](#quickstart) to write `.agent-settings.yml`,
|
|
20
|
+
`.augment/`, `.claude/`, …. Already installed? [Supported Tools](#supported-tools)
|
|
21
|
+
shows which AIs the package wires up, and [Featured Commands](#featured-commands)
|
|
22
|
+
lists the end-to-end workflows (`/implement-ticket`, `/work`, `/commit`,
|
|
23
|
+
`/create-pr`). For a deeper tour, see the
|
|
24
|
+
[2-minute demo: `/implement-ticket`](#2-minute-demo-implement-ticket).
|
|
25
|
+
|
|
26
|
+
## Prove it
|
|
27
|
+
|
|
28
|
+
The package is audit-disciplined by construction — every memory consult,
|
|
29
|
+
decision key, and hook concern lands in `agents/state/` so you can
|
|
30
|
+
replay it. [Core Principles](#core-principles) names the four invariants.
|
|
31
|
+
[What this package is — and what it isn't](#what-this-package-is--and-what-it-isnt)
|
|
32
|
+
draws the scope boundary. [Documentation](#documentation) lists the
|
|
33
|
+
contracts the package ships against, including
|
|
34
|
+
[`memory-visibility-v1`](docs/contracts/memory-visibility-v1.md) and
|
|
35
|
+
[`decision-trace-v1`](docs/contracts/decision-trace-v1.md).
|
|
36
|
+
|
|
37
|
+
## Contribute
|
|
38
|
+
|
|
39
|
+
Working on the package itself rather than with it?
|
|
40
|
+
[Development](#development) covers the `task ci` pipeline,
|
|
41
|
+
[Requirements](#requirements) the toolchain, and
|
|
42
|
+
[Maintainer telemetry](#maintainer-telemetry-opt-in-default-off) the
|
|
43
|
+
opt-in measurement loop. The source-of-truth tree is
|
|
44
|
+
`.agent-src.uncompressed/`; never hand-edit the generated `.augment/`
|
|
45
|
+
or `.agent-src/`.
|
|
26
46
|
|
|
27
47
|
## Quickstart
|
|
28
48
|
|
|
@@ -390,6 +390,34 @@ memory:
|
|
|
390
390
|
# Example: ["api[_-]?key", "/Users/[a-z]+/Library"]
|
|
391
391
|
redact_patterns: []
|
|
392
392
|
|
|
393
|
+
# --- Hooks ---
|
|
394
|
+
#
|
|
395
|
+
# Configuration surface for the hook dispatcher (see
|
|
396
|
+
# docs/contracts/hook-architecture-v1.md). Each subkey targets one
|
|
397
|
+
# concern script under scripts/hooks/ or scripts/. Defaults are safe;
|
|
398
|
+
# tune only when CI surfaces a real signal.
|
|
399
|
+
hooks:
|
|
400
|
+
# Concern budget gate (P3.3, lint_hook_concern_budget.py). Caps how
|
|
401
|
+
# many concerns may bind to a single (platform, event) cell and
|
|
402
|
+
# restricts fail_closed=true to an explicit Tier-1 allowlist.
|
|
403
|
+
#
|
|
404
|
+
# max_per_event — placeholder until Phase 1 of
|
|
405
|
+
# road-to-proof-not-features.md captures real session evidence.
|
|
406
|
+
# Threshold rule per roadmap: max(observed-in-Phase-1) × 1.5, rounded
|
|
407
|
+
# up. 8 = current-observed-max (5) × 1.5 = 7.5 → 8.
|
|
408
|
+
#
|
|
409
|
+
# tier1_concerns — concerns allowed to block agent execution on
|
|
410
|
+
# failure (fail_closed: true in the manifest). Promotion is explicit
|
|
411
|
+
# opt-in; keep empty until Phase 1 evidence justifies an entry.
|
|
412
|
+
#
|
|
413
|
+
# hard_fail — false = warn-only (default). Flip to true after ≥10
|
|
414
|
+
# captured sessions across host agents per
|
|
415
|
+
# road-to-distribution-and-adoption.md.
|
|
416
|
+
concern_budget:
|
|
417
|
+
max_per_event: 8
|
|
418
|
+
tier1_concerns: []
|
|
419
|
+
hard_fail: false
|
|
420
|
+
|
|
393
421
|
# --- Update check ---
|
|
394
422
|
#
|
|
395
423
|
# Daily background check against the npm registry for a newer
|
|
@@ -113,6 +113,36 @@ the trace inherits the **maximum** risk class across all files the
|
|
|
113
113
|
phase touched. If no files were touched (pure planning phase), risk
|
|
114
114
|
is `low`.
|
|
115
115
|
|
|
116
|
+
## Memory consequence keys
|
|
117
|
+
|
|
118
|
+
**Purpose.** Bound the surface area where a memory hit can be said
|
|
119
|
+
to have *changed* an outcome. Closed list, not open — without this
|
|
120
|
+
bound, every memory call risks the "memory affected everything"
|
|
121
|
+
failure mode (Risk register row 2 of
|
|
122
|
+
[`agents/roadmaps/road-to-proof-not-features.md`](../../agents/roadmaps/road-to-proof-not-features.md)).
|
|
123
|
+
|
|
124
|
+
**Closed list (v1).** Exactly four keys. Adding a fifth requires a
|
|
125
|
+
schema bump + entry under `### Breaking` in `CHANGELOG.md`.
|
|
126
|
+
|
|
127
|
+
| Key | Source | Diff semantics |
|
|
128
|
+
|---|---|---|
|
|
129
|
+
| `confidence_band` | Top-level envelope field. | String inequality (`high` ≠ `medium` ≠ `low`). |
|
|
130
|
+
| `risk_class` | Top-level envelope field. | String inequality. |
|
|
131
|
+
| `applied_rules` | Derived: sorted list of `rules[].rule_id` where `applied == true`. | Set inequality. |
|
|
132
|
+
| `test_plan` | Derived: sorted list of test paths captured in the Plan-phase `state.plan.tests` slice. May be `null` when the phase is not `plan` or no Plan-phase tests were captured. | Set inequality; `null` on either side suppresses the key from the diff. |
|
|
133
|
+
|
|
134
|
+
**Diff semantics.** The producer renders two traces for the same
|
|
135
|
+
phase: one **with** the memory entry consulted, one **without**
|
|
136
|
+
(re-running the heuristic against `memory.hits` decremented by the
|
|
137
|
+
entry's contribution). The `affected` field is the sorted list of
|
|
138
|
+
keys above whose values differ between the two traces. Empty list
|
|
139
|
+
means "consulted but no key diverged" — the call was informational,
|
|
140
|
+
not load-bearing.
|
|
141
|
+
|
|
142
|
+
**Out of scope for v1.** Gradations beyond binary key-diverged /
|
|
143
|
+
not-diverged (overridden, combined, filtered). Tracked as a Phase-1-
|
|
144
|
+
gated revisit in the same Risk register.
|
|
145
|
+
|
|
116
146
|
## Privacy floor
|
|
117
147
|
|
|
118
148
|
- `memory.ids` carries opaque ids only — no entry bodies, no secrets.
|
|
@@ -205,6 +205,50 @@ that:
|
|
|
205
205
|
The dispatcher silently no-ops when called with `--platform copilot`;
|
|
206
206
|
the fallback is consumed by reading the rule, not by hook invocation.
|
|
207
207
|
|
|
208
|
+
## Fixture corpus — `tests/fixtures/hooks/`
|
|
209
|
+
|
|
210
|
+
Replay-safe, platform-native payloads. One JSON file per event in the
|
|
211
|
+
agent-config event vocabulary. Consumed by `./agent-config hooks:replay`
|
|
212
|
+
and by the dispatcher replay tests
|
|
213
|
+
(`tests/hooks/test_hooks_replay.py` — Phase 2.4c).
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
tests/fixtures/hooks/
|
|
217
|
+
session_start.json · session_end.json · user_prompt_submit.json
|
|
218
|
+
pre_tool_use.json · post_tool_use.json · stop.json
|
|
219
|
+
pre_compact.json · agent_error.json
|
|
220
|
+
README.md — corpus contract + platform-shape table
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Each fixture is a **stdin payload** — the dispatcher wraps it via
|
|
224
|
+
`_build_envelope` before handing it to a concern. Required keys:
|
|
225
|
+
|
|
226
|
+
- Valid JSON object at the top level.
|
|
227
|
+
- `session_id` — string, non-empty (drives feedback dir naming).
|
|
228
|
+
- Event-specific fields realistic enough that the bound concerns
|
|
229
|
+
(`chat-history`, `roadmap-progress`, `context-hygiene`,
|
|
230
|
+
`verify-before-complete`, `minimal-safe-diff`) run without raising
|
|
231
|
+
— primarily `tool_name` (for `*_tool_use`), `prompt` (for
|
|
232
|
+
`user_prompt_submit`).
|
|
233
|
+
- No real user content. Committed alongside source; the redaction
|
|
234
|
+
workflow in [`hook-payload-capture`](../hook-payload-capture.md)
|
|
235
|
+
applies to **captured** payloads, not committed fixtures.
|
|
236
|
+
|
|
237
|
+
The corpus is platform-shape-representative, not platform-exhaustive
|
|
238
|
+
— multi-platform shape coverage lives in
|
|
239
|
+
`tests/hooks/test_event_shape_contract.py`. The replay test asserts
|
|
240
|
+
1:1 mapping between `EVENT_VOCABULARY` and this directory.
|
|
241
|
+
|
|
242
|
+
## Replay mode — `AGENT_CONFIG_REPLAY=1`
|
|
243
|
+
|
|
244
|
+
Concerns that write under `agents/state/` MUST honor the
|
|
245
|
+
`AGENT_CONFIG_REPLAY` env var: when set to `1`, skip all state
|
|
246
|
+
mutations and run as read-only. The dispatcher passes the env var
|
|
247
|
+
through to subprocess concerns unchanged. Concerns that do not honor
|
|
248
|
+
the flag are listed by `./agent-config hooks:doctor` as not
|
|
249
|
+
replay-safe; replay tests assert no `agents/state/` mutation
|
|
250
|
+
post-invocation.
|
|
251
|
+
|
|
208
252
|
## Stability
|
|
209
253
|
|
|
210
254
|
Beta. Breaking changes between v1 and v2 are allowed in a minor
|
|
@@ -218,3 +262,5 @@ majors.
|
|
|
218
262
|
operational how-to for capturing redacted live payloads to upgrade
|
|
219
263
|
a platform's chat-history extractor from `docs-verified` to
|
|
220
264
|
`payload-verified`.
|
|
265
|
+
- [`tests/fixtures/hooks/README.md`](../../tests/fixtures/hooks/README.md)
|
|
266
|
+
— fixture corpus contract.
|
|
@@ -24,6 +24,7 @@ and a single space:
|
|
|
24
24
|
|
|
25
25
|
```
|
|
26
26
|
🧠 Memory: <hits>/<asks> · ids=[<comma-separated-ids>]
|
|
27
|
+
🧠 Memory: <hits>/<asks> · ids=[<comma-separated-ids>] · affected: <keys>
|
|
27
28
|
```
|
|
28
29
|
|
|
29
30
|
Examples:
|
|
@@ -32,6 +33,8 @@ Examples:
|
|
|
32
33
|
🧠 Memory: 3/4 · ids=[mem_42, mem_57, mem_91]
|
|
33
34
|
🧠 Memory: 0/2 · ids=[]
|
|
34
35
|
🧠 Memory: 5/5 · ids=[mem_a01, mem_a02, mem_a03, …+2]
|
|
36
|
+
🧠 Memory: 3/4 · ids=[mem_42, mem_57] · affected: applied_rules,confidence_band
|
|
37
|
+
🧠 Memory: 2/4 · ids=[mem_42] · affected: none
|
|
35
38
|
```
|
|
36
39
|
|
|
37
40
|
Cap at 5 ids inline; remainder rendered as `…+N`. The full id list
|
|
@@ -45,10 +48,15 @@ lives in the decision-trace JSON
|
|
|
45
48
|
| `hits` | Count of `memory_retrieve_*` calls during this turn that returned ≥ 1 entry. |
|
|
46
49
|
| `asks` | Count of `memory_retrieve_*` calls during this turn — both successful and empty. |
|
|
47
50
|
| `ids` | Stable memory entry ids returned across all calls, deduped, ordered by retrieval timestamp. |
|
|
51
|
+
| `affected` | Optional trailing segment. Comma-separated, sorted list of decision-trace keys that diverged when this memory was consulted vs not consulted. Closed key list defined in [`decision-trace-v1.md § Memory consequence keys`](decision-trace-v1.md#memory-consequence-keys). Rendered as `none` when `hits ≥ 1` but no key diverged. Omitted entirely when `hits == 0` or when the producer cannot compute a counterfactual trace. |
|
|
48
52
|
|
|
49
53
|
`hits ≤ asks` is invariant. If `asks == 0`, the engine MUST suppress
|
|
50
54
|
the line entirely — no `0/0` noise.
|
|
51
55
|
|
|
56
|
+
The `affected` segment is a forward-compat trailing extension per
|
|
57
|
+
the Stability clause below — clients pinned to the segment-free
|
|
58
|
+
shape MUST still parse the line.
|
|
59
|
+
|
|
52
60
|
## Privacy floor
|
|
53
61
|
|
|
54
62
|
The visibility line and the JSON it derives from MUST NOT contain:
|
|
@@ -88,6 +96,31 @@ counts and ids for downstream metrics.
|
|
|
88
96
|
Cost-profile lookup respects `.agent-settings.yml`'s `cost_profile`
|
|
89
97
|
key. Default is `standard`.
|
|
90
98
|
|
|
99
|
+
## End-of-run "Memory changed decisions" block
|
|
100
|
+
|
|
101
|
+
When the visibility line carries a non-empty `affected` segment, the
|
|
102
|
+
engine MUST also append a structured block at the end of the run's
|
|
103
|
+
report surface so reviewers can audit attribution without parsing
|
|
104
|
+
the inline segment:
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
Memory changed decisions:
|
|
108
|
+
- mem_42 → confidence_band
|
|
109
|
+
- mem_57 → confidence_band
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Rules:
|
|
113
|
+
|
|
114
|
+
- Suppressed entirely when `affected` is empty or absent (no key
|
|
115
|
+
diverged, or memory was not consulted).
|
|
116
|
+
- Each consulted id from the visibility line's `ids` is paired with
|
|
117
|
+
each affected key. v1 attribution is aggregate; per-id attribution
|
|
118
|
+
is a follow-up risk tracked in the roadmap Risk register.
|
|
119
|
+
- Block heading is the literal string `Memory changed decisions:`
|
|
120
|
+
followed by `-` bullet lines in `<id> → <key>` shape.
|
|
121
|
+
- Implementation: `format_changed_decisions_block` in
|
|
122
|
+
`work_engine/scoring/memory_visibility.py`.
|
|
123
|
+
|
|
91
124
|
## Audit-as-memory feed
|
|
92
125
|
|
|
93
126
|
The visibility output produced by the engine is the input to the
|