@event4u/agent-config 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/.agent-src/commands/council/default.md +74 -76
  2. package/.agent-src/commands/feature/roadmap.md +22 -0
  3. package/.agent-src/commands/roadmap/create.md +38 -6
  4. package/.agent-src/commands/roadmap/execute.md +36 -9
  5. package/.agent-src/rules/agent-authority.md +1 -0
  6. package/.agent-src/rules/agent-docs.md +1 -0
  7. package/.agent-src/rules/analysis-skill-routing.md +1 -0
  8. package/.agent-src/rules/architecture.md +1 -0
  9. package/.agent-src/rules/artifact-drafting-protocol.md +1 -0
  10. package/.agent-src/rules/artifact-engagement-recording.md +1 -0
  11. package/.agent-src/rules/ask-when-uncertain.md +1 -0
  12. package/.agent-src/rules/augment-portability.md +1 -0
  13. package/.agent-src/rules/augment-source-of-truth.md +1 -0
  14. package/.agent-src/rules/autonomous-execution.md +1 -0
  15. package/.agent-src/rules/capture-learnings.md +1 -0
  16. package/.agent-src/rules/chat-history-cadence.md +34 -0
  17. package/.agent-src/rules/chat-history-ownership.md +1 -0
  18. package/.agent-src/rules/chat-history-visibility.md +1 -0
  19. package/.agent-src/rules/cli-output-handling.md +2 -2
  20. package/.agent-src/rules/command-suggestion-policy.md +1 -0
  21. package/.agent-src/rules/commit-conventions.md +1 -0
  22. package/.agent-src/rules/commit-policy.md +1 -0
  23. package/.agent-src/rules/context-hygiene.md +28 -0
  24. package/.agent-src/rules/direct-answers.md +18 -26
  25. package/.agent-src/rules/docker-commands.md +1 -0
  26. package/.agent-src/rules/docs-sync.md +1 -0
  27. package/.agent-src/rules/downstream-changes.md +1 -0
  28. package/.agent-src/rules/e2e-testing.md +1 -0
  29. package/.agent-src/rules/guidelines.md +1 -0
  30. package/.agent-src/rules/improve-before-implement.md +1 -0
  31. package/.agent-src/rules/language-and-tone.md +1 -0
  32. package/.agent-src/rules/laravel-translations.md +1 -0
  33. package/.agent-src/rules/markdown-safe-codeblocks.md +1 -0
  34. package/.agent-src/rules/minimal-safe-diff.md +1 -0
  35. package/.agent-src/rules/missing-tool-handling.md +1 -0
  36. package/.agent-src/rules/model-recommendation.md +1 -0
  37. package/.agent-src/rules/no-cheap-questions.md +15 -21
  38. package/.agent-src/rules/no-roadmap-references.md +1 -0
  39. package/.agent-src/rules/non-destructive-by-default.md +1 -0
  40. package/.agent-src/rules/onboarding-gate.md +33 -0
  41. package/.agent-src/rules/package-ci-checks.md +1 -0
  42. package/.agent-src/rules/php-coding.md +1 -0
  43. package/.agent-src/rules/preservation-guard.md +1 -0
  44. package/.agent-src/rules/review-routing-awareness.md +1 -0
  45. package/.agent-src/rules/reviewer-awareness.md +1 -0
  46. package/.agent-src/rules/roadmap-progress-sync.md +49 -0
  47. package/.agent-src/rules/role-mode-adherence.md +2 -2
  48. package/.agent-src/rules/rule-type-governance.md +29 -0
  49. package/.agent-src/rules/runtime-safety.md +1 -0
  50. package/.agent-src/rules/scope-control.md +1 -0
  51. package/.agent-src/rules/security-sensitive-stop.md +1 -0
  52. package/.agent-src/rules/size-enforcement.md +1 -0
  53. package/.agent-src/rules/skill-improvement-trigger.md +1 -0
  54. package/.agent-src/rules/skill-quality.md +1 -0
  55. package/.agent-src/rules/slash-command-routing-policy.md +39 -0
  56. package/.agent-src/rules/think-before-action.md +1 -0
  57. package/.agent-src/rules/token-efficiency.md +1 -0
  58. package/.agent-src/rules/tool-safety.md +1 -0
  59. package/.agent-src/rules/ui-audit-gate.md +1 -0
  60. package/.agent-src/rules/upstream-proposal.md +1 -0
  61. package/.agent-src/rules/user-interaction.md +1 -0
  62. package/.agent-src/rules/verify-before-complete.md +1 -0
  63. package/.agent-src/skills/roadmap-management/SKILL.md +29 -4
  64. package/.agent-src/skills/verify-completion-evidence/SKILL.md +8 -1
  65. package/.agent-src/templates/agent-settings.md +16 -0
  66. package/.agent-src/templates/roadmaps.md +12 -3
  67. package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +9 -0
  68. package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +4 -0
  69. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +4 -0
  70. package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +163 -0
  71. package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +111 -0
  72. package/.agent-src/templates/scripts/work_engine/hooks/settings.py +36 -0
  73. package/.agent-src/templates/scripts/work_engine/scoring/decision_trace.py +141 -0
  74. package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +125 -0
  75. package/.claude-plugin/marketplace.json +1 -1
  76. package/CHANGELOG.md +97 -0
  77. package/README.md +20 -20
  78. package/config/agent-settings.template.yml +23 -0
  79. package/docs/architecture.md +1 -1
  80. package/docs/catalog.md +5 -2
  81. package/docs/contracts/adr-settings-sync-engine.md +127 -0
  82. package/docs/contracts/decision-trace-v1.md +146 -0
  83. package/docs/contracts/file-ownership-matrix.json +7 -0
  84. package/docs/contracts/hook-architecture-v1.md +213 -0
  85. package/docs/contracts/load-context-budget-model.md +80 -0
  86. package/docs/contracts/load-context-schema.md +20 -0
  87. package/docs/contracts/memory-visibility-v1.md +138 -0
  88. package/docs/contracts/one-off-script-lifecycle.md +109 -0
  89. package/docs/contracts/roadmap-complexity-standard.md +137 -0
  90. package/docs/contracts/rule-interactions.yml +22 -0
  91. package/docs/customization.md +1 -0
  92. package/docs/development.md +4 -1
  93. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
  94. package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
  95. package/docs/guidelines/agent-infra/layered-settings.md +32 -13
  96. package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
  97. package/package.json +1 -1
  98. package/scripts/agent-config +64 -0
  99. package/scripts/ai_council/bundler.py +3 -3
  100. package/scripts/ai_council/clients.py +24 -8
  101. package/scripts/ai_council/one_off_archive/2026-05/README.md +67 -0
  102. package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
  103. package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +13 -8
  104. package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +180 -0
  105. package/scripts/ai_council/session.py +92 -0
  106. package/scripts/build_rule_trigger_matrix.py +360 -0
  107. package/scripts/capture_showcase_session.py +361 -0
  108. package/scripts/chat_history.py +11 -1
  109. package/scripts/check_always_budget.py +46 -2
  110. package/scripts/check_one_off_location.py +81 -0
  111. package/scripts/check_references.py +6 -0
  112. package/scripts/compress.py +5 -2
  113. package/scripts/context_hygiene_hook.py +181 -0
  114. package/scripts/council_cli.py +357 -0
  115. package/scripts/hook_manifest.yaml +184 -0
  116. package/scripts/hooks/__init__.py +1 -0
  117. package/scripts/hooks/augment-context-hygiene.sh +55 -0
  118. package/scripts/hooks/augment-dispatcher.sh +72 -0
  119. package/scripts/hooks/augment-onboarding-gate.sh +55 -0
  120. package/scripts/hooks/cline-dispatcher.sh +86 -0
  121. package/scripts/hooks/cursor-dispatcher.sh +76 -0
  122. package/scripts/hooks/dispatch_hook.py +348 -0
  123. package/scripts/hooks/envelope.py +98 -0
  124. package/scripts/hooks/gemini-dispatcher.sh +117 -0
  125. package/scripts/hooks/state_io.py +122 -0
  126. package/scripts/hooks/windsurf-dispatcher.sh +123 -0
  127. package/scripts/hooks_status.py +146 -0
  128. package/scripts/install.py +728 -51
  129. package/scripts/install.sh +1 -1
  130. package/scripts/lint_examples.py +98 -0
  131. package/scripts/lint_hook_manifest.py +216 -0
  132. package/scripts/lint_one_off_age.py +184 -0
  133. package/scripts/lint_roadmap_complexity.py +127 -0
  134. package/scripts/lint_rule_tiers.py +78 -0
  135. package/scripts/lint_showcase_sessions.py +148 -0
  136. package/scripts/minimal_safe_diff_hook.py +245 -0
  137. package/scripts/onboarding_gate_hook.py +142 -0
  138. package/scripts/readme_linter.py +12 -3
  139. package/scripts/roadmap_progress_hook.py +5 -0
  140. package/scripts/schemas/rule.schema.json +5 -0
  141. package/scripts/sync_agent_settings.py +32 -129
  142. package/scripts/sync_yaml_rt.py +734 -0
  143. package/scripts/verify_before_complete_hook.py +216 -0
  144. /package/scripts/ai_council/{_one_off_2a4_acceptance.py → one_off_archive/2026-05/_one_off_2a4_acceptance.py} +0 -0
  145. /package/scripts/ai_council/{_one_off_context_layer_v1_estimate.py → one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py} +0 -0
  146. /package/scripts/ai_council/{_one_off_context_layer_v1_review.py → one_off_archive/2026-05/_one_off_context_layer_v1_review.py} +0 -0
  147. /package/scripts/ai_council/{_one_off_followups_review.py → one_off_archive/2026-05/_one_off_followups_review.py} +0 -0
  148. /package/scripts/ai_council/{_one_off_nondestructive_inline_audit.py → one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py} +0 -0
  149. /package/scripts/{_one_off_phase4_dispatch_latency.py → ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py} +0 -0
  150. /package/scripts/{_one_off_phase6_trigger_jaccard.py → ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py} +0 -0
  151. /package/scripts/ai_council/{_one_off_phase_2a_budget_rebalance.py → one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py} +0 -0
  152. /package/scripts/ai_council/{_one_off_phase_2a_post_revert.py → one_off_archive/2026-05/_one_off_phase_2a_post_revert.py} +0 -0
  153. /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
  154. /package/scripts/ai_council/{_one_off_rule_hardening_v1.py → one_off_archive/2026-05/_one_off_rule_hardening_v1.py} +0 -0
  155. /package/scripts/ai_council/{_one_off_structural_open_questions.py → one_off_archive/2026-05/_one_off_structural_open_questions.py} +0 -0
  156. /package/scripts/ai_council/{_one_off_structural_optimization.py → one_off_archive/2026-05/_one_off_structural_optimization.py} +0 -0
  157. /package/scripts/ai_council/{_one_off_structural_v3_gaps.py → one_off_archive/2026-05/_one_off_structural_v3_gaps.py} +0 -0
  158. /package/scripts/ai_council/{_one_off_structural_v3_review.py → one_off_archive/2026-05/_one_off_structural_v3_review.py} +0 -0
@@ -117,11 +117,28 @@ Every roadmap follows this structure:
117
117
 
118
118
  ### Quality gates
119
119
 
120
- Every roadmap implicitly includes these gates (run after each step that changes code):
120
+ Every roadmap implicitly includes the project's quality pipeline
121
+ (static analysis, autofixes, tests). What's configurable is **when**
122
+ the pipeline runs during `/roadmap execute`, controlled by
123
+ `roadmap.quality_cadence` in `.agent-settings.yml`:
121
124
 
122
- - PHPStan must pass (detect command: artisan vs composer, see `rules/docker-commands.md`)
123
- - Rector: run with fix flag, verify no new PHPStan errors
124
- - Tests: run affected tests
125
+ | Cadence | Pipeline runs | Trade-off |
126
+ |---|---|---|
127
+ | `end_of_roadmap` (default) | Once before archiving | Fastest, fewest tokens; errors compound across phases |
128
+ | `per_phase` | After every completed phase + final | Balanced; catches drift at phase boundaries |
129
+ | `per_step` | After every completed step + final | Legacy verbose; highest token cost |
130
+
131
+ The default is `end_of_roadmap` because most steps are checkbox-only
132
+ content edits and a final pipeline run is the cheapest way to satisfy
133
+ `verify-before-complete`. Switch to `per_phase` for risky migrations or
134
+ unfamiliar codebases.
135
+
136
+ **Always-on, regardless of cadence:**
137
+
138
+ - Step checkboxes flip `[ ] → [x]` and the dashboard regenerates in the **same
139
+ response** (enforced by `roadmap-progress-sync`).
140
+ - Before any "roadmap complete" claim or archival, the pipeline runs
141
+ fresh (enforced by `verify-before-complete`).
125
142
 
126
143
  ### Step granularity
127
144
 
@@ -149,6 +166,14 @@ Every roadmap implicitly includes these gates (run after each step that changes
149
166
  the roadmap text. If the user declines, do **not** re-propose during
150
167
  `roadmap-execute`. Decline = silence. See [`scope-control`](../../rules/scope-control.md#decline--silence--no-re-asking-on-the-same-task).
151
168
  5. Save with a kebab-case filename (e.g. `optimize-webhook-jobs.md`).
169
+ **Before writing**, scan the entire roadmap namespace for a
170
+ collision — active, `archive/`, `skipped/`, and nested subdirs —
171
+ with `find agents/roadmaps -type f -iname "<name>.md"`. If any
172
+ hit comes back, stop and ask the user to rename, open the
173
+ existing file, or abort. Never silently overwrite an archived
174
+ or skipped roadmap. Detailed prompt in
175
+ [`commands/roadmap/create.md`](../../commands/roadmap/create.md)
176
+ step 6.
152
177
  6. Regenerate the dashboard so the new roadmap is included.
153
178
 
154
179
  ### Executing a roadmap
@@ -128,7 +128,12 @@ When reporting completion to the user:
128
128
  3. **Result** — numeric breakdown (tests passed/failed/skipped, errors,
129
129
  warnings)
130
130
  4. **Caveats** — anything the output flagged but you chose to accept
131
- 5. **Next step** — e.g. "Ready for `/commit`" or "Awaiting review"
131
+ 5. **Untracked files** — if `git status --short` shows any untracked
132
+ files in the working tree, list them verbatim in the report. This
133
+ keeps stray artefacts (logs, scratch scripts, ad-hoc
134
+ notes) from slipping unnoticed into a future commit. If the list is
135
+ empty, omit the section.
136
+ 6. **Next step** — e.g. "Ready for `/commit`" or "Awaiting review"
132
137
 
133
138
  ## Gotchas
134
139
 
@@ -188,3 +193,5 @@ Before sending a completion message:
188
193
  * [ ] No warnings or skips are hidden
189
194
  * [ ] Targeted tests green → full suite green → quality pipeline clean
190
195
  * [ ] `git status` reflects only the intended change set
196
+ * [ ] If `git status --short` shows untracked files, the report lists
197
+ them verbatim under "Untracked files"
@@ -217,6 +217,21 @@ pipelines:
217
217
  # Included by every cost_profile except `custom`.
218
218
  skill_improvement: true
219
219
 
220
+ # --- Roadmap execution ---
221
+ #
222
+ # Controls when /roadmap execute runs the project's quality pipeline.
223
+ # Step checkboxes and the dashboard are ALWAYS updated in the same
224
+ # response — that cadence is governed by `roadmap-progress-sync` and
225
+ # is non-negotiable. This setting only governs *quality tool runs*.
226
+ roadmap:
227
+ # When to run quality tools during /roadmap execute.
228
+ # end_of_roadmap = once, before archiving (default — fastest, fewest tokens)
229
+ # per_phase = once after every completed phase
230
+ # per_step = after every completed step (legacy; highest token cost)
231
+ # Iron Law `verify-before-complete` still applies — fresh output is
232
+ # mandatory before any "roadmap complete" claim, regardless of cadence.
233
+ quality_cadence: end_of_roadmap
234
+
220
235
  # --- Subagent orchestration ---
221
236
  subagents:
222
237
  # Model for implementer subagents (empty = same tier as the session model)
@@ -362,6 +377,7 @@ lives under `personal:` in YAML.
362
377
  | `hooks.chat_history.enabled` | `true`, `false` | `true` | Register the four chat-history hooks (turn-check, append, halt-append, heartbeat). Gated by **both** this flag AND `chat_history.enabled`; either off → no chat-history hook registers. |
363
378
  | `hooks.chat_history.script` | path | `scripts/chat_history.py` | Override path to the chat-history CLI. Set only when the script lives outside the standard location. |
364
379
  | `pipelines.skill_improvement` | `true`, `false` | `true` | When `true`: propose learning capture after meaningful tasks. When `false`: silent. Included in every profile except `custom`. |
380
+ | `roadmap.quality_cadence` | `end_of_roadmap`, `per_phase`, `per_step` | `end_of_roadmap` | When `/roadmap execute` runs the project's quality pipeline. Default skips per-step / per-phase runs and gates only the final archival. `per_phase` runs once after every phase; `per_step` is the legacy verbose mode. Step checkboxes and the dashboard are always updated regardless. `verify-before-complete` still requires fresh output before any "roadmap complete" claim. |
365
381
  | `subagents.implementer_model` | model alias or empty | _(empty)_ | Model for implementer subagents. Empty = same tier as session model. See [subagent-configuration](../contexts/subagent-configuration.md). |
366
382
  | `subagents.judge_model` | model alias or empty | _(empty)_ | Model for judge subagents. Empty = one tier above implementer (opus if sonnet, sonnet if haiku). |
367
383
  | `subagents.max_parallel` | integer | `3` | Maximum parallel subagent invocations. `1` serializes. |
@@ -39,11 +39,16 @@ Templates for roadmap files stored in `agents/roadmaps/` or `app/Modules/{Module
39
39
 
40
40
  ---
41
41
 
42
- ## Quality Gates (always apply)
42
+ ## Quality Gates (always apply at completion)
43
43
 
44
- Every roadmap must pass these before it is considered done:
44
+ Every roadmap must pass the project's quality pipeline before it is
45
+ considered done. **When** the pipeline runs during `/roadmap execute` is
46
+ governed by `roadmap.quality_cadence` in `.agent-settings.yml`
47
+ (`end_of_roadmap` default → once before archival; `per_phase` → after
48
+ every phase; `per_step` → after every step). Either way, a final fresh
49
+ run is mandatory before "complete" per `verify-before-complete`.
45
50
 
46
- Run the project's quality pipeline and test suite. Common commands:
51
+ Common commands:
47
52
 
48
53
  ```bash
49
54
  # PHP projects (inside Docker container if applicable)
@@ -64,6 +69,10 @@ Check `AGENTS.md` or `Makefile` / `Taskfile.yml` for the exact commands.
64
69
  Copy the structure below into a new file:
65
70
 
66
71
  ```markdown
72
+ ---
73
+ complexity: lightweight
74
+ ---
75
+
67
76
  # Roadmap: {Short descriptive title}
68
77
 
69
78
  > {One sentence: What is the expected outcome?}
@@ -19,8 +19,10 @@ from .hooks.builtin import (
19
19
  ChatHistoryHaltAppendHook,
20
20
  ChatHistoryHeartbeatHook,
21
21
  ChatHistoryTurnCheckHook,
22
+ DecisionTraceHook,
22
23
  DirectiveSetGuardHook,
23
24
  HaltSurfaceAuditHook,
25
+ MemoryVisibilityHook,
24
26
  StateShapeValidationHook,
25
27
  TraceHook,
26
28
  )
@@ -56,6 +58,13 @@ def _build_hook_registry(args: argparse.Namespace) -> HookRegistry:
56
58
  StateShapeValidationHook().register(registry)
57
59
  if settings.directive_set_guard:
58
60
  DirectiveSetGuardHook().register(registry)
61
+ if settings.decision_trace:
62
+ DecisionTraceHook().register(registry)
63
+ if settings.memory_visibility:
64
+ MemoryVisibilityHook(
65
+ cost_profile=settings.cost_profile,
66
+ visibility_off=settings.memory_visibility_off,
67
+ ).register(registry)
59
68
  if settings.chat_history_enabled:
60
69
  _register_chat_history_hooks(registry, settings)
61
70
 
@@ -24,8 +24,10 @@ from .builtin import (
24
24
  ChatHistoryHaltAppendHook,
25
25
  ChatHistoryHeartbeatHook,
26
26
  ChatHistoryTurnCheckHook,
27
+ DecisionTraceHook,
27
28
  DirectiveSetGuardHook,
28
29
  HaltSurfaceAuditHook,
30
+ MemoryVisibilityHook,
29
31
  StateShapeValidationHook,
30
32
  TraceHook,
31
33
  )
@@ -40,6 +42,7 @@ __all__ = [
40
42
  "ChatHistoryHaltAppendHook",
41
43
  "ChatHistoryHeartbeatHook",
42
44
  "ChatHistoryTurnCheckHook",
45
+ "DecisionTraceHook",
43
46
  "DirectiveSetGuardHook",
44
47
  "HaltSurfaceAuditHook",
45
48
  "HookCallback",
@@ -49,6 +52,7 @@ __all__ = [
49
52
  "HookHalt",
50
53
  "HookRegistry",
51
54
  "HookRunner",
55
+ "MemoryVisibilityHook",
52
56
  "StateShapeValidationHook",
53
57
  "TraceHook",
54
58
  ]
@@ -15,8 +15,10 @@ from .chat_history_append import ChatHistoryAppendHook
15
15
  from .chat_history_halt_append import ChatHistoryHaltAppendHook
16
16
  from .chat_history_heartbeat import ChatHistoryHeartbeatHook
17
17
  from .chat_history_turn_check import ChatHistoryTurnCheckHook
18
+ from .decision_trace import DecisionTraceHook
18
19
  from .directive_set_guard import DirectiveSetGuardHook
19
20
  from .halt_surface_audit import HaltSurfaceAuditHook
21
+ from .memory_visibility import MemoryVisibilityHook
20
22
  from .state_shape_validation import StateShapeValidationHook
21
23
  from .trace import TraceHook
22
24
 
@@ -25,8 +27,10 @@ __all__ = [
25
27
  "ChatHistoryHaltAppendHook",
26
28
  "ChatHistoryHeartbeatHook",
27
29
  "ChatHistoryTurnCheckHook",
30
+ "DecisionTraceHook",
28
31
  "DirectiveSetGuardHook",
29
32
  "HaltSurfaceAuditHook",
33
+ "MemoryVisibilityHook",
30
34
  "StateShapeValidationHook",
31
35
  "TraceHook",
32
36
  ]
@@ -0,0 +1,163 @@
1
+ """``DecisionTraceHook`` — emit a decision-trace JSON per phase.
2
+
3
+ Implements the v1 envelope from ``docs/contracts/decision-trace-v1.md``.
4
+ Default-off; opt-in via ``.agent-settings.yml``
5
+ ``decision_engine.surface_traces: true`` (mirrored into
6
+ ``HookSettings.decision_trace`` by :mod:`work_engine.hooks.settings`).
7
+
8
+ The hook is purely observational — it never mutates ``DeliveryState``,
9
+ never raises terminal errors. Stream / disk failures surface as
10
+ :class:`HookError` (non-fatal per the three-tier contract).
11
+
12
+ Trace layout (matches the contract):
13
+
14
+ * ``schema_version: 1``
15
+ * ``work_id`` — derived from the state-file directory name when the
16
+ caller follows the ``agents/state/work/<id>/state.json`` convention,
17
+ else from the state-file stem.
18
+ * ``phase`` — engine ``step_name`` (refine/memory/.../report).
19
+ * ``started_at`` / ``ended_at`` — ISO-8601 UTC timestamps captured on
20
+ ``BEFORE_STEP`` and ``AFTER_STEP``.
21
+ * ``confidence_band`` / ``risk_class`` — heuristics defined in
22
+ :mod:`work_engine.scoring.decision_trace`.
23
+ * ``rules`` — empty by default; the engine layer populates rule
24
+ applications when concerns wire into the trace bus (later phase).
25
+ * ``memory`` — counts and ids snapshotted from ``state.memory``.
26
+ * ``verify`` — claims/first-try-passes derived from ``state.verify``.
27
+ """
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import time
32
+ from datetime import datetime, timezone
33
+ from pathlib import Path
34
+ from typing import Any
35
+
36
+ from ...scoring.decision_trace import (
37
+ derive_confidence_band,
38
+ derive_risk_class,
39
+ summarise_memory,
40
+ summarise_verify,
41
+ )
42
+ from ..context import HookContext
43
+ from ..events import HookEvent
44
+ from ..exceptions import HookError
45
+ from ..registry import HookRegistry
46
+
47
+ SCHEMA_VERSION = 1
48
+ _MAX_MEMORY_IDS = 32
49
+
50
+
51
class DecisionTraceHook:
    """Emit one decision-trace JSON file per dispatcher step.

    The hook remembers the state-file path seen on the load events,
    timestamps each step on ``BEFORE_STEP`` and writes the envelope on
    ``AFTER_STEP``. Serialisation failures and disk failures are both
    reported as :class:`HookError`.

    Parameters
    ----------
    output_dir:
        Optional override for the trace destination. When ``None`` the
        hook writes alongside the WorkState file: if the state file
        sits under ``agents/state/work/<id>/state.json`` the trace
        lands at ``agents/state/work/<id>/decision-trace-<phase>.json``;
        otherwise the trace lands next to the state file as
        ``<stem>.decision-trace-<phase>.json``. When no state file was
        ever captured the trace path is the bare file name.
    """

    def __init__(self, output_dir: Path | None = None) -> None:
        self._output_dir = output_dir
        # Filled in by the load callbacks; ``None`` until one fires.
        self._state_file: Path | None = None
        # step name -> epoch seconds captured on BEFORE_STEP.
        self._step_started: dict[str, float] = {}

    def register(self, registry: HookRegistry) -> None:
        """Register the trace callbacks on the lifecycle events used."""
        registry.register(HookEvent.BEFORE_LOAD, self._capture_state_file)
        registry.register(HookEvent.AFTER_LOAD, self._capture_state_file)
        registry.register(HookEvent.BEFORE_STEP, self._mark_step_start)
        registry.register(HookEvent.AFTER_STEP, self._emit_trace)

    # -- lifecycle callbacks ------------------------------------------

    def _capture_state_file(self, ctx: HookContext) -> None:
        """Remember the state-file path when the context carries one."""
        if ctx.state_file is not None:
            self._state_file = Path(ctx.state_file)

    def _mark_step_start(self, ctx: HookContext) -> None:
        """Record the wall-clock start time of the named step."""
        if ctx.step_name:
            self._step_started[ctx.step_name] = time.time()

    def _emit_trace(self, ctx: HookContext) -> None:
        """Write the envelope for the step that just finished.

        Does nothing when the context has no step name.

        Raises
        ------
        HookError
            When the envelope cannot be serialised to JSON, or when the
            target directory or file cannot be written.
        """
        phase = ctx.step_name
        if not phase:
            return
        # Without a BEFORE_STEP marker the start time falls back to "now".
        started = self._step_started.pop(phase, time.time())
        envelope = self._build_envelope(ctx, started)
        # Serialise before touching the disk so an unserialisable value
        # surfaces as a HookError instead of escaping as a raw
        # TypeError/ValueError.
        try:
            payload = json.dumps(envelope, indent=2, sort_keys=False) + "\n"
        except (TypeError, ValueError) as exc:
            raise HookError(
                f"decision-trace serialisation failed: {exc}"
            ) from exc
        target = self._target_path(phase)
        try:
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(payload, encoding="utf-8")
        except OSError as exc:
            raise HookError(f"decision-trace write failed: {exc}") from exc

    # -- envelope construction ----------------------------------------

    def _build_envelope(
        self, ctx: HookContext, started: float,
    ) -> dict[str, Any]:
        """Assemble the trace dictionary for the current step.

        ``started`` is the epoch timestamp of the step start; the end
        timestamp is taken at call time. Memory, verify, questions and
        changes are read from ``ctx.delivery`` with ``getattr`` so a
        missing attribute is passed to the scoring helpers as ``None``.
        """
        delivery = ctx.delivery
        memory = summarise_memory(
            getattr(delivery, "memory", None),
            limit=_MAX_MEMORY_IDS,
        )
        verify = summarise_verify(getattr(delivery, "verify", None))
        # Any pending questions on the delivery count as ambiguity.
        ambiguity = bool(getattr(delivery, "questions", None))
        return {
            "schema_version": SCHEMA_VERSION,
            "work_id": self._work_id(),
            "phase": ctx.step_name,
            "started_at": _iso_utc(started),
            "ended_at": _iso_utc(time.time()),
            "confidence_band": derive_confidence_band(
                memory_hits=memory["hits"],
                verify_claims=verify["claims"],
                verify_first_try_passes=verify["first_try_passes"],
                ambiguity_flag=ambiguity,
            ),
            "risk_class": derive_risk_class(
                getattr(delivery, "changes", None),
            ),
            "rules": [],
            "memory": memory,
            "verify": verify,
        }

    # -- path helpers --------------------------------------------------

    def _work_dir(self) -> Path | None:
        """Return the ``.../work/<id>`` directory, or ``None`` if not used.

        The layout is recognised when the state file's parent directory
        has a name and that directory's own parent is called ``work``.
        """
        if self._state_file is None:
            return None
        parent = self._state_file.parent
        if parent.name and parent.parent.name == "work":
            return parent
        return None

    def _work_id(self) -> str:
        """Derive the work id: directory name, file stem, or ``"unknown"``."""
        if self._state_file is None:
            return "unknown"
        work_dir = self._work_dir()
        if work_dir is not None:
            return work_dir.name
        return self._state_file.stem

    def _target_path(self, phase: str) -> Path:
        """Compute where the trace for ``phase`` is written."""
        filename = f"decision-trace-{phase}.json"
        # An explicit output directory always wins.
        if self._output_dir is not None:
            return self._output_dir / filename
        if self._state_file is None:
            return Path(filename)
        work_dir = self._work_dir()
        if work_dir is not None:
            return work_dir / filename
        # Outside the work layout, prefix with the state-file stem.
        return self._state_file.parent / f"{self._state_file.stem}.{filename}"
154
+
155
+
156
+ def _iso_utc(epoch: float) -> str:
157
+ return (
158
+ datetime.fromtimestamp(epoch, tz=timezone.utc)
159
+ .strftime("%Y-%m-%dT%H:%M:%SZ")
160
+ )
161
+
162
+
163
+ __all__ = ["DecisionTraceHook", "SCHEMA_VERSION"]
@@ -0,0 +1,111 @@
1
+ """``MemoryVisibilityHook`` — emit the visibility line on save.
2
+
3
+ Implements the producer side of
4
+ ``docs/contracts/memory-visibility-v1.md``: derive ``asks/hits/ids``
5
+ from ``state.memory`` and thread the rendered line into
6
+ ``state.report`` so the agent's reply naturally carries the memory
7
+ heartbeat.
8
+
9
+ Fires on ``before_save`` for the same reason as
10
+ ``ChatHistoryHeartbeatHook``: ``cli._sync_back`` runs between
11
+ ``after_dispatch`` and ``before_save`` and reassigns
12
+ ``work.report = delivery.report``. A line written on
13
+ ``after_dispatch`` would be overwritten before ``_save``; firing on
14
+ ``before_save`` lands after the sync.
15
+
16
+ Default-off; opt-in via ``.agent-settings.yml``
17
+ ``hooks.memory_visibility.enabled: true`` (or implicitly when
18
+ ``memory.visibility`` is not ``off`` and the master switch is on).
19
+ The hook is purely observational: failures surface as
20
+ :class:`HookError` (non-fatal per the three-tier contract); the
21
+ engine never crashes on a visibility-line write.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ from typing import Any, Iterable
26
+
27
+ from ...scoring.memory_visibility import (
28
+ DEFAULT_ASKED_TYPES,
29
+ format_line,
30
+ should_emit,
31
+ summarise_visibility,
32
+ )
33
+ from ..context import HookContext
34
+ from ..events import HookEvent
35
+ from ..exceptions import HookError
36
+ from ..registry import HookRegistry
37
+
38
+
39
class MemoryVisibilityHook:
    """Thread the ``🧠 Memory: <hits>/<asks> · ids=[…]`` line into the report.

    Parameters
    ----------
    cost_profile:
        Cadence profile from ``.agent-settings.yml`` (``lean`` /
        ``standard`` / ``verbose``); forwarded to ``should_emit``, which
        decides whether the line is shown.
    visibility_off:
        When ``True``, forwarded to ``should_emit`` so the hook stays
        silent — mirrors ``memory.visibility: off`` in the consumer
        settings.
    asked_types:
        Optional override for the memory types counted as ``asks``.
        Any iterable is frozen into a tuple; ``None`` selects
        ``DEFAULT_ASKED_TYPES``.
    """

    def __init__(
        self,
        *,
        cost_profile: str = "standard",
        visibility_off: bool = False,
        asked_types: Iterable[str] | None = None,
    ) -> None:
        self._cost_profile = cost_profile
        self._visibility_off = visibility_off
        if asked_types is None:
            self._asked_types = DEFAULT_ASKED_TYPES
        else:
            self._asked_types = tuple(asked_types)

    def register(self, registry: HookRegistry) -> None:
        """Register the visibility-line emitter on ``before_save``."""
        registry.register(HookEvent.BEFORE_SAVE, self._on_before_save)

    def _on_before_save(self, ctx: HookContext) -> None:
        """Append the visibility line to ``ctx.work.report`` when due.

        Returns without touching the report when there is no work
        object, when ``should_emit`` declines, when ``format_line``
        yields nothing, or when the line is already in the report.

        Raises
        ------
        HookError
            When assigning ``work.report`` raises ``AttributeError``.
        """
        state = ctx.work
        if state is None:
            return

        summary = summarise_visibility(
            getattr(state, "memory", None),
            asked_types=self._asked_types,
        )
        allowed = should_emit(
            summary,
            cost_profile=self._cost_profile,
            visibility_off=self._visibility_off,
        )
        if not allowed:
            return

        line = format_line(summary)
        if not line:
            return

        current = getattr(state, "report", "") or ""
        # Skip the write when the line is already present.
        if line in current:
            return

        if current:
            updated = f"{current}\n\n{line}"
        else:
            updated = f"{line}"
        try:
            state.report = updated
        except AttributeError as exc:
            raise HookError(
                "memory-visibility: state.report not writable",
            ) from exc
99
+
100
+
101
def derive_visibility(memory: Any) -> str | None:
    """Render the visibility line straight from a memory list.

    Intended for callers that hold a ``memory`` list but no
    ``HookContext`` (CLI ad-hoc smoke tests, the audit-as-memory
    consumer). The list is summarised with the default asked types and
    the result of ``format_line`` is returned unchanged; per the
    original contract that is ``None`` when ``asks == 0``.
    """
    summary = summarise_visibility(memory)
    return format_line(summary)
109
+
110
+
111
+ __all__ = ["MemoryVisibilityHook", "derive_visibility"]
@@ -39,6 +39,10 @@ class HookSettings:
39
39
  halt_surface_audit: bool = False
40
40
  state_shape_validation: bool = False
41
41
  directive_set_guard: bool = False
42
+ decision_trace: bool = False
43
+ memory_visibility: bool = False
44
+ memory_visibility_off: bool = False
45
+ cost_profile: str = "standard"
42
46
  chat_history_enabled: bool = False
43
47
  chat_history_script: str = DEFAULT_CHAT_HISTORY_SCRIPT
44
48
 
@@ -102,6 +106,34 @@ def _settings_from_raw(data: dict[str, Any]) -> HookSettings:
102
106
  and _coerce_bool(global_chat.get("enabled"), False)
103
107
  )
104
108
 
109
+ decision_engine = data.get("decision_engine")
110
+ decision_trace_on = (
111
+ isinstance(decision_engine, dict)
112
+ and _coerce_bool(decision_engine.get("surface_traces"), False)
113
+ )
114
+
115
+ memory_section = data.get("memory")
116
+ visibility_off = False
117
+ if isinstance(memory_section, dict):
118
+ raw = memory_section.get("visibility")
119
+ if isinstance(raw, str) and raw.strip().lower() == "off":
120
+ visibility_off = True
121
+ elif isinstance(raw, bool) and raw is False:
122
+ visibility_off = True
123
+
124
+ memory_hooks = hooks.get("memory_visibility")
125
+ if isinstance(memory_hooks, dict):
126
+ memory_visibility_on = _coerce_bool(
127
+ memory_hooks.get("enabled"), True,
128
+ )
129
+ else:
130
+ memory_visibility_on = True
131
+
132
+ cost_profile_raw = data.get("cost_profile") or "standard"
133
+ cost_profile = (
134
+ str(cost_profile_raw).strip().lower() or "standard"
135
+ )
136
+
105
137
  return HookSettings(
106
138
  enabled=True,
107
139
  trace=_coerce_bool(hooks.get("trace"), False),
@@ -114,6 +146,10 @@ def _settings_from_raw(data: dict[str, Any]) -> HookSettings:
114
146
  directive_set_guard=_coerce_bool(
115
147
  hooks.get("directive_set_guard"), True
116
148
  ),
149
+ decision_trace=decision_trace_on,
150
+ memory_visibility=memory_visibility_on,
151
+ memory_visibility_off=visibility_off,
152
+ cost_profile=cost_profile,
117
153
  chat_history_enabled=chat_block_enabled and global_chat_on,
118
154
  chat_history_script=chat_script,
119
155
  )
@@ -0,0 +1,141 @@
1
+ """Confidence-band + risk-class heuristics for decision-trace v1.
2
+
3
+ These heuristics back the JSON envelope emitted by
4
+ :class:`work_engine.hooks.builtin.DecisionTraceHook`. They live here
5
+ (under ``scoring/``) so the rules and the hook share a single source
6
+ of truth, and so unit tests can exercise the heuristics without
7
+ spinning up a dispatcher.
8
+
9
+ Confidence-band heuristic (per
10
+ ``docs/contracts/decision-trace-v1.md``):
11
+
12
+ * ``high`` — ``memory.hits ≥ 2`` AND
13
+ ``verify.first_try_passes == verify.claims`` AND no ambiguity flag.
14
+ * ``medium`` — ``memory.hits ≥ 1`` OR ``verify.first_try_passes ≥ 1``.
15
+ * ``low`` — otherwise.
16
+
17
+ Edge case: ``verify.claims == 0`` is **not** ``high`` by default; it
18
+ folds into ``medium`` if at least one memory hit landed, ``low``
19
+ otherwise.
20
+
21
+ Risk-class heuristic: maximum risk across the files the phase
22
+ touched. With no file-ownership matrix wired in yet, the
23
+ implementation defaults to ``low`` and exposes a ``changes`` argument
24
+ so a future hook can pass concrete paths. If the phase touched any
25
+ files at all the heuristic returns ``medium`` so reviewers stay
26
+ nudged toward a closer look until the matrix lands.
27
+ """
28
+ from __future__ import annotations
29
+
30
+ from typing import Any, Iterable
31
+
32
# Confidence bands returned by ``derive_confidence_band``.
BAND_HIGH = "high"
BAND_MEDIUM = "medium"
BAND_LOW = "low"

# Risk classes for ``derive_risk_class``. The current heuristic only
# returns ``RISK_LOW`` / ``RISK_MEDIUM``; ``RISK_HIGH`` is reserved.
RISK_HIGH = "high"
RISK_MEDIUM = "medium"
RISK_LOW = "low"
39
+
40
+
41
def derive_confidence_band(
    *,
    memory_hits: int,
    verify_claims: int,
    verify_first_try_passes: int,
    ambiguity_flag: bool,
) -> str:
    """Classify a phase as ``high`` / ``medium`` / ``low`` confidence.

    ``high`` needs at least two memory hits, at least one verify claim
    with every claim passing on the first try, and no ambiguity flag.
    ``medium`` needs at least one memory hit or one first-try pass.
    Everything else is ``low``.
    """
    qualifies_for_high = (
        memory_hits >= 2
        and verify_claims > 0
        and verify_first_try_passes == verify_claims
        and not ambiguity_flag
    )
    if qualifies_for_high:
        return BAND_HIGH

    has_some_signal = memory_hits >= 1 or verify_first_try_passes >= 1
    return BAND_MEDIUM if has_some_signal else BAND_LOW
59
+
60
+
61
def derive_risk_class(changes: Any) -> str:
    """Return the trace-level risk class for a phase.

    ``changes`` is the ``delivery.changes`` slice — a list of dicts in
    the canonical engine shape, or ``None`` for pure planning phases.
    Until the file-ownership matrix is wired in, any iterable holding
    at least one change maps to ``medium`` and everything else maps to
    ``low``; ``high`` is reserved for the future ownership-matrix
    lookup.
    """
    # Falsy values (None, empty containers) and non-iterables carry no
    # change information.
    if not changes or not isinstance(changes, Iterable):
        return RISK_LOW

    touched = 0
    try:
        for _ in changes:
            touched += 1
    except TypeError:
        # Declared iterable but not actually iterable in practice.
        return RISK_LOW

    if touched > 0:
        return RISK_MEDIUM
    return RISK_LOW
79
+
80
+
81
def summarise_memory(
    memory: Any, *, limit: int = 32,
) -> dict[str, Any]:
    """Reduce ``state.memory`` into the trace-envelope ``memory`` slice.

    Only dict entries are counted; any other element of ``memory`` is
    skipped. Each counted entry contributes:

    * its ``asks`` value — ``1`` when the key is missing, falsy, or not
      coercible to an integer;
    * one hit, unless it carries a falsy ``hit`` flag;
    * its ``id`` (or, failing that, ``rule_id``) as a string, until
      ``limit`` ids have been collected.

    The trace only carries ids — bodies stay behind the privacy floor.

    Args:
        memory: The memory collection; may be ``None`` or any object.
        limit: Maximum number of ids to include in the result.

    Returns:
        A dict with ``asks`` (int), ``hits`` (int) and ``ids``
        (list of str). A falsy or non-iterable ``memory`` yields the
        all-zero summary instead of raising.
    """
    if not memory:
        return {"asks": 0, "hits": 0, "ids": []}
    try:
        entries = iter(memory)
    except TypeError:
        # ``memory`` is typed ``Any``; a truthy scalar must not crash
        # the trace, so treat it like "no memory".
        return {"asks": 0, "hits": 0, "ids": []}

    ids: list[str] = []
    asks = 0
    hits = 0
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        try:
            entry_asks = int(entry.get("asks", 1) or 0)
        except (TypeError, ValueError, OverflowError):
            # Malformed count: fall through to the one-ask default.
            entry_asks = 0
        # Every counted entry represents at least one ask.
        asks += entry_asks or 1
        if entry.get("hit", True):
            hits += 1
        entry_id = entry.get("id") or entry.get("rule_id")
        if entry_id and len(ids) < limit:
            ids.append(str(entry_id))
    return {"asks": asks, "hits": hits, "ids": ids}
105
+
106
+
107
def summarise_verify(verify: Any) -> dict[str, int]:
    """Reduce ``state.verify`` into the trace-envelope ``verify`` slice.

    ``verify`` may be ``None`` (no verify run yet), a dict carrying
    ``claims`` / ``first_try_passes``, or a list of attempt records.
    For a list, every element counts as a claim and dict elements with
    a truthy ``first_try_pass`` count as first-try passes. Anything
    else — including dict values that cannot be read as integers —
    collapses to zeros.

    Returns:
        A dict with integer ``claims`` and ``first_try_passes``.
    """

    def _as_count(value: Any) -> int:
        # Missing / falsy values and unparseable payloads count as zero
        # so a malformed verify slice cannot break trace emission.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    if isinstance(verify, dict):
        return {
            "claims": _as_count(verify.get("claims", 0)),
            "first_try_passes": _as_count(verify.get("first_try_passes", 0)),
        }
    if isinstance(verify, list):
        passes = sum(
            1 for entry in verify
            if isinstance(entry, dict) and entry.get("first_try_pass")
        )
        return {"claims": len(verify), "first_try_passes": passes}
    return {"claims": 0, "first_try_passes": 0}
128
+
129
+
130
# Public API of this module: the band / risk constants and the four
# heuristic helpers defined above.
__all__ = [
    "BAND_HIGH",
    "BAND_MEDIUM",
    "BAND_LOW",
    "RISK_HIGH",
    "RISK_MEDIUM",
    "RISK_LOW",
    "derive_confidence_band",
    "derive_risk_class",
    "summarise_memory",
    "summarise_verify",
]