@lannguyensi/harness 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/CHANGELOG.md +552 -0
  2. package/LICENSE +21 -0
  3. package/README.md +154 -0
  4. package/dist/cli/add/index.d.ts +14 -0
  5. package/dist/cli/add/index.js +71 -0
  6. package/dist/cli/add/index.js.map +1 -0
  7. package/dist/cli/add/mutate.d.ts +39 -0
  8. package/dist/cli/add/mutate.js +36 -0
  9. package/dist/cli/add/mutate.js.map +1 -0
  10. package/dist/cli/adopt/derive.d.ts +38 -0
  11. package/dist/cli/adopt/derive.js +94 -0
  12. package/dist/cli/adopt/derive.js.map +1 -0
  13. package/dist/cli/adopt/index.d.ts +20 -0
  14. package/dist/cli/adopt/index.js +156 -0
  15. package/dist/cli/adopt/index.js.map +1 -0
  16. package/dist/cli/apply/apply.d.ts +49 -0
  17. package/dist/cli/apply/apply.js +333 -0
  18. package/dist/cli/apply/apply.js.map +1 -0
  19. package/dist/cli/apply/generate-memory-index.d.ts +17 -0
  20. package/dist/cli/apply/generate-memory-index.js +167 -0
  21. package/dist/cli/apply/generate-memory-index.js.map +1 -0
  22. package/dist/cli/apply/generate-settings.d.ts +15 -0
  23. package/dist/cli/apply/generate-settings.js +87 -0
  24. package/dist/cli/apply/generate-settings.js.map +1 -0
  25. package/dist/cli/apply/index.d.ts +1 -0
  26. package/dist/cli/apply/index.js +2 -0
  27. package/dist/cli/apply/index.js.map +1 -0
  28. package/dist/cli/audit.d.ts +36 -0
  29. package/dist/cli/audit.js +121 -0
  30. package/dist/cli/audit.js.map +1 -0
  31. package/dist/cli/describe.d.ts +13 -0
  32. package/dist/cli/describe.js +26 -0
  33. package/dist/cli/describe.js.map +1 -0
  34. package/dist/cli/diff/engine.d.ts +21 -0
  35. package/dist/cli/diff/engine.js +161 -0
  36. package/dist/cli/diff/engine.js.map +1 -0
  37. package/dist/cli/diff/git.d.ts +6 -0
  38. package/dist/cli/diff/git.js +32 -0
  39. package/dist/cli/diff/git.js.map +1 -0
  40. package/dist/cli/diff/index.d.ts +15 -0
  41. package/dist/cli/diff/index.js +39 -0
  42. package/dist/cli/diff/index.js.map +1 -0
  43. package/dist/cli/diff/since-apply.d.ts +57 -0
  44. package/dist/cli/diff/since-apply.js +255 -0
  45. package/dist/cli/diff/since-apply.js.map +1 -0
  46. package/dist/cli/doctor/format.d.ts +2 -0
  47. package/dist/cli/doctor/format.js +126 -0
  48. package/dist/cli/doctor/format.js.map +1 -0
  49. package/dist/cli/doctor/index.d.ts +14 -0
  50. package/dist/cli/doctor/index.js +281 -0
  51. package/dist/cli/doctor/index.js.map +1 -0
  52. package/dist/cli/doctor/types.d.ts +46 -0
  53. package/dist/cli/doctor/types.js +2 -0
  54. package/dist/cli/doctor/types.js.map +1 -0
  55. package/dist/cli/dry-run.d.ts +46 -0
  56. package/dist/cli/dry-run.js +168 -0
  57. package/dist/cli/dry-run.js.map +1 -0
  58. package/dist/cli/exit-codes.d.ts +10 -0
  59. package/dist/cli/exit-codes.js +15 -0
  60. package/dist/cli/exit-codes.js.map +1 -0
  61. package/dist/cli/explain.d.ts +14 -0
  62. package/dist/cli/explain.js +97 -0
  63. package/dist/cli/explain.js.map +1 -0
  64. package/dist/cli/export.d.ts +31 -0
  65. package/dist/cli/export.js +84 -0
  66. package/dist/cli/export.js.map +1 -0
  67. package/dist/cli/index.d.ts +8 -0
  68. package/dist/cli/index.js +549 -0
  69. package/dist/cli/index.js.map +1 -0
  70. package/dist/cli/init/index.d.ts +17 -0
  71. package/dist/cli/init/index.js +57 -0
  72. package/dist/cli/init/index.js.map +1 -0
  73. package/dist/cli/init/templates.d.ts +4 -0
  74. package/dist/cli/init/templates.js +175 -0
  75. package/dist/cli/init/templates.js.map +1 -0
  76. package/dist/cli/list.d.ts +12 -0
  77. package/dist/cli/list.js +118 -0
  78. package/dist/cli/list.js.map +1 -0
  79. package/dist/cli/loader.d.ts +24 -0
  80. package/dist/cli/loader.js +74 -0
  81. package/dist/cli/loader.js.map +1 -0
  82. package/dist/cli/main.d.ts +2 -0
  83. package/dist/cli/main.js +6 -0
  84. package/dist/cli/main.js.map +1 -0
  85. package/dist/cli/policy/intercept.d.ts +34 -0
  86. package/dist/cli/policy/intercept.js +172 -0
  87. package/dist/cli/policy/intercept.js.map +1 -0
  88. package/dist/cli/remove/index.d.ts +18 -0
  89. package/dist/cli/remove/index.js +95 -0
  90. package/dist/cli/remove/index.js.map +1 -0
  91. package/dist/cli/remove/mutate.d.ts +9 -0
  92. package/dist/cli/remove/mutate.js +68 -0
  93. package/dist/cli/remove/mutate.js.map +1 -0
  94. package/dist/cli/validate/checks.d.ts +23 -0
  95. package/dist/cli/validate/checks.js +253 -0
  96. package/dist/cli/validate/checks.js.map +1 -0
  97. package/dist/cli/validate/index.d.ts +18 -0
  98. package/dist/cli/validate/index.js +50 -0
  99. package/dist/cli/validate/index.js.map +1 -0
  100. package/dist/cli/validate/types.d.ts +7 -0
  101. package/dist/cli/validate/types.js +5 -0
  102. package/dist/cli/validate/types.js.map +1 -0
  103. package/dist/index.d.ts +15 -0
  104. package/dist/index.js +16 -0
  105. package/dist/index.js.map +1 -0
  106. package/dist/io/atomic-write.d.ts +8 -0
  107. package/dist/io/atomic-write.js +30 -0
  108. package/dist/io/atomic-write.js.map +1 -0
  109. package/dist/io/harness-lock.d.ts +33 -0
  110. package/dist/io/harness-lock.js +260 -0
  111. package/dist/io/harness-lock.js.map +1 -0
  112. package/dist/io/last-apply.d.ts +20 -0
  113. package/dist/io/last-apply.js +123 -0
  114. package/dist/io/last-apply.js.map +1 -0
  115. package/dist/io/lock.d.ts +11 -0
  116. package/dist/io/lock.js +33 -0
  117. package/dist/io/lock.js.map +1 -0
  118. package/dist/io/patch.d.ts +10 -0
  119. package/dist/io/patch.js +8 -0
  120. package/dist/io/patch.js.map +1 -0
  121. package/dist/io/restart-hints.d.ts +5 -0
  122. package/dist/io/restart-hints.js +59 -0
  123. package/dist/io/restart-hints.js.map +1 -0
  124. package/dist/io/three-state.d.ts +7 -0
  125. package/dist/io/three-state.js +20 -0
  126. package/dist/io/three-state.js.map +1 -0
  127. package/dist/io/validate-before-write.d.ts +12 -0
  128. package/dist/io/validate-before-write.js +23 -0
  129. package/dist/io/validate-before-write.js.map +1 -0
  130. package/dist/overrides/index.d.ts +2 -0
  131. package/dist/overrides/index.js +3 -0
  132. package/dist/overrides/index.js.map +1 -0
  133. package/dist/overrides/machines.d.ts +12 -0
  134. package/dist/overrides/machines.js +46 -0
  135. package/dist/overrides/machines.js.map +1 -0
  136. package/dist/overrides/merge.d.ts +6 -0
  137. package/dist/overrides/merge.js +173 -0
  138. package/dist/overrides/merge.js.map +1 -0
  139. package/dist/policies/duration.d.ts +5 -0
  140. package/dist/policies/duration.js +50 -0
  141. package/dist/policies/duration.js.map +1 -0
  142. package/dist/policies/extract.d.ts +50 -0
  143. package/dist/policies/extract.js +190 -0
  144. package/dist/policies/extract.js.map +1 -0
  145. package/dist/policies/index.d.ts +5 -0
  146. package/dist/policies/index.js +6 -0
  147. package/dist/policies/index.js.map +1 -0
  148. package/dist/policies/ledger-client.d.ts +39 -0
  149. package/dist/policies/ledger-client.js +378 -0
  150. package/dist/policies/ledger-client.js.map +1 -0
  151. package/dist/policies/requires.d.ts +44 -0
  152. package/dist/policies/requires.js +146 -0
  153. package/dist/policies/requires.js.map +1 -0
  154. package/dist/policies/timestamp.d.ts +14 -0
  155. package/dist/policies/timestamp.js +36 -0
  156. package/dist/policies/timestamp.js.map +1 -0
  157. package/dist/probes/mcp.d.ts +29 -0
  158. package/dist/probes/mcp.js +226 -0
  159. package/dist/probes/mcp.js.map +1 -0
  160. package/dist/probes/memory.d.ts +24 -0
  161. package/dist/probes/memory.js +89 -0
  162. package/dist/probes/memory.js.map +1 -0
  163. package/dist/runtime/index.d.ts +3 -0
  164. package/dist/runtime/index.js +4 -0
  165. package/dist/runtime/index.js.map +1 -0
  166. package/dist/runtime/intercept.d.ts +53 -0
  167. package/dist/runtime/intercept.js +181 -0
  168. package/dist/runtime/intercept.js.map +1 -0
  169. package/dist/runtime/ledger-record.d.ts +43 -0
  170. package/dist/runtime/ledger-record.js +239 -0
  171. package/dist/runtime/ledger-record.js.map +1 -0
  172. package/dist/runtime/session-id.d.ts +10 -0
  173. package/dist/runtime/session-id.js +37 -0
  174. package/dist/runtime/session-id.js.map +1 -0
  175. package/dist/schema/extract.d.ts +5 -0
  176. package/dist/schema/extract.js +23 -0
  177. package/dist/schema/extract.js.map +1 -0
  178. package/dist/schema/grounding.d.ts +65 -0
  179. package/dist/schema/grounding.js +21 -0
  180. package/dist/schema/grounding.js.map +1 -0
  181. package/dist/schema/hooks.d.ts +86 -0
  182. package/dist/schema/hooks.js +42 -0
  183. package/dist/schema/hooks.js.map +1 -0
  184. package/dist/schema/index.d.ts +961 -0
  185. package/dist/schema/index.js +55 -0
  186. package/dist/schema/index.js.map +1 -0
  187. package/dist/schema/memory.d.ts +131 -0
  188. package/dist/schema/memory.js +38 -0
  189. package/dist/schema/memory.js.map +1 -0
  190. package/dist/schema/policies.d.ts +412 -0
  191. package/dist/schema/policies.js +53 -0
  192. package/dist/schema/policies.js.map +1 -0
  193. package/dist/schema/requires.d.ts +115 -0
  194. package/dist/schema/requires.js +57 -0
  195. package/dist/schema/requires.js.map +1 -0
  196. package/dist/schema/tools.d.ts +283 -0
  197. package/dist/schema/tools.js +66 -0
  198. package/dist/schema/tools.js.map +1 -0
  199. package/package.json +63 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,552 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/).
7
+
8
+ ## [0.5.0] - 2026-05-01
9
+
10
+ **Phase 5: dogfood + polish.** Phase 4 shipped policies that fire; Phase 5
11
+ ran them end-to-end against real grounding-mcp + the live SQLite ledger,
12
+ caught the bugs that surfaced, and turned the whole feedback loop into a
13
+ quality-of-life pass over `audit`/`explain`/`policy intercept`. The
14
+ package is now also distributed under `@lannguyensi/harness` on npm
15
+ (install with `npm i -g @lannguyensi/harness` and use `harness ...` from
16
+ the command line).
17
+
18
+ The killer-test from the founding incident still works exactly the same
19
+ way; this release is about it staying that way under realistic
20
+ operational pressure.
21
+
22
+ ### Added
23
+
24
+ - **`harness policy intercept --verbose`** (Phase 5 #3, PR #44) — opt-in
25
+ stderr diagnostics for non-allow decisions: policy name, ledger_tag,
26
+ matched count, reason, and sorted extract values. Default off
27
+ (byte-equivalent to v0.4.0). Also enabled via `HARNESS_POLICY_VERBOSE=1`
28
+ (case-insensitive disable: `0`/`false`/`no`/`off`).
29
+ - **`$CLAUDE_SESSION_ID` env fallback** (Phase 5 #2, PR #43) for
30
+ `audit`/`explain --trace`/`policy intercept` when `--session` is
31
+ omitted. Real Claude Code sessions arrive via `event.session_id`, so
32
+ reads under the literal `"default"` were silently invisible. New
33
+ precedence: explicit > env > `"default"`.
34
+ - **`policy_decision` first-class entry type** (Phase 5 #4, PR #47) via
35
+ the matching `@lannguyensi/evidence-ledger@0.2.0` change. Writer
36
+ tries `type='policy_decision'` and falls back to legacy `type='fact'`
37
+ on an old server. Reader tags rows with their bucket-derived type so
38
+ the requires evaluator can drop policy-decision rows without the
39
+ substring-pollution that inflated `matchedCount` in PR #39's dogfood.
40
+ Legacy `policy_decision:`-prefixed `fact` rows are also dropped via a
41
+ content-prefix backstop so upgraded users don't keep paying the
42
+ pollution tax until their dev ledger ages out.
43
+ - **Server-side `audit` filter pushdown** (Phase 5 #5, PR #46) via the
44
+ matching evidence-ledger 0.2.0 change. `audit` now passes
45
+ `sinceIso` (derived from its `--since` cutoff) and
46
+ `contentPrefix: "policy_decision:"` to `ledger_summary`. Capability
47
+ detection via `tools/list` keeps it back-compatible with old servers
48
+ (filter args are dropped silently when not advertised). Hot path
49
+ (no filter requested) skips `tools/list` entirely.
50
+ - **`dogfood/phase5/`** — reproducible smoke driver against real
51
+ grounding-mcp + live SQLite ledger (Phase 5 #1, PR #39). All six
52
+ gates (deny / ledger_add / silent allow / 5m audit / 24h audit /
53
+ explain --trace) exit non-zero on regression.
54
+ - **`tests/_helpers/manifest.ts` + `tests/_helpers/decision.ts`** (Phase 5
55
+ #6, PR #45) — shared `makeManifest`, `makePolicy`, `makeDecision`,
56
+ `makeDecisionEntry` builders. Pure refactor; existing test count
57
+ unchanged.
58
+
59
+ ### Fixed
60
+
61
+ - **`audit --since` window now parses UTC ledger timestamps correctly**
62
+ (Phase 5 #8, PR #40). evidence-ledger stores `created_at` as SQLite
63
+ `datetime('now')` (UTC, space-separated). V8's `Date.parse` parses
64
+ the space form as local time; on any non-UTC host a `--since` window
65
+ narrower than the host TZ offset silently filtered out fresh
66
+ entries. New `parseLedgerTimestamp` coerces the SQL form to
67
+ ISO-with-Z before delegating to `Date.parse`. Applied at all four call sites
68
+ (audit row sort + cutoff filter, explain `selectLatestForPolicy`,
69
+ `requires.entryTime`).
70
+ - **`explain --trace` picks the latest decision by `evaluatedAt`**
71
+ (Phase 5 #9, PR #41). Sub-second collisions used to tie at
72
+ `bt - at = 0` because the sort keyed on ledger `createdAt`
73
+ (1-second precision), and V8's stable sort returned the earliest
74
+ fire. New `decisionSortKey` prefers the decoded payload's
75
+ `evaluatedAt` (ms precision), falling back to `createdAt`. Same fix in
76
+ `audit` row order.
77
+
78
+ ### Distribution
79
+
80
+ - Package renamed from `harness` (the unscoped name was already taken on
81
+ npm) to `@lannguyensi/harness`. The CLI binary is still `harness`.
82
+ - New `publish-npm.yml` workflow: a `v*` tag triggers a publish to npm with
83
+ provenance. Single package; no monorepo workspace.
84
+
85
+ ## [0.4.0] - 2026-04-30
86
+
87
+ **Phase 4: policy layer.** Policies *fire*. The `requires` schema
88
+ (`ledger_tag`, `+ within`, `+ count` from `docs/ARCHITECTURE.md` §6) is
89
+ now evaluatable at the actual hook event. `harness policy intercept`
90
+ runs as a `PreToolUse` hook: it reads the tool-event JSON from stdin,
91
+ runs every matching policy through extract-substitution + ledger-query
92
+ + requires-evaluation, and emits Claude Code's `{"decision":"deny",...}`
93
+ JSON when blocking. Each fire writes one `policy_decision` entry to the
94
+ evidence ledger via grounding-mcp `ledger_add` so `harness explain
95
+ --trace` and `harness audit` can replay decisions. The killer-test from
96
+ the founding incident is answered end-to-end: `mcp__agent-tasks__pull_requests_merge`
97
+ against a session without a `review:${PR_NUMBER}` ledger entry refuses;
98
+ the same call after `ledger record review:42 …` is allowed.
99
+
100
+ The exit-gate from `docs/ROADMAP.md` is met against a fresh tmpdir
101
+ install: deny without ledger entry / silent allow with one / silent
102
+ allow on unreachable ledger / `explain --trace` shows the full decision
103
+ trail / `audit --since 1h` lists both deny+allow fires sorted ascending
104
+ / `dry-run "merge PR 42"` statically reports `[review-before-merge]` /
105
+ validate enhancements reject `within: yesterday`, `count.min: 0`, and
106
+ warn when policies are declared without `grounding-mcp` wired.
107
+
108
+ ### Added
109
+
110
+ - **`evaluateRequires` library** (`src/policies/requires.ts`) — pure
111
+ evaluator over the three v1 `requires` shapes returning
112
+ `{ allowed, reason, matchedCount, traceData }`. Reason strings match
113
+ the spec verbatim (`no matching ledger entry for tag \`<tag>\``,
114
+ `no matching entry within <duration>`, `<n> of required <bound>
115
+ entries found`). Rejects `within: <bad-duration>` and `count.min: 0`
116
+ at evaluation time as well as validate time.
117
+ - **`evaluateExtract` evaluator + `validateExtractGrammar` parser**
118
+ (`src/policies/extract.ts`) — JSONPath-restricted DSL: dotted
119
+ accessors rooted at `toolArgs / event / session / git`, with
120
+ bracket-quoted keys for non-identifier names. Function calls,
121
+ numeric/slice indices, unknown namespaces all rejected with the
122
+ spec-mandated literal phrases. Built-in vars (`SESSION_ID, REPO,
123
+ BRANCH, TOOL_NAME, CWD`) auto-resolve; extracts override on
124
+ collision with one trace row per variable. `substituteTemplate`
125
+ completes the Appendix-A `review:${REPO}:${PR_NUMBER}` end-to-end.
126
+ - **`queryLedgerByTag` adapter** (`src/policies/ledger-client.ts`) —
127
+ spawns the configured grounding-mcp, performs the
128
+ init/notifications/initialized/`tools/call` handshake, parses
129
+ `ledger_summary`, and returns
130
+ `{ kind: "ok", entries } | { kind: "degraded", reason }`. Every
131
+ spec-named failure mode (spawn ENOENT, JSON-RPC error, stdout
132
+ closes, timeout, payload-shape drift) maps to `degraded`. Empty
133
+ ledger ≠ degraded.
134
+ - **Validate enhancements** — schema delegates `within` to the runtime
135
+ duration parser, rejects `count.min: 0` with the literal "no-op"
136
+ message, validates `trigger.extract` grammar via
137
+ `validateExtractGrammar`, and the CLI layer warns when
138
+ `policies[]` is non-empty but no `tools.mcp[name: grounding-mcp]`
139
+ is wired (links to `docs/ARCHITECTURE.md` §6).
140
+ - **Runtime hook interceptor** (`src/runtime/intercept.ts` +
141
+ `src/cli/policy/intercept.ts`) — wired as `harness policy intercept`.
142
+ Reads tool-event JSON from stdin; runs every matching policy;
143
+ evaluates ALL, denies if any block-enforcement policy denies;
144
+ warn-enforcement deny does NOT block. Unresolved extract →
145
+ `warn-degraded`. Audit-write failure does NOT crash. Multiple
146
+ matching policies on the same event all fire; one ledger entry
147
+ per fire.
148
+ - **`harness explain <policy> --trace`** — replaces the Phase 1 stub.
149
+ Reads the most-recent `policy_decision` entry for the named policy
150
+ and renders the full decision trail (decision, reason,
151
+ triggerMatched, extract substitutions, requiresEval, ledgerQuery).
152
+ Cross-policy entries skipped; latest by `createdAt` wins; malformed
153
+ content silently skipped. Exit codes: 64 missing policy / 1 missing
154
+ evaluation / 1 degraded ledger.
155
+ - **`harness audit [--since <duration>] [--policy <name>] [--outcome
156
+ <allow|deny|warn-degraded>] [--session <id>] [--json]`** — replays
157
+ the evidence ledger for a window; default 24h. Sorted ascending.
158
+ Empty window → documented literal, exit 0. Degraded ledger →
159
+ `ledger unreachable: <reason>`, exit 69 (EX_UNAVAILABLE). Bad input
160
+ → exit 64.
161
+ - **`harness dry-run "<prompt>" [--tool <name>] [--tool-args <json>]
162
+ [--json]`** — static prediction (no LLM, no ledger I/O). With
163
+ `--tool`, simulates a `PreToolUse` event and reports
164
+ `ledgerQuery=<substituted tag>` for each matching policy.
165
+ PreToolUse policies bucket as "could match" without `--tool`.
166
+ - **`policy_decision` audit-log encoding**
167
+ (`src/runtime/ledger-record.ts`) — canonical
168
+ `policy_decision:<name>:<outcome> <json-blob>` format with
169
+ encode/decode round-trip helpers the audit/explain verbs consume.
170
+
171
+ ### Changed
172
+
173
+ - `harness explain` is now async; the CLI awaits the result. Default
174
+ output gains a `--trace` hint replacing the "ships in Phase 4"
175
+ placeholder.
176
+ - Schema modules (`src/schema/extract.ts`, `src/schema/requires.ts`)
177
+ delegate to the runtime grammar/duration helpers. No cycle:
178
+ `policies/duration.ts` and `policies/extract.ts` are leaves.
179
+
180
+ ### Notes
181
+
182
+ - Real Claude Code dogfood (vs. a fake stdio script) is captured in
183
+ the v0.4.0 release PR description; tests use the fake-stdio pattern
184
+ from `tests/probes/mcp.test.ts`.
185
+ - Test count: 519/519 green (pre-release; up from 417 at v0.3.0).
186
+
187
+ ## [0.3.0] - 2026-04-30
188
+
189
+ **Phase 3: declarative truth.** `harness apply` regenerates
190
+ `harness.generated/settings.json` and `harness.generated/MEMORY.md` from
191
+ the manifest, with the three-state drift detection from
192
+ `docs/ARCHITECTURE.md` §7 protecting hand-edits. `harness.lock` pins
193
+ SHA-256 of every referenced asset (hook scripts, MCP entrypoints, skill
194
+ SKILL.md, memory-router binary) plus per-directory Merkle aggregates for
195
+ memory dirs. `harness diff --since-apply` reports drift across three
196
+ sections (generated files, asset SHAs, memory dirs); `--memory-detail`
197
+ expands per-directory Merkle entries to per-file changes. Asset-content
198
+ drift is reported on every apply against the lock with the canonical
199
+ message format: `asset drift detected: <path> changed since last apply`.
200
+
201
+ The exit-gate from `docs/ROADMAP.md` is met: against a fresh tmpdir
202
+ install of `init --template full`, `apply` writes both generated files
203
+ and the lock; re-`apply` is `no changes`; hand-edited
204
+ `harness.generated/settings.json` refuses with the documented diff +
205
+ hint and `--overwrite-drift yes` restores it; an externally-edited hook
206
+ script surfaces `asset drift detected:` on stderr; a memory-file edit
207
+ under a tracked memory directory surfaces a single Merkle drift line
208
+ which `diff --since-apply --memory-detail` expands to the changed
209
+ filename.
210
+
211
+ ### Added
212
+
213
+ - `harness apply [--config <path>] [--project <name>] [--dry-run] [--overwrite-drift]`
214
+ — regenerate runtime files from the manifest. Three-state comparator
215
+ (manifest-expected / last-applied / on-disk-current) decides per file:
216
+ `safe-overwrite` (write fresh), `no-drift` (overwrite is safe), or
217
+ `drift-refuse` (refuse with diff + adopt-or-overwrite hint). Drift
218
+ refusal exits 1; `--overwrite-drift` requires literal `yes`
219
+ (case-insensitive, rejects `y`) before discarding hand-edits. `--dry-run`
220
+ prints the would-be diff and restart hints, exits 0 without writing.
221
+
222
+ - `harness diff --since-apply [--memory-detail] [--json]` — diff against
223
+ the last applied state. Three sections: `# Generated files` (unified
224
+ diff per file), `# Asset drift` (lock SHA mismatches), `# Memory
225
+ directories` (Merkle drift; `--memory-detail` expands to per-file
226
+ added / removed / modified). Exit 0 on no drift; exit 1 on any
227
+ drift. Mutually exclusive with `--since <ref>` (EX_USAGE).
228
+
229
+ - Asset-content drift detection on every apply: re-hashes every locked
230
+ asset / memory-dir Merkle, surfaces mismatches as warning-style
231
+ stderr lines. Warn-only by default; the lock is rewritten with current
232
+ SHAs at the end of the run, so drift is reported once and the next
233
+ apply is clean. Users wanting enforcement wrap apply in a script that
234
+ greps for `asset drift detected:`.
235
+
236
+ - Restart-hint emitter: comparing the prior-apply manifest snapshot with
237
+ the current effective manifest, apply prints `mcp servers changed; …`
238
+ on `tools.mcp[]` change, `memory router command changed; …` on
239
+ `memory.router.command` change, `hooks changed; …` on hook /
240
+ policy structure change. Description-only edits emit no hints.
241
+
242
+ - Library modules (no CLI verbs of their own):
243
+ - `src/io/three-state.ts` — `compare()` returning `safe-overwrite` /
244
+ `no-drift` / `drift-refuse` per the §7 decision table.
245
+ - `src/io/last-apply.ts` — read/write `harness.generated/.last-apply`
246
+ with file SHA + content + optional manifest snapshot + optional
247
+ per-memory-dir per-file index. Atomic-write contract from Phase 2.
248
+ `verifyLastApplyIntegrity()` defends against on-disk corruption.
249
+ - `src/io/harness-lock.ts` — NDJSON `harness.lock` writer/reader.
250
+ Asset entries (hook scripts, MCP entrypoints, skill SKILL.md,
251
+ memory-router binary) plus Merkle-style memory-dir aggregates.
252
+ `enabled: false` mcp[] / `memory.router` and known interpreter
253
+ binaries (`node`, `npx`, `python`, `bash`, `sh`, `tsx`, `deno`,
254
+ `bun`) are excluded. Locale-independent byte-order sort.
255
+ `computeDrift()` returns missing/modified per locked asset.
256
+ - `src/io/restart-hints.ts` — pure manifest-delta to hint list.
257
+ - `src/cli/apply/generate-settings.ts` — manifest hooks projection
258
+ into Claude Code's nested `settings.json` shape.
259
+ - `src/cli/apply/generate-memory-index.ts` — walks
260
+ `memory.directories[]`, parses frontmatter, emits the markdown
261
+ index. CRLF-tolerant; matches the canonical loader's strict
262
+ `name` + `type` requirement; warns + skips on basename collision
263
+ across memory directories.
264
+
265
+ ### Decided here
266
+
267
+ - **Lock granularity.** Every referenced path gets one entry, except
268
+ memory directories which collapse to a Merkle aggregate per directory
269
+ (so a 1000-memory install does not produce a 1000-line lock).
270
+ Per-file detail is recoverable on demand via
271
+ `harness diff --since-apply --memory-detail`. Per-file index lives
272
+ in `.last-apply` (next to the directory hash); the lock stays small.
273
+
274
+ - **Asset drift is warn-only at apply time.** Enforcement is one shell
275
+ script wrapper away (`grep "asset drift detected:"`); coupling
276
+ enforcement into the verb itself would be the wrong default for the
277
+ founding-incident use case (where one edit upstream of harness
278
+ shouldn't block the user from re-applying).
279
+
280
+ - **`apply` writes to `harness.generated/`.** When `--config` is passed
281
+ without an explicit home, generated artefacts live next to the
282
+ configured manifest, not in `~/.claude/harness.generated/`. This
283
+ closes a smoke-test footgun where running with `--config /repo/...`
284
+ silently scribbled into the user's global runtime directory.
285
+
286
+ - **Manifest snapshot integrity.** The optional manifest snapshot in
287
+ `.last-apply` is sha-checked before being used for restart-hint
288
+ comparison; on mismatch, hints fall back to "no prev manifest" so a
289
+ corrupted record does not produce confidently-wrong restart hints.
290
+
291
+ - **`path_match` and `bash_match` do NOT survive the settings.json
292
+ projection.** Per ARCHITECTURE Appendix A canonical pattern, these
293
+ filters are enforced inside the referenced hook script. The manifest
294
+ fields exist for `validate` / `doctor` inventory.
295
+
296
+ ### Carried into Phase 4
297
+
298
+ - **No policy enforcement.** Policies are still schema-only;
299
+ `requires.ledger_tag` / `+ within` / `+ count` evaluation against the
300
+ evidence ledger lands in Phase 4.
301
+ - **No `validate --check-lock`.** Lock-drift is surfaced by `apply`
302
+ and `diff --since-apply` in Phase 3; folding it into `validate`
303
+ is a deferred follow-up.
304
+
305
+ ## [0.2.0] - 2026-04-29
306
+
307
+ **Phase 2: managed edits.** Five write verbs (`init`, `add`, `remove`,
308
+ `adopt`, `export`) plus the foundation library (file lock, atomic write,
309
+ schema-validate-before-write, unified-diff emitter). The exit-gate from
310
+ `docs/ROADMAP.md` is met: a fresh tmpdir round-trip of init → add (mcp /
311
+ cli / hook / skill) → adopt → export → remove → validate runs clean,
312
+ with comments preserved across every mutation.
313
+
314
+ ### Added
315
+
316
+ - `harness init [--template minimal|full] [--force] [--config <path>]` —
317
+ bootstrap a starter manifest. `minimal` is the empty-but-valid header
318
+ + comment block (`harness validate` passes immediately). `full` is
319
+ pre-populated from ARCHITECTURE.md Appendix A (3 MCPs, 3 CLIs, 4
320
+ skills, 4 hooks, 3 policies). Refuses to overwrite without `--force`;
321
+ `--force` emits an `(overwriting ...)` line on stderr.
322
+
323
+ - `harness add <type> <name> ...` — managed insert. Four sub-commands:
324
+ `add mcp <name> --command <cmd> [--health-verb <v>] [--health-timeout-ms <n>] [--enabled <bool>]`,
325
+ `add cli <name> --binary <b> [--required] [--min-version <v>]`,
326
+ `add skill <name>` (managed enable in `tools.skills.enabled[]`),
327
+ `add hook <name> --event <e> --command <c> [--match <r>] [--blocking false|soft|hard] [--budget-ms <n>]`.
328
+ Common flags `--config <path>`, `--dry-run`. Two-stage gate before
329
+ writing: schema (catches duplicate names, dangling references) +
330
+ asset (catches non-+x hook scripts, missing required CLIs). Dry-run
331
+ emits the unified diff and exits 0 without writing.
332
+
333
+ - `harness remove <type> <name>` — drop entries by name with hook-aware
334
+ reference check. Refuses to remove a hook still referenced by a
335
+ policy unless `--force`; with `--force`, the schema gate (dangling
336
+ `policy.hook`) is the safety net so a broken manifest never lands.
337
+ `<unknown>` exits 1 with the available-name list. `--dry-run` shows
338
+ the patch with `-` lines.
339
+
340
+ - `harness adopt <file> [--yes]` — capture hand-edits from
341
+ `~/.claude/settings.json` back into the manifest. Computes drift
342
+ (settings hooks not declared in the manifest), synthesises names
343
+ from command basenames with `-2/-3/...` disambiguation, prints the
344
+ unified diff, prompts `Apply (y/N)?` per the write-and-confirm
345
+ decision. `--yes` skips the prompt. Adopted hooks default to
346
+ `blocking: false` so capture never starts gating tool calls
347
+ unintentionally. Idempotent on re-run.
348
+
349
+ - `harness export [--sanitize] [--json] [-o <file>]` — emit the
350
+ effective merged manifest as a single self-contained YAML or JSON.
351
+ `--sanitize` rewrites `/home/<user>/...` → `~/...` (with a
352
+ trailing-separator anchor so `/home/lan` does not match inside
353
+ `/home/landscape`) and redacts env values whose key matches
354
+ `/(_|^)(KEY|TOKEN|SECRET|PASSWORD|API_KEY)$/i` to `<REDACTED>`.
355
+ Footer comment names what is and is not covered. `-o <file>` writes
356
+ atomically via the foundation's tmp+fsync+rename.
357
+
358
+ - `src/io/` foundation library: `withFileLock(lockPath, fn)` (via
359
+ `proper-lockfile`, lock-then-mutate-then-release), `atomicWriteFile`
360
+ (tmp+fsync+rename), `withDocument` (CST round-trip preserving user
361
+ comments and long flow sequences), `validateBeforeWrite`
362
+ (parseManifest gate returning structured errors), `unifiedDiff`
363
+ (compatible with `patch -p0`).
364
+
365
+ - Example manifest + Appendix A: `grounding-mcp` MCP entry with
366
+ `EVIDENCE_LEDGER_DB` env, the `require-preflight-evidence` hook,
367
+ and the `preflight-before-investigation` policy that gates
368
+ investigative `git status|log|diff|branch` on a fresh
369
+ `agent-preflight` ledger entry. Wires the founding-incident
370
+ block-policy concretely.
371
+
372
+ - Phase 4 ROADMAP acceptance bullet: `validate` warns when `policies[]`
373
+ is non-empty but no `tools.mcp[]` entry named `grounding-mcp` is
374
+ wired (prevents silent degraded-mode failure).
375
+
376
+ ### Changed
377
+
378
+ - `agent-preflight` repositioned in README §Related and across
379
+ VISION / ARCHITECTURE / ROADMAP as the **canonical implementation**
380
+ of preflight hook content, not a sibling tool. The hook script
381
+ `~/.claude/hooks/git-preflight.sh` is canonically a thin wrapper
382
+ around `preflight run --json` + a `ledger record preflight:${REPO}`
383
+ call. ARCHITECTURE §5 acknowledges this pattern: hook commands are
384
+ routinely thin wrappers around named tools, not bespoke shell.
385
+
386
+ - `withDocument` now passes `lineWidth: 0` to the YAML stringifier so
387
+ long flow sequences are not silently rewritten to block style on
388
+ round-trip.
389
+
390
+ ### Decided here
391
+
392
+ - **`harness adopt` UX: write-and-confirm.** Reads the file, computes
393
+ the patch, prints a unified diff, prompts `Apply (y/N)?`. No editor
394
+ mode, no patch-to-stdout shape. `--yes` is the non-interactive
395
+ escape hatch. Per ROADMAP "Open decisions resolved here #2".
396
+
397
+ - **`harness add policy` is intentionally absent in Phase 2.** Policy
398
+ evaluation lands in Phase 4; shipping `add policy` here would create
399
+ the schema-without-behaviour failure mode.
400
+
401
+ ### Known limitations carried from Phase 1
402
+
403
+ - No `harness apply` (Phase 3): adopt captures from settings.json into
404
+ the manifest, but the inverse — generating settings.json *from* the
405
+ manifest — is Phase 3.
406
+ - No policy evaluation (Phase 4): the schema parses `requires` /
407
+ `trigger.extract` and `validate` lints them, but no policy fires
408
+ against the ledger yet.
409
+ - No `harness.lock` (Phase 3): asset-content drift (a hook script
410
+ edited under your feet) is not yet detectable; manifest-layer
411
+ drift is.
412
+
413
+ ## [0.1.0] - 2026-04-29
414
+
415
+ **Phase 1: read-only inventory.** First releasable cut. Six CLI verbs
416
+ (`describe`, `validate`, `doctor`, `list`, `explain`, `diff`) backed by a
417
+ single zod-validated YAML manifest with a per-machine + per-project
418
+ override layer. No write-side verbs yet, no policy evaluation, no lock
419
+ file. The exit-gate from `docs/ROADMAP.md` is met: `harness doctor` against
420
+ a real manifest reproduces the Appendix-D structure with `✗ FAILED:` lines
421
+ that surface the actual MCP-server stderr, not generic "unhealthy" labels.
422
+
423
+ ### Added
424
+
425
+ - `harness describe [--config <path>] [--project <name>] [--pillar <p>] [--json]` —
426
+ print the effective merged manifest. YAML by default, JSON via `--json`.
427
+ `--pillar` filters to one of grounding / tools / memory / hooks /
428
+ policies. Golden fixture `docs/examples/full-manifest.expected.yaml`
429
+ locks the format down byte-for-byte.
430
+
431
+ - `harness validate [--config <path>] [--project <name>] [--strict]` —
432
+ schema lint plus six asset-existence checks: `mcp[].command` first-arg
433
+ rooted-path resolution, `cli[].binary` `$PATH` resolution + semver
434
+ comparison against `min_version`, `tools.skills.required` SKILL.md
435
+ presence, `hooks[].command` exists/regular-file/`+x`, `tools.builtin`
436
+ one-sided drift warning. `--strict` promotes warnings to errors.
437
+ Diagnostics print to stderr; clean runs print "no validation findings"
438
+ to stdout. Exit codes: 1, plus 64 / 66 per `sysexits.h`.
439
+
440
+ - `harness doctor [--config <path>] [--project <name>] [--shallow]` —
441
+ the killer-test value-demo. Spawns each `mcp[]` server, runs
442
+ initialize → tools/call over JSON-RPC stdio, races against the
443
+ configured `health.timeout_ms` and the child's exit. Captures stderr
444
+ verbatim so a broken server surfaces with the actual error message.
445
+ `--shallow` skips probe spawning (useful in tight iteration loops);
446
+ reports `~ name manifest-only (probe skipped)` instead of falsely
447
+ claiming "healthy". Output follows ARCHITECTURE Appendix D structure
448
+ (Manifest / Tools / Memory / Hooks / Policies / Summary).
449
+
450
+ - `harness list <category> [--filter <substr>] [--json]` —
451
+ pipe-friendly flat listing across the six categories
452
+ (`mcp` / `cli` / `skills` / `memories` / `hooks` / `policies`).
453
+ Default output is a column-aligned table; `--json` gives a flat
454
+ array suited for `jq`. `--filter` is case-insensitive substring
455
+ match on `name` (or `path` for `memories`).
456
+
457
+ - `harness explain <policy-name> [--json]` — schema-only printer for a
458
+ named policy. Includes the Phase-1 caveat
459
+ `schema valid; last-evaluated tracking ships in Phase 4`. Missing
460
+ policy → exit 64 with the available-name list (`(none)` when zero
461
+ policies are declared). `--trace` is intentionally NOT wired here;
462
+ it lands in Phase 4.
463
+
464
+ - `harness diff --since <ref>` — manifest-layer diff against a git ref.
465
+ Name-keyed lists (`tools.mcp[]`, `hooks[]`, `policies[]`) diff by
466
+ `name`, so a single field change emits exactly one hunk on that
467
+ field rather than a wholesale list re-emit. Output groups changes
468
+ under per-pillar headers (`## tools`, `## hooks`, etc.).
469
+ `--since-apply` is explicitly Phase 3 and not wired.
470
+
471
+ - **Manifest schema (zod)** for `version: 1` covering all five pillars
472
+ (grounding / tools / memory / hooks / policies) with strict-by-default
473
+ unknown-key rejection. Includes the `trigger.extract:` JSONPath
474
+ grammar (restricted to dotted accessors rooted at `toolArgs` /
475
+ `event` / `session` / `git`) and the three v1 `requires` shapes
476
+ (`ledger_tag`, `+ within`, `+ count`). Cross-policy validation
477
+ rejects `${PR_NUMBER}` references that lack a matching
478
+ `trigger.extract` entry.
479
+
480
+ - **Override engine** implementing every `ARCHITECTURE.md` §8 rule:
481
+ scalar replace, map merge, name-keyed list merge, plain-list
482
+ wholesale replace, `null` tombstone, empty-list `[]` clears,
483
+ mixed-shape rejection, `_delete: true` removal. Result is fully
484
+ owned (deep-cloned), so callers can mutate without corrupting the
485
+ parsed base.
486
+
487
+ - **Per-machine override layer** at
488
+ `~/.claude/machines/<discriminator>.harness.overrides.yaml` with
489
+ three discriminator types (`hostname` / `os` / `default`) and
490
+ WSL2 detection via `/proc/version` containing `microsoft`
491
+ (case-insensitive). Merge order: base → os → hostname → project.
492
+
493
+ - **MCP stdio probe** (`src/probes/mcp.ts`) with `RealMcpProbe` (real
494
+ spawn) + `McpProbe` interface for test injection. Concurrent probes
495
+ via `Promise.all`. EPIPE handling on early-exit servers; pending
496
+ timers are cleared in `finally`.
497
+
498
+ - **Memory introspection** (`src/probes/memory.ts`): walks declared
499
+ memory directories, surfaces `*.md` files older than
500
+ `retention.staleness_days` with last-touched dates. Router-executable
501
+ detection picks the first absolute / `~/...` path in
502
+ `memory.router.command`, not the runtime binary.
503
+
504
+ - **Loader split** (`loadMergedRaw` vs `loadManifest`) so `validate`
505
+ can convert schema errors to structured diagnostics (exit 1) while
506
+ `describe` keeps refusing to print broken manifests (exit 66).
507
+
508
+ ### Resolved design questions
509
+
510
+ Per `docs/ROADMAP.md` "Open decisions resolved here":
511
+
512
+ - **Phase-1 doctor health checks: real call default + first-class
513
+ `--shallow` flag.** The default mode invokes each `mcp[].health.verb`
514
+ with the configured `timeout_ms` so users learn the diagnostic value
515
+ immediately. `--shallow` is the explicit fast-path opt-in; both modes
516
+ are first-class.
517
+
518
+ - **Override granularity for memory directories.** Lists of
519
+ name-keyed entries (`tools.mcp`, `hooks`, `policies`) merge by
520
+ `name`; lists without `name` (`memory.directories`) replace
521
+ wholesale. Mixed-shape lists are rejected at merge time.
522
+
523
+ - **`harness adopt` UX (deferred to Phase 2).** Per ROADMAP, write-and-confirm
524
+ is the chosen pattern: `harness adopt <file>` will read the on-disk
525
+ file, compute the manifest patch, print a unified diff, and prompt
526
+ `Apply (y/N)?`. This release does not ship `adopt`; the decision is
527
+ recorded so Phase 2 picks up where the design left off.
528
+
529
+ - **Policy storage location (deferred to Phase 4).** Inline `policies:` in
530
+ the main manifest is the runtime-firing surface; library-style
531
+ imported policies (e.g. claim-gate via `grounding.policies_source`)
532
+ stay in their own DSL files. Phase 1 only validates the inline shape;
533
+ Phase 4 wires the evaluator.
534
+
535
+ ### Known limitations (deferred to later phases)
536
+
537
+ - **No `harness apply`.** Source-of-truth applies at the *manifest*
538
+ layer only; runtime files (`~/.claude/settings.json`, etc.) stay
539
+ user-owned in Phase 1. Generation lands in Phase 3.
540
+ - **No policy evaluation.** Policies are schema-only in Phase 1;
541
+ `harness explain --trace` and `harness audit` ship in Phase 4.
542
+ - **No `harness.lock`.** Asset-content drift (a hook script edited
543
+ under your feet) is detectable only after the lock file ships in
544
+ Phase 3.
545
+ - **No write verbs.** `init`, `add`, `remove`, `adopt`, `export`
546
+ ship in Phase 2.
547
+
548
+ ### Tests
549
+
550
+ 147 vitest cases across 12 files. Line coverage: 93.75% on `src/`.
551
+
552
+ [0.1.0]: https://github.com/LanNguyenSi/harness/releases/tag/v0.1.0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Lan Nguyen Si
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.