@lannguyensi/harness 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/CHANGELOG.md +154 -0
  2. package/README.md +162 -120
  3. package/dist/cli/add/mutate.d.ts +4 -0
  4. package/dist/cli/add/mutate.js +30 -1
  5. package/dist/cli/add/mutate.js.map +1 -1
  6. package/dist/cli/adopt/derive.d.ts +23 -0
  7. package/dist/cli/adopt/derive.js +85 -0
  8. package/dist/cli/adopt/derive.js.map +1 -1
  9. package/dist/cli/adopt/index.d.ts +9 -1
  10. package/dist/cli/adopt/index.js +72 -4
  11. package/dist/cli/adopt/index.js.map +1 -1
  12. package/dist/cli/apply/apply.d.ts +45 -1
  13. package/dist/cli/apply/apply.js +237 -25
  14. package/dist/cli/apply/apply.js.map +1 -1
  15. package/dist/cli/apply/generate-settings.d.ts +13 -1
  16. package/dist/cli/apply/generate-settings.js +45 -0
  17. package/dist/cli/apply/generate-settings.js.map +1 -1
  18. package/dist/cli/apply/index.d.ts +1 -0
  19. package/dist/cli/apply/index.js +1 -0
  20. package/dist/cli/apply/index.js.map +1 -1
  21. package/dist/cli/apply/next-steps.d.ts +7 -0
  22. package/dist/cli/apply/next-steps.js +37 -0
  23. package/dist/cli/apply/next-steps.js.map +1 -0
  24. package/dist/cli/audit.js +2 -1
  25. package/dist/cli/audit.js.map +1 -1
  26. package/dist/cli/describe.d.ts +1 -1
  27. package/dist/cli/describe.js +2 -0
  28. package/dist/cli/describe.js.map +1 -1
  29. package/dist/cli/diff/since-apply.js +1 -1
  30. package/dist/cli/diff/since-apply.js.map +1 -1
  31. package/dist/cli/doctor/format.js +34 -3
  32. package/dist/cli/doctor/format.js.map +1 -1
  33. package/dist/cli/doctor/index.js +39 -3
  34. package/dist/cli/doctor/index.js.map +1 -1
  35. package/dist/cli/doctor/types.d.ts +14 -2
  36. package/dist/cli/explain.d.ts +10 -1
  37. package/dist/cli/explain.js +44 -18
  38. package/dist/cli/explain.js.map +1 -1
  39. package/dist/cli/index.js +140 -11
  40. package/dist/cli/index.js.map +1 -1
  41. package/dist/cli/list.d.ts +1 -1
  42. package/dist/cli/list.js +17 -0
  43. package/dist/cli/list.js.map +1 -1
  44. package/dist/cli/session-export/index.d.ts +46 -0
  45. package/dist/cli/session-export/index.js +169 -0
  46. package/dist/cli/session-export/index.js.map +1 -0
  47. package/dist/cli/session-export/redact.d.ts +22 -0
  48. package/dist/cli/session-export/redact.js +47 -0
  49. package/dist/cli/session-export/redact.js.map +1 -0
  50. package/dist/cli/session-export/transcript.d.ts +24 -0
  51. package/dist/cli/session-export/transcript.js +162 -0
  52. package/dist/cli/session-export/transcript.js.map +1 -0
  53. package/dist/cli/validate/index.d.ts +8 -0
  54. package/dist/cli/validate/index.js +37 -1
  55. package/dist/cli/validate/index.js.map +1 -1
  56. package/dist/io/harness-lock.d.ts +6 -1
  57. package/dist/io/harness-lock.js +2 -2
  58. package/dist/io/harness-lock.js.map +1 -1
  59. package/dist/io/merge-settings.d.ts +8 -0
  60. package/dist/io/merge-settings.js +47 -0
  61. package/dist/io/merge-settings.js.map +1 -0
  62. package/dist/policies/ledger-client.js +4 -2
  63. package/dist/policies/ledger-client.js.map +1 -1
  64. package/dist/policies/requires.js +3 -2
  65. package/dist/policies/requires.js.map +1 -1
  66. package/dist/probes/mcp.d.ts +13 -0
  67. package/dist/probes/mcp.js +27 -3
  68. package/dist/probes/mcp.js.map +1 -1
  69. package/dist/runtime/intercept.js +3 -2
  70. package/dist/runtime/intercept.js.map +1 -1
  71. package/dist/runtime/ledger-record.d.ts +8 -0
  72. package/dist/runtime/ledger-record.js +12 -3
  73. package/dist/runtime/ledger-record.js.map +1 -1
  74. package/dist/schema/audit.d.ts +71 -0
  75. package/dist/schema/audit.js +32 -0
  76. package/dist/schema/audit.js.map +1 -0
  77. package/dist/schema/index.d.ts +408 -0
  78. package/dist/schema/index.js +21 -0
  79. package/dist/schema/index.js.map +1 -1
  80. package/dist/schema/workflows.d.ts +519 -0
  81. package/dist/schema/workflows.js +81 -0
  82. package/dist/schema/workflows.js.map +1 -0
  83. package/dist/version.d.ts +1 -0
  84. package/dist/version.js +3 -0
  85. package/dist/version.js.map +1 -0
  86. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -5,6 +5,160 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
 
8
+ ## [Unreleased]
9
+
10
+ ## [0.7.0] - 2026-05-06
11
+
12
+ **Headline: workflows-as-data and full-session audit forensics.** The
13
+ `workflows:` block (PR #66) lets adopters declare branch policy,
14
+ review-subagent gating, and merge method as schema-validated data
15
+ instead of prose in memory files. `harness session-export <sessionId>`
16
+ (PR #67) joins the on-disk Claude Code transcript JSONL with the
17
+ evidence ledger for the same session and emits a single chronologically
18
+ ordered audit artifact, with default-on regex redaction extended by a
19
+ new optional `audit.redact[]` manifest block. The README is split into
20
+ audience-specific guides (`docs/for-humans.md`, `docs/for-agents.md`)
21
+ and gains a control-loop flowchart that both audiences read
22
+ identically. `harness explain --last` closes the "what just denied me?"
23
+ loop without needing the policy name. No runtime enforcement of
24
+ `workflows:` yet; that ships as a follow-up.
25
+
26
+ Operator note: no schema bump (still `version: 1`). All new manifest
27
+ fields are optional and additive; manifests written for `0.6.0` parse
28
+ under `0.7.0` byte-identically. The new `audit.redact[]` defaults to a
29
+ denylist that catches the four obvious key/secret patterns even when
30
+ the operator declares no `audit:` block, so existing operators get
31
+ redaction-on-by-default for `session-export` for free.
32
+
33
+ ### Changed
34
+ - `docs/for-agents.md` workflow lifecycle stateDiagram is now anchored
35
+ on the four step kinds the `workflows:` schema actually defines
36
+ (`branch`, `review_subagent`, `ci_gate`, `merge`) instead of
37
+ agent-tasks-MCP-specific verbs (`task_start`, `open` / `in_progress` /
38
+ `done`). A new "If you use agent-tasks MCP" footnote below the
39
+ diagram maps the lifecycle markers to the concrete MCP verbs as one
40
+ example integration; other task systems fit the same lifecycle.
41
+ Spotted right after the audience split landed (PR #69).
42
+ - Root `README.md` gains a control-loop flowchart ("What harness does":
43
+ declare, apply, enforce, record, observe, refine) that both
44
+ audiences read identically. No audience-specific verbs (PR #69).
45
+ - Docs split into two audience-specific surfaces:
46
+ `docs/for-humans.md` (operator guide: install, mental model, first
47
+ hour, diagnostics cheat sheet) and `docs/for-agents.md` (workflow
48
+ lifecycle, policy/ledger sequence, CLI cheat sheet by side-effect
49
+ class, audit triumvirate). README shrunk to a landing page that
50
+ picks audience, with the `Try it in 60 seconds` block, status
51
+ checklist, and `Why this exists` preserved. Three mermaid diagrams
52
+ added: a system flowchart in `for-humans.md`, a workflow
53
+ stateDiagram and a policy/ledger sequenceDiagram in
54
+ `for-agents.md`. Docs-only, no source changes (PR #68).
55
+
56
+ ### Added
57
+ - `harness explain --last` traces the most recent policy decision in the
58
+ evidence ledger without needing the policy name, closing the common
59
+ "I just got a deny, what fired?" loop in one command instead of three.
60
+ Pair with `--decision allow|deny|warn-degraded` to skip past intervening
61
+ outcomes. `<policy>` and `--last` are mutually exclusive (PR #65).
62
+ - `harness session-export <sessionId>` joins the on-disk Claude Code
63
+ transcript JSONL (`~/.claude/projects/<projectDir>/<sessionId>.jsonl`)
64
+ with evidence-ledger rows for the same session and emits a single
65
+ chronologically ordered audit artifact. `--format json` (default) and
66
+ `--format jsonl` ship in v1; `-o <file>` writes to disk. Each event
67
+ carries an explicit `source: "transcript" | "ledger"` marker so the
68
+ export is traceable back to its inputs (PR #67).
69
+ - New optional `audit.redact[]` block in the manifest. Each entry is
70
+ either `{ regex, replacement? }` or `{ env_var, replacement? }`;
71
+ `env_var:` resolves to the actual value at export time and
72
+ string-replaces it. A default denylist (token / secret / password /
73
+ api_key) ships even when the manifest declares no `audit:` block, so
74
+ redaction is on by default. Manifests without `audit:` parse
75
+ unchanged (PR #67).
76
+ - Additive `workflows:` and `review_templates:` top-level blocks in the
77
+ manifest (still `version: 1`). Lets adopters declare review-subagent
78
+ gating, branch policy, CI gate, and merge method as data instead of
79
+ prose in memory files. The schema rejects duplicate workflow names,
80
+ unknown step `kind` values, `spawn: required` without a `template`,
81
+ and `template:` references not defined in `review_templates`. Surfaces
82
+ via `harness describe --pillar workflows`, `harness list workflows`,
83
+ and a new `Workflows` section in `harness doctor`. No runtime
84
+ enforcement yet; that ships as a follow-up. Manifests without
85
+ `workflows:` parse identically to before (PR #66).
86
+
87
+ ## [0.6.0] - 2026-05-03
88
+
89
+ **Headline: the Phase-5 adoption-blocker cycle closes end-to-end.**
90
+ `harness apply` now writes directly into a Claude Code settings
91
+ discovery path (`--target` + `--merge`), translates the manifest's
92
+ `tools.mcp[]` into the settings.json `mcpServers` block (so a real
93
+ `claude -p --settings <apply'd>` session actually loads them), prints
94
+ a Next-steps hint that names the real wire-up commands instead of
95
+ leaving adopters to guess, and `harness adopt` round-trips hand-edits
96
+ to `mcpServers` back into the manifest. The full
97
+ `apply → hand-edit → adopt → apply` cycle is byte-identical on the
98
+ settings.json bytes.
99
+
100
+ Operator note: no schema changes; `harness.lock` gains an optional
101
+ `target` entry kind (additive). Existing `harness.lock` files without
102
+ target entries continue to parse. The new flags on `apply` are all
103
+ opt-in; the default invocation still writes to `harness.generated/`.
104
+ Per-package version bumped from 0.5.0 to 0.6.0; this is the first
105
+ minor release on the `@lannguyensi/harness` npm distribution. No
106
+ operator action required beyond `npm i -g @lannguyensi/harness@0.6.0`
107
+ on machines running the published binary.
108
+
109
+ ### Added
110
+
111
+ - **`harness apply --target / --merge / --force`** (task `d38f6f91`, PR #58):
112
+ write the rendered settings.json directly into a Claude Code settings
113
+ discovery path (e.g. `.claude/settings.local.json` or
114
+ `~/.claude/settings.json`). `--merge` does a 3-way merge that replaces
115
+ harness-owned top-level keys (today: `hooks`, `mcpServers`) and preserves
116
+ everything else. `harness.lock` records the target sha so `validate
117
+ --check-lock` flags out-of-band edits. Closes the adoption blocker that
118
+ forced every adopter into a hand `cp` or per-invocation `--settings`.
119
+ - **`apply` translates `tools.mcp[]` into the settings.json `mcpServers`
120
+ block** (task `62380337`, PR #59). The Phase 5 #1a caveat is closed:
121
+ `init.mcp_servers` in a `claude -p --settings <apply'd>` session now
122
+ contains the manifest's MCP entries. Disabled servers (`enabled: false`)
123
+ are omitted; warnings (not errors) cover entries that pass schema validation
124
+ but produce no runnable command. String-form commands with embedded
125
+ whitespace in paths must be expressed as the array form to preserve
126
+ token boundaries.
127
+ - **`apply` prints a Next-steps hint after a successful run** (task
128
+ `517aa919`, PR #60). After the summary line, the CLI prints concrete,
129
+ copy-pasteable next actions: one-shot `claude -p --settings ...`,
130
+ project-scope `harness apply --target .claude/settings.local.json`,
131
+ and user-global `harness apply --target ~/.claude/settings.json --merge`.
132
+ When `--target` was actually written, the hint collapses to a single
133
+ verify line with `--settings <targetPath>` included (so non-canonical
134
+ target paths still resolve through `claude -p`). Two new flags pair
135
+ with this: `--quiet` suppresses the hint while keeping the summary,
136
+ and `--json` emits a machine-readable JSON summary instead of prose
137
+ (implies `--quiet`; refusal outcomes still set non-zero exit).
138
+ Motivated by a 2026-05-03 hallucination incident where an agent
139
+ fabricated a non-existent `claude -p --output-dir` flag because
140
+ nothing in the apply output guided the wire-up step; both unit and
141
+ CLI integration tests assert the hint never contains `--output-dir`.
142
+ - **`adopt` reverse-projection for `mcpServers` into `tools.mcp[]`**
143
+ (task `7059d92b`, PR #61). Closes the round-trip gap: hand-edits to
144
+ settings.json's `mcpServers` block can now be captured back into the
145
+ manifest. New entries are appended; same-name entries with different
146
+ command/env are replaced (preserving manifest-only fields like `health`
147
+ and `enabled: false`, so adopting a hand-edit does not silently wipe
148
+ doctor/probe/policy metadata). The full
149
+ `harness apply --target ... --merge → hand-edit → harness adopt → harness apply`
150
+ cycle is byte-identical on the settings.json bytes.
151
+
152
+ ### Notes for upgraders
153
+
154
+ - The settings.json output now includes a `mcpServers` key when your
155
+ manifest declares enabled MCP servers. On the first apply after
156
+ upgrade, the file grows by that block. The three-state compare handles
157
+ this safely (no spurious drift-refuse on the generated file), but if
158
+ you have hand-edited a `mcpServers` block into a previously apply'd
159
+ settings.json, the next apply will refuse (`drift-refuse`) until you
160
+ reconcile via `harness adopt` or `--overwrite-drift`.
161
+
8
162
  ## [0.5.0] - 2026-05-01
9
163
 
10
164
  **Phase 5: dogfood + polish.** Phase 4 shipped policies that fire; Phase 5
package/README.md CHANGED
@@ -2,153 +2,195 @@
2
2
 
3
3
  **Declarative control plane for agent harnesses.**
4
4
 
5
- One zod-validated YAML manifest for grounding, tools, memory, hooks, and policies — plus a CLI that describes, validates, diffs, applies, audits, and *enforces*.
5
+ One zod-validated YAML manifest for grounding, tools, memory, hooks,
6
+ policies, and workflows, plus a CLI that describes, validates, diffs,
7
+ applies, audits, and *enforces*.
8
+
9
+ > Most config tools tell you what an agent is configured to use.
10
+ > `harness` tells you what an agent is *allowed to do*, under this
11
+ > exact context, and why.
12
+
13
+ `harness` collapses the six-to-eight surfaces a working agent harness
14
+ leaks across (`settings.json`, `CLAUDE.md`, memory frontmatter, MCP
15
+ registrations, per-project overrides, hook scripts) into a single
16
+ source of truth. Today (`v0.7.0`) policies fire end-to-end: a
17
+ `mcp__agent-tasks__pull_requests_merge` call against a session
18
+ without a `review:${PR_NUMBER}` ledger entry refuses; `harness
19
+ explain review-before-merge --trace` shows exactly why.
20
+
21
+ ## What harness does
22
+
23
+ ```mermaid
24
+ flowchart LR
25
+ declare["1. Declare<br/><code>harness.yaml</code>"]
26
+ apply["2. Apply<br/><code>harness apply</code>"]
27
+ enforce["3. Enforce<br/>hooks + policies<br/>at runtime"]
28
+ record[("4. Record<br/>evidence ledger")]
29
+ observe["5. Observe<br/><code>audit</code> / <code>explain</code> /<br/><code>session-export</code>"]
30
+
31
+ declare --> apply
32
+ apply --> enforce
33
+ enforce --> record
34
+ record --> observe
35
+ observe -. refine .-> declare
36
+ ```
37
+
38
+ One manifest declares grounding, tools, memory, hooks, policies, and
39
+ workflows. `apply` materialises that into the files Claude Code
40
+ actually reads. At runtime, hooks and policies enforce the contract
41
+ and write decision rows to the evidence ledger. The read-side
42
+ surfaces (`audit`, `explain --trace`, `session-export`) replay those
43
+ rows so you can see what fired, why, and across which session.
44
+ Whatever you learn from observing flows back into the manifest. That
45
+ loop is the whole product.
46
+
47
+ ## Pick your audience
48
+
49
+ - **Operator?** Read [`docs/for-humans.md`](docs/for-humans.md). It
50
+ walks from `npm i -g @lannguyensi/harness` through your first
51
+ `apply`, your first real policy, and the diagnostics cheat sheet.
52
+ - **Agent (or onboarding one)?** Read
53
+ [`docs/for-agents.md`](docs/for-agents.md). It defines the
54
+ workflow lifecycle, the policy / ledger sequence, the CLI cheat
55
+ sheet split by side-effect class, and the audit triumvirate
56
+ (`audit` vs `explain --trace` vs `session-export`).
6
57
 
7
- > Most config tools tell you what an agent is configured to use. `harness` tells you what an agent is *allowed to do*, under this exact context, and why.
58
+ ## Install
8
59
 
9
- `harness` collapses the six-to-eight surfaces a working agent harness leaks across (`settings.json`, `CLAUDE.md`, memory frontmatter, MCP registrations, per-project overrides, hook scripts) into a single source of truth. Today (`v0.4.0`) policies fire end-to-end: a `mcp__agent-tasks__pull_requests_merge` call against a session without a `review:${PR_NUMBER}` ledger entry refuses; `harness explain review-before-merge --trace` shows exactly why. Phase 6 adds an *Understanding Gate* (agents confirm task interpretation before editing); Phase 7 adds a *Risk Gate* that blocks `DROP TABLE` against a prod target — even when the model would happily run it.
60
+ ```bash
61
+ npm i -g @lannguyensi/harness
62
+ ```
63
+
64
+ The CLI binary is `harness`. Node 20 or newer is required.
10
65
 
11
66
  ## Try it in 60 seconds
12
67
 
13
68
  ```bash
14
69
  git clone https://github.com/LanNguyenSi/harness && cd harness
15
70
  npm install && npm run build
16
-
17
- # Statically predict which policies fire for a tool call (no ledger, no LLM)
18
71
  node dist/cli/main.js dry-run "merge PR 42" \
19
72
  --tool mcp__agent-tasks__pull_requests_merge \
20
73
  --tool-args '{"prNumber":42}' \
21
74
  --config docs/examples/full-manifest.yaml
22
75
  ```
23
76
 
24
- `dry-run` reads the reference manifest (`docs/examples/full-manifest.yaml`), runs the trigger matcher, substitutes `${PR_NUMBER}=42` through the JSONPath-restricted extract DSL, and tells you exactly which hooks would fire and which policies would match — before any ledger I/O.
25
-
26
- ## What a run looks like
27
-
28
- ```yaml
29
- prompt: merge PR 42
30
- tool: mcp__agent-tasks__pull_requests_merge
31
- toolArgs:
32
- prNumber: 42
33
- Hooks that would fire:
34
- - event: SessionStart
35
- name: git-preflight
36
- - event: PreToolUse
37
- name: require-review-evidence
38
- - event: PreToolUse
39
- name: require-dogfood-evidence
40
- - event: PreToolUse
41
- name: require-preflight-evidence
42
- Policies that match:
43
- - name: review-before-merge
44
- ledgerQuery: review:42
45
- requires:
46
- ledger_tag: review:${PR_NUMBER}
47
- enforcement: block
48
- triggerEvent: PreToolUse
49
- - name: two-reviewers-required
50
- ledgerQuery: review:42
51
- requires:
52
- ledger_tag: review:${PR_NUMBER}
53
- count:
54
- min: 2
55
- enforcement: warn
56
- triggerEvent: PreToolUse
57
- Policies that COULD match (need --tool):
58
- - name: dogfood-before-release
59
- triggerEvent: PreToolUse
60
- reason: --tool "mcp__agent-tasks__pull_requests_merge" does not contain trigger.match "Bash"
61
- - name: preflight-before-investigation
62
- triggerEvent: PreToolUse
63
- reason: --tool "mcp__agent-tasks__pull_requests_merge" does not contain trigger.match "Bash"
64
- Memories that would route:
65
- - path: ~/.claude/projects/{project}/memory
66
- scope: project
67
- ```
68
-
69
- When the matching policy actually fires (via `harness policy intercept`, wired by `harness apply` into `settings.json` as a `PreToolUse` hook), and the evidence ledger has no `review:42` entry, the runtime emits Claude Code's deny shape on stdout:
70
-
71
- ```json
72
- {"decision":"deny","reason":"review-before-merge: no matching ledger entry for tag `review:42`"}
73
- ```
74
-
75
- After the entry is recorded, the same call is silently allowed. Every fire writes a `policy_decision` row that `harness audit` and `harness explain --trace` replay:
76
-
77
- ```
78
- $ node dist/cli/main.js audit --since 1h --policy review-before-merge --session sess-1 --config docs/examples/full-manifest.yaml
79
-
80
- timestamp policy outcome reason
81
- ------------------------ ------------------- ------- ---------------------------------------------
82
- 2026-04-30T18:30:00.000Z review-before-merge deny no matching ledger entry for tag `review:42`
83
- 2026-04-30T18:31:00.000Z review-before-merge allow 1 matching ledger entries for tag `review:42`
84
- ```
85
-
86
- ## Next steps
87
-
88
- | If you want to... | Read |
89
- |------|------|
90
- | Understand the YAML shape, CLI surface, drift handling, `requires` schema | [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) |
91
- | See phase-by-phase scope, deliverables, acceptance criteria, exit gates | [`docs/ROADMAP.md`](docs/ROADMAP.md) |
92
- | Read the long-form positioning (three pillars, ecosystem map, gaps) | [`docs/VISION.md`](docs/VISION.md) |
93
- | Browse a manifest covering every field | [`docs/examples/full-manifest.yaml`](docs/examples/full-manifest.yaml) |
94
- | Track what's shipping and what's deferred | [`CHANGELOG.md`](CHANGELOG.md) |
77
+ `dry-run` reads the reference manifest, runs the trigger matcher,
78
+ substitutes `${PR_NUMBER}=42` through the JSONPath-restricted extract
79
+ DSL, and tells you exactly which hooks would fire and which policies
80
+ would match, before any ledger I/O.
95
81
 
96
- ## Common commands
82
+ ## Status
97
83
 
98
- ```bash
99
- node dist/cli/main.js init --template full --config /tmp/harness-demo/harness.yaml
100
- node dist/cli/main.js describe --config /tmp/harness-demo/harness.yaml --pillar tools
101
- node dist/cli/main.js doctor --config /tmp/harness-demo/harness.yaml --shallow
102
- node dist/cli/main.js validate --config /tmp/harness-demo/harness.yaml
103
- node dist/cli/main.js apply --config /tmp/harness-demo/harness.yaml # regenerate settings.json + MEMORY.md, write harness.lock
104
- node dist/cli/main.js diff --since-apply --config /tmp/harness-demo/harness.yaml
105
- node dist/cli/main.js explain review-before-merge --trace --config docs/examples/full-manifest.yaml
106
- node dist/cli/main.js audit --since 24h --config docs/examples/full-manifest.yaml
107
- ```
84
+ - [x] Phase 1, read-only inventory (`describe`, `validate`, `doctor`,
85
+ `list`, `explain`, `diff`), released as
86
+ [`v0.1.0`](CHANGELOG.md#010---2026-04-29).
87
+ - [x] Phase 2, managed edits (`init`, `add`, `remove`, `adopt`,
88
+ `export`), released as [`v0.2.0`](CHANGELOG.md#020---2026-04-29).
89
+ - [x] Phase 3, declarative truth (`apply`, `diff --since-apply`,
90
+ `harness.lock`), released as
91
+ [`v0.3.0`](CHANGELOG.md#030---2026-04-30).
92
+ - [x] Phase 4, policy layer (`policy intercept`, `explain --trace`,
93
+ `audit`, `dry-run`, requires-evaluator + extract DSL +
94
+ grounding-mcp adapter), released as
95
+ [`v0.4.0`](CHANGELOG.md#040---2026-04-30).
96
+ - [x] Phase 5, polish + dogfood lessons (`--verbose` policy
97
+ diagnostics, `$CLAUDE_SESSION_ID` env fallback, server-side
98
+ `audit` filter pushdown, `policy_decision` first-class entry
99
+ type, npm distribution as `@lannguyensi/harness`), released as
100
+ [`v0.5.0`](CHANGELOG.md#050---2026-05-01).
101
+ - [x] Apply-into-settings cycle, `harness adopt`, `apply --target /
102
+ --merge`, `harness.lock` target tracking, released as
103
+ [`v0.6.0`](CHANGELOG.md#060---2026-05-03).
104
+ - [x] Workflows-as-data + full-session audit forensics: additive
105
+ `workflows:` / `review_templates:` / `audit.redact[]` manifest
106
+ blocks, `harness session-export`, `explain --last`,
107
+ audience-specific docs surfaces, released as
108
+ [`v0.7.0`](CHANGELOG.md#070---2026-05-06).
109
+ - [ ] Phase 6, Understanding Gate Policy Pack: agents must expose and
110
+ confirm task understanding before write-capable tools fire.
111
+ - [ ] Phase 7, Risk Gate: Action Envelope + Risk Classifier +
112
+ `allow / warn / require_approval / deny` for destructive-action
113
+ prevention.
108
114
 
109
115
  ## What's next
110
116
 
111
- Two structurally larger themes are queued after Phase 5's polish:
112
-
113
- **Phase 6 Understanding Gate.** Before an agent edits files, runs shell, commits, or opens a PR, it must produce an *Understanding Report* (its interpretation of the task: derived todos, acceptance criteria, assumptions, out-of-scope, risks). The user confirms, corrects, or "grills me until precise enough". Only after explicit approval is recorded in the evidence ledger may write-capable tools fire. Ships as the first `harness` *Policy Pack* — a reusable bundle of instruction template + hooks + policies + permission profiles. Long-form design lives in the internal `lava-ice-logs` logbook (2026-04-30).
114
-
115
- **Phase 7 — Risk Gate.** Today's policy model evaluates a rule per matching trigger and returns a binary block/allow. Phase 7 makes harness reason about *the action itself*: an Action Envelope (tool + raw input + session + runtime context) is enriched by a Context Resolver (production / staging / dev / unknown), classified by a Risk Classifier (severity + categories + reversibility), then matched against policies whose `when:` clauses can reference `risk.severity_at_least`, `environment.name`, and similar. The decision space extends to `allow / warn / require_approval / deny`. Motivating use case: prevent `DROP TABLE users`, `kubectl delete namespace prod`, `terraform destroy` against an unverified production target before they reach the runtime — even if the model would have happily run them. Long-form design lives in the internal `lava-ice-logs` logbook (2026-04-30).
116
-
117
- Both build on Phase 4's `policy intercept` runtime backbone; neither replaces it.
117
+ Two structurally larger themes are queued after Phase 5's polish.
118
+
119
+ **Phase 6, Understanding Gate.** Before an agent edits files, runs
120
+ shell, commits, or opens a PR, it must produce an *Understanding
121
+ Report* (its interpretation of the task: derived todos, acceptance
122
+ criteria, assumptions, out-of-scope, risks). The user confirms,
123
+ corrects, or "grills me until precise enough". Only after explicit
124
+ approval is recorded in the evidence ledger may write-capable tools
125
+ fire. Ships as the first `harness` *Policy Pack*: a reusable bundle
126
+ of instruction template + hooks + policies + permission profiles.
127
+
128
+ **Phase 7, Risk Gate.** Today's policy model evaluates a rule per
129
+ matching trigger and returns a binary block/allow. Phase 7 makes
130
+ harness reason about *the action itself*: an Action Envelope (tool +
131
+ raw input + session + runtime context) is enriched by a Context
132
+ Resolver (production / staging / dev / unknown), classified by a Risk
133
+ Classifier (severity + categories + reversibility), then matched
134
+ against policies whose `when:` clauses can reference
135
+ `risk.severity_at_least`, `environment.name`, and similar. The
136
+ decision space extends to `allow / warn / require_approval / deny`.
137
+ Motivating use case: prevent `DROP TABLE users`, `kubectl delete
138
+ namespace prod`, `terraform destroy` against an unverified production
139
+ target, even if the model would have happily run them.
140
+
141
+ Both build on Phase 4's `policy intercept` runtime backbone; neither
142
+ replaces it.
118
143
 
119
144
  > Bring your favorite agent harness. Add governance.
120
145
 
121
- ## Status
122
-
123
- - [x] Repo bootstrap (LICENSE, .gitignore)
124
- - [x] README + VISION — repo legible
125
- - [x] ARCHITECTURE — YAML shape + CLI surface agreed
126
- - [x] ROADMAP — phases 1–4 with acceptance criteria
127
- - [x] Phase 1 — read-only inventory (`describe`, `validate`, `doctor`, `list`, `explain`, `diff`) — released as [`v0.1.0`](CHANGELOG.md#010---2026-04-29)
128
- - [x] Phase 2 — managed edits (`init`, `add`, `remove`, `adopt`, `export`) — released as [`v0.2.0`](CHANGELOG.md#020---2026-04-29)
129
- - [x] Phase 3 — declarative truth (`apply`, `diff --since-apply`, `harness.lock`) — released as [`v0.3.0`](CHANGELOG.md#030---2026-04-30)
130
- - [x] Phase 4 — policy layer (`policy intercept`, `explain --trace`, `audit`, `dry-run`, requires-evaluator + extract DSL + grounding-mcp adapter) — released as [`v0.4.0`](CHANGELOG.md#040---2026-04-30)
131
- - [ ] Phase 5 — polish + dogfood lessons (`apply --strict-lock`, `validate --check-lock`, sessionId default, `--verbose` deny diagnostics, sysexits normalisation, real-Claude-Code dogfood)
132
- - [ ] Phase 6 — Understanding Gate Policy Pack (agents must expose and confirm task understanding before write-capable tools fire)
133
- - [ ] Phase 7 — Risk Gate (Action Envelope + Risk Classifier + `allow / warn / require_approval / deny` for destructive-action prevention)
134
-
135
146
  ## Why this exists
136
147
 
137
- A working agent harness today has six to eight configuration surfaces, each with its own schema and lifecycle: `~/.claude/settings.json`, `CLAUDE.md` (per repo + root), `~/.claude/projects/*/memory/*.md` with frontmatter, `~/.claude/keybindings.json`, MCP server registrations in `~/.claude.json`, skill directories, per-project overrides, and external CLIs that behave differently per project.
138
-
139
- There is no single place that answers *"what can this agent do right now, and why is that configured that way?"*. Drift between sessions is invisible until it breaks something. Humans editing one surface don't know which other surfaces they need to touch. A fresh agent instance has no way to audit its own setup.
140
-
141
- Our entry point into this problem: on 2026-04-23, an `agent-grounding` checkout that was 16 commits behind origin led two tasks to be incorrectly called "stale". The check that would have caught it already exists — [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight) runs `git fetch` + `git status` (alongside lint, typecheck, test, audit) and emits a structured `ready` + confidence-score result. The missing piece wasn't the check itself, it was the deterministic *trigger*: a `SessionStart` hook that invokes `preflight run` and a policy that gates further work on the result. Building that wiring needs an agreed-upon place for harness config to live first. That conversation is the origin of this repo.
148
+ A working agent harness today has six to eight configuration
149
+ surfaces, each with its own schema and lifecycle: `~/.claude/settings.json`,
150
+ `CLAUDE.md` (per repo + root), `~/.claude/projects/*/memory/*.md`
151
+ with frontmatter, `~/.claude/keybindings.json`, MCP server
152
+ registrations in `~/.claude.json`, skill directories, per-project
153
+ overrides, and external CLIs that behave differently per project.
154
+
155
+ There is no single place that answers *"what can this agent do right
156
+ now, and why is that configured that way?"*. Drift between sessions
157
+ is invisible until it breaks something. Humans editing one surface
158
+ do not know which other surfaces they need to touch. A fresh agent
159
+ instance has no way to audit its own setup.
160
+
161
+ Our entry point into this problem: on 2026-04-23, an
162
+ `agent-grounding` checkout that was 16 commits behind origin led two
163
+ tasks to be incorrectly called "stale". The check that would have
164
+ caught it already exists:
165
+ [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight)
166
+ runs `git fetch` + `git status` (alongside lint, typecheck, test,
167
+ audit) and emits a structured `ready` + confidence-score result. The
168
+ missing piece was not the check itself; it was the deterministic
169
+ *trigger*: a `SessionStart` hook that invokes `preflight run` and a
170
+ policy that gates further work on the result. Building that wiring
171
+ needs an agreed-upon place for harness config to live first. That
172
+ conversation is the origin of this repo.
142
173
 
143
174
  ## Related
144
175
 
145
- - [`agent-grounding`](https://github.com/LanNguyenSi/agent-grounding) — grounding primitives (evidence-ledger, claim-gate, review-claim-gate); `grounding-mcp` is the canonical client surface harness queries through `queryLedgerByTag` (Phase 4 #3).
146
- - [`agent-memory`](https://github.com/LanNguyenSi/agent-memory) — memory surfaces the control plane inventories.
147
- - [`agent-tasks`](https://github.com/LanNguyenSi/agent-tasks) the MCP-registered task platform whose registration + health appear in `harness describe`.
148
- - [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight) — local preflight validator; the canonical implementation of preflight-hook content harness wires (see `docs/ARCHITECTURE.md` §5 for the canonical hook-script shape and §6 for the Phase 4 policy that gates further work on a `preflight:${REPO}` ledger entry).
149
- - [`codebase-oracle`](https://github.com/LanNguyenSi/codebase-oracle) — one of the MCP surfaces being registered.
150
- - [`dev-tools`](https://github.com/LanNguyenSi/dev-tools) `git-batch-cli`, a day-to-day tool whose inventory appears in `harness describe`.
176
+ - [`agent-grounding`](https://github.com/LanNguyenSi/agent-grounding):
177
+ grounding primitives (evidence-ledger, claim-gate,
178
+ review-claim-gate); `grounding-mcp` is the canonical client surface
179
+ harness queries through `queryLedgerByTag`.
180
+ - [`agent-memory`](https://github.com/LanNguyenSi/agent-memory):
181
+ memory surfaces the control plane inventories.
182
+ - [`agent-tasks`](https://github.com/LanNguyenSi/agent-tasks): the
183
+ MCP-registered task platform whose registration + health appear in
184
+ `harness describe`.
185
+ - [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight):
186
+ local preflight validator; the canonical implementation of
187
+ preflight-hook content harness wires.
188
+ - [`codebase-oracle`](https://github.com/LanNguyenSi/codebase-oracle):
189
+ one of the MCP surfaces being registered.
190
+ - [`agent-dx`](https://github.com/LanNguyenSi/agent-dx): ships
191
+ `git-batch-cli`, a day-to-day tool whose inventory appears in
192
+ `harness describe`.
151
193
 
152
194
  ## License
153
195
 
154
- MIT see [LICENSE](LICENSE).
196
+ MIT, see [LICENSE](LICENSE).
@@ -26,6 +26,10 @@ export interface HookEntry {
26
26
  export type AddEntry = {
27
27
  type: "mcp";
28
28
  entry: McpEntry;
29
+ } | {
30
+ type: "mcp_replace";
31
+ name: string;
32
+ entry: McpEntry;
29
33
  } | {
30
34
  type: "cli";
31
35
  entry: CliEntry;
@@ -1,10 +1,13 @@
1
- import { isSeq, parseDocument } from "yaml";
1
+ import { isMap, isSeq, parseDocument } from "yaml";
2
2
  export function applyAdd(yamlText, action) {
3
3
  const doc = parseDocument(yamlText);
4
4
  switch (action.type) {
5
5
  case "mcp":
6
6
  addToSequence(doc, ["tools", "mcp"], action.entry);
7
7
  break;
8
+ case "mcp_replace":
9
+ replaceOrAppendByName(doc, ["tools", "mcp"], action.name, action.entry);
10
+ break;
8
11
  case "cli":
9
12
  addToSequence(doc, ["tools", "cli"], action.entry);
10
13
  break;
@@ -33,4 +36,30 @@ function addToSequence(doc, pathSegments, entry) {
33
36
  }
34
37
  throw new Error(`expected a YAML sequence at ${pathSegments.join(".")}, got ${typeof node}`);
35
38
  }
39
+ // Find the first item in the sequence whose `name:` matches; replace it. If
40
+ // no match is found, append (so the call site doesn't need to branch on
41
+ // "exists vs new"). Comments and other YAML niceties on the original node are
42
+ // dropped on replace; that is acceptable for the adopt round-trip use case
43
+ // (the replacement is the user's hand-edit becoming the new source of truth).
44
+ function replaceOrAppendByName(doc, pathSegments, name, entry) {
45
+ const node = doc.getIn(pathSegments);
46
+ if (node === undefined || node === null) {
47
+ doc.setIn(pathSegments, [entry]);
48
+ return;
49
+ }
50
+ if (!isSeq(node)) {
51
+ throw new Error(`expected a YAML sequence at ${pathSegments.join(".")}, got ${typeof node}`);
52
+ }
53
+ for (let i = 0; i < node.items.length; i++) {
54
+ const item = node.items[i];
55
+ if (!isMap(item))
56
+ continue;
57
+ const itemName = item.get("name");
58
+ if (typeof itemName === "string" && itemName === name) {
59
+ node.set(i, entry);
60
+ return;
61
+ }
62
+ }
63
+ node.add(entry);
64
+ }
36
65
  //# sourceMappingURL=mutate.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"mutate.js","sourceRoot":"","sources":["../../../src/cli/add/mutate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,aAAa,EAAiB,MAAM,MAAM,CAAC;AAkC3D,MAAM,UAAU,QAAQ,CAAC,QAAgB,EAAE,MAAgB;IACzD,MAAM,GAAG,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACpC,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,OAAO;YACV,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACjE,MAAM;QACR,KAAK,MAAM;YACT,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5C,MAAM;IACV,CAAC;IACD,6EAA6E;IAC7E,yEAAyE;IACzE,8EAA8E;IAC9E,kDAAkD;IAClD,OAAO,GAAG,CAAC,QAAQ,CAAC,EAAE,qBAAqB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,aAAa,CACpB,GAAoB,EACpB,YAAsB,EACtB,KAAc;IAEd,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACrC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACxC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAChB,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IACD,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,OAAO,IAAI,EAAE,CAC5E,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"mutate.js","sourceRoot":"","sources":["../../../src/cli/add/mutate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAiB,MAAM,MAAM,CAAC;AAmClE,MAAM,UAAU,QAAQ,CAAC,QAAgB,EAAE,MAAgB;IACzD,MAAM,GAAG,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACpC,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,aAAa;YAChB,qBAAqB,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACxE,MAAM;QACR,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,OAAO;YACV,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACjE,MAAM;QACR,KAAK,MAAM;YACT,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5C,MAAM;IACV,CAAC;IACD,6EAA6E;IAC7E,yEAAyE;IACzE,8EAA8E;IAC9E,kDAAkD;IAClD,OAAO,GAAG,CAAC,QAAQ,CAAC,EAAE,qBAAqB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,aAAa,CACpB,GAAoB,EACpB,YAAsB,EACtB,KAAc;IAEd,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACrC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACxC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAChB,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IACD,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,OAAO,IAAI,EAAE,CAC5E,CAAC;AACJ,CAAC;AAED,4EAA4E;AAC5E,wEAAwE;AACxE,8EAA8E;AAC9E,2EAA2E;AAC3E,8EAA8E;AAC9E,SAAS,qBAAqB,CAC5B,GAAoB,EACpB,YAAsB,EACtB,IAAY,EACZ,KAAc;IAEd,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACrC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACxC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,OAAO,IAAI,EAAE,CAC5E,CAAC;IACJ,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC
,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;YAAE,SAAS;QAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;YACtD,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YACnB,OAAO;QACT,CAAC;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;AAClB,CAAC"}
@@ -36,3 +36,26 @@ export declare function computeDrift(settingsHooks: DerivedHook[], manifestHooks
36
36
  * `adopted-hook` if the command has no recognisable basename.
37
37
  */
38
38
  export declare function synthesizeName(d: DerivedHook, taken: Set<string>): string;
39
+ export interface DerivedMcp {
40
+ name: string;
41
+ command: string[];
42
+ env?: Record<string, string>;
43
+ }
44
+ export interface SettingsMcpSpec {
45
+ command?: unknown;
46
+ args?: unknown;
47
+ env?: unknown;
48
+ }
49
+ export interface SettingsRootWithMcp extends SettingsRoot {
50
+ mcpServers?: Record<string, SettingsMcpSpec>;
51
+ }
52
+ export declare function parseSettingsMcpServers(raw: unknown): DerivedMcp[];
53
+ export declare function manifestMcpProjection(manifest: Manifest): DerivedMcp[];
54
+ export type McpDriftReason = "new" | "modified";
55
+ export interface McpDriftEntry {
56
+ entry: DerivedMcp;
57
+ reason: McpDriftReason;
58
+ }
59
+ export declare function computeMcpDrift(settingsMcp: DerivedMcp[], manifestMcp: DerivedMcp[]): McpDriftEntry[];
60
+ export { mcpEqual };
61
+ declare function mcpEqual(a: DerivedMcp, b: DerivedMcp): boolean;