npm - @lannguyensi/harness - Versions diffs - 0.5.0 → 0.7.0 - Mend

@lannguyensi/harness 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/CHANGELOG.md +154 -0
package/README.md +162 -120
package/dist/cli/add/mutate.d.ts +4 -0
package/dist/cli/add/mutate.js +30 -1
package/dist/cli/add/mutate.js.map +1 -1
package/dist/cli/adopt/derive.d.ts +23 -0
package/dist/cli/adopt/derive.js +85 -0
package/dist/cli/adopt/derive.js.map +1 -1
package/dist/cli/adopt/index.d.ts +9 -1
package/dist/cli/adopt/index.js +72 -4
package/dist/cli/adopt/index.js.map +1 -1
package/dist/cli/apply/apply.d.ts +45 -1
package/dist/cli/apply/apply.js +237 -25
package/dist/cli/apply/apply.js.map +1 -1
package/dist/cli/apply/generate-settings.d.ts +13 -1
package/dist/cli/apply/generate-settings.js +45 -0
package/dist/cli/apply/generate-settings.js.map +1 -1
package/dist/cli/apply/index.d.ts +1 -0
package/dist/cli/apply/index.js +1 -0
package/dist/cli/apply/index.js.map +1 -1
package/dist/cli/apply/next-steps.d.ts +7 -0
package/dist/cli/apply/next-steps.js +37 -0
package/dist/cli/apply/next-steps.js.map +1 -0
package/dist/cli/audit.js +2 -1
package/dist/cli/audit.js.map +1 -1
package/dist/cli/describe.d.ts +1 -1
package/dist/cli/describe.js +2 -0
package/dist/cli/describe.js.map +1 -1
package/dist/cli/diff/since-apply.js +1 -1
package/dist/cli/diff/since-apply.js.map +1 -1
package/dist/cli/doctor/format.js +34 -3
package/dist/cli/doctor/format.js.map +1 -1
package/dist/cli/doctor/index.js +39 -3
package/dist/cli/doctor/index.js.map +1 -1
package/dist/cli/doctor/types.d.ts +14 -2
package/dist/cli/explain.d.ts +10 -1
package/dist/cli/explain.js +44 -18
package/dist/cli/explain.js.map +1 -1
package/dist/cli/index.js +140 -11
package/dist/cli/index.js.map +1 -1
package/dist/cli/list.d.ts +1 -1
package/dist/cli/list.js +17 -0
package/dist/cli/list.js.map +1 -1
package/dist/cli/session-export/index.d.ts +46 -0
package/dist/cli/session-export/index.js +169 -0
package/dist/cli/session-export/index.js.map +1 -0
package/dist/cli/session-export/redact.d.ts +22 -0
package/dist/cli/session-export/redact.js +47 -0
package/dist/cli/session-export/redact.js.map +1 -0
package/dist/cli/session-export/transcript.d.ts +24 -0
package/dist/cli/session-export/transcript.js +162 -0
package/dist/cli/session-export/transcript.js.map +1 -0
package/dist/cli/validate/index.d.ts +8 -0
package/dist/cli/validate/index.js +37 -1
package/dist/cli/validate/index.js.map +1 -1
package/dist/io/harness-lock.d.ts +6 -1
package/dist/io/harness-lock.js +2 -2
package/dist/io/harness-lock.js.map +1 -1
package/dist/io/merge-settings.d.ts +8 -0
package/dist/io/merge-settings.js +47 -0
package/dist/io/merge-settings.js.map +1 -0
package/dist/policies/ledger-client.js +4 -2
package/dist/policies/ledger-client.js.map +1 -1
package/dist/policies/requires.js +3 -2
package/dist/policies/requires.js.map +1 -1
package/dist/probes/mcp.d.ts +13 -0
package/dist/probes/mcp.js +27 -3
package/dist/probes/mcp.js.map +1 -1
package/dist/runtime/intercept.js +3 -2
package/dist/runtime/intercept.js.map +1 -1
package/dist/runtime/ledger-record.d.ts +8 -0
package/dist/runtime/ledger-record.js +12 -3
package/dist/runtime/ledger-record.js.map +1 -1
package/dist/schema/audit.d.ts +71 -0
package/dist/schema/audit.js +32 -0
package/dist/schema/audit.js.map +1 -0
package/dist/schema/index.d.ts +408 -0
package/dist/schema/index.js +21 -0
package/dist/schema/index.js.map +1 -1
package/dist/schema/workflows.d.ts +519 -0
package/dist/schema/workflows.js +81 -0
package/dist/schema/workflows.js.map +1 -0
package/dist/version.d.ts +1 -0
package/dist/version.js +3 -0
package/dist/version.js.map +1 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,160 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/),
 and this project adheres to [Semantic Versioning](https://semver.org/).
+## [Unreleased]
+## [0.7.0] - 2026-05-06
+**Headline: workflows-as-data and full-session audit forensics.** The
+`workflows:` block (PR #66) lets adopters declare branch policy,
+review-subagent gating, and merge method as schema-validated data
+instead of prose in memory files. `harness session-export <sessionId>`
+(PR #67) joins the on-disk Claude Code transcript JSONL with the
+evidence ledger for the same session and emits a single chronologically
+ordered audit artifact, with default-on regex redaction extended by a
+new optional `audit.redact[]` manifest block. The README is split into
+audience-specific guides (`docs/for-humans.md`, `docs/for-agents.md`)
+and gains a control-loop flowchart that both audiences read
+identically. `harness explain --last` closes the "what just denied me?"
+loop without needing the policy name. No runtime enforcement of
+`workflows:` yet; that ships as a follow-up.
+Operator note: no schema bump (still `version: 1`). All new manifest
+fields are optional and additive; manifests written for `0.6.0` parse
+under `0.7.0` byte-identically. The new `audit.redact[]` defaults to a
+denylist that catches the four obvious key/secret patterns even when
+the operator declares no `audit:` block, so existing operators get
+redaction-on-by-default for `session-export` for free.
+### Changed
+- `docs/for-agents.md` workflow lifecycle stateDiagram is now anchored
+  on the four step kinds the `workflows:` schema actually defines
+  (`branch`, `review_subagent`, `ci_gate`, `merge`) instead of
+  agent-tasks-MCP-specific verbs (`task_start`, `open` / `in_progress` /
+  `done`). A new "If you use agent-tasks MCP" footnote below the
+  diagram maps the lifecycle markers to the concrete MCP verbs as one
+  example integration; other task systems fit the same lifecycle.
+  Spotted right after the audience split landed (PR #69).
+- Root `README.md` gains a control-loop flowchart ("What harness does":
+  declare, apply, enforce, record, observe, refine) that both
+  audiences read identically. No audience-specific verbs (PR #69).
+- Docs split into two audience-specific surfaces:
+  `docs/for-humans.md` (operator guide: install, mental model, first
+  hour, diagnostics cheat sheet) and `docs/for-agents.md` (workflow
+  lifecycle, policy/ledger sequence, CLI cheat sheet by side-effect
+  class, audit triumvirate). README shrunk to a landing page that
+  picks audience, with the `Try it in 60 seconds` block, status
+  checklist, and `Why this exists` preserved. Three mermaid diagrams
+  added: a system flowchart in `for-humans.md`, a workflow
+  stateDiagram and a policy/ledger sequenceDiagram in
+  `for-agents.md`. Docs-only, no source changes (PR #68).
+### Added
+- `harness explain --last` traces the most recent policy decision in the
+  evidence ledger without needing the policy name, closing the common
+  "I just got a deny, what fired?" loop in one command instead of three.
+  Pair with `--decision allow|deny|warn-degraded` to skip past intervening
+  outcomes. `<policy>` and `--last` are mutually exclusive (PR #65).
+- `harness session-export <sessionId>` joins the on-disk Claude Code
+  transcript JSONL (`~/.claude/projects/<projectDir>/<sessionId>.jsonl`)
+  with evidence-ledger rows for the same session and emits a single
+  chronologically-ordered audit artifact. `--format json` (default) and
+  `--format jsonl` ship in v1; `-o <file>` writes to disk. Each event
+  carries an explicit `source: "transcript" | "ledger"` marker so the
+  export is traceable back to its inputs (PR #67).
+- New optional `audit.redact[]` block in the manifest. Each entry is
+  either `{ regex, replacement? }` or `{ env_var, replacement? }`;
+  `env_var:` resolves to the actual value at export time and
+  string-replaces it. A default denylist (token / secret / password /
+  api_key) ships even when the manifest declares no `audit:` block, so
+  redaction is on by default. Manifests without `audit:` parse
+  unchanged (PR #67).
+- Additive `workflows:` and `review_templates:` top-level blocks in the
+  manifest (still `version: 1`). Lets adopters declare review-subagent
+  gating, branch policy, CI gate, and merge method as data instead of
+  prose in memory files. The schema rejects duplicate workflow names,
+  unknown step `kind` values, `spawn: required` without a `template`,
+  and `template:` references not defined in `review_templates`. Surfaces
+  via `harness describe --pillar workflows`, `harness list workflows`,
+  and a new `Workflows` section in `harness doctor`. No runtime
+  enforcement yet; that ships as a follow-up. Manifests without
+  `workflows:` parse identically to before (PR #66).
+## [0.6.0] - 2026-05-03
+**Headline: the Phase-5 adoption-blocker cycle closes end-to-end.**
+`harness apply` now writes directly into a Claude Code settings
+discovery path (`--target` + `--merge`), translates the manifest's
+`tools.mcp[]` into the settings.json `mcpServers` block (so a real
+`claude -p --settings <apply'd>` session actually loads them), prints
+a Next-steps hint that names the real wire-up commands instead of
+leaving adopters to guess, and `harness adopt` round-trips hand-edits
+to `mcpServers` back into the manifest. The full
+`apply → hand-edit → adopt → apply` cycle is byte-identical on the
+settings.json bytes.
+Operator note: no schema changes; `harness.lock` gains an optional
+`target` entry kind (additive). Existing `harness.lock` files without
+target entries continue to parse. The new flags on `apply` are all
+opt-in; the default invocation still writes to `harness.generated/`.
+Per-package version bumped from 0.5.0 to 0.6.0; this is the first
+minor release on the `@lannguyensi/harness` npm distribution. No
+operator action required beyond `npm i -g @lannguyensi/harness@0.6.0`
+on machines running the published binary.
+### Added
+- **`harness apply --target / --merge / --force`** (task `d38f6f91`, PR #58):
+  write the rendered settings.json directly into a Claude Code settings
+  discovery path (e.g. `.claude/settings.local.json` or
+  `~/.claude/settings.json`). `--merge` does a 3-way merge that replaces
+  harness-owned top-level keys (today: `hooks`, `mcpServers`) and preserves
+  everything else. `harness.lock` records the target sha so `validate
+  --check-lock` flags out-of-band edits. Closes the adoption blocker that
+  forced every adopter into a hand `cp` or per-invocation `--settings`.
+- **`apply` translates `tools.mcp[]` into the settings.json `mcpServers`
+  block** (task `62380337`, PR #59). The Phase 5 #1a caveat is closed:
+  `init.mcp_servers` in a `claude -p --settings <apply'd>` session now
+  contains the manifest's MCP entries. Disabled servers (`enabled: false`)
+  are omitted; warnings (not errors) cover entries that survive schema
+  but produce no runnable command. String-form commands with embedded
+  whitespace in paths must be expressed as the array form to preserve
+  token boundaries.
+- **`apply` prints a Next-steps hint after a successful run** (task
+  `517aa919`, PR #60). After the summary line, the CLI prints concrete,
+  copy-pasteable next actions: one-shot `claude -p --settings ...`,
+  project-scope `harness apply --target .claude/settings.local.json`,
+  and user-global `harness apply --target ~/.claude/settings.json --merge`.
+  When `--target` was actually written, the hint collapses to a single
+  verify line with `--settings <targetPath>` included (so non-canonical
+  target paths still resolve through `claude -p`). Two new flags pair
+  with this: `--quiet` suppresses the hint while keeping the summary,
+  and `--json` emits a machine-readable JSON summary instead of prose
+  (implies `--quiet`; refusal outcomes still set non-zero exit).
+  Motivated by a 2026-05-03 hallucination incident where an agent
+  fabricated a non-existent `claude -p --output-dir` flag because
+  nothing in the apply output guided the wire-up step; both unit and
+  CLI integration tests assert the hint never contains `--output-dir`.
+- **`adopt` reverse-projection for `mcpServers` into `tools.mcp[]`**
+  (task `7059d92b`, PR #61). Closes the round-trip gap: hand-edits to
+  settings.json's `mcpServers` block can now be captured back into the
+  manifest. New entries are appended; same-name entries with different
+  command/env are replaced (preserving manifest-only fields like `health`
+  and `enabled: false`, so adopting a hand-edit does not silently wipe
+  doctor/probe/policy metadata). The full
+  `harness apply --target ... --merge → hand-edit → harness adopt → harness apply`
+  cycle is byte-identical on the settings.json bytes.
+### Notes for upgraders
+- The settings.json output now includes a `mcpServers` key when your
+  manifest declares enabled MCP servers. On the first apply after
+  upgrade, the file grows by that block. The three-state compare handles
+  this safely (no spurious drift-refuse on the generated file), but if
+  you have hand-edited a `mcpServers` block into a previously apply'd
+  settings.json, the next apply will refuse (`drift-refuse`) until you
+  reconcile via `harness adopt` or `--overwrite-drift`.
 ## [0.5.0] - 2026-05-01
 **Phase 5: dogfood + polish.** Phase 4 shipped policies that fire; Phase 5

package/README.md CHANGED Viewed

@@ -2,153 +2,195 @@
 **Declarative control plane for agent harnesses.**
-One zod-validated YAML manifest for grounding, tools, memory, hooks, and policies — plus a CLI that describes, validates, diffs, applies, audits, and *enforces*.
+One zod-validated YAML manifest for grounding, tools, memory, hooks,
+policies, and workflows, plus a CLI that describes, validates, diffs,
+applies, audits, and *enforces*.
+> Most config tools tell you what an agent is configured to use.
+> `harness` tells you what an agent is *allowed to do*, under this
+> exact context, and why.
+`harness` collapses the six-to-eight surfaces a working agent harness
+leaks across (`settings.json`, `CLAUDE.md`, memory frontmatter, MCP
+registrations, per-project overrides, hook scripts) into a single
+source of truth. Today (`v0.7.0`) policies fire end-to-end: a
+`mcp__agent-tasks__pull_requests_merge` call against a session
+without a `review:${PR_NUMBER}` ledger entry refuses; `harness
+explain review-before-merge --trace` shows exactly why.
+## What harness does
+```mermaid
+flowchart LR
+    declare["1. Declare<br/><code>harness.yaml</code>"]
+    apply["2. Apply<br/><code>harness apply</code>"]
+    enforce["3. Enforce<br/>hooks + policies<br/>at runtime"]
+    record[("4. Record<br/>evidence ledger")]
+    observe["5. Observe<br/><code>audit</code> / <code>explain</code> /<br/><code>session-export</code>"]
+    declare --> apply
+    apply --> enforce
+    enforce --> record
+    record --> observe
+    observe -. refine .-> declare
+```
+One manifest declares grounding, tools, memory, hooks, policies, and
+workflows. `apply` materialises that into the files Claude Code
+actually reads. At runtime, hooks and policies enforce the contract
+and write decision rows to the evidence ledger. The read-side
+surfaces (`audit`, `explain --trace`, `session-export`) replay those
+rows so you can see what fired, why, and across which session.
+Whatever you learn from observing flows back into the manifest. That
+loop is the whole product.
+## Pick your audience
+- **Operator?** Read [`docs/for-humans.md`](docs/for-humans.md). It
+  walks from `npm i -g @lannguyensi/harness` through your first
+  `apply`, your first real policy, and the diagnostics cheat sheet.
+- **Agent (or onboarding one)?** Read
+  [`docs/for-agents.md`](docs/for-agents.md). It defines the
+  workflow lifecycle, the policy / ledger sequence, the CLI cheat
+  sheet split by side-effect class, and the audit triumvirate
+  (`audit` vs `explain --trace` vs `session-export`).
-> Most config tools tell you what an agent is configured to use. `harness` tells you what an agent is *allowed to do*, under this exact context, and why.
+## Install
-`harness` collapses the six-to-eight surfaces a working agent harness leaks across (`settings.json`, `CLAUDE.md`, memory frontmatter, MCP registrations, per-project overrides, hook scripts) into a single source of truth. Today (`v0.4.0`) policies fire end-to-end: a `mcp__agent-tasks__pull_requests_merge` call against a session without a `review:${PR_NUMBER}` ledger entry refuses; `harness explain review-before-merge --trace` shows exactly why. Phase 6 adds an *Understanding Gate* (agents confirm task interpretation before editing); Phase 7 adds a *Risk Gate* that blocks `DROP TABLE` against a prod target — even when the model would happily run it.
+```bash
+npm i -g @lannguyensi/harness
+```
+The CLI binary is `harness`. Node 20 or newer required.
 ## Try it in 60 seconds
 ```bash
 git clone https://github.com/LanNguyenSi/harness && cd harness
 npm install && npm run build
-# Statically predict which policies fire for a tool call (no ledger, no LLM)
 node dist/cli/main.js dry-run "merge PR 42" \
   --tool mcp__agent-tasks__pull_requests_merge \
   --tool-args '{"prNumber":42}' \
   --config docs/examples/full-manifest.yaml
 ```
-`dry-run` reads the reference manifest (`docs/examples/full-manifest.yaml`), runs the trigger matcher, substitutes `${PR_NUMBER}=42` through the JSONPath-restricted extract DSL, and tells you exactly which hooks would fire and which policies would match — before any ledger I/O.
-## What a run looks like
-```yaml
-prompt: merge PR 42
-tool: mcp__agent-tasks__pull_requests_merge
-toolArgs:
-  prNumber: 42
-Hooks that would fire:
-  - event: SessionStart
-    name: git-preflight
-  - event: PreToolUse
-    name: require-review-evidence
-  - event: PreToolUse
-    name: require-dogfood-evidence
-  - event: PreToolUse
-    name: require-preflight-evidence
-Policies that match:
-  - name: review-before-merge
-    ledgerQuery: review:42
-    requires:
-      ledger_tag: review:${PR_NUMBER}
-    enforcement: block
-    triggerEvent: PreToolUse
-  - name: two-reviewers-required
-    ledgerQuery: review:42
-    requires:
-      ledger_tag: review:${PR_NUMBER}
-      count:
-        min: 2
-    enforcement: warn
-    triggerEvent: PreToolUse
-Policies that COULD match (need --tool):
-  - name: dogfood-before-release
-    triggerEvent: PreToolUse
-    reason: --tool "mcp__agent-tasks__pull_requests_merge" does not contain trigger.match "Bash"
-  - name: preflight-before-investigation
-    triggerEvent: PreToolUse
-    reason: --tool "mcp__agent-tasks__pull_requests_merge" does not contain trigger.match "Bash"
-Memories that would route:
-  - path: ~/.claude/projects/{project}/memory
-    scope: project
-```
-When the matching policy actually fires (via `harness policy intercept`, wired by `harness apply` into `settings.json` as a `PreToolUse` hook), and the evidence ledger has no `review:42` entry, the runtime emits Claude Code's deny shape on stdout:
-```json
-{"decision":"deny","reason":"review-before-merge: no matching ledger entry for tag `review:42`"}
-```
-After the entry is recorded, the same call is silently allowed. Every fire writes a `policy_decision` row that `harness audit` and `harness explain --trace` replay:
-```
-$ node dist/cli/main.js audit --since 1h --policy review-before-merge --session sess-1 --config docs/examples/full-manifest.yaml
-timestamp                 policy               outcome  reason
-------------------------  -------------------  -------  ---------------------------------------------
-2026-04-30T18:30:00.000Z  review-before-merge  deny     no matching ledger entry for tag `review:42`
-2026-04-30T18:31:00.000Z  review-before-merge  allow    1 matching ledger entries for tag `review:42`
-```
-## Next steps
-| If you want to... | Read |
-|------|------|
-| Understand the YAML shape, CLI surface, drift handling, `requires` schema | [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) |
-| See phase-by-phase scope, deliverables, acceptance criteria, exit gates | [`docs/ROADMAP.md`](docs/ROADMAP.md) |
-| Read the long-form positioning (three pillars, ecosystem map, gaps) | [`docs/VISION.md`](docs/VISION.md) |
-| Browse a manifest covering every field | [`docs/examples/full-manifest.yaml`](docs/examples/full-manifest.yaml) |
-| Track what's shipping and what's deferred | [`CHANGELOG.md`](CHANGELOG.md) |
+`dry-run` reads the reference manifest, runs the trigger matcher,
+substitutes `${PR_NUMBER}=42` through the JSONPath-restricted extract
+DSL, and tells you exactly which hooks would fire and which policies
+would match, before any ledger I/O.
-## Common commands
+## Status
-```bash
-node dist/cli/main.js init --template full --config /tmp/harness-demo/harness.yaml
-node dist/cli/main.js describe   --config /tmp/harness-demo/harness.yaml --pillar tools
-node dist/cli/main.js doctor     --config /tmp/harness-demo/harness.yaml --shallow
-node dist/cli/main.js validate   --config /tmp/harness-demo/harness.yaml
-node dist/cli/main.js apply      --config /tmp/harness-demo/harness.yaml   # regenerate settings.json + MEMORY.md, write harness.lock
-node dist/cli/main.js diff --since-apply --config /tmp/harness-demo/harness.yaml
-node dist/cli/main.js explain review-before-merge --trace --config docs/examples/full-manifest.yaml
-node dist/cli/main.js audit --since 24h --config docs/examples/full-manifest.yaml
-```
+- [x] Phase 1, read-only inventory (`describe`, `validate`, `doctor`,
+      `list`, `explain`, `diff`), released as
+      [`v0.1.0`](CHANGELOG.md#010---2026-04-29).
+- [x] Phase 2, managed edits (`init`, `add`, `remove`, `adopt`,
+      `export`), released as [`v0.2.0`](CHANGELOG.md#020---2026-04-29).
+- [x] Phase 3, declarative truth (`apply`, `diff --since-apply`,
+      `harness.lock`), released as
+      [`v0.3.0`](CHANGELOG.md#030---2026-04-30).
+- [x] Phase 4, policy layer (`policy intercept`, `explain --trace`,
+      `audit`, `dry-run`, requires-evaluator + extract DSL +
+      grounding-mcp adapter), released as
+      [`v0.4.0`](CHANGELOG.md#040---2026-04-30).
+- [x] Phase 5, polish + dogfood lessons (`--verbose` policy
+      diagnostics, `$CLAUDE_SESSION_ID` env fallback, server-side
+      `audit` filter pushdown, `policy_decision` first-class entry
+      type, npm distribution as `@lannguyensi/harness`), released as
+      [`v0.5.0`](CHANGELOG.md#050---2026-05-01).
+- [x] Apply-into-settings cycle, `harness adopt`, `apply --target /
+      --merge`, `harness.lock` target tracking, released as
+      [`v0.6.0`](CHANGELOG.md#060---2026-05-03).
+- [x] Workflows-as-data + full-session audit forensics: additive
+      `workflows:` / `review_templates:` / `audit.redact[]` manifest
+      blocks, `harness session-export`, `explain --last`,
+      audience-specific docs surfaces, released as
+      [`v0.7.0`](CHANGELOG.md#070---2026-05-06).
+- [ ] Phase 6, Understanding Gate Policy Pack: agents must expose and
+      confirm task understanding before write-capable tools fire.
+- [ ] Phase 7, Risk Gate: Action Envelope + Risk Classifier +
+      `allow / warn / require_approval / deny` for destructive-action
+      prevention.
 ## What's next
-Two structurally larger themes are queued after Phase 5's polish:
-**Phase 6 — Understanding Gate.** Before an agent edits files, runs shell, commits, or opens a PR, it must produce an *Understanding Report* (its interpretation of the task: derived todos, acceptance criteria, assumptions, out-of-scope, risks). The user confirms, corrects, or "grills me until precise enough". Only after explicit approval is recorded in the evidence ledger may write-capable tools fire. Ships as the first `harness` *Policy Pack* — a reusable bundle of instruction template + hooks + policies + permission profiles. Long-form design lives in the internal `lava-ice-logs` logbook (2026-04-30).
-**Phase 7 — Risk Gate.** Today's policy model evaluates a rule per matching trigger and returns a binary block/allow. Phase 7 makes harness reason about *the action itself*: an Action Envelope (tool + raw input + session + runtime context) is enriched by a Context Resolver (production / staging / dev / unknown), classified by a Risk Classifier (severity + categories + reversibility), then matched against policies whose `when:` clauses can reference `risk.severity_at_least`, `environment.name`, and similar. The decision space extends to `allow / warn / require_approval / deny`. Motivating use case: prevent `DROP TABLE users`, `kubectl delete namespace prod`, `terraform destroy` against an unverified production target before they reach the runtime — even if the model would have happily run them. Long-form design lives in the internal `lava-ice-logs` logbook (2026-04-30).
-Both build on Phase 4's `policy intercept` runtime backbone; neither replaces it.
+Two structurally larger themes are queued after Phase 5's polish.
+**Phase 6, Understanding Gate.** Before an agent edits files, runs
+shell, commits, or opens a PR, it must produce an *Understanding
+Report* (its interpretation of the task: derived todos, acceptance
+criteria, assumptions, out-of-scope, risks). The user confirms,
+corrects, or "grills me until precise enough". Only after explicit
+approval is recorded in the evidence ledger may write-capable tools
+fire. Ships as the first `harness` *Policy Pack*: a reusable bundle
+of instruction template + hooks + policies + permission profiles.
+**Phase 7, Risk Gate.** Today's policy model evaluates a rule per
+matching trigger and returns a binary block/allow. Phase 7 makes
+harness reason about *the action itself*: an Action Envelope (tool +
+raw input + session + runtime context) is enriched by a Context
+Resolver (production / staging / dev / unknown), classified by a Risk
+Classifier (severity + categories + reversibility), then matched
+against policies whose `when:` clauses can reference
+`risk.severity_at_least`, `environment.name`, and similar. The
+decision space extends to `allow / warn / require_approval / deny`.
+Motivating use case: prevent `DROP TABLE users`, `kubectl delete
+namespace prod`, `terraform destroy` against an unverified production
+target, even if the model would have happily run them.
+Both build on Phase 4's `policy intercept` runtime backbone; neither
+replaces it.
 > Bring your favorite agent harness. Add governance.
-## Status
-- [x] Repo bootstrap (LICENSE, .gitignore)
-- [x] README + VISION — repo legible
-- [x] ARCHITECTURE — YAML shape + CLI surface agreed
-- [x] ROADMAP — phases 1–4 with acceptance criteria
-- [x] Phase 1 — read-only inventory (`describe`, `validate`, `doctor`, `list`, `explain`, `diff`) — released as [`v0.1.0`](CHANGELOG.md#010---2026-04-29)
-- [x] Phase 2 — managed edits (`init`, `add`, `remove`, `adopt`, `export`) — released as [`v0.2.0`](CHANGELOG.md#020---2026-04-29)
-- [x] Phase 3 — declarative truth (`apply`, `diff --since-apply`, `harness.lock`) — released as [`v0.3.0`](CHANGELOG.md#030---2026-04-30)
-- [x] Phase 4 — policy layer (`policy intercept`, `explain --trace`, `audit`, `dry-run`, requires-evaluator + extract DSL + grounding-mcp adapter) — released as [`v0.4.0`](CHANGELOG.md#040---2026-04-30)
-- [ ] Phase 5 — polish + dogfood lessons (`apply --strict-lock`, `validate --check-lock`, sessionId default, `--verbose` deny diagnostics, sysexits normalisation, real-Claude-Code dogfood)
-- [ ] Phase 6 — Understanding Gate Policy Pack (agents must expose and confirm task understanding before write-capable tools fire)
-- [ ] Phase 7 — Risk Gate (Action Envelope + Risk Classifier + `allow / warn / require_approval / deny` for destructive-action prevention)
 ## Why this exists
-A working agent harness today has six to eight configuration surfaces, each with its own schema and lifecycle: `~/.claude/settings.json`, `CLAUDE.md` (per repo + root), `~/.claude/projects/*/memory/*.md` with frontmatter, `~/.claude/keybindings.json`, MCP server registrations in `~/.claude.json`, skill directories, per-project overrides, and external CLIs that behave differently per project.
-There is no single place that answers *"what can this agent do right now, and why is that configured that way?"*. Drift between sessions is invisible until it breaks something. Humans editing one surface don't know which other surfaces they need to touch. A fresh agent instance has no way to audit its own setup.
-Our entry point into this problem: on 2026-04-23, an `agent-grounding` checkout that was 16 commits behind origin led two tasks to be incorrectly called "stale". The check that would have caught it already exists — [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight) runs `git fetch` + `git status` (alongside lint, typecheck, test, audit) and emits a structured `ready` + confidence-score result. The missing piece wasn't the check itself, it was the deterministic *trigger*: a `SessionStart` hook that invokes `preflight run` and a policy that gates further work on the result. Building that wiring needs an agreed-upon place for harness config to live first. That conversation is the origin of this repo.
+A working agent harness today has six to eight configuration
+surfaces, each with its own schema and lifecycle: `~/.claude/settings.json`,
+`CLAUDE.md` (per repo + root), `~/.claude/projects/*/memory/*.md`
+with frontmatter, `~/.claude/keybindings.json`, MCP server
+registrations in `~/.claude.json`, skill directories, per-project
+overrides, and external CLIs that behave differently per project.
+There is no single place that answers *"what can this agent do right
+now, and why is that configured that way?"*. Drift between sessions
+is invisible until it breaks something. Humans editing one surface
+do not know which other surfaces they need to touch. A fresh agent
+instance has no way to audit its own setup.
+Our entry point into this problem: on 2026-04-23, an
+`agent-grounding` checkout that was 16 commits behind origin led two
+tasks to be incorrectly called "stale". The check that would have
+caught it already exists:
+[`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight)
+runs `git fetch` + `git status` (alongside lint, typecheck, test,
+audit) and emits a structured `ready` + confidence-score result. The
+missing piece was not the check itself, it was the deterministic
+*trigger*: a `SessionStart` hook that invokes `preflight run` and a
+policy that gates further work on the result. Building that wiring
+needs an agreed-upon place for harness config to live first. That
+conversation is the origin of this repo.
 ## Related
-- [`agent-grounding`](https://github.com/LanNguyenSi/agent-grounding) — grounding primitives (evidence-ledger, claim-gate, review-claim-gate); `grounding-mcp` is the canonical client surface harness queries through `queryLedgerByTag` (Phase 4 #3).
-- [`agent-memory`](https://github.com/LanNguyenSi/agent-memory) — memory surfaces the control plane inventories.
-- [`agent-tasks`](https://github.com/LanNguyenSi/agent-tasks) — the MCP-registered task platform whose registration + health appear in `harness describe`.
-- [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight) — local preflight validator; the canonical implementation of preflight-hook content harness wires (see `docs/ARCHITECTURE.md` §5 for the canonical hook-script shape and §6 for the Phase 4 policy that gates further work on a `preflight:${REPO}` ledger entry).
-- [`codebase-oracle`](https://github.com/LanNguyenSi/codebase-oracle) — one of the MCP surfaces being registered.
-- [`dev-tools`](https://github.com/LanNguyenSi/dev-tools) — `git-batch-cli`, a day-to-day tool whose inventory appears in `harness describe`.
+- [`agent-grounding`](https://github.com/LanNguyenSi/agent-grounding):
+  grounding primitives (evidence-ledger, claim-gate,
+  review-claim-gate); `grounding-mcp` is the canonical client surface
+  harness queries through `queryLedgerByTag`.
+- [`agent-memory`](https://github.com/LanNguyenSi/agent-memory):
+  memory surfaces the control plane inventories.
+- [`agent-tasks`](https://github.com/LanNguyenSi/agent-tasks): the
+  MCP-registered task platform whose registration + health appear in
+  `harness describe`.
+- [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight):
+  local preflight validator; the canonical implementation of
+  preflight-hook content harness wires.
+- [`codebase-oracle`](https://github.com/LanNguyenSi/codebase-oracle):
+  one of the MCP surfaces being registered.
+- [`agent-dx`](https://github.com/LanNguyenSi/agent-dx): ships
+  `git-batch-cli`, a day-to-day tool whose inventory appears in
+  `harness describe`.
 ## License
-MIT — see [LICENSE](LICENSE).
+MIT, see [LICENSE](LICENSE).

package/dist/cli/add/mutate.d.ts CHANGED Viewed

@@ -26,6 +26,10 @@ export interface HookEntry {
 export type AddEntry = {
     type: "mcp";
     entry: McpEntry;
+} | {
+    type: "mcp_replace";
+    name: string;
+    entry: McpEntry;
 } | {
     type: "cli";
     entry: CliEntry;

package/dist/cli/add/mutate.js CHANGED Viewed

@@ -1,10 +1,13 @@
-import { isSeq, parseDocument } from "yaml";
+import { isMap, isSeq, parseDocument } from "yaml";
 export function applyAdd(yamlText, action) {
     const doc = parseDocument(yamlText);
     switch (action.type) {
         case "mcp":
             addToSequence(doc, ["tools", "mcp"], action.entry);
             break;
+        case "mcp_replace":
+            replaceOrAppendByName(doc, ["tools", "mcp"], action.name, action.entry);
+            break;
         case "cli":
             addToSequence(doc, ["tools", "cli"], action.entry);
             break;
@@ -33,4 +36,30 @@ function addToSequence(doc, pathSegments, entry) {
     }
     throw new Error(`expected a YAML sequence at ${pathSegments.join(".")}, got ${typeof node}`);
 }
+// Find the first item in the sequence whose `name:` matches; replace it. If
+// no match is found, append (so the call site doesn't need to branch on
+// "exists vs new"). Comments and other YAML niceties on the original node are
+// dropped on replace; that is acceptable for the adopt round-trip use case
+// (the replacement is the user's hand-edit becoming the new source of truth).
+function replaceOrAppendByName(doc, pathSegments, name, entry) {
+    const node = doc.getIn(pathSegments);
+    if (node === undefined || node === null) {
+        doc.setIn(pathSegments, [entry]);
+        return;
+    }
+    if (!isSeq(node)) {
+        throw new Error(`expected a YAML sequence at ${pathSegments.join(".")}, got ${typeof node}`);
+    }
+    for (let i = 0; i < node.items.length; i++) {
+        const item = node.items[i];
+        if (!isMap(item))
+            continue;
+        const itemName = item.get("name");
+        if (typeof itemName === "string" && itemName === name) {
+            node.set(i, entry);
+            return;
+        }
+    }
+    node.add(entry);
+}
 //# sourceMappingURL=mutate.js.map

package/dist/cli/add/mutate.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"mutate.js","sourceRoot":"","sources":["../../../src/cli/add/mutate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,aAAa,EAAiB,MAAM,MAAM,CAAC;~~AAkC3D~~,MAAM,UAAU,QAAQ,CAAC,QAAgB,EAAE,MAAgB;IACzD,MAAM,GAAG,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACpC,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,OAAO;YACV,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACjE,MAAM;QACR,KAAK,MAAM;YACT,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5C,MAAM;IACV,CAAC;IACD,6EAA6E;IAC7E,yEAAyE;IACzE,8EAA8E;IAC9E,kDAAkD;IAClD,OAAO,GAAG,CAAC,QAAQ,CAAC,EAAE,qBAAqB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,aAAa,CACpB,GAAoB,EACpB,YAAsB,EACtB,KAAc;IAEd,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACrC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACxC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAChB,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IACD,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,OAAO,IAAI,EAAE,CAC5E,CAAC;AACJ,CAAC"}
1	+ {"version":3,"file":"mutate.js","sourceRoot":"","sources":["../../../src/cli/add/mutate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAiB,MAAM,MAAM,CAAC;AAmClE,MAAM,UAAU,QAAQ,CAAC,QAAgB,EAAE,MAAgB;IACzD,MAAM,GAAG,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACpC,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,aAAa;YAChB,qBAAqB,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACxE,MAAM;QACR,KAAK,KAAK;YACR,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM;QACR,KAAK,OAAO;YACV,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YACjE,MAAM;QACR,KAAK,MAAM;YACT,aAAa,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5C,MAAM;IACV,CAAC;IACD,6EAA6E;IAC7E,yEAAyE;IACzE,8EAA8E;IAC9E,kDAAkD;IAClD,OAAO,GAAG,CAAC,QAAQ,CAAC,EAAE,qBAAqB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,aAAa,CACpB,GAAoB,EACpB,YAAsB,EACtB,KAAc;IAEd,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACrC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACxC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAChB,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IACD,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,OAAO,IAAI,EAAE,CAC5E,CAAC;AACJ,CAAC;AAED,4EAA4E;AAC5E,wEAAwE;AACxE,8EAA8E;AAC9E,2EAA2E;AAC3E,8EAA8E;AAC9E,SAAS,qBAAqB,CAC5B,GAAoB,EACpB,YAAsB,EACtB,IAAY,EACZ,KAAc;IAEd,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACrC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACxC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,OAAO,IAAI,EAAE,CAC5E,CAAC;IACJ,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,KAAK,CA
AC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;YAAE,SAAS;QAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;YACtD,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YACnB,OAAO;QACT,CAAC;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;AAClB,CAAC"}

package/dist/cli/adopt/derive.d.ts CHANGED Viewed

@@ -36,3 +36,26 @@ export declare function computeDrift(settingsHooks: DerivedHook[], manifestHooks
  * `adopted-hook` if the command has no recognisable basename.
  */
 export declare function synthesizeName(d: DerivedHook, taken: Set<string>): string;
+export interface DerivedMcp {
+    name: string;
+    command: string[];
+    env?: Record<string, string>;
+}
+export interface SettingsMcpSpec {
+    command?: unknown;
+    args?: unknown;
+    env?: unknown;
+}
+export interface SettingsRootWithMcp extends SettingsRoot {
+    mcpServers?: Record<string, SettingsMcpSpec>;
+}
+export declare function parseSettingsMcpServers(raw: unknown): DerivedMcp[];
+export declare function manifestMcpProjection(manifest: Manifest): DerivedMcp[];
+export type McpDriftReason = "new" | "modified";
+export interface McpDriftEntry {
+    entry: DerivedMcp;
+    reason: McpDriftReason;
+}
+export declare function computeMcpDrift(settingsMcp: DerivedMcp[], manifestMcp: DerivedMcp[]): McpDriftEntry[];
+export { mcpEqual };
+declare function mcpEqual(a: DerivedMcp, b: DerivedMcp): boolean;