npm - @lannguyensi/harness - Versions diffs - 0.6.0 → 0.8.0 - Mend

@lannguyensi/harness 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

package/CHANGELOG.md +255 -0
package/README.md +189 -148
package/dist/cli/apply/apply.d.ts +13 -0
package/dist/cli/apply/apply.js +59 -3
package/dist/cli/apply/apply.js.map +1 -1
package/dist/cli/apply/generate-codex-config.d.ts +6 -0
package/dist/cli/apply/generate-codex-config.js +149 -0
package/dist/cli/apply/generate-codex-config.js.map +1 -0
package/dist/cli/apply/generate-settings.d.ts +15 -1
package/dist/cli/apply/generate-settings.js +16 -1
package/dist/cli/apply/generate-settings.js.map +1 -1
package/dist/cli/apply/index.d.ts +2 -1
package/dist/cli/apply/index.js +2 -1
package/dist/cli/apply/index.js.map +1 -1
package/dist/cli/approve/understanding.d.ts +39 -0
package/dist/cli/approve/understanding.js +122 -0
package/dist/cli/approve/understanding.js.map +1 -0
package/dist/cli/describe.d.ts +1 -1
package/dist/cli/describe.js +2 -0
package/dist/cli/describe.js.map +1 -1
package/dist/cli/doctor/codex.d.ts +34 -0
package/dist/cli/doctor/codex.js +331 -0
package/dist/cli/doctor/codex.js.map +1 -0
package/dist/cli/doctor/format.js +29 -1
package/dist/cli/doctor/format.js.map +1 -1
package/dist/cli/doctor/index.d.ts +13 -1
package/dist/cli/doctor/index.js +49 -1
package/dist/cli/doctor/index.js.map +1 -1
package/dist/cli/doctor/types.d.ts +35 -1
package/dist/cli/doctor/types.js +12 -1
package/dist/cli/doctor/types.js.map +1 -1
package/dist/cli/explain.d.ts +10 -1
package/dist/cli/explain.js +44 -18
package/dist/cli/explain.js.map +1 -1
package/dist/cli/index.js +315 -8
package/dist/cli/index.js.map +1 -1
package/dist/cli/list.d.ts +1 -1
package/dist/cli/list.js +17 -0
package/dist/cli/list.js.map +1 -1
package/dist/cli/pack/add.d.ts +13 -0
package/dist/cli/pack/add.js +71 -0
package/dist/cli/pack/add.js.map +1 -0
package/dist/cli/pack/hook-codex-pre-tool-use.d.ts +30 -0
package/dist/cli/pack/hook-codex-pre-tool-use.js +149 -0
package/dist/cli/pack/hook-codex-pre-tool-use.js.map +1 -0
package/dist/cli/pack/hook-codex-stop.d.ts +31 -0
package/dist/cli/pack/hook-codex-stop.js +332 -0
package/dist/cli/pack/hook-codex-stop.js.map +1 -0
package/dist/cli/pack/hook-codex-user-prompt-submit.d.ts +18 -0
package/dist/cli/pack/hook-codex-user-prompt-submit.js +92 -0
package/dist/cli/pack/hook-codex-user-prompt-submit.js.map +1 -0
package/dist/cli/pack/hook-pre-tool-use.d.ts +32 -0
package/dist/cli/pack/hook-pre-tool-use.js +181 -0
package/dist/cli/pack/hook-pre-tool-use.js.map +1 -0
package/dist/cli/pack/index.d.ts +4 -0
package/dist/cli/pack/index.js +5 -0
package/dist/cli/pack/index.js.map +1 -0
package/dist/cli/pack/list.d.ts +10 -0
package/dist/cli/pack/list.js +43 -0
package/dist/cli/pack/list.js.map +1 -0
package/dist/cli/pack/mutate.d.ts +14 -0
package/dist/cli/pack/mutate.js +76 -0
package/dist/cli/pack/mutate.js.map +1 -0
package/dist/cli/pack/remove.d.ts +15 -0
package/dist/cli/pack/remove.js +153 -0
package/dist/cli/pack/remove.js.map +1 -0
package/dist/cli/session-export/index.d.ts +46 -0
package/dist/cli/session-export/index.js +169 -0
package/dist/cli/session-export/index.js.map +1 -0
package/dist/cli/session-export/redact.d.ts +22 -0
package/dist/cli/session-export/redact.js +47 -0
package/dist/cli/session-export/redact.js.map +1 -0
package/dist/cli/session-export/transcript.d.ts +24 -0
package/dist/cli/session-export/transcript.js +162 -0
package/dist/cli/session-export/transcript.js.map +1 -0
package/dist/cli/validate/checks.js +32 -0
package/dist/cli/validate/checks.js.map +1 -1
package/dist/policies/ledger-client.js +2 -1
package/dist/policies/ledger-client.js.map +1 -1
package/dist/policy-packs/builtin/permission-profiles.d.ts +11 -0
package/dist/policy-packs/builtin/permission-profiles.js +74 -0
package/dist/policy-packs/builtin/permission-profiles.js.map +1 -0
package/dist/policy-packs/builtin/understanding-before-execution-runtime.d.ts +56 -0
package/dist/policy-packs/builtin/understanding-before-execution-runtime.js +186 -0
package/dist/policy-packs/builtin/understanding-before-execution-runtime.js.map +1 -0
package/dist/policy-packs/builtin/understanding-before-execution.d.ts +15 -0
package/dist/policy-packs/builtin/understanding-before-execution.js +254 -0
package/dist/policy-packs/builtin/understanding-before-execution.js.map +1 -0
package/dist/policy-packs/expand.d.ts +4 -0
package/dist/policy-packs/expand.js +90 -0
package/dist/policy-packs/expand.js.map +1 -0
package/dist/policy-packs/index.d.ts +5 -0
package/dist/policy-packs/index.js +5 -0
package/dist/policy-packs/index.js.map +1 -0
package/dist/policy-packs/permission-translator.d.ts +9 -0
package/dist/policy-packs/permission-translator.js +76 -0
package/dist/policy-packs/permission-translator.js.map +1 -0
package/dist/policy-packs/registry.d.ts +11 -0
package/dist/policy-packs/registry.js +20 -0
package/dist/policy-packs/registry.js.map +1 -0
package/dist/policy-packs/runtime.d.ts +8 -0
package/dist/policy-packs/runtime.js +30 -0
package/dist/policy-packs/runtime.js.map +1 -0
package/dist/policy-packs/source.d.ts +6 -0
package/dist/policy-packs/source.js +10 -0
package/dist/policy-packs/source.js.map +1 -0
package/dist/policy-packs/types.d.ts +41 -0
package/dist/policy-packs/types.js +11 -0
package/dist/policy-packs/types.js.map +1 -0
package/dist/probes/mcp.js +2 -1
package/dist/probes/mcp.js.map +1 -1
package/dist/runtime/index.d.ts +1 -0
package/dist/runtime/index.js +1 -0
package/dist/runtime/index.js.map +1 -1
package/dist/runtime/ledger-add.d.ts +16 -0
package/dist/runtime/ledger-add.js +139 -0
package/dist/runtime/ledger-add.js.map +1 -0
package/dist/runtime/ledger-record.js +2 -1
package/dist/runtime/ledger-record.js.map +1 -1
package/dist/schema/audit.d.ts +71 -0
package/dist/schema/audit.js +32 -0
package/dist/schema/audit.js.map +1 -0
package/dist/schema/index.d.ts +1893 -10
package/dist/schema/index.js +27 -0
package/dist/schema/index.js.map +1 -1
package/dist/schema/permission-profiles.d.ts +2161 -0
package/dist/schema/permission-profiles.js +60 -0
package/dist/schema/permission-profiles.js.map +1 -0
package/dist/schema/policy-packs.d.ts +52 -0
package/dist/schema/policy-packs.js +35 -0
package/dist/schema/policy-packs.js.map +1 -0
package/dist/schema/tools.d.ts +8 -8
package/dist/schema/workflows.d.ts +519 -0
package/dist/schema/workflows.js +81 -0
package/dist/schema/workflows.js.map +1 -0
package/dist/version.d.ts +1 -0
package/dist/version.js +3 -0
package/dist/version.js.map +1 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,261 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
 ## [Unreleased]
+## [0.8.0] - 2026-05-10
+**Headline: Understanding-Gate Policy Pack, end-to-end.** Phase 6 lands
+the *Policy Pack* concept as a first-class harness unit: a reusable
+bundle of instruction template, hooks, policies, and permission
+profiles that ships under one name and is referenced from
+`harness.yaml` with one key. The first showcase pack,
+`understanding-before-execution`, forces an agent to expose its task
+interpretation, an Understanding Report, before any write-capable
+tool fires. The user confirms or corrects; only after explicit
+approval is recorded as evidence may the agent edit, run shell,
+commit, push, or open a PR. The pack ships across two runtimes
+(Claude Code and Codex), three permission profiles
+(`safe-start` / `implementation-after-approval` / `high-risk-grill-me`),
+a CLI surface (`harness pack add / remove / list`,
+`harness apply --runtime <runtime>`, `harness approve understanding`,
+`harness doctor --target codex`), and a synthetic-stdin dogfood smoke
+under `dogfood/phase6-6/` that exercises block, allow, capture, and
+approve round-trips without a real Codex binary.
+Operator note: no schema bump (still `version: 1`). New manifest blocks
+(`policy_packs:`, `permission_profiles:`) are additive and default to
+empty, so `0.7.0` manifests parse byte-identically. Manifests with the
+pack enabled need a one-time `harness apply` after upgrade so the new
+`harness pack hook pre-tool-use` blocker replaces the npm package's
+standalone bin in the rendered `settings.json`. Ensure `harness` is on
+`$PATH` (`npm i -g @lannguyensi/harness@0.8.0`) before the next session
+starts.
+### Added
+- Phase 6 #6 follow-up: `harness pack hook codex-stop` captures the
+  agent's Understanding Report into
+  `.understanding-gate/reports/<iso>-codex-<sessionhash>.json` with
+  `approvalStatus: "pending"`. Wire format on stdin accepts either
+  `last_assistant_message` directly or a `messages[]` array (the last
+  assistant entry is used). The parser recognises markdown headings,
+  bold labels, and plain colon-prefixed labels for the six report
+  fields (interpretation, assumptions, openQuestions, outOfScope,
+  risks, verificationPlan), with synonym support (Questions,
+  Exclusions, Validation). Failure modes (malformed input, missing
+  session id, unwritable reports dir, no recognisable fields)
+  resolve to exit 0 + a stderr diagnostic; capture must never block
+  the agent's stop path. The Codex pack now contributes a Stop hook
+  alongside UserPromptSubmit and PreToolUse. Closes agent-tasks
+  `adf356a0`.
+- Phase 6 #6 follow-up: `harness doctor --target codex` evaluates the
+  harness side of the Codex adapter (binary resolution, harness-managed
+  `harness.generated/codex/config.toml` presence + banner, contributed
+  `[[hooks.*]]` command resolution, and persisted-report directory
+  writability). Codex error/warning counts roll into the top-level
+  totals; `--json` adds a structured `codexTarget` block to the
+  `DoctorReport`. The default `harness doctor` invocation is
+  unchanged. Closes agent-tasks `125fd02b`.
+- Phase 6 #6: Codex adapter for the `understanding-before-execution`
+  policy pack. New CLI flag `harness apply --runtime codex` emits
+  `harness.generated/codex/config.toml` (TOML hook stanzas) instead of
+  `settings.json`; operators copy or include the generated TOML into
+  their own `~/.codex/config.toml`. Two new pack hook subcommands ship:
+  `harness pack hook codex-pre-tool-use` (PreToolUse blocker on
+  `apply_patch|Bash|shell`: exit 2 + reason on stderr when no source
+  has approved, exit 0 otherwise) and `harness pack hook
+  codex-user-prompt-submit` (instruction-template injector that emits
+  the Understanding-Gate prompt on stdout for Codex to prepend to
+  `additional_instructions`). The Codex blocker shares the
+  approval-check pipeline with the Claude Code blocker (ledger source
+  via grounding-mcp + persisted report under
+  `.understanding-gate/reports/`, either approves). Synthetic-stdin
+  smoke under `dogfood/phase6-6/` exercises block + allow paths
+  end-to-end without a Codex binary. `--target` is rejected with
+  `--runtime codex` (`--target` wires Claude Code's `settings.json`,
+  which is not produced under the Codex runtime). Phase 6 #6 follow-ups filed as separate
+  agent-tasks entries: `harness doctor --target codex` adapter-health
+  check; Codex Stop-equivalent for transcript capture; permission
+  profile translator into Codex's sandbox shape.
+- Phase 6 anchor: additive `policy_packs:` manifest block (schema-only;
+  no runtime behaviour yet). Each entry has `name`, `source`
+  (default `builtin`), `enabled`, optional `description`, and an
+  opaque `config:` record validated by the pack itself at resolve
+  time. Duplicate names rejected at parse time; `.strict()` rejects
+  unknown keys per entry. The block defaults to `[]` so manifests
+  written for `0.7.0` parse byte-identically.
+- `docs/policy-packs/understanding-before-execution.md`: canonical
+  documentation for the first Policy Pack, covering target
+  architecture, manifest reference, mode semantics, permission-profile
+  sketches, adapter notes for Claude Code / OpenCode / Codex, and the
+  two-source approval-state model (evidence-ledger tag for harnessed
+  sessions; persisted JSON report for solo `@lannguyensi/understanding-gate`
+  users). Phase 6 #2 through #6 will wire the surfaces this doc
+  describes; see `docs/ROADMAP.md` for the sub-task decomposition.
+- `docs/examples/full-manifest.yaml` carries the canonical
+  `understanding-before-execution` pack as a worked example; the
+  byte-for-byte `harness describe` golden test covers the resulting
+  output.
+- Phase 6 #2: `harness apply` now expands enabled `policy_packs[]`
+  entries into hook contributions and an operator audit copy. For the
+  builtin `understanding-before-execution` pack this writes three
+  namespaced hooks into the generated `settings.json`
+  (`UserPromptSubmit` injector, `Stop` capture, `PreToolUse` blocker
+  matching `Edit|Write|Bash`, all pointing at the
+  `@lannguyensi/understanding-gate` bins) and an audit copy at
+  `harness.generated/policy-packs/<name>/instructions.md`. Pack files
+  flow through the existing three-state-compare + lock pipeline, so
+  drift on the audit copy is caught by `harness apply` and surfaced in
+  `harness diff --since-apply`. `enabled: false` skips the pack
+  entirely. `harness validate` rejects an enabled pack with an
+  unrecognised source (only `builtin` resolves in v1) or an unknown
+  builtin name. Phase 6 #4 will add the harness-side ledger-aware
+  PreToolUse blocker; the standalone blocker shipped in
+  `@lannguyensi/understanding-gate@>=0.2.0` is already wired today.
+- Phase 6 #3: new `harness pack` CLI subtree for managing `policy_packs[]`
+  declaratively. `harness pack add <name>` performs a schema-validated
+  insert (rejects unknown source/name pre-flight, then the schema
+  superRefine catches duplicates). `harness pack remove <name>` is
+  reference-checked against `.last-apply`: it refuses without `--force`
+  when applied state is present, and `--force` removes the manifest
+  entry, deletes the on-disk pack files under
+  `harness.generated/policy-packs/<name>/`, and prunes the
+  corresponding `.last-apply` entries so a follow-up `harness apply`
+  reconverges in one step. `harness pack list [--enabled-only] [--json]`
+  prints a flat table or pipeable JSON.
+- Phase 6 #4: harness-side PreToolUse blocker + approve flow. The
+  `understanding-before-execution` pack now ships its `PreToolUse` hook
+  pointing at the new `harness pack hook pre-tool-use` runtime verb
+  (was: the npm package's standalone bin). The harness blocker is the
+  superset: it consults BOTH the evidence-ledger tag
+  `understanding-approved:${SESSION_ID}` (via grounding-mcp's
+  `ledger_summary`, canonical for harnessed sessions) AND the
+  persisted JSON report under `.understanding-gate/reports/` (fallback
+  for sessions without grounding-mcp wired). Either source approves;
+  if neither does, the tool call is blocked with a Claude-Code-shaped
+  deny JSON containing the actionable next step (run `harness approve
+  understanding`). Failure modes (manifest unreadable, pack disabled,
+  no session id) resolve to allow with a stderr diagnostic, so the
+  Understanding Gate never bricks a session. Ledger matching filters
+  out `policy_decision` audit rows (typed and legacy-prefix backstop)
+  so a policy decision whose serialised reason field happens to
+  contain the approval substring cannot falsely approve.
+  **Breaking change for users with `understanding-before-execution`
+  enabled**: the regenerated `settings.json` calls `harness pack hook
+  pre-tool-use` instead of the npm bin. Run `harness apply` after
+  upgrading, and ensure `harness` is on `$PATH` (e.g.
+  `npm i -g @lannguyensi/harness`) before the next session starts.
+- New `harness approve understanding [--session <id>] [--reports-dir
+  <path>] [--approved-by <actor>]` CLI verb that round-trips both
+  approval sources: writes the `understanding-approved:${SESSION_ID}`
+  ledger tag via `grounding-mcp`'s `ledger_add` AND flips
+  `approvalStatus: "approved"` on the latest matching persisted JSON
+  report (atomic rewrite). A degraded ledger surfaces as a one-line
+  warning, not a hard failure, so a solo
+  `@lannguyensi/understanding-gate` user without `grounding-mcp` wired
+  still benefits from the persisted-report path.
+- New generic `runtime/ledger-add.ts` writer mirroring the structural
+  shape of `recordPolicyDecision` but exposed for non-policy-decision
+  fact rows. Used by `harness approve understanding`; available to any
+  future pack that wants to emit a session-tagged ledger entry without
+  encoding a policy-decision payload.
+- Phase 6 #5: permission profiles. New top-level `permission_profiles:`
+  manifest block (additive, defaults to `{}`), with three v1 builtins
+  bundled with the `understanding-before-execution` pack: `safe-start`
+  (pre-approval default), `implementation-after-approval`
+  (post-approval working profile), and `high-risk-grill-me` (high-friction
+  for security / infra surfaces). Selection via the pack's
+  `config.permission_profile`. Profile actions (`read` / `edit` /
+  `bash` / `commit` / `push` / `pr` / `deploy`) translate to Claude
+  Code's `permissions: { allow, ask, deny }` block at apply time;
+  the new translator emits canonical tool patterns
+  (`Edit`/`Write`/`MultiEdit` for `edit`, `Bash(git commit*)` for
+  `commit`, etc.). `limited` and `ask_or_deny` collapse onto `ask`
+  for v1 (Claude Code does not natively distinguish them);
+  finer-grained shaping is a Phase 6 #5 follow-up. When multiple packs
+  contribute permissions, the merge follows
+  deny-wins-over-ask-wins-over-allow precedence: a stricter intent
+  from any pack is not silently relaxed by a more permissive
+  sibling. Profiles compose with the Phase 6 #4 PreToolUse blocker:
+  the static permissions block is the always-applies floor, the
+  blocker handles the conditional approval gate on top.
+## [0.7.0] - 2026-05-06
+**Headline: workflows-as-data and full-session audit forensics.** The
+`workflows:` block (PR #66) lets adopters declare branch policy,
+review-subagent gating, and merge method as schema-validated data
+instead of prose in memory files. `harness session-export <sessionId>`
+(PR #67) joins the on-disk Claude Code transcript JSONL with the
+evidence ledger for the same session and emits a single chronologically
+ordered audit artifact, with default-on regex redaction extended by a
+new optional `audit.redact[]` manifest block. The README is split into
+audience-specific guides (`docs/for-humans.md`, `docs/for-agents.md`)
+and gains a control-loop flowchart that both audiences read
+identically. `harness explain --last` closes the "what just denied me?"
+loop without needing the policy name. No runtime enforcement of
+`workflows:` yet; that ships as a follow-up.
+Operator note: no schema bump (still `version: 1`). All new manifest
+fields are optional and additive; manifests written for `0.6.0` parse
+under `0.7.0` byte-identically. The new `audit.redact[]` defaults to a
+denylist that catches the four obvious key/secret patterns even when
+the operator declares no `audit:` block, so existing operators get
+redaction-on-by-default for `session-export` for free.
+### Changed
+- `docs/for-agents.md` workflow lifecycle stateDiagram is now anchored
+  on the four step kinds the `workflows:` schema actually defines
+  (`branch`, `review_subagent`, `ci_gate`, `merge`) instead of
+  agent-tasks-MCP-specific verbs (`task_start`, `open` / `in_progress` /
+  `done`). A new "If you use agent-tasks MCP" footnote below the
+  diagram maps the lifecycle markers to the concrete MCP verbs as one
+  example integration; other task systems fit the same lifecycle.
+  Spotted right after the audience split landed (PR #69).
+- Root `README.md` gains a control-loop flowchart ("What harness does":
+  declare, apply, enforce, record, observe, refine) that both
+  audiences read identically. No audience-specific verbs (PR #69).
+- Docs split into two audience-specific surfaces:
+  `docs/for-humans.md` (operator guide: install, mental model, first
+  hour, diagnostics cheat sheet) and `docs/for-agents.md` (workflow
+  lifecycle, policy/ledger sequence, CLI cheat sheet by side-effect
+  class, audit triumvirate). README shrunk to a landing page that
+  picks audience, with the `Try it in 60 seconds` block, status
+  checklist, and `Why this exists` preserved. Three mermaid diagrams
+  added: a system flowchart in `for-humans.md`, a workflow
+  stateDiagram and a policy/ledger sequenceDiagram in
+  `for-agents.md`. Docs-only, no source changes (PR #68).
+### Added
+- `harness explain --last` traces the most recent policy decision in the
+  evidence ledger without needing the policy name, closing the common
+  "I just got a deny, what fired?" loop in one command instead of three.
+  Pair with `--decision allow|deny|warn-degraded` to skip past intervening
+  outcomes. `<policy>` and `--last` are mutually exclusive (PR #65).
+- `harness session-export <sessionId>` joins the on-disk Claude Code
+  transcript JSONL (`~/.claude/projects/<projectDir>/<sessionId>.jsonl`)
+  with evidence-ledger rows for the same session and emits a single
+  chronologically-ordered audit artifact. `--format json` (default) and
+  `--format jsonl` ship in v1; `-o <file>` writes to disk. Each event
+  carries an explicit `source: "transcript" | "ledger"` marker so the
+  export is traceable back to its inputs (PR #67).
+- New optional `audit.redact[]` block in the manifest. Each entry is
+  either `{ regex, replacement? }` or `{ env_var, replacement? }`;
+  `env_var:` resolves to the actual value at export time and
+  string-replaces it. A default denylist (token / secret / password /
+  api_key) ships even when the manifest declares no `audit:` block, so
+  redaction is on by default. Manifests without `audit:` parse
+  unchanged (PR #67).
+- Additive `workflows:` and `review_templates:` top-level blocks in the
+  manifest (still `version: 1`). Lets adopters declare review-subagent
+  gating, branch policy, CI gate, and merge method as data instead of
+  prose in memory files. The schema rejects duplicate workflow names,
+  unknown step `kind` values, `spawn: required` without a `template`,
+  and `template:` references not defined in `review_templates`. Surfaces
+  via `harness describe --pillar workflows`, `harness list workflows`,
+  and a new `Workflows` section in `harness doctor`. No runtime
+  enforcement yet; that ships as a follow-up. Manifests without
+  `workflows:` parse identically to before (PR #66).
 ## [0.6.0] - 2026-05-03
 **Headline: the Phase-5 adoption-blocker cycle closes end-to-end.**

package/README.md CHANGED Viewed

@@ -2,11 +2,62 @@
 **Declarative control plane for agent harnesses.**
-One zod-validated YAML manifest for grounding, tools, memory, hooks, and policies — plus a CLI that describes, validates, diffs, applies, audits, and *enforces*.
-> Most config tools tell you what an agent is configured to use. `harness` tells you what an agent is *allowed to do*, under this exact context, and why.
+One zod-validated YAML manifest for grounding, tools, memory, hooks,
+policies, and workflows, plus a CLI that describes, validates, diffs,
+applies, audits, and *enforces*.
+> Most config tools tell you what an agent is configured to use.
+> `harness` tells you what an agent is *allowed to do*, under this
+> exact context, and why.
+`harness` collapses the six-to-eight surfaces a working agent harness
+leaks across (`settings.json`, `CLAUDE.md`, memory frontmatter, MCP
+registrations, per-project overrides, hook scripts) into a single
+source of truth. Today (`v0.8.0`) policies fire end-to-end and ship as
+reusable *Policy Packs*: a
+`mcp__agent-tasks__pull_requests_merge` call against a session
+without a `review:${PR_NUMBER}` ledger entry refuses; an `Edit` /
+`apply_patch` against a session without an approved Understanding
+Report refuses; `harness explain --last --trace` shows exactly why.
+The Understanding Gate ships across both Claude Code and Codex
+runtimes via `harness apply --runtime <claude-code|codex>`.
+## What harness does
+```mermaid
+flowchart LR
+    declare["1. Declare<br/><code>harness.yaml</code>"]
+    apply["2. Apply<br/><code>harness apply</code>"]
+    enforce["3. Enforce<br/>hooks + policies<br/>at runtime"]
+    record[("4. Record<br/>evidence ledger")]
+    observe["5. Observe<br/><code>audit</code> / <code>explain</code> /<br/><code>session-export</code>"]
+    declare --> apply
+    apply --> enforce
+    enforce --> record
+    record --> observe
+    observe -. refine .-> declare
+```
-`harness` collapses the six-to-eight surfaces a working agent harness leaks across (`settings.json`, `CLAUDE.md`, memory frontmatter, MCP registrations, per-project overrides, hook scripts) into a single source of truth. Today (`v0.5.0`) policies fire end-to-end: a `mcp__agent-tasks__pull_requests_merge` call against a session without a `review:${PR_NUMBER}` ledger entry refuses; `harness explain review-before-merge --trace` shows exactly why. Phase 6 adds an *Understanding Gate* (agents confirm task interpretation before editing); Phase 7 adds a *Risk Gate* that blocks `DROP TABLE` against a prod target, even when the model would happily run it.
+One manifest declares grounding, tools, memory, hooks, policies, and
+workflows. `apply` materialises that into the files Claude Code
+actually reads. At runtime, hooks and policies enforce the contract
+and write decision rows to the evidence ledger. The read-side
+surfaces (`audit`, `explain --trace`, `session-export`) replay those
+rows so you can see what fired, why, and across which session.
+Whatever you learn from observing flows back into the manifest. That
+loop is the whole product.
+## Pick your audience
+- **Operator?** Read [`docs/for-humans.md`](docs/for-humans.md). It
+  walks from `npm i -g @lannguyensi/harness` through your first
+  `apply`, your first real policy, and the diagnostics cheat sheet.
+- **Agent (or onboarding one)?** Read
+  [`docs/for-agents.md`](docs/for-agents.md). It defines the
+  workflow lifecycle, the policy / ledger sequence, the CLI cheat
+  sheet split by side-effect class, and the audit triumvirate
+  (`audit` vs `explain --trace` vs `session-export`).
 ## Install
@@ -14,21 +65,10 @@ One zod-validated YAML manifest for grounding, tools, memory, hooks, and policie
 npm i -g @lannguyensi/harness
 ```
-The CLI binary is `harness`. Node ≥ 20 required.
+The CLI binary is `harness`. Node 20 or newer required.
 ## Try it in 60 seconds
-```bash
-# Statically predict which policies fire for a tool call (no ledger, no LLM).
-# Uses the bundled reference manifest from the npm package.
-harness dry-run "merge PR 42" \
-  --tool mcp__agent-tasks__pull_requests_merge \
-  --tool-args '{"prNumber":42}' \
-  --config "$(npm root -g)/@lannguyensi/harness/dist/../docs/examples/full-manifest.yaml"
-```
-Or from a checkout:
 ```bash
 git clone https://github.com/LanNguyenSi/harness && cd harness
 npm install && npm run build
@@ -38,157 +78,158 @@ node dist/cli/main.js dry-run "merge PR 42" \
   --config docs/examples/full-manifest.yaml
 ```
-`dry-run` reads the reference manifest, runs the trigger matcher, substitutes `${PR_NUMBER}=42` through the JSONPath-restricted extract DSL, and tells you exactly which hooks would fire and which policies would match — before any ledger I/O.
+`dry-run` reads the reference manifest, runs the trigger matcher,
+substitutes `${PR_NUMBER}=42` through the JSONPath-restricted extract
+DSL, and tells you exactly which hooks would fire and which policies
+would match, before any ledger I/O.
-## What a run looks like
-```yaml
-prompt: merge PR 42
-tool: mcp__agent-tasks__pull_requests_merge
-toolArgs:
-  prNumber: 42
-Hooks that would fire:
-  - event: SessionStart
-    name: git-preflight
-  - event: PreToolUse
-    name: require-review-evidence
-  - event: PreToolUse
-    name: require-dogfood-evidence
-  - event: PreToolUse
-    name: require-preflight-evidence
-Policies that match:
-  - name: review-before-merge
-    ledgerQuery: review:42
-    requires:
-      ledger_tag: review:${PR_NUMBER}
-    enforcement: block
-    triggerEvent: PreToolUse
-  - name: two-reviewers-required
-    ledgerQuery: review:42
-    requires:
-      ledger_tag: review:${PR_NUMBER}
-      count:
-        min: 2
-    enforcement: warn
-    triggerEvent: PreToolUse
-Policies that COULD match (need --tool):
-  - name: dogfood-before-release
-    triggerEvent: PreToolUse
-    reason: --tool "mcp__agent-tasks__pull_requests_merge" does not contain trigger.match "Bash"
-  - name: preflight-before-investigation
-    triggerEvent: PreToolUse
-    reason: --tool "mcp__agent-tasks__pull_requests_merge" does not contain trigger.match "Bash"
-Memories that would route:
-  - path: ~/.claude/projects/{project}/memory
-    scope: project
-```
+## Status
-When the matching policy actually fires (via `harness policy intercept`, wired by `harness apply` into `settings.json` as a `PreToolUse` hook), and the evidence ledger has no `review:42` entry, the runtime emits Claude Code's deny shape on stdout:
+- [x] Phase 1, read-only inventory (`describe`, `validate`, `doctor`,
+      `list`, `explain`, `diff`), released as
+      [`v0.1.0`](CHANGELOG.md#010---2026-04-29).
+- [x] Phase 2, managed edits (`init`, `add`, `remove`, `adopt`,
+      `export`), released as [`v0.2.0`](CHANGELOG.md#020---2026-04-29).
+- [x] Phase 3, declarative truth (`apply`, `diff --since-apply`,
+      `harness.lock`), released as
+      [`v0.3.0`](CHANGELOG.md#030---2026-04-30).
+- [x] Phase 4, policy layer (`policy intercept`, `explain --trace`,
+      `audit`, `dry-run`, requires-evaluator + extract DSL +
+      grounding-mcp adapter), released as
+      [`v0.4.0`](CHANGELOG.md#040---2026-04-30).
+- [x] Phase 5, polish + dogfood lessons (`--verbose` policy
+      diagnostics, `$CLAUDE_SESSION_ID` env fallback, server-side
+      `audit` filter pushdown, `policy_decision` first-class entry
+      type, npm distribution as `@lannguyensi/harness`), released as
+      [`v0.5.0`](CHANGELOG.md#050---2026-05-01).
+- [x] Apply-into-settings cycle: `harness adopt`, `apply --target /
+      --merge`, `harness.lock` target tracking, released as
+      [`v0.6.0`](CHANGELOG.md#060---2026-05-03).
+- [x] Workflows-as-data + full-session audit forensics: additive
+      `workflows:` / `review_templates:` / `audit.redact[]` manifest
+      blocks, `harness session-export`, `explain --last`,
+      audience-specific docs surfaces, released as
+      [`v0.7.0`](CHANGELOG.md#070---2026-05-06).
+- [x] Phase 6, Understanding Gate Policy Pack: `policy_packs:`
+      manifest block, the canonical `understanding-before-execution`
+      pack, `harness pack add / remove / list`,
+      `harness apply --runtime <claude-code|codex>` with TOML config
+      output for Codex, three permission profiles
+      (`safe-start` / `implementation-after-approval` /
+      `high-risk-grill-me`), a harness-side PreToolUse blocker that
+      consults both the evidence-ledger tag and the persisted JSON
+      report, `harness approve understanding`,
+      `harness doctor --target codex`, and a Codex Stop-equivalent
+      that captures Understanding Reports into
+      `.understanding-gate/reports/`. Released as
+      [`v0.8.0`](CHANGELOG.md#080---2026-05-10).
+- [ ] Phase 7, Risk Gate: Action Envelope + Risk Classifier +
+      `allow / warn / require_approval / deny` for destructive-action
+      prevention.
+## Policy Packs (v0.8.0)
+A *Policy Pack* is a reusable bundle of an instruction template, hooks,
+policies, and permission profiles that ships under one name and is
+referenced from `harness.yaml` with a single key. The first pack,
+`understanding-before-execution`, forces agents to expose and confirm
+their task interpretation before any write-capable tool fires.
-```json
-{"decision":"deny","reason":"review-before-merge: no matching ledger entry for tag `review:42`"}
+```yaml
+policy_packs:
+  - name: understanding-before-execution
+    config:
+      mode: grill_me                       # fast_confirm | grill_me | strict
+      permission_profile: safe-start       # safe-start | implementation-after-approval | high-risk-grill-me
 ```
-With `--verbose` (or `HARNESS_POLICY_VERBOSE=1`), stderr also carries a structured diagnostic block — policy name, ledger_tag, matched count, reason, sorted extract values — so the user sees *why* without a follow-up `explain --trace`.
-After the entry is recorded, the same call is silently allowed. Every fire writes a `policy_decision` row that `harness audit` and `harness explain --trace` replay:
-```
-$ harness audit --since 1h --policy review-before-merge
+Manage packs with `harness pack add / remove / list`. Apply against
+either runtime:
-timestamp                 policy               outcome  reason
-------------------------  -------------------  -------  ---------------------------------------------
-2026-04-30T18:30:00.000Z  review-before-merge  deny     no matching ledger entry for tag `review:42`
-2026-04-30T18:31:00.000Z  review-before-merge  allow    1 matching ledger entries for tag `review:42`
+```sh
+harness apply --runtime claude-code        # default; writes harness.generated/settings.json
+harness apply --runtime codex              # writes harness.generated/codex/config.toml
 ```
-Inside a Claude Code session, `--session` defaults to `$CLAUDE_SESSION_ID`, so the read path automatically lines up with what the runtime hook wrote.
-## Wire into Claude Code
-By default, `harness apply` writes the rendered settings to `harness.generated/settings.json` next to your manifest. To make Claude Code actually use it, point `apply` at a settings discovery path with `--target`:
-```bash
-# Project scope: write straight to .claude/settings.local.json (created if missing).
-harness apply --target .claude/settings.local.json
-# User scope: merge harness-owned keys into your existing ~/.claude/settings.json,
-# preserving env, permissions, enabledPlugins, and any other top-level keys.
-harness apply --target ~/.claude/settings.json --merge
-```
-`--merge` does a 3-way merge: harness-owned top-level keys (today: `hooks`) get replaced wholesale; everything else in the existing target file is preserved verbatim. Re-applying is idempotent: running twice produces the same target, and the second run reports `no changes`.
-If the target exists and you pass neither `--merge` nor `--force`, apply refuses with a clear hint instead of clobbering. `--force` overwrites with the generated content as-is (no merge).
-`harness.lock` records the target path + a sha256 of the merged output, so `harness validate --check-lock` flags out-of-band edits.
-## Next steps
-| If you want to... | Read |
-|------|------|
-| Understand the YAML shape, CLI surface, drift handling, `requires` schema | [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) |
-| See phase-by-phase scope, deliverables, acceptance criteria, exit gates | [`docs/ROADMAP.md`](docs/ROADMAP.md) |
-| Read the long-form positioning (three pillars, ecosystem map, gaps) | [`docs/VISION.md`](docs/VISION.md) |
-| Browse a manifest covering every field | [`docs/examples/full-manifest.yaml`](docs/examples/full-manifest.yaml) |
-| Track what's shipping and what's deferred | [`CHANGELOG.md`](CHANGELOG.md) |
-## Common commands
-```bash
-harness init --template full --config /tmp/harness-demo/harness.yaml
-harness describe        --config /tmp/harness-demo/harness.yaml --pillar tools
-harness doctor          --config /tmp/harness-demo/harness.yaml --shallow
-harness validate        --config /tmp/harness-demo/harness.yaml
-harness apply           --config /tmp/harness-demo/harness.yaml   # regenerate settings.json + MEMORY.md, write harness.lock
-harness diff --since-apply --config /tmp/harness-demo/harness.yaml
-harness explain review-before-merge --trace
-harness audit --since 24h
-```
+Approve a session's Understanding Report via
+`harness approve understanding --session <id>` (round-trips both the
+evidence-ledger tag and the persisted JSON report). Verify the
+adapter wiring with `harness doctor --target codex` (`--json` for
+machine-readable output). The full reference lives in
+[`docs/policy-packs/understanding-before-execution.md`](docs/policy-packs/understanding-before-execution.md);
+the synthetic-stdin dogfood under
+[`dogfood/phase6-6/`](dogfood/phase6-6/run-smoke.sh) exercises the
+block / allow / capture / approve round-trip without a real Codex
+binary.
 ## What's next
-Two structurally larger themes are queued after Phase 5's polish:
-**Phase 6 — Understanding Gate.** Before an agent edits files, runs shell, commits, or opens a PR, it must produce an *Understanding Report* (its interpretation of the task: derived todos, acceptance criteria, assumptions, out-of-scope, risks). The user confirms, corrects, or "grills me until precise enough". Only after explicit approval is recorded in the evidence ledger may write-capable tools fire. Ships as the first `harness` *Policy Pack* — a reusable bundle of instruction template + hooks + policies + permission profiles. Long-form design lives in the internal `lava-ice-logs` logbook (2026-04-30).
-**Phase 7 — Risk Gate.** Today's policy model evaluates a rule per matching trigger and returns a binary block/allow. Phase 7 makes harness reason about *the action itself*: an Action Envelope (tool + raw input + session + runtime context) is enriched by a Context Resolver (production / staging / dev / unknown), classified by a Risk Classifier (severity + categories + reversibility), then matched against policies whose `when:` clauses can reference `risk.severity_at_least`, `environment.name`, and similar. The decision space extends to `allow / warn / require_approval / deny`. Motivating use case: prevent `DROP TABLE users`, `kubectl delete namespace prod`, `terraform destroy` against an unverified production target before they reach the runtime — even if the model would have happily run them. Long-form design lives in the internal `lava-ice-logs` logbook (2026-04-30).
-Both build on Phase 4's `policy intercept` runtime backbone; neither replaces it.
+**Phase 7, Risk Gate.** Today's policy model evaluates a rule per
+matching trigger and returns a binary block/allow. Phase 7 makes
+harness reason about *the action itself*: an Action Envelope (tool +
+raw input + session + runtime context) is enriched by a Context
+Resolver (production / staging / dev / unknown), classified by a Risk
+Classifier (severity + categories + reversibility), then matched
+against policies whose `when:` clauses can reference
+`risk.severity_at_least`, `environment.name`, and similar. The
+decision space extends to `allow / warn / require_approval / deny`.
+Motivating use case: prevent `DROP TABLE users`, `kubectl delete
+namespace prod`, `terraform destroy` against an unverified production
+target, even if the model would have happily run them.
+Phase 7 builds on Phase 4's `policy intercept` runtime backbone and
+Phase 6's Policy Pack distribution surface; neither is replaced.
 > Bring your favorite agent harness. Add governance.
-## Status
-- [x] Repo bootstrap (LICENSE, .gitignore)
-- [x] README + VISION — repo legible
-- [x] ARCHITECTURE — YAML shape + CLI surface agreed
-- [x] ROADMAP — phases 1–7 with acceptance criteria
-- [x] Phase 1 — read-only inventory (`describe`, `validate`, `doctor`, `list`, `explain`, `diff`) — released as [`v0.1.0`](CHANGELOG.md#010---2026-04-29)
-- [x] Phase 2 — managed edits (`init`, `add`, `remove`, `adopt`, `export`) — released as [`v0.2.0`](CHANGELOG.md#020---2026-04-29)
-- [x] Phase 3 — declarative truth (`apply`, `diff --since-apply`, `harness.lock`) — released as [`v0.3.0`](CHANGELOG.md#030---2026-04-30)
-- [x] Phase 4 — policy layer (`policy intercept`, `explain --trace`, `audit`, `dry-run`, requires-evaluator + extract DSL + grounding-mcp adapter) — released as [`v0.4.0`](CHANGELOG.md#040---2026-04-30)
-- [x] Phase 5 — polish + dogfood lessons (`--verbose` policy diagnostics, `$CLAUDE_SESSION_ID` env fallback, server-side `audit` filter pushdown, `policy_decision` first-class entry type, audit `--since` UTC parse fix, `explain --trace` ms-precision sort, npm distribution as `@lannguyensi/harness`) — released as [`v0.5.0`](CHANGELOG.md#050---2026-05-01)
-- [ ] Phase 6 — Understanding Gate Policy Pack (agents must expose and confirm task understanding before write-capable tools fire)
-- [ ] Phase 7 — Risk Gate (Action Envelope + Risk Classifier + `allow / warn / require_approval / deny` for destructive-action prevention)
 ## Why this exists
-A working agent harness today has six to eight configuration surfaces, each with its own schema and lifecycle: `~/.claude/settings.json`, `CLAUDE.md` (per repo + root), `~/.claude/projects/*/memory/*.md` with frontmatter, `~/.claude/keybindings.json`, MCP server registrations in `~/.claude.json`, skill directories, per-project overrides, and external CLIs that behave differently per project.
-There is no single place that answers *"what can this agent do right now, and why is that configured that way?"*. Drift between sessions is invisible until it breaks something. Humans editing one surface don't know which other surfaces they need to touch. A fresh agent instance has no way to audit its own setup.
-Our entry point into this problem: on 2026-04-23, an `agent-grounding` checkout that was 16 commits behind origin led two tasks to be incorrectly called "stale". The check that would have caught it already exists — [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight) runs `git fetch` + `git status` (alongside lint, typecheck, test, audit) and emits a structured `ready` + confidence-score result. The missing piece wasn't the check itself, it was the deterministic *trigger*: a `SessionStart` hook that invokes `preflight run` and a policy that gates further work on the result. Building that wiring needs an agreed-upon place for harness config to live first. That conversation is the origin of this repo.
+A working agent harness today has six to eight configuration
+surfaces, each with its own schema and lifecycle: `~/.claude/settings.json`,
+`CLAUDE.md` (per repo + root), `~/.claude/projects/*/memory/*.md`
+with frontmatter, `~/.claude/keybindings.json`, MCP server
+registrations in `~/.claude.json`, skill directories, per-project
+overrides, and external CLIs that behave differently per project.
+There is no single place that answers *"what can this agent do right
+now, and why is that configured that way?"*. Drift between sessions
+is invisible until it breaks something. Humans editing one surface
+do not know which other surfaces they need to touch. A fresh agent
+instance has no way to audit its own setup.
+Our entry point into this problem: on 2026-04-23, an
+`agent-grounding` checkout that was 16 commits behind origin led two
+tasks to be incorrectly called "stale". The check that would have
+caught it already exists:
+[`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight)
+runs `git fetch` + `git status` (alongside lint, typecheck, test,
+audit) and emits a structured `ready` + confidence-score result. The
+missing piece was not the check itself; it was the deterministic
+*trigger*: a `SessionStart` hook that invokes `preflight run` and a
+policy that gates further work on the result. Building that wiring
+needs an agreed-upon place for harness config to live first. That
+conversation is the origin of this repo.
 ## Related
-- [`agent-grounding`](https://github.com/LanNguyenSi/agent-grounding) — grounding primitives (evidence-ledger, claim-gate, review-claim-gate); `grounding-mcp` is the canonical client surface harness queries through `queryLedgerByTag` (Phase 4 #3).
-- [`agent-memory`](https://github.com/LanNguyenSi/agent-memory) — memory surfaces the control plane inventories.
-- [`agent-tasks`](https://github.com/LanNguyenSi/agent-tasks) — the MCP-registered task platform whose registration + health appear in `harness describe`.
-- [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight) — local preflight validator; the canonical implementation of preflight-hook content harness wires (see `docs/ARCHITECTURE.md` §5 for the canonical hook-script shape and §6 for the Phase 4 policy that gates further work on a `preflight:${REPO}` ledger entry).
-- [`codebase-oracle`](https://github.com/LanNguyenSi/codebase-oracle) — one of the MCP surfaces being registered.
-- [`agent-dx`](https://github.com/LanNguyenSi/agent-dx) — ships `git-batch-cli` (under `packages/git-batch-cli`), a day-to-day tool whose inventory appears in `harness describe`.
+- [`agent-grounding`](https://github.com/LanNguyenSi/agent-grounding):
+  grounding primitives (evidence-ledger, claim-gate,
+  review-claim-gate); `grounding-mcp` is the canonical client surface
+  harness queries through `queryLedgerByTag`.
+- [`agent-memory`](https://github.com/LanNguyenSi/agent-memory):
+  memory surfaces the control plane inventories.
+- [`agent-tasks`](https://github.com/LanNguyenSi/agent-tasks): the
+  MCP-registered task platform whose registration + health appear in
+  `harness describe`.
+- [`agent-preflight`](https://github.com/LanNguyenSi/agent-preflight):
+  local preflight validator; the canonical implementation of
+  preflight-hook content harness wires.
+- [`codebase-oracle`](https://github.com/LanNguyenSi/codebase-oracle):
+  one of the MCP surfaces being registered.
+- [`agent-dx`](https://github.com/LanNguyenSi/agent-dx): ships
+  `git-batch-cli`, a day-to-day tool whose inventory appears in
+  `harness describe`.
 ## License
-MIT — see [LICENSE](LICENSE).
+MIT, see [LICENSE](LICENSE).