npm - @kontourai/flow-agents - Versions diffs - 1.2.0 → 1.4.0 - Mend

@kontourai/flow-agents 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/.github/workflows/ci.yml +6 -1
package/.github/workflows/kit-gates-demo.yml +6 -2
package/CHANGELOG.md +33 -0
package/CONTRIBUTING.md +30 -0
package/agents/dev.json +1 -1
package/agents/tool-planner.json +1 -1
package/build/src/cli/console-learning-projection.d.ts +1 -0
package/build/src/cli/effective-backlog-settings.d.ts +1 -0
package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
package/build/src/cli/init.d.ts +17 -0
package/build/src/cli/kit.d.ts +1 -0
package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
package/build/src/cli/publish-change-helper.d.ts +1 -0
package/build/src/cli/pull-work-provider.d.ts +1 -0
package/build/src/cli/runtime-adapter.d.ts +1 -0
package/build/src/cli/telemetry-doctor.d.ts +1 -0
package/build/src/cli/usage-feedback.d.ts +1 -0
package/build/src/cli/utterance-check.d.ts +1 -0
package/build/src/cli/validate-hook-influence.d.ts +1 -0
package/build/src/cli/validate-source-tree.d.ts +1 -0
package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
package/build/src/cli/veritas-governance.d.ts +1 -0
package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
package/build/src/cli/workflow-sidecar.d.ts +32 -0
package/build/src/cli/workflow-sidecar.js +119 -22
package/build/src/cli.d.ts +2 -0
package/build/src/flow-kit/validate.d.ts +81 -0
package/build/src/flow-kit/validate.js +32 -1
package/build/src/index.d.ts +5 -0
package/build/src/index.js +36 -0
package/build/src/lib/args.d.ts +8 -0
package/build/src/lib/fs.d.ts +7 -0
package/build/src/lib/workflow-learning-projection.d.ts +132 -0
package/build/src/runtime-adapters.d.ts +18 -0
package/build/src/tools/build-universal-bundles.d.ts +2 -0
package/build/src/tools/build-universal-bundles.js +14 -0
package/build/src/tools/common.d.ts +9 -0
package/build/src/tools/filter-installed-packs.d.ts +2 -0
package/build/src/tools/generate-context-map.d.ts +2 -0
package/build/src/tools/validate-package.d.ts +2 -0
package/build/src/tools/validate-source-tree.d.ts +2 -0
package/console.telemetry.json +1 -1
package/docs/adr/0004-gates-expect-surface-claims.md +7 -7
package/docs/developer-architecture.md +14 -0
package/docs/kit-authoring-guide.md +99 -6
package/docs/operating-layers.md +2 -2
package/docs/spec/runtime-hook-surface.md +16 -1
package/docs/veritas-integration.md +4 -4
package/docs/workflow-eval-strategy.md +2 -2
package/docs/workflow-usage-guide.md +1 -1
package/evals/acceptance/test_opencode_harness.sh +18 -10
package/evals/acceptance/test_pi_harness.sh +10 -6
package/evals/ci/run-baseline.sh +1 -1
package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +4 -4
package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +4 -4
package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +4 -4
package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +4 -4
package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +4 -4
package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +4 -4
package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +2 -2
package/evals/fixtures/surface-trust/artifact-absent.json +2 -2
package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +2 -2
package/evals/fixtures/surface-trust/missing-authority-trust-report.json +2 -2
package/evals/fixtures/surface-trust/provider-absent.json +2 -2
package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +2 -2
package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +2 -2
package/evals/integration/test_console_learning_projection.sh +1 -1
package/evals/integration/test_goal_fit_hook.sh +144 -0
package/evals/integration/test_hook_category_behaviors.sh +14 -0
package/evals/integration/test_kit_conformance_levels.sh +55 -1
package/evals/integration/test_workflow_sidecar_writer.sh +9 -9
package/evals/run.sh +2 -0
package/evals/static/test_library_exports.sh +85 -0
package/evals/static/test_package.sh +3 -3
package/evals/static/test_universal_bundles.sh +15 -0
package/evals/static/test_workflow_skills.sh +4 -4
package/kits/builder/flows/build.flow.json +48 -48
package/kits/builder/flows/shape.flow.json +36 -36
package/kits/knowledge/adapters/obsidian-store/index.js +137 -26
package/kits/knowledge/evals/contract-suite/suite.test.js +90 -0
package/kits/knowledge/flows/compile.flow.json +12 -12
package/kits/knowledge/flows/consolidate.flow.json +16 -16
package/kits/knowledge/flows/ingest.flow.json +12 -12
package/kits/knowledge/flows/retire.flow.json +16 -16
package/kits/knowledge/flows/store-contract.flow.json +12 -12
package/kits/knowledge/flows/synthesize.flow.json +16 -16
package/kits/release-evidence/flows/release-evidence.flow.json +3 -3
package/package.json +14 -2
package/schemas/workflow-evidence.schema.json +2 -1
package/scripts/hooks/stop-goal-fit.js +66 -18
package/src/cli/workflow-sidecar.ts +101 -21
package/src/flow-kit/validate.ts +55 -1
package/src/index.ts +53 -0
package/src/tools/build-universal-bundles.ts +14 -0
package/tsconfig.json +1 -0

package/docs/spec/runtime-hook-surface.md CHANGED Viewed

@@ -57,6 +57,20 @@ Canonical hook scripts in `scripts/hooks/` use the following exit code contract
 Adapters translate these exit codes into the host-native response format. The `claude-hook-adapter.js` and `codex-hook-adapter.js` wrappers perform this translation, and all errors fail open so hook runtime failures never block agent work.
+### Block Reason Channel
+A block (exit `2` → deny) is only useful if the agent learns *why* it was blocked and how to proceed. When a policy blocks, the hook script writes a human-readable reason — for example, config-protection's "Fix the source code … instead of weakening the config." The adapter **must surface that reason to the model** through the host's native deny-reason mechanism, **not only to a log or stderr**, where it dies before the agent sees it. A deny without a model-visible reason makes the agent retry the same blocked action instead of self-correcting.
+| Host surface | Model-facing reason channel |
+| --- | --- |
+| Claude Code | `hookSpecificOutput.permissionDecisionReason` (preToolUse); `reason` (stop) |
+| Codex | `hookSpecificOutput.permissionDecisionReason` (preToolUse); `reason` (stop) |
+| opencode | the thrown error message on the blocked `tool.execute.before` (surfaced as the tool result) |
+| pi | the `reason` field of the `{ block: true, reason }` tool-call result |
+| Native pre-dispatch host (e.g. an orchestration layer) | the blocked call's tool-result text |
+The reason text is the canonical steering message: it should tell the agent what to do *instead* (edit the source, not the generated artifact), so the agent can self-correct on the next turn. An adapter that denies the call but drops the reason to a log only is a **conformance gap** — record it in the adapter's conformance declaration.
 ---
 ## 2. Policy Classes
@@ -136,7 +150,7 @@ Flow Agents currently ships four canonical policy classes. Each policy class has
 - `SA_HOOK_INPUT_TRUNCATED` env var — whether input was truncated (truncated payloads are blocked unconditionally)
 - Protected file set: `.eslintrc*`, `eslint.config.*`, `.prettierrc*`, `prettier.config.*`, `biome.json`, `biome.jsonc`, `.ruff.toml`, `ruff.toml`, `.shellcheckrc`, `.stylelintrc*`, `.markdownlint*`
-**Decision contract**: Blocking (exits 2) when the target file basename is in the protected set. Writes a descriptive message to stderr directing the agent to fix source instead. Exits 0 (allow) otherwise.
+**Decision contract**: Blocking (exits 2) when the target file basename is in the protected set. Writes a descriptive message directing the agent to fix source instead, which the adapter surfaces to the model as the deny reason (see [Block Reason Channel](#block-reason-channel)). Exits 0 (allow) otherwise.
 **Degradation when host lacks trigger**: If the host has no `preToolUse`-equivalent blocking hook, config protection cannot veto tool calls. The agent may modify linter configs without interception. Log the gap as `preToolUse: no native blocking equivalent — config-protection policy unavailable`.
@@ -190,6 +204,7 @@ The adapter implements L1 plus all blocking policy classes.
 **Required**:
 - L1 steering and stop telemetry.
 - Config protection fires on `preToolUse` and can block (exit 2 translates to a deny response).
+- Every block surfaces its reason to the model through the host's deny-reason channel (see [Block Reason Channel](#block-reason-channel)), not only to a log.
 - Quality gate fires on `postToolUse`.
 - Stop-goal-fit fires on `stop` with `FLOW_AGENTS_GOAL_FIT_STRICT` configurable (default may be warning mode; strict mode must be possible to enable).

package/docs/veritas-integration.md CHANGED Viewed

@@ -106,9 +106,9 @@ If Veritas is unavailable and the workflow expected it, record `not_verified` in
 ## Builder Kit Trust Evidence
-Builder Kit gates stay provider-neutral. The Builder Kit Flow Definition names gate expectations as `kind: "surface.claim"` and declares the claim type, subject, accepted statuses, and blocking behavior. It does not name Veritas or any other trust producer.
+Builder Kit gates stay provider-neutral. The Builder Kit Flow Definition names gate expectations as `kind: "trust.bundle"` (the Hachure-aligned gate kind) and declares the claim type, subject, accepted statuses, and blocking behavior. It does not name Veritas or any other trust producer.
-When a trust-backed path is configured, Flow Agents may attach a compact Surface-shaped reference to the Builder Kit evidence gate. The reference points at a TrustReport or Trust Snapshot, carries the related gate id, Surface claim type, claim status, artifact ref, integrity summary, authority or trusted-producer summary, subject, and freshness state, and then maps to the normal Flow gate result. Flow owns the gate authority decision, route reason, trusted producer mapping, and accepted gap behavior. Surface owns the portable trust state represented by the Surface claim and the TrustReport / Trust Snapshot. A Probe can request or clarify the evidence needed before planning or before a later Builder Kit gate retries.
+When a trust-backed path is configured, Flow Agents may attach a compact Hachure trust.bundle reference to the Builder Kit evidence gate. The reference uses `artifact_kind: "trust.bundle"` (the Hachure-aligned canonical value), carries the related gate id, domain claim type, claim status, artifact ref, integrity summary, authority or trusted-producer summary, subject, and freshness state, and then maps to the normal Flow gate result. When the `hachure` optional dependency is installed, referenced artifacts are validated against hachure's trust-bundle.schema.json at evidence-recording time. Flow owns the gate authority decision, route reason, trusted producer mapping, and accepted gap behavior. Surface owns the portable trust state represented by the Surface claim and the TrustReport / Trust Snapshot. A Probe can request or clarify the evidence needed before planning or before a later Builder Kit gate retries.
 Veritas is only one optional producer of those artifacts. A local Veritas readiness run can emit native Veritas evidence and, when configured, point Flow Agents at a Surface-shaped TrustReport or Trust Snapshot. Flow Agents records the reference; it does not copy Veritas rule models, readiness semantics, or provider-native fields into Builder Kit gates.
@@ -116,8 +116,8 @@ Provider and artifact absence are explicit:
 - If no trust provider is configured, ordinary Builder Kit activation, planning, verification, and evidence gates continue to work through the existing Flow Kit path.
 - If a trust-backed path was requested but no provider is configured, the trust check records `not_verified` with a clear gap instead of blocking unrelated Builder Kit usage.
-- If a provider is configured but the expected TrustReport or Trust Snapshot is absent or unreadable, only the requested trust-backed evidence check records `not_verified`; it does not silently pass and it does not make Veritas mandatory.
-- If a TrustReport or Trust Snapshot is present but has a rejected, stale, expired, missing-authority, or integrity-mismatched Surface claim, the Builder Kit evidence gate routes through the normal `fail` or `not_verified` path.
+- If a provider is configured but the expected Hachure trust.bundle artifact is absent or unreadable, only the requested trust-backed evidence check records `not_verified`; it does not silently pass and it does not make Veritas mandatory.
+- If a Hachure trust.bundle artifact is present but has a rejected, stale, expired, missing-authority, or integrity-mismatched claim, the Builder Kit evidence gate routes through the normal `fail` or `not_verified` path.
 ## Adoption Gate

package/docs/workflow-eval-strategy.md CHANGED Viewed

@@ -6,7 +6,7 @@ title: Workflow Eval Strategy
 The Builder Kit workflow system now has concrete skill contracts for `idea-to-backlog`, `pull-work`, `plan-work`, `review-work`, `deliver`, `evidence-gate`, `release-readiness`, and `learning-review`, plus shared workflow contracts in `context/contracts/`. Evals should prove both the written contracts and the agent behavior around gates, artifacts, worktrees, Goal Fit, release readiness, final acceptance docs, and learning feedback.
-Flow Agents evals prove coordination, install, runtime adapter behavior, and artifact discipline. They should not redefine Flow gate authority: Flow Definitions use typed `expects` entries, Surface claim gates use `kind: "surface.claim"`, and Flow project config owns trusted producer mappings plus gate overrides.
+Flow Agents evals prove coordination, install, runtime adapter behavior, and artifact discipline. They should not redefine Flow gate authority: Flow Definitions use typed `expects` entries, trust-bundle gates use `kind: "trust.bundle"`, and Flow project config owns trusted producer mappings plus gate overrides.
 ## Goals
@@ -161,7 +161,7 @@ Surface trust artifact attachment is covered by deterministic schema, runtime, a
 bash evals/integration/test_workflow_sidecar_writer.sh
 ```
-That eval exercises Builder Kit `surface.claim` evidence using provider-neutral TrustReport / Trust Snapshot fixtures for accepted, rejected, stale, missing-authority, integrity-mismatch, provider-absent, and artifact-absent cases. It proves Flow Agents can record compact Surface claim evidence in `evidence.json` and report pass, fail, or `NOT_VERIFIED` gaps without requiring provider-specific fields.
+That eval exercises Builder Kit `trust.bundle` evidence using provider-neutral Hachure trust.bundle fixtures for accepted, rejected, stale, missing-authority, integrity-mismatch, provider-absent, and artifact-absent cases. It proves Flow Agents can record compact Surface claim evidence in `evidence.json` and report pass, fail, or `NOT_VERIFIED` gaps without requiring provider-specific fields.
 This coverage does not redefine Flow gate authority. Flow Definitions continue to express expectations, Flow project config owns trusted producer mappings and gate overrides, and Flow gate authority remains outside the local report writer. Runtime/provider gaps should be recorded as `NOT_VERIFIED` when a configured Surface claim path cannot be checked; ordinary Builder Kit workflows remain valid when no trust provider or trust artifact is configured.

package/docs/workflow-usage-guide.md CHANGED Viewed

@@ -6,7 +6,7 @@ title: Workflow Usage Guide
 This guide shows how to use the Builder Kit workflow skills in normal chats.
-> **Which doc do I want?** This page is the *driver's manual* — what to say at each stage and what should happen. If you want the conceptual map first — layers, sidecars, hooks, evidence, and why the system is shaped this way — read the [Agent System Guidebook](agent-system-guidebook.md). For a one-line summary of every skill and gate, use the [Skills Map](skills-map.md). Flow Agents coordinates the local runtime, installs Flow Kits, and records artifacts; Flow owns gate semantics, including typed `expects` entries with `kind: "surface.claim"`, trusted producer config, and gate overrides.
+> **Which doc do I want?** This page is the *driver's manual* — what to say at each stage and what should happen. If you want the conceptual map first — layers, sidecars, hooks, evidence, and why the system is shaped this way — read the [Agent System Guidebook](agent-system-guidebook.md). For a one-line summary of every skill and gate, use the [Skills Map](skills-map.md). Flow Agents coordinates the local runtime, installs Flow Kits, and records artifacts; Flow owns gate semantics, including typed `expects` entries with `kind: "trust.bundle"`, trusted producer config, and gate overrides.
 The core pattern is:

package/evals/acceptance/test_opencode_harness.sh CHANGED Viewed

@@ -21,7 +21,7 @@ wait_for_telemetry() {
   local file="$1"
   local i=0
   while [[ $i -lt 150 ]]; do
-    [[ -s "$file" ]] && return 0
+    if [[ -s "$file" ]] && grep -q '"tool.invoke"' "$file" 2>/dev/null && grep -q '"tool.result"' "$file" 2>/dev/null; then return 0; fi
     sleep 0.1
     i=$((i + 1))
   done
@@ -73,23 +73,31 @@ for _attempt in 1 2; do
   grep -q '"tool.invoke"' "$TMP_WORK/.telemetry/full.jsonl" 2>/dev/null && break
 done
-LATEST_LOG="$(ls -t ~/.local/share/opencode/log/*.log 2>/dev/null | head -1 || true)"
-if [[ -n "$LATEST_LOG" ]] && grep -q "plugins/flow-agents.js loading plugin" "$LATEST_LOG" 2>/dev/null; then
-  _pass "opencode log confirms flow-agents plugin loaded"
+# Confirm load via the plugin's own marker file (written by the FlowAgentsPlugin
+# factory at startup). This replaces grepping opencode's internal
+# "plugins/flow-agents.js loading plugin" message, which opencode 1.17.x dropped
+# and which opencode does not reliably surface to its log file — a stale-assertion
+# false failure (#75). The factory runs regardless of provider, so this load
+# signal is independent of whether a model turn completes.
+if [[ -f "$TMP_WORK/.telemetry/opencode-plugin.loaded" ]]; then
+  _pass "flow-agents plugin loaded (factory marker present)"
 else
-  _fail "opencode log did not confirm flow-agents plugin loaded"
+  _fail "flow-agents plugin did not load (factory marker absent)"
 fi
 telemetry_file="$TMP_WORK/.telemetry/full.jsonl"
 if [[ "$provider_error" -eq 1 ]]; then
   _skip "opencode telemetry assertions skipped (provider/auth error)"
   _skip "opencode telemetry tool events skipped (provider/auth error)"
+elif ! wait_for_telemetry "$telemetry_file"; then
+  # The expected telemetry events (tool.invoke and tool.result) never appeared
+  # within the wait window — most likely the agent never completed a model turn,
+  # which is expected in a provider-less environment (e.g. CI with no API key).
+  # The binary install, bundle, and mechanical hook chain are already covered; skip
+  # the live-model-dependent telemetry assertions rather than fail on them.
+  _skip "opencode telemetry assertions skipped (no telemetry — agent did not complete a turn, likely no provider)"
+  _skip "opencode telemetry tool events skipped (no turn)"
 else
-  if wait_for_telemetry "$telemetry_file"; then
-    _pass "opencode telemetry log was written"
-  else
-    _fail "opencode telemetry log was not written"
-  fi
+  _pass "opencode telemetry log was written"
   if [[ -f "$telemetry_file" ]] && \
     node -e "

package/evals/acceptance/test_pi_harness.sh CHANGED Viewed

@@ -21,7 +21,7 @@ wait_for_telemetry() {
   local file="$1"
   local i=0
   while [[ $i -lt 150 ]]; do
-    [[ -s "$file" ]] && return 0
+    if [[ -s "$file" ]] && grep -q '"session.start"' "$file" 2>/dev/null && grep -q '"tool.invoke"' "$file" 2>/dev/null && grep -q '"tool.result"' "$file" 2>/dev/null && grep -q '"session.end"' "$file" 2>/dev/null; then return 0; fi
     sleep 0.1
     i=$((i + 1))
   done
@@ -60,12 +60,16 @@ if [[ "$provider_error" -eq 1 ]]; then
   _skip "pi telemetry assertions skipped (provider/auth error)"
   _skip "pi telemetry event types skipped (provider/auth error)"
   _skip "pi telemetry session events skipped (provider/auth error)"
+elif ! wait_for_telemetry "$telemetry_file"; then
+  # The expected telemetry events (session.start, tool.invoke, tool.result,
+  # session.end) never all appeared within the wait window — most likely the agent
+  # never completed a model turn, which is expected in a provider-less environment
+  # (e.g. CI with no API key).
+  # The binary install, bundle, and mechanical hook chain are already covered;
+  # skip the live-model-dependent telemetry assertions rather than fail on them.
+  _skip "pi telemetry assertions skipped (no telemetry — agent did not complete a turn, likely no provider)"
+  _skip "pi telemetry event types skipped (no turn)"
+  _skip "pi telemetry session events skipped (no turn)"
 else
-  if wait_for_telemetry "$telemetry_file"; then
-    _pass "pi telemetry log was written"
-  else
-    _fail "pi telemetry log was not written"
-  fi
+  _pass "pi telemetry log was written"
   if [[ -f "$telemetry_file" ]] && \
     node -e "

package/evals/ci/run-baseline.sh CHANGED Viewed

@@ -74,7 +74,7 @@ LANE_RUNTIME_AND_KIT=(
   "Kit conformance levels integration"
   "Local Flow Kit install integration"
   "Flow Kit install-git integration"
-  # QUARANTINED (#74): passes on macOS, fails on Linux CI — not gating until triaged
+  "Console learning projection integration"
   "Context map integration"
   "Effective backlog settings integration"
   "Flow agents statusline integration"

package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json CHANGED Viewed

@@ -11,12 +11,12 @@
       "expects": [
         {
           "id": "runtime-evidence",
-          "kind": "surface.claim",
+          "kind": "trust.bundle",
           "required": true,
           "description": "Runtime activation evidence exists.",
-          "claim": {
-            "type": "mixed.runtime.evidence",
-            "subject": "artifact",
+          "bundle_claim": {
+            "claimType": "mixed.runtime.evidence",
+            "subjectType": "artifact",
             "accepted_statuses": ["trusted", "accepted"]
           }
         }

package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json CHANGED Viewed

@@ -11,12 +11,12 @@
       "expects": [
         {
           "id": "review-evidence",
-          "kind": "surface.claim",
+          "kind": "trust.bundle",
           "required": true,
           "description": "Review evidence has been recorded.",
-          "claim": {
-            "type": "example.review.evidence",
-            "subject": "artifact",
+          "bundle_claim": {
+            "claimType": "example.review.evidence",
+            "subjectType": "artifact",
             "accepted_statuses": ["trusted", "accepted"]
           }
         }

package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json CHANGED Viewed

@@ -11,12 +11,12 @@
       "expects": [
         {
           "id": "review-finding",
-          "kind": "surface.claim",
+          "kind": "trust.bundle",
           "required": true,
           "description": "Review finding recorded.",
-          "claim": {
-            "type": "k0.review.finding",
-            "subject": "artifact",
+          "bundle_claim": {
+            "claimType": "k0.review.finding",
+            "subjectType": "artifact",
             "accepted_statuses": ["trusted", "accepted"]
           }
         }

package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json CHANGED Viewed

@@ -11,12 +11,12 @@
       "expects": [
         {
           "id": "build-evidence",
-          "kind": "surface.claim",
+          "kind": "trust.bundle",
           "required": true,
           "description": "Build evidence recorded.",
-          "claim": {
-            "type": "k1.build.evidence",
-            "subject": "artifact",
+          "bundle_claim": {
+            "claimType": "k1.build.evidence",
+            "subjectType": "artifact",
             "accepted_statuses": ["trusted", "accepted"]
           }
         }

package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json CHANGED Viewed

@@ -11,12 +11,12 @@
       "expects": [
         {
           "id": "synthesis-evidence",
-          "kind": "surface.claim",
+          "kind": "trust.bundle",
           "required": true,
           "description": "Synthesis evidence with provenance refs.",
-          "claim": {
-            "type": "k2.synthesize.evidence",
-            "subject": "artifact",
+          "bundle_claim": {
+            "claimType": "k2.synthesize.evidence",
+            "subjectType": "artifact",
             "accepted_statuses": ["trusted", "accepted"]
           }
         }

package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json CHANGED Viewed

@@ -11,12 +11,12 @@
       "expects": [
         {
           "id": "review-evidence",
-          "kind": "surface.claim",
+          "kind": "trust.bundle",
           "required": true,
           "description": "Review evidence.",
-          "claim": {
-            "type": "third-party.review.evidence",
-            "subject": "artifact",
+          "bundle_claim": {
+            "claimType": "third-party.review.evidence",
+            "subjectType": "artifact",
             "accepted_statuses": ["trusted", "accepted"]
           }
         }

package/evals/fixtures/surface-trust/accepted-claim-trust-report.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "schema_version": "1.0",
-  "artifact_kind": "TrustReport",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": "surface-trust://fixtures/accepted-claim-trust-report.json",
   "subject": {
     "type": "flow-step",
@@ -8,7 +8,7 @@
   },
   "gate": {
     "id": "tests-evidence",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.verify.tests",

package/evals/fixtures/surface-trust/artifact-absent.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "schema_version": "1.0",
   "scenario": "artifact_absent",
-  "artifact_kind": "TrustReport",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": "surface-trust://fixtures/missing-trust-report.json",
   "gate": {
     "id": "implementation-plan",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.plan.implementation",

package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "schema_version": "1.0",
-  "artifact_kind": "TrustReport",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": "surface-trust://fixtures/integrity-mismatch-trust-report.json",
   "subject": {
     "type": "artifact",
@@ -8,7 +8,7 @@
   },
   "gate": {
     "id": "implementation-plan",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.plan.implementation",

package/evals/fixtures/surface-trust/missing-authority-trust-report.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "schema_version": "1.0",
-  "artifact_kind": "TrustReport",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": "surface-trust://fixtures/missing-authority-trust-report.json",
   "subject": {
     "type": "change",
@@ -8,7 +8,7 @@
   },
   "gate": {
     "id": "implementation-scope",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.execute.scope",

package/evals/fixtures/surface-trust/provider-absent.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "schema_version": "1.0",
   "scenario": "provider_absent",
-  "artifact_kind": "Trust Snapshot",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": null,
   "gate": {
     "id": "selected-work",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.pull-work.selected",

package/evals/fixtures/surface-trust/rejected-claim-trust-report.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "schema_version": "1.0",
-  "artifact_kind": "TrustReport",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": "surface-trust://fixtures/rejected-claim-trust-report.json",
   "subject": {
     "type": "change",
@@ -8,7 +8,7 @@
   },
   "gate": {
     "id": "implementation-scope",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.execute.scope",

package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "schema_version": "1.0",
-  "artifact_kind": "Trust Snapshot",
+  "artifact_kind": "trust.bundle",
   "artifact_ref": "surface-trust://fixtures/stale-claim-trust-snapshot.json",
   "subject": {
     "type": "flow-step",
@@ -8,7 +8,7 @@
   },
   "gate": {
     "id": "tests-evidence",
-    "kind": "surface.claim"
+    "kind": "trust.bundle"
   },
   "claim": {
     "type": "builder.verify.tests",

package/evals/integration/test_console_learning_projection.sh CHANGED Viewed

@@ -6,7 +6,7 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
 source "$ROOT/evals/lib/node.sh"
 FIXTURE_DIR="$ROOT/evals/fixtures/console-learning-projection"
-TMPDIR_EVAL="$(mktemp -d /private/tmp/eval-console-learning-projection.XXXXXX)"
+TMPDIR_EVAL="$(cd "$(mktemp -d "${TMPDIR:-/tmp}/eval-console-learning-projection.XXXXXX")" && pwd -P)"
 ARTIFACT_ROOT="$TMPDIR_EVAL/artifacts"
 KONTOUR_ROOT="$TMPDIR_EVAL/.kontour"
 GENERATED_AT="2026-06-06T20:00:00Z"

package/evals/integration/test_goal_fit_hook.sh CHANGED Viewed

@@ -473,6 +473,150 @@ else
   _fail "promoted doc is missing source or acceptance sections"
 fi
+# --- npm-install regression: validator-environment errors must not block goal-fit ---
+# Simulate the npm-installed condition: build/ is present (always shipped in package files)
+# but tsc is absent from PATH, so `npm run workflow:validate-artifacts` (which rebuilds)
+# would fail. The fix directly invokes node build/.../validate-workflow-artifacts.js instead.
+NPM_INSTALL_REPO="$TMPDIR_EVAL/npm-install-repo"
+mkdir -p "$NPM_INSTALL_REPO/.flow-agents/npm-install-task"
+printf '# Test Repo\n' > "$NPM_INSTALL_REPO/AGENTS.md"
+cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/npm-install-task--deliver.md" <<'MARKDOWN'
+# npm install test task
+branch: main
+worktree: main
+created: 2026-06-01
+status: delivered
+type: deliver
+## Definition Of Done
+- **User outcome:** Something works.
+- **Acceptance criteria:**
+  - [x] Thing works - Evidence: tested
+## Goal Fit Gate
+- [x] Original user goal restated
+- [x] Every acceptance criterion has evidence
+## Verification Report
+### Verdict: PASS
+## Final Acceptance
+- [ ] CI passed
+MARKDOWN
+cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/state.json" <<'JSON'
+{
+  "schema_version": "1.0",
+  "task_slug": "npm-install-task",
+  "status": "delivered",
+  "phase": "done",
+  "updated_at": "2026-06-01T00:00:00Z",
+  "next_action": { "status": "done", "summary": "Local delivery complete." }
+}
+JSON
+cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/acceptance.json" <<'JSON'
+{
+  "schema_version": "1.0",
+  "task_slug": "npm-install-task",
+  "criteria": [
+    {
+      "id": "thing-works",
+      "description": "Thing works.",
+      "status": "pass",
+      "evidence_refs": [
+        { "kind": "artifact", "file": "npm-install-task--deliver.md", "summary": "Delivery artifact." }
+      ]
+    }
+  ],
+  "goal_fit": { "status": "pass", "summary": "User outcome achieved." }
+}
+JSON
+cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/evidence.json" <<'JSON'
+{
+  "schema_version": "1.0",
+  "task_slug": "npm-install-task",
+  "verdict": "pass",
+  "checks": [
+    { "id": "build", "kind": "test", "status": "pass", "summary": "Build passed." }
+  ],
+  "not_verified_gaps": []
+}
+JSON
+cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/handoff.json" <<'JSON'
+{
+  "schema_version": "1.0",
+  "task_slug": "npm-install-task",
+  "summary": "Local delivery complete.",
+  "current_state_ref": "state.json",
+  "next_steps": [],
+  "blockers": [],
+  "warnings": []
+}
+JSON
+# Part 1 of fix: invoke the already-built validator directly (no tsc).
+# Poison tsc so that any call to it fails; confirm the hook does not call it
+# and validates clean sidecars successfully.
+FAKE_TSC_DIR="$TMPDIR_EVAL/fake-tsc"
+mkdir -p "$FAKE_TSC_DIR"
+printf '#!/usr/bin/env bash\necho "error TS5023: tsc should not be called" >&2\nexit 1\n' > "$FAKE_TSC_DIR/tsc"
+chmod +x "$FAKE_TSC_DIR/tsc"
+if PATH="$FAKE_TSC_DIR:$PATH" FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true \
+     node "$ROOT/scripts/hooks/stop-goal-fit.js" \
+     >"$TMPDIR_EVAL/npm-install-valid.out" 2>"$TMPDIR_EVAL/npm-install-valid.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$NPM_INSTALL_REPO"}
+JSON
+then
+  _pass "strict hook with poisoned tsc uses built validator and does not block valid sidecars"
+else
+  _fail "strict hook should not block valid sidecars even with tsc absent: $(cat "$TMPDIR_EVAL/npm-install-valid.err")"
+fi
+if ! rg -q 'tsc: command not found\|TS5023\|tsc should not be called' "$TMPDIR_EVAL/npm-install-valid.err"; then
+  _pass "hook does not emit tsc error noise when using built validator"
+else
+  _fail "hook leaked tsc error into goal-fit output"
+fi
+# Part 2 of fix: when the validator cannot run at all (build/ absent and npm fails),
+# the hook must skip cleanly — never block in strict mode due to an env error.
+mv "$ROOT/build" "$ROOT/build-absent"
+SPAWN_FAIL_DIR="$TMPDIR_EVAL/spawn-fail"
+mkdir -p "$SPAWN_FAIL_DIR"
+printf '#!/usr/bin/env bash\necho "npm ERR! tsc: command not found" >&2\nexit 127\n' > "$SPAWN_FAIL_DIR/npm"
+chmod +x "$SPAWN_FAIL_DIR/npm"
+if PATH="$SPAWN_FAIL_DIR:$PATH" FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true \
+     node "$ROOT/scripts/hooks/stop-goal-fit.js" \
+     >"$TMPDIR_EVAL/npm-install-env-err.out" 2>"$TMPDIR_EVAL/npm-install-env-err.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$NPM_INSTALL_REPO"}
+JSON
+then
+  _pass "strict hook does not block when validator environment fails (build/ absent, tsc missing)"
+else
+  _fail "strict hook must not block when validator env fails: $(cat "$TMPDIR_EVAL/npm-install-env-err.err")"
+fi
+if rg -q 'sidecar validation skipped' "$TMPDIR_EVAL/npm-install-env-err.err"; then
+  _pass "hook emits sidecar validation skipped warning for environment errors"
+else
+  _fail "hook did not emit 'sidecar validation skipped' for environment errors"
+fi
+# Restore build/ so subsequent evals are unaffected.
+mv "$ROOT/build-absent" "$ROOT/build"
 if [[ "$errors" -eq 0 ]]; then
   echo "Goal Fit hook integration passed."
   exit 0

package/evals/integration/test_hook_category_behaviors.sh CHANGED Viewed

@@ -63,9 +63,16 @@ if node "$ROOT/scripts/hooks/claude-hook-adapter.js" PreToolUse pre:config-prote
 {"hook_event_name":"PreToolUse","tool_input":{"path":"prettier.config.js"}}
 JSON
 then
+  claude_reason="$(run_json "$TMPDIR_EVAL/claude-block.json" "hookSpecificOutput.permissionDecisionReason")"
   if [[ "$(run_json "$TMPDIR_EVAL/claude-block.json" "continue")" == "false" ]] \
     && [[ "$(run_json "$TMPDIR_EVAL/claude-block.json" "hookSpecificOutput.permissionDecision")" == "deny" ]]; then
     pass "Claude runtime adapter translates PreToolUse policy block"
+    # Block Reason Channel: the deny must carry the steering reason to the model.
+    if [[ "$claude_reason" == *"Fix the source"* ]]; then
+      pass "Claude block surfaces the steer-to-source reason to the model"
+    else
+      fail "Claude block reason did not reach the model channel (permissionDecisionReason): $claude_reason"
+    fi
   else
     fail "Claude runtime adapter block contract mismatch"
   fi
@@ -77,8 +84,15 @@ if node "$ROOT/scripts/hooks/codex-hook-adapter.js" pre:config-protection config
 {"hook_event_name":"PreToolUse","tool_input":{"path":"biome.json"}}
 JSON
 then
+  codex_reason="$(run_json "$TMPDIR_EVAL/codex-block.json" "hookSpecificOutput.permissionDecisionReason")"
   if [[ "$(run_json "$TMPDIR_EVAL/codex-block.json" "hookSpecificOutput.permissionDecision")" == "deny" ]]; then
     pass "Codex runtime adapter translates PreToolUse policy block"
+    # Block Reason Channel: the deny must carry the steering reason to the model.
+    if [[ "$codex_reason" == *"Fix the source"* ]]; then
+      pass "Codex block surfaces the steer-to-source reason to the model"
+    else
+      fail "Codex block reason did not reach the model channel (permissionDecisionReason): $codex_reason"
+    fi
   else
     fail "Codex runtime adapter block contract mismatch"
   fi