@kontourai/flow-agents 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/.github/workflows/ci.yml +6 -1
  2. package/.github/workflows/kit-gates-demo.yml +6 -2
  3. package/CHANGELOG.md +33 -0
  4. package/CONTRIBUTING.md +30 -0
  5. package/agents/dev.json +1 -1
  6. package/agents/tool-planner.json +1 -1
  7. package/build/src/cli/console-learning-projection.d.ts +1 -0
  8. package/build/src/cli/effective-backlog-settings.d.ts +1 -0
  9. package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
  10. package/build/src/cli/init.d.ts +17 -0
  11. package/build/src/cli/kit.d.ts +1 -0
  12. package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
  13. package/build/src/cli/publish-change-helper.d.ts +1 -0
  14. package/build/src/cli/pull-work-provider.d.ts +1 -0
  15. package/build/src/cli/runtime-adapter.d.ts +1 -0
  16. package/build/src/cli/telemetry-doctor.d.ts +1 -0
  17. package/build/src/cli/usage-feedback.d.ts +1 -0
  18. package/build/src/cli/utterance-check.d.ts +1 -0
  19. package/build/src/cli/validate-hook-influence.d.ts +1 -0
  20. package/build/src/cli/validate-source-tree.d.ts +1 -0
  21. package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
  22. package/build/src/cli/veritas-governance.d.ts +1 -0
  23. package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
  24. package/build/src/cli/workflow-sidecar.d.ts +32 -0
  25. package/build/src/cli/workflow-sidecar.js +119 -22
  26. package/build/src/cli.d.ts +2 -0
  27. package/build/src/flow-kit/validate.d.ts +81 -0
  28. package/build/src/flow-kit/validate.js +32 -1
  29. package/build/src/index.d.ts +5 -0
  30. package/build/src/index.js +36 -0
  31. package/build/src/lib/args.d.ts +8 -0
  32. package/build/src/lib/fs.d.ts +7 -0
  33. package/build/src/lib/workflow-learning-projection.d.ts +132 -0
  34. package/build/src/runtime-adapters.d.ts +18 -0
  35. package/build/src/tools/build-universal-bundles.d.ts +2 -0
  36. package/build/src/tools/build-universal-bundles.js +14 -0
  37. package/build/src/tools/common.d.ts +9 -0
  38. package/build/src/tools/filter-installed-packs.d.ts +2 -0
  39. package/build/src/tools/generate-context-map.d.ts +2 -0
  40. package/build/src/tools/validate-package.d.ts +2 -0
  41. package/build/src/tools/validate-source-tree.d.ts +2 -0
  42. package/console.telemetry.json +1 -1
  43. package/docs/adr/0004-gates-expect-surface-claims.md +7 -7
  44. package/docs/developer-architecture.md +14 -0
  45. package/docs/kit-authoring-guide.md +99 -6
  46. package/docs/operating-layers.md +2 -2
  47. package/docs/spec/runtime-hook-surface.md +16 -1
  48. package/docs/veritas-integration.md +4 -4
  49. package/docs/workflow-eval-strategy.md +2 -2
  50. package/docs/workflow-usage-guide.md +1 -1
  51. package/evals/acceptance/test_opencode_harness.sh +18 -10
  52. package/evals/acceptance/test_pi_harness.sh +10 -6
  53. package/evals/ci/run-baseline.sh +1 -1
  54. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +4 -4
  55. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +4 -4
  56. package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +4 -4
  57. package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +4 -4
  58. package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +4 -4
  59. package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +4 -4
  60. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +2 -2
  61. package/evals/fixtures/surface-trust/artifact-absent.json +2 -2
  62. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +2 -2
  63. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +2 -2
  64. package/evals/fixtures/surface-trust/provider-absent.json +2 -2
  65. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +2 -2
  66. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +2 -2
  67. package/evals/integration/test_console_learning_projection.sh +1 -1
  68. package/evals/integration/test_goal_fit_hook.sh +144 -0
  69. package/evals/integration/test_hook_category_behaviors.sh +14 -0
  70. package/evals/integration/test_kit_conformance_levels.sh +55 -1
  71. package/evals/integration/test_workflow_sidecar_writer.sh +9 -9
  72. package/evals/run.sh +2 -0
  73. package/evals/static/test_library_exports.sh +85 -0
  74. package/evals/static/test_package.sh +3 -3
  75. package/evals/static/test_universal_bundles.sh +15 -0
  76. package/evals/static/test_workflow_skills.sh +4 -4
  77. package/kits/builder/flows/build.flow.json +48 -48
  78. package/kits/builder/flows/shape.flow.json +36 -36
  79. package/kits/knowledge/adapters/obsidian-store/index.js +137 -26
  80. package/kits/knowledge/evals/contract-suite/suite.test.js +90 -0
  81. package/kits/knowledge/flows/compile.flow.json +12 -12
  82. package/kits/knowledge/flows/consolidate.flow.json +16 -16
  83. package/kits/knowledge/flows/ingest.flow.json +12 -12
  84. package/kits/knowledge/flows/retire.flow.json +16 -16
  85. package/kits/knowledge/flows/store-contract.flow.json +12 -12
  86. package/kits/knowledge/flows/synthesize.flow.json +16 -16
  87. package/kits/release-evidence/flows/release-evidence.flow.json +3 -3
  88. package/package.json +14 -2
  89. package/schemas/workflow-evidence.schema.json +2 -1
  90. package/scripts/hooks/stop-goal-fit.js +66 -18
  91. package/src/cli/workflow-sidecar.ts +101 -21
  92. package/src/flow-kit/validate.ts +55 -1
  93. package/src/index.ts +53 -0
  94. package/src/tools/build-universal-bundles.ts +14 -0
  95. package/tsconfig.json +1 -0
@@ -57,6 +57,20 @@ Canonical hook scripts in `scripts/hooks/` use the following exit code contract
57
57
 
58
58
  Adapters translate these exit codes into the host-native response format. The `claude-hook-adapter.js` and `codex-hook-adapter.js` wrappers perform this translation, and all errors fail open so hook runtime failures never block agent work.
59
59
 
60
+ ### Block Reason Channel
61
+
62
+ A block (exit `2` → deny) is only useful if the agent learns *why* it was blocked and how to proceed. When a policy blocks, the hook script writes a human-readable reason — for example, config-protection's "Fix the source code … instead of weakening the config." The adapter **must surface that reason to the model** through the host's native deny-reason mechanism, **not only to a log or stderr**, where it dies before the agent sees it. A deny without a model-visible reason makes the agent retry the same blocked action instead of self-correcting.
63
+
64
+ | Host surface | Model-facing reason channel |
65
+ | --- | --- |
66
+ | Claude Code | `hookSpecificOutput.permissionDecisionReason` (preToolUse); `reason` (stop) |
67
+ | Codex | `hookSpecificOutput.permissionDecisionReason` (preToolUse); `reason` (stop) |
68
+ | opencode | the thrown error message on the blocked `tool.execute.before` (surfaced as the tool result) |
69
+ | pi | the `reason` field of the `{ block: true, reason }` tool-call result |
70
+ | Native pre-dispatch host (e.g. an orchestration layer) | the blocked call's tool-result text |
71
+
72
+ The reason text is the canonical steering message: it should tell the agent what to do *instead* (edit the source, not the generated artifact), so the agent can self-correct on the next turn. An adapter that denies the call but drops the reason to a log only is a **conformance gap** — record it in the adapter's conformance declaration.
73
+
60
74
  ---
61
75
 
62
76
  ## 2. Policy Classes
@@ -136,7 +150,7 @@ Flow Agents currently ships four canonical policy classes. Each policy class has
136
150
  - `SA_HOOK_INPUT_TRUNCATED` env var — whether input was truncated (truncated payloads are blocked unconditionally)
137
151
  - Protected file set: `.eslintrc*`, `eslint.config.*`, `.prettierrc*`, `prettier.config.*`, `biome.json`, `biome.jsonc`, `.ruff.toml`, `ruff.toml`, `.shellcheckrc`, `.stylelintrc*`, `.markdownlint*`
138
152
 
139
- **Decision contract**: Blocking (exits 2) when the target file basename is in the protected set. Writes a descriptive message to stderr directing the agent to fix source instead. Exits 0 (allow) otherwise.
153
+ **Decision contract**: Blocking (exits 2) when the target file basename is in the protected set. Writes a descriptive message directing the agent to fix source instead, which the adapter surfaces to the model as the deny reason (see [Block Reason Channel](#block-reason-channel)). Exits 0 (allow) otherwise.
140
154
 
141
155
  **Degradation when host lacks trigger**: If the host has no `preToolUse`-equivalent blocking hook, config protection cannot veto tool calls. The agent may modify linter configs without interception. Log the gap as `preToolUse: no native blocking equivalent — config-protection policy unavailable`.
142
156
 
@@ -190,6 +204,7 @@ The adapter implements L1 plus all blocking policy classes.
190
204
  **Required**:
191
205
  - L1 steering and stop telemetry.
192
206
  - Config protection fires on `preToolUse` and can block (exit 2 translates to a deny response).
207
+ - Every block surfaces its reason to the model through the host's deny-reason channel (see [Block Reason Channel](#block-reason-channel)), not only to a log.
193
208
  - Quality gate fires on `postToolUse`.
194
209
  - Stop-goal-fit fires on `stop` with `FLOW_AGENTS_GOAL_FIT_STRICT` configurable (default may be warning mode; strict mode must be possible to enable).
195
210
 
@@ -106,9 +106,9 @@ If Veritas is unavailable and the workflow expected it, record `not_verified` in
106
106
 
107
107
  ## Builder Kit Trust Evidence
108
108
 
109
- Builder Kit gates stay provider-neutral. The Builder Kit Flow Definition names gate expectations as `kind: "surface.claim"` and declares the claim type, subject, accepted statuses, and blocking behavior. It does not name Veritas or any other trust producer.
109
+ Builder Kit gates stay provider-neutral. The Builder Kit Flow Definition names gate expectations as `kind: "trust.bundle"` (the Hachure-aligned gate kind) and declares the claim type, subject, accepted statuses, and blocking behavior. It does not name Veritas or any other trust producer.
110
110
 
111
- When a trust-backed path is configured, Flow Agents may attach a compact Surface-shaped reference to the Builder Kit evidence gate. The reference points at a TrustReport or Trust Snapshot, carries the related gate id, Surface claim type, claim status, artifact ref, integrity summary, authority or trusted-producer summary, subject, and freshness state, and then maps to the normal Flow gate result. Flow owns the gate authority decision, route reason, trusted producer mapping, and accepted gap behavior. Surface owns the portable trust state represented by the Surface claim and the TrustReport / Trust Snapshot. A Probe can request or clarify the evidence needed before planning or before a later Builder Kit gate retries.
111
+ When a trust-backed path is configured, Flow Agents may attach a compact Hachure trust.bundle reference to the Builder Kit evidence gate. The reference uses `artifact_kind: "trust.bundle"` (the Hachure-aligned canonical value), carries the related gate id, domain claim type, claim status, artifact ref, integrity summary, authority or trusted-producer summary, subject, and freshness state, and then maps to the normal Flow gate result. When the `hachure` optional dependency is installed, referenced artifacts are validated against hachure's trust-bundle.schema.json at evidence-recording time. Flow owns the gate authority decision, route reason, trusted producer mapping, and accepted gap behavior. Surface owns the portable trust state represented by the Surface claim and the TrustReport / Trust Snapshot. A Probe can request or clarify the evidence needed before planning or before a later Builder Kit gate retries.
112
112
 
113
113
  Veritas is only one optional producer of those artifacts. A local Veritas readiness run can emit native Veritas evidence and, when configured, point Flow Agents at a Surface-shaped TrustReport or Trust Snapshot. Flow Agents records the reference; it does not copy Veritas rule models, readiness semantics, or provider-native fields into Builder Kit gates.
114
114
 
@@ -116,8 +116,8 @@ Provider and artifact absence are explicit:
116
116
 
117
117
  - If no trust provider is configured, ordinary Builder Kit activation, planning, verification, and evidence gates continue to work through the existing Flow Kit path.
118
118
  - If a trust-backed path was requested but no provider is configured, the trust check records `not_verified` with a clear gap instead of blocking unrelated Builder Kit usage.
119
- - If a provider is configured but the expected TrustReport or Trust Snapshot is absent or unreadable, only the requested trust-backed evidence check records `not_verified`; it does not silently pass and it does not make Veritas mandatory.
120
- - If a TrustReport or Trust Snapshot is present but has a rejected, stale, expired, missing-authority, or integrity-mismatched Surface claim, the Builder Kit evidence gate routes through the normal `fail` or `not_verified` path.
119
+ - If a provider is configured but the expected Hachure trust.bundle artifact is absent or unreadable, only the requested trust-backed evidence check records `not_verified`; it does not silently pass and it does not make Veritas mandatory.
120
+ - If a Hachure trust.bundle artifact is present but has a rejected, stale, expired, missing-authority, or integrity-mismatched claim, the Builder Kit evidence gate routes through the normal `fail` or `not_verified` path.
121
121
 
122
122
  ## Adoption Gate
123
123
 
@@ -6,7 +6,7 @@ title: Workflow Eval Strategy
6
6
 
7
7
  The Builder Kit workflow system now has concrete skill contracts for `idea-to-backlog`, `pull-work`, `plan-work`, `review-work`, `deliver`, `evidence-gate`, `release-readiness`, and `learning-review`, plus shared workflow contracts in `context/contracts/`. Evals should prove both the written contracts and the agent behavior around gates, artifacts, worktrees, Goal Fit, release readiness, final acceptance docs, and learning feedback.
8
8
 
9
- Flow Agents evals prove coordination, install, runtime adapter behavior, and artifact discipline. They should not redefine Flow gate authority: Flow Definitions use typed `expects` entries, Surface claim gates use `kind: "surface.claim"`, and Flow project config owns trusted producer mappings plus gate overrides.
9
+ Flow Agents evals prove coordination, install, runtime adapter behavior, and artifact discipline. They should not redefine Flow gate authority: Flow Definitions use typed `expects` entries, trust-bundle gates use `kind: "trust.bundle"`, and Flow project config owns trusted producer mappings plus gate overrides.
10
10
 
11
11
  ## Goals
12
12
 
@@ -161,7 +161,7 @@ Surface trust artifact attachment is covered by deterministic schema, runtime, a
161
161
  bash evals/integration/test_workflow_sidecar_writer.sh
162
162
  ```
163
163
 
164
- That eval exercises Builder Kit `surface.claim` evidence using provider-neutral TrustReport / Trust Snapshot fixtures for accepted, rejected, stale, missing-authority, integrity-mismatch, provider-absent, and artifact-absent cases. It proves Flow Agents can record compact Surface claim evidence in `evidence.json` and report pass, fail, or `NOT_VERIFIED` gaps without requiring provider-specific fields.
164
+ That eval exercises Builder Kit `trust.bundle` evidence using provider-neutral Hachure trust.bundle fixtures for accepted, rejected, stale, missing-authority, integrity-mismatch, provider-absent, and artifact-absent cases. It proves Flow Agents can record compact Surface claim evidence in `evidence.json` and report pass, fail, or `NOT_VERIFIED` gaps without requiring provider-specific fields.
165
165
 
166
166
  This coverage does not redefine Flow gate authority. Flow Definitions continue to express expectations, Flow project config owns trusted producer mappings and gate overrides, and Flow gate authority remains outside the local report writer. Runtime/provider gaps should be recorded as `NOT_VERIFIED` when a configured Surface claim path cannot be checked; ordinary Builder Kit workflows remain valid when no trust provider or trust artifact is configured.
167
167
 
@@ -6,7 +6,7 @@ title: Workflow Usage Guide
6
6
 
7
7
  This guide shows how to use the Builder Kit workflow skills in normal chats.
8
8
 
9
- > **Which doc do I want?** This page is the *driver's manual* — what to say at each stage and what should happen. If you want the conceptual map first — layers, sidecars, hooks, evidence, and why the system is shaped this way — read the [Agent System Guidebook](agent-system-guidebook.md). For a one-line summary of every skill and gate, use the [Skills Map](skills-map.md). Flow Agents coordinates the local runtime, installs Flow Kits, and records artifacts; Flow owns gate semantics, including typed `expects` entries with `kind: "surface.claim"`, trusted producer config, and gate overrides.
9
+ > **Which doc do I want?** This page is the *driver's manual* — what to say at each stage and what should happen. If you want the conceptual map first — layers, sidecars, hooks, evidence, and why the system is shaped this way — read the [Agent System Guidebook](agent-system-guidebook.md). For a one-line summary of every skill and gate, use the [Skills Map](skills-map.md). Flow Agents coordinates the local runtime, installs Flow Kits, and records artifacts; Flow owns gate semantics, including typed `expects` entries with `kind: "trust.bundle"`, trusted producer config, and gate overrides.
10
10
 
11
11
  The core pattern is:
12
12
 
@@ -21,7 +21,7 @@ wait_for_telemetry() {
21
21
  local file="$1"
22
22
  local i=0
23
23
  while [[ $i -lt 150 ]]; do
24
- [[ -s "$file" ]] && return 0
24
+ if [[ -s "$file" ]] && grep -q '"tool.invoke"' "$file" 2>/dev/null && grep -q '"tool.result"' "$file" 2>/dev/null; then return 0; fi
25
25
  sleep 0.1
26
26
  i=$((i + 1))
27
27
  done
@@ -73,23 +73,31 @@ for _attempt in 1 2; do
73
73
  grep -q '"tool.invoke"' "$TMP_WORK/.telemetry/full.jsonl" 2>/dev/null && break
74
74
  done
75
75
 
76
- LATEST_LOG="$(ls -t ~/.local/share/opencode/log/*.log 2>/dev/null | head -1 || true)"
77
- if [[ -n "$LATEST_LOG" ]] && grep -q "plugins/flow-agents.js loading plugin" "$LATEST_LOG" 2>/dev/null; then
78
- _pass "opencode log confirms flow-agents plugin loaded"
76
+ # Confirm load via the plugin's own marker file (written by the FlowAgentsPlugin
77
+ # factory at startup). This replaces grepping opencode's internal
78
+ # "plugins/flow-agents.js loading plugin" message, which opencode 1.17.x dropped
79
+ # and which opencode does not reliably surface to its log file — a stale-assertion
80
+ # false failure (#75). The factory runs regardless of provider, so this load
81
+ # signal is independent of whether a model turn completes.
82
+ if [[ -f "$TMP_WORK/.telemetry/opencode-plugin.loaded" ]]; then
83
+ _pass "flow-agents plugin loaded (factory marker present)"
79
84
  else
80
- _fail "opencode log did not confirm flow-agents plugin loaded"
85
+ _fail "flow-agents plugin did not load (factory marker absent)"
81
86
  fi
82
87
 
83
88
  telemetry_file="$TMP_WORK/.telemetry/full.jsonl"
84
89
  if [[ "$provider_error" -eq 1 ]]; then
85
90
  _skip "opencode telemetry assertions skipped (provider/auth error)"
86
91
  _skip "opencode telemetry tool events skipped (provider/auth error)"
92
+ elif ! wait_for_telemetry "$telemetry_file"; then
93
+ # Complete telemetry never appeared (no file, or tool.invoke/tool.result missing) — the agent never completed a model turn,
94
+ # expected in a provider-less environment (e.g. CI with no API key). The binary
95
+ # install, bundle, and mechanical hook chain are already covered; skip the
96
+ # live-model-dependent telemetry assertions rather than fail on them.
97
+ _skip "opencode telemetry assertions skipped (no telemetry — agent did not complete a turn, likely no provider)"
98
+ _skip "opencode telemetry tool events skipped (no turn)"
87
99
  else
88
- if wait_for_telemetry "$telemetry_file"; then
89
- _pass "opencode telemetry log was written"
90
- else
91
- _fail "opencode telemetry log was not written"
92
- fi
100
+ _pass "opencode telemetry log was written"
93
101
 
94
102
  if [[ -f "$telemetry_file" ]] && \
95
103
  node -e "
@@ -21,7 +21,7 @@ wait_for_telemetry() {
21
21
  local file="$1"
22
22
  local i=0
23
23
  while [[ $i -lt 150 ]]; do
24
- [[ -s "$file" ]] && return 0
24
+ if [[ -s "$file" ]] && grep -q '"session.start"' "$file" 2>/dev/null && grep -q '"tool.invoke"' "$file" 2>/dev/null && grep -q '"tool.result"' "$file" 2>/dev/null && grep -q '"session.end"' "$file" 2>/dev/null; then return 0; fi
25
25
  sleep 0.1
26
26
  i=$((i + 1))
27
27
  done
@@ -60,12 +60,16 @@ if [[ "$provider_error" -eq 1 ]]; then
60
60
  _skip "pi telemetry assertions skipped (provider/auth error)"
61
61
  _skip "pi telemetry event types skipped (provider/auth error)"
62
62
  _skip "pi telemetry session events skipped (provider/auth error)"
63
+ elif ! wait_for_telemetry "$telemetry_file"; then
64
+ # Complete telemetry never appeared (no file, or session.start/tool.invoke/tool.result/session.end missing) — the agent never completed a model turn,
65
+ # which in a provider-less environment (e.g. CI with no API key) is expected.
66
+ # The binary install, bundle, and mechanical hook chain are already covered;
67
+ # skip the live-model-dependent telemetry assertions rather than fail on them.
68
+ _skip "pi telemetry assertions skipped (no telemetry — agent did not complete a turn, likely no provider)"
69
+ _skip "pi telemetry event types skipped (no turn)"
70
+ _skip "pi telemetry session events skipped (no turn)"
63
71
  else
64
- if wait_for_telemetry "$telemetry_file"; then
65
- _pass "pi telemetry log was written"
66
- else
67
- _fail "pi telemetry log was not written"
68
- fi
72
+ _pass "pi telemetry log was written"
69
73
 
70
74
  if [[ -f "$telemetry_file" ]] && \
71
75
  node -e "
@@ -74,7 +74,7 @@ LANE_RUNTIME_AND_KIT=(
74
74
  "Kit conformance levels integration"
75
75
  "Local Flow Kit install integration"
76
76
  "Flow Kit install-git integration"
77
- # QUARANTINED (#74): passes on macOS, fails on Linux CI — not gating until triaged
77
+ "Console learning projection integration"
78
78
  "Context map integration"
79
79
  "Effective backlog settings integration"
80
80
  "Flow agents statusline integration"
@@ -11,12 +11,12 @@
11
11
  "expects": [
12
12
  {
13
13
  "id": "runtime-evidence",
14
- "kind": "surface.claim",
14
+ "kind": "trust.bundle",
15
15
  "required": true,
16
16
  "description": "Runtime activation evidence exists.",
17
- "claim": {
18
- "type": "mixed.runtime.evidence",
19
- "subject": "artifact",
17
+ "bundle_claim": {
18
+ "claimType": "mixed.runtime.evidence",
19
+ "subjectType": "artifact",
20
20
  "accepted_statuses": ["trusted", "accepted"]
21
21
  }
22
22
  }
@@ -11,12 +11,12 @@
11
11
  "expects": [
12
12
  {
13
13
  "id": "review-evidence",
14
- "kind": "surface.claim",
14
+ "kind": "trust.bundle",
15
15
  "required": true,
16
16
  "description": "Review evidence has been recorded.",
17
- "claim": {
18
- "type": "example.review.evidence",
19
- "subject": "artifact",
17
+ "bundle_claim": {
18
+ "claimType": "example.review.evidence",
19
+ "subjectType": "artifact",
20
20
  "accepted_statuses": ["trusted", "accepted"]
21
21
  }
22
22
  }
@@ -11,12 +11,12 @@
11
11
  "expects": [
12
12
  {
13
13
  "id": "review-finding",
14
- "kind": "surface.claim",
14
+ "kind": "trust.bundle",
15
15
  "required": true,
16
16
  "description": "Review finding recorded.",
17
- "claim": {
18
- "type": "k0.review.finding",
19
- "subject": "artifact",
17
+ "bundle_claim": {
18
+ "claimType": "k0.review.finding",
19
+ "subjectType": "artifact",
20
20
  "accepted_statuses": ["trusted", "accepted"]
21
21
  }
22
22
  }
@@ -11,12 +11,12 @@
11
11
  "expects": [
12
12
  {
13
13
  "id": "build-evidence",
14
- "kind": "surface.claim",
14
+ "kind": "trust.bundle",
15
15
  "required": true,
16
16
  "description": "Build evidence recorded.",
17
- "claim": {
18
- "type": "k1.build.evidence",
19
- "subject": "artifact",
17
+ "bundle_claim": {
18
+ "claimType": "k1.build.evidence",
19
+ "subjectType": "artifact",
20
20
  "accepted_statuses": ["trusted", "accepted"]
21
21
  }
22
22
  }
@@ -11,12 +11,12 @@
11
11
  "expects": [
12
12
  {
13
13
  "id": "synthesis-evidence",
14
- "kind": "surface.claim",
14
+ "kind": "trust.bundle",
15
15
  "required": true,
16
16
  "description": "Synthesis evidence with provenance refs.",
17
- "claim": {
18
- "type": "k2.synthesize.evidence",
19
- "subject": "artifact",
17
+ "bundle_claim": {
18
+ "claimType": "k2.synthesize.evidence",
19
+ "subjectType": "artifact",
20
20
  "accepted_statuses": ["trusted", "accepted"]
21
21
  }
22
22
  }
@@ -11,12 +11,12 @@
11
11
  "expects": [
12
12
  {
13
13
  "id": "review-evidence",
14
- "kind": "surface.claim",
14
+ "kind": "trust.bundle",
15
15
  "required": true,
16
16
  "description": "Review evidence.",
17
- "claim": {
18
- "type": "third-party.review.evidence",
19
- "subject": "artifact",
17
+ "bundle_claim": {
18
+ "claimType": "third-party.review.evidence",
19
+ "subjectType": "artifact",
20
20
  "accepted_statuses": ["trusted", "accepted"]
21
21
  }
22
22
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
- "artifact_kind": "TrustReport",
3
+ "artifact_kind": "trust.bundle",
4
4
  "artifact_ref": "surface-trust://fixtures/accepted-claim-trust-report.json",
5
5
  "subject": {
6
6
  "type": "flow-step",
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "gate": {
10
10
  "id": "tests-evidence",
11
- "kind": "surface.claim"
11
+ "kind": "trust.bundle"
12
12
  },
13
13
  "claim": {
14
14
  "type": "builder.verify.tests",
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
3
  "scenario": "artifact_absent",
4
- "artifact_kind": "TrustReport",
4
+ "artifact_kind": "trust.bundle",
5
5
  "artifact_ref": "surface-trust://fixtures/missing-trust-report.json",
6
6
  "gate": {
7
7
  "id": "implementation-plan",
8
- "kind": "surface.claim"
8
+ "kind": "trust.bundle"
9
9
  },
10
10
  "claim": {
11
11
  "type": "builder.plan.implementation",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
- "artifact_kind": "TrustReport",
3
+ "artifact_kind": "trust.bundle",
4
4
  "artifact_ref": "surface-trust://fixtures/integrity-mismatch-trust-report.json",
5
5
  "subject": {
6
6
  "type": "artifact",
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "gate": {
10
10
  "id": "implementation-plan",
11
- "kind": "surface.claim"
11
+ "kind": "trust.bundle"
12
12
  },
13
13
  "claim": {
14
14
  "type": "builder.plan.implementation",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
- "artifact_kind": "TrustReport",
3
+ "artifact_kind": "trust.bundle",
4
4
  "artifact_ref": "surface-trust://fixtures/missing-authority-trust-report.json",
5
5
  "subject": {
6
6
  "type": "change",
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "gate": {
10
10
  "id": "implementation-scope",
11
- "kind": "surface.claim"
11
+ "kind": "trust.bundle"
12
12
  },
13
13
  "claim": {
14
14
  "type": "builder.execute.scope",
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
3
  "scenario": "provider_absent",
4
- "artifact_kind": "Trust Snapshot",
4
+ "artifact_kind": "trust.bundle",
5
5
  "artifact_ref": null,
6
6
  "gate": {
7
7
  "id": "selected-work",
8
- "kind": "surface.claim"
8
+ "kind": "trust.bundle"
9
9
  },
10
10
  "claim": {
11
11
  "type": "builder.pull-work.selected",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
- "artifact_kind": "TrustReport",
3
+ "artifact_kind": "trust.bundle",
4
4
  "artifact_ref": "surface-trust://fixtures/rejected-claim-trust-report.json",
5
5
  "subject": {
6
6
  "type": "change",
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "gate": {
10
10
  "id": "implementation-scope",
11
- "kind": "surface.claim"
11
+ "kind": "trust.bundle"
12
12
  },
13
13
  "claim": {
14
14
  "type": "builder.execute.scope",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema_version": "1.0",
3
- "artifact_kind": "Trust Snapshot",
3
+ "artifact_kind": "trust.bundle",
4
4
  "artifact_ref": "surface-trust://fixtures/stale-claim-trust-snapshot.json",
5
5
  "subject": {
6
6
  "type": "flow-step",
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "gate": {
10
10
  "id": "tests-evidence",
11
- "kind": "surface.claim"
11
+ "kind": "trust.bundle"
12
12
  },
13
13
  "claim": {
14
14
  "type": "builder.verify.tests",
@@ -6,7 +6,7 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6
6
  source "$ROOT/evals/lib/node.sh"
7
7
 
8
8
  FIXTURE_DIR="$ROOT/evals/fixtures/console-learning-projection"
9
- TMPDIR_EVAL="$(mktemp -d /private/tmp/eval-console-learning-projection.XXXXXX)"
9
+ TMPDIR_EVAL="$(cd "$(mktemp -d "${TMPDIR:-/tmp}/eval-console-learning-projection.XXXXXX")" && pwd -P)"
10
10
  ARTIFACT_ROOT="$TMPDIR_EVAL/artifacts"
11
11
  KONTOUR_ROOT="$TMPDIR_EVAL/.kontour"
12
12
  GENERATED_AT="2026-06-06T20:00:00Z"
@@ -473,6 +473,150 @@ else
473
473
  _fail "promoted doc is missing source or acceptance sections"
474
474
  fi
475
475
 
476
+ # --- npm-install regression: validator-environment errors must not block goal-fit ---
477
+ # Simulate the npm-installed condition: build/ is present (always shipped in package files)
478
+ # but tsc is absent from PATH, so `npm run workflow:validate-artifacts` (which rebuilds)
479
+ # would fail. The fix directly invokes node build/.../validate-workflow-artifacts.js instead.
480
+
481
+ NPM_INSTALL_REPO="$TMPDIR_EVAL/npm-install-repo"
482
+ mkdir -p "$NPM_INSTALL_REPO/.flow-agents/npm-install-task"
483
+ printf '# Test Repo\n' > "$NPM_INSTALL_REPO/AGENTS.md"
484
+
485
+ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/npm-install-task--deliver.md" <<'MARKDOWN'
486
+ # npm install test task
487
+
488
+ branch: main
489
+ worktree: main
490
+ created: 2026-06-01
491
+ status: delivered
492
+ type: deliver
493
+
494
+ ## Definition Of Done
495
+ - **User outcome:** Something works.
496
+ - **Acceptance criteria:**
497
+ - [x] Thing works - Evidence: tested
498
+
499
+ ## Goal Fit Gate
500
+ - [x] Original user goal restated
501
+ - [x] Every acceptance criterion has evidence
502
+
503
+ ## Verification Report
504
+
505
+ ### Verdict: PASS
506
+
507
+ ## Final Acceptance
508
+
509
+ - [ ] CI passed
510
+ MARKDOWN
511
+
512
+ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/state.json" <<'JSON'
513
+ {
514
+ "schema_version": "1.0",
515
+ "task_slug": "npm-install-task",
516
+ "status": "delivered",
517
+ "phase": "done",
518
+ "updated_at": "2026-06-01T00:00:00Z",
519
+ "next_action": { "status": "done", "summary": "Local delivery complete." }
520
+ }
521
+ JSON
522
+
523
+ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/acceptance.json" <<'JSON'
524
+ {
525
+ "schema_version": "1.0",
526
+ "task_slug": "npm-install-task",
527
+ "criteria": [
528
+ {
529
+ "id": "thing-works",
530
+ "description": "Thing works.",
531
+ "status": "pass",
532
+ "evidence_refs": [
533
+ { "kind": "artifact", "file": "npm-install-task--deliver.md", "summary": "Delivery artifact." }
534
+ ]
535
+ }
536
+ ],
537
+ "goal_fit": { "status": "pass", "summary": "User outcome achieved." }
538
+ }
539
+ JSON
540
+
541
+ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/evidence.json" <<'JSON'
542
+ {
543
+ "schema_version": "1.0",
544
+ "task_slug": "npm-install-task",
545
+ "verdict": "pass",
546
+ "checks": [
547
+ { "id": "build", "kind": "test", "status": "pass", "summary": "Build passed." }
548
+ ],
549
+ "not_verified_gaps": []
550
+ }
551
+ JSON
552
+
553
+ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/handoff.json" <<'JSON'
554
+ {
555
+ "schema_version": "1.0",
556
+ "task_slug": "npm-install-task",
557
+ "summary": "Local delivery complete.",
558
+ "current_state_ref": "state.json",
559
+ "next_steps": [],
560
+ "blockers": [],
561
+ "warnings": []
562
+ }
563
+ JSON
564
+
565
+ # Part 1 of fix: invoke the already-built validator directly (no tsc).
566
+ # Poison tsc so that any call to it fails; confirm the hook does not call it
567
+ # and validates clean sidecars successfully.
568
+ FAKE_TSC_DIR="$TMPDIR_EVAL/fake-tsc"
569
+ mkdir -p "$FAKE_TSC_DIR"
570
+ printf '#!/usr/bin/env bash\necho "error TS5023: tsc should not be called" >&2\nexit 1\n' > "$FAKE_TSC_DIR/tsc"
571
+ chmod +x "$FAKE_TSC_DIR/tsc"
572
+
573
+ if PATH="$FAKE_TSC_DIR:$PATH" FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true \
574
+ node "$ROOT/scripts/hooks/stop-goal-fit.js" \
575
+ >"$TMPDIR_EVAL/npm-install-valid.out" 2>"$TMPDIR_EVAL/npm-install-valid.err" <<JSON
576
+ {"hook_event_name":"Stop","cwd":"$NPM_INSTALL_REPO"}
577
+ JSON
578
+ then
579
+ _pass "strict hook with poisoned tsc uses built validator and does not block valid sidecars"
580
+ else
581
+ _fail "strict hook should not block valid sidecars even with tsc absent: $(cat "$TMPDIR_EVAL/npm-install-valid.err")"
582
+ fi
583
+
584
+ if ! rg -q 'tsc: command not found|TS5023|tsc should not be called' "$TMPDIR_EVAL/npm-install-valid.err"; then # rg uses Rust regex: bare | alternates, \| would match a literal pipe
585
+ _pass "hook does not emit tsc error noise when using built validator"
586
+ else
587
+ _fail "hook leaked tsc error into goal-fit output"
588
+ fi
589
+
590
+ # Part 2 of fix: when the validator cannot run at all (build/ absent and npm fails),
591
+ # the hook must skip cleanly — never block in strict mode due to an env error.
592
+ mv "$ROOT/build" "$ROOT/build-absent"
593
+
594
+ SPAWN_FAIL_DIR="$TMPDIR_EVAL/spawn-fail"
595
+ mkdir -p "$SPAWN_FAIL_DIR"
596
+ printf '#!/usr/bin/env bash\necho "npm ERR! tsc: command not found" >&2\nexit 127\n' > "$SPAWN_FAIL_DIR/npm"
597
+ chmod +x "$SPAWN_FAIL_DIR/npm"
598
+
599
+ if PATH="$SPAWN_FAIL_DIR:$PATH" FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true \
600
+ node "$ROOT/scripts/hooks/stop-goal-fit.js" \
601
+ >"$TMPDIR_EVAL/npm-install-env-err.out" 2>"$TMPDIR_EVAL/npm-install-env-err.err" <<JSON
602
+ {"hook_event_name":"Stop","cwd":"$NPM_INSTALL_REPO"}
603
+ JSON
604
+ then
605
+ _pass "strict hook does not block when validator environment fails (build/ absent, tsc missing)"
606
+ else
607
+ _fail "strict hook must not block when validator env fails: $(cat "$TMPDIR_EVAL/npm-install-env-err.err")"
608
+ fi
609
+
610
+ if rg -q 'sidecar validation skipped' "$TMPDIR_EVAL/npm-install-env-err.err"; then
611
+ _pass "hook emits sidecar validation skipped warning for environment errors"
612
+ else
613
+ _fail "hook did not emit 'sidecar validation skipped' for environment errors"
614
+ fi
615
+
616
+ # Restore build/ so subsequent evals are unaffected.
617
+ mv "$ROOT/build-absent" "$ROOT/build"
618
+
619
+
476
620
  if [[ "$errors" -eq 0 ]]; then
477
621
  echo "Goal Fit hook integration passed."
478
622
  exit 0
@@ -63,9 +63,16 @@ if node "$ROOT/scripts/hooks/claude-hook-adapter.js" PreToolUse pre:config-prote
63
63
  {"hook_event_name":"PreToolUse","tool_input":{"path":"prettier.config.js"}}
64
64
  JSON
65
65
  then
66
+ claude_reason="$(run_json "$TMPDIR_EVAL/claude-block.json" "hookSpecificOutput.permissionDecisionReason")"
66
67
  if [[ "$(run_json "$TMPDIR_EVAL/claude-block.json" "continue")" == "false" ]] \
67
68
  && [[ "$(run_json "$TMPDIR_EVAL/claude-block.json" "hookSpecificOutput.permissionDecision")" == "deny" ]]; then
68
69
  pass "Claude runtime adapter translates PreToolUse policy block"
70
+ # Block Reason Channel: the deny must carry the steering reason to the model.
71
+ if [[ "$claude_reason" == *"Fix the source"* ]]; then
72
+ pass "Claude block surfaces the steer-to-source reason to the model"
73
+ else
74
+ fail "Claude block reason did not reach the model channel (permissionDecisionReason): $claude_reason"
75
+ fi
69
76
  else
70
77
  fail "Claude runtime adapter block contract mismatch"
71
78
  fi
@@ -77,8 +84,15 @@ if node "$ROOT/scripts/hooks/codex-hook-adapter.js" pre:config-protection config
77
84
  {"hook_event_name":"PreToolUse","tool_input":{"path":"biome.json"}}
78
85
  JSON
79
86
  then
87
+ codex_reason="$(run_json "$TMPDIR_EVAL/codex-block.json" "hookSpecificOutput.permissionDecisionReason")"
80
88
  if [[ "$(run_json "$TMPDIR_EVAL/codex-block.json" "hookSpecificOutput.permissionDecision")" == "deny" ]]; then
81
89
  pass "Codex runtime adapter translates PreToolUse policy block"
90
+ # Block Reason Channel: the deny must carry the steering reason to the model.
91
+ if [[ "$codex_reason" == *"Fix the source"* ]]; then
92
+ pass "Codex block surfaces the steer-to-source reason to the model"
93
+ else
94
+ fail "Codex block reason did not reach the model channel (permissionDecisionReason): $codex_reason"
95
+ fi
82
96
  else
83
97
  fail "Codex runtime adapter block contract mismatch"
84
98
  fi