@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -4,9 +4,22 @@
4
4
  *
5
5
  * The hook reads .flow-agents artifacts, looks for the most recent active
6
6
  * delivery/session file, and reports missing Definition Of Done, Goal Fit, or
7
- * Final Acceptance state. It is warning-only by default. Set
8
- * FLOW_AGENTS_GOAL_FIT_STRICT=true to return exit code 2 when local goal fit is
9
- * incomplete.
7
+ * Final Acceptance state.
8
+ *
9
+ * Enforcement is controlled by FLOW_AGENTS_GOAL_FIT_MODE:
10
+ * - block: return exit code 2 (blocks the Stop) when local goal fit is incomplete.
11
+ * - warn: return exit code 0 but still emit the guidance on stderr (default).
12
+ * - off: stay silent.
13
+ * The legacy FLOW_AGENTS_GOAL_FIT_STRICT=true is honored as an alias for block.
14
+ * The canonical engine default is warn; shipped runtime configs (e.g. Claude
15
+ * Code at L2) set block so the installed product enforces while the engine
16
+ * default and conformance contract stay warn.
17
+ *
18
+ * Scope: the gate evaluates the session's current task (.flow-agents/current.json)
19
+ * when set, so an unrelated active workflow elsewhere in the repo does not gate
20
+ * this stop. It also never hard-blocks a pre-execution (not-yet-started) task on
21
+ * mere incompleteness — only genuine false-completion signals (a claimed pass the
22
+ * capture log or evidence.json contradicts) block before execution begins.
10
23
  */
11
24
 
12
25
  'use strict';
@@ -29,10 +42,23 @@ const ACTIVE_STATUSES = new Set([
29
42
  'blocked',
30
43
  'partial',
31
44
  ]);
32
- const DELIVERY_TYPES = new Set(['deliver', 'delivery', 'fix-bug', 'execute-plan', 'verify-work']);
45
+ // WORKFLOW_SESSION_TYPES: used for artifact identification only, not for verdict production.
46
+ const WORKFLOW_SESSION_TYPES = new Set(['deliver', 'delivery', 'fix-bug', 'execute-plan', 'verify-work']);
33
47
  const SIDECAR_NAMES = new Set(['state.json', 'acceptance.json', 'evidence.json', 'handoff.json']);
34
48
  const OPTIONAL_SIDECAR_NAMES = new Set(['critique.json']);
35
49
 
50
+ // A workflow that has not started execution is EXPECTED to be incomplete, so the
51
+ // Stop gate must not hard-block on its missing DOD / Goal Fit / not-done state.
52
+ // Only genuine false-completion signals block a pre-execution task; execution
53
+ // onward gates fully.
54
+ const PRE_EXECUTION_STATUSES = new Set(['new', 'planning', 'planned', 'backlog']);
55
+ const PRE_EXECUTION_PHASES = new Set(['idea', 'backlog', 'pickup', 'planning']);
56
+
57
+ // Terminal tasks are complete — they must never gate a stop or count as "active".
58
+ // A stale current.json pointing at one, or a graveyard of finished states, must
59
+ // not block an unrelated session.
60
+ const TERMINAL_STATUSES = new Set(['done', 'delivered', 'accepted', 'archived', 'complete', 'completed']);
61
+
36
62
  function parseJson(raw) {
37
63
  try { return JSON.parse(raw || '{}'); } catch { return {}; }
38
64
  }
@@ -80,6 +106,15 @@ function hasSidecars(dir) {
80
106
  }
81
107
  }
82
108
 
109
/**
 * Returns true if a line of validator output looks like a validator-environment
 * error (shell/npm error, tsc missing, spawn failure) rather than a real
 * artifact validation message. Environment errors must never block goal-fit.
 *
 * Matching is case-insensitive and substring-based.
 *
 * @param {string} line - One line of validator stdout/stderr.
 * @returns {boolean} True when the line matches a known environment-error marker.
 */
function isEnvironmentError(line) {
  // `\.bin` is escaped so only the literal `node_modules/.bin` path matches;
  // an unescaped dot would accept any character in that position.
  return /tsc[:\s]|command not found|npm ERR!|npm error|ENOENT|EACCES|Cannot find module|node_modules\/\.bin|TypeScript version|version conflict|error TS[0-9]/i.test(line);
}
117
+
83
118
  function sidecarValidation(root, artifactDir) {
84
119
  const requireSidecars = String(process.env.FLOW_AGENTS_REQUIRE_SIDECARS || '').toLowerCase() === 'true';
85
120
  const requireCritique = String(process.env.FLOW_AGENTS_REQUIRE_CRITIQUE || '').toLowerCase() === 'true';
@@ -88,8 +123,6 @@ function sidecarValidation(root, artifactDir) {
88
123
  const packageRoot = fs.existsSync(path.join(root, 'package.json'))
89
124
  ? root
90
125
  : path.resolve(__dirname, '..', '..');
91
- const packageJson = path.join(packageRoot, 'package.json');
92
- if (!fs.existsSync(packageJson)) return [`${relative(root, artifactDir)} sidecar validation: package.json is missing; cannot run TypeScript workflow validator.`];
93
126
 
94
127
  let sidecarFiles = [];
95
128
  try {
@@ -112,33 +145,74 @@ function sidecarValidation(root, artifactDir) {
112
145
 
113
146
  if (sidecarFiles.length === 0) return [];
114
147
 
115
- const args = ['run', 'workflow:validate-artifacts', '--silent', '--'];
116
- args.push('--skip-markdown-validation');
117
- if (requireSidecars) args.push('--require-sidecars');
118
- if (requireCritique) args.push('--require-critique');
119
- args.push(artifactDir);
148
+ // Part 1 fix: invoke the already-built validator directly via `node`, bypassing
149
+ // `npm run build` (tsc). npm-installed packages ship build/ in the package files,
150
+ // so the compiled JS is always available. Only fall back to npm run if build/ is
151
+ // absent (a raw dev checkout that hasn't been built yet).
152
+ const builtValidator = path.join(packageRoot, 'build', 'src', 'cli', 'validate-workflow-artifacts.js');
153
+ const hasBuild = fs.existsSync(builtValidator);
154
+
155
+ const validatorArgs = ['--skip-markdown-validation'];
156
+ if (requireSidecars) validatorArgs.push('--require-sidecars');
157
+ if (requireCritique) validatorArgs.push('--require-critique');
158
+ validatorArgs.push(artifactDir);
159
+
160
+ let result;
161
+ if (hasBuild) {
162
+ // Direct node invocation: no tsc, no npm build step, works from any npm install.
163
+ result = spawnSync(process.execPath, [builtValidator, ...validatorArgs], {
164
+ cwd: packageRoot,
165
+ encoding: 'utf8',
166
+ timeout: 30000,
167
+ });
168
+ } else {
169
+ // Dev checkout without build/: fall back to npm run (may trigger tsc).
170
+ // If this also fails due to environment issues, Part 2 handles it below.
171
+ const npmArgs = ['run', 'workflow:validate-artifacts', '--silent', '--', ...validatorArgs];
172
+ result = spawnSync('npm', npmArgs, {
173
+ cwd: packageRoot,
174
+ encoding: 'utf8',
175
+ timeout: 30000,
176
+ });
177
+ }
120
178
 
121
- const result = spawnSync('npm', args, {
122
- cwd: packageRoot,
123
- encoding: 'utf8',
124
- timeout: 30000,
125
- });
179
+ // Part 2 fix: treat validator-environment failures as SKIP, never as blocking.
180
+ // A spawn error (ENOENT, timeout) means the validator couldn't run at all.
181
+ if (result.error) {
182
+ // Validator couldn't be launched — environment issue, not a goal-fit failure.
183
+ return [`${relative(root, artifactDir)} sidecar validation skipped: validator could not run (${result.error.code || result.error.message})`];
184
+ }
126
185
 
127
186
  if (result.status === 0) return [];
128
- const output = `${result.stdout || ''}\n${result.stderr || ''}`
187
+
188
+ // Validator ran and exited non-zero. Separate real validation errors from
189
+ // environment errors (tsc missing, npm ERR!, shell errors) so that a broken
190
+ // validator environment never blocks goal-fit.
191
+ const allLines = `${result.stdout || ''}\n${result.stderr || ''}`
129
192
  .split('\n')
130
193
  .map(line => line.trim())
131
- .filter(Boolean)
132
- .slice(0, 12);
133
- if (output.length === 0) output.push(`validator exited with status ${result.status ?? 'unknown'}`);
134
- return output.map(line => `${relative(root, artifactDir)} sidecar validation: ${line}`);
194
+ .filter(Boolean);
195
+
196
+ const envLines = allLines.filter(isEnvironmentError);
197
+ const validationLines = allLines.filter(line => !isEnvironmentError(line));
198
+
199
+ if (envLines.length > 0 && validationLines.length === 0) {
200
+ // Pure environment failure — skip, do not block.
201
+ return [`${relative(root, artifactDir)} sidecar validation skipped: validator environment error (${envLines[0].slice(0, 120)})`];
202
+ }
203
+
204
+ // Real validation errors (possibly mixed with a few env noise lines).
205
+ const output = validationLines.length > 0 ? validationLines : allLines;
206
+ const trimmed = output.slice(0, 12);
207
+ if (trimmed.length === 0) trimmed.push(`validator exited with status ${result.status ?? 'unknown'}`);
208
+ return trimmed.map(line => `${relative(root, artifactDir)} sidecar validation: ${line}`);
135
209
  }
136
210
 
137
211
  function isWorkflowArtifact(artifact) {
138
212
  if (!artifact) return false;
139
213
  if (artifact.role === 'plan' || artifact.role === 'review') return false;
140
214
  if (artifact.file.endsWith('-plan.md') || artifact.file.endsWith('-review.md')) return false;
141
- if (DELIVERY_TYPES.has(artifact.type)) return true;
215
+ if (WORKFLOW_SESSION_TYPES.has(artifact.type)) return true;
142
216
  return /--(deliver|fix-bug|execute-plan|verify-work)\b/.test(path.basename(artifact.file));
143
217
  }
144
218
 
@@ -171,6 +245,25 @@ function readJsonFile(file) {
171
245
  }
172
246
  }
173
247
 
248
// ─── ADR 0010 Phase 2b: re-derive-at-gate via Surface (fail-open) ─────────────
// @kontourai/surface ships as ESM only, while this hook is CommonJS, so the
// package is pulled in through a dynamic import(). The outcome of the first
// attempt is cached. When the import fails (package absent, env mismatch) the
// cached value is null, so callers can fall back to the stored claim.status
// check from #133 — no regression for environments that lack
// @kontourai/surface. The module is never written to disk.
let _surfaceModule; // undefined = load not attempted yet; null = Surface unavailable

/**
 * Load @kontourai/surface once, failing open.
 *
 * @returns {Promise<object|null>} The module namespace, or null when the import
 *   rejected. Later calls return the cached result without importing again.
 */
async function tryLoadSurface() {
  if (_surfaceModule === undefined) {
    _surfaceModule = await import('@kontourai/surface').catch(() => null);
  }
  return _surfaceModule;
}
266
+
174
267
  function safeOneLine(value, maxLength = 220) {
175
268
  const text = String(value || '').replace(/\s+/g, ' ').trim();
176
269
  if (text.length <= maxLength) return text;
@@ -192,8 +285,12 @@ function sidecarGuidance(root, artifactDir) {
192
285
  const status = normalizedStatus(state.status || 'unknown');
193
286
  const phase = normalizedStatus(state.phase || 'unknown');
194
287
  const next = state.next_action && typeof state.next_action === 'object' ? state.next_action : null;
195
- if (!['done', 'delivered', 'archived', 'accepted', 'complete', 'completed'].includes(status)) {
196
- const nextStatus = next ? normalizedStatus(next.status || 'unknown') : 'unknown';
288
+ const nextStatus = next ? normalizedStatus(next.status || 'unknown') : 'unknown';
289
+ // The agent's work is complete when the recorded next action is done — the
290
+ // gate must not block the agent for a remaining human/CI step (e.g. a verified
291
+ // task whose only next_action is "commit the migration").
292
+ const agentComplete = nextStatus === 'done';
293
+ if (!TERMINAL_STATUSES.has(status) && !agentComplete) {
197
294
  const nextSummary = next && next.summary ? `; next_action:${nextStatus} "${safeOneLine(next.summary)}"` : '';
198
295
  warnings.push(`${base} workflow state: status:${status} phase:${phase}${nextSummary}`);
199
296
  }
@@ -242,16 +339,397 @@ function sidecarGuidance(root, artifactDir) {
242
339
  return warnings;
243
340
  }
244
341
 
245
- function markdownVerdict(text) {
246
- const verdict = (/###\s+Verdict:\s*([A-Za-z_ -]+)/i.exec(text) || [])[1]
247
- || (/^Build:\s*\[?([A-Za-z_ -]+)\]?/im.exec(text) || [])[1]
248
- || '';
249
- return normalizedStatus(verdict).replace(/[^a-z_ -].*$/, '').trim();
342
+ // -----------------------------------------------------------------------
343
+ // Capture-first evidence determinism (Part B)
344
+ //
345
+ // evidence.json is the MODEL transcribing what it thinks happened. The capture
346
+ // hook (evidence-capture.js) writes the REAL command results to
347
+ // command-log.jsonl at the source. Here at the Stop gate we cross-reference the
348
+ // model's claimed-pass command checks against that captured truth, and only fall
349
+ // back to re-running a TRUSTED command when the log has no execution for a
350
+ // claimed-pass command (i.e. it was never actually run).
351
+ // -----------------------------------------------------------------------
352
+
353
/**
 * Canonicalize a command string so equivalent spellings compare equal:
 * every whitespace run becomes a single space and the ends are trimmed.
 *
 * @param {*} value - Command text; any falsy value normalizes to ''.
 * @returns {string}
 */
function normalizeCommand(value) {
  const text = value ? String(value) : '';
  return text.split(/\s+/).join(' ').trim();
}
356
+
357
/**
 * Read command-log.jsonl into a map of normalized-command -> aggregate outcome.
 *
 * If the same command was run more than once, a single FAIL makes the aggregate
 * a fail (a caught false-completion must not be masked by a later pass). Once
 * an aggregate has failed, the reported exitCode stays that of the most recent
 * FAILING run, so a later passing run cannot relabel the failure as exit 0.
 *
 * @param {string} artifactDir - Directory expected to contain command-log.jsonl.
 * @returns {Map<string, {ran: boolean, failed: boolean, exitCode: (number|null)}>}
 *   Empty when the log cannot be read; blank lines, malformed JSON lines and
 *   entries without a string `command` are skipped.
 */
function readCommandLog(artifactDir) {
  const file = path.join(artifactDir, 'command-log.jsonl');
  let raw;
  try {
    raw = fs.readFileSync(file, 'utf8');
  } catch {
    // No readable capture log: nothing was recorded for this artifact.
    return new Map();
  }

  const byCommand = new Map();
  for (const line of raw.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    let entry;
    try { entry = JSON.parse(trimmed); } catch { continue; }
    if (!entry || typeof entry.command !== 'string') continue;
    const key = normalizeCommand(entry.command);
    if (!key) continue;

    const hasExit = Number.isInteger(entry.exitCode);
    const failed = entry.observedResult === 'fail' || (hasExit && entry.exitCode !== 0);
    const prev = byCommand.get(key);
    const prevFailed = prev ? prev.failed : false;
    const prevExit = prev ? prev.exitCode : null;

    let exitCode;
    if (failed) {
      // Report this failing run's code; without one, keep an earlier failure's code.
      exitCode = hasExit ? entry.exitCode : (prevFailed ? prevExit : null);
    } else if (prevFailed) {
      // A later pass must not overwrite the exit code of the recorded failure.
      exitCode = prevExit;
    } else {
      exitCode = hasExit ? entry.exitCode : prevExit;
    }

    byCommand.set(key, { ran: true, failed: failed || prevFailed, exitCode });
  }
  return byCommand;
}
385
+
386
/**
 * Pick a TRUSTED command to re-run for a claimed-pass check whose command never
 * appeared in the capture log. Sources are tried from most to least trusted:
 *   (a) the command authored upfront on the matching acceptance criterion
 *       (acceptance.json evidence_ref of kind "command");
 *   (b) a target the project declares in one of its manifests
 *       (see declaredManifestTarget);
 *   (c) the model's own free-form evidence.checks[].command — used ONLY when
 *       FLOW_AGENTS_GOAL_FIT_RECHECK=true, because it executes model-supplied
 *       text through a shell (the RCE-risky opt-in path).
 *
 * @param {string} root - Project root; used as cwd for the re-run.
 * @param {string} artifactDir - Artifact directory (not consulted here).
 * @param {object} check - The evidence check being verified.
 * @param {object|null} acceptance - Parsed acceptance.json, if any.
 * @returns {{argv: string[], cwd: string, source: string}|null} Null when
 *   nothing trusted resolves.
 */
function resolveTrustedCommand(root, artifactDir, check, acceptance) {
  const viaShell = (command, source) => ({ argv: ['bash', '-lc', command], cwd: root, source });

  // (a) Command authored on the acceptance criterion.
  const authored = acceptanceCommandFor(check, acceptance);
  if (authored) return viaShell(authored, 'acceptance');

  // (b) Project-declared manifest target.
  const target = declaredManifestTarget(root, check);
  if (target) return { argv: target.argv, cwd: target.cwd || root, source: 'manifest' };

  // (c) Model-provided command, strictly opt-in.
  const recheckOptIn = String(process.env.FLOW_AGENTS_GOAL_FIT_RECHECK || '').toLowerCase() === 'true';
  if (!recheckOptIn) return null;
  const modelCommand = normalizeCommand(check && check.command);
  return modelCommand
    ? viaShell(modelCommand, 'model-command (FLOW_AGENTS_GOAL_FIT_RECHECK)')
    : null;
}
413
+
414
/**
 * Find the command authored in acceptance.json that corresponds to a check.
 *
 * Only evidence_refs of kind "command" are considered; their command text is
 * taken from `excerpt` (preferred) or `command`. A ref is returned when its
 * criterion id equals the check id, or when its command equals the check's
 * command. If nothing matches, the first authored command is returned only
 * when the check itself names no command.
 *
 * @param {object} check - Evidence check (reads `id` and `command`).
 * @param {object|null} acceptance - Parsed acceptance.json (reads `criteria`).
 * @returns {string|null} The normalized authored command, or null.
 */
function acceptanceCommandFor(check, acceptance) {
  const criteria = acceptance && acceptance.criteria;
  if (!Array.isArray(criteria)) return null;

  const wantedId = normalizedStatus(check && check.id);
  const wantedCmd = normalizeCommand(check && check.command);
  let fallback = null;

  for (const criterion of criteria) {
    if (!criterion || !Array.isArray(criterion.evidence_refs)) continue;
    for (const ref of criterion.evidence_refs) {
      const isCommandRef = Boolean(ref) && typeof ref === 'object' && ref.kind === 'command';
      if (!isCommandRef) continue;
      const authored = normalizeCommand(ref.excerpt || ref.command);
      if (!authored) continue;
      // Remember the first authored command as the weak fallback.
      if (fallback === null) fallback = authored;
      // Strong match: same criterion id, or the very same command.
      const sameId = Boolean(wantedId) && normalizedStatus(criterion.id) === wantedId;
      const sameCommand = Boolean(wantedCmd) && authored === wantedCmd;
      if (sameId || sameCommand) return authored;
    }
  }

  // A check that names its own (unmatched) command gets no authored substitute.
  return wantedCmd ? null : fallback;
}
435
+
436
/**
 * Map a claimed-pass command check to a project-declared, NAMED manifest target.
 * Never allowlists arbitrary strings: we only run a target the project itself
 * declared (npm script, Makefile target, cargo/tox/just/task). The check's
 * command/id/kind is used to pick WHICH declared target (test|build|lint), not
 * to run the raw string. `veritas readiness` is just one such declared command —
 * no special-casing.
 *
 * @param {string} root - Project root that holds the manifests.
 * @param {object} check - Evidence check (reads `command`, `id`, `kind`).
 * @returns {{argv: string[], cwd: string}|null} The declared target to run, or
 *   null when the check maps to no category or no manifest declares it.
 */
function declaredManifestTarget(root, check) {
  const haystack = `${normalizeCommand(check && check.command)} ${normalizedStatus(check && check.id)} ${normalizedStatus(check && check.kind)}`.toLowerCase();

  // A keyword must START a word (after start-of-text or any non-alphanumeric
  // character, so snake_case and kebab-case ids both work). The alternation is
  // grouped so the boundary applies to EVERY keyword; otherwise "inspect"
  // would select `test` (via "spec") and "information" `lint` (via "format").
  const mentions = (keywords) => new RegExp(`(?:^|[^a-z0-9])(?:${keywords.join('|')})`).test(haystack);

  let want = null;
  if (mentions(['test', 'spec', 'jest', 'vitest', 'pytest'])) want = 'test';
  else if (mentions(['build', 'compile', 'bundle'])) want = 'build';
  else if (mentions(['lint', 'format', 'style', 'typecheck'])) want = 'lint';
  if (!want) return null;

  const firstExisting = (names) => names.map(n => path.join(root, n)).find(p => fs.existsSync(p));
  // True when `file` is readable and its text matches `pattern`.
  const declares = (file, pattern) => {
    try { return pattern.test(fs.readFileSync(file, 'utf8')); } catch { return false; }
  };

  // package.json scripts.{test,build,lint}; `typecheck` stands in for a missing lint script.
  const pkg = readJsonFile(path.join(root, 'package.json'));
  if (pkg && pkg.scripts && typeof pkg.scripts === 'object') {
    const scriptName = pkg.scripts[want] ? want
      : want === 'lint' && pkg.scripts.typecheck ? 'typecheck'
        : null;
    if (scriptName) return { argv: ['npm', 'run', scriptName, '--silent'], cwd: root };
  }

  // Makefile rule `<want>:`. `(?!=)` skips a `<want> := value` variable assignment.
  const makefile = firstExisting(['Makefile', 'makefile', 'GNUmakefile']);
  if (makefile && declares(makefile, new RegExp(`^${want}\\s*:(?!=)`, 'm'))) {
    return { argv: ['make', want], cwd: root };
  }

  // cargo
  if ((want === 'test' || want === 'build') && fs.existsSync(path.join(root, 'Cargo.toml'))) {
    return { argv: ['cargo', want], cwd: root };
  }

  // py ecosystem: tox / pyproject (declared test target)
  if (want === 'test' && fs.existsSync(path.join(root, 'tox.ini'))) return { argv: ['tox'], cwd: root };
  if (want === 'test' && fs.existsSync(path.join(root, 'pyproject.toml'))) return { argv: ['pytest'], cwd: root };

  // just / task runners: the runner file must actually DECLARE the recipe/task.
  // Running an undeclared one exits non-zero and would look like a failed check.
  const runners = [
    // just recipe header: optional `@`, name, optional parameters, then `:` (not `:=`).
    ['just', ['justfile', 'Justfile', '.justfile'], new RegExp(`^@?${want}(?:[ \\t][^:\\n]*)?:(?!=)`, 'm')],
    // Taskfile task: an indented `<want>:` mapping key.
    ['task', ['Taskfile.yml', 'Taskfile.yaml'], new RegExp(`^[ \\t]+["']?${want}["']?[ \\t]*:`, 'm')],
  ];
  for (const [bin, fileNames, declaration] of runners) {
    const manifest = firstExisting(fileNames);
    if (manifest && declares(manifest, declaration)) return { argv: [bin, want], cwd: root };
  }
  return null;
}
480
+
481
/**
 * Timeout, in milliseconds, applied to a trusted backstop re-run.
 *
 * Read from FLOW_AGENTS_GOAL_FIT_BACKSTOP_TIMEOUT_MS (parsed as a base-10
 * integer); anything that is not a positive integer yields the 120000 default.
 *
 * @returns {number}
 */
function resolveBackstopTimeout() {
  const DEFAULT_TIMEOUT_MS = 120000;
  const configured = Number.parseInt(process.env.FLOW_AGENTS_GOAL_FIT_BACKSTOP_TIMEOUT_MS || '', 10);
  if (!Number.isInteger(configured) || configured <= 0) return DEFAULT_TIMEOUT_MS;
  return configured;
}
485
+
486
/**
 * How the trusted backstop re-run participates in enforcement, taken from
 * FLOW_AGENTS_GOAL_FIT_BACKSTOP (trimmed, case-insensitive):
 *   - "block" (also the default for unset/unknown values): the re-run may ride
 *     block mode, so a never-actually-run claimed-pass command is caught.
 *   - "off" (alias "warn"): the re-run becomes warn-only.
 *   - "skip": no re-run at all; record NOT_VERIFIED instead.
 *
 * @returns {'block'|'off'|'skip'}
 */
function resolveBackstopMode() {
  const requested = String(process.env.FLOW_AGENTS_GOAL_FIT_BACKSTOP || '').trim().toLowerCase();
  switch (requested) {
    case 'off':
    case 'warn':
      return 'off';
    case 'skip':
      return 'skip';
    default:
      return 'block';
  }
}
497
+
498
/**
 * Synchronously re-run a trusted command and report whether it passed.
 *
 * The child gets no stdin, has stdout/stderr piped, and is killed with SIGKILL
 * once resolveBackstopTimeout() milliseconds elapse.
 *
 * @param {{argv: string[], cwd: string}} trusted - Program + arguments and the
 *   working directory to run them in.
 * @returns {{ran: true, passed: boolean, exitCode: (number|null)}
 *   | {ran: false, error: string, timedOut?: boolean}}
 *   `ran: false` means no exit status was obtained: the spawn failed, the
 *   timeout fired (`timedOut: true`), or the child was killed by a signal.
 */
function runBackstop(trusted) {
  const result = spawnSync(trusted.argv[0], trusted.argv.slice(1), {
    cwd: trusted.cwd,
    encoding: 'utf8',
    timeout: resolveBackstopTimeout(),
    killSignal: 'SIGKILL',
    stdio: ['ignore', 'pipe', 'pipe'],
  });
  if (result.error) {
    const error = result.error.code || result.error.message;
    // On a timeout spawnSync reports BOTH an ETIMEDOUT error and the kill
    // signal. This branch runs first, so the timeout has to be flagged here —
    // otherwise a real timeout is indistinguishable from a spawn failure.
    if (result.error.code === 'ETIMEDOUT') return { ran: false, error, timedOut: true };
    return { ran: false, error };
  }
  if (result.signal) return { ran: false, error: `killed (${result.signal})`, timedOut: result.signal === 'SIGKILL' || result.signal === 'SIGTERM' };
  return { ran: true, passed: result.status === 0, exitCode: result.status };
}
251
510
 
252
- function analyze(root, now = Date.now()) {
253
- const dirs = [path.join(root, '.flow-agents')];
254
- const artifacts = dirs
511
/**
 * Cross-reference claimed-pass command checks in evidence.json against the
 * capture log, using a trusted backstop re-run only when the log holds no
 * execution for the command.
 *
 * A check is considered when it is an object with kind "command", status
 * "pass"/"passed" and a non-empty normalized command. At most the first 8 such
 * checks are examined. For each one:
 *   1. If the capture log shows it ran: a recorded failure yields a
 *      "CONTRADICTS" warning; a recorded pass yields nothing.
 *   2. Otherwise, with backstop mode "skip", a NOT_VERIFIED warning is emitted.
 *   3. Otherwise a trusted command is resolved; if none resolves, or it cannot
 *      be run, a NOT_VERIFIED warning is emitted. If it runs and fails, a
 *      false-completion warning is emitted (tagged as non-blocking when the
 *      backstop mode is "off").
 *
 * Backstop outcomes are memoized per (cwd, argv) for the duration of the call,
 * so several checks that resolve to the same trusted command trigger a single
 * re-run instead of one (potentially timeout-long) run per check.
 *
 * @param {string} root repository root, used to render the artifact path
 * @param {string} artifactDir directory holding evidence.json / acceptance.json
 * @returns {string[]} warning messages (empty when evidence.json is missing
 *   or has no `checks` array)
 */
function captureCrossReference(root, artifactDir) {
  const evidence = readJsonFile(path.join(artifactDir, 'evidence.json'));
  if (!evidence || !Array.isArray(evidence.checks)) return [];
  const acceptance = readJsonFile(path.join(artifactDir, 'acceptance.json'));
  const log = readCommandLog(artifactDir);
  const base = relative(root, artifactDir);
  const backstopMode = resolveBackstopMode();
  const warnings = [];
  // Outcome cache keyed by the exact command vector and working directory.
  const backstopOutcomes = new Map();

  const claimedPass = evidence.checks.filter(check => {
    if (!check || typeof check !== 'object') return false;
    const kind = normalizedStatus(check.kind);
    const status = normalizedStatus(check.status);
    return kind === 'command' && (status === 'pass' || status === 'passed') && normalizeCommand(check.command);
  });

  for (const check of claimedPass.slice(0, 8)) {
    const cmd = normalizeCommand(check.command);
    const id = safeOneLine(check.id || cmd, 80);
    const logged = log.get(cmd);

    if (logged && logged.ran) {
      // (1) The capture log is authoritative whenever it has an execution.
      if (logged.failed) {
        const exit = Number.isInteger(logged.exitCode) ? ` (exitCode:${logged.exitCode})` : '';
        warnings.push(`${base} evidence check ${id}: capture log CONTRADICTS claimed pass — command "${safeOneLine(cmd, 120)}" was recorded as FAIL${exit}. This is a caught false-completion.`);
      }
      // Logged as ran-and-passed: confirmed without a re-run.
      continue;
    }

    // (2) Backstop: the log has no execution for this claimed-pass command.
    if (backstopMode === 'skip') {
      warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — command "${safeOneLine(cmd, 120)}" was never captured and backstop re-run is disabled (FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip).`);
      continue;
    }
    const trusted = resolveTrustedCommand(root, artifactDir, check, acceptance);
    if (!trusted) {
      warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — command "${safeOneLine(cmd, 120)}" was never captured and no trusted command (acceptance criterion / declared manifest target) resolves to re-run it. Set FLOW_AGENTS_GOAL_FIT_RECHECK=true to opt into re-running the model's free-form command.`);
      continue;
    }

    const cacheKey = JSON.stringify([trusted.cwd, trusted.argv]);
    let outcome = backstopOutcomes.get(cacheKey);
    if (!outcome) {
      outcome = runBackstop(trusted);
      backstopOutcomes.set(cacheKey, outcome);
    }

    if (!outcome.ran) {
      warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — trusted backstop (${trusted.source}) could not run (${safeOneLine(outcome.error, 80)}).`);
      continue;
    }
    if (!outcome.passed) {
      const note = `${base} evidence check ${id}: trusted backstop (${trusted.source}) re-run of "${trusted.argv.join(' ')}" FAILED with exit ${outcome.exitCode}, contradicting the claimed pass. This is a caught false-completion.`;
      if (backstopMode === 'off') warnings.push(`${note} [backstop in warn mode — not blocking]`);
      else warnings.push(note);
    }
    // Backstop passed: the claim is confirmed by the re-run, no warning.
  }

  return warnings;
}
576
+
577
// ─── ADR 0010 Phase 2 / 2b: gate on the trust.bundle ─────────────────────────
/**
 * Inspect `<artifactDir>/trust.bundle` and warn about every high- or
 * critical-impact claim whose status is blocking ("disputed" or "rejected").
 *
 * Two views of each claim's status are combined:
 *   - the status stored on the claim, and
 *   - when the Surface module loads and exposes `deriveClaimStatus`, a status
 *     re-derived from the claim's own evidence and events plus all bundle
 *     policies.
 * A claim is reported when EITHER view is blocking, so flipping the stored
 * field cannot hide a blocking re-derivation, and removing evidence cannot
 * clear a stored blocking status. A stored "verified"/"assumed" paired with a
 * blocking re-derivation is reported as likely tampering.
 *
 * If Surface is unavailable, or the re-derivation throws, only the stored
 * status is consulted.
 *
 * @param {string} artifactDir directory expected to contain `trust.bundle`
 * @returns {Promise<string[]>} warnings; empty when there is no bundle or it
 *   has no `claims` array
 */
async function bundleEnforcement(artifactDir) {
  const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
  if (!bundle || !Array.isArray(bundle.claims)) return [];

  const surface = await tryLoadSurface();
  const listOrEmpty = value => (Array.isArray(value) ? value : []);
  const evidencePool = listOrEmpty(bundle.evidence);
  const eventPool = listOrEmpty(bundle.events);
  const policyPool = listOrEmpty(bundle.policies);
  const gatedImpacts = ['high', 'critical'];
  const blockingStatuses = ['disputed', 'rejected'];
  const findings = [];

  for (const claim of bundle.claims) {
    if (!claim || typeof claim !== 'object') continue;
    const impact = String(claim.impactLevel || '').toLowerCase();
    const storedStatus = String(claim.status || '').toLowerCase();
    if (!gatedImpacts.includes(impact)) continue;

    // Re-derive from the bundle's own records; stays null when re-derivation
    // was not possible or threw.
    let recomputedStatus = null;
    if (surface && typeof surface.deriveClaimStatus === 'function') {
      const claimId = claim.id;
      const belongsToClaim = record => record && record.claimId === claimId;
      const ownEvidence = evidencePool.filter(belongsToClaim);
      const ownEvents = eventPool.filter(belongsToClaim);
      try {
        const derived = surface.deriveClaimStatus({
          claim,
          evidence: ownEvidence,
          events: ownEvents,
          policies: policyPool,
        });
        recomputedStatus = derived && typeof derived.status === 'string'
          ? derived.status.toLowerCase()
          : 'unknown';
      } catch {
        // Treat a throwing re-derivation as "not attempted".
        recomputedStatus = null;
      }
    }

    // The stricter of the two views decides whether the claim is reported.
    const storedBlocks = blockingStatuses.includes(storedStatus);
    const recomputedBlocks = blockingStatuses.includes(recomputedStatus);
    if (!storedBlocks && !recomputedBlocks) continue;

    const subject = safeOneLine(claim.subjectId || claim.id, 80);
    const claimType = safeOneLine(claim.claimType, 48);
    const storedLooksClean = storedStatus === 'verified' || storedStatus === 'assumed';

    if (storedLooksClean && recomputedBlocks) {
      // Clean stored status contradicted by the re-derivation.
      findings.push(`trust.bundle claim tampered: ${subject} (${claimType}) — stored status "${storedStatus}" does not match recompute "${recomputedStatus}" (possible tampered bundle); caught false-completion.`);
      continue;
    }
    findings.push(`trust.bundle claim disputed: ${subject} (${claimType}) — Surface recompute shows not verified; caught false-completion.`);
  }
  return findings;
}
655
+
656
/**
 * Scope the scan to the session's current task when
 * `<flowAgentsDir>/current.json` names one via `artifact_dir` or `active_slug`.
 *
 * The slug is sanitized (runs of two or more dots removed, leading slashes and
 * backslashes stripped) and the result must resolve strictly inside
 * `flowAgentsDir`. The target must also be an existing DIRECTORY: the caller
 * walks it for markdown artifacts, so accepting a regular file (for example a
 * slug of "current.json") would scope the scan to nothing.
 *
 * @param {string} flowAgentsDir absolute path of the `.flow-agents` directory
 * @returns {string|null} the task directory, or null to fall back to scanning
 *   all of `.flow-agents`
 */
function preferredArtifactDir(flowAgentsDir) {
  const current = readJsonFile(path.join(flowAgentsDir, 'current.json'));
  if (!current) return null;
  const slug = current.artifact_dir || current.active_slug;
  if (typeof slug !== 'string' || !slug.trim()) return null;
  const safe = slug.replace(/\.\.+/g, '').replace(/^[/\\]+/, '');
  const dir = path.join(flowAgentsDir, safe);
  // Containment check: the joined path must sit strictly below flowAgentsDir.
  if (!dir.startsWith(flowAgentsDir + path.sep)) return null;
  try {
    return fs.statSync(dir).isDirectory() ? dir : null;
  } catch {
    // Missing or unreadable target: fall back to the unscoped scan.
    return null;
  }
}
670
+
671
/**
 * Decide whether a task has not started executing yet.
 *
 * When `<artifactDir>/state.json` is readable, the task is pre-execution if
 * its normalized `status` is in PRE_EXECUTION_STATUSES or its normalized
 * `phase` is in PRE_EXECUTION_PHASES. Without a state.json, the markdown
 * status alone is checked against PRE_EXECUTION_STATUSES.
 *
 * @param {string} artifactDir task artifact directory
 * @param {string} markdownStatus status taken from the markdown artifact
 * @returns {boolean} true when the task is still pre-execution
 */
function isPreExecution(artifactDir, markdownStatus) {
  const sidecarState = readJsonFile(path.join(artifactDir, 'state.json'));
  if (!sidecarState) {
    return PRE_EXECUTION_STATUSES.has(normalizedStatus(markdownStatus));
  }
  if (PRE_EXECUTION_STATUSES.has(normalizedStatus(sidecarState.status))) return true;
  return PRE_EXECUTION_PHASES.has(normalizedStatus(sidecarState.phase));
}
683
+
684
+
685
// ─── Wave 2c: no-bundle/no-state fallback gate ────────────────────────────────
/**
 * Cover the sessions that neither bundle enforcement nor sidecar guidance see.
 *
 * 1. When the artifact directory has NEITHER a `trust.bundle` file NOR a
 *    readable `state.json`, a single NOT_VERIFIED warning is returned.
 * 2. Otherwise ("Adjustment A", sidecar-driven Final Acceptance hygiene): when
 *    state.json reports the task as `delivered` and acceptance.json still
 *    lists criteria whose status is pending / not_started / empty / unknown, a
 *    Final Acceptance reminder is returned. The reminder is limited to
 *    delivered tasks, matching the legacy markdown check it replaces; pending
 *    criteria on a task that is still in progress are expected, not a hygiene
 *    gap.
 *
 * @param {string} artifactDir task artifact directory
 * @returns {string[]} zero or one warning
 */
function missingBundleOrStateSignal(artifactDir) {
  const warnings = [];
  const hasBundle = fs.existsSync(path.join(artifactDir, 'trust.bundle'));
  const state = readJsonFile(path.join(artifactDir, 'state.json'));
  const base = path.basename(artifactDir);

  if (!hasBundle && !state) {
    // Untracked by the sidecar path: keep the session gated.
    warnings.push(`${base} NOT_VERIFIED — no trust.bundle or state.json found; run 'workflow-sidecar record-evidence' to build the evidence record before delivery.`);
    return warnings;
  }

  // Final Acceptance hygiene only applies once the task claims delivery.
  const delivered = Boolean(state) && normalizedStatus(state.status) === 'delivered';
  if (!delivered) return warnings;

  const acceptance = readJsonFile(path.join(artifactDir, 'acceptance.json'));
  if (acceptance && Array.isArray(acceptance.criteria)) {
    const pendingCriteria = acceptance.criteria.filter(c => {
      const s = normalizedStatus(c && c.status);
      return s === 'pending' || s === 'not_started' || s === '' || s === 'unknown';
    });
    if (pendingCriteria.length > 0) {
      warnings.push(`${base} Final Acceptance: ${pendingCriteria.length} acceptance criterion/criteria still pending; complete CI/merge/docs before final delivery.`);
    }
  }

  return warnings;
}
725
+
726
+ async function analyze(root, now = Date.now()) {
727
+ const flowAgentsDir = path.join(root, '.flow-agents');
728
+ // Scope to the session's current task when current.json names one, so an
729
+ // unrelated active workflow elsewhere in the repo does not gate this stop.
730
+ const scoped = preferredArtifactDir(flowAgentsDir);
731
+ const searchDirs = scoped ? [scoped] : [flowAgentsDir];
732
+ const artifacts = searchDirs
255
733
  .flatMap(dir => walkMarkdown(dir))
256
734
  .map(readArtifact)
257
735
  .filter(isWorkflowArtifact)
@@ -269,51 +747,121 @@ function analyze(root, now = Date.now()) {
269
747
  warnings.push(`${relPath} is still status:${status} (${ageMinutes}m old). Do not final-answer as complete unless the next step is explicit.`);
270
748
  }
271
749
 
272
- if (!hasHeading(latest.text, 'Definition Of Done')) {
273
- warnings.push(`${relPath} is missing ## Definition Of Done, so the user-facing finish line is not explicit.`);
274
- }
275
-
276
- if (!hasHeading(latest.text, 'Goal Fit Gate')) {
277
- warnings.push(`${relPath} is missing ## Goal Fit Gate, so local acceptance has not been checked.`);
278
- } else {
279
- for (const item of uncheckedInSection(latest.text, 'Goal Fit Gate').slice(0, 6)) {
280
- warnings.push(`${relPath} Goal Fit unchecked: ${item}`);
281
- }
282
- }
283
-
284
- if (status === 'delivered' && hasHeading(latest.text, 'Final Acceptance')) {
285
- const uncheckedFinal = uncheckedInSection(latest.text, 'Final Acceptance');
286
- if (uncheckedFinal.length > 0) {
287
- warnings.push(`${relPath} local delivery is marked delivered, but Final Acceptance still has ${uncheckedFinal.length} open item(s) for CI/merge/docs promotion.`);
288
- }
289
- }
750
+ // Builder heading completeness checks (hasHeading DOD/Goal Fit Gate) removed in ADR 0010 2c.
751
+ // Verdict is now bundle-driven via bundleEnforcement + sidecarGuidance.
752
+ // Sessions with neither trust.bundle nor state.json are caught by missingBundleOrStateSignal.
290
753
 
291
754
  warnings.push(...sidecarValidation(root, path.dirname(latest.file)));
292
- const evidence = readJsonFile(path.join(path.dirname(latest.file), 'evidence.json'));
293
- if (evidence && markdownVerdict(latest.text) === 'pass' && normalizedStatus(evidence.verdict) === 'fail') {
294
- warnings.push(`${relPath} Markdown PASS contradicts evidence.json verdict fail.`);
295
- }
296
755
  warnings.push(...sidecarGuidance(root, path.dirname(latest.file)));
756
+ warnings.push(...captureCrossReference(root, path.dirname(latest.file)));
757
+ warnings.push(...(await bundleEnforcement(path.dirname(latest.file))));
758
+ warnings.push(...missingBundleOrStateSignal(path.dirname(latest.file)));
759
+
760
+ // A pre-execution task (not started) OR a terminal task (which is itself a
761
+ // completion *claim*) must not block on mere incompleteness — but a FALSE claim
762
+ // (capture/evidence contradiction) still blocks at any phase. This is the whole
763
+ // point of the capture cross-reference: catch a task that falsely claims done.
764
+ const gateState = readJsonFile(path.join(path.dirname(latest.file), 'state.json'));
765
+ const taskStatus = gateState ? normalizedStatus(gateState.status) : normalizedStatus(status);
766
+ const preExecution = isPreExecution(path.dirname(latest.file), status);
767
+ const terminal = TERMINAL_STATUSES.has(taskStatus);
768
+ // Always-block: a claimed pass the capture log or evidence.json contradicts.
769
+ const HARD_BLOCK = /contradicts evidence\.json|caught false-completion|evidence verdict:|evidence check .+ status:|critique status|critique open|required sidecar is missing/;
770
+ // Full gate (execution onward): also completeness/hygiene and not-done state.
771
+ const FULL_BLOCK = /status:|Definition Of Done|Goal Fit|sidecar validation:|contradicts evidence\.json|workflow state|evidence verdict|evidence check|NOT_VERIFIED gap|critique status|critique open|next action|caught false-completion|NOT_VERIFIED —/;
772
+ const blockRe = (preExecution || terminal) ? HARD_BLOCK : FULL_BLOCK;
773
+ const blocking = warnings.some(w => {
774
+ // Capture cross-reference warn-mode notes never block (operator opted out).
775
+ if (/\[backstop in warn mode — not blocking\]/.test(w)) return false;
776
+ return blockRe.test(w);
777
+ });
778
+ return { warnings, blocking, preExecution };
779
+ }
297
780
 
298
- const blocking = warnings.some(w => /status:|Definition Of Done|Goal Fit|sidecar validation|contradicts evidence\.json|workflow state|evidence verdict|evidence check|NOT_VERIFIED gap|critique status|critique open|next action/.test(w));
299
- return { warnings, blocking };
781
/**
 * Resolve the enforcement mode for the goal-fit hook.
 *
 * Precedence:
 *   1. FLOW_AGENTS_GOAL_FIT_MODE, when it is exactly block, warn or off
 *      (trimmed, case-insensitive).
 *   2. The legacy FLOW_AGENTS_GOAL_FIT_STRICT=true, which maps to "block". It
 *      is trimmed like the mode variable, so a value such as " true " is
 *      honored instead of being silently ignored.
 *   3. Otherwise "warn".
 *
 * @returns {'block'|'warn'|'off'} the resolved mode
 */
function resolveGoalFitMode() {
  const explicit = String(process.env.FLOW_AGENTS_GOAL_FIT_MODE || '').trim().toLowerCase();
  if (explicit === 'block' || explicit === 'warn' || explicit === 'off') return explicit;
  const strict = String(process.env.FLOW_AGENTS_GOAL_FIT_STRICT || '').trim().toLowerCase() === 'true';
  return strict ? 'block' : 'warn';
}
792
+
793
/**
 * Escape-hatch limit: the number of consecutive identical blocks after which
 * the hook releases instead of refusing again, so an unsatisfiable goal cannot
 * trap the agent indefinitely.
 *
 * Read from FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS; anything that does not parse to a
 * positive integer falls back to 3.
 *
 * @returns {number} maximum consecutive identical blocks
 */
function resolveMaxBlocks() {
  const fallback = 3;
  const configured = process.env.FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS || '';
  const parsed = Number.parseInt(configured, 10);
  if (!Number.isInteger(parsed) || parsed <= 0) return fallback;
  return parsed;
}
301
803
 
302
- function run(rawInput) {
804
/**
 * Location of the JSON file that persists the consecutive-block streak.
 *
 * @param {string} root repository root
 * @returns {string} `<root>/.flow-agents/.goal-fit-block-streak.json`
 */
function blockStreakFile(root) {
  const streakSegments = ['.flow-agents', '.goal-fit-block-streak.json'];
  return path.join(root, ...streakSegments);
}
807
+
808
/**
 * Fingerprint a set of warnings so consecutive blocks for the SAME gap can be
 * recognised.
 *
 * The staleness warning embeds the artifact age as "(<n>m old)", which changes
 * from one minute to the next. Hashing it verbatim would make an unchanged gap
 * look new on every stop, resetting the streak and defeating the max-blocks
 * escape hatch. That token is therefore replaced by a fixed placeholder before
 * hashing; all other text contributes unchanged.
 *
 * The hash is a 32-bit djb2-style rolling hash over UTF-16 code units of the
 * warnings joined by newlines.
 *
 * @param {string[]|null|undefined} warnings warning messages
 * @returns {string} decimal string of the unsigned 32-bit hash
 */
function reasonsHash(warnings) {
  const text = (warnings || [])
    .map(w => (typeof w === 'string' ? w.replace(/\(\d+m old\)/g, '(Nm old)') : w))
    .join('\n');
  let h = 5381;
  for (let i = 0; i < text.length; i += 1) h = ((h << 5) + h + text.charCodeAt(i)) >>> 0;
  return String(h);
}
814
+
815
/**
 * Forget any recorded block streak by deleting the streak file.
 * Best effort: a missing file is fine and any failure is ignored so that
 * bookkeeping can never break the hook.
 *
 * @param {string} root repository root
 * @returns {void}
 */
function clearBlockStreak(root) {
  try {
    const streakPath = blockStreakFile(root);
    fs.rmSync(streakPath, { force: true });
  } catch {
    /* best effort */
  }
}
818
+
819
/**
 * Advance the consecutive-block streak for a given reasons hash and persist it.
 *
 * When the stored hash equals `hash`, the stored count (treated as 0 if it is
 * not a usable number) is incremented; otherwise the streak restarts at 1.
 * Persisting is best effort — a write failure is ignored and the computed
 * count is still returned.
 *
 * @param {string} root repository root
 * @param {string} hash fingerprint of the current blocking reasons
 * @returns {number} the new streak length
 */
function bumpBlockStreak(root, hash) {
  const streakPath = blockStreakFile(root);
  const previous = readJsonFile(streakPath) || {};
  let count = 1;
  if (previous.hash === hash) {
    count = (Number(previous.count) || 0) + 1;
  }
  try {
    fs.mkdirSync(path.dirname(streakPath), { recursive: true });
    fs.writeFileSync(streakPath, JSON.stringify({ hash, count }));
  } catch {
    /* best effort */
  }
  return count;
}
829
+
830
/**
 * Hook entry point: analyze the repository's workflow artifacts and decide
 * whether the stop may proceed.
 *
 * Outcomes:
 *   - mode "off", or no warnings: the raw input is returned unchanged (the
 *     block streak is cleared in the no-warnings case).
 *   - warnings, but mode is not "block" or nothing is blocking: warnings go to
 *     stderr, exit code 0, streak cleared.
 *   - blocking in block mode: the streak for this set of reasons is bumped.
 *     Once it reaches the configured maximum the block is released (exit 0,
 *     with a release notice appended) and the streak cleared; otherwise the
 *     stop is refused with exit code 2.
 *
 * @param {string} rawInput raw hook payload read from stdin
 * @returns {Promise<string|{stdout: string, stderr: string, exitCode: number}>}
 */
async function run(rawInput) {
  const input = parseJson(rawInput);
  const root = findRepoRoot(input.cwd || process.cwd());
  const mode = resolveGoalFitMode();
  if (mode === 'off') return rawInput;

  const result = await analyze(root);
  const noWarnings = result.warnings.length === 0;
  if (noWarnings) {
    clearBlockStreak(root);
    return rawInput;
  }

  const header = '[Hook] Goal Fit warning:';
  const bullets = result.warnings.map(w => ` - ${w}`);
  const message = [header].concat(bullets).join('\n');

  const mustBlock = mode === 'block' && result.blocking;
  if (!mustBlock) {
    clearBlockStreak(root);
    return { stdout: rawInput, stderr: message, exitCode: 0 };
  }

  const maxBlocks = resolveMaxBlocks();
  const count = bumpBlockStreak(root, reasonsHash(result.warnings));
  if (count < maxBlocks) {
    return {
      stdout: rawInput,
      stderr: `${message}\n[Hook] Goal Fit BLOCK ${count}/${maxBlocks}.`,
      exitCode: 2,
    };
  }

  // Same gap refused too many times in a row: release and surface it.
  clearBlockStreak(root);
  return {
    stdout: rawInput,
    stderr: `${message}\n[Hook] Goal Fit block RELEASED after ${count} consecutive identical blocks (FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=${maxBlocks}): the same gap persists, surfacing to the human instead of looping.`,
    exitCode: 0,
  };
}
319
867
 
@@ -324,14 +872,28 @@ if (require.main === module) {
324
872
  if (data.length < MAX_STDIN) data += chunk.substring(0, MAX_STDIN - data.length);
325
873
  });
326
874
  process.stdin.on('end', () => {
327
- const output = run(data);
328
- if (output && typeof output === 'object') {
329
- if (output.stderr) process.stderr.write(output.stderr.endsWith('\n') ? output.stderr : `${output.stderr}\n`);
330
- process.stdout.write(String(output.stdout ?? data));
331
- process.exit(Number.isInteger(output.exitCode) ? output.exitCode : 0);
332
- }
333
- process.stdout.write(String(output));
875
+ // run() is now async (Surface load). We wrap in an async IIFE so the
876
+ // stdin/exit flow is preserved and errors are surfaced as warnings (fail-open).
877
+ (async () => {
878
+ let output;
879
+ try {
880
+ output = await run(data);
881
+ } catch (err) {
882
+ // Unexpected failure in the async gate path — fail-open, allow the Stop.
883
+ process.stderr.write(`[Hook] Goal Fit async error (fail-open): ${String(err && err.message || err)}\n`);
884
+ process.stdout.write(data);
885
+ process.exit(0);
886
+ return;
887
+ }
888
+ if (output && typeof output === 'object') {
889
+ if (output.stderr) process.stderr.write(output.stderr.endsWith('\n') ? output.stderr : `${output.stderr}\n`);
890
+ process.stdout.write(String(output.stdout ?? data));
891
+ process.exit(Number.isInteger(output.exitCode) ? output.exitCode : 0);
892
+ return;
893
+ }
894
+ process.stdout.write(String(output));
895
+ })();
334
896
  });
335
897
  }
336
898
 
337
- module.exports = { analyze, run, uncheckedInSection, findRepoRoot, sidecarGuidance, safeOneLine };
899
+ module.exports = { analyze, run, resolveGoalFitMode, uncheckedInSection, findRepoRoot, sidecarGuidance, safeOneLine, captureCrossReference, bundleEnforcement, readCommandLog, resolveTrustedCommand, declaredManifestTarget };