@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -2,8 +2,11 @@
2
2
  import * as fs from "node:fs";
3
3
  import * as path from "node:path";
4
4
  import { execFileSync } from "node:child_process";
5
+ import { createHash } from "node:crypto";
5
6
  import { createRequire } from "node:module";
6
7
  import { fileURLToPath } from "node:url";
8
+ // ADR 0016 Abstraction A: shared FlowDefinition resolver (P-a)
9
+ import { resolveActiveFlowStep, resolveFlowFilePath, resolvePhaseMap, type ActiveFlowStep } from "../lib/flow-resolver.js";
7
10
 
8
11
  type AnyObj = Record<string, any>;
9
12
 
@@ -25,10 +28,58 @@ export function appendJsonl(file: string, payload: AnyObj): void {
25
28
  }
26
29
  function die(message: string): never { throw new Error(message); }
27
30
  function slugify(value: string, fallback: string): string { return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "") || fallback; }
31
+ /** Derives a deterministic, filesystem-safe slug from a canonical work-item ref like `kontourai/flow-agents#161`.
32
+ * Format: `<owner>-<repo>-<id>` e.g. `kontourai-flow-agents-161`.
33
+ * Reuses slugify() for normalization. Validates that the id is a numeric GitHub issue number. */
34
+ function workItemSlug(ref: string): string {
35
+ const hashIdx = ref.indexOf("#");
36
+ if (hashIdx < 0 || hashIdx === ref.length - 1) die("--work-item must be in owner/repo#id format");
37
+ const repoPath = ref.slice(0, hashIdx);
38
+ const id = ref.slice(hashIdx + 1);
39
+ if (!/^\d+$/.test(id)) die("--work-item id must be a numeric issue number");
40
+ const parts = repoPath.split("/");
41
+ if (parts.length !== 2 || !parts[0] || !parts[1]) die("--work-item repo must be owner/repo format");
42
+ const [owner, repo] = parts;
43
+ return slugify(`${owner}-${repo}-${id}`, "work-item");
44
+ }
28
45
 
29
- // Optional Hachure trust-bundle validation. No-ops gracefully when hachure is not installed.
30
- // Install hachure (^0.4.0) as an optional dependency to enable schema validation.
31
- function tryLoadHachureValidator(): ((bundle: unknown) => { valid: boolean; errors: string[] }) | null {
46
+ /**
47
+ * Validate a Hachure trust.bundle using @kontourai/surface's canonical validator
48
+ * (surface is the authoritative owner of trust-bundle schema validation per ADR 0010 / ADR 0015).
49
+ * Returns `{ valid, errors, available }`. When @kontourai/surface is unavailable,
50
+ * `available` is false and `valid` is true (fail-open) so callers can choose to treat
51
+ * unvalidated bundles as acceptable or gate on `available`. Surface is REQUIRED for
52
+ * bundle writes per ADR 0010 Phase 4c — `assertBundleWritten` enforces this on the
53
+ * write path. Surface's validator is equivalent-or-stronger than the prior hachure
54
+ * JSON-Schema validator: it validates the same structural constraints plus cross-reference
55
+ * integrity (evidence/event → claim references) that the JSON schema did not enforce.
56
+ */
57
+ export async function validateTrustBundle(bundle: unknown): Promise<{ valid: boolean; errors: string[]; available: boolean }> {
58
+ // Use the already-loaded surface module when available (zero-cost re-entry after first load).
59
+ // When called standalone (fresh process, surface not yet loaded), attempt a one-shot import.
60
+ let surfaceValidate: ((input: unknown) => unknown) | undefined;
61
+ if (_surfaceModule !== undefined) {
62
+ // Module has been attempted: use cached result (null = unavailable).
63
+ surfaceValidate = _surfaceModule?.validateTrustBundle ?? undefined;
64
+ } else {
65
+ // Not yet attempted — load now for standalone callers (e.g. library consumers, tests).
66
+ const m = await tryLoadSurface();
67
+ surfaceValidate = m?.validateTrustBundle ?? undefined;
68
+ }
69
+ if (!surfaceValidate) return { valid: true, errors: [], available: false };
70
+ try {
71
+ surfaceValidate(bundle);
72
+ return { valid: true, errors: [], available: true };
73
+ } catch (err) {
74
+ const message = err instanceof Error ? err.message : String(err);
75
+ return { valid: false, errors: [message], available: true };
76
+ }
77
+ }
78
+ // Validate a single InquiryRecord against the hachure inquiry-record.schema.json.
79
+ // Uses a separate AJV instance compiled against that schema (not the trust-bundle schema).
80
+ let _hachureInquiryRecordValidator: ((record: unknown) => { valid: boolean; errors: string[] }) | null | undefined;
81
+ function getHachureInquiryRecordValidator(): ((record: unknown) => { valid: boolean; errors: string[] }) | null {
82
+ if (_hachureInquiryRecordValidator !== undefined) return _hachureInquiryRecordValidator;
32
83
  try {
33
84
  const _require = createRequire(import.meta.url);
34
85
  const hachureDir = path.dirname(_require.resolve("hachure"));
@@ -39,16 +90,16 @@ function tryLoadHachureValidator(): ((bundle: unknown) => { valid: boolean; erro
39
90
  if (!file.endsWith(".schema.json")) continue;
40
91
  schemas[file] = JSON.parse(fs.readFileSync(path.join(schemasDir, file), "utf8"));
41
92
  }
93
+ const inquiryRecordSchema = schemas["inquiry-record.schema.json"];
94
+ if (!inquiryRecordSchema) { _hachureInquiryRecordValidator = null; return null; }
42
95
  const ajv = new Ajv({ strict: false, allErrors: true });
43
96
  for (const [filename, schema] of Object.entries(schemas)) {
44
- if (filename === "trust-bundle.schema.json") continue;
97
+ if (filename === "inquiry-record.schema.json") continue;
45
98
  ajv.addSchema(schema, filename);
46
99
  }
47
- const trustBundleSchema = schemas["trust-bundle.schema.json"];
48
- if (!trustBundleSchema) return null;
49
- const validate = ajv.compile(trustBundleSchema);
50
- return (bundle: unknown) => {
51
- const valid = validate(bundle);
100
+ const validate = ajv.compile(inquiryRecordSchema);
101
+ _hachureInquiryRecordValidator = (record: unknown) => {
102
+ const valid = validate(record);
52
103
  if (valid) return { valid: true, errors: [] };
53
104
  const errors = ((validate as any).errors ?? []).map((err: any) => {
54
105
  const loc = err.instancePath || err.schemaPath || "";
@@ -56,30 +107,469 @@ function tryLoadHachureValidator(): ((bundle: unknown) => { valid: boolean; erro
56
107
  });
57
108
  return { valid: false, errors };
58
109
  };
110
+ return _hachureInquiryRecordValidator;
59
111
  } catch {
112
+ _hachureInquiryRecordValidator = null;
113
+ return null;
114
+ }
115
+ }
116
+ /**
117
+ * Validate a record against the canonical hachure inquiry-record.schema.json
118
+ * (https://kontourai.io/schemas/surface/inquiry-record.schema.json).
119
+ * Returns `{ valid, errors, available }`. Fail-open when hachure is not installed.
120
+ */
121
+ export function validateInquiryRecord(record: unknown): { valid: boolean; errors: string[]; available: boolean } {
122
+ const validate = getHachureInquiryRecordValidator();
123
+ if (!validate) return { valid: true, errors: [], available: false };
124
+ return { ...validate(record), available: true };
125
+ }
126
+ // ─── @kontourai/surface status derivation ────────────────────────────────────
127
+ // Surface is ESM-only; this module builds to CJS. Load Surface via a fail-open
128
+ // cached dynamic import(). If Surface cannot be loaded, bundle writes are
129
+ // skipped entirely — no hand-rolled fork fallback.
130
+ //
131
+ // SurfaceInquiry / SurfaceInquiryRecord — minimal local shapes mirroring the
132
+ // canonical Surface Inquiry / InquiryRecord types. Using Record-based typing
133
+ // keeps this module free of a direct ESM import at compile time.
134
+ type SurfaceInquiry = {
135
+ id: string;
136
+ question: string;
137
+ askedBy: string;
138
+ askedAt: string;
139
+ target?: { subjectType: string; subjectId: string; fieldOrBehavior: string; qualifiers?: Record<string, string> };
140
+ metadata?: Record<string, unknown>;
141
+ };
142
+ type SurfaceInquiryRecord = {
143
+ id: string;
144
+ inquiry: SurfaceInquiry;
145
+ outcome: "matched" | "derived" | "unsupported";
146
+ resolutionPath: {
147
+ claimIds: string[];
148
+ ruleId?: string;
149
+ ruleVersion?: string;
150
+ identityLinkIds?: string[];
151
+ transitiveRuleIds?: string[];
152
+ };
153
+ answer?: { value: unknown; status: string };
154
+ inputSnapshot: Array<{ claimId: string; status: string }>;
155
+ statusFunctionVersion: string;
156
+ resolvedAt: string;
157
+ };
158
+ type SurfaceModule = {
159
+ deriveClaimStatus: (args: {
160
+ claim: Record<string, unknown>;
161
+ evidence: Record<string, unknown>[];
162
+ events: Record<string, unknown>[];
163
+ policies: Record<string, unknown>[];
164
+ now?: Date;
165
+ }) => { status: string; policyId: string | undefined };
166
+ generateClaimId: (subjectId: string, surface: string, fieldOrBehavior: string) => string;
167
+ statusFunctionVersion: string;
168
+ resolveInquiry: (
169
+ bundle: Record<string, unknown>,
170
+ inquiry: SurfaceInquiry,
171
+ options?: { now?: Date },
172
+ ) => SurfaceInquiryRecord;
173
+ buildTrustReport: (bundle: Record<string, unknown>, options?: { now?: Date }) => Record<string, unknown>;
174
+ buildDerivationDrilldown: (report: Record<string, unknown>, claimId: string) => Record<string, unknown>;
175
+ /** Canonical trust-bundle validator from @kontourai/surface. Throws on invalid input; returns TrustBundle on success. */
176
+ validateTrustBundle: (input: unknown) => Record<string, unknown>;
177
+ /** Freeze a derivation checkpoint from a report. */
178
+ checkpointFromReport: (report: Record<string, unknown>) => Record<string, unknown>;
179
+ /** Diff two derivations (prior checkpoint → later report) and emit freshness transition events. */
180
+ diffFreshness: (prior: Record<string, unknown>, next: Record<string, unknown>) => Array<Record<string, unknown>>;
181
+ // ─── Increment B1: in-toto / Sigstore interop (consumed from Surface) ────────
182
+ /** Wrap a TrustBundle as an in-toto Statement v1. */
183
+ toInTotoStatement: (bundle: Record<string, unknown>, options: { subjects: Array<{ name: string; digest: Record<string, string> }> }) => {
184
+ _type: "https://in-toto.io/Statement/v1";
185
+ subject: Array<{ name: string; digest: Record<string, string> }>;
186
+ predicateType: "https://hachure.org/v1/bundle";
187
+ predicate: Record<string, unknown>;
188
+ };
189
+ /** Sign an in-toto Statement with Sigstore keyless signing. Returns null when no OIDC identity is available (fail-open). */
190
+ signStatementWithSigstore: (statement: {
191
+ _type: "https://in-toto.io/Statement/v1";
192
+ subject: Array<{ name: string; digest: Record<string, string> }>;
193
+ predicateType: "https://hachure.org/v1/bundle";
194
+ predicate: Record<string, unknown>;
195
+ }) => Promise<{
196
+ envelope: {
197
+ payloadType: "application/vnd.in-toto+json";
198
+ payload: string;
199
+ signatures: Array<{ keyid: string; sig: string }>;
200
+ };
201
+ sigstoreBundle: unknown;
202
+ assuranceLevel: "signed";
203
+ } | null>;
204
+ };
205
+ let _surfaceModule: SurfaceModule | null | undefined; // undefined = not tried yet; null = unavailable
206
+ async function tryLoadSurface(): Promise<SurfaceModule | null> {
207
+ // Test/diagnostic seam: simulate a degraded environment where Surface is unavailable,
208
+ // to exercise the fail-loud (no silent data loss) path without disturbing node_modules.
209
+ if (process.env.FLOW_AGENTS_SURFACE_UNAVAILABLE === "1") return null;
210
+ if (_surfaceModule !== undefined) return _surfaceModule;
211
+ try {
212
+ const m = await import("@kontourai/surface");
213
+ _surfaceModule = m as unknown as SurfaceModule;
214
+ return _surfaceModule;
215
+ } catch (err) {
216
+ const message = err instanceof Error ? err.message : String(err);
217
+ process.stderr.write(`[trust-bundle] @kontourai/surface unavailable — bundle write skipped: ${message}\n`);
218
+ _surfaceModule = null;
60
219
  return null;
61
220
  }
62
221
  }
63
- let _hachureValidator: ReturnType<typeof tryLoadHachureValidator> | undefined;
64
- function getHachureValidator(): ReturnType<typeof tryLoadHachureValidator> {
65
- if (_hachureValidator === undefined) _hachureValidator = tryLoadHachureValidator();
66
- return _hachureValidator;
222
+
223
+ /** Map a workflow check status to the Surface VerificationEvent status. */
224
+ function checkStatusToEventStatus(status: string): string | null {
225
+ if (status === "pass") return "verified";
226
+ if (status === "fail") return "disputed";
227
+ if (status === "skip") return "assumed";
228
+ return null; // not_verified / unknown → no event → Surface returns "unknown"
229
+ }
230
+ /** Map an acceptance criterion status to the Surface VerificationEvent status. */
231
+ function criterionStatusToEventStatus(status: string): string | null {
232
+ if (status === "pass") return "verified";
233
+ if (status === "fail") return "disputed";
234
+ if (status === "accepted_gap") return "assumed";
235
+ return null; // pending / not_verified → no event → Surface returns "unknown"
236
+ }
237
+ /** Map a critique verdict to the Surface VerificationEvent status. */
238
+ function critiqueToEventStatus(verdict: string, findings: AnyObj[]): string | null {
239
+ if (verdict === "fail") return "disputed";
240
+ const hasOpenFinding = Array.isArray(findings) && findings.some((f: AnyObj) => f.status === "open");
241
+ if (verdict === "pass" && hasOpenFinding) return "disputed";
242
+ if (verdict === "pass") return "verified";
243
+ if (verdict === "comment") return "assumed";
244
+ return null; // not_verified or unknown → no event → Surface returns "unknown"
67
245
  }
68
246
 
69
247
  /**
70
- * Validate a Hachure trust.bundle against the canonical trust-bundle schema.
71
- * Returns `{ valid, errors, available }`. When the optional `hachure` dependency
72
- * is not installed, validation is unavailable and this returns
73
- * `{ valid: true, errors: [], available: false }` (fail-open) so callers can
74
- * choose to treat unvalidated bundles as acceptable or gate on `available`.
75
- * This is the same validator the sidecar writer uses for trust-backed evidence.
248
+ * Build a Hachure trust.bundle from raw check/criterion/critique inputs.
249
+ * trust.bundle is the PRIMARY artifact (ADR 0010 Phase 4a producer inversion).
250
+ * Callers pass raw inputs directly, not bespoke-sidecar-shaped objects.
251
+ * Derives claim statuses using @kontourai/surface's canonical versioned function.
252
+ * Returns null when Surface is unavailable (caller skips the bundle write).
253
+ * @param slug Task slug (used as subjectId prefix)
254
+ * @param timestamp ISO-8601 timestamp for createdAt / updatedAt / observedAt
255
+ * @param checks Normalized check objects (from record-evidence --check-json / --surface-trust-json)
256
+ * @param criteria Acceptance criteria objects (from acceptance.json .criteria array)
257
+ * @param critiques Critique objects (from critique.json .critiques array)
258
+ * @param commandLog Optional parsed command-log.jsonl entries (capture-authoritative fold)
76
259
  */
77
- export function validateTrustBundle(bundle: unknown): { valid: boolean; errors: string[]; available: boolean } {
78
- const validate = getHachureValidator();
79
- if (!validate) return { valid: true, errors: [], available: false };
80
- return { ...validate(bundle), available: true };
260
+ export async function buildTrustBundle(slug: string, timestamp: string, checks: AnyObj[], criteria: AnyObj[], critiques: AnyObj[], commandLog?: AnyObj[], flowAgentsDir?: string): Promise<AnyObj | null> {
261
+ const surface = await tryLoadSurface();
262
+ if (!surface) return null;
263
+ const { deriveClaimStatus, generateClaimId, statusFunctionVersion } = surface;
264
+
265
+ // ADR 0016 Abstraction A (P-b): resolve active flow step for dual-emit.
266
+ // When flowAgentsDir is provided AND current.json carries active_flow_id/active_step_id,
267
+ // each produced claim gets a DECLARED primary claim (kit-typed) plus a legacy shadow
268
+ // (workflow.* type, claimId suffix "-legacy") for backward compatibility. When null,
269
+ // only the existing workflow.* claims are produced (zero behavior change).
270
+ const activeStep: ActiveFlowStep | null = flowAgentsDir ? resolveActiveFlowStep(flowAgentsDir) : null;
271
+
272
+ const claims: AnyObj[] = [];
273
+ const evidenceItems: AnyObj[] = [];
274
+ const events: AnyObj[] = [];
275
+ const ts = timestamp || new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
276
+
277
+ // One VerificationPolicy per distinct claimType, so status is policy-governed
278
+ // (not derived against an empty policy set). Maximal-fidelity per ADR 0010.
279
+ const policies = new Map<string, AnyObj>();
280
+ const ensurePolicy = (claimType: string, impactLevel: string, requiredEvidence: string[]): AnyObj => {
281
+ let p = policies.get(claimType);
282
+ if (!p) {
283
+ p = { id: `policy:${claimType}`, claimType, requiredEvidence, acceptanceCriteria: [`A verified verification event must support a ${claimType} claim.`], reviewAuthority: "system", validityRule: { kind: "manual" }, stalenessTriggers: [], conflictRules: [], impactLevel };
284
+ policies.set(claimType, p);
285
+ }
286
+ return p;
287
+ };
288
+
289
+ // Index the deterministic capture log by normalized command (a single FAIL wins),
290
+ // so a claimed-pass check whose command actually FAILED becomes authoritative here.
291
+ const captureByCommand = new Map<string, { observedResult: string; exitCode: number | null }>();
292
+ for (const entry of Array.isArray(commandLog) ? commandLog : []) {
293
+ if (!entry || typeof entry.command !== "string") continue;
294
+ const key = entry.command.replace(/\s+/g, " ").trim();
295
+ if (!key) continue;
296
+ const failed = entry.observedResult === "fail" || (Number.isInteger(entry.exitCode) && entry.exitCode !== 0);
297
+ const prev = captureByCommand.get(key);
298
+ captureByCommand.set(key, { observedResult: failed || (prev && prev.observedResult === "fail") ? "fail" : "pass", exitCode: Number.isInteger(entry.exitCode) ? entry.exitCode : (prev ? prev.exitCode : null) });
299
+ }
300
+
301
+ // ─── P-b dual-emit helper ──────────────────────────────────────────────────
302
+ // Semantic matching table (ADR 0016 Abstraction A P-b):
303
+ // check (non-policy kind) → expects[] entry where claimType does NOT contain
304
+ // "acceptance" AND subjectType is NOT "decision". Preference: subjectType=
305
+ // "flow-step". Fallback: first non-decision, non-acceptance entry.
306
+ // check (kind=policy) → expects[] entry whose claimType contains
307
+ // "compliance" or "policy". Fallback: same as non-policy.
308
+ // acceptance criterion → expects[] entry whose subjectType is "flow-step"
309
+ // OR claimType contains "tests" OR "compliance". Fallback: first entry.
310
+ // critique → expects[] entry whose claimType contains "policy"
311
+ // OR "compliance" AND subjectType is "artifact". Fallback: last entry.
312
+ //
313
+ // The DECLARED claim is primary (kit-typed claimType + subjectType).
314
+ // The legacy claim uses the existing workflow.* claimType (suffix "-legacy") as
315
+ // a backward-compat shadow. Both cite the same evidence. Status is derived by
316
+ // Surface from that evidence (never hand-set).
317
+ //
318
+ // Per-gate producibility (ADR 0016 P-d):
319
+ // (a) Already handled via subjectType=flow-step preference:
320
+ // builder.verify.tests (verify-gate, subjectType=flow-step)
321
+ // builder.verify.policy-compliance (verify-gate, kind=policy match)
322
+ // (b) Producible via fallback (non-decision, non-acceptance, first match):
323
+ // builder.plan.implementation (plan-gate, subjectType=artifact)
324
+ // builder.execute.scope (execute-gate, subjectType=change)
325
+ // builder.merge-ready.readiness (merge-ready-gate, subjectType=change)
326
+ // builder.merge-ready-ci.readiness (merge-ready-ci-gate, subjectType=pull-request)
327
+ // (c) No natural producer — required:false in build.flow.json (ADR 0016 P-d plan):
328
+ // builder.pull-work.selected (pull-work-gate, subjectType=work-item)
329
+ // builder.design-probe.pickup-readiness (design-probe-gate, subjectType=work-item)
330
+ // builder.design-probe.decisions (design-probe-gate, subjectType=decision)
331
+ // builder.pr-open.pull-request (pr-open-gate, subjectType=pull-request)
332
+ // builder.learn.decisions (learn-gate, subjectType=decision)
333
+ // builder.learn.evidence (learn-gate, subjectType=release)
334
+ // For category (c): record-gate-claim subcommand allows skills to target a specific
335
+ // expects[] entry by --expectation <id>, bypassing this semantic match entirely.
336
/**
 * Pick the active step's gate expects[] entry that a produced claim should be
 * typed as, and return its declared { claimType, subjectType }.
 *
 * Returns null when there is no active step, when the step declares no
 * expects[] entries, or (for "check" only) when no entry qualifies.
 *
 * @param kind          Which producer is asking: "check", "acceptance" or "critique".
 * @param checkKindVal  The check's `kind`; only the literal "policy" changes the selection.
 * @param expectationId Exact expects[] id to target; consulted for "check" only.
 */
function matchExpectsEntry(kind: "check" | "acceptance" | "critique", checkKindVal?: string, expectationId?: string): { claimType: string; subjectType: string } | null {
  if (!activeStep || activeStep.gateExpects.length === 0) return null;
  const expects = activeStep.gateExpects;

  // Project an expects[] entry onto the pair callers consume.
  const declaredPair = (entry: { bundle_claim: { claimType: string; subjectType: string } }): { claimType: string; subjectType: string } => ({
    claimType: entry.bundle_claim.claimType,
    subjectType: entry.bundle_claim.subjectType,
  });
  // All claimType substring tests are case-insensitive.
  const loweredType = (entry: { bundle_claim: { claimType: string } }): string => entry.bundle_claim.claimType.toLowerCase();

  switch (kind) {
    case "check": {
      // An explicit expectation id wins over every heuristic; an unknown id
      // falls through to the heuristics below.
      if (expectationId) {
        const byId = expects.find((entry) => entry.id === expectationId);
        if (byId) return declaredPair(byId);
      }
      // Policy checks look for a compliance/policy-typed entry first.
      if (checkKindVal === "policy") {
        const policyEntry = expects.find((entry) => {
          const ct = loweredType(entry);
          return ct.includes("compliance") || ct.includes("policy");
        });
        if (policyEntry) return declaredPair(policyEntry);
      }
      // Preferred: a flow-step entry that is not acceptance-typed.
      const flowStepEntry = expects.find((entry) => {
        const ct = loweredType(entry);
        return entry.bundle_claim.subjectType === "flow-step" && !ct.includes("acceptance");
      });
      if (flowStepEntry) return declaredPair(flowStepEntry);
      // Fallback: the first entry that is neither a decision nor acceptance-typed.
      const firstEligible = expects.find((entry) => {
        const ct = loweredType(entry);
        return entry.bundle_claim.subjectType !== "decision" && !ct.includes("acceptance");
      });
      return firstEligible ? declaredPair(firstEligible) : null;
    }
    case "acceptance": {
      const acceptanceEntry = expects.find((entry) => {
        const ct = loweredType(entry);
        return entry.bundle_claim.subjectType === "flow-step" || ct.includes("tests") || ct.includes("compliance");
      });
      // Fallback: the first declared entry.
      return declaredPair(acceptanceEntry ?? expects[0]!);
    }
    case "critique": {
      const critiqueEntry = expects.find((entry) => {
        const ct = loweredType(entry);
        return entry.bundle_claim.subjectType === "artifact" && (ct.includes("policy") || ct.includes("compliance"));
      });
      // Fallback: the last declared entry.
      return declaredPair(critiqueEntry ?? expects[expects.length - 1]!);
    }
    default:
      return null;
  }
}
388
+ // ────────────────────────────────────────────────────────────────────────────
389
+
390
+ // Evidence checks → claims + evidence items + events. Capture is authoritative.
391
+ for (const check of Array.isArray(checks) ? checks : []) {
392
+ if (!check.id) continue;
393
+ const subjectId = `${slug}/${check.id}`;
394
+ const fieldOrBehavior = String(check.summary ?? check.id);
395
+ const claimId = generateClaimId(subjectId, "flow-agents.workflow", fieldOrBehavior);
396
+ const evId = `ev:${claimId}`;
397
+ const legacyClaimType = `workflow.check.${check.kind ?? "external"}`;
398
+ const policy = ensurePolicy(legacyClaimType, "high", ["test_output"]);
399
+
400
+ const cmd = typeof check.command === "string" ? check.command.replace(/\s+/g, " ").trim() : "";
401
+ const captured = cmd ? captureByCommand.get(cmd) : undefined;
402
+ const effectiveStatus = captured ? captured.observedResult : String(check.status ?? "");
403
+ const evStatus = checkStatusToEventStatus(effectiveStatus);
404
+
405
+ const claimEvents: AnyObj[] = [];
406
+ if (evStatus) {
407
+ const evt: AnyObj = { id: `evt:${claimId}`, claimId, status: evStatus, actor: "flow-agents/workflow-sidecar", method: "validation", evidenceIds: [evId], createdAt: ts, verifiedAt: ts };
408
+ events.push(evt);
409
+ claimEvents.push(evt);
410
+ }
411
+ const evItem: AnyObj = { id: evId, claimId, evidenceType: "test_output", method: "validation", sourceRef: `${slug}/evidence.json`, excerptOrSummary: fieldOrBehavior, observedAt: ts, collectedBy: "flow-agents/workflow-sidecar", passing: effectiveStatus === "pass" };
412
+ if (captured) {
413
+ evItem.sourceRef = `${slug}/command-log.jsonl`;
414
+ evItem.collectedBy = "flow-agents/evidence-capture";
415
+ evItem.execution = { runner: "bash", label: cmd, isError: captured.observedResult === "fail", ...(captured.exitCode != null ? { exitCode: captured.exitCode } : {}) };
416
+ }
417
+ evidenceItems.push(evItem);
418
+
419
+ // P-d: declared-only when active flow/step present (shadow retired); no-flow path unchanged.
420
+ // When record-gate-claim sets _gate_claim_expectation_id, pass it for exact lookup (ADR 0016 P-d Increment 2).
421
+ const declared = matchExpectsEntry("check", check.kind, typeof check._gate_claim_expectation_id === "string" ? check._gate_claim_expectation_id : undefined);
422
+ if (declared) {
423
+ // Declared kit-typed claim only — no legacy shadow (ADR 0016 P-d).
424
+ const declaredPolicy = ensurePolicy(declared.claimType, "high", ["test_output"]);
425
+ const declaredClaimObj: AnyObj = { id: claimId, subjectType: declared.subjectType, subjectId, surface: "flow-agents.workflow", claimType: declared.claimType, fieldOrBehavior, value: effectiveStatus, createdAt: ts, updatedAt: ts, impactLevel: "high", verificationPolicyId: declaredPolicy.id };
426
+ const { status: declaredStatus } = deriveClaimStatus({ claim: declaredClaimObj as Record<string, unknown>, evidence: [evItem] as Record<string, unknown>[], events: claimEvents as Record<string, unknown>[], policies: [declaredPolicy] as Record<string, unknown>[] });
427
+ claims.push({ ...declaredClaimObj, status: declaredStatus });
428
+ } else {
429
+ // No active flow step — only the workflow.* primary claim (legitimate no-flow fallback path).
430
+ const claimObj: AnyObj = { id: claimId, subjectType: "workflow-check", subjectId, surface: "flow-agents.workflow", claimType: legacyClaimType, fieldOrBehavior, value: effectiveStatus, createdAt: ts, updatedAt: ts, impactLevel: "high", verificationPolicyId: policy.id };
431
+ const { status: derivedStatus } = deriveClaimStatus({ claim: claimObj as Record<string, unknown>, evidence: [evItem] as Record<string, unknown>[], events: claimEvents as Record<string, unknown>[], policies: [policy] as Record<string, unknown>[] });
432
+ claims.push({ ...claimObj, status: derivedStatus });
433
+ }
434
+ }
435
+
436
+ // Acceptance criteria → claims + events
437
+ for (const criterion of Array.isArray(criteria) ? criteria : []) {
438
+ if (!criterion.id) continue;
439
+ const subjectId = `${slug}/${criterion.id}`;
440
+ const fieldOrBehavior = String(criterion.description ?? criterion.id);
441
+ const claimId = generateClaimId(subjectId, "flow-agents.workflow", fieldOrBehavior);
442
+ const legacyClaimType = "workflow.acceptance.criterion";
443
+ const policy = ensurePolicy(legacyClaimType, "high", []);
444
+ const evStatus = criterionStatusToEventStatus(String(criterion.status ?? ""));
445
+ const claimEvents: AnyObj[] = [];
446
+ if (evStatus) {
447
+ const evt: AnyObj = { id: `evt:${claimId}`, claimId, status: evStatus, actor: "flow-agents/workflow-sidecar", method: "validation", evidenceIds: [], createdAt: ts, verifiedAt: ts };
448
+ events.push(evt);
449
+ claimEvents.push(evt);
450
+ }
451
+
452
+ // P-d: declared-only when active flow/step present (shadow retired); no-flow path unchanged.
453
+ const declared = matchExpectsEntry("acceptance");
454
+ if (declared) {
455
+ // Declared kit-typed claim only — no legacy shadow (ADR 0016 P-d).
456
+ const declaredPolicy = ensurePolicy(declared.claimType, "high", []);
457
+ const declaredClaimObj: AnyObj = { id: claimId, subjectType: declared.subjectType, subjectId, surface: "flow-agents.workflow", claimType: declared.claimType, fieldOrBehavior, value: criterion.status, createdAt: ts, updatedAt: ts, impactLevel: "high", verificationPolicyId: declaredPolicy.id };
458
+ const { status: declaredStatus } = deriveClaimStatus({ claim: declaredClaimObj as Record<string, unknown>, evidence: [], events: claimEvents as Record<string, unknown>[], policies: [declaredPolicy] as Record<string, unknown>[] });
459
+ claims.push({ ...declaredClaimObj, status: declaredStatus });
460
+ } else {
461
+ // No active flow step — only the workflow.* primary claim (legitimate no-flow fallback path).
462
+ const claimObj: AnyObj = { id: claimId, subjectType: "workflow-acceptance-criterion", subjectId, surface: "flow-agents.workflow", claimType: legacyClaimType, fieldOrBehavior, value: criterion.status, createdAt: ts, updatedAt: ts, impactLevel: "high", verificationPolicyId: policy.id };
463
+ const { status: derivedStatus } = deriveClaimStatus({ claim: claimObj as Record<string, unknown>, evidence: [], events: claimEvents as Record<string, unknown>[], policies: [policy] as Record<string, unknown>[] });
464
+ claims.push({ ...claimObj, status: derivedStatus });
465
+ }
466
+ }
467
+
468
+ // Critique entries → claims + events
469
+ for (const c of Array.isArray(critiques) ? critiques : []) {
470
+ if (!c.id) continue;
471
+ const subjectId = `${slug}/${c.id}`;
472
+ const fieldOrBehavior = String(c.summary ?? c.verdict ?? c.id);
473
+ const claimId = generateClaimId(subjectId, "flow-agents.workflow", fieldOrBehavior);
474
+ const legacyClaimType = "workflow.critique.review";
475
+ const policy = ensurePolicy(legacyClaimType, "medium", []);
476
+ const evStatus = critiqueToEventStatus(String(c.verdict ?? ""), c.findings ?? []);
477
+ const claimEvents: AnyObj[] = [];
478
+ if (evStatus) {
479
+ const evt: AnyObj = { id: `evt:${claimId}`, claimId, status: evStatus, actor: "flow-agents/workflow-sidecar", method: "validation", evidenceIds: [], createdAt: ts, verifiedAt: ts };
480
+ events.push(evt);
481
+ claimEvents.push(evt);
482
+ }
483
+
484
+ // P-d: declared-only when active flow/step present (shadow retired); no-flow path unchanged.
485
+ const declared = matchExpectsEntry("critique");
486
+ if (declared) {
487
+ // Declared kit-typed claim only — no legacy shadow (ADR 0016 P-d).
488
+ const declaredPolicy = ensurePolicy(declared.claimType, "medium", []);
489
+ const declaredClaimObj: AnyObj = { id: claimId, subjectType: declared.subjectType, subjectId, surface: "flow-agents.workflow", claimType: declared.claimType, fieldOrBehavior, value: c.verdict, createdAt: ts, updatedAt: ts, impactLevel: "medium", verificationPolicyId: declaredPolicy.id };
490
+ const { status: declaredStatus } = deriveClaimStatus({ claim: declaredClaimObj as Record<string, unknown>, evidence: [], events: claimEvents as Record<string, unknown>[], policies: [declaredPolicy] as Record<string, unknown>[] });
491
+ claims.push({ ...declaredClaimObj, status: declaredStatus });
492
+ } else {
493
+ // No active flow step — only the workflow.* primary claim (legitimate no-flow fallback path).
494
+ const claimObj: AnyObj = { id: claimId, subjectType: "workflow-critique", subjectId, surface: "flow-agents.workflow", claimType: legacyClaimType, fieldOrBehavior, value: c.verdict, createdAt: ts, updatedAt: ts, impactLevel: "medium", verificationPolicyId: policy.id };
495
+ const { status: derivedStatus } = deriveClaimStatus({ claim: claimObj as Record<string, unknown>, evidence: [], events: claimEvents as Record<string, unknown>[], policies: [policy] as Record<string, unknown>[] });
496
+ claims.push({ ...claimObj, status: derivedStatus });
497
+ }
498
+ }
499
+
500
+ return {
501
+ schemaVersion: 3,
502
+ source: `flow-agents/workflow-sidecar;statusFunctionVersion=${statusFunctionVersion}`,
503
+ claims,
504
+ evidence: evidenceItems,
505
+ policies: [...policies.values()],
506
+ events,
507
+ };
508
+ }
509
+
510
/**
 * Fail-open wrapper: builds (via Surface), validates, and writes a trust.bundle.
 * Accepts raw check/criterion/critique inputs directly (ADR 0010 Phase 4a).
 * trust.bundle is written as the PRIMARY artifact; bespoke sidecars are the
 * caller's responsibility to emit as back-compat projections AFTER this call.
 * ANY error is caught and logged to stderr — this function NEVER throws and
 * NEVER affects the exit code of its caller.
 * Returns { written: false } when buildTrustBundle yields no bundle (Surface
 * unavailable — fail-open; does NOT fall back to hand-rolled status derivation).
 * @param dir       Session artifact directory: command-log.jsonl is read from it
 *                  (when present) and trust.bundle is written into it.
 * @param slug      Session slug, forwarded to buildTrustBundle.
 * @param timestamp Timestamp, forwarded to buildTrustBundle.
 * @param checks    Normalized check objects (same as buildTrustBundle)
 * @param criteria  Acceptance criteria objects (same as buildTrustBundle)
 * @param critiques Critique objects (same as buildTrustBundle)
 * @returns `{ written: true, errors: [] }` after a successful write; otherwise
 *          `{ written: false, errors }` where `errors` carries the validation
 *          errors or the caught error message (empty when no bundle was built).
 */
export async function writeTrustBundle(dir: string, slug: string, timestamp: string, checks: AnyObj[], criteria: AnyObj[], critiques: AnyObj[]): Promise<{ written: boolean; errors: string[] }> {
  try {
    // Fold the deterministic capture log (PostToolUse evidence-capture) into the
    // bundle so capture is authoritative over claimed status. Best-effort read.
    let commandLog: AnyObj[] = [];
    try {
      const raw = fs.readFileSync(path.join(dir, "command-log.jsonl"), "utf8");
      commandLog = raw
        .split("\n")
        .map((line) => line.trim())
        .filter(Boolean)
        .map((line): unknown => {
          try {
            return JSON.parse(line);
          } catch {
            return null; // unparseable line — dropped below
          }
        })
        // Keep plain JSON objects only: a line holding a bare number, string or
        // array parses successfully but is not a capture record.
        .filter((entry): entry is AnyObj => typeof entry === "object" && entry !== null && !Array.isArray(entry));
    } catch { /* no capture log — fine */ }

    // ADR 0016 Abstraction A (P-d): pass the .flow-agents dir ONLY when current.json
    // points to this session (scoped active-flow guard). If current.json.artifact_dir
    // resolves to a different session, the argument stays undefined — no active-flow
    // claim mapping for this bundle.
    const flowAgentsDir = path.dirname(dir);
    let scopedFlowAgentsDir: string | undefined = undefined;
    try {
      const current = JSON.parse(fs.readFileSync(path.join(flowAgentsDir, "current.json"), "utf8")) as Record<string, unknown>;
      const artifactDir = typeof current["artifact_dir"] === "string" ? current["artifact_dir"] : null;
      if (artifactDir && path.resolve(flowAgentsDir, artifactDir) === path.resolve(dir)) {
        scopedFlowAgentsDir = flowAgentsDir;
      }
    } catch { /* current.json absent or unreadable — no scoping */ }

    const bundle = await buildTrustBundle(slug, timestamp, checks, criteria, critiques, commandLog, scopedFlowAgentsDir);
    if (!bundle) return { written: false, errors: [] }; // Surface unavailable — fail-open, skip write

    // Only a definite "invalid" verdict blocks the write; an unavailable
    // validator does not.
    const result = await validateTrustBundle(bundle);
    if (result.available && !result.valid) {
      process.stderr.write(`[trust-bundle] schema validation failed: ${result.errors.join("; ")}\n`);
      return { written: false, errors: result.errors };
    }
    writeJson(path.join(dir, "trust.bundle"), bundle);
    return { written: true, errors: [] };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[trust-bundle] write failed: ${message}\n`);
    return { written: false, errors: [message] };
  }
}
82
559
 
560
// Phase 4c safety guard. With the bespoke sidecars retired, trust.bundle is the
// ONLY store, so a fail-open write would be SILENT DATA LOSS. Writers that persist
// data call this on the writeTrustBundle result so a missing bundle (Surface
// unavailable, validation failure, or I/O error) aborts via die() instead of
// exiting 0 and dropping the record. (Was masked as a "flaky" concurrent-critique test.)
function assertBundleWritten(result: { written: boolean; errors: string[] }): void {
  if (result.written) return;
  let reason: string;
  if (result.errors.length) {
    reason = result.errors.join("; ");
  } else {
    // No recorded errors means no bundle was built at all.
    reason = "@kontourai/surface is unavailable — it is REQUIRED to persist the trust.bundle (bundle-only workspace, ADR 0010 Phase 4c). Install it (>= 1.2) and retry.";
  }
  die(`trust.bundle was NOT written — the record was not persisted: ${reason}`);
}
571
+ // ─────────────────────────────────────────────────────────────────────────────
572
+
83
573
  function safeRepoIdentifier(value: string): string {
84
574
  const trimmed = value.trim().replace(/\.git$/, "");
85
575
  if (!trimmed || trimmed.length > 120) return "";
@@ -191,9 +681,9 @@ function lockAcquisitionFailureMessage(command: string, lockDir: string, error:
191
681
  ].join(" ");
192
682
  }
193
683
 
194
- async function withLock<T>(dir: string, create: boolean, command: string, body: () => T): Promise<T> {
684
+ async function withLock<T>(dir: string, create: boolean, command: string, body: () => T | Promise<T>): Promise<T> {
195
685
  if (create) fs.mkdirSync(dir, { recursive: true });
196
- if (!fs.existsSync(dir)) return body();
686
+ if (!fs.existsSync(dir)) return await body();
197
687
  const lockDir = path.join(dir, ".workflow-sidecar.lockdir");
198
688
  const staleMs = Number(process.env.FLOW_AGENTS_WORKFLOW_SIDECAR_STALE_LOCK_MS ?? 5 * 60 * 1000);
199
689
  const deadline = Date.now() + 30000;
@@ -221,7 +711,7 @@ async function withLock<T>(dir: string, create: boolean, command: string, body:
221
711
  try {
222
712
  const delay = process.env.FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY;
223
713
  if (delay) await new Promise((resolve) => setTimeout(resolve, Number(delay) * 1000));
224
- return body();
714
+ return await body();
225
715
  } finally {
226
716
  fs.rmSync(lockDir, { recursive: true, force: true });
227
717
  }
@@ -278,7 +768,57 @@ function validateAgentId(agent: string): string {
278
768
  return agent;
279
769
  }
280
770
 
281
- function writeCurrent(root: string, dir: string, timestamp: string, owner: string, source: string): void {
771
/**
 * Locate the repository root by climbing from `startDir` until a directory
 * containing a `kits` entry is found. Per the original note this mirrors
 * flow-resolver.ts findRepoRoot, duplicated here so workflow-sidecar.ts does
 * not need to re-import that internal helper.
 *
 * ADR 0016 Abstraction A (P-d): used by advance-state and ensure-session to
 * derive repoRoot for resolvePhaseMap calls.
 *
 * @param startDir Directory to start the upward search from.
 * @returns The first directory (startDir included) that has a `kits` entry;
 *          `process.cwd()` when none is found within 16 levels or before the
 *          filesystem root is reached.
 */
function findRepoRootFromDir(startDir: string): string {
  const maxLevels = 16;
  let candidate = startDir;
  let remaining = maxLevels;
  while (remaining-- > 0) {
    if (fs.existsSync(path.join(candidate, "kits"))) return candidate;
    const up = path.dirname(candidate);
    // dirname() of the filesystem root is the root itself — nothing left to climb.
    if (up === candidate) break;
    candidate = up;
  }
  return process.cwd();
}
790
+
791
/**
 * Look up the id of the first entry in a FlowDefinition's steps[] list.
 *
 * Used by ensure-session to default active_step_id when --flow-id is provided
 * without --step-id (Q2 decision, P-d Increment 1).
 *
 * @param flowId   Flow id of the form "<kitId>.<flowName>", split at the first dot.
 * @param repoRoot Repository root handed to resolveFlowFilePath.
 * @returns The first step's id, or null when the id is malformed, the flow file
 *          cannot be resolved/read/parsed, steps[] is missing or empty, or the
 *          first step's id is not a string or is the literal "done".
 */
function resolveFirstStep(flowId: string, repoRoot: string): string | null {
  if (!flowId) return null;
  const separatorAt = flowId.indexOf(".");
  // Needs a non-empty kit id in front of the first dot.
  if (separatorAt < 1) return null;
  const kitId = flowId.slice(0, separatorAt);
  const flowName = flowId.slice(separatorAt + 1);
  if (!kitId || !flowName) return null;

  // Path resolution is delegated to resolveFlowFilePath; per the original note it
  // applies the SLUG_RE validation and path-containment check shared with
  // resolveFlowStep / resolvePhaseMap and yields null for traversal attempts
  // (e.g. flowName="../../secret"), which maps onto this function's null contract.
  const flowFilePath = resolveFlowFilePath(kitId, flowName, flowId, repoRoot);
  if (!flowFilePath) return null;

  let flowDef: { steps?: Array<{ id: string }> };
  try {
    flowDef = JSON.parse(fs.readFileSync(flowFilePath, "utf8")) as { steps?: Array<{ id: string }> };
  } catch {
    return null;
  }
  if (!flowDef || !Array.isArray(flowDef.steps) || flowDef.steps.length === 0) return null;
  const head = flowDef.steps[0];
  if (!head || typeof head.id !== "string" || head.id === "done") return null;
  return head.id;
}
820
+
821
+ function writeCurrent(root: string, dir: string, timestamp: string, owner: string, source: string, flowId?: string, stepId?: string): void {
282
822
  writeJson(path.join(root, "current.json"), {
283
823
  schema_version: "1.0",
284
824
  active_slug: path.basename(dir),
@@ -287,6 +827,11 @@ function writeCurrent(root: string, dir: string, timestamp: string, owner: strin
287
827
  owner,
288
828
  source,
289
829
  active_agents: [],
830
+ // ADR 0016 Abstraction A (P-a): optional FlowDefinition routing keys for the producer
831
+ // and enforcer. Both fields are optional and backward-compatible — sessions without a
832
+ // FlowDefinition omit them and fall through to the workflow.* claim type path.
833
+ ...(flowId ? { active_flow_id: flowId } : {}),
834
+ ...(stepId ? { active_step_id: stepId } : {}),
290
835
  });
291
836
  }
292
837
  function loadCurrent(root: string): AnyObj | null {
@@ -329,7 +874,7 @@ function initSidecars(dir: string, slug: string, sourceRequest: string, summary:
329
874
 
330
875
  function ensureSession(p: ReturnType<typeof parseArgs>): number {
331
876
  const root = path.resolve(opt(p, "artifact-root", ".flow-agents"));
332
- const slug = opt(p, "task-slug") || die("--task-slug is required");
877
+ const slug = opt(p, "task-slug") || (opt(p, "work-item") ? workItemSlug(opt(p, "work-item")) : die("--task-slug is required (or pass --work-item to derive it)"));
333
878
  const dir = sessionDirFor(root, slug);
334
879
  fs.mkdirSync(dir, { recursive: true });
335
880
  const timestamp = opt(p, "timestamp", now());
@@ -341,7 +886,21 @@ function ensureSession(p: ReturnType<typeof parseArgs>): number {
341
886
  if (!fs.existsSync(path.join(dir, "state.json")) || !fs.existsSync(path.join(dir, "acceptance.json")) || !fs.existsSync(path.join(dir, "handoff.json"))) {
342
887
  initSidecars(dir, slug, opt(p, "source-request"), opt(p, "summary"), opt(p, "next-action", "Continue."), timestamp, md);
343
888
  }
344
- writeCurrent(root, dir, timestamp, "workflow-sidecar", "ensure-session");
889
+ // ADR 0016 Abstraction A (P-a): optional --flow-id / --step-id flags persist FlowDefinition
890
+ // routing keys into current.json for the producer (P-b) and enforcer (P-c) to consume.
891
+ // When absent, behavior is unchanged — the workflow.* claim type path is used as before.
892
+ // P-d Increment 1 (Q2 decision): when --flow-id is given without --step-id, default
893
+ // active_step_id to the FIRST step in the FlowDefinition's steps[] list. This ensures
894
+ // ensure-session --flow-id builder.build produces a FlowDefinition-driven session even
895
+ // before the first advance-state call.
896
+ const flowId = opt(p, "flow-id");
897
+ let stepId = opt(p, "step-id");
898
+ if (flowId && !stepId) {
899
+ const repoRoot = findRepoRootFromDir(dir);
900
+ const firstStep = resolveFirstStep(flowId, repoRoot);
901
+ if (firstStep) stepId = firstStep;
902
+ }
903
+ writeCurrent(root, dir, timestamp, "workflow-sidecar", "ensure-session", flowId || undefined, stepId || undefined);
345
904
  console.log(dir);
346
905
  return 0;
347
906
  }
@@ -376,6 +935,7 @@ function initPlan(p: ReturnType<typeof parseArgs>): number {
376
935
  const dir = artifactDirFrom(artifact);
377
936
  const slug = taskSlugFor(dir, opt(p, "task-slug"));
378
937
  initSidecars(dir, slug, opt(p, "source-request"), opt(p, "summary"), opt(p, "next-action"), opt(p, "timestamp", now()), read(artifact));
938
+ livenessLifecycle(dir, slug, "claim", opt(p, "timestamp", now()));
379
939
  return 0;
380
940
  }
381
941
 
@@ -428,24 +988,29 @@ export function normalizeCheck(raw: AnyObj): AnyObj {
428
988
  }
429
989
  function normalizeSurfaceRefs(refs: any): AnyObj[] {
430
990
  if (!Array.isArray(refs)) die("surface_trust_refs must be an array");
431
- const hachureValidate = getHachureValidator();
991
+ // Use the cached @kontourai/surface module for advisory inline validation of referenced
992
+ // trust.bundle files. Fail-open when surface is not yet loaded (surface loads on first
993
+ // bundle write via tryLoadSurface; normalizeSurfaceRefs may run before that).
994
+ const surfaceValidateFn = _surfaceModule?.validateTrustBundle ?? null;
432
995
  return refs.map((ref) => {
433
996
  const keys = JSON.stringify(ref).match(/"([^"]+)":/g) ?? [];
434
997
  for (const key of keys.map((k) => k.slice(1, -2))) if (key.toLowerCase().includes("veritas")) die(`unsupported field in Surface trust ref: ${key}`);
435
998
  const out = { ...ref };
436
999
  // trust.bundle is the canonical Hachure-aligned artifact kind; TrustReport/Trust Snapshot are legacy aliases
437
1000
  if (!["trust.bundle", "TrustReport", "Trust Snapshot"].includes(out.artifact_kind)) die("artifact_kind must be one of: trust.bundle, TrustReport, Trust Snapshot");
438
- // When hachure is installed, validate the referenced trust artifact if it is a local file
439
- if (hachureValidate && out.artifact_ref && typeof out.artifact_ref === "string" && fs.existsSync(out.artifact_ref)) {
1001
+ // When surface is loaded, validate the referenced trust artifact if it is a local file.
1002
+ // Advisory: surface's throw-based validator wraps into a fail-loud error on schema failure.
1003
+ if (surfaceValidateFn && out.artifact_ref && typeof out.artifact_ref === "string" && fs.existsSync(out.artifact_ref)) {
440
1004
  try {
441
1005
  const bundle = JSON.parse(fs.readFileSync(out.artifact_ref, "utf8"));
442
- const result = hachureValidate(bundle);
443
- if (!result.valid) {
444
- const errorSummary = result.errors.slice(0, 3).join("; ");
445
- die(`trust.bundle artifact at ${out.artifact_ref} failed Hachure schema validation: ${errorSummary}`);
446
- }
1006
+ surfaceValidateFn(bundle);
447
1007
  } catch (err) {
448
- if (err instanceof Error && err.message.includes("failed Hachure schema validation")) throw err;
1008
+ if (err instanceof Error) {
1009
+ // Re-throw schema validation failures (surface throws on invalid); swallow read/parse errors.
1010
+ const msg = err.message;
1011
+ const isSchemaError = !msg.startsWith("ENOENT") && !msg.startsWith("SyntaxError") && !msg.toLowerCase().startsWith("unexpected");
1012
+ if (isSchemaError) die(`trust.bundle artifact at ${out.artifact_ref} failed schema validation: ${msg}`);
1013
+ }
449
1014
  // File read or parse errors are not re-thrown: the artifact_ref validation path is advisory
450
1015
  }
451
1016
  }
@@ -481,15 +1046,7 @@ function surfaceCheckFromArtifact(file: string, index: number): AnyObj {
481
1046
  }
482
1047
  return { id: `surface-trust-${index + 1}`, kind: "policy", status: ref.status, summary: ref.summary, surface_trust_refs: [ref] };
483
1048
  }
484
- function updateAcceptance(dir: string, verdict: string): void {
485
- const file = path.join(dir, "acceptance.json");
486
- if (!fs.existsSync(file)) return;
487
- const data = loadJson(file);
488
- const status = verdict === "pass" ? "pass" : verdict === "fail" ? "fail" : "not_verified";
489
- if (Array.isArray(data.criteria)) data.criteria = data.criteria.map((c: AnyObj) => ({ ...c, status }));
490
- data.goal_fit = { ...(data.goal_fit ?? {}), status, summary: verdict === "pass" ? "Evidence passed." : "Evidence requires follow-up." };
491
- writeJson(file, data);
492
- }
1049
+
493
1050
  function validateAcceptanceEvidenceRefs(dir: string): void {
494
1051
  const file = path.join(dir, "acceptance.json");
495
1052
  if (!fs.existsSync(file)) return;
@@ -502,7 +1059,104 @@ function validateAcceptanceEvidenceRefs(dir: string): void {
502
1059
/**
 * Rewrite `<dir>/state.json`, layering new values over what is already stored.
 * Later layers win: existing file content, then sidecarBase(slug), then the
 * explicit status/phase/timestamp/artifact/next-action fields.
 *
 * @param dir       Session artifact directory holding state.json.
 * @param slug      Session slug passed to sidecarBase.
 * @param status    New `status` value.
 * @param phase     New `phase` value.
 * @param timestamp Stored as `updated_at`.
 * @param summary   Stored as `next_action.summary`.
 * @param next      Stored as `next_action.status`; defaults to "continue".
 */
export function writeState(dir: string, slug: string, status: string, phase: string, timestamp: string, summary: string, next = "continue"): void {
  const stateFile = path.join(dir, "state.json");
  const previous = loadJson(stateFile);
  const merged = {
    ...previous,
    ...sidecarBase(slug),
    status,
    phase,
    updated_at: timestamp,
    artifact_paths: relArtifacts(dir),
    next_action: { status: next, summary },
  };
  writeJson(stateFile, merged);
}
505
- function recordEvidence(p: ReturnType<typeof parseArgs>): number {
1062
+ // ─── Phase 4c: bundle-only helpers ───────────────────────────────────────────
1063
+ // After 4c, evidence.json and critique.json are no longer written.
1064
+ // Extract checks and critiques from the existing trust.bundle for callers that
1065
+ // need to rebuild the bundle (e.g. record-critique, record-learning).
1066
+
1067
// ADR 0016 Abstraction A (Step 0 Q3 carry-forward): collect the claimTypes declared
// by the active flow step's gate expects[] for the session stored at `dir`.
//
// An empty set is returned — leaving every legacy workflow.* predicate unchanged —
// when any of the following holds:
//   - current.json (in the parent .flow-agents dir) is missing, unreadable, or has
//     no string artifact_dir;
//   - current.json's artifact_dir resolves to a session other than `dir`;
//   - no active flow step resolves, or the step declares no expects[] entries.
//
// The artifact_dir comparison is a safety guard: current.json records the CURRENTLY
// ACTIVE session, so without it another session's active_flow_id could broaden the
// claim filters applied to this one (spurious evidence/critique check behavior).
//
// For a FlowDefinition-driven session (builder.build) the set holds kit-typed
// claimTypes (e.g. "builder.verify.tests", "builder.verify.policy-compliance") so
// the round-trip helpers accept declared claims alongside the legacy workflow.* ones.
function declaredClaimTypesFor(dir: string): Set<string> {
  const flowAgentsDir = path.dirname(dir);
  let pointsAtThisSession = false;
  try {
    const current = JSON.parse(fs.readFileSync(path.join(flowAgentsDir, "current.json"), "utf8")) as Record<string, unknown>;
    const artDir = current["artifact_dir"];
    // artifact_dir is resolved relative to the .flow-agents dir before comparing.
    pointsAtThisSession =
      typeof artDir === "string" &&
      artDir !== "" &&
      path.resolve(dir) === path.resolve(flowAgentsDir, artDir);
  } catch {
    return new Set<string>();
  }
  if (!pointsAtThisSession) return new Set<string>();

  const activeStep = resolveActiveFlowStep(flowAgentsDir);
  const declared = new Set<string>();
  if (!activeStep || activeStep.gateExpects.length === 0) return declared;
  for (const entry of activeStep.gateExpects) declared.add(entry.bundle_claim.claimType);
  return declared;
}
1099
+
1100
/**
 * Rebuild check objects from the claims/evidence stored in `<dir>/trust.bundle`.
 *
 * A claim counts as a check when its claimType starts with "workflow.check." or is
 * listed in `declaredClaimTypes` (ADR 0016 Step 0). Evidence-backed claims come
 * first, in evidence order, followed by qualifying claims that have no evidence
 * item (surface_trust_refs style). Each claim id is emitted at most once.
 *
 * @param dir                Session artifact directory containing trust.bundle.
 * @param declaredClaimTypes Extra kit-typed claimTypes to treat as checks.
 * @returns Check objects `{ id, kind, status, summary }`, plus `command` /
 *          `evidenceType` when the evidence item carries them. Empty when the
 *          bundle has no evidence array.
 */
function checksFromBundle(dir: string, declaredClaimTypes: Set<string> = new Set()): AnyObj[] {
  const bundle = loadJson(path.join(dir, "trust.bundle"));
  if (!Array.isArray(bundle.evidence)) return [];
  const allClaims: AnyObj[] = Array.isArray(bundle.claims) ? bundle.claims : [];

  const legacyPrefix = "workflow.check.";
  const isCheckType = (ct: string): boolean => ct.startsWith(legacyPrefix) || declaredClaimTypes.has(ct);
  // Legacy types carry the kind after the prefix; declared types use their last
  // dotted segment. Either falls back to "external" when that piece is empty.
  // NOTE(review): a declared type such as "builder.verify.policy-compliance" reads
  // back as kind "policy-compliance", which is not the "policy" kind that
  // matchExpectsEntry tests for — confirm a rebuild keeps the intended claimType.
  const kindOf = (ct: string): string =>
    ct.startsWith(legacyPrefix) ? (ct.slice(legacyPrefix.length) || "external") : (ct.split(".").pop() || "external");
  // The check id is the last path segment of the claim's subjectId.
  const checkIdOf = (claim: AnyObj, fallbackId: unknown): unknown => String(claim.subjectId || "").split("/").pop() || fallbackId;

  const claimById = new Map<string, AnyObj>();
  for (const claim of allClaims) {
    if (claim && claim.id) claimById.set(claim.id, claim);
  }

  const emitted = new Set<string>();
  const checks: AnyObj[] = [];

  // Pass 1: claims that have at least one evidence item.
  for (const ev of bundle.evidence) {
    if (!ev || !ev.claimId) continue;
    const claim = claimById.get(ev.claimId);
    if (!claim) continue;
    const ct = String(claim.claimType || "");
    if (!isCheckType(ct)) continue;
    if (emitted.has(ev.claimId)) continue;
    emitted.add(ev.claimId);
    const check: AnyObj = {
      id: checkIdOf(claim, ev.claimId),
      kind: kindOf(ct),
      status: claim.value ?? "not_verified",
      summary: claim.fieldOrBehavior || "",
    };
    if (ev.execution && typeof ev.execution.label === "string") check.command = ev.execution.label;
    if (ev.evidenceType) check.evidenceType = ev.evidenceType;
    checks.push(check);
  }

  // Pass 2: check claims that have no evidence item (surface_trust_refs style).
  for (const claim of allClaims) {
    if (!claim) continue;
    const ct = String(claim.claimType || "");
    if (!isCheckType(ct)) continue;
    if (emitted.has(claim.id)) continue;
    emitted.add(claim.id);
    checks.push({
      id: checkIdOf(claim, claim.id),
      kind: kindOf(ct),
      status: claim.value ?? "not_verified",
      summary: claim.fieldOrBehavior || "",
    });
  }
  return checks;
}
1137
/**
 * Rebuild critique objects from the claims stored in `<dir>/trust.bundle`.
 *
 * A claim counts as a critique when its claimType is "workflow.critique.review" or
 * is listed in `declaredClaimTypes` (ADR 0016 Step 0) AND no evidence item points
 * at it. P-d: evidence marks a check claim, so the evidence exclusion keeps
 * declared check types (e.g. builder.verify.tests) from being read back as
 * critiques when `declaredClaimTypes` holds every gate expects[] type.
 *
 * @param dir                Session artifact directory containing trust.bundle.
 * @param declaredClaimTypes Extra kit-typed claimTypes to treat as critiques.
 * @returns Critique objects; `findings` and `artifact_refs` are always empty and
 *          `reviewer` is always "tool-code-reviewer". Empty when the bundle has no
 *          claims array.
 */
function critiquesFromBundle(dir: string, declaredClaimTypes: Set<string> = new Set()): AnyObj[] {
  const bundle = loadJson(path.join(dir, "trust.bundle"));
  if (!Array.isArray(bundle.claims)) return [];

  // Ids of every claim referenced by an evidence item (string ids only).
  const evidenceClaimIds = new Set<string>();
  if (Array.isArray(bundle.evidence)) {
    for (const item of bundle.evidence) {
      const referenced: unknown = item?.claimId;
      if (typeof referenced === "string") evidenceClaimIds.add(referenced);
    }
  }

  const critiques: AnyObj[] = [];
  for (const claim of bundle.claims) {
    if (!claim) continue;
    const critiqueTyped = claim.claimType === "workflow.critique.review" || declaredClaimTypes.has(claim.claimType);
    if (!critiqueTyped || evidenceClaimIds.has(claim.id)) continue;
    critiques.push({
      // The critique id is the last path segment of the claim's subjectId.
      id: String(claim.subjectId || "").split("/").pop() || claim.id,
      verdict: claim.value ?? "not_verified",
      summary: claim.fieldOrBehavior || "",
      findings: [],
      reviewer: "tool-code-reviewer",
      reviewed_at: claim.updatedAt || claim.createdAt || now(),
      artifact_refs: [],
    });
  }
  return critiques;
}
1158
+ // ─────────────────────────────────────────────────────────────────────────────
1159
+ async function recordEvidence(p: ReturnType<typeof parseArgs>): Promise<number> {
506
1160
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
507
1161
  const verdict = opt(p, "verdict") || die("--verdict is required");
508
1162
  if (!verdicts.has(verdict)) die("verdict must be one of: pass, partial, fail, not_verified");
@@ -510,11 +1164,15 @@ function recordEvidence(p: ReturnType<typeof parseArgs>): number {
510
1164
  const checks = [...opts(p, "check-json").map((v) => normalizeCheck(parseJson(v, "--check-json"))), ...opts(p, "surface-trust-json").map(surfaceCheckFromArtifact)];
511
1165
  if (!checks.length && opts(p, "surface-trust-json").length === 0) die("record-evidence requires at least one --check-json or --surface-trust-json");
512
1166
  validateAcceptanceEvidenceRefs(dir);
513
- const payload = { ...sidecarBase(slug), verdict, checks, not_verified_gaps: opts(p, "gap") };
514
- writeJson(path.join(dir, "evidence.json"), payload);
515
- updateAcceptance(dir, verdict);
1167
+ // Phase 4c: bundle is the sole verification artifact — stop writing evidence.json and the acceptance.json update.
1168
+ const ts = opt(p, "timestamp", now());
1169
+ const _existingAcceptance = loadJson(path.join(dir, "acceptance.json"));
1170
+ const _existingCriteria: AnyObj[] = Array.isArray(_existingAcceptance.criteria) ? _existingAcceptance.criteria : [];
1171
+ const _criteriaStatus = verdict === "pass" ? "pass" : verdict === "fail" ? "fail" : "not_verified";
1172
+ const _criteriaForBundle: AnyObj[] = _existingCriteria.map((c: AnyObj) => ({ ...c, status: _criteriaStatus }));
1173
+ assertBundleWritten(await writeTrustBundle(dir, slug, ts, checks, _criteriaForBundle, []));
516
1174
  const stateStatus = verdict === "pass" ? "verified" : verdict === "fail" ? "failed" : "not_verified";
517
- writeState(dir, slug, stateStatus, "verification", opt(p, "timestamp", now()), "Evidence recorded.");
1175
+ writeState(dir, slug, stateStatus, "verification", ts, "Evidence recorded.");
518
1176
  return 0;
519
1177
  }
520
1178
 
@@ -523,7 +1181,94 @@ function diagnostic(dir: string, code: string, summary: string): never {
523
1181
  appendJsonl(path.join(dir, "transition-diagnostics.jsonl"), payload);
524
1182
  die(`${code}: ${summary}`);
525
1183
  }
526
- function advanceState(p: ReturnType<typeof parseArgs>): number {
1184
+
1185
/**
 * record-gate-claim — Generic gate-claim producer for skills (ADR 0016 P-d Increment 1).
 *
 * Allows a skill to record a claim that satisfies a SPECIFIC gate expectation at the
 * active step. The caller passes:
 *   --status <pass|fail|not_verified>   (required)
 *   --summary <text>                    (required)
 *   --expectation <id>                  (optional; auto-resolved when the gate has one entry)
 *   --evidence-ref-json <json>          (optional, repeatable; structured evidence refs)
 *   --task-slug <slug>, --timestamp <iso>  (optional)
 *
 * The producer emits a check of kind="external" tagged with the targeted expectation id
 * and writes it into the trust.bundle via writeTrustBundle. The expectation's declared
 * claimType + subjectType are only logged here; how the check is turned into a typed
 * claim is decided downstream of this function.
 *
 * When the gate has exactly ONE expects[] entry, --expectation is optional (auto-resolve).
 * When the gate has multiple entries, --expectation <id> is required.
 *
 * This is what Increment 2's 6 skills will call to satisfy the category (c) gates
 * (pull-work.selected, design-probe.*, pr-open.pull-request, learn.*) once producers are added.
 *
 * Error cases (all terminate via die / assertBundleWritten):
 *   - --status missing or not one of the allowed values
 *   - --summary missing
 *   - No active flow/step in current.json
 *   - Active gate has no expects[] entries
 *   - --expectation not found in expects[]
 *   - Multiple expects[] entries and --expectation omitted
 *   - Bundle could not be written → assertBundleWritten fails loud (no silent data loss)
 *
 * @param p Parsed CLI arguments; positional[0] is the artifact directory.
 * @returns 0 on success.
 */
async function recordGateClaim(p: ReturnType<typeof parseArgs>): Promise<number> {
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
  const slug = taskSlugFor(dir, opt(p, "task-slug"));
  const ts = opt(p, "timestamp", now());
  const statusVal = opt(p, "status");
  if (!["pass", "fail", "not_verified"].includes(statusVal)) die("--status must be one of: pass, fail, not_verified");
  const summary = opt(p, "summary") || die("--summary is required");
  const expectationId = opt(p, "expectation");

  // Resolve the active flow step from current.json (it lives in the parent of the artifact dir).
  const flowAgentsDir = path.dirname(dir);
  const activeStep = resolveActiveFlowStep(flowAgentsDir);
  if (!activeStep) die("record-gate-claim requires an active flow step in current.json (set via ensure-session --flow-id or advance-state --flow-definition)");

  const expects = activeStep.gateExpects;
  if (expects.length === 0) die(`record-gate-claim: active step "${activeStep.stepId}" gate "${activeStep.gateId}" has no expects[] entries`);

  // Resolve the target expects entry: explicit id wins; a single-entry gate auto-resolves.
  let targetExpectation: typeof expects[0] | undefined;
  if (expectationId) {
    targetExpectation = expects.find((e) => e.id === expectationId);
    if (!targetExpectation) die(`record-gate-claim: --expectation "${expectationId}" not found in gate "${activeStep.gateId}" expects[]. Available: ${expects.map((e) => e.id).join(", ")}`);
  } else if (expects.length === 1) {
    targetExpectation = expects[0]!;
  } else {
    die(`record-gate-claim: gate "${activeStep.gateId}" has ${expects.length} expects[] entries; --expectation <id> is required. Available: ${expects.map((e) => e.id).join(", ")}`);
  }

  const { claimType, subjectType } = targetExpectation.bundle_claim;

  // Build a minimal synthetic "external" check. _gate_claim_expectation_id carries the exact
  // expectation so multi-expects[] gates can be matched by id (ADR 0016 P-d Increment 2).
  // When --expectation is given it equals targetExpectation.id, so the resolved id is used
  // for both the check id and the tag.
  const check: AnyObj = {
    id: `gate-claim-${targetExpectation.id}`,
    kind: "external",
    status: statusVal,
    summary,
    _gate_claim_expectation_id: targetExpectation.id,
  };

  // Attach structured evidence refs only when at least one was supplied.
  const evidenceRefs: AnyObj[] = opts(p, "evidence-ref-json").map((v) => validateEvidenceRef(parseJson(v, "--evidence-ref-json"), "--evidence-ref-json"));
  if (evidenceRefs.length > 0) {
    check.artifact_refs = evidenceRefs;
  }

  const checkNormalized = normalizeCheck(check);
  // Log the targeted gate expectation for transparency (goes to stderr only).
  process.stderr.write(`[record-gate-claim] targeting ${activeStep.stepId}/${activeStep.gateId}/${targetExpectation.id} → claimType=${claimType} subjectType=${subjectType}\n`);
  assertBundleWritten(await writeTrustBundle(dir, slug, ts, [checkNormalized], [], []));
  return 0;
}
1270
+
1271
+ async function advanceState(p: ReturnType<typeof parseArgs>): Promise<number> {
527
1272
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
528
1273
  const status = opt(p, "status");
529
1274
  const phase = opt(p, "phase");
@@ -549,6 +1294,26 @@ function advanceState(p: ReturnType<typeof parseArgs>): number {
549
1294
  const timestamp = opt(p, "timestamp", now());
550
1295
  writeState(dir, slug, status, phase, timestamp, opt(p, "summary"));
551
1296
  writeJson(path.join(dir, "handoff.json"), { ...loadJson(path.join(dir, "handoff.json")), ...sidecarBase(slug), summary: opt(p, "summary"), current_state_ref: "state.json", next_steps: [opt(p, "next-action")].filter(Boolean), blockers: [], warnings: [] });
1297
+ // ADR 0016 Abstraction A (P-d, Increment 1): when --flow-definition is provided,
1298
+ // resolve the phase→step mapping from the FlowDefinition and write active_step_id
1299
+ // into current.json. This is the single setter — no skill needs to call ensure-session
1300
+ // --step-id individually. The repoRoot is derived by walking up from dir to find kits/.
1301
+ if (flow) {
1302
+ const root = path.resolve(opt(p, "artifact-root", path.dirname(dir)));
1303
+ const repoRoot = findRepoRootFromDir(dir);
1304
+ const phaseMap = resolvePhaseMap(flow, repoRoot);
1305
+ const stepId = phaseMap?.[phase] ?? undefined;
1306
+ if (stepId) {
1307
+ writeCurrent(root, dir, timestamp, "workflow-sidecar", "advance-state", flow, stepId);
1308
+ }
1309
+ }
1310
+ livenessLifecycle(dir, slug, LIVENESS_TERMINAL.has(status) ? "release" : "heartbeat", timestamp);
1311
+ // Trust checkpoint: when advancing to a terminal delivered status, seal the checkpoint.
1312
+ if (status === "delivered") {
1313
+ await sealTrustCheckpoint(dir, slug, timestamp, status, "release").catch(() => { /* best-effort; checkpoint seal must not break advance-state */ });
1314
+ // Publish delivery bundle: best-effort copy to delivery/ for CI trust-reconcile.
1315
+ await publishDelivery(dir, findRepoRootFromDir(dir)).catch(() => { /* best-effort; must not break advance-state */ });
1316
+ }
552
1317
  return 0;
553
1318
  }
554
1319
 
@@ -556,19 +1321,23 @@ export function normalizeFinding(raw: AnyObj): AnyObj {
556
1321
  if (raw.file_refs !== undefined && !Array.isArray(raw.file_refs)) die("file_refs must be an array");
557
1322
  return raw;
558
1323
  }
559
- function critiqueStatus(critiques: AnyObj[], required: boolean): string {
560
- if (!required && critiques.length === 0) return "not_required";
561
- if (critiques.some((c) => c.verdict === "fail" || (Array.isArray(c.findings) && c.findings.some((f: AnyObj) => f.status === "open")))) return "fail";
562
- return "pass";
563
- }
564
- function recordCritique(p: ReturnType<typeof parseArgs>): number {
1324
+
1325
/**
 * record-critique — append one critique to the session and rewrite the trust bundle.
 *
 * Prior critiques come from critique.json when that legacy file still holds any;
 * otherwise they are read back from trust.bundle (Phase 4c: critique.json is no longer
 * written). Existing checks are read from the bundle and acceptance criteria from
 * acceptance.json, then everything is passed to writeTrustBundle.
 *
 * Terminates via die when the new critique claims verdict "pass" while carrying an open
 * finding, and via assertBundleWritten when the bundle could not be written.
 *
 * @param p Parsed CLI arguments; positional[0] is the artifact directory.
 * @returns 0 on success.
 */
async function recordCritique(p: ReturnType<typeof parseArgs>): Promise<number> {
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
  const slug = taskSlugFor(dir, opt(p, "task-slug"));

  // Legacy sessions may still have critique.json on disk; prefer it when it has entries.
  const legacyFile = loadJson(path.join(dir, "critique.json"), { critiques: [] });
  const legacyCritiques: AnyObj[] = Array.isArray(legacyFile.critiques) ? legacyFile.critiques : [];
  const declaredClaimTypes = declaredClaimTypesFor(dir);
  const priorCritiques = legacyCritiques.length === 0 ? critiquesFromBundle(dir, declaredClaimTypes) : legacyCritiques;

  const critique = {
    id: opt(p, "id") || "review",
    reviewer: opt(p, "reviewer", "tool-code-reviewer"),
    reviewed_at: opt(p, "timestamp", now()),
    verdict: opt(p, "verdict", "pass"),
    summary: opt(p, "summary"),
    artifact_refs: opts(p, "artifact-ref"),
    findings: opts(p, "finding-json").map((v) => normalizeFinding(parseJson(v, "--finding-json"))),
  };
  const critiques = [...priorCritiques, critique];

  // A "pass" verdict is contradictory while any of its own findings is still open.
  if (critique.verdict === "pass" && critique.findings.some((f: AnyObj) => f.status === "open")) die("required critique must pass");

  // Phase 4c: checks come from trust.bundle (evidence.json is no longer written).
  const existingChecks: AnyObj[] = checksFromBundle(dir, declaredClaimTypes);
  // Read acceptance.json once so the array check and the value used come from the same read.
  const acceptance = loadJson(path.join(dir, "acceptance.json"));
  const criteria: AnyObj[] = Array.isArray(acceptance.criteria) ? acceptance.criteria : [];

  assertBundleWritten(await writeTrustBundle(dir, slug, critique.reviewed_at, existingChecks, criteria, critiques));
  return 0;
}
574
1343
  function frontmatter(text: string, key: string): string {
@@ -577,7 +1346,7 @@ function frontmatter(text: string, key: string): string {
577
1346
  if (end < 0) return "";
578
1347
  return new RegExp(`^${key}:\\s*(.+)$`, "m").exec(text.slice(0, end))?.[1]?.trim() ?? "";
579
1348
  }
580
- function importCritique(p: ReturnType<typeof parseArgs>): number {
1349
+ async function importCritique(p: ReturnType<typeof parseArgs>): Promise<number> {
581
1350
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
582
1351
  const review = p.positional[1] || die("review artifact is required");
583
1352
  const text = read(review);
@@ -592,11 +1361,11 @@ function importCritique(p: ReturnType<typeof parseArgs>): number {
592
1361
  findings.push({ id: slugify(title, `finding-${findings.length + 1}`), severity: (m.groups?.severity ?? "info").toLowerCase(), status: opt(p, "finding-status", verdict === "pass" ? "fixed" : "open"), description: title, file_refs: [m.groups?.target ?? review] });
593
1362
  }
594
1363
  const parsed = { ...p, positional: [dir], opts: { ...p.opts, id: [slugify(path.basename(review).replace(/\.md$/, ""), "review")], reviewer: ["tool-code-reviewer"], verdict: [verdict], summary: [`Imported critique from ${path.basename(review)}`], "finding-json": findings.map((f) => JSON.stringify(f)) }, flags: p.flags };
595
- const result = recordCritique(parsed);
1364
+ const result = await recordCritique(parsed);
596
1365
  if (verdict !== "pass") die("required critique must pass");
597
1366
  return result;
598
1367
  }
599
- function recordRelease(p: ReturnType<typeof parseArgs>): number {
1368
+ async function recordRelease(p: ReturnType<typeof parseArgs>): Promise<number> {
600
1369
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
601
1370
  const slug = taskSlugFor(dir, opt(p, "task-slug"));
602
1371
  const decision = opt(p, "decision");
@@ -607,8 +1376,285 @@ function recordRelease(p: ReturnType<typeof parseArgs>): number {
607
1376
  const stateSummary = opt(p, "summary").trim() || `Release readiness recorded for ${decision}.`;
608
1377
  writeJson(path.join(dir, "release.json"), payload);
609
1378
  writeState(dir, slug, "delivered", "release", payload.updated_at, stateSummary);
1379
+ // Trust checkpoint: seal at the "delivered" moment (the natural terminal mark for record-release).
1380
+ await sealTrustCheckpoint(dir, slug, payload.updated_at, "delivered", "release").catch(() => { /* best-effort; checkpoint seal must not break record-release */ });
1381
+ // Publish delivery bundle: best-effort copy to delivery/ for CI trust-reconcile.
1382
+ await publishDelivery(dir, findRepoRootFromDir(dir)).catch(() => { /* best-effort; must not break record-release */ });
1383
+ return 0;
1384
+ }
1385
+
1386
+ // ─── Trust Checkpoint (Increment A) ──────────────────────────────────────────
1387
+ // Per-run frozen snapshot of verified trust state at completion. Written to
1388
+ // trust.checkpoint.json alongside the other workflow sidecars.
1389
+ // Surface owns the DerivationCheckpoint shape; flow-agents wraps it in an
1390
+ // ENVELOPE that adds per-run context surface does not carry.
1391
+ //
1392
+ // Envelope shape:
1393
+ // {
1394
+ // schema_version: "1.0",
1395
+ // slug: string,
1396
+ // work_item: string | null,
1397
+ // status: string,
1398
+ // phase: string,
1399
+ // sealed_at: ISO-8601,
1400
+ // commit_sha: string | null,
1401
+ // checkpoint: DerivationCheckpoint ← surface owns this
1402
+ // }
1403
+ //
1404
+ // Idempotent: re-running advance-state / record-release to the same terminal
1405
+ // status overwrites with the latest snapshot.
1406
+ // Fail-open: if no trust.bundle exists, or Surface is unavailable, the write
1407
+ // is skipped gracefully (no error surfaced to the caller).
1408
+
1409
/**
 * Ask git for the HEAD commit sha of the process's working directory.
 *
 * @returns The trimmed `git rev-parse HEAD` output, or null when the command
 *          fails (git absent, not in a repo) or prints nothing.
 */
function resolveCommitSha(): string | null {
  let stdout: string;
  try {
    // stderr is discarded so a failing git call stays quiet.
    stdout = execFileSync("git", ["rev-parse", "HEAD"], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
  } catch {
    return null;
  }
  const sha = stdout.trim();
  return sha === "" ? null : sha;
}
1417
+
1418
/**
 * Build trust.checkpoint.json for a run and then attempt to sign it.
 *
 * Returns without writing anything when:
 *   - trust.bundle is absent from `dir`, or
 *   - the Surface module cannot be loaded or lacks buildTrustReport / checkpointFromReport.
 *
 * The written envelope wraps Surface's checkpoint with per-run context:
 * schema_version, slug, work_item (from state.json when it is a string), status, phase,
 * sealed_at, commit_sha and checkpoint.
 *
 * Signing (Increment B1) runs after the checkpoint is written and is fail-open: an
 * error from signCheckpointAttestation is reported on stderr and not rethrown.
 * Errors while reading or parsing trust.bundle, or while writing the checkpoint, DO
 * propagate — the automatic call sites wrap this function in .catch().
 *
 * @param dir      Session artifact directory.
 * @param slug     Task slug recorded in the envelope.
 * @param sealedAt Timestamp recorded as sealed_at.
 * @param status   Workflow status recorded in the envelope.
 * @param phase    Workflow phase recorded in the envelope.
 */
export async function sealTrustCheckpoint(dir: string, slug: string, sealedAt: string, status: string, phase: string): Promise<void> {
  const bundlePath = path.join(dir, "trust.bundle");
  if (!fs.existsSync(bundlePath)) return; // no bundle — skip gracefully

  const surface = await tryLoadSurface();
  if (!surface || typeof surface.checkpointFromReport !== "function" || typeof surface.buildTrustReport !== "function") return; // Surface unavailable

  const bundle = JSON.parse(fs.readFileSync(bundlePath, "utf8"));
  const checkpoint = surface.checkpointFromReport(surface.buildTrustReport(bundle as Record<string, unknown>));

  // work_item is best-effort: any failure reading state.json leaves it null.
  let workItem: string | null = null;
  try {
    const { work_item: candidate } = loadJson(path.join(dir, "state.json"));
    if (typeof candidate === "string") workItem = candidate;
  } catch { /* ignored */ }

  const checkpointPath = path.join(dir, "trust.checkpoint.json");
  const envelope: AnyObj = {
    schema_version: "1.0",
    slug,
    work_item: workItem,
    status,
    phase,
    sealed_at: sealedAt,
    commit_sha: resolveCommitSha(),
    checkpoint,
  };
  writeJson(checkpointPath, envelope);

  // Increment B1: sign the checkpoint at the release boundary. The guard here is
  // defense-in-depth so a signing problem never propagates upward.
  try {
    await signCheckpointAttestation(dir, surface, bundle, checkpointPath);
  } catch (err) {
    process.stderr.write(`[checkpoint-signing] signing skipped due to error: ${err instanceof Error ? err.message : String(err)}\n`);
  }
}
1471
+
1472
/**
 * Increment B1 — Sign the trust checkpoint with in-toto/Sigstore.
 *
 * Called from sealTrustCheckpoint AFTER trust.checkpoint.json is written.
 * Computes the sha256 digest of the checkpoint file, builds an in-toto Statement
 * (subject = checkpoint file digest, predicate = trust bundle), and attempts Sigstore
 * keyless signing.
 *
 *   - Signed:   writes trust.checkpoint.sig.json (the DSSE envelope returned by Surface)
 *               and removes any stale trust.checkpoint.intoto.json.
 *   - Unsigned: writes trust.checkpoint.intoto.json (the unsigned statement)
 *               and removes any stale trust.checkpoint.sig.json.
 *   - Always:   writes trust.checkpoint.attestation.json with {status, path, ...}.
 *   trust.checkpoint.json is NOT modified after its digest is computed.
 *
 * Only one of the two attestation artifacts is kept because each is bound to the
 * digest of the checkpoint that was current when it was produced; a leftover from an
 * earlier seal would describe a checkpoint that has since been overwritten.
 *
 * Error behaviour: a throw from signStatementWithSigstore is caught here and treated as
 * "unsigned". Filesystem errors and a throw from toInTotoStatement are NOT caught here;
 * sealTrustCheckpoint catches them and reports them on stderr.
 * Skips (with a stderr note) when Surface's toInTotoStatement / signStatementWithSigstore
 * are absent.
 *
 * @param dir            Session artifact directory.
 * @param surface        Loaded Surface module (may or may not have in-toto/sigstore exports).
 * @param bundle         Parsed trust.bundle (becomes the in-toto predicate).
 * @param checkpointPath Path to the already-written trust.checkpoint.json.
 */
async function signCheckpointAttestation(
  dir: string,
  surface: SurfaceModule,
  bundle: AnyObj,
  checkpointPath: string,
): Promise<void> {
  // Guard: both primitives must be present (consumed from Surface, never reimplemented).
  if (typeof surface.toInTotoStatement !== "function" || typeof surface.signStatementWithSigstore !== "function") {
    process.stderr.write("[checkpoint-signing] Surface in-toto/sigstore primitives unavailable — skipping attestation\n");
    return;
  }

  // Step A: sha256 digest of trust.checkpoint.json (the SUBJECT of the statement).
  const checkpointBytes = fs.readFileSync(checkpointPath);
  const sha256hex = createHash("sha256").update(checkpointBytes).digest("hex");

  // Step B: build the in-toto Statement (subject = checkpoint file, predicate = bundle).
  const subjects = [{ name: "trust.checkpoint.json", digest: { sha256: sha256hex } }];
  const statement = surface.toInTotoStatement(bundle as Record<string, unknown>, { subjects });

  // Step C: attempt Sigstore keyless signing. A null result is handled as the unsigned
  // case (the log line below describes it as "no ambient OIDC identity").
  let signed: { envelope: { payloadType: "application/vnd.in-toto+json"; payload: string; signatures: Array<{ keyid: string; sig: string }> }; sigstoreBundle: unknown; assuranceLevel: "signed" } | null = null;
  try {
    signed = await surface.signStatementWithSigstore(statement);
  } catch (err) {
    // Fail-open: an unexpected signing failure falls through to the unsigned path.
    process.stderr.write(`[checkpoint-signing] signStatementWithSigstore threw: ${err instanceof Error ? err.message : String(err)}\n`);
    signed = null;
  }

  const sigPath = path.join(dir, "trust.checkpoint.sig.json");
  const unsignedPath = path.join(dir, "trust.checkpoint.intoto.json");

  let attestation: AnyObj;
  if (signed) {
    // Signed path: write the DSSE envelope and drop any unsigned statement from a prior seal.
    writeJson(sigPath, signed.envelope);
    fs.rmSync(unsignedPath, { force: true });
    const keyid = signed.envelope.signatures[0]?.keyid ?? "";
    attestation = {
      status: "signed",
      path: "trust.checkpoint.sig.json",
      keyid,
    };
    process.stderr.write(`[checkpoint-signing] checkpoint signed with Sigstore — envelope written to ${sigPath}\n`);
  } else {
    // Unsigned path: write the statement for audit and drop any envelope from a prior seal,
    // since that envelope no longer matches the checkpoint that was just rewritten.
    writeJson(unsignedPath, statement);
    fs.rmSync(sigPath, { force: true });
    attestation = {
      status: "unsigned",
      path: "trust.checkpoint.intoto.json",
      reason: "no ambient signing identity",
    };
    process.stderr.write("[checkpoint-signing] no ambient OIDC identity — unsigned in-toto statement written (expected locally)\n");
  }

  // Step D: write the attestation record to a SEPARATE companion file so that
  // trust.checkpoint.json stays byte-identical to what the digest was computed over.
  const attestationPath = path.join(dir, "trust.checkpoint.attestation.json");
  writeJson(attestationPath, attestation);
}
1559
+
1560
/**
 * seal-checkpoint <dir> [--timestamp <iso>] [--task-slug <slug>]
 *
 * Explicit seal of the trust checkpoint for the given artifact dir, for callers that
 * want to seal without re-running advance-state. Status and phase are taken from
 * state.json when they are strings, defaulting to "delivered" / "release".
 *
 * Prints the checkpoint path on stdout when trust.checkpoint.json exists afterwards.
 * When trust.bundle is missing, or the checkpoint was not written, a note goes to
 * stderr and the command still returns 0. Unlike the automatic call sites, errors
 * thrown by sealTrustCheckpoint are not caught here.
 *
 * Usage: workflow-sidecar seal-checkpoint <artifactDir> [--timestamp <iso>]
 *
 * @param p Parsed CLI arguments; positional[0] is the artifact directory.
 * @returns 0.
 */
async function sealCheckpoint(p: ReturnType<typeof parseArgs>): Promise<number> {
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
  const slug = taskSlugFor(dir, opt(p, "task-slug"));
  const timestamp = opt(p, "timestamp", now());
  const stateRaw = loadJson(path.join(dir, "state.json"));
  const status = typeof stateRaw.status === "string" ? stateRaw.status : "delivered";
  const phase = typeof stateRaw.phase === "string" ? stateRaw.phase : "release";

  const bundlePath = path.join(dir, "trust.bundle");
  if (!fs.existsSync(bundlePath)) {
    // Escaped newline (not a raw line break inside the literal) so the message never
    // picks up source indentation; matches the other stderr messages in this file.
    process.stderr.write(`[seal-checkpoint] no trust.bundle at ${bundlePath} — skipping (nothing to seal)\n`);
    return 0;
  }

  await sealTrustCheckpoint(dir, slug, timestamp, status, phase);

  const checkpointPath = path.join(dir, "trust.checkpoint.json");
  if (fs.existsSync(checkpointPath)) {
    console.log(checkpointPath);
  } else {
    process.stderr.write("[seal-checkpoint] checkpoint was not written — @kontourai/surface may be unavailable\n");
  }
  return 0;
}
1594
+
1595
+ // ─── Publish Delivery Bundle ──────────────────────────────────────────────────
1596
+ // Copies the session's trust.bundle (+ checkpoint companions) from the gitignored
1597
+ // session artifact dir (.flow-agents/<slug>/) to the committed delivery/ transport
1598
+ // path so the CI trust-reconcile job can reconcile it against fresh CI results.
1599
+ //
1600
+ // Fail-soft: if trust.bundle is absent (no evidence recorded yet), does nothing.
1601
+ // Idempotent: overwrites on re-delivery.
1602
+ // Called automatically from recordRelease and advanceState→delivered (best-effort).
1603
+ // Also exposed as the `publish-delivery <artifact-dir>` subcommand for explicit use.
1604
+
1605
/**
 * Publish the session's trust artifacts to the committed delivery/ path.
 *
 * Copies trust.bundle and, when present, trust.checkpoint.json,
 * trust.checkpoint.intoto.json and trust.checkpoint.sig.json from the session
 * artifact dir to <repoRoot>/delivery/.
 *
 * A companion that is absent from the session dir is also removed from delivery/, so
 * a re-delivery never leaves a checkpoint or signature from an earlier delivery next
 * to a newer trust.bundle.
 *
 * Fail-soft: if trust.bundle is absent, returns without touching delivery/.
 * Idempotent: overwrites on re-delivery.
 * Filesystem errors propagate; the automatic call sites wrap this in .catch().
 *
 * @param dir      Session artifact directory (source).
 * @param repoRoot Repository root; files land in <repoRoot>/delivery/.
 */
export async function publishDelivery(dir: string, repoRoot: string): Promise<void> {
  const bundleSrc = path.join(dir, "trust.bundle");
  if (!fs.existsSync(bundleSrc)) return; // no bundle — skip gracefully

  const deliveryDir = path.join(repoRoot, "delivery");
  fs.mkdirSync(deliveryDir, { recursive: true });

  // Required: trust.bundle (the CI anchor)
  fs.copyFileSync(bundleSrc, path.join(deliveryDir, "trust.bundle"));

  // Optional companions: checkpoint + signing artifacts
  const companions = [
    "trust.checkpoint.json",
    "trust.checkpoint.intoto.json",
    "trust.checkpoint.sig.json",
  ];
  for (const filename of companions) {
    const src = path.join(dir, filename);
    const dest = path.join(deliveryDir, filename);
    if (fs.existsSync(src)) {
      fs.copyFileSync(src, dest);
    } else {
      // Drop a stale copy from a previous delivery; force makes a missing file a no-op.
      fs.rmSync(dest, { force: true });
    }
  }

  process.stderr.write(`[publish-delivery] published trust.bundle and companions to ${deliveryDir}\n`);
}
1640
+
1641
/**
 * publish-delivery <artifact-dir> [--repo-root <path>]
 *
 * CLI wrapper around publishDelivery for explicit use (for example by the deliver
 * skill or a human). The repository root is --repo-root when given, otherwise it is
 * derived from the artifact directory via findRepoRootFromDir.
 *
 * Usage: workflow-sidecar publish-delivery <artifactDir> [--repo-root <path>]
 *
 * @param p Parsed CLI arguments; positional[0] is the artifact directory.
 * @returns 0 once publishDelivery resolves.
 */
async function publishDeliveryCmd(p: ReturnType<typeof parseArgs>): Promise<number> {
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
  const explicitRoot = opt(p, "repo-root");
  // Only fall back to discovery when no explicit root was supplied.
  const repoRoot = explicitRoot ? explicitRoot : findRepoRootFromDir(dir);
  await publishDelivery(dir, repoRoot);
  return 0;
}
1657
+
612
1658
  export function validateLearningCorrection(record: AnyObj): void {
613
1659
  const correction = record.correction;
614
1660
  if (correction === undefined) return;
@@ -647,7 +1693,7 @@ export function normalizeLearning(raw: AnyObj, timestamp: string): AnyObj {
647
1693
  validateLearningCorrection(raw);
648
1694
  return { recorded_at: timestamp, ...raw };
649
1695
  }
650
- function recordLearning(p: ReturnType<typeof parseArgs>): number {
1696
+ async function recordLearning(p: ReturnType<typeof parseArgs>): Promise<number> {
651
1697
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
652
1698
  const slug = taskSlugFor(dir, opt(p, "task-slug"));
653
1699
  const timestamp = opt(p, "timestamp", now());
@@ -657,16 +1703,53 @@ function recordLearning(p: ReturnType<typeof parseArgs>): number {
657
1703
  if (status === "learned" && records.some((r) => r.correction === undefined)) die("learning status learned requires every record to include correction.needed");
658
1704
  writeJson(path.join(dir, "learning.json"), { ...sidecarBase(slug), status, updated_at: timestamp, records });
659
1705
  writeState(dir, slug, "accepted", "learning", timestamp, opt(p, "summary"));
1706
+ // Phase 4c: build bundle from raw inputs; read checks/critiques from trust.bundle (bespoke sidecars no longer written).
1707
+ // ADR 0016 Step 0: pass declaredClaimTypes so declared builder.* claims survive the round-trip.
1708
+ const _dctLearning = declaredClaimTypesFor(dir);
1709
+ const _learningChecks: AnyObj[] = checksFromBundle(dir, _dctLearning);
1710
+ const _learningCriteria: AnyObj[] = Array.isArray(loadJson(path.join(dir, "acceptance.json")).criteria) ? loadJson(path.join(dir, "acceptance.json")).criteria : [];
1711
+ const _learningCritiques: AnyObj[] = critiquesFromBundle(dir, _dctLearning);
1712
+ assertBundleWritten(await writeTrustBundle(dir, slug, timestamp, _learningChecks, _learningCriteria, _learningCritiques));
660
1713
  return 0;
661
1714
  }
662
- function evidenceClean(dir: string): boolean {
1715
/**
 * Report whether the session's recorded evidence is clean.
 *
 * Bundle path (used whenever trust.bundle has a claims array): considers claims whose
 * claimType starts with "workflow.check." or is in `declaredClaimTypes`; clean means at
 * least one such claim exists and every one has value "pass" or "skip".
 *
 * Legacy path (no claims array): evidence.json must have verdict "pass", a non-empty
 * checks array, every check "pass" or "skip", and any standard_refs limited to
 * junit / sarif / coverage / veritas.
 *
 * @param dir                Session artifact directory.
 * @param declaredClaimTypes Extra claim types counted as check claims (ADR 0016 Step 0).
 */
function evidenceClean(dir: string, declaredClaimTypes: Set<string> = new Set()): boolean {
  const bundle = loadJson(path.join(dir, "trust.bundle"));
  if (Array.isArray(bundle.claims)) {
    let matched = 0;
    for (const claim of bundle.claims as AnyObj[]) {
      if (!claim) continue;
      const claimType = String(claim.claimType || "");
      if (!claimType.startsWith("workflow.check.") && !declaredClaimTypes.has(claimType)) continue;
      matched += 1;
      const value = String(claim.value || "");
      if (value !== "pass" && value !== "skip") return false;
    }
    // No matching claims at all means nothing has been verified yet.
    return matched > 0;
  }

  // Legacy fallback: evidence.json
  const legacy = loadJson(path.join(dir, "evidence.json"), {});
  if (legacy.verdict !== "pass") return false;
  if (!Array.isArray(legacy.checks) || legacy.checks.length === 0) return false;
  const knownStandards = ["junit", "sarif", "coverage", "veritas"];
  return legacy.checks.every((check: AnyObj) => {
    if (check.status !== "pass" && check.status !== "skip") return false;
    if (!Array.isArray(check.standard_refs)) return true;
    return check.standard_refs.every((ref: AnyObj) => knownStandards.includes(ref.standard));
  });
}
669
- function critiqueClean(dir: string): boolean {
1739
/**
 * Report whether the session's recorded critiques are clean.
 *
 * Bundle path (used whenever trust.bundle has a claims array): considers claims whose
 * claimType is "workflow.critique.review" or is in `declaredClaimTypes`; clean means at
 * least one such claim exists and none has value "fail" or status "disputed"/"rejected".
 *
 * Legacy path (no claims array): critique.json must have status "pass" and a critiques
 * array in which no critique has verdict "fail", no finding is "open", and every
 * finding's file_refs is either undefined or an array.
 *
 * NOTE(review): unlike critiquesFromBundle, the bundle path here does not exclude claims
 * that have evidence items, so declared check-type claims are counted too — confirm
 * this is intended.
 *
 * @param dir                Session artifact directory.
 * @param declaredClaimTypes Extra claim types counted as critique claims (ADR 0016 Step 0).
 */
function critiqueClean(dir: string, declaredClaimTypes: Set<string> = new Set()): boolean {
  const bundle = loadJson(path.join(dir, "trust.bundle"));
  if (Array.isArray(bundle.claims)) {
    let matched = 0;
    for (const claim of bundle.claims as AnyObj[]) {
      if (!claim) continue;
      if (claim.claimType !== "workflow.critique.review" && !declaredClaimTypes.has(claim.claimType)) continue;
      matched += 1;
      const value = String(claim.value || "");
      if (value === "fail" || claim.status === "disputed" || claim.status === "rejected") return false;
    }
    return matched > 0; // zero matches: no critique written yet
  }

  // Legacy fallback: critique.json
  const legacy = loadJson(path.join(dir, "critique.json"), {});
  if (legacy.status !== "pass" || !Array.isArray(legacy.critiques)) return false;
  return legacy.critiques.every((entry: AnyObj) => {
    if (entry.verdict === "fail") return false;
    if (!Array.isArray(entry.findings)) return true;
    return entry.findings.every((finding: AnyObj) => finding.status !== "open" && (finding.file_refs === undefined || Array.isArray(finding.file_refs)));
  });
}
@@ -683,7 +1766,7 @@ function assertExistingLearningValid(dir: string): void {
683
1766
  if (data.status === "learned" && record.correction === undefined) die("learning status learned requires every record to include correction.needed");
684
1767
  }
685
1768
  }
686
- function dogfoodPass(p: ReturnType<typeof parseArgs>): number {
1769
+ async function dogfoodPass(p: ReturnType<typeof parseArgs>): Promise<number> {
687
1770
  const root = path.resolve(opt(p, "artifact-root", ".flow-agents"));
688
1771
  const dir = path.resolve(opt(p, "artifact-dir") || currentDir(root) || "");
689
1772
  requireArtifactDirUnderRoot(dir, root);
@@ -692,22 +1775,29 @@ function dogfoodPass(p: ReturnType<typeof parseArgs>): number {
692
1775
  if (verdict === "pass") {
693
1776
  const checks = opts(p, "check-json").map((v) => normalizeCheck(parseJson(v, "--check-json")));
694
1777
  if (checks.some((c) => c.status !== "pass" && c.status !== "skip")) die("clean evidence requires all non-skipped checks to pass");
695
- if (fs.existsSync(path.join(dir, "evidence.json")) && !evidenceClean(dir)) die("cannot mark clean without passing evidence");
696
- if (!fs.existsSync(path.join(dir, "evidence.json")) && checks.length === 0) die("cannot mark clean without passing evidence");
1778
+ // Phase 4c: evidence check reads from trust.bundle (sole verification artifact); legacy evidence.json fallback in evidenceClean.
1779
+ // ADR 0016 Step 0: pass declaredClaimTypes so builder.* check/critique claims count as clean evidence.
1780
+ const _dctDogfood = declaredClaimTypesFor(dir);
1781
+ const _hasBundleEvidence = fs.existsSync(path.join(dir, "trust.bundle")) && evidenceClean(dir, _dctDogfood);
1782
+ const _hasLegacyEvidence = fs.existsSync(path.join(dir, "evidence.json")) && evidenceClean(dir, _dctDogfood);
1783
+ if (!_hasBundleEvidence && !_hasLegacyEvidence && fs.existsSync(path.join(dir, "trust.bundle"))) die("cannot mark clean without passing evidence");
1784
+ if (!_hasBundleEvidence && !_hasLegacyEvidence && !fs.existsSync(path.join(dir, "trust.bundle")) && fs.existsSync(path.join(dir, "evidence.json"))) die("cannot mark clean without passing evidence");
1785
+ if (!_hasBundleEvidence && !_hasLegacyEvidence && !fs.existsSync(path.join(dir, "trust.bundle")) && !fs.existsSync(path.join(dir, "evidence.json")) && checks.length === 0) die("cannot mark clean without passing evidence");
697
1786
  if (p.flags.has("require-critique") || opt(p, "release-decision")) {
698
1787
  const newCritiqueVerdict = opt(p, "critique-verdict", "pass");
699
1788
  for (const value of opts(p, "finding-json")) normalizeFinding(parseJson(value, "--finding-json"));
700
1789
  if (newCritiqueVerdict !== "pass") die(opt(p, "release-decision") ? "requires clean critique" : "requires clean critique before recording pass evidence");
701
- if (!opt(p, "critique-id") && !critiqueClean(dir)) die("requires passing critique");
702
- if (fs.existsSync(path.join(dir, "critique.json")) && !critiqueClean(dir)) die(opt(p, "release-decision") ? "requires clean critique" : "requires clean critique before recording pass evidence");
1790
+ if (!opt(p, "critique-id") && !critiqueClean(dir, _dctDogfood)) die("requires passing critique");
1791
+ // Phase 4c: if existing state has a dirty critique (in bundle or legacy critique.json), block even when adding a new critique-id.
1792
+ if (!critiqueClean(dir, _dctDogfood) && (fs.existsSync(path.join(dir, "trust.bundle")) || fs.existsSync(path.join(dir, "critique.json")))) die(opt(p, "release-decision") ? "requires clean critique" : "requires clean critique before recording pass evidence");
703
1793
  }
704
1794
  }
705
1795
  const learningRecords = opts(p, "learning-record-json").map((v) => normalizeLearning(parseJson(v, "--learning-record-json"), opt(p, "timestamp", now())));
706
1796
  if (opt(p, "learning-status") === "learned" && learningRecords.some((r) => r.routing.some((x: AnyObj) => x.status === "open"))) die("learned status cannot have open learning routing");
707
1797
  if (opt(p, "learning-status") === "learned" && learningRecords.some((r) => r.correction === undefined)) die("learned status requires every learning record to include correction.needed");
708
- if (opts(p, "check-json").length) recordEvidence({ ...p, positional: [dir], opts: { ...p.opts, verdict: [verdict] }, flags: p.flags });
709
- if (p.flags.has("require-critique") && opt(p, "critique-id")) recordCritique({ ...p, positional: [dir], opts: { ...p.opts, id: [opt(p, "critique-id")], verdict: [opt(p, "critique-verdict", "pass")], summary: [opt(p, "critique-summary", opt(p, "summary"))] }, flags: p.flags });
710
- if (learningRecords.length) recordLearning({ ...p, positional: [dir], opts: { ...p.opts, status: [opt(p, "learning-status", "learned")], "record-json": opts(p, "learning-record-json"), summary: [opt(p, "learning-summary", opt(p, "summary"))] }, flags: p.flags });
1798
+ if (opts(p, "check-json").length) await recordEvidence({ ...p, positional: [dir], opts: { ...p.opts, verdict: [verdict] }, flags: p.flags });
1799
+ if (p.flags.has("require-critique") && opt(p, "critique-id")) await recordCritique({ ...p, positional: [dir], opts: { ...p.opts, id: [opt(p, "critique-id")], verdict: [opt(p, "critique-verdict", "pass")], summary: [opt(p, "critique-summary", opt(p, "summary"))] }, flags: p.flags });
1800
+ if (learningRecords.length) await recordLearning({ ...p, positional: [dir], opts: { ...p.opts, status: [opt(p, "learning-status", "learned")], "record-json": opts(p, "learning-record-json"), summary: [opt(p, "learning-summary", opt(p, "summary"))] }, flags: p.flags });
711
1801
  if (opt(p, "release-decision")) {
712
1802
  recordRelease({ ...p, positional: [dir], opts: { ...p.opts, decision: [opt(p, "release-decision")], scope: [opt(p, "release-scope")], summary: [opt(p, "release-summary", opt(p, "summary"))], "gate-json": ['{"name":"merge","status":"pass","summary":"Dogfood release gate passed."}'], "evidence-ref": ["evidence.json"], "docs-json": [`{"status":"updated","summary":"Docs updated.","refs":["${opt(p, "release-doc-ref", "docs/workflow-usage-guide.md")}"]}`] }, flags: p.flags });
713
1803
  printJson({ release_decision: opt(p, "release-decision") });
@@ -720,14 +1810,903 @@ function dogfoodPass(p: ReturnType<typeof parseArgs>): number {
720
1810
  writeJson(path.join(dir, "handoff.json"), handoff);
721
1811
  }
722
1812
  writeState(dir, taskSlugFor(dir, opt(p, "task-slug")), stateStatus, "verification", opt(p, "timestamp", now()), opt(p, "summary"), verdict === "pass" ? "continue" : "blocked");
1813
+ // Phase 4c: bundle was already written by recordEvidence/recordCritique above (if called).
1814
+ // If neither ran (e.g. verdict=fail with no check-json), re-build from bundle (no bespoke sidecars).
723
1815
  printJson({ state_status: stateStatus });
724
1816
  return 0;
725
1817
  }
726
1818
 
1819
+ // ─── Gate Review — Canonical InquiryRecord output ────────────────────────────
1820
+ // Reads trust.bundle + gate block signal, classifies gate fires/misses (as
1821
+ // correct / false_block / missed_block), and emits gate-review.inquiries.json
1822
+ // as an array of canonical Surface InquiryRecords. ADVISORY ONLY — #119.
1823
+ // Never modifies scripts/hooks/. Consumes Surface.resolveInquiry; no fork.
1824
+
1825
/**
 * Shape of a claim from the trust.bundle.
 *
 * Only the fields listed here are read by the gate-review code in this file;
 * the bundle itself is produced elsewhere.
 */
export interface TrustClaim {
  /** Claim identifier; cited in gate-review questions and advisory fixes. */
  id: string;
  subjectType: string;
  /** Subject the claim is about; gate-review matches expected criteria against this value. */
  subjectId: string;
  surface: string;
  /** Claim category, e.g. "workflow.critique.review" or "workflow.acceptance.criterion". */
  claimType: string;
  fieldOrBehavior: string;
  value: string;
  // NOTE(review): presumably ISO-8601 timestamps — confirm against the bundle writer.
  createdAt: string;
  updatedAt: string;
  /** Trust status of the claim. */
  status: "verified" | "disputed" | "assumed" | "proposed" | "rejected" | "stale" | "unknown";
}
1838
+
1839
/**
 * Shape of the trust.bundle file.
 *
 * gate-review in this file only inspects `claims`; the remaining collections
 * are passed through untouched to Surface.
 */
export interface BundleFile {
  schemaVersion: number;
  source: string;
  /** Claims recorded for the session. */
  claims: TrustClaim[];
  evidence: AnyObj[];
  events: AnyObj[];
  policies: AnyObj[];
}
1848
+
1849
/**
 * The gate block signal read from .flow-agents/.goal-fit-block-streak.json.
 * Produced by readGateBlockSignal(); all-false/zero when the file is absent
 * or unreadable.
 */
export interface GateBlockSignal {
  /** True when the streak file exists AND count >= 1 */
  blocked: boolean;
  /** The hash from the streak file (for rationale citation) */
  hash: string | null;
  /** The consecutive block count */
  count: number;
}
1858
+
1859
/**
 * The gate-review calibration verdict, stored in InquiryRecord.answer.value.
 * This is gate-review's value-add over the canonical InquiryRecord outcome.
 *
 * - "correct":      the gate's action (block or no block) matched the claim state
 * - "false_block":  the gate blocked although the claim state did not warrant it
 * - "missed_block": the gate did not block although it should have
 */
export type GateCalibration = "correct" | "false_block" | "missed_block";
1864
+
1865
/**
 * Read the gate block signal from .flow-agents/.goal-fit-block-streak.json
 * (written by scripts/hooks/stop-goal-fit.js when block mode fires).
 * The file sits at <artifact-root>/.goal-fit-block-streak.json — one level
 * above the session artifact dir.
 *
 * Fail-open: returns { blocked: false, hash: null, count: 0 } when the file is
 * absent, unreadable, or not valid JSON. A `count` that does not coerce to a
 * number is treated as 0 so NaN never leaks into the returned signal.
 *
 * @param artifactRoot The .flow-agents root dir (parent of session slug dir).
 * @returns The block signal; `blocked` is true only when count >= 1.
 */
export function readGateBlockSignal(artifactRoot: string): GateBlockSignal {
  const streakFile = path.join(artifactRoot, ".goal-fit-block-streak.json");
  try {
    if (!fs.existsSync(streakFile)) return { blocked: false, hash: null, count: 0 };
    const raw = JSON.parse(fs.readFileSync(streakFile, "utf8"));
    const parsedCount = Number(raw?.count ?? 0);
    // Number("abc") and Number({}) are NaN; normalise so `count` is always a real number.
    const count = Number.isNaN(parsedCount) ? 0 : parsedCount;
    const hash = typeof raw?.hash === "string" ? raw.hash : null;
    return { blocked: count >= 1, hash, count };
  } catch {
    // Advisory signal only — any read/parse failure means "no block observed".
    return { blocked: false, hash: null, count: 0 };
  }
}
1886
+
1887
/**
 * Derive the gate-review calibration from a resolved InquiryRecord and the
 * block signal. Pure function — no I/O.
 *
 * Mapping (an absent `answerStatus` is treated as "unknown"):
 *   outcome="unsupported" (absent claim), any blocked            → missed_block
 *   status="disputed"|"rejected",          blocked=true          → correct
 *   any other status,                      blocked=true          → false_block
 *     (verified/assumed: nothing to block on; stale/unknown/proposed: gate
 *      fired without solid evidence)
 *   status="disputed"|"rejected",          blocked=false         → missed_block
 *     (a failing claim should have produced a block)
 *   status="stale"|"unknown"|"proposed",   blocked=false         → missed_block
 *   any other status (verified/assumed…),  blocked=false         → correct
 *
 * "matched" and "derived" outcomes are classified identically.
 *
 * @param outcome      Outcome reported by Surface's resolveInquiry.
 * @param answerStatus Trust status of the resolved answer, if any.
 * @param blocked      Whether the gate block signal was set.
 */
export function deriveGateCalibration(
  outcome: "matched" | "derived" | "unsupported",
  answerStatus: string | undefined,
  blocked: boolean,
): GateCalibration {
  // Anything that is not a resolved claim (including unexpected outcomes at
  // runtime) means the gate had nothing to evaluate.
  if (outcome !== "matched" && outcome !== "derived") return "missed_block";

  const status = answerStatus ?? "unknown";
  const failing = status === "disputed" || status === "rejected";

  if (blocked) {
    // A block is only justified by a failing claim.
    return failing ? "correct" : "false_block";
  }

  // Not blocked: a failing or unresolved claim should have stopped the gate.
  if (failing) return "missed_block";
  if (status === "stale" || status === "unknown" || status === "proposed") return "missed_block";
  // verified/assumed and no block — no block warranted, none issued.
  return "correct";
}
1923
+
1924
/**
 * Compose the advisory proposed-fix string for a gate-review finding.
 * Pure function — no I/O.
 *
 * @param calibration  Calibration verdict for the finding.
 * @param claimId      Claim (or expected subject) id quoted in the message.
 * @param answerStatus Trust status quoted in the message; undefined is shown
 *                     as "unknown". The sentinel "absent" selects the
 *                     "no claim exists in the bundle" wording.
 * @returns Human-readable advisory text; never modifies any gate.
 */
export function gateAdvisoryFix(
  calibration: GateCalibration,
  claimId: string,
  answerStatus: string | undefined,
): string {
  const s = answerStatus ?? "unknown";
  if (calibration === "correct") {
    // "correct" also covers the no-block case for healthy claims; there is no
    // failure to resolve then, so do not tell the reader a block was warranted.
    if (s === "verified" || s === "assumed") {
      return `No gate change needed — claim \`${claimId}\` has status \`${s}\` and the gate did not block, so no block was warranted.`;
    }
    return `No gate change needed — block was warranted. Resolve the failure in claim \`${claimId}\` (status: \`${s}\`) and re-run gate-review to confirm the gate clears.`;
  }
  if (calibration === "false_block") {
    return `Investigate why the gate blocked when claim \`${claimId}\` has status \`${s}\`. Check whether stop-goal-fit evaluated a stale bundle snapshot or whether the block trigger was unrelated to bundle claims. If the block was spurious, add a freshness check to the gate evaluation loop.`;
  }
  // missed_block
  if (s === "stale") {
    return `Refresh the stale claim \`${claimId}\` by re-running the evidence capture step, then re-run gate-review to confirm the gate fires on updated data.`;
  }
  if (s === "absent") {
    return `Ensure \`workflow-sidecar record-evidence\` writes a bundle claim for \`${claimId}\` before \`stop-goal-fit\` evaluates. Currently no claim exists in the bundle — the gate has nothing to evaluate.`;
  }
  return `Ensure \`workflow-sidecar record-evidence\` writes a definitive event for claim \`${claimId}\` (currently \`${s}\`) before \`stop-goal-fit\` evaluates. The gate had no resolved evidence to act on.`;
}
1949
+
1950
/**
 * Project a Surface InquiryRecord onto the shape accepted by the hachure
 * inquiry-record.schema.json.
 *
 * Only `claimIds`, `ruleId` and `ruleVersion` are copied from
 * `raw.resolutionPath`; Surface-internal fields such as identityLinkIds and
 * transitiveRuleIds (valid in the TS type, not in the JSON schema) are dropped.
 * `answer.status` carries the resolved trust status ("unknown" when the raw
 * record has no answer) and `answer.value` carries gate-review's advisory
 * payload: { calibration, advisoryFix, gateFired, sessionSlug }.
 */
function toSchemaInquiryRecord(
  raw: SurfaceInquiryRecord,
  calibration: GateCalibration,
  advisoryFix: string,
  blocked: boolean,
  slug: string,
): AnyObj {
  const source = raw.resolutionPath;
  const resolutionPath: AnyObj = { claimIds: source.claimIds };
  // Optional rule references are emitted only when Surface supplied them.
  for (const key of ["ruleId", "ruleVersion"] as const) {
    if (source[key] !== undefined) resolutionPath[key] = source[key];
  }

  const gateReviewValue = {
    calibration,
    advisoryFix,
    gateFired: blocked,
    sessionSlug: slug,
  };

  return {
    id: raw.id,
    inquiry: raw.inquiry,
    outcome: raw.outcome,
    resolutionPath,
    inputSnapshot: raw.inputSnapshot,
    statusFunctionVersion: raw.statusFunctionVersion,
    resolvedAt: raw.resolvedAt,
    answer: {
      status: raw.answer?.status ?? "unknown",
      value: gateReviewValue,
    },
  };
}
1990
+
1991
/**
 * Build the canonical InquiryRecords for one session's gate review, resolving
 * every inquiry through the supplied Surface module's resolveInquiry.
 *
 * Three passes, in order:
 *   1. one inquiry per claim in the bundle;
 *   2. one inquiry per expected criterion that no bundle claim covers
 *      (matched on `<slug>/<criterionId>` or the bare criterion id);
 *   3. if both passes produced nothing, a single target-less inquiry that
 *      records the empty bundle as a missed_block.
 * Inquiry ids are `<slug>-gr-<n>` with n counting up from 1 across all passes.
 *
 * @param bundle               Parsed trust.bundle (BundleFile shape)
 * @param blockSignal          Result of readGateBlockSignal()
 * @param slug                 Task slug (used in inquiry ids and sessionSlug)
 * @param expectedCriterionIds Criterion ids to check for absent claims
 * @param surface              Loaded Surface module (must have resolveInquiry)
 * @param now                  Optional timestamp override for deterministic tests
 * @returns Schema-shaped InquiryRecords, never empty.
 */
export function buildGateInquiryRecords(
  bundle: BundleFile,
  blockSignal: GateBlockSignal,
  slug: string,
  expectedCriterionIds: string[],
  surface: SurfaceModule,
  now?: Date,
): AnyObj[] {
  const output: AnyObj[] = [];
  let sequence = 0;
  const askedAt = (now ?? new Date()).toISOString();
  const bundleRecord = bundle as unknown as Record<string, unknown>;
  const claims = Array.isArray(bundle?.claims) ? bundle.claims : [];
  const gateFired = blockSignal.blocked;

  // subjectIds that already have a claim in the bundle
  const coveredSubjects = new Set<string>(claims.map((entry) => entry.subjectId));

  // Hands out the next `<slug>-gr-<n>` inquiry id.
  const nextInquiryId = (): string => {
    sequence += 1;
    return `${slug}-gr-${sequence}`;
  };

  // ── Pass 1: every claim present in the bundle ─────────────────────────────
  for (const claim of claims) {
    const inquiry: SurfaceInquiry = {
      id: nextInquiryId(),
      question: `Was gate action on claim ${claim.id} (status: ${claim.status}) justified given the trust state?`,
      askedBy: "gate-review",
      askedAt,
      target: {
        subjectType: claim.subjectType,
        subjectId: claim.subjectId,
        fieldOrBehavior: claim.fieldOrBehavior,
      },
      metadata: { sessionSlug: slug, claimId: claim.id, blocked: gateFired },
    };
    const resolved = surface.resolveInquiry(bundleRecord, inquiry, { now });
    const verdict = deriveGateCalibration(resolved.outcome, resolved.answer?.status, gateFired);
    const fix = gateAdvisoryFix(verdict, claim.id, resolved.answer?.status ?? claim.status);
    output.push(toSchemaInquiryRecord(resolved, verdict, fix, gateFired, slug));
  }

  // ── Pass 2: expected criteria with no claim (missed_block candidates) ─────
  for (const criterionId of expectedCriterionIds) {
    const subjectId = `${slug}/${criterionId}`;
    const alreadyClaimed = coveredSubjects.has(subjectId) || coveredSubjects.has(criterionId);
    if (alreadyClaimed) continue;

    const inquiry: SurfaceInquiry = {
      id: nextInquiryId(),
      question: `Was acceptance criterion "${criterionId}" claimed in the trust.bundle before gate evaluation?`,
      askedBy: "gate-review",
      askedAt,
      target: {
        subjectType: "workflow-check",
        subjectId,
        fieldOrBehavior: criterionId,
      },
      metadata: { sessionSlug: slug, criterionId, blocked: gateFired, expectedCriterion: true },
    };
    const resolved = surface.resolveInquiry(bundleRecord, inquiry, { now });
    // No claim matches the absent criterion, so the outcome is expected to be "unsupported".
    const verdict = deriveGateCalibration(resolved.outcome, resolved.answer?.status, gateFired);
    const fix = gateAdvisoryFix(verdict, subjectId, "absent");
    output.push(toSchemaInquiryRecord(resolved, verdict, fix, gateFired, slug));
  }

  // ── Pass 3: nothing at all to review — record the empty bundle itself ─────
  if (output.length === 0) {
    const inquiry: SurfaceInquiry = {
      id: nextInquiryId(),
      question: `Does the trust.bundle for session "${slug}" contain any claims for gate evaluation?`,
      askedBy: "gate-review",
      askedAt,
      // No target — natural-language-only inquiry → resolveInquiry returns "unsupported"
      metadata: { sessionSlug: slug, blocked: gateFired, reason: "empty-bundle" },
    };
    const resolved = surface.resolveInquiry(bundleRecord, inquiry, { now });
    const fix = `Ensure \`workflow-sidecar record-evidence\` writes at least one claim to the trust.bundle for session \`${slug}\` before gate-review is invoked.`;
    output.push(toSchemaInquiryRecord(resolved, "missed_block", fix, gateFired, slug));
  }

  return output;
}
2088
+
2089
/**
 * gate-review <artifact-dir>
 *
 * Reads the session's trust.bundle and the gate block signal, classifies each
 * gate fire or suspected miss using Surface's resolveInquiry, and emits
 * gate-review.inquiries.json as an array of canonical InquiryRecords.
 * ADVISORY ONLY — never modifies scripts/hooks/. Issue #119.
 *
 * The block signal is read from <artifact-root>/.goal-fit-block-streak.json,
 * written by scripts/hooks/stop-goal-fit.js when block mode fires. The file
 * lives one level above the session slug dir (the .flow-agents root).
 *
 * Exit codes:
 *   0 — records written, or @kontourai/surface unavailable (fail-open, no
 *       bespoke fork fallback; a warning is logged and nothing is written)
 *   1 — trust.bundle is absent, unreadable, or not a JSON object
 *
 * Schema validation failures are reported on stderr but do not change the
 * exit code or suppress the output file.
 */
async function gateReview(p: ReturnType<typeof parseArgs>): Promise<number> {
  const dir = artifactDirFrom(p.positional[0] || die("artifact directory is required"));
  if (!fs.existsSync(dir)) die(`artifact directory does not exist: ${dir}`);
  const slug = taskSlugFor(dir, opt(p, "task-slug"));

  // Locate trust.bundle — required per SKILL.md contract
  const bundlePath = path.join(dir, "trust.bundle");
  if (!fs.existsSync(bundlePath)) {
    process.stderr.write(`[gate-review] trust.bundle absent at ${bundlePath} — NOT_VERIFIED. Build ADR 0010 Phase 1 first.\n`);
    return 1;
  }

  // Load Surface (ESM, fail-open)
  const surface = await tryLoadSurface();
  if (!surface || typeof surface.resolveInquiry !== "function") {
    process.stderr.write(`[gate-review] @kontourai/surface unavailable or missing resolveInquiry — gate-review skipped (no fork fallback)\n`);
    return 0;
  }

  // A corrupt bundle is reported the same way as a missing one instead of
  // surfacing a raw SyntaxError/TypeError from deep inside the command.
  let parsedBundle: unknown;
  try {
    parsedBundle = JSON.parse(fs.readFileSync(bundlePath, "utf8"));
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[gate-review] trust.bundle at ${bundlePath} could not be read as JSON (${reason}) — NOT_VERIFIED.\n`);
    return 1;
  }
  if (typeof parsedBundle !== "object" || parsedBundle === null || Array.isArray(parsedBundle)) {
    process.stderr.write(`[gate-review] trust.bundle at ${bundlePath} is not a JSON object — NOT_VERIFIED.\n`);
    return 1;
  }
  const bundle = parsedBundle as BundleFile;

  // Read gate block signal from .flow-agents root (one level above session dir)
  const artifactRoot = path.dirname(dir);
  const blockSignal = readGateBlockSignal(artifactRoot);

  // Enumerate expected criterion IDs: primary = bundle claims (workflow.acceptance.criterion),
  // fallback = acceptance.json (back-compat for sessions without an up-to-date bundle).
  const criterionClaims = Array.isArray(bundle.claims)
    ? (bundle.claims as AnyObj[]).filter((c: AnyObj) => c?.claimType === "workflow.acceptance.criterion")
    : [];
  let expectedCriterionIds: string[];
  if (criterionClaims.length > 0) {
    // Extract the final segment of subjectId (e.g. "slug/AC1" → "AC1")
    expectedCriterionIds = criterionClaims
      .map((c: AnyObj) => String(c.subjectId ?? "").split("/").pop() ?? "")
      .filter(Boolean);
  } else {
    // Fallback: read acceptance.json (back-compat for sessions without criterion claims)
    const acceptancePath = path.join(dir, "acceptance.json");
    const acceptance = fs.existsSync(acceptancePath) ? (loadJson(acceptancePath) as AnyObj) : null;
    expectedCriterionIds = Array.isArray(acceptance?.criteria)
      ? (acceptance!.criteria as AnyObj[]).map((c: AnyObj) => String(c.id ?? "")).filter(Boolean)
      : [];
  }

  const records = buildGateInquiryRecords(bundle, blockSignal, slug, expectedCriterionIds, surface);

  // Validate each record against the hachure inquiry-record.schema.json (fail-open:
  // with no validator available the records are written unvalidated).
  const validator = getHachureInquiryRecordValidator();
  const validationErrors: string[] = [];
  let schemaValid = true;
  if (validator) {
    for (const record of records) {
      const result = validator(record);
      if (result.valid) continue;
      schemaValid = false;
      validationErrors.push(...result.errors.map((e) => `${record["id"] ?? "?"}: ${e}`));
    }
  }
  if (!schemaValid) {
    process.stderr.write(`[gate-review] InquiryRecord schema validation errors:\n${validationErrors.join("\n")}\n`);
  }

  const outputPath = path.join(dir, "gate-review.inquiries.json");
  writeJson(outputPath, records);

  // Build summary counts by calibration (insertion order = first occurrence)
  const counts: Record<string, number> = {};
  for (const r of records) {
    const cal = (r["answer"] as AnyObj | undefined)?.["value"]?.["calibration"] ?? "unknown";
    counts[cal] = (counts[cal] ?? 0) + 1;
  }
  const summary = Object.entries(counts)
    .map(([k, n]) => `${k}=${n}`)
    .join(", ");
  const schemaTag = validator ? (schemaValid ? " schema:valid" : " schema:INVALID") : " schema:unavailable";
  console.log(`gate-review: ${records.length} InquiryRecord(s) [${summary}]${schemaTag} → ${outputPath}`);
  return 0;
}
2185
+ // ─────────────────────────────────────────────────────────────────────────────
2186
+
2187
+ // ─── ADR 0010 Phase 3: project the local trust.bundle to the Surface Trust Panel ──
2188
+ // Surface owns derivation (buildTrustReport) AND rendering (the dependency-free
2189
+ // <surface-trust-panel> element). Flow Agents only assembles a standalone HTML
2190
+ // shell — no trust logic or rendering reimplemented (consume-never-fork).
2191
+
2192
/**
 * Locate Surface's self-contained, dependency-free panel element (ESM, no require)
 * and return its source text.
 *
 * Starting from this module's own directory, each ancestor directory (at most
 * 12 levels, stopping at the filesystem root) is probed for
 * node_modules/@kontourai/surface/dist/src/trust-panel/surface-trust-panel.js.
 * Calls die() when no candidate can be read.
 */
function loadSurfacePanelJs(): string {
  const panelRelPath = "node_modules/@kontourai/surface/dist/src/trust-panel/surface-trust-panel.js";
  let current = path.dirname(fileURLToPath(import.meta.url));
  let attemptsLeft = 12;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    try {
      return fs.readFileSync(path.join(current, panelRelPath), "utf8");
    } catch {
      /* walk up */
    }
    const up = path.dirname(current);
    // dirname() of the root is the root itself — nowhere left to look.
    if (up === current) break;
    current = up;
  }
  die("could not locate @kontourai/surface trust-panel element (dist/src/trust-panel/surface-trust-panel.js)");
  return "";
}
2204
+
2205
/**
 * render-trust-panel [workflow-dir]
 *
 * Derives a trust report from the session's trust.bundle via Surface's
 * buildTrustReport and writes a standalone HTML page that embeds Surface's
 * <surface-trust-panel> element together with the report JSON. Unless
 * --no-report is given, the report is also written as a JSON file.
 *
 * Options read: --artifact-root (default ".flow-agents"), --out (default
 * <dir>/trust-panel.html), --report-out (default <dir>/trust-report.json),
 * flag --no-report.
 *
 * When <dir>/trust.checkpoint.json exists and Surface exposes diffFreshness,
 * the claims that went stale since that checkpoint are listed on stderr; any
 * failure in that step is ignored so it can never block the render.
 *
 * @returns 0 on success (fatal conditions go through die()).
 */
async function renderTrustPanel(p: ReturnType<typeof parseArgs>): Promise<number> {
  const root = path.resolve(opt(p, "artifact-root", ".flow-agents"));
  const dir = p.positional[0] ? artifactDirFrom(p.positional[0]) : currentDir(root);
  if (!dir) die("render-trust-panel requires a workflow dir or a recorded current session");

  const bundleFile = path.join(dir!, "trust.bundle");
  let bundle: AnyObj | null;
  try {
    bundle = JSON.parse(fs.readFileSync(bundleFile, "utf8"));
  } catch {
    bundle = null;
  }
  if (!bundle) die(`no trust.bundle at ${path.join(dir!, "trust.bundle")} — run record-evidence first`);

  const surface = (await import("@kontourai/surface")) as unknown as { buildTrustReport?: (b: unknown) => AnyObj; diffFreshness?: (prior: unknown, next: unknown) => Array<Record<string, unknown>> };
  if (typeof surface.buildTrustReport !== "function") die("@kontourai/surface buildTrustReport unavailable — cannot derive the trust report");
  const report = surface.buildTrustReport!(bundle);

  // diffFreshness on resume: if a prior trust.checkpoint.json exists, surface the
  // fresh→stale transitions so the user sees what has gone stale since the last seal.
  const checkpointFile = path.join(dir!, "trust.checkpoint.json");
  const canDiff = fs.existsSync(checkpointFile) && typeof surface.diffFreshness === "function";
  if (canDiff) {
    try {
      const envelope: AnyObj = JSON.parse(fs.readFileSync(checkpointFile, "utf8"));
      const priorCheckpoint = envelope.checkpoint;
      if (priorCheckpoint && typeof priorCheckpoint === "object") {
        const wentStale = surface.diffFreshness!(priorCheckpoint, report).filter((t) => t["to"] === "stale");
        if (wentStale.length === 0) {
          process.stderr.write(`[trust-checkpoint] 0 claims went stale since the last checkpoint (sealed ${String(envelope.sealed_at ?? "unknown")}).\n`);
        } else {
          const claimIds = wentStale.map((t) => String(t["claimId"] ?? "")).filter(Boolean);
          process.stderr.write(`[trust-checkpoint] ${wentStale.length} claim(s) went stale since the last checkpoint (sealed ${String(envelope.sealed_at ?? "unknown")}):\n${claimIds.map((id) => ` - ${id}`).join("\n")}\n`);
        }
      }
    } catch {
      /* diffFreshness is advisory — never block the panel render */
    }
  }

  const panelJs = loadSurfacePanelJs();
  // The directory name lands in <title> and an attribute value; drop markup-significant characters.
  const heading = `Flow Agents trust — ${String(path.basename(dir!)).replace(/[<>"&]/g, "")}`;
  // Escape "<" so the embedded JSON can never terminate its <script> element early.
  const reportJson = JSON.stringify(report).replace(/</g, "\\u003c");
  const html = `<!doctype html>
<html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>${heading}</title></head>
<body style="margin:0;padding:1.5rem;background:#f4f1e6">
<script type="module">
${panelJs}
</script>
<surface-trust-panel heading="${heading}"></surface-trust-panel>
<script id="trust-report" type="application/json">${reportJson}</script>
<script type="module">document.querySelector("surface-trust-panel").report = JSON.parse(document.getElementById("trust-report").textContent);</script>
</body></html>
`;
  const out = opt(p, "out") || path.join(dir!, "trust-panel.html");
  fs.writeFileSync(out, html);

  // Also emit the derived report as a first-class artifact — the universal input for
  // Surface's hosted Snapshot Viewer and a bare `<surface-trust-panel src=…>` (the HTML
  // above already embeds it). Suppress with --no-report.
  const wantReport = !p.flags.has("no-report");
  const reportOut = wantReport ? opt(p, "report-out") || path.join(dir!, "trust-report.json") : "";
  if (wantReport) {
    fs.writeFileSync(reportOut, `${JSON.stringify(report, null, 2)}\n`);
  }

  console.log(out);
  if (reportOut) console.log(reportOut);
  return 0;
}
2264
+ // ─────────────────────────────────────────────────────────────────────────────
2265
+
2266
+ // ─── flow-agents#137 / ADR 0011: wire Surface's MCP to surface trust reports ──
2267
+ // Flow Agents produces the bundle; Surface's MCP projects it. `--mode print` is the
2268
+ // zero-write default (output the snippet). `enable`/`disable` edit a runtime JSON MCP
2269
+ // config (e.g. Claude Code `.mcp.json`) via a *conventional managed key* — idempotent,
2270
+ // reversible, and only ever our own entry (never auto-injected; opt-in only).
2271
/** Conventional managed key under `mcpServers` that trust-mcp prints, adds, and removes. */
const TRUST_MCP_SERVER = "flow-agents-surface-trust";

/**
 * Build the MCP server registration entry for Surface's MCP.
 *
 * No static `--input` is included (a single file can't follow many per-task
 * bundles or a moving current); the skill passes the active task's bundle as a
 * per-call `path` arg.
 *
 * @returns A fresh `{ command, args }` object on every call.
 */
function trustMcpRegistration(): AnyObj {
  const registration: AnyObj = {
    command: "npx",
    args: ["-y", "@kontourai/surface", "mcp"],
  };
  return registration;
}
2277
/**
 * trust-mcp --mode print|enable|disable [--config <path>]
 *
 * - print (default): write the registration snippet to stdout and usage hints
 *   to stderr; no file is touched.
 * - enable: add/overwrite only the TRUST_MCP_SERVER entry under `mcpServers`
 *   in the config file (default ".mcp.json"), creating the file and its
 *   directory when missing.
 * - disable: remove only that entry if present; everything else is preserved.
 *
 * The existing config is treated as empty only when the file does not exist
 * or is blank. A file that exists but cannot be read, is not valid JSON, or is
 * not a JSON object makes the command die() rather than be overwritten — the
 * user's other MCP servers must never be lost to a parse error.
 *
 * @returns 0 on success (fatal conditions go through die()).
 */
function trustMcp(p: ReturnType<typeof parseArgs>): number {
  const mode = opt(p, "mode", "print");
  if (mode === "print") {
    console.log(JSON.stringify({ mcpServers: { [TRUST_MCP_SERVER]: trustMcpRegistration() } }, null, 2));
    process.stderr.write(`\n# Paste the above into your runtime MCP config (e.g. .mcp.json). Flow Agents does NOT write it for you unless you run: trust-mcp --mode enable\n`);
    process.stderr.write(`# To view a task's trust inline, call surface_summary with path=<.flow-agents/<slug>/trust.bundle>.\n`);
    return 0;
  }
  if (mode !== "enable" && mode !== "disable") die("trust-mcp --mode must be print|enable|disable");

  const configPath = path.resolve(opt(p, "config", ".mcp.json"));
  let rawConfig = "";
  try {
    rawConfig = fs.readFileSync(configPath, "utf8");
  } catch (err) {
    // Only a missing file means "start from an empty config".
    if ((err as { code?: string } | null)?.code !== "ENOENT") die(`cannot read ${configPath} — refusing to edit`);
  }
  let config: AnyObj = {};
  if (rawConfig.trim() !== "") {
    try {
      config = JSON.parse(rawConfig);
    } catch {
      // Never replace a file we could not understand.
      die(`${configPath} is not valid JSON — refusing to edit`);
    }
  }
  if (typeof config !== "object" || config === null || Array.isArray(config)) die(`${configPath} is not a JSON object — refusing to edit`);
  if (!config.mcpServers || typeof config.mcpServers !== "object" || Array.isArray(config.mcpServers)) config.mcpServers = {};

  if (mode === "enable") {
    config.mcpServers[TRUST_MCP_SERVER] = trustMcpRegistration();
    fs.mkdirSync(path.dirname(configPath), { recursive: true });
    fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
    console.log(`enabled ${TRUST_MCP_SERVER} in ${configPath} (remove with: trust-mcp --mode disable)`);
    return 0;
  }

  // disable: remove only our own conventional entry; leave everything else untouched.
  if (Object.prototype.hasOwnProperty.call(config.mcpServers, TRUST_MCP_SERVER)) {
    delete config.mcpServers[TRUST_MCP_SERVER];
    fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
    console.log(`disabled ${TRUST_MCP_SERVER} in ${configPath}`);
  } else {
    console.log(`${TRUST_MCP_SERVER} not present in ${configPath} — nothing to remove`);
  }
  return 0;
}
2308
+ // ─── ADR 0012: agent coordination as liveness claims (policy-centered) ──────────
2309
+ // A work-claim is a regular Hachure claim governed by a *liveness policy* (ttl +
2310
+ // heartbeat → held/stale/released), keyed by the work-item subjectId, appended to a
2311
+ // shared stream all agents read. Status is RECOMPUTED via Surface's deriveTrustStatus
2312
+ // (no forked logic). Advisory, not a lock. The liveness policy is a general archetype
2313
+ // (not use-case-specific) and is a candidate to graduate upstream into Surface.
2314
// Verification policy handed to Surface's deriveTrustStatus for every liveness claim.
// It requires no evidence and declares no staleness triggers or conflict rules of its own.
// NOTE(review): validityRule is a 1-day duration while claims carry their own ttlSeconds
// (default 1800) — confirm how deriveTrustStatus combines the two.
const LIVENESS_POLICY = {
  id: "policy:liveness.hold",
  claimType: "liveness.hold",
  requiredEvidence: [] as string[],
  acceptanceCriteria: [
    "A heartbeat within ttlSeconds holds the claim; a lapse or release frees it.",
  ],
  reviewAuthority: "system",
  validityRule: { kind: "duration", durationDays: 1 },
  stalenessTriggers: [] as string[],
  conflictRules: [] as string[],
  impactLevel: "medium",
};
2325
+
2326
/**
 * Location of the shared liveness event stream inside an artifact root.
 *
 * @param root Artifact root directory.
 * @returns `<root>/liveness/events.jsonl`.
 */
function livenessStreamFile(root: string): string {
  const segments = [root, "liveness", "events.jsonl"];
  return path.join(...segments);
}
2327
/**
 * Append one event to the liveness stream as a single JSON line.
 * The stream's directory is created first if it does not exist yet.
 *
 * @param root Artifact root that owns the stream.
 * @param evt  Event object to serialize.
 */
function appendLivenessEvent(root: string, evt: AnyObj): void {
  const streamPath = livenessStreamFile(root);
  const streamDir = path.dirname(streamPath);
  fs.mkdirSync(streamDir, { recursive: true });
  const line = `${JSON.stringify(evt)}\n`;
  fs.appendFileSync(streamPath, line);
}
2332
/**
 * Read every event from the liveness stream under `root`.
 *
 * Preferred path: delegate to the shared CommonJS helper
 * (scripts/hooks/lib/liveness-read.js), loaded through `createRequire` so this ESM
 * module can use it without bundling it. If loading or running the helper fails for any
 * reason, an inline reader takes over so the sidecar keeps working on its own.
 *
 * @param root Artifact root that owns the stream.
 * @returns Parsed events; the inline reader yields `[]` when the stream cannot be read.
 */
function readLivenessEvents(root: string): AnyObj[] {
  try {
    const requireCjs = createRequire(import.meta.url);
    const moduleDir = path.dirname(fileURLToPath(import.meta.url));
    const helperFile = path.resolve(moduleDir, "../../../scripts/hooks/lib/liveness-read.js");
    const helper = requireCjs(helperFile) as { readLivenessEvents: (p: string) => AnyObj[] };
    return helper.readLivenessEvents(livenessStreamFile(root));
  } catch {
    // Inline fallback: one JSON document per non-blank line.
    let text = "";
    try {
      text = fs.readFileSync(livenessStreamFile(root), "utf8");
    } catch {
      return [];
    }
    const events: AnyObj[] = [];
    for (const rawLine of text.split("\n")) {
      const line = rawLine.trim();
      if (!line) continue;
      let parsed: AnyObj | null;
      try {
        parsed = JSON.parse(line) as AnyObj;
      } catch {
        parsed = null; // unparseable line: skip it
      }
      if (parsed !== null) events.push(parsed);
    }
    return events;
  }
}
2347
/**
 * Translate a derived trust status into the label shown for a liveness claim.
 *
 * @param status Status string as returned by the trust-status derivation.
 * @returns `"held"` for `verified`; `"free"` for `stale` or `revoked` (reclaimable:
 *          lapsed or released); any other status is returned unchanged.
 */
function livenessLabel(status: string): string {
  switch (status) {
    case "verified":
      return "held";
    case "stale":
    case "revoked":
      return "free";
    default:
      // Includes "superseded", which maps to itself.
      return status;
  }
}
2353
+
2354
+ // ─── ADR 0012 lifecycle-driven liveness (opt-in via FLOW_AGENTS_LIVENESS) ──────
2355
+ // init-plan claims the work-item; advance-state heartbeats (or releases on terminal),
2356
+ // so the workflow lifecycle itself maintains the liveness claim — no manual liveness calls.
2357
+ // Additive + fail-open: a liveness-emit failure never affects the workflow command.
2358
// Workflow states treated as terminal for liveness purposes.
// NOTE(review): presumably consulted by advance-state to release instead of heartbeat —
// the consumer is outside this section; confirm there.
const LIVENESS_TERMINAL = new Set([
  "delivered",
  "accepted",
  "archived",
]);
2359
/**
 * Actor name recorded on lifecycle-emitted liveness events.
 *
 * @returns The trimmed `FLOW_AGENTS_ACTOR` environment value, or `"local"` when it is
 *          unset or blank.
 */
function resolveLivenessActor(): string {
  const fromEnv = (process.env.FLOW_AGENTS_ACTOR || "").trim();
  if (fromEnv) return fromEnv;
  return "local";
}
2360
/**
 * Whether lifecycle-driven liveness is switched on.
 *
 * @returns `true` when `FLOW_AGENTS_LIVENESS` is `on`, `1`, or `true`
 *          (case-insensitive, surrounding whitespace ignored); otherwise `false`.
 */
function livenessEnabled(): boolean {
  const flag = String(process.env.FLOW_AGENTS_LIVENESS || "").trim().toLowerCase();
  return ["on", "1", "true"].includes(flag);
}
2361
/**
 * Emit a liveness event on behalf of the workflow lifecycle.
 *
 * Does nothing unless liveness is enabled. Any failure while emitting is swallowed on
 * purpose: liveness is advisory and must never break the workflow command that called it.
 *
 * @param taskDir   Task artifact directory; the shared stream lives in its parent.
 * @param slug      Work-item id, recorded as the event's `subjectId`.
 * @param kind      Event type to record.
 * @param timestamp Value recorded as the event's `at`.
 */
function livenessLifecycle(taskDir: string, slug: string, kind: "claim" | "heartbeat" | "release", timestamp: string): void {
  if (!livenessEnabled()) return;
  try {
    // .flow-agents/<slug> → .flow-agents (the shared liveness stream lives here)
    const streamRoot = path.dirname(taskDir);
    const base: AnyObj = {
      type: kind,
      subjectId: slug,
      actor: resolveLivenessActor(),
      at: timestamp,
      source: "lifecycle",
    };
    // Only a claim carries a ttl; lifecycle claims always use 1800 seconds.
    const evt: AnyObj = kind === "claim" ? { ...base, ttlSeconds: 1800 } : base;
    appendLivenessEvent(streamRoot, evt);
  } catch {
    /* best-effort; liveness is advisory and must never break the workflow */
  }
}
2370
+
2371
/**
 * `liveness` command: record or inspect advisory work-item claims.
 *
 * Usage:
 *  - `liveness claim|heartbeat|release <subjectId> [--actor A] [--at TS] [--ttl N]`
 *    appends one event to the shared stream under `--artifact-root` (default
 *    `.flow-agents`). `--ttl` applies to `claim` only; a missing, unparseable, or
 *    non-positive value falls back to 1800 seconds.
 *  - `liveness status [--subject S] [--now TS] [--json]`
 *    groups stream events per `subjectId::actor`, asks Surface's `deriveTrustStatus`
 *    for each group's status, and prints one row per group (tab-separated text, or a
 *    JSON array with `--json`).
 *
 * `--at` and `--now` must be parseable timestamps. They are rejected up front so that a
 * bad value is neither appended to the shared stream nor handed to the status
 * derivation as an Invalid Date.
 *
 * @param p Parsed CLI arguments.
 * @returns Process exit code (0 on success; invalid usage goes through `die`).
 */
async function liveness(p: ReturnType<typeof parseArgs>): Promise<number> {
  const root = path.resolve(opt(p, "artifact-root", ".flow-agents"));
  const action = p.positional[0] || "";
  const subjectId = p.positional[1] || "";
  const actor = opt(p, "actor", process.env.FLOW_AGENTS_ACTOR || "unknown");
  // Second precision: strip the millisecond component from the ISO timestamp.
  const nowIso = new Date().toISOString().replace(/\.\d{3}Z$/, "Z");

  if (action === "claim" || action === "heartbeat" || action === "release") {
    if (!subjectId) die(`liveness ${action} requires a subjectId`);
    const at = opt(p, "at") || nowIso;
    if (Number.isNaN(Date.parse(at))) die(`liveness ${action}: --at is not a valid timestamp: ${at}`);
    const evt: AnyObj = { type: action, subjectId, actor, at };
    if (action === "claim") {
      const ttl = Number.parseInt(opt(p, "ttl", "1800"), 10);
      evt.ttlSeconds = Number.isFinite(ttl) && ttl > 0 ? ttl : 1800;
    }
    appendLivenessEvent(root, evt);
    console.log(`liveness ${action}: ${subjectId} by ${actor}`);
    return 0;
  }

  if (action === "status") {
    const surface = (await import("@kontourai/surface")) as unknown as { deriveTrustStatus?: (a: AnyObj) => string };
    if (typeof surface.deriveTrustStatus !== "function") die("@kontourai/surface deriveTrustStatus unavailable — requires surface >= 1.2");
    const subjectFilter = opt(p, "subject");
    const nowArg = opt(p, "now");
    const now = nowArg ? new Date(nowArg) : new Date();
    if (Number.isNaN(now.getTime())) die(`liveness status: --now is not a valid timestamp: ${nowArg}`);

    // Group events by subjectId::actor — one liveness claim per holder of a subject.
    const groups = new Map<string, { subjectId: string; actor: string; ttlSeconds: number; created: string; updated: string; events: AnyObj[] }>();
    for (const e of readLivenessEvents(root)) {
      if (!e.subjectId || !e.actor) continue;
      const key = `${e.subjectId}::${e.actor}`;
      let g = groups.get(key);
      if (!g) {
        g = { subjectId: String(e.subjectId), actor: String(e.actor), ttlSeconds: 1800, created: String(e.at), updated: String(e.at), events: [] };
        groups.set(key, g);
      }
      // `updated` follows stream order: the last event seen for the group wins.
      g.updated = String(e.at);
      if (e.type === "claim") {
        // A claim may (re)set the group's ttl; claims and heartbeats both count as a verification.
        g.ttlSeconds = Number(e.ttlSeconds) || g.ttlSeconds;
        g.events.push({ id: `c:${key}:${e.at}`, claimId: key, status: "verified", actor: g.actor, method: "observation", evidenceIds: [], createdAt: e.at, verifiedAt: e.at });
      } else if (e.type === "heartbeat") {
        g.events.push({ id: `h:${key}:${e.at}`, claimId: key, status: "verified", actor: g.actor, method: "observation", evidenceIds: [], createdAt: e.at, verifiedAt: e.at });
      } else if (e.type === "release") {
        // A release is recorded as a revoking invalidation event.
        g.events.push({ id: `r:${key}:${e.at}`, claimId: key, status: "revoked", type: "invalidation", actor: g.actor, method: "observation", evidenceIds: [], createdAt: e.at, verifiedAt: e.at });
      }
    }

    const rows: AnyObj[] = [];
    for (const g of groups.values()) {
      if (subjectFilter && g.subjectId !== subjectFilter) continue;
      const claim: AnyObj = { id: `${g.subjectId}::${g.actor}`, subjectType: "work-item", subjectId: g.subjectId, surface: "flow.liveness", claimType: "liveness.hold", fieldOrBehavior: "held-by", value: g.actor, createdAt: g.created, updatedAt: g.updated, ttlSeconds: g.ttlSeconds, verificationPolicyId: LIVENESS_POLICY.id };
      const status = surface.deriveTrustStatus!({ claim, evidence: [], policy: LIVENESS_POLICY, events: g.events, now });
      rows.push({ subjectId: g.subjectId, actor: g.actor, status, label: livenessLabel(status) });
    }
    if (p.flags.has("json")) { console.log(JSON.stringify(rows, null, 2)); return 0; }
    for (const r of rows) console.log(`${r.subjectId}\t${r.actor}\t${r.label}`);
    return 0;
  }

  die("liveness action must be one of: claim | heartbeat | release | status");
  return 1;
}
2419
+ // ─────────────────────────────────────────────────────────────────────────────
2420
+
2421
+ // ─── Claim Lookup — pure helper (promotable to Surface #171) ─────────────────
2422
+ // buildClaimExplanation is a PURE function: report + bundle + id in, structured
2423
+ // explanation out. No fs, no CLI, no .flow-agents paths. Zero flow-agents
2424
+ // specifics inside it — it can be lifted to Surface unchanged (issue #171).
2425
+
2426
/** One evidence record attached to a claim, normalized for display. */
export interface ClaimEvidenceItem {
  /** Evidence kind; `"unknown"` when the record carries neither `evidenceType` nor `type`. */
  evidenceType: string;
  /** Short display text for the record; may be empty. */
  label: string;
  /** Normalized execution block, or `null` when the record has none. */
  execution: {
    runner: string;
    label: string;
    /** Explicit `isError` flag if given, otherwise true for a non-zero numeric exit code. */
    isError: boolean;
    /** `null` when the record has no numeric exit code. */
    exitCode: number | null;
  } | null;
  /** With an execution block: `!isError`. Without one: the record's status is not `"disputed"`. */
  passing: boolean;
  /** Longer description of the record; may be empty. */
  summary: string;
}
2433
+
2434
/** Structured answer to "what state is this claim in, and why?". */
export interface ClaimExplanation {
  /** `false` when the claim id is absent from the report; the other fields are then empty. */
  found: boolean;
  /** Derived status taken from the report (`"unknown"` when not available). */
  status: string;
  /** Claim value rendered as a string. */
  value: string;
  claimType: string;
  /** Evidence records whose `claimId` matches this claim. */
  evidence: ClaimEvidenceItem[];
  /** Governing verification policy, or `null` when the claim names none or it is not in the bundle. */
  policy: {
    id: string;
    requiredEvidence: string[];
    requiredMethods?: string[];
    acceptanceCriteria: string[];
    reviewAuthority: string;
  } | null;
  /** Supporting detail explaining the status. */
  why: {
    /** Derivation inputs; left empty by the pure builder and filled in from the drilldown by the caller. */
    directInputs: AnyObj[];
    leafClaims: AnyObj[];
    diagnostics: AnyObj[];
    /** Report transparency gaps whose `claimId` matches this claim. */
    transparencyGaps: AnyObj[];
    /** Report change records whose `claimId` matches this claim. */
    changeRecords: AnyObj[];
  };
}
2455
+
2456
/**
 * Build a structured explanation for a specific claim.
 * PURE: report + bundle + id in, structured explanation out.
 * No fs, no CLI, no .flow-agents paths. Promotable to Surface #171.
 *
 * Every lookup tolerates null/undefined entries in the report and bundle arrays, so a
 * partially malformed bundle yields an explanation instead of a TypeError.
 *
 * @param report   TrustReport from buildTrustReport(bundle) — required for derived status
 * @param bundle   Raw parsed trust.bundle (BundleFile shape)
 * @param claimId  The claim id to explain
 * @returns The explanation; `found` is false (and all other fields empty) when the claim
 *          is not present in `report.claims`. `why.directInputs`, `why.leafClaims` and
 *          `why.diagnostics` are always returned empty — the caller fills them from the
 *          derivation drilldown.
 */
export function buildClaimExplanation(
  report: Record<string, unknown>,
  bundle: Record<string, unknown>,
  claimId: string,
): ClaimExplanation {
  const reportClaims = Array.isArray(report.claims) ? (report.claims as AnyObj[]) : [];
  const reportClaim = reportClaims.find((c: AnyObj) => c?.id === claimId);

  if (!reportClaim) {
    return {
      found: false,
      status: "unknown",
      value: "",
      claimType: "",
      evidence: [],
      policy: null,
      why: { directInputs: [], leafClaims: [], diagnostics: [], transparencyGaps: [], changeRecords: [] },
    };
  }

  const bundleClaims = Array.isArray(bundle.claims) ? (bundle.claims as AnyObj[]) : [];
  // Prefer the raw bundle claim; fall back to the report's copy when the bundle lacks it.
  const bundleClaim = bundleClaims.find((c: AnyObj) => c?.id === claimId) ?? reportClaim;
  const bundlePolicies = Array.isArray(bundle.policies) ? (bundle.policies as AnyObj[]) : [];
  const bundleEvidence = Array.isArray(bundle.evidence) ? (bundle.evidence as AnyObj[]) : [];

  // Governing policy — follow verificationPolicyId into bundle.policies[]
  const verificationPolicyId = typeof bundleClaim.verificationPolicyId === "string" ? bundleClaim.verificationPolicyId : undefined;
  const rawPolicy = verificationPolicyId ? bundlePolicies.find((p: AnyObj) => p?.id === verificationPolicyId) : undefined;
  const policy = rawPolicy
    ? {
        id: String(rawPolicy.id ?? ""),
        requiredEvidence: Array.isArray(rawPolicy.requiredEvidence) ? (rawPolicy.requiredEvidence as string[]) : [],
        requiredMethods: Array.isArray(rawPolicy.requiredMethods) ? (rawPolicy.requiredMethods as string[]) : undefined,
        acceptanceCriteria: Array.isArray(rawPolicy.acceptanceCriteria) ? (rawPolicy.acceptanceCriteria as string[]) : [],
        reviewAuthority: String(rawPolicy.reviewAuthority ?? ""),
      }
    : null;

  // Evidence enhancement: pull evidence items for this claim, surface the execution block
  const claimEvidenceItems = bundleEvidence.filter((ev: AnyObj) => ev && ev.claimId === claimId);
  const evidence: ClaimEvidenceItem[] = claimEvidenceItems.map((ev: AnyObj) => {
    const exec = ev.execution && typeof ev.execution === "object" ? (ev.execution as AnyObj) : null;
    const execution = exec
      ? {
          // runner and label stand in for each other when only one is present.
          runner: String(exec.runner ?? exec.label ?? ""),
          label: String(exec.label ?? exec.runner ?? ""),
          // An explicit isError wins; otherwise a non-zero numeric exit code means failure.
          isError: Boolean(exec.isError ?? (typeof exec.exitCode === "number" && exec.exitCode !== 0)),
          exitCode: typeof exec.exitCode === "number" ? exec.exitCode : null,
        }
      : null;
    return {
      evidenceType: String(ev.evidenceType ?? ev.type ?? "unknown"),
      label: String(ev.label ?? ev.excerptOrSummary ?? ev.sourceRef ?? ev.id ?? ""),
      execution,
      passing: execution ? !execution.isError : String(ev.status ?? "") !== "disputed",
      summary: String(ev.excerptOrSummary ?? ev.summary ?? ev.label ?? ""),
    };
  });

  // Drilldown: extract from report structure (report.transparencyGaps, report.changeRecords)
  const allGaps = Array.isArray(report.transparencyGaps) ? (report.transparencyGaps as AnyObj[]) : [];
  const allChanges = Array.isArray(report.changeRecords) ? (report.changeRecords as AnyObj[]) : [];
  const transparencyGaps = allGaps.filter((g: AnyObj) => g && g.claimId === claimId);
  const changeRecords = allChanges.filter((c: AnyObj) => c && c.claimId === claimId);

  return {
    found: true,
    status: String(reportClaim.status ?? "unknown"),
    value: String(bundleClaim.value ?? reportClaim.value ?? ""),
    claimType: String(bundleClaim.claimType ?? reportClaim.claimType ?? ""),
    evidence,
    policy,
    why: {
      directInputs: [], // populated by buildDerivationDrilldown if non-leaf
      leafClaims: [],
      diagnostics: [],
      transparencyGaps,
      changeRecords,
    },
  };
}
2546
+
2547
/**
 * Render a ClaimExplanation as the human-readable report printed by `claim`.
 * Pure: explanation in, text out.
 */
function formatClaimExplanation(claimId: string, explanation: ClaimExplanation): string {
  const lines: string[] = [];
  lines.push(`Claim: ${claimId}`);
  lines.push(`Status: ${explanation.status} Value: ${explanation.value}`);
  lines.push(`Type: ${explanation.claimType}`);
  lines.push("");

  // Evidence section — failing items are the concrete "why disputed"
  const allEvidence = explanation.evidence;
  const failingEvidence = allEvidence.filter((ev) => !ev.passing);
  if (allEvidence.length > 0) {
    lines.push("Evidence:");
    for (const ev of allEvidence) {
      const passMark = ev.passing ? "pass" : "FAIL";
      const execStr = ev.execution
        ? ` [runner: ${ev.execution.runner}, exitCode: ${ev.execution.exitCode ?? "?"}, isError: ${ev.execution.isError}]`
        : "";
      lines.push(` [${passMark}] ${ev.evidenceType}: ${ev.label || ev.summary}${execStr}`);
    }
    if (failingEvidence.length > 0) {
      lines.push("");
      lines.push(`Failing evidence (disputed because):`);
      for (const ev of failingEvidence) {
        const execStr = ev.execution
          ? ` ${ev.execution.runner} exited ${ev.execution.exitCode ?? "?"} (isError: ${ev.execution.isError})`
          : "";
        lines.push(` ${ev.evidenceType}: ${ev.label || ev.summary}${execStr}`);
      }
    }
  } else {
    lines.push("Evidence: (none recorded for this claim)");
  }
  lines.push("");

  // Policy section — how-to-verify
  if (explanation.policy) {
    const pol = explanation.policy;
    lines.push(`Governing Policy (${pol.id}):`);
    lines.push(` requiredEvidence: [${pol.requiredEvidence.join(", ")}]`);
    if (pol.requiredMethods && pol.requiredMethods.length > 0) {
      lines.push(` requiredMethods: [${pol.requiredMethods.join(", ")}]`);
    }
    lines.push(` acceptanceCriteria: [${pol.acceptanceCriteria.join(" | ")}]`);
    lines.push(` reviewAuthority: ${pol.reviewAuthority}`);
  } else {
    lines.push("Governing Policy: (none — claim has no verificationPolicyId or policy not found in bundle)");
  }
  lines.push("");

  // Why section — derivation drilldown + transparency gaps
  const why = explanation.why;
  lines.push("Derivation Drilldown:");
  if (why.directInputs.length > 0) {
    lines.push(` Direct inputs: ${why.directInputs.length} claim(s)`);
    for (const inp of why.directInputs) {
      const inpStatus = typeof inp.claim === "object" && inp.claim ? String((inp.claim as AnyObj).status ?? "?") : "?";
      lines.push(` - ${inp.inputClaimId ?? "?"} (status: ${inpStatus})`);
    }
  } else {
    lines.push(" Direct inputs: (none — leaf claim)");
  }
  if (why.leafClaims.length > 0) {
    lines.push(` Leaf claims: ${why.leafClaims.length} claim(s)`);
  }
  if (why.diagnostics.length > 0) {
    lines.push(` Diagnostics: ${why.diagnostics.length}`);
    for (const d of why.diagnostics) {
      lines.push(` - ${d.type ?? "?"}: ${d.message ?? ""}`);
    }
  }
  if (why.transparencyGaps.length > 0) {
    lines.push(` Transparency gaps: ${why.transparencyGaps.length}`);
    for (const g of why.transparencyGaps) {
      lines.push(` - [${g.severity ?? "?"}] ${g.type ?? "?"}: ${g.message ?? ""}`);
    }
  } else {
    lines.push(" Transparency gaps: (none)");
  }
  if (why.changeRecords.length > 0) {
    lines.push(` Change records: ${why.changeRecords.length}`);
    for (const cr of why.changeRecords) {
      lines.push(` - ${cr.action ?? "?"} at ${cr.at ?? cr.createdAt ?? "?"}`);
    }
  }

  return lines.join("\n");
}

/**
 * claim <id> <dir>
 *
 * Look up a specific claim in the session's trust.bundle and print:
 * - Derived status and raw value
 * - Failing evidence items (with execution block: runner, exitCode, isError)
 * - Governing VerificationPolicy (how-to-verify)
 * - Derivation drilldown / transparency gaps (why it is in that state)
 *
 * --json Emit the structured ClaimExplanation object instead of text.
 *
 * Usage: workflow-sidecar claim <claimId> <artifactDir>
 *
 * Exit codes: 1 when the bundle is missing, unreadable, not a JSON object, or does not
 * contain the claim; 0 otherwise — including when Surface is unavailable (fail-open).
 */
async function claimLookup(p: ReturnType<typeof parseArgs>): Promise<number> {
  const claimId = p.positional[0] || die("claim id is required (first positional argument)");
  const rawDir = p.positional[1] || die("artifact directory is required (second positional argument)");
  const dir = path.resolve(rawDir);

  const bundlePath = path.join(dir, "trust.bundle");
  if (!fs.existsSync(bundlePath)) {
    process.stderr.write(`[claim] no trust.bundle at ${bundlePath} — run record-evidence first\n`);
    return 1;
  }

  // A corrupt bundle is a user-facing condition, not a crash: report it and exit non-zero.
  let bundle: BundleFile;
  try {
    bundle = JSON.parse(fs.readFileSync(bundlePath, "utf8"));
  } catch (err) {
    process.stderr.write(`[claim] cannot read trust.bundle at ${bundlePath}: ${(err as Error).message}\n`);
    return 1;
  }
  if (typeof bundle !== "object" || bundle === null || Array.isArray(bundle)) {
    process.stderr.write(`[claim] trust.bundle at ${bundlePath} is not a JSON object\n`);
    return 1;
  }

  // Drop null/undefined entries so a partially malformed claims array cannot crash the lookup.
  const bundleClaims = (Array.isArray(bundle.claims) ? bundle.claims : []).filter((c) => c != null);

  const bundleClaim = bundleClaims.find((c) => c.id === claimId);
  if (!bundleClaim) {
    const available = bundleClaims.map((c) => c.id).join("\n ");
    process.stderr.write(`[claim] unknown claim id: ${claimId}\nAvailable claim ids:\n${available || "(none — bundle has no claims)"}\n`);
    return 1;
  }

  // Load Surface via tryLoadSurface() (ESM, cached, fail-open pattern)
  const surface = await tryLoadSurface();
  if (!surface || typeof surface.buildTrustReport !== "function" || typeof surface.buildDerivationDrilldown !== "function") {
    process.stderr.write(`[claim] @kontourai/surface unavailable or missing buildTrustReport/buildDerivationDrilldown\n`);
    return 0; // fail-open, consistent with gate-review pattern
  }

  // Build TrustReport (required — buildDerivationDrilldown needs TrustReport, not TrustBundle)
  const report = surface.buildTrustReport(bundle as unknown as Record<string, unknown>);

  // Build the structured explanation (pure, promotable to #171)
  const explanation = buildClaimExplanation(report, bundle as unknown as Record<string, unknown>, claimId);

  // Enrich the why.directInputs/leafClaims/diagnostics from the drilldown
  try {
    const drilldown = surface.buildDerivationDrilldown(report, claimId) as AnyObj;
    if (drilldown) {
      explanation.why.directInputs = Array.isArray(drilldown.directInputs) ? drilldown.directInputs : [];
      explanation.why.leafClaims = Array.isArray(drilldown.leafClaims) ? drilldown.leafClaims : [];
      explanation.why.diagnostics = Array.isArray(drilldown.diagnostics) ? drilldown.diagnostics : [];
    }
  } catch {
    // buildDerivationDrilldown threw (e.g. claim not in report) — proceed without drilldown
  }

  if (p.flags.has("json")) {
    console.log(JSON.stringify(explanation, null, 2));
    return 0;
  }

  console.log(formatClaimExplanation(claimId, explanation));
  return 0;
}
2703
+ // ─────────────────────────────────────────────────────────────────────────────
2704
+
2705
+
727
2706
  async function main(): Promise<number> {
728
2707
  const p = parseArgs(process.argv.slice(2));
729
2708
  if (!p.command) die("workflow-sidecar command is required");
730
- const lockRoot = ["ensure-session", "current", "dogfood-pass"].includes(p.command) ? path.resolve(opt(p, "artifact-root", ".flow-agents")) : p.command === "record-agent-event" ? explicitArtifactRoot(p) : p.positional[0] ? artifactDirFrom(p.positional[0]) : "";
2709
+ const lockRoot = ["ensure-session", "current", "dogfood-pass", "liveness"].includes(p.command) ? path.resolve(opt(p, "artifact-root", ".flow-agents")) : p.command === "record-agent-event" ? explicitArtifactRoot(p) : p.command === "claim" ? (p.positional[1] ? path.resolve(p.positional[1]) : "") : p.positional[0] ? artifactDirFrom(p.positional[0]) : "";
731
2710
  return withLock(lockRoot, ["ensure-session", "record-agent-event", "dogfood-pass"].includes(p.command), p.command, () => {
732
2711
  switch (p.command) {
733
2712
  case "ensure-session": return ensureSession(p);
@@ -735,12 +2714,20 @@ async function main(): Promise<number> {
735
2714
  case "record-agent-event": return recordAgentEvent(p);
736
2715
  case "init-plan": return initPlan(p);
737
2716
  case "record-evidence": return recordEvidence(p);
2717
+ case "record-gate-claim": return recordGateClaim(p);
738
2718
  case "advance-state": return advanceState(p);
739
2719
  case "record-critique": return recordCritique(p);
740
2720
  case "import-critique": return importCritique(p);
741
2721
  case "record-release": return recordRelease(p);
742
2722
  case "record-learning": return recordLearning(p);
743
2723
  case "dogfood-pass": return dogfoodPass(p);
2724
+ case "gate-review": return gateReview(p);
2725
+ case "render-trust-panel": return renderTrustPanel(p);
2726
+ case "trust-mcp": return trustMcp(p);
2727
+ case "liveness": return liveness(p);
2728
+ case "claim": return claimLookup(p);
2729
+ case "seal-checkpoint": return sealCheckpoint(p);
2730
+ case "publish-delivery": return publishDeliveryCmd(p);
744
2731
  default: die(`unknown command: ${p.command}`);
745
2732
  }
746
2733
  });