@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,325 @@
1
+ #!/usr/bin/env bash
2
+ # test_trust_checkpoint.sh — Integration eval for Increment A: per-run trust CHECKPOINT.
3
+ #
4
+ # Proves that:
5
+ # 1. SEAL-AT-COMPLETE: running record-release (which sets status=delivered) with a
6
+ # trust.bundle present writes trust.checkpoint.json with the correct envelope shape:
7
+ # - schema_version, slug, status=delivered, phase=release, sealed_at, commit_sha
8
+ # - checkpoint.statusByClaimId, checkpoint.statusFunctionVersion, checkpoint.throughEventCreatedAt
9
+ # 2. ADVANCE-STATE-DELIVERED: advance-state --status delivered also writes
10
+ # trust.checkpoint.json (alternative delivered path).
11
+ # 3. SEAL-CHECKPOINT-SUBCOMMAND: seal-checkpoint <dir> explicit subcommand writes the
12
+ # checkpoint and outputs the path to stdout.
13
+ # 4. DIFF-ON-DRIFT: after sealing, mutating the bundle (expiresAt in the past) causes
14
+ # render-trust-panel to emit a "went stale" message via diffFreshness.
15
+ # 5. NO-BUNDLE-SKIP: when no trust.bundle exists, seal-checkpoint exits 0 (graceful skip).
16
+ # 6. ADDITIVE: advance-state to a non-delivered status (verified) does NOT write trust.checkpoint.json.
17
+ # 7. RE-SEAL: re-running seal-checkpoint overwrites trust.checkpoint.json with a new sealed_at.
18
+ #
19
+ # Deterministic, no model spend, self-cleaning.
20
+ # Usage: bash evals/integration/test_trust_checkpoint.sh
21
+
22
+ set -uo pipefail
23
+
24
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
25
+ source "$ROOT/evals/lib/node.sh"
26
+
27
+ WRITER="workflow-sidecar"
28
+ TMP="$(mktemp -d)"
29
+ errors=0
30
+
31
+ _pass() { echo " ✓ $1"; }
32
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
33
+
34
+ cleanup() { rm -rf "$TMP"; }
35
+ trap cleanup EXIT
36
+
37
+ echo ""
38
+ echo "=== TEST 1: Seal-at-complete — record-release writes trust.checkpoint.json ==="
39
+
40
+ AROOT1="$TMP/test1/.flow-agents"
41
+ SLUG1="ckpt-release-test"
42
+ SESSION_DIR1="$AROOT1/$SLUG1"
43
+ mkdir -p "$AROOT1"
44
+
45
+ flow_agents_node "$WRITER" ensure-session \
46
+ --artifact-root "$AROOT1" \
47
+ --task-slug "$SLUG1" \
48
+ --title "Checkpoint Release Test" \
49
+ --summary "Test that record-release seals trust.checkpoint.json." \
50
+ --criterion "Evidence recorded" \
51
+ --timestamp "2026-06-26T10:00:00Z" >/dev/null 2>&1
52
+
53
+ flow_agents_node "$WRITER" init-plan "$SESSION_DIR1/${SLUG1}--deliver.md" \
54
+ --source-request "Test" --summary "Test" \
55
+ --timestamp "2026-06-26T10:01:00Z" >/dev/null 2>&1
56
+
57
+ flow_agents_node "$WRITER" record-evidence "$SESSION_DIR1" \
58
+ --verdict pass \
59
+ --check-json '{"id":"build","kind":"build","status":"pass","summary":"build passed"}' \
60
+ --check-json '{"id":"types","kind":"types","status":"pass","summary":"types ok"}' \
61
+ --timestamp "2026-06-26T10:02:00Z" >/dev/null 2>&1
62
+
63
+ flow_agents_node "$WRITER" record-critique "$SESSION_DIR1" \
64
+ --verdict pass \
65
+ --summary "Review passed." \
66
+ --timestamp "2026-06-26T10:03:00Z" >/dev/null 2>&1
67
+
68
+ flow_agents_node "$WRITER" record-release "$SESSION_DIR1" \
69
+ --decision merge \
70
+ --gate-json '{"name":"merge","status":"pass","summary":"Ready to merge."}' \
71
+ --summary "Release recorded." \
72
+ --timestamp "2026-06-26T10:04:00Z" >/dev/null 2>&1
73
+
74
+ if [[ -f "$SESSION_DIR1/trust.checkpoint.json" ]]; then
75
+ _pass "record-release writes trust.checkpoint.json"
76
+ else
77
+ _fail "record-release did NOT write trust.checkpoint.json"
78
+ fi
79
+
80
+ # Validate envelope shape
81
+ node - "$SESSION_DIR1/trust.checkpoint.json" <<'NODE'
82
+ const fs = require("fs");
83
+ const env = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
84
+
85
+ const errors = [];
86
+ if (env.schema_version !== "1.0") errors.push("schema_version expected '1.0', got " + env.schema_version);
87
+ if (typeof env.slug !== "string" || !env.slug) errors.push("slug missing");
88
+ if (env.status !== "delivered") errors.push("status expected 'delivered', got " + env.status);
89
+ if (env.phase !== "release") errors.push("phase expected 'release', got " + env.phase);
90
+ if (typeof env.sealed_at !== "string" || !env.sealed_at) errors.push("sealed_at missing");
91
+ // commit_sha can be null if not in a git repo, but must be present as key
92
+ if (!Object.prototype.hasOwnProperty.call(env, "commit_sha")) errors.push("commit_sha key absent");
93
+ if (!env.checkpoint || typeof env.checkpoint !== "object") errors.push("checkpoint missing or not object");
94
+ const ckpt = env.checkpoint;
95
+ if (!ckpt.statusByClaimId || typeof ckpt.statusByClaimId !== "object") errors.push("checkpoint.statusByClaimId missing");
96
+ if (typeof ckpt.statusFunctionVersion !== "string") errors.push("checkpoint.statusFunctionVersion missing");
97
+ if (!Object.prototype.hasOwnProperty.call(ckpt, "throughEventCreatedAt")) errors.push("checkpoint.throughEventCreatedAt missing");
98
+
99
+ const claimCount = Object.keys(ckpt.statusByClaimId || {}).length;
100
+ if (claimCount === 0) errors.push("checkpoint.statusByClaimId is empty — expected at least 1 claim");
101
+
102
+ if (errors.length > 0) {
103
+ console.error("ENVELOPE SHAPE ERRORS:\n" + errors.join("\n"));
104
+ process.exit(1);
105
+ }
106
+ console.log("envelope valid: schema_version=" + env.schema_version + " status=" + env.status + " claims=" + claimCount + " sfv=" + ckpt.statusFunctionVersion);
107
+ NODE
108
+ if [[ $? -eq 0 ]]; then
109
+ _pass "trust.checkpoint.json envelope shape is valid (schema_version, slug, status, phase, sealed_at, commit_sha, checkpoint.*)"
110
+ else
111
+ _fail "trust.checkpoint.json envelope shape invalid"
112
+ fi
113
+
114
+ echo ""
115
+ echo "=== TEST 2: Seal via advance-state --status delivered ==="
116
+
117
+ AROOT2="$TMP/test2/.flow-agents"
118
+ SLUG2="ckpt-advance-test"
119
+ SESSION_DIR2="$AROOT2/$SLUG2"
120
+ mkdir -p "$AROOT2"
121
+
122
+ flow_agents_node "$WRITER" ensure-session \
123
+ --artifact-root "$AROOT2" \
124
+ --task-slug "$SLUG2" \
125
+ --title "Checkpoint Advance Test" \
126
+ --summary "Test that advance-state --status delivered seals trust.checkpoint.json." \
127
+ --timestamp "2026-06-26T11:00:00Z" >/dev/null 2>&1
128
+
129
+ flow_agents_node "$WRITER" init-plan "$SESSION_DIR2/${SLUG2}--deliver.md" \
130
+ --source-request "Test" --summary "Test" \
131
+ --timestamp "2026-06-26T11:01:00Z" >/dev/null 2>&1
132
+
133
+ flow_agents_node "$WRITER" record-evidence "$SESSION_DIR2" \
134
+ --verdict pass \
135
+ --check-json '{"id":"build","kind":"build","status":"pass","summary":"build passed"}' \
136
+ --timestamp "2026-06-26T11:02:00Z" >/dev/null 2>&1
137
+
138
+ if [[ -f "$SESSION_DIR2/trust.checkpoint.json" ]]; then
139
+ _fail "trust.checkpoint.json should NOT exist before advance-state delivered"
140
+ else
141
+ _pass "trust.checkpoint.json absent before advance-state --status delivered (correct)"
142
+ fi
143
+
144
+ flow_agents_node "$WRITER" advance-state "$SESSION_DIR2" \
145
+ --status delivered \
146
+ --phase release \
147
+ --summary "Delivered via advance-state." \
148
+ --timestamp "2026-06-26T11:03:00Z" >/dev/null 2>&1
149
+
150
+ if [[ -f "$SESSION_DIR2/trust.checkpoint.json" ]]; then
151
+ _pass "advance-state --status delivered writes trust.checkpoint.json"
152
+ else
153
+ _fail "advance-state --status delivered did NOT write trust.checkpoint.json"
154
+ fi
155
+
156
+ node - "$SESSION_DIR2/trust.checkpoint.json" <<'NODE'
157
+ const fs = require("fs");
158
+ const env = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
159
+ if (env.status !== "delivered") { console.error("expected status=delivered, got " + env.status); process.exit(1); }
160
+ if (!env.checkpoint || !env.checkpoint.statusByClaimId) { console.error("missing checkpoint.statusByClaimId"); process.exit(1); }
161
+ console.log("advance-state checkpoint: status=" + env.status + " sealed_at=" + env.sealed_at);
162
+ NODE
163
+ if [[ $? -eq 0 ]]; then
164
+ _pass "advance-state delivered checkpoint has correct status and checkpoint fields"
165
+ else
166
+ _fail "advance-state delivered checkpoint shape invalid"
167
+ fi
168
+
169
+ echo ""
170
+ echo "=== TEST 3: seal-checkpoint explicit subcommand ==="
171
+
172
+ AROOT3="$TMP/test3/.flow-agents"
173
+ SLUG3="ckpt-explicit-test"
174
+ SESSION_DIR3="$AROOT3/$SLUG3"
175
+ mkdir -p "$AROOT3"
176
+
177
+ flow_agents_node "$WRITER" ensure-session \
178
+ --artifact-root "$AROOT3" \
179
+ --task-slug "$SLUG3" \
180
+ --title "Checkpoint Explicit Test" \
181
+ --summary "Test seal-checkpoint subcommand." \
182
+ --timestamp "2026-06-26T12:00:00Z" >/dev/null 2>&1
183
+
184
+ flow_agents_node "$WRITER" init-plan "$SESSION_DIR3/${SLUG3}--deliver.md" \
185
+ --source-request "Test" --summary "Test" \
186
+ --timestamp "2026-06-26T12:01:00Z" >/dev/null 2>&1
187
+
188
+ flow_agents_node "$WRITER" record-evidence "$SESSION_DIR3" \
189
+ --verdict pass \
190
+ --check-json '{"id":"build","kind":"build","status":"pass","summary":"build passed"}' \
191
+ --timestamp "2026-06-26T12:02:00Z" >/dev/null 2>&1
192
+
193
+ SEAL_OUT="$TMP/seal-out.txt"
194
+ flow_agents_node "$WRITER" seal-checkpoint "$SESSION_DIR3" \
195
+ --timestamp "2026-06-26T12:03:00Z" > "$SEAL_OUT" 2>/dev/null
196
+
197
+ if [[ -f "$SESSION_DIR3/trust.checkpoint.json" ]]; then
198
+ _pass "seal-checkpoint subcommand writes trust.checkpoint.json"
199
+ else
200
+ _fail "seal-checkpoint subcommand did NOT write trust.checkpoint.json"
201
+ fi
202
+
203
+ if grep -q "trust.checkpoint.json" "$SEAL_OUT"; then
204
+ _pass "seal-checkpoint subcommand outputs the checkpoint file path to stdout"
205
+ else
206
+ _fail "seal-checkpoint subcommand did not output file path (got: $(cat "$SEAL_OUT"))"
207
+ fi
208
+
209
+ echo ""
210
+ echo "=== TEST 4: diff-on-drift — stale claim reported on resume ==="
211
+
212
+ # Reuse SESSION_DIR1 which has a sealed checkpoint
213
+ # Mutate the bundle: set expiresAt in the past on the first claim
214
+ node - "$SESSION_DIR1/trust.bundle" <<'NODE'
215
+ const fs = require("fs");
216
+ const bundle = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
217
+ const firstClaim = bundle.claims[0];
218
+ if (!firstClaim) { console.error("No claims in bundle"); process.exit(1); }
219
+ firstClaim.expiresAt = "2020-01-01T00:00:00Z";
220
+ fs.writeFileSync(process.argv[2], JSON.stringify(bundle, null, 2));
221
+ console.log("Mutated claim: " + firstClaim.id + " expiresAt set to past");
222
+ NODE
223
+
224
+ # Run render-trust-panel and capture stderr for the freshness diff
225
+ DIFF_STDERR="$TMP/diff-stderr.txt"
226
+ flow_agents_node "$WRITER" render-trust-panel "$SESSION_DIR1" 2>"$DIFF_STDERR" >/dev/null
227
+
228
+ if grep -q "trust-checkpoint" "$DIFF_STDERR" && grep -q "stale" "$DIFF_STDERR"; then
229
+ _pass "render-trust-panel reports stale claim(s) via diffFreshness after bundle mutation"
230
+ else
231
+ _fail "render-trust-panel did NOT report stale transitions (got: $(cat "$DIFF_STDERR"))"
232
+ fi
233
+
234
+ # Also check the count is non-zero
235
+ if grep -qE "\[trust-checkpoint\] [1-9][0-9]* claim" "$DIFF_STDERR"; then
236
+ _pass "diffFreshness reports at least 1 fresh→stale transition"
237
+ else
238
+ _fail "diffFreshness did not report expected number of stale transitions"
239
+ fi
240
+
241
+ echo ""
242
+ echo "=== TEST 5: No-bundle graceful skip ==="
243
+
244
+ NOBUNDLE_DIR="$TMP/nobundle"
245
+ mkdir -p "$NOBUNDLE_DIR"
246
+ printf '{"schema_version":"1.0","task_slug":"no-bundle","status":"planning","phase":"execution","updated_at":"2026-06-26T10:00:00Z","next_action":{"status":"continue","summary":"test"}}' \
247
+ > "$NOBUNDLE_DIR/state.json"
248
+
249
+ SEAL_ERR="$TMP/seal-no-bundle-err.txt"
250
+ if flow_agents_node "$WRITER" seal-checkpoint "$NOBUNDLE_DIR" \
251
+ --timestamp "2026-06-26T10:00:00Z" > /dev/null 2>"$SEAL_ERR"; then
252
+ _pass "seal-checkpoint exits 0 when no trust.bundle present (graceful skip)"
253
+ else
254
+ _fail "seal-checkpoint exited non-zero when no trust.bundle present"
255
+ fi
256
+
257
+ if [[ -f "$NOBUNDLE_DIR/trust.checkpoint.json" ]]; then
258
+ _fail "seal-checkpoint should NOT write trust.checkpoint.json when no trust.bundle"
259
+ else
260
+ _pass "seal-checkpoint does NOT write trust.checkpoint.json when no trust.bundle"
261
+ fi
262
+
263
+ echo ""
264
+ echo "=== TEST 6: Additive — non-delivered advance-state does NOT write checkpoint ==="
265
+
266
+ AROOT6="$TMP/test6/.flow-agents"
267
+ SLUG6="ckpt-additive-test"
268
+ SESSION_DIR6="$AROOT6/$SLUG6"
269
+ mkdir -p "$AROOT6"
270
+
271
+ flow_agents_node "$WRITER" ensure-session \
272
+ --artifact-root "$AROOT6" \
273
+ --task-slug "$SLUG6" \
274
+ --title "Additive Test" \
275
+ --summary "Test that non-delivered advance-state does not write checkpoint." \
276
+ --timestamp "2026-06-26T13:00:00Z" >/dev/null 2>&1
277
+
278
+ flow_agents_node "$WRITER" init-plan "$SESSION_DIR6/${SLUG6}--deliver.md" \
279
+ --source-request "Test" --summary "Test" \
280
+ --timestamp "2026-06-26T13:01:00Z" >/dev/null 2>&1
281
+
282
+ flow_agents_node "$WRITER" record-evidence "$SESSION_DIR6" \
283
+ --verdict pass \
284
+ --check-json '{"id":"build","kind":"build","status":"pass","summary":"build passed"}' \
285
+ --timestamp "2026-06-26T13:02:00Z" >/dev/null 2>&1
286
+
287
+ # Advance to verified (non-delivered)
288
+ flow_agents_node "$WRITER" advance-state "$SESSION_DIR6" \
289
+ --status verified \
290
+ --phase verification \
291
+ --summary "Verified." \
292
+ --timestamp "2026-06-26T13:03:00Z" >/dev/null 2>&1
293
+
294
+ if [[ -f "$SESSION_DIR6/trust.checkpoint.json" ]]; then
295
+ _fail "advance-state to verified should NOT write trust.checkpoint.json"
296
+ else
297
+ _pass "advance-state to non-delivered (verified) does NOT write trust.checkpoint.json (additive)"
298
+ fi
299
+
300
+ echo ""
301
+ echo "=== TEST 7: Idempotent — re-sealing overwrites with latest snapshot ==="
302
+
303
+ # Re-run seal-checkpoint on SESSION_DIR1 (already sealed)
304
+ FIRST_SEALED_AT=$(node -e "const fs=require('fs'); const e=JSON.parse(fs.readFileSync('$SESSION_DIR1/trust.checkpoint.json','utf8')); console.log(e.sealed_at);")
305
+
306
+ flow_agents_node "$WRITER" seal-checkpoint "$SESSION_DIR1" \
307
+ --timestamp "2026-06-26T16:00:00Z" >/dev/null 2>&1
308
+
309
+ SECOND_SEALED_AT=$(node -e "const fs=require('fs'); const e=JSON.parse(fs.readFileSync('$SESSION_DIR1/trust.checkpoint.json','utf8')); console.log(e.sealed_at);")
310
+
311
+ if [[ "$FIRST_SEALED_AT" != "$SECOND_SEALED_AT" ]]; then
312
+ _pass "seal-checkpoint is idempotent — re-running overwrites with latest sealed_at ($SECOND_SEALED_AT)"
313
+ else
314
+ _fail "seal-checkpoint idempotent re-run: sealed_at did not update (still $FIRST_SEALED_AT)"
315
+ fi
316
+
317
+ echo ""
318
+ echo "────────────────────────────────────────────"
319
+ if [[ $errors -eq 0 ]]; then
320
+ echo "test_trust_checkpoint: all checks passed."
321
+ exit 0
322
+ else
323
+ echo "test_trust_checkpoint: $errors check(s) failed."
324
+ exit 1
325
+ fi
@@ -0,0 +1,293 @@
1
+ #!/usr/bin/env bash
2
+ # test_trust_reconcile.sh — Integration eval for the CI trust anchor (Phase 1).
3
+ #
4
+ # Proves that scripts/ci/trust-reconcile.js correctly:
5
+ # 1. DIVERGENCE-CAUGHT: bundle claims a command passed; CI re-runs it and it FAILS.
6
+ # Exit 1 with "trust divergence" message naming the command.
7
+ # 2. MATCHING-PASSES: bundle claims a command passed; CI re-runs it and it PASSES.
8
+ # Exit 0 (no divergence).
9
+ # 3. NO-CHECKPOINT: no bundle present; canonical verify passes.
10
+ # Exit 0 (fail-open on bundle absence, enforce fresh verify only).
11
+ # 4. LAUNDERING-CAUGHT: bundle claims "something || true" passed.
12
+ # Exit 1 with laundering message (checked before "CI never ran" check).
13
+ # 5. YAML-VALID: .github/workflows/trust-reconcile.yml parses as valid YAML.
14
+ #
15
+ # Also validates that the workflow YAML parses (via yamllint, python3 yaml, or a structural check).
16
+ #
17
+ # Deterministic, no model spend, self-cleaning.
18
+ # Usage: bash evals/integration/test_trust_reconcile.sh
19
+
20
+ set -uo pipefail
21
+
22
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
23
+ RECONCILE="$ROOT/scripts/ci/trust-reconcile.js"
24
+
25
+ TMP="$(mktemp -d)"
26
+ errors=0
27
+
28
+ _pass() { echo " PASS: $1"; }
29
+ _fail() { echo " FAIL: $1"; errors=$((errors + 1)); }
30
+
31
+ cleanup() { rm -rf "$TMP"; }
32
+ trap cleanup EXIT
33
+
34
+ # ─── Fixture bundle builder ───────────────────────────────────────────────────
35
+ # Writes a minimal trust.bundle fixture to a path.
36
+ # Usage: write_bundle <path> <label> <passing>
37
+ # <label> — the execution.label command string in the evidence item
38
+ # <passing> — true or false
39
+ write_bundle() {
40
+ local bundle_path="$1"
41
+ local label="$2"
42
+ local passing="$3"
43
+
44
+ node - "$bundle_path" "$label" "$passing" << 'NODE'
45
+ const fs = require('fs');
46
+ const [,, bundlePath, label, passingStr] = process.argv;
47
+ const passing = passingStr === 'true';
48
+ const bundle = {
49
+ schemaVersion: 3,
50
+ source: "test-fixture",
51
+ claims: [
52
+ {
53
+ id: "c1",
54
+ claimType: "workflow.check.build",
55
+ value: passing ? "pass" : "fail",
56
+ status: passing ? "verified" : "disputed",
57
+ subjectId: "test-slug/build",
58
+ surface: "flow-agents.workflow",
59
+ subjectType: "workflow-check",
60
+ fieldOrBehavior: "build",
61
+ createdAt: "2026-06-27T00:00:00Z",
62
+ updatedAt: "2026-06-27T00:00:00Z",
63
+ impactLevel: "high",
64
+ verificationPolicyId: "policy:workflow.check.build"
65
+ }
66
+ ],
67
+ evidence: [
68
+ {
69
+ id: "ev1",
70
+ claimId: "c1",
71
+ evidenceType: "test_output",
72
+ method: "validation",
73
+ sourceRef: "test-slug/command-log.jsonl",
74
+ excerptOrSummary: "build",
75
+ observedAt: "2026-06-27T00:00:00Z",
76
+ collectedBy: "flow-agents/evidence-capture",
77
+ passing: passing,
78
+ execution: {
79
+ runner: "bash",
80
+ label: label,
81
+ isError: !passing,
82
+ exitCode: passing ? 0 : 1
83
+ }
84
+ }
85
+ ],
86
+ policies: [],
87
+ events: []
88
+ };
89
+ fs.writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
90
+ NODE
91
+ }
92
+
93
+ # ─── TEST 1: DIVERGENCE-CAUGHT ────────────────────────────────────────────────
94
+ echo ""
95
+ echo "=== TEST 1: DIVERGENCE-CAUGHT — claimed pass, CI re-run FAILS ==="
96
+
97
+ BUNDLE1="$TMP/bundle-diverge.json"
98
+ write_bundle "$BUNDLE1" "node -e 'process.exit(1)'" "true"
99
+
100
+ # canonical command is "node -e 'process.exit(1)'" — it fails
101
+ # bundle claims that same command passed → divergence
102
+ out1=$(TRUST_RECONCILE_COMMANDS="node -e 'process.exit(1)'" \
103
+ node "$RECONCILE" \
104
+ --bundle "$BUNDLE1" \
105
+ --repo-root "$TMP" 2>&1)
106
+ exit1=$?
107
+
108
+ if [[ $exit1 -ne 0 ]]; then
109
+ _pass "DIVERGENCE-CAUGHT: exits 1 (got $exit1)"
110
+ else
111
+ _fail "DIVERGENCE-CAUGHT: expected exit 1, got 0"
112
+ fi
113
+
114
+ if echo "$out1" | grep -q "trust divergence"; then
115
+ _pass "DIVERGENCE-CAUGHT: output contains 'trust divergence'"
116
+ else
117
+ _fail "DIVERGENCE-CAUGHT: expected 'trust divergence' in output, got: $out1"
118
+ fi
119
+
120
+ # Verify the divergent command name appears in the message
121
+ if echo "$out1" | grep -q "process.exit(1)"; then
122
+ _pass "DIVERGENCE-CAUGHT: output names the divergent command"
123
+ else
124
+ _fail "DIVERGENCE-CAUGHT: expected command name in output, got: $out1"
125
+ fi
126
+
127
+ # ─── TEST 2: MATCHING-PASSES ──────────────────────────────────────────────────
128
+ echo ""
129
+ echo "=== TEST 2: MATCHING-PASSES — claimed pass, CI re-run also PASSES ==="
130
+
131
+ BUNDLE2="$TMP/bundle-match.json"
132
+ write_bundle "$BUNDLE2" "node -e 'process.exit(0)'" "true"
133
+
134
+ out2=$(TRUST_RECONCILE_COMMANDS="node -e 'process.exit(0)'" \
135
+ node "$RECONCILE" \
136
+ --bundle "$BUNDLE2" \
137
+ --repo-root "$TMP" 2>&1)
138
+ exit2=$?
139
+
140
+ if [[ $exit2 -eq 0 ]]; then
141
+ _pass "MATCHING-PASSES: exits 0"
142
+ else
143
+ _fail "MATCHING-PASSES: expected exit 0, got $exit2 — output: $out2"
144
+ fi
145
+
146
+ if echo "$out2" | grep -q "RECONCILED"; then
147
+ _pass "MATCHING-PASSES: output shows RECONCILED"
148
+ else
149
+ _fail "MATCHING-PASSES: expected 'RECONCILED' in output, got: $out2"
150
+ fi
151
+
152
# ─── TEST 3: NO-CHECKPOINT ────────────────────────────────────────────────────
printf '\n%s\n' "=== TEST 3: NO-CHECKPOINT — no bundle present, fresh verify only ==="

# Case A: no --bundle argument and a passing verify command.
fresh_ok_rc=0
fresh_ok_out=$(
  TRUST_RECONCILE_COMMANDS="node -e 'process.exit(0)'" \
    node "$RECONCILE" --repo-root "$TMP" 2>&1
) || fresh_ok_rc=$?

if (( fresh_ok_rc != 0 )); then
  _fail "NO-CHECKPOINT: expected exit 0, got $fresh_ok_rc — output: $fresh_ok_out"
else
  _pass "NO-CHECKPOINT: exits 0 (fresh verify passes, no bundle)"
fi

# The output is expected to mention that a missing bundle is tolerated.
if [[ "$fresh_ok_out" == *"fail-open"* ]]; then
  _pass "NO-CHECKPOINT: output notes fail-open on bundle absence"
else
  _fail "NO-CHECKPOINT: expected 'fail-open' in output, got: $fresh_ok_out"
fi

# Case B: still no bundle, but the verify command itself fails. The run must
# exit non-zero and report the CI verification failure.
fresh_bad_rc=0
fresh_bad_out=$(
  TRUST_RECONCILE_COMMANDS="node -e 'process.exit(1)'" \
    node "$RECONCILE" --repo-root "$TMP" 2>&1
) || fresh_bad_rc=$?

if (( fresh_bad_rc == 0 )); then
  _fail "NO-CHECKPOINT: expected exit 1 when fresh verify fails, got 0 — output: $fresh_bad_out"
else
  _pass "NO-CHECKPOINT: exits 1 when fresh verify fails (no bundle)"
fi

if [[ "$fresh_bad_out" == *"verification failed in CI"* ]]; then
  _pass "NO-CHECKPOINT: 'verification failed in CI' message when fresh verify fails"
else
  _fail "NO-CHECKPOINT: expected 'verification failed in CI' message, got: $fresh_bad_out"
fi
190
+
191
# ─── TEST 4: LAUNDERING-CAUGHT ────────────────────────────────────────────────
printf '\n%s\n' "=== TEST 4: LAUNDERING-CAUGHT — claimed pass for a laundered command ==="

# Scenario: the bundle claims that 'npm run build || true' passed, while the
# canonical verify command is an unrelated command that succeeds.
# The test expects the laundering check to fire first, ahead of the
# "CI never ran" check.
BUNDLE4="$TMP/bundle-launder.json"
write_bundle "$BUNDLE4" "npm run build || true" "true"

launder_rc=0
launder_out=$(
  TRUST_RECONCILE_COMMANDS="node -e 'process.exit(0)'" \
    node "$RECONCILE" --bundle "$BUNDLE4" --repo-root "$TMP" 2>&1
) || launder_rc=$?

# A non-zero exit is required even though the canonical command passes.
if (( launder_rc == 0 )); then
  _fail "LAUNDERING-CAUGHT: expected exit 1, got 0 — output: $launder_out"
else
  _pass "LAUNDERING-CAUGHT: exits 1"
fi

# Both the specific reason and the generic divergence label must be printed.
if [[ "$launder_out" == *"laundering"* ]]; then
  _pass "LAUNDERING-CAUGHT: output contains 'laundering'"
else
  _fail "LAUNDERING-CAUGHT: expected 'laundering' in output, got: $launder_out"
fi

if [[ "$launder_out" == *"trust divergence"* ]]; then
  _pass "LAUNDERING-CAUGHT: output contains 'trust divergence'"
else
  _fail "LAUNDERING-CAUGHT: expected 'trust divergence' in output, got: $launder_out"
fi
224
+
225
# ─── YAML-VALID ───────────────────────────────────────────────────────────────
# Checks that the trust-reconcile workflow file is well-formed. Validators are
# tried in order of strength and only the first one available is used:
#   1. python3 with PyYAML   — full parse
#   2. yamllint (relaxed)    — lint-level parse
#   3. grep for key fields   — structural smoke test when no parser exists
echo ""
echo "=== YAML-VALID: .github/workflows/trust-reconcile.yml parses ==="

WORKFLOW_FILE="$ROOT/.github/workflows/trust-reconcile.yml"

if [[ ! -f "$WORKFLOW_FILE" ]]; then
  _fail "YAML-VALID: workflow file not found at $WORKFLOW_FILE"
else
  # Becomes 1 as soon as some validator has produced a verdict (pass OR fail),
  # so that weaker fallbacks never override an earlier result.
  yaml_checked=0

  # Try python3 + PyYAML first. PyYAML is a third-party package, so its
  # presence is probed with an import instead of being assumed.
  if command -v python3 >/dev/null 2>&1 && python3 -c "import yaml" 2>/dev/null; then
    # The here-doc body must start in column 0: Python rejects uniformly
    # indented top-level code and the unindented PY terminator ends the script.
    # The file is read as UTF-8 explicitly so the result does not depend on the
    # caller's locale. Read and decode errors are printed like parse errors
    # instead of vanishing with the discarded stderr.
    if python3 - "$WORKFLOW_FILE" << 'PY' 2>/dev/null
import sys, yaml
try:
    with open(sys.argv[1], encoding="utf-8") as fh:
        yaml.safe_load(fh)
except (yaml.YAMLError, OSError, UnicodeError) as e:
    print("YAML error: " + str(e))
    sys.exit(1)
sys.exit(0)
PY
    then
      _pass "YAML-VALID: trust-reconcile.yml parses (python3 yaml)"
    else
      _fail "YAML-VALID: trust-reconcile.yml failed python3 yaml parse"
    fi
    yaml_checked=1
  fi

  # Fall back to yamllint only when the python3 check could not be attempted.
  if [[ $yaml_checked -eq 0 ]] && command -v yamllint >/dev/null 2>&1; then
    if yamllint -d relaxed "$WORKFLOW_FILE" >/dev/null 2>&1; then
      _pass "YAML-VALID: trust-reconcile.yml parses (yamllint)"
    else
      _fail "YAML-VALID: trust-reconcile.yml failed yamllint"
    fi
    yaml_checked=1
  fi

  # Structural fallback: no YAML parser is available, so only confirm that the
  # fields this workflow is expected to contain are present.
  if [[ $yaml_checked -eq 0 ]]; then
    structural_ok=1
    grep -q "^name:" "$WORKFLOW_FILE" || structural_ok=0
    grep -q "trust-reconcile" "$WORKFLOW_FILE" || structural_ok=0
    grep -q "ubuntu-latest" "$WORKFLOW_FILE" || structural_ok=0
    grep -q "node-version" "$WORKFLOW_FILE" || structural_ok=0
    grep -q "npm ci" "$WORKFLOW_FILE" || structural_ok=0
    grep -q "trust-reconcile.js" "$WORKFLOW_FILE" || structural_ok=0
    if [[ $structural_ok -eq 1 ]]; then
      _pass "YAML-VALID: trust-reconcile.yml has expected structure (yaml parser not available)"
    else
      _fail "YAML-VALID: trust-reconcile.yml missing expected structural fields"
    fi
  fi
fi
283
+
284
# ─── Summary ──────────────────────────────────────────────────────────────────
# Print the final verdict and turn the accumulated failure count into the
# script's exit status: 1 if any check failed, 0 otherwise.
printf '\n%s\n' "────────────────────────────────────────────"
if (( errors != 0 )); then
  echo "test_trust_reconcile: $errors check(s) failed."
  exit 1
fi
echo "test_trust_reconcile: all checks passed."
exit 0