@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,352 @@
1
+ #!/usr/bin/env bash
2
+ # test_claim_lookup.sh — Integration tests for the `claim` subcommand (#162).
3
+ #
4
+ # Verifies:
5
+ # AC1: status + value + failing evidence (with execution block) + policy + derivation drilldown
6
+ # AC1: --json flag emits structured ClaimExplanation object
7
+ # AC1: unknown claim id exits 1 with clear error listing available ids
8
+ # AC1: missing bundle exits 1 with clear error
9
+ # AC3: gate-hint in stop-goal-fit.js disputed warning contains workflow:sidecar -- claim
10
+ set -uo pipefail
11
+
12
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
13
+ source "$ROOT/evals/lib/node.sh"
14
+
15
# Scratch directory that holds every fixture and captured output of this run.
# Abort right away when it cannot be created: with an empty TMPDIR_EVAL the
# fixture paths built from it would resolve against the filesystem root.
TMPDIR_EVAL="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }
# Count of failed assertions.
errors=0

# Remove the scratch directory. ${VAR:?} refuses to expand an empty/unset path,
# and -- keeps a path starting with '-' from being parsed as an option.
cleanup() { rm -rf -- "${TMPDIR_EVAL:?}"; }
trap cleanup EXIT
20
+
21
# Print a passing assertion line. $1 - description.
_pass() {
  printf ' ✓ %s\n' "$1"
}

# Print a failing assertion line and bump the global `errors` counter.
# $1 - description.
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$((errors + 1))
}
23
+
24
+ echo "=== Claim Lookup Tests (issue #162) ==="
25
+
26
+ # ── helpers ──────────────────────────────────────────────────────────────────
27
+
28
# jq_node FILE EXPR — minimal jq stand-in built on node.
#   FILE  path to a JSON document
#   EXPR  JavaScript function source (e.g. 'd => d.status'); it is called with
#         the parsed document
# Prints the result followed by a newline: booleans, numbers and strings
# verbatim, anything else JSON-encoded. Exits 2 when the result is null or
# undefined.
# EXPR is spliced into the script as code (it is written by the test author).
# FILE is handed over through argv rather than interpolated into the script, so
# a path containing quotes or backslashes cannot corrupt the JavaScript source.
jq_node() {
  local file="$1" expr="$2"
  node -e "
    const d = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'));
    const r = (${expr})(d);
    if (r === undefined || r === null) { process.exit(2); }
    if (typeof r === 'boolean' || typeof r === 'number' || typeof r === 'string') {
      process.stdout.write(String(r) + '\n');
    } else {
      process.stdout.write(JSON.stringify(r) + '\n');
    }" "$file"
}
40
+
41
# seed_disputed_bundle DIR SLUG
# Creates DIR and writes DIR/trust.bundle containing one DISPUTED
# workflow.check.test claim for SLUG, one failing piece of evidence carrying an
# execution block (isError true, exitCode 1), the matching verification event,
# and the governing policy.
seed_disputed_bundle() {
  local bundle_dir="$1"
  local task_slug="$2"
  local stamp="2026-06-25T00:00:00Z"
  local claim_id="${task_slug}/unit-tests.flow-agents.workflow.unit tests pass"

  mkdir -p "$bundle_dir"
  # Unquoted delimiter: the variables above are expanded into the JSON.
  cat <<JSON > "$bundle_dir/trust.bundle"
{
  "schemaVersion": 3,
  "source": "claim-lookup-test;statusFunctionVersion=1",
  "claims": [
    {
      "id": "${claim_id}",
      "subjectType": "workflow-check",
      "subjectId": "${task_slug}/unit-tests",
      "surface": "flow-agents.workflow",
      "claimType": "workflow.check.test",
      "fieldOrBehavior": "unit tests pass",
      "value": "fail",
      "status": "disputed",
      "impactLevel": "high",
      "verificationPolicyId": "policy:workflow.check.test",
      "createdAt": "${stamp}",
      "updatedAt": "${stamp}"
    }
  ],
  "evidence": [
    {
      "id": "ev:${claim_id}",
      "claimId": "${claim_id}",
      "evidenceType": "test_output",
      "label": "npm test output",
      "method": "validation",
      "excerptOrSummary": "8 tests failed",
      "status": "disputed",
      "execution": {
        "runner": "npm test",
        "label": "npm test",
        "isError": true,
        "exitCode": 1
      },
      "sourceRef": "command-log.jsonl",
      "createdAt": "${stamp}"
    }
  ],
  "events": [
    {
      "id": "evt:${claim_id}",
      "claimId": "${claim_id}",
      "status": "disputed",
      "actor": "test",
      "method": "validation",
      "evidenceIds": ["ev:${claim_id}"],
      "createdAt": "${stamp}",
      "verifiedAt": "${stamp}"
    }
  ],
  "policies": [
    {
      "id": "policy:workflow.check.test",
      "claimType": "workflow.check.test",
      "requiredEvidence": ["test_output"],
      "requiredMethods": ["validation"],
      "acceptanceCriteria": ["A verified verification event must support a workflow.check.test claim."],
      "reviewAuthority": "system",
      "validityRule": { "kind": "manual" },
      "stalenessTriggers": [],
      "conflictRules": [],
      "impactLevel": "high"
    }
  ]
}
JSON
}
115
+
116
# ── Test 1: AC1 — text output has status + value + evidence + policy + drilldown ──
# Runs `claim` in its default text mode against a freshly seeded disputed
# bundle, then greps the captured output for each expected section.

echo ""
echo "── Test 1: text output (status + evidence + policy + drilldown) ──"

AC1_SLUG="claim-lookup-ac1"
AC1_DIR="$TMPDIR_EVAL/ac1"
AC1_OUT="$TMPDIR_EVAL/ac1.out"
AC1_CLAIM_ID="${AC1_SLUG}/unit-tests.flow-agents.workflow.unit tests pass"
seed_disputed_bundle "$AC1_DIR" "$AC1_SLUG"

# The command itself must succeed for a claim id that exists in the bundle.
if ! flow_agents_node workflow-sidecar claim "$AC1_CLAIM_ID" "$AC1_DIR" >"$AC1_OUT" 2>&1; then
  _fail "AC1: claim command failed: $(cat "$AC1_OUT")"
else
  _pass "AC1: claim command exits 0 for known disputed claim"
fi

# Derived status line.
if ! grep -q "Status: disputed" "$AC1_OUT"; then
  _fail "AC1: output missing derived status: $(head -3 "$AC1_OUT")"
else
  _pass "AC1: output contains derived status (disputed)"
fi

# Raw claim value.
if ! grep -q "Value: fail" "$AC1_OUT"; then
  _fail "AC1: output missing value"
else
  _pass "AC1: output contains raw value"
fi

# Execution block of the failing evidence: both fields must be printed.
if ! grep -q "exitCode: 1" "$AC1_OUT" || ! grep -q "isError: true" "$AC1_OUT"; then
  _fail "AC1: execution block missing from evidence output: $(grep -i "exitCode\|isError\|Evidence" "$AC1_OUT" || echo '(not found)')"
else
  _pass "AC1: failing evidence execution block shown (exitCode + isError)"
fi

# Governing policy heading.
if ! grep -q "Governing Policy (policy:workflow.check.test)" "$AC1_OUT"; then
  _fail "AC1: governing policy section missing"
else
  _pass "AC1: governing policy section present"
fi

# Policy detail fields: all three must be present.
if ! grep -q "requiredEvidence:" "$AC1_OUT" \
  || ! grep -q "acceptanceCriteria:" "$AC1_OUT" \
  || ! grep -q "reviewAuthority:" "$AC1_OUT"; then
  _fail "AC1: policy fields incomplete: $(grep -E "required|acceptance|review" "$AC1_OUT" || echo '(not found)')"
else
  _pass "AC1: policy fields (requiredEvidence, acceptanceCriteria, reviewAuthority) present"
fi

# Derivation drilldown heading.
if ! grep -q "Derivation Drilldown:" "$AC1_OUT"; then
  _fail "AC1: derivation drilldown section missing"
else
  _pass "AC1: derivation drilldown section present"
fi
168
+
169
# ── Test 2: AC1 — --json flag emits structured ClaimExplanation ──
# Runs `claim --json` against the Test 1 bundle and inspects the parsed object.
# stdout and stderr are captured in separate files: the stdout file is parsed
# as JSON below, so anything the command writes to stderr (for example a
# runtime warning) must not be mixed into it.

echo ""
echo "── Test 2: --json flag emits structured ClaimExplanation object ──"

AC2_JSON="$TMPDIR_EVAL/ac1.json"
AC2_ERR="$TMPDIR_EVAL/ac1.json.err"
if flow_agents_node workflow-sidecar claim "$AC1_CLAIM_ID" "$AC1_DIR" --json >"$AC2_JSON" 2>"$AC2_ERR"; then
  _pass "AC2: --json exits 0"
else
  _fail "AC2: --json failed: $(cat "$AC2_JSON" "$AC2_ERR")"
fi

# Validate JSON structure. A failed lookup (jq_node exits non-zero) yields ''.
FOUND="$(jq_node "$AC2_JSON" 'd => d.found' 2>/dev/null || echo '')"
STATUS="$(jq_node "$AC2_JSON" 'd => d.status' 2>/dev/null || echo '')"
VALUE="$(jq_node "$AC2_JSON" 'd => d.value' 2>/dev/null || echo '')"
HAS_POLICY="$(jq_node "$AC2_JSON" 'd => d.policy !== null && d.policy.id !== undefined' 2>/dev/null || echo '')"
EVIDENCE_LEN="$(jq_node "$AC2_JSON" 'd => d.evidence.length' 2>/dev/null || echo '')"
EXEC_EXITCODE="$(jq_node "$AC2_JSON" 'd => d.evidence[0] && d.evidence[0].execution && d.evidence[0].execution.exitCode' 2>/dev/null || echo '')"
HAS_WHY="$(jq_node "$AC2_JSON" 'd => typeof d.why === "object" && d.why !== null' 2>/dev/null || echo '')"

# _ac2_expect ACTUAL EXPECTED PASS_MSG FAIL_MSG
# Explicit if/else so that exactly one of _pass/_fail runs for each check.
_ac2_expect() {
  if [[ "$1" == "$2" ]]; then
    _pass "$3"
  else
    _fail "$4"
  fi
}

_ac2_expect "$FOUND" "true" "AC2: found=true in JSON" "AC2: expected found=true, got '$FOUND'"
_ac2_expect "$STATUS" "disputed" "AC2: status=disputed in JSON" "AC2: expected status=disputed, got '$STATUS'"
_ac2_expect "$VALUE" "fail" "AC2: value=fail in JSON" "AC2: expected value=fail, got '$VALUE'"
_ac2_expect "$HAS_POLICY" "true" "AC2: policy object present in JSON" "AC2: policy missing: $HAS_POLICY"
_ac2_expect "$EVIDENCE_LEN" "1" "AC2: evidence array has 1 item" "AC2: expected 1 evidence item, got '$EVIDENCE_LEN'"
_ac2_expect "$EXEC_EXITCODE" "1" "AC2: evidence[0].execution.exitCode=1 in JSON" "AC2: expected exitCode=1, got '$EXEC_EXITCODE'"
_ac2_expect "$HAS_WHY" "true" "AC2: why object present in JSON" "AC2: why object missing"
197
+
198
# ── Test 3: AC1 — unknown id exits 1 with clear error listing available ids ──
# Looks up an id that is not in the Test 1 bundle: the command must fail, name
# the id it could not find, and list the ids that do exist.

echo ""
echo "── Test 3: unknown claim id → clear error + list of available ids ──"

AC3_OUT="$TMPDIR_EVAL/ac3.out"
if ! flow_agents_node workflow-sidecar claim "nonexistent-claim-id" "$AC1_DIR" >"$AC3_OUT" 2>&1; then
  _pass "AC3: exits 1 for unknown claim id"
else
  _fail "AC3: expected exit 1 for unknown claim id but got 0"
fi

if ! grep -q "unknown claim id: nonexistent-claim-id" "$AC3_OUT"; then
  _fail "AC3: error message missing id: $(cat "$AC3_OUT")"
else
  _pass "AC3: error message names the unknown id"
fi

if ! grep -q "Available claim ids" "$AC3_OUT"; then
  _fail "AC3: error does not list available ids: $(cat "$AC3_OUT")"
else
  _pass "AC3: error lists available claim ids"
fi
221
+
222
# ── Test 4: AC1 — missing bundle exits 1 ──
# Points the command at a directory that was never created: it must fail and
# say that no trust.bundle was found.

echo ""
echo "── Test 4: missing bundle → clear error ──"

AC4_OUT="$TMPDIR_EVAL/ac4.out"
if ! flow_agents_node workflow-sidecar claim "any-id" "$TMPDIR_EVAL/nonexistent" >"$AC4_OUT" 2>&1; then
  _pass "AC4: exits 1 for missing bundle"
else
  _fail "AC4: expected exit 1 for missing bundle but got 0"
fi

if ! grep -q "no trust.bundle at" "$AC4_OUT"; then
  _fail "AC4: error message missing: $(cat "$AC4_OUT")"
else
  _pass "AC4: error message mentions missing trust.bundle"
fi
239
+
240
# ── Test 5: AC3 — gate-hint in stop-goal-fit.js warning ──
# The fixture uses an acceptance-criterion claim rather than a check claim so
# the bundleEnforcement warning is not deduplicated by captureCrossReference.
# FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip prevents backstop re-runs for hermeticity.

echo ""
echo "── Test 5: gate-hint appears in stop-goal-fit.js disputed warning ──"

AC5_SLUG="gate-hint-test"
AC5_PROJ="$TMPDIR_EVAL/gate-hint-proj"
AC5_DIR="$AC5_PROJ/.flow-agents/$AC5_SLUG"
mkdir -p "$AC5_DIR"

# Minimal bundle with one disputed acceptance-criterion claim. Its claimType is
# workflow.acceptance.criterion (not workflow.check.*), so the subjectId won't
# match any evidence check id and bundleEnforcement won't be deduped.
# Quoted delimiter: the JSON is written literally, nothing is expanded.
cat <<'TRUST_BUNDLE' > "$AC5_DIR/trust.bundle"
{
  "schemaVersion": 3,
  "source": "claim-lookup-test",
  "claims": [
    {
      "id": "gate-hint-test/AC1.flow-agents.workflow.acceptance criterion verified",
      "subjectType": "workflow-criterion",
      "subjectId": "gate-hint-test/AC1",
      "surface": "flow-agents.workflow",
      "claimType": "workflow.acceptance.criterion",
      "fieldOrBehavior": "acceptance criterion verified",
      "value": "fail",
      "status": "disputed",
      "impactLevel": "high",
      "verificationPolicyId": "policy:workflow.acceptance.criterion",
      "createdAt": "2026-06-25T00:00:00Z",
      "updatedAt": "2026-06-25T00:00:00Z"
    }
  ],
  "evidence": [],
  "events": [
    {
      "id": "evt:gate-hint-test/AC1",
      "claimId": "gate-hint-test/AC1.flow-agents.workflow.acceptance criterion verified",
      "status": "disputed",
      "actor": "test",
      "method": "validation",
      "evidenceIds": [],
      "createdAt": "2026-06-25T00:00:00Z",
      "verifiedAt": "2026-06-25T00:00:00Z"
    }
  ],
  "policies": [
    {
      "id": "policy:workflow.acceptance.criterion",
      "claimType": "workflow.acceptance.criterion",
      "requiredEvidence": ["human_attestation"],
      "acceptanceCriteria": ["A criterion must have a verified event."],
      "reviewAuthority": "system",
      "validityRule": { "kind": "manual" },
      "stalenessTriggers": [],
      "conflictRules": [],
      "impactLevel": "high"
    }
  ]
}
TRUST_BUNDLE
304
+
305
# Session state for the gate-hint project: the task is recorded as delivered/done.
printf '%s\n' '{"schema_version":"1.0","task_slug":"gate-hint-test","status":"delivered","phase":"done","updated_at":"2026-06-25T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
  > "$AC5_DIR/state.json"

# Deliver artifact with every box ticked and a PASS verdict.
cat <<'DELIVER_MD' > "$AC5_DIR/gate-hint-test--deliver.md"
# Gate Hint Test

branch: main
status: delivered
type: deliver

## Definition Of Done
- [x] all tests pass

## Goal Fit Gate
- [x] criteria verified

### Verdict: PASS
DELIVER_MD
324
+
325
# Feed a Stop event for the fixture project to the hook and capture everything
# it prints. The hook's exit status is deliberately ignored (|| true); only the
# text it emits is asserted on.
AC5_OUT="$TMPDIR_EVAL/ac5.out"
printf -v AC5_PAYLOAD '{"hook_event_name":"Stop","cwd":"%s"}' "$AC5_PROJ"
# FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip prevents backstop re-runs for hermeticity.
printf '%s' "$AC5_PAYLOAD" \
  | FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$AC5_OUT" 2>&1 || true

if ! grep -q "workflow:sidecar -- claim" "$AC5_OUT"; then
  _fail "AC5: gate-hint missing from stop-goal-fit output: $(cat "$AC5_OUT")"
else
  _pass "AC5: gate-hint 'workflow:sidecar -- claim' appears in stop-goal-fit output"
fi

if ! grep -q "trust.bundle claim disputed" "$AC5_OUT"; then
  _fail "AC5: disputed warning missing: $(cat "$AC5_OUT")"
else
  _pass "AC5: disputed warning present in stop-goal-fit output"
fi
341
+
342
# ── Results ──────────────────────────────────────────────────────────────────
# Print the failure count and exit 0 only when no assertion failed.

printf '\n'
echo "──────────────────────────────────"
echo "claim lookup tests: $((errors)) failed"
[[ "$errors" -eq 0 ]] || exit 1
echo "ALL PASSED"
exit 0
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env bash
2
+ # test_command_log_fork_classification.sh
3
+ #
4
+ # The verifier must tell a BENIGN concurrent fork apart from real TAMPER, and
5
+ # the repair tool must refuse to touch tamper. This is what prevents an honest
6
+ # parallel-write race from becoming a hard block an agent is tempted to launder.
7
+ #
8
+ # forked = two PostToolUse captures share a parent; all hashes self-consistent
9
+ # and reachable. NON-blocking advisory; records stay trusted.
10
+ # broken = content edit (self-hash mismatch) / reorder / deletion / a
11
+ # non-capture sibling on a shared parent. Hard block (unchanged).
12
+ #
13
+ # Also proves: repair re-linearizes forked→ok, and REFUSES broken (no laundering).
14
# No errexit on purpose: failed checks are counted in `errors`, not fatal.
set -uo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# GATE is exported because the inline node snippets load it via process.env.GATE.
export GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
REPAIR="$ROOT/scripts/repair-command-log.js"

# Scratch directory for all fixtures. Abort if it cannot be created: with an
# empty TMP the fixture paths would resolve against the filesystem root.
TMP="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }
# ${TMP:?} refuses to expand an empty path; -- guards a leading '-'.
trap 'rm -rf -- "${TMP:?}"' EXIT
errors=0
_pass() { echo " ✓ $1"; }
_fail() { echo " ✗ $1"; errors=$((errors + 1)); }

# Session directory, relative to each fixture project root.
SD=".flow-agents/s"
26
+
27
# build DIR SPEC_JSON
# Build a command-log from a spec: JSON array of {cmd,exit,src,parent} where
# parent is the 0-based index of the entry whose hash is this entry's prevHash
# (-1 = genesis). Lets us construct linear chains AND forks deterministically.
#
# Each record gets command, observedResult ("pass" when exit is 0, otherwise
# "fail"), exitCode, a capturedAt timestamp one second after the previous
# entry, and source (src, defaulting to "postToolUse-capture"). Its hash is
# sha256(prevHash + JSON of the record with sorted keys, `_chain` excluded).
# The genesis value comes from the gate module (CHAIN_GENESIS_VERIFY).
#
# The log is written to DIR/$SD/command-log.jsonl. $SD is passed to node via
# the environment so the directory created by mkdir and the path node writes
# to cannot drift apart.
build() { # $1=dir $2=spec-json
  mkdir -p "$1/$SD"
  DIR="$1" SD="$SD" node -e '
    const fs=require("fs"),crypto=require("crypto"),path=require("path");
    const g=require(process.env.GATE), GEN=g.CHAIN_GENESIS_VERIFY;
    // Canonical form: keys sorted, the _chain envelope left out.
    const canon=r=>{const k=Object.keys(r).filter(x=>x!=="_chain").sort();const o={};for(const x of k)o[x]=r[x];return JSON.stringify(o);};
    const H=(p,r)=>crypto.createHash("sha256").update(p+canon(r)).digest("hex");
    const spec=JSON.parse(process.argv[1]); const hashes=[],lines=[];
    spec.forEach((s,i)=>{
      const rec={command:s.cmd,observedResult:s.exit===0?"pass":"fail",exitCode:s.exit,
        capturedAt:new Date(Date.UTC(2026,0,1,0,0,i)).toISOString(),source:s.src||"postToolUse-capture"};
      // parent -1 chains to genesis; any other index chains to that entry.
      const prev=s.parent===-1?GEN:hashes[s.parent]; const h=H(prev,rec);
      hashes.push(h); lines.push(JSON.stringify({...rec,_chain:{seq:i,prevHash:prev,hash:h}}));
    });
    fs.writeFileSync(path.join(process.env.DIR,process.env.SD,"command-log.jsonl"),lines.join("\n")+"\n");
  ' "$2"
}
47
# status DIR — print the `status` field returned by the gate module's
# verifyCommandLogChain for the session directory DIR/$SD. $SD is passed via
# the environment instead of being repeated as a literal inside the snippet.
status() {
  DIR="$1" SD="$SD" node -e '
    const g=require(process.env.GATE);
    console.log(g.verifyCommandLogChain(process.env.DIR+"/"+process.env.SD).status);
  '
}
48
+
49
# ── 1. linear → ok ────────────────────────────────────────────────────────────
# Two entries, the second chained to the first.
D="$TMP/linear"
build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
got="$(status "$D")"
if [ "$got" = "ok" ]; then
  _pass "linear chain → ok"
else
  _fail "linear → $got, want ok"
fi

# ── 2. concurrent fork (two captures share a parent) → forked ─────────────────
# Entries b and c both chain to entry 0.
D="$TMP/fork"
build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0}]'
got="$(status "$D")"
if [ "$got" = "forked" ]; then
  _pass "concurrent fork → forked (not broken)"
else
  _fail "fork → $got, want forked"
fi
56
+
57
# ── 3. content edit (flip exitCode, keep hash) → broken ───────────────────────
# Rewrites the second record to claim success while leaving its stored hash
# untouched.
D="$TMP/flip"
build "$D" '[{"cmd":"npm test","exit":0,"parent":-1},{"cmd":"npm run lint","exit":1,"parent":0}]'
python3 - "$D/$SD/command-log.jsonl" <<'PY'
import json,sys
L=open(sys.argv[1]).read().strip().split("\n"); e=json.loads(L[1]); e["exitCode"]=0; e["observedResult"]="pass"
L[1]=json.dumps(e); open(sys.argv[1],"w").write("\n".join(L)+"\n")
PY
got="$(status "$D")"
if [ "$got" = "broken" ]; then
  _pass "content edit → broken (tamper, not fork)"
else
  _fail "flip → $got, want broken"
fi
65
+
66
# ── 4. reorder → broken ───────────────────────────────────────────────────────
# Swaps the two lines of an otherwise valid linear log.
D="$TMP/reorder"
build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
python3 - "$D/$SD/command-log.jsonl" <<'PY'
import sys
L=open(sys.argv[1]).read().strip().split("\n"); L[0],L[1]=L[1],L[0]; open(sys.argv[1],"w").write("\n".join(L)+"\n")
PY
got="$(status "$D")"
if [ "$got" = "broken" ]; then
  _pass "reorder → broken"
else
  _fail "reorder → $got, want broken"
fi
73
+
74
# ── 5. deleted predecessor → broken ───────────────────────────────────────────
# Keeps only the second line, so the entry it chains to is gone.
D="$TMP/delete"
build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
python3 - "$D/$SD/command-log.jsonl" <<'PY'
import sys
L=open(sys.argv[1]).read().strip().split("\n"); open(sys.argv[1],"w").write(L[1]+"\n")
PY
got="$(status "$D")"
if [ "$got" = "broken" ]; then
  _pass "deleted predecessor → broken"
else
  _fail "delete → $got, want broken"
fi
81
+
82
# ── 6. non-capture sibling on a shared parent → broken (not a benign fork) ─────
# Same shape as the fork case, but the third entry's source is "manual-inject".
D="$TMP/badfork"
build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0,"src":"manual-inject"}]'
got="$(status "$D")"
if [ "$got" = "broken" ]; then
  _pass "non-capture sibling fork → broken (conservative)"
else
  _fail "badfork → $got, want broken"
fi
85
+
86
# ── 7. repair re-linearizes forked → ok; refuses broken ───────────────────────
# Part one: a forked log must verify as ok after the repair tool has run.
D="$TMP/fork2"
build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0}]'
node "$REPAIR" "$D/$SD" --reason "test" >/dev/null 2>&1
got="$(status "$D")"
if [ "$got" = "ok" ]; then
  _pass "repair: forked → ok"
else
  _fail "repair forked → $got, want ok"
fi

# Part two: a content-edited (broken) log must be rejected and left untouched.
D="$TMP/flip2"
build "$D" '[{"cmd":"x","exit":0,"parent":-1},{"cmd":"y","exit":1,"parent":0}]'
python3 - "$D/$SD/command-log.jsonl" <<'PY'
import json,sys
L=open(sys.argv[1]).read().strip().split("\n"); e=json.loads(L[1]); e["exitCode"]=0
L[1]=json.dumps(e); open(sys.argv[1],"w").write("\n".join(L)+"\n")
PY
before=$(cat "$D/$SD/command-log.jsonl")
# Capture the exit status with `|| rc=$?` rather than a `set +e … set -e`
# bracket: the bracket would switch errexit ON for everything that follows.
rc=0
node "$REPAIR" "$D/$SD" >/dev/null 2>&1 || rc=$?
after=$(cat "$D/$SD/command-log.jsonl")
if [ "$rc" -ne 0 ] && [ "$before" = "$after" ]; then
  _pass "repair: REFUSES broken (exit!=0, log unchanged — no laundering)"
else
  _fail "repair touched/accepted a broken log (rc=$rc)"
fi
101
+
102
# ── 8. the Stop gate does NOT hard-block a forked log ─────────────────────────
# Builds a delivered session whose command log is forked but otherwise clean,
# runs the gate in block mode, and checks that it exits 0, prints the fork
# advisory, and does not print the tamper warning.
D="$TMP/gate"; mkdir -p "$D/$SD"
printf '# Repo\n' > "$D/AGENTS.md"
printf '%s' '{"schema_version":"1.0","task_slug":"s","status":"delivered","phase":"done","updated_at":"2026-06-23T00:00:00Z","next_action":{"status":"done","summary":"done"}}' > "$D/$SD/state.json"
cat > "$D/$SD/s--deliver.md" <<'MD'
# s

branch: main
status: delivered
type: deliver

## Definition Of Done
- [x] tests pass

## Goal Fit Gate
- [x] acceptance verified

### Verdict: PASS
MD
# forked log whose captures are all PASS, so there is no contradiction to flag
build "$D" '[{"cmd":"npm test","exit":0,"parent":-1},{"cmd":"npm run build","exit":0,"parent":0},{"cmd":"npm run build","exit":0,"parent":0}]'
printf '%s' '{"schema_version":"1.0","task_slug":"s","verdict":"pass","checks":[{"id":"t","kind":"command","status":"pass","command":"npm test","summary":"ok"}]}' > "$D/$SD/evidence.json"

# Capture the gate's exit status with `|| rc=$?`; this does not toggle errexit.
rc=0
out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$D\"}") || rc=$?
if [ "$rc" -eq 0 ]; then
  _pass "gate does NOT hard-block forked log (exit 0)"
else
  _fail "gate blocked forked log (exit $rc): $out"
fi

# grep reads from a here-string instead of `echo … |`: with pipefail set, a
# writer killed by SIGPIPE after `grep -q` exits early would fail the pipeline
# even though the pattern matched.
if grep -q "concurrent-capture fork" <<< "$out"; then
  _pass "gate emits the concurrent-fork advisory"
else
  _fail "missing fork advisory: $out"
fi
if grep -q "command-log integrity check FAILED" <<< "$out"; then
  _fail "gate wrongly emitted tamper warning for a fork"
else
  _pass "no false tamper warning for a fork"
fi
131
+
132
# Final verdict: exit 0 only when no check failed.
echo ""
if [ "$errors" -eq 0 ]; then
  echo "fork classification tests passed."
  exit 0
else
  echo "fork classification tests FAILED: $errors issue(s)."
  exit 1
fi