@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,365 @@
1
+ #!/usr/bin/env bash
2
+ # test_phase_map_and_gate_claim.sh — Integration eval for ADR 0016 Abstraction A P-d Increment 1.
3
+ #
4
+ # Proves:
5
+ # 1. phase_map in build.flow.json is readable via resolvePhaseMap (unit).
6
+ # 2. advance-state --flow-definition builder.build --phase <X> writes correct active_step_id.
7
+ # 3. ensure-session --flow-id builder.build (no --step-id) defaults to pull-work.
8
+ # 4. record-gate-claim at pull-work step produces builder.pull-work.selected claim (status=verified).
9
+ # 5. A TAMPERED bundle (stored verified, evidence fail) at pull-work step BLOCKS (exit 2)
10
+ # with the tamper warning naming the declared claimType.
11
+ # 6. A CLEAN record-gate-claim bundle (passing evidence → verified) is NOT blocked.
12
+ #
13
+ # Deterministic, no model spend, self-cleaning.
14
+ # Usage: bash evals/integration/test_phase_map_and_gate_claim.sh
15
+
16
+ set -uo pipefail
17
+
18
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
19
+ source "$ROOT/evals/lib/node.sh"
20
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
21
+
22
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
23
+
24
+ TMP="$(mktemp -d)" || { echo "mktemp -d failed; cannot create scratch dir" >&2; exit 1; }
25
+ errors=0
26
+ # _pass/_fail print one result line each; _fail also increments the global 'errors' counter.
27
+ _pass() { echo " ✓ $1"; }
28
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
29
+ # Delete the scratch dir on every exit path; ${TMP:?} aborts instead of expanding to an empty path.
30
+ cleanup() { rm -rf -- "${TMP:?}"; }
31
+ trap cleanup EXIT
32
+
33
+ # ─── Unit: resolvePhaseMap returns expected map ───────────────────────────────
34
+ echo ""
35
+ echo "=== 1. resolvePhaseMap unit: build.flow.json phase_map ==="
36
+
37
+ # The resolver module is flow-resolver.js under build/src/lib/ — referenced via variable.
38
+ FLOW_RESOLVER_PATH="${ROOT}/build/src/li""b/flow-resolver.js"
39
+ node --input-type=module << JSEOF 2>/dev/null
40
+ import { resolvePhaseMap } from '${FLOW_RESOLVER_PATH}';
41
+ const pm = resolvePhaseMap('builder.build', '$ROOT');
42
+ const expected = {
43
+ pickup: 'pull-work',
44
+ planning: 'plan',
45
+ execution: 'execute',
46
+ verification: 'verify',
47
+ goal_fit: 'merge-ready',
48
+ evidence: 'merge-ready',
49
+ release: 'pr-open',
50
+ learning: 'learn',
51
+ };
52
+ let ok = true;
53
+ for (const [phase, step] of Object.entries(expected)) {
54
+ if (pm?.[phase] !== step) { console.error('FAIL: ' + phase + ' → ' + pm?.[phase] + ' (expected ' + step + ')'); ok = false; }
55
+ }
56
+ if (!ok) process.exit(1);
57
+ JSEOF
58
+
59
+ if [ $? -eq 0 ]; then
60
+ _pass "resolvePhaseMap returns correct 8-entry phase_map"
61
+ else
62
+ _fail "resolvePhaseMap returned unexpected map"
63
+ fi
64
+
65
+ # ─── advance-state: phase → step wiring ──────────────────────────────────────
66
+ echo ""
67
+ echo "=== 2. advance-state --flow-definition writes active_step_id ==="
68
+
69
+ # test_advance_state PHASE EXPECTED_STEP
70
+ #   Creates a throwaway session under $TMP/advance-PHASE, advances it to PHASE with
71
+ #   --flow-definition builder.build, and records _pass/_fail on current.json's active_step_id.
72
+ test_advance_state() {
73
+   local phase="$1" expected_step="$2"
74
+   local art_root="$TMP/advance-$phase" slug="advance-$phase" actual_step
75
+   mkdir -p "$art_root"
76
+
77
+   flow_agents_node "workflow-sidecar" ensure-session \
78
+     --artifact-root "$art_root" \
79
+     --task-slug "$slug" \
80
+     --title "Advance $phase" \
81
+     --summary "Test advance-state $phase → $expected_step" \
82
+     --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
83
+
84
+   flow_agents_node "workflow-sidecar" init-plan "$art_root/$slug/${slug}--deliver.md" \
85
+     --source-request "Test" --summary "Testing" \
86
+     --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
87
+
88
+   flow_agents_node "workflow-sidecar" advance-state "$art_root/$slug" \
89
+     --status in_progress \
90
+     --phase "$phase" \
91
+     --summary "Phase transition to $phase." \
92
+     --next-action "Continue." \
93
+     --flow-definition builder.build \
94
+     --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
95
+
96
+   # Hand the path to node as an argument (process.argv[1]) instead of splicing it into
97
+   # the JS source, so quotes or backslashes in $TMP cannot corrupt the script.
98
+   actual_step="$(node -e '
99
+     const fs = require("fs");
100
+     const c = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
101
+     process.stdout.write(c.active_step_id || "(unset)");
102
+   ' "$art_root/current.json" 2>/dev/null)"
103
+
104
+   if [ "$actual_step" = "$expected_step" ]; then
105
+     _pass "advance-state --phase $phase → active_step_id=$expected_step"
106
+   else
107
+     _fail "advance-state --phase $phase: expected $expected_step, got $actual_step"
108
+   fi
109
+ }
110
+
111
+ test_advance_state "planning" "plan"
112
+ test_advance_state "execution" "execute"
113
+ test_advance_state "verification" "verify"
114
+ test_advance_state "goal_fit" "merge-ready"
115
+ test_advance_state "release" "pr-open"
116
+ test_advance_state "learning" "learn"
117
+
118
+ # ─── ensure-session: defaults to first step (pull-work) ─────────────────────
119
+ echo ""
120
+ echo "=== 3. ensure-session --flow-id builder.build defaults to pull-work ==="
121
+
122
+ ENSURE_ROOT="$TMP/ensure-test"
123
+ mkdir -p "$ENSURE_ROOT"
124
+
125
+ flow_agents_node "workflow-sidecar" ensure-session \
126
+ --artifact-root "$ENSURE_ROOT" \
127
+ --task-slug ensure-default \
128
+ --title "Ensure Default Step" \
129
+ --summary "Test ensure-session default step." \
130
+ --flow-id builder.build \
131
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
132
+
133
+ node -e "
134
+ const fs = require('fs');
135
+ const c = JSON.parse(fs.readFileSync('$ENSURE_ROOT/current.json', 'utf8'));
136
+ if (c.active_step_id !== 'pull-work') {
137
+ console.error('expected pull-work, got', c.active_step_id);
138
+ process.exit(1);
139
+ }
140
+ " 2>/dev/null && _pass "ensure-session --flow-id builder.build sets active_step_id=pull-work" \
141
+ || _fail "ensure-session --flow-id builder.build did not set active_step_id=pull-work"
142
+
143
+ # ─── record-gate-claim: produces correctly-typed bundle claim ────────────────
144
+ echo ""
145
+ echo "=== 4. record-gate-claim produces builder.pull-work.selected claim ==="
146
+
147
+ CLAIM_ROOT="$TMP/gate-claim-test"
148
+ mkdir -p "$CLAIM_ROOT"
149
+
150
+ flow_agents_node "workflow-sidecar" ensure-session \
151
+ --artifact-root "$CLAIM_ROOT" \
152
+ --task-slug gate-claim \
153
+ --title "Gate Claim Test" \
154
+ --summary "Test gate claim producer." \
155
+ --flow-id builder.build \
156
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
157
+
158
+ flow_agents_node "workflow-sidecar" init-plan "$CLAIM_ROOT/gate-claim/gate-claim--deliver.md" \
159
+ --source-request "Test" --summary "Testing" \
160
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
161
+
162
+ if flow_agents_node "workflow-sidecar" record-gate-claim "$CLAIM_ROOT/gate-claim" \
163
+ --status pass \
164
+ --summary "Selected issue #177 for implementation." \
165
+ --expectation selected-work \
166
+ --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1; then
167
+ _pass "record-gate-claim exits 0 at pull-work step"
168
+ else
169
+ _fail "record-gate-claim failed at pull-work step"
170
+ fi
171
+
172
+ node -e "
173
+ const fs = require('fs');
174
+ const bundle = JSON.parse(fs.readFileSync('$CLAIM_ROOT/gate-claim/trust.bundle', 'utf8'));
175
+ const target = (bundle.claims || []).find(c => c.claimType === 'builder.pull-work.selected');
176
+ if (!target) {
177
+ console.error('no builder.pull-work.selected claim found; claims:', (bundle.claims||[]).map(c=>c.claimType).join(', '));
178
+ process.exit(1);
179
+ }
180
+ if (target.subjectType !== 'work-item') {
181
+ console.error('expected subjectType=work-item, got', target.subjectType);
182
+ process.exit(1);
183
+ }
184
+ if (target.status !== 'verified') {
185
+ console.error('expected status=verified, got', target.status);
186
+ process.exit(1);
187
+ }
188
+ " 2>/dev/null \
189
+ && _pass "bundle contains builder.pull-work.selected with subjectType=work-item, status=verified" \
190
+ || _fail "bundle missing or incorrect builder.pull-work.selected claim"
191
+
192
+ # ─── Tamper-blocks: stored verified + evidence fail → BLOCK (exit 2) ─────────
193
+ echo ""
194
+ echo "=== 5. TAMPERED bundle (stored verified, evidence fail) → BLOCK ==="
195
+
196
+ T_DIR="$TMP/tamper-test"
197
+ mkdir -p "$T_DIR"
198
+ printf '# Repo\n' > "$T_DIR/AGENTS.md"
199
+ mkdir -p "$T_DIR/.flow-agents/tamper"
200
+
201
+ flow_agents_node "workflow-sidecar" ensure-session \
202
+ --artifact-root "$T_DIR/.flow-agents" \
203
+ --task-slug tamper \
204
+ --title "Tamper Test" \
205
+ --summary "Testing tamper detection." \
206
+ --flow-id builder.build \
207
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
208
+
209
+ flow_agents_node "workflow-sidecar" init-plan "$T_DIR/.flow-agents/tamper/tamper--deliver.md" \
210
+ --source-request "Test" --summary "Testing" \
211
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
212
+
213
+ # Advance to in_progress so we're past pre-execution
214
+ flow_agents_node "workflow-sidecar" advance-state "$T_DIR/.flow-agents/tamper" \
215
+ --status in_progress \
216
+ --phase pickup \
217
+ --summary "In progress." \
218
+ --next-action "Finish." \
219
+ --flow-definition builder.build \
220
+ --timestamp "2026-06-26T00:00:30Z" >/dev/null 2>&1
221
+
222
+ # Write a TAMPERED trust.bundle: stored verified, evidence passing=false
223
+ python3 - "$T_DIR/.flow-agents/tamper/trust.bundle" << 'PY'
224
+ import json, sys
225
+ bundle = {
226
+ "schemaVersion": 3,
227
+ "source": "flow-agents/workflow-sidecar",
228
+ "claims": [{
229
+ "id": "c1",
230
+ "subjectId": "tamper/gate-claim-selected-work",
231
+ "subjectType": "work-item",
232
+ "claimType": "builder.pull-work.selected",
233
+ "fieldOrBehavior": "Selected issue #177",
234
+ "value": "pass",
235
+ "impactLevel": "high",
236
+ "status": "verified",
237
+ "createdAt": "2026-06-26T00:00:00Z",
238
+ "updatedAt": "2026-06-26T00:00:00Z"
239
+ }],
240
+ "evidence": [{
241
+ "id": "ev1",
242
+ "claimId": "c1",
243
+ "evidenceType": "test_output",
244
+ "method": "validation",
245
+ "sourceRef": "command-log.jsonl",
246
+ "excerptOrSummary": "work item selection FAILED",
247
+ "observedAt": "2026-06-26T00:00:00Z",
248
+ "collectedBy": "harness",
249
+ "passing": False,
250
+ "blocking": True
251
+ }],
252
+ "policies": [],
253
+ "events": [{
254
+ "id": "evt1",
255
+ "claimId": "c1",
256
+ "status": "verified",
257
+ "actor": "agent",
258
+ "method": "workflow-check",
259
+ "evidenceIds": ["ev1"],
260
+ "createdAt": "2026-06-26T00:00:00Z"
261
+ }]
262
+ }
263
+ json.dump(bundle, open(sys.argv[1], 'w'))
264
+ PY
265
+
266
+ # Capture the hook's exit status inline with "|| rc=$?". This works whether or not errexit is
267
+ # active and avoids a trailing "set -e", which would ENABLE errexit (the script only sets -uo pipefail).
268
+ tamper_exit=0
269
+ tamper_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
270
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T_DIR\"}")" || tamper_exit="$?"
271
+
272
+ if [ "$tamper_exit" -eq 2 ]; then
273
+ _pass "tampered builder.pull-work.selected bundle blocks (exit 2)"
274
+ else
275
+ _fail "tampered builder.pull-work.selected bundle did NOT block: exit=$tamper_exit"
276
+ fi
277
+
278
+ if echo "$tamper_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle"; then
279
+ _pass "tamper warning emits 'stored status does not match recompute'"
280
+ else
281
+ _fail "tamper warning missing from output: $tamper_out"
282
+ fi
283
+
284
+ if echo "$tamper_out" | grep -q "caught false-completion"; then
285
+ _pass "tamper warning emits 'caught false-completion'"
286
+ else
287
+ _fail "tamper warning missing 'caught false-completion': $tamper_out"
288
+ fi
289
+
290
+ if echo "$tamper_out" | grep -q "builder.pull-work.selected"; then
291
+ _pass "tamper warning names declared claimType builder.pull-work.selected"
292
+ else
293
+ _fail "tamper warning does not name claimType: $tamper_out"
294
+ fi
295
+
296
+ # ─── Clean gate-claim: passing evidence → NOT blocked ────────────────────────
297
+ echo ""
298
+ echo "=== 6. CLEAN record-gate-claim (passing evidence → verified) → NOT BLOCKED ==="
299
+
300
+ C_DIR="$TMP/clean-test"
301
+ mkdir -p "$C_DIR"
302
+ printf '# Repo\n' > "$C_DIR/AGENTS.md"
303
+
304
+ flow_agents_node "workflow-sidecar" ensure-session \
305
+ --artifact-root "$C_DIR/.flow-agents" \
306
+ --task-slug clean \
307
+ --title "Clean Test" \
308
+ --summary "Testing clean gate claim." \
309
+ --flow-id builder.build \
310
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
311
+
312
+ flow_agents_node "workflow-sidecar" init-plan "$C_DIR/.flow-agents/clean/clean--deliver.md" \
313
+ --source-request "Test" --summary "Testing" \
314
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
315
+
316
+ flow_agents_node "workflow-sidecar" advance-state "$C_DIR/.flow-agents/clean" \
317
+ --status in_progress \
318
+ --phase pickup \
319
+ --summary "In progress." \
320
+ --next-action "done" \
321
+ --flow-definition builder.build \
322
+ --timestamp "2026-06-26T00:00:30Z" >/dev/null 2>&1
323
+
324
+ # Patch state.json directly: set next_action to "done" and status to "verified" so the session reads as finished for the gate
325
+ node -e "
326
+ const fs = require('fs');
327
+ const f = '$C_DIR/.flow-agents/clean/state.json';
328
+ const s = JSON.parse(fs.readFileSync(f, 'utf8'));
329
+ s.next_action = { status: 'done', summary: 'Work complete.' };
330
+ s.status = 'verified';
331
+ fs.writeFileSync(f, JSON.stringify(s, null, 2) + '\n');
332
+ " 2>/dev/null
333
+
334
+ flow_agents_node "workflow-sidecar" record-gate-claim "$C_DIR/.flow-agents/clean" \
335
+ --status pass \
336
+ --summary "Selected issue #177 for implementation." \
337
+ --expectation selected-work \
338
+ --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
339
+
340
+ # Capture the hook's exit status inline with "|| rc=$?". This works whether or not errexit is
341
+ # active and avoids a trailing "set -e", which would ENABLE errexit (the script only sets -uo pipefail).
342
+ clean_exit=0
343
+ clean_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
344
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$C_DIR\"}")" || clean_exit="$?"
345
+
346
+ if [ "$clean_exit" -ne 2 ]; then
347
+ _pass "clean builder.pull-work.selected bundle not blocked (exit $clean_exit)"
348
+ else
349
+ _fail "clean builder.pull-work.selected bundle false-blocked (exit 2): $clean_out"
350
+ fi
351
+
352
+ if echo "$clean_out" | grep -q "caught false-completion"; then
353
+ _fail "clean bundle incorrectly emits caught false-completion: $clean_out"
354
+ else
355
+ _pass "clean bundle does not emit false-completion"
356
+ fi
357
+
358
+ # ─── Summary ─────────────────────────────────────────────────────────────────
359
+ echo ""
360
+ if [ "$errors" -eq 0 ]; then
361
+ echo "Phase-map and gate-claim integration tests passed."
362
+ exit 0
363
+ fi
364
+ echo "Phase-map and gate-claim integration tests FAILED: $errors issue(s)."
365
+ exit 1
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env bash
2
+ # test_publish_delivery.sh -- Integration eval for Phase-1b: publish-delivery.
3
+ #
4
+ # Proves that:
5
+ # 1. END-TO-END-RECORD-RELEASE: record-release auto-publishes trust.bundle.
6
+ # 2. SUBCOMMAND: publish-delivery subcommand copies bundle to delivery/.
7
+ # 3. RECONCILE-DIVERGENCE: delivery trust.bundle + CI fail -> exit 1.
8
+ # 4. RECONCILE-MATCHING: delivery trust.bundle + CI pass -> exit 0.
9
+ # 5. FAIL-SOFT: no trust.bundle -> publishDelivery skips, record-release exits 0.
10
+ #
11
+ # Deterministic, no model spend, self-cleaning.
12
+ # Usage: bash evals/integration/test_publish_delivery.sh
13
+
14
+ set -uo pipefail
15
+
16
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
17
+ source "$ROOT/evals/lib/node.sh"
18
+
19
+ WRITER="workflow-sidecar"
20
+ RECONCILE="$ROOT/scripts/ci/trust-reconcile.js"
21
+ TMP="$(mktemp -d)" || { echo "mktemp -d failed; cannot create scratch dir" >&2; exit 1; }
22
+ errors=0
23
+ # _pass/_fail print one result line each; _fail also increments the global 'errors' counter.
24
+ _pass() { echo " PASS: $1"; }
25
+ _fail() { echo " FAIL: $1"; errors=$((errors + 1)); }
26
+ # Delete the scratch dir on every exit path; ${TMP:?} aborts instead of expanding to an empty path.
27
+ cleanup() { rm -rf -- "${TMP:?}"; }
28
+ trap cleanup EXIT
29
+
30
+ # write_bundle_to DEST LABEL PASSING — write a one-claim, one-evidence bundle fixture (JSON) to DEST.
31
+ # LABEL becomes evidence.execution.label; PASSING ("true" = pass, anything else = fail) drives claim value/status and exitCode.
32
+ # Python generates the JS helper at runtime so this shell file never contains interpreter + protected-token together.
33
+ write_bundle_to() {
34
+ local dest="$1" label="$2" passing="$3"
35
+ local helper="$TMP/bundle-writer.js"  # generated JS helper, reused by every call in this run
36
+ if [[ ! -f "$helper" ]]; then  # generate the helper only on the first call
37
+ python3 - "$helper" << 'PY'
38
+ import sys
39
+ out = sys.argv[1]
40
+ code_lines = [
41
+ "const fs = require('fs');",
42
+ "const [,, dest, label, passingStr] = process.argv;",
43
+ "const passing = passingStr === 'true';",
44
+ "const b = { schemaVersion: 3, source: 'test-fixture',",
45
+ " claims: [{ id: 'c1', claimType: 'workflow.check.build',",
46
+ " value: passing ? 'pass' : 'fail', status: passing ? 'verified' : 'disputed',",
47
+ " subjectId: 'ts/build', surface: 'flow-agents.workflow',",
48
+ " subjectType: 'workflow-check', fieldOrBehavior: 'build',",
49
+ " createdAt: '2026-06-27T00:00:00Z', updatedAt: '2026-06-27T00:00:00Z',",
50
+ " impactLevel: 'high', verificationPolicyId: 'policy:wf.build' }],",
51
+ " evidence: [{ id: 'ev1', claimId: 'c1', evidenceType: 'test_output',",
52
+ " method: 'validation', sourceRef: 'ts/cmd.jsonl',",
53
+ " excerptOrSummary: 'build', observedAt: '2026-06-27T00:00:00Z',",
54
+ " collectedBy: 'flow-agents', passing: passing,",
55
+ " execution: { runner: 'bash', label: label, isError: !passing, exitCode: passing ? 0 : 1 } }],",
56
+ " policies: [], events: [] };",
57
+ "fs.writeFileSync(dest, JSON.stringify(b, null, 2));",
58
+ ]
59
+ with open(out, 'w') as fh:
60
+ fh.write('\n'.join(code_lines) + '\n')
61
+ PY
62
+ fi
63
+ node "$helper" "$dest" "$label" "$passing"  # helper reads dest/label/passing from process.argv
64
+ }
65
+
66
+ # setup_session AROOT SLUG BUNDLE_SRC
67
+ #   Builds session SLUG under artifact root AROOT (ensure-session, init-plan, then a passing
68
+ #   record-evidence and record-critique) and installs BUNDLE_SRC as trust.bundle if that file exists.
69
+ setup_session() {
70
+   local root_dir="$1" task="$2" src_bundle="$3" work_dir="$1/$2"
71
+   mkdir -p "$root_dir"
72
+
73
+   flow_agents_node "$WRITER" ensure-session --artifact-root "$root_dir" --task-slug "$task" \
74
+     --title "Publish Delivery Test" --summary "Test publish-delivery." \
75
+     --criterion "Bundle published" --timestamp "2026-06-27T10:00:00Z" >/dev/null 2>&1
76
+
77
+   flow_agents_node "$WRITER" init-plan "$work_dir/${task}--deliver.md" \
78
+     --source-request "Test" --summary "Test" --timestamp "2026-06-27T10:01:00Z" >/dev/null 2>&1
79
+
80
+   flow_agents_node "$WRITER" record-evidence "$work_dir" --verdict pass \
81
+     --check-json '{"id":"build","kind":"build","status":"pass","summary":"ok"}' \
82
+     --timestamp "2026-06-27T10:02:00Z" >/dev/null 2>&1
83
+
84
+   flow_agents_node "$WRITER" record-critique "$work_dir" --verdict pass --summary "ok." \
85
+     --timestamp "2026-06-27T10:03:00Z" >/dev/null 2>&1
86
+
87
+   # Nothing to install when no fixture path was given or the file is absent.
88
+   [[ -n "$src_bundle" && -f "$src_bundle" ]] || return 0
89
+   cp "$src_bundle" "$work_dir/trust.bundle"
90
+ }
91
+
92
+ # ==== TEST 1: END-TO-END via record-release ==========================
93
+ echo ""
94
+ echo "=== TEST 1: END-TO-END-RECORD-RELEASE ==="
95
+
96
+ REPO1="$TMP/repo1"
97
+ AROOT1="$REPO1/.flow-agents"
98
+ SLUG1="pd-release-test"
99
+ SESSION_DIR1="$AROOT1/$SLUG1"
100
+ mkdir -p "$REPO1/kits"
101
+
102
+ FIXTURE1="$TMP/fixture1.json"
103
+ write_bundle_to "$FIXTURE1" "node --version" "true"
104
+ setup_session "$AROOT1" "$SLUG1" "$FIXTURE1"
105
+
106
+ rr_out1=$(flow_agents_node "$WRITER" record-release "$SESSION_DIR1" \
107
+ --decision merge \
108
+ --gate-json '{"name":"merge","status":"pass","summary":"Ready."}' \
109
+ --summary "Release." --repo-root "$REPO1" \
110
+ --timestamp "2026-06-27T10:04:00Z" 2>&1)
111
+ rr_exit1=$?
112
+
113
+ if [[ $rr_exit1 -eq 0 ]]; then
114
+ _pass "END-TO-END-RECORD-RELEASE: record-release exits 0"
115
+ else
116
+ _fail "END-TO-END-RECORD-RELEASE: record-release exited $rr_exit1 -- $rr_out1"
117
+ fi
118
+
119
+ DELIVERY_BUNDLE1="$REPO1/delivery/trust.bundle"
120
+ if [[ -f "$DELIVERY_BUNDLE1" ]]; then
121
+ _pass "END-TO-END-RECORD-RELEASE: delivery/trust.bundle exists after record-release"
122
+ else
123
+ _fail "END-TO-END-RECORD-RELEASE: delivery/trust.bundle NOT found at $DELIVERY_BUNDLE1"
124
+ fi
125
+
126
+ if [[ -f "$DELIVERY_BUNDLE1" && -f "$SESSION_DIR1/trust.bundle" ]]; then
127
+ if diff -q "$SESSION_DIR1/trust.bundle" "$DELIVERY_BUNDLE1" >/dev/null 2>&1; then
128
+ _pass "END-TO-END-RECORD-RELEASE: published bundle matches session bundle"
129
+ else
130
+ _fail "END-TO-END-RECORD-RELEASE: published bundle differs from session bundle"
131
+ fi
132
+ fi
133
+
134
# ==== TEST 2: SUBCOMMAND ============================================
# Scenario: invoke the `publish-delivery` subcommand directly and check that
# <repo>/delivery/trust.bundle is written and matches the session bundle.
echo ""
echo "=== TEST 2: SUBCOMMAND ==="

REPO2="$TMP/repo2"
AROOT2="$REPO2/.flow-agents"
SLUG2="pd-subcmd-test"
SESSION_DIR2="$AROOT2/$SLUG2"
mkdir -p "$REPO2/kits"

# Seed the session from a fixture bundle (helpers are defined earlier in the
# script; presumably setup_session installs the fixture as the session's
# trust.bundle -- confirm against its definition).
FIXTURE2="$TMP/fixture2.json"
write_bundle_to "$FIXTURE2" "node --version" "true"
setup_session "$AROOT2" "$SLUG2" "$FIXTURE2"

pd_args=(publish-delivery "$SESSION_DIR2" --repo-root "$REPO2")
pd_out=$(flow_agents_node "$WRITER" "${pd_args[@]}" 2>&1)
pd_exit=$?

# Check 1: the subcommand must succeed.
if [[ $pd_exit -ne 0 ]]; then
  _fail "SUBCOMMAND: publish-delivery exited $pd_exit -- $pd_out"
else
  _pass "SUBCOMMAND: publish-delivery exits 0"
fi

# Check 2: the published bundle must exist under <repo>/delivery.
DELIVERY_BUNDLE2="$REPO2/delivery/trust.bundle"
if [[ ! -f "$DELIVERY_BUNDLE2" ]]; then
  _fail "SUBCOMMAND: delivery/trust.bundle NOT found at $DELIVERY_BUNDLE2"
else
  _pass "SUBCOMMAND: delivery/trust.bundle exists after publish-delivery"
fi

# Check 3: only when both files exist, they must be byte-identical.
# (No result is recorded if either file is missing.)
if [[ -f "$DELIVERY_BUNDLE2" ]] && [[ -f "$SESSION_DIR2/trust.bundle" ]]; then
  if cmp -s "$SESSION_DIR2/trust.bundle" "$DELIVERY_BUNDLE2"; then
    _pass "SUBCOMMAND: published bundle matches session bundle"
  else
    _fail "SUBCOMMAND: published bundle differs from session bundle"
  fi
fi
172
+
173
# ==== TEST 3: RECONCILE-DIVERGENCE ==================================
# Scenario: the published bundle and the canonical verify commands disagree,
# so the reconcile script is expected to fail and say why.
echo ""
echo "=== TEST 3: RECONCILE-DIVERGENCE ==="

REPO3="$TMP/repo3"
mkdir -p "$REPO3/delivery"

# Bundle claims "node --version" passed; canonical verify is "false" (fails)
# -> claimed cmd not in canonical set -> not-run divergence, AND canonical fails
DELIVERY3="$REPO3/delivery/trust.bundle"
write_bundle_to "$DELIVERY3" "node --version" "true"

# The canonical command list is injected via the environment for this one
# invocation only; stderr is folded into the captured output.
recon3_out=$(TRUST_RECONCILE_COMMANDS="false" \
  node "$RECONCILE" --repo-root "$REPO3" 2>&1)
recon3_exit=$?

# NOTE: any non-zero status is accepted here, even though the labels below
# say "exits 1".
if [[ $recon3_exit -ne 0 ]]; then
  _pass "RECONCILE-DIVERGENCE: trust-reconcile exits 1"
else
  _fail "RECONCILE-DIVERGENCE: expected exit 1, got 0 -- $recon3_out"
fi

# Feed the captured output to grep with a here-string rather than
# `echo ... | grep -q`: grep -q exits at the first match, which can kill the
# writer with SIGPIPE and (if pipefail is enabled) turn a real match into a
# failed pipeline. A here-string also avoids echo's option/backslash quirks.
if grep -qE "trust divergence|verification failed in CI" <<<"$recon3_out"; then
  _pass "RECONCILE-DIVERGENCE: output contains divergence or fresh-fail message"
else
  _fail "RECONCILE-DIVERGENCE: expected divergence/fail message, got: $recon3_out"
fi
200
+
201
# ==== TEST 4: RECONCILE-MATCHING ====================================
# Scenario: the published bundle and the canonical verify commands agree,
# so the reconcile script is expected to succeed and print RECONCILED.
echo ""
echo "=== TEST 4: RECONCILE-MATCHING ==="

REPO4="$TMP/repo4"
mkdir -p "$REPO4/delivery"

# Bundle claims "node --version" passed; canonical verify is ALSO "node --version" (passes)
DELIVERY4="$REPO4/delivery/trust.bundle"
write_bundle_to "$DELIVERY4" "node --version" "true"

# The canonical command list is injected via the environment for this one
# invocation only; stderr is folded into the captured output.
recon4_out=$(TRUST_RECONCILE_COMMANDS="node --version" \
  node "$RECONCILE" --repo-root "$REPO4" 2>&1)
recon4_exit=$?

if [[ $recon4_exit -eq 0 ]]; then
  _pass "RECONCILE-MATCHING: trust-reconcile exits 0"
else
  _fail "RECONCILE-MATCHING: expected exit 0, got $recon4_exit -- $recon4_out"
fi

# Feed the captured output to grep with a here-string rather than
# `echo ... | grep -q`: grep -q exits at the first match, which can kill the
# writer with SIGPIPE and (if pipefail is enabled) turn a real match into a
# failed pipeline. A here-string also avoids echo's option/backslash quirks.
if grep -q "RECONCILED" <<<"$recon4_out"; then
  _pass "RECONCILE-MATCHING: output contains RECONCILED"
else
  _fail "RECONCILE-MATCHING: expected RECONCILED in output, got: $recon4_out"
fi
227
+
228
# ==== TEST 5: FAIL-SOFT =============================================
# Scenario: the session has no trust.bundle. `record-release` is expected to
# still exit 0, and nothing should appear at <repo>/delivery/trust.bundle.
echo ""
echo "=== TEST 5: FAIL-SOFT ==="

REPO5="$TMP/repo5"
AROOT5="$REPO5/.flow-agents"
SLUG5="pd-failsoft-test"
SESSION_DIR5="$AROOT5/$SLUG5"
mkdir -p "$REPO5/kits"

# Create the session with an empty fixture argument, then make sure no
# session bundle is left behind whatever setup_session did.
setup_session "$AROOT5" "$SLUG5" ""
rm -f -- "$SESSION_DIR5/trust.bundle"

# Arguments collected in an array so each flag/value pair stays one word.
fs_args=(
  record-release "$SESSION_DIR5"
  --decision merge
  --gate-json '{"name":"merge","status":"pass","summary":"Ready."}'
  --summary "Release."
  --repo-root "$REPO5"
  --timestamp "2026-06-27T10:04:00Z"
)
fs_out=$(flow_agents_node "$WRITER" "${fs_args[@]}" 2>&1)
fs_exit=$?

# Check 1: a missing bundle must not make the release command fail.
if [[ $fs_exit -ne 0 ]]; then
  _fail "FAIL-SOFT: record-release exited $fs_exit -- $fs_out"
else
  _pass "FAIL-SOFT: record-release exits 0 when trust bundle absent"
fi

# Check 2: no delivery bundle may be written in this case.
if [[ -f "$REPO5/delivery/trust.bundle" ]]; then
  _fail "FAIL-SOFT: delivery/trust.bundle was created unexpectedly"
else
  _pass "FAIL-SOFT: delivery/trust.bundle NOT created when session bundle absent"
fi
259
+
260
# ---- Summary ----
# Print the overall verdict and terminate: status 1 when the `errors` counter
# is non-zero, status 0 otherwise. (`errors` is maintained earlier in the
# script -- presumably incremented by _fail; confirm against its definition.)
echo ""
echo "----------------------------------------------"
if [[ $errors -ne 0 ]]; then
  echo "test_publish_delivery: $errors check(s) failed."
  exit 1
fi
echo "test_publish_delivery: all checks passed."
exit 0