@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,20 @@
1
+ {
2
+ "schema_version": "1.0",
3
+ "id": "missing-extension-asset-kit",
4
+ "name": "Missing Extension Asset Kit",
5
+ "product_name": "Missing Extension Asset Kit",
6
+ "description": "A valid Flow Kit container with a Flow Agents extension asset pointing at a missing file.",
7
+ "flows": [
8
+ {
9
+ "id": "missing.extension.asset.review",
10
+ "path": "flows/review.flow.json",
11
+ "description": "Review a small change."
12
+ }
13
+ ],
14
+ "docs": [
15
+ {
16
+ "id": "missing.docs",
17
+ "path": "docs/MISSING.md"
18
+ }
19
+ ]
20
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "id": "unknown.extension.review",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "review", "next": "done" },
6
+ { "id": "done", "next": null }
7
+ ],
8
+ "gates": {
9
+ "review-gate": {
10
+ "step": "review",
11
+ "expects": [
12
+ {
13
+ "id": "review-evidence",
14
+ "kind": "trust.bundle",
15
+ "required": true,
16
+ "description": "Review evidence has been recorded.",
17
+ "bundle_claim": {
18
+ "claimType": "example.review.evidence",
19
+ "subjectType": "artifact",
20
+ "accepted_statuses": ["trusted", "accepted"]
21
+ }
22
+ }
23
+ ]
24
+ }
25
+ }
26
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ "schema_version": "1.0",
3
+ "id": "unknown-extension-kit",
4
+ "name": "Unknown Extension Kit",
5
+ "product_name": "Unknown Extension Kit",
6
+ "description": "A valid Flow Kit container with an arbitrary top-level extension field.",
7
+ "flows": [
8
+ {
9
+ "id": "unknown.extension.review",
10
+ "path": "flows/review.flow.json",
11
+ "description": "Review a small change."
12
+ }
13
+ ],
14
+ "third_party_extension": {
15
+ "provider": "example.vendor",
16
+ "path": "vendor/meta.json"
17
+ }
18
+ }
@@ -0,0 +1,379 @@
1
+ #!/usr/bin/env bash
2
+ # test_builder_step_producers.sh — Integration eval for ADR 0016 Abstraction A P-d Increment 2.
3
+ #
4
+ # Proves for each of the 6 producer-wired gate claims:
5
+ # - record-gate-claim at the correct active step produces the declared claim
6
+ # (correct claimType + subjectType, status=verified in the bundle).
7
+ # - A TAMPERED bundle (stored verified, evidence fail) at that step BLOCKS (exit 2)
8
+ # with the tamper warning naming the declared claimType.
9
+ #
10
+ # Claims covered:
11
+ # 1. builder.pull-work.selected (step: pull-work, expectation: selected-work)
12
+ # 2. builder.design-probe.pickup-readiness (step: design-probe, expectation: pickup-probe-readiness)
13
+ # 3. builder.design-probe.decisions (step: design-probe, expectation: probe-decisions-or-accepted-gaps)
14
+ # 4. builder.pr-open.pull-request (step: pr-open, expectation: pull-request-opened)
15
+ # 5. builder.learn.decisions (step: learn, expectation: decision-evidence)
16
+ # 6. builder.learn.evidence (step: learn, expectation: learning-evidence)
17
+ #
18
+ # build.flow.json confirmation:
19
+ # - All 6 claims above are required:true.
20
+ # - policy-compliance remains required:false (advisory — no skill producer).
21
+ #
22
+ # Deterministic, no model spend, self-cleaning.
23
+ # Usage: bash evals/integration/test_builder_step_producers.sh
24
+
25
+ set -uo pipefail
26
+
27
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
28
+ source "$ROOT/evals/lib/node.sh"
29
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
30
+
31
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
32
+
33
+ TMP="$(mktemp -d)"
34
+ errors=0
35
+
36
+ _pass() { echo " ✓ $1"; }
37
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
38
+
39
+ cleanup() { rm -rf "$TMP"; }
40
+ trap cleanup EXIT
41
+
42
+ # ─── Helper: set active_step_id for a step.
43
+ # For steps in the phase_map, use advance-state.
44
+ # For design-probe (no phase mapping), use ensure-session --step-id.
45
+ # ──────────────────────────────────────────────────────────────────
46
+ set_active_step() {
47
+ local aroot="$1" slug="$2" step="$3"
48
+ case "$step" in
49
+ design-probe)
50
+ # design-probe has no lifecycle phase in the phase_map — set via ensure-session --step-id
51
+ flow_agents_node "workflow-sidecar" ensure-session \
52
+ --artifact-root "$aroot" \
53
+ --task-slug "$slug" \
54
+ --title "Producer test: $step" \
55
+ --summary "Test gate-claim producer at $step." \
56
+ --flow-id builder.build \
57
+ --step-id design-probe \
58
+ --timestamp "2026-06-26T00:00:30Z" >/dev/null 2>&1
59
+ ;;
60
+ pull-work)
61
+ flow_agents_node "workflow-sidecar" advance-state "$aroot/$slug" \
62
+ --status in_progress --phase pickup \
63
+ --summary "Testing at $step." --next-action "Record claim." \
64
+ --flow-definition builder.build \
65
+ --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
66
+ ;;
67
+ pr-open)
68
+ flow_agents_node "workflow-sidecar" advance-state "$aroot/$slug" \
69
+ --status in_progress --phase release \
70
+ --summary "Testing at $step." --next-action "Record claim." \
71
+ --flow-definition builder.build \
72
+ --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
73
+ ;;
74
+ learn)
75
+ flow_agents_node "workflow-sidecar" advance-state "$aroot/$slug" \
76
+ --status in_progress --phase learning \
77
+ --summary "Testing at $step." --next-action "Record claim." \
78
+ --flow-definition builder.build \
79
+ --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
80
+ ;;
81
+ esac
82
+ }
83
+
84
+ # ─── Helper: bootstrap a session for produce tests ───────────────────────────
85
+ setup_session_for_produce() {
86
+ local aroot="$1" slug="$2" step="$3"
87
+ mkdir -p "$aroot"
88
+
89
+ flow_agents_node "workflow-sidecar" ensure-session \
90
+ --artifact-root "$aroot" \
91
+ --task-slug "$slug" \
92
+ --title "Producer test: $step" \
93
+ --summary "Test gate-claim producer at $step." \
94
+ --flow-id builder.build \
95
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
96
+
97
+ flow_agents_node "workflow-sidecar" init-plan "$aroot/$slug/$slug--deliver.md" \
98
+ --source-request "Test" --summary "Testing" \
99
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
100
+
101
+ set_active_step "$aroot" "$slug" "$step"
102
+ }
103
+
104
+ # ─── Helper: bootstrap a session + AGENTS.md for tamper tests ────────────────
105
+ setup_tamper_session() {
106
+ local t_dir="$1" slug="$2" step="$3"
107
+ mkdir -p "$t_dir"
108
+ printf '# Repo\n' > "$t_dir/AGENTS.md"
109
+ mkdir -p "$t_dir/.flow-agents/$slug"
110
+
111
+ flow_agents_node "workflow-sidecar" ensure-session \
112
+ --artifact-root "$t_dir/.flow-agents" \
113
+ --task-slug "$slug" \
114
+ --title "Tamper test: $step" \
115
+ --summary "Testing tamper detection." \
116
+ --flow-id builder.build \
117
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
118
+
119
+ flow_agents_node "workflow-sidecar" init-plan "$t_dir/.flow-agents/$slug/$slug--deliver.md" \
120
+ --source-request "Test" --summary "Testing" \
121
+ --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
122
+
123
+ set_active_step "$t_dir/.flow-agents" "$slug" "$step"
124
+ }
125
+
126
+ # ─── Test: produce a gate claim at a given step ───────────────────────────────
127
+ test_produce_claim() {
128
+ local label="$1" step="$2" expectation="$3" expected_claim_type="$4" expected_subject_type="$5"
129
+ echo ""
130
+ echo "=== PRODUCE: $label ==="
131
+
132
+ local slug
133
+ slug="$(echo "prod-$step-$expectation" | tr '/' '-' | tr '.' '-')"
134
+ local aroot="$TMP/$slug"
135
+ setup_session_for_produce "$aroot" "$slug" "$step"
136
+
137
+ if flow_agents_node "workflow-sidecar" record-gate-claim "$aroot/$slug" \
138
+ --status pass \
139
+ --summary "Test claim: $label" \
140
+ --expectation "$expectation" \
141
+ --timestamp "2026-06-26T00:02:00Z" >/dev/null 2>&1; then
142
+ _pass "$label: record-gate-claim exits 0 at $step step"
143
+ else
144
+ _fail "$label: record-gate-claim failed at $step step"
145
+ return
146
+ fi
147
+
148
+ node -e "
149
+ const fs = require('fs');
150
+ const bundle = JSON.parse(fs.readFileSync('$aroot/$slug/trust.bundle', 'utf8'));
151
+ const target = (bundle.claims || []).find(c => c.claimType === '$expected_claim_type');
152
+ if (!target) {
153
+ console.error('no $expected_claim_type claim found; claims:', (bundle.claims||[]).map(c=>c.claimType).join(', '));
154
+ process.exit(1);
155
+ }
156
+ if (target.subjectType !== '$expected_subject_type') {
157
+ console.error('expected subjectType=$expected_subject_type, got', target.subjectType);
158
+ process.exit(1);
159
+ }
160
+ if (target.status !== 'verified') {
161
+ console.error('expected status=verified, got', target.status);
162
+ process.exit(1);
163
+ }
164
+ " 2>/dev/null \
165
+ && _pass "$label: bundle contains $expected_claim_type with subjectType=$expected_subject_type, status=verified" \
166
+ || _fail "$label: bundle missing or incorrect $expected_claim_type claim"
167
+ }
168
+
169
+ # ─── Test: tampered bundle at given step BLOCKS ───────────────────────────────
170
+ test_tamper_blocks() {
171
+ local label="$1" step="$2" claim_type="$3" subject_type="$4"
172
+ echo ""
173
+ echo "=== TAMPER-BLOCKS: $label ==="
174
+
175
+ local slug
176
+ slug="$(echo "tamper-$step-$claim_type" | tr '.' '-' | tr '/' '-')"
177
+ local t_dir="$TMP/$slug"
178
+ setup_tamper_session "$t_dir" "$slug" "$step"
179
+
180
+ # Write a TAMPERED trust.bundle: stored verified, evidence passing=false
181
+ python3 - "$t_dir/.flow-agents/$slug/trust.bundle" "$claim_type" "$subject_type" << 'PY'
182
+ import json, sys
183
+ claim_type = sys.argv[2]
184
+ subject_type = sys.argv[3]
185
+ bundle = {
186
+ "schemaVersion": 3,
187
+ "source": "flow-agents/workflow-sidecar",
188
+ "claims": [{
189
+ "id": "c1",
190
+ "subjectId": "tamper/gate-claim-test",
191
+ "subjectType": subject_type,
192
+ "claimType": claim_type,
193
+ "fieldOrBehavior": "Gate claim test",
194
+ "value": "pass",
195
+ "impactLevel": "high",
196
+ "status": "verified",
197
+ "createdAt": "2026-06-26T00:00:00Z",
198
+ "updatedAt": "2026-06-26T00:00:00Z"
199
+ }],
200
+ "evidence": [{
201
+ "id": "ev1",
202
+ "claimId": "c1",
203
+ "evidenceType": "test_output",
204
+ "method": "validation",
205
+ "sourceRef": "command-log.jsonl",
206
+ "excerptOrSummary": "gate claim FAILED",
207
+ "observedAt": "2026-06-26T00:00:00Z",
208
+ "collectedBy": "harness",
209
+ "passing": False,
210
+ "blocking": True
211
+ }],
212
+ "policies": [],
213
+ "events": [{
214
+ "id": "evt1",
215
+ "claimId": "c1",
216
+ "status": "verified",
217
+ "actor": "agent",
218
+ "method": "workflow-check",
219
+ "evidenceIds": ["ev1"],
220
+ "createdAt": "2026-06-26T00:00:00Z"
221
+ }]
222
+ }
223
+ json.dump(bundle, open(sys.argv[1], 'w'))
224
+ PY
225
+
226
+ set +e
227
+ tamper_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
228
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$t_dir\"}")"
229
+ tamper_exit="$?"
230
+ set -e
231
+
232
+ if [ "$tamper_exit" -eq 2 ]; then
233
+ _pass "$label: tampered bundle blocks (exit 2)"
234
+ else
235
+ _fail "$label: tampered bundle did NOT block: exit=$tamper_exit"
236
+ fi
237
+
238
+ if echo "$tamper_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle"; then
239
+ _pass "$label: tamper warning emits 'stored status does not match recompute'"
240
+ else
241
+ _fail "$label: tamper warning missing from output: $tamper_out"
242
+ fi
243
+
244
+ if echo "$tamper_out" | grep -q "caught false-completion"; then
245
+ _pass "$label: tamper warning emits 'caught false-completion'"
246
+ else
247
+ _fail "$label: tamper warning missing 'caught false-completion': $tamper_out"
248
+ fi
249
+
250
+ if echo "$tamper_out" | grep -q "$claim_type"; then
251
+ _pass "$label: tamper warning names declared claimType $claim_type"
252
+ else
253
+ _fail "$label: tamper warning does not name $claim_type: $tamper_out"
254
+ fi
255
+ }
256
+
257
+ # ─── Test 0: build.flow.json required:true confirmation ──────────────────────
258
+ echo ""
259
+ echo "=== 0. build.flow.json: confirm required:true for produced gates ==="
260
+
261
+ node -e "
262
+ const fs = require('fs');
263
+ const flow = JSON.parse(fs.readFileSync('$ROOT/kits/builder/flows/build.flow.json', 'utf8'));
264
+ const requiredTrue = [
265
+ 'selected-work',
266
+ 'pickup-probe-readiness',
267
+ 'probe-decisions-or-accepted-gaps',
268
+ 'pull-request-opened',
269
+ 'decision-evidence',
270
+ 'learning-evidence',
271
+ ];
272
+ const requiredFalse = ['policy-compliance'];
273
+ let ok = true;
274
+ for (const [gateName, gate] of Object.entries(flow.gates)) {
275
+ for (const exp of gate.expects || []) {
276
+ if (requiredTrue.includes(exp.id) && exp.required !== true) {
277
+ console.error('FAIL: ' + exp.id + ' in ' + gateName + ' should be required:true, got ' + exp.required);
278
+ ok = false;
279
+ }
280
+ if (requiredFalse.includes(exp.id) && exp.required !== false) {
281
+ console.error('FAIL: ' + exp.id + ' in ' + gateName + ' should remain required:false (advisory), got ' + exp.required);
282
+ ok = false;
283
+ }
284
+ }
285
+ }
286
+ if (!ok) process.exit(1);
287
+ " 2>/dev/null \
288
+ && _pass "build.flow.json: 6 produced gates are required:true, policy-compliance is required:false" \
289
+ || _fail "build.flow.json: required flag mismatch"
290
+
291
+ node -e "
292
+ const fs = require('fs');
293
+ const flow = JSON.parse(fs.readFileSync('$ROOT/kits/builder/flows/build.flow.json', 'utf8'));
294
+ const producedIds = [
295
+ 'selected-work',
296
+ 'pickup-probe-readiness',
297
+ 'probe-decisions-or-accepted-gaps',
298
+ 'pull-request-opened',
299
+ 'decision-evidence',
300
+ 'learning-evidence',
301
+ ];
302
+ let ok = true;
303
+ for (const [gateName, gate] of Object.entries(flow.gates)) {
304
+ for (const exp of gate.expects || []) {
305
+ if (producedIds.includes(exp.id) && exp.explore_hint) {
306
+ console.error('FAIL: ' + exp.id + ' in ' + gateName + ' still has explore_hint (remove when producer exists)');
307
+ ok = false;
308
+ }
309
+ }
310
+ }
311
+ if (!ok) process.exit(1);
312
+ " 2>/dev/null \
313
+ && _pass "build.flow.json: no explore_hint on produced gate entries" \
314
+ || _fail "build.flow.json: produced gate entries still have explore_hint"
315
+
316
+ # ─── Tests 1–6: produce + tamper-block for each of the 6 claims ──────────────
317
+
318
+ # Claim 1: builder.pull-work.selected
319
+ test_produce_claim \
320
+ "builder.pull-work.selected" \
321
+ "pull-work" "selected-work" \
322
+ "builder.pull-work.selected" "work-item"
323
+ test_tamper_blocks \
324
+ "builder.pull-work.selected" \
325
+ "pull-work" "builder.pull-work.selected" "work-item"
326
+
327
+ # Claim 2: builder.design-probe.pickup-readiness
328
+ test_produce_claim \
329
+ "builder.design-probe.pickup-readiness" \
330
+ "design-probe" "pickup-probe-readiness" \
331
+ "builder.design-probe.pickup-readiness" "work-item"
332
+ test_tamper_blocks \
333
+ "builder.design-probe.pickup-readiness" \
334
+ "design-probe" "builder.design-probe.pickup-readiness" "work-item"
335
+
336
+ # Claim 3: builder.design-probe.decisions
337
+ test_produce_claim \
338
+ "builder.design-probe.decisions" \
339
+ "design-probe" "probe-decisions-or-accepted-gaps" \
340
+ "builder.design-probe.decisions" "decision"
341
+ test_tamper_blocks \
342
+ "builder.design-probe.decisions" \
343
+ "design-probe" "builder.design-probe.decisions" "decision"
344
+
345
+ # Claim 4: builder.pr-open.pull-request
346
+ test_produce_claim \
347
+ "builder.pr-open.pull-request" \
348
+ "pr-open" "pull-request-opened" \
349
+ "builder.pr-open.pull-request" "pull-request"
350
+ test_tamper_blocks \
351
+ "builder.pr-open.pull-request" \
352
+ "pr-open" "builder.pr-open.pull-request" "pull-request"
353
+
354
+ # Claim 5: builder.learn.decisions
355
+ test_produce_claim \
356
+ "builder.learn.decisions" \
357
+ "learn" "decision-evidence" \
358
+ "builder.learn.decisions" "decision"
359
+ test_tamper_blocks \
360
+ "builder.learn.decisions" \
361
+ "learn" "builder.learn.decisions" "decision"
362
+
363
+ # Claim 6: builder.learn.evidence
364
+ test_produce_claim \
365
+ "builder.learn.evidence" \
366
+ "learn" "learning-evidence" \
367
+ "builder.learn.evidence" "release"
368
+ test_tamper_blocks \
369
+ "builder.learn.evidence" \
370
+ "learn" "builder.learn.evidence" "release"
371
+
372
+ # ─── Summary ──────────────────────────────────────────────────────────────────
373
+ echo ""
374
+ if [ "$errors" -eq 0 ]; then
375
+ echo "Builder step producer tests passed (6 claims: produce + tamper-block each)."
376
+ exit 0
377
+ fi
378
+ echo "Builder step producer tests FAILED: $errors issue(s)."
379
+ exit 1
@@ -32,7 +32,7 @@ KIRO_DEST="$TMPDIR_EVAL/kiro-home"
32
32
  BASE_DEST="$TMPDIR_EVAL/base-workspace"
33
33
  CLAUDE_DEST="$TMPDIR_EVAL/claude-workspace"
34
34
  CODEX_DEST="$TMPDIR_EVAL/codex-workspace"
35
- CODEX_CORE_DEST="$TMPDIR_EVAL/codex-core-workspace"
35
+ CODEX_FULL_DEST="$TMPDIR_EVAL/codex-full-workspace"
36
36
  CODEX_CONSOLE_DEST="$TMPDIR_EVAL/codex-console-workspace"
37
37
  CODEX_HOSTED_CONSOLE_DEST="$TMPDIR_EVAL/codex-hosted-console-workspace"
38
38
  CODEX_USER_HOSTED_CONSOLE_DEST="$TMPDIR_EVAL/codex-user-hosted-console-workspace"
@@ -42,7 +42,7 @@ BASE_INIT_DEST="$TMPDIR_EVAL/base-init-workspace"
42
42
  CODEX_INIT_DEST="$TMPDIR_EVAL/codex-init-workspace"
43
43
  OPENCODE_DEST="$TMPDIR_EVAL/opencode-workspace"
44
44
  OPENCODE_CONSOLE_DEST="$TMPDIR_EVAL/opencode-console-workspace"
45
- OPENCODE_CORE_DEST="$TMPDIR_EVAL/opencode-core-workspace"
45
+ OPENCODE_FULL_DEST="$TMPDIR_EVAL/opencode-full-workspace"
46
46
  PI_DEST="$TMPDIR_EVAL/pi-workspace"
47
47
  CONSOLE_TOKEN_FILE="$TMPDIR_EVAL/console-token"
48
48
  printf 'test-token\n' > "$CONSOLE_TOKEN_FILE"
@@ -128,7 +128,7 @@ else
128
128
  _fail "opencode install with Console telemetry config failed"
129
129
  fi
130
130
 
131
- if node "$ROOT_DIR/build/src/cli.js" init --runtime opencode --dest "$OPENCODE_CORE_DEST" --yes >/dev/null; then
131
+ if node "$ROOT_DIR/build/src/cli.js" init --runtime opencode --dest "$OPENCODE_FULL_DEST" --yes >/dev/null; then
132
132
  _pass "flow-agents init headless opencode install succeeded"
133
133
  else
134
134
  _fail "flow-agents init headless opencode install failed"
@@ -140,32 +140,17 @@ else
140
140
  _fail "pi install failed"
141
141
  fi
142
142
 
143
- USER_SKILLS_DIR="$CODEX_CORE_DEST/.codex/sk""ills/user-skill"
144
- mkdir -p "$CODEX_CORE_DEST/.codex/ag""ents" "$USER_SKILLS_DIR"
145
- printf 'name = "user-agent"\n' > "$CODEX_CORE_DEST/.codex/ag""ents/user-agent.toml"
143
+ USER_SKILLS_DIR="$CODEX_FULL_DEST/.codex/sk""ills/user-skill"
144
+ mkdir -p "$CODEX_FULL_DEST/.codex/ag""ents" "$USER_SKILLS_DIR"
145
+ printf 'name = "user-agent"\n' > "$CODEX_FULL_DEST/.codex/ag""ents/user-agent.toml"
146
146
  printf '# user skill\n' > "$USER_SKILLS_DIR/SKILL.md"
147
147
 
148
- if (cd "$ROOT_DIR/dist/codex" && FLOW_AGENTS_PACKS=core bash install.sh "$CODEX_CORE_DEST" >/dev/null); then
149
- _pass "Codex core-pack filtered install succeeded"
148
+ # A fresh install ships the full standalone base (no pack filtering). Pre-existing
149
+ # unknown user files must be preserved across the rsync install.
150
+ if (cd "$ROOT_DIR/dist/codex" && bash install.sh "$CODEX_FULL_DEST" >/dev/null); then
151
+ _pass "Codex full install succeeded"
150
152
  else
151
- _fail "Codex core-pack filtered install failed"
152
- fi
153
-
154
- FILTER_ATTACK_DEST="$TMPDIR_EVAL/filter-attack"
155
- mkdir -p "$FILTER_ATTACK_DEST/packaging" "$FILTER_ATTACK_DEST/skills"
156
- cat > "$FILTER_ATTACK_DEST/packaging/packs.json" <<'JSON'
157
- {
158
- "schema_version": "1.0",
159
- "packs": [
160
- { "name": "core", "default": true, "skills": ["safe"], "agents": [], "powers": [] },
161
- { "name": "extra", "skills": ["../escape"], "agents": [], "powers": [] }
162
- ]
163
- }
164
- JSON
165
- if node "$ROOT_DIR/build/src/tools/filter-installed-packs.js" "$FILTER_ATTACK_DEST" --packs core >"$TMPDIR_EVAL/filter-attack.out" 2>"$TMPDIR_EVAL/filter-attack.err"; then
166
- _fail "pack filter accepted unsafe metadata traversal"
167
- else
168
- _pass "pack filter rejects unsafe metadata traversal before deletion"
153
+ _fail "Codex full install failed"
169
154
  fi
170
155
 
171
156
  echo ""
@@ -179,7 +164,7 @@ for dir in \
179
164
  "$CODEX_DEST/.codex/agents" \
180
165
  "$CODEX_DEST/.codex/skills" \
181
166
  "$CODEX_DEST/.flow-agents" \
182
- "$CODEX_CORE_DEST/.flow-agents"; do
167
+ "$CODEX_FULL_DEST/.flow-agents"; do
183
168
  if [[ -d "$dir" ]]; then
184
169
  _pass "$dir exists"
185
170
  else
@@ -703,64 +688,43 @@ fi
703
688
 
704
689
 
705
690
  echo ""
706
- echo "--- Pack Filtering ---"
707
- CODEX_AGENTS_DIR="$CODEX_CORE_DEST/.codex/ag""ents"
708
- CORE_AGENT="$CODEX_AGENTS_DIR/tool-planner.toml"
709
- OPTIONAL_AGENT="$CODEX_AGENTS_DIR/dev.toml"
710
- if [[ -f "$CORE_AGENT" && ! -f "$OPTIONAL_AGENT" ]]; then
711
- _pass "Codex core-pack install keeps core agents and prunes optional agents"
712
- else
713
- _fail "Codex core-pack agent filtering failed"
714
- fi
715
-
716
- # Kit-owned skills (plan-work, deliver) are always present regardless of pack filter.
717
- # Pack filtering only prunes skills declared in packs.json (the tool-skills).
718
- # The development-pack tool-skill agentic-engineering should be pruned in a core-only install.
719
- if [[ -d "$CODEX_CORE_DEST/.codex/skills/plan-work" && -d "$CODEX_CORE_DEST/.codex/skills/deliver" && ! -d "$CODEX_CORE_DEST/.codex/skills/agentic-engineering" ]]; then
720
- _pass "Codex core-pack install: kit-skills present, dev-only tool-skill pruned"
691
+ echo "--- Full Standalone Base Install ---"
692
+ # There is no pack layer: a fresh install ships the complete standalone base.
693
+ # Both the neutral toolbox agents (tool-planner) and the deeper agents (dev) are
694
+ # present, and kit-owned skills (plan-work, deliver) plus standalone skills
695
+ # (agentic-engineering) all install together.
696
+ # Codex excludes the dev orchestrator agent (manifest.codex.excluded_agents), so
697
+ # assert the neutral toolbox agent plus a deeper agent that codex does ship.
698
+ CODEX_AGENTS_DIR="$CODEX_FULL_DEST/.codex/ag""ents"
699
+ if [[ -f "$CODEX_AGENTS_DIR/tool-planner.toml" && -f "$CODEX_AGENTS_DIR/tool-security-reviewer.toml" ]]; then
700
+ _pass "Codex full install ships the complete agent base"
721
701
  else
722
- _fail "Codex core-pack skill filtering failed"
702
+ _fail "Codex full install is missing base agents"
723
703
  fi
724
704
 
725
- if [[ -f "$CODEX_CORE_DEST/.flow-agents/installed-packs.json" ]]; then
726
- _pass "Codex core-pack install records selected packs"
705
+ if [[ -d "$CODEX_FULL_DEST/.codex/skills/plan-work" && -d "$CODEX_FULL_DEST/.codex/skills/deliver" && -d "$CODEX_FULL_DEST/.codex/skills/agentic-engineering" ]]; then
706
+ _pass "Codex full install ships kit-skills and standalone skills together"
727
707
  else
728
- _fail "Codex core-pack install did not record selected packs"
708
+ _fail "Codex full install is missing skills"
729
709
  fi
730
710
 
731
711
  if [[ -f "$CODEX_AGENTS_DIR/user-agent.toml" && -d "$USER_SKILLS_DIR" ]]; then
732
- _pass "Codex core-pack install preserves unknown user files"
733
- else
734
- _fail "Codex core-pack install removed unknown user files"
735
- fi
736
-
737
- # Pack filtering for opencode
738
- OPENCODE_AGENTS_DIR="$OPENCODE_CORE_DEST/.opencode/agents"
739
- if (cd "$ROOT_DIR/dist/opencode" && FLOW_AGENTS_PACKS=core bash install.sh "$OPENCODE_CORE_DEST" >/dev/null); then
740
- _pass "opencode core-pack filtered install succeeded"
741
- else
742
- _fail "opencode core-pack filtered install failed"
743
- fi
744
-
745
- if [[ -d "$OPENCODE_AGENTS_DIR/tool-planner.md" ]] || [[ -f "$OPENCODE_AGENTS_DIR/tool-planner.md" ]]; then
746
- _pass "opencode core-pack install keeps core agents"
712
+ _pass "Codex full install preserves unknown user files"
747
713
  else
748
- _fail "opencode core-pack agent filtering failed (tool-planner.md missing)"
714
+ _fail "Codex full install removed unknown user files"
749
715
  fi
750
716
 
751
- # Kit-owned skills (plan-work, deliver) are always present regardless of pack filter.
752
- # Pack filtering only prunes skills declared in packs.json (the tool-skills).
753
- # The development-pack tool-skill agentic-engineering should be pruned in a core-only install.
754
- if [[ -d "$OPENCODE_CORE_DEST/.opencode/skills/plan-work" && -d "$OPENCODE_CORE_DEST/.opencode/skills/deliver" && ! -d "$OPENCODE_CORE_DEST/.opencode/skills/agentic-engineering" ]]; then
755
- _pass "opencode core-pack install: kit-skills present, dev-only tool-skill pruned"
717
+ OPENCODE_AGENTS_DIR="$OPENCODE_FULL_DEST/.opencode/agents"
718
+ if [[ -f "$OPENCODE_AGENTS_DIR/tool-planner.md" && -f "$OPENCODE_AGENTS_DIR/dev.md" ]]; then
719
+ _pass "opencode full install ships the complete agent base"
756
720
  else
757
- _fail "opencode core-pack skill filtering failed"
721
+ _fail "opencode full install is missing base agents"
758
722
  fi
759
723
 
760
- if [[ -f "$OPENCODE_CORE_DEST/.flow-agents/installed-packs.json" ]]; then
761
- _pass "opencode core-pack install records selected packs"
724
+ if [[ -d "$OPENCODE_FULL_DEST/.opencode/skills/plan-work" && -d "$OPENCODE_FULL_DEST/.opencode/skills/deliver" && -d "$OPENCODE_FULL_DEST/.opencode/skills/agentic-engineering" ]]; then
725
+ _pass "opencode full install ships kit-skills and standalone skills together"
762
726
  else
763
- _fail "opencode core-pack install did not record selected packs"
727
+ _fail "opencode full install is missing skills"
764
728
  fi
765
729
 
766
730
  echo ""