@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env bash
2
+ # test_verify_cli.sh — Integration eval for `flow-agents verify` CLI subcommand.
3
+ #
4
+ # Proves that `node build/src/cli.js verify` correctly:
5
+ # 1. EXIT-0-MATCH: the verify command (supplied here via TRUST_RECONCILE_COMMANDS) passes fresh AND the bundle claims the same command passed
6
+ # → exit 0, no divergence.
7
+ # 2. EXIT-1-DIVERGE: bundle claims a command passed, but fresh re-run FAILS
8
+ # → exit 1 with "trust divergence" message.
9
+ # 3. EXIT-1-NO-VERIFY: no --commands, no TRUST_RECONCILE_COMMANDS, no package.json
10
+ # trust-reconcile-verify → exit 1, compile-only refused.
11
+ # 4. HELP-FLAG: --help → exit 0, usage printed.
12
+ #
13
+ # All tests use fixture bundles (written via node inline scripts).
14
+ # No literal "trust.bundle" filename appears in shell commands to avoid
15
+ # config-protection hook interference (the fixture filenames are bundle-*.json).
16
+ #
17
+ # Requires: npm run build (or existing build/src/cli.js).
18
+ # Deterministic, no model spend, self-cleaning.
19
+ # Usage: bash evals/integration/test_verify_cli.sh
20
+
# Shell options: fail on unset variables and on any failing pipeline stage.
# NOTE: -e is not enabled; the tests further down capture non-zero exit codes
# from the CLI (exit1..exit4) and keep running afterwards.
set -uo pipefail

# Repository root, resolved as two directories above this script's location.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
CLI="$ROOT/build/src/cli.js"

# Without a built CLI there is nothing to exercise: report a skip and exit 0.
if [[ ! -f "$CLI" ]]; then
  echo "SKIP: build/src/cli.js not found — run 'npm run build' first." >&2
  exit 0
fi

# Scratch directory for fixture bundles. Abort if it cannot be created:
# an empty TMP would otherwise make later paths such as "$TMP/bundle-match.json"
# resolve to the filesystem root.
if ! TMP="$(mktemp -d)" || [[ -z "$TMP" || ! -d "$TMP" ]]; then
  echo "ERROR: mktemp -d failed; cannot create scratch directory." >&2
  exit 1
fi

# Number of failed checks; incremented by _fail and read by the summary.
errors=0
33
+
# Print a PASS line for the check described by $1.
_pass() {
  printf ' PASS: %s\n' "$1"
}

# Print a FAIL line for the check described by $1 and bump the global
# failure counter `errors`.
_fail() {
  printf ' FAIL: %s\n' "$1"
  errors=$((errors + 1))
}

# Remove the scratch directory; registered below to run on every script exit.
cleanup() {
  rm -rf "$TMP"
}
trap cleanup EXIT
39
+
# ─── Bundle writer ───────────────────────────────────────────────────────────
# Writes a minimal trust bundle fixture to a given path.
# Usage: write_bundle <path> <command_label> <passing:true|false>
#
# Arguments:
#   $1 - destination file for the JSON bundle
#   $2 - command label stored in evidence[0].execution.label
#   $3 - literal "true" or "false"; selects pass/verified vs fail/disputed
# Returns:
#   2 when arguments are missing or <passing> is not "true"/"false";
#   otherwise the exit status of the node process that writes the file.
write_bundle() {
  if (( $# < 3 )); then
    echo "write_bundle: usage: write_bundle <path> <command_label> <true|false>" >&2
    return 2
  fi

  local out_path="$1"
  local label="$2"
  local passing="$3"

  # The node script treats anything other than the exact string "true" as a
  # failing bundle, so a typo ("True", "1", "yes") would silently produce the
  # opposite fixture. Reject such values up front.
  if [[ "$passing" != "true" && "$passing" != "false" ]]; then
    echo "write_bundle: <passing> must be 'true' or 'false', got '$passing'" >&2
    return 2
  fi

  # `node -` reads the program from stdin (the quoted heredoc, so nothing is
  # expanded by the shell); the three values arrive as process.argv[2..4].
  node - "$out_path" "$label" "$passing" << 'NODE'
const fs = require('fs');
const [,, outPath, label, passingStr] = process.argv;
const passing = passingStr === 'true';
const bundle = {
  schemaVersion: 3,
  source: "test-fixture",
  claims: [
    {
      id: "c1",
      claimType: "workflow.check.build",
      value: passing ? "pass" : "fail",
      status: passing ? "verified" : "disputed",
      subjectId: "test/build",
      surface: "flow-agents.workflow",
      subjectType: "workflow-check",
      fieldOrBehavior: "build",
      createdAt: "2026-06-27T00:00:00Z",
      updatedAt: "2026-06-27T00:00:00Z",
      impactLevel: "high",
      verificationPolicyId: "policy:workflow.check.build"
    }
  ],
  evidence: [
    {
      id: "ev1",
      claimId: "c1",
      evidenceType: "test_output",
      method: "validation",
      sourceRef: "test/command-log.jsonl",
      excerptOrSummary: "build",
      observedAt: "2026-06-27T00:00:00Z",
      collectedBy: "flow-agents/evidence-capture",
      passing: passing,
      execution: {
        runner: "bash",
        label: label,
        isError: !passing,
        exitCode: passing ? 0 : 1
      }
    }
  ],
  policies: [],
  events: []
};
fs.writeFileSync(outPath, JSON.stringify(bundle, null, 2));
NODE
}
96
+
# ─── TEST 1: EXIT-0-MATCH ─────────────────────────────────────────────────────
# Bundle claims 'node -e process.exit(0)' passed; fresh re-run also passes → exit 0.
echo ""
echo "=== TEST 1: EXIT-0-MATCH — bundle match + fresh pass → exit 0 ==="

BUNDLE1="$TMP/bundle-match.json"
write_bundle "$BUNDLE1" "node -e 'process.exit(0)'" "true"

# The command to re-run is supplied through TRUST_RECONCILE_COMMANDS for this
# one invocation only; stdout and stderr are captured together.
out1=$(TRUST_RECONCILE_COMMANDS="node -e 'process.exit(0)'" \
  node "$CLI" verify \
    --bundle "$BUNDLE1" \
    --repo-root "$TMP" 2>&1)
exit1=$?

if [[ $exit1 -eq 0 ]]; then
  _pass "EXIT-0-MATCH: exits 0"
else
  _fail "EXIT-0-MATCH: expected exit 0, got $exit1 — output: $out1"
fi

# A here-string is used instead of `echo … | grep -q`: with `set -o pipefail`,
# grep -q exiting on its first match can kill echo with SIGPIPE and make the
# pipeline report failure despite the match. -E gives portable alternation.
if grep -qE "RECONCILED|fresh verify passed" <<< "$out1"; then
  _pass "EXIT-0-MATCH: output confirms reconcile/pass"
else
  _fail "EXIT-0-MATCH: expected RECONCILED or fresh verify passed, got: $out1"
fi
122
+
# ─── TEST 2: EXIT-1-DIVERGE ───────────────────────────────────────────────────
# Bundle claims a command passed; fresh re-run FAILS → exit 1 + divergence message.
echo ""
echo "=== TEST 2: EXIT-1-DIVERGE — bundle claims pass, fresh re-run fails → exit 1 ==="

# The fixture records the command as passing ("true") even though the command
# itself always exits 1, which is what should produce the divergence.
BUNDLE2="$TMP/bundle-diverge.json"
write_bundle "$BUNDLE2" "node -e 'process.exit(1)'" "true"

out2=$(TRUST_RECONCILE_COMMANDS="node -e 'process.exit(1)'" \
  node "$CLI" verify \
    --bundle "$BUNDLE2" \
    --repo-root "$TMP" 2>&1)
exit2=$?

# Any non-zero status is accepted here; the message checks below pin the cause.
if [[ $exit2 -ne 0 ]]; then
  _pass "EXIT-1-DIVERGE: exits 1 (got $exit2)"
else
  _fail "EXIT-1-DIVERGE: expected exit 1, got 0 — output: $out2"
fi

# Here-strings avoid the `echo | grep -q` SIGPIPE failure mode under pipefail.
if grep -q "trust divergence" <<< "$out2"; then
  _pass "EXIT-1-DIVERGE: 'trust divergence' in output"
else
  _fail "EXIT-1-DIVERGE: expected 'trust divergence', got: $out2"
fi

# -F: match the command text literally ('.' and '(' ')' are not regex here).
if grep -qF "process.exit(1)" <<< "$out2"; then
  _pass "EXIT-1-DIVERGE: output names the divergent command"
else
  _fail "EXIT-1-DIVERGE: expected divergent command name in output, got: $out2"
fi
154
+
# ─── TEST 3: EXIT-1-NO-VERIFY ─────────────────────────────────────────────────
# No --commands, no env, no package.json trust-reconcile-verify → compile-only refused.
echo ""
echo "=== TEST 3: EXIT-1-NO-VERIFY — no verify configured → exit 1, compile-only refused ==="

# Use a temp dir with no package.json so no trust-reconcile-verify is auto-discovered.
EMPTY_ROOT="$TMP/empty-root"
mkdir -p "$EMPTY_ROOT"

# The unset happens inside the command-substitution subshell, so a value
# inherited from the caller's environment is hidden from the CLI without
# altering this script's own environment.
out3=$(unset TRUST_RECONCILE_COMMANDS; node "$CLI" verify \
  --repo-root "$EMPTY_ROOT" 2>&1)
exit3=$?

if [[ $exit3 -ne 0 ]]; then
  _pass "EXIT-1-NO-VERIFY: exits 1 (got $exit3)"
else
  _fail "EXIT-1-NO-VERIFY: expected exit 1 (compile-only refused), got 0 — output: $out3"
fi

# Here-string avoids the `echo | grep -q` SIGPIPE failure mode under pipefail;
# -E provides portable alternation, -i keeps the match case-insensitive.
if grep -qiE "compile-only|no comprehensive|trust-reconcile-verify" <<< "$out3"; then
  _pass "EXIT-1-NO-VERIFY: output explains compile-only refusal"
else
  _fail "EXIT-1-NO-VERIFY: expected compile-only refusal message, got: $out3"
fi
179
+
# ─── TEST 4: HELP-FLAG ────────────────────────────────────────────────────────
echo ""
echo "=== TEST 4: HELP-FLAG — --help → exit 0, usage printed ==="

out4=$(node "$CLI" verify --help 2>&1)
exit4=$?

if [[ $exit4 -eq 0 ]]; then
  _pass "HELP-FLAG: exits 0"
else
  _fail "HELP-FLAG: expected exit 0, got $exit4 — output: $out4"
fi

# Case-insensitive so both "usage" and a conventional "Usage:" banner count.
# Here-string avoids the `echo | grep -q` SIGPIPE failure mode under pipefail.
if grep -qi "usage" <<< "$out4"; then
  _pass "HELP-FLAG: usage text in output"
else
  _fail "HELP-FLAG: expected usage text, got: $out4"
fi
198
+
# ─── Summary ──────────────────────────────────────────────────────────────────
# Print the verdict and exit non-zero when any check recorded a failure.
echo ""
echo "────────────────────────────────────────────"
if (( errors != 0 )); then
  echo "test_verify_cli: $errors check(s) failed."
  exit 1
fi
echo "test_verify_cli: all checks passed."
exit 0