@kontourai/flow-agents 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/runtime-compat.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +103 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +316 -8
- package/build/src/cli/workflow-sidecar.js +1996 -91
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +111 -0
- package/build/src/lib/flow-resolver.js +308 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +55 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_fork_classification.sh +134 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_kit_identity_trust.sh +393 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +47 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +6 -6
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1524 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/scripts/repair-command-log.js +115 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2127 -84
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +369 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_claim_lookup.sh — Integration tests for the `claim` subcommand (#162).
|
|
3
|
+
#
|
|
4
|
+
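# Verifies (note: the "ACn:" prefixes printed by the checks below track the test number, e.g. Test 2 prints "AC2:", not the AC ids listed here):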
|
|
5
|
+
# AC1: status + value + failing evidence (with execution block) + policy + derivation drilldown
|
|
6
|
+
# AC1: --json flag emits structured ClaimExplanation object
|
|
7
|
+
# AC1: unknown claim id exits 1 with clear error listing available ids
|
|
8
|
+
# AC1: missing bundle exits 1 with clear error
|
|
9
|
+
# AC3: gate-hint in stop-goal-fit.js disputed warning contains workflow:sidecar -- claim
|
|
10
|
+
set -uo pipefail
|
|
11
|
+
|
|
12
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
13
|
+
source "$ROOT/evals/lib/node.sh"
|
|
14
|
+
|
|
15
|
+
TMPDIR_EVAL="$(mktemp -d)"
|
|
16
|
+
errors=0
|
|
17
|
+
|
|
18
|
+
# Remove the scratch directory created for this run (installed as the EXIT trap).
cleanup() {
  rm -rf "$TMPDIR_EVAL"
}
|
|
19
|
+
trap cleanup EXIT
|
|
20
|
+
|
|
21
|
+
# Report one passing check on stdout.
_pass() {
  printf ' ✓ %s\n' "$1"
}
|
|
22
|
+
# Report one failing check on stdout and bump the global failure counter.
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$(( errors + 1 ))
}
|
|
23
|
+
|
|
24
|
+
echo "=== Claim Lookup Tests (issue #162) ==="
|
|
25
|
+
|
|
26
|
+
# ── helpers ──────────────────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
# jq_node FILE EXPR — minimal jq stand-in built on node.
#   FILE  path to a JSON document.
#   EXPR  JavaScript source of a one-argument function; it is spliced into the
#         script verbatim and called with the parsed document.
# Prints the result followed by a newline: booleans, numbers and strings via
# String(), anything else via JSON.stringify. When the result is null or
# undefined node exits with status 2, which becomes this function's status.
# FILE travels through the environment instead of being pasted into the script,
# so quotes or backslashes in the path cannot break the JavaScript.
jq_node() {
  local file="$1" expr="$2"
  JQ_NODE_FILE="$file" node -e "
    const d = JSON.parse(require('fs').readFileSync(process.env.JQ_NODE_FILE, 'utf8'));
    const r = (${expr})(d);
    if (r === undefined || r === null) { process.exit(2); }
    if (typeof r === 'boolean' || typeof r === 'number' || typeof r === 'string') {
      process.stdout.write(String(r) + '\n');
    } else {
      process.stdout.write(JSON.stringify(r) + '\n');
    }"
}
|
|
40
|
+
|
|
41
|
+
# Seed a trust.bundle with a DISPUTED claim including a failing execution block and a policy.
|
|
42
|
+
# seed_disputed_bundle DIR SLUG — write DIR/trust.bundle (creating DIR first).
# The bundle holds one disputed workflow.check.test claim for SLUG, one piece of
# evidence whose execution block records a failing `npm test` (exitCode 1,
# isError true), one disputed event, and the policy the claim points at.
seed_disputed_bundle() {
  local bundle_dir="$1"
  local slug="$2"
  local stamp="2026-06-25T00:00:00Z"
  local claim_id="${slug}/unit-tests.flow-agents.workflow.unit tests pass"

  mkdir -p "$bundle_dir"
  # Unquoted delimiter: $claim_id, ${slug} and $stamp are expanded into the JSON.
  cat > "$bundle_dir/trust.bundle" <<BUNDLE_JSON
{
"schemaVersion": 3,
"source": "claim-lookup-test;statusFunctionVersion=1",
"claims": [
{
"id": "$claim_id",
"subjectType": "workflow-check",
"subjectId": "${slug}/unit-tests",
"surface": "flow-agents.workflow",
"claimType": "workflow.check.test",
"fieldOrBehavior": "unit tests pass",
"value": "fail",
"status": "disputed",
"impactLevel": "high",
"verificationPolicyId": "policy:workflow.check.test",
"createdAt": "$stamp",
"updatedAt": "$stamp"
}
],
"evidence": [
{
"id": "ev:${claim_id}",
"claimId": "${claim_id}",
"evidenceType": "test_output",
"label": "npm test output",
"method": "validation",
"excerptOrSummary": "8 tests failed",
"status": "disputed",
"execution": {
"runner": "npm test",
"label": "npm test",
"isError": true,
"exitCode": 1
},
"sourceRef": "command-log.jsonl",
"createdAt": "$stamp"
}
],
"events": [
{
"id": "evt:${claim_id}",
"claimId": "${claim_id}",
"status": "disputed",
"actor": "test",
"method": "validation",
"evidenceIds": ["ev:${claim_id}"],
"createdAt": "$stamp",
"verifiedAt": "$stamp"
}
],
"policies": [
{
"id": "policy:workflow.check.test",
"claimType": "workflow.check.test",
"requiredEvidence": ["test_output"],
"requiredMethods": ["validation"],
"acceptanceCriteria": ["A verified verification event must support a workflow.check.test claim."],
"reviewAuthority": "system",
"validityRule": { "kind": "manual" },
"stalenessTriggers": [],
"conflictRules": [],
"impactLevel": "high"
}
]
}
BUNDLE_JSON
}
|
|
115
|
+
|
|
116
|
+
# ── Test 1: AC1 — text output has status + value + evidence + policy + drilldown ──
|
|
117
|
+
|
|
118
|
+
echo ""
|
|
119
|
+
echo "── Test 1: text output (status + evidence + policy + drilldown) ──"
|
|
120
|
+
|
|
121
|
+
AC1_DIR="$TMPDIR_EVAL/ac1"
|
|
122
|
+
AC1_SLUG="claim-lookup-ac1"
|
|
123
|
+
seed_disputed_bundle "$AC1_DIR" "$AC1_SLUG"
|
|
124
|
+
AC1_CLAIM_ID="${AC1_SLUG}/unit-tests.flow-agents.workflow.unit tests pass"
|
|
125
|
+
|
|
126
|
+
AC1_OUT="$TMPDIR_EVAL/ac1.out"
|
|
127
|
+
if flow_agents_node workflow-sidecar claim "$AC1_CLAIM_ID" "$AC1_DIR" >"$AC1_OUT" 2>&1; then
|
|
128
|
+
_pass "AC1: claim command exits 0 for known disputed claim"
|
|
129
|
+
else
|
|
130
|
+
_fail "AC1: claim command failed: $(cat "$AC1_OUT")"
|
|
131
|
+
fi
|
|
132
|
+
|
|
133
|
+
if grep -q "Status: disputed" "$AC1_OUT"; then
|
|
134
|
+
_pass "AC1: output contains derived status (disputed)"
|
|
135
|
+
else
|
|
136
|
+
_fail "AC1: output missing derived status: $(head -3 "$AC1_OUT")"
|
|
137
|
+
fi
|
|
138
|
+
|
|
139
|
+
if grep -q "Value: fail" "$AC1_OUT"; then
|
|
140
|
+
_pass "AC1: output contains raw value"
|
|
141
|
+
else
|
|
142
|
+
_fail "AC1: output missing value"
|
|
143
|
+
fi
|
|
144
|
+
|
|
145
|
+
if grep -q "exitCode: 1" "$AC1_OUT" && grep -q "isError: true" "$AC1_OUT"; then
|
|
146
|
+
_pass "AC1: failing evidence execution block shown (exitCode + isError)"
|
|
147
|
+
else
|
|
148
|
+
_fail "AC1: execution block missing from evidence output: $(grep -i "exitCode\|isError\|Evidence" "$AC1_OUT" || echo '(not found)')"
|
|
149
|
+
fi
|
|
150
|
+
|
|
151
|
+
if grep -q "Governing Policy (policy:workflow.check.test)" "$AC1_OUT"; then
|
|
152
|
+
_pass "AC1: governing policy section present"
|
|
153
|
+
else
|
|
154
|
+
_fail "AC1: governing policy section missing"
|
|
155
|
+
fi
|
|
156
|
+
|
|
157
|
+
if grep -q "requiredEvidence:" "$AC1_OUT" && grep -q "acceptanceCriteria:" "$AC1_OUT" && grep -q "reviewAuthority:" "$AC1_OUT"; then
|
|
158
|
+
_pass "AC1: policy fields (requiredEvidence, acceptanceCriteria, reviewAuthority) present"
|
|
159
|
+
else
|
|
160
|
+
_fail "AC1: policy fields incomplete: $(grep -E "required|acceptance|review" "$AC1_OUT" || echo '(not found)')"
|
|
161
|
+
fi
|
|
162
|
+
|
|
163
|
+
if grep -q "Derivation Drilldown:" "$AC1_OUT"; then
|
|
164
|
+
_pass "AC1: derivation drilldown section present"
|
|
165
|
+
else
|
|
166
|
+
_fail "AC1: derivation drilldown section missing"
|
|
167
|
+
fi
|
|
168
|
+
|
|
169
|
+
# ── Test 2: AC1 — --json flag emits structured ClaimExplanation ──
|
|
170
|
+
|
|
171
|
+
echo ""
|
|
172
|
+
echo "── Test 2: --json flag emits structured ClaimExplanation object ──"
|
|
173
|
+
|
|
174
|
+
AC2_JSON="$TMPDIR_EVAL/ac1.json"
|
|
175
|
+
# Keep stderr out of the JSON file: the file is parsed with JSON.parse by the
# jq_node checks that follow, so anything the command prints on stderr would
# make every one of them fail. stderr goes to a sibling file and is only shown
# when the command itself fails.
if flow_agents_node workflow-sidecar claim "$AC1_CLAIM_ID" "$AC1_DIR" --json >"$AC2_JSON" 2>"${AC2_JSON}.err"; then
  _pass "AC2: --json exits 0"
else
  _fail "AC2: --json failed: $(cat "$AC2_JSON" "${AC2_JSON}.err")"
fi
|
|
180
|
+
|
|
181
|
+
# Validate JSON structure
|
|
182
|
+
FOUND="$(jq_node "$AC2_JSON" 'd => d.found' 2>/dev/null || echo '')"
|
|
183
|
+
STATUS="$(jq_node "$AC2_JSON" 'd => d.status' 2>/dev/null || echo '')"
|
|
184
|
+
VALUE="$(jq_node "$AC2_JSON" 'd => d.value' 2>/dev/null || echo '')"
|
|
185
|
+
HAS_POLICY="$(jq_node "$AC2_JSON" 'd => d.policy !== null && d.policy.id !== undefined' 2>/dev/null || echo '')"
|
|
186
|
+
EVIDENCE_LEN="$(jq_node "$AC2_JSON" 'd => d.evidence.length' 2>/dev/null || echo '')"
|
|
187
|
+
EXEC_EXITCODE="$(jq_node "$AC2_JSON" 'd => d.evidence[0] && d.evidence[0].execution && d.evidence[0].execution.exitCode' 2>/dev/null || echo '')"
|
|
188
|
+
HAS_WHY="$(jq_node "$AC2_JSON" 'd => typeof d.why === "object" && d.why !== null' 2>/dev/null || echo '')"
|
|
189
|
+
|
|
190
|
+
[[ "$FOUND" == "true" ]] && _pass "AC2: found=true in JSON" || _fail "AC2: expected found=true, got '$FOUND'"
|
|
191
|
+
[[ "$STATUS" == "disputed" ]] && _pass "AC2: status=disputed in JSON" || _fail "AC2: expected status=disputed, got '$STATUS'"
|
|
192
|
+
[[ "$VALUE" == "fail" ]] && _pass "AC2: value=fail in JSON" || _fail "AC2: expected value=fail, got '$VALUE'"
|
|
193
|
+
[[ "$HAS_POLICY" == "true" ]] && _pass "AC2: policy object present in JSON" || _fail "AC2: policy missing: $HAS_POLICY"
|
|
194
|
+
[[ "$EVIDENCE_LEN" == "1" ]] && _pass "AC2: evidence array has 1 item" || _fail "AC2: expected 1 evidence item, got '$EVIDENCE_LEN'"
|
|
195
|
+
[[ "$EXEC_EXITCODE" == "1" ]] && _pass "AC2: evidence[0].execution.exitCode=1 in JSON" || _fail "AC2: expected exitCode=1, got '$EXEC_EXITCODE'"
|
|
196
|
+
[[ "$HAS_WHY" == "true" ]] && _pass "AC2: why object present in JSON" || _fail "AC2: why object missing"
|
|
197
|
+
|
|
198
|
+
# ── Test 3: AC1 — unknown id exits 1 with clear error listing available ids ──
|
|
199
|
+
|
|
200
|
+
echo ""
|
|
201
|
+
echo "── Test 3: unknown claim id → clear error + list of available ids ──"
|
|
202
|
+
|
|
203
|
+
AC3_OUT="$TMPDIR_EVAL/ac3.out"
|
|
204
|
+
if flow_agents_node workflow-sidecar claim "nonexistent-claim-id" "$AC1_DIR" >"$AC3_OUT" 2>&1; then
|
|
205
|
+
_fail "AC3: expected exit 1 for unknown claim id but got 0"
|
|
206
|
+
else
|
|
207
|
+
_pass "AC3: exits 1 for unknown claim id"
|
|
208
|
+
fi
|
|
209
|
+
|
|
210
|
+
if grep -q "unknown claim id: nonexistent-claim-id" "$AC3_OUT"; then
|
|
211
|
+
_pass "AC3: error message names the unknown id"
|
|
212
|
+
else
|
|
213
|
+
_fail "AC3: error message missing id: $(cat "$AC3_OUT")"
|
|
214
|
+
fi
|
|
215
|
+
|
|
216
|
+
if grep -q "Available claim ids" "$AC3_OUT"; then
|
|
217
|
+
_pass "AC3: error lists available claim ids"
|
|
218
|
+
else
|
|
219
|
+
_fail "AC3: error does not list available ids: $(cat "$AC3_OUT")"
|
|
220
|
+
fi
|
|
221
|
+
|
|
222
|
+
# ── Test 4: AC1 — missing bundle exits 1 ──
|
|
223
|
+
|
|
224
|
+
echo ""
|
|
225
|
+
echo "── Test 4: missing bundle → clear error ──"
|
|
226
|
+
|
|
227
|
+
AC4_OUT="$TMPDIR_EVAL/ac4.out"
|
|
228
|
+
if flow_agents_node workflow-sidecar claim "any-id" "$TMPDIR_EVAL/nonexistent" >"$AC4_OUT" 2>&1; then
|
|
229
|
+
_fail "AC4: expected exit 1 for missing bundle but got 0"
|
|
230
|
+
else
|
|
231
|
+
_pass "AC4: exits 1 for missing bundle"
|
|
232
|
+
fi
|
|
233
|
+
|
|
234
|
+
if grep -q "no trust.bundle at" "$AC4_OUT"; then
|
|
235
|
+
_pass "AC4: error message mentions missing trust.bundle"
|
|
236
|
+
else
|
|
237
|
+
_fail "AC4: error message missing: $(cat "$AC4_OUT")"
|
|
238
|
+
fi
|
|
239
|
+
|
|
240
|
+
# ── Test 5: AC3 — gate-hint in stop-goal-fit.js warning ──
|
|
241
|
+
# Use a bundle with an acceptance criterion claim (not a check claim) so the
|
|
242
|
+
# bundleEnforcement warning is not deduplicated by captureCrossReference.
|
|
243
|
+
# FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip prevents backstop re-runs for hermeticity.
|
|
244
|
+
|
|
245
|
+
echo ""
|
|
246
|
+
echo "── Test 5: gate-hint appears in stop-goal-fit.js disputed warning ──"
|
|
247
|
+
|
|
248
|
+
AC5_PROJ="$TMPDIR_EVAL/gate-hint-proj"
|
|
249
|
+
AC5_SLUG="gate-hint-test"
|
|
250
|
+
AC5_DIR="$AC5_PROJ/.flow-agents/$AC5_SLUG"
|
|
251
|
+
mkdir -p "$AC5_DIR"
|
|
252
|
+
|
|
253
|
+
# Write a minimal bundle with a disputed acceptance criterion claim.
|
|
254
|
+
# Using workflow.acceptance.criterion (not workflow.check.*) so the subjectId
|
|
255
|
+
# won't match any evidence check id and bundleEnforcement won't be deduped.
|
|
256
|
+
cat > "$AC5_DIR/trust.bundle" <<'BUNDLE'
|
|
257
|
+
{
|
|
258
|
+
"schemaVersion": 3,
|
|
259
|
+
"source": "claim-lookup-test",
|
|
260
|
+
"claims": [
|
|
261
|
+
{
|
|
262
|
+
"id": "gate-hint-test/AC1.flow-agents.workflow.acceptance criterion verified",
|
|
263
|
+
"subjectType": "workflow-criterion",
|
|
264
|
+
"subjectId": "gate-hint-test/AC1",
|
|
265
|
+
"surface": "flow-agents.workflow",
|
|
266
|
+
"claimType": "workflow.acceptance.criterion",
|
|
267
|
+
"fieldOrBehavior": "acceptance criterion verified",
|
|
268
|
+
"value": "fail",
|
|
269
|
+
"status": "disputed",
|
|
270
|
+
"impactLevel": "high",
|
|
271
|
+
"verificationPolicyId": "policy:workflow.acceptance.criterion",
|
|
272
|
+
"createdAt": "2026-06-25T00:00:00Z",
|
|
273
|
+
"updatedAt": "2026-06-25T00:00:00Z"
|
|
274
|
+
}
|
|
275
|
+
],
|
|
276
|
+
"evidence": [],
|
|
277
|
+
"events": [
|
|
278
|
+
{
|
|
279
|
+
"id": "evt:gate-hint-test/AC1",
|
|
280
|
+
"claimId": "gate-hint-test/AC1.flow-agents.workflow.acceptance criterion verified",
|
|
281
|
+
"status": "disputed",
|
|
282
|
+
"actor": "test",
|
|
283
|
+
"method": "validation",
|
|
284
|
+
"evidenceIds": [],
|
|
285
|
+
"createdAt": "2026-06-25T00:00:00Z",
|
|
286
|
+
"verifiedAt": "2026-06-25T00:00:00Z"
|
|
287
|
+
}
|
|
288
|
+
],
|
|
289
|
+
"policies": [
|
|
290
|
+
{
|
|
291
|
+
"id": "policy:workflow.acceptance.criterion",
|
|
292
|
+
"claimType": "workflow.acceptance.criterion",
|
|
293
|
+
"requiredEvidence": ["human_attestation"],
|
|
294
|
+
"acceptanceCriteria": ["A criterion must have a verified event."],
|
|
295
|
+
"reviewAuthority": "system",
|
|
296
|
+
"validityRule": { "kind": "manual" },
|
|
297
|
+
"stalenessTriggers": [],
|
|
298
|
+
"conflictRules": [],
|
|
299
|
+
"impactLevel": "high"
|
|
300
|
+
}
|
|
301
|
+
]
|
|
302
|
+
}
|
|
303
|
+
BUNDLE
|
|
304
|
+
|
|
305
|
+
cat > "$AC5_DIR/state.json" <<'JSON'
|
|
306
|
+
{"schema_version":"1.0","task_slug":"gate-hint-test","status":"delivered","phase":"done","updated_at":"2026-06-25T00:00:00Z","next_action":{"status":"done","summary":"done"}}
|
|
307
|
+
JSON
|
|
308
|
+
|
|
309
|
+
cat > "$AC5_DIR/gate-hint-test--deliver.md" <<'MD'
|
|
310
|
+
# Gate Hint Test
|
|
311
|
+
|
|
312
|
+
branch: main
|
|
313
|
+
status: delivered
|
|
314
|
+
type: deliver
|
|
315
|
+
|
|
316
|
+
## Definition Of Done
|
|
317
|
+
- [x] all tests pass
|
|
318
|
+
|
|
319
|
+
## Goal Fit Gate
|
|
320
|
+
- [x] criteria verified
|
|
321
|
+
|
|
322
|
+
### Verdict: PASS
|
|
323
|
+
MD
|
|
324
|
+
|
|
325
|
+
AC5_OUT="$TMPDIR_EVAL/ac5.out"
|
|
326
|
+
# FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip prevents backstop re-runs for hermeticity.
|
|
327
|
+
printf '{"hook_event_name":"Stop","cwd":"%s"}' "$AC5_PROJ" \
|
|
328
|
+
| FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$AC5_OUT" 2>&1 || true
|
|
329
|
+
|
|
330
|
+
if grep -q "workflow:sidecar -- claim" "$AC5_OUT"; then
|
|
331
|
+
_pass "AC5: gate-hint 'workflow:sidecar -- claim' appears in stop-goal-fit output"
|
|
332
|
+
else
|
|
333
|
+
_fail "AC5: gate-hint missing from stop-goal-fit output: $(cat "$AC5_OUT")"
|
|
334
|
+
fi
|
|
335
|
+
|
|
336
|
+
if grep -q "trust.bundle claim disputed" "$AC5_OUT"; then
|
|
337
|
+
_pass "AC5: disputed warning present in stop-goal-fit output"
|
|
338
|
+
else
|
|
339
|
+
_fail "AC5: disputed warning missing: $(cat "$AC5_OUT")"
|
|
340
|
+
fi
|
|
341
|
+
|
|
342
|
+
# ── Results ──────────────────────────────────────────────────────────────────
|
|
343
|
+
|
|
344
|
+
echo ""
|
|
345
|
+
echo "──────────────────────────────────"
|
|
346
|
+
echo "claim lookup tests: $((errors)) failed"
|
|
347
|
+
if [[ "$errors" -eq 0 ]]; then
|
|
348
|
+
echo "ALL PASSED"
|
|
349
|
+
exit 0
|
|
350
|
+
else
|
|
351
|
+
exit 1
|
|
352
|
+
fi
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_command_log_fork_classification.sh
|
|
3
|
+
#
|
|
4
|
+
# The verifier must tell a BENIGN concurrent fork apart from real TAMPER, and
|
|
5
|
+
# the repair tool must refuse to touch tamper. This is what prevents an honest
|
|
6
|
+
# parallel-write race from becoming a hard block an agent is tempted to launder.
|
|
7
|
+
#
|
|
8
|
+
# forked = two PostToolUse captures share a parent; all hashes self-consistent
|
|
9
|
+
# and reachable. NON-blocking advisory; records stay trusted.
|
|
10
|
+
# broken = content edit (self-hash mismatch) / reorder / deletion / a
|
|
11
|
+
# non-capture sibling on a shared parent. Hard block (unchanged).
|
|
12
|
+
#
|
|
13
|
+
# Also proves: repair re-linearizes forked→ok, and REFUSES broken (no laundering).
|
|
14
|
+
set -uo pipefail
|
|
15
|
+
|
|
16
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
17
|
+
export GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
|
|
18
|
+
REPAIR="$ROOT/scripts/repair-command-log.js"
|
|
19
|
+
|
|
20
|
+
# Scratch directory for every fixture built by this script; removed on exit.
TMP="$(mktemp -d)"
trap 'rm -rf "$TMP"' EXIT
|
|
21
|
+
errors=0
|
|
22
|
+
# Print one passing check on stdout.
_pass() {
  printf ' ✓ %s\n' "$1"
}
|
|
23
|
+
# Print one failing check on stdout and count it in the global `errors`.
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$(( errors + 1 ))
}
|
|
24
|
+
|
|
25
|
+
SD=".flow-agents/s"
|
|
26
|
+
|
|
27
|
+
# Build a command-log from a spec: JSON array of {cmd,exit,src,parent} where
|
|
28
|
+
# parent is the 0-based index of the entry whose hash is this entry's prevHash
|
|
29
|
+
# (-1 = genesis). Lets us construct linear chains AND forks deterministically.
|
|
30
|
+
# build DIR SPEC — write a hash-chained command log to DIR/$SD/command-log.jsonl.
#   DIR   project directory; DIR/$SD is created if missing.
#   SPEC  JSON array of {cmd,exit,src,parent}. parent is the 0-based index of
#         the entry whose hash becomes this entry's prevHash (-1 = genesis);
#         src defaults to "postToolUse-capture".
# Needs the exported GATE module, which supplies CHAIN_GENESIS_VERIFY.
# The log directory is handed to node through the environment (SD_REL) so the
# file lands in the same $SD directory that mkdir creates, instead of a second
# hard-coded copy of that path.
build() { # $1=dir $2=spec-json
  mkdir -p "$1/$SD"
  DIR="$1" SD_REL="$SD" node -e '
    const fs=require("fs"),crypto=require("crypto"),path=require("path");
    const g=require(process.env.GATE), GEN=g.CHAIN_GENESIS_VERIFY;
    // Canonical form: every key except _chain, sorted by name, then JSON-encoded.
    const canon=r=>{const k=Object.keys(r).filter(x=>x!=="_chain").sort();const o={};for(const x of k)o[x]=r[x];return JSON.stringify(o);};
    // Entry hash = sha256(prevHash + canonical record).
    const H=(p,r)=>crypto.createHash("sha256").update(p+canon(r)).digest("hex");
    const spec=JSON.parse(process.argv[1]); const hashes=[],lines=[];
    spec.forEach((s,i)=>{
      const rec={command:s.cmd,observedResult:s.exit===0?"pass":"fail",exitCode:s.exit,
        capturedAt:new Date(Date.UTC(2026,0,1,0,0,i)).toISOString(),source:s.src||"postToolUse-capture"};
      const prev=s.parent===-1?GEN:hashes[s.parent]; const h=H(prev,rec);
      hashes.push(h); lines.push(JSON.stringify({...rec,_chain:{seq:i,prevHash:prev,hash:h}}));
    });
    fs.writeFileSync(path.join(process.env.DIR,process.env.SD_REL,"command-log.jsonl"),lines.join("\n")+"\n");
  ' "$2"
}
|
|
47
|
+
# status DIR — print the `status` field that the GATE module's
# verifyCommandLogChain returns for the session directory DIR/$SD.
# The directory is derived from $SD rather than a second hard-coded copy of it.
status() {
  LOG_DIR="$1/$SD" node -e '
    const g=require(process.env.GATE);
    console.log(g.verifyCommandLogChain(process.env.LOG_DIR).status);
  '
}
|
|
48
|
+
|
|
49
|
+
# ── 1. linear → ok ────────────────────────────────────────────────────────────
|
|
50
|
+
D="$TMP/linear"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
|
|
51
|
+
[ "$(status "$D")" = "ok" ] && _pass "linear chain → ok" || _fail "linear → $(status "$D"), want ok"
|
|
52
|
+
|
|
53
|
+
# ── 2. concurrent fork (two captures share a parent) → forked ─────────────────
|
|
54
|
+
D="$TMP/fork"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0}]'
|
|
55
|
+
[ "$(status "$D")" = "forked" ] && _pass "concurrent fork → forked (not broken)" || _fail "fork → $(status "$D"), want forked"
|
|
56
|
+
|
|
57
|
+
# ── 3. content edit (flip exitCode, keep hash) → broken ───────────────────────
|
|
58
|
+
D="$TMP/flip"; build "$D" '[{"cmd":"npm test","exit":0,"parent":-1},{"cmd":"npm run lint","exit":1,"parent":0}]'
|
|
59
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
60
|
+
import json,sys
|
|
61
|
+
L=open(sys.argv[1]).read().strip().split("\n"); e=json.loads(L[1]); e["exitCode"]=0; e["observedResult"]="pass"
|
|
62
|
+
L[1]=json.dumps(e); open(sys.argv[1],"w").write("\n".join(L)+"\n")
|
|
63
|
+
PY
|
|
64
|
+
[ "$(status "$D")" = "broken" ] && _pass "content edit → broken (tamper, not fork)" || _fail "flip → $(status "$D"), want broken"
|
|
65
|
+
|
|
66
|
+
# ── 4. reorder → broken ───────────────────────────────────────────────────────
|
|
67
|
+
D="$TMP/reorder"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
|
|
68
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
69
|
+
import sys
|
|
70
|
+
L=open(sys.argv[1]).read().strip().split("\n"); L[0],L[1]=L[1],L[0]; open(sys.argv[1],"w").write("\n".join(L)+"\n")
|
|
71
|
+
PY
|
|
72
|
+
[ "$(status "$D")" = "broken" ] && _pass "reorder → broken" || _fail "reorder → $(status "$D"), want broken"
|
|
73
|
+
|
|
74
|
+
# ── 5. deleted predecessor → broken ───────────────────────────────────────────
|
|
75
|
+
D="$TMP/delete"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
|
|
76
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
77
|
+
import sys
|
|
78
|
+
L=open(sys.argv[1]).read().strip().split("\n"); open(sys.argv[1],"w").write(L[1]+"\n")
|
|
79
|
+
PY
|
|
80
|
+
[ "$(status "$D")" = "broken" ] && _pass "deleted predecessor → broken" || _fail "delete → $(status "$D"), want broken"
|
|
81
|
+
|
|
82
|
+
# ── 6. non-capture sibling on a shared parent → broken (not a benign fork) ─────
|
|
83
|
+
D="$TMP/badfork"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0,"src":"manual-inject"}]'
|
|
84
|
+
[ "$(status "$D")" = "broken" ] && _pass "non-capture sibling fork → broken (conservative)" || _fail "badfork → $(status "$D"), want broken"
|
|
85
|
+
|
|
86
|
+
# ── 7. repair re-linearizes forked → ok; refuses broken ───────────────────────
|
|
87
|
+
D="$TMP/fork2"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0}]'
|
|
88
|
+
node "$REPAIR" "$D/$SD" --reason "test" >/dev/null 2>&1
|
|
89
|
+
[ "$(status "$D")" = "ok" ] && _pass "repair: forked → ok" || _fail "repair forked → $(status "$D"), want ok"
|
|
90
|
+
|
|
91
|
+
D="$TMP/flip2"; build "$D" '[{"cmd":"x","exit":0,"parent":-1},{"cmd":"y","exit":1,"parent":0}]'
|
|
92
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
93
|
+
import json,sys
|
|
94
|
+
L=open(sys.argv[1]).read().strip().split("\n"); e=json.loads(L[1]); e["exitCode"]=0
|
|
95
|
+
L[1]=json.dumps(e); open(sys.argv[1],"w").write("\n".join(L)+"\n")
|
|
96
|
+
PY
|
|
97
|
+
before=$(cat "$D/$SD/command-log.jsonl")
|
|
98
|
+
# Capture the repair tool's exit status without toggling errexit: this script
# runs under `set -uo pipefail` only, and a trailing `set -e` would switch
# errexit ON for everything that follows.
rc=0; node "$REPAIR" "$D/$SD" >/dev/null 2>&1 || rc=$?
|
|
99
|
+
after=$(cat "$D/$SD/command-log.jsonl")
|
|
100
|
+
if [ "$rc" -ne 0 ] && [ "$before" = "$after" ]; then _pass "repair: REFUSES broken (exit!=0, log unchanged — no laundering)"; else _fail "repair touched/accepted a broken log (rc=$rc)"; fi
|
|
101
|
+
|
|
102
|
+
# ── 8. the Stop gate does NOT hard-block a forked log ─────────────────────────
|
|
103
|
+
D="$TMP/gate"; mkdir -p "$D/$SD"
|
|
104
|
+
printf '# Repo\n' > "$D/AGENTS.md"
|
|
105
|
+
printf '%s' '{"schema_version":"1.0","task_slug":"s","status":"delivered","phase":"done","updated_at":"2026-06-23T00:00:00Z","next_action":{"status":"done","summary":"done"}}' > "$D/$SD/state.json"
|
|
106
|
+
cat > "$D/$SD/s--deliver.md" <<'MD'
|
|
107
|
+
# s
|
|
108
|
+
|
|
109
|
+
branch: main
|
|
110
|
+
status: delivered
|
|
111
|
+
type: deliver
|
|
112
|
+
|
|
113
|
+
## Definition Of Done
|
|
114
|
+
- [x] tests pass
|
|
115
|
+
|
|
116
|
+
## Goal Fit Gate
|
|
117
|
+
- [x] acceptance verified
|
|
118
|
+
|
|
119
|
+
### Verdict: PASS
|
|
120
|
+
MD
|
|
121
|
+
# forked log whose captures are all PASS, so there is no contradiction to flag
|
|
122
|
+
build "$D" '[{"cmd":"npm test","exit":0,"parent":-1},{"cmd":"npm run build","exit":0,"parent":0},{"cmd":"npm run build","exit":0,"parent":0}]'
|
|
123
|
+
printf '%s' '{"schema_version":"1.0","task_slug":"s","verdict":"pass","checks":[{"id":"t","kind":"command","status":"pass","command":"npm test","summary":"ok"}]}' > "$D/$SD/evidence.json"
|
|
124
|
+
# Run the Stop gate in block mode with the hook payload on stdin, keeping its
# combined output and exit status. `|| rc=$?` records the status without the
# `set +e` / `set -e` pair, which would leave errexit enabled afterwards even
# though the script never turned it on.
rc=0
out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$D\"}") || rc=$?
|
|
128
|
+
if [ "$rc" -eq 0 ]; then _pass "gate does NOT hard-block forked log (exit 0)"; else _fail "gate blocked forked log (exit $rc): $out"; fi
|
|
129
|
+
echo "$out" | grep -q "concurrent-capture fork" && _pass "gate emits the concurrent-fork advisory" || _fail "missing fork advisory: $out"
|
|
130
|
+
echo "$out" | grep -q "command-log integrity check FAILED" && _fail "gate wrongly emitted tamper warning for a fork" || _pass "no false tamper warning for a fork"
|
|
131
|
+
|
|
132
|
+
echo ""
|
|
133
|
+
if [ "$errors" -eq 0 ]; then echo "fork classification tests passed."; exit 0; fi
|
|
134
|
+
echo "fork classification tests FAILED: $errors issue(s)."; exit 1
|