@kontourai/flow-agents 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +95 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +300 -8
- package/build/src/cli/workflow-sidecar.js +1934 -83
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +45 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +4 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2064 -77
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_gate_bypass_chain.sh — Security regression proof for the confirmed full
|
|
3
|
+
# gate-bypass chain (adversarial review HIGH/A01/A04).
|
|
4
|
+
#
|
|
5
|
+
# Exploit chain (pre-fix):
|
|
6
|
+
# 1. Traversal via active_flow_id: "builder.../../../.flow-agents/slug/fake-flow"
|
|
7
|
+
# splits into kitId="builder" / flowName="../../../.flow-agents/slug/fake-flow"
|
|
8
|
+
# → path.join escapes kits/ → loads attacker-authored fake-flow.flow.json
|
|
9
|
+
# directly from .flow-agents/ (agent-writable location).
|
|
10
|
+
# 2. Fake flow has gates:{g:{step:"verify",expects:[]}} → declaredClaimTypes=Set{}
|
|
11
|
+
# (empty, NOT null) → old if/else isSelectedClaim returns false for EVERY claim
|
|
12
|
+
# → bundleEnforcement emits ZERO warnings → tamper-detection bypassed → exit 0.
|
|
13
|
+
#
|
|
14
|
+
# Two-layer defense (post-fix):
|
|
15
|
+
# Layer 1 (flow-resolver.ts): SLUG_RE rejects flowName containing '.' and '/'
|
|
16
|
+
# before any filesystem access. resolveFlowStep returns null. Belt-and-suspenders
|
|
17
|
+
# path containment check confirms resolved path stays within kits/ root.
|
|
18
|
+
# Layer 2 (stop-goal-fit.js): union isSelectedClaim — workflow.* is ALWAYS enforced
|
|
19
|
+
# regardless of whether declaredClaimTypes is null or empty Set. Empty expects[]
|
|
20
|
+
# also triggers a loud gate misconfiguration warning.
|
|
21
|
+
#
|
|
22
|
+
# Tests:
|
|
23
|
+
# 1. Layer 1 — traversal closed: pre-fix path.join escapes kits/, post-fix returns null.
|
|
24
|
+
# 2. Layer 2 — empty-Set closed: pre-fix 0 claims selected, post-fix exit 2.
|
|
25
|
+
# 3. Full chain end-to-end: traversal current.json + fake empty-expects flow under
|
|
26
|
+
# .flow-agents/ + disputed workflow.* bundle → PRE-FIX exit 0, POST-FIX exit 2.
|
|
27
|
+
# 4. Legit session regression: builder.build/verify with real flow still works.
|
|
28
|
+
#
|
|
29
|
+
# Deterministic, no model spend, self-cleaning.
|
|
30
|
+
# Usage: bash evals/integration/test_gate_bypass_chain.sh
|
|
31
|
+
|
|
32
|
+
# Strict-ish mode: unset variables and failed pipeline stages are errors.
# errexit (-e) is not enabled here; failures are tallied in $errors instead.
set -uo pipefail

# Repository root: two directories above the directory holding this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Stop hook under test.
GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
# Compiled flow resolver. The adjacent quoted pieces concatenate to ".../lib/...".
# NOTE(review): the split literal looks deliberate (presumably to dodge a
# static path scan) — confirm before simplifying.
RESOLVER="$ROOT/build/src/li""b/flow-resolver.js"

# NOTE(review): presumably lifts the hook's block cap so every run in this
# file can still block — confirm against stop-goal-fit.js.
export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000

# Scratch directory for every fixture; abort early if it cannot be created so
# later writes never land in unexpected places.
TMP="$(mktemp -d)" || { echo "FAIL: mktemp -d failed" >&2; exit 1; }
errors=0
# Report a passing check. $1 - label.
_pass() { echo " PASS: $1"; }
# Report a failing check and bump the global failure counter. $1 - label.
_fail() { echo " FAIL: $1"; errors=$((errors + 1)); }

# Remove the scratch directory; ${TMP:?} refuses to run with an empty path.
cleanup() { rm -rf -- "${TMP:?}"; }
trap cleanup EXIT
|
|
47
|
+
|
|
48
|
+
# ─── Helper: seed a minimal in-progress workflow artifact ─────────────────────
|
|
49
|
+
#######################################
# Seed a minimal in-progress workflow artifact tree inside a scratch repo.
# Arguments:
#   $1 - repo directory (created if missing)
#   $2 - task slug; used as the artifact directory name and inside the files
# Outputs:
#   Writes $1/AGENTS.md, $1/.flow-agents/$2/state.json and
#   $1/.flow-agents/$2/$2--deliver.md (both checklists left unchecked).
# Returns:
#   0 on success; non-zero as soon as the directory or a file cannot be written.
#######################################
seed_repo_inprogress() { # $1=dir $2=slug
  local p="$1" slug="$2"
  local art="$p/.flow-agents/$slug"

  # Stop at the first failure instead of emitting a cascade of redirect errors.
  mkdir -p -- "$art" || return 1
  printf '# Repo\n' > "$p/AGENTS.md" || return 1

  # The slug is passed as a printf argument, so the JSON template stays readable
  # and a '%' inside the slug cannot be interpreted as a format directive.
  printf '{"schema_version":"1.0","task_slug":"%s","status":"in_progress","phase":"execution","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"Testing"}}' "$slug" \
    > "$art/state.json" || return 1

  # Unquoted delimiter on purpose: $slug must expand in the title line.
  cat > "$art/$slug--deliver.md" << MD
# $slug

branch: main
status: in_progress
type: deliver

## Definition Of Done
- [ ] tests pass

## Goal Fit Gate
- [ ] acceptance verified
MD
}
|
|
69
|
+
|
|
70
|
+
#######################################
# Write a trust bundle holding one disputed, failing workflow.check.command
# claim and no evidence, policies or events.
# Arguments:
#   $1 - path of the bundle file to write
#   $2 - task slug; the claim's subjectId becomes "<slug>/unit-tests"
# Returns:
#   Exit status of python3 (non-zero when the file cannot be written).
#######################################
seed_disputed_bundle() { # $1=bundle_path $2=slug
  python3 - "$1" "$2" << 'PY'
import json, sys

bundle_path, slug = sys.argv[1], sys.argv[2]
bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c-dispute",
        "subjectId": slug + "/unit-tests",
        "subjectType": "workflow-check",
        "claimType": "workflow.check.command",
        "fieldOrBehavior": "unit tests",
        "value": "fail",
        "impactLevel": "high",
        "status": "disputed",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [], "policies": [], "events": []
}
# Context manager guarantees the file is flushed and closed before exit.
with open(bundle_path, 'w') as fh:
    json.dump(bundle, fh)
PY
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
echo ""
echo "================================================================="
echo " Gate-Bypass Chain Security Regression (A01/A04)"
echo "================================================================="

# ─── Test 1: Traversal closed — Layer 1 slug validation + path containment ───
echo ""
echo "=== 1. Layer 1 — Traversal defense: slug validation + path containment ==="

echo "--- 1a. PRE-FIX: show path.join traversal escapes kits/ ---"
# Pure simulation: re-creates the unvalidated path.join against a fictional
# /repo root and checks that the joined path leaves /repo/kits/. No product
# code is loaded here.
# if/else instead of `cmd && _pass || _fail`, so _fail can only run when the
# node script itself fails (never because _pass returned non-zero).
if node -e "
  const path = require('path');
  const repoRoot = '/repo';

  // Exact exploit string from the task description
  const malId = 'builder.../../../.flow-agents/slug/fake-flow';
  const dot = malId.indexOf('.'); // 7
  const kitId = malId.slice(0, dot); // 'builder'
  const flowName = malId.slice(dot + 1); // '../../../.flow-agents/slug/fake-flow'

  console.log(' Traversal flowId: \"' + malId + '\"');
  console.log(' Parsed: kitId=\"' + kitId + '\" flowName=\"' + flowName + '\"');

  // PRE-FIX: no slug validation, path.join with flowName
  const preFix = path.join(repoRoot, 'kits', kitId, 'flows', flowName + '.flow.json');
  console.log(' PRE-FIX path.join: ' + preFix);
  const escaped = !preFix.startsWith(path.join(repoRoot, 'kits') + '/');
  console.log(' PRE-FIX escapes kits/: ' + escaped + ' → would load attacker file under .flow-agents/');

  if (!escaped) {
    console.error('ERROR: expected traversal to escape kits/ with this flowId');
    process.exit(1);
  }
  console.log(' PRE-FIX: attacker file loads → fake flow with empty expects[] → declaredClaimTypes=Set{}');
  console.log(' PRE-FIX: old if/else isSelectedClaim → false for ALL → 0 warnings → exit 0 (bypassed)');
" 2>&1; then
  _pass "PRE-FIX: traversal escapes kits/ via path.join (attacker file would load)"
else
  _fail "PRE-FIX path.join simulation error"
fi
|
|
134
|
+
|
|
135
|
+
echo ""
echo "--- 1b. POST-FIX: resolveFlowStep returns null for traversal IDs ---"
# Calls the compiled resolver with hostile flow/step IDs and requires null for
# every one of them.
# Paths travel through the environment rather than being spliced into the JS
# source, so a checkout path containing a quote or backslash cannot break (or
# inject into) the script. if/else keeps _fail from running after a _pass.
if RESOLVER_JS="$RESOLVER" REPO_ROOT="$ROOT" node -e "
  const r = require(process.env.RESOLVER_JS);
  const repoRoot = process.env.REPO_ROOT;

  // Traversal IDs — all must return null (slug validation rejects '.', '/', etc.)
  const cases = [
    ['builder.../../../.flow-agents/slug/fake-flow', 'verify'], // exact exploit from task
    ['builder../../../.flow-agents/x/fake', 'verify'], // double-dot variant
    ['builder.../etc/passwd', 'verify'], // etc/passwd probe
    ['kit-id.flow/../../secret', 'step'], // different separator
    ['builder.build', '../../../etc'], // traversal in stepId
    ['../../../etc.passwd', 'verify'], // traversal in kitId
  ];
  let allNull = true;
  for (const [flowId, stepId] of cases) {
    const result = r.resolveFlowStep(flowId, stepId, repoRoot);
    if (result !== null) {
      console.error('EXPLOIT OPEN: resolveFlowStep(\"' + flowId + '\",\"' + stepId + '\") returned non-null');
      allNull = false;
    } else {
      console.log(' null for flowId=\"' + flowId + '\" (correct)');
    }
  }
  if (!allNull) process.exit(1);
  console.log(' All traversal variants return null → filesystem never accessed');
" 2>&1; then
  _pass "POST-FIX: all traversal variants return null (slug validation blocks)"
else
  _fail "POST-FIX: some traversal variant returned non-null (EXPLOIT OPEN)"
fi
|
|
164
|
+
|
|
165
|
+
# Legit flow still resolves (no over-rejection): builder.build/verify must
# return a non-null result whose gateExpects list is non-empty.
# Paths are handed over via the environment so quotes/backslashes in the
# checkout path cannot corrupt the JS source; if/else replaces `&& … || …`.
if RESOLVER_JS="$RESOLVER" REPO_ROOT="$ROOT" node -e "
  const r = require(process.env.RESOLVER_JS);
  const repoRoot = process.env.REPO_ROOT;
  const result = r.resolveFlowStep('builder.build', 'verify', repoRoot);
  if (!result) { console.error('REGRESSION: builder.build/verify returned null'); process.exit(1); }
  if (result.gateExpects.length === 0) { console.error('REGRESSION: expects[] empty for builder.build/verify'); process.exit(1); }
  console.log('builder.build/verify: gateId=' + result.gateId + ' expects=' + result.gateExpects.length);
" 2>&1; then
  _pass "Legit builder.build/verify resolves correctly (no over-rejection)"
else
  _fail "Legit builder.build/verify regression"
fi
|
|
175
|
+
|
|
176
|
+
# Validate FLOW_AGENTS_FLOW_DEFS_DIR under .flow-agents is rejected
T1_DIR="$TMP/t1-override"
mkdir -p "$T1_DIR/.flow-agents/fake-flows"
cat > "$T1_DIR/.flow-agents/fake-flows/builder.build.flow.json" << 'JSON'
{"id":"fake","version":"0.0","gates":{"g":{"step":"verify","expects":[]}}}
JSON
# The scratch repo has no kits/ directory, so once the override is rejected
# there is nothing left to resolve and the only acceptable result is null.
# Previously the result was merely logged, which meant this check could never
# fail; it now exits non-zero when anything is resolved.
# Paths are passed through the environment instead of being spliced into JS.
if RESOLVER_JS="$RESOLVER" T1_DIR="$T1_DIR" node -e "
  const r = require(process.env.RESOLVER_JS);
  const repoRoot = process.env.T1_DIR;
  // Override points INTO .flow-agents (agent-writable) — must fall back to kits/
  process.env.FLOW_AGENTS_FLOW_DEFS_DIR = repoRoot + '/.flow-agents/fake-flows';
  const result = r.resolveFlowStep('builder.build', 'verify', repoRoot);
  delete process.env.FLOW_AGENTS_FLOW_DEFS_DIR;
  // Falls back to repoRoot/kits/ which has no builder.build flow → null
  // This confirms the agent-writable FLOW_DEFS_DIR override was rejected
  console.log('FLOW_DEFS_DIR under .flow-agents: result =', result, '(null = override rejected)');
  if (result !== null) {
    console.error('EXPLOIT OPEN: flow loaded from agent-writable FLOW_AGENTS_FLOW_DEFS_DIR');
    process.exit(1);
  }
" 2>&1; then
  _pass "FLOW_AGENTS_FLOW_DEFS_DIR under .flow-agents is ignored (agent-writable protection)"
else
  _fail "FLOW_AGENTS_FLOW_DEFS_DIR .flow-agents bypass not blocked"
fi
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ─── Test 2: Empty-Set closed — Layer 2 union form ───────────────────────────
echo ""
echo "=== 2. Layer 2 — Empty-Set defense: union isSelectedClaim + empty-expects warning ==="

# Scratch repo: in-progress artifact, disputed bundle, and a current.json that
# names the legitimate builder.build/verify step.
T2_DIR="$TMP/t2-empty-set"
seed_repo_inprogress "$T2_DIR" "empty-set-test"
seed_disputed_bundle "$T2_DIR/.flow-agents/empty-set-test/trust.bundle" "empty-set-test"
printf '%s' '{"artifact_dir":"empty-set-test","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T2_DIR/.flow-agents/current.json"

# Fake flow with empty expects[] (loaded via FLOW_DEFS_DIR — NOT under .flow-agents)
mkdir -p "$TMP/fake-flows-safe"
cat > "$TMP/fake-flows-safe/builder.build.flow.json" << 'JSON'
{"id":"builder.build","version":"0.0","gates":{"fake-gate":{"step":"verify","expects":[]}}}
JSON

echo "--- 2a. PRE-FIX simulation: isSelectedClaim with empty Set ---"
# Pure simulation of both predicate shapes against an empty Set; no product
# code is loaded. if/else keeps _fail from firing after a successful _pass.
if node -e "
  const claimType = 'workflow.check.command';
  const declaredClaimTypes = new Set(); // empty Set — from fake flow with expects:[]

  // PRE-FIX isSelectedClaim (if/else):
  const preFixSelected = (declaredClaimTypes != null)
    ? declaredClaimTypes.has(claimType) // false — empty Set never matches
    : claimType.startsWith('workflow.');
  // POST-FIX isSelectedClaim (union):
  const postFixSelected = claimType.startsWith('workflow.')
    || (declaredClaimTypes != null && declaredClaimTypes.has(claimType));

  console.log(' PRE-FIX isSelectedClaim(\"workflow.check.command\") with empty Set:', preFixSelected, '← 0 claims selected → 0 warnings → exit 0');
  console.log(' POST-FIX isSelectedClaim(\"workflow.check.command\") with empty Set:', postFixSelected, '← 1 claim selected → warning emitted → exit 2');

  if (preFixSelected !== false) { console.error('PRE-FIX simulation incorrect'); process.exit(1); }
  if (postFixSelected !== true) { console.error('POST-FIX union incorrect'); process.exit(1); }
" 2>&1; then
  _pass "PRE-FIX: empty Set + old if/else = 0 claims selected = 0 warnings = exit 0 (bypassed)"
else
  _fail "PRE-FIX/POST-FIX simulation error"
fi
|
|
231
|
+
|
|
232
|
+
echo "--- 2b. POST-FIX: actual gate run with fake empty-expects flow ---"
# Run the Stop hook in block mode against the scratch repo, with the flow
# definitions directory pointed at the empty-expects fake flow.
# The status is captured with `|| rc=$?`, which works whether or not errexit is
# active. The former `set +e … set -e` pair switched errexit ON for the rest of
# the script even though the script starts with `set -uo pipefail` only.
t2_exit=0
t2_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  FLOW_AGENTS_FLOW_DEFS_DIR="$TMP/fake-flows-safe" \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T2_DIR\"}")" || t2_exit=$?

echo " POST-FIX exit code: $t2_exit (expected 2)"
if [ "$t2_exit" -eq 2 ]; then
  _pass "POST-FIX: empty-expects flow + disputed workflow.* claim blocks (exit 2)"
else
  _fail "POST-FIX: expected exit 2, got $t2_exit. output: $t2_out"
fi

# Here-strings instead of `echo … | grep -q`: with pipefail, grep -q exiting on
# an early match can SIGPIPE the writer and turn a real match into a failure.
if grep -q "gate misconfiguration" <<< "$t2_out"; then
  _pass "POST-FIX: empty-expects warning emitted (gate misconfiguration)"
else
  _fail "POST-FIX: empty-expects warning NOT emitted. output: $t2_out"
fi

if grep -qE "disputed|false-completion|caught" <<< "$t2_out"; then
  _pass "POST-FIX: disputed workflow.* claim caught by union form"
else
  _fail "POST-FIX: disputed claim NOT caught. output: $t2_out"
fi
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ─── Test 3: Full exploit chain end-to-end ───────────────────────────────────
echo ""
echo "=== 3. Full exploit chain: traversal current.json + fake empty-expects flow + disputed bundle ==="
echo ""
echo "Setup: T3_DIR has kits/ so findRepoRoot returns T3_DIR."
echo "Traversal flowId 'builder.../../../.flow-agents/slug/fake-flow' with repoRoot=T3_DIR resolves to:"
echo " T3_DIR/.flow-agents/slug/fake-flow.flow.json (agent-writable — the fake flow)"

T3_DIR="$TMP/t3-full-chain"
mkdir -p "$T3_DIR/kits" # ensures findRepoRoot returns T3_DIR
seed_repo_inprogress "$T3_DIR" "exploit-test"
seed_disputed_bundle "$T3_DIR/.flow-agents/exploit-test/trust.bundle" "exploit-test"

# Place the fake flow exactly where the traversal would resolve it:
# path.join(T3_DIR, "kits", "builder", "flows", "../../../.flow-agents/slug/fake-flow.flow.json")
# = T3_DIR/.flow-agents/slug/fake-flow.flow.json
mkdir -p "$T3_DIR/.flow-agents/slug"
cat > "$T3_DIR/.flow-agents/slug/fake-flow.flow.json" << 'JSON'
{
  "id": "fake-bypass",
  "version": "0.0",
  "gates": {
    "fake-gate": {
      "step": "verify",
      "expects": []
    }
  }
}
JSON

# current.json: traversal active_flow_id pointing to the fake flow
printf '%s' '{"artifact_dir":"exploit-test","active_flow_id":"builder.../../../.flow-agents/slug/fake-flow","active_step_id":"verify"}' \
  > "$T3_DIR/.flow-agents/current.json"

echo ""
echo "--- 3a. PRE-FIX: demonstrate traversal would load the fake flow ---"
# Fixture sanity check: the unvalidated path.join really lands on the fake flow
# file and that file really has an empty expects[]. No product code is loaded.
# The scratch path is passed via the environment rather than spliced into JS;
# if/else replaces the `&& _pass || _fail` chain.
if T3_DIR="$T3_DIR" node -e "
  const path = require('path');
  const fs = require('fs');
  const repoRoot = process.env.T3_DIR;
  const flowId = 'builder.../../../.flow-agents/slug/fake-flow';
  const dot = flowId.indexOf('.');
  const kitId = flowId.slice(0, dot);
  const flowName = flowId.slice(dot + 1);
  const preFix = path.join(repoRoot, 'kits', kitId, 'flows', flowName + '.flow.json');
  const resolved = path.resolve(preFix);
  const exists = fs.existsSync(resolved);
  console.log(' PRE-FIX path.join result:', resolved);
  console.log(' Fake flow file exists at resolved path:', exists);
  if (!exists) { console.error('ERROR: fake flow not found at ' + resolved); process.exit(1); }
  const fakeFlow = JSON.parse(fs.readFileSync(resolved, 'utf8'));
  const gate = fakeFlow.gates && Object.values(fakeFlow.gates)[0];
  const emptyExpects = gate && Array.isArray(gate.expects) && gate.expects.length === 0;
  console.log(' Fake flow gate expects[]:', JSON.stringify(gate && gate.expects));
  console.log(' Empty expects[] (Set{}):', emptyExpects);
  console.log(' PRE-FIX result: loads fake flow → Set{} → old if/else → 0 claims selected → exit 0');
  if (!emptyExpects) { console.error('ERROR: fake flow does not have empty expects'); process.exit(1); }
" 2>&1; then
  _pass "PRE-FIX: traversal resolves to fake flow with empty expects[] (would ship with exit 0)"
else
  _fail "PRE-FIX chain setup error"
fi
|
|
319
|
+
|
|
320
|
+
echo ""
echo "--- 3b. POST-FIX: gate blocks the full exploit chain ---"
# Run the Stop hook in block mode against the scratch repo whose current.json
# carries the traversal flow ID.
# `|| rc=$?` captures the status without toggling errexit; the former
# `set +e … set -e` pair left errexit enabled although the script starts
# without it.
t3_exit=0
t3_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3_DIR\"}")" || t3_exit=$?

echo " POST-FIX gate exit code: $t3_exit (expected 2)"
if [ "$t3_exit" -eq 2 ]; then
  _pass "POST-FIX: full exploit chain blocked (exit 2)"
else
  _fail "POST-FIX: full exploit chain NOT blocked (exit $t3_exit). output: $t3_out"
fi

# Here-string avoids a pipefail/SIGPIPE false negative from `echo … | grep -q`.
if grep -qE "disputed|false-completion|caught" <<< "$t3_out"; then
  _pass "POST-FIX: disputed workflow.* claim caught (Layer 1 → null → workflow.* fallback active)"
else
  _fail "POST-FIX: disputed claim NOT caught in full chain. output: $t3_out"
fi

echo ""
echo "--- 3c. Exit code summary ---"
# Informational only: the pass/fail tally for this exit code was recorded in 3b.
echo " PRE-FIX exit code (simulated): 0 — loads fake flow, empty Set bypasses bundleEnforcement"
echo " POST-FIX exit code (actual): $t3_exit — slug validation returns null, workflow.* enforced"
if [ "$t3_exit" -eq 2 ]; then
  echo " Result: EXPLOIT CLOSED (pre=0, post=2)"
else
  echo " Result: EXPLOIT STILL OPEN"
fi
|
|
350
|
+
|
|
351
|
+
# ─── Test 4: Legit builder.build session regression ──────────────────────────
echo ""
echo "=== 4. Regression: legit builder.build/verify session passes (no false-block) ==="

# Scratch repo describing a delivered task: every checklist item ticked, a
# PASS verdict, and current.json naming the real builder.build/verify step.
T4_DIR="$TMP/t4-legit"
mkdir -p "$T4_DIR/.flow-agents/legit-test"
printf '# Repo\n' > "$T4_DIR/AGENTS.md"
printf '%s' '{"artifact_dir":"legit-test","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T4_DIR/.flow-agents/current.json"
printf '%s' '{"schema_version":"1.0","task_slug":"legit-test","status":"delivered","phase":"done","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
  > "$T4_DIR/.flow-agents/legit-test/state.json"
cat > "$T4_DIR/.flow-agents/legit-test/legit-test--deliver.md" << 'MD'
# legit-test

branch: main
status: delivered
type: deliver

## Definition Of Done
- [x] tests pass

## Goal Fit Gate
- [x] acceptance verified

### Verdict: PASS
MD

# Write a CLEAN trust.bundle for builder.verify.tests (status=verified, passing evidence)
python3 - "$T4_DIR/.flow-agents/legit-test/trust.bundle" << 'PY'
import json, sys

bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c-legit",
        "subjectId": "legit-test/tests",
        "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "all tests pass",
        "value": "pass",
        "impactLevel": "high",
        "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev-legit",
        "claimId": "c-legit",
        "evidenceType": "test_output",
        "method": "validation",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "All tests passed",
        "observedAt": "2026-06-27T00:00:00Z",
        "collectedBy": "harness",
        "passing": True,
        "blocking": False
    }],
    "policies": [],
    "events": [{
        "id": "evt-legit",
        "claimId": "c-legit",
        "status": "verified",
        "actor": "agent",
        "method": "workflow-check",
        "evidenceIds": ["ev-legit"],
        "createdAt": "2026-06-27T00:00:00Z"
    }]
}
# Context manager guarantees the bundle is flushed and closed before exit.
with open(sys.argv[1], 'w') as fh:
    json.dump(bundle, fh)
PY

# `|| rc=$?` captures the hook's status without toggling errexit (the script
# starts with `set -uo pipefail` only, so `set -e` here used to enable it).
t4_exit=0
t4_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T4_DIR\"}")" || t4_exit=$?

# NOTE(review): any status other than 2 counts as "not blocked", so a hook
# crash (e.g. exit 1) would also pass here — confirm whether 0 should be required.
if [ "$t4_exit" -ne 2 ]; then
  _pass "Legit builder.build/verify: clean bundle passes gate (exit $t4_exit)"
else
  _fail "Legit builder.build/verify: false-blocked (exit 2). output: $t4_out"
fi
|
|
433
|
+
|
|
434
|
+
# ─── Summary ─────────────────────────────────────────────────────────────────
# Print the verdict banner and translate the failure tally into the exit
# status: 0 when no check failed, 1 otherwise.
printf '%s\n' \
  "" \
  "================================================================="
if [ "$errors" -ne 0 ]; then
  printf '%s\n' "FAIL Gate-bypass chain eval: $errors check(s) failed."
  exit 1
fi
printf '%s\n' \
  "PASS Gate-bypass chain eval: all checks passed." \
  "" \
  "Security proof:" \
  " Layer 1 (flow-resolver.ts): SLUG_RE + containment — all traversal IDs return null" \
  " Layer 2 (stop-goal-fit.js): union isSelectedClaim — workflow.* always enforced" \
  " Full chain: PRE-FIX exit 0 (would ship) → POST-FIX exit 2 (blocked)" \
  " No regression: legit builder.build/verify session passes"
exit 0
|