@kontourai/flow-agents 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +103 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/console-learning-projection.d.ts +1 -0
- package/build/src/cli/effective-backlog-settings.d.ts +1 -0
- package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
- package/build/src/cli/init.d.ts +17 -0
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/kit.d.ts +1 -0
- package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
- package/build/src/cli/publish-change-helper.d.ts +1 -0
- package/build/src/cli/pull-work-provider.d.ts +1 -0
- package/build/src/cli/runtime-adapter.d.ts +1 -0
- package/build/src/cli/telemetry-doctor.d.ts +1 -0
- package/build/src/cli/usage-feedback.d.ts +1 -0
- package/build/src/cli/utterance-check.d.ts +1 -0
- package/build/src/cli/validate-hook-influence.d.ts +1 -0
- package/build/src/cli/validate-source-tree.d.ts +1 -0
- package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/veritas-governance.d.ts +1 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
- package/build/src/cli/workflow-sidecar.d.ts +324 -0
- package/build/src/cli/workflow-sidecar.js +1973 -90
- package/build/src/cli.d.ts +2 -0
- package/build/src/cli.js +2 -3
- package/build/src/flow-kit/validate.d.ts +81 -0
- package/build/src/index.d.ts +5 -0
- package/build/src/index.js +36 -0
- package/build/src/lib/args.d.ts +8 -0
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/lib/fs.d.ts +7 -0
- package/build/src/lib/workflow-learning-projection.d.ts +132 -0
- package/build/src/runtime-adapters.d.ts +18 -0
- package/build/src/tools/build-universal-bundles.d.ts +2 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/common.d.ts +9 -0
- package/build/src/tools/generate-context-map.d.ts +2 -0
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-package.d.ts +2 -0
- package/build/src/tools/validate-source-tree.d.ts +2 -0
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/developer-architecture.md +14 -0
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +78 -10
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_hook_category_behaviors.sh +14 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +47 -0
- package/evals/static/test_library_exports.sh +85 -0
- package/evals/static/test_universal_bundles.sh +15 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +13 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2093 -84
- package/src/cli.ts +2 -3
- package/src/index.ts +53 -0
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/tsconfig.json +1 -0
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -480,10 +480,12 @@ else
|
|
|
480
480
|
_fail "sidecar writer evidence failed: $(cat "$TMPDIR_EVAL/evidence.out" "$TMPDIR_EVAL/evidence.err")"
|
|
481
481
|
fi
|
|
482
482
|
|
|
483
|
-
|
|
484
|
-
|
|
483
|
+
# Phase 4c: acceptance.json criteria status no longer updated at verification time (bundle-only).
|
|
484
|
+
# State is verified; bundle claims carry the criteria status.
|
|
485
|
+
if rg -q '"status": "verified"' "$ARTIFACT_DIR/state.json" && [[ -f "$ARTIFACT_DIR/trust.bundle" ]] && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const ac=b.claims.filter(c=>c.claimType==="workflow.acceptance.criterion"); if(ac.length===0) throw new Error("no acceptance criterion claims in bundle"); if(ac.some(c=>c.value!=="pass")) throw new Error("some acceptance criterion not pass in bundle: "+JSON.stringify(ac.map(c=>c.value)));' "$ARTIFACT_DIR/trust.bundle" 2>/dev/null; then
|
|
486
|
+
_pass "sidecar writer updates state and records acceptance in bundle from evidence"
|
|
485
487
|
else
|
|
486
|
-
_fail "sidecar writer did not update state
|
|
488
|
+
_fail "sidecar writer did not update state or bundle from evidence"
|
|
487
489
|
fi
|
|
488
490
|
|
|
489
491
|
INVALID_REF_DIR="$TMPDIR_EVAL/repo/.flow-agents/invalid-evidence-ref"
|
|
@@ -545,14 +547,15 @@ else
|
|
|
545
547
|
fi
|
|
546
548
|
|
|
547
549
|
SURFACE_CHECK='{"id":"surface-trust-fixture","kind":"policy","status":"pass","summary":"Hachure trust.bundle evidence passed.","surface_trust_refs":[{"artifact_kind":"trust.bundle","artifact_ref":"trust/report.json","gate_id":"builder.trust.bundle","claim_type":"builder.trust.bundle","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Issued during this workflow."},"authority":{"producer":"surface-local","summary":"Local Surface trust producer."},"integrity":{"status":"matched","summary":"Artifact digest matched expected subject and gate.","digest":"sha256:abc123"},"status":"pass","summary":"Accepted trust.bundle claim."}]}'
|
|
550
|
+
# Phase 4c: evidence.json no longer written; verify in trust.bundle (sole verification artifact).
|
|
548
551
|
if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
|
|
549
552
|
--verdict pass \
|
|
550
553
|
--check-json "$SURFACE_CHECK" \
|
|
551
554
|
--timestamp "2026-05-09T00:01:05Z" >"$TMPDIR_EVAL/surface-evidence.out" 2>"$TMPDIR_EVAL/surface-evidence.err" \
|
|
552
|
-
&&
|
|
553
|
-
&& rg -q '
|
|
554
|
-
&& !
|
|
555
|
-
_pass "sidecar writer records Hachure-aligned trust.bundle refs"
|
|
555
|
+
&& [[ -f "$ARTIFACT_DIR/trust.bundle" ]] \
|
|
556
|
+
&& ! rg -q 'veritas' "$ARTIFACT_DIR/trust.bundle" \
|
|
557
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const c=b.claims.find(c=>c.claimType==="workflow.check.policy"); if(!c) throw new Error("no policy claim in bundle"); if(c.value!=="pass") throw new Error("expected pass, got "+c.value);' "$ARTIFACT_DIR/trust.bundle" 2>/dev/null; then
|
|
558
|
+
_pass "sidecar writer records Hachure-aligned trust.bundle refs (verified in bundle)"
|
|
556
559
|
else
|
|
557
560
|
_fail "sidecar writer did not record Hachure-aligned trust.bundle refs: $(cat "$TMPDIR_EVAL/surface-evidence.out" "$TMPDIR_EVAL/surface-evidence.err")"
|
|
558
561
|
fi
|
|
@@ -595,13 +598,16 @@ check_surface_fixture() {
|
|
|
595
598
|
local expected_text="$5"
|
|
596
599
|
local dir="$TMPDIR_EVAL/repo/.flow-agents/surface-$name"
|
|
597
600
|
mkdir -p "$dir"
|
|
601
|
+
# Phase 4c: evidence.json no longer written; verify surface trust check status in trust.bundle.
|
|
598
602
|
if flow_agents_node "$WRITER" record-evidence "$dir" \
|
|
599
603
|
--task-slug "surface-$name" \
|
|
600
604
|
--verdict "$verdict" \
|
|
601
605
|
--check-json '{"id":"ordinary-builder-evidence","kind":"test","status":"pass","summary":"Ordinary Builder Kit evidence still records."}' \
|
|
602
606
|
--surface-trust-json "$SURFACE_FIXTURE_DIR/$fixture" \
|
|
603
607
|
--timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/surface-$name.out" 2>"$TMPDIR_EVAL/surface-$name.err" \
|
|
604
|
-
&&
|
|
608
|
+
&& [[ -f "$dir/trust.bundle" ]] \
|
|
609
|
+
&& ! grep -qi 'veritas' "$dir/trust.bundle" \
|
|
610
|
+
&& node -e 'const fs=require("fs"); const [bundleFile, expectedStatus, expectedText]=process.argv.slice(1); const b=JSON.parse(fs.readFileSync(bundleFile,"utf8")); const policyClaims=b.claims.filter((c)=>c.claimType==="workflow.check.policy"); if(policyClaims.length!==1) throw new Error("expected one policy claim, found "+policyClaims.length); const c=policyClaims[0]; if(c.value!==expectedStatus) throw new Error("expected "+expectedStatus+", got "+c.value); const blob=JSON.stringify(b); if(!blob.includes(expectedText)) throw new Error("missing expected text "+expectedText+" in bundle");' "$dir/trust.bundle" "$expected_status" "$expected_text" 2>/dev/null
|
|
605
611
|
then
|
|
606
612
|
_pass "surface trust fixture maps $name to $expected_status evidence"
|
|
607
613
|
else
|
|
@@ -619,13 +625,15 @@ check_surface_fixture "artifact-absent" "artifact-absent.json" "not_verified" "n
|
|
|
619
625
|
|
|
620
626
|
PURE_SURFACE_DIR="$TMPDIR_EVAL/repo/.flow-agents/surface-trust-only"
|
|
621
627
|
mkdir -p "$PURE_SURFACE_DIR"
|
|
628
|
+
# Phase 4c: evidence.json no longer written; verify in trust.bundle.
|
|
622
629
|
if flow_agents_node "$WRITER" record-evidence "$PURE_SURFACE_DIR" \
|
|
623
630
|
--task-slug "surface-trust-only" \
|
|
624
631
|
--verdict pass \
|
|
625
632
|
--surface-trust-json "$SURFACE_FIXTURE_DIR/accepted-claim-trust-report.json" \
|
|
626
633
|
--timestamp "2026-05-09T00:02:30Z" >"$TMPDIR_EVAL/surface-only.out" 2>"$TMPDIR_EVAL/surface-only.err" \
|
|
627
|
-
&&
|
|
628
|
-
|
|
634
|
+
&& [[ -f "$PURE_SURFACE_DIR/trust.bundle" ]] \
|
|
635
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); if(!Array.isArray(b.claims)||b.claims.length===0) throw new Error("no claims in bundle"); ' "$PURE_SURFACE_DIR/trust.bundle" 2>/dev/null; then
|
|
636
|
+
_pass "sidecar writer records Surface trust evidence without unrelated check-json (verified in bundle)"
|
|
629
637
|
else
|
|
630
638
|
_fail "sidecar writer should accept Surface trust evidence without check-json: $(cat "$TMPDIR_EVAL/surface-only.out" "$TMPDIR_EVAL/surface-only.err")"
|
|
631
639
|
fi
|
|
@@ -885,8 +893,12 @@ else
|
|
|
885
893
|
_fail "sidecar writer not-verified evidence failed: $(cat "$TMPDIR_EVAL/nv-evidence.out" "$TMPDIR_EVAL/nv-evidence.err")"
|
|
886
894
|
fi
|
|
887
895
|
|
|
888
|
-
|
|
889
|
-
|
|
896
|
+
# Phase 4c: evidence.json no longer written; not-verified state is in state.json + trust.bundle.
|
|
897
|
+
# not_verified_gaps are accepted as input but not persisted to a sidecar (bundle-only sessions).
|
|
898
|
+
if rg -q '"status": "not_verified"' "$NV_DIR/state.json" \
|
|
899
|
+
&& [[ -f "$NV_DIR/trust.bundle" ]] \
|
|
900
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const c=b.claims.find(c=>c.claimType==="workflow.check.external"); if(!c) throw new Error("no external check claim"); if(c.value!=="not_verified") throw new Error("expected not_verified, got "+c.value);' "$NV_DIR/trust.bundle" 2>/dev/null; then
|
|
901
|
+
_pass "sidecar writer preserves not-verified state in state.json and bundle"
|
|
890
902
|
else
|
|
891
903
|
_fail "sidecar writer did not preserve not-verified state"
|
|
892
904
|
fi
|
|
@@ -978,10 +990,11 @@ status_a=$?
|
|
|
978
990
|
wait "$pid_b"
|
|
979
991
|
status_b=$?
|
|
980
992
|
|
|
993
|
+
# Phase 4c: critique.json no longer written; verify both reviews are in trust.bundle claims.
|
|
981
994
|
if [[ "$status_a" -eq 0 && "$status_b" -eq 0 ]] \
|
|
982
|
-
&&
|
|
983
|
-
&&
|
|
984
|
-
_pass "sidecar writer serializes concurrent sidecar writes"
|
|
995
|
+
&& [[ -f "$CONCURRENT_DIR/trust.bundle" ]] \
|
|
996
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const cc=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(cc.length<2) throw new Error("expected 2 critique claims, found "+cc.length+": "+JSON.stringify(cc.map(c=>c.subjectId)));' "$CONCURRENT_DIR/trust.bundle" 2>/dev/null; then
|
|
997
|
+
_pass "sidecar writer serializes concurrent sidecar writes (both reviews in bundle)"
|
|
985
998
|
else
|
|
986
999
|
_fail "sidecar writer lost concurrent critique writes: $(cat "$TMPDIR_EVAL/concurrent-a.out" "$TMPDIR_EVAL/concurrent-a.err" "$TMPDIR_EVAL/concurrent-b.out" "$TMPDIR_EVAL/concurrent-b.err")"
|
|
987
1000
|
fi
|
|
@@ -1679,21 +1692,24 @@ else
|
|
|
1679
1692
|
_fail "dogfood-pass should allow honest failed records: $(cat "$TMPDIR_EVAL/dogfood-failed-pass.out" "$TMPDIR_EVAL/dogfood-failed-pass.err")"
|
|
1680
1693
|
fi
|
|
1681
1694
|
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
&& rg -q '
|
|
1685
|
-
&&
|
|
1686
|
-
|
|
1695
|
+
# Phase 4c: evidence.json/critique.json no longer written; verify in trust.bundle.
|
|
1696
|
+
if rg -q '"status": "failed"' "$FAILED_DOGFOOD_DIR/state.json" \
|
|
1697
|
+
&& rg -q 'Required dogfood critique is not passing' "$FAILED_DOGFOOD_DIR/handoff.json" \
|
|
1698
|
+
&& [[ -f "$FAILED_DOGFOOD_DIR/trust.bundle" ]] \
|
|
1699
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const cc=b.claims.filter(c=>c.claimType==="workflow.check.test"); if(!cc.length) throw new Error("no test check claim"); if(cc[0].value!=="fail") throw new Error("expected fail, got "+cc[0].value); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim"); if(crit[0].value!=="fail") throw new Error("expected fail critique, got "+crit[0].value);' "$FAILED_DOGFOOD_DIR/trust.bundle" 2>/dev/null; then
|
|
1700
|
+
_pass "dogfood-pass failed records preserve failed state and blockers (verified in bundle)"
|
|
1687
1701
|
else
|
|
1688
1702
|
_fail "dogfood-pass failed record did not preserve routing state"
|
|
1689
1703
|
fi
|
|
1690
1704
|
|
|
1705
|
+
# Phase 4c: critique.json no longer written; validator reports sidecar missing (still blocks gate).
|
|
1706
|
+
# The trust.bundle carries the disputed critique claim which is the authoritative gate signal.
|
|
1691
1707
|
if flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$FAILED_DOGFOOD_DIR" >"$TMPDIR_EVAL/dogfood-failed-valid.out" 2>"$TMPDIR_EVAL/dogfood-failed-valid.err"; then
|
|
1692
|
-
_fail "strict validator should still reject
|
|
1693
|
-
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err"; then
|
|
1694
|
-
_pass "dogfood-pass failed records remain visibly blocked under strict validation"
|
|
1708
|
+
_fail "strict validator should still reject when critique is missing (4c bundle-only)"
|
|
1709
|
+
elif rg -q 'required critique must pass|required sidecar is missing' "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err"; then
|
|
1710
|
+
_pass "dogfood-pass failed records remain visibly blocked under strict validation (sidecar missing or critique fail)"
|
|
1695
1711
|
else
|
|
1696
|
-
_fail "dogfood-pass failed record strict validation did not expose critique blocker"
|
|
1712
|
+
_fail "dogfood-pass failed record strict validation did not expose critique blocker: $(cat "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err")"
|
|
1697
1713
|
fi
|
|
1698
1714
|
|
|
1699
1715
|
if flow_agents_node "$WRITER" dogfood-pass \
|
|
@@ -1715,11 +1731,13 @@ else
|
|
|
1715
1731
|
_fail "dogfood-pass failed: $(cat "$TMPDIR_EVAL/dogfood-pass.out" "$TMPDIR_EVAL/dogfood-pass.err")"
|
|
1716
1732
|
fi
|
|
1717
1733
|
|
|
1734
|
+
# Phase 4c: critique.json no longer written; verify in trust.bundle.
|
|
1718
1735
|
if rg -q '"state_status": "verified"' "$TMPDIR_EVAL/dogfood-pass.out" \
|
|
1719
|
-
&& rg -q '"status": "pass"' "$DOGFOOD_DIR/critique.json" \
|
|
1720
1736
|
&& rg -q '"status": "learned"' "$DOGFOOD_DIR/learning.json" \
|
|
1721
|
-
&& rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json"
|
|
1722
|
-
|
|
1737
|
+
&& rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json" \
|
|
1738
|
+
&& [[ -f "$DOGFOOD_DIR/trust.bundle" ]] \
|
|
1739
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim in bundle"); if(crit[0].value!=="pass") throw new Error("expected pass critique, got "+crit[0].value);' "$DOGFOOD_DIR/trust.bundle" 2>/dev/null; then
|
|
1740
|
+
_pass "dogfood-pass writes clean bundle, learning, and state (4c bundle-only)"
|
|
1723
1741
|
else
|
|
1724
1742
|
_fail "dogfood-pass did not produce expected clean sidecars"
|
|
1725
1743
|
fi
|
|
@@ -1830,6 +1848,7 @@ flow_agents_node "$WRITER" init-plan "$DOGFOOD_NV_DIR/dogfood-not-verified--deli
|
|
|
1830
1848
|
--next-action "Record not verified dogfood pass." \
|
|
1831
1849
|
--timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-nv-init.out" 2>"$TMPDIR_EVAL/dogfood-nv-init.err"
|
|
1832
1850
|
|
|
1851
|
+
# Phase 4c: evidence.json no longer written; verify not-verified claim in trust.bundle.
|
|
1833
1852
|
if flow_agents_node "$WRITER" dogfood-pass \
|
|
1834
1853
|
--artifact-root "$SESSION_ROOT" \
|
|
1835
1854
|
--artifact-dir "$DOGFOOD_NV_DIR" \
|
|
@@ -1838,10 +1857,10 @@ if flow_agents_node "$WRITER" dogfood-pass \
|
|
|
1838
1857
|
--gap "External live runtime unavailable." \
|
|
1839
1858
|
--summary "Dogfood pass preserved not verified evidence." \
|
|
1840
1859
|
--timestamp "2026-05-09T00:06:00Z" >"$TMPDIR_EVAL/dogfood-nv.out" 2>"$TMPDIR_EVAL/dogfood-nv.err" \
|
|
1841
|
-
&& rg -q '"verdict": "not_verified"' "$DOGFOOD_NV_DIR/evidence.json" \
|
|
1842
1860
|
&& rg -q '"state_status": "not_verified"' "$TMPDIR_EVAL/dogfood-nv.out" \
|
|
1843
|
-
&&
|
|
1844
|
-
|
|
1861
|
+
&& [[ -f "$DOGFOOD_NV_DIR/trust.bundle" ]] \
|
|
1862
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const ec=b.claims.filter(c=>c.claimType==="workflow.check.external"); if(!ec.length) throw new Error("no external check claim"); if(ec[0].value!=="not_verified") throw new Error("expected not_verified, got "+ec[0].value);' "$DOGFOOD_NV_DIR/trust.bundle" 2>/dev/null; then
|
|
1863
|
+
_pass "dogfood-pass preserves NOT_VERIFIED evidence and routing (verified in bundle)"
|
|
1845
1864
|
else
|
|
1846
1865
|
_fail "dogfood-pass did not preserve not verified evidence: $(cat "$TMPDIR_EVAL/dogfood-nv.out" "$TMPDIR_EVAL/dogfood-nv.err")"
|
|
1847
1866
|
fi
|
|
@@ -2009,8 +2028,10 @@ else
|
|
|
2009
2028
|
_fail "sidecar writer import critique failed: $(cat "$TMPDIR_EVAL/import-critique.out" "$TMPDIR_EVAL/import-critique.err")"
|
|
2010
2029
|
fi
|
|
2011
2030
|
|
|
2012
|
-
|
|
2013
|
-
|
|
2031
|
+
# Phase 4c: critique.json no longer written; verify critique claim in trust.bundle.
|
|
2032
|
+
if [[ -f "$REVIEW_DIR/trust.bundle" ]] \
|
|
2033
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim"); if(crit[0].value!=="pass") throw new Error("expected pass, got "+crit[0].value);' "$REVIEW_DIR/trust.bundle" 2>/dev/null; then
|
|
2034
|
+
_pass "sidecar writer extracts review findings (verified in bundle)"
|
|
2014
2035
|
else
|
|
2015
2036
|
_fail "sidecar writer did not extract review findings"
|
|
2016
2037
|
fi
|
|
@@ -2097,12 +2118,506 @@ MARKDOWN
|
|
|
2097
2118
|
|
|
2098
2119
|
if flow_agents_node "$WRITER" import-critique "$IMPORT_BAD" "$IMPORT_BAD/imported-bad-critique--review.md" >"$TMPDIR_EVAL/import-bad-critique.out" 2>&1; then
|
|
2099
2120
|
_fail "sidecar writer should reject imported failing critique"
|
|
2100
|
-
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/import-bad-critique.out"
|
|
2101
|
-
|
|
2121
|
+
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/import-bad-critique.out" \
|
|
2122
|
+
&& [[ -f "$IMPORT_BAD/trust.bundle" ]] \
|
|
2123
|
+
&& node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim"); if(crit[0].value!=="fail") throw new Error("expected fail, got "+crit[0].value);' "$IMPORT_BAD/trust.bundle" 2>/dev/null; then
|
|
2124
|
+
_pass "sidecar writer persists and rejects imported failing critique (critique in bundle, not sidecar)"
|
|
2102
2125
|
else
|
|
2103
2126
|
_fail "imported failing critique did not persist actionable finding"
|
|
2104
2127
|
fi
|
|
2105
2128
|
|
|
2129
|
+
|
|
2130
|
+
# ─── AC1: trust.bundle dual-write file existence and schema validity ──────────
# Seeds a plan from the auto-sidecars artifact, records one passing check, and
# expects the writer to leave a trust.bundle next to it. The bundle is then
# run through validateTrustBundle from the built workflow-sidecar module.
TB_SCHEMA_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-schema"
TB_BUNDLE_PATH="$TB_SCHEMA_DIR/trust.bundle"
tb_schema_plan="$TB_SCHEMA_DIR/trust-bundle-schema--deliver.md"
tb_schema_init_args=(
  --source-request "Trust bundle schema fixture."
  --summary "Trust bundle schema fixture."
  --next-action "Record evidence and verify trust.bundle."
  --timestamp "2026-05-09T00:00:00Z"
)
tb_schema_evidence_args=(
  --verdict pass
  --check-json '{"id":"tb-schema-check","kind":"test","status":"pass","summary":"Trust bundle schema fixture check passed."}'
  --timestamp "2026-05-09T00:01:00Z"
)

mkdir -p "$TB_SCHEMA_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$tb_schema_plan"
flow_agents_node "$WRITER" init-plan "$tb_schema_plan" "${tb_schema_init_args[@]}" \
  >"$TMPDIR_EVAL/tb-schema-init.out" 2>"$TMPDIR_EVAL/tb-schema-init.err"

if flow_agents_node "$WRITER" record-evidence "$TB_SCHEMA_DIR" "${tb_schema_evidence_args[@]}" \
  >"$TMPDIR_EVAL/tb-schema-evidence.out" 2>"$TMPDIR_EVAL/tb-schema-evidence.err" \
  && test -f "$TB_BUNDLE_PATH"; then
  _pass "trust.bundle dual-write creates trust.bundle after record-evidence"
else
  _fail "trust.bundle dual-write did not create trust.bundle after record-evidence: $(cat "$TMPDIR_EVAL/tb-schema-evidence.out" "$TMPDIR_EVAL/tb-schema-evidence.err")"
fi

# Schema validation only runs when the bundle exists; the check above already
# reported the missing-file case.
if [[ ! -f "$TB_BUNDLE_PATH" ]]; then
  :
elif node --input-type=module 2>"$TMPDIR_EVAL/tb-validate.err" <<NODEOF
import { readFileSync } from 'node:fs';
import { validateTrustBundle } from '${ROOT}/build/src/cli/workflow-sidecar.js';
const abort = (code, message) => { process.stderr.write(message + '\n'); process.exit(code); };
const verdict = await validateTrustBundle(JSON.parse(readFileSync('${TB_BUNDLE_PATH}', 'utf8')));
// exit 2: validator unavailable; exit 1: validator ran and rejected the bundle
if (!verdict.available) abort(2, 'surface unavailable: validateTrustBundle.available was false');
if (!verdict.valid) abort(1, 'schema invalid: ' + verdict.errors.join('; '));
NODEOF
then
  _pass "trust.bundle dual-write produces schema-valid bundle (available:true, valid:true)"
else
  _fail "trust.bundle schema validation failed: $(cat "$TMPDIR_EVAL/tb-validate.err")"
fi
|
|
2166
|
+
|
|
2167
|
+
# ─── AC2: claim status fidelity — pass→verified, fail→disputed ───────────────
# One evidence record carries a passing and a failing check; the emitted bundle
# must hold a claim for each with the matching status, plus at least one
# workflow.acceptance.criterion claim.
TB_FIDELITY_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-fidelity"
tb_fidelity_plan="$TB_FIDELITY_DIR/trust-bundle-fidelity--deliver.md"
tb_fidelity_checks=(
  --check-json '{"id":"tb-pass-check","kind":"test","status":"pass","summary":"This check passed."}'
  --check-json '{"id":"tb-fail-check","kind":"test","status":"fail","summary":"This check failed."}'
)

mkdir -p "$TB_FIDELITY_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$tb_fidelity_plan"
flow_agents_node "$WRITER" init-plan "$tb_fidelity_plan" \
  --source-request "Trust bundle claim fidelity fixture." \
  --summary "Trust bundle claim fidelity fixture." \
  --next-action "Seed pass and fail checks to verify claim status mapping." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/tb-fidelity-init.out" 2>"$TMPDIR_EVAL/tb-fidelity-init.err"

if ! {
  flow_agents_node "$WRITER" record-evidence "$TB_FIDELITY_DIR" \
    --verdict fail "${tb_fidelity_checks[@]}" \
    --timestamp "2026-05-09T00:01:00Z" \
    >"$TMPDIR_EVAL/tb-fidelity-evidence.out" 2>"$TMPDIR_EVAL/tb-fidelity-evidence.err" \
    && [[ -f "$TB_FIDELITY_DIR/trust.bundle" ]]
}; then
  _fail "trust.bundle claim fidelity setup failed: $(cat "$TMPDIR_EVAL/tb-fidelity-evidence.out" "$TMPDIR_EVAL/tb-fidelity-evidence.err")"
elif node --input-type=module 2>"$TMPDIR_EVAL/tb-fidelity-check.err" <<NODEOF
import { readFileSync } from 'node:fs';
const fail = (message) => { process.stderr.write(message + '\n'); process.exit(1); };
const claims = JSON.parse(readFileSync('${TB_FIDELITY_DIR}/trust.bundle', 'utf8')).claims;
// Claims are looked up by subjectId suffix (slug/checkId) rather than by claim id.
const claimFor = (suffix) => claims.find((c) => c.subjectId && c.subjectId.endsWith(suffix));
const passClaim = claimFor('/tb-pass-check');
const failClaim = claimFor('/tb-fail-check');
if (!passClaim) fail('missing claim for subjectId ending with /tb-pass-check');
if (!failClaim) fail('missing claim for subjectId ending with /tb-fail-check');
if (passClaim.status !== 'verified') fail('pass check claim status was ' + passClaim.status + ', expected verified (Surface deriveClaimStatus)');
if (failClaim.status !== 'disputed') fail('fail check claim status was ' + failClaim.status + ', expected disputed (Surface deriveClaimStatus)');
// At least one acceptance-criterion claim must be present as well.
if (!claims.some((c) => c.claimType === 'workflow.acceptance.criterion')) fail('expected at least one workflow.acceptance.criterion claim but found none');
NODEOF
then
  _pass "trust.bundle claim fidelity: pass check maps to verified, fail check maps to disputed, ac criterion claim present (Surface deriveClaimStatus)"
else
  _fail "trust.bundle claim fidelity assertion failed: $(cat "$TMPDIR_EVAL/tb-fidelity-check.err")"
fi
|
|
2206
|
+
|
|
2207
|
+
# ─── AC2: claim status fidelity — critique fail→disputed, pass→verified ──────
# Same mapping as for checks, exercised through record-critique: one failing
# and one passing review are recorded and both must appear in the bundle.
TB_CRITIQUE_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-critique"
tb_critique_plan="$TB_CRITIQUE_DIR/trust-bundle-critique--deliver.md"

mkdir -p "$TB_CRITIQUE_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$tb_critique_plan"
flow_agents_node "$WRITER" init-plan "$tb_critique_plan" \
  --source-request "Trust bundle critique claim fidelity fixture." \
  --summary "Trust bundle critique claim fidelity fixture." \
  --next-action "Record pass and fail critiques to verify claim status mapping." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/tb-critique-init.out" 2>"$TMPDIR_EVAL/tb-critique-init.err"
flow_agents_node "$WRITER" record-evidence "$TB_CRITIQUE_DIR" \
  --verdict pass \
  --check-json '{"id":"tb-critique-setup","kind":"test","status":"pass","summary":"Critique fidelity setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/tb-critique-evidence.out" 2>"$TMPDIR_EVAL/tb-critique-evidence.err"

# Failing review first; its exit status is deliberately not part of the verdict.
flow_agents_node "$WRITER" record-critique "$TB_CRITIQUE_DIR" \
  --id tb-fail-review \
  --reviewer tool-code-reviewer \
  --verdict fail \
  --summary "Critique failed — blocking finding." \
  --timestamp "2026-05-09T00:02:00Z" \
  >"$TMPDIR_EVAL/tb-critique-fail.out" 2>"$TMPDIR_EVAL/tb-critique-fail.err" || true

# Passing review; this one gates the assertions below.
if ! {
  flow_agents_node "$WRITER" record-critique "$TB_CRITIQUE_DIR" \
    --id tb-pass-review \
    --reviewer tool-code-reviewer \
    --verdict pass \
    --summary "Critique passed — no blocking findings." \
    --timestamp "2026-05-09T00:02:30Z" \
    >"$TMPDIR_EVAL/tb-critique-pass.out" 2>"$TMPDIR_EVAL/tb-critique-pass.err" \
    && [[ -f "$TB_CRITIQUE_DIR/trust.bundle" ]]
}; then
  _fail "trust.bundle critique claim fidelity setup failed: $(cat "$TMPDIR_EVAL/tb-critique-pass.out" "$TMPDIR_EVAL/tb-critique-pass.err")"
elif node --input-type=module 2>"$TMPDIR_EVAL/tb-critique-assert.err" <<NODEOF
import { readFileSync } from 'node:fs';
const fail = (message) => { process.stderr.write(message + '\n'); process.exit(1); };
const claims = JSON.parse(readFileSync('${TB_CRITIQUE_DIR}/trust.bundle', 'utf8')).claims;
// Claims are looked up by subjectId suffix (slug/reviewId) rather than by claim id.
const claimFor = (suffix) => claims.find((c) => c.subjectId && c.subjectId.endsWith(suffix));
const failCritique = claimFor('/tb-fail-review');
const passCritique = claimFor('/tb-pass-review');
if (!failCritique) fail('missing claim for subjectId ending with /tb-fail-review');
if (!passCritique) fail('missing claim for subjectId ending with /tb-pass-review');
if (failCritique.status !== 'disputed') fail('fail critique claim status was ' + failCritique.status + ', expected disputed (Surface deriveClaimStatus)');
if (passCritique.status !== 'verified') fail('pass critique claim status was ' + passCritique.status + ', expected verified (Surface deriveClaimStatus)');
NODEOF
then
  _pass "trust.bundle claim fidelity: critique fail→disputed, critique pass→verified"
else
  _fail "trust.bundle critique claim fidelity assertion failed: $(cat "$TMPDIR_EVAL/tb-critique-assert.err")"
fi
|
|
2257
|
+
|
|
2258
|
+
# ─── AC3: capture authoritative over claimed status + policies present (ADR 0010 maximal) ──
# The check below claims "pass" for `npm test`, while the command log written
# beforehand records that same command as failed. The resulting claim must be
# disputed, the bundle must carry policies, and the claim's evidence must
# include an execution record flagged isError.
TB_CAPTURE_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-capture"
tb_capture_plan="$TB_CAPTURE_DIR/trust-bundle-capture--deliver.md"

mkdir -p "$TB_CAPTURE_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$tb_capture_plan"
flow_agents_node "$WRITER" init-plan "$tb_capture_plan" \
  --source-request "Capture-authoritative trust bundle fixture." \
  --summary "Capture-authoritative trust bundle fixture." \
  --next-action "Seed a claimed-pass check whose command actually failed in the capture log." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/tb-capture-init.out" 2>"$TMPDIR_EVAL/tb-capture-init.err"

# Capture log is written before record-evidence runs: exit 1 for `npm test`.
printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1}' \
  > "$TB_CAPTURE_DIR/command-log.jsonl"

if ! {
  flow_agents_node "$WRITER" record-evidence "$TB_CAPTURE_DIR" \
    --verdict pass \
    --check-json '{"id":"tb-capture-check","kind":"test","status":"pass","summary":"Claimed pass.","command":"npm test"}' \
    --timestamp "2026-05-09T00:01:00Z" \
    >"$TMPDIR_EVAL/tb-capture-evidence.out" 2>"$TMPDIR_EVAL/tb-capture-evidence.err" \
    && [[ -f "$TB_CAPTURE_DIR/trust.bundle" ]]
}; then
  _fail "trust.bundle capture-authoritative setup failed: $(cat "$TMPDIR_EVAL/tb-capture-evidence.out" "$TMPDIR_EVAL/tb-capture-evidence.err")"
elif node --input-type=module 2>"$TMPDIR_EVAL/tb-capture-assert.err" <<NODEOF
import { readFileSync } from 'node:fs';
const fail = (message) => { process.stderr.write(message + '\n'); process.exit(1); };
const bundle = JSON.parse(readFileSync('${TB_CAPTURE_DIR}/trust.bundle', 'utf8'));
const claim = bundle.claims.find((c) => c.subjectId && c.subjectId.endsWith('/tb-capture-check'));
if (!claim) fail('missing claim for /tb-capture-check');
if (claim.status !== 'disputed') fail('claimed-pass check with captured FAIL had status ' + claim.status + ', expected disputed (capture authoritative)');
const hasPolicies = Array.isArray(bundle.policies) && bundle.policies.length > 0;
if (!hasPolicies) fail('bundle.policies empty — expected a verification policy per claimType');
const ev = bundle.evidence.find((e) => e.claimId === claim.id);
const capturedError = Boolean(ev && ev.execution && ev.execution.isError === true);
if (!capturedError) fail('capture evidence with execution.isError=true missing');
NODEOF
then
  _pass "trust.bundle capture authoritative: claimed-pass + captured-fail → disputed; policies present; execution evidence folded in"
else
  _fail "trust.bundle capture-authoritative assertion failed: $(cat "$TMPDIR_EVAL/tb-capture-assert.err")"
fi
|
|
2292
|
+
|
|
2293
|
+
# ─── AC4: render-trust-panel projects the bundle to a standalone Surface Trust Panel (ADR 0010 Phase 3) ──
# Reuses the capture fixture's bundle. Two artifacts are checked: the HTML
# panel (element tag, inlined custom-element registration, disputed status)
# and trust-report.json next to it.
if ! {
  [[ -f "$TB_CAPTURE_DIR/trust.bundle" ]] \
    && flow_agents_node "$WRITER" render-trust-panel "$TB_CAPTURE_DIR" \
      --out "$TB_CAPTURE_DIR/trust-panel.html" \
      >"$TMPDIR_EVAL/tb-panel.out" 2>"$TMPDIR_EVAL/tb-panel.err"
}; then
  _fail "render-trust-panel failed: $(cat "$TMPDIR_EVAL/tb-panel.out" "$TMPDIR_EVAL/tb-panel.err")"
else
  PANEL="$TB_CAPTURE_DIR/trust-panel.html"
  REPORT="$TB_CAPTURE_DIR/trust-report.json"

  # Every needle must be present in the panel; stop at the first miss.
  tb_panel_ok=no
  if [[ -f "$PANEL" ]]; then
    tb_panel_ok=yes
    for tb_panel_needle in "<surface-trust-panel" "customElements.define" '"status":"disputed"'; do
      rg -q "$tb_panel_needle" "$PANEL" && continue
      tb_panel_ok=no
      break
    done
  fi
  if [[ "$tb_panel_ok" == yes ]]; then
    _pass "render-trust-panel: standalone Trust Panel HTML with inlined Surface element + disputed claim from the derived report"
  else
    _fail "render-trust-panel output missing panel element / inlined JS / disputed claim"
  fi

  # Report artifact: the derived TrustReport written alongside the panel.
  if [[ -f "$REPORT" ]] \
    && rg -q '"status": "disputed"' "$REPORT" \
    && rg -q '"claims"' "$REPORT"; then
    _pass "render-trust-panel: also emits trust-report.json (derived report with the disputed claim)"
  else
    _fail "render-trust-panel did not emit a valid trust-report.json: $(head -c 200 "$REPORT" 2>/dev/null)"
  fi
fi
|
|
2314
|
+
|
|
2315
|
+
# ─── AC5: trust-mcp wiring (flow-agents#137) — zero-write print + opt-in, reversible enable/disable ──
# Starts from a config that already holds an unrelated "other" server, then:
#   1. print mode must mention flow-agents-surface-trust on stdout;
#   2. enable (run twice) must add our server, keep "other", and not duplicate;
#   3. disable must remove our server and keep "other".
# CLI output is kept in log files so a failing step can be diagnosed from the
# _fail message instead of being discarded.
# NOTE(review): the "zero-write" property of print mode is not asserted here;
# only its stdout is checked.
TB_MCP_CFG="$TMPDIR_EVAL/mcp/.mcp.json"
TB_MCP_LOG="$TMPDIR_EVAL/tb-mcp-cli.log"
mkdir -p "$(dirname "$TB_MCP_CFG")"
echo '{"mcpServers":{"other":{"command":"x","args":[]}}}' > "$TB_MCP_CFG"
: >"$TB_MCP_LOG"
if flow_agents_node "$WRITER" trust-mcp >"$TMPDIR_EVAL/tb-mcp-print.out" 2>>"$TB_MCP_LOG" \
  && rg -q "flow-agents-surface-trust" "$TMPDIR_EVAL/tb-mcp-print.out" \
  && flow_agents_node "$WRITER" trust-mcp --mode enable --config "$TB_MCP_CFG" >>"$TB_MCP_LOG" 2>&1 \
  && flow_agents_node "$WRITER" trust-mcp --mode enable --config "$TB_MCP_CFG" >>"$TB_MCP_LOG" 2>&1; then
  if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-mcp.err"
import { readFileSync } from 'node:fs';
const s = (JSON.parse(readFileSync('${TB_MCP_CFG}','utf8')).mcpServers) || {};
if (!s['flow-agents-surface-trust']) { process.stderr.write('enable did not add our server\n'); process.exit(1); }
if (!s['other']) { process.stderr.write('enable clobbered an existing server\n'); process.exit(1); }
if (Object.keys(s).length !== 2) { process.stderr.write('enable not idempotent (count ' + Object.keys(s).length + ')\n'); process.exit(1); }
NODEOF
  then
    # A failing disable command is itself a test failure; its output is appended
    # to the same error file the assertion script writes to.
    if flow_agents_node "$WRITER" trust-mcp --mode disable --config "$TB_MCP_CFG" >>"$TMPDIR_EVAL/tb-mcp.err" 2>&1 \
      && node --input-type=module <<NODEOF 2>>"$TMPDIR_EVAL/tb-mcp.err"
import { readFileSync } from 'node:fs';
const s = (JSON.parse(readFileSync('${TB_MCP_CFG}','utf8')).mcpServers) || {};
if (s['flow-agents-surface-trust']) { process.stderr.write('disable left our server\n'); process.exit(1); }
if (!s['other']) { process.stderr.write('disable removed an existing server\n'); process.exit(1); }
NODEOF
    then
      _pass "trust-mcp: zero-write print; enable idempotent + preserves existing; disable removes only ours"
    else
      _fail "trust-mcp disable assertion failed: $(cat "$TMPDIR_EVAL/tb-mcp.err")"
    fi
  else
    _fail "trust-mcp enable assertion failed: $(cat "$TMPDIR_EVAL/tb-mcp.err")"
  fi
else
  _fail "trust-mcp print/enable invocation failed: $(cat "$TMPDIR_EVAL/tb-mcp-print.out" "$TB_MCP_LOG" 2>/dev/null)"
fi
|
|
2349
|
+
|
|
2350
|
+
# ─── AC6: agent liveness (ADR 0012) — held / free-on-lapse / free-on-release ──
# Three subjects are set up against one artifact root and evaluated at 12:00Z:
#   held-subj  — claimed 11:50 and heartbeated 11:58 (ttl 1800s) → expected held
#   stale-subj — claimed 11:00, never refreshed (ttl 1800s)      → expected free
#   rel-subj   — claimed 11:50, released 11:55                   → expected free
TB_LIVENESS_ROOT="$TMPDIR_EVAL/liveness/.flow-agents"

# Runs one liveness subcommand against the AC6 root; --artifact-root stays last.
tb_liveness() {
  flow_agents_node "$WRITER" liveness "$@" --artifact-root "$TB_LIVENESS_ROOT"
}

{
  tb_liveness claim held-subj --actor agent-A --at "2026-06-25T11:50:00Z" --ttl 1800
  tb_liveness heartbeat held-subj --actor agent-A --at "2026-06-25T11:58:00Z"
  tb_liveness claim stale-subj --actor agent-B --at "2026-06-25T11:00:00Z" --ttl 1800
  tb_liveness claim rel-subj --actor agent-C --at "2026-06-25T11:50:00Z" --ttl 1800
  tb_liveness release rel-subj --actor agent-C --at "2026-06-25T11:55:00Z"
} >/dev/null 2>&1

# Lines mentioning "unknown format" are filtered out of the status output.
LIVENESS_OUT=$(tb_liveness status --now "2026-06-25T12:00:00Z" 2>/dev/null | grep -viE "unknown format")

if grep -qE "held-subj.*agent-A.*held" <<<"$LIVENESS_OUT" \
  && grep -qE "stale-subj.*agent-B.*free" <<<"$LIVENESS_OUT" \
  && grep -qE "rel-subj.*agent-C.*free" <<<"$LIVENESS_OUT"; then
  _pass "liveness: liveness claims recompute held / free(lapsed) / free(released) via Surface deriveTrustStatus (ADR 0012)"
else
  _fail "liveness status mismatch (expected held/free/free): $LIVENESS_OUT"
fi
|
|
2365
|
+
|
|
2366
|
+
# ─── AC7: lifecycle-driven liveness (ADR 0012) — init-plan claims, advance-state releases (opt-in) ──
# With FLOW_AGENTS_LIVENESS=on, status is sampled after init-plan (expected
# held) and again after advance-state to delivered (expected free). A second
# fixture runs init-plan without the env var and must leave no events file.
TB_LC_ROOT="$TMPDIR_EVAL/liveness-lifecycle/.flow-agents"
TB_LC_DIR="$TB_LC_ROOT/lc-task"
mkdir -p "$TB_LC_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_LC_DIR/lc-task--deliver.md"

FLOW_AGENTS_LIVENESS=on FLOW_AGENTS_ACTOR=agent-LC flow_agents_node "$WRITER" init-plan \
  "$TB_LC_DIR/lc-task--deliver.md" --task-slug lc-task \
  --source-request x --summary y --next-action z \
  --timestamp "2026-06-25T11:50:00Z" >/dev/null 2>&1
LC_HELD=$(flow_agents_node "$WRITER" liveness status --now "2026-06-25T12:00:00Z" --artifact-root "$TB_LC_ROOT" 2>/dev/null | grep -viE "unknown format")

FLOW_AGENTS_LIVENESS=on FLOW_AGENTS_ACTOR=agent-LC flow_agents_node "$WRITER" advance-state \
  "$TB_LC_DIR" --status delivered --phase done --task-slug lc-task \
  --timestamp "2026-06-25T11:55:00Z" >/dev/null 2>&1
LC_FREE=$(flow_agents_node "$WRITER" liveness status --now "2026-06-25T12:00:00Z" --artifact-root "$TB_LC_ROOT" 2>/dev/null | grep -viE "unknown format")

# Opt-out fixture: same init-plan call shape, liveness env vars not set.
TB_OFF_ROOT="$TMPDIR_EVAL/liveness-off/.flow-agents"
mkdir -p "$TB_OFF_ROOT/off-task"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_OFF_ROOT/off-task/off-task--deliver.md"
flow_agents_node "$WRITER" init-plan "$TB_OFF_ROOT/off-task/off-task--deliver.md" \
  --task-slug off-task --source-request x --summary y --next-action z >/dev/null 2>&1

if grep -qE "lc-task.*agent-LC.*held" <<<"$LC_HELD" \
  && grep -qE "lc-task.*agent-LC.*free" <<<"$LC_FREE" \
  && [[ ! -f "$TB_OFF_ROOT/liveness/events.jsonl" ]]; then
  _pass "liveness lifecycle: init-plan claims (held), advance→delivered releases (free); opt-in respected (no events when disabled)"
else
  if [[ -f "$TB_OFF_ROOT/liveness/events.jsonl" ]]; then
    tb_off_state=wrote
  else
    tb_off_state=none
  fi
  _fail "liveness lifecycle mismatch: held=[$LC_HELD] free=[$LC_FREE] off=$tb_off_state"
fi
|
|
2382
|
+
|
|
2383
|
+
# ─── AC8: bundle-writers fail LOUDLY when Surface unavailable — no silent data loss (#156) ──
# After a normal init-plan + record-evidence, record-critique is run with
# FLOW_AGENTS_SURFACE_UNAVAILABLE=1. It must exit non-zero AND say that the
# critique was not written / not persisted.
TB_FO_DIR="$TMPDIR_EVAL/repo/.flow-agents/failopen"
mkdir -p "$TB_FO_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_FO_DIR/failopen--deliver.md"
flow_agents_node "$WRITER" init-plan "$TB_FO_DIR/failopen--deliver.md" \
  --task-slug failopen --source-request x --summary y --next-action z \
  --timestamp "2026-05-09T00:00:00Z" >/dev/null 2>&1
flow_agents_node "$WRITER" record-evidence "$TB_FO_DIR" \
  --verdict pass \
  --check-json '{"id":"c1","kind":"test","status":"pass","summary":"s"}' \
  --timestamp "2026-05-09T00:01:00Z" >/dev/null 2>&1

if ! FLOW_AGENTS_SURFACE_UNAVAILABLE=1 flow_agents_node "$WRITER" record-critique "$TB_FO_DIR" \
  --id rev-fo --reviewer r --verdict pass --summary fo \
  --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/failopen.out" 2>&1; then
  # Non-zero exit as required; now the output has to explain the loss.
  if grep -qiE "was NOT written|not persisted" "$TMPDIR_EVAL/failopen.out"; then
    _pass "bundle-writers fail loudly (no silent data loss) when Surface unavailable (#156)"
  else
    _fail "record-critique failed but without a clear not-persisted message: $(cat "$TMPDIR_EVAL/failopen.out")"
  fi
else
  _fail "record-critique fail-opened (exit 0) when Surface unavailable — SILENT DATA LOSS: $(cat "$TMPDIR_EVAL/failopen.out")"
fi
|
|
2397
|
+
|
|
2398
|
+
|
|
2399
|
+
# ─── AC3: statusFunctionVersion conformance ───────────────────────────────────
# Assert the statusFunctionVersion embedded in the emitted trust.bundle source
# field matches @kontourai/surface's exported statusFunctionVersion constant.
# A missing bundle is reported as a failure (with the captured record-evidence
# output) rather than silently skipping the comparison.
TB_CONF_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-conformance"
mkdir -p "$TB_CONF_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_CONF_DIR/trust-bundle-conformance--deliver.md"
flow_agents_node "$WRITER" init-plan "$TB_CONF_DIR/trust-bundle-conformance--deliver.md" \
  --source-request "Conformance fixture." \
  --summary "Conformance fixture." \
  --next-action "Record evidence and check statusFunctionVersion." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/tb-conf-init.out" 2>"$TMPDIR_EVAL/tb-conf-init.err"
flow_agents_node "$WRITER" record-evidence "$TB_CONF_DIR" \
  --verdict pass \
  --check-json '{"id":"conf-check","kind":"test","status":"pass","summary":"Conformance check passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/tb-conf-evidence.out" 2>"$TMPDIR_EVAL/tb-conf-evidence.err"

if [[ ! -f "$TB_CONF_DIR/trust.bundle" ]]; then
  _fail "trust.bundle statusFunctionVersion check could not run (no trust.bundle written): $(cat "$TMPDIR_EVAL/tb-conf-evidence.out" "$TMPDIR_EVAL/tb-conf-evidence.err" 2>/dev/null)"
elif node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-sfv-check.err"
import { readFileSync } from 'node:fs';
import { statusFunctionVersion } from '@kontourai/surface';
const bundle = JSON.parse(readFileSync('${TB_CONF_DIR}/trust.bundle', 'utf8'));
// statusFunctionVersion is encoded in the source field as "...;statusFunctionVersion=<version>"
const sourceMatch = (bundle.source || '').match(/statusFunctionVersion=(.+)$/);
if (!sourceMatch) { process.stderr.write('bundle source does not contain statusFunctionVersion: ' + bundle.source + '\n'); process.exit(1); }
const bundleSfv = sourceMatch[1];
const surfaceSfv = String(statusFunctionVersion);
if (bundleSfv !== surfaceSfv) {
  process.stderr.write('bundle statusFunctionVersion ' + bundleSfv + ' does not match Surface statusFunctionVersion ' + surfaceSfv + '\n');
  process.exit(1);
}
NODEOF
then
  # Inside double quotes an apostrophe needs no escaping; "\'" would print the backslash.
  _pass "trust.bundle source encodes statusFunctionVersion matching Surface's canonical export"
else
  _fail "trust.bundle statusFunctionVersion mismatch: $(cat "$TMPDIR_EVAL/tb-sfv-check.err")"
fi
|
|
2431
|
+
|
|
2432
|
+
# Conformance vectors: feed each sf-*.json vector from hachure's conformance
# directory through Surface's deriveClaimStatus and compare the derived status
# of every claim listed in expect.statusByClaimId. The whole check is skipped
# when the conformance directory is not installed.
HACHURE_CONF="$ROOT/node_modules/hachure/conformance"
if [[ ! -d "$HACHURE_CONF" ]]; then
  :
elif node --input-type=module 2>"$TMPDIR_EVAL/tb-conf-vectors.err" <<NODEOF
import { readFileSync, readdirSync } from 'node:fs';
import { deriveClaimStatus, statusFunctionVersion } from '@kontourai/surface';
const confDir = '${HACHURE_CONF}';
const tally = { passed: 0, failed: 0 };
const report = (line) => process.stderr.write(line + '\n');
// Rows (evidence / events) that belong to one claim; a missing list counts as empty.
const forClaim = (rows, claimId) => (rows || []).filter((row) => row.claimId === claimId);
const vectorFiles = readdirSync(confDir).filter((name) => name.startsWith('sf-') && name.endsWith('.json'));
for (const vec of vectorFiles) {
  const { input, expect, now: nowStr } = JSON.parse(readFileSync(confDir + '/' + vec, 'utf8'));
  // A vector may pin its own clock; otherwise the current time is used.
  const now = nowStr ? new Date(nowStr) : new Date();
  for (const [claimId, expectedStatus] of Object.entries(expect.statusByClaimId ?? {})) {
    const claim = input.claims.find((c) => c.id === claimId);
    if (!claim) {
      report('vector ' + vec + ': claim ' + claimId + ' not found');
      tally.failed++;
      continue;
    }
    const result = deriveClaimStatus({
      claim,
      evidence: forClaim(input.evidence, claimId),
      events: forClaim(input.events, claimId),
      policies: input.policies || [],
      now,
      authorityTrace: input.authorityTrace || [],
    });
    if (result.status === expectedStatus) {
      tally.passed++;
      continue;
    }
    report('vector ' + vec + ' claim ' + claimId + ': got ' + result.status + ', expected ' + expectedStatus);
    tally.failed++;
  }
}
report('conformance vectors: ' + tally.passed + ' passed, ' + tally.failed + ' failed (statusFunctionVersion=' + statusFunctionVersion + ')');
if (tally.failed > 0) process.exit(1);
NODEOF
then
  _pass "hachure conformance vectors pass Surface deriveClaimStatus"
else
  _fail "hachure conformance vectors failed: $(cat "$TMPDIR_EVAL/tb-conf-vectors.err")"
fi
|
|
2471
|
+
|
|
2472
|
+
# ─── Deterministic session slug from work-item ref (#161) ───────────────────

WORK_ITEM_ROOT="$TMPDIR_EVAL/work-item-repo/.flow-agents"

# (a) ensure-session given only --work-item must succeed, and the session's
#     state.json must land under the slug kontourai-flow-agents-161.
wi_first_args=(
  --artifact-root "$WORK_ITEM_ROOT"
  --work-item "kontourai/flow-agents#161"
  --title "Work Item 161"
  --summary "Deterministic slug from work-item ref."
  --timestamp "2026-06-25T00:00:00Z"
)
if ! flow_agents_node "$WRITER" ensure-session "${wi_first_args[@]}" \
  >"$TMPDIR_EVAL/wi-ensure.out" 2>"$TMPDIR_EVAL/wi-ensure.err"; then
  _fail "ensure-session --work-item failed: $(cat "$TMPDIR_EVAL/wi-ensure.out" "$TMPDIR_EVAL/wi-ensure.err")"
else
  _pass "ensure-session --work-item derives slug kontourai-flow-agents-161"
fi

if test -f "$WORK_ITEM_ROOT/kontourai-flow-agents-161/state.json"; then
  _pass "ensure-session --work-item creates expected session directory"
else
  _fail "ensure-session --work-item did not create $WORK_ITEM_ROOT/kontourai-flow-agents-161/"
fi
|
|
2493
|
+
|
|
2494
|
+
# (b) idempotency: repeating ensure-session for the same ref (different title
#     and summary) must succeed and still resolve to the same session directory.
wi_second_args=(
  --artifact-root "$WORK_ITEM_ROOT"
  --work-item "kontourai/flow-agents#161"
  --title "Work Item 161 Second"
  --summary "Idempotent call."
  --timestamp "2026-06-25T00:00:01Z"
)
if ! {
  flow_agents_node "$WRITER" ensure-session "${wi_second_args[@]}" \
    >"$TMPDIR_EVAL/wi-ensure2.out" 2>"$TMPDIR_EVAL/wi-ensure2.err" \
    && [[ -f "$WORK_ITEM_ROOT/kontourai-flow-agents-161/state.json" ]]
}; then
  _fail "ensure-session --work-item idempotency failed: $(cat "$TMPDIR_EVAL/wi-ensure2.out" "$TMPDIR_EVAL/wi-ensure2.err")"
else
  _pass "ensure-session --work-item is idempotent (same slug/dir on second call)"
fi
|
|
2506
|
+
|
|
2507
|
+
# (c) precedence: when both --task-slug and --work-item are given, the session
#     directory must be named after the explicit slug and no directory for the
#     derived slug may appear.
TASK_SLUG_ROOT="$TMPDIR_EVAL/task-slug-repo/.flow-agents"
wi_taskslug_args=(
  --artifact-root "$TASK_SLUG_ROOT"
  --task-slug "manual-slug"
  --work-item "kontourai/flow-agents#161"
  --title "Manual Slug"
  --summary "Explicit task-slug must win over work-item."
  --timestamp "2026-06-25T00:00:02Z"
)
if flow_agents_node "$WRITER" ensure-session "${wi_taskslug_args[@]}" \
  >"$TMPDIR_EVAL/wi-taskslug.out" 2>"$TMPDIR_EVAL/wi-taskslug.err" \
  && [[ -d "$TASK_SLUG_ROOT/manual-slug" && ! -d "$TASK_SLUG_ROOT/kontourai-flow-agents-161" ]]; then
  _pass "ensure-session --task-slug wins over --work-item (back-compat)"
else
  _fail "ensure-session --task-slug did not win over --work-item: $(cat "$TMPDIR_EVAL/wi-taskslug.out" "$TMPDIR_EVAL/wi-taskslug.err")"
fi
|
|
2522
|
+
|
|
2523
|
+
# (c2) regression guard: --task-slug on its own (no --work-item) must still
#      create the session directory named after that slug.
TASK_SLUG_ONLY_ROOT="$TMPDIR_EVAL/task-slug-only-repo/.flow-agents"
wi_onlyslug_args=(
  --artifact-root "$TASK_SLUG_ONLY_ROOT"
  --task-slug "explicit-only"
  --title "Explicit Only"
  --summary "task-slug only, no work-item."
  --timestamp "2026-06-25T00:00:03Z"
)
if ! {
  flow_agents_node "$WRITER" ensure-session "${wi_onlyslug_args[@]}" \
    >"$TMPDIR_EVAL/wi-onlyslug.out" 2>"$TMPDIR_EVAL/wi-onlyslug.err" \
    && [[ -d "$TASK_SLUG_ONLY_ROOT/explicit-only" ]]
}; then
  _fail "ensure-session --task-slug alone failed: $(cat "$TMPDIR_EVAL/wi-onlyslug.out" "$TMPDIR_EVAL/wi-onlyslug.err")"
else
  _pass "ensure-session --task-slug alone still works (back-compat regression guard)"
fi
|
|
2536
|
+
|
|
2537
|
+
# (d) liveness subjectId matches work-item slug
|
|
2538
|
+
# ensure-session establishes the slug; liveness events (emitted by init-plan/advance-state) key
|
|
2539
|
+
# on that same slug as subjectId. We verify this by emitting two liveness claim events directly
|
|
2540
|
+
# via `liveness claim` using the slug derived from the ref, then asserting both share subjectId.
|
|
2541
|
+
LIVENESS_WORK_ROOT="$TMPDIR_EVAL/liveness-wi-repo/.flow-agents"
|
|
2542
|
+
# First: ensure-session --work-item produces the expected slug (directory name proof)
|
|
2543
|
+
if flow_agents_node "$WRITER" ensure-session \
|
|
2544
|
+
--artifact-root "$LIVENESS_WORK_ROOT" \
|
|
2545
|
+
--work-item "kontourai/flow-agents#162" \
|
|
2546
|
+
--title "Liveness Work Item" \
|
|
2547
|
+
--summary "Liveness subjectId test." \
|
|
2548
|
+
--timestamp "2026-06-25T00:00:04Z" >"$TMPDIR_EVAL/wi-liveness1.out" 2>"$TMPDIR_EVAL/wi-liveness1.err" \
|
|
2549
|
+
&& [[ -d "$LIVENESS_WORK_ROOT/kontourai-flow-agents-162" ]]; then
|
|
2550
|
+
_pass "ensure-session --work-item creates session dir with deterministic slug"
|
|
2551
|
+
else
|
|
2552
|
+
_fail "ensure-session --work-item session dir check failed: $(cat "$TMPDIR_EVAL/wi-liveness1.out" "$TMPDIR_EVAL/wi-liveness1.err")"
|
|
2553
|
+
fi
|
|
2554
|
+
|
|
2555
|
+
# Emit two liveness claim events using the same subjectId (as init-plan does when FLOW_AGENTS_LIVENESS=on).
|
|
2556
|
+
# This proves: same work-item ref → same slug → same subjectId across two agents.
|
|
2557
|
+
FLOW_AGENTS_ACTOR=agent-a flow_agents_node "$WRITER" liveness claim \
|
|
2558
|
+
--artifact-root "$LIVENESS_WORK_ROOT" \
|
|
2559
|
+
kontourai-flow-agents-162 >"$TMPDIR_EVAL/wi-liveness-claim-a.out" 2>"$TMPDIR_EVAL/wi-liveness-claim-a.err"
|
|
2560
|
+
FLOW_AGENTS_ACTOR=agent-b flow_agents_node "$WRITER" liveness claim \
|
|
2561
|
+
--artifact-root "$LIVENESS_WORK_ROOT" \
|
|
2562
|
+
kontourai-flow-agents-162 >"$TMPDIR_EVAL/wi-liveness-claim-b.out" 2>"$TMPDIR_EVAL/wi-liveness-claim-b.err"
|
|
2563
|
+
|
|
2564
|
+
# Event log that the liveness claims above are expected to have appended to.
LIVENESS_EVENTS="${LIVENESS_WORK_ROOT}/liveness/events.jsonl"

# Fail when the log is absent or carries no event for the expected subjectId;
# otherwise record a pass.
if [[ ! -f "${LIVENESS_EVENTS}" ]] \
  || ! grep -q '"subjectId":"kontourai-flow-agents-162"' "${LIVENESS_EVENTS}"; then
  # Dump the log (or a placeholder when it cannot be read) into the failure text.
  _fail "liveness events missing expected subjectId: $(cat "${LIVENESS_EVENTS}" 2>/dev/null || echo 'file not found')"
else
  _pass "liveness events contain subjectId kontourai-flow-agents-162"
fi
|
|
2571
|
+
|
|
2572
|
+
# Both events must share the same subjectId value (two agents, same ref → same subjectId).
#
# Count the event lines that carry the expected subjectId. `grep -c` prints "0"
# AND exits 1 when nothing matches, so a fallback placed inside the command
# substitution (`$(grep -c … || echo 0)`) would capture "0" twice, one per line,
# and break the numeric comparison below. The default is therefore applied
# outside the substitution: it covers both "no match" (exit 1) and
# "file missing/unreadable" (exit 2, no output).
subject_count=$(grep -c '"subjectId":"kontourai-flow-agents-162"' "$LIVENESS_EVENTS" 2>/dev/null) || subject_count=0
if [[ "$subject_count" -ge 2 ]]; then
  _pass "both liveness events share subjectId kontourai-flow-agents-162 (same ref → same subjectId)"
else
  _fail "expected >=2 liveness events with subjectId kontourai-flow-agents-162, found $subject_count"
fi
|
|
2579
|
+
|
|
2580
|
+
# (e) malformed ref is rejected: a ref written with "/" where the "#" separator
# belongs must make ensure-session exit non-zero, and the combined output must
# mention the owner/repo#id format.
if ! flow_agents_node "${WRITER}" ensure-session \
  --artifact-root "${WORK_ITEM_ROOT}" \
  --work-item "kontourai/flow-agents/bad" \
  --title "Bad Ref" \
  --summary "Should fail." \
  --timestamp "2026-06-25T00:00:06Z" >"${TMPDIR_EVAL}/wi-bad-slash.out" 2>&1; then
  # Command failed as required; now verify it failed for the right reason.
  if grep -q 'owner/repo#id format' "${TMPDIR_EVAL}/wi-bad-slash.out"; then
    _pass "ensure-session rejects work-item ref without # separator"
  else
    _fail "malformed ref rejection message was unexpected: $(cat "${TMPDIR_EVAL}/wi-bad-slash.out")"
  fi
else
  # A zero exit status means the malformed ref was accepted.
  _fail "ensure-session should reject work-item ref without # separator"
fi
|
|
2593
|
+
|
|
2594
|
+
# A work-item ref whose id part is not a number ("#abc") must make
# ensure-session exit non-zero, and the combined output must mention
# "numeric issue number".
if ! flow_agents_node "${WRITER}" ensure-session \
  --artifact-root "${WORK_ITEM_ROOT}" \
  --work-item "kontourai/flow-agents#abc" \
  --title "Bad ID" \
  --summary "Should fail on non-numeric id." \
  --timestamp "2026-06-25T00:00:07Z" >"${TMPDIR_EVAL}/wi-bad-id.out" 2>&1; then
  # Command failed as required; now verify it failed for the right reason.
  if grep -q 'numeric issue number' "${TMPDIR_EVAL}/wi-bad-id.out"; then
    _pass "ensure-session rejects work-item with non-numeric id"
  else
    _fail "non-numeric id rejection message was unexpected: $(cat "${TMPDIR_EVAL}/wi-bad-id.out")"
  fi
else
  # A zero exit status means the non-numeric id was accepted.
  _fail "ensure-session should reject work-item with non-numeric id"
fi
|
|
2606
|
+
|
|
2607
|
+
# Neither --task-slug nor --work-item → ensure-session must exit non-zero, and
# for back-compat its combined output must contain "task-slug is required".
if ! flow_agents_node "${WRITER}" ensure-session \
  --artifact-root "${WORK_ITEM_ROOT}" \
  --title "No Slug" \
  --summary "Should fail." \
  --timestamp "2026-06-25T00:00:08Z" >"${TMPDIR_EVAL}/wi-no-slug.out" 2>&1; then
  # Command failed as required; now verify the legacy wording is still present.
  if grep -q 'task-slug is required' "${TMPDIR_EVAL}/wi-no-slug.out"; then
    _pass "ensure-session dies with 'task-slug is required' when neither flag is supplied (back-compat)"
  else
    _fail "missing slug error message lacked 'task-slug is required': $(cat "${TMPDIR_EVAL}/wi-no-slug.out")"
  fi
else
  # A zero exit status means the call was accepted without any identifier.
  _fail "ensure-session should require --task-slug or --work-item"
fi
|
|
2619
|
+
|
|
2620
|
+
|
|
2106
2621
|
if [[ "$errors" -eq 0 ]]; then
|
|
2107
2622
|
echo "Workflow sidecar writer integration passed."
|
|
2108
2623
|
exit 0
|