npm - @kontourai/flow-agents - Versions diffs - 1.3.0 → 2.0.0 - Mend

@kontourai/flow-agents 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (214) hide show

package/.github/CODEOWNERS +29 -0
package/.github/actions/trust-verify/action.yml +145 -0
package/.github/workflows/ci.yml +11 -4
package/.github/workflows/kit-gates-demo.yml +2 -2
package/.github/workflows/publish-npm.yml +10 -2
package/.github/workflows/release-please.yml +1 -1
package/.github/workflows/trust-reconcile.yml +113 -0
package/AGENTS.md +13 -0
package/CHANGELOG.md +103 -0
package/CONTRIBUTING.md +4 -4
package/README.md +1 -0
package/agents/tool-planner.json +1 -1
package/build/src/cli/console-learning-projection.d.ts +1 -0
package/build/src/cli/effective-backlog-settings.d.ts +1 -0
package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
package/build/src/cli/init.d.ts +17 -0
package/build/src/cli/init.js +242 -20
package/build/src/cli/kit.d.ts +1 -0
package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
package/build/src/cli/publish-change-helper.d.ts +1 -0
package/build/src/cli/pull-work-provider.d.ts +1 -0
package/build/src/cli/runtime-adapter.d.ts +1 -0
package/build/src/cli/telemetry-doctor.d.ts +1 -0
package/build/src/cli/usage-feedback.d.ts +1 -0
package/build/src/cli/utterance-check.d.ts +1 -0
package/build/src/cli/validate-hook-influence.d.ts +1 -0
package/build/src/cli/validate-source-tree.d.ts +1 -0
package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
package/build/src/cli/validate-workflow-artifacts.js +19 -2
package/build/src/cli/verify.d.ts +1 -0
package/build/src/cli/verify.js +90 -0
package/build/src/cli/veritas-governance.d.ts +1 -0
package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
package/build/src/cli/workflow-sidecar.d.ts +324 -0
package/build/src/cli/workflow-sidecar.js +1973 -90
package/build/src/cli.d.ts +2 -0
package/build/src/cli.js +2 -3
package/build/src/flow-kit/validate.d.ts +81 -0
package/build/src/index.d.ts +5 -0
package/build/src/index.js +36 -0
package/build/src/lib/args.d.ts +8 -0
package/build/src/lib/flow-resolver.d.ts +82 -0
package/build/src/lib/flow-resolver.js +237 -0
package/build/src/lib/fs.d.ts +7 -0
package/build/src/lib/workflow-learning-projection.d.ts +132 -0
package/build/src/runtime-adapters.d.ts +18 -0
package/build/src/tools/build-universal-bundles.d.ts +2 -0
package/build/src/tools/build-universal-bundles.js +34 -22
package/build/src/tools/common.d.ts +9 -0
package/build/src/tools/generate-context-map.d.ts +2 -0
package/build/src/tools/generate-context-map.js +3 -16
package/build/src/tools/validate-package.d.ts +2 -0
package/build/src/tools/validate-source-tree.d.ts +2 -0
package/build/src/tools/validate-source-tree.js +42 -162
package/context/contracts/artifact-contract.md +10 -0
package/context/contracts/delivery-contract.md +1 -0
package/context/contracts/review-contract.md +1 -0
package/context/contracts/verification-contract.md +2 -0
package/context/gate-awareness.md +39 -0
package/context/scripts/hooks/stop-goal-fit.js +632 -70
package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
package/docs/adr/0007-skill-audit.md +1 -1
package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
package/docs/adr/0011-mcp-posture.md +100 -0
package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
package/docs/adr/0013-context-lifecycle.md +151 -0
package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
package/docs/adr/0016-three-hard-boundary-model.md +71 -0
package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
package/docs/agent-system-guidebook.md +5 -12
package/docs/context-map.md +4 -10
package/docs/developer-architecture.md +14 -0
package/docs/index.md +3 -2
package/docs/integrations/framework-adapter.md +19 -6
package/docs/integrations/index.md +2 -2
package/docs/north-star.md +4 -4
package/docs/operating-layers.md +3 -3
package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
package/docs/repository-structure.md +2 -2
package/docs/skills-map.md +1 -0
package/docs/spec/runtime-hook-surface.md +78 -10
package/docs/standards-register.md +3 -3
package/docs/survey-utterance-check.md +1 -1
package/docs/trust-anchor-adoption.md +197 -0
package/docs/verifiable-trust.md +95 -0
package/docs/veritas-integration.md +2 -2
package/docs/workflow-usage-guide.md +69 -0
package/evals/acceptance/DEMO-false-completion.md +144 -0
package/evals/acceptance/demo-cast.sh +92 -0
package/evals/acceptance/demo-false-completion.sh +72 -0
package/evals/acceptance/demo-real-evidence.sh +104 -0
package/evals/acceptance/demo.tape +29 -0
package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
package/evals/acceptance/prove-capture-teeth.sh +114 -0
package/evals/acceptance/prove-teeth.sh +105 -0
package/evals/ci/antigaming-suite.sh +54 -0
package/evals/ci/run-baseline.sh +2 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
package/evals/integration/test_builder_step_producers.sh +379 -0
package/evals/integration/test_bundle_install.sh +35 -71
package/evals/integration/test_bundle_lifecycle.sh +39 -2
package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
package/evals/integration/test_checkpoint_signing.sh +489 -0
package/evals/integration/test_claim_lookup.sh +352 -0
package/evals/integration/test_command_log_integrity.sh +275 -0
package/evals/integration/test_context_map.sh +0 -2
package/evals/integration/test_dual_emit_flow_step.sh +278 -0
package/evals/integration/test_enforcer_expects_driven.sh +281 -0
package/evals/integration/test_evidence_capture_hook.sh +185 -0
package/evals/integration/test_flow_kit_repository.sh +2 -0
package/evals/integration/test_flowdef_session_activation.sh +273 -0
package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
package/evals/integration/test_gate_bypass_chain.sh +448 -0
package/evals/integration/test_gate_lockdown.sh +1137 -0
package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
package/evals/integration/test_goal_fit_hook.sh +69 -4
package/evals/integration/test_goal_fit_rederive.sh +263 -0
package/evals/integration/test_hook_category_behaviors.sh +14 -0
package/evals/integration/test_install_merge.sh +1176 -0
package/evals/integration/test_mint_attestation.sh +373 -0
package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
package/evals/integration/test_publish_delivery.sh +269 -0
package/evals/integration/test_reconcile_soundness.sh +528 -0
package/evals/integration/test_resolvefirststep_security.sh +208 -0
package/evals/integration/test_session_resume_roundtrip.sh +286 -0
package/evals/integration/test_trust_checkpoint.sh +325 -0
package/evals/integration/test_trust_reconcile.sh +293 -0
package/evals/integration/test_verify_cli.sh +208 -0
package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
package/evals/lib/node.sh +0 -6
package/evals/run.sh +47 -0
package/evals/static/test_library_exports.sh +85 -0
package/evals/static/test_universal_bundles.sh +15 -0
package/evals/static/test_workflow_skills.sh +6 -13
package/install.sh +0 -7
package/integrations/strands-ts/README.md +25 -15
package/integrations/veritas/flow-agents.adapter.json +1 -2
package/kits/builder/flows/build.flow.json +59 -12
package/kits/builder/kit.json +85 -15
package/kits/builder/skills/continue-work/SKILL.md +116 -0
package/kits/builder/skills/deliver/SKILL.md +36 -6
package/kits/builder/skills/design-probe/SKILL.md +28 -0
package/kits/builder/skills/execute-plan/SKILL.md +9 -1
package/kits/builder/skills/gate-review/SKILL.md +234 -0
package/kits/builder/skills/learning-review/SKILL.md +30 -0
package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
package/kits/builder/skills/plan-work/SKILL.md +13 -1
package/kits/builder/skills/pull-work/SKILL.md +19 -0
package/kits/knowledge/adapters/default-store/index.js +38 -0
package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
package/kits/knowledge/docs/store-contract.md +314 -0
package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
package/kits/knowledge/evals/entities/suite.test.js +40 -0
package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
package/kits/knowledge/evals/retirement/suite.test.js +145 -0
package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
package/kits/knowledge/kit.json +51 -1
package/package.json +13 -4
package/packaging/conformance/README.md +10 -2
package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
package/packaging/conformance/run-conformance.js +1 -1
package/scripts/README.md +2 -1
package/scripts/build-universal-bundles.js +0 -1
package/scripts/ci/mint-attestation.js +221 -0
package/scripts/ci/trust-reconcile.js +545 -0
package/scripts/hooks/config-protection.js +423 -1
package/scripts/hooks/evidence-capture.js +348 -0
package/scripts/hooks/lib/liveness-read.js +113 -0
package/scripts/hooks/run-hook.js +6 -1
package/scripts/hooks/stop-goal-fit.js +1471 -79
package/scripts/hooks/workflow-steering.js +135 -5
package/scripts/install-codex-home.sh +39 -0
package/scripts/install-merge.js +330 -0
package/src/cli/init.ts +218 -20
package/src/cli/validate-workflow-artifacts.ts +18 -2
package/src/cli/verify.ts +100 -0
package/src/cli/workflow-sidecar.ts +2093 -84
package/src/cli.ts +2 -3
package/src/index.ts +53 -0
package/src/lib/flow-resolver.ts +284 -0
package/src/tools/build-universal-bundles.ts +34 -21
package/src/tools/generate-context-map.ts +3 -17
package/src/tools/validate-source-tree.ts +44 -104
package/tsconfig.json +1 -0
package/build/src/tools/filter-installed-packs.js +0 -135
package/packaging/packs.json +0 -49
package/scripts/filter-installed-packs.js +0 -2
package/src/tools/filter-installed-packs.ts +0 -132

package/evals/integration/test_claim_lookup.sh ADDED Viewed

@@ -0,0 +1,352 @@
+#!/usr/bin/env bash
+# test_claim_lookup.sh — Integration tests for the `claim` subcommand (#162).
+#
+# Verifies:
+#   AC1: status + value + failing evidence (with execution block) + policy + derivation drilldown
+#   AC1: --json flag emits structured ClaimExplanation object
+#   AC1: unknown claim id exits 1 with clear error listing available ids
+#   AC1: missing bundle exits 1 with clear error
+#   AC3: gate-hint in stop-goal-fit.js disputed warning contains workflow:sidecar -- claim
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$ROOT/evals/lib/node.sh"
+# Scratch directory for every fixture and captured output in this script.
+# Abort right away if it cannot be created: with an empty path the fixtures
+# below would be written relative to the filesystem root.
+TMPDIR_EVAL="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }
+# Number of failed assertions; incremented by _fail and checked at the end.
+errors=0
+# Remove the scratch directory on every exit path; `--` ends option parsing.
+cleanup() { rm -rf -- "$TMPDIR_EVAL"; }
+trap cleanup EXIT
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+echo "=== Claim Lookup Tests (issue #162) ==="
+# ── helpers ──────────────────────────────────────────────────────────────────
+jq_node() {
+  local file="$1"; local expr="$2"
+  node -e "
+const d=JSON.parse(require('fs').readFileSync('${file}','utf8'));
+const r=(${expr})(d);
+if(r===undefined||r===null){process.exit(2);}
+if(typeof r==='boolean'||typeof r==='number'||typeof r==='string'){
+  process.stdout.write(String(r)+'\n');
+}else{
+  process.stdout.write(JSON.stringify(r)+'\n');
+}"
+}
+# Seed a trust.bundle with a DISPUTED claim including a failing execution block and a policy.
+seed_disputed_bundle() {
+  local dir="$1" slug="$2"
+  local ts="2026-06-25T00:00:00Z"
+  local claimId="${slug}/unit-tests.flow-agents.workflow.unit tests pass"
+  mkdir -p "$dir"
+  cat > "$dir/trust.bundle" <<JSON
+{
+  "schemaVersion": 3,
+  "source": "claim-lookup-test;statusFunctionVersion=1",
+  "claims": [
+    {
+      "id": "$claimId",
+      "subjectType": "workflow-check",
+      "subjectId": "${slug}/unit-tests",
+      "surface": "flow-agents.workflow",
+      "claimType": "workflow.check.test",
+      "fieldOrBehavior": "unit tests pass",
+      "value": "fail",
+      "status": "disputed",
+      "impactLevel": "high",
+      "verificationPolicyId": "policy:workflow.check.test",
+      "createdAt": "$ts",
+      "updatedAt": "$ts"
+    }
+  ],
+  "evidence": [
+    {
+      "id": "ev:${claimId}",
+      "claimId": "${claimId}",
+      "evidenceType": "test_output",
+      "label": "npm test output",
+      "method": "validation",
+      "excerptOrSummary": "8 tests failed",
+      "status": "disputed",
+      "execution": {
+        "runner": "npm test",
+        "label": "npm test",
+        "isError": true,
+        "exitCode": 1
+      },
+      "sourceRef": "command-log.jsonl",
+      "createdAt": "$ts"
+    }
+  ],
+  "events": [
+    {
+      "id": "evt:${claimId}",
+      "claimId": "${claimId}",
+      "status": "disputed",
+      "actor": "test",
+      "method": "validation",
+      "evidenceIds": ["ev:${claimId}"],
+      "createdAt": "$ts",
+      "verifiedAt": "$ts"
+    }
+  ],
+  "policies": [
+    {
+      "id": "policy:workflow.check.test",
+      "claimType": "workflow.check.test",
+      "requiredEvidence": ["test_output"],
+      "requiredMethods": ["validation"],
+      "acceptanceCriteria": ["A verified verification event must support a workflow.check.test claim."],
+      "reviewAuthority": "system",
+      "validityRule": { "kind": "manual" },
+      "stalenessTriggers": [],
+      "conflictRules": [],
+      "impactLevel": "high"
+    }
+  ]
+}
+JSON
+}
+# ── Test 1: AC1 — text output has status + value + evidence + policy + drilldown ──
+echo ""
+echo "── Test 1: text output (status + evidence + policy + drilldown) ──"
+AC1_DIR="$TMPDIR_EVAL/ac1"
+AC1_SLUG="claim-lookup-ac1"
+seed_disputed_bundle "$AC1_DIR" "$AC1_SLUG"
+AC1_CLAIM_ID="${AC1_SLUG}/unit-tests.flow-agents.workflow.unit tests pass"
+AC1_OUT="$TMPDIR_EVAL/ac1.out"
+if flow_agents_node workflow-sidecar claim "$AC1_CLAIM_ID" "$AC1_DIR" >"$AC1_OUT" 2>&1; then
+  _pass "AC1: claim command exits 0 for known disputed claim"
+else
+  _fail "AC1: claim command failed: $(cat "$AC1_OUT")"
+fi
+if grep -q "Status: disputed" "$AC1_OUT"; then
+  _pass "AC1: output contains derived status (disputed)"
+else
+  _fail "AC1: output missing derived status: $(head -3 "$AC1_OUT")"
+fi
+if grep -q "Value: fail" "$AC1_OUT"; then
+  _pass "AC1: output contains raw value"
+else
+  _fail "AC1: output missing value"
+fi
+if grep -q "exitCode: 1" "$AC1_OUT" && grep -q "isError: true" "$AC1_OUT"; then
+  _pass "AC1: failing evidence execution block shown (exitCode + isError)"
+else
+  _fail "AC1: execution block missing from evidence output: $(grep -i "exitCode\|isError\|Evidence" "$AC1_OUT" || echo '(not found)')"
+fi
+if grep -q "Governing Policy (policy:workflow.check.test)" "$AC1_OUT"; then
+  _pass "AC1: governing policy section present"
+else
+  _fail "AC1: governing policy section missing"
+fi
+if grep -q "requiredEvidence:" "$AC1_OUT" && grep -q "acceptanceCriteria:" "$AC1_OUT" && grep -q "reviewAuthority:" "$AC1_OUT"; then
+  _pass "AC1: policy fields (requiredEvidence, acceptanceCriteria, reviewAuthority) present"
+else
+  _fail "AC1: policy fields incomplete: $(grep -E "required|acceptance|review" "$AC1_OUT" || echo '(not found)')"
+fi
+if grep -q "Derivation Drilldown:" "$AC1_OUT"; then
+  _pass "AC1: derivation drilldown section present"
+else
+  _fail "AC1: derivation drilldown section missing"
+fi
+# ── Test 2: AC1 — --json flag emits structured ClaimExplanation ──
+echo ""
+echo "── Test 2: --json flag emits structured ClaimExplanation object ──"
+AC2_JSON="$TMPDIR_EVAL/ac1.json"
+if flow_agents_node workflow-sidecar claim "$AC1_CLAIM_ID" "$AC1_DIR" --json >"$AC2_JSON" 2>&1; then
+  _pass "AC2: --json exits 0"
+else
+  _fail "AC2: --json failed: $(cat "$AC2_JSON")"
+fi
+# Validate JSON structure
+FOUND="$(jq_node "$AC2_JSON" 'd => d.found' 2>/dev/null || echo '')"
+STATUS="$(jq_node "$AC2_JSON" 'd => d.status' 2>/dev/null || echo '')"
+VALUE="$(jq_node "$AC2_JSON" 'd => d.value' 2>/dev/null || echo '')"
+HAS_POLICY="$(jq_node "$AC2_JSON" 'd => d.policy !== null && d.policy.id !== undefined' 2>/dev/null || echo '')"
+EVIDENCE_LEN="$(jq_node "$AC2_JSON" 'd => d.evidence.length' 2>/dev/null || echo '')"
+EXEC_EXITCODE="$(jq_node "$AC2_JSON" 'd => d.evidence[0] && d.evidence[0].execution && d.evidence[0].execution.exitCode' 2>/dev/null || echo '')"
+HAS_WHY="$(jq_node "$AC2_JSON" 'd => typeof d.why === "object" && d.why !== null' 2>/dev/null || echo '')"
+[[ "$FOUND" == "true" ]] && _pass "AC2: found=true in JSON" || _fail "AC2: expected found=true, got '$FOUND'"
+[[ "$STATUS" == "disputed" ]] && _pass "AC2: status=disputed in JSON" || _fail "AC2: expected status=disputed, got '$STATUS'"
+[[ "$VALUE" == "fail" ]] && _pass "AC2: value=fail in JSON" || _fail "AC2: expected value=fail, got '$VALUE'"
+[[ "$HAS_POLICY" == "true" ]] && _pass "AC2: policy object present in JSON" || _fail "AC2: policy missing: $HAS_POLICY"
+[[ "$EVIDENCE_LEN" == "1" ]] && _pass "AC2: evidence array has 1 item" || _fail "AC2: expected 1 evidence item, got '$EVIDENCE_LEN'"
+[[ "$EXEC_EXITCODE" == "1" ]] && _pass "AC2: evidence[0].execution.exitCode=1 in JSON" || _fail "AC2: expected exitCode=1, got '$EXEC_EXITCODE'"
+[[ "$HAS_WHY" == "true" ]] && _pass "AC2: why object present in JSON" || _fail "AC2: why object missing"
+# ── Test 3: AC1 — unknown id exits 1 with clear error listing available ids ──
+echo ""
+echo "── Test 3: unknown claim id → clear error + list of available ids ──"
+AC3_OUT="$TMPDIR_EVAL/ac3.out"
+if flow_agents_node workflow-sidecar claim "nonexistent-claim-id" "$AC1_DIR" >"$AC3_OUT" 2>&1; then
+  _fail "AC3: expected exit 1 for unknown claim id but got 0"
+else
+  _pass "AC3: exits 1 for unknown claim id"
+fi
+if grep -q "unknown claim id: nonexistent-claim-id" "$AC3_OUT"; then
+  _pass "AC3: error message names the unknown id"
+else
+  _fail "AC3: error message missing id: $(cat "$AC3_OUT")"
+fi
+if grep -q "Available claim ids" "$AC3_OUT"; then
+  _pass "AC3: error lists available claim ids"
+else
+  _fail "AC3: error does not list available ids: $(cat "$AC3_OUT")"
+fi
+# ── Test 4: AC1 — missing bundle exits 1 ──
+echo ""
+echo "── Test 4: missing bundle → clear error ──"
+AC4_OUT="$TMPDIR_EVAL/ac4.out"
+if flow_agents_node workflow-sidecar claim "any-id" "$TMPDIR_EVAL/nonexistent" >"$AC4_OUT" 2>&1; then
+  _fail "AC4: expected exit 1 for missing bundle but got 0"
+else
+  _pass "AC4: exits 1 for missing bundle"
+fi
+if grep -q "no trust.bundle at" "$AC4_OUT"; then
+  _pass "AC4: error message mentions missing trust.bundle"
+else
+  _fail "AC4: error message missing: $(cat "$AC4_OUT")"
+fi
+# ── Test 5: AC3 — gate-hint in stop-goal-fit.js warning ──
+# Use a bundle with an acceptance criterion claim (not a check claim) so the
+# bundleEnforcement warning is not deduplicated by captureCrossReference.
+# FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip prevents backstop re-runs for hermeticity.
+echo ""
+echo "── Test 5: gate-hint appears in stop-goal-fit.js disputed warning ──"
+AC5_PROJ="$TMPDIR_EVAL/gate-hint-proj"
+AC5_SLUG="gate-hint-test"
+AC5_DIR="$AC5_PROJ/.flow-agents/$AC5_SLUG"
+mkdir -p "$AC5_DIR"
+# Write a minimal bundle with a disputed acceptance criterion claim.
+# Using workflow.acceptance.criterion (not workflow.check.*) so the subjectId
+# won't match any evidence check id and bundleEnforcement won't be deduped.
+cat > "$AC5_DIR/trust.bundle" <<'BUNDLE'
+{
+  "schemaVersion": 3,
+  "source": "claim-lookup-test",
+  "claims": [
+    {
+      "id": "gate-hint-test/AC1.flow-agents.workflow.acceptance criterion verified",
+      "subjectType": "workflow-criterion",
+      "subjectId": "gate-hint-test/AC1",
+      "surface": "flow-agents.workflow",
+      "claimType": "workflow.acceptance.criterion",
+      "fieldOrBehavior": "acceptance criterion verified",
+      "value": "fail",
+      "status": "disputed",
+      "impactLevel": "high",
+      "verificationPolicyId": "policy:workflow.acceptance.criterion",
+      "createdAt": "2026-06-25T00:00:00Z",
+      "updatedAt": "2026-06-25T00:00:00Z"
+    }
+  ],
+  "evidence": [],
+  "events": [
+    {
+      "id": "evt:gate-hint-test/AC1",
+      "claimId": "gate-hint-test/AC1.flow-agents.workflow.acceptance criterion verified",
+      "status": "disputed",
+      "actor": "test",
+      "method": "validation",
+      "evidenceIds": [],
+      "createdAt": "2026-06-25T00:00:00Z",
+      "verifiedAt": "2026-06-25T00:00:00Z"
+    }
+  ],
+  "policies": [
+    {
+      "id": "policy:workflow.acceptance.criterion",
+      "claimType": "workflow.acceptance.criterion",
+      "requiredEvidence": ["human_attestation"],
+      "acceptanceCriteria": ["A criterion must have a verified event."],
+      "reviewAuthority": "system",
+      "validityRule": { "kind": "manual" },
+      "stalenessTriggers": [],
+      "conflictRules": [],
+      "impactLevel": "high"
+    }
+  ]
+}
+BUNDLE
+cat > "$AC5_DIR/state.json" <<'JSON'
+{"schema_version":"1.0","task_slug":"gate-hint-test","status":"delivered","phase":"done","updated_at":"2026-06-25T00:00:00Z","next_action":{"status":"done","summary":"done"}}
+JSON
+cat > "$AC5_DIR/gate-hint-test--deliver.md" <<'MD'
+# Gate Hint Test
+branch: main
+status: delivered
+type: deliver
+## Definition Of Done
+- [x] all tests pass
+## Goal Fit Gate
+- [x] criteria verified
+### Verdict: PASS
+MD
+AC5_OUT="$TMPDIR_EVAL/ac5.out"
+# FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip prevents backstop re-runs for hermeticity.
+printf '{"hook_event_name":"Stop","cwd":"%s"}' "$AC5_PROJ" \
+  | FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$AC5_OUT" 2>&1 || true
+if grep -q "workflow:sidecar -- claim" "$AC5_OUT"; then
+  _pass "AC5: gate-hint 'workflow:sidecar -- claim' appears in stop-goal-fit output"
+else
+  _fail "AC5: gate-hint missing from stop-goal-fit output: $(cat "$AC5_OUT")"
+fi
+if grep -q "trust.bundle claim disputed" "$AC5_OUT"; then
+  _pass "AC5: disputed warning present in stop-goal-fit output"
+else
+  _fail "AC5: disputed warning missing: $(cat "$AC5_OUT")"
+fi
+# ── Results ──────────────────────────────────────────────────────────────────
+echo ""
+echo "──────────────────────────────────"
+echo "claim lookup tests: $((errors)) failed"
+if [[ "$errors" -eq 0 ]]; then
+  echo "ALL PASSED"
+  exit 0
+else
+  exit 1
+fi

package/evals/integration/test_command_log_integrity.sh ADDED Viewed

@@ -0,0 +1,275 @@
+#!/usr/bin/env bash
+# test_command_log_integrity.sh — Hash-chain tamper-evidence for command-log.jsonl (Increment B2)
+#
+# Proves that command-log.jsonl's hash-chain makes alteration DETECTABLE:
+#   1. Altered entry (flip exitCode/observedResult without fixing hash) → broken → gate blocks.
+#   2. Removed/reordered entry → linkage breaks → broken → gate blocks.
+#   3. Legit untampered chain → ok → normal behavior; genuine fail still caught.
+#   4. Legacy unchained log (pre-B2) → legacy → existing behavior unchanged.
+#
+# HONEST FRAMING (in comments and code): this is tamper-EVIDENCE, not tamper-PROOF.
+# An agent that recomputes the whole chain can forge a valid chain. The real
+# tamper-proof boundary is the signed checkpoint (B1, already merged). This chain
+# raises the local bar and catches casual/accidental tampering and corruption.
+#
+# Usage: bash evals/integration/test_command_log_integrity.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+CAPTURE="$ROOT/scripts/hooks/evidence-capture.js"
+GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
+export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
+# Scratch directory holding every throwaway repo used by the tests. Abort if it
+# cannot be created, otherwise the repos would be seeded under "/".
+TMP="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }
+# Number of failed assertions; incremented by _fail.
+errors=0
+# Assertion reporters: print a check or cross line; _fail also bumps $errors.
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+# Remove the scratch directory on every exit path; `--` ends option parsing.
+cleanup() { rm -rf -- "$TMP"; }
+trap cleanup EXIT
+# ── helper: lay down a minimal "delivered" workflow artifact ──────────────────
+# Writes <dir>/AGENTS.md plus, under <dir>/.flow-agents/<slug>/, a state.json
+# marked delivered/done and a <slug>--deliver.md whose checklists are ticked
+# and whose verdict is PASS.
+seed_repo() { # $1=dir $2=slug
+  local repo_root="$1"
+  local task="$2"
+  local artifact_dir="${repo_root}/.flow-agents/${task}"
+  mkdir -p "${artifact_dir}"
+  printf '# Repo\n' > "${repo_root}/AGENTS.md"
+  # state.json is written without a trailing newline.
+  printf '{"schema_version":"1.0","task_slug":"%s","status":"delivered","phase":"done","updated_at":"2026-06-23T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
+    "${task}" > "${artifact_dir}/state.json"
+  cat << MD > "${artifact_dir}/${task}--deliver.md"
+# ${task}
+branch: main
+status: delivered
+type: deliver
+## Definition Of Done
+- [x] tests pass
+## Goal Fit Gate
+- [x] acceptance verified
+### Verdict: PASS
+MD
+}
+# Write two chained entries to command-log.jsonl via evidence-capture.js.
+# Returns the log file path.
+write_chained_log() { # $1=repo_dir $2=slug
+  local p="$1" slug="$2"
+  # Entry 0: npm test passes
+  printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm test"},"tool_response":{"exitCode":0,"stdout":"ok"}}' "$p" \
+    | node "$CAPTURE" >/dev/null 2>&1
+  # Entry 1: npm run lint FAILS
+  printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run lint"},"tool_response":{"exitCode":1,"stderr":"lint errors"}}' "$p" \
+    | node "$CAPTURE" >/dev/null 2>&1
+}
+# ─── Test 1: altered entry detected (flip exitCode/observedResult, keep old hash) ──────
+echo "Test 1: altered entry (flip fail→pass without fixing hash) → broken → gate blocks"
+T1="$TMP/t1"; seed_repo "$T1" t1
+write_chained_log "$T1" t1
+LOG="$T1/.flow-agents/t1/command-log.jsonl"
+if [[ -f "$LOG" ]]; then _pass "T1: command-log.jsonl written"; else _fail "T1: command-log.jsonl missing"; fi
+# Verify clean chain (before tamper)
+chain_status=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T1/.flow-agents/t1'); console.log(r.status);")
+if [[ "$chain_status" == "ok" ]]; then
+  _pass "T1: untampered chain verifies as ok"
+else
+  _fail "T1: expected ok, got $chain_status"
+fi
+# Tamper: flip entry 1 (lint, FAIL) to look like a PASS — change exitCode and observedResult
+# but do NOT update _chain.hash → chain is broken.
+python3 - "$LOG" << 'PY'
+import json, sys
+lines = open(sys.argv[1]).read().strip().split('\n')
+e1 = json.loads(lines[1])
+e1['exitCode'] = 0          # hide the failure
+e1['observedResult'] = 'pass'  # claim it passed
+# _chain.hash is NOT updated — deliberate, this is the tamper
+lines[1] = json.dumps(e1)
+open(sys.argv[1], 'w').write('\n'.join(lines) + '\n')
+PY
+# Verify broken chain
+chain_after=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T1/.flow-agents/t1'); console.log(r.status + ':' + r.brokenAt);")
+if [[ "$chain_after" == "broken:1" ]]; then
+  _pass "T1: tampered entry detected → broken at entry 1"
+else
+  _fail "T1: expected broken:1, got $chain_after"
+fi
+# Seed evidence.json claiming npm test passed (the untampered entry)
+# The tampered entry (lint) was a FAIL flipped to PASS — so the log now shows a false pass.
+# Since chain is broken, gate should block with integrity warning and NOT trust log passes.
+printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"npm-test","kind":"command","status":"pass","command":"npm test","summary":"passed"}]}' \
+  > "$T1/.flow-agents/t1/evidence.json"
+# Run the gate and keep its exit status. This script runs under
+# `set -uo pipefail` only, so the status is captured with `||` instead of a
+# `set +e` / `set -e` pair, which would leave errexit switched ON afterwards.
+gate_exit=0
+gate_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T1\"}") || gate_exit=$?
+if [[ "$gate_exit" -eq 2 ]]; then
+  _pass "T1: gate blocks (exit 2) when chain is broken"
+else
+  _fail "T1: gate should block on broken chain, exit=$gate_exit output=$gate_out"
+fi
+if echo "$gate_out" | grep -q "command-log integrity check FAILED"; then
+  _pass "T1: gate emits integrity-failure warning"
+else
+  _fail "T1: missing integrity-failure warning: $gate_out"
+fi
+if echo "$gate_out" | grep -q "NOT trusted"; then
+  _pass "T1: gate emits 'NOT trusted' signal for claimed passes"
+else
+  _fail "T1: missing NOT trusted signal: $gate_out"
+fi
+# ─── Test 2: removed/reordered entry detected ─────────────────────────────────────
+echo ""
+echo "Test 2: removed/reordered entry → linkage breaks → broken → gate flags it"
+T2="$TMP/t2"; seed_repo "$T2" t2
+write_chained_log "$T2" t2
+LOG2="$T2/.flow-agents/t2/command-log.jsonl"
+lines_before=$(wc -l < "$LOG2" | tr -d ' ')
+# Reorder: swap entry 0 and entry 1
+python3 - "$LOG2" << 'PY'
+import sys
+lines = open(sys.argv[1]).read().strip().split('\n')
+# swap
+lines[0], lines[1] = lines[1], lines[0]
+open(sys.argv[1], 'w').write('\n'.join(lines) + '\n')
+PY
+chain_reorder=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T2/.flow-agents/t2'); console.log(r.status);")
+if [[ "$chain_reorder" == "broken" ]]; then
+  _pass "T2: reordered entries detected → broken"
+else
+  _fail "T2: expected broken on reorder, got $chain_reorder"
+fi
+# Test: delete middle entry (restore then delete entry 0 so entry 1's prevHash is wrong)
+write_chained_log "$T2" t2  # re-append fresh entries (now 4 total — but that's fine for test)
+# Write a fresh log with just 2 entries and then delete the first
+LOG2_FRESH="$T2/.flow-agents/t2/command-log.jsonl"
+python3 - "$LOG2_FRESH" << 'PY'
+import sys
+lines = [l for l in open(sys.argv[1]).read().strip().split('\n') if l.strip()]
+# Keep only the last 2 entries (fresh from second write_chained_log call above)
+last2 = lines[-2:]
+# Delete entry[0] of the last2 → only entry[1] remains, whose prevHash won't match genesis
+open(sys.argv[1], 'w').write(last2[1] + '\n')
+PY
+chain_delete=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T2/.flow-agents/t2'); console.log(r.status);")
+if [[ "$chain_delete" == "broken" ]]; then
+  _pass "T2: removed predecessor entry detected → broken (prevHash mismatch)"
+else
+  _fail "T2: expected broken on removed predecessor, got $chain_delete"
+fi
+# ─── Test 3: legit untampered chain — ok — genuine fail still caught ─────────────────
+# Pipes two PostToolUse events into $CAPTURE (exitCode 0, then exitCode 1),
+# expects the resulting chain to verify as "ok", then writes evidence that
+# claims the failed command passed and expects the Stop gate to exit 2 with a
+# contradiction warning and no integrity-failure warning.
+echo ""
+echo "Test 3: legit untampered chain → ok → genuine fail still caught (capture-teeth)"
+T3="$TMP/t3"; seed_repo "$T3" t3
+# Write entry 0 (pass) and entry 1 (fail)
+printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm test"},"tool_response":{"exitCode":0}}' "$T3" \
+  | node "$CAPTURE" >/dev/null 2>&1
+printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run build"},"tool_response":{"exitCode":1}}' "$T3" \
+  | node "$CAPTURE" >/dev/null 2>&1
+# Paths are passed to node as arguments (process.argv) instead of being spliced
+# into the JS source, so a quote or backslash in a path cannot break the snippet.
+chain_legit=$(node -e 'const g = require(process.argv[1]); const r = g.verifyCommandLogChain(process.argv[2]); console.log(r.status);' "$GATE" "$T3/.flow-agents/t3")
+if [[ "$chain_legit" == "ok" ]]; then
+  _pass "T3: untampered chained log verifies ok"
+else
+  _fail "T3: expected ok, got $chain_legit"
+fi
+# Evidence claims npm run build passed (it actually failed → capture log shows fail → block)
+printf '%s' '{"schema_version":"1.0","task_slug":"t3","verdict":"pass","checks":[{"id":"build","kind":"command","status":"pass","command":"npm run build","summary":"build passed"}]}' \
+  > "$T3/.flow-agents/t3/evidence.json"
+# The gate is expected to exit non-zero; suspend errexit so the status can be captured.
+set +e
+gate3_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3\"}")
+gate3_exit=$?
+set -e
+if [[ "$gate3_exit" -eq 2 ]]; then
+  _pass "T3: gate blocks on genuine fail caught by capture log (ok chain, capture teeth active)"
+else
+  _fail "T3: gate should block on captured fail, exit=$gate3_exit output=$gate3_out"
+fi
+# Here-strings rather than `echo … | grep -q`: grep -q exits at the first match,
+# which can SIGPIPE the writer and flip the result when pipefail is enabled.
+if grep -q "capture log CONTRADICTS claimed pass" <<< "$gate3_out"; then
+  _pass "T3: gate emits capture-log contradicts warning (genuine fail caught)"
+else
+  _fail "T3: missing capture-log contradicts warning: $gate3_out"
+fi
+if ! grep -q "command-log integrity check FAILED" <<< "$gate3_out"; then
+  _pass "T3: no false integrity-failure warning for untampered chain"
+else
+  _fail "T3: spurious integrity-failure warning emitted: $gate3_out"
+fi
+# ─── Test 4: backward-compat — legacy unchained log → legacy → existing behavior ────
+# Writes a single log entry with no _chain field, expects verifyCommandLogChain
+# to report "legacy", and expects the Stop gate to still exit 2 on a
+# contradicted pass claim without emitting an integrity-failure warning.
+echo ""
+echo "Test 4: legacy unchained log (no _chain) → legacy → existing behavior unchanged"
+T4="$TMP/t4"; seed_repo "$T4" t4
+# Write a legacy-style log (no _chain field) — exactly like pre-B2 fixtures
+printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' \
+  > "$T4/.flow-agents/t4/command-log.jsonl"
+# Paths are passed to node as arguments (process.argv) instead of being spliced
+# into the JS source, so a quote or backslash in a path cannot break the snippet.
+chain_legacy=$(node -e 'const g = require(process.argv[1]); const r = g.verifyCommandLogChain(process.argv[2]); console.log(r.status);' "$GATE" "$T4/.flow-agents/t4")
+if [[ "$chain_legacy" == "legacy" ]]; then
+  _pass "T4: unchained (legacy) log returns legacy status"
+else
+  _fail "T4: expected legacy, got $chain_legacy"
+fi
+# Evidence claims npm test passed, but legacy log shows it failed → still blocks
+printf '%s' '{"schema_version":"1.0","task_slug":"t4","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"passed"}]}' \
+  > "$T4/.flow-agents/t4/evidence.json"
+# The gate is expected to exit non-zero; suspend errexit so the status can be captured.
+set +e
+gate4_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T4\"}")
+gate4_exit=$?
+set -e
+# Here-strings rather than `echo … | grep -q`: grep -q exits at the first match,
+# which can SIGPIPE the writer and flip the result when pipefail is enabled.
+if [[ "$gate4_exit" -eq 2 ]] && grep -q "capture log CONTRADICTS" <<< "$gate4_out"; then
+  _pass "T4: legacy log still catches false-completion (existing behavior preserved)"
+else
+  _fail "T4: legacy log failed to catch false-completion: exit=$gate4_exit output=$gate4_out"
+fi
+if ! grep -q "command-log integrity check FAILED" <<< "$gate4_out"; then
+  _pass "T4: no integrity-failure warning for legacy (unchained) log"
+else
+  _fail "T4: spurious integrity warning for legacy log: $gate4_out"
+fi
+# ─── Summary ─────────────────────────────────────────────────────────────────
+echo ""
+if [[ "$errors" -eq 0 ]]; then
+  echo "command-log integrity tests passed."
+  exit 0
+fi
+echo "command-log integrity tests FAILED: $errors issue(s)."
+exit 1

package/evals/integration/test_context_map.sh CHANGED Viewed

@@ -38,10 +38,8 @@ for expected in \
   'Support Skills' \
   'Agents' \
   'Optional Powers' \
-  'Packs' \
   'Context Loading Rules' \
   'npm run context-map:check' \
-  'packaging/packs.json' \
   'workflow-release.schema.json' \
   'workflow-learning.schema.json' \
   'plan-work' \