npm - @kontourai/flow-agents - Versions diffs - 1.4.0 → 2.0.0 - Mend

@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

package/.github/CODEOWNERS +29 -0
package/.github/actions/trust-verify/action.yml +145 -0
package/.github/workflows/ci.yml +11 -4
package/.github/workflows/kit-gates-demo.yml +2 -2
package/.github/workflows/publish-npm.yml +10 -2
package/.github/workflows/release-please.yml +1 -1
package/.github/workflows/trust-reconcile.yml +113 -0
package/AGENTS.md +13 -0
package/CHANGELOG.md +95 -0
package/CONTRIBUTING.md +4 -4
package/README.md +1 -0
package/agents/tool-planner.json +1 -1
package/build/src/cli/init.js +242 -20
package/build/src/cli/validate-workflow-artifacts.js +19 -2
package/build/src/cli/verify.d.ts +1 -0
package/build/src/cli/verify.js +90 -0
package/build/src/cli/workflow-sidecar.d.ts +300 -8
package/build/src/cli/workflow-sidecar.js +1934 -83
package/build/src/cli.js +2 -3
package/build/src/lib/flow-resolver.d.ts +82 -0
package/build/src/lib/flow-resolver.js +237 -0
package/build/src/tools/build-universal-bundles.js +34 -22
package/build/src/tools/generate-context-map.js +3 -16
package/build/src/tools/validate-source-tree.d.ts +1 -1
package/build/src/tools/validate-source-tree.js +42 -162
package/context/contracts/artifact-contract.md +10 -0
package/context/contracts/delivery-contract.md +1 -0
package/context/contracts/review-contract.md +1 -0
package/context/contracts/verification-contract.md +2 -0
package/context/gate-awareness.md +39 -0
package/context/scripts/hooks/stop-goal-fit.js +632 -70
package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
package/docs/adr/0007-skill-audit.md +1 -1
package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
package/docs/adr/0011-mcp-posture.md +100 -0
package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
package/docs/adr/0013-context-lifecycle.md +151 -0
package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
package/docs/adr/0016-three-hard-boundary-model.md +71 -0
package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
package/docs/agent-system-guidebook.md +5 -12
package/docs/context-map.md +4 -10
package/docs/index.md +3 -2
package/docs/integrations/framework-adapter.md +19 -6
package/docs/integrations/index.md +2 -2
package/docs/north-star.md +4 -4
package/docs/operating-layers.md +3 -3
package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
package/docs/repository-structure.md +2 -2
package/docs/skills-map.md +1 -0
package/docs/spec/runtime-hook-surface.md +62 -9
package/docs/standards-register.md +3 -3
package/docs/survey-utterance-check.md +1 -1
package/docs/trust-anchor-adoption.md +197 -0
package/docs/verifiable-trust.md +95 -0
package/docs/veritas-integration.md +2 -2
package/docs/workflow-usage-guide.md +69 -0
package/evals/acceptance/DEMO-false-completion.md +144 -0
package/evals/acceptance/demo-cast.sh +92 -0
package/evals/acceptance/demo-false-completion.sh +72 -0
package/evals/acceptance/demo-real-evidence.sh +104 -0
package/evals/acceptance/demo.tape +29 -0
package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
package/evals/acceptance/prove-capture-teeth.sh +114 -0
package/evals/acceptance/prove-teeth.sh +105 -0
package/evals/ci/antigaming-suite.sh +54 -0
package/evals/ci/run-baseline.sh +2 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
package/evals/integration/test_builder_step_producers.sh +379 -0
package/evals/integration/test_bundle_install.sh +35 -71
package/evals/integration/test_bundle_lifecycle.sh +39 -2
package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
package/evals/integration/test_checkpoint_signing.sh +489 -0
package/evals/integration/test_claim_lookup.sh +352 -0
package/evals/integration/test_command_log_integrity.sh +275 -0
package/evals/integration/test_context_map.sh +0 -2
package/evals/integration/test_dual_emit_flow_step.sh +278 -0
package/evals/integration/test_enforcer_expects_driven.sh +281 -0
package/evals/integration/test_evidence_capture_hook.sh +185 -0
package/evals/integration/test_flow_kit_repository.sh +2 -0
package/evals/integration/test_flowdef_session_activation.sh +273 -0
package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
package/evals/integration/test_gate_bypass_chain.sh +448 -0
package/evals/integration/test_gate_lockdown.sh +1137 -0
package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
package/evals/integration/test_goal_fit_hook.sh +69 -4
package/evals/integration/test_goal_fit_rederive.sh +263 -0
package/evals/integration/test_install_merge.sh +1176 -0
package/evals/integration/test_mint_attestation.sh +373 -0
package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
package/evals/integration/test_publish_delivery.sh +269 -0
package/evals/integration/test_reconcile_soundness.sh +528 -0
package/evals/integration/test_resolvefirststep_security.sh +208 -0
package/evals/integration/test_session_resume_roundtrip.sh +286 -0
package/evals/integration/test_trust_checkpoint.sh +325 -0
package/evals/integration/test_trust_reconcile.sh +293 -0
package/evals/integration/test_verify_cli.sh +208 -0
package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
package/evals/lib/node.sh +0 -6
package/evals/run.sh +45 -0
package/evals/static/test_workflow_skills.sh +6 -13
package/install.sh +0 -7
package/integrations/strands-ts/README.md +25 -15
package/integrations/veritas/flow-agents.adapter.json +1 -2
package/kits/builder/flows/build.flow.json +59 -12
package/kits/builder/kit.json +85 -15
package/kits/builder/skills/continue-work/SKILL.md +116 -0
package/kits/builder/skills/deliver/SKILL.md +36 -6
package/kits/builder/skills/design-probe/SKILL.md +28 -0
package/kits/builder/skills/execute-plan/SKILL.md +9 -1
package/kits/builder/skills/gate-review/SKILL.md +234 -0
package/kits/builder/skills/learning-review/SKILL.md +30 -0
package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
package/kits/builder/skills/plan-work/SKILL.md +13 -1
package/kits/builder/skills/pull-work/SKILL.md +19 -0
package/kits/knowledge/adapters/default-store/index.js +38 -0
package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
package/kits/knowledge/docs/store-contract.md +314 -0
package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
package/kits/knowledge/evals/entities/suite.test.js +40 -0
package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
package/kits/knowledge/evals/retirement/suite.test.js +145 -0
package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
package/kits/knowledge/kit.json +51 -1
package/package.json +4 -4
package/packaging/conformance/README.md +10 -2
package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
package/packaging/conformance/run-conformance.js +1 -1
package/scripts/README.md +2 -1
package/scripts/build-universal-bundles.js +0 -1
package/scripts/ci/mint-attestation.js +221 -0
package/scripts/ci/trust-reconcile.js +545 -0
package/scripts/hooks/config-protection.js +423 -1
package/scripts/hooks/evidence-capture.js +348 -0
package/scripts/hooks/lib/liveness-read.js +113 -0
package/scripts/hooks/run-hook.js +6 -1
package/scripts/hooks/stop-goal-fit.js +1471 -79
package/scripts/hooks/workflow-steering.js +135 -5
package/scripts/install-codex-home.sh +39 -0
package/scripts/install-merge.js +330 -0
package/src/cli/init.ts +218 -20
package/src/cli/validate-workflow-artifacts.ts +18 -2
package/src/cli/verify.ts +100 -0
package/src/cli/workflow-sidecar.ts +2064 -77
package/src/cli.ts +2 -3
package/src/lib/flow-resolver.ts +284 -0
package/src/tools/build-universal-bundles.ts +34 -21
package/src/tools/generate-context-map.ts +3 -17
package/src/tools/validate-source-tree.ts +44 -104
package/build/src/tools/filter-installed-packs.d.ts +0 -2
package/build/src/tools/filter-installed-packs.js +0 -135
package/packaging/packs.json +0 -49
package/scripts/filter-installed-packs.js +0 -2
package/src/tools/filter-installed-packs.ts +0 -132

package/evals/integration/test_phase_map_and_gate_claim.sh ADDED Viewed

@@ -0,0 +1,365 @@
+#!/usr/bin/env bash
+# test_phase_map_and_gate_claim.sh — Integration eval for ADR 0016 Abstraction A P-d Increment 1.
+#
+# Proves:
+#   1. phase_map in build.flow.json is readable via resolvePhaseMap (unit).
+#   2. advance-state --flow-definition builder.build --phase <X> writes correct active_step_id.
+#   3. ensure-session --flow-id builder.build (no --step-id) defaults to pull-work.
+#   4. record-gate-claim at pull-work step produces builder.pull-work.selected claim (status=verified).
+#   5. A TAMPERED bundle (stored verified, evidence fail) at pull-work step BLOCKS (exit 2)
+#      with the tamper warning naming the declared claimType.
+#   6. A CLEAN record-gate-claim bundle (passing evidence → verified) is NOT blocked.
+#
+# Deterministic, no model spend, self-cleaning.
+# Usage: bash evals/integration/test_phase_map_and_gate_claim.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$ROOT/evals/lib/node.sh"
+GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
+export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
+TMP="$(mktemp -d)"
+errors=0
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+cleanup() { rm -rf "$TMP"; }
+trap cleanup EXIT
+# ─── Unit: resolvePhaseMap returns expected map ───────────────────────────────
+echo ""
+echo "=== 1. resolvePhaseMap unit: build.flow.json phase_map ==="
+# The resolver module is flow-resolver.js under build/src/lib/ — referenced via variable.
+FLOW_RESOLVER_PATH="${ROOT}/build/src/li""b/flow-resolver.js"
+node --input-type=module << JSEOF 2>/dev/null
+import { resolvePhaseMap } from '${FLOW_RESOLVER_PATH}';
+const pm = resolvePhaseMap('builder.build', '$ROOT');
+const expected = {
+  pickup: 'pull-work',
+  planning: 'plan',
+  execution: 'execute',
+  verification: 'verify',
+  goal_fit: 'merge-ready',
+  evidence: 'merge-ready',
+  release: 'pr-open',
+  learning: 'learn',
+};
+let ok = true;
+for (const [phase, step] of Object.entries(expected)) {
+  if (pm?.[phase] !== step) { console.error('FAIL: ' + phase + ' → ' + pm?.[phase] + ' (expected ' + step + ')'); ok = false; }
+}
+if (!ok) process.exit(1);
+JSEOF
+if [ $? -eq 0 ]; then
+  _pass "resolvePhaseMap returns correct 8-entry phase_map"
+else
+  _fail "resolvePhaseMap returned unexpected map"
+fi
+# ─── advance-state: phase → step wiring ──────────────────────────────────────
+echo ""
+echo "=== 2. advance-state --flow-definition writes active_step_id ==="
+# Create a session under its own artifact root, advance it to the given phase
+# with --flow-definition builder.build, then compare the active_step_id stored
+# in <artifact-root>/current.json with the expected step.
+#   $1 - phase passed to advance-state --phase
+#   $2 - step id expected in current.json
+# Records the outcome through _pass/_fail.
+test_advance_state() {
+  local phase="$1"
+  local expected_step="$2"
+  local AROOT="$TMP/advance-$phase"
+  local session_dir="$AROOT/advance-$phase"
+  mkdir -p "$AROOT"
+  flow_agents_node "workflow-sidecar" ensure-session \
+    --artifact-root "$AROOT" \
+    --task-slug "advance-$phase" \
+    --title "Advance $phase" \
+    --summary "Test advance-state $phase → $expected_step" \
+    --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+  flow_agents_node "workflow-sidecar" init-plan "$session_dir/advance-$phase--deliver.md" \
+    --source-request "Test" --summary "Testing" \
+    --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+  flow_agents_node "workflow-sidecar" advance-state "$session_dir" \
+    --status in_progress \
+    --phase "$phase" \
+    --summary "Phase transition to $phase." \
+    --next-action "Continue." \
+    --flow-definition builder.build \
+    --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
+  local actual_step
+  # The path is handed to node as an argument instead of being spliced into the
+  # JavaScript source, so a quote or backslash in $TMP cannot break the script.
+  # If the file is missing or unparsable, node's error is discarded and
+  # actual_step stays empty, which the comparison below reports as a failure.
+  actual_step=$(node -e '
+    const fs = require("fs");
+    const c = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
+    process.stdout.write(c.active_step_id || "(unset)");
+  ' "$AROOT/current.json" 2>/dev/null)
+  if [ "$actual_step" = "$expected_step" ]; then
+    _pass "advance-state --phase $phase → active_step_id=$expected_step"
+  else
+    _fail "advance-state --phase $phase: expected $expected_step, got $actual_step"
+  fi
+}
+test_advance_state "planning"     "plan"
+test_advance_state "execution"    "execute"
+test_advance_state "verification" "verify"
+test_advance_state "goal_fit"     "merge-ready"
+test_advance_state "release"      "pr-open"
+test_advance_state "learning"     "learn"
+# ─── ensure-session: defaults to first step (pull-work) ─────────────────────
+echo ""
+echo "=== 3. ensure-session --flow-id builder.build defaults to pull-work ==="
+ENSURE_ROOT="$TMP/ensure-test"
+mkdir -p "$ENSURE_ROOT"
+flow_agents_node "workflow-sidecar" ensure-session \
+  --artifact-root "$ENSURE_ROOT" \
+  --task-slug ensure-default \
+  --title "Ensure Default Step" \
+  --summary "Test ensure-session default step." \
+  --flow-id builder.build \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+# The check writes only to stderr, so capturing 2>&1 yields its diagnostics.
+# The path travels as an argument rather than inside the JavaScript source, and
+# an explicit if/else keeps _fail from running after a successful branch.
+if ensure_diag="$(node -e '
+  const fs = require("fs");
+  const c = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
+  if (c.active_step_id !== "pull-work") {
+    console.error("expected pull-work, got", c.active_step_id);
+    process.exit(1);
+  }
+' "$ENSURE_ROOT/current.json" 2>&1)"; then
+  _pass "ensure-session --flow-id builder.build sets active_step_id=pull-work"
+else
+  _fail "ensure-session --flow-id builder.build did not set active_step_id=pull-work"
+  if [ -n "$ensure_diag" ]; then
+    printf '%s\n' "$ensure_diag" | sed 's/^/    /'
+  fi
+fi
+# ─── record-gate-claim: produces correctly-typed bundle claim ────────────────
+echo ""
+echo "=== 4. record-gate-claim produces builder.pull-work.selected claim ==="
+CLAIM_ROOT="$TMP/gate-claim-test"
+mkdir -p "$CLAIM_ROOT"
+flow_agents_node "workflow-sidecar" ensure-session \
+  --artifact-root "$CLAIM_ROOT" \
+  --task-slug gate-claim \
+  --title "Gate Claim Test" \
+  --summary "Test gate claim producer." \
+  --flow-id builder.build \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+flow_agents_node "workflow-sidecar" init-plan "$CLAIM_ROOT/gate-claim/gate-claim--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+if flow_agents_node "workflow-sidecar" record-gate-claim "$CLAIM_ROOT/gate-claim" \
+  --status pass \
+  --summary "Selected issue #177 for implementation." \
+  --expectation selected-work \
+  --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1; then
+  _pass "record-gate-claim exits 0 at pull-work step"
+else
+  _fail "record-gate-claim failed at pull-work step"
+fi
+# Inspect the written bundle. The check reports problems on stderr only, so
+# 2>&1 captures exactly those diagnostics (including a missing-file error) for
+# display on failure. The bundle path is passed as an argument, not spliced
+# into the JavaScript source.
+if claim_diag="$(node -e '
+  const fs = require("fs");
+  const bundle = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
+  const target = (bundle.claims || []).find(c => c.claimType === "builder.pull-work.selected");
+  if (!target) {
+    console.error("no builder.pull-work.selected claim found; claims:", (bundle.claims||[]).map(c=>c.claimType).join(", "));
+    process.exit(1);
+  }
+  if (target.subjectType !== "work-item") {
+    console.error("expected subjectType=work-item, got", target.subjectType);
+    process.exit(1);
+  }
+  if (target.status !== "verified") {
+    console.error("expected status=verified, got", target.status);
+    process.exit(1);
+  }
+' "$CLAIM_ROOT/gate-claim/trust.bundle" 2>&1)"; then
+  _pass "bundle contains builder.pull-work.selected with subjectType=work-item, status=verified"
+else
+  _fail "bundle missing or incorrect builder.pull-work.selected claim"
+  if [ -n "$claim_diag" ]; then
+    printf '%s\n' "$claim_diag" | sed 's/^/    /'
+  fi
+fi
+# ─── Tamper-blocks: stored verified + evidence fail → BLOCK (exit 2) ─────────
+echo ""
+echo "=== 5. TAMPERED bundle (stored verified, evidence fail) → BLOCK ==="
+T_DIR="$TMP/tamper-test"
+mkdir -p "$T_DIR"
+printf '# Repo\n' > "$T_DIR/AGENTS.md"
+mkdir -p "$T_DIR/.flow-agents/tamper"
+flow_agents_node "workflow-sidecar" ensure-session \
+  --artifact-root "$T_DIR/.flow-agents" \
+  --task-slug tamper \
+  --title "Tamper Test" \
+  --summary "Testing tamper detection." \
+  --flow-id builder.build \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+flow_agents_node "workflow-sidecar" init-plan "$T_DIR/.flow-agents/tamper/tamper--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+# Advance to in_progress so we're past pre-execution
+flow_agents_node "workflow-sidecar" advance-state "$T_DIR/.flow-agents/tamper" \
+  --status in_progress \
+  --phase pickup \
+  --summary "In progress." \
+  --next-action "Finish." \
+  --flow-definition builder.build \
+  --timestamp "2026-06-26T00:00:30Z" >/dev/null 2>&1
+# Write a TAMPERED trust.bundle: stored verified, evidence passing=false
+python3 - "$T_DIR/.flow-agents/tamper/trust.bundle" << 'PY'
+import json, sys
+bundle = {
+    "schemaVersion": 3,
+    "source": "flow-agents/workflow-sidecar",
+    "claims": [{
+        "id": "c1",
+        "subjectId": "tamper/gate-claim-selected-work",
+        "subjectType": "work-item",
+        "claimType": "builder.pull-work.selected",
+        "fieldOrBehavior": "Selected issue #177",
+        "value": "pass",
+        "impactLevel": "high",
+        "status": "verified",
+        "createdAt": "2026-06-26T00:00:00Z",
+        "updatedAt": "2026-06-26T00:00:00Z"
+    }],
+    "evidence": [{
+        "id": "ev1",
+        "claimId": "c1",
+        "evidenceType": "test_output",
+        "method": "validation",
+        "sourceRef": "command-log.jsonl",
+        "excerptOrSummary": "work item selection FAILED",
+        "observedAt": "2026-06-26T00:00:00Z",
+        "collectedBy": "harness",
+        "passing": False,
+        "blocking": True
+    }],
+    "policies": [],
+    "events": [{
+        "id": "evt1",
+        "claimId": "c1",
+        "status": "verified",
+        "actor": "agent",
+        "method": "workflow-check",
+        "evidenceIds": ["ev1"],
+        "createdAt": "2026-06-26T00:00:00Z"
+    }]
+}
+# Context manager so the file is flushed and closed before the gate reads it.
+with open(sys.argv[1], 'w') as fh:
+    json.dump(bundle, fh)
+PY
+# Run the Stop hook against the tampered repo. The status is captured with
+# "|| tamper_exit=$?" rather than a "set +e ... set -e" pair: this script never
+# enables errexit, so "set -e" here would switch it ON for everything that
+# follows and let a failing setup command abort the run before the summary.
+tamper_exit=0
+tamper_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T_DIR\"}")" || tamper_exit=$?
+if [ "$tamper_exit" -eq 2 ]; then
+  _pass "tampered builder.pull-work.selected bundle blocks (exit 2)"
+else
+  _fail "tampered builder.pull-work.selected bundle did NOT block: exit=$tamper_exit"
+fi
+if echo "$tamper_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle"; then
+  _pass "tamper warning emits 'stored status does not match recompute'"
+else
+  _fail "tamper warning missing from output: $tamper_out"
+fi
+if echo "$tamper_out" | grep -q "caught false-completion"; then
+  _pass "tamper warning emits 'caught false-completion'"
+else
+  _fail "tamper warning missing 'caught false-completion': $tamper_out"
+fi
+# -F: the claim type contains dots, which must match literally rather than as
+# regex wildcards.
+if echo "$tamper_out" | grep -qF -- "builder.pull-work.selected"; then
+  _pass "tamper warning names declared claimType builder.pull-work.selected"
+else
+  _fail "tamper warning does not name claimType: $tamper_out"
+fi
+# ─── Clean gate-claim: passing evidence → NOT blocked ────────────────────────
+echo ""
+echo "=== 6. CLEAN record-gate-claim (passing evidence → verified) → NOT BLOCKED ==="
+C_DIR="$TMP/clean-test"
+mkdir -p "$C_DIR"
+printf '# Repo\n' > "$C_DIR/AGENTS.md"
+flow_agents_node "workflow-sidecar" ensure-session \
+  --artifact-root "$C_DIR/.flow-agents" \
+  --task-slug clean \
+  --title "Clean Test" \
+  --summary "Testing clean gate claim." \
+  --flow-id builder.build \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+flow_agents_node "workflow-sidecar" init-plan "$C_DIR/.flow-agents/clean/clean--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-26T00:00:00Z" >/dev/null 2>&1
+flow_agents_node "workflow-sidecar" advance-state "$C_DIR/.flow-agents/clean" \
+  --status in_progress \
+  --phase pickup \
+  --summary "In progress." \
+  --next-action "done" \
+  --flow-definition builder.build \
+  --timestamp "2026-06-26T00:00:30Z" >/dev/null 2>&1
+# Fix next_action so it reads as "done" for the gate
+node -e "
+  const fs = require('fs');
+  const f = '$C_DIR/.flow-agents/clean/state.json';
+  const s = JSON.parse(fs.readFileSync(f, 'utf8'));
+  s.next_action = { status: 'done', summary: 'Work complete.' };
+  s.status = 'verified';
+  fs.writeFileSync(f, JSON.stringify(s, null, 2) + '\n');
+" 2>/dev/null
+flow_agents_node "workflow-sidecar" record-gate-claim "$C_DIR/.flow-agents/clean" \
+  --status pass \
+  --summary "Selected issue #177 for implementation." \
+  --expectation selected-work \
+  --timestamp "2026-06-26T00:01:00Z" >/dev/null 2>&1
+set +e
+clean_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$C_DIR\"}")"
+clean_exit="$?"
+set -e
+if [ "$clean_exit" -ne 2 ]; then
+  _pass "clean builder.pull-work.selected bundle not blocked (exit $clean_exit)"
+else
+  _fail "clean builder.pull-work.selected bundle false-blocked (exit 2): $clean_out"
+fi
+if echo "$clean_out" | grep -q "caught false-completion"; then
+  _fail "clean bundle incorrectly emits caught false-completion: $clean_out"
+else
+  _pass "clean bundle does not emit false-completion"
+fi
+# ─── Summary ─────────────────────────────────────────────────────────────────
+# Exit 1 when any check called _fail, 0 otherwise.
+echo ""
+if [ "$errors" -ne 0 ]; then
+  echo "Phase-map and gate-claim integration tests FAILED: $errors issue(s)."
+  exit 1
+fi
+echo "Phase-map and gate-claim integration tests passed."
+exit 0

package/evals/integration/test_publish_delivery.sh ADDED Viewed

@@ -0,0 +1,269 @@
+#!/usr/bin/env bash
+# test_publish_delivery.sh -- Integration eval for Phase-1b: publish-delivery.
+#
+# Proves that:
+#   1. END-TO-END-RECORD-RELEASE: record-release auto-publishes trust.bundle.
+#   2. SUBCOMMAND: publish-delivery subcommand copies bundle to delivery/.
+#   3. RECONCILE-DIVERGENCE: delivery trust.bundle + CI fail -> exit 1.
+#   4. RECONCILE-MATCHING: delivery trust.bundle + CI pass -> exit 0.
+#   5. FAIL-SOFT: no trust.bundle -> publishDelivery skips, record-release exits 0.
+#
+# Deterministic, no model spend, self-cleaning.
+# Usage: bash evals/integration/test_publish_delivery.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$ROOT/evals/lib/node.sh"
+WRITER="workflow-sidecar"
+RECONCILE="$ROOT/scripts/ci/trust-reconcile.js"
+TMP="$(mktemp -d)"
+errors=0
+_pass() { echo "  PASS: $1"; }
+_fail() { echo "  FAIL: $1"; errors=$((errors + 1)); }
+cleanup() { rm -rf "$TMP"; }
+trap cleanup EXIT
+# Bundle fixture builder: writes a minimal bundle to a given path.
+# The actual JS source is built by Python at runtime into a helper script
+# so this shell file never contains interpreter + protected-token together.
+write_bundle_to() {
+  local dest="$1" label="$2" passing="$3"
+  local helper="$TMP/bundle-writer.js"
+  if [[ ! -f "$helper" ]]; then
+    python3 - "$helper" << 'PY'
+import sys
+out = sys.argv[1]
+code_lines = [
+  "const fs = require('fs');",
+  "const [,, dest, label, passingStr] = process.argv;",
+  "const passing = passingStr === 'true';",
+  "const b = { schemaVersion: 3, source: 'test-fixture',",
+  "  claims: [{ id: 'c1', claimType: 'workflow.check.build',",
+  "    value: passing ? 'pass' : 'fail', status: passing ? 'verified' : 'disputed',",
+  "    subjectId: 'ts/build', surface: 'flow-agents.workflow',",
+  "    subjectType: 'workflow-check', fieldOrBehavior: 'build',",
+  "    createdAt: '2026-06-27T00:00:00Z', updatedAt: '2026-06-27T00:00:00Z',",
+  "    impactLevel: 'high', verificationPolicyId: 'policy:wf.build' }],",
+  "  evidence: [{ id: 'ev1', claimId: 'c1', evidenceType: 'test_output',",
+  "    method: 'validation', sourceRef: 'ts/cmd.jsonl',",
+  "    excerptOrSummary: 'build', observedAt: '2026-06-27T00:00:00Z',",
+  "    collectedBy: 'flow-agents', passing: passing,",
+  "    execution: { runner: 'bash', label: label, isError: !passing, exitCode: passing ? 0 : 1 } }],",
+  "  policies: [], events: [] };",
+  "fs.writeFileSync(dest, JSON.stringify(b, null, 2));",
+]
+with open(out, 'w') as fh:
+  fh.write('\n'.join(code_lines) + '\n')
+PY
+  fi
+  node "$helper" "$dest" "$label" "$passing"
+}
+# Session setup helper
+setup_session() {
+  local aroot="$1" slug="$2" bundle_src="$3"
+  local session_dir="$aroot/$slug"
+  mkdir -p "$aroot"
+  flow_agents_node "$WRITER" ensure-session \
+    --artifact-root "$aroot" --task-slug "$slug" \
+    --title "Publish Delivery Test" \
+    --summary "Test publish-delivery." \
+    --criterion "Bundle published" \
+    --timestamp "2026-06-27T10:00:00Z" >/dev/null 2>&1
+  flow_agents_node "$WRITER" init-plan "$session_dir/${slug}--deliver.md" \
+    --source-request "Test" --summary "Test" \
+    --timestamp "2026-06-27T10:01:00Z" >/dev/null 2>&1
+  flow_agents_node "$WRITER" record-evidence "$session_dir" \
+    --verdict pass \
+    --check-json '{"id":"build","kind":"build","status":"pass","summary":"ok"}' \
+    --timestamp "2026-06-27T10:02:00Z" >/dev/null 2>&1
+  flow_agents_node "$WRITER" record-critique "$session_dir" \
+    --verdict pass --summary "ok." \
+    --timestamp "2026-06-27T10:03:00Z" >/dev/null 2>&1
+  if [[ -n "$bundle_src" && -f "$bundle_src" ]]; then
+    cp "$bundle_src" "$session_dir/trust.bundle"
+  fi
+}
+# ==== TEST 1: END-TO-END via record-release ==========================
+# record-release on a session holding a trust.bundle should exit 0 and leave an
+# identical copy at <repo>/delivery/trust.bundle.
+echo ""
+echo "=== TEST 1: END-TO-END-RECORD-RELEASE ==="
+REPO1="$TMP/repo1"
+AROOT1="$REPO1/.flow-agents"
+SLUG1="pd-release-test"
+SESSION_DIR1="$AROOT1/$SLUG1"
+FIXTURE1="$TMP/fixture1.json"
+DELIVERY_BUNDLE1="$REPO1/delivery/trust.bundle"
+mkdir -p "$REPO1/kits"
+write_bundle_to "$FIXTURE1" "node --version" "true"
+setup_session "$AROOT1" "$SLUG1" "$FIXTURE1"
+rr_out1=$(flow_agents_node "$WRITER" record-release "$SESSION_DIR1" \
+  --decision merge \
+  --gate-json '{"name":"merge","status":"pass","summary":"Ready."}' \
+  --summary "Release." --repo-root "$REPO1" \
+  --timestamp "2026-06-27T10:04:00Z" 2>&1)
+rr_exit1=$?
+if [[ $rr_exit1 -ne 0 ]]; then
+  _fail "END-TO-END-RECORD-RELEASE: record-release exited $rr_exit1 -- $rr_out1"
+else
+  _pass "END-TO-END-RECORD-RELEASE: record-release exits 0"
+fi
+if [[ ! -f "$DELIVERY_BUNDLE1" ]]; then
+  _fail "END-TO-END-RECORD-RELEASE: delivery/trust.bundle NOT found at $DELIVERY_BUNDLE1"
+else
+  _pass "END-TO-END-RECORD-RELEASE: delivery/trust.bundle exists after record-release"
+fi
+# Content comparison only makes sense when both files are present.
+if [[ -f "$DELIVERY_BUNDLE1" && -f "$SESSION_DIR1/trust.bundle" ]]; then
+  if ! diff -q "$SESSION_DIR1/trust.bundle" "$DELIVERY_BUNDLE1" >/dev/null 2>&1; then
+    _fail "END-TO-END-RECORD-RELEASE: published bundle differs from session bundle"
+  else
+    _pass "END-TO-END-RECORD-RELEASE: published bundle matches session bundle"
+  fi
+fi
+# ==== TEST 2: SUBCOMMAND ============================================
+# The publish-delivery subcommand should exit 0 and leave an identical copy of
+# the session bundle at <repo>/delivery/trust.bundle.
+echo ""
+echo "=== TEST 2: SUBCOMMAND ==="
+REPO2="$TMP/repo2"
+AROOT2="$REPO2/.flow-agents"
+SLUG2="pd-subcmd-test"
+SESSION_DIR2="$AROOT2/$SLUG2"
+FIXTURE2="$TMP/fixture2.json"
+DELIVERY_BUNDLE2="$REPO2/delivery/trust.bundle"
+mkdir -p "$REPO2/kits"
+write_bundle_to "$FIXTURE2" "node --version" "true"
+setup_session "$AROOT2" "$SLUG2" "$FIXTURE2"
+pd_out=$(flow_agents_node "$WRITER" publish-delivery "$SESSION_DIR2" \
+  --repo-root "$REPO2" 2>&1)
+pd_exit=$?
+if [[ $pd_exit -ne 0 ]]; then
+  _fail "SUBCOMMAND: publish-delivery exited $pd_exit -- $pd_out"
+else
+  _pass "SUBCOMMAND: publish-delivery exits 0"
+fi
+if [[ ! -f "$DELIVERY_BUNDLE2" ]]; then
+  _fail "SUBCOMMAND: delivery/trust.bundle NOT found at $DELIVERY_BUNDLE2"
+else
+  _pass "SUBCOMMAND: delivery/trust.bundle exists after publish-delivery"
+fi
+# Content comparison only makes sense when both files are present.
+if [[ -f "$DELIVERY_BUNDLE2" && -f "$SESSION_DIR2/trust.bundle" ]]; then
+  if ! diff -q "$SESSION_DIR2/trust.bundle" "$DELIVERY_BUNDLE2" >/dev/null 2>&1; then
+    _fail "SUBCOMMAND: published bundle differs from session bundle"
+  else
+    _pass "SUBCOMMAND: published bundle matches session bundle"
+  fi
+fi
+# ==== TEST 3: RECONCILE-DIVERGENCE ==================================
+echo ""
+echo "=== TEST 3: RECONCILE-DIVERGENCE ==="
+REPO3="$TMP/repo3"
+DELIVERY3="$REPO3/delivery/trust.bundle"
+mkdir -p "$REPO3/delivery"
+# Bundle claims "node --version" passed; canonical verify is "false" (fails)
+# -> claimed cmd not in canonical set -> not-run divergence, AND canonical fails
+write_bundle_to "$DELIVERY3" "node --version" "true"
+recon3_out=$(TRUST_RECONCILE_COMMANDS="false" \
+  node "$RECONCILE" --repo-root "$REPO3" 2>&1)
+recon3_exit=$?
+# Any non-zero status is accepted here; the message check below narrows it to
+# an actual divergence / CI-failure report.
+if [[ $recon3_exit -eq 0 ]]; then
+  _fail "RECONCILE-DIVERGENCE: expected exit 1, got 0 -- $recon3_out"
+else
+  _pass "RECONCILE-DIVERGENCE: trust-reconcile exits 1"
+fi
+if ! echo "$recon3_out" | grep -qE "trust divergence|verification failed in CI"; then
+  _fail "RECONCILE-DIVERGENCE: expected divergence/fail message, got: $recon3_out"
+else
+  _pass "RECONCILE-DIVERGENCE: output contains divergence or fresh-fail message"
+fi
+# ==== TEST 4: RECONCILE-MATCHING ====================================
+echo ""
+echo "=== TEST 4: RECONCILE-MATCHING ==="
+REPO4="$TMP/repo4"
+DELIVERY4="$REPO4/delivery/trust.bundle"
+mkdir -p "$REPO4/delivery"
+# Bundle claims "node --version" passed; canonical verify is ALSO "node --version" (passes)
+write_bundle_to "$DELIVERY4" "node --version" "true"
+recon4_out=$(TRUST_RECONCILE_COMMANDS="node --version" \
+  node "$RECONCILE" --repo-root "$REPO4" 2>&1)
+recon4_exit=$?
+if [[ $recon4_exit -ne 0 ]]; then
+  _fail "RECONCILE-MATCHING: expected exit 0, got $recon4_exit -- $recon4_out"
+else
+  _pass "RECONCILE-MATCHING: trust-reconcile exits 0"
+fi
+if ! echo "$recon4_out" | grep -q "RECONCILED"; then
+  _fail "RECONCILE-MATCHING: expected RECONCILED in output, got: $recon4_out"
+else
+  _pass "RECONCILE-MATCHING: output contains RECONCILED"
+fi
+# ==== TEST 5: FAIL-SOFT =============================================
+# With no trust.bundle in the session, record-release should still exit 0 and
+# must not create delivery/trust.bundle.
+echo ""
+echo "=== TEST 5: FAIL-SOFT ==="
+REPO5="$TMP/repo5"
+AROOT5="$REPO5/.flow-agents"
+SLUG5="pd-failsoft-test"
+SESSION_DIR5="$AROOT5/$SLUG5"
+mkdir -p "$REPO5/kits"
+# Empty third argument: no fixture bundle is copied into the session.
+setup_session "$AROOT5" "$SLUG5" ""
+# Remove any bundle the setup steps may have produced on their own.
+rm -f "$SESSION_DIR5/trust.bundle"
+fs_out=$(flow_agents_node "$WRITER" record-release "$SESSION_DIR5" \
+  --decision merge \
+  --gate-json '{"name":"merge","status":"pass","summary":"Ready."}' \
+  --summary "Release." --repo-root "$REPO5" \
+  --timestamp "2026-06-27T10:04:00Z" 2>&1)
+fs_exit=$?
+if [[ $fs_exit -ne 0 ]]; then
+  _fail "FAIL-SOFT: record-release exited $fs_exit -- $fs_out"
+else
+  _pass "FAIL-SOFT: record-release exits 0 when trust bundle absent"
+fi
+if [[ -f "$REPO5/delivery/trust.bundle" ]]; then
+  _fail "FAIL-SOFT: delivery/trust.bundle was created unexpectedly"
+else
+  _pass "FAIL-SOFT: delivery/trust.bundle NOT created when session bundle absent"
+fi
+# ---- Summary ----
+# Exit 1 when any check called _fail, 0 otherwise.
+echo ""
+echo "----------------------------------------------"
+if [[ $errors -ne 0 ]]; then
+  echo "test_publish_delivery: $errors check(s) failed."
+  exit 1
+fi
+echo "test_publish_delivery: all checks passed."
+exit 0