npm - @kontourai/flow-agents - Versions diffs - 1.4.0 → 2.0.1 - Mend

@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

package/.github/CODEOWNERS +29 -0
package/.github/actions/trust-verify/action.yml +145 -0
package/.github/workflows/ci.yml +11 -4
package/.github/workflows/kit-gates-demo.yml +2 -2
package/.github/workflows/publish-npm.yml +10 -2
package/.github/workflows/release-please.yml +1 -1
package/.github/workflows/runtime-compat.yml +1 -1
package/.github/workflows/trust-reconcile.yml +113 -0
package/AGENTS.md +13 -0
package/CHANGELOG.md +103 -0
package/CONTRIBUTING.md +4 -4
package/README.md +1 -0
package/agents/tool-planner.json +1 -1
package/build/src/cli/init.js +242 -20
package/build/src/cli/validate-workflow-artifacts.js +19 -2
package/build/src/cli/verify.d.ts +1 -0
package/build/src/cli/verify.js +90 -0
package/build/src/cli/workflow-sidecar.d.ts +316 -8
package/build/src/cli/workflow-sidecar.js +1996 -91
package/build/src/cli.js +2 -3
package/build/src/lib/flow-resolver.d.ts +111 -0
package/build/src/lib/flow-resolver.js +308 -0
package/build/src/tools/build-universal-bundles.js +34 -22
package/build/src/tools/generate-context-map.js +3 -16
package/build/src/tools/validate-source-tree.d.ts +1 -1
package/build/src/tools/validate-source-tree.js +42 -162
package/context/contracts/artifact-contract.md +10 -0
package/context/contracts/delivery-contract.md +1 -0
package/context/contracts/review-contract.md +1 -0
package/context/contracts/verification-contract.md +2 -0
package/context/gate-awareness.md +39 -0
package/context/scripts/hooks/stop-goal-fit.js +632 -70
package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
package/docs/adr/0007-skill-audit.md +1 -1
package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
package/docs/adr/0011-mcp-posture.md +100 -0
package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
package/docs/adr/0013-context-lifecycle.md +151 -0
package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
package/docs/adr/0016-three-hard-boundary-model.md +71 -0
package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
package/docs/agent-system-guidebook.md +5 -12
package/docs/context-map.md +4 -10
package/docs/index.md +3 -2
package/docs/integrations/framework-adapter.md +19 -6
package/docs/integrations/index.md +2 -2
package/docs/north-star.md +4 -4
package/docs/operating-layers.md +3 -3
package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
package/docs/repository-structure.md +2 -2
package/docs/skills-map.md +1 -0
package/docs/spec/runtime-hook-surface.md +62 -9
package/docs/standards-register.md +3 -3
package/docs/survey-utterance-check.md +1 -1
package/docs/trust-anchor-adoption.md +197 -0
package/docs/verifiable-trust.md +95 -0
package/docs/veritas-integration.md +2 -2
package/docs/workflow-usage-guide.md +69 -0
package/evals/acceptance/DEMO-false-completion.md +144 -0
package/evals/acceptance/demo-cast.sh +92 -0
package/evals/acceptance/demo-false-completion.sh +72 -0
package/evals/acceptance/demo-real-evidence.sh +104 -0
package/evals/acceptance/demo.tape +29 -0
package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
package/evals/acceptance/prove-capture-teeth.sh +114 -0
package/evals/acceptance/prove-teeth.sh +105 -0
package/evals/ci/antigaming-suite.sh +55 -0
package/evals/ci/run-baseline.sh +2 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
package/evals/integration/test_builder_step_producers.sh +379 -0
package/evals/integration/test_bundle_install.sh +35 -71
package/evals/integration/test_bundle_lifecycle.sh +39 -2
package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
package/evals/integration/test_checkpoint_signing.sh +489 -0
package/evals/integration/test_claim_lookup.sh +352 -0
package/evals/integration/test_command_log_fork_classification.sh +134 -0
package/evals/integration/test_command_log_integrity.sh +275 -0
package/evals/integration/test_context_map.sh +0 -2
package/evals/integration/test_dual_emit_flow_step.sh +278 -0
package/evals/integration/test_enforcer_expects_driven.sh +281 -0
package/evals/integration/test_evidence_capture_hook.sh +185 -0
package/evals/integration/test_flow_kit_repository.sh +2 -0
package/evals/integration/test_flowdef_session_activation.sh +273 -0
package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
package/evals/integration/test_gate_bypass_chain.sh +448 -0
package/evals/integration/test_gate_lockdown.sh +1137 -0
package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
package/evals/integration/test_goal_fit_hook.sh +69 -4
package/evals/integration/test_goal_fit_rederive.sh +263 -0
package/evals/integration/test_install_merge.sh +1176 -0
package/evals/integration/test_kit_identity_trust.sh +393 -0
package/evals/integration/test_mint_attestation.sh +373 -0
package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
package/evals/integration/test_publish_delivery.sh +269 -0
package/evals/integration/test_reconcile_soundness.sh +528 -0
package/evals/integration/test_resolvefirststep_security.sh +208 -0
package/evals/integration/test_session_resume_roundtrip.sh +286 -0
package/evals/integration/test_trust_checkpoint.sh +325 -0
package/evals/integration/test_trust_reconcile.sh +293 -0
package/evals/integration/test_verify_cli.sh +208 -0
package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
package/evals/lib/node.sh +0 -6
package/evals/run.sh +47 -0
package/evals/static/test_workflow_skills.sh +6 -13
package/install.sh +0 -7
package/integrations/strands-ts/README.md +25 -15
package/integrations/veritas/flow-agents.adapter.json +1 -2
package/kits/builder/flows/build.flow.json +59 -12
package/kits/builder/kit.json +85 -15
package/kits/builder/skills/continue-work/SKILL.md +116 -0
package/kits/builder/skills/deliver/SKILL.md +36 -6
package/kits/builder/skills/design-probe/SKILL.md +28 -0
package/kits/builder/skills/execute-plan/SKILL.md +9 -1
package/kits/builder/skills/gate-review/SKILL.md +234 -0
package/kits/builder/skills/learning-review/SKILL.md +30 -0
package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
package/kits/builder/skills/plan-work/SKILL.md +13 -1
package/kits/builder/skills/pull-work/SKILL.md +19 -0
package/kits/knowledge/adapters/default-store/index.js +38 -0
package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
package/kits/knowledge/docs/store-contract.md +314 -0
package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
package/kits/knowledge/evals/entities/suite.test.js +40 -0
package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
package/kits/knowledge/evals/retirement/suite.test.js +145 -0
package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
package/kits/knowledge/kit.json +51 -1
package/package.json +6 -6
package/packaging/conformance/README.md +10 -2
package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
package/packaging/conformance/run-conformance.js +1 -1
package/scripts/README.md +2 -1
package/scripts/build-universal-bundles.js +0 -1
package/scripts/ci/mint-attestation.js +221 -0
package/scripts/ci/trust-reconcile.js +545 -0
package/scripts/hooks/config-protection.js +423 -1
package/scripts/hooks/evidence-capture.js +348 -0
package/scripts/hooks/lib/liveness-read.js +113 -0
package/scripts/hooks/run-hook.js +6 -1
package/scripts/hooks/stop-goal-fit.js +1524 -79
package/scripts/hooks/workflow-steering.js +135 -5
package/scripts/install-codex-home.sh +39 -0
package/scripts/install-merge.js +330 -0
package/scripts/repair-command-log.js +115 -0
package/src/cli/init.ts +218 -20
package/src/cli/validate-workflow-artifacts.ts +18 -2
package/src/cli/verify.ts +100 -0
package/src/cli/workflow-sidecar.ts +2127 -84
package/src/cli.ts +2 -3
package/src/lib/flow-resolver.ts +369 -0
package/src/tools/build-universal-bundles.ts +34 -21
package/src/tools/generate-context-map.ts +3 -17
package/src/tools/validate-source-tree.ts +44 -104
package/build/src/tools/filter-installed-packs.d.ts +0 -2
package/build/src/tools/filter-installed-packs.js +0 -135
package/packaging/packs.json +0 -49
package/scripts/filter-installed-packs.js +0 -2
package/src/tools/filter-installed-packs.ts +0 -132

package/evals/integration/test_flowdef_session_activation.sh ADDED Viewed

@@ -0,0 +1,273 @@
+#!/usr/bin/env bash
+# test_flowdef_session_activation.sh — Integration eval for ADR 0016 Step 1.
+#
+# Proves that ensure-session --flow-id builder.build activates the FlowDefinition-
+# driven path so producers fire, gates enforce on builder.* claims, and advance-state
+# correctly sets active_step_id via the phase_map at each phase.
+#
+# Tests:
+#   1. ensure-session --flow-id builder.build writes active_flow_id + default
+#      active_step_id (pull-work) to current.json.
+#   2. advance-state through phases (planning→execution→verification) sets correct
+#      active_step_id via phase_map at each transition.
+#   3. At the verify step, record-gate-claim for tests-evidence produces
+#      builder.verify.tests (status=verified) in the bundle — producer fires.
+#   4. A TAMPERED builder.verify.tests bundle at the verify step BLOCKS (exit 2)
+#      with the tamper warning naming the declared claimType.
+#   5. Fallback: session without --flow-id produces only workflow.* claims (the
+#      retained safety net for non-flow sessions).
+#
+# Deterministic, no model spend, self-cleaning.
+# Usage: bash evals/integration/test_flowdef_session_activation.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$ROOT/evals/lib/node.sh"
+GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
+export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
+TMP="$(mktemp -d)"
+errors=0
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+cleanup() { rm -rf "$TMP"; }
+trap cleanup EXIT
+WRITER="workflow-sidecar"
+# ─── TEST 1: ensure-session --flow-id activates the flow ─────────────────────
+echo ""
+echo "=== 1. ensure-session --flow-id builder.build activates FlowDefinition-driven path ==="
+MAIN_AROOT="$TMP/main-aroot"
+SLUG="activation-test"
+SESSION_DIR="$MAIN_AROOT/$SLUG"
+mkdir -p "$MAIN_AROOT"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$MAIN_AROOT" \
+  --task-slug "$SLUG" \
+  --title "Step 1 activation test" \
+  --summary "Test that --flow-id builder.build activates the FlowDefinition-driven path." \
+  --criterion "All gates produce declared claims" \
+  --flow-id builder.build \
+  --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
+node -e "
+const fs = require('fs');
+const c = JSON.parse(fs.readFileSync('$MAIN_AROOT/current.json', 'utf8'));
+if (c.active_flow_id !== 'builder.build') throw new Error('expected active_flow_id=builder.build, got ' + c.active_flow_id);
+if (!c.active_step_id) throw new Error('expected active_step_id to be set (first step default), got ' + c.active_step_id);
+console.log('current.json: active_flow_id=' + c.active_flow_id + ' active_step_id=' + c.active_step_id);
+" 2>&1 \
+  && _pass "ensure-session --flow-id builder.build writes active_flow_id + default active_step_id to current.json" \
+  || _fail "ensure-session --flow-id builder.build did NOT write active_flow_id to current.json"
+# ─── TEST 2: advance-state sets active_step_id via phase_map ─────────────────
+echo ""
+echo "=== 2. advance-state through phases sets active_step_id via phase_map ==="
+flow_agents_node "$WRITER" init-plan "$SESSION_DIR/$SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-01T00:00:30Z" >/dev/null 2>&1
+test_phase_step() {
+  local phase="$1" expected_step="$2"
+  flow_agents_node "$WRITER" advance-state "$SESSION_DIR" \
+    --status in_progress --phase "$phase" \
+    --summary "Testing phase $phase." \
+    --next-action "Continue." \
+    --flow-definition builder.build \
+    --timestamp "2026-06-01T00:01:00Z" >/dev/null 2>&1
+  local actual
+  actual=$(node -e "
+    const fs = require('fs');
+    const c = JSON.parse(fs.readFileSync('$MAIN_AROOT/current.json', 'utf8'));
+    console.log(c.active_step_id || '');
+  " 2>/dev/null)
+  if [ "$actual" = "$expected_step" ]; then
+    _pass "advance-state phase=$phase → active_step_id=$expected_step"
+  else
+    _fail "advance-state phase=$phase → got active_step_id=$actual (expected $expected_step)"
+  fi
+}
+test_phase_step "planning"     "plan"
+test_phase_step "execution"    "execute"
+test_phase_step "verification" "verify"
+# ─── TEST 3: at verify step, record-gate-claim produces builder.verify.tests ──
+# Runs against the TEST 1/2 session, which TEST 2 last advanced to the
+# verification phase.
+echo ""
+echo "=== 3. verify step: producer fires — record-gate-claim produces builder.verify.tests ==="
+# First check: the CLI call itself must exit 0.
+if flow_agents_node "$WRITER" record-gate-claim "$SESSION_DIR" \
+  --status pass \
+  --summary "All tests pass." \
+  --expectation "tests-evidence" \
+  --timestamp "2026-06-01T00:02:00Z" >/dev/null 2>&1; then
+  _pass "record-gate-claim at verify step succeeds (expectation=tests-evidence)"
+else
+  _fail "record-gate-claim at verify step FAILED"
+fi
+# Second check: trust.bundle must exist and contain a builder.verify.tests claim
+# whose status is 'verified'.
+# NOTE(review): the pass message below also mentions subjectType=flow-step and
+# value=pass, but the node snippet only prints those fields; it does not assert
+# them. Confirm whether they should be asserted too.
+node -e "
+const fs = require('fs');
+const bundlePath = '$SESSION_DIR/trust.bundle';
+if (!fs.existsSync(bundlePath)) throw new Error('trust.bundle not found');
+const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf8'));
+const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
+if (!declared) throw new Error('MISSING builder.verify.tests; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
+if (declared.status !== 'verified') throw new Error('expected status=verified, got ' + declared.status);
+console.log('builder.verify.tests: subjectType=' + declared.subjectType + ' status=' + declared.status + ' value=' + declared.value);
+" 2>&1 \
+  && _pass "bundle contains builder.verify.tests (subjectType=flow-step, status=verified, value=pass)" \
+  || _fail "bundle missing or incorrect builder.verify.tests claim"
+# ─── TEST 4: tampered bundle at verify step BLOCKS ────────────────────────────
+echo ""
+echo "=== 4. tamper-blocks: builder.verify.tests — tampered bundle triggers gate exit 2 ==="
+TAMPER_DIR="$TMP/tamper-verify"
+TAMPER_SLUG="tamper-verify-test"
+mkdir -p "$TAMPER_DIR"
+printf '# Test repo\n' > "$TAMPER_DIR/AGENTS.md"
+mkdir -p "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$TAMPER_DIR/.flow-agents" \
+  --task-slug "$TAMPER_SLUG" \
+  --title "Tamper verify test" \
+  --summary "Testing tamper detection at verify step." \
+  --flow-id builder.build \
+  --step-id verify \
+  --timestamp "2026-06-01T02:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG/$TAMPER_SLUG--deliver.md" \
+  --source-request "Test" --summary "Tamper test" \
+  --timestamp "2026-06-01T02:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" advance-state "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG" \
+  --status in_progress --phase verification \
+  --summary "At verify." --next-action "Continue." \
+  --flow-definition builder.build \
+  --timestamp "2026-06-01T02:00:30Z" >/dev/null 2>&1
+# Write TAMPERED trust.bundle: stored verified, evidence passing=false
+python3 - "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG/trust.bundle" << 'PY'
+import json, sys
+bundle = {
+    "schemaVersion": 3,
+    "source": "flow-agents/workflow-sidecar",
+    "claims": [{
+        "id": "c1",
+        "subjectId": "tamper-verify-test/verify-tests",
+        "subjectType": "flow-step",
+        "claimType": "builder.verify.tests",
+        "fieldOrBehavior": "Tests pass",
+        "value": "pass",
+        "impactLevel": "high",
+        "status": "verified",
+        "createdAt": "2026-06-01T02:00:00Z",
+        "updatedAt": "2026-06-01T02:00:00Z"
+    }],
+    "evidence": [{
+        "id": "ev1",
+        "claimId": "c1",
+        "evidenceType": "test_output",
+        "method": "validation",
+        "sourceRef": "command-log.jsonl",
+        "excerptOrSummary": "tests FAILED",
+        "observedAt": "2026-06-01T02:00:00Z",
+        "collectedBy": "harness",
+        "passing": False,
+        "blocking": True
+    }],
+    "policies": [],
+    "events": [{
+        "id": "evt1",
+        "claimId": "c1",
+        "status": "verified",
+        "actor": "agent",
+        "method": "workflow-check",
+        "evidenceIds": ["ev1"],
+        "createdAt": "2026-06-01T02:00:00Z"
+    }]
+}
+json.dump(bundle, open(sys.argv[1], 'w'))
+PY
+set +e
+tamper_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$TAMPER_DIR\"}")"
+tamper_exit="$?"
+set -e
+if [ "$tamper_exit" -eq 2 ]; then
+  _pass "gate BLOCKS tampered builder.verify.tests bundle (exit 2)"
+else
+  _fail "gate did NOT block tampered bundle: exit=$tamper_exit"
+fi
+if echo "$tamper_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle|caught false-completion"; then
+  _pass "gate emits tamper warning for builder.verify.tests"
+else
+  _fail "gate tamper warning missing from output: $tamper_out"
+fi
+if echo "$tamper_out" | grep -q "builder.verify.tests"; then
+  _pass "gate tamper warning names declared claimType builder.verify.tests"
+else
+  _fail "gate tamper warning does not name builder.verify.tests: $tamper_out"
+fi
+# ─── TEST 5: Fallback — session without --flow-id (workflow.* only, safety net) ─
+# Creates a session WITHOUT --flow-id, records one passing check, and inspects
+# the claim types written to trust.bundle.
+echo ""
+echo "=== 5. Fallback: session without --flow-id produces only workflow.* claims (safety net intact) ==="
+FALLBACK_AROOT="$TMP/fallback-aroot"
+FALLBACK_SLUG="fallback-test"
+FALLBACK_DIR="$FALLBACK_AROOT/$FALLBACK_SLUG"
+mkdir -p "$FALLBACK_AROOT"
+# CLI output is discarded for all three calls; the result is judged from
+# trust.bundle afterwards.
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$FALLBACK_AROOT" \
+  --task-slug "$FALLBACK_SLUG" \
+  --title "Fallback no-flow test" \
+  --summary "No --flow-id: workflow.* fallback is the safety net for non-flow sessions." \
+  --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$FALLBACK_DIR/$FALLBACK_SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing fallback." \
+  --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" record-evidence "$FALLBACK_DIR" \
+  --verdict pass \
+  --check-json '{"id":"fallback-check","kind":"test","status":"pass","summary":"Fallback test passes"}' \
+  --timestamp "2026-06-01T10:01:00Z" >/dev/null 2>&1
+# The node check requires: a workflow.check.test claim exists, no claimType
+# starts with 'builder.', and the workflow.check.test claim id does not end in
+# '-legacy'. Any violation throws and is counted as a failure.
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$FALLBACK_DIR/trust.bundle', 'utf8'));
+const claims = bundle.claims || [];
+const wfClaim = claims.find(c => c.claimType === 'workflow.check.test');
+const builderClaims = claims.filter(c => c.claimType.startsWith('builder.'));
+if (!wfClaim) throw new Error('MISSING workflow.check.test in fallback session');
+if (builderClaims.length > 0) throw new Error('UNEXPECTED builder.* claims in fallback session: ' + builderClaims.map(c=>c.claimType).join(', '));
+if (wfClaim.id.endsWith('-legacy')) throw new Error('workflow.check.test should not have -legacy suffix when no flow active');
+console.log('fallback: only workflow.check.test present (no builder.* claims, no -legacy suffix)');
+" 2>&1 \
+  && _pass "fallback (no --flow-id): only workflow.check.test produced, builder.* absent (producers dormant)" \
+  || _fail "fallback (no --flow-id): unexpected claims in trust.bundle"
+# ─── Summary ──────────────────────────────────────────────────────────────────
+# Exit status follows the failure counter: 1 if any check failed, otherwise 0.
+echo ""
+if [ "$errors" -ne 0 ]; then
+  echo "test_flowdef_session_activation: $errors check(s) FAILED."
+  exit 1
+fi
+echo "test_flowdef_session_activation: all checks passed."
+exit 0

package/evals/integration/test_flowdef_session_history_preservation.sh ADDED Viewed

@@ -0,0 +1,250 @@
+#!/usr/bin/env bash
+# test_flowdef_session_history_preservation.sh — Integration eval for ADR 0016 Step 0.
+#
+# Proves:
+#   1. A FlowDefinition-driven session (ensure-session --flow-id builder.build, step=verify)
+#      records a check via the declared builder.verify.tests path, then record-critique and
+#      record-learning PRESERVE the prior declared check + critique claims in the rebuilt
+#      bundle (no history loss).
+#   2. A workflow.* session (no --flow-id) record-critique round-trip is UNCHANGED — only
+#      workflow.check.* and workflow.critique.review claims survive (record-learning is
+#      not exercised for this case).
+#   3. evidenceClean/critiqueClean return correct results for a builder.* bundle:
+#      checked by running dogfood-pass --verdict pass on a clean builder.build session.
+#
+# Deterministic, no model spend, self-cleaning.
+# Usage: bash evals/integration/test_flowdef_session_history_preservation.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$ROOT/evals/lib/node.sh"
+TMP="$(mktemp -d)"
+errors=0
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+cleanup() { rm -rf "$TMP"; }
+trap cleanup EXIT
+WRITER="workflow-sidecar"
+# ─── TEST 1: FlowDefinition-driven session round-trip (no history loss) ────────
+# FLOW_AROOT, SLUG and SESSION_DIR defined here are used by every step of TEST 1.
+echo ""
+echo "=== 1. FlowDefinition-driven session: record-critique/record-learning preserve declared claims ==="
+FLOW_AROOT="$TMP/flow-aroot"
+SLUG="history-flow-test"
+SESSION_DIR="$FLOW_AROOT/$SLUG"
+mkdir -p "$FLOW_AROOT"
+# Create a FlowDefinition-driven session at the verify step (builder.verify.tests is declared)
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$FLOW_AROOT" \
+  --task-slug "$SLUG" \
+  --title "History preservation test" \
+  --summary "Test that declared builder.* claims survive round-trips." \
+  --flow-id builder.build \
+  --step-id verify \
+  --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$SESSION_DIR/$SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
+# Record a passing check (produces ONLY builder.verify.tests declared claim — no legacy shadow, P-d)
+flow_agents_node "$WRITER" record-evidence "$SESSION_DIR" \
+  --verdict pass \
+  --check-json '{"id":"unit-tests","kind":"test","status":"pass","summary":"Unit tests pass"}' \
+  --timestamp "2026-06-01T00:01:00Z" >/dev/null 2>&1
+# Verify declared claim is in bundle before round-trip
+# (baseline: only presence of the claimType is asserted; status is just printed)
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
+const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
+if (!declared) throw new Error('MISSING builder.verify.tests before round-trip; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
+console.log('before round-trip: builder.verify.tests status=' + declared.status);
+" 2>&1 \
+  && _pass "builder.verify.tests declared claim present before round-trip" \
+  || _fail "builder.verify.tests declared claim MISSING before round-trip"
+# Now do record-critique (the round-trip: checksFromBundle + critiquesFromBundle rebuild)
+flow_agents_node "$WRITER" record-critique "$SESSION_DIR" \
+  --id "code-review" \
+  --verdict pass \
+  --summary "Code review passed." \
+  --timestamp "2026-06-01T00:02:00Z" >/dev/null 2>&1
+# Assert builder.verify.tests survived the record-critique round-trip
+# (presence only; the claim's status is printed, not asserted)
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
+const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
+if (!declared) throw new Error('HISTORY LOSS: builder.verify.tests MISSING after record-critique; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
+console.log('after record-critique: builder.verify.tests status=' + declared.status);
+" 2>&1 \
+  && _pass "builder.verify.tests declared claim preserved after record-critique (no history loss)" \
+  || _fail "builder.verify.tests declared claim LOST after record-critique (history loss)"
+# Also verify the critique claim itself is present.
+# In a flow-driven session (verify step), critique maps to the declared builder.verify.policy-compliance
+# (the critique heuristic matches: subjectType=artifact + claimType contains "compliance").
+# workflow.critique.review is emitted in no-flow sessions only (P-d: shadow retired).
+# The check below fails if builder.verify.policy-compliance is absent OR if any
+# workflow.critique.review claim is present in this flow-driven session.
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
+const claims = bundle.claims || [];
+// Declared critique claim for verify-step: builder.verify.policy-compliance
+const crit = claims.find(c => c.claimType === 'builder.verify.policy-compliance');
+if (!crit) throw new Error('MISSING builder.verify.policy-compliance critique claim after record-critique; claims: ' + claims.map(c=>c.claimType).join(', '));
+// Must NOT have workflow.critique.review in a flow-driven session (no shadow, P-d)
+const legacy = claims.find(c => c.claimType === 'workflow.critique.review');
+if (legacy) throw new Error('UNEXPECTED workflow.critique.review in flow-driven session (P-d retired shadow); id=' + legacy.id);
+console.log('declared critique claim: claimType=' + crit.claimType + ' value=' + crit.value);
+" 2>&1 \
+  && _pass "builder.verify.policy-compliance declared critique claim present (no workflow.critique.review shadow, P-d)" \
+  || _fail "declared critique claim MISSING or unexpected workflow.critique.review found after record-critique"
+# Now do record-learning (second round-trip)
+# The --record-json payload is a single-quoted multi-line JSON document, so the
+# shell passes it to the CLI without expansion.
+flow_agents_node "$WRITER" record-learning "$SESSION_DIR" \
+  --status learned \
+  --record-json '{
+    "outcome": "success",
+    "source_refs": [],
+    "facts": ["Tests passed clean."],
+    "routing": [{"target":"none","status":"completed","summary":"No routing needed."}],
+    "correction": {"needed": false, "evidence": "All checks passed cleanly."}
+  }' \
+  --summary "Learning recorded." \
+  --timestamp "2026-06-01T00:03:00Z" >/dev/null 2>&1
+# Assert builder.verify.tests survived the record-learning round-trip
+# (presence only; the critique claim is not re-checked after this step)
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
+const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
+if (!declared) throw new Error('HISTORY LOSS: builder.verify.tests MISSING after record-learning; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
+console.log('after record-learning: builder.verify.tests status=' + declared.status);
+" 2>&1 \
+  && _pass "builder.verify.tests declared claim preserved after record-learning (no history loss)" \
+  || _fail "builder.verify.tests declared claim LOST after record-learning (history loss)"
+# ─── TEST 2: workflow.* session round-trip is UNCHANGED ────────────────────────
+# Baseline session created WITHOUT --flow-id. Only a record-critique round-trip
+# is exercised here; record-learning is not run in this test.
+echo ""
+echo "=== 2. workflow.* session (no --flow-id): round-trip unchanged ==="
+NOFLOW_AROOT="$TMP/noflow-aroot"
+NOFLOW_SLUG="history-noflow-test"
+NOFLOW_DIR="$NOFLOW_AROOT/$NOFLOW_SLUG"
+mkdir -p "$NOFLOW_AROOT"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$NOFLOW_AROOT" \
+  --task-slug "$NOFLOW_SLUG" \
+  --title "No-flow session history test" \
+  --summary "Baseline: no FlowDefinition. Round-trip must be unchanged." \
+  --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$NOFLOW_DIR/$NOFLOW_SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
+# Record a check (produces only workflow.check.test — no declared claims)
+flow_agents_node "$WRITER" record-evidence "$NOFLOW_DIR" \
+  --verdict pass \
+  --check-json '{"id":"noflow-unit-tests","kind":"test","status":"pass","summary":"No-flow tests pass"}' \
+  --timestamp "2026-06-01T10:01:00Z" >/dev/null 2>&1
+# record-critique round-trip
+flow_agents_node "$WRITER" record-critique "$NOFLOW_DIR" \
+  --id "noflow-review" \
+  --verdict pass \
+  --summary "Review passed." \
+  --timestamp "2026-06-01T10:02:00Z" >/dev/null 2>&1
+# Assert only workflow.* claims survived (no builder.* contamination)
+# Requires both workflow.check.test and workflow.critique.review to be present
+# and no claimType starting with 'builder.'.
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$NOFLOW_DIR/trust.bundle', 'utf8'));
+const claims = bundle.claims || [];
+const wfCheck = claims.find(c => c.claimType === 'workflow.check.test');
+const wfCritique = claims.find(c => c.claimType === 'workflow.critique.review');
+const builderClaims = claims.filter(c => c.claimType.startsWith('builder.'));
+if (!wfCheck) throw new Error('MISSING workflow.check.test after record-critique');
+if (!wfCritique) throw new Error('MISSING workflow.critique.review after record-critique');
+if (builderClaims.length > 0) throw new Error('UNEXPECTED builder.* claims in no-flow session after round-trip: ' + builderClaims.map(c=>c.claimType).join(', '));
+console.log('after record-critique: workflow.check.test + workflow.critique.review, no builder.*');
+" 2>&1 \
+  && _pass "no-flow session: workflow.* only after record-critique round-trip (unchanged)" \
+  || _fail "no-flow session: unexpected claims after record-critique round-trip"
+# ─── TEST 3: evidenceClean/critiqueClean correct for builder.* bundle ──────────
+# evidenceClean/critiqueClean are not called directly; they are exercised
+# indirectly through the exit status of the dogfood-pass command below.
+echo ""
+echo "=== 3. evidenceClean/critiqueClean correct for builder.* bundle ==="
+# Create a fresh builder.build session at verify step for dogfood-pass test
+DOGFOOD_AROOT="$TMP/dogfood-aroot"
+DOGFOOD_SLUG="dogfood-clean-test"
+DOGFOOD_DIR="$DOGFOOD_AROOT/$DOGFOOD_SLUG"
+mkdir -p "$DOGFOOD_AROOT"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$DOGFOOD_AROOT" \
+  --task-slug "$DOGFOOD_SLUG" \
+  --title "Dogfood clean test" \
+  --summary "Test evidenceClean/critiqueClean on builder.build session." \
+  --flow-id builder.build \
+  --step-id verify \
+  --timestamp "2026-06-01T20:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$DOGFOOD_DIR/$DOGFOOD_SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-01T20:00:00Z" >/dev/null 2>&1
+# Record pass evidence (produces builder.verify.tests declared claim, status=verified)
+flow_agents_node "$WRITER" record-evidence "$DOGFOOD_DIR" \
+  --verdict pass \
+  --check-json '{"id":"ev-check","kind":"test","status":"pass","summary":"Evidence check passes"}' \
+  --timestamp "2026-06-01T20:01:00Z" >/dev/null 2>&1
+# Record pass critique
+flow_agents_node "$WRITER" record-critique "$DOGFOOD_DIR" \
+  --id "ev-critique" \
+  --verdict pass \
+  --summary "Critique passed." \
+  --timestamp "2026-06-01T20:02:00Z" >/dev/null 2>&1
+# dogfood-pass --verdict pass should succeed: evidenceClean=true (builder.verify.tests passes)
+# and critiqueClean=true (builder.verify.policy-compliance passes — declared critique for verify step).
+# The command's exit status alone decides pass/fail here; its output is discarded.
+flow_agents_node "$WRITER" dogfood-pass \
+  --artifact-root "$DOGFOOD_AROOT" \
+  --artifact-dir "$DOGFOOD_DIR" \
+  --verdict pass \
+  --check-json '{"id":"dogfood-ev-check","kind":"test","status":"pass","summary":"Dogfood evidence check"}' \
+  --summary "Dogfood pass for builder.build session." \
+  --timestamp "2026-06-01T20:03:00Z" >/dev/null 2>&1 \
+  && _pass "dogfood-pass succeeds: evidenceClean returns true for builder.verify.tests declared claim" \
+  || _fail "dogfood-pass FAILED: evidenceClean did not recognize builder.verify.tests as passing evidence"
+# Verify directly that the bundle has builder.verify.tests as the evidence claim
+# (requires a claim with claimType builder.verify.tests AND value 'pass')
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$DOGFOOD_DIR/trust.bundle', 'utf8'));
+const claims = bundle.claims || [];
+const builderCheck = claims.find(c => c.claimType === 'builder.verify.tests' && c.value === 'pass');
+if (!builderCheck) throw new Error('MISSING builder.verify.tests (pass) in bundle; claims: ' + claims.map(c=>c.claimType+'='+c.value).join(', '));
+console.log('builder.verify.tests evidence claim present with value=pass, status=' + builderCheck.status);
+" 2>&1 \
+  && _pass "bundle contains builder.verify.tests with value=pass (declared claim recognized by evidenceClean)" \
+  || _fail "bundle missing builder.verify.tests with value=pass"
+# ─── Summary ──────────────────────────────────────────────────────────────────
+# Exit status follows the failure counter: 1 if any check failed, otherwise 0.
+echo ""
+if [ "$errors" -ne 0 ]; then
+  echo "test_flowdef_session_history_preservation: $errors check(s) FAILED."
+  exit 1
+fi
+echo "test_flowdef_session_history_preservation: all checks passed."
+exit 0