npm - @kontourai/flow-agents - Versions diffs - 1.4.0 → 2.0.0 - Mend

@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

package/.github/CODEOWNERS +29 -0
package/.github/actions/trust-verify/action.yml +145 -0
package/.github/workflows/ci.yml +11 -4
package/.github/workflows/kit-gates-demo.yml +2 -2
package/.github/workflows/publish-npm.yml +10 -2
package/.github/workflows/release-please.yml +1 -1
package/.github/workflows/trust-reconcile.yml +113 -0
package/AGENTS.md +13 -0
package/CHANGELOG.md +95 -0
package/CONTRIBUTING.md +4 -4
package/README.md +1 -0
package/agents/tool-planner.json +1 -1
package/build/src/cli/init.js +242 -20
package/build/src/cli/validate-workflow-artifacts.js +19 -2
package/build/src/cli/verify.d.ts +1 -0
package/build/src/cli/verify.js +90 -0
package/build/src/cli/workflow-sidecar.d.ts +300 -8
package/build/src/cli/workflow-sidecar.js +1934 -83
package/build/src/cli.js +2 -3
package/build/src/lib/flow-resolver.d.ts +82 -0
package/build/src/lib/flow-resolver.js +237 -0
package/build/src/tools/build-universal-bundles.js +34 -22
package/build/src/tools/generate-context-map.js +3 -16
package/build/src/tools/validate-source-tree.d.ts +1 -1
package/build/src/tools/validate-source-tree.js +42 -162
package/context/contracts/artifact-contract.md +10 -0
package/context/contracts/delivery-contract.md +1 -0
package/context/contracts/review-contract.md +1 -0
package/context/contracts/verification-contract.md +2 -0
package/context/gate-awareness.md +39 -0
package/context/scripts/hooks/stop-goal-fit.js +632 -70
package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
package/docs/adr/0007-skill-audit.md +1 -1
package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
package/docs/adr/0011-mcp-posture.md +100 -0
package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
package/docs/adr/0013-context-lifecycle.md +151 -0
package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
package/docs/adr/0016-three-hard-boundary-model.md +71 -0
package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
package/docs/agent-system-guidebook.md +5 -12
package/docs/context-map.md +4 -10
package/docs/index.md +3 -2
package/docs/integrations/framework-adapter.md +19 -6
package/docs/integrations/index.md +2 -2
package/docs/north-star.md +4 -4
package/docs/operating-layers.md +3 -3
package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
package/docs/repository-structure.md +2 -2
package/docs/skills-map.md +1 -0
package/docs/spec/runtime-hook-surface.md +62 -9
package/docs/standards-register.md +3 -3
package/docs/survey-utterance-check.md +1 -1
package/docs/trust-anchor-adoption.md +197 -0
package/docs/verifiable-trust.md +95 -0
package/docs/veritas-integration.md +2 -2
package/docs/workflow-usage-guide.md +69 -0
package/evals/acceptance/DEMO-false-completion.md +144 -0
package/evals/acceptance/demo-cast.sh +92 -0
package/evals/acceptance/demo-false-completion.sh +72 -0
package/evals/acceptance/demo-real-evidence.sh +104 -0
package/evals/acceptance/demo.tape +29 -0
package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
package/evals/acceptance/prove-capture-teeth.sh +114 -0
package/evals/acceptance/prove-teeth.sh +105 -0
package/evals/ci/antigaming-suite.sh +54 -0
package/evals/ci/run-baseline.sh +2 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
package/evals/integration/test_builder_step_producers.sh +379 -0
package/evals/integration/test_bundle_install.sh +35 -71
package/evals/integration/test_bundle_lifecycle.sh +39 -2
package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
package/evals/integration/test_checkpoint_signing.sh +489 -0
package/evals/integration/test_claim_lookup.sh +352 -0
package/evals/integration/test_command_log_integrity.sh +275 -0
package/evals/integration/test_context_map.sh +0 -2
package/evals/integration/test_dual_emit_flow_step.sh +278 -0
package/evals/integration/test_enforcer_expects_driven.sh +281 -0
package/evals/integration/test_evidence_capture_hook.sh +185 -0
package/evals/integration/test_flow_kit_repository.sh +2 -0
package/evals/integration/test_flowdef_session_activation.sh +273 -0
package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
package/evals/integration/test_gate_bypass_chain.sh +448 -0
package/evals/integration/test_gate_lockdown.sh +1137 -0
package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
package/evals/integration/test_goal_fit_hook.sh +69 -4
package/evals/integration/test_goal_fit_rederive.sh +263 -0
package/evals/integration/test_install_merge.sh +1176 -0
package/evals/integration/test_mint_attestation.sh +373 -0
package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
package/evals/integration/test_publish_delivery.sh +269 -0
package/evals/integration/test_reconcile_soundness.sh +528 -0
package/evals/integration/test_resolvefirststep_security.sh +208 -0
package/evals/integration/test_session_resume_roundtrip.sh +286 -0
package/evals/integration/test_trust_checkpoint.sh +325 -0
package/evals/integration/test_trust_reconcile.sh +293 -0
package/evals/integration/test_verify_cli.sh +208 -0
package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
package/evals/lib/node.sh +0 -6
package/evals/run.sh +45 -0
package/evals/static/test_workflow_skills.sh +6 -13
package/install.sh +0 -7
package/integrations/strands-ts/README.md +25 -15
package/integrations/veritas/flow-agents.adapter.json +1 -2
package/kits/builder/flows/build.flow.json +59 -12
package/kits/builder/kit.json +85 -15
package/kits/builder/skills/continue-work/SKILL.md +116 -0
package/kits/builder/skills/deliver/SKILL.md +36 -6
package/kits/builder/skills/design-probe/SKILL.md +28 -0
package/kits/builder/skills/execute-plan/SKILL.md +9 -1
package/kits/builder/skills/gate-review/SKILL.md +234 -0
package/kits/builder/skills/learning-review/SKILL.md +30 -0
package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
package/kits/builder/skills/plan-work/SKILL.md +13 -1
package/kits/builder/skills/pull-work/SKILL.md +19 -0
package/kits/knowledge/adapters/default-store/index.js +38 -0
package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
package/kits/knowledge/docs/store-contract.md +314 -0
package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
package/kits/knowledge/evals/entities/suite.test.js +40 -0
package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
package/kits/knowledge/evals/retirement/suite.test.js +145 -0
package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
package/kits/knowledge/kit.json +51 -1
package/package.json +4 -4
package/packaging/conformance/README.md +10 -2
package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
package/packaging/conformance/run-conformance.js +1 -1
package/scripts/README.md +2 -1
package/scripts/build-universal-bundles.js +0 -1
package/scripts/ci/mint-attestation.js +221 -0
package/scripts/ci/trust-reconcile.js +545 -0
package/scripts/hooks/config-protection.js +423 -1
package/scripts/hooks/evidence-capture.js +348 -0
package/scripts/hooks/lib/liveness-read.js +113 -0
package/scripts/hooks/run-hook.js +6 -1
package/scripts/hooks/stop-goal-fit.js +1471 -79
package/scripts/hooks/workflow-steering.js +135 -5
package/scripts/install-codex-home.sh +39 -0
package/scripts/install-merge.js +330 -0
package/src/cli/init.ts +218 -20
package/src/cli/validate-workflow-artifacts.ts +18 -2
package/src/cli/verify.ts +100 -0
package/src/cli/workflow-sidecar.ts +2064 -77
package/src/cli.ts +2 -3
package/src/lib/flow-resolver.ts +284 -0
package/src/tools/build-universal-bundles.ts +34 -21
package/src/tools/generate-context-map.ts +3 -17
package/src/tools/validate-source-tree.ts +44 -104
package/build/src/tools/filter-installed-packs.d.ts +0 -2
package/build/src/tools/filter-installed-packs.js +0 -135
package/packaging/packs.json +0 -49
package/scripts/filter-installed-packs.js +0 -2
package/src/tools/filter-installed-packs.ts +0 -132

package/evals/integration/test_evidence_capture_hook.sh ADDED Viewed

@@ -0,0 +1,185 @@
+#!/usr/bin/env bash
+# test_evidence_capture_hook.sh — Capture-first evidence determinism contracts.
+#
+# Part A: evidence-capture.js deterministically records command executions to
+#         .flow-agents/<slug>/command-log.jsonl (machine-recorded, not model-claimed).
+# Part B: stop-goal-fit.js cross-references evidence.json claimed-pass command
+#         checks against the capture log, and re-runs a TRUSTED backstop command
+#         only when the log has no execution for a claimed-pass command.
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+CAPTURE="$ROOT/scripts/hooks/evidence-capture.js"
+GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
+# Disable the block escape hatch so repeated independent assertions never trip it.
+export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
+TMP="$(mktemp -d)"
+errors=0
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+# ---- helpers -------------------------------------------------------------
+# seed_repo DIR SLUG
+# Build a minimal fixture repo at DIR: an AGENTS.md plus a .flow-agents/SLUG session
+# directory holding a "delivered" state.json and a deliver artifact with a PASS verdict.
+seed_repo() {
+  local repo_dir="$1"
+  local task="$2"
+  local session_dir="$repo_dir/.flow-agents/$task"
+  mkdir -p "$session_dir"
+  printf '# Repo\n' > "$repo_dir/AGENTS.md"
+  printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$task\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-23T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" > "$session_dir/state.json"
+  cat > "$session_dir/$task--deliver.md" <<MD
+# $task
+branch: main
+status: delivered
+type: deliver
+## Definition Of Done
+- [x] tests pass
+## Goal Fit Gate
+- [x] acceptance verified
+### Verdict: PASS
+MD
+}
+# capture: run evidence-capture.js with the hook payload JSON supplied on stdin,
+# discarding both stdout and stderr. The return status is node's exit status.
+capture() {
+  node "$CAPTURE" &>/dev/null
+}
+# ============================================================================
+# Part A — deterministic capture
+# ============================================================================
+A="$TMP/capture"; seed_repo "$A" t1
+echo "Part A: deterministic capture"
+printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm test"},"tool_response":{"exitCode":0,"stdout":"ok"}}' "$A" | capture
+printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run lint"},"error":"command failed"}' "$A" | capture
+printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"make build"},"tool_response":{"exit_code":2}}' "$A" | capture
+# A non-command tool (Write) must NOT be captured.
+printf '{"hook_event_name":"PostToolUse","tool_name":"Write","cwd":"%s","tool_input":{"file_path":"/tmp/x"}}' "$A" | capture
+LOG="$A/.flow-agents/t1/command-log.jsonl"
+if [[ -f "$LOG" ]]; then _pass "capture writes command-log.jsonl"; else _fail "capture did not write command-log.jsonl"; fi
+lines=$(wc -l < "$LOG" | tr -d ' ')
+if [[ "$lines" == "3" ]]; then _pass "capture records 3 command executions (Write tool excluded)"; else _fail "expected 3 log lines, got $lines"; fi
+if rg -q '"command":"npm test","observedResult":"pass","exitCode":0' "$LOG"; then
+  _pass "clean exit 0 recorded as observedResult:pass exitCode:0"
+else _fail "passing command not recorded correctly: $(cat "$LOG")"; fi
+if rg -q '"command":"npm run lint","observedResult":"fail","exitCode":null' "$LOG"; then
+  _pass "error field with no exit code recorded as fail exitCode:null"
+else _fail "errored command not recorded correctly"; fi
+if rg -q '"command":"make build","observedResult":"fail","exitCode":2' "$LOG"; then
+  _pass "non-zero exit recorded as fail with exitCode"
+else _fail "non-zero-exit command not recorded correctly"; fi
+if rg -q '"source":"postToolUse-capture"' "$LOG"; then _pass "records source:postToolUse-capture"; else _fail "missing source field"; fi
+# Capture is non-blocking: it always exits 0 and echoes stdin.
+out=$(printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"echo hi"},"error":"boom"}' "$A" | node "$CAPTURE"; echo "EXIT=$?")
+if rg -q 'EXIT=0' <<<"$out" && rg -q 'echo hi' <<<"$out"; then
+  _pass "capture is non-blocking (exit 0, echoes stdin) even on a failing command"
+else _fail "capture should be non-blocking and echo stdin"; fi
+# ============================================================================
+# Part B1 — gate cross-references log: claimed pass but log shows FAIL → block
+# ============================================================================
+echo "Part B1: log contradicts claimed pass → block"
+B="$TMP/contradict"; seed_repo "$B" t1
+printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' > "$B/.flow-agents/t1/evidence.json"
+printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' > "$B/.flow-agents/t1/command-log.jsonl"
+if FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$GATE" >/dev/null 2>"$TMP/b1.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$B"}
+JSON
+then _fail "gate should BLOCK when capture log contradicts claimed pass"
+else
+  status=$?
+  if [[ "$status" -eq 2 ]] && rg -q 'capture log CONTRADICTS claimed pass' "$TMP/b1.err" && rg -q 'caught false-completion' "$TMP/b1.err"; then
+    _pass "gate blocks (exit 2) caught false-completion via capture log"
+  else _fail "gate returned unexpected result: status=$status output=$(cat "$TMP/b1.err")"; fi
+fi
+# ============================================================================
+# Part B2 — gate cross-references log: claimed pass and log shows PASS → no re-run
+# ============================================================================
+echo "Part B2: log confirms claimed pass → satisfied, no re-run"
+C="$TMP/confirm"; seed_repo "$C" t1
+printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' > "$C/.flow-agents/t1/evidence.json"
+printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' > "$C/.flow-agents/t1/command-log.jsonl"
+# A poisoned npm on PATH proves the gate does NOT re-run when the log confirms.
+POISON="$TMP/poison"; mkdir -p "$POISON"
+printf '#!/usr/bin/env bash\necho "npm should not run" >&2\nexit 99\n' > "$POISON/npm"; chmod +x "$POISON/npm"
+PATH="$POISON:$PATH" FLOW_AGENTS_GOAL_FIT_MODE=block node "$GATE" >/dev/null 2>"$TMP/b2.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$C"}
+JSON
+if rg -q 'CONTRADICTS|backstop|npm should not run' "$TMP/b2.err"; then
+  _fail "gate should NOT re-run or warn when the capture log confirms the pass: $(cat "$TMP/b2.err")"
+else _pass "gate trusts the log on a confirmed pass and does not re-run the backstop"; fi
+# ============================================================================
+# Part B3 — never-captured claimed-pass command → trusted backstop re-run (declared manifest target FAILS) → block
+# ============================================================================
+echo "Part B3: never-captured claim → trusted manifest backstop catches a fail"
+D="$TMP/backstop"; seed_repo "$D" t1
+printf '%s' '{"name":"x","scripts":{"test":"exit 7"}}' > "$D/package.json"
+printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' > "$D/.flow-agents/t1/evidence.json"
+# command-log.jsonl intentionally absent — the command was never actually run.
+if FLOW_AGENTS_GOAL_FIT_MODE=block node "$GATE" >/dev/null 2>"$TMP/b3.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$D"}
+JSON
+then _fail "gate should BLOCK when trusted backstop re-run of declared manifest target fails"
+else
+  status=$?
+  if [[ "$status" -eq 2 ]] && rg -q 'trusted backstop \(manifest\)' "$TMP/b3.err" && rg -q 'FAILED with exit 7' "$TMP/b3.err"; then
+    _pass "gate runs trusted declared manifest target as backstop and blocks on its failure"
+  else _fail "backstop did not catch declared-target failure: status=$status output=$(cat "$TMP/b3.err")"; fi
+fi
+# ============================================================================
+# Part B4 — never-captured claim, no trusted command resolves → NOT_VERIFIED (never a silent pass)
+# ============================================================================
+echo "Part B4: never-captured claim, nothing trusted resolves → NOT_VERIFIED"
+E="$TMP/notverified"; seed_repo "$E" t1
+printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"custom","kind":"command","status":"pass","command":"./my-thing.sh","summary":"ran custom"}]}' > "$E/.flow-agents/t1/evidence.json"
+if FLOW_AGENTS_GOAL_FIT_MODE=block node "$GATE" >/dev/null 2>"$TMP/b4.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$E"}
+JSON
+then _fail "gate should not silently pass an un-captured, un-verifiable claimed-pass command"
+else
+  status=$?
+  if [[ "$status" -eq 2 ]] && rg -q 'NOT_VERIFIED' "$TMP/b4.err" && rg -q 'no trusted command' "$TMP/b4.err"; then
+    _pass "gate records NOT_VERIFIED (never a guess) when no trusted command resolves"
+  else _fail "NOT_VERIFIED path returned unexpected result: status=$status output=$(cat "$TMP/b4.err")"; fi
+fi
+# ============================================================================
+# Part B5 — arbitrary model command is opt-in only (FLOW_AGENTS_GOAL_FIT_RECHECK)
+# ============================================================================
+echo "Part B5: free-form model command re-run is opt-in only"
+F="$TMP/recheck"; seed_repo "$F" t1
+printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"custom","kind":"command","status":"pass","command":"exit 5","summary":"ran custom"}]}' > "$F/.flow-agents/t1/evidence.json"
+# Opt-in ON: the model's free-form "exit 5" is re-run and fails → block.
+if FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_RECHECK=true node "$GATE" >/dev/null 2>"$TMP/b5.err" <<JSON
+{"hook_event_name":"Stop","cwd":"$F"}
+JSON
+then _fail "with RECHECK=true the failing model command should block"
+else
+  status=$?
+  if [[ "$status" -eq 2 ]] && rg -q 'FLOW_AGENTS_GOAL_FIT_RECHECK' "$TMP/b5.err"; then
+    _pass "FLOW_AGENTS_GOAL_FIT_RECHECK=true opts into re-running the model's free-form command"
+  else _fail "recheck opt-in path returned unexpected result: status=$status output=$(cat "$TMP/b5.err")"; fi
+fi
+if [[ "$errors" -eq 0 ]]; then
+  echo "Evidence capture hook integration passed."
+  exit 0
+fi
+echo "Evidence capture hook integration failed: $errors issue(s)."
+exit 1

package/evals/integration/test_flow_kit_repository.sh CHANGED Viewed

@@ -53,6 +53,7 @@ expect_fail() {
 echo "=== Flow Kit Repository Fixture Checks ==="
 expect_pass "valid-local-kit"
+expect_pass "valid-unknown-extension"
 expect_fail "invalid-schema-version" '\.schema_version must be "1\.0"'
 expect_fail "invalid-missing-schema-version" '\.schema_version must be "1\.0"'
 expect_fail "invalid-id" '\.id must be a kebab-case string'
@@ -63,6 +64,7 @@ expect_fail "invalid-absolute-path" 'flows\[0\]\.path must be relative'
 expect_fail "invalid-traversal" "flows\\[0\\]\\.path must not contain"
 expect_fail "invalid-malformed-json" 'invalid JSON'
 expect_fail "invalid-asset-section" '\.docs must be a list'
+expect_fail "invalid-missing-extension-asset" 'docs\[0\]\.path points at missing asset'
 expect_fail "invalid-duplicate-flow" "flows\\[1\\]\\.path duplicates"
 echo ""

package/evals/integration/test_flowdef_session_activation.sh ADDED Viewed

@@ -0,0 +1,273 @@
+#!/usr/bin/env bash
+# test_flowdef_session_activation.sh — Integration eval for ADR 0016 Step 1.
+#
+# Proves that ensure-session --flow-id builder.build activates the FlowDefinition-
+# driven path so producers fire, gates enforce on builder.* claims, and advance-state
+# correctly sets active_step_id via the phase_map at each phase.
+#
+# Tests:
+#   1. ensure-session --flow-id builder.build writes active_flow_id + default
+#      active_step_id (pull-work) to current.json.
+#   2. advance-state through phases (planning→execution→verification) sets correct
+#      active_step_id via phase_map at each transition.
+#   3. At the verify step, record-gate-claim for tests-evidence produces
+#      builder.verify.tests (status=verified) in the bundle — producer fires.
+#   4. A TAMPERED builder.verify.tests bundle at the verify step BLOCKS (exit 2)
+#      with the tamper warning naming the declared claimType.
+#   5. Fallback: session without --flow-id produces only workflow.* claims (the
+#      retained safety net for non-flow sessions).
+#
+# Deterministic, no model spend, self-cleaning.
+# Usage: bash evals/integration/test_flowdef_session_activation.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$ROOT/evals/lib/node.sh"
+GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
+export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
+TMP="$(mktemp -d)"
+errors=0
+_pass() { echo "  ✓ $1"; }
+_fail() { echo "  ✗ $1"; errors=$((errors + 1)); }
+cleanup() { rm -rf "$TMP"; }
+trap cleanup EXIT
+WRITER="workflow-sidecar"
+# ─── TEST 1: ensure-session --flow-id activates the flow ─────────────────────
+echo ""
+echo "=== 1. ensure-session --flow-id builder.build activates FlowDefinition-driven path ==="
+MAIN_AROOT="$TMP/main-aroot"
+SLUG="activation-test"
+SESSION_DIR="$MAIN_AROOT/$SLUG"
+mkdir -p "$MAIN_AROOT"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$MAIN_AROOT" \
+  --task-slug "$SLUG" \
+  --title "Step 1 activation test" \
+  --summary "Test that --flow-id builder.build activates the FlowDefinition-driven path." \
+  --criterion "All gates produce declared claims" \
+  --flow-id builder.build \
+  --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
+node -e "
+const fs = require('fs');
+const c = JSON.parse(fs.readFileSync('$MAIN_AROOT/current.json', 'utf8'));
+if (c.active_flow_id !== 'builder.build') throw new Error('expected active_flow_id=builder.build, got ' + c.active_flow_id);
+if (!c.active_step_id) throw new Error('expected active_step_id to be set (first step default), got ' + c.active_step_id);
+console.log('current.json: active_flow_id=' + c.active_flow_id + ' active_step_id=' + c.active_step_id);
+" 2>&1 \
+  && _pass "ensure-session --flow-id builder.build writes active_flow_id + default active_step_id to current.json" \
+  || _fail "ensure-session --flow-id builder.build did NOT write active_flow_id to current.json"
+# ─── TEST 2: advance-state sets active_step_id via phase_map ─────────────────
+echo ""
+echo "=== 2. advance-state through phases sets active_step_id via phase_map ==="
+flow_agents_node "$WRITER" init-plan "$SESSION_DIR/$SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing" \
+  --timestamp "2026-06-01T00:00:30Z" >/dev/null 2>&1
+test_phase_step() {
+  local phase="$1" expected_step="$2"
+  flow_agents_node "$WRITER" advance-state "$SESSION_DIR" \
+    --status in_progress --phase "$phase" \
+    --summary "Testing phase $phase." \
+    --next-action "Continue." \
+    --flow-definition builder.build \
+    --timestamp "2026-06-01T00:01:00Z" >/dev/null 2>&1
+  local actual
+  actual=$(node -e "
+    const fs = require('fs');
+    const c = JSON.parse(fs.readFileSync('$MAIN_AROOT/current.json', 'utf8'));
+    console.log(c.active_step_id || '');
+  " 2>/dev/null)
+  if [ "$actual" = "$expected_step" ]; then
+    _pass "advance-state phase=$phase → active_step_id=$expected_step"
+  else
+    _fail "advance-state phase=$phase → got active_step_id=$actual (expected $expected_step)"
+  fi
+}
+test_phase_step "planning"     "plan"
+test_phase_step "execution"    "execute"
+test_phase_step "verification" "verify"
+# ─── TEST 3: at verify step, record-gate-claim produces builder.verify.tests ──
+echo ""
+echo "=== 3. verify step: producer fires — record-gate-claim produces builder.verify.tests ==="
+if flow_agents_node "$WRITER" record-gate-claim "$SESSION_DIR" \
+  --status pass \
+  --summary "All tests pass." \
+  --expectation "tests-evidence" \
+  --timestamp "2026-06-01T00:02:00Z" >/dev/null 2>&1; then
+  _pass "record-gate-claim at verify step succeeds (expectation=tests-evidence)"
+else
+  _fail "record-gate-claim at verify step FAILED"
+fi
+node -e "
+const fs = require('fs');
+const bundlePath = '$SESSION_DIR/trust.bundle';
+if (!fs.existsSync(bundlePath)) throw new Error('trust.bundle not found');
+const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf8'));
+const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
+if (!declared) throw new Error('MISSING builder.verify.tests; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
+if (declared.status !== 'verified') throw new Error('expected status=verified, got ' + declared.status);
+console.log('builder.verify.tests: subjectType=' + declared.subjectType + ' status=' + declared.status + ' value=' + declared.value);
+" 2>&1 \
+  && _pass "bundle contains builder.verify.tests (subjectType=flow-step, status=verified, value=pass)" \
+  || _fail "bundle missing or incorrect builder.verify.tests claim"
+# ─── TEST 4: tampered bundle at verify step BLOCKS ────────────────────────────
+echo ""
+echo "=== 4. tamper-blocks: builder.verify.tests — tampered bundle triggers gate exit 2 ==="
+TAMPER_DIR="$TMP/tamper-verify"
+TAMPER_SLUG="tamper-verify-test"
+mkdir -p "$TAMPER_DIR"
+printf '# Test repo\n' > "$TAMPER_DIR/AGENTS.md"
+mkdir -p "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$TAMPER_DIR/.flow-agents" \
+  --task-slug "$TAMPER_SLUG" \
+  --title "Tamper verify test" \
+  --summary "Testing tamper detection at verify step." \
+  --flow-id builder.build \
+  --step-id verify \
+  --timestamp "2026-06-01T02:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG/$TAMPER_SLUG--deliver.md" \
+  --source-request "Test" --summary "Tamper test" \
+  --timestamp "2026-06-01T02:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" advance-state "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG" \
+  --status in_progress --phase verification \
+  --summary "At verify." --next-action "Continue." \
+  --flow-definition builder.build \
+  --timestamp "2026-06-01T02:00:30Z" >/dev/null 2>&1
+# Write TAMPERED trust.bundle: stored verified, evidence passing=false
+python3 - "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG/trust.bundle" << 'PY'
+import json, sys
+bundle = {
+    "schemaVersion": 3,
+    "source": "flow-agents/workflow-sidecar",
+    "claims": [{
+        "id": "c1",
+        "subjectId": "tamper-verify-test/verify-tests",
+        "subjectType": "flow-step",
+        "claimType": "builder.verify.tests",
+        "fieldOrBehavior": "Tests pass",
+        "value": "pass",
+        "impactLevel": "high",
+        "status": "verified",
+        "createdAt": "2026-06-01T02:00:00Z",
+        "updatedAt": "2026-06-01T02:00:00Z"
+    }],
+    "evidence": [{
+        "id": "ev1",
+        "claimId": "c1",
+        "evidenceType": "test_output",
+        "method": "validation",
+        "sourceRef": "command-log.jsonl",
+        "excerptOrSummary": "tests FAILED",
+        "observedAt": "2026-06-01T02:00:00Z",
+        "collectedBy": "harness",
+        "passing": False,
+        "blocking": True
+    }],
+    "policies": [],
+    "events": [{
+        "id": "evt1",
+        "claimId": "c1",
+        "status": "verified",
+        "actor": "agent",
+        "method": "workflow-check",
+        "evidenceIds": ["ev1"],
+        "createdAt": "2026-06-01T02:00:00Z"
+    }]
+}
+json.dump(bundle, open(sys.argv[1], 'w'))
+PY
+# Run the Stop gate against the tampered session, capturing combined output and exit code.
+# This script starts with `set -uo pipefail` and never enables errexit, so no
+# `set +e` / `set -e` bracket is used: a trailing `set -e` would switch errexit ON for
+# the rest of the script and let any later failing writer call abort the run before the
+# summary is printed. The `|| tamper_exit=$?` form records the status under either setting.
+tamper_exit=0
+tamper_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$TAMPER_DIR\"}")" || tamper_exit=$?
+if [ "$tamper_exit" -eq 2 ]; then
+  _pass "gate BLOCKS tampered builder.verify.tests bundle (exit 2)"
+else
+  _fail "gate did NOT block tampered bundle: exit=$tamper_exit"
+fi
+if echo "$tamper_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle|caught false-completion"; then
+  _pass "gate emits tamper warning for builder.verify.tests"
+else
+  _fail "gate tamper warning missing from output: $tamper_out"
+fi
+if echo "$tamper_out" | grep -q "builder.verify.tests"; then
+  _pass "gate tamper warning names declared claimType builder.verify.tests"
+else
+  _fail "gate tamper warning does not name builder.verify.tests: $tamper_out"
+fi
+# ─── TEST 5: Fallback — session without --flow-id (workflow.* only, safety net) ─
+echo ""
+echo "=== 5. Fallback: session without --flow-id produces only workflow.* claims (safety net intact) ==="
+FALLBACK_AROOT="$TMP/fallback-aroot"
+FALLBACK_SLUG="fallback-test"
+FALLBACK_DIR="$FALLBACK_AROOT/$FALLBACK_SLUG"
+mkdir -p "$FALLBACK_AROOT"
+flow_agents_node "$WRITER" ensure-session \
+  --artifact-root "$FALLBACK_AROOT" \
+  --task-slug "$FALLBACK_SLUG" \
+  --title "Fallback no-flow test" \
+  --summary "No --flow-id: workflow.* fallback is the safety net for non-flow sessions." \
+  --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" init-plan "$FALLBACK_DIR/$FALLBACK_SLUG--deliver.md" \
+  --source-request "Test" --summary "Testing fallback." \
+  --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
+flow_agents_node "$WRITER" record-evidence "$FALLBACK_DIR" \
+  --verdict pass \
+  --check-json '{"id":"fallback-check","kind":"test","status":"pass","summary":"Fallback test passes"}' \
+  --timestamp "2026-06-01T10:01:00Z" >/dev/null 2>&1
+node -e "
+const fs = require('fs');
+const bundle = JSON.parse(fs.readFileSync('$FALLBACK_DIR/trust.bundle', 'utf8'));
+const claims = bundle.claims || [];
+const wfClaim = claims.find(c => c.claimType === 'workflow.check.test');
+const builderClaims = claims.filter(c => c.claimType.startsWith('builder.'));
+if (!wfClaim) throw new Error('MISSING workflow.check.test in fallback session');
+if (builderClaims.length > 0) throw new Error('UNEXPECTED builder.* claims in fallback session: ' + builderClaims.map(c=>c.claimType).join(', '));
+if (wfClaim.id.endsWith('-legacy')) throw new Error('workflow.check.test should not have -legacy suffix when no flow active');
+console.log('fallback: only workflow.check.test present (no builder.* claims, no -legacy suffix)');
+" 2>&1 \
+  && _pass "fallback (no --flow-id): only workflow.check.test produced, builder.* absent (producers dormant)" \
+  || _fail "fallback (no --flow-id): unexpected claims in trust.bundle"
+# ─── Summary ──────────────────────────────────────────────────────────────────
+echo ""
+if [ "$errors" -eq 0 ]; then
+  echo "test_flowdef_session_activation: all checks passed."
+  exit 0
+fi
+echo "test_flowdef_session_activation: $errors check(s) FAILED."
+exit 1