npm - @kontourai/flow-agents - Versions diffs - 1.4.0 → 2.0.0 - Mend

@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

package/.github/CODEOWNERS +29 -0
package/.github/actions/trust-verify/action.yml +145 -0
package/.github/workflows/ci.yml +11 -4
package/.github/workflows/kit-gates-demo.yml +2 -2
package/.github/workflows/publish-npm.yml +10 -2
package/.github/workflows/release-please.yml +1 -1
package/.github/workflows/trust-reconcile.yml +113 -0
package/AGENTS.md +13 -0
package/CHANGELOG.md +95 -0
package/CONTRIBUTING.md +4 -4
package/README.md +1 -0
package/agents/tool-planner.json +1 -1
package/build/src/cli/init.js +242 -20
package/build/src/cli/validate-workflow-artifacts.js +19 -2
package/build/src/cli/verify.d.ts +1 -0
package/build/src/cli/verify.js +90 -0
package/build/src/cli/workflow-sidecar.d.ts +300 -8
package/build/src/cli/workflow-sidecar.js +1934 -83
package/build/src/cli.js +2 -3
package/build/src/lib/flow-resolver.d.ts +82 -0
package/build/src/lib/flow-resolver.js +237 -0
package/build/src/tools/build-universal-bundles.js +34 -22
package/build/src/tools/generate-context-map.js +3 -16
package/build/src/tools/validate-source-tree.d.ts +1 -1
package/build/src/tools/validate-source-tree.js +42 -162
package/context/contracts/artifact-contract.md +10 -0
package/context/contracts/delivery-contract.md +1 -0
package/context/contracts/review-contract.md +1 -0
package/context/contracts/verification-contract.md +2 -0
package/context/gate-awareness.md +39 -0
package/context/scripts/hooks/stop-goal-fit.js +632 -70
package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
package/docs/adr/0007-skill-audit.md +1 -1
package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
package/docs/adr/0011-mcp-posture.md +100 -0
package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
package/docs/adr/0013-context-lifecycle.md +151 -0
package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
package/docs/adr/0016-three-hard-boundary-model.md +71 -0
package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
package/docs/agent-system-guidebook.md +5 -12
package/docs/context-map.md +4 -10
package/docs/index.md +3 -2
package/docs/integrations/framework-adapter.md +19 -6
package/docs/integrations/index.md +2 -2
package/docs/north-star.md +4 -4
package/docs/operating-layers.md +3 -3
package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
package/docs/repository-structure.md +2 -2
package/docs/skills-map.md +1 -0
package/docs/spec/runtime-hook-surface.md +62 -9
package/docs/standards-register.md +3 -3
package/docs/survey-utterance-check.md +1 -1
package/docs/trust-anchor-adoption.md +197 -0
package/docs/verifiable-trust.md +95 -0
package/docs/veritas-integration.md +2 -2
package/docs/workflow-usage-guide.md +69 -0
package/evals/acceptance/DEMO-false-completion.md +144 -0
package/evals/acceptance/demo-cast.sh +92 -0
package/evals/acceptance/demo-false-completion.sh +72 -0
package/evals/acceptance/demo-real-evidence.sh +104 -0
package/evals/acceptance/demo.tape +29 -0
package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
package/evals/acceptance/prove-capture-teeth.sh +114 -0
package/evals/acceptance/prove-teeth.sh +105 -0
package/evals/ci/antigaming-suite.sh +54 -0
package/evals/ci/run-baseline.sh +2 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
package/evals/integration/test_builder_step_producers.sh +379 -0
package/evals/integration/test_bundle_install.sh +35 -71
package/evals/integration/test_bundle_lifecycle.sh +39 -2
package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
package/evals/integration/test_checkpoint_signing.sh +489 -0
package/evals/integration/test_claim_lookup.sh +352 -0
package/evals/integration/test_command_log_integrity.sh +275 -0
package/evals/integration/test_context_map.sh +0 -2
package/evals/integration/test_dual_emit_flow_step.sh +278 -0
package/evals/integration/test_enforcer_expects_driven.sh +281 -0
package/evals/integration/test_evidence_capture_hook.sh +185 -0
package/evals/integration/test_flow_kit_repository.sh +2 -0
package/evals/integration/test_flowdef_session_activation.sh +273 -0
package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
package/evals/integration/test_gate_bypass_chain.sh +448 -0
package/evals/integration/test_gate_lockdown.sh +1137 -0
package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
package/evals/integration/test_goal_fit_hook.sh +69 -4
package/evals/integration/test_goal_fit_rederive.sh +263 -0
package/evals/integration/test_install_merge.sh +1176 -0
package/evals/integration/test_mint_attestation.sh +373 -0
package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
package/evals/integration/test_publish_delivery.sh +269 -0
package/evals/integration/test_reconcile_soundness.sh +528 -0
package/evals/integration/test_resolvefirststep_security.sh +208 -0
package/evals/integration/test_session_resume_roundtrip.sh +286 -0
package/evals/integration/test_trust_checkpoint.sh +325 -0
package/evals/integration/test_trust_reconcile.sh +293 -0
package/evals/integration/test_verify_cli.sh +208 -0
package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
package/evals/lib/node.sh +0 -6
package/evals/run.sh +45 -0
package/evals/static/test_workflow_skills.sh +6 -13
package/install.sh +0 -7
package/integrations/strands-ts/README.md +25 -15
package/integrations/veritas/flow-agents.adapter.json +1 -2
package/kits/builder/flows/build.flow.json +59 -12
package/kits/builder/kit.json +85 -15
package/kits/builder/skills/continue-work/SKILL.md +116 -0
package/kits/builder/skills/deliver/SKILL.md +36 -6
package/kits/builder/skills/design-probe/SKILL.md +28 -0
package/kits/builder/skills/execute-plan/SKILL.md +9 -1
package/kits/builder/skills/gate-review/SKILL.md +234 -0
package/kits/builder/skills/learning-review/SKILL.md +30 -0
package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
package/kits/builder/skills/plan-work/SKILL.md +13 -1
package/kits/builder/skills/pull-work/SKILL.md +19 -0
package/kits/knowledge/adapters/default-store/index.js +38 -0
package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
package/kits/knowledge/docs/store-contract.md +314 -0
package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
package/kits/knowledge/evals/entities/suite.test.js +40 -0
package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
package/kits/knowledge/evals/retirement/suite.test.js +145 -0
package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
package/kits/knowledge/kit.json +51 -1
package/package.json +4 -4
package/packaging/conformance/README.md +10 -2
package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
package/packaging/conformance/run-conformance.js +1 -1
package/scripts/README.md +2 -1
package/scripts/build-universal-bundles.js +0 -1
package/scripts/ci/mint-attestation.js +221 -0
package/scripts/ci/trust-reconcile.js +545 -0
package/scripts/hooks/config-protection.js +423 -1
package/scripts/hooks/evidence-capture.js +348 -0
package/scripts/hooks/lib/liveness-read.js +113 -0
package/scripts/hooks/run-hook.js +6 -1
package/scripts/hooks/stop-goal-fit.js +1471 -79
package/scripts/hooks/workflow-steering.js +135 -5
package/scripts/install-codex-home.sh +39 -0
package/scripts/install-merge.js +330 -0
package/src/cli/init.ts +218 -20
package/src/cli/validate-workflow-artifacts.ts +18 -2
package/src/cli/verify.ts +100 -0
package/src/cli/workflow-sidecar.ts +2064 -77
package/src/cli.ts +2 -3
package/src/lib/flow-resolver.ts +284 -0
package/src/tools/build-universal-bundles.ts +34 -21
package/src/tools/generate-context-map.ts +3 -17
package/src/tools/validate-source-tree.ts +44 -104
package/build/src/tools/filter-installed-packs.d.ts +0 -2
package/build/src/tools/filter-installed-packs.js +0 -135
package/packaging/packs.json +0 -49
package/scripts/filter-installed-packs.js +0 -2
package/src/tools/filter-installed-packs.ts +0 -132

package/evals/integration/test_goal_fit_rederive.sh ADDED Viewed

@@ -0,0 +1,263 @@
+#!/usr/bin/env bash
+# test_goal_fit_rederive.sh — Killer test for ADR 0010 Phase 2b re-derive-at-gate hardening.
+#
+# Proves that:
+#   1. A TAMPERED trust.bundle (stored status "verified" but evidence re-derives to
+#      "disputed" because evidence[].passing === false) still BLOCKS (exit 2) and emits
+#      the "stored status does not match recompute (possible tampered bundle)" warning.
+#   2. A LEGITIMATE bundle (stored "verified" AND evidence re-derives to "verified") is
+#      ALLOWED (no false-block).
+#   3. The existing stored-status path still fires for a stored "disputed" claim (no
+#      regression from #133).
+#
+# Design: self-cleaning, deterministic (no model spend, no live commands).
+# Usage: bash evals/integration/test_goal_fit_rederive.sh
+set -uo pipefail
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
+export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
+TMP="$(mktemp -d)"
+errors=0
+_pass() { local msg="$1"; printf '  ✓ %s\n' "$msg"; }  # report one passing assertion ($1 = description)
+_fail() { local msg="$1"; printf '  ✗ %s\n' "$msg"; errors=$((1 + errors)); }  # report one failed assertion and bump the global counter
+cleanup() { rm -rf -- "$TMP"; }  # remove the scratch dir; `--` keeps a path starting with '-' from being parsed as options
+trap cleanup EXIT
+# ─── helper: seed a minimal delivered workflow artifact ───────────────────────
+seed_repo() { # $1=dir $2=slug
+  local p="$1" slug="$2"
+  mkdir -p "$p/.flow-agents/$slug"
+  printf '# Repo\n' > "$p/AGENTS.md"
+  printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$slug\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-23T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" \
+    > "$p/.flow-agents/$slug/state.json"
+  cat > "$p/.flow-agents/$slug/$slug--deliver.md" << MD
+# $slug
+branch: main
+status: delivered
+type: deliver
+## Definition Of Done
+- [x] tests pass
+## Goal Fit Gate
+- [x] acceptance verified
+### Verdict: PASS
+MD
+}
+# ─── Test 1: TAMPERED bundle — stored "verified" but evidence re-derives to disputed ─
+# A trust.bundle where the agent wrote claim.status="verified" to bypass the gate,
+# but the evidence array has passing:false (a failing command result folded in by
+# buildTrustBundle). Surface's deriveClaimStatus must re-derive "disputed" from
+# that evidence, and the gate must block with a tamper warning.
+echo "Test 1: tampered bundle (stored verified, evidence→disputed) must BLOCK"
+TAMPER_DIR="$TMP/tamper"
+seed_repo "$TAMPER_DIR" "tampered"
+# Build a trust.bundle:
+# - claim.status = "verified"   (stored, tampered to look safe)
+# - evidence[passing=false]     (real command failed, folded in by the sidecar)
+# Surface.deriveClaimStatus will see passing:false evidence and return "disputed".
+python3 - "$TAMPER_DIR/.flow-agents/tampered/trust.bundle" << 'PY'
+import json, sys
+bundle = {
+    "schemaVersion": 3,
+    "source": "flow-agents/workflow-sidecar",
+    "claims": [{
+        "id": "c1",
+        "subjectId": "tampered/unit-tests",
+        "subjectType": "workflow-check",
+        "claimType": "workflow.check.command",
+        "fieldOrBehavior": "unit tests",
+        "value": "pass",
+        "impactLevel": "high",
+        "status": "verified",   # tampered: agent edited this from "disputed" → "verified"
+        "createdAt": "2026-06-23T00:00:00Z",
+        "updatedAt": "2026-06-23T00:00:00Z"
+    }],
+    "evidence": [{
+        "id": "ev1",
+        "claimId": "c1",
+        "evidenceType": "test_output",
+        "method": "validation",
+        "sourceRef": "command-log.jsonl",
+        "excerptOrSummary": "npm test failed with exit 1",
+        "observedAt": "2026-06-23T00:00:00Z",
+        "collectedBy": "harness",
+        "passing": False,       # the actual command FAILED — surface sees this
+        "blocking": True
+    }],
+    "policies": [],
+    "events": [{
+        "id": "evt1",
+        "claimId": "c1",
+        "status": "verified",  # the event says verified (tampered)
+        "actor": "agent",
+        "method": "workflow-check",
+        "evidenceIds": ["ev1"],
+        "createdAt": "2026-06-23T00:00:00Z"
+    }]
+}
+# Close the file deterministically so the bundle is fully flushed before the gate runs.
+with open(sys.argv[1], 'w') as fh:
+    json.dump(bundle, fh)
+PY
+# Run the gate in block mode, capturing stdout+stderr and the exit status.
+# NOTE: the script header enables only -u and pipefail, so the `set -e` below
+# leaves errexit switched on for everything that follows.
+set +e
+result_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$TAMPER_DIR\"}")"
+result_exit="$?"
+set -e
+if [ "$result_exit" -eq 2 ]; then
+  _pass "tampered bundle blocks (exit 2)"
+else
+  _fail "tampered bundle did NOT block: exit=$result_exit output=$result_out"
+fi
+# Here-strings rather than `echo … | grep -q`: under pipefail, grep -q exiting on
+# an early match can SIGPIPE the echo stage on large output and flip the result.
+if grep -qE "stored status.*does not match recompute|possible tampered bundle" <<< "$result_out"; then
+  _pass "tampered bundle emits tamper warning"
+else
+  _fail "tampered bundle missing tamper warning: $result_out"
+fi
+if grep -q "caught false-completion" <<< "$result_out"; then
+  _pass "tampered bundle emits caught false-completion"
+else
+  _fail "tampered bundle missing caught false-completion: $result_out"
+fi
+# ─── Test 2: LEGITIMATE bundle — stored "verified" AND evidence re-derives to "verified" ─
+# A bundle where both the stored status and the re-derived status agree on "verified"
+# (a passing:true evidence + a "verified" event). Must ALLOW: the gate is run in
+# block mode below and must not exit 2 or report a false-completion.
+echo ""
+echo "Test 2: legitimate bundle (stored verified, evidence→verified) must ALLOW"
+LEGIT_DIR="$TMP/legit"
+seed_repo "$LEGIT_DIR" "legit"
+python3 - "$LEGIT_DIR/.flow-agents/legit/trust.bundle" << 'PY'
+import json, sys
+bundle = {
+    "schemaVersion": 3,
+    "source": "flow-agents/workflow-sidecar",
+    "claims": [{
+        "id": "c2",
+        "subjectId": "legit/unit-tests",
+        "subjectType": "workflow-check",
+        "claimType": "workflow.check.command",
+        "fieldOrBehavior": "unit tests",
+        "value": "pass",
+        "impactLevel": "high",
+        "status": "verified",
+        "createdAt": "2026-06-23T00:00:00Z",
+        "updatedAt": "2026-06-23T00:00:00Z"
+    }],
+    "evidence": [{
+        "id": "ev2",
+        "claimId": "c2",
+        "evidenceType": "test_output",
+        "method": "validation",
+        "sourceRef": "command-log.jsonl",
+        "excerptOrSummary": "npm test passed",
+        "observedAt": "2026-06-23T00:00:00Z",
+        "collectedBy": "harness",
+        "passing": True,        # command genuinely passed
+        "blocking": False
+    }],
+    "policies": [],
+    "events": [{
+        "id": "evt2",
+        "claimId": "c2",
+        "status": "verified",
+        "actor": "agent",
+        "method": "workflow-check",
+        "evidenceIds": ["ev2"],
+        "createdAt": "2026-06-23T00:00:00Z"
+    }]
+}
+# Close the file deterministically so the bundle is fully flushed before the gate runs.
+with open(sys.argv[1], 'w') as fh:
+    json.dump(bundle, fh)
+PY
+# Run the gate in block mode; errexit is suspended so a non-zero exit can be inspected.
+set +e
+legit_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$LEGIT_DIR\"}")"
+legit_exit="$?"
+set -e
+# Any exit status other than 2 counts as "not blocked".
+if [ "$legit_exit" -ne 2 ]; then
+  _pass "legitimate bundle not blocked (exit $legit_exit)"
+else
+  _fail "legitimate bundle false-blocked (exit 2): $legit_out"
+fi
+# Here-string rather than `echo … | grep -q`: under pipefail, a SIGPIPE on the
+# echo stage would make a real match look like "no match" — a false PASS here.
+if grep -q "caught false-completion" <<< "$legit_out"; then
+  _fail "legitimate bundle incorrectly emits false-completion: $legit_out"
+else
+  _pass "legitimate bundle does not emit false-completion"
+fi
+# ─── Test 3: existing stored-disputed path still fires (no regression from #133) ──
+# The claim is stored as "disputed" and the bundle carries no evidence or events,
+# so the block must come from the stored status alone.
+echo ""
+echo "Test 3: stored-disputed bundle must still BLOCK (no regression from #133)"
+STORED_DIR="$TMP/stored"
+seed_repo "$STORED_DIR" "stored"
+python3 - "$STORED_DIR/.flow-agents/stored/trust.bundle" << 'PY'
+import json, sys
+bundle = {
+    "schemaVersion": 3,
+    "source": "flow-agents/workflow-sidecar",
+    "claims": [{
+        "id": "c3",
+        "subjectId": "stored/unit-tests",
+        "subjectType": "workflow-check",
+        "claimType": "workflow.check.command",
+        "fieldOrBehavior": "unit tests",
+        "value": "fail",
+        "impactLevel": "high",
+        "status": "disputed",   # stored as disputed (not tampered — correctly flagged)
+        "createdAt": "2026-06-23T00:00:00Z",
+        "updatedAt": "2026-06-23T00:00:00Z"
+    }],
+    "evidence": [],
+    "policies": [],
+    "events": []
+}
+# Close the file deterministically so the bundle is fully flushed before the gate runs.
+with open(sys.argv[1], 'w') as fh:
+    json.dump(bundle, fh)
+PY
+# Run the gate in block mode; errexit is suspended so the exit status can be inspected.
+set +e
+stored_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
+    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$STORED_DIR\"}")"
+stored_exit="$?"
+set -e
+if [ "$stored_exit" -eq 2 ]; then
+  _pass "stored-disputed bundle blocks (exit 2)"
+else
+  _fail "stored-disputed bundle did NOT block (exit $stored_exit): $stored_out"
+fi
+# Here-string rather than `echo … | grep -q`: under pipefail, grep -q exiting on
+# an early match can SIGPIPE the echo stage on large output and flip the result.
+if grep -q "caught false-completion" <<< "$stored_out"; then
+  _pass "stored-disputed bundle emits caught false-completion"
+else
+  _fail "stored-disputed bundle missing caught false-completion: $stored_out"
+fi
+# ─── Summary ─────────────────────────────────────────────────────────────────
+echo ""
+if [ "$errors" -eq 0 ]; then
+  echo "Re-derive-at-gate hardening tests passed."
+  exit 0
+fi
+echo "Re-derive-at-gate hardening tests FAILED: $errors issue(s)."
+exit 1