@kontourai/flow-agents 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +95 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +300 -8
- package/build/src/cli/workflow-sidecar.js +1934 -83
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +45 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +4 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2064 -77
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_flowdef_session_history_preservation.sh — Integration eval for ADR 0016 Step 0.
|
|
3
|
+
#
|
|
4
|
+
# Proves:
|
|
5
|
+
# 1. A FlowDefinition-driven session (ensure-session --flow-id builder.build, step=verify)
|
|
6
|
+
# records a check via the declared builder.verify.tests path, then record-critique and
|
|
7
|
+
# record-learning PRESERVE the prior declared check + critique claims in the rebuilt
|
|
8
|
+
# bundle (no history loss).
|
|
9
|
+
# 2. A workflow.* session (no --flow-id) record-critique round-trip is
|
|
10
|
+
# UNCHANGED — only workflow.check.* and workflow.critique.review claims survive.
|
|
11
|
+
# 3. evidenceClean/critiqueClean return correct results for a builder.* bundle:
|
|
12
|
+
# checked by running dogfood-pass --verdict pass on a clean builder.build session.
|
|
13
|
+
#
|
|
14
|
+
# Deterministic, no model spend, self-cleaning.
|
|
15
|
+
# Usage: bash evals/integration/test_flowdef_session_history_preservation.sh
|
|
16
|
+
|
|
17
|
+
set -uo pipefail
|
|
18
|
+
|
|
19
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
20
|
+
source "$ROOT/evals/lib/node.sh"
|
|
21
|
+
|
|
22
|
+
TMP="$(mktemp -d)"
|
|
23
|
+
errors=0
|
|
24
|
+
|
|
25
|
+
# Print a single passing-check line (check mark followed by the message in $1) to stdout.
_pass() {
  printf ' ✓ %s\n' "$1"
}
|
|
26
|
+
# Print a single failing-check line (cross mark followed by the message in $1) to stdout
# and add one to the global `errors` counter that the final summary inspects.
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$((errors + 1))
}
|
|
27
|
+
|
|
28
|
+
# Remove the scratch directory at $TMP together with everything beneath it.
cleanup() {
  rm -rf "$TMP"
}
|
|
29
|
+
trap cleanup EXIT
|
|
30
|
+
|
|
31
|
+
WRITER="workflow-sidecar"
|
|
32
|
+
|
|
33
|
+
# ─── TEST 1: FlowDefinition-driven session round-trip (no history loss) ────────
|
|
34
|
+
echo ""
|
|
35
|
+
echo "=== 1. FlowDefinition-driven session: record-critique/record-learning preserve declared claims ==="
|
|
36
|
+
|
|
37
|
+
FLOW_AROOT="$TMP/flow-aroot"
|
|
38
|
+
SLUG="history-flow-test"
|
|
39
|
+
SESSION_DIR="$FLOW_AROOT/$SLUG"
|
|
40
|
+
mkdir -p "$FLOW_AROOT"
|
|
41
|
+
|
|
42
|
+
# Create a FlowDefinition-driven session at the verify step (builder.verify.tests is declared)
|
|
43
|
+
flow_agents_node "$WRITER" ensure-session \
|
|
44
|
+
--artifact-root "$FLOW_AROOT" \
|
|
45
|
+
--task-slug "$SLUG" \
|
|
46
|
+
--title "History preservation test" \
|
|
47
|
+
--summary "Test that declared builder.* claims survive round-trips." \
|
|
48
|
+
--flow-id builder.build \
|
|
49
|
+
--step-id verify \
|
|
50
|
+
--timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
|
|
51
|
+
|
|
52
|
+
flow_agents_node "$WRITER" init-plan "$SESSION_DIR/$SLUG--deliver.md" \
|
|
53
|
+
--source-request "Test" --summary "Testing" \
|
|
54
|
+
--timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
|
|
55
|
+
|
|
56
|
+
# Record a passing check (produces ONLY builder.verify.tests declared claim — no legacy shadow, P-d)
|
|
57
|
+
flow_agents_node "$WRITER" record-evidence "$SESSION_DIR" \
|
|
58
|
+
--verdict pass \
|
|
59
|
+
--check-json '{"id":"unit-tests","kind":"test","status":"pass","summary":"Unit tests pass"}' \
|
|
60
|
+
--timestamp "2026-06-01T00:01:00Z" >/dev/null 2>&1
|
|
61
|
+
|
|
62
|
+
# Verify declared claim is in bundle before round-trip
|
|
63
|
+
node -e "
|
|
64
|
+
const fs = require('fs');
|
|
65
|
+
const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
|
|
66
|
+
const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
|
|
67
|
+
if (!declared) throw new Error('MISSING builder.verify.tests before round-trip; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
|
|
68
|
+
console.log('before round-trip: builder.verify.tests status=' + declared.status);
|
|
69
|
+
" 2>&1 \
|
|
70
|
+
&& _pass "builder.verify.tests declared claim present before round-trip" \
|
|
71
|
+
|| _fail "builder.verify.tests declared claim MISSING before round-trip"
|
|
72
|
+
|
|
73
|
+
# Now do record-critique (the round-trip: checksFromBundle + critiquesFromBundle rebuild)
|
|
74
|
+
flow_agents_node "$WRITER" record-critique "$SESSION_DIR" \
|
|
75
|
+
--id "code-review" \
|
|
76
|
+
--verdict pass \
|
|
77
|
+
--summary "Code review passed." \
|
|
78
|
+
--timestamp "2026-06-01T00:02:00Z" >/dev/null 2>&1
|
|
79
|
+
|
|
80
|
+
# Assert builder.verify.tests survived the record-critique round-trip
|
|
81
|
+
node -e "
|
|
82
|
+
const fs = require('fs');
|
|
83
|
+
const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
|
|
84
|
+
const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
|
|
85
|
+
if (!declared) throw new Error('HISTORY LOSS: builder.verify.tests MISSING after record-critique; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
|
|
86
|
+
console.log('after record-critique: builder.verify.tests status=' + declared.status);
|
|
87
|
+
" 2>&1 \
|
|
88
|
+
&& _pass "builder.verify.tests declared claim preserved after record-critique (no history loss)" \
|
|
89
|
+
|| _fail "builder.verify.tests declared claim LOST after record-critique (history loss)"
|
|
90
|
+
|
|
91
|
+
# Also verify the critique claim itself is present.
|
|
92
|
+
# In a flow-driven session (verify step), critique maps to the declared builder.verify.policy-compliance
|
|
93
|
+
# (the critique heuristic matches: subjectType=artifact + claimType contains "compliance").
|
|
94
|
+
# workflow.critique.review is emitted in no-flow sessions only (P-d: shadow retired).
|
|
95
|
+
node -e "
|
|
96
|
+
const fs = require('fs');
|
|
97
|
+
const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
|
|
98
|
+
const claims = bundle.claims || [];
|
|
99
|
+
// Declared critique claim for verify-step: builder.verify.policy-compliance
|
|
100
|
+
const crit = claims.find(c => c.claimType === 'builder.verify.policy-compliance');
|
|
101
|
+
if (!crit) throw new Error('MISSING builder.verify.policy-compliance critique claim after record-critique; claims: ' + claims.map(c=>c.claimType).join(', '));
|
|
102
|
+
// Must NOT have workflow.critique.review in a flow-driven session (no shadow, P-d)
|
|
103
|
+
const legacy = claims.find(c => c.claimType === 'workflow.critique.review');
|
|
104
|
+
if (legacy) throw new Error('UNEXPECTED workflow.critique.review in flow-driven session (P-d retired shadow); id=' + legacy.id);
|
|
105
|
+
console.log('declared critique claim: claimType=' + crit.claimType + ' value=' + crit.value);
|
|
106
|
+
" 2>&1 \
|
|
107
|
+
&& _pass "builder.verify.policy-compliance declared critique claim present (no workflow.critique.review shadow, P-d)" \
|
|
108
|
+
|| _fail "declared critique claim MISSING or unexpected workflow.critique.review found after record-critique"
|
|
109
|
+
|
|
110
|
+
# Now do record-learning (second round-trip)
|
|
111
|
+
flow_agents_node "$WRITER" record-learning "$SESSION_DIR" \
|
|
112
|
+
--status learned \
|
|
113
|
+
--record-json '{
|
|
114
|
+
"outcome": "success",
|
|
115
|
+
"source_refs": [],
|
|
116
|
+
"facts": ["Tests passed clean."],
|
|
117
|
+
"routing": [{"target":"none","status":"completed","summary":"No routing needed."}],
|
|
118
|
+
"correction": {"needed": false, "evidence": "All checks passed cleanly."}
|
|
119
|
+
}' \
|
|
120
|
+
--summary "Learning recorded." \
|
|
121
|
+
--timestamp "2026-06-01T00:03:00Z" >/dev/null 2>&1
|
|
122
|
+
|
|
123
|
+
# Assert builder.verify.tests survived the record-learning round-trip
|
|
124
|
+
node -e "
|
|
125
|
+
const fs = require('fs');
|
|
126
|
+
const bundle = JSON.parse(fs.readFileSync('$SESSION_DIR/trust.bundle', 'utf8'));
|
|
127
|
+
const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
|
|
128
|
+
if (!declared) throw new Error('HISTORY LOSS: builder.verify.tests MISSING after record-learning; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
|
|
129
|
+
console.log('after record-learning: builder.verify.tests status=' + declared.status);
|
|
130
|
+
" 2>&1 \
|
|
131
|
+
&& _pass "builder.verify.tests declared claim preserved after record-learning (no history loss)" \
|
|
132
|
+
|| _fail "builder.verify.tests declared claim LOST after record-learning (history loss)"
|
|
133
|
+
|
|
134
|
+
# ─── TEST 2: workflow.* session round-trip is UNCHANGED ────────────────────────
|
|
135
|
+
echo ""
|
|
136
|
+
echo "=== 2. workflow.* session (no --flow-id): round-trip unchanged ==="
|
|
137
|
+
|
|
138
|
+
NOFLOW_AROOT="$TMP/noflow-aroot"
|
|
139
|
+
NOFLOW_SLUG="history-noflow-test"
|
|
140
|
+
NOFLOW_DIR="$NOFLOW_AROOT/$NOFLOW_SLUG"
|
|
141
|
+
mkdir -p "$NOFLOW_AROOT"
|
|
142
|
+
|
|
143
|
+
flow_agents_node "$WRITER" ensure-session \
|
|
144
|
+
--artifact-root "$NOFLOW_AROOT" \
|
|
145
|
+
--task-slug "$NOFLOW_SLUG" \
|
|
146
|
+
--title "No-flow session history test" \
|
|
147
|
+
--summary "Baseline: no FlowDefinition. Round-trip must be unchanged." \
|
|
148
|
+
--timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
|
|
149
|
+
|
|
150
|
+
flow_agents_node "$WRITER" init-plan "$NOFLOW_DIR/$NOFLOW_SLUG--deliver.md" \
|
|
151
|
+
--source-request "Test" --summary "Testing" \
|
|
152
|
+
--timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
|
|
153
|
+
|
|
154
|
+
# Record a check (produces only workflow.check.test — no declared claims)
|
|
155
|
+
flow_agents_node "$WRITER" record-evidence "$NOFLOW_DIR" \
|
|
156
|
+
--verdict pass \
|
|
157
|
+
--check-json '{"id":"noflow-unit-tests","kind":"test","status":"pass","summary":"No-flow tests pass"}' \
|
|
158
|
+
--timestamp "2026-06-01T10:01:00Z" >/dev/null 2>&1
|
|
159
|
+
|
|
160
|
+
# record-critique round-trip
|
|
161
|
+
flow_agents_node "$WRITER" record-critique "$NOFLOW_DIR" \
|
|
162
|
+
--id "noflow-review" \
|
|
163
|
+
--verdict pass \
|
|
164
|
+
--summary "Review passed." \
|
|
165
|
+
--timestamp "2026-06-01T10:02:00Z" >/dev/null 2>&1
|
|
166
|
+
|
|
167
|
+
# Assert only workflow.* claims survived (no builder.* contamination)
|
|
168
|
+
node -e "
|
|
169
|
+
const fs = require('fs');
|
|
170
|
+
const bundle = JSON.parse(fs.readFileSync('$NOFLOW_DIR/trust.bundle', 'utf8'));
|
|
171
|
+
const claims = bundle.claims || [];
|
|
172
|
+
const wfCheck = claims.find(c => c.claimType === 'workflow.check.test');
|
|
173
|
+
const wfCritique = claims.find(c => c.claimType === 'workflow.critique.review');
|
|
174
|
+
const builderClaims = claims.filter(c => c.claimType.startsWith('builder.'));
|
|
175
|
+
if (!wfCheck) throw new Error('MISSING workflow.check.test after record-critique');
|
|
176
|
+
if (!wfCritique) throw new Error('MISSING workflow.critique.review after record-critique');
|
|
177
|
+
if (builderClaims.length > 0) throw new Error('UNEXPECTED builder.* claims in no-flow session after round-trip: ' + builderClaims.map(c=>c.claimType).join(', '));
|
|
178
|
+
console.log('after record-critique: workflow.check.test + workflow.critique.review, no builder.*');
|
|
179
|
+
" 2>&1 \
|
|
180
|
+
&& _pass "no-flow session: workflow.* only after record-critique round-trip (unchanged)" \
|
|
181
|
+
|| _fail "no-flow session: unexpected claims after record-critique round-trip"
|
|
182
|
+
|
|
183
|
+
# ─── TEST 3: evidenceClean/critiqueClean correct for builder.* bundle ──────────
|
|
184
|
+
echo ""
|
|
185
|
+
echo "=== 3. evidenceClean/critiqueClean correct for builder.* bundle ==="
|
|
186
|
+
|
|
187
|
+
# Create a fresh builder.build session at verify step for dogfood-pass test
|
|
188
|
+
DOGFOOD_AROOT="$TMP/dogfood-aroot"
|
|
189
|
+
DOGFOOD_SLUG="dogfood-clean-test"
|
|
190
|
+
DOGFOOD_DIR="$DOGFOOD_AROOT/$DOGFOOD_SLUG"
|
|
191
|
+
mkdir -p "$DOGFOOD_AROOT"
|
|
192
|
+
|
|
193
|
+
flow_agents_node "$WRITER" ensure-session \
|
|
194
|
+
--artifact-root "$DOGFOOD_AROOT" \
|
|
195
|
+
--task-slug "$DOGFOOD_SLUG" \
|
|
196
|
+
--title "Dogfood clean test" \
|
|
197
|
+
--summary "Test evidenceClean/critiqueClean on builder.build session." \
|
|
198
|
+
--flow-id builder.build \
|
|
199
|
+
--step-id verify \
|
|
200
|
+
--timestamp "2026-06-01T20:00:00Z" >/dev/null 2>&1
|
|
201
|
+
|
|
202
|
+
flow_agents_node "$WRITER" init-plan "$DOGFOOD_DIR/$DOGFOOD_SLUG--deliver.md" \
|
|
203
|
+
--source-request "Test" --summary "Testing" \
|
|
204
|
+
--timestamp "2026-06-01T20:00:00Z" >/dev/null 2>&1
|
|
205
|
+
|
|
206
|
+
# Record pass evidence (produces builder.verify.tests declared claim, status=verified)
|
|
207
|
+
flow_agents_node "$WRITER" record-evidence "$DOGFOOD_DIR" \
|
|
208
|
+
--verdict pass \
|
|
209
|
+
--check-json '{"id":"ev-check","kind":"test","status":"pass","summary":"Evidence check passes"}' \
|
|
210
|
+
--timestamp "2026-06-01T20:01:00Z" >/dev/null 2>&1
|
|
211
|
+
|
|
212
|
+
# Record pass critique
|
|
213
|
+
flow_agents_node "$WRITER" record-critique "$DOGFOOD_DIR" \
|
|
214
|
+
--id "ev-critique" \
|
|
215
|
+
--verdict pass \
|
|
216
|
+
--summary "Critique passed." \
|
|
217
|
+
--timestamp "2026-06-01T20:02:00Z" >/dev/null 2>&1
|
|
218
|
+
|
|
219
|
+
# dogfood-pass --verdict pass should succeed: evidenceClean=true (builder.verify.tests passes)
|
|
220
|
+
# and critiqueClean=true (builder.verify.policy-compliance passes — declared critique for verify step).
|
|
221
|
+
flow_agents_node "$WRITER" dogfood-pass \
|
|
222
|
+
--artifact-root "$DOGFOOD_AROOT" \
|
|
223
|
+
--artifact-dir "$DOGFOOD_DIR" \
|
|
224
|
+
--verdict pass \
|
|
225
|
+
--check-json '{"id":"dogfood-ev-check","kind":"test","status":"pass","summary":"Dogfood evidence check"}' \
|
|
226
|
+
--summary "Dogfood pass for builder.build session." \
|
|
227
|
+
--timestamp "2026-06-01T20:03:00Z" >/dev/null 2>&1 \
|
|
228
|
+
&& _pass "dogfood-pass succeeds: evidenceClean returns true for builder.verify.tests declared claim" \
|
|
229
|
+
|| _fail "dogfood-pass FAILED: evidenceClean did not recognize builder.verify.tests as passing evidence"
|
|
230
|
+
|
|
231
|
+
# Verify directly that the bundle has builder.verify.tests as the evidence claim
|
|
232
|
+
node -e "
|
|
233
|
+
const fs = require('fs');
|
|
234
|
+
const bundle = JSON.parse(fs.readFileSync('$DOGFOOD_DIR/trust.bundle', 'utf8'));
|
|
235
|
+
const claims = bundle.claims || [];
|
|
236
|
+
const builderCheck = claims.find(c => c.claimType === 'builder.verify.tests' && c.value === 'pass');
|
|
237
|
+
if (!builderCheck) throw new Error('MISSING builder.verify.tests (pass) in bundle; claims: ' + claims.map(c=>c.claimType+'='+c.value).join(', '));
|
|
238
|
+
console.log('builder.verify.tests evidence claim present with value=pass, status=' + builderCheck.status);
|
|
239
|
+
" 2>&1 \
|
|
240
|
+
&& _pass "bundle contains builder.verify.tests with value=pass (declared claim recognized by evidenceClean)" \
|
|
241
|
+
|| _fail "bundle missing builder.verify.tests with value=pass"
|
|
242
|
+
|
|
243
|
+
# ─── Summary ──────────────────────────────────────────────────────────────────
|
|
244
|
+
echo ""
|
|
245
|
+
if [ "$errors" -eq 0 ]; then
|
|
246
|
+
echo "test_flowdef_session_history_preservation: all checks passed."
|
|
247
|
+
exit 0
|
|
248
|
+
fi
|
|
249
|
+
echo "test_flowdef_session_history_preservation: $errors check(s) FAILED."
|
|
250
|
+
exit 1
|