@kontourai/flow-agents 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +95 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +300 -8
- package/build/src/cli/workflow-sidecar.js +1934 -83
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +45 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +4 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2064 -77
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -4,9 +4,22 @@
|
|
|
4
4
|
*
|
|
5
5
|
* The hook reads .flow-agents artifacts, looks for the most recent active
|
|
6
6
|
* delivery/session file, and reports missing Definition Of Done, Goal Fit, or
|
|
7
|
-
* Final Acceptance state.
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Final Acceptance state.
|
|
8
|
+
*
|
|
9
|
+
* Enforcement is controlled by FLOW_AGENTS_GOAL_FIT_MODE:
|
|
10
|
+
* - block: return exit code 2 (blocks the Stop) when local goal fit is incomplete.
|
|
11
|
+
* - warn: return exit code 0 but still emit the guidance on stderr (default).
|
|
12
|
+
* - off: stay silent.
|
|
13
|
+
* The legacy FLOW_AGENTS_GOAL_FIT_STRICT=true is honored as an alias for block.
|
|
14
|
+
* The canonical engine default is warn; shipped runtime configs (e.g. Claude
|
|
15
|
+
* Code at L2) set block so the installed product enforces while the engine
|
|
16
|
+
* default and conformance contract stay warn.
|
|
17
|
+
*
|
|
18
|
+
* Scope: the gate evaluates the session's current task (.flow-agents/current.json)
|
|
19
|
+
* when set, so an unrelated active workflow elsewhere in the repo does not gate
|
|
20
|
+
* this stop. It also never hard-blocks a pre-execution (not-yet-started) task on
|
|
21
|
+
* mere incompleteness — only genuine false-completion signals (a claimed pass the
|
|
22
|
+
* capture log or evidence.json contradicts) block before execution begins.
|
|
10
23
|
*/
|
|
11
24
|
|
|
12
25
|
'use strict';
|
|
@@ -14,6 +27,7 @@
|
|
|
14
27
|
const fs = require('fs');
|
|
15
28
|
const path = require('path');
|
|
16
29
|
const { spawnSync } = require('child_process');
|
|
30
|
+
const crypto = require('crypto');
|
|
17
31
|
|
|
18
32
|
const MAX_STDIN = 1024 * 1024;
|
|
19
33
|
const ACTIVE_STATUSES = new Set([
|
|
@@ -29,9 +43,23 @@ const ACTIVE_STATUSES = new Set([
|
|
|
29
43
|
'blocked',
|
|
30
44
|
'partial',
|
|
31
45
|
]);
|
|
32
|
-
|
|
33
|
-
const
|
|
34
|
-
|
|
46
|
+
// Session types recognised as workflow sessions. This set is consulted only to
// identify artifacts; it plays no part in producing a verdict.
const WORKFLOW_SESSION_TYPES = new Set([
  'deliver',
  'delivery',
  'fix-bug',
  'execute-plan',
  'verify-work',
]);

// Phase 4c (bundle-only): the required sidecars are state.json, handoff.json and
// trust.bundle. evidence.json, acceptance.json and critique.json were dropped
// from the required set.
const SIDECAR_NAMES = new Set([
  'state.json',
  'handoff.json',
  'trust.bundle',
]);
// No optional sidecars are declared.
const OPTIONAL_SIDECAR_NAMES = new Set();

// Statuses / phases of a workflow that has not begun execution. Such a workflow
// is expected to be incomplete, so the Stop gate must not hard-block on missing
// DOD / Goal Fit / not-done state for it; only genuine false-completion signals
// block before execution. From execution onward the gate applies fully.
const PRE_EXECUTION_STATUSES = new Set([
  'new',
  'planning',
  'planned',
  'backlog',
]);
const PRE_EXECUTION_PHASES = new Set([
  'idea',
  'backlog',
  'pickup',
  'planning',
]);

// Statuses of finished tasks. A finished task must never gate a stop or count as
// "active": neither a stale current.json pointing at one nor a pile of old
// finished states may block an unrelated session.
const TERMINAL_STATUSES = new Set([
  'done',
  'delivered',
  'accepted',
  'archived',
  'complete',
  'completed',
]);
|
|
35
63
|
|
|
36
64
|
function parseJson(raw) {
|
|
37
65
|
try { return JSON.parse(raw || '{}'); } catch { return {}; }
|
|
@@ -110,7 +138,23 @@ function sidecarValidation(root, artifactDir) {
|
|
|
110
138
|
if (requireSidecars || requireCritique) {
|
|
111
139
|
const present = new Set(sidecarFiles.map(file => path.basename(file)));
|
|
112
140
|
const requiredNames = new Set(requireSidecars ? SIDECAR_NAMES : []);
|
|
113
|
-
|
|
141
|
+
// Phase 4c: critique.json is no longer written; trust.bundle carries critique claims.
|
|
142
|
+
// FLOW_AGENTS_REQUIRE_CRITIQUE is satisfied by:
|
|
143
|
+
// - critique.json (legacy, may not be in SIDECAR_NAMES but may still be on disk), OR
|
|
144
|
+
// - trust.bundle that contains at least one workflow.critique.review claim.
|
|
145
|
+
if (requireCritique) {
|
|
146
|
+
// Check disk directly (critique.json is no longer in SIDECAR_NAMES so may not be in present)
|
|
147
|
+
const hasCritiqueJson = fs.existsSync(path.join(artifactDir, 'critique.json'));
|
|
148
|
+
const bundleFile = path.join(artifactDir, 'trust.bundle');
|
|
149
|
+
let hasBundleCritique = false;
|
|
150
|
+
if (fs.existsSync(bundleFile)) {
|
|
151
|
+
try {
|
|
152
|
+
const b = JSON.parse(fs.readFileSync(bundleFile, 'utf8'));
|
|
153
|
+
hasBundleCritique = Array.isArray(b.claims) && b.claims.some(c => c && c.claimType === 'workflow.critique.review');
|
|
154
|
+
} catch { /* fall through — no bundle critique */ }
|
|
155
|
+
}
|
|
156
|
+
if (!hasCritiqueJson && !hasBundleCritique) requiredNames.add('critique.json');
|
|
157
|
+
}
|
|
114
158
|
const missing = [...requiredNames].filter(name => !present.has(name)).sort();
|
|
115
159
|
if (missing.length > 0) {
|
|
116
160
|
return missing.map(name => `${relative(root, path.join(artifactDir, name))} sidecar validation: required sidecar is missing`);
|
|
@@ -186,7 +230,7 @@ function isWorkflowArtifact(artifact) {
|
|
|
186
230
|
if (!artifact) return false;
|
|
187
231
|
if (artifact.role === 'plan' || artifact.role === 'review') return false;
|
|
188
232
|
if (artifact.file.endsWith('-plan.md') || artifact.file.endsWith('-review.md')) return false;
|
|
189
|
-
if (
|
|
233
|
+
if (WORKFLOW_SESSION_TYPES.has(artifact.type)) return true;
|
|
190
234
|
return /--(deliver|fix-bug|execute-plan|verify-work)\b/.test(path.basename(artifact.file));
|
|
191
235
|
}
|
|
192
236
|
|
|
@@ -219,6 +263,44 @@ function readJsonFile(file) {
|
|
|
219
263
|
}
|
|
220
264
|
}
|
|
221
265
|
|
|
266
|
+
// ─── ADR 0010 Phase 2b: re-derive-at-gate via Surface (fail-open) ─────────────
|
|
267
|
+
// Surface (@kontourai/surface) is ESM-only; stop-goal-fit.js is CJS.
|
|
268
|
+
// Load it via a fail-open dynamic import(), cached after the first attempt.
|
|
269
|
+
// If Surface cannot be loaded (package absent, env mismatch), we fall back to
|
|
270
|
+
// the stored claim.status check from #133 — no regression for environments that
|
|
271
|
+
// lack @kontourai/surface. The module is never written to disk.
|
|
272
|
+
// Cache for the Surface module: undefined = load not attempted yet,
// null = load attempted and failed, otherwise the imported module namespace.
let _surfaceModule;

/**
 * Load @kontourai/surface through a dynamic import(), at most once per process
 * outcome: the first settled attempt (module or null) is cached and returned by
 * every later call.
 *
 * @returns {Promise<object|null>} the module namespace, or null when the import fails
 */
async function tryLoadSurface() {
  if (_surfaceModule === undefined) {
    let loaded = null;
    try {
      loaded = await import('@kontourai/surface');
    } catch {
      // Fail-open: an unavailable package is recorded as null, not raised.
    }
    _surfaceModule = loaded;
  }
  return _surfaceModule;
}
|
|
284
|
+
|
|
285
|
+
// ─── ADR 0016 Abstraction A P-c: flow-resolver integration ────────────────────
|
|
286
|
+
// Load the compiled flow-resolver (build/src/lib/flow-resolver.js) via CJS
|
|
287
|
+
// require behind the same hasBuild guard used for the validator. Fail-open:
|
|
288
|
+
// returns null when build/ is absent, require throws, or current.json has no
|
|
289
|
+
// active_flow_id / active_step_id. The caller (bundleEnforcement, sidecarGuidance)
|
|
290
|
+
// treats null as "no active FlowDefinition" and falls back to the workflow.* path.
|
|
291
|
+
/**
 * Resolve the active flow step by delegating to the compiled flow-resolver
 * located at <packageRoot>/build/src/lib/flow-resolver.js, where packageRoot is
 * two directories above this script.
 *
 * Fail-open: returns null when the compiled file does not exist, when it does
 * not export a resolveActiveFlowStep function, or when requiring / calling it
 * throws.
 *
 * @param {string} flowAgentsDir - directory handed to resolveActiveFlowStep
 * @returns {*} whatever resolveActiveFlowStep returns, or null
 */
function loadActiveFlowStep(flowAgentsDir) {
  const resolverPath = path.join(
    path.resolve(__dirname, '..', '..'),
    'build',
    'src',
    'lib',
    'flow-resolver.js',
  );

  // No build output yet: nothing to load.
  if (!fs.existsSync(resolverPath)) {
    return null;
  }

  try {
    const resolver = require(resolverPath);
    return typeof resolver.resolveActiveFlowStep === 'function'
      ? resolver.resolveActiveFlowStep(flowAgentsDir)
      : null;
  } catch {
    // require failed or the resolver threw; treat as "no active flow step".
    return null;
  }
}
|
|
303
|
+
|
|
222
304
|
function safeOneLine(value, maxLength = 220) {
|
|
223
305
|
const text = String(value || '').replace(/\s+/g, ' ').trim();
|
|
224
306
|
if (text.length <= maxLength) return text;
|
|
@@ -229,19 +311,225 @@ function normalizedStatus(value) {
|
|
|
229
311
|
return safeOneLine(value, 80).toLowerCase();
|
|
230
312
|
}
|
|
231
313
|
|
|
232
|
-
|
|
314
|
+
// ─── ADR 0010 Phase 4b: bundle-first helpers for consumer migration ────────────
|
|
315
|
+
// These helpers extract evidence/critique/acceptance data from the trust.bundle
|
|
316
|
+
// when it is present, falling back to the bespoke sidecar for bundle-less sessions.
|
|
317
|
+
// The sidecar content is IDENTICAL to the bundle projection (Phase 4a guarantee),
|
|
318
|
+
// so consumer reads produce identical verdicts.
|
|
319
|
+
|
|
320
|
+
/**
 * Collapse the bundle's check claims into one verdict string.
 *
 * A claim takes part when its claimType starts with "workflow.check." or is a
 * member of declaredClaimTypes. Each participating claim's value is normalized
 * (a missing/falsy value counts as "pass") and ranked:
 *   fail / failed (4) > not_verified / not-verified (3) > partial (2) > pass (1) > skip (0).
 * Values outside this table rank 0. The highest-ranked value wins, starting
 * from "pass".
 *
 * @param {Array} claims - trust.bundle claims array
 * @param {Set<string>|null} [declaredClaimTypes] - optional set of declared claimTypes from gateExpects[]
 * @returns {string|null} the worst normalized status, or null when no claim takes part
 */
function bundleEvidenceVerdict(claims, declaredClaimTypes) {
  const RANK = { fail: 4, failed: 4, not_verified: 3, 'not-verified': 3, partial: 2, pass: 1, skip: 0 };
  const rankOf = status => RANK[status] || 0;

  const isCheckClaim = claim =>
    Boolean(claim)
    && typeof claim.claimType === 'string'
    && (claim.claimType.startsWith('workflow.check.')
      || (declaredClaimTypes != null && declaredClaimTypes.has(claim.claimType)));

  const relevant = claims.filter(isCheckClaim);
  if (relevant.length === 0) {
    return null;
  }

  return relevant
    .map(claim => normalizedStatus(claim.value || 'pass'))
    .reduce((worst, status) => (rankOf(status) > rankOf(worst) ? status : worst), 'pass');
}
|
|
344
|
+
|
|
345
|
+
/**
 * Derive the check id from a claim subjectId shaped like "<slug>/<checkId>".
 *
 * @param {*} subjectId - claim subject identifier; falsy values are treated as ""
 * @returns {string} everything after the first "/", or the whole string when it has no "/"
 */
function claimCheckId(subjectId) {
  const text = String(subjectId || '');
  const cut = text.indexOf('/');
  if (cut < 0) {
    return text;
  }
  return text.substring(cut + 1);
}
|
|
354
|
+
|
|
355
|
+
/**
 * List the check claims in a trust.bundle that are in a blocking state.
 *
 * A claim is considered when its claimType starts with "workflow.check." or is
 * in declaredClaimTypes; it is blocking when its normalized value is one of
 * fail, failed, not_verified or not-verified.
 *
 * @param {Array} claims - trust.bundle claims array
 * @param {Set<string>|null} [declaredClaimTypes] - optional set of declared claimTypes from gateExpects[]
 * @returns {Array<{id: string, status: string, summary: string}>} one entry per
 *   blocking claim: id from claimCheckId(subjectId), status is the raw claim
 *   value, summary is the claim's fieldOrBehavior (or "")
 */
function bundleBlockingChecks(claims, declaredClaimTypes) {
  const BLOCKING = new Set(['fail', 'failed', 'not_verified', 'not-verified']);

  const isBlocking = claim => {
    if (!claim || typeof claim.claimType !== 'string') {
      return false;
    }
    const relevant = claim.claimType.startsWith('workflow.check.')
      || (declaredClaimTypes != null && declaredClaimTypes.has(claim.claimType));
    return relevant && BLOCKING.has(normalizedStatus(claim.value || ''));
  };

  const toCheck = claim => ({
    id: claimCheckId(claim.subjectId),
    status: claim.value || 'unknown',
    summary: claim.fieldOrBehavior || '',
  });

  return claims.filter(isBlocking).map(toCheck);
}
|
|
377
|
+
|
|
378
|
+
/**
 * Determine critique status from trust.bundle claims.
 *
 * A claim is treated as a critique claim when its claimType is exactly
 * "workflow.critique.review" or is a member of declaredClaimTypes.
 *
 * The first critique claim that is failed (normalized value fail/failed) or
 * contested (status "disputed" or "rejected") decides the result:
 *   - its raw value is returned when that value is set and does not read as "pass";
 *   - otherwise "fail" is returned.
 * A contested claim therefore never surfaces as "pass", even when its recorded
 * value is "pass" — previously such a claim returned its value and was
 * indistinguishable from a clean critique.
 *
 * @param {Array} claims - trust.bundle claims array
 * @param {Set<string>|null} [declaredClaimTypes] - optional set of declared claimTypes from gateExpects[]
 * @returns {string|null} null when there are no critique claims, "pass" when
 *   none is failed or contested, otherwise a non-pass status string
 */
function bundleCritiqueStatus(claims, declaredClaimTypes) {
  const critiqueClaims = claims.filter(c => {
    if (!c || typeof c.claimType !== 'string') return false;
    if (c.claimType === 'workflow.critique.review') return true;
    return declaredClaimTypes != null && declaredClaimTypes.has(c.claimType);
  });
  if (critiqueClaims.length === 0) return null;

  for (const c of critiqueClaims) {
    const value = normalizedStatus(c.value || '');
    const failed = value === 'fail' || value === 'failed';
    const contested = c.status === 'disputed' || c.status === 'rejected';
    if (!failed && !contested) continue;
    // A disputed/rejected claim whose recorded value is "pass" must still block:
    // report it as "fail" instead of echoing the contested "pass".
    if (value === 'pass') return 'fail';
    return c.value || 'fail';
  }
  return 'pass';
}
|
|
400
|
+
|
|
401
|
+
/**
 * Build the list of claimed-pass command checks from a trust.bundle.
 *
 * Two sources are combined, in this order:
 *   (A) evidence items that carry execution.label and whose claim has a
 *       claimType starting with "workflow.check." or listed in declaredClaimTypes;
 *   (B) claims of type "workflow.check.command" (or a declared claimType) whose
 *       normalized value is "pass" and that have no evidence item with
 *       execution.label.
 * Entries with a non-empty command are de-duplicated by command text across
 * both sources. A bundle with no evidence can still yield entries through (B).
 *
 * @param {object} bundle - trust.bundle object; non-array evidence/claims are treated as empty
 * @param {Set<string>|null} [declaredClaimTypes] - optional set of declared claimTypes from gateExpects[]
 * @returns {Array<{id: string, kind: string, status: string, command: string}>}
 *   entries with kind "command" and status "pass"
 */
function bundleClaimedPassCommandChecks(bundle, declaredClaimTypes) {
  const allEvidence = Array.isArray(bundle.evidence) ? bundle.evidence : [];
  const allClaims = Array.isArray(bundle.claims) ? bundle.claims : [];
  const isDeclared = type => declaredClaimTypes != null && declaredClaimTypes.has(type);

  // claimId -> claim, for resolving the claim behind each evidence item.
  const claimById = new Map();
  for (const c of allClaims) {
    if (c && c.id) claimById.set(c.id, c);
  }

  // Index the evidence once so step (B) does not rescan the array per claim:
  // which claim ids have a command capture, and the first evidence item per claim id.
  const capturedClaimIds = new Set();
  const firstEvidenceByClaimId = new Map();
  for (const ev of allEvidence) {
    if (!ev) continue;
    if (!firstEvidenceByClaimId.has(ev.claimId)) firstEvidenceByClaimId.set(ev.claimId, ev);
    if (ev.execution && ev.execution.label) capturedClaimIds.add(ev.claimId);
  }

  const checks = [];
  const seen = new Set();

  // (A) Evidence items with execution.label (command captures). They are listed
  // with a nominal "pass" status regardless of the claim's value so the caller
  // can cross-reference them; contradictions are detected there, not here.
  for (const ev of allEvidence) {
    if (!ev || !ev.execution || !ev.execution.label) continue;
    const cmd = String(ev.execution.label).replace(/\s+/g, ' ').trim();
    if (!cmd) continue;
    const claim = claimById.get(ev.claimId);
    if (!claim) continue;
    const claimTypeStr = String(claim.claimType || '');
    if (!claimTypeStr.startsWith('workflow.check.') && !isDeclared(claimTypeStr)) continue;
    // De-duplicate by command text.
    if (seen.has(cmd)) continue;
    seen.add(cmd);
    checks.push({ id: claimCheckId(claim.subjectId), kind: 'command', status: 'pass', command: cmd });
  }

  // (B) Claimed-pass command claims that were never captured.
  for (const c of allClaims) {
    if (!c || typeof c.claimType !== 'string') continue;
    if (c.claimType !== 'workflow.check.command' && !isDeclared(c.claimType)) continue;
    if (normalizedStatus(c.value || '') !== 'pass') continue;
    // Already covered by (A) when the claim has a capture evidence item.
    if (capturedClaimIds.has(c.id)) continue;
    // Command text: the first evidence item's excerptOrSummary when the claim has
    // any evidence, otherwise the claim's own fieldOrBehavior.
    const evItem = firstEvidenceByClaimId.get(c.id);
    const cmd = evItem
      ? normalizeCommand(evItem.excerptOrSummary || '')
      : normalizeCommand(c.fieldOrBehavior || '');
    const id = claimCheckId(c.subjectId);
    if (!cmd) {
      // No command text at all: still report the claim (these are not de-duplicated).
      checks.push({ id, kind: 'command', status: 'pass', command: '' });
      continue;
    }
    if (seen.has(cmd)) continue;
    seen.add(cmd);
    checks.push({ id, kind: 'command', status: 'pass', command: cmd });
  }

  return checks;
}
|
|
474
|
+
|
|
475
|
+
/**
 * Count acceptance-criterion claims in a trust.bundle that are still pending.
 *
 * A claim is counted as a criterion when its claimType is exactly
 * "workflow.acceptance.criterion" or is a member of declaredClaimTypes. It is
 * pending when its normalized value is "pending", "not_started", "unknown" or
 * empty (a missing value normalizes to empty).
 *
 * @param {Array} claims - trust.bundle claims array
 * @param {Set<string>|null} [declaredClaimTypes] - optional set of declared claimTypes from gateExpects[]
 * @returns {number|null} number of pending criteria, or null when the bundle has no criterion claims
 */
function bundlePendingCriteriaCount(claims, declaredClaimTypes) {
  const PENDING = new Set(['pending', 'not_started', '', 'unknown']);

  const isCriterion = claim =>
    Boolean(claim)
    && typeof claim.claimType === 'string'
    && (claim.claimType === 'workflow.acceptance.criterion'
      || (declaredClaimTypes != null && declaredClaimTypes.has(claim.claimType)));

  const criteria = claims.filter(isCriterion);
  if (criteria.length === 0) {
    return null;
  }

  return criteria.reduce(
    (count, claim) => (PENDING.has(normalizedStatus(claim.value || '')) ? count + 1 : count),
    0,
  );
}
|
|
497
|
+
|
|
498
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
499
|
+
|
|
500
|
+
/**
|
|
501
|
+
* ADR 0010 Phase 4b: sidecarGuidance — bundle-first evidence/critique reads.
|
|
502
|
+
* state.json reads are UNCHANGED (state.json stays as primary source).
|
|
503
|
+
* evidence.json verdict/checks: read from trust.bundle when present, fall back
|
|
504
|
+
* to evidence.json for bundle-less sessions (no regression).
|
|
505
|
+
* not_verified_gaps: always from evidence.json (no bundle equivalent).
|
|
506
|
+
* critique status: read from trust.bundle when present, fall back to critique.json.
|
|
507
|
+
* Finding details: still from critique.json when present (both bundle and sidecar paths).
|
|
508
|
+
*
|
|
509
|
+
* ADR 0016 P-c: when activeFlowStep is non-null, pass its declared claimTypes to
|
|
510
|
+
* bundle helpers so declared-type claims (e.g. builder.verify.tests) produce the
|
|
511
|
+
* same sidecar guidance signals as workflow.* claims.
|
|
512
|
+
*/
|
|
513
|
+
function sidecarGuidance(root, artifactDir, activeFlowStep) {
|
|
514
|
+
// Build the declared claimType set from the FlowDefinition gate expects[] (P-c).
|
|
515
|
+
// Null when no FlowDefinition is active (fallback: helpers use workflow.* prefix only).
|
|
516
|
+
const declaredClaimTypes = activeFlowStep && Array.isArray(activeFlowStep.gateExpects)
|
|
517
|
+
? new Set(activeFlowStep.gateExpects.map(e => e && e.bundle_claim && e.bundle_claim.claimType).filter(Boolean))
|
|
518
|
+
: null;
|
|
233
519
|
const warnings = [];
|
|
234
520
|
const state = readJsonFile(path.join(artifactDir, 'state.json'));
|
|
235
|
-
const evidence = readJsonFile(path.join(artifactDir, 'evidence.json'));
|
|
236
|
-
const critique = readJsonFile(path.join(artifactDir, 'critique.json'));
|
|
237
521
|
const base = relative(root, artifactDir);
|
|
238
522
|
|
|
239
523
|
if (state) {
|
|
240
524
|
const status = normalizedStatus(state.status || 'unknown');
|
|
241
525
|
const phase = normalizedStatus(state.phase || 'unknown');
|
|
242
526
|
const next = state.next_action && typeof state.next_action === 'object' ? state.next_action : null;
|
|
243
|
-
|
|
244
|
-
|
|
527
|
+
const nextStatus = next ? normalizedStatus(next.status || 'unknown') : 'unknown';
|
|
528
|
+
// The agent's work is complete when the recorded next action is done — the
|
|
529
|
+
// gate must not block the agent for a remaining human/CI step (e.g. a verified
|
|
530
|
+
// task whose only next_action is "commit the migration").
|
|
531
|
+
const agentComplete = nextStatus === 'done';
|
|
532
|
+
if (!TERMINAL_STATUSES.has(status) && !agentComplete) {
|
|
245
533
|
const nextSummary = next && next.summary ? `; next_action:${nextStatus} "${safeOneLine(next.summary)}"` : '';
|
|
246
534
|
warnings.push(`${base} workflow state: status:${status} phase:${phase}${nextSummary}`);
|
|
247
535
|
}
|
|
@@ -252,54 +540,1021 @@ function sidecarGuidance(root, artifactDir) {
|
|
|
252
540
|
warnings.push(`${base} next action: ${safeOneLine(next.summary)}${next.target_phase ? ` (target phase: ${safeOneLine(next.target_phase, 80)})` : ''}`);
|
|
253
541
|
}
|
|
254
542
|
|
|
255
|
-
|
|
256
|
-
|
|
543
|
+
// ── Evidence verdict + checks: bundle-first, fallback to evidence.json ────
|
|
544
|
+
const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
|
|
545
|
+
const bundleClaims = bundle && Array.isArray(bundle.claims) ? bundle.claims : null;
|
|
546
|
+
|
|
547
|
+
if (bundleClaims) {
|
|
548
|
+
// Phase 4b: read verdict and per-check signals from trust.bundle claims.
|
|
549
|
+
// P-c: pass declaredClaimTypes so declared-type claims are included alongside workflow.*.
|
|
550
|
+
const verdict = bundleEvidenceVerdict(bundleClaims, declaredClaimTypes);
|
|
551
|
+
if (verdict && verdict !== 'pass' && verdict !== 'skip') {
|
|
552
|
+
warnings.push(`${base} evidence verdict:${safeOneLine(verdict, 40)}; do not deliver without accepted gap or new evidence.`);
|
|
553
|
+
}
|
|
554
|
+
const blockingChecks = bundleBlockingChecks(bundleClaims, declaredClaimTypes);
|
|
555
|
+
for (const check of blockingChecks.slice(0, 4)) {
|
|
556
|
+
const status = safeOneLine(check.status || 'unknown', 40);
|
|
557
|
+
warnings.push(`${base} evidence check ${safeOneLine(check.id || 'unknown', 80)} status:${status}: ${safeOneLine(check.summary)}`);
|
|
558
|
+
}
|
|
559
|
+
} else {
|
|
560
|
+
// Fallback: no bundle — read from evidence.json (existing behavior, no regression).
|
|
561
|
+
const evidence = readJsonFile(path.join(artifactDir, 'evidence.json'));
|
|
562
|
+
if (evidence && normalizedStatus(evidence.verdict) && normalizedStatus(evidence.verdict) !== 'pass') {
|
|
563
|
+
warnings.push(`${base} evidence verdict:${safeOneLine(evidence.verdict, 40)}; do not deliver without accepted gap or new evidence.`);
|
|
564
|
+
}
|
|
565
|
+
if (evidence && Array.isArray(evidence.checks)) {
|
|
566
|
+
const blockingChecks = evidence.checks.filter(check => {
|
|
567
|
+
const status = normalizedStatus(check && check.status);
|
|
568
|
+
return status === 'fail' || status === 'failed' || status === 'not_verified' || status === 'not-verified';
|
|
569
|
+
});
|
|
570
|
+
for (const check of blockingChecks.slice(0, 4)) {
|
|
571
|
+
const status = safeOneLine(check.status || 'unknown', 40);
|
|
572
|
+
warnings.push(`${base} evidence check ${safeOneLine(check.id || 'unknown', 80)} status:${status}: ${safeOneLine(check.summary)}`);
|
|
573
|
+
}
|
|
574
|
+
}
|
|
257
575
|
}
|
|
576
|
+
|
|
577
|
+
// not_verified_gaps: always from evidence.json (no bundle equivalent).
|
|
578
|
+
const evidence = readJsonFile(path.join(artifactDir, 'evidence.json'));
|
|
258
579
|
if (evidence && Array.isArray(evidence.not_verified_gaps) && evidence.not_verified_gaps.length > 0) {
|
|
259
580
|
for (const gap of evidence.not_verified_gaps.slice(0, 3)) {
|
|
260
581
|
warnings.push(`${base} evidence NOT_VERIFIED gap: ${safeOneLine(gap)}`);
|
|
261
582
|
}
|
|
262
583
|
}
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
584
|
+
|
|
585
|
+
// ── Critique: bundle-first status, critique.json for finding details ──────
|
|
586
|
+
const critique = readJsonFile(path.join(artifactDir, 'critique.json'));
|
|
587
|
+
|
|
588
|
+
if (bundleClaims) {
|
|
589
|
+
// Phase 4b: read critique status from trust.bundle claims.
|
|
590
|
+
// P-c: pass declaredClaimTypes so declared-type critique claims are included.
|
|
591
|
+
const critiqueStatusVal = bundleCritiqueStatus(bundleClaims, declaredClaimTypes);
|
|
592
|
+
const critiqueIsBlocking = critiqueStatusVal !== null && normalizedStatus(critiqueStatusVal) !== 'pass';
|
|
593
|
+
if (critiqueIsBlocking) {
|
|
594
|
+
warnings.push(`${base} critique status:${safeOneLine(critiqueStatusVal || 'unknown', 40)}; required critique must pass or findings be accepted.`);
|
|
595
|
+
// Finding details: still from critique.json when present (both paths use the same details source).
|
|
596
|
+
const critiques = critique && Array.isArray(critique.critiques) ? critique.critiques : [];
|
|
597
|
+
let openCount = 0;
|
|
598
|
+
for (const review of critiques) {
|
|
599
|
+
const findings = Array.isArray(review && review.findings) ? review.findings : [];
|
|
600
|
+
for (const finding of findings) {
|
|
601
|
+
if (!finding || normalizedStatus(finding.status) !== 'open') continue;
|
|
602
|
+
warnings.push(`${base} critique open ${safeOneLine(finding.severity || 'unknown', 40)}: ${safeOneLine(finding.description)}`);
|
|
603
|
+
openCount += 1;
|
|
604
|
+
if (openCount >= 3) break;
|
|
605
|
+
}
|
|
606
|
+
if (openCount >= 3) break;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
} else {
|
|
610
|
+
// Fallback: no bundle — read from critique.json (existing behavior, no regression).
|
|
611
|
+
if (critique && critique.required === true && normalizedStatus(critique.status) !== 'pass') {
|
|
612
|
+
warnings.push(`${base} critique status:${safeOneLine(critique.status || 'unknown', 40)}; required critique must pass or findings be accepted.`);
|
|
613
|
+
const critiques = Array.isArray(critique.critiques) ? critique.critiques : [];
|
|
614
|
+
let openCount = 0;
|
|
615
|
+
for (const review of critiques) {
|
|
616
|
+
const findings = Array.isArray(review && review.findings) ? review.findings : [];
|
|
617
|
+
for (const finding of findings) {
|
|
618
|
+
if (!finding || normalizedStatus(finding.status) !== 'open') continue;
|
|
619
|
+
warnings.push(`${base} critique open ${safeOneLine(finding.severity || 'unknown', 40)}: ${safeOneLine(finding.description)}`);
|
|
620
|
+
openCount += 1;
|
|
621
|
+
if (openCount >= 3) break;
|
|
622
|
+
}
|
|
623
|
+
if (openCount >= 3) break;
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
return warnings;
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// -----------------------------------------------------------------------
|
|
632
|
+
// Capture-first evidence determinism (Part B)
|
|
633
|
+
//
|
|
634
|
+
// The trust.bundle (emitted by workflow-sidecar via @kontourai/surface) carries
|
|
635
|
+
// capture-authoritative evidence items. The capture hook (evidence-capture.js)
|
|
636
|
+
// writes REAL command results to command-log.jsonl at the source. Here at the
|
|
637
|
+
// Stop gate we cross-reference claimed-pass command checks against that captured
|
|
638
|
+
// truth, and only fall back to re-running a TRUSTED command when the log has no
|
|
639
|
+
// execution for a claimed-pass command (i.e. it was never actually run).
|
|
640
|
+
//
|
|
641
|
+
// ADR 0010 Phase 4b: source the claimed-pass command checks from the bundle's
|
|
642
|
+
// evidence[] (execution/command items) instead of evidence.json checks.
|
|
643
|
+
// command-log.jsonl path UNCHANGED — it stays the capture truth source.
|
|
644
|
+
// -----------------------------------------------------------------------
|
|
645
|
+
|
|
646
|
+
/**
 * Canonicalize a command string for use as a lookup key.
 *
 * Every run of whitespace (spaces, tabs, newlines) becomes a single space and
 * leading/trailing whitespace is removed. Any falsy input yields ''.
 *
 * @param {*} value - Raw command text (coerced with String()).
 * @returns {string} The whitespace-normalized command.
 */
function normalizeCommand(value) {
  const text = String(value || '');
  const collapsed = text.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
|
649
|
+
|
|
650
|
+
/**
 * Load command-log.jsonl and fold it into a Map of
 * normalized-command -> aggregate outcome `{ ran, failed, exitCode }`.
 *
 * Aggregation is FAIL-sticky: once any run of a command failed
 * (observedResult === 'fail' or a non-zero integer exitCode), `failed` stays
 * true even if a later run of the same command passed. `exitCode` is the most
 * recent integer exit code seen for the command (null when none was logged).
 *
 * Blank lines, unparseable lines, and entries without a string `command` are
 * ignored. A missing or unreadable log yields an empty Map.
 *
 * @param {string} artifactDir - Directory that holds command-log.jsonl.
 * @returns {Map<string, {ran: boolean, failed: boolean, exitCode: (number|null)}>}
 */
function readCommandLog(artifactDir) {
  const logPath = path.join(artifactDir, 'command-log.jsonl');
  const outcomes = new Map();

  let contents;
  try {
    contents = fs.readFileSync(logPath, 'utf8');
  } catch {
    return outcomes;
  }

  contents.split('\n').forEach((rawLine) => {
    const text = rawLine.trim();
    if (!text) return;

    let record;
    try {
      record = JSON.parse(text);
    } catch {
      return;
    }
    if (!record || typeof record.command !== 'string') return;

    const commandKey = normalizeCommand(record.command);
    if (!commandKey) return;

    const hasExitCode = Number.isInteger(record.exitCode);
    const failedNow = record.observedResult === 'fail' || (hasExitCode && record.exitCode !== 0);
    const earlier = outcomes.get(commandKey);

    // A failure recorded by any earlier run is never cleared by this reader.
    const failedEver = failedNow ? true : (earlier ? earlier.failed : false);
    const lastExitCode = hasExitCode ? record.exitCode : (earlier ? earlier.exitCode : null);

    outcomes.set(commandKey, { ran: true, failed: failedEver, exitCode: lastExitCode });
  });

  return outcomes;
}
|
|
678
|
+
|
|
679
|
+
/**
 * Load command-log.jsonl and build a Map of
 * normalized-command -> outcome of the LAST logged run `{ ran, failed, exitCode }`.
 *
 * Unlike readCommandLog (where a FAIL is sticky), the final entry for a command
 * wins here, so a genuine re-run that passes replaces an earlier failure. Only
 * a new entry in the log can do that — a new claim cannot change the log.
 * Used by capturedFailReconciliation and captureCrossReference (Fix C).
 *
 * Blank lines, unparseable lines, and entries without a string `command` are
 * ignored. A missing or unreadable log yields an empty Map.
 *
 * @param {string} artifactDir - Directory that holds command-log.jsonl.
 * @returns {Map<string, {ran: boolean, failed: boolean, exitCode: (number|null)}>}
 */
function readLatestCommandLog(artifactDir) {
  const logPath = path.join(artifactDir, 'command-log.jsonl');

  let text;
  try {
    text = fs.readFileSync(logPath, 'utf8');
  } catch {
    return new Map();
  }

  const latest = new Map();
  for (const candidate of text.split('\n').map((row) => row.trim())) {
    if (!candidate) continue;

    let record;
    try {
      record = JSON.parse(candidate);
    } catch {
      continue;
    }
    if (!record || typeof record.command !== 'string') continue;

    const commandKey = normalizeCommand(record.command);
    if (!commandKey) continue;

    const code = Number.isInteger(record.exitCode) ? record.exitCode : null;
    // Unconditional overwrite: the most recent run defines the outcome.
    latest.set(commandKey, {
      ran: true,
      failed: record.observedResult === 'fail' || (code !== null && code !== 0),
      exitCode: code,
    });
  }
  return latest;
}
|
|
709
|
+
|
|
710
|
+
// ─── Claim-status helpers for capturedFailReconciliation ─────────────────────
|
|
711
|
+
|
|
712
|
+
/**
 * Decide whether a claim's stored status + value together assert that the
 * command PASSED. Used to detect namespace-agnostic false-completions.
 *
 * Both parts must match: the status must be one of the accepting statuses and
 * the value one of the passing values. Comparison is case-insensitive; the
 * value additionally has whitespace collapsed and trimmed (the status does not).
 *
 * Fix E: 'approved' is accepted as a status alias and 'true'/'ok' as value aliases.
 *
 * @param {*} status - Claim status (coerced with String()).
 * @param {*} value - Claim value (coerced with String()).
 * @returns {boolean} True when the claim asserts a pass.
 */
function claimAssertsPass(status, value) {
  const statusText = String(status || '').toLowerCase();
  const valueText = String(value || '').toLowerCase().replace(/\s+/g, ' ').trim();

  const acceptingStatuses = ['verified', 'assumed', 'accepted', 'trusted', 'approved'];
  const passingValues = ['pass', 'passed', 'verified', 'true', 'ok'];

  if (!acceptingStatuses.includes(statusText)) return false;
  return passingValues.includes(valueText);
}
|
|
723
|
+
|
|
724
|
+
/**
 * Decide whether a claim's stored status OR value ACKNOWLEDGES a failure
 * (the agent owned the failure rather than claiming pass).
 *
 * Either part is sufficient. Comparison is case-insensitive; the value
 * additionally has whitespace collapsed and trimmed (the status does not).
 * Both the underscore and the hyphen spelling of "not verified" are accepted
 * for the status and for the value, so the two checks agree with each other.
 *
 * @param {*} status - Claim status (coerced with String()).
 * @param {*} value - Claim value (coerced with String()).
 * @returns {boolean} True when the claim acknowledges a failure.
 */
function claimAcknowledgesFailure(status, value) {
  const s = String(status || '').toLowerCase();
  const v = String(value || '').toLowerCase().replace(/\s+/g, ' ').trim();
  return s === 'disputed' || s === 'rejected' || s === 'failing' || s === 'failed'
    || s === 'not_verified' || s === 'not-verified'
    // 'not-verified' mirrors the status aliases above; previously only the
    // underscore spelling was recognized as a value.
    || v === 'fail' || v === 'failed' || v === 'not_verified' || v === 'not-verified' || v === 'failing';
}
|
|
735
|
+
|
|
736
|
+
/**
 * Report whether a command string contains an exit-code-neutralizing operator.
 *
 * A claimed-pass check whose captured command uses one of these cannot be
 * accepted as a deterministic pass — the real sub-command may have failed
 * silently.
 *
 * R6 extended logic. The same patterns are used by scripts/ci/trust-reconcile.js;
 * keep the two in sync (centralize as a follow-up if drift becomes a
 * maintenance concern). Flagged forms:
 *   - ANY `||`. A legitimate verification command never needs it — its only
 *     purpose there is to mask the real exit code (`npm test || exit 0`,
 *     `npm test || echo ok`, `npm test || /bin/true`, `npm test || (exit 0)`).
 *   - `| true` — a single pipe into true always exits 0.
 *   - `;` or newline followed by `true`, `:`, `exit 0`, or `/bin/true`.
 *
 * Fix D: applied in captureCrossReference's satisfied path and in
 * capturedFailReconciliation.
 *
 * @param {string} cmd - Command text to inspect.
 * @returns {boolean} True when any masking pattern matches.
 */
function hasLaunderingOperator(cmd) {
  const maskingPatterns = [
    // `||` anywhere: suppresses the real exit code of the left-hand command.
    /\|\|/,
    // `cmd | true`: the pipeline's status is that of `true`.
    /\|\s*true\b/,
    // Exit-neutralizing command appended after `;` or a newline.
    /[;\n]\s*true\b/,
    /[;\n]\s*:\s*(?:$|\s|;)/,
    /[;\n]\s*exit\s+0\b/,
    /[;\n]\s*\/bin\/true\b/,
  ];
  return maskingPatterns.some((pattern) => pattern.test(cmd));
}
|
|
766
|
+
|
|
767
|
+
// ─── Hash-chain integrity verification (Increment B2, tamper-EVIDENCE) ────────
|
|
768
|
+
//
|
|
769
|
+
// command-log.jsonl is the independent capture truth source. Each chained entry
|
|
770
|
+
// carries a `_chain` block: {seq, prevHash, hash} where
|
|
771
|
+
// hash = sha256(prevHash + canonicalJson(record))
|
|
772
|
+
// and canonicalJson is the record WITHOUT `_chain`, keys sorted alphabetically.
|
|
773
|
+
//
|
|
774
|
+
// HONEST FRAMING: this is tamper-EVIDENCE, not tamper-PROOF. An agent that
|
|
775
|
+
// rewrites all hashes can still forge a valid chain. The real tamper-proof
|
|
776
|
+
// boundary is the signed checkpoint (B1). This raises the local bar and catches
|
|
777
|
+
// casual/accidental tampering and corruption.
|
|
778
|
+
//
|
|
779
|
+
// verifyCommandLogChain returns:
|
|
780
|
+
// "ok" — all entries have _chain and every hash/linkage is valid.
|
|
781
|
+
// "legacy" — no _chain fields at all (pre-B2 log); treat as today, no
|
|
782
|
+
// integrity claim. Backward-compat: existing fixtures stay green.
|
|
783
|
+
// "broken" — a chained entry has a bad hash or bad linkage; the capture
|
|
784
|
+
// truth source appears altered/removed/reordered.
|
|
785
|
+
//
|
|
786
|
+
// The genesis prevHash is a fixed arbitrary sentinel — NOT the SHA256 of any
|
|
787
|
+
// specific input string. The comment in evidence-capture.js previously (and
|
|
788
|
+
// incorrectly) claimed it was sha256("flow-agents:command-log:genesis"); it is not.
|
|
789
|
+
// Writer (evidence-capture.js CHAIN_GENESIS) and verifier (CHAIN_GENESIS_VERIFY here)
|
|
790
|
+
// MUST use the same value. Do not change one without changing the other.
|
|
791
|
+
// Seed value that verifyCommandLogChain() uses as `prevHash` when checking the
// first chained entry of command-log.jsonl.
const CHAIN_GENESIS_VERIFY = 'a3f9e2b7d5c84f1e6a0d2c3b9f7e1a4d8c6b5f2e9a0d3c7b1f4e8a2d6c0b9f3';
|
|
792
|
+
|
|
793
|
+
/**
 * Produce the canonical JSON text of a log record for chain verification:
 * the record WITHOUT its `_chain` property, with its own top-level keys
 * inserted in sorted order, serialized by JSON.stringify.
 *
 * Only top-level keys are reordered; nested values are serialized as they are.
 * The output must be byte-identical to canonicalJsonForChain() in
 * evidence-capture.js, otherwise every recomputed hash will mismatch.
 *
 * @param {object} record - Parsed command-log entry.
 * @returns {string} Canonical JSON used as hash input.
 */
function canonicalJsonForVerify(record) {
  const ordered = {};
  Object.keys(record)
    .filter((key) => key !== '_chain')
    .sort()
    .forEach((key) => {
      // Plain assignment on purpose: keeps the same property semantics as the writer.
      ordered[key] = record[key];
    });
  return JSON.stringify(ordered);
}
|
|
803
|
+
|
|
804
|
+
/**
 * Verify the hash chain of command-log.jsonl.
 *
 * Each chained entry carries `_chain: { prevHash, hash }` and must satisfy
 *   hash === sha256(prevHash + canonicalJsonForVerify(entry))
 * where prevHash is the previous chained entry's hash (CHAIN_GENESIS_VERIFY
 * for the first chained entry).
 *
 * @param {string} artifactDir - Directory that holds command-log.jsonl.
 * @returns {{status: ('ok'|'legacy'|'broken'), brokenAt: (number|null)}}
 *   - "legacy": log missing/unreadable, empty, or no entry has a `_chain.hash`.
 *   - "broken": a chained entry has a bad hash or bad linkage, or an unchained
 *     entry appears after a chained one.
 *   - "ok": every chained entry verifies.
 *   `brokenAt` is the 0-based index of the first broken entry among the
 *   successfully PARSED entries (malformed lines are dropped before indexing,
 *   so this is not necessarily a file line number); null otherwise.
 */
function verifyCommandLogChain(artifactDir) {
  const file = path.join(artifactDir, 'command-log.jsonl');
  let raw = '';
  try {
    raw = fs.readFileSync(file, 'utf8');
  } catch {
    return { status: 'legacy', brokenAt: null };
  }

  // Parse every non-blank line. Lines that are not valid JSON, or that do not
  // parse to an object, are dropped and take no part in verification.
  const entries = [];
  for (const line of raw.split('\n')) {
    if (!line.trim()) continue;
    try {
      const entry = JSON.parse(line);
      if (entry && typeof entry === 'object') entries.push(entry);
    } catch { /* skip malformed lines */ }
  }

  const isChained = (entry) => Boolean(entry._chain) && typeof entry._chain.hash === 'string';

  // No chained entries at all (including an empty log): pre-chain log, no integrity claim.
  if (!entries.some(isChained)) return { status: 'legacy', brokenAt: null };

  let prevHash = CHAIN_GENESIS_VERIFY;
  let prevWasChained = false;
  for (let i = 0; i < entries.length; i++) {
    const entry = entries[i];

    if (!isChained(entry)) {
      // Unchained entries are tolerated only as a prefix (upgrade transition).
      // One appearing after the chain has started could be standing in for a
      // removed chained entry, so it breaks the chain.
      if (prevWasChained) return { status: 'broken', brokenAt: i };
      continue;
    }

    const chain = entry._chain;
    const expectedHash = crypto.createHash('sha256')
      .update(prevHash + canonicalJsonForVerify(entry), 'utf8')
      .digest('hex');
    // Content check: the stored hash must match the recomputed one.
    if (chain.hash !== expectedHash) return { status: 'broken', brokenAt: i };
    // Linkage check: the entry must point at the hash we actually carried forward.
    if (chain.prevHash !== prevHash) return { status: 'broken', brokenAt: i };

    prevHash = chain.hash;
    prevWasChained = true;
  }

  return { status: 'ok', brokenAt: null };
}
|
|
866
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
867
|
+
|
|
868
|
+
/**
 * Pick a TRUSTED command to re-run for a claimed-pass check whose command was
 * never captured. Sources are tried from most to least trusted:
 *   (a) the command authored upfront on the matching acceptance criterion
 *       (acceptance.json evidence_ref of kind "command", `excerpt`/`command`);
 *   (b) a target the project itself declares — package.json scripts
 *       {test,build,lint}, a Makefile target, cargo, tox/pyproject, just/task;
 *   (c) the model's free-form check command, ONLY when the environment sets
 *       FLOW_AGENTS_GOAL_FIT_RECHECK=true (the RCE-risky opt-in path).
 *
 * @param {string} root - Project root; default working directory for the re-run.
 * @param {string} artifactDir - Artifact directory (not read by this function).
 * @param {object} check - The claimed-pass check (`command`, `id`, `kind`).
 * @param {object|null} acceptance - Parsed acceptance.json, if any.
 * @returns {{argv: string[], cwd: string, source: string}|null}
 *   The command to run, or null when nothing trusted resolves.
 */
function resolveTrustedCommand(root, artifactDir, check, acceptance) {
  // (a) Command named by the acceptance criterion; run through a login shell.
  const authored = acceptanceCommandFor(check, acceptance);
  if (authored) {
    return { argv: ['bash', '-lc', authored], cwd: root, source: 'acceptance' };
  }

  // (b) Project-declared manifest target chosen from the check's command/id/kind.
  const manifest = declaredManifestTarget(root, check);
  if (manifest) {
    return { argv: manifest.argv, cwd: manifest.cwd || root, source: 'manifest' };
  }

  // (c) Free-form model command: strictly opt-in.
  const recheckFlag = String(process.env.FLOW_AGENTS_GOAL_FIT_RECHECK || '').toLowerCase();
  if (recheckFlag !== 'true') return null;

  const modelCommand = normalizeCommand(check && check.command);
  return modelCommand
    ? { argv: ['bash', '-lc', modelCommand], cwd: root, source: 'model-command (FLOW_AGENTS_GOAL_FIT_RECHECK)' }
    : null;
}
|
|
895
|
+
|
|
896
|
+
/**
 * Find the command authored in acceptance.json that corresponds to a check.
 *
 * Scans every criterion's `evidence_refs` for refs of kind "command" and reads
 * the command from `excerpt` (or `command` when excerpt is empty), normalized.
 * A ref is returned immediately on a strong match: the criterion id equals the
 * check id, or the ref command equals the check command.
 *
 * When nothing matches, the first authored command is used as a fallback —
 * but only if the check itself names no command; a check that names a
 * different command gets null.
 *
 * @param {object} check - The check (`id`, `command`).
 * @param {object|null} acceptance - Parsed acceptance.json.
 * @returns {string|null} Normalized authored command, or null.
 */
function acceptanceCommandFor(check, acceptance) {
  const criteria = acceptance ? acceptance.criteria : null;
  if (!Array.isArray(criteria)) return null;

  const wantedId = normalizedStatus(check && check.id);
  const wantedCommand = normalizeCommand(check && check.command);
  let fallback = null;

  for (const criterion of criteria) {
    const refs = criterion && Array.isArray(criterion.evidence_refs) ? criterion.evidence_refs : [];
    for (const ref of refs) {
      const isCommandRef = Boolean(ref) && typeof ref === 'object' && ref.kind === 'command';
      if (!isCommandRef) continue;

      const authored = normalizeCommand(ref.excerpt || ref.command);
      if (!authored) continue;

      // Remember the first usable authored command for the no-match fallback.
      fallback = fallback || authored;

      const sameId = Boolean(wantedId) && normalizedStatus(criterion.id) === wantedId;
      const sameCommand = Boolean(wantedCommand) && authored === wantedCommand;
      if (sameId || sameCommand) return authored;
    }
  }

  return wantedCommand ? null : fallback;
}
|
|
273
917
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
918
|
+
/**
 * Map a claimed-pass command check onto a NAMED target the project declares.
 *
 * The check's command/id/kind text is used only to choose WHICH category of
 * target is wanted (test | build | lint); the raw string is never executed.
 * The first manifest that provides the wanted target wins, in this order:
 * package.json scripts, Makefile, Cargo.toml, tox.ini, pyproject.toml,
 * justfile, Taskfile. `veritas readiness` is just one such declared command —
 * there is no special-casing.
 *
 * NOTE(review): in the classifier regexes the `\b` anchors bind only to the
 * first and last alternative (alternation has the lowest precedence), so e.g.
 * "spec", "compile" or "format" match anywhere inside a word. Confirm whether
 * that is intended before tightening.
 *
 * @param {string} root - Project root to look for manifests in.
 * @param {object} check - The check (`command`, `id`, `kind`).
 * @returns {{argv: string[], cwd: string}|null} Command to run, or null when
 *   no category matches or no manifest declares a suitable target.
 */
function declaredManifestTarget(root, check) {
  const commandText = normalizeCommand(check && check.command);
  const idText = normalizedStatus(check && check.id);
  const kindText = normalizedStatus(check && check.kind);
  const haystack = `${commandText} ${idText} ${kindText}`.toLowerCase();

  // First matching category wins: test, then build, then lint.
  const categories = [
    ['test', /\btest|spec|jest|vitest|pytest\b/],
    ['build', /\bbuild|compile|bundle\b/],
    ['lint', /\blint|format|style|typecheck\b/],
  ];
  const matched = categories.find(([, pattern]) => pattern.test(haystack));
  if (!matched) return null;
  const want = matched[0];

  const exists = (name) => fs.existsSync(path.join(root, name));

  // package.json scripts.{test,build,lint}; `typecheck` stands in for a missing `lint`.
  const pkg = readJsonFile(path.join(root, 'package.json'));
  if (pkg && pkg.scripts && typeof pkg.scripts === 'object') {
    let scriptName = null;
    if (pkg.scripts[want]) {
      scriptName = want;
    } else if (want === 'lint' && pkg.scripts.typecheck) {
      scriptName = 'typecheck';
    }
    if (scriptName) return { argv: ['npm', 'run', scriptName, '--silent'], cwd: root };
  }

  // Makefile: only the first makefile name that exists is inspected, and the
  // wanted target must appear as a rule at the start of a line.
  let makefile;
  for (const candidate of ['Makefile', 'makefile', 'GNUmakefile']) {
    const candidatePath = path.join(root, candidate);
    if (fs.existsSync(candidatePath)) {
      makefile = candidatePath;
      break;
    }
  }
  if (makefile) {
    try {
      const text = fs.readFileSync(makefile, 'utf8');
      if (new RegExp(`^${want}\\s*:`, 'm').test(text)) return { argv: ['make', want], cwd: root };
    } catch { /* ignore */ }
  }

  // cargo: `cargo test` / `cargo build` whenever a Cargo.toml is present.
  if ((want === 'test' || want === 'build') && exists('Cargo.toml')) {
    return { argv: ['cargo', want], cwd: root };
  }

  // Python ecosystem: tox first, then pytest when a pyproject.toml is present.
  if (want === 'test') {
    if (exists('tox.ini')) return { argv: ['tox'], cwd: root };
    if (exists('pyproject.toml')) return { argv: ['pytest'], cwd: root };
  }

  // just / task runners: the wanted category is passed as the recipe name.
  const runners = [['just', 'justfile'], ['task', 'Taskfile.yml'], ['task', 'Taskfile.yaml']];
  for (const [binary, manifestName] of runners) {
    if (exists(manifestName)) return { argv: [binary, want], cwd: root };
  }

  return null;
}
|
|
962
|
+
|
|
963
|
+
/**
 * Timeout, in milliseconds, for a backstop re-run.
 *
 * Reads FLOW_AGENTS_GOAL_FIT_BACKSTOP_TIMEOUT_MS and parses it as a base-10
 * integer. Anything that is unset, unparseable, zero, or negative falls back
 * to 120000.
 *
 * @returns {number} Positive integer timeout in milliseconds.
 */
function resolveBackstopTimeout() {
  const fallbackMs = 120000;
  const configured = Number.parseInt(process.env.FLOW_AGENTS_GOAL_FIT_BACKSTOP_TIMEOUT_MS || '', 10);
  if (!Number.isInteger(configured) || configured <= 0) {
    return fallbackMs;
  }
  return configured;
}
|
|
967
|
+
|
|
968
|
+
/**
 * Resolve how the trusted backstop re-run participates in the gate, from
 * FLOW_AGENTS_GOAL_FIT_BACKSTOP (trimmed, case-insensitive):
 *   - "block" (default, also used for unset/unknown values): the re-run may
 *     ride block mode, so a never-actually-run claimed-pass command is caught;
 *   - "off" or its alias "warn": the re-run becomes warn-only (for latency);
 *   - "skip": no re-run at all — NOT_VERIFIED is recorded instead.
 *
 * @returns {('block'|'off'|'skip')} The effective mode.
 */
function resolveBackstopMode() {
  const requested = String(process.env.FLOW_AGENTS_GOAL_FIT_BACKSTOP || '').trim().toLowerCase();
  switch (requested) {
    case 'off':
    case 'warn':
      return 'off';
    case 'skip':
      return 'skip';
    default:
      return 'block';
  }
}
|
|
979
|
+
|
|
980
|
+
/**
 * Synchronously re-run a trusted command and report the outcome.
 *
 * The child runs with stdin ignored, stdout/stderr piped, and is SIGKILLed
 * when it exceeds resolveBackstopTimeout() milliseconds.
 *
 * @param {{argv: string[], cwd: string}} trusted - Program + arguments and the
 *   working directory, as produced by resolveTrustedCommand().
 * @returns {{ran: true, passed: boolean, exitCode: number}
 *   | {ran: false, error: string, timedOut?: boolean}}
 *   `ran: true` when the process exited on its own (`passed` is exit code 0).
 *   `ran: false` when it could not be spawned, timed out, or was killed by a
 *   signal; `timedOut` is true when the timeout fired.
 */
function runBackstop(trusted) {
  const [program, ...args] = trusted.argv;
  const result = spawnSync(program, args, {
    cwd: trusted.cwd,
    encoding: 'utf8',
    timeout: resolveBackstopTimeout(),
    killSignal: 'SIGKILL',
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  if (result.error) {
    const failure = { ran: false, error: result.error.code || result.error.message };
    // spawnSync reports an expired timeout through `error` (code ETIMEDOUT) in
    // addition to `signal`, so the timeout has to be flagged on this path —
    // the signal branch below is never reached for a real timeout.
    if (result.error.code === 'ETIMEDOUT') failure.timedOut = true;
    return failure;
  }

  // Terminated by a signal without a spawn error (e.g. killed externally).
  if (result.signal) {
    return {
      ran: false,
      error: `killed (${result.signal})`,
      timedOut: result.signal === 'SIGKILL' || result.signal === 'SIGTERM',
    };
  }

  return { ran: true, passed: result.status === 0, exitCode: result.status };
}
|
|
992
|
+
|
|
993
|
+
/**
|
|
994
|
+
* ADR 0010 Phase 4b: captureCrossReference — bundle-first command check sourcing.
|
|
995
|
+
* Sources the claimed-pass command checks from trust.bundle evidence[] (execution/
|
|
996
|
+
* command items) when the bundle is present, falling back to evidence.json checks
|
|
997
|
+
* for bundle-less sessions. command-log.jsonl UNCHANGED — it stays the capture
|
|
998
|
+
* truth source. The teeth (claimed-pass + captured-fail → block) are byte-identical.
|
|
999
|
+
*
|
|
1000
|
+
* ADR 0016 P-c (fix): accept activeFlowStep so declared-type sessions (e.g.
|
|
1001
|
+
* builder.verify.tests) are visible to the cross-reference, closing the hole
|
|
1002
|
+
* where captureCrossReference was the only capture consumer not threaded with
|
|
1003
|
+
* the FlowDefinition. Mirrors the pattern in bundleEnforcement / sidecarGuidance.
|
|
1004
|
+
*/
|
|
1005
|
+
function captureCrossReference(root, artifactDir, activeFlowStep) {
|
|
1006
|
+
// Build the declared claimType set from the FlowDefinition gate expects[] (P-c).
|
|
1007
|
+
// Null when no FlowDefinition is active (fallback: bundleClaimedPassCommandChecks
|
|
1008
|
+
// uses workflow.check.* prefix only — no regression for non-FlowDefinition sessions).
|
|
1009
|
+
const declaredClaimTypes = activeFlowStep && Array.isArray(activeFlowStep.gateExpects)
|
|
1010
|
+
? new Set(activeFlowStep.gateExpects.map(e => e && e.bundle_claim && e.bundle_claim.claimType).filter(Boolean))
|
|
1011
|
+
: null;
|
|
1012
|
+
const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
|
|
1013
|
+
const acceptance = readJsonFile(path.join(artifactDir, 'acceptance.json'));
|
|
1014
|
+
const log = readLatestCommandLog(artifactDir); // Fix C: latest-wins; genuine fix-then-rerun-to-pass clears the block
|
|
1015
|
+
const base = relative(root, artifactDir);
|
|
1016
|
+
const backstopMode = resolveBackstopMode();
|
|
1017
|
+
const warnings = [];
|
|
1018
|
+
|
|
1019
|
+
// AC3 fail-closed: detect a missing command log in a post-execution session.
|
|
1020
|
+
// When state.json confirms the session is past the planning phase (commands should
|
|
1021
|
+
// have run), but command-log.jsonl is absent, emit a FULL_BLOCK warning.
|
|
1022
|
+
// A genuine pre-execution session (no commands expected yet) is NOT blocked —
|
|
1023
|
+
// distinguished by requiring BOTH a non-pre-execution phase AND a non-pre-execution
|
|
1024
|
+
// status (avoids blocking a freshly planned task that has state.json but no log yet).
|
|
1025
|
+
//
|
|
1026
|
+
// BAR-RAISER: an agent could forge state.json with a pre-execution phase to avoid
|
|
1027
|
+
// this. The real anchor is external (signed checkpoints + human review).
|
|
1028
|
+
{
|
|
1029
|
+
const logFileMissing = !fs.existsSync(path.join(artifactDir, 'command-log.jsonl'));
|
|
1030
|
+
if (logFileMissing) {
|
|
1031
|
+
const captureState = readJsonFile(path.join(artifactDir, 'state.json'));
|
|
1032
|
+
if (captureState) {
|
|
1033
|
+
const capturePhase = normalizedStatus(captureState.phase || '');
|
|
1034
|
+
const captureStatus = normalizedStatus(captureState.status || '');
|
|
1035
|
+
// Post-execution: phase is set AND not a pre-execution phase (idea/backlog/pickup/planning).
|
|
1036
|
+
// Also require status is not a pre-execution status (new/planning/planned/backlog).
|
|
1037
|
+
const postExecPhase = capturePhase && !PRE_EXECUTION_PHASES.has(capturePhase);
|
|
1038
|
+
const preExecStatus = !captureStatus || captureStatus === 'new' || PRE_EXECUTION_STATUSES.has(captureStatus);
|
|
1039
|
+
if (postExecPhase && !preExecStatus) {
|
|
1040
|
+
// Fix #216 over-block: only emit the missing-log warning when a command was
|
|
1041
|
+
// actually EXPECTED to be captured — i.e., the trust.bundle evidence has at
|
|
1042
|
+
// least one item with execution.label (concrete proof a command was meant to
|
|
1043
|
+
// be captured). A no-command session (doc review, policy task advanced to
|
|
1044
|
+
// verification without running shell commands) must NOT be blocked here.
|
|
1045
|
+
// Note: `bundle` is already read at the top of captureCrossReference.
|
|
1046
|
+
const captureEvidence = bundle && Array.isArray(bundle.evidence) ? bundle.evidence : [];
|
|
1047
|
+
const hasExpectedCapture = captureEvidence.some(ev => ev && ev.execution && ev.execution.label);
|
|
1048
|
+
if (hasExpectedCapture) {
|
|
1049
|
+
warnings.push(
|
|
1050
|
+
`${base} expected capture log is missing — possible deletion of the capture truth source; ` +
|
|
1051
|
+
`phase:${capturePhase} status:${captureStatus} indicates commands should have run. ` +
|
|
1052
|
+
'Cannot verify command execution deterministically. ' +
|
|
1053
|
+
'Restore from a checkpoint or investigate.'
|
|
1054
|
+
);
|
|
1055
|
+
}
|
|
1056
|
+
}
|
|
285
1057
|
}
|
|
286
|
-
if (openCount >= 3) break;
|
|
287
1058
|
}
|
|
288
1059
|
}
|
|
289
1060
|
|
|
1061
|
+
// ── Hash-chain integrity check ──────────────────────────────────────────────
|
|
1062
|
+
// Verify command-log.jsonl before trusting its pass/fail signals. If the chain
|
|
1063
|
+
// is broken (altered, removed, or reordered entries), the capture truth source
|
|
1064
|
+
// is compromised: we must NOT trust its pass signals for claimed-pass checks.
|
|
1065
|
+
//
|
|
1066
|
+
// ok → proceed normally (chain is valid, log is trustworthy).
|
|
1067
|
+
// legacy → proceed normally (pre-B2 log, no chain to verify, existing behavior).
|
|
1068
|
+
// broken → emit a loud warning and treat ALL claimed-pass commands relying on
|
|
1069
|
+
// this log as NOT_VERIFIED/blocking — do not let them sail through.
|
|
1070
|
+
let chainBroken = false;
|
|
1071
|
+
{
|
|
1072
|
+
const chainResult = verifyCommandLogChain(artifactDir);
|
|
1073
|
+
if (chainResult.status === 'broken') {
|
|
1074
|
+
chainBroken = true;
|
|
1075
|
+
const brokenIdx = chainResult.brokenAt !== null ? ` (entry ${chainResult.brokenAt})` : '';
|
|
1076
|
+
warnings.push(
|
|
1077
|
+
`${base} command-log integrity check FAILED — capture truth source appears tampered${brokenIdx}: ` +
|
|
1078
|
+
'claimed-pass checks relying on it are NOT trusted. ' +
|
|
1079
|
+
'This is tamper-EVIDENCE (hash-chain broken); alteration, removal, or reordering detected. ' +
|
|
1080
|
+
'NOT_VERIFIED: cannot confirm or deny claimed passes.'
|
|
1081
|
+
);
|
|
1082
|
+
}
|
|
1083
|
+
}
|
|
1084
|
+
|
|
1085
|
+
// Build the list of claimed-pass command checks — bundle-first, evidence.json fallback.
|
|
1086
|
+
let claimedPass;
|
|
1087
|
+
if (bundle && Array.isArray(bundle.claims)) {
|
|
1088
|
+
// Phase 4b: source from trust.bundle evidence[] (execution/command items).
|
|
1089
|
+
claimedPass = bundleClaimedPassCommandChecks(bundle, declaredClaimTypes);
|
|
1090
|
+
} else {
|
|
1091
|
+
// Fallback: no bundle — read from evidence.json (existing behavior, no regression).
|
|
1092
|
+
const evidence = readJsonFile(path.join(artifactDir, 'evidence.json'));
|
|
1093
|
+
if (!evidence || !Array.isArray(evidence.checks)) return warnings;
|
|
1094
|
+
claimedPass = evidence.checks.filter(check => {
|
|
1095
|
+
if (!check || typeof check !== 'object') return false;
|
|
1096
|
+
const kind = normalizedStatus(check.kind);
|
|
1097
|
+
const status = normalizedStatus(check.status);
|
|
1098
|
+
return kind === 'command' && (status === 'pass' || status === 'passed') && normalizeCommand(check.command);
|
|
1099
|
+
});
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1102
|
+
for (const check of claimedPass.slice(0, 8)) {
|
|
1103
|
+
const cmd = normalizeCommand(check.command);
|
|
1104
|
+
if (!cmd) continue;
|
|
1105
|
+
const id = safeOneLine(check.id || cmd, 80);
|
|
1106
|
+
const logged = log.get(cmd);
|
|
1107
|
+
|
|
1108
|
+
if (!chainBroken && logged && logged.ran) {
|
|
1109
|
+
// (1) Cross-reference the capture log first (only when chain is intact).
|
|
1110
|
+
// A broken chain means we cannot trust the log's pass signals — skip this
|
|
1111
|
+
// shortcut and fall through to the backstop/NOT_VERIFIED path below.
|
|
1112
|
+
if (logged.failed) {
|
|
1113
|
+
const exit = Number.isInteger(logged.exitCode) ? ` (exitCode:${logged.exitCode})` : '';
|
|
1114
|
+
warnings.push(`${base} evidence check ${id}: capture log CONTRADICTS claimed pass — command "${safeOneLine(cmd, 120)}" was recorded as FAIL${exit}. This is a caught false-completion.`);
|
|
1115
|
+
} else if (hasLaunderingOperator(cmd)) {
|
|
1116
|
+
// Fix D: exit-code laundering. The captured exit-0 is not trustworthy — the command
|
|
1117
|
+
// baked in '|| true' / '|| :' / '; true' / '; exit 0' / '| true' to mask the real result.
|
|
1118
|
+
warnings.push(`${base} evidence check ${id}: claimed pass relies on an exit-code-laundered command "${safeOneLine(cmd, 120)}" — the exit code is not a trustworthy signal (laundering operators mask the real exit code).`);
|
|
1119
|
+
}
|
|
1120
|
+
// else: log shows it ran and passed with no laundering → satisfied deterministically.
|
|
1121
|
+
continue;
|
|
1122
|
+
}
|
|
1123
|
+
|
|
1124
|
+
// (2) Backstop: the log has NO execution for this claimed-pass command.
|
|
1125
|
+
if (backstopMode === 'skip') {
|
|
1126
|
+
warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — command "${safeOneLine(cmd, 120)}" was never captured and backstop re-run is disabled (FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip).`);
|
|
1127
|
+
continue;
|
|
1128
|
+
}
|
|
1129
|
+
const trusted = resolveTrustedCommand(root, artifactDir, check, acceptance);
|
|
1130
|
+
if (!trusted) {
|
|
1131
|
+
warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — command "${safeOneLine(cmd, 120)}" was never captured and no trusted command (acceptance criterion / declared manifest target) resolves to re-run it. Set FLOW_AGENTS_GOAL_FIT_RECHECK=true to opt into re-running the model's free-form command.`);
|
|
1132
|
+
continue;
|
|
1133
|
+
}
|
|
1134
|
+
const outcome = runBackstop(trusted);
|
|
1135
|
+
if (!outcome.ran) {
|
|
1136
|
+
warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — trusted backstop (${trusted.source}) could not run (${safeOneLine(outcome.error, 80)}).`);
|
|
1137
|
+
continue;
|
|
1138
|
+
}
|
|
1139
|
+
if (!outcome.passed) {
|
|
1140
|
+
const note = `${base} evidence check ${id}: trusted backstop (${trusted.source}) re-run of "${trusted.argv.join(' ')}" FAILED with exit ${outcome.exitCode}, contradicting the claimed pass. This is a caught false-completion.`;
|
|
1141
|
+
if (backstopMode === 'off') warnings.push(`${note} [backstop in warn mode — not blocking]`);
|
|
1142
|
+
else warnings.push(note);
|
|
1143
|
+
}
|
|
1144
|
+
// backstop passed → claim deterministically confirmed by re-run, no warning.
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
return warnings;
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
/**
 * Namespace-agnostic reconciliation of pass-claims against the capture log
 * (AC1 — closes the allowlist bypass).
 *
 * captureCrossReference only inspects claims admitted by the namespace
 * allowlist (workflow.* prefix or declared gateExpects[]), so a kit-typed claim
 * (e.g. builder.verify.tests) whose captured command FAILED could slip through.
 * This function looks at the LATEST capture of every command, whatever the
 * claim namespace, and reports:
 *   - Case A: a claim asserting pass for a command whose latest capture is FAIL.
 *   - Fix D:  a claim asserting pass for a command whose latest capture is a
 *             PASS but whose command string contains an exit-code-laundering
 *             operator, so exit 0 is not a trustworthy signal.
 *
 * Claims are linked to commands two ways: via trust.bundle evidence items that
 * carry execution.label (joined on claimId), and via claim.fieldOrBehavior
 * normalizing directly to the command.
 *
 * Cases that deliberately produce no warning:
 *   - Empty capture log (no-command session).
 *   - Broken hash chain (Fix E): the log is untrusted; the integrity warning is
 *     emitted by captureCrossReference instead.
 *   - Fail-then-re-run-to-pass: only the latest capture counts.
 *   - A failing command with no pass-claim (Fix B removed the "unaccounted
 *     failure" case because it over-blocked incidental failures such as grep
 *     no-match or git diff --exit-code).
 *
 * @param {string} root         Repository root; used only to build the display path.
 * @param {string} artifactDir  Task artifact directory holding the log and trust.bundle.
 * @param {*} taskStatus        Unused. Kept for signature compatibility (Fix A made
 *                              the check status-independent — it runs on every stop).
 * @returns {string[]} Warning messages; empty when nothing contradicts the capture.
 */
function capturedFailReconciliation(root, artifactDir, taskStatus) {
  const captured = readLatestCommandLog(artifactDir);
  if (captured.size === 0) return []; // No captures — nothing to reconcile

  // Fix E: a broken chain means pass/fail signals cannot be trusted.
  if (verifyCommandLogChain(artifactDir).status === 'broken') return [];

  // Single pass: bucket each command by what its latest capture says.
  const failedLatest = new Map();    // cmd -> capture record whose latest run FAILED
  const launderedLatest = new Map(); // cmd -> capture record that "passed" via laundering
  for (const [command, record] of captured) {
    if (record.failed) {
      failedLatest.set(command, record);
    } else if (hasLaunderingOperator(command)) {
      launderedLatest.set(command, record);
    }
  }
  if (failedLatest.size === 0 && launderedLatest.size === 0) return []; // Nothing to flag

  const base = relative(root, artifactDir);
  const warnings = [];

  // Load the trust.bundle for claim/evidence analysis.
  const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
  const claims = bundle && Array.isArray(bundle.claims) ? bundle.claims : [];
  const evidenceItems = bundle && Array.isArray(bundle.evidence) ? bundle.evidence : [];

  // claimId -> claim, for the evidence -> claim join below.
  const claimsById = new Map();
  for (const claim of claims) {
    if (claim && claim.id) claimsById.set(claim.id, claim);
  }

  const watched = new Set([...failedLatest.keys(), ...launderedLatest.keys()]);

  // cmd -> { passClaims, ackClaims }: what the claims say about each watched command.
  const tallies = new Map();
  const tally = (command, claim) => {
    let entry = tallies.get(command);
    if (!entry) {
      entry = { passClaims: [], ackClaims: [] };
      tallies.set(command, entry);
    }
    const status = String(claim.status || '').toLowerCase();
    const value = normalizedStatus(claim.value || '');
    if (claimAssertsPass(status, value)) entry.passClaims.push(claim);
    if (claimAcknowledgesFailure(status, value)) entry.ackClaims.push(claim);
  };

  // Path A: evidence items with execution.label link a claim to a specific command.
  for (const item of evidenceItems) {
    const label = item && item.execution && item.execution.label;
    if (!label) continue;
    const command = normalizeCommand(label);
    if (!command || !watched.has(command)) continue;
    const claim = claimsById.get(item.claimId);
    if (claim) tally(command, claim);
  }

  // Path B: claim.fieldOrBehavior resolves directly to the command.
  for (const claim of claims) {
    if (!claim) continue;
    const command = normalizeCommand(claim.fieldOrBehavior || '');
    if (!command || !watched.has(command)) continue;
    tally(command, claim);
  }

  // Only the first pass-claim per command is reported.
  const firstPassClaim = (command) => {
    const entry = tallies.get(command);
    return entry && entry.passClaims.length > 0 ? entry.passClaims[0] : null;
  };

  // Case A: latest capture is FAIL but some claim (any namespace) asserts pass.
  for (const [command, record] of failedLatest) {
    const claim = firstPassClaim(command);
    if (!claim) continue; // acknowledged or unclaimed failure — no warning
    const exitStr = Number.isInteger(record.exitCode) ? ` (exit ${record.exitCode})` : '';
    warnings.push(
      `${base} captured command '${safeOneLine(command, 120)}' last ran FAIL${exitStr} ` +
      `but claim ${safeOneLine(claim.subjectId || claim.id, 80)} (${safeOneLine(claim.claimType, 48)}) ` +
      `asserts pass — namespace-agnostic caught false-completion.`
    );
  }

  // Fix D: latest capture is exit 0 only because the command launders its exit code.
  for (const [command] of launderedLatest) {
    const claim = firstPassClaim(command);
    if (!claim) continue;
    warnings.push(
      `${base} captured command '${safeOneLine(command, 120)}' claimed pass relies on an ` +
      `exit-code-laundered command — claim ${safeOneLine(claim.subjectId || claim.id, 80)} ` +
      `(${safeOneLine(claim.claimType, 48)}) asserts pass but the exit code is not a ` +
      `trustworthy signal (laundering operators mask the real exit code).`
    );
  }

  return warnings;
}
|
|
1294
|
+
|
|
1295
|
+
// ─── ADR 0010 Phase 2: enforce on the canonical Hachure trust.bundle ──────────
// The trust.bundle (emitted by workflow-sidecar via @kontourai/surface) carries
// each claim's Surface-derived status — including capture-authoritative results
// (a claimed-pass whose captured command FAILED is already `disputed` here). A
// high-impact `disputed` claim is the canonical false-completion signal; we gate
// on the bundle the producers already emit, not on bespoke markdown.
//
// ADR 0010 Phase 2b: re-derive-at-gate hardening.
// We re-derive each claim's status from the bundle's own evidence/events/policies
// via Surface's canonical deriveClaimStatus, so editing the stored `claim.status`
// field does not bypass the gate. If the re-derived status is disputed/rejected
// for a high/critical claim, we block. If the re-derived status DIFFERS from the
// stored status (e.g. stored "verified" but evidence re-derives to "disputed"),
// that mismatch is a strong tamper signal — block with an explicit warning.
// When re-derivation is not possible (Surface failed to load, or the loaded
// module does not expose deriveClaimStatus), the stored status is still checked
// and a "surface unavailable" warning is emitted if any high/critical selected
// claim exists, so the missing re-derivation is never silent.
//
// ADR 0016 P-c: when activeFlowStep is non-null, claim-selection additionally
// admits the gate's declared claimType set (gateExpects[].bundle_claim.claimType).
// workflow.* claims are always selected.

/**
 * Gate on high/critical-impact claims in the task's trust.bundle.
 *
 * @param {string} artifactDir  Task artifact directory containing trust.bundle.
 * @param {object|null} activeFlowStep  Active FlowDefinition step; its
 *   gateExpects[].bundle_claim.claimType values extend the set of claim types
 *   that are enforced. May be null/undefined.
 * @returns {Promise<string[]>} Warning messages (empty when the bundle is
 *   missing/unreadable or nothing is blocking).
 */
async function bundleEnforcement(artifactDir, activeFlowStep) {
  const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
  if (!bundle || !Array.isArray(bundle.claims)) return [];

  const surface = await tryLoadSurface();
  // Re-derivation needs both a loaded module AND a callable deriveClaimStatus.
  // A module that loads without that export is as useless as no module at all,
  // so it must take the same fail-closed path instead of silently trusting the
  // stored status.
  const canRederive = Boolean(surface) && typeof surface.deriveClaimStatus === 'function';
  const warnings = [];

  const allEvidence = Array.isArray(bundle.evidence) ? bundle.evidence : [];
  const allEvents = Array.isArray(bundle.events) ? bundle.events : [];
  const allPolicies = Array.isArray(bundle.policies) ? bundle.policies : [];

  // P-c: declared claim types from the active gate (null when no flow step).
  const declaredClaimTypes = activeFlowStep && Array.isArray(activeFlowStep.gateExpects)
    ? new Set(activeFlowStep.gateExpects.map(e => e && e.bundle_claim && e.bundle_claim.claimType).filter(Boolean))
    : null;

  // SECURITY (Layer 2 — gate-bypass-chain fix): UNION form, not if/else.
  // With an if/else, an empty declaredClaimTypes (a flow with expects:[]) would
  // deselect EVERY claim and silently bypass enforcement. The union keeps
  // workflow.* claims enforced no matter what the FlowDefinition declares;
  // declared types are added on top of that baseline.
  //
  // AC3 two-part dependency (regression guard — see test_captured_fail_reconciliation.sh):
  //   Part 1 (this union): workflow.* is enforced for null AND empty declared sets.
  //   Part 2 (guard below): an empty declared set raises a gate-misconfiguration warning.
  const isSelectedClaim = (claim) => {
    const ct = String(claim.claimType || '');
    return ct.startsWith('workflow.') || (declaredClaimTypes != null && declaredClaimTypes.has(ct));
  };

  // Misconfiguration guard: an active FlowDefinition with zero expects[] is
  // suspicious (possible tampered flow definition), so surface it loudly.
  if (declaredClaimTypes !== null && declaredClaimTypes.size === 0) {
    warnings.push(`gate misconfiguration: active FlowDefinition has empty expects[] (possible tampered flow definition or zero-expects bypass attempt); verify the flow definition is authentic before delivery.`);
  }

  // The claims this gate acts on: well-formed, high/critical impact, selected.
  const gatedClaims = bundle.claims.filter((claim) => {
    if (!claim || typeof claim !== 'object') return false;
    const impact = String(claim.impactLevel || '').toLowerCase();
    if (impact !== 'high' && impact !== 'critical') return false;
    return isSelectedClaim(claim);
  });

  // AC3 fail-closed (high-assurance path): without re-derivation the stored
  // status would be trusted as-is — exactly the vector ADR 0010 Phase 2b closes.
  // Low-impact-only bundles stay quiet (gatedClaims is empty for them).
  //
  // BAR-RAISER, NOT AIRTIGHT: an agent that deletes/corrupts @kontourai/surface
  // can trigger this path deliberately. The real anchor is external (signed
  // checkpoints + human review); this only raises the cost of silent trust.
  if (!canRederive && gatedClaims.length > 0) {
    warnings.push(
      `surface unavailable — ${gatedClaims.length} high/critical-impact claim(s) could not be re-derived at gate; ` +
      'stored claim status is trusted without independent re-derivation (fail-closed: high-assurance path). ' +
      'Ensure @kontourai/surface is installed and importable, or escalate for operator review.'
    );
  }

  for (const claim of gatedClaims) {
    const storedStatus = String(claim.status || '').toLowerCase();

    // Step 1: re-derive status from the claim's own evidence/events when possible.
    let recomputedStatus = null; // null: re-derive was not attempted or threw
    if (canRederive) {
      const claimId = claim.id;
      const claimEvidence = allEvidence.filter(ev => ev && ev.claimId === claimId);
      const claimEvents = allEvents.filter(evt => evt && evt.claimId === claimId);
      try {
        const result = surface.deriveClaimStatus({
          claim,
          evidence: claimEvidence,
          events: claimEvents,
          policies: allPolicies,
        });
        recomputedStatus = result && typeof result.status === 'string' ? result.status.toLowerCase() : 'unknown';
      } catch {
        // deriveClaimStatus threw (e.g. schema mismatch) — deliberate best-effort:
        // fall back to the stored status for this claim.
        recomputedStatus = null;
      }
    }

    // Step 2: take the STRICTER of stored vs recomputed so neither can be gamed
    // alone: deleting evidence cannot clear a stored `disputed`, and flipping
    // stored to "verified" cannot hide a recomputed `disputed`.
    const effectiveDisputed = storedStatus === 'disputed' || storedStatus === 'rejected'
      || recomputedStatus === 'disputed' || recomputedStatus === 'rejected';
    if (!effectiveDisputed) continue; // neither stored nor recomputed is blocking

    // Step 3: stored "verified"/"assumed" but recompute says disputed/rejected
    // means the stored status was likely altered to bypass the gate.
    const isTampered = recomputedStatus !== null
      && (storedStatus === 'verified' || storedStatus === 'assumed')
      && (recomputedStatus === 'disputed' || recomputedStatus === 'rejected');

    if (isTampered) {
      warnings.push(`trust.bundle claim tampered: ${safeOneLine(claim.subjectId || claim.id, 80)} (${safeOneLine(claim.claimType, 48)}) — stored status "${storedStatus}" does not match recompute "${recomputedStatus}" (possible tampered bundle); caught false-completion. Run: npm run workflow:sidecar -- claim ${safeOneLine(claim.subjectId || claim.id, 80)} ${artifactDir}`);
    } else {
      warnings.push(`trust.bundle claim disputed: ${safeOneLine(claim.subjectId || claim.id, 80)} (${safeOneLine(claim.claimType, 48)}) — Surface recompute shows not verified; caught false-completion. Run: npm run workflow:sidecar -- claim ${safeOneLine(claim.subjectId || claim.id, 80)} ${artifactDir}`);
    }
  }
  return warnings;
}
|
|
292
1444
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
1445
|
+
/**
 * Scope to the session's current task when .flow-agents/current.json points at
 * one (mirroring evidence-capture.js).
 *
 * The slug is read from `artifact_dir` (preferred) or `active_slug`, stripped
 * of `..` runs and leading separators, and joined under flowAgentsDir. The
 * result is accepted only if it lies strictly inside flowAgentsDir and exists.
 *
 * Containment is checked on resolved paths via path.relative rather than a raw
 * string prefix, so a non-normalized flowAgentsDir (trailing separator, mixed
 * separators on Windows) does not cause a valid task dir to be rejected and the
 * gate to silently fall back to scanning everything.
 *
 * @param {string} flowAgentsDir  Path to the .flow-agents directory.
 * @returns {string|null} The task directory, or null to fall back to scanning
 *   all of .flow-agents (newest-mtime).
 */
function preferredArtifactDir(flowAgentsDir) {
  const current = readJsonFile(path.join(flowAgentsDir, 'current.json'));
  if (!current) return null;

  const slug = current.artifact_dir || current.active_slug;
  if (typeof slug !== 'string' || !slug.trim()) return null;

  // Defang traversal: drop every run of 2+ dots and any leading separators.
  const safe = slug.replace(/\.\.+/g, '').replace(/^[/\\]+/, '');
  const dir = path.join(flowAgentsDir, safe);

  // Strictly-inside check: '' means dir IS flowAgentsDir; a leading '..' segment
  // or an absolute result (different drive on Windows) means it escaped.
  const rel = path.relative(path.resolve(flowAgentsDir), path.resolve(dir));
  const inside = rel !== ''
    && rel !== '..'
    && !rel.startsWith('..' + path.sep)
    && !path.isAbsolute(rel);

  return inside && fs.existsSync(dir) ? dir : null;
}
|
|
299
1459
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
1460
|
+
/**
 * Decide whether a task has not started executing yet.
 *
 * When state.json is readable it is authoritative: the task is pre-execution
 * if EITHER its status is in PRE_EXECUTION_STATUSES OR its phase is in
 * PRE_EXECUTION_PHASES. Without state.json, only the markdown status is
 * consulted (against PRE_EXECUTION_STATUSES).
 *
 * @param {string} artifactDir     Task artifact directory (state.json is looked up here).
 * @param {*} markdownStatus       Status parsed from the markdown artifact; used only
 *                                 when state.json is absent/unreadable.
 * @returns {boolean} true when the task is still in the idea→planning band.
 */
function isPreExecution(artifactDir, markdownStatus) {
  const sidecarState = readJsonFile(path.join(artifactDir, 'state.json'));

  // No sidecar state: the markdown status is all we have.
  if (!sidecarState) {
    return PRE_EXECUTION_STATUSES.has(normalizedStatus(markdownStatus));
  }

  if (PRE_EXECUTION_STATUSES.has(normalizedStatus(sidecarState.status))) {
    return true;
  }
  return PRE_EXECUTION_PHASES.has(normalizedStatus(sidecarState.phase));
}
|
|
1472
|
+
|
|
1473
|
+
|
|
1474
|
+
// ─── Wave 2c: no-bundle/no-state fallback gate ────────────────────────────────
// bundleEnforcement only acts on a trust.bundle that parses and carries a
// claims[] array; sidecarGuidance only acts when state.json exists. A session
// with neither would otherwise be a silent ungated path (e.g. only a markdown
// artifact). This gate covers that gap.
//
// "Has a bundle" here deliberately means "has a bundle bundleEnforcement can
// use" — a trust.bundle file that exists but is unparseable or has no claims[]
// is treated as missing, so merely creating the file cannot suppress the
// NOT_VERIFIED signal.
//
// ADR 0010 Phase 4b, Adjustment A (Final Acceptance hygiene): when acceptance
// criteria are still pending, emit the Final Acceptance reminder. Pending
// criteria are read from trust.bundle claims when present, falling back to
// acceptance.json for bundle-less sessions.
//
// ADR 0016 P-c: activeFlowStep's declared claim types are passed through to
// bundlePendingCriteriaCount.

/**
 * @param {string} artifactDir  Task artifact directory.
 * @param {object|null} activeFlowStep  Active FlowDefinition step (may be null);
 *   its gateExpects[].bundle_claim.claimType values are forwarded to
 *   bundlePendingCriteriaCount.
 * @returns {string[]} Zero or one warning message.
 */
function missingBundleOrStateSignal(artifactDir, activeFlowStep) {
  // Declared claimType set from the FlowDefinition gate expects[] (P-c).
  const declaredClaimTypes = activeFlowStep && Array.isArray(activeFlowStep.gateExpects)
    ? new Set(activeFlowStep.gateExpects.map(e => e && e.bundle_claim && e.bundle_claim.claimType).filter(Boolean))
    : null;

  const base = path.basename(artifactDir);
  const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
  // Same usability predicate bundleEnforcement applies before it does anything.
  const bundleClaims = bundle && Array.isArray(bundle.claims) ? bundle.claims : null;
  const state = readJsonFile(path.join(artifactDir, 'state.json'));

  if (!bundleClaims && !state) {
    // Neither a usable trust.bundle nor state.json: the session is untracked by
    // the sidecar path. Emit NOT_VERIFIED so execution-phase sessions stay gated.
    return [`${base} NOT_VERIFIED — no trust.bundle or state.json found; run 'workflow-sidecar record-evidence' to build the evidence record before delivery.`];
  }

  // Adjustment A: count pending acceptance criteria, bundle-first.
  let pendingCount = 0;
  if (bundleClaims) {
    // Phase 4b / P-c: bundle claims, including declared-type acceptance claims.
    // The helper may return null (treated as "nothing pending").
    const counted = bundlePendingCriteriaCount(bundleClaims, declaredClaimTypes);
    pendingCount = counted !== null && counted > 0 ? counted : 0;
  } else {
    // Fallback: no usable bundle — read from acceptance.json.
    const acceptance = readJsonFile(path.join(artifactDir, 'acceptance.json'));
    if (acceptance && Array.isArray(acceptance.criteria)) {
      pendingCount = acceptance.criteria.filter((c) => {
        const s = normalizedStatus(c && c.status);
        return s === 'pending' || s === 'not_started' || s === '' || s === 'unknown';
      }).length;
    }
  }

  if (pendingCount > 0) {
    return [`${base} Final Acceptance: ${pendingCount} acceptance criterion/criteria still pending; complete CI/merge/docs before final delivery.`];
  }
  return [];
}
|
|
1535
|
+
|
|
1536
|
+
// ─── Gate severity classification regexes (module scope — used by analyze() and run()) ─
//
// Each regex is tested against a warning message string to decide whether that
// warning blocks the stop. Neither uses the /g or /y flag, so .test() is
// stateless and the regexes are safe to share.
//
// HARD_BLOCK: always blocks, even for pre-execution and terminal tasks.
//   Fires on genuine false-completion signals (a claimed pass the capture log or
//   evidence.json contradicts), integrity failures, and gate misconfiguration.
//
// FULL_BLOCK: fires for execution-onward tasks (post-planning, non-terminal).
//   Includes all HARD_BLOCK patterns plus completeness/hygiene and not-done state.
//   Invariant: every message HARD_BLOCK matches must also match FULL_BLOCK —
//   when adding a HARD_BLOCK alternative, add it (or a broader form) here too.
//
// Both are used in analyze() for blocking decisions AND in run() for the AC2
// MAX_BLOCKS hard-block guard (preventing auto-release of hard blocks).
const HARD_BLOCK = /contradicts evidence\.json|caught false-completion|evidence verdict:|evidence check .+ status:|critique status|critique open|required sidecar is missing|command-log integrity check FAILED|gate misconfiguration:|exit-code-laundered/;
// FULL_BLOCK adds: workflow-state hygiene, surface-unavailable fail-closed, missing log.
const FULL_BLOCK = /status:|Definition Of Done|Goal Fit|sidecar validation:|contradicts evidence\.json|workflow state|evidence verdict|evidence check|NOT_VERIFIED gap|critique status|critique open|required sidecar is missing|next action|caught false-completion|NOT_VERIFIED —|command-log integrity check FAILED|gate misconfiguration:|surface unavailable —|expected capture log is missing|exit-code-laundered/;
|
|
1550
|
+
|
|
1551
|
+
async function analyze(root, now = Date.now()) {
|
|
1552
|
+
const flowAgentsDir = path.join(root, '.flow-agents');
|
|
1553
|
+
// Scope to the session's current task when current.json names one, so an
|
|
1554
|
+
// unrelated active workflow elsewhere in the repo does not gate this stop.
|
|
1555
|
+
const scoped = preferredArtifactDir(flowAgentsDir);
|
|
1556
|
+
const searchDirs = scoped ? [scoped] : [flowAgentsDir];
|
|
1557
|
+
const artifacts = searchDirs
|
|
303
1558
|
.flatMap(dir => walkMarkdown(dir))
|
|
304
1559
|
.map(readArtifact)
|
|
305
1560
|
.filter(isWorkflowArtifact)
|
|
@@ -317,51 +1572,174 @@ function analyze(root, now = Date.now()) {
|
|
|
317
1572
|
warnings.push(`${relPath} is still status:${status} (${ageMinutes}m old). Do not final-answer as complete unless the next step is explicit.`);
|
|
318
1573
|
}
|
|
319
1574
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
if (!hasHeading(latest.text, 'Goal Fit Gate')) {
|
|
325
|
-
warnings.push(`${relPath} is missing ## Goal Fit Gate, so local acceptance has not been checked.`);
|
|
326
|
-
} else {
|
|
327
|
-
for (const item of uncheckedInSection(latest.text, 'Goal Fit Gate').slice(0, 6)) {
|
|
328
|
-
warnings.push(`${relPath} Goal Fit unchecked: ${item}`);
|
|
329
|
-
}
|
|
330
|
-
}
|
|
1575
|
+
// Builder heading completeness checks (hasHeading DOD/Goal Fit Gate) removed in ADR 0010 2c.
|
|
1576
|
+
// Verdict is now bundle-driven via bundleEnforcement + sidecarGuidance.
|
|
1577
|
+
// Sessions with neither trust.bundle nor state.json are caught by missingBundleOrStateSignal.
|
|
331
1578
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
warnings.push(`${relPath} local delivery is marked delivered, but Final Acceptance still has ${uncheckedFinal.length} open item(s) for CI/merge/docs promotion.`);
|
|
336
|
-
}
|
|
337
|
-
}
|
|
1579
|
+
// ADR 0016 P-c: load the active FlowDefinition gate (fail-open: null when absent).
|
|
1580
|
+
// Null → existing workflow.* fallback path unchanged. Non-null → expects[]-driven claim selection.
|
|
1581
|
+
const activeFlowStep = loadActiveFlowStep(flowAgentsDir);
|
|
338
1582
|
|
|
339
1583
|
warnings.push(...sidecarValidation(root, path.dirname(latest.file)));
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
1584
|
+
warnings.push(...sidecarGuidance(root, path.dirname(latest.file), activeFlowStep));
|
|
1585
|
+
const captureWarnings = captureCrossReference(root, path.dirname(latest.file), activeFlowStep);
|
|
1586
|
+
warnings.push(...captureWarnings);
|
|
1587
|
+
// Dedup: bundleEnforcement and captureCrossReference can both fire "caught false-completion"
|
|
1588
|
+
// for the same disputed claim. Suppress the bundleEnforcement warning ONLY when
|
|
1589
|
+
// captureCrossReference already produced a hard-block warning ("caught false-completion")
|
|
1590
|
+
// for the same check. NOT_VERIFIED / backstop-skip capture warnings must NOT suppress
|
|
1591
|
+
// the bundle tamper/disputed signal — that mismatch is a re-derive block independent of
|
|
1592
|
+
// whether the command was ever captured (anti-gaming guarantee, ADR 0010 Phase 2b).
|
|
1593
|
+
const captureHardBlockIds = new Set();
|
|
1594
|
+
for (const w of captureWarnings) {
|
|
1595
|
+
if (!/caught false-completion/.test(w)) continue; // only hard blocks suppress bundle warning
|
|
1596
|
+
const m = /evidence check ([^\s:]+):/.exec(w);
|
|
1597
|
+
if (m) captureHardBlockIds.add(m[1]);
|
|
343
1598
|
}
|
|
344
|
-
|
|
1599
|
+
const bundleWarnings = (await bundleEnforcement(path.dirname(latest.file), activeFlowStep)).filter(w => {
|
|
1600
|
+
if (!captureHardBlockIds.size) return true;
|
|
1601
|
+
// bundleEnforcement warns: "trust.bundle claim disputed: <subjectId> ..."
|
|
1602
|
+
const m = /trust\.bundle claim (?:disputed|tampered): ([^\s(]+)/.exec(w);
|
|
1603
|
+
if (!m) return true;
|
|
1604
|
+
const subjectId = m[1];
|
|
1605
|
+
// subjectId = "slug/checkId" — extract the checkId (everything after the first "/")
|
|
1606
|
+
const checkId = subjectId.includes('/') ? subjectId.slice(subjectId.indexOf('/') + 1) : subjectId;
|
|
1607
|
+
// If captureCrossReference already hard-blocked this check, suppress the bundle warning.
|
|
1608
|
+
return !captureHardBlockIds.has(checkId);
|
|
1609
|
+
});
|
|
1610
|
+
warnings.push(...bundleWarnings);
|
|
1611
|
+
warnings.push(...missingBundleOrStateSignal(path.dirname(latest.file), activeFlowStep));
|
|
1612
|
+
|
|
1613
|
+
// A pre-execution task (not started) OR a terminal task (which is itself a
|
|
1614
|
+
// completion *claim*) must not block on mere incompleteness — but a FALSE claim
|
|
1615
|
+
// (capture/evidence contradiction) still blocks at any phase. This is the whole
|
|
1616
|
+
// point of the capture cross-reference: catch a task that falsely claims done.
|
|
1617
|
+
const gateState = readJsonFile(path.join(path.dirname(latest.file), 'state.json'));
|
|
1618
|
+
const taskStatus = gateState ? normalizedStatus(gateState.status) : normalizedStatus(status);
|
|
1619
|
+
const preExecution = isPreExecution(path.dirname(latest.file), status);
|
|
1620
|
+
const terminal = TERMINAL_STATUSES.has(taskStatus);
|
|
1621
|
+
|
|
1622
|
+
// Namespace-agnostic captured-FAIL reconciliation (AC1 — closes the allowlist bypass).
|
|
1623
|
+
// Fix A: status-independent — runs on EVERY stop. A claim contradicting the capture
|
|
1624
|
+
// is a false-completion whether or not the agent says the task is 'done'.
|
|
1625
|
+
warnings.push(...capturedFailReconciliation(root, path.dirname(latest.file), taskStatus));
|
|
1626
|
+
|
|
1627
|
+
// Use module-scope HARD_BLOCK / FULL_BLOCK (defined above analyze()).
|
|
1628
|
+
// pre-execution/terminal tasks: only HARD_BLOCK signals cause a block.
|
|
1629
|
+
// execution-onward tasks: FULL_BLOCK signals cause a block.
|
|
1630
|
+
const blockRe = (preExecution || terminal) ? HARD_BLOCK : FULL_BLOCK;
|
|
1631
|
+
const blocking = warnings.some(w => {
|
|
1632
|
+
// Capture cross-reference warn-mode notes never block (operator opted out).
|
|
1633
|
+
if (/\[backstop in warn mode — not blocking\]/.test(w)) return false;
|
|
1634
|
+
return blockRe.test(w);
|
|
1635
|
+
});
|
|
1636
|
+
return { warnings, blocking, preExecution };
|
|
1637
|
+
}
|
|
1638
|
+
|
|
1639
|
+
/**
 * Resolve the enforcement mode for the goal-fit gate.
 *
 * Precedence:
 *   1. FLOW_AGENTS_GOAL_FIT_MODE, when it is one of block | warn | off.
 *   2. The legacy FLOW_AGENTS_GOAL_FIT_STRICT=true, which maps to block.
 *   3. Otherwise the canonical engine default, warn.
 *
 * Both variables are trimmed and lower-cased before comparison, so values that
 * pick up stray whitespace or a trailing newline (env files, CI YAML) behave
 * the same for the legacy flag as they already did for the explicit mode.
 *
 * @returns {'block'|'warn'|'off'} The effective enforcement mode.
 */
function resolveGoalFitMode() {
  // Unset / empty variables normalize to '' and fall through to the next rule.
  const readFlag = name => String(process.env[name] || '').trim().toLowerCase();

  const explicit = readFlag('FLOW_AGENTS_GOAL_FIT_MODE');
  if (explicit === 'block' || explicit === 'warn' || explicit === 'off') return explicit;

  // Unrecognized explicit values are ignored rather than treated as an error.
  return readFlag('FLOW_AGENTS_GOAL_FIT_STRICT') === 'true' ? 'block' : 'warn';
}
|
|
1650
|
+
|
|
1651
|
+
/**
 * Escape hatch for block mode: the maximum number of times in a row the hook
 * may refuse the SAME goal-fit gap, so a goal that genuinely cannot be
 * satisfied does not trap the agent forever. Once that many consecutive
 * identical blocks have occurred the hook releases (exit 0) with a loud notice.
 *
 * Read from FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS; anything that does not parse to a
 * positive integer (unset, empty, zero, negative, non-numeric) yields 3.
 *
 * @returns {number} A positive integer block cap.
 */
function resolveMaxBlocks() {
  const fallback = 3;
  const configured = process.env.FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS || '';
  const parsed = Number.parseInt(configured, 10);
  if (!Number.isInteger(parsed) || parsed <= 0) return fallback;
  return parsed;
}
|
|
1661
|
+
|
|
1662
|
+
/**
 * Location of the JSON file that records the consecutive-block streak.
 *
 * @param {string} root - Repository root directory.
 * @returns {string} Path of the streak file under `<root>/.flow-agents`.
 */
function blockStreakFile(root) {
  const stateDir = '.flow-agents';
  const streakName = '.goal-fit-block-streak.json';
  return path.join(root, stateDir, streakName);
}
|
|
1665
|
+
|
|
1666
|
+
/**
 * Fingerprint a set of warnings so consecutive stops can be recognised as
 * "the same gap" by the block-streak counter.
 *
 * The warnings are joined with newlines and folded with a djb2-style hash
 * (seed 5381, h * 33 + charCode, kept to an unsigned 32-bit value).
 *
 * The stale-status warning embeds the artifact age as "(<N>m old)", which
 * changes every minute. That token is neutralized before hashing; otherwise an
 * unchanged gap would produce a new hash whenever a minute elapsed between
 * stops, the streak would restart at 1, and the max-blocks release could never
 * trigger for it.
 *
 * @param {string[]|null|undefined} warnings - Warning lines; nullish is treated as empty.
 * @returns {string} Decimal string of the 32-bit hash ("5381" for no warnings).
 */
function reasonsHash(warnings) {
  const text = (warnings || [])
    .join('\n')
    .replace(/\(-?\d+m old\)/g, '(Nm old)');
  let h = 5381;
  // `>>> 0` wraps the running value back into the unsigned 32-bit range each step.
  for (let i = 0; i < text.length; i += 1) h = ((h << 5) + h + text.charCodeAt(i)) >>> 0;
  return String(h);
}
|
|
345
1672
|
|
|
346
|
-
|
|
347
|
-
|
|
1673
|
+
/**
 * Forget the recorded block streak by removing the streak file.
 *
 * @param {string} root - Repository root directory.
 * @returns {void}
 */
function clearBlockStreak(root) {
  try {
    const streakPath = blockStreakFile(root);
    // force: true makes a missing file a no-op instead of an error.
    fs.rmSync(streakPath, { force: true });
  } catch {
    /* best effort */
  }
}
|
|
349
1676
|
|
|
350
|
-
function
|
|
1677
|
+
/**
 * Record one more block for the given reasons hash and return the new streak length.
 *
 * The streak continues (previous count + 1) only when the stored hash equals
 * `hash`; any other stored state, including none, starts a new streak at 1.
 * Persisting the result is best effort: a write failure is ignored and the
 * computed count is still returned.
 *
 * @param {string} root - Repository root directory.
 * @param {string} hash - Fingerprint of the current warnings.
 * @returns {number} The streak length after this block.
 */
function bumpBlockStreak(root, hash) {
  const streakPath = blockStreakFile(root);
  const stored = readJsonFile(streakPath) || {};

  let count = 1;
  if (stored.hash === hash) {
    // A missing or non-numeric stored count is treated as 0.
    count = (Number(stored.count) || 0) + 1;
  }

  try {
    fs.mkdirSync(path.dirname(streakPath), { recursive: true });
    fs.writeFileSync(streakPath, JSON.stringify({ hash, count }));
  } catch {
    /* best effort */
  }
  return count;
}
|
|
1687
|
+
|
|
1688
|
+
/**
 * Hook entry point: analyze the repository and decide whether to pass, warn, or block.
 *
 * Outcomes:
 *   - mode "off", or no warnings: returns `rawInput` unchanged (a plain string).
 *   - warnings but not (block mode AND blocking): `{ stdout, stderr, exitCode: 0 }`.
 *   - blocking in block mode, streak below the cap: exitCode 2 with "BLOCK n/max".
 *   - streak at/over the cap and any warning matches HARD_BLOCK: exitCode 2, and the
 *     streak is deliberately left in place.
 *   - streak at/over the cap otherwise: streak cleared, exitCode 0 with a RELEASED notice.
 *
 * @param {string} rawInput - Raw hook payload; its `cwd` field (when present) seeds the
 *   repository-root lookup, otherwise `process.cwd()` is used.
 * @returns {Promise<string|{stdout: string, stderr: string, exitCode: number}>}
 */
async function run(rawInput) {
  const payload = parseJson(rawInput);
  const repoRoot = findRepoRoot(payload.cwd || process.cwd());
  const enforcement = resolveGoalFitMode();
  if (enforcement === 'off') return rawInput;

  const { warnings, blocking } = await analyze(repoRoot);
  if (warnings.length === 0) {
    clearBlockStreak(repoRoot);
    return rawInput;
  }

  const report = ['[Hook] Goal Fit warning:', ...warnings.map(w => ` - ${w}`)].join('\n');
  // Every structured result echoes the raw input on stdout.
  const respond = (stderr, exitCode) => ({ stdout: rawInput, stderr, exitCode });

  const mustBlock = enforcement === 'block' && blocking;
  if (!mustBlock) {
    clearBlockStreak(repoRoot);
    return respond(report, 0);
  }

  const maxBlocks = resolveMaxBlocks();
  const streak = bumpBlockStreak(repoRoot, reasonsHash(warnings));
  if (streak < maxBlocks) {
    return respond(`${report}\n[Hook] Goal Fit BLOCK ${streak}/${maxBlocks}.`, 2);
  }

  // AC2: never auto-release a HARD block (caught false-completion, capture contradiction,
  // tamper signal, gate misconfiguration, integrity failure). An agent burning through
  // N identical hard blocks to escape via the streak counter must not work.
  //
  // BAR-RAISER, NOT AIRTIGHT: an agent with shell access can still reset the streak
  // by deleting .flow-agents/.goal-fit-block-streak.json or by modifying the warning
  // text so the hash changes. The real anchor is external (signed checkpoints + human
  // review). This raises the cost of the burn-through-the-counter escape vector.
  const warnModeNote = /\[backstop in warn mode — not blocking\]/;
  const hardBlocked = warnings.some(w => !warnModeNote.test(w) && HARD_BLOCK.test(w));
  if (hardBlocked) {
    // Do NOT clear the streak — keep accumulating so the same hard block stays visible.
    return respond(
      `${report}\n[Hook] Goal Fit: max-blocks reached but the block is a caught false-completion / integrity failure — not auto-releasing; requires a real fix or operator override.`,
      2,
    );
  }

  clearBlockStreak(repoRoot);
  return respond(
    `${report}\n[Hook] Goal Fit block RELEASED after ${streak} consecutive identical blocks (FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=${maxBlocks}): the same gap persists, surfacing to the human instead of looping.`,
    0,
  );
}
|
|
367
1745
|
|
|
@@ -372,14 +1750,28 @@ if (require.main === module) {
|
|
|
372
1750
|
if (data.length < MAX_STDIN) data += chunk.substring(0, MAX_STDIN - data.length);
|
|
373
1751
|
});
|
|
374
1752
|
process.stdin.on('end', () => {
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
1753
|
+
// run() is now async (Surface load). We wrap in an async IIFE so the
|
|
1754
|
+
// stdin/exit flow is preserved and errors are surfaced as warnings (fail-open).
|
|
1755
|
+
(async () => {
|
|
1756
|
+
let output;
|
|
1757
|
+
try {
|
|
1758
|
+
output = await run(data);
|
|
1759
|
+
} catch (err) {
|
|
1760
|
+
// Unexpected failure in the async gate path — fail-open, allow the Stop.
|
|
1761
|
+
process.stderr.write(`[Hook] Goal Fit async error (fail-open): ${String(err && err.message || err)}\n`);
|
|
1762
|
+
process.stdout.write(data);
|
|
1763
|
+
process.exit(0);
|
|
1764
|
+
return;
|
|
1765
|
+
}
|
|
1766
|
+
if (output && typeof output === 'object') {
|
|
1767
|
+
if (output.stderr) process.stderr.write(output.stderr.endsWith('\n') ? output.stderr : `${output.stderr}\n`);
|
|
1768
|
+
process.stdout.write(String(output.stdout ?? data));
|
|
1769
|
+
process.exit(Number.isInteger(output.exitCode) ? output.exitCode : 0);
|
|
1770
|
+
return;
|
|
1771
|
+
}
|
|
1772
|
+
process.stdout.write(String(output));
|
|
1773
|
+
})();
|
|
382
1774
|
});
|
|
383
1775
|
}
|
|
384
1776
|
|
|
385
|
-
module.exports = { analyze, run, uncheckedInSection, findRepoRoot, sidecarGuidance, safeOneLine };
|
|
1777
|
+
module.exports = { analyze, run, resolveGoalFitMode, uncheckedInSection, findRepoRoot, sidecarGuidance, safeOneLine, captureCrossReference, bundleEnforcement, loadActiveFlowStep, readCommandLog, resolveTrustedCommand, declaredManifestTarget, verifyCommandLogChain, CHAIN_GENESIS_VERIFY, hasLaunderingOperator };
|