@kontourai/flow-agents 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +95 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +300 -8
- package/build/src/cli/workflow-sidecar.js +1934 -83
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +45 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +4 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2064 -77
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -4,9 +4,22 @@
|
|
|
4
4
|
*
|
|
5
5
|
* The hook reads .flow-agents artifacts, looks for the most recent active
|
|
6
6
|
* delivery/session file, and reports missing Definition Of Done, Goal Fit, or
|
|
7
|
-
* Final Acceptance state.
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Final Acceptance state.
|
|
8
|
+
*
|
|
9
|
+
* Enforcement is controlled by FLOW_AGENTS_GOAL_FIT_MODE:
|
|
10
|
+
* - block: return exit code 2 (blocks the Stop) when local goal fit is incomplete.
|
|
11
|
+
* - warn: return exit code 0 but still emit the guidance on stderr (default).
|
|
12
|
+
* - off: stay silent.
|
|
13
|
+
* The legacy FLOW_AGENTS_GOAL_FIT_STRICT=true is honored as an alias for block.
|
|
14
|
+
* The canonical engine default is warn; shipped runtime configs (e.g. Claude
|
|
15
|
+
* Code at L2) set block so the installed product enforces while the engine
|
|
16
|
+
* default and conformance contract stay warn.
|
|
17
|
+
*
|
|
18
|
+
* Scope: the gate evaluates the session's current task (.flow-agents/current.json)
|
|
19
|
+
* when set, so an unrelated active workflow elsewhere in the repo does not gate
|
|
20
|
+
* this stop. It also never hard-blocks a pre-execution (not-yet-started) task on
|
|
21
|
+
* mere incompleteness — only genuine false-completion signals (a claimed pass the
|
|
22
|
+
* capture log or evidence.json contradicts) block before execution begins.
|
|
10
23
|
*/
|
|
11
24
|
|
|
12
25
|
'use strict';
|
|
@@ -29,10 +42,23 @@ const ACTIVE_STATUSES = new Set([
|
|
|
29
42
|
'blocked',
|
|
30
43
|
'partial',
|
|
31
44
|
]);
|
|
32
|
-
|
|
45
|
+
// Session types that mark an artifact as a workflow artifact. Used for
// artifact identification only, not for verdict production.
const WORKFLOW_SESSION_TYPES = new Set([
  'deliver',
  'delivery',
  'fix-bug',
  'execute-plan',
  'verify-work',
]);

// File names of the workflow sidecars, and of the optional ones.
const SIDECAR_NAMES = new Set([
  'state.json',
  'acceptance.json',
  'evidence.json',
  'handoff.json',
]);
const OPTIONAL_SIDECAR_NAMES = new Set(['critique.json']);

// Statuses/phases of a workflow that has not started execution yet. Such a
// workflow is EXPECTED to be incomplete, so the Stop gate must not hard-block
// on its missing DOD / Goal Fit / not-done state; only genuine
// false-completion signals block it. From execution onward the gate applies
// fully.
const PRE_EXECUTION_STATUSES = new Set([
  'new',
  'planning',
  'planned',
  'backlog',
]);
const PRE_EXECUTION_PHASES = new Set([
  'idea',
  'backlog',
  'pickup',
  'planning',
]);

// Statuses of finished tasks. A terminal task must never gate a stop or count
// as "active": neither a stale current.json pointing at one nor a pile of
// finished states may block an unrelated session.
const TERMINAL_STATUSES = new Set([
  'done',
  'delivered',
  'accepted',
  'archived',
  'complete',
  'completed',
]);
|
|
61
|
+
|
|
36
62
|
/**
 * Parse a JSON string without ever throwing.
 *
 * @param {string} raw - JSON text; a falsy value is treated as '{}'.
 * @returns {*} The parsed value, or an empty object when `raw` is not valid JSON.
 */
function parseJson(raw) {
  const text = raw || '{}';
  try {
    return JSON.parse(text);
  } catch {
    // Malformed input is deliberately mapped to an empty object.
    return {};
  }
}
|
|
@@ -80,6 +106,15 @@ function hasSidecars(dir) {
|
|
|
80
106
|
}
|
|
81
107
|
}
|
|
82
108
|
|
|
109
|
+
/**
 * Returns true if a line of validator output looks like a validator-environment
 * error (shell/npm error, tsc missing, spawn failure) rather than a real
 * artifact validation message. Environment errors must never block goal-fit.
 *
 * Matching is case-insensitive and substring-based.
 *
 * @param {string} line - One trimmed line of validator stdout/stderr.
 * @returns {boolean} True when the line matches a known environment-error pattern.
 */
function isEnvironmentError(line) {
  // The dot in `node_modules/.bin` is escaped so it only matches a literal
  // ".bin" directory; unescaped, it matched any character (e.g.
  // "node_modules/xbin") and could hide a genuine validation message.
  return /tsc[:\s]|command not found|npm ERR!|npm error|ENOENT|EACCES|Cannot find module|node_modules\/\.bin|TypeScript version|version conflict|error TS[0-9]/i.test(line);
}
|
|
117
|
+
|
|
83
118
|
function sidecarValidation(root, artifactDir) {
|
|
84
119
|
const requireSidecars = String(process.env.FLOW_AGENTS_REQUIRE_SIDECARS || '').toLowerCase() === 'true';
|
|
85
120
|
const requireCritique = String(process.env.FLOW_AGENTS_REQUIRE_CRITIQUE || '').toLowerCase() === 'true';
|
|
@@ -88,8 +123,6 @@ function sidecarValidation(root, artifactDir) {
|
|
|
88
123
|
const packageRoot = fs.existsSync(path.join(root, 'package.json'))
|
|
89
124
|
? root
|
|
90
125
|
: path.resolve(__dirname, '..', '..');
|
|
91
|
-
const packageJson = path.join(packageRoot, 'package.json');
|
|
92
|
-
if (!fs.existsSync(packageJson)) return [`${relative(root, artifactDir)} sidecar validation: package.json is missing; cannot run TypeScript workflow validator.`];
|
|
93
126
|
|
|
94
127
|
let sidecarFiles = [];
|
|
95
128
|
try {
|
|
@@ -112,33 +145,74 @@ function sidecarValidation(root, artifactDir) {
|
|
|
112
145
|
|
|
113
146
|
if (sidecarFiles.length === 0) return [];
|
|
114
147
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
if
|
|
118
|
-
|
|
119
|
-
|
|
148
|
+
// Part 1 fix: invoke the already-built validator directly via `node`, bypassing
|
|
149
|
+
// `npm run build` (tsc). npm-installed packages ship build/ in the package files,
|
|
150
|
+
// so the compiled JS is always available. Only fall back to npm run if build/ is
|
|
151
|
+
// absent (a raw dev checkout that hasn't been built yet).
|
|
152
|
+
const builtValidator = path.join(packageRoot, 'build', 'src', 'cli', 'validate-workflow-artifacts.js');
|
|
153
|
+
const hasBuild = fs.existsSync(builtValidator);
|
|
154
|
+
|
|
155
|
+
const validatorArgs = ['--skip-markdown-validation'];
|
|
156
|
+
if (requireSidecars) validatorArgs.push('--require-sidecars');
|
|
157
|
+
if (requireCritique) validatorArgs.push('--require-critique');
|
|
158
|
+
validatorArgs.push(artifactDir);
|
|
159
|
+
|
|
160
|
+
let result;
|
|
161
|
+
if (hasBuild) {
|
|
162
|
+
// Direct node invocation: no tsc, no npm build step, works from any npm install.
|
|
163
|
+
result = spawnSync(process.execPath, [builtValidator, ...validatorArgs], {
|
|
164
|
+
cwd: packageRoot,
|
|
165
|
+
encoding: 'utf8',
|
|
166
|
+
timeout: 30000,
|
|
167
|
+
});
|
|
168
|
+
} else {
|
|
169
|
+
// Dev checkout without build/: fall back to npm run (may trigger tsc).
|
|
170
|
+
// If this also fails due to environment issues, Part 2 handles it below.
|
|
171
|
+
const npmArgs = ['run', 'workflow:validate-artifacts', '--silent', '--', ...validatorArgs];
|
|
172
|
+
result = spawnSync('npm', npmArgs, {
|
|
173
|
+
cwd: packageRoot,
|
|
174
|
+
encoding: 'utf8',
|
|
175
|
+
timeout: 30000,
|
|
176
|
+
});
|
|
177
|
+
}
|
|
120
178
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
179
|
+
// Part 2 fix: treat validator-environment failures as SKIP, never as blocking.
|
|
180
|
+
// A spawn error (ENOENT, timeout) means the validator couldn't run at all.
|
|
181
|
+
if (result.error) {
|
|
182
|
+
// Validator couldn't be launched — environment issue, not a goal-fit failure.
|
|
183
|
+
return [`${relative(root, artifactDir)} sidecar validation skipped: validator could not run (${result.error.code || result.error.message})`];
|
|
184
|
+
}
|
|
126
185
|
|
|
127
186
|
if (result.status === 0) return [];
|
|
128
|
-
|
|
187
|
+
|
|
188
|
+
// Validator ran and exited non-zero. Separate real validation errors from
|
|
189
|
+
// environment errors (tsc missing, npm ERR!, shell errors) so that a broken
|
|
190
|
+
// validator environment never blocks goal-fit.
|
|
191
|
+
const allLines = `${result.stdout || ''}\n${result.stderr || ''}`
|
|
129
192
|
.split('\n')
|
|
130
193
|
.map(line => line.trim())
|
|
131
|
-
.filter(Boolean)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
194
|
+
.filter(Boolean);
|
|
195
|
+
|
|
196
|
+
const envLines = allLines.filter(isEnvironmentError);
|
|
197
|
+
const validationLines = allLines.filter(line => !isEnvironmentError(line));
|
|
198
|
+
|
|
199
|
+
if (envLines.length > 0 && validationLines.length === 0) {
|
|
200
|
+
// Pure environment failure — skip, do not block.
|
|
201
|
+
return [`${relative(root, artifactDir)} sidecar validation skipped: validator environment error (${envLines[0].slice(0, 120)})`];
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// Real validation errors (possibly mixed with a few env noise lines).
|
|
205
|
+
const output = validationLines.length > 0 ? validationLines : allLines;
|
|
206
|
+
const trimmed = output.slice(0, 12);
|
|
207
|
+
if (trimmed.length === 0) trimmed.push(`validator exited with status ${result.status ?? 'unknown'}`);
|
|
208
|
+
return trimmed.map(line => `${relative(root, artifactDir)} sidecar validation: ${line}`);
|
|
135
209
|
}
|
|
136
210
|
|
|
137
211
|
/**
 * Decide whether an artifact is a workflow (delivery/session) artifact.
 *
 * Plan and review artifacts are excluded, both by `role` and by file suffix.
 * Otherwise an artifact qualifies when its `type` is a known workflow session
 * type or its file name carries a `--deliver`, `--fix-bug`, `--execute-plan`
 * or `--verify-work` marker.
 *
 * @param {{role?: string, type?: string, file: string}|null|undefined} artifact
 * @returns {boolean}
 */
function isWorkflowArtifact(artifact) {
  if (!artifact) return false;

  const planOrReviewRole = artifact.role === 'plan' || artifact.role === 'review';
  if (planOrReviewRole) return false;

  const planOrReviewFile = artifact.file.endsWith('-plan.md') || artifact.file.endsWith('-review.md');
  if (planOrReviewFile) return false;

  if (WORKFLOW_SESSION_TYPES.has(artifact.type)) return true;

  const fileName = path.basename(artifact.file);
  return /--(deliver|fix-bug|execute-plan|verify-work)\b/.test(fileName);
}
|
|
144
218
|
|
|
@@ -171,6 +245,25 @@ function readJsonFile(file) {
|
|
|
171
245
|
}
|
|
172
246
|
}
|
|
173
247
|
|
|
248
|
+
// ─── ADR 0010 Phase 2b: re-derive-at-gate via Surface (fail-open) ─────────────
|
|
249
|
+
// Surface (@kontourai/surface) is ESM-only; stop-goal-fit.js is CJS.
|
|
250
|
+
// Load it via a fail-open dynamic import(), cached after the first attempt.
|
|
251
|
+
// If Surface cannot be loaded (package absent, env mismatch), we fall back to
|
|
252
|
+
// the stored claim.status check from #133 — no regression for environments that
|
|
253
|
+
// lack @kontourai/surface. The module is never written to disk.
|
|
254
|
+
// Cache for the Surface module: undefined = load not attempted yet,
// null = attempted and unavailable, otherwise the loaded module namespace.
let _surfaceModule;

/**
 * Load `@kontourai/surface` through a dynamic import(), failing open.
 *
 * The outcome of the first attempt (module or null) is cached and returned by
 * every later call.
 *
 * @returns {Promise<object|null>} The module namespace, or null if it cannot be loaded.
 */
async function tryLoadSurface() {
  if (_surfaceModule === undefined) {
    // Any import failure is recorded as "unavailable" instead of being thrown.
    _surfaceModule = await import('@kontourai/surface').catch(() => null);
  }
  return _surfaceModule;
}
|
|
266
|
+
|
|
174
267
|
function safeOneLine(value, maxLength = 220) {
|
|
175
268
|
const text = String(value || '').replace(/\s+/g, ' ').trim();
|
|
176
269
|
if (text.length <= maxLength) return text;
|
|
@@ -192,8 +285,12 @@ function sidecarGuidance(root, artifactDir) {
|
|
|
192
285
|
const status = normalizedStatus(state.status || 'unknown');
|
|
193
286
|
const phase = normalizedStatus(state.phase || 'unknown');
|
|
194
287
|
const next = state.next_action && typeof state.next_action === 'object' ? state.next_action : null;
|
|
195
|
-
|
|
196
|
-
|
|
288
|
+
const nextStatus = next ? normalizedStatus(next.status || 'unknown') : 'unknown';
|
|
289
|
+
// The agent's work is complete when the recorded next action is done — the
|
|
290
|
+
// gate must not block the agent for a remaining human/CI step (e.g. a verified
|
|
291
|
+
// task whose only next_action is "commit the migration").
|
|
292
|
+
const agentComplete = nextStatus === 'done';
|
|
293
|
+
if (!TERMINAL_STATUSES.has(status) && !agentComplete) {
|
|
197
294
|
const nextSummary = next && next.summary ? `; next_action:${nextStatus} "${safeOneLine(next.summary)}"` : '';
|
|
198
295
|
warnings.push(`${base} workflow state: status:${status} phase:${phase}${nextSummary}`);
|
|
199
296
|
}
|
|
@@ -242,16 +339,397 @@ function sidecarGuidance(root, artifactDir) {
|
|
|
242
339
|
return warnings;
|
|
243
340
|
}
|
|
244
341
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
342
|
+
// -----------------------------------------------------------------------
|
|
343
|
+
// Capture-first evidence determinism (Part B)
|
|
344
|
+
//
|
|
345
|
+
// evidence.json is the MODEL transcribing what it thinks happened. The capture
|
|
346
|
+
// hook (evidence-capture.js) writes the REAL command results to
|
|
347
|
+
// command-log.jsonl at the source. Here at the Stop gate we cross-reference the
|
|
348
|
+
// model's claimed-pass command checks against that captured truth, and only fall
|
|
349
|
+
// back to re-running a TRUSTED command when the log has no execution for a
|
|
350
|
+
// claimed-pass command (i.e. it was never actually run).
|
|
351
|
+
// -----------------------------------------------------------------------
|
|
352
|
+
|
|
353
|
+
/**
 * Canonicalize a command string: every whitespace run becomes a single space
 * and leading/trailing whitespace is dropped. Falsy input yields ''.
 *
 * @param {*} value - Command text (coerced with String()).
 * @returns {string}
 */
function normalizeCommand(value) {
  const text = String(value || '');
  return text.split(/\s+/).filter(Boolean).join(' ');
}
|
|
356
|
+
|
|
357
|
+
/**
 * Read command-log.jsonl into a map of normalized-command -> aggregate outcome.
 *
 * Each non-empty line is parsed as JSON; unparseable lines and entries without
 * a string `command` are ignored. An entry counts as failed when its
 * `observedResult` is "fail" or its integer `exitCode` is non-zero.
 *
 * If the same command was run more than once, a single FAIL makes the aggregate
 * a fail (a caught false-completion must not be masked by a later pass-claim).
 * The aggregate `exitCode` follows the same rule: once a run has failed, the
 * exit code of a failing run is kept, so a later passing run cannot turn the
 * record into "failed with exit code 0".
 *
 * @param {string} artifactDir - Directory that may contain command-log.jsonl.
 * @returns {Map<string, {ran: boolean, failed: boolean, exitCode: (number|null)}>}
 *   Empty map when the log is missing or unreadable.
 */
function readCommandLog(artifactDir) {
  const file = path.join(artifactDir, 'command-log.jsonl');
  let raw;
  try {
    raw = fs.readFileSync(file, 'utf8');
  } catch {
    // No readable log means nothing was captured; that is not an error here.
    return new Map();
  }

  const byCommand = new Map();
  for (const line of raw.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let entry;
    try { entry = JSON.parse(trimmed); } catch { continue; }
    if (!entry || typeof entry.command !== 'string') continue;

    const key = normalizeCommand(entry.command);
    if (!key) continue;

    const exitCode = Number.isInteger(entry.exitCode) ? entry.exitCode : null;
    const failed = entry.observedResult === 'fail' || (exitCode !== null && exitCode !== 0);

    const prev = byCommand.get(key);
    const prevFailed = prev ? prev.failed : false;
    const prevExitCode = prev ? prev.exitCode : null;

    let aggregateExitCode;
    if (failed) {
      // This run failed: report its own code, else an earlier failing code.
      aggregateExitCode = exitCode !== null ? exitCode : (prevFailed ? prevExitCode : null);
    } else if (prevFailed) {
      // A later pass must not overwrite the exit code of the earlier failure.
      aggregateExitCode = prevExitCode;
    } else {
      aggregateExitCode = exitCode !== null ? exitCode : prevExitCode;
    }

    byCommand.set(key, {
      ran: true,
      failed: failed || prevFailed,
      exitCode: aggregateExitCode,
    });
  }
  return byCommand;
}
|
|
385
|
+
|
|
386
|
+
/**
 * Pick a TRUSTED command to re-run for a claimed-pass check whose command was
 * never captured. Sources are tried from most to least trusted:
 *   1. the command authored in acceptance.json for the matching criterion;
 *   2. a target declared by the project's own manifest (npm script, Makefile
 *      target, cargo, tox/pytest, just/task);
 *   3. the model's free-form `check.command`, only when
 *      FLOW_AGENTS_GOAL_FIT_RECHECK=true (the RCE-risky opt-in path).
 *
 * @param {string} root - Project root; used as the working directory.
 * @param {string} artifactDir - Artifact directory (not used by this function).
 * @param {object} check - The evidence check being verified.
 * @param {object} acceptance - Parsed acceptance.json, if any.
 * @returns {{argv: string[], cwd: string, source: string}|null} Null when
 *   nothing trusted resolves.
 */
function resolveTrustedCommand(root, artifactDir, check, acceptance) {
  // 1. Command authored upfront in the acceptance criteria.
  const authored = acceptanceCommandFor(check, acceptance);
  if (authored) {
    return { argv: ['bash', '-lc', authored], cwd: root, source: 'acceptance' };
  }

  // 2. Target declared by the project manifest.
  const manifestTarget = declaredManifestTarget(root, check);
  if (manifestTarget) {
    return { argv: manifestTarget.argv, cwd: manifestTarget.cwd || root, source: 'manifest' };
  }

  // 3. Free-form model command, only under the explicit opt-in.
  const recheckEnabled = String(process.env.FLOW_AGENTS_GOAL_FIT_RECHECK || '').toLowerCase() === 'true';
  if (!recheckEnabled) return null;

  const modelCommand = normalizeCommand(check && check.command);
  if (!modelCommand) return null;
  return { argv: ['bash', '-lc', modelCommand], cwd: root, source: 'model-command (FLOW_AGENTS_GOAL_FIT_RECHECK)' };
}
|
|
413
|
+
|
|
414
|
+
/**
 * Find the command authored in acceptance.json that corresponds to `check`.
 *
 * Only evidence refs of kind "command" are considered; their command text is
 * taken from `excerpt`, falling back to `command`. A ref is returned when its
 * criterion id equals the check id or its command equals the check command.
 * With no such match, the first authored command is returned only if the
 * check names no command of its own.
 *
 * @param {object} check - Evidence check (`id`, `command`).
 * @param {object} acceptance - Parsed acceptance.json (`criteria[].evidence_refs[]`).
 * @returns {string|null} Normalized command, or null when none applies.
 */
function acceptanceCommandFor(check, acceptance) {
  const criteria = acceptance && acceptance.criteria;
  if (!Array.isArray(criteria)) return null;

  const wantedId = normalizedStatus(check && check.id);
  const wantedCommand = normalizeCommand(check && check.command);
  let fallbackCommand = null;

  for (const criterion of criteria) {
    if (!criterion || !Array.isArray(criterion.evidence_refs)) continue;

    for (const ref of criterion.evidence_refs) {
      const isCommandRef = Boolean(ref) && typeof ref === 'object' && ref.kind === 'command';
      if (!isCommandRef) continue;

      const authored = normalizeCommand(ref.excerpt || ref.command);
      if (!authored) continue;

      fallbackCommand = fallbackCommand || authored;

      // Strong match: same criterion id as the check, or same command text.
      const sameId = Boolean(wantedId) && normalizedStatus(criterion.id) === wantedId;
      const sameCommand = Boolean(wantedCommand) && authored === wantedCommand;
      if (sameId || sameCommand) return authored;
    }
  }

  // No id/command match: use the first authored command only when the check
  // itself names none, so the target is still one that was authored upfront.
  return wantedCommand ? null : fallbackCommand;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Map a claimed-pass command check to a project-declared, NAMED manifest target.
 * Never allowlists arbitrary strings: the check's command/id/kind is only used
 * to pick WHICH kind of target is wanted (test | build | lint), never run as-is.
 *
 * Resolution order for the wanted kind:
 *   1. package.json `scripts[want]` (for lint, `scripts.typecheck` as fallback)
 *   2. a `want:` target in Makefile / makefile / GNUmakefile
 *   3. Cargo.toml present      -> `cargo test` / `cargo build`
 *   4. tox.ini present         -> `tox`; pyproject.toml present -> `pytest`
 *   5. justfile / Taskfile.yml / Taskfile.yaml present -> `just want` / `task want`
 *
 * NOTE(review): steps 3-5 only check that the manifest file exists, not that
 * it declares the target — confirm that is acceptable for the backstop.
 *
 * @param {string} root - Project root containing the manifests.
 * @param {object} check - Evidence check (`command`, `id`, `kind`).
 * @returns {{argv: string[], cwd: string}|null} Null when no keyword or no
 *   manifest matches.
 */
function declaredManifestTarget(root, check) {
  const haystack = `${normalizeCommand(check && check.command)} ${normalizedStatus(check && check.id)} ${normalizedStatus(check && check.kind)}`.toLowerCase();

  // Keywords must start a token (start of string or after a non-alphanumeric
  // character). The alternatives are grouped so the anchor applies to every
  // keyword; ungrouped, `\btest|spec|…|pytest\b` anchored only the first and
  // last alternative, so "inspect" selected the test target and "information"
  // the lint target.
  let want = null;
  if (/(?:^|[^a-z0-9])(?:test|spec|jest|vitest|pytest)/.test(haystack)) want = 'test';
  else if (/(?:^|[^a-z0-9])(?:build|compile|bundle)/.test(haystack)) want = 'build';
  else if (/(?:^|[^a-z0-9])(?:lint|format|style|typecheck)/.test(haystack)) want = 'lint';
  if (!want) return null;

  // package.json scripts.{test,build,lint}
  const pkg = readJsonFile(path.join(root, 'package.json'));
  if (pkg && pkg.scripts && typeof pkg.scripts === 'object') {
    const scriptName = pkg.scripts[want] ? want
      : want === 'lint' && pkg.scripts.typecheck ? 'typecheck'
      : null;
    if (scriptName) return { argv: ['npm', 'run', scriptName, '--silent'], cwd: root };
  }

  // Makefile target: require a line that starts with "<want>:".
  const makefile = ['Makefile', 'makefile', 'GNUmakefile'].map(n => path.join(root, n)).find(p => fs.existsSync(p));
  if (makefile) {
    try {
      const text = fs.readFileSync(makefile, 'utf8');
      if (new RegExp(`^${want}\\s*:`, 'm').test(text)) return { argv: ['make', want], cwd: root };
    } catch { /* unreadable Makefile: fall through to the next manifest */ }
  }

  // cargo
  if (want === 'test' && fs.existsSync(path.join(root, 'Cargo.toml'))) return { argv: ['cargo', 'test'], cwd: root };
  if (want === 'build' && fs.existsSync(path.join(root, 'Cargo.toml'))) return { argv: ['cargo', 'build'], cwd: root };

  // py ecosystem: tox / pyproject (declared test target)
  if (want === 'test' && fs.existsSync(path.join(root, 'tox.ini'))) return { argv: ['tox'], cwd: root };
  if (want === 'test' && fs.existsSync(path.join(root, 'pyproject.toml'))) return { argv: ['pytest'], cwd: root };

  // just / task runners
  for (const runner of [['just', 'justfile'], ['task', 'Taskfile.yml'], ['task', 'Taskfile.yaml']]) {
    if (fs.existsSync(path.join(root, runner[1]))) return { argv: [runner[0], want], cwd: root };
  }
  return null;
}
|
|
480
|
+
|
|
481
|
+
/**
 * Timeout, in milliseconds, for a backstop re-run.
 *
 * Read from FLOW_AGENTS_GOAL_FIT_BACKSTOP_TIMEOUT_MS (parsed as a base-10
 * integer); anything that is not a positive integer falls back to 120000.
 *
 * @returns {number}
 */
function resolveBackstopTimeout() {
  const fallbackMs = 120000;
  const configured = Number.parseInt(process.env.FLOW_AGENTS_GOAL_FIT_BACKSTOP_TIMEOUT_MS || '', 10);
  if (!Number.isInteger(configured) || configured <= 0) return fallbackMs;
  return configured;
}
|
|
485
|
+
|
|
486
|
+
/**
 * Resolve how the trusted backstop re-run participates in the Stop gate, from
 * FLOW_AGENTS_GOAL_FIT_BACKSTOP (trimmed, case-insensitive):
 *   - "block" (also the default for unset/unknown values): the re-run may ride
 *     block mode, so a never-actually-run claimed-pass command is caught.
 *   - "off", or its alias "warn": the re-run becomes warn-only.
 *   - "skip": no re-run at all; NOT_VERIFIED is recorded instead.
 * "off" and "skip" exist so operators can trade the check for lower latency.
 *
 * @returns {'block'|'off'|'skip'}
 */
function resolveBackstopMode() {
  const setting = String(process.env.FLOW_AGENTS_GOAL_FIT_BACKSTOP || '').trim().toLowerCase();
  switch (setting) {
    case 'off':
    case 'warn':
      return 'off';
    case 'skip':
      return 'skip';
    default:
      return 'block';
  }
}
|
|
497
|
+
|
|
498
|
+
/**
 * Re-run a trusted command synchronously and report the outcome.
 *
 * The child gets no stdin, its output is captured, and it is killed with
 * SIGKILL once the backstop timeout elapses.
 *
 * @param {{argv: string[], cwd: string}} trusted - Command (argv[0] plus
 *   arguments) and the working directory to run it in.
 * @returns {{ran: true, passed: boolean, exitCode: number}
 *   | {ran: false, error: string, timedOut: boolean}}
 *   `ran: false` means the command could not be launched, timed out, or was
 *   killed by a signal; `timedOut` tells a timeout apart from other failures.
 */
function runBackstop(trusted) {
  const result = spawnSync(trusted.argv[0], trusted.argv.slice(1), {
    cwd: trusted.cwd,
    encoding: 'utf8',
    timeout: resolveBackstopTimeout(),
    killSignal: 'SIGKILL',
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  if (result.error) {
    // spawnSync reports a timeout through `error` (code ETIMEDOUT), so the
    // timeout has to be flagged here; the signal branch below is not reached
    // in that case.
    return {
      ran: false,
      error: result.error.code || result.error.message,
      timedOut: result.error.code === 'ETIMEDOUT',
    };
  }
  if (result.signal) {
    return { ran: false, error: `killed (${result.signal})`, timedOut: result.signal === 'SIGKILL' || result.signal === 'SIGTERM' };
  }
  return { ran: true, passed: result.status === 0, exitCode: result.status };
}
|
|
251
510
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
511
|
+
/**
 * Check the claimed-pass command checks in evidence.json against trusted sources.
 *
 * Candidates are evidence.checks[] entries whose kind normalizes to "command",
 * whose status normalizes to "pass"/"passed", and whose command normalizes to a
 * truthy value. Only the first 8 candidates are examined. For each candidate:
 *   1. If the capture log holds an execution for the command, the log decides:
 *      a recorded failure yields a contradiction warning, a recorded pass
 *      yields nothing. No re-run happens in either case.
 *   2. Otherwise the trusted backstop is consulted. Backstop mode 'skip', or no
 *      resolvable trusted command, yields a NOT_VERIFIED note. A backstop that
 *      could not run is also NOT_VERIFIED. A backstop that exits non-zero is a
 *      contradiction, tagged as non-blocking when the mode is 'off'.
 *
 * @param {string} root - repository root; messages show artifactDir relative to it.
 * @param {string} artifactDir - directory holding evidence.json / acceptance.json.
 * @returns {string[]} warning lines; empty when evidence.json is missing or
 *   has no checks array.
 */
function captureCrossReference(root, artifactDir) {
  const evidence = readJsonFile(path.join(artifactDir, 'evidence.json'));
  if (!evidence || !Array.isArray(evidence.checks)) return [];

  const acceptance = readJsonFile(path.join(artifactDir, 'acceptance.json'));
  const log = readCommandLog(artifactDir);
  const base = relative(root, artifactDir);
  const backstopMode = resolveBackstopMode();
  const warnings = [];

  // A check qualifies only if it is a command check that claims a pass and
  // actually names a command.
  const isClaimedPassCommand = (entry) => {
    if (!entry || typeof entry !== 'object') return false;
    const kind = normalizedStatus(entry.kind);
    const status = normalizedStatus(entry.status);
    if (kind !== 'command') return false;
    if (status !== 'pass' && status !== 'passed') return false;
    return normalizeCommand(entry.command);
  };

  evidence.checks.filter(isClaimedPassCommand).slice(0, 8).forEach((check) => {
    const cmd = normalizeCommand(check.command);
    const id = safeOneLine(check.id || cmd, 80);
    const logged = log.get(cmd);

    // (1) The capture log is authoritative whenever it recorded an execution.
    if (logged && logged.ran) {
      if (logged.failed) {
        const exit = Number.isInteger(logged.exitCode) ? ` (exitCode:${logged.exitCode})` : '';
        warnings.push(`${base} evidence check ${id}: capture log CONTRADICTS claimed pass — command "${safeOneLine(cmd, 120)}" was recorded as FAIL${exit}. This is a caught false-completion.`);
      }
      // Ran and passed according to the log: confirmed, nothing to re-run.
      return;
    }

    // (2) No captured execution: fall back to the trusted backstop.
    if (backstopMode === 'skip') {
      warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — command "${safeOneLine(cmd, 120)}" was never captured and backstop re-run is disabled (FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip).`);
      return;
    }

    const trusted = resolveTrustedCommand(root, artifactDir, check, acceptance);
    if (!trusted) {
      warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — command "${safeOneLine(cmd, 120)}" was never captured and no trusted command (acceptance criterion / declared manifest target) resolves to re-run it. Set FLOW_AGENTS_GOAL_FIT_RECHECK=true to opt into re-running the model's free-form command.`);
      return;
    }

    const outcome = runBackstop(trusted);
    if (!outcome.ran) {
      warnings.push(`${base} evidence check ${id}: claimed pass but NOT_VERIFIED — trusted backstop (${trusted.source}) could not run (${safeOneLine(outcome.error, 80)}).`);
      return;
    }

    // A passing re-run confirms the claim and produces no warning.
    if (outcome.passed) return;

    const note = `${base} evidence check ${id}: trusted backstop (${trusted.source}) re-run of "${trusted.argv.join(' ')}" FAILED with exit ${outcome.exitCode}, contradicting the claimed pass. This is a caught false-completion.`;
    warnings.push(backstopMode === 'off' ? `${note} [backstop in warn mode — not blocking]` : note);
  });

  return warnings;
}
|
|
576
|
+
|
|
577
|
+
// ─── ADR 0010 Phase 2 / 2b: gate on the trust.bundle ──────────────────────────
// The gate reads <artifactDir>/trust.bundle and inspects every claim whose
// impactLevel is "high" or "critical".
//
// For each such claim the status is re-derived from the bundle's own
// evidence/events/policies through Surface's deriveClaimStatus (when Surface can
// be loaded), so that editing the stored `claim.status` alone cannot clear the
// gate. A claim produces a warning when EITHER the stored status OR the
// re-derived status is "disputed"/"rejected". When the stored status is
// "verified"/"assumed" but the re-derived one is "disputed"/"rejected", the
// warning is worded as a tamper signal.
//
// If Surface is unavailable, or deriveClaimStatus throws, only the stored status
// is consulted.

/**
 * @param {string} artifactDir - directory expected to contain trust.bundle.
 * @returns {Promise<string[]>} one warning per high/critical claim that is
 *   disputed or rejected (stored or re-derived); empty when the bundle is
 *   missing or has no claims array.
 */
async function bundleEnforcement(artifactDir) {
  const bundle = readJsonFile(path.join(artifactDir, 'trust.bundle'));
  if (!bundle || !Array.isArray(bundle.claims)) return [];

  const surface = await tryLoadSurface();
  const warnings = [];

  const asList = (value) => (Array.isArray(value) ? value : []);
  const allEvidence = asList(bundle.evidence);
  const allEvents = asList(bundle.events);
  const allPolicies = asList(bundle.policies);

  const isBlockingStatus = (status) => status === 'disputed' || status === 'rejected';

  // Returns the lower-cased re-derived status, 'unknown' when Surface answered
  // without a string status, or null when no re-derivation was possible.
  const rederive = (claim) => {
    if (!surface || typeof surface.deriveClaimStatus !== 'function') return null;
    const claimId = claim.id;
    const evidence = allEvidence.filter((item) => item && item.claimId === claimId);
    const events = allEvents.filter((item) => item && item.claimId === claimId);
    try {
      const derived = surface.deriveClaimStatus({ claim, evidence, events, policies: allPolicies });
      if (derived && typeof derived.status === 'string') return derived.status.toLowerCase();
      return 'unknown';
    } catch {
      // e.g. schema mismatch: fall back to the stored status only.
      return null;
    }
  };

  for (const claim of bundle.claims) {
    if (!claim || typeof claim !== 'object') continue;

    const impact = String(claim.impactLevel || '').toLowerCase();
    const storedStatus = String(claim.status || '').toLowerCase();
    if (impact !== 'high' && impact !== 'critical') continue;

    const recomputedStatus = rederive(claim);

    // The stricter of stored vs re-derived wins, so neither can be gamed alone:
    // removing evidence does not clear a stored dispute, and flipping the
    // stored field does not hide a re-derived one.
    if (!isBlockingStatus(storedStatus) && !isBlockingStatus(recomputedStatus)) continue;

    const storedLooksClean = storedStatus === 'verified' || storedStatus === 'assumed';
    const isTampered = recomputedStatus !== null && storedLooksClean && isBlockingStatus(recomputedStatus);

    if (isTampered) {
      warnings.push(`trust.bundle claim tampered: ${safeOneLine(claim.subjectId || claim.id, 80)} (${safeOneLine(claim.claimType, 48)}) — stored status "${storedStatus}" does not match recompute "${recomputedStatus}" (possible tampered bundle); caught false-completion.`);
    } else {
      warnings.push(`trust.bundle claim disputed: ${safeOneLine(claim.subjectId || claim.id, 80)} (${safeOneLine(claim.claimType, 48)}) — Surface recompute shows not verified; caught false-completion.`);
    }
  }

  return warnings;
}
|
|
655
|
+
|
|
656
|
+
/**
 * Pick the artifact directory named by .flow-agents/current.json, if any.
 *
 * The slug is taken from `artifact_dir`, falling back to `active_slug`. Runs of
 * two or more dots and leading slashes/backslashes are stripped before the slug
 * is joined onto flowAgentsDir; the result is accepted only when it lies
 * strictly inside flowAgentsDir and exists on disk.
 *
 * @param {string} flowAgentsDir - path of the .flow-agents directory.
 * @returns {string|null} the scoped directory, or null when current.json is
 *   missing/unusable so the caller can scan all of .flow-agents instead.
 */
function preferredArtifactDir(flowAgentsDir) {
  const current = readJsonFile(path.join(flowAgentsDir, 'current.json'));
  if (!current) return null;

  const slug = current.artifact_dir || current.active_slug;
  if (typeof slug !== 'string' || slug.trim() === '') return null;

  // Neutralize traversal sequences and absolute-path prefixes.
  const sanitized = slug.replace(/\.\.+/g, '').replace(/^[/\\]+/, '');
  const candidate = path.join(flowAgentsDir, sanitized);

  const insideFlowAgents = candidate.startsWith(flowAgentsDir + path.sep);
  if (!insideFlowAgents) return null;
  return fs.existsSync(candidate) ? candidate : null;
}
|
|
670
|
+
|
|
671
|
+
/**
 * Decide whether a task has not yet started executing.
 *
 * When state.json is readable, the task is pre-execution if its normalized
 * status is in PRE_EXECUTION_STATUSES or its normalized phase is in
 * PRE_EXECUTION_PHASES. Without a state.json, only the markdown status is
 * checked against PRE_EXECUTION_STATUSES.
 *
 * @param {string} artifactDir - directory that may contain state.json.
 * @param {*} markdownStatus - status taken from the markdown artifact; used
 *   only when state.json is absent or unreadable.
 * @returns {boolean}
 */
function isPreExecution(artifactDir, markdownStatus) {
  const state = readJsonFile(path.join(artifactDir, 'state.json'));
  if (!state) {
    return PRE_EXECUTION_STATUSES.has(normalizedStatus(markdownStatus));
  }
  if (PRE_EXECUTION_STATUSES.has(normalizedStatus(state.status))) return true;
  return PRE_EXECUTION_PHASES.has(normalizedStatus(state.phase));
}
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
// ─── Wave 2c: no-bundle/no-state fallback gate ────────────────────────────────
// A session with a trust.bundle is handled by bundleEnforcement and one with a
// state.json by sidecarGuidance. A session that has NEITHER (only a markdown
// artifact) would otherwise pass ungated, so it gets a NOT_VERIFIED warning here.
//
// Adjustment A (sidecar-driven Final Acceptance): when acceptance.json still
// lists pending criteria, a Final Acceptance hygiene reminder is emitted from
// the sidecar data rather than from markdown template parsing.
// NOTE(review): the reminder is described as applying to delivered tasks, but
// this function does not inspect the task status — it fires whenever pending
// criteria exist and a bundle or state is present. Confirm that is intended.

/**
 * @param {string} artifactDir - directory that may hold trust.bundle,
 *   state.json and acceptance.json.
 * @returns {string[]} zero or one warning line, prefixed with the directory's
 *   base name.
 */
function missingBundleOrStateSignal(artifactDir) {
  const hasBundle = fs.existsSync(path.join(artifactDir, 'trust.bundle'));
  const state = readJsonFile(path.join(artifactDir, 'state.json'));

  // Untracked by the sidecar path: keep the session gated.
  if (!hasBundle && !state) {
    const base = path.basename(artifactDir);
    return [`${base} NOT_VERIFIED — no trust.bundle or state.json found; run 'workflow-sidecar record-evidence' to build the evidence record before delivery.`];
  }

  const acceptance = readJsonFile(path.join(artifactDir, 'acceptance.json'));
  if (!acceptance || !Array.isArray(acceptance.criteria)) return [];

  // A criterion counts as pending when its normalized status is one of these.
  const isPending = (criterion) => {
    const s = normalizedStatus(criterion && criterion.status);
    return s === 'pending' || s === 'not_started' || s === '' || s === 'unknown';
  };
  const pendingCriteria = acceptance.criteria.filter(isPending);
  if (pendingCriteria.length === 0) return [];

  const base = path.basename(artifactDir);
  return [`${base} Final Acceptance: ${pendingCriteria.length} acceptance criterion/criteria still pending; complete CI/merge/docs before final delivery.`];
}
|
|
725
|
+
|
|
726
|
+
async function analyze(root, now = Date.now()) {
|
|
727
|
+
const flowAgentsDir = path.join(root, '.flow-agents');
|
|
728
|
+
// Scope to the session's current task when current.json names one, so an
|
|
729
|
+
// unrelated active workflow elsewhere in the repo does not gate this stop.
|
|
730
|
+
const scoped = preferredArtifactDir(flowAgentsDir);
|
|
731
|
+
const searchDirs = scoped ? [scoped] : [flowAgentsDir];
|
|
732
|
+
const artifacts = searchDirs
|
|
255
733
|
.flatMap(dir => walkMarkdown(dir))
|
|
256
734
|
.map(readArtifact)
|
|
257
735
|
.filter(isWorkflowArtifact)
|
|
@@ -269,51 +747,121 @@ function analyze(root, now = Date.now()) {
|
|
|
269
747
|
warnings.push(`${relPath} is still status:${status} (${ageMinutes}m old). Do not final-answer as complete unless the next step is explicit.`);
|
|
270
748
|
}
|
|
271
749
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
if (!hasHeading(latest.text, 'Goal Fit Gate')) {
|
|
277
|
-
warnings.push(`${relPath} is missing ## Goal Fit Gate, so local acceptance has not been checked.`);
|
|
278
|
-
} else {
|
|
279
|
-
for (const item of uncheckedInSection(latest.text, 'Goal Fit Gate').slice(0, 6)) {
|
|
280
|
-
warnings.push(`${relPath} Goal Fit unchecked: ${item}`);
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
if (status === 'delivered' && hasHeading(latest.text, 'Final Acceptance')) {
|
|
285
|
-
const uncheckedFinal = uncheckedInSection(latest.text, 'Final Acceptance');
|
|
286
|
-
if (uncheckedFinal.length > 0) {
|
|
287
|
-
warnings.push(`${relPath} local delivery is marked delivered, but Final Acceptance still has ${uncheckedFinal.length} open item(s) for CI/merge/docs promotion.`);
|
|
288
|
-
}
|
|
289
|
-
}
|
|
750
|
+
// Builder heading completeness checks (hasHeading DOD/Goal Fit Gate) removed in ADR 0010 2c.
|
|
751
|
+
// Verdict is now bundle-driven via bundleEnforcement + sidecarGuidance.
|
|
752
|
+
// Sessions with neither trust.bundle nor state.json are caught by missingBundleOrStateSignal.
|
|
290
753
|
|
|
291
754
|
warnings.push(...sidecarValidation(root, path.dirname(latest.file)));
|
|
292
|
-
const evidence = readJsonFile(path.join(path.dirname(latest.file), 'evidence.json'));
|
|
293
|
-
if (evidence && markdownVerdict(latest.text) === 'pass' && normalizedStatus(evidence.verdict) === 'fail') {
|
|
294
|
-
warnings.push(`${relPath} Markdown PASS contradicts evidence.json verdict fail.`);
|
|
295
|
-
}
|
|
296
755
|
warnings.push(...sidecarGuidance(root, path.dirname(latest.file)));
|
|
756
|
+
warnings.push(...captureCrossReference(root, path.dirname(latest.file)));
|
|
757
|
+
warnings.push(...(await bundleEnforcement(path.dirname(latest.file))));
|
|
758
|
+
warnings.push(...missingBundleOrStateSignal(path.dirname(latest.file)));
|
|
759
|
+
|
|
760
|
+
// A pre-execution task (not started) OR a terminal task (which is itself a
|
|
761
|
+
// completion *claim*) must not block on mere incompleteness — but a FALSE claim
|
|
762
|
+
// (capture/evidence contradiction) still blocks at any phase. This is the whole
|
|
763
|
+
// point of the capture cross-reference: catch a task that falsely claims done.
|
|
764
|
+
const gateState = readJsonFile(path.join(path.dirname(latest.file), 'state.json'));
|
|
765
|
+
const taskStatus = gateState ? normalizedStatus(gateState.status) : normalizedStatus(status);
|
|
766
|
+
const preExecution = isPreExecution(path.dirname(latest.file), status);
|
|
767
|
+
const terminal = TERMINAL_STATUSES.has(taskStatus);
|
|
768
|
+
// Always-block: a claimed pass the capture log or evidence.json contradicts.
|
|
769
|
+
const HARD_BLOCK = /contradicts evidence\.json|caught false-completion|evidence verdict:|evidence check .+ status:|critique status|critique open|required sidecar is missing/;
|
|
770
|
+
// Full gate (execution onward): also completeness/hygiene and not-done state.
|
|
771
|
+
const FULL_BLOCK = /status:|Definition Of Done|Goal Fit|sidecar validation:|contradicts evidence\.json|workflow state|evidence verdict|evidence check|NOT_VERIFIED gap|critique status|critique open|next action|caught false-completion|NOT_VERIFIED —/;
|
|
772
|
+
const blockRe = (preExecution || terminal) ? HARD_BLOCK : FULL_BLOCK;
|
|
773
|
+
const blocking = warnings.some(w => {
|
|
774
|
+
// Capture cross-reference warn-mode notes never block (operator opted out).
|
|
775
|
+
if (/\[backstop in warn mode — not blocking\]/.test(w)) return false;
|
|
776
|
+
return blockRe.test(w);
|
|
777
|
+
});
|
|
778
|
+
return { warnings, blocking, preExecution };
|
|
779
|
+
}
|
|
297
780
|
|
|
298
|
-
|
|
299
|
-
|
|
781
|
+
/**
 * Determine the goal-fit enforcement mode.
 *
 * Precedence:
 *   1. FLOW_AGENTS_GOAL_FIT_MODE, when it is (trimmed, case-insensitive) one of
 *      "block", "warn" or "off";
 *   2. the legacy FLOW_AGENTS_GOAL_FIT_STRICT, where "true" (case-insensitive,
 *      not trimmed) selects 'block';
 *   3. otherwise 'warn'.
 *
 * @returns {'block'|'warn'|'off'}
 */
function resolveGoalFitMode() {
  const requested = String(process.env.FLOW_AGENTS_GOAL_FIT_MODE || '').trim().toLowerCase();
  if (['block', 'warn', 'off'].includes(requested)) return requested;

  const legacyStrict = String(process.env.FLOW_AGENTS_GOAL_FIT_STRICT || '').toLowerCase();
  if (legacyStrict === 'true') return 'block';
  return 'warn';
}
|
|
792
|
+
|
|
793
|
+
/**
 * Escape-hatch limit: how many consecutive identical blocks are allowed before
 * the hook releases instead of refusing again, so an unsatisfiable goal cannot
 * trap the agent indefinitely.
 *
 * Read from FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS and parsed as a base-10 integer;
 * any value that does not parse to a positive integer falls back to 3.
 *
 * @returns {number} a positive integer.
 */
function resolveMaxBlocks() {
  const configured = process.env.FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS || '';
  const parsed = Number.parseInt(configured, 10);
  if (!Number.isInteger(parsed)) return 3;
  if (parsed <= 0) return 3;
  return parsed;
}
|
|
301
803
|
|
|
302
|
-
function
|
|
804
|
+
/**
 * Location of the file that persists the consecutive-block streak.
 *
 * @param {string} root - repository root.
 * @returns {string} <root>/.flow-agents/.goal-fit-block-streak.json
 */
function blockStreakFile(root) {
  const segments = ['.flow-agents', '.goal-fit-block-streak.json'];
  return path.join(root, ...segments);
}
|
|
807
|
+
|
|
808
|
+
/**
 * Fingerprint a set of warnings so consecutive identical blocks can be counted.
 *
 * The warnings are joined with newlines and hashed with a 32-bit djb2-style
 * rolling hash (h = h * 33 + charCode, seed 5381).
 *
 * The stale-status warning embeds the artifact's age as "(<n>m old)", which
 * changes from one minute to the next. That token is normalized before hashing;
 * otherwise an unchanged gap would produce a new fingerprint every minute, the
 * block streak would keep resetting, and the MAX_BLOCKS release could never
 * trigger for slow retries.
 *
 * @param {string[]|null|undefined} warnings
 * @returns {string} the unsigned 32-bit hash in decimal.
 */
function reasonsHash(warnings) {
  const text = (warnings || [])
    .map((w) => (typeof w === 'string' ? w.replace(/\(\d+m old\)/g, '(Nm old)') : w))
    .join('\n');
  let h = 5381;
  // `>>> 0` keeps the accumulator an unsigned 32-bit integer on every step.
  for (let i = 0; i < text.length; i += 1) h = ((h << 5) + h + text.charCodeAt(i)) >>> 0;
  return String(h);
}
|
|
814
|
+
|
|
815
|
+
/**
 * Forget any recorded block streak by deleting the streak file.
 * Best effort: a missing file is ignored (`force`) and any other failure is
 * swallowed so it cannot affect the hook's outcome.
 *
 * @param {string} root - repository root.
 */
function clearBlockStreak(root) {
  try {
    const streakPath = blockStreakFile(root);
    fs.rmSync(streakPath, { force: true });
  } catch {
    /* best effort */
  }
}
|
|
818
|
+
|
|
819
|
+
/**
 * Record one more block for the given reasons fingerprint and return the
 * resulting streak length.
 *
 * The streak continues (previous count + 1) only when the stored hash equals
 * `hash`; otherwise it restarts at 1. Persisting is best effort — if the write
 * fails the computed count is still returned, but the next call will not see it.
 *
 * @param {string} root - repository root.
 * @param {string} hash - fingerprint of the current warnings.
 * @returns {number} consecutive blocks recorded for this fingerprint.
 */
function bumpBlockStreak(root, hash) {
  const file = blockStreakFile(root);
  const previous = readJsonFile(file) || {};

  let count = 1;
  if (previous.hash === hash) {
    // A missing or non-numeric stored count is treated as 0.
    count = (Number(previous.count) || 0) + 1;
  }

  try {
    fs.mkdirSync(path.dirname(file), { recursive: true });
    fs.writeFileSync(file, JSON.stringify({ hash, count }));
  } catch {
    /* best effort */
  }
  return count;
}
|
|
829
|
+
|
|
830
|
+
/**
 * Hook entry point: analyze the repository's goal-fit state and decide whether
 * the stop is allowed.
 *
 * Outcomes:
 *   - mode 'off', or no warnings: the raw input is returned unchanged (a plain
 *     string); with no warnings the block streak is also cleared.
 *   - warnings, but mode is not 'block' or nothing is blocking: the warnings go
 *     to stderr with exitCode 0 and the block streak is cleared.
 *   - blocking in 'block' mode: the streak for this set of warnings is bumped.
 *     Below the configured maximum the result has exitCode 2; on reaching it
 *     the streak is cleared and the hook releases with exitCode 0 and a notice.
 *
 * @param {string} rawInput - hook payload as received on stdin.
 * @returns {Promise<string|{stdout: string, stderr: string, exitCode: number}>}
 */
async function run(rawInput) {
  const input = parseJson(rawInput);
  const root = findRepoRoot(input.cwd || process.cwd());

  const mode = resolveGoalFitMode();
  if (mode === 'off') return rawInput;

  const result = await analyze(root);
  if (result.warnings.length === 0) {
    clearBlockStreak(root);
    return rawInput;
  }

  const respond = (stderr, exitCode) => ({ stdout: rawInput, stderr, exitCode });

  const message = [
    '[Hook] Goal Fit warning:',
    ...result.warnings.map(w => ` - ${w}`),
  ].join('\n');

  const mustBlock = mode === 'block' && result.blocking;
  if (!mustBlock) {
    clearBlockStreak(root);
    return respond(message, 0);
  }

  const maxBlocks = resolveMaxBlocks();
  const count = bumpBlockStreak(root, reasonsHash(result.warnings));
  if (count < maxBlocks) {
    return respond(`${message}\n[Hook] Goal Fit BLOCK ${count}/${maxBlocks}.`, 2);
  }

  // Same gap refused too many times in a row: release rather than loop forever.
  clearBlockStreak(root);
  return respond(
    `${message}\n[Hook] Goal Fit block RELEASED after ${count} consecutive identical blocks (FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=${maxBlocks}): the same gap persists, surfacing to the human instead of looping.`,
    0,
  );
}
|
|
319
867
|
|
|
@@ -324,14 +872,28 @@ if (require.main === module) {
|
|
|
324
872
|
if (data.length < MAX_STDIN) data += chunk.substring(0, MAX_STDIN - data.length);
|
|
325
873
|
});
|
|
326
874
|
process.stdin.on('end', () => {
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
875
|
+
// run() is now async (Surface load). We wrap in an async IIFE so the
|
|
876
|
+
// stdin/exit flow is preserved and errors are surfaced as warnings (fail-open).
|
|
877
|
+
(async () => {
|
|
878
|
+
let output;
|
|
879
|
+
try {
|
|
880
|
+
output = await run(data);
|
|
881
|
+
} catch (err) {
|
|
882
|
+
// Unexpected failure in the async gate path — fail-open, allow the Stop.
|
|
883
|
+
process.stderr.write(`[Hook] Goal Fit async error (fail-open): ${String(err && err.message || err)}\n`);
|
|
884
|
+
process.stdout.write(data);
|
|
885
|
+
process.exit(0);
|
|
886
|
+
return;
|
|
887
|
+
}
|
|
888
|
+
if (output && typeof output === 'object') {
|
|
889
|
+
if (output.stderr) process.stderr.write(output.stderr.endsWith('\n') ? output.stderr : `${output.stderr}\n`);
|
|
890
|
+
process.stdout.write(String(output.stdout ?? data));
|
|
891
|
+
process.exit(Number.isInteger(output.exitCode) ? output.exitCode : 0);
|
|
892
|
+
return;
|
|
893
|
+
}
|
|
894
|
+
process.stdout.write(String(output));
|
|
895
|
+
})();
|
|
334
896
|
});
|
|
335
897
|
}
|
|
336
898
|
|
|
337
|
-
module.exports = { analyze, run, uncheckedInSection, findRepoRoot, sidecarGuidance, safeOneLine };
|
|
899
|
+
// Exports of the hook module: the entry points (analyze, run), the mode
// resolver, and the individual helpers named below.
module.exports = { analyze, run, resolveGoalFitMode, uncheckedInSection, findRepoRoot, sidecarGuidance, safeOneLine, captureCrossReference, bundleEnforcement, readCommandLog, resolveTrustedCommand, declaredManifestTarget };
|