@besales/ops-framework 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +328 -0
- package/bin/build-check-context.mjs +67 -0
- package/bin/build-execution-ledger.mjs +54 -0
- package/bin/estimate-llm-input.mjs +160 -0
- package/bin/guard-task.mjs +384 -0
- package/bin/hash-task-artifacts.mjs +44 -0
- package/bin/init-project.mjs +49 -0
- package/bin/intake-execution-feedback.mjs +207 -0
- package/bin/intake-feedback.test.mjs +73 -0
- package/bin/learning-loop.mjs +658 -0
- package/bin/learning-loop.test.mjs +175 -0
- package/bin/lib/bootstrap-utils.mjs +542 -0
- package/bin/lib/bootstrap-utils.test.mjs +156 -0
- package/bin/lib/check-context-utils.mjs +1448 -0
- package/bin/lib/check-context-utils.test.mjs +497 -0
- package/bin/lib/execution-ledger-utils.mjs +162 -0
- package/bin/lib/execution-ledger-utils.test.mjs +74 -0
- package/bin/lib/llm-input-pack-utils.mjs +663 -0
- package/bin/lib/llm-input-pack-utils.test.mjs +262 -0
- package/bin/lib/project-config.mjs +229 -0
- package/bin/lib/project-config.test.mjs +102 -0
- package/bin/lib/task-manifest-utils.mjs +512 -0
- package/bin/lib/task-manifest-utils.test.mjs +218 -0
- package/bin/lib/task-metrics-utils.mjs +63 -0
- package/bin/lib/task-metrics-utils.test.mjs +40 -0
- package/bin/lib/test-setup.mjs +37 -0
- package/bin/new-task.mjs +42 -0
- package/bin/ops-agent.mjs +81 -0
- package/bin/preflight.mjs +56 -0
- package/bin/providers/external-cli-checker.mjs +190 -0
- package/bin/providers/openai-checker.mjs +62 -0
- package/bin/quality-gates.mjs +92 -0
- package/bin/run-check.mjs +559 -0
- package/bin/run-plan-check-loop.mjs +392 -0
- package/bin/run-verify.mjs +627 -0
- package/bin/self-lint.mjs +88 -0
- package/bin/supervisor-turn.mjs +146 -0
- package/bin/supervisor-turn.test.mjs +72 -0
- package/bin/task-manifest.mjs +57 -0
- package/bin/task-metrics.mjs +48 -0
- package/bin/transition.mjs +94 -0
- package/bin/validate-check-artifacts.mjs +418 -0
- package/config/default-agents.json +100 -0
- package/package.json +28 -0
- package/playbooks/checker-context.md +9 -0
- package/playbooks/complexity-performance.md +13 -0
- package/playbooks/production-rollout.md +9 -0
- package/playbooks/source-sync-provider.md +9 -0
- package/playbooks/ui-acceptance.md +9 -0
- package/prompts/checker.md +170 -0
- package/prompts/executor.md +54 -0
- package/prompts/planner.md +128 -0
- package/prompts/researcher.md +44 -0
- package/prompts/supervisor.md +337 -0
- package/prompts/verifier.md +128 -0
- package/templates/brief.md +15 -0
- package/templates/check-resolution.md +69 -0
- package/templates/check-result.json +32 -0
- package/templates/check.md +46 -0
- package/templates/execution-feedback.md +25 -0
- package/templates/execution.md +101 -0
- package/templates/human-gate-summary.md +49 -0
- package/templates/orchestration-log.md +8 -0
- package/templates/plan.md +86 -0
- package/templates/research.md +13 -0
- package/templates/retrospective.md +48 -0
- package/templates/status.md +53 -0
- package/templates/verify-result.json +19 -0
- package/templates/verify.md +41 -0
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import {
|
|
4
|
+
appendOrchestrationLog,
|
|
5
|
+
getFlag,
|
|
6
|
+
normalizeMarkdownBody,
|
|
7
|
+
parseCliArgs,
|
|
8
|
+
readAgentsConfig,
|
|
9
|
+
readPrompt,
|
|
10
|
+
readTaskFile,
|
|
11
|
+
repoRoot,
|
|
12
|
+
resolveConfigValue,
|
|
13
|
+
resolveTaskDir,
|
|
14
|
+
sha256,
|
|
15
|
+
validateExecutionEvidenceForPlan,
|
|
16
|
+
writeTaskFile,
|
|
17
|
+
} from './lib/check-context-utils.mjs';
|
|
18
|
+
import {
|
|
19
|
+
resolveExternalCliProvider,
|
|
20
|
+
runExternalCliChecker,
|
|
21
|
+
} from './providers/external-cli-checker.mjs';
|
|
22
|
+
import {
|
|
23
|
+
buildContextModeSequence,
|
|
24
|
+
buildVerifierLlmInputPack,
|
|
25
|
+
isContextInsufficientResult,
|
|
26
|
+
resolveLlmContextMode,
|
|
27
|
+
summarizePackForConsole,
|
|
28
|
+
} from './lib/llm-input-pack-utils.mjs';
|
|
29
|
+
import { recordLlmInputUsage } from './lib/task-manifest-utils.mjs';
|
|
30
|
+
|
|
31
|
+
/**
 * CLI entry point. Starts the asynchronous verify run and turns any
 * rejection into an `Error: <message>` line on stderr plus exit code 1.
 */
function main() {
  const reportAndExit = (error) => {
    console.error(`Error: ${error.message}`);
    process.exit(1);
  };

  runMain().catch(reportAndExit);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Runs the Verify stage for a single task.
 *
 * Steps, in order:
 * 1. Resolve the task directory and verifier configuration from CLI args.
 * 2. Hash plan.md / execution.md and run the deterministic evidence gate;
 *    when it reports issues, write a `return_to_execute` artifact and stop.
 * 3. In `internal_supervisor` mode, write the supervisor artifact and stop.
 * 4. Otherwise call the external CLI verifier once per context mode returned
 *    by `buildContextModeSequence`, moving to the next mode when a non-strict
 *    pack is over cap or the verifier answers `context_insufficient`.
 * 5. Persist verify.md / verify.result.json and record LLM input usage.
 *
 * Every failure of the external path (provider error, unusable result object,
 * missing markdown, strict pack over cap) writes a `verifier_failed` artifact
 * and records LLM input usage before the error is rethrown or the run stops,
 * so artifacts from an earlier run are never left looking current.
 *
 * @returns {Promise<void>}
 * @throws {Error} On missing task argument, provider failure or unusable
 *   verifier output.
 */
async function runMain() {
  const args = parseCliArgs(process.argv.slice(2));
  const taskArg = args.positional[0];
  if (!taskArg) {
    throw new Error('Usage: node ops/agent-pipeline/bin/run-verify.mjs <TASK-id-or-task-path> [--verifier-provider codex-cli] [--verifier-model gpt-5.5]');
  }

  const taskDir = resolveTaskDir(taskArg);
  const taskId = path.basename(taskDir);
  const verifierConfig = resolveVerifierConfig(args);
  const planSha = hashTaskMarkdown(taskDir, 'plan.md');
  const executionSha = hashTaskMarkdown(taskDir, 'execution.md');
  const taskManifest = readOptionalJson(taskDir, 'task-manifest.json');
  const initialContextMode = resolveLlmContextMode({
    requestedMode: getFlag(args, 'context-mode') || getFlag(args, 'llm-context-mode'),
    riskTriggers: taskManifest?.context?.riskTriggers || [],
  });
  const evidenceIssues = validateExecutionEvidenceForPlan({
    planContent: readTaskFile(taskDir, 'plan.md'),
    executionContent: readTaskFile(taskDir, 'execution.md'),
  });

  if (evidenceIssues.length > 0) {
    writeDeterministicEvidenceReturn({
      taskDir,
      taskId,
      verifierConfig,
      planSha,
      executionSha,
      evidenceIssues,
    });
    console.log(`Verifier preflight blocked ${taskId}: return_to_execute`);
    console.log(`- evidenceIssues: ${evidenceIssues.length}`);
    return;
  }

  if (verifierConfig.mode === 'internal_supervisor') {
    writeInternalSupervisorVerify({
      taskDir,
      taskId,
      verifierConfig,
      planSha,
      executionSha,
    });
    console.log(`Internal supervisor Verify artifact written for ${taskId}: pass_with_notes`);
    return;
  }

  let verifyResultJson = null;
  let verifyMarkdown = '';
  let verifierRunId = null;
  let finalPack = null;
  let rerunCount = 0;
  const llmInputAttempts = [];

  // Records usage for the most recent pack; a no-op when no pack was built.
  const recordUsage = () => {
    if (!finalPack) {
      return;
    }
    recordLlmInputUsage({
      taskDir,
      stage: 'verify',
      packMeta: finalPack.meta,
      attempts: llmInputAttempts,
      rerunCount,
    });
  };

  // Single failure path: writes the verifier_failed artifact, optionally logs
  // one more attempt for the current pack, then records usage.
  const failVerifier = ({ failureReason, message, rawOutput, outcome }) => {
    writeVerifierFailure({
      taskDir,
      taskId,
      verifierConfig,
      verifierRunId,
      planSha,
      executionSha,
      failureReason,
      message,
      rawOutput,
    });
    if (finalPack && outcome) {
      llmInputAttempts.push(buildAttemptRecord(finalPack.meta, outcome));
    }
    recordUsage();
  };

  for (const contextMode of buildContextModeSequence(initialContextMode)) {
    verifierRunId = [
      'external-cli',
      verifierConfig.provider,
      verifierConfig.model,
      contextMode,
      new Date().toISOString().replace(/[:.]/g, '-'),
    ].join(':');
    const promptPayload = buildVerifierPrompt({
      taskDir,
      taskId,
      verifierConfig,
      verifierRunId,
      planSha,
      executionSha,
      contextMode,
    });
    finalPack = promptPayload.pack;
    console.log(`Verifier LLM input for ${taskId}`);
    for (const line of summarizePackForConsole(promptPayload.pack)) {
      console.log(line);
    }

    const packMeta = promptPayload.pack.meta;
    if (packMeta.overCap && contextMode !== 'strict') {
      // Non-strict pack too large: skip the provider call and try the next mode.
      llmInputAttempts.push(buildAttemptRecord(packMeta, 'skipped_over_cap'));
      appendOrchestrationLog(taskDir, `verifier LLM input exceeded ${contextMode} cap; rerunning pack builder with expanded context`);
      continue;
    }
    if (packMeta.overCap) {
      failVerifier({
        failureReason: 'context_overflow',
        message: `Strict LLM input pack exceeds cap: estimatedTokens=${packMeta.estimatedTokens}, capTokens=${packMeta.capTokens}`,
        rawOutput: null,
        outcome: 'over_cap_blocked',
      });
      console.log(`Verifier blocked for ${taskId}: strict context pack over cap`);
      return;
    }

    try {
      const output = await runExternalCliChecker({
        providerName: verifierConfig.provider,
        providerConfig: verifierConfig.providerConfig,
        model: verifierConfig.model,
        reasoningEffort: verifierConfig.reasoningEffort,
        prompt: promptPayload.prompt,
        cwd: repoRoot,
      });
      // Shape checks live inside the try so an unusable provider result
      // produces a failure artifact instead of an unrecorded crash.
      verifyMarkdown = String(output?.verifyMarkdown || '').trim();
      verifyResultJson = normalizeVerifyResult({
        taskId,
        verifierConfig,
        verifierRunId,
        planSha,
        executionSha,
        value: output?.verifyResultJson,
      });
    } catch (error) {
      const failureReason = error.failureReason || 'unknown';
      failVerifier({
        failureReason,
        message: error.message,
        rawOutput: error.rawOutput || null,
        outcome: `provider_failed:${failureReason}`,
      });
      throw error;
    }

    if (!isContextInsufficientResult(verifyResultJson) || contextMode === 'strict') {
      llmInputAttempts.push(buildAttemptRecord(packMeta, verifyResultJson?.verdict || 'completed'));
      break;
    }
    llmInputAttempts.push(buildAttemptRecord(packMeta, 'context_insufficient'));
    rerunCount += 1;
    appendOrchestrationLog(taskDir, `verifier returned context_insufficient in ${contextMode}; rerunning with expanded context`);
  }

  if (!verifyMarkdown) {
    // The final attempt is already in llmInputAttempts, so no extra outcome here.
    const message = 'Verifier output missing verifyMarkdown.';
    failVerifier({ failureReason: 'unknown', message, rawOutput: null });
    throw new Error(message);
  }

  writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
  writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(verifyResultJson, null, 2));
  recordUsage();
  appendOrchestrationLog(taskDir, `external CLI verifier completed via ${verifierConfig.provider}; verdict=${verifyResultJson.verdict}; runId=${verifierRunId}`);
  console.log(`Verifier run completed for ${taskId}: ${verifyResultJson.verdict}`);
  console.log(`- verifierRunId: ${verifierRunId}`);
  if (finalPack) {
    console.log(`- finalLlmInputMode: ${finalPack.meta.mode}`);
    console.log(`- finalEstimatedInputTokens: ${finalPack.meta.estimatedTokens}`);
  }
}
|
|
218
|
+
|
|
219
|
+
/**
 * Builds one entry of the LLM input attempt log.
 *
 * @param {object} packMeta - Pack metadata; `mode`, `estimatedTokens`,
 *   `bytes`, `capTokens` and `overCap` are copied from it.
 * @param {string} outcome - Label describing how the attempt ended.
 * @returns {object} The copied size fields plus `outcome`.
 */
function buildAttemptRecord(packMeta, outcome) {
  const { mode, estimatedTokens, bytes, capTokens, overCap } = packMeta;
  return { mode, estimatedTokens, bytes, capTokens, overCap, outcome };
}
|
|
229
|
+
|
|
230
|
+
/**
 * Writes a `return_to_execute` Verify artifact when the deterministic
 * evidence gate found issues, without invoking any verifier.
 *
 * Each evidence issue becomes one blocking finding (`V-001`, `V-002`, ...).
 * The findings are rendered as a markdown table in verify.md and stored in
 * verify.result.json; one orchestration log line is appended.
 *
 * @param {object} params
 * @param {string} params.taskDir - Task directory the artifacts are written to.
 * @param {string} params.taskId - Task identifier stored in the result.
 * @param {object} params.verifierConfig - Only `mode` is read.
 * @param {string} params.planSha - Hash of plan.md stored in the result.
 * @param {string} params.executionSha - Hash of execution.md stored in the result.
 * @param {Array<object>} params.evidenceIssues - Issues; `category` and
 *   `message` are read from each.
 */
function writeDeterministicEvidenceReturn({
  taskDir,
  taskId,
  verifierConfig,
  planSha,
  executionSha,
  evidenceIssues,
}) {
  const verifierRunId = `deterministic-preverify:${new Date().toISOString().replace(/[:.]/g, '-')}`;
  const findings = evidenceIssues.map((issue, index) => ({
    id: `V-${String(index + 1).padStart(3, '0')}`,
    severity: 'blocking',
    claimCategory: issue.category,
    affectedArtifacts: ['plan.md', 'execution.md'],
    claim: issue.message,
    evidenceRefs: [
      {
        type: 'artifact',
        ref: 'deterministic pre-verify evidence gate',
      },
    ],
    expectedCorrection: expectedCorrectionForEvidenceIssue(issue),
  }));

  // Every free-form cell is escaped, including the category, so a stray
  // pipe or line break cannot break the table layout.
  const findingRows = findings.map((finding) => {
    const cells = [
      finding.id,
      finding.severity,
      escapeTableCell(finding.claimCategory),
      escapeTableCell(finding.claim),
      escapeTableCell(finding.expectedCorrection),
    ];
    return `| ${cells.join(' | ')} |`;
  });

  const verifyMarkdown = [
    '# Verify',
    '',
    '## Verdict',
    '',
    '`FAIL` / `return_to_execute`',
    '',
    '## Deterministic Pre-Verify Gate',
    '',
    'External/internal verifier was not invoked because mandatory execution evidence is missing.',
    '',
    '## Findings',
    '',
    '| ID | Severity | Category | Claim | Expected correction |',
    '| --- | --- | --- | --- | --- |',
    ...findingRows,
    '',
    '## Recommended next step',
    '',
    'Return to Execute, record the missing evidence, then rerun Verify.',
  ].join('\n');

  const result = {
    schemaVersion: 1,
    taskId,
    planSha,
    executionSha,
    verificationMode: verifierConfig.mode,
    verifierProvider: 'deterministic-preverify',
    verifierModel: 'none',
    verifierRunId,
    verdict: 'return_to_execute',
    failureReason: null,
    readyForRetrospective: false,
    counts: {
      blockingFindings: findings.length,
      nonBlockingFindings: 0,
      questions: 0,
    },
    findings,
  };

  writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
  writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(result, null, 2));
  appendOrchestrationLog(taskDir, `deterministic pre-verify evidence gate returned return_to_execute; findings=${findings.length}; configuredMode=${verifierConfig.mode}`);
}
|
|
298
|
+
|
|
299
|
+
/**
 * Makes a value safe to place inside a single markdown table cell.
 *
 * Pipes are backslash-escaped and every line break (LF, CRLF or lone CR) is
 * replaced by one space, so text coming from CRLF files cannot leave a
 * carriage return inside the row.
 *
 * @param {*} value - Any value; it is converted with `String()`.
 * @returns {string} The escaped, trimmed single-line text.
 */
function escapeTableCell(value) {
  return String(value)
    .replace(/\|/g, '\\|')
    .replace(/\r\n|\r|\n/g, ' ')
    .trim();
}
|
|
302
|
+
|
|
303
|
+
/**
 * Picks the remediation text for one deterministic evidence issue.
 *
 * The `ui_verification_gap` category wins outright. Otherwise the first
 * section name found in the issue message selects the text, and the
 * Complexity / Performance text is the fallback.
 *
 * @param {{category?: string, message?: *}} issue - Evidence issue. A missing
 *   or non-string `message` is treated as text instead of throwing.
 * @returns {string} Instruction describing the evidence to record.
 */
function expectedCorrectionForEvidenceIssue(issue) {
  if (issue.category === 'ui_verification_gap') {
    return 'Record UI Acceptance Evidence for each planned UI scenario with scenario id, result, observed visible state and screenshot/payload/rendered/API fallback evidence.';
  }

  const message = String(issue.message ?? '');
  // Order matters: the first section name found in the message wins.
  const correctionsBySection = [
    ['Production Rollout', 'Record Production Rollout Evidence with rollout impact, env/deploy facts, rollback path and post-deploy logs/metrics/smoke evidence.'],
    ['Source Sync / Provider', 'Record Source Sync / Provider Evidence with scope/window, idempotency, retry/pagination/rate-limit, raw-record and coverage/parity evidence.'],
    ['Optimization Review', 'Record Optimization Review Evidence with hot-path review, anti-pattern findings, timing/rows/benchmark/EXPLAIN evidence or explicit deferred findings before Verify.'],
  ];
  const matched = correctionsBySection.find(([sectionName]) => message.includes(sectionName));
  if (matched) {
    return matched[1];
  }
  return 'Record Complexity / Performance Evidence with timing, row-count, EXPLAIN, N+1 or hot-path review evidence before Verify.';
}
|
|
318
|
+
|
|
319
|
+
/**
 * Resolves the effective verifier settings.
 *
 * For each setting the first truthy source wins, in this order: CLI flag,
 * environment variable, agents config, built-in default. Model and reasoning
 * effort consult the per-provider entry under `checkerProviders` before the
 * `verifier` section of the config.
 *
 * @param {object} args - Parsed CLI arguments as accepted by `getFlag`.
 * @returns {{mode: string, provider: string, model: string,
 *   reasoningEffort: string, providerConfig: *}} Resolved configuration.
 * @throws {Error} When the requested mode is not a known alias.
 */
function resolveVerifierConfig(args) {
  const config = readAgentsConfig();
  const env = process.env;

  const requestedMode = getFlag(args, 'verify-mode')
    || getFlag(args, 'verification-mode')
    || env.VERIFICATION_MODE
    || env.VERIFIER_MODE
    || resolveConfigValue(config.verifier?.mode)
    || 'external_cli';
  const mode = normalizeVerifierMode(requestedMode);

  const provider = getFlag(args, 'verifier-provider')
    || env.VERIFIER_PROVIDER
    || resolveConfigValue(config.verifier?.provider)
    || 'codex-cli';
  const providerDefaults = config.checkerProviders?.[provider] || {};

  // Last-resort model depends on the provider family.
  const usesClaudeDefault = provider === 'claude-cli' || provider === 'cloud-cli';
  const fallbackModel = usesClaudeDefault ? 'claude-opus-4-7' : 'gpt-5.5';
  const model = getFlag(args, 'verifier-model')
    || env.VERIFIER_MODEL
    || resolveConfigValue(providerDefaults.model)
    || resolveConfigValue(config.verifier?.model)
    || fallbackModel;

  const reasoningEffort = getFlag(args, 'verifier-reasoning-effort')
    || env.VERIFIER_REASONING_EFFORT
    || resolveConfigValue(providerDefaults.reasoningEffort)
    || resolveConfigValue(config.verifier?.reasoningEffort)
    || 'medium';

  const providerConfig = resolveExternalCliProvider(provider, config);
  return { mode, provider, model, reasoningEffort, providerConfig };
}
|
|
353
|
+
|
|
354
|
+
/**
 * Maps a user-supplied verifier mode alias to its canonical name.
 *
 * String input is trimmed and lower-cased before matching, so values such as
 * `External_CLI` or ` off ` coming from environment variables are accepted.
 *
 * @param {*} value - Requested mode.
 * @returns {'external_cli'|'internal_supervisor'} Canonical mode.
 * @throws {Error} When the value is not a recognised alias; the message
 *   echoes the value exactly as supplied.
 */
function normalizeVerifierMode(value) {
  const candidate = typeof value === 'string' ? value.trim().toLowerCase() : value;
  const externalAliases = ['external', 'external_cli', 'independent'];
  const internalAliases = ['internal', 'internal_supervisor', 'off', 'none', 'no_external'];

  if (externalAliases.includes(candidate)) {
    return 'external_cli';
  }
  if (internalAliases.includes(candidate)) {
    return 'internal_supervisor';
  }

  throw new Error(`Invalid verifier mode: ${value}. Use external_cli or internal_supervisor.`);
}
|
|
365
|
+
|
|
366
|
+
/**
 * Writes the Verify artifacts for `internal_supervisor` mode, in which no
 * external verifier is invoked.
 *
 * verify.md contains a fixed `PASS_WITH_NOTES` verdict, the first 2000
 * characters of the trimmed status.md and the last 4000 characters of the
 * trimmed execution.md. verify.result.json carries one non-blocking finding
 * noting that no independent verifier ran. One orchestration log line is
 * appended.
 *
 * @param {object} params
 * @param {string} params.taskDir - Task directory read from and written to.
 * @param {string} params.taskId - Task identifier stored in the result.
 * @param {object} params.verifierConfig - Only `mode` is read.
 * @param {string} params.planSha - Hash of plan.md stored in the result.
 * @param {string} params.executionSha - Hash of execution.md stored in the result.
 */
function writeInternalSupervisorVerify({
  taskDir,
  taskId,
  verifierConfig,
  planSha,
  executionSha,
}) {
  const runStamp = new Date().toISOString().replace(/[:.]/g, '-');
  const verifierRunId = `internal-supervisor:${runStamp}`;
  const status = readTaskFile(taskDir, 'status.md');
  const execution = readTaskFile(taskDir, 'execution.md');
  const statusExcerpt = status.trim().slice(0, 2000);
  const executionExcerpt = execution.trim().slice(-4000);

  const markdownSections = [
    ['# Verify', ''],
    [
      '## verdict',
      '',
      '`PASS_WITH_NOTES`',
      '',
      'Internal Supervisor Verify was selected. No external CLI/model verifier was invoked.',
      '',
    ],
    [
      '## verification mode',
      '',
      '- `verificationMode`: `internal_supervisor`',
      '- `verifierProvider`: `supervisor`',
      '- External independent verifier: skipped by configured cost-saving mode.',
      '',
    ],
    [
      '## evidence reviewed',
      '',
      '- `plan.md` hash matched the recorded verify input.',
      '- `execution.md` hash matched the recorded verify input.',
      '- Supervisor reviewed the current execution ledger and status artifact.',
      '',
    ],
    [
      '## residual risks',
      '',
      '- This is not an independent fresh-context verifier run.',
      '- Use `--verify-mode external_cli` for production-readiness, R4/R5, material migrations/backfills, destructive/security/financial work, broad ambiguous refactors or explicit human request.',
      '',
    ],
    ['## latest status excerpt', '', '```md', statusExcerpt, '```', ''],
    ['## execution evidence excerpt', '', '```md', executionExcerpt, '```'],
  ];
  const verifyMarkdown = markdownSections.flat().join('\n');

  const independenceNote = {
    id: 'V-001',
    severity: 'non_blocking',
    claimCategory: 'insufficient_evidence',
    affectedArtifacts: ['verify.md', 'verify.result.json'],
    claim: 'Verify used internal supervisor mode, so no independent fresh-context verifier reviewed this execution.',
    evidenceRefs: [
      {
        type: 'config',
        ref: `verifier.mode=${verifierConfig.mode}`,
      },
    ],
    expectedCorrection: 'Run `corepack yarn agent:run-verify <TASK> --verify-mode external_cli` if the task risk or human decision requires independent verification.',
  };
  const result = {
    schemaVersion: 1,
    taskId,
    planSha,
    executionSha,
    verificationMode: 'internal_supervisor',
    verifierProvider: 'supervisor',
    verifierModel: 'none',
    verifierRunId,
    verdict: 'pass_with_notes',
    failureReason: null,
    readyForRetrospective: true,
    counts: {
      blockingFindings: 0,
      nonBlockingFindings: 1,
      questions: 0,
    },
    findings: [independenceNote],
  };

  writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
  writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(result, null, 2));
  appendOrchestrationLog(taskDir, `internal supervisor verifier completed; verdict=${result.verdict}; runId=${verifierRunId}`);
}
|
|
453
|
+
|
|
454
|
+
/**
 * Assembles the prompt sent to the external CLI verifier.
 *
 * The prompt is the `verifier.md` prompt text, followed by fresh-context
 * rules, the required JSON response shape, the fields the result must carry,
 * and the LLM input pack serialised inside `<task_input_json>` tags.
 *
 * @param {object} params
 * @param {string} params.taskDir - Task directory passed to the pack builder.
 * @param {string} params.taskId - Task identifier.
 * @param {object} params.verifierConfig - `provider`, `model` and
 *   `reasoningEffort` are read.
 * @param {string} params.verifierRunId - Identifier of this verifier run.
 * @param {string} params.planSha - Hash of plan.md.
 * @param {string} params.executionSha - Hash of execution.md.
 * @param {string} params.contextMode - Context mode passed to the pack builder.
 * @returns {{prompt: string, pack: object}} Prompt text and the pack it embeds.
 */
function buildVerifierPrompt({
  taskDir,
  taskId,
  verifierConfig,
  verifierRunId,
  planSha,
  executionSha,
  contextMode,
}) {
  const verifierPrompt = readPrompt('verifier.md');
  const { provider, model, reasoningEffort } = verifierConfig;
  const pack = buildVerifierLlmInputPack({
    taskDir,
    taskId,
    planSha,
    executionSha,
    verifier: {
      provider,
      model,
      reasoningEffort,
      runId: verifierRunId,
    },
    mode: contextMode,
  });

  const freshContextRules = [
    'Fresh-context enforcement:',
    '- You are an external isolated CLI Verifier invocation.',
    '- You do not inherit this chat, Executor reasoning, Planner reasoning or prior Verifier reasoning.',
    '- Treat execution.md as claims to verify against plan.md, task artifacts and repository state.',
    '- Read repository files as needed, but do not mutate any files.',
    '- If you compare planSha/executionSha with local files, use `corepack yarn agent:hash-task-artifacts <TASK-id>` or equivalent normalizeMarkdownBody semantics; raw `shasum` over markdown files is not equivalent.',
  ];
  const responseShape = [
    'Return exactly one JSON object, with no markdown wrapper:',
    '{',
    '  "verifyMarkdown": "full markdown for verify.md",',
    '  "verifyResultJson": { ... object matching verify.result.json contract ... }',
    '}',
  ];
  const requiredFields = [
    'Required verifyResultJson fields:',
    '- schemaVersion: 1',
    `- taskId: ${taskId}`,
    `- planSha: ${planSha}`,
    `- executionSha: ${executionSha}`,
    '- verificationMode: external_cli',
    `- verifierProvider: ${provider}`,
    `- verifierModel: ${model}`,
    `- verifierRunId: ${verifierRunId}`,
    '- verdict: pass | pass_with_notes | return_to_execute | return_to_plan | human_arbitration_required | context_insufficient | verifier_failed',
    '- failureReason: null unless verdict=verifier_failed',
    '- readyForRetrospective: true only for pass/pass_with_notes',
    '- counts matching findings[]',
    '- findings[] with V-001, V-002 IDs and severity blocking | non_blocking | question',
  ];
  const taskInput = [
    '<task_input_json>',
    JSON.stringify(pack.input, null, 2),
    '</task_input_json>',
  ];

  // Sections are separated by a single blank line.
  const prompt = [
    verifierPrompt,
    '',
    ...freshContextRules,
    '',
    ...responseShape,
    '',
    ...requiredFields,
    '',
    ...taskInput,
  ].join('\n');

  return { prompt, pack };
}
|
|
518
|
+
|
|
519
|
+
/**
 * Validates that the verifier returned a result object and stamps it with
 * the identity fields this run owns.
 *
 * Fields supplied by the verifier are kept, except that `schemaVersion`,
 * `taskId`, `planSha`, `executionSha`, `verificationMode`,
 * `verifierProvider`, `verifierModel` and `verifierRunId` are always
 * overwritten with the locally known values.
 *
 * @param {object} params
 * @param {string} params.taskId - Task identifier to enforce.
 * @param {object} params.verifierConfig - `provider` and `model` are read.
 * @param {string} params.verifierRunId - Run identifier to enforce.
 * @param {string} params.planSha - Plan hash to enforce.
 * @param {string} params.executionSha - Execution hash to enforce.
 * @param {*} params.value - The `verifyResultJson` returned by the verifier.
 * @returns {object} A new object; `value` is not mutated.
 * @throws {Error} When `value` is falsy, not an object, or an array.
 */
function normalizeVerifyResult({
  taskId,
  verifierConfig,
  verifierRunId,
  planSha,
  executionSha,
  value,
}) {
  const isResultObject = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isResultObject) {
    throw new Error('Verifier output missing verifyResultJson object.');
  }

  // Spread last so these values always win over verifier-supplied ones.
  const enforcedFields = {
    schemaVersion: 1,
    taskId,
    planSha,
    executionSha,
    verificationMode: 'external_cli',
    verifierProvider: verifierConfig.provider,
    verifierModel: verifierConfig.model,
    verifierRunId,
  };
  return { ...value, ...enforcedFields };
}
|
|
542
|
+
|
|
543
|
+
/**
 * Writes `verifier_failed` Verify artifacts for a failed external run.
 *
 * verify.md carries the failure message and a one-row findings table;
 * verify.result.json carries a single blocking `verifier_failure` finding.
 * One orchestration log line is appended.
 *
 * @param {object} params
 * @param {string} params.taskDir - Task directory the artifacts are written to.
 * @param {string} params.taskId - Task identifier stored in the result.
 * @param {object} params.verifierConfig - `provider` and `model` are read.
 * @param {?string} params.verifierRunId - Identifier of the failed run.
 * @param {string} params.planSha - Hash of plan.md stored in the result.
 * @param {string} params.executionSha - Hash of execution.md stored in the result.
 * @param {string} params.failureReason - Short failure code; escaped before
 *   it is placed in the markdown table.
 * @param {string} params.message - Human-readable failure description.
 * @param {*} params.rawOutput - Raw provider output, if any. Converted to a
 *   string and truncated to 1000 characters; falsy values produce no
 *   evidence reference.
 */
function writeVerifierFailure({
  taskDir,
  taskId,
  verifierConfig,
  verifierRunId,
  planSha,
  executionSha,
  failureReason,
  message,
  rawOutput,
}) {
  const verifyMarkdown = [
    '# Verify',
    '',
    '## Verdict',
    '',
    '`VERIFIER_FAILED`',
    '',
    `External CLI verifier failed before producing a valid result: ${message}`,
    '',
    '## Findings',
    '',
    '| ID | Severity | Category | Claim | Expected correction |',
    '| --- | --- | --- | --- | --- |',
    // The failure code is escaped like the cells of the evidence-gate table.
    `| V-001 | blocking | verifier_failure | External verifier failed with ${escapeTableCell(failureReason)}. | Fix provider/config/output and rerun external verifier. |`,
  ].join('\n');

  // String() keeps slice() safe when the provider attached non-string output.
  const evidenceRefs = rawOutput
    ? [{ type: 'external_output', ref: String(rawOutput).slice(0, 1000) }]
    : [];
  const result = {
    schemaVersion: 1,
    taskId,
    planSha,
    executionSha,
    verificationMode: 'external_cli',
    verifierProvider: verifierConfig.provider,
    verifierModel: verifierConfig.model,
    verifierRunId,
    verdict: 'verifier_failed',
    failureReason,
    readyForRetrospective: false,
    counts: {
      blockingFindings: 1,
      nonBlockingFindings: 0,
      questions: 0,
    },
    findings: [
      {
        id: 'V-001',
        severity: 'blocking',
        claimCategory: 'verifier_failure',
        affectedArtifacts: ['verify.result.json'],
        claim: `External verifier failed: ${message}`,
        evidenceRefs,
        expectedCorrection: 'Fix provider/config/output and rerun external verifier.',
      },
    ],
  };

  writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
  writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(result, null, 2));
  appendOrchestrationLog(taskDir, `external CLI verifier failed via ${verifierConfig.provider}; failureReason=${failureReason}; runId=${verifierRunId}`);
}
|
|
602
|
+
|
|
603
|
+
/**
 * Hashes a required task markdown file after body normalisation.
 *
 * @param {string} taskDir - Task directory.
 * @param {string} fileName - File name inside the task directory.
 * @returns {*} Result of `sha256` over the normalised content.
 * @throws {Error} When the file does not exist.
 */
function hashTaskMarkdown(taskDir, fileName) {
  const rawMarkdown = readRequiredTaskFile(taskDir, fileName);
  const normalizedBody = normalizeMarkdownBody(rawMarkdown);
  return sha256(normalizedBody);
}
|
|
606
|
+
|
|
607
|
+
/**
 * Reads a task artifact that must exist.
 *
 * @param {string} taskDir - Task directory.
 * @param {string} fileName - File name inside the task directory.
 * @returns {string} File content decoded as UTF-8.
 * @throws {Error} `Missing required task artifact: <fileName>` when the
 *   path does not exist.
 */
function readRequiredTaskFile(taskDir, fileName) {
  const artifactPath = path.join(taskDir, fileName);
  if (fs.existsSync(artifactPath)) {
    return fs.readFileSync(artifactPath, 'utf8');
  }
  throw new Error(`Missing required task artifact: ${fileName}`);
}
|
|
614
|
+
|
|
615
|
+
/**
 * Reads and parses an optional JSON task artifact.
 *
 * A missing file is expected and returns `null` silently. A file that exists
 * but cannot be read or parsed also returns `null`, but a warning is printed
 * so a corrupt artifact is not mistaken for an absent one.
 *
 * @param {string} taskDir - Task directory.
 * @param {string} fileName - File name inside the task directory.
 * @returns {*} Parsed JSON value, or `null` when missing or unreadable.
 */
function readOptionalJson(taskDir, fileName) {
  const filePath = path.join(taskDir, fileName);
  if (!fs.existsSync(filePath)) {
    return null;
  }
  try {
    return JSON.parse(fs.readFileSync(filePath, 'utf8'));
  } catch (error) {
    // Still best-effort: callers keep working with their defaults.
    console.warn(`Warning: ignoring unreadable ${fileName}: ${error.message}`);
    return null;
  }
}
|
|
626
|
+
|
|
627
|
+
main();
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
|
|
6
|
+
// Package self-lint: walks every file under the package root, collects
// problems into `issues`, prints them and exits with code 1 when any exist.
const __filename = fileURLToPath(import.meta.url);
const packageRoot = path.resolve(path.dirname(__filename), '..');
const fix = process.argv.includes('--fix');
const issues = [];
const textExtensions = new Set(['.json', '.md', '.mjs', '.ts', '.yaml', '.yml']);
const ignoredDirs = new Set(['node_modules', '.git', '.yarn']);

// The searched words are assembled from fragments, presumably so that this
// script does not match its own patterns when it scans itself.
const openWorkMarkers = [['TO', 'DO'], ['FIX', 'ME']].map((parts) => parts.join(''));
const openWorkMarkerPattern = new RegExp(`\\b(${openWorkMarkers.join('|')})\\b`);
const localRootPrefixes = ['Users', 'Applications'].map((name) => `/${name}/`);
const localAbsolutePathPattern = new RegExp(localRootPrefixes.join('|'));
const projectSpecificPath = String.raw`\b${['apps', 'panel'].join('/')}\b`;
const projectSpecificNames = [
  ['Leak', 'Engine'],
  ['Delivery', 'Os'],
  ['Personal', 'Blog'],
].map((parts) => parts.join(''));
const projectSpecificPattern = new RegExp([projectSpecificPath, ...projectSpecificNames].join('|'));

for (const filePath of walk(packageRoot)) {
  const relativePath = path.relative(packageRoot, filePath);
  const isFinderMetadata = path.basename(filePath) === '.DS_Store';
  if (isFinderMetadata) {
    issues.push(`${relativePath}: remove .DS_Store from package files`);
  } else if (textExtensions.has(path.extname(filePath))) {
    // Only known text formats get content checks.
    checkTextFile(filePath, relativePath);
  }
}

if (issues.length > 0) {
  const pluralSuffix = issues.length === 1 ? '' : 's';
  console.error(`ops-framework lint failed (${issues.length} issue${pluralSuffix}):`);
  issues.forEach((issue) => {
    console.error(`- ${issue}`);
  });
  process.exit(1);
}

console.log(`ops-framework lint ${fix ? 'fixed and ' : ''}passed`);
|
|
42
|
+
|
|
43
|
+
/**
 * Runs the content checks on one text file and appends any problems to the
 * module-level `issues` list.
 *
 * Open-work markers, local absolute paths and project-specific names are
 * always reported. Trailing whitespace and a missing final newline are
 * reported in check mode; with `--fix` they are repaired in place and not
 * reported, so a run that fixed everything can finish as
 * "fixed and passed" instead of failing for problems that no longer exist.
 *
 * @param {string} filePath - Path of the file to read (and rewrite on fix).
 * @param {string} relativePath - Path shown in issue messages.
 */
function checkTextFile(filePath, relativePath) {
  const original = fs.readFileSync(filePath, 'utf8');
  let content = original;

  if (openWorkMarkerPattern.test(content)) {
    issues.push(`${relativePath}: contains open-work marker`);
  }
  if (localAbsolutePathPattern.test(content)) {
    issues.push(`${relativePath}: contains local absolute path`);
  }
  if (projectSpecificPattern.test(content)) {
    issues.push(`${relativePath}: contains project-specific path or name`);
  }

  if (/[ \t]+$/m.test(content)) {
    if (fix) {
      content = content.replace(/[ \t]+$/gm, '');
    } else {
      issues.push(`${relativePath}: contains trailing whitespace`);
    }
  }
  // Evaluated after whitespace stripping, which may have emptied the content.
  if (content && !content.endsWith('\n')) {
    if (fix) {
      content = `${content}\n`;
    } else {
      issues.push(`${relativePath}: missing final newline`);
    }
  }

  if (fix && content !== original) {
    fs.writeFileSync(filePath, content);
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Recursively yields the path of every regular file below `dir`.
 *
 * Any entry whose name is listed in `ignoredDirs` is skipped, whether it is
 * a directory or a file. Entries that are neither directories nor regular
 * files are not yielded.
 *
 * @param {string} dir - Directory to traverse.
 * @yields {string} Path of a regular file, built with `path.join`.
 */
function* walk(dir) {
  const entries = fs.readdirSync(dir, { withFileTypes: true });
  for (const entry of entries) {
    if (!ignoredDirs.has(entry.name)) {
      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        yield* walk(entryPath);
      } else if (entry.isFile()) {
        yield entryPath;
      }
    }
  }
}
|