@besales/ops-framework 0.1.18 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/bin/initiative.mjs +61 -0
- package/bin/initiative.test.mjs +114 -0
- package/bin/lib/check-context-utils.mjs +251 -2
- package/bin/lib/check-context-utils.test.mjs +260 -0
- package/bin/lib/execution-ledger-utils.mjs +22 -2
- package/bin/lib/execution-ledger-utils.test.mjs +60 -0
- package/bin/lib/llm-input-pack-utils.mjs +89 -19
- package/bin/lib/llm-input-pack-utils.test.mjs +11 -4
- package/bin/lib/task-manifest-utils.test.mjs +22 -0
- package/bin/run-check.mjs +92 -0
- package/bin/run-verify.mjs +106 -0
- package/package.json +1 -1
package/bin/run-check.mjs
CHANGED
|
@@ -66,6 +66,13 @@ async function runMain() {
|
|
|
66
66
|
const noCache = getFlag(args, 'no-cache', false) === true;
|
|
67
67
|
const checkerConfig = resolveCheckerConfig(args);
|
|
68
68
|
const runStartedAt = new Date();
|
|
69
|
+
appendCheckTimeline(taskDir, {
|
|
70
|
+
event: 'check_started',
|
|
71
|
+
provider: checkerConfig.provider,
|
|
72
|
+
model: checkerConfig.model,
|
|
73
|
+
noCache,
|
|
74
|
+
dryRun,
|
|
75
|
+
});
|
|
69
76
|
|
|
70
77
|
let checkContext = ensureFreshCheckContext(taskDir, taskId);
|
|
71
78
|
const deterministicPrecheck = syncManifestAndStatusBeforeCheck({ taskDir, taskId, checkContext });
|
|
@@ -79,6 +86,12 @@ async function runMain() {
|
|
|
79
86
|
issues: deterministicPrecheck.issues,
|
|
80
87
|
startedAt: runStartedAt,
|
|
81
88
|
});
|
|
89
|
+
appendCheckTimeline(taskDir, {
|
|
90
|
+
event: 'deterministic_precheck_blocked',
|
|
91
|
+
verdict: 'return_to_plan',
|
|
92
|
+
issues: deterministicPrecheck.issues.map((issue) => issue.message),
|
|
93
|
+
timing: buildTiming(runStartedAt),
|
|
94
|
+
});
|
|
82
95
|
runValidator(taskArg);
|
|
83
96
|
console.log(`Checker preflight blocked ${taskId}: return_to_plan`);
|
|
84
97
|
console.log(`- deterministicIssues: ${deterministicPrecheck.issues.length}`);
|
|
@@ -146,6 +159,13 @@ async function runMain() {
|
|
|
146
159
|
cacheKey,
|
|
147
160
|
contextMode,
|
|
148
161
|
});
|
|
162
|
+
appendCheckTimeline(taskDir, {
|
|
163
|
+
event: 'llm_input_built',
|
|
164
|
+
contextMode,
|
|
165
|
+
cacheKeySha,
|
|
166
|
+
packMeta: promptPayload.pack.meta,
|
|
167
|
+
timing: buildTiming(runStartedAt),
|
|
168
|
+
});
|
|
149
169
|
|
|
150
170
|
console.log(`Checker LLM input for ${taskId}`);
|
|
151
171
|
for (const line of summarizePackForConsole(promptPayload.pack)) {
|
|
@@ -169,6 +189,13 @@ async function runMain() {
|
|
|
169
189
|
message: `Strict LLM input pack exceeds cap: estimatedTokens=${promptPayload.pack.meta.estimatedTokens}, capTokens=${promptPayload.pack.meta.capTokens}`,
|
|
170
190
|
rawOutput: null,
|
|
171
191
|
});
|
|
192
|
+
appendCheckTimeline(taskDir, {
|
|
193
|
+
event: 'context_overflow',
|
|
194
|
+
contextMode,
|
|
195
|
+
cacheKeySha,
|
|
196
|
+
packMeta: promptPayload.pack.meta,
|
|
197
|
+
timing: buildTiming(runStartedAt),
|
|
198
|
+
});
|
|
172
199
|
recordLlmInputUsage({
|
|
173
200
|
taskDir,
|
|
174
201
|
stage: 'check',
|
|
@@ -184,6 +211,13 @@ async function runMain() {
|
|
|
184
211
|
|
|
185
212
|
if (!noCache && restoreFromCache({ taskDir, taskArg, cacheKeySha })) {
|
|
186
213
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, 'cache_hit'));
|
|
214
|
+
appendCheckTimeline(taskDir, {
|
|
215
|
+
event: 'cache_hit',
|
|
216
|
+
contextMode,
|
|
217
|
+
cacheKeySha,
|
|
218
|
+
packMeta: promptPayload.pack.meta,
|
|
219
|
+
timing: buildTiming(runStartedAt),
|
|
220
|
+
});
|
|
187
221
|
recordLlmInputUsage({
|
|
188
222
|
taskDir,
|
|
189
223
|
stage: 'check',
|
|
@@ -197,11 +231,32 @@ async function runMain() {
|
|
|
197
231
|
}
|
|
198
232
|
|
|
199
233
|
try {
|
|
234
|
+
const providerStartedAt = new Date();
|
|
235
|
+
appendCheckTimeline(taskDir, {
|
|
236
|
+
event: 'provider_started',
|
|
237
|
+
provider: checkerConfig.provider,
|
|
238
|
+
model: checkerConfig.model,
|
|
239
|
+
reasoningEffort: checkerConfig.reasoningEffort,
|
|
240
|
+
contextMode,
|
|
241
|
+
cacheKeySha,
|
|
242
|
+
packMeta: promptPayload.pack.meta,
|
|
243
|
+
timing: buildTiming(runStartedAt),
|
|
244
|
+
});
|
|
200
245
|
providerOutput = await runProvider({
|
|
201
246
|
checkerConfig,
|
|
202
247
|
messages: promptPayload.messages,
|
|
203
248
|
prompt: promptPayload.prompt,
|
|
204
249
|
});
|
|
250
|
+
appendCheckTimeline(taskDir, {
|
|
251
|
+
event: 'provider_completed',
|
|
252
|
+
provider: checkerConfig.provider,
|
|
253
|
+
model: checkerConfig.model,
|
|
254
|
+
contextMode,
|
|
255
|
+
cacheKeySha,
|
|
256
|
+
verdict: providerOutput.checkResultJson?.verdict || null,
|
|
257
|
+
providerTiming: buildTiming(providerStartedAt),
|
|
258
|
+
timing: buildTiming(runStartedAt),
|
|
259
|
+
});
|
|
205
260
|
} catch (error) {
|
|
206
261
|
const failureReason = error.failureReason || 'unknown';
|
|
207
262
|
writeFailureArtifacts({
|
|
@@ -215,6 +270,16 @@ async function runMain() {
|
|
|
215
270
|
message: error.message,
|
|
216
271
|
rawOutput: error.rawOutput || null,
|
|
217
272
|
});
|
|
273
|
+
appendCheckTimeline(taskDir, {
|
|
274
|
+
event: 'provider_failed',
|
|
275
|
+
provider: checkerConfig.provider,
|
|
276
|
+
model: checkerConfig.model,
|
|
277
|
+
contextMode,
|
|
278
|
+
cacheKeySha,
|
|
279
|
+
failureReason,
|
|
280
|
+
message: error.message,
|
|
281
|
+
timing: buildTiming(runStartedAt),
|
|
282
|
+
});
|
|
218
283
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, `provider_failed:${failureReason}`));
|
|
219
284
|
recordLlmInputUsage({
|
|
220
285
|
taskDir,
|
|
@@ -246,6 +311,13 @@ async function runMain() {
|
|
|
246
311
|
cacheKey,
|
|
247
312
|
providerOutput,
|
|
248
313
|
});
|
|
314
|
+
appendCheckTimeline(taskDir, {
|
|
315
|
+
event: 'check_completed',
|
|
316
|
+
verdict: providerOutput.checkResultJson?.verdict || null,
|
|
317
|
+
contextMode: promptPayload.pack.meta.mode,
|
|
318
|
+
cacheKeySha,
|
|
319
|
+
timing: buildTiming(runStartedAt),
|
|
320
|
+
});
|
|
249
321
|
if (!isContextInsufficientResult(providerOutput.checkResultJson)) {
|
|
250
322
|
storeInCache({ taskDir, cacheKeySha });
|
|
251
323
|
}
|
|
@@ -263,6 +335,26 @@ async function runMain() {
|
|
|
263
335
|
console.log(`- finalEstimatedInputTokens: ${promptPayload.pack.meta.estimatedTokens}`);
|
|
264
336
|
}
|
|
265
337
|
|
|
338
|
+
/**
 * Appends one event to the task's `check-timeline.json`.
 *
 * The current timeline is read from `<taskDir>/check-timeline.json`, the event is
 * pushed onto it with an `at` ISO-8601 timestamp, and the whole array is rewritten
 * through `writeTaskFile` as pretty-printed JSON.
 *
 * Recovery is best-effort: a missing, unreadable, unparsable, or non-array file
 * never aborts the run — the timeline simply restarts from an empty array. Unlike a
 * missing file (the normal first-append case), the other situations drop previously
 * recorded events, so they are reported on stderr instead of being swallowed.
 *
 * @param {string} taskDir - Directory of the task whose timeline is extended.
 * @param {object} event - Event fields. They are spread after `at`, so an explicit
 *   `event.at` overrides the generated timestamp.
 * @returns {void}
 */
function appendCheckTimeline(taskDir, event) {
  const timelinePath = path.join(taskDir, 'check-timeline.json');
  let existing = [];
  try {
    // Read directly instead of existsSync + readFileSync: one syscall fewer and no
    // window in which the file can disappear between the check and the read.
    const parsed = JSON.parse(fs.readFileSync(timelinePath, 'utf8'));
    if (Array.isArray(parsed)) {
      existing = parsed;
    } else {
      console.warn(`check-timeline.json is not a JSON array; starting a new timeline: ${timelinePath}`);
    }
  } catch (error) {
    // ENOENT just means no event has been recorded yet; anything else means the
    // earlier events are about to be overwritten, which deserves a visible warning.
    if (error?.code !== 'ENOENT') {
      console.warn(`Could not load check-timeline.json (${error?.message}); starting a new timeline: ${timelinePath}`);
    }
  }
  existing.push({
    at: new Date().toISOString(),
    ...event,
  });
  writeTaskFile(taskDir, 'check-timeline.json', JSON.stringify(existing, null, 2));
}
|
|
357
|
+
|
|
266
358
|
function buildAttemptRecord(packMeta, outcome) {
|
|
267
359
|
return {
|
|
268
360
|
mode: packMeta.mode,
|
package/bin/run-verify.mjs
CHANGED
|
@@ -45,6 +45,13 @@ async function runMain() {
|
|
|
45
45
|
const taskDir = resolveTaskDir(taskArg);
|
|
46
46
|
const taskId = path.basename(taskDir);
|
|
47
47
|
const verifierConfig = resolveVerifierConfig(args);
|
|
48
|
+
const runStartedAt = new Date();
|
|
49
|
+
appendVerifyTimeline(taskDir, {
|
|
50
|
+
event: 'verify_started',
|
|
51
|
+
mode: verifierConfig.mode,
|
|
52
|
+
provider: verifierConfig.provider,
|
|
53
|
+
model: verifierConfig.model,
|
|
54
|
+
});
|
|
48
55
|
const planSha = hashTaskMarkdown(taskDir, 'plan.md');
|
|
49
56
|
const executionSha = hashTaskMarkdown(taskDir, 'execution.md');
|
|
50
57
|
const taskManifest = readOptionalJson(taskDir, 'task-manifest.json');
|
|
@@ -66,6 +73,12 @@ async function runMain() {
|
|
|
66
73
|
executionSha,
|
|
67
74
|
evidenceIssues,
|
|
68
75
|
});
|
|
76
|
+
appendVerifyTimeline(taskDir, {
|
|
77
|
+
event: 'deterministic_preverify_blocked',
|
|
78
|
+
verdict: 'return_to_execute',
|
|
79
|
+
issues: evidenceIssues.map((issue) => issue.message),
|
|
80
|
+
timing: buildTiming(runStartedAt),
|
|
81
|
+
});
|
|
69
82
|
console.log(`Verifier preflight blocked ${taskId}: return_to_execute`);
|
|
70
83
|
console.log(`- evidenceIssues: ${evidenceIssues.length}`);
|
|
71
84
|
return;
|
|
@@ -79,6 +92,11 @@ async function runMain() {
|
|
|
79
92
|
planSha,
|
|
80
93
|
executionSha,
|
|
81
94
|
});
|
|
95
|
+
appendVerifyTimeline(taskDir, {
|
|
96
|
+
event: 'internal_supervisor_completed',
|
|
97
|
+
verdict: 'pass_with_notes',
|
|
98
|
+
timing: buildTiming(runStartedAt),
|
|
99
|
+
});
|
|
82
100
|
console.log(`Internal supervisor Verify artifact written for ${taskId}: pass_with_notes`);
|
|
83
101
|
return;
|
|
84
102
|
}
|
|
@@ -109,12 +127,26 @@ async function runMain() {
|
|
|
109
127
|
contextMode,
|
|
110
128
|
});
|
|
111
129
|
finalPack = promptPayload.pack;
|
|
130
|
+
appendVerifyTimeline(taskDir, {
|
|
131
|
+
event: 'llm_input_built',
|
|
132
|
+
contextMode,
|
|
133
|
+
verifierRunId,
|
|
134
|
+
packMeta: promptPayload.pack.meta,
|
|
135
|
+
timing: buildTiming(runStartedAt),
|
|
136
|
+
});
|
|
112
137
|
console.log(`Verifier LLM input for ${taskId}`);
|
|
113
138
|
for (const line of summarizePackForConsole(promptPayload.pack)) {
|
|
114
139
|
console.log(line);
|
|
115
140
|
}
|
|
116
141
|
if (promptPayload.pack.meta.overCap && contextMode !== 'strict') {
|
|
117
142
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, 'skipped_over_cap'));
|
|
143
|
+
appendVerifyTimeline(taskDir, {
|
|
144
|
+
event: 'llm_input_over_cap_escalating',
|
|
145
|
+
contextMode,
|
|
146
|
+
verifierRunId,
|
|
147
|
+
packMeta: promptPayload.pack.meta,
|
|
148
|
+
timing: buildTiming(runStartedAt),
|
|
149
|
+
});
|
|
118
150
|
appendOrchestrationLog(taskDir, `verifier LLM input exceeded ${contextMode} cap; rerunning pack builder with expanded context`);
|
|
119
151
|
continue;
|
|
120
152
|
}
|
|
@@ -131,6 +163,13 @@ async function runMain() {
|
|
|
131
163
|
message: `Strict LLM input pack exceeds cap: estimatedTokens=${promptPayload.pack.meta.estimatedTokens}, capTokens=${promptPayload.pack.meta.capTokens}`,
|
|
132
164
|
rawOutput: null,
|
|
133
165
|
});
|
|
166
|
+
appendVerifyTimeline(taskDir, {
|
|
167
|
+
event: 'context_overflow',
|
|
168
|
+
contextMode,
|
|
169
|
+
verifierRunId,
|
|
170
|
+
packMeta: promptPayload.pack.meta,
|
|
171
|
+
timing: buildTiming(runStartedAt),
|
|
172
|
+
});
|
|
134
173
|
recordLlmInputUsage({
|
|
135
174
|
taskDir,
|
|
136
175
|
stage: 'verify',
|
|
@@ -143,6 +182,17 @@ async function runMain() {
|
|
|
143
182
|
}
|
|
144
183
|
|
|
145
184
|
try {
|
|
185
|
+
const providerStartedAt = new Date();
|
|
186
|
+
appendVerifyTimeline(taskDir, {
|
|
187
|
+
event: 'provider_started',
|
|
188
|
+
provider: verifierConfig.provider,
|
|
189
|
+
model: verifierConfig.model,
|
|
190
|
+
reasoningEffort: verifierConfig.reasoningEffort,
|
|
191
|
+
contextMode,
|
|
192
|
+
verifierRunId,
|
|
193
|
+
packMeta: promptPayload.pack.meta,
|
|
194
|
+
timing: buildTiming(runStartedAt),
|
|
195
|
+
});
|
|
146
196
|
output = await runExternalCliChecker({
|
|
147
197
|
providerName: verifierConfig.provider,
|
|
148
198
|
providerConfig: verifierConfig.providerConfig,
|
|
@@ -151,6 +201,15 @@ async function runMain() {
|
|
|
151
201
|
prompt: promptPayload.prompt,
|
|
152
202
|
cwd: repoRoot,
|
|
153
203
|
});
|
|
204
|
+
appendVerifyTimeline(taskDir, {
|
|
205
|
+
event: 'provider_completed',
|
|
206
|
+
provider: verifierConfig.provider,
|
|
207
|
+
model: verifierConfig.model,
|
|
208
|
+
contextMode,
|
|
209
|
+
verifierRunId,
|
|
210
|
+
providerTiming: buildTiming(providerStartedAt),
|
|
211
|
+
timing: buildTiming(runStartedAt),
|
|
212
|
+
});
|
|
154
213
|
} catch (error) {
|
|
155
214
|
writeVerifierFailure({
|
|
156
215
|
taskDir,
|
|
@@ -163,6 +222,16 @@ async function runMain() {
|
|
|
163
222
|
message: error.message,
|
|
164
223
|
rawOutput: error.rawOutput || null,
|
|
165
224
|
});
|
|
225
|
+
appendVerifyTimeline(taskDir, {
|
|
226
|
+
event: 'provider_failed',
|
|
227
|
+
provider: verifierConfig.provider,
|
|
228
|
+
model: verifierConfig.model,
|
|
229
|
+
contextMode,
|
|
230
|
+
verifierRunId,
|
|
231
|
+
failureReason: error.failureReason || 'unknown',
|
|
232
|
+
message: error.message,
|
|
233
|
+
timing: buildTiming(runStartedAt),
|
|
234
|
+
});
|
|
166
235
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, `provider_failed:${error.failureReason || 'unknown'}`));
|
|
167
236
|
recordLlmInputUsage({
|
|
168
237
|
taskDir,
|
|
@@ -205,8 +274,17 @@ async function runMain() {
|
|
|
205
274
|
packMeta: finalPack.meta,
|
|
206
275
|
attempts: llmInputAttempts,
|
|
207
276
|
rerunCount,
|
|
277
|
+
timing: buildTiming(runStartedAt),
|
|
208
278
|
});
|
|
209
279
|
}
|
|
280
|
+
appendVerifyTimeline(taskDir, {
|
|
281
|
+
event: 'verify_completed',
|
|
282
|
+
verdict: verifyResultJson.verdict,
|
|
283
|
+
verifierRunId,
|
|
284
|
+
finalMode: finalPack?.meta?.mode || null,
|
|
285
|
+
finalEstimatedTokens: finalPack?.meta?.estimatedTokens || null,
|
|
286
|
+
timing: buildTiming(runStartedAt),
|
|
287
|
+
});
|
|
210
288
|
appendOrchestrationLog(taskDir, `external CLI verifier completed via ${verifierConfig.provider}; verdict=${verifyResultJson.verdict}; runId=${verifierRunId}`);
|
|
211
289
|
console.log(`Verifier run completed for ${taskId}: ${verifyResultJson.verdict}`);
|
|
212
290
|
console.log(`- verifierRunId: ${verifierRunId}`);
|
|
@@ -216,6 +294,34 @@ async function runMain() {
|
|
|
216
294
|
}
|
|
217
295
|
}
|
|
218
296
|
|
|
297
|
+
/**
 * Builds a timing record for a span that began at `startedAt`.
 *
 * @param {Date} startedAt - Moment the measured span began.
 * @param {Date} [completedAt=new Date()] - Moment the span ended; defaults to now.
 * @returns {{startedAt: string, completedAt: string, durationMs: number}} ISO-8601
 *   timestamps for both ends plus the elapsed milliseconds, clamped so it is never
 *   negative.
 */
function buildTiming(startedAt, completedAt = new Date()) {
  const startedIso = startedAt.toISOString();
  const completedIso = completedAt.toISOString();
  const elapsedMs = completedAt.getTime() - startedAt.getTime();
  return {
    startedAt: startedIso,
    completedAt: completedIso,
    // Clamp: an end that precedes the start must not yield a negative duration.
    durationMs: Math.max(0, elapsedMs),
  };
}
|
|
304
|
+
|
|
305
|
+
/**
 * Appends one event to the task's `verify-timeline.json`.
 *
 * The current timeline is read from `<taskDir>/verify-timeline.json`, the event is
 * pushed onto it with an `at` ISO-8601 timestamp, and the whole array is rewritten
 * through `writeTaskFile` as pretty-printed JSON.
 *
 * Recovery is best-effort: a missing, unreadable, unparsable, or non-array file
 * never aborts the run — the timeline simply restarts from an empty array. Unlike a
 * missing file (the normal first-append case), the other situations drop previously
 * recorded events, so they are reported on stderr instead of being swallowed.
 *
 * @param {string} taskDir - Directory of the task whose timeline is extended.
 * @param {object} event - Event fields. They are spread after `at`, so an explicit
 *   `event.at` overrides the generated timestamp.
 * @returns {void}
 */
function appendVerifyTimeline(taskDir, event) {
  const timelinePath = path.join(taskDir, 'verify-timeline.json');
  let existing = [];
  try {
    // Read directly instead of existsSync + readFileSync: one syscall fewer and no
    // window in which the file can disappear between the check and the read.
    const parsed = JSON.parse(fs.readFileSync(timelinePath, 'utf8'));
    if (Array.isArray(parsed)) {
      existing = parsed;
    } else {
      console.warn(`verify-timeline.json is not a JSON array; starting a new timeline: ${timelinePath}`);
    }
  } catch (error) {
    // ENOENT just means no event has been recorded yet; anything else means the
    // earlier events are about to be overwritten, which deserves a visible warning.
    if (error?.code !== 'ENOENT') {
      console.warn(`Could not load verify-timeline.json (${error?.message}); starting a new timeline: ${timelinePath}`);
    }
  }
  existing.push({
    at: new Date().toISOString(),
    ...event,
  });
  writeTaskFile(taskDir, 'verify-timeline.json', JSON.stringify(existing, null, 2));
}
|
|
324
|
+
|
|
219
325
|
function buildAttemptRecord(packMeta, outcome) {
|
|
220
326
|
return {
|
|
221
327
|
mode: packMeta.mode,
|