@besales/ops-framework 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/bin/lib/check-context-utils.mjs +3 -0
- package/bin/run-verify.mjs +123 -17
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.28
|
|
4
|
+
|
|
5
|
+
- Added Verify reuse guard: a passing `verify.result.json` is reused when `plan.md` and `execution.md` hashes are unchanged, unless `--force` is passed.
|
|
6
|
+
- Updated status after external Verify pass/fail/return verdicts so Supervisor routing does not keep asking for stale Verify reruns.
|
|
7
|
+
- Routed passing Verify results to retrospective/learning closeout instead of repeated verifier loops.
|
|
8
|
+
|
|
9
|
+
## 0.1.27
|
|
10
|
+
|
|
11
|
+
- Updated internal Verify closeout behavior so `status.md` moves to retrospective/learning closeout instead of continuing to ask for external Verify.
|
|
12
|
+
- Removed the default non-blocking "no independent verifier" finding from internal Verify for bounded local slices; external Verify remains an explicit escalation choice.
|
|
13
|
+
- Added status updater support for latest Verify verdict/result and expected outcome sections.
|
|
14
|
+
|
|
3
15
|
## 0.1.26
|
|
4
16
|
|
|
5
17
|
- Raised `standard_plus` context cap to 36k so compact Verify packs around 29-31k estimated tokens stay out of strict mode after safety margin.
|
package/bin/lib/check-context-utils.mjs
CHANGED
|
@@ -431,7 +431,10 @@ export function updateStatus(taskDir, updates) {
|
|
|
431
431
|
['humanGateSummary', 'Human Gate summary'],
|
|
432
432
|
['feedbackIntake', 'Feedback intake'],
|
|
433
433
|
['executionFeedback', 'Execution feedback intake'],
|
|
434
|
+
['verifyVerdict', 'Latest verify verdict'],
|
|
435
|
+
['verifyResult', 'Latest verify result'],
|
|
434
436
|
['supervisorAction', 'Последнее действие supervisor'],
|
|
437
|
+
['expectedOutcome', 'Что ожидается на выходе текущего этапа'],
|
|
435
438
|
['nextStep', 'Следующий шаг'],
|
|
436
439
|
['humanApproval', 'Нужен ли сейчас human approval'],
|
|
437
440
|
]);
|
package/bin/run-verify.mjs
CHANGED
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
resolveConfigValue,
|
|
13
13
|
resolveTaskDir,
|
|
14
14
|
sha256,
|
|
15
|
+
updateStatus,
|
|
15
16
|
validateExecutionEvidenceForPlan,
|
|
16
17
|
writeTaskFile,
|
|
17
18
|
} from './lib/check-context-utils.mjs';
|
|
@@ -45,6 +46,7 @@ async function runMain() {
|
|
|
45
46
|
const taskDir = resolveTaskDir(taskArg);
|
|
46
47
|
const taskId = path.basename(taskDir);
|
|
47
48
|
const verifierConfig = resolveVerifierConfig(args);
|
|
49
|
+
const force = getFlag(args, 'force', false) === true;
|
|
48
50
|
const runStartedAt = new Date();
|
|
49
51
|
appendVerifyTimeline(taskDir, {
|
|
50
52
|
event: 'verify_started',
|
|
@@ -84,6 +86,25 @@ async function runMain() {
|
|
|
84
86
|
return;
|
|
85
87
|
}
|
|
86
88
|
|
|
89
|
+
const reusableVerify = readReusableVerifyResult({ taskDir, planSha, executionSha });
|
|
90
|
+
if (reusableVerify && !force) {
|
|
91
|
+
updateStatusForVerifyResult(taskDir, reusableVerify, {
|
|
92
|
+
reused: true,
|
|
93
|
+
verifierMode: reusableVerify.verificationMode || verifierConfig.mode,
|
|
94
|
+
});
|
|
95
|
+
appendVerifyTimeline(taskDir, {
|
|
96
|
+
event: 'verify_reused',
|
|
97
|
+
verdict: reusableVerify.verdict,
|
|
98
|
+
verifierRunId: reusableVerify.verifierRunId || null,
|
|
99
|
+
timing: buildTiming(runStartedAt),
|
|
100
|
+
});
|
|
101
|
+
appendOrchestrationLog(taskDir, `verify result reused; verdict=${reusableVerify.verdict}; plan/execution unchanged; use --force to rerun`);
|
|
102
|
+
console.log(`Verifier result reused for ${taskId}: ${reusableVerify.verdict}`);
|
|
103
|
+
console.log('- reason: plan.md and execution.md hashes match existing passing verify.result.json');
|
|
104
|
+
console.log('- use --force to rerun Verify anyway');
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
|
|
87
108
|
if (verifierConfig.mode === 'internal_supervisor') {
|
|
88
109
|
writeInternalSupervisorVerify({
|
|
89
110
|
taskDir,
|
|
@@ -267,6 +288,10 @@ async function runMain() {
|
|
|
267
288
|
|
|
268
289
|
writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
|
|
269
290
|
writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(verifyResultJson, null, 2));
|
|
291
|
+
updateStatusForVerifyResult(taskDir, verifyResultJson, {
|
|
292
|
+
reused: false,
|
|
293
|
+
verifierMode: 'external_cli',
|
|
294
|
+
});
|
|
270
295
|
if (finalPack) {
|
|
271
296
|
recordLlmInputUsage({
|
|
272
297
|
taskDir,
|
|
@@ -302,6 +327,85 @@ function buildTiming(startedAt, completedAt = new Date()) {
|
|
|
302
327
|
};
|
|
303
328
|
}
|
|
304
329
|
|
|
330
|
+
function readReusableVerifyResult({ taskDir, planSha, executionSha }) {
|
|
331
|
+
const result = readOptionalJson(taskDir, 'verify.result.json');
|
|
332
|
+
if (!result || typeof result !== 'object' || Array.isArray(result)) {
|
|
333
|
+
return null;
|
|
334
|
+
}
|
|
335
|
+
if (!['pass', 'pass_with_notes'].includes(String(result.verdict || '').toLowerCase())) {
|
|
336
|
+
return null;
|
|
337
|
+
}
|
|
338
|
+
if (result.planSha !== planSha || result.executionSha !== executionSha) {
|
|
339
|
+
return null;
|
|
340
|
+
}
|
|
341
|
+
if (result.readyForRetrospective !== true) {
|
|
342
|
+
return null;
|
|
343
|
+
}
|
|
344
|
+
return result;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
function updateStatusForVerifyResult(taskDir, result, { reused = false, verifierMode = null } = {}) {
|
|
348
|
+
const verdict = String(result?.verdict || 'unknown').toLowerCase();
|
|
349
|
+
const mode = verifierMode || result?.verificationMode || 'unknown';
|
|
350
|
+
const commonResult = [
|
|
351
|
+
`- \`verify.result.json\`: \`${verdict}\``,
|
|
352
|
+
`- Verification mode: \`${mode}\``,
|
|
353
|
+
`- Ready for retrospective: \`${Boolean(result?.readyForRetrospective)}\``,
|
|
354
|
+
reused ? '- Reused: `true` (plan/execution unchanged)' : null,
|
|
355
|
+
].filter(Boolean).join('\n');
|
|
356
|
+
|
|
357
|
+
if (['pass', 'pass_with_notes'].includes(verdict)) {
|
|
358
|
+
updateStatus(taskDir, {
|
|
359
|
+
stage: 'Verify complete',
|
|
360
|
+
verifyVerdict: `\`${verdict}\``,
|
|
361
|
+
verifyResult: commonResult,
|
|
362
|
+
supervisorAction: reused
|
|
363
|
+
? 'Existing passing Verify result reused because plan/execution hashes are unchanged.'
|
|
364
|
+
: 'Verify completed with sufficient evidence for this slice.',
|
|
365
|
+
expectedOutcome: 'Retrospective, learning closeout, then task closeout.',
|
|
366
|
+
humanApproval: 'no',
|
|
367
|
+
nextStep: 'Run learning closeout / retrospective. Do not rerun Verify unless plan/execution changes or explicit --force/human escalation applies.',
|
|
368
|
+
});
|
|
369
|
+
return;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
if (verdict === 'return_to_execute') {
|
|
373
|
+
updateStatus(taskDir, {
|
|
374
|
+
stage: 'Execute',
|
|
375
|
+
verifyVerdict: '`return_to_execute`',
|
|
376
|
+
verifyResult: commonResult,
|
|
377
|
+
supervisorAction: 'Verify returned blocking execution findings.',
|
|
378
|
+
expectedOutcome: 'Address blocking Verify findings, update execution evidence, then rerun Verify.',
|
|
379
|
+
humanApproval: 'no',
|
|
380
|
+
nextStep: 'Return to Execute and fix the blocking Verify findings before another Verify run.',
|
|
381
|
+
});
|
|
382
|
+
return;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
if (verdict === 'return_to_plan') {
|
|
386
|
+
updateStatus(taskDir, {
|
|
387
|
+
stage: 'Plan',
|
|
388
|
+
verifyVerdict: '`return_to_plan`',
|
|
389
|
+
verifyResult: commonResult,
|
|
390
|
+
supervisorAction: 'Verify found plan/scope mismatch.',
|
|
391
|
+
expectedOutcome: 'Revise plan/check artifacts before Execute continues.',
|
|
392
|
+
humanApproval: 'maybe',
|
|
393
|
+
nextStep: 'Return to Plan and resolve the Verify findings before Execute continues.',
|
|
394
|
+
});
|
|
395
|
+
return;
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
updateStatus(taskDir, {
|
|
399
|
+
stage: 'Verify',
|
|
400
|
+
verifyVerdict: `\`${verdict}\``,
|
|
401
|
+
verifyResult: commonResult,
|
|
402
|
+
supervisorAction: 'Verify did not produce a passing result.',
|
|
403
|
+
expectedOutcome: 'Resolve verifier failure or request human decision.',
|
|
404
|
+
humanApproval: 'maybe',
|
|
405
|
+
nextStep: 'Resolve Verify result before continuing.',
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
|
|
305
409
|
function appendVerifyTimeline(taskDir, event) {
|
|
306
410
|
const timelinePath = path.join(taskDir, 'verify-timeline.json');
|
|
307
411
|
let existing = [];
|
|
@@ -532,29 +636,27 @@ function writeInternalSupervisorVerify({
|
|
|
532
636
|
readyForRetrospective: true,
|
|
533
637
|
counts: {
|
|
534
638
|
blockingFindings: 0,
|
|
535
|
-
nonBlockingFindings:
|
|
639
|
+
nonBlockingFindings: 0,
|
|
536
640
|
questions: 0,
|
|
537
641
|
},
|
|
538
|
-
findings: [
|
|
539
|
-
{
|
|
540
|
-
id: 'V-001',
|
|
541
|
-
severity: 'non_blocking',
|
|
542
|
-
claimCategory: 'insufficient_evidence',
|
|
543
|
-
affectedArtifacts: ['verify.md', 'verify.result.json'],
|
|
544
|
-
claim: 'Verify used internal supervisor mode, so no independent fresh-context verifier reviewed this execution.',
|
|
545
|
-
evidenceRefs: [
|
|
546
|
-
{
|
|
547
|
-
type: 'config',
|
|
548
|
-
ref: `verifier.mode=${verifierConfig.mode}`,
|
|
549
|
-
},
|
|
550
|
-
],
|
|
551
|
-
expectedCorrection: 'Run `corepack yarn agent:run-verify <TASK> --verify-mode external_cli` if the task risk or human decision requires independent verification.',
|
|
552
|
-
},
|
|
553
|
-
],
|
|
642
|
+
findings: [],
|
|
554
643
|
};
|
|
555
644
|
|
|
556
645
|
writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
|
|
557
646
|
writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(result, null, 2));
|
|
647
|
+
updateStatus(taskDir, {
|
|
648
|
+
stage: 'Verify complete',
|
|
649
|
+
verifyVerdict: '`pass_with_notes`',
|
|
650
|
+
verifyResult: [
|
|
651
|
+
'- `verify.result.json`: `pass_with_notes`',
|
|
652
|
+
'- Verification mode: `internal_supervisor`',
|
|
653
|
+
'- Ready for retrospective: `true`',
|
|
654
|
+
].join('\n'),
|
|
655
|
+
supervisorAction: 'Internal Verify completed with sufficient local evidence for this bounded slice.',
|
|
656
|
+
expectedOutcome: 'Retrospective, learning closeout, then task closeout.',
|
|
657
|
+
humanApproval: 'no',
|
|
658
|
+
nextStep: 'Run learning closeout / retrospective. Do not run external Verify unless R4/R5, production/real-data/destructive/security/financial/broad-risk work, or explicit human request applies.',
|
|
659
|
+
});
|
|
558
660
|
appendOrchestrationLog(taskDir, `internal supervisor verifier completed; verdict=${result.verdict}; runId=${verifierRunId}`);
|
|
559
661
|
}
|
|
560
662
|
|
|
@@ -704,6 +806,10 @@ function writeVerifierFailure({
|
|
|
704
806
|
};
|
|
705
807
|
writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
|
|
706
808
|
writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(result, null, 2));
|
|
809
|
+
updateStatusForVerifyResult(taskDir, result, {
|
|
810
|
+
reused: false,
|
|
811
|
+
verifierMode: 'external_cli',
|
|
812
|
+
});
|
|
707
813
|
appendOrchestrationLog(taskDir, `external CLI verifier failed via ${verifierConfig.provider}; failureReason=${failureReason}; runId=${verifierRunId}`);
|
|
708
814
|
}
|
|
709
815
|
|