@besales/ops-framework 0.1.27 → 0.1.29
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/bin/lib/llm-input-pack-utils.mjs +4 -1
- package/bin/lib/llm-input-pack-utils.test.mjs +58 -0
- package/bin/run-check.mjs +80 -2
- package/bin/run-verify.mjs +107 -0
- package/package.json +1 -1
- package/prompts/checker.md +2 -0
- package/prompts/supervisor.md +2 -0
- package/prompts/verifier.md +2 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.29
|
|
4
|
+
|
|
5
|
+
- Added `precheck-remediation.md` for deterministic Check blocks so all missing plan gates are consolidated into one checklist before another external Check.
|
|
6
|
+
- Included precheck remediation artifacts in Checker and Verifier LLM input packs.
|
|
7
|
+
- Updated Checker/Verifier/Supervisor rules to avoid one-item precheck loops and route minor/tooling issues to notes or human decision when acceptance is covered.
|
|
8
|
+
|
|
9
|
+
## 0.1.28
|
|
10
|
+
|
|
11
|
+
- Added Verify reuse guard: a passing `verify.result.json` is reused when `plan.md` and `execution.md` hashes are unchanged, unless `--force` is passed.
|
|
12
|
+
- Updated status after external Verify pass/fail/return verdicts so Supervisor routing does not keep asking for stale Verify reruns.
|
|
13
|
+
- Routed passing Verify results to retrospective/learning closeout instead of repeated verifier loops.
|
|
14
|
+
|
|
3
15
|
## 0.1.27
|
|
4
16
|
|
|
5
17
|
- Updated internal Verify closeout behavior so `status.md` moves to retrospective/learning closeout instead of continuing to ask for external Verify.
|
|
@@ -199,12 +199,13 @@ export function buildCheckerLlmInputPack({
|
|
|
199
199
|
}) {
|
|
200
200
|
const selectedMode = normalizeLlmContextMode(mode) || 'standard';
|
|
201
201
|
const artifacts = selectedMode === 'strict'
|
|
202
|
-
? readArtifacts(taskDir, ['brief.md', 'research.md', 'plan.md', 'status.md', 'feedback.md', 'execution-feedback.md'], 'full')
|
|
202
|
+
? readArtifacts(taskDir, ['brief.md', 'research.md', 'plan.md', 'status.md', 'precheck-remediation.md', 'feedback.md', 'execution-feedback.md'], 'full')
|
|
203
203
|
: {
|
|
204
204
|
'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
|
|
205
205
|
'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo', 'architecture', 'standards']),
|
|
206
206
|
'plan.md': compactArtifact(taskDir, 'plan.md', selectedMode, CHECK_RELEVANT_SECTIONS),
|
|
207
207
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
208
|
+
'precheck-remediation.md': compactArtifact(taskDir, 'precheck-remediation.md', selectedMode, ['checklist', 'rerun rule', 'purpose']),
|
|
208
209
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
|
|
209
210
|
};
|
|
210
211
|
|
|
@@ -325,6 +326,7 @@ export function buildVerifierLlmInputPack({
|
|
|
325
326
|
checkResult: readOptionalJson(taskDir, 'check.result.json'),
|
|
326
327
|
mode: 'standard',
|
|
327
328
|
}),
|
|
329
|
+
'precheck-remediation.md': compactArtifact(taskDir, 'precheck-remediation.md', 'standard', ['checklist', 'rerun rule', 'purpose']),
|
|
328
330
|
'check-resolution.md': compactArtifact(taskDir, 'check-resolution.md', 'standard', ['structured resolution', 'root cause', 'resolution']),
|
|
329
331
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
|
|
330
332
|
'execution.md': readTaskFile(taskDir, 'execution.md'),
|
|
@@ -346,6 +348,7 @@ export function buildVerifierLlmInputPack({
|
|
|
346
348
|
checkResult: readOptionalJson(taskDir, 'check.result.json'),
|
|
347
349
|
mode: selectedMode,
|
|
348
350
|
}),
|
|
351
|
+
'precheck-remediation.md': compactArtifact(taskDir, 'precheck-remediation.md', selectedMode, ['checklist', 'rerun rule', 'purpose']),
|
|
349
352
|
'check-resolution.md': truncateMiddle(readTaskFile(taskDir, 'check-resolution.md'), charLimitForMode(selectedMode, 1500, 3500)),
|
|
350
353
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
|
|
351
354
|
'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
|
|
@@ -118,6 +118,42 @@ describe('llm input pack utilities', () => {
|
|
|
118
118
|
expect(pack.meta.compactedArtifacts).toContain('plan.md');
|
|
119
119
|
});
|
|
120
120
|
|
|
121
|
+
// A precheck-remediation.md written into the task directory must be visible
// in the checker pack's task artifacts, including both its checklist item and
// its rerun rule.
it('includes precheck remediation in checker packs when present', () => {
  const taskDir = createTask();
  const remediationMarkdown = [
    '# Precheck Remediation',
    '',
    '## Checklist',
    '',
    '- [ ] P-001: Optimization Strategy is missing.',
    '',
    '## Rerun Rule',
    '',
    '- Rerun Check only after every checklist item is addressed.',
  ].join('\n');
  write(taskDir, 'precheck-remediation.md', remediationMarkdown);

  const checkContext = {
    planSha: 'sha256:plan',
    memorySha: 'sha256:memory',
    riskProfile: 'high',
    riskTriggers: ['source-sync-provider'],
  };
  const pack = buildCheckerLlmInputPack({
    taskDir,
    taskId: 'TASK-999-token-pack',
    checkerPromptSha: 'sha256:test',
    cacheKey: { test: true },
    checkContext,
    checkEvidence: '# Evidence\n\nok',
    checkerContextPack: '# Checker Context Pack\n\nok',
    taskManifest: '{}',
    projectMemory: [],
    mode: 'standard',
  });

  const remediationArtifact = pack.input.taskArtifacts['precheck-remediation.md'];
  expect(remediationArtifact).toContain('Optimization Strategy is missing.');
  expect(remediationArtifact).toContain('Rerun Check only after every checklist item is addressed.');
});
|
|
156
|
+
|
|
121
157
|
it('stabilizes checker task manifest by excluding volatile check telemetry', () => {
|
|
122
158
|
const taskDir = createTask();
|
|
123
159
|
const pack = buildCheckerLlmInputPack({
|
|
@@ -274,6 +310,28 @@ describe('llm input pack utilities', () => {
|
|
|
274
310
|
expect(pack.input.taskArtifacts['check.md']).toContain('Human question should remain visible.');
|
|
275
311
|
});
|
|
276
312
|
|
|
313
|
+
// A precheck-remediation.md written into the task directory must be visible
// in the verifier pack's task artifacts.
it('includes precheck remediation in verifier packs when present', () => {
  const taskDir = createTask();
  const remediationMarkdown = [
    '# Precheck Remediation',
    '',
    '## Checklist',
    '',
    '- [ ] P-001: Source Sync / Provider Gate is missing.',
  ].join('\n');
  write(taskDir, 'precheck-remediation.md', remediationMarkdown);

  const verifier = { provider: 'test', model: 'test', reasoningEffort: 'none', runId: 'run' };
  const pack = buildVerifierLlmInputPack({
    taskDir,
    taskId: 'TASK-999-token-pack',
    planSha: 'sha256:plan',
    executionSha: 'sha256:execution',
    verifier,
    mode: 'standard',
  });

  const remediationArtifact = pack.input.taskArtifacts['precheck-remediation.md'];
  expect(remediationArtifact).toContain('Source Sync / Provider Gate is missing.');
});
|
|
334
|
+
|
|
277
335
|
it('uses a conservative estimate for Cyrillic and JSON-heavy payloads', () => {
|
|
278
336
|
const value = JSON.stringify({
|
|
279
337
|
text: 'Проверка русскоязычного JSON payload с большим количеством кавычек и структурных символов.',
|
package/bin/run-check.mjs
CHANGED
|
@@ -462,6 +462,12 @@ function writeDeterministicPrecheckReturn({
|
|
|
462
462
|
readyForHumanGate: false,
|
|
463
463
|
createdAt: new Date().toISOString(),
|
|
464
464
|
};
|
|
465
|
+
const remediation = buildPrecheckRemediation({
|
|
466
|
+
taskId,
|
|
467
|
+
checkContext,
|
|
468
|
+
issues,
|
|
469
|
+
startedAt,
|
|
470
|
+
});
|
|
465
471
|
const markdown = [
|
|
466
472
|
'# Check',
|
|
467
473
|
'',
|
|
@@ -471,6 +477,10 @@ function writeDeterministicPrecheckReturn({
|
|
|
471
477
|
'',
|
|
472
478
|
'External checker was not invoked because machine-readable plan/context gates already found blocking issues.',
|
|
473
479
|
'',
|
|
480
|
+
'## Consolidated remediation',
|
|
481
|
+
'',
|
|
482
|
+
'Before rerunning Check, close the full checklist in `precheck-remediation.md`. Do not fix one item and immediately rerun Check while other listed items remain open.',
|
|
483
|
+
'',
|
|
474
484
|
'## structured findings',
|
|
475
485
|
'',
|
|
476
486
|
'| ID | Severity | Category | Claim | Expected correction |',
|
|
@@ -488,15 +498,16 @@ function writeDeterministicPrecheckReturn({
|
|
|
488
498
|
|
|
489
499
|
writeTaskFile(taskDir, 'check.md', markdown);
|
|
490
500
|
writeTaskFile(taskDir, 'check.result.json', JSON.stringify(result, null, 2));
|
|
501
|
+
writeTaskFile(taskDir, 'precheck-remediation.md', remediation.markdown);
|
|
491
502
|
updateStatus(taskDir, {
|
|
492
503
|
checkVerdict: '`return_to_plan`',
|
|
493
504
|
checkResult: '- `check.result.json`: current',
|
|
494
505
|
supervisorAction: 'Deterministic Check preflight blocked external checker invocation.',
|
|
495
|
-
nextStep: '
|
|
506
|
+
nextStep: 'Close every item in `precheck-remediation.md`, update plan/research/status once, then rerun Check.',
|
|
496
507
|
humanApproval: 'no',
|
|
497
508
|
});
|
|
498
509
|
ensureFreshCheckContext(taskDir, taskId);
|
|
499
|
-
appendOrchestrationLog(taskDir, `deterministic Check preflight returned return_to_plan; findings=${findings.length}; external checker skipped`);
|
|
510
|
+
appendOrchestrationLog(taskDir, `deterministic Check preflight returned return_to_plan; findings=${findings.length}; remediation=${remediation.issueSetSha}; external checker skipped`);
|
|
500
511
|
}
|
|
501
512
|
|
|
502
513
|
function expectedCorrectionForPrecheckIssue(issue) {
|
|
@@ -509,6 +520,73 @@ function expectedCorrectionForPrecheckIssue(issue) {
|
|
|
509
520
|
return 'Fix task-manifest/check-context consistency before external Check.';
|
|
510
521
|
}
|
|
511
522
|
|
|
523
|
+
/**
 * Renders the consolidated remediation checklist for a deterministic Check
 * preflight that found blocking issues.
 *
 * @param {object} params
 * @param {string} params.taskId - Task identifier echoed into the header.
 * @param {object} params.checkContext - Source of the `planSha` and `memorySha` header values.
 * @param {Array<object>} params.issues - Precheck issues; `category` and `message` are read from each.
 * @param {*} params.startedAt - Passed to `buildTiming` to report the precheck duration.
 * @returns {{ issueSetSha: *, markdown: string }} The `sha256Json` fingerprint of the
 *   issues' category/message pairs and the rendered markdown document.
 */
function buildPrecheckRemediation({
  taskId,
  checkContext,
  issues,
  startedAt,
}) {
  // Only category and message feed the fingerprint; other issue fields are ignored.
  const fingerprintInput = issues.map(({ category, message }) => ({ category, message }));
  const issueSetSha = sha256Json(fingerprintInput);
  const issuesByCategory = groupIssuesByCategory(issues);

  const header = [
    '# Precheck Remediation',
    '',
    `Task: \`${taskId}\``,
    `Issue set: \`${issueSetSha}\``,
    `Plan SHA: \`${checkContext.planSha}\``,
    `Memory SHA: \`${checkContext.memorySha}\``,
    `Created at: \`${new Date().toISOString()}\``,
    `Precheck duration: \`${buildTiming(startedAt).durationMs}ms\``,
    '',
    '## Purpose',
    '',
    'This artifact consolidates deterministic Check blockers so the plan can be fixed in one focused pass before another external Check.',
    '',
    'Do not rerun external Check while any checklist item below is still open. Update `plan.md`, `research.md`, or `status.md` once, then rerun Check after the whole set is addressed.',
    '',
    '## Checklist',
    '',
  ];

  // Item numbers run continuously across categories (P-001, P-002, ...).
  const checklist = [];
  let ordinal = 0;
  issuesByCategory.forEach((categoryIssues, category) => {
    checklist.push(`### ${category}`, '');
    categoryIssues.forEach((issue) => {
      ordinal += 1;
      const itemId = `P-${String(ordinal).padStart(3, '0')}`;
      checklist.push(
        `- [ ] ${itemId}: ${issue.message}`,
        ` - Expected correction: ${expectedCorrectionForPrecheckIssue(issue)}`,
      );
    });
    checklist.push('');
  });

  const footer = [
    '## Rerun Rule',
    '',
    '- Rerun Check only after every checklist item is addressed or explicitly rejected with evidence/human decision.',
    '- If the same issue set appears again, consolidate the remaining fixes instead of starting another one-item loop.',
    '- If a listed item is not applicable, record the reason in `plan.md` or `status.md` before rerunning Check.',
    '',
  ];

  const markdown = [...header, ...checklist, ...footer].join('\n');
  return { issueSetSha, markdown };
}
|
|
578
|
+
|
|
579
|
+
/**
 * Buckets issues by their `category`, keeping first-seen category order and
 * the original issue order inside each bucket.
 *
 * @param {Iterable<object>} issues - Issues to group; a falsy `category` is filed under 'unknown'.
 * @returns {Map<string, object[]>} Category name mapped to the issues in that category.
 */
function groupIssuesByCategory(issues) {
  const buckets = new Map();
  for (const entry of issues) {
    const key = entry.category || 'unknown';
    if (!buckets.has(key)) {
      buckets.set(key, []);
    }
    buckets.get(key).push(entry);
  }
  return buckets;
}
|
|
589
|
+
|
|
512
590
|
/**
 * Makes a value safe to place inside a single Markdown table cell.
 *
 * Falsy values become an empty string, every `|` is backslash-escaped, each
 * newline is replaced by a space, and surrounding whitespace is trimmed.
 *
 * @param {*} value - Value to render.
 * @returns {string} Single-line, pipe-escaped text.
 */
function escapeTableCell(value) {
  const text = String(value || '');
  const withEscapedPipes = text.split('|').join('\\|');
  const singleLine = withEscapedPipes.split('\n').join(' ');
  return singleLine.trim();
}
|
package/bin/run-verify.mjs
CHANGED
|
@@ -46,6 +46,7 @@ async function runMain() {
|
|
|
46
46
|
const taskDir = resolveTaskDir(taskArg);
|
|
47
47
|
const taskId = path.basename(taskDir);
|
|
48
48
|
const verifierConfig = resolveVerifierConfig(args);
|
|
49
|
+
const force = getFlag(args, 'force', false) === true;
|
|
49
50
|
const runStartedAt = new Date();
|
|
50
51
|
appendVerifyTimeline(taskDir, {
|
|
51
52
|
event: 'verify_started',
|
|
@@ -85,6 +86,25 @@ async function runMain() {
|
|
|
85
86
|
return;
|
|
86
87
|
}
|
|
87
88
|
|
|
89
|
+
const reusableVerify = readReusableVerifyResult({ taskDir, planSha, executionSha });
|
|
90
|
+
if (reusableVerify && !force) {
|
|
91
|
+
updateStatusForVerifyResult(taskDir, reusableVerify, {
|
|
92
|
+
reused: true,
|
|
93
|
+
verifierMode: reusableVerify.verificationMode || verifierConfig.mode,
|
|
94
|
+
});
|
|
95
|
+
appendVerifyTimeline(taskDir, {
|
|
96
|
+
event: 'verify_reused',
|
|
97
|
+
verdict: reusableVerify.verdict,
|
|
98
|
+
verifierRunId: reusableVerify.verifierRunId || null,
|
|
99
|
+
timing: buildTiming(runStartedAt),
|
|
100
|
+
});
|
|
101
|
+
appendOrchestrationLog(taskDir, `verify result reused; verdict=${reusableVerify.verdict}; plan/execution unchanged; use --force to rerun`);
|
|
102
|
+
console.log(`Verifier result reused for ${taskId}: ${reusableVerify.verdict}`);
|
|
103
|
+
console.log('- reason: plan.md and execution.md hashes match existing passing verify.result.json');
|
|
104
|
+
console.log('- use --force to rerun Verify anyway');
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
|
|
88
108
|
if (verifierConfig.mode === 'internal_supervisor') {
|
|
89
109
|
writeInternalSupervisorVerify({
|
|
90
110
|
taskDir,
|
|
@@ -268,6 +288,10 @@ async function runMain() {
|
|
|
268
288
|
|
|
269
289
|
writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
|
|
270
290
|
writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(verifyResultJson, null, 2));
|
|
291
|
+
updateStatusForVerifyResult(taskDir, verifyResultJson, {
|
|
292
|
+
reused: false,
|
|
293
|
+
verifierMode: 'external_cli',
|
|
294
|
+
});
|
|
271
295
|
if (finalPack) {
|
|
272
296
|
recordLlmInputUsage({
|
|
273
297
|
taskDir,
|
|
@@ -303,6 +327,85 @@ function buildTiming(startedAt, completedAt = new Date()) {
|
|
|
303
327
|
};
|
|
304
328
|
}
|
|
305
329
|
|
|
330
|
+
/**
 * Returns the stored `verify.result.json` when it can stand in for a fresh
 * Verify run, otherwise `null`.
 *
 * The stored result qualifies only if it is a non-array object, its verdict
 * is `pass` or `pass_with_notes` (case-insensitive), its `planSha` and
 * `executionSha` equal the supplied hashes, and `readyForRetrospective` is
 * strictly `true`.
 *
 * @param {object} params
 * @param {string} params.taskDir - Task directory passed to `readOptionalJson`.
 * @param {string} params.planSha - Hash the stored `planSha` must equal.
 * @param {string} params.executionSha - Hash the stored `executionSha` must equal.
 * @returns {object|null} The stored result object, or `null` when it does not qualify.
 */
function readReusableVerifyResult({ taskDir, planSha, executionSha }) {
  const stored = readOptionalJson(taskDir, 'verify.result.json');
  const isPlainRecord = Boolean(stored) && typeof stored === 'object' && !Array.isArray(stored);
  if (!isPlainRecord) {
    return null;
  }

  const verdict = String(stored.verdict || '').toLowerCase();
  const passed = verdict === 'pass' || verdict === 'pass_with_notes';
  const hashesMatch = stored.planSha === planSha && stored.executionSha === executionSha;
  const closeoutReady = stored.readyForRetrospective === true;

  return passed && hashesMatch && closeoutReady ? stored : null;
}
|
|
346
|
+
|
|
347
|
+
/**
 * Writes the status fields that correspond to a Verify result.
 *
 * The lower-cased verdict selects one of four status patches: passing
 * (`pass` / `pass_with_notes`), `return_to_execute`, `return_to_plan`, or a
 * fallback for any other verdict. Exactly one `updateStatus` call is made.
 *
 * @param {string} taskDir - Task directory forwarded to `updateStatus`.
 * @param {object|null|undefined} result - Verify result; `verdict`,
 *   `verificationMode` and `readyForRetrospective` are read when present.
 * @param {object} [options]
 * @param {boolean} [options.reused=false] - Adds a "Reused" line and uses the reuse wording for passing verdicts.
 * @param {string|null} [options.verifierMode=null] - Preferred over `result.verificationMode` when truthy.
 * @returns {void}
 */
function updateStatusForVerifyResult(taskDir, result, { reused = false, verifierMode = null } = {}) {
  const verdict = String(result?.verdict || 'unknown').toLowerCase();
  const mode = verifierMode || result?.verificationMode || 'unknown';

  const summaryLines = [
    `- \`verify.result.json\`: \`${verdict}\``,
    `- Verification mode: \`${mode}\``,
    `- Ready for retrospective: \`${Boolean(result?.readyForRetrospective)}\``,
  ];
  if (reused) {
    summaryLines.push('- Reused: `true` (plan/execution unchanged)');
  }
  const verifyResult = summaryLines.join('\n');
  const verifyVerdict = `\`${verdict}\``;

  let patch;
  switch (verdict) {
    case 'pass':
    case 'pass_with_notes': {
      let supervisorAction = 'Verify completed with sufficient evidence for this slice.';
      if (reused) {
        supervisorAction = 'Existing passing Verify result reused because plan/execution hashes are unchanged.';
      }
      patch = {
        stage: 'Verify complete',
        verifyVerdict,
        verifyResult,
        supervisorAction,
        expectedOutcome: 'Retrospective, learning closeout, then task closeout.',
        humanApproval: 'no',
        nextStep: 'Run learning closeout / retrospective. Do not rerun Verify unless plan/execution changes or explicit --force/human escalation applies.',
      };
      break;
    }
    case 'return_to_execute':
      patch = {
        stage: 'Execute',
        verifyVerdict,
        verifyResult,
        supervisorAction: 'Verify returned blocking execution findings.',
        expectedOutcome: 'Address blocking Verify findings, update execution evidence, then rerun Verify.',
        humanApproval: 'no',
        nextStep: 'Return to Execute and fix the blocking Verify findings before another Verify run.',
      };
      break;
    case 'return_to_plan':
      patch = {
        stage: 'Plan',
        verifyVerdict,
        verifyResult,
        supervisorAction: 'Verify found plan/scope mismatch.',
        expectedOutcome: 'Revise plan/check artifacts before Execute continues.',
        humanApproval: 'maybe',
        nextStep: 'Return to Plan and resolve the Verify findings before Execute continues.',
      };
      break;
    default:
      // Any other verdict keeps the task in the Verify stage.
      patch = {
        stage: 'Verify',
        verifyVerdict,
        verifyResult,
        supervisorAction: 'Verify did not produce a passing result.',
        expectedOutcome: 'Resolve verifier failure or request human decision.',
        humanApproval: 'maybe',
        nextStep: 'Resolve Verify result before continuing.',
      };
  }

  updateStatus(taskDir, patch);
}
|
|
408
|
+
|
|
306
409
|
function appendVerifyTimeline(taskDir, event) {
|
|
307
410
|
const timelinePath = path.join(taskDir, 'verify-timeline.json');
|
|
308
411
|
let existing = [];
|
|
@@ -703,6 +806,10 @@ function writeVerifierFailure({
|
|
|
703
806
|
};
|
|
704
807
|
writeTaskFile(taskDir, 'verify.md', verifyMarkdown);
|
|
705
808
|
writeTaskFile(taskDir, 'verify.result.json', JSON.stringify(result, null, 2));
|
|
809
|
+
updateStatusForVerifyResult(taskDir, result, {
|
|
810
|
+
reused: false,
|
|
811
|
+
verifierMode: 'external_cli',
|
|
812
|
+
});
|
|
706
813
|
appendOrchestrationLog(taskDir, `external CLI verifier failed via ${verifierConfig.provider}; failureReason=${failureReason}; runId=${verifierRunId}`);
|
|
707
814
|
}
|
|
708
815
|
|
package/package.json
CHANGED
package/prompts/checker.md
CHANGED
|
@@ -70,6 +70,8 @@ Project-specific context приходит только через task artifacts
|
|
|
70
70
|
22. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают sync/import/provider/raw records/retries/pagination/rate limits/idempotency/replay/backfill/partial failure, план обязан содержать `## Source Sync / Provider Gate`: scope/provider window, idempotency with dedupe key and duplicate action, failure handling/retry boundaries and coverage/parity evidence. Для import/manual-upload/transcript/evidence-capture задач план также обязан содержать `## Import / Ingestion Contract`: real representative fixtures when available or explicit no-real-fixtures reason, raw metadata/speaker-label extraction needed downstream, and repeat-import policy. Размытое "duplicates detected or reported" без skip/link/update/reject/report-only semantics недостаточно.
|
|
71
71
|
23. Если `task-manifest.json.loopDetector.requiresConsolidatedRemediation=true`, Checker должен блокировать повторный мелкий loop, пока plan/check-resolution не содержит consolidated remediation секцию, которая объединяет repeated reasons.
|
|
72
72
|
24. Если `llmInputPolicy.mode` не `strict` и отсутствующий full artifact реально нужен для честной оценки, verdict должен быть `context_insufficient`. Не используй `context_insufficient`, если deterministic gate уже явно показывает `return_to_plan`.
|
|
73
|
+
25. Если в task artifacts есть `precheck-remediation.md`, Checker должен проверить, что весь checklist был закрыт одним consolidated plan update. Не создавай новый мелкий blocker по одному пункту из старого checklist, если оставшиеся пункты тоже очевидно не закрыты: верни consolidated finding, ссылающийся на `precheck-remediation.md`.
|
|
74
|
+
26. Minor process/evidence polish не должен блокировать Human Gate, если deterministic gates закрыты, acceptance criteria покрыты, scope/risk/security/data correctness не нарушены, а остаток можно безопасно записать как `non_blocking` или human question.
|
|
73
75
|
|
|
74
76
|
## Контракт выхода
|
|
75
77
|
|
package/prompts/supervisor.md
CHANGED
|
@@ -59,6 +59,8 @@ Supervisor является code-level orchestrator по контракту: rou
|
|
|
59
59
|
25. `verify.result.json` должен сверять `plan.md` с фактическим `execution.md`, diff/files/tests и явным execution evidence. Self-reported executor checks без verifier verdict не являются достаточным Verify.
|
|
60
60
|
26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when execution evidence covers the plan gates.
|
|
61
61
|
27. Если external verifier/checker/browser tooling начинает тратить непропорционально много времени или блокируется окружением, Supervisor обязан остановить loop и вынести human decision: принять internal verify/evidence, запустить external escalation вручную или изменить scope.
|
|
62
|
+
28. Если deterministic Check preflight создал `precheck-remediation.md`, Supervisor не должен запускать повторный Check после точечной правки одного пункта. Сначала Planner/Executor должен закрыть весь checklist или явно отметить not-applicable с evidence/human decision в `plan.md`/`status.md`, затем допускается один fresh Check.
|
|
63
|
+
29. Перед повторным Check после deterministic precheck Supervisor обязан сверить, что `precheck-remediation.md` был использован как consolidated checklist: все listed gates отражены в plan/research/status, а не закрывались по одному через серию precheck loops.
|
|
62
64
|
|
|
63
65
|
## Hard Gate: Material Scope Expansion -> Brief Reset
|
|
64
66
|
|
package/prompts/verifier.md
CHANGED
|
@@ -44,6 +44,8 @@
|
|
|
44
44
|
20. Если `plan.md` содержит `## Source Sync / Provider Gate`, verifier должен проверить `Source Sync / Provider Evidence`: scope/window, idempotency, retries/pagination/rate limits, raw-record handling, partial failure recovery and coverage/parity evidence.
|
|
45
45
|
21. Если `task-manifest.json.loopDetector.requiresConsolidatedRemediation=true`, verifier не должен закрывать задачу, пока repeated return reasons не объединены в consolidated remediation и не покрыты execution evidence.
|
|
46
46
|
22. Если `llmInputPolicy.mode` не `strict` и отсутствующий full artifact реально нужен для честной оценки, verdict должен быть `context_insufficient`. Не используй `context_insufficient`, если execution evidence уже явно отсутствует или противоречит plan.
|
|
47
|
+
23. Если task содержит `precheck-remediation.md`, verifier должен проверить только применимые пункты, которые дошли до Execute. Не возвращай `return_to_execute` из-за старого precheck checklist, если план закрыл его до Human Gate и фактическая реализация покрывает acceptance.
|
|
48
|
+
24. Environment/tooling failures внешнего verifier/browser smoke не должны превращаться в бесконечный `return_to_execute` loop. Если implementation evidence достаточно, но внешний инструмент заблокирован окружением, используй `pass_with_notes` или `human_arbitration_required` согласно риску.
|
|
47
49
|
|
|
48
50
|
## Контракт выхода
|
|
49
51
|
|