@besales/ops-framework 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/bin/lib/execution-ledger-utils.mjs +22 -2
- package/bin/lib/execution-ledger-utils.test.mjs +60 -0
- package/bin/lib/llm-input-pack-utils.mjs +109 -24
- package/bin/lib/llm-input-pack-utils.test.mjs +41 -6
- package/bin/run-check.mjs +11 -0
- package/bin/run-verify.mjs +106 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.22
|
|
4
|
+
|
|
5
|
+
- Added a compact `standard_plus` LLM context mode between `standard` and `strict` so near-cap Check/Verify runs avoid full strict context when compact evidence is sufficient.
|
|
6
|
+
- Refreshed `task-manifest.json` after successful Check and cache hits so `lastCheckResult` reflects the final current verdict instead of stale `return_to_plan` results.
|
|
7
|
+
|
|
8
|
+
## 0.1.21
|
|
9
|
+
|
|
10
|
+
- Added `verify-timeline.json` telemetry for Verify runs, including deterministic blocks, LLM input sizing, context-mode escalation, provider duration/failure and final verdict.
|
|
11
|
+
- Prevented Verify artifact-growth loops by compacting generated review/log artifacts (`verify.md`, `check.md`, `check-resolution.md`, `orchestration-log.md`) even in strict verifier context while preserving full execution evidence.
|
|
12
|
+
- Compacted `execution-ledger.json` in verifier input and marked files mentioned in `execution.md` so task-scope files do not remain in `unrelatedDirtyFiles`.
|
|
13
|
+
|
|
3
14
|
## 0.1.20
|
|
4
15
|
|
|
5
16
|
- Added deterministic task Check gates for schema/migration plans: a real disposable/scratch database apply path is required before external Check, and Verify/Human Gate evidence must show a successful apply/migrate/psql run rather than static SQL review only.
|
|
@@ -33,6 +33,11 @@ export function buildExecutionLedger({
|
|
|
33
33
|
}) {
|
|
34
34
|
const git = collectGitExecutionState({ repoRoot, taskDir });
|
|
35
35
|
const taskArtifacts = listTaskArtifacts(taskDir);
|
|
36
|
+
const executionMentionedFiles = readExecutionMentionedFiles(taskDir);
|
|
37
|
+
const changedFiles = git.changedFiles.map((file) => ({
|
|
38
|
+
...file,
|
|
39
|
+
isExecutionMentioned: executionMentionedFiles.has(file.path),
|
|
40
|
+
}));
|
|
36
41
|
|
|
37
42
|
return {
|
|
38
43
|
schemaVersion: 1,
|
|
@@ -42,8 +47,8 @@ export function buildExecutionLedger({
|
|
|
42
47
|
executionSha,
|
|
43
48
|
git: {
|
|
44
49
|
taskRelativePath: git.taskRelativePath,
|
|
45
|
-
changedFiles: compactLedgerFiles(
|
|
46
|
-
unrelatedDirtyFiles: compactLedgerFiles(
|
|
50
|
+
changedFiles: compactLedgerFiles(changedFiles),
|
|
51
|
+
unrelatedDirtyFiles: compactLedgerFiles(changedFiles.filter((file) => !file.isTaskArtifact && !file.isOpsFrameworkFile && !file.isExecutionMentioned)),
|
|
47
52
|
},
|
|
48
53
|
taskArtifacts,
|
|
49
54
|
notes: [
|
|
@@ -59,6 +64,7 @@ function compactLedgerFiles(files) {
|
|
|
59
64
|
status: file.status,
|
|
60
65
|
isTaskArtifact: file.isTaskArtifact,
|
|
61
66
|
isOpsFrameworkFile: file.isOpsFrameworkFile,
|
|
67
|
+
isExecutionMentioned: Boolean(file.isExecutionMentioned),
|
|
62
68
|
}));
|
|
63
69
|
}
|
|
64
70
|
|
|
@@ -145,6 +151,20 @@ function listTaskArtifacts(taskDir) {
|
|
|
145
151
|
}));
|
|
146
152
|
}
|
|
147
153
|
|
|
154
|
+
/**
 * Collects the path-like references that `execution.md` wraps in backticks.
 *
 * Only single-line backtick spans that contain at least one `/` are matched.
 * Each captured span is trimmed and passed through `normalizePath` before it
 * is stored.
 *
 * @param {string} taskDir - Directory expected to contain `execution.md`.
 * @returns {Set<string>} Normalized references; empty when the file is absent.
 */
function readExecutionMentionedFiles(taskDir) {
  const executionFile = path.join(taskDir, 'execution.md');
  if (!fs.existsSync(executionFile)) {
    return new Set();
  }

  const markdown = fs.readFileSync(executionFile, 'utf8');
  // Backtick-delimited span on one line with a slash somewhere inside it.
  const backtickedPath = /`([^`\n]+\/[^`\n]+)`/g;
  const mentioned = Array.from(
    markdown.matchAll(backtickedPath),
    ([, captured]) => normalizePath(captured.trim()),
  );
  return new Set(mentioned);
}
|
|
167
|
+
|
|
148
168
|
function runGitLines(repoRoot, args) {
|
|
149
169
|
const result = spawnSync('git', args, {
|
|
150
170
|
cwd: repoRoot,
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { spawnSync } from 'node:child_process';
|
|
2
6
|
import {
|
|
7
|
+
buildExecutionLedger,
|
|
3
8
|
mergeChangedFiles,
|
|
4
9
|
parseGitStatusLine,
|
|
5
10
|
} from './execution-ledger-utils.mjs';
|
|
@@ -71,4 +76,59 @@ describe('execution ledger utils', () => {
|
|
|
71
76
|
}),
|
|
72
77
|
]);
|
|
73
78
|
});
|
|
79
|
+
|
|
80
|
+
it('does not classify execution-mentioned files as unrelated dirty files', () => {
  // Scratch git repository: two committed files are then modified, but only
  // one of them is referenced from the task's execution.md.
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ops-ledger-repo-'));

  // try/finally guarantees the scratch repository is removed even when a git
  // command or an assertion throws; otherwise failed runs leak temp dirs.
  try {
    run(repoRoot, ['init']);
    run(repoRoot, ['config', 'user.email', 'test@example.com']);
    run(repoRoot, ['config', 'user.name', 'Test User']);
    fs.mkdirSync(path.join(repoRoot, 'apps', 'api'), { recursive: true });
    fs.mkdirSync(path.join(repoRoot, 'docs'), { recursive: true });
    fs.writeFileSync(path.join(repoRoot, 'apps', 'api', 'package.json'), '{}\n');
    fs.writeFileSync(path.join(repoRoot, 'docs', 'source.md'), 'source\n');
    run(repoRoot, ['add', '.']);
    run(repoRoot, ['commit', '-m', 'initial']);

    // Dirty both tracked files after the initial commit.
    fs.writeFileSync(path.join(repoRoot, 'apps', 'api', 'package.json'), '{"type":"module"}\n');
    fs.writeFileSync(path.join(repoRoot, 'docs', 'source.md'), 'dirty source\n');

    const taskDir = path.join(repoRoot, 'ops', 'agent-pipeline', 'tasks', 'TASK-999-example');
    fs.mkdirSync(taskDir, { recursive: true });
    fs.writeFileSync(path.join(taskDir, 'execution.md'), [
      '# Execution',
      '',
      '## Измененные файлы',
      '',
      '| File | Change summary | Planned item / reason |',
      '| --- | --- | --- |',
      '| `apps/api/package.json` | package setup | planned |',
    ].join('\n'));

    const ledger = buildExecutionLedger({
      taskId: 'TASK-999-example',
      taskDir,
      repoRoot,
      planSha: 'sha256:plan',
      executionSha: 'sha256:execution',
    });

    expect(ledger.git.changedFiles).toContainEqual(expect.objectContaining({
      path: 'apps/api/package.json',
      isExecutionMentioned: true,
    }));
    expect(ledger.git.unrelatedDirtyFiles.map((file) => file.path)).not.toContain('apps/api/package.json');
    expect(ledger.git.unrelatedDirtyFiles.map((file) => file.path)).toContain('docs/source.md');
  } finally {
    fs.rmSync(repoRoot, { recursive: true, force: true });
  }
});
|
|
74
124
|
});
|
|
125
|
+
|
|
126
|
+
/**
 * Runs a git command synchronously for test setup and fails loudly on error.
 *
 * @param {string} cwd - Working directory for the git process.
 * @param {string[]} args - Arguments passed to `git`.
 * @throws {Error} When git cannot be spawned or exits with a non-zero status.
 */
function run(cwd, args) {
  const result = spawnSync('git', args, {
    cwd,
    encoding: 'utf8',
  });
  if (result.error || result.status !== 0) {
    // When the process cannot be spawned (e.g. git is not installed) stdout and
    // stderr are null and the reason lives in result.error, so include it
    // instead of reporting "nullnull".
    const details = [result.stdout, result.stderr, result.error?.message]
      .filter(Boolean)
      .join('');
    throw new Error(
      `git ${args.join(' ')} failed: ${details}`,
      result.error ? { cause: result.error } : undefined,
    );
  }
}
|
|
@@ -8,10 +8,11 @@ import {
|
|
|
8
8
|
renderRelevantPlaybooks,
|
|
9
9
|
} from './check-context-utils.mjs';
|
|
10
10
|
|
|
11
|
-
export const LLM_CONTEXT_MODES = ['fast', 'standard', 'strict'];
|
|
11
|
+
export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict'];
|
|
12
12
|
export const LLM_CONTEXT_CAPS = {
|
|
13
13
|
fast: 8000,
|
|
14
14
|
standard: 20000,
|
|
15
|
+
standard_plus: 26000,
|
|
15
16
|
strict: 45000,
|
|
16
17
|
};
|
|
17
18
|
|
|
@@ -20,6 +21,7 @@ const PACK_CAP_SAFETY_MULTIPLIER = 1.15;
|
|
|
20
21
|
const MEMORY_MAX_CHARS = {
|
|
21
22
|
fast: 3000,
|
|
22
23
|
standard: 3500,
|
|
24
|
+
standard_plus: 4500,
|
|
23
25
|
strict: Infinity,
|
|
24
26
|
};
|
|
25
27
|
|
|
@@ -163,6 +165,9 @@ export function nextLlmContextMode(mode) {
|
|
|
163
165
|
return 'standard';
|
|
164
166
|
}
|
|
165
167
|
if (mode === 'standard') {
|
|
168
|
+
return 'standard_plus';
|
|
169
|
+
}
|
|
170
|
+
if (mode === 'standard_plus') {
|
|
166
171
|
return 'strict';
|
|
167
172
|
}
|
|
168
173
|
return null;
|
|
@@ -215,8 +220,8 @@ export function buildCheckerLlmInputPack({
|
|
|
215
220
|
fullContextAvailableViaStrict: selectedMode !== 'strict',
|
|
216
221
|
contextInsufficientFallback: selectedMode === 'strict' ? 'stop_and_report' : `rerun_${nextLlmContextMode(selectedMode)}`,
|
|
217
222
|
},
|
|
218
|
-
checkEvidence: compactGeneratedMarkdown('check-evidence.md', checkEvidence, selectedMode, { fast: 2800, standard: 4000 }),
|
|
219
|
-
checkerContextPack: compactGeneratedMarkdown('checker-context-pack.md', checkerContextPack, selectedMode, { fast: 3300, standard: 4600 }),
|
|
223
|
+
checkEvidence: compactGeneratedMarkdown('check-evidence.md', checkEvidence, selectedMode, { fast: 2800, standard: 4000, standard_plus: 5600 }),
|
|
224
|
+
checkerContextPack: compactGeneratedMarkdown('checker-context-pack.md', checkerContextPack, selectedMode, { fast: 3300, standard: 4600, standard_plus: 6200 }),
|
|
220
225
|
relevantPlaybooks: selectedMode === 'strict'
|
|
221
226
|
? renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'strict' })
|
|
222
227
|
: renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'compact' }),
|
|
@@ -254,23 +259,27 @@ export function buildVerifierLlmInputPack({
|
|
|
254
259
|
}) {
|
|
255
260
|
const selectedMode = normalizeLlmContextMode(mode) || 'standard';
|
|
256
261
|
const taskArtifacts = selectedMode === 'strict'
|
|
257
|
-
?
|
|
258
|
-
'brief.md',
|
|
259
|
-
'research.md',
|
|
260
|
-
'plan.md',
|
|
261
|
-
'task-manifest.json',
|
|
262
|
-
'check.result.json',
|
|
263
|
-
'check.md'
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
'
|
|
269
|
-
'
|
|
270
|
-
'
|
|
271
|
-
'execution-
|
|
272
|
-
'
|
|
273
|
-
|
|
262
|
+
? {
|
|
263
|
+
'brief.md': readTaskFile(taskDir, 'brief.md'),
|
|
264
|
+
'research.md': readTaskFile(taskDir, 'research.md'),
|
|
265
|
+
'plan.md': readTaskFile(taskDir, 'plan.md'),
|
|
266
|
+
'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
|
|
267
|
+
'check.result.json': readTaskFile(taskDir, 'check.result.json'),
|
|
268
|
+
'check.md': compactCheckMarkdown({
|
|
269
|
+
checkMarkdown: readTaskFile(taskDir, 'check.md'),
|
|
270
|
+
checkResult: readOptionalJson(taskDir, 'check.result.json'),
|
|
271
|
+
mode: 'standard',
|
|
272
|
+
}),
|
|
273
|
+
'check-resolution.md': compactArtifact(taskDir, 'check-resolution.md', 'standard', ['structured resolution', 'root cause', 'resolution']),
|
|
274
|
+
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
|
|
275
|
+
'execution.md': readTaskFile(taskDir, 'execution.md'),
|
|
276
|
+
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), 'strict'),
|
|
277
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'standard'),
|
|
278
|
+
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
279
|
+
'feedback.md': compactArtifact(taskDir, 'feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
280
|
+
'execution-feedback.md': compactArtifact(taskDir, 'execution-feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
281
|
+
'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), 'standard'),
|
|
282
|
+
}
|
|
274
283
|
: {
|
|
275
284
|
'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
|
|
276
285
|
'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo']),
|
|
@@ -285,8 +294,8 @@ export function buildVerifierLlmInputPack({
|
|
|
285
294
|
'check-resolution.md': truncateMiddle(readTaskFile(taskDir, 'check-resolution.md'), charLimitForMode(selectedMode, 1500, 3500)),
|
|
286
295
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
|
|
287
296
|
'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
|
|
288
|
-
'execution-ledger.json':
|
|
289
|
-
'verify.md':
|
|
297
|
+
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), selectedMode),
|
|
298
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), selectedMode),
|
|
290
299
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
291
300
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
|
|
292
301
|
'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), selectedMode),
|
|
@@ -483,6 +492,72 @@ function compactOrchestrationLog(log, mode) {
|
|
|
483
492
|
return markCompacted('orchestration-log.md', log, compacted);
|
|
484
493
|
}
|
|
485
494
|
|
|
495
|
+
/**
 * Builds a compact excerpt of `verify.md` for the verifier input pack.
 *
 * The excerpt has a short header taken from `verifyResult` (verdict, verifier
 * run id, number of findings), then the output of `compactMarkdownSections`
 * for the verdict/findings/risk/next-step headings, then one truncated
 * summary block per structured finding.
 *
 * @param {string} verifyMarkdown - Raw `verify.md` content.
 * @param {object|null|undefined} verifyResult - Parsed `verify.result.json`, if any.
 * @param {string} mode - Context mode forwarded to `charLimitForMode`.
 * @returns {string} `''` for blank markdown, otherwise whatever `markCompacted`
 *   returns for the excerpt.
 */
function compactVerifierMarkdown(verifyMarkdown, verifyResult, mode) {
  if (verifyMarkdown.trim() === '') {
    return '';
  }

  const findings = Array.isArray(verifyResult?.findings) ? verifyResult.findings : [];

  const output = [
    '# Verify compact excerpt',
    '',
    `Verdict: ${verifyResult?.verdict || 'unknown'}`,
    `Verifier run: ${verifyResult?.verifierRunId || 'unknown'}`,
    `Findings: ${findings.length}`,
    '',
  ];

  output.push(
    compactMarkdownSections(
      verifyMarkdown,
      ['verdict', 'findings', 'residual risks', 'recommended next step'],
      charLimitForMode(mode, 1600, 3200),
    ),
    '',
  );

  // Each finding becomes one multi-line entry; long fields are truncated.
  for (const finding of findings) {
    const block = [
      `## ${finding.id || 'finding'}`,
      `- Severity: ${finding.severity || 'unknown'}`,
      `- Category: ${finding.claimCategory || 'unknown'}`,
      `- Affected artifacts: ${truncateEnd(JSON.stringify(finding.affectedArtifacts || []), 250)}`,
      '- Evidence refs:',
      ...formatRefs(finding.evidenceRefs || [], 220),
      `- Claim: ${truncateEnd(finding.claim || '', 700)}`,
      `- Expected correction: ${truncateEnd(finding.expectedCorrection || '', 700)}`,
    ];
    output.push(block.join('\n'));
  }

  const excerpt = output.join('\n').trim();
  return markCompacted('verify.md', verifyMarkdown, excerpt);
}
|
|
522
|
+
|
|
523
|
+
/**
 * Produces a size-bounded JSON rendering of an execution ledger.
 *
 * Both file lists are cut to a per-mode limit (fast 40, standard 90,
 * standard_plus 120, any other mode 160). The full counts and the number of
 * dropped entries are recorded next to the truncated lists.
 *
 * @param {object|null|undefined} ledger - Parsed `execution-ledger.json`.
 * @param {string} mode - Context mode selecting the per-list limit.
 * @returns {string} `'{}'` when `ledger` is not a plain object, otherwise
 *   whatever `markCompacted` returns for the full and compact JSON strings.
 */
function compactExecutionLedger(ledger, mode) {
  const isPlainObject = Boolean(ledger) && typeof ledger === 'object' && !Array.isArray(ledger);
  if (!isPlainObject) {
    return '{}';
  }

  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  const changedFiles = listOrEmpty(ledger.git?.changedFiles);
  const unrelatedDirtyFiles = listOrEmpty(ledger.git?.unrelatedDirtyFiles);

  let limit;
  switch (mode) {
    case 'fast':
      limit = 40;
      break;
    case 'standard':
      limit = 90;
      break;
    case 'standard_plus':
      limit = 120;
      break;
    default:
      limit = 160;
  }

  const overflow = (files) => Math.max(0, files.length - limit);

  // Key order is significant: it determines the serialized JSON layout.
  const git = {
    taskRelativePath: ledger.git?.taskRelativePath || null,
    changedFileCount: changedFiles.length,
    unrelatedDirtyFileCount: unrelatedDirtyFiles.length,
    changedFiles: changedFiles.slice(0, limit).map(compactLedgerFile),
    unrelatedDirtyFiles: unrelatedDirtyFiles.slice(0, limit).map(compactLedgerFile),
    truncatedChangedFiles: overflow(changedFiles),
    truncatedUnrelatedDirtyFiles: overflow(unrelatedDirtyFiles),
  };
  const compact = {
    schemaVersion: ledger.schemaVersion,
    taskId: ledger.taskId,
    createdAt: ledger.createdAt,
    planSha: ledger.planSha,
    executionSha: ledger.executionSha,
    git,
    taskArtifacts: listOrEmpty(ledger.taskArtifacts),
    notes: ledger.notes || [],
  };

  const fullJson = JSON.stringify(ledger, null, 2);
  const compactJson = JSON.stringify(compact, null, 2);
  return markCompacted('execution-ledger.json', fullJson, compactJson);
}
|
|
550
|
+
|
|
551
|
+
/**
 * Reduces a ledger file entry to its path, status and three boolean flags.
 *
 * @param {object} file - Ledger entry; flag values are coerced to booleans.
 * @returns {{path: *, status: *, isTaskArtifact: boolean,
 *   isOpsFrameworkFile: boolean, isExecutionMentioned: boolean}}
 */
function compactLedgerFile(file) {
  const {
    path: filePath,
    status,
    isTaskArtifact,
    isOpsFrameworkFile,
    isExecutionMentioned,
  } = file;

  return {
    path: filePath,
    status,
    isTaskArtifact: !!isTaskArtifact,
    isOpsFrameworkFile: !!isOpsFrameworkFile,
    isExecutionMentioned: !!isExecutionMentioned,
  };
}
|
|
560
|
+
|
|
486
561
|
function compactProjectMemory(projectMemory, mode) {
|
|
487
562
|
if (mode === 'strict') {
|
|
488
563
|
return projectMemory;
|
|
@@ -510,7 +585,11 @@ function compactGeneratedMarkdown(fileName, content, mode, limits) {
|
|
|
510
585
|
if (mode === 'strict' || !content) {
|
|
511
586
|
return content;
|
|
512
587
|
}
|
|
513
|
-
const limit = mode === 'fast'
|
|
588
|
+
const limit = mode === 'fast'
|
|
589
|
+
? limits.fast
|
|
590
|
+
: mode === 'standard_plus'
|
|
591
|
+
? limits.standard_plus || Math.ceil(limits.standard * 1.35)
|
|
592
|
+
: limits.standard;
|
|
514
593
|
return markCompacted(fileName, content, truncateMiddle(content, limit));
|
|
515
594
|
}
|
|
516
595
|
|
|
@@ -637,7 +716,13 @@ function isProtectedSection(value) {
|
|
|
637
716
|
}
|
|
638
717
|
|
|
639
718
|
/**
 * Picks a character budget for a context mode.
 *
 * @param {string} mode - Context mode name.
 * @param {number} fastChars - Budget returned for `'fast'`.
 * @param {number} standardChars - Budget returned for every other mode;
 *   `'standard_plus'` gets this value scaled by 1.25 and rounded up.
 * @returns {number} The selected character limit.
 */
function charLimitForMode(mode, fastChars, standardChars) {
  switch (mode) {
    case 'fast':
      return fastChars;
    case 'standard_plus':
      return Math.ceil(standardChars * 1.25);
    default:
      return standardChars;
  }
}
|
|
642
727
|
|
|
643
728
|
function readOptionalJson(taskDir, fileName) {
|
|
@@ -62,9 +62,10 @@ describe('llm input pack utilities', () => {
|
|
|
62
62
|
expect(pack.meta.compactedArtifacts).toContain('orchestration-log.md');
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
it('
|
|
66
|
-
const taskDir = createTask({ orchestrationEvents:
|
|
65
|
+
it('compacts generated review artifacts in strict verifier pack', () => {
|
|
66
|
+
const taskDir = createTask({ orchestrationEvents: 80 });
|
|
67
67
|
const fullLog = fs.readFileSync(path.join(taskDir, 'orchestration-log.md'), 'utf8');
|
|
68
|
+
const fullExecution = fs.readFileSync(path.join(taskDir, 'execution.md'), 'utf8');
|
|
68
69
|
const pack = buildVerifierLlmInputPack({
|
|
69
70
|
taskDir,
|
|
70
71
|
taskId: 'TASK-999-token-pack',
|
|
@@ -74,17 +75,49 @@ describe('llm input pack utilities', () => {
|
|
|
74
75
|
mode: 'strict',
|
|
75
76
|
});
|
|
76
77
|
|
|
77
|
-
expect(pack.input.taskArtifacts['
|
|
78
|
-
expect(pack.
|
|
78
|
+
expect(pack.input.taskArtifacts['execution.md']).toBe(fullExecution);
|
|
79
|
+
expect(pack.input.taskArtifacts['orchestration-log.md']).toContain('# Orchestration Log Compact');
|
|
80
|
+
expect(pack.input.taskArtifacts['orchestration-log.md'].length).toBeLessThan(fullLog.length);
|
|
81
|
+
expect(pack.input.taskArtifacts['verify.md']).toContain('# Verify compact excerpt');
|
|
82
|
+
expect(pack.meta.compactedArtifacts).toContain('orchestration-log.md');
|
|
83
|
+
expect(pack.meta.compactedArtifacts).toContain('verify.md');
|
|
79
84
|
});
|
|
80
85
|
|
|
81
86
|
it('builds bounded fallback mode sequence for context insufficient results', () => {
|
|
82
|
-
expect(buildContextModeSequence('fast')).toEqual(['fast', 'standard', 'strict']);
|
|
83
|
-
expect(buildContextModeSequence('standard')).toEqual(['standard', 'strict']);
|
|
87
|
+
expect(buildContextModeSequence('fast')).toEqual(['fast', 'standard', 'standard_plus', 'strict']);
|
|
88
|
+
expect(buildContextModeSequence('standard')).toEqual(['standard', 'standard_plus', 'strict']);
|
|
89
|
+
expect(buildContextModeSequence('standard_plus')).toEqual(['standard_plus', 'strict']);
|
|
84
90
|
expect(buildContextModeSequence('strict')).toEqual(['strict']);
|
|
85
91
|
expect(isContextInsufficientResult({ verdict: 'context_insufficient' })).toBe(true);
|
|
86
92
|
});
|
|
87
93
|
|
|
94
|
+
it('provides a compact standard_plus check mode before strict', () => {
|
|
95
|
+
const taskDir = createTask();
|
|
96
|
+
const pack = buildCheckerLlmInputPack({
|
|
97
|
+
taskDir,
|
|
98
|
+
taskId: 'TASK-999-token-pack',
|
|
99
|
+
checkerPromptSha: 'sha256:test',
|
|
100
|
+
cacheKey: { test: true },
|
|
101
|
+
checkContext: {
|
|
102
|
+
planSha: 'sha256:plan',
|
|
103
|
+
memorySha: 'sha256:memory',
|
|
104
|
+
riskProfile: 'high',
|
|
105
|
+
riskTriggers: ['source-sync-provider', 'prisma-schema'],
|
|
106
|
+
},
|
|
107
|
+
checkEvidence: '# Evidence\n\nok\n'.repeat(900),
|
|
108
|
+
checkerContextPack: '# Checker Context Pack\n\nok\n'.repeat(900),
|
|
109
|
+
taskManifest: '{}',
|
|
110
|
+
projectMemory: [],
|
|
111
|
+
mode: 'standard_plus',
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
expect(pack.meta.mode).toBe('standard_plus');
|
|
115
|
+
expect(pack.meta.capTokens).toBe(26000);
|
|
116
|
+
expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
|
|
117
|
+
expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
|
|
118
|
+
expect(pack.meta.compactedArtifacts).toContain('plan.md');
|
|
119
|
+
});
|
|
120
|
+
|
|
88
121
|
it('preserves protected verification sections when compacting long plans', () => {
|
|
89
122
|
const taskDir = createTask();
|
|
90
123
|
const longPlan = [
|
|
@@ -246,6 +279,8 @@ function createTask({ orchestrationEvents = 40 } = {}) {
|
|
|
246
279
|
write(taskDir, 'status.md', '# Status\n\n## Текущий этап\n\nverify\n\n## Следующий шаг\n\nRun verify.');
|
|
247
280
|
write(taskDir, 'check.result.json', JSON.stringify({ verdict: 'ready_for_human_gate', findings: [] }, null, 2));
|
|
248
281
|
write(taskDir, 'check.md', '# Check\n\n## Итоговая оценка\n\nReady.');
|
|
282
|
+
write(taskDir, 'verify.result.json', JSON.stringify({ verdict: 'return_to_execute', findings: [] }, null, 2));
|
|
283
|
+
write(taskDir, 'verify.md', '# Verify\n\n## verdict\n\nreturn_to_execute\n\n## findings\n\nPrevious finding.');
|
|
249
284
|
write(taskDir, 'execution-ledger.json', JSON.stringify({ git: { changedFiles: [] } }, null, 2));
|
|
250
285
|
write(taskDir, 'task-manifest.json', JSON.stringify({ context: { riskTriggers: ['panel-ui'] } }, null, 2));
|
|
251
286
|
write(taskDir, 'orchestration-log.md', [
|
package/bin/run-check.mjs
CHANGED
|
@@ -226,6 +226,7 @@ async function runMain() {
|
|
|
226
226
|
rerunCount,
|
|
227
227
|
timing: buildTiming(runStartedAt),
|
|
228
228
|
});
|
|
229
|
+
refreshTaskManifestAfterCheck(taskDir);
|
|
229
230
|
console.log(`Checker cache hit for ${taskId}: ${cacheKeySha}`);
|
|
230
231
|
return;
|
|
231
232
|
}
|
|
@@ -329,12 +330,22 @@ async function runMain() {
|
|
|
329
330
|
rerunCount,
|
|
330
331
|
timing: buildTiming(runStartedAt),
|
|
331
332
|
});
|
|
333
|
+
refreshTaskManifestAfterCheck(taskDir);
|
|
332
334
|
runValidator(taskArg);
|
|
333
335
|
console.log(`Checker run completed for ${taskId}: ${providerOutput.checkResultJson?.verdict}`);
|
|
334
336
|
console.log(`- finalLlmInputMode: ${promptPayload.pack.meta.mode}`);
|
|
335
337
|
console.log(`- finalEstimatedInputTokens: ${promptPayload.pack.meta.estimatedTokens}`);
|
|
336
338
|
}
|
|
337
339
|
|
|
340
|
+
/**
 * Rebuilds the task manifest, writes it back, and records the refresh in the
 * check timeline together with the manifest's `lastCheckResult`.
 *
 * @param {string} taskDir - Task directory passed to the manifest helpers.
 */
function refreshTaskManifestAfterCheck(taskDir) {
  const refreshedManifest = buildTaskManifest({ taskDir });
  writeTaskManifest(taskDir, refreshedManifest);

  const { lastCheckResult } = refreshedManifest;
  appendCheckTimeline(taskDir, {
    event: 'task_manifest_refreshed_after_check',
    lastCheckResult,
  });
}
|
|
348
|
+
|
|
338
349
|
function appendCheckTimeline(taskDir, event) {
|
|
339
350
|
const timelinePath = path.join(taskDir, 'check-timeline.json');
|
|
340
351
|
let existing = [];
|
package/bin/run-verify.mjs
CHANGED
|
@@ -45,6 +45,13 @@ async function runMain() {
|
|
|
45
45
|
const taskDir = resolveTaskDir(taskArg);
|
|
46
46
|
const taskId = path.basename(taskDir);
|
|
47
47
|
const verifierConfig = resolveVerifierConfig(args);
|
|
48
|
+
const runStartedAt = new Date();
|
|
49
|
+
appendVerifyTimeline(taskDir, {
|
|
50
|
+
event: 'verify_started',
|
|
51
|
+
mode: verifierConfig.mode,
|
|
52
|
+
provider: verifierConfig.provider,
|
|
53
|
+
model: verifierConfig.model,
|
|
54
|
+
});
|
|
48
55
|
const planSha = hashTaskMarkdown(taskDir, 'plan.md');
|
|
49
56
|
const executionSha = hashTaskMarkdown(taskDir, 'execution.md');
|
|
50
57
|
const taskManifest = readOptionalJson(taskDir, 'task-manifest.json');
|
|
@@ -66,6 +73,12 @@ async function runMain() {
|
|
|
66
73
|
executionSha,
|
|
67
74
|
evidenceIssues,
|
|
68
75
|
});
|
|
76
|
+
appendVerifyTimeline(taskDir, {
|
|
77
|
+
event: 'deterministic_preverify_blocked',
|
|
78
|
+
verdict: 'return_to_execute',
|
|
79
|
+
issues: evidenceIssues.map((issue) => issue.message),
|
|
80
|
+
timing: buildTiming(runStartedAt),
|
|
81
|
+
});
|
|
69
82
|
console.log(`Verifier preflight blocked ${taskId}: return_to_execute`);
|
|
70
83
|
console.log(`- evidenceIssues: ${evidenceIssues.length}`);
|
|
71
84
|
return;
|
|
@@ -79,6 +92,11 @@ async function runMain() {
|
|
|
79
92
|
planSha,
|
|
80
93
|
executionSha,
|
|
81
94
|
});
|
|
95
|
+
appendVerifyTimeline(taskDir, {
|
|
96
|
+
event: 'internal_supervisor_completed',
|
|
97
|
+
verdict: 'pass_with_notes',
|
|
98
|
+
timing: buildTiming(runStartedAt),
|
|
99
|
+
});
|
|
82
100
|
console.log(`Internal supervisor Verify artifact written for ${taskId}: pass_with_notes`);
|
|
83
101
|
return;
|
|
84
102
|
}
|
|
@@ -109,12 +127,26 @@ async function runMain() {
|
|
|
109
127
|
contextMode,
|
|
110
128
|
});
|
|
111
129
|
finalPack = promptPayload.pack;
|
|
130
|
+
appendVerifyTimeline(taskDir, {
|
|
131
|
+
event: 'llm_input_built',
|
|
132
|
+
contextMode,
|
|
133
|
+
verifierRunId,
|
|
134
|
+
packMeta: promptPayload.pack.meta,
|
|
135
|
+
timing: buildTiming(runStartedAt),
|
|
136
|
+
});
|
|
112
137
|
console.log(`Verifier LLM input for ${taskId}`);
|
|
113
138
|
for (const line of summarizePackForConsole(promptPayload.pack)) {
|
|
114
139
|
console.log(line);
|
|
115
140
|
}
|
|
116
141
|
if (promptPayload.pack.meta.overCap && contextMode !== 'strict') {
|
|
117
142
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, 'skipped_over_cap'));
|
|
143
|
+
appendVerifyTimeline(taskDir, {
|
|
144
|
+
event: 'llm_input_over_cap_escalating',
|
|
145
|
+
contextMode,
|
|
146
|
+
verifierRunId,
|
|
147
|
+
packMeta: promptPayload.pack.meta,
|
|
148
|
+
timing: buildTiming(runStartedAt),
|
|
149
|
+
});
|
|
118
150
|
appendOrchestrationLog(taskDir, `verifier LLM input exceeded ${contextMode} cap; rerunning pack builder with expanded context`);
|
|
119
151
|
continue;
|
|
120
152
|
}
|
|
@@ -131,6 +163,13 @@ async function runMain() {
|
|
|
131
163
|
message: `Strict LLM input pack exceeds cap: estimatedTokens=${promptPayload.pack.meta.estimatedTokens}, capTokens=${promptPayload.pack.meta.capTokens}`,
|
|
132
164
|
rawOutput: null,
|
|
133
165
|
});
|
|
166
|
+
appendVerifyTimeline(taskDir, {
|
|
167
|
+
event: 'context_overflow',
|
|
168
|
+
contextMode,
|
|
169
|
+
verifierRunId,
|
|
170
|
+
packMeta: promptPayload.pack.meta,
|
|
171
|
+
timing: buildTiming(runStartedAt),
|
|
172
|
+
});
|
|
134
173
|
recordLlmInputUsage({
|
|
135
174
|
taskDir,
|
|
136
175
|
stage: 'verify',
|
|
@@ -143,6 +182,17 @@ async function runMain() {
|
|
|
143
182
|
}
|
|
144
183
|
|
|
145
184
|
try {
|
|
185
|
+
const providerStartedAt = new Date();
|
|
186
|
+
appendVerifyTimeline(taskDir, {
|
|
187
|
+
event: 'provider_started',
|
|
188
|
+
provider: verifierConfig.provider,
|
|
189
|
+
model: verifierConfig.model,
|
|
190
|
+
reasoningEffort: verifierConfig.reasoningEffort,
|
|
191
|
+
contextMode,
|
|
192
|
+
verifierRunId,
|
|
193
|
+
packMeta: promptPayload.pack.meta,
|
|
194
|
+
timing: buildTiming(runStartedAt),
|
|
195
|
+
});
|
|
146
196
|
output = await runExternalCliChecker({
|
|
147
197
|
providerName: verifierConfig.provider,
|
|
148
198
|
providerConfig: verifierConfig.providerConfig,
|
|
@@ -151,6 +201,15 @@ async function runMain() {
|
|
|
151
201
|
prompt: promptPayload.prompt,
|
|
152
202
|
cwd: repoRoot,
|
|
153
203
|
});
|
|
204
|
+
appendVerifyTimeline(taskDir, {
|
|
205
|
+
event: 'provider_completed',
|
|
206
|
+
provider: verifierConfig.provider,
|
|
207
|
+
model: verifierConfig.model,
|
|
208
|
+
contextMode,
|
|
209
|
+
verifierRunId,
|
|
210
|
+
providerTiming: buildTiming(providerStartedAt),
|
|
211
|
+
timing: buildTiming(runStartedAt),
|
|
212
|
+
});
|
|
154
213
|
} catch (error) {
|
|
155
214
|
writeVerifierFailure({
|
|
156
215
|
taskDir,
|
|
@@ -163,6 +222,16 @@ async function runMain() {
|
|
|
163
222
|
message: error.message,
|
|
164
223
|
rawOutput: error.rawOutput || null,
|
|
165
224
|
});
|
|
225
|
+
appendVerifyTimeline(taskDir, {
|
|
226
|
+
event: 'provider_failed',
|
|
227
|
+
provider: verifierConfig.provider,
|
|
228
|
+
model: verifierConfig.model,
|
|
229
|
+
contextMode,
|
|
230
|
+
verifierRunId,
|
|
231
|
+
failureReason: error.failureReason || 'unknown',
|
|
232
|
+
message: error.message,
|
|
233
|
+
timing: buildTiming(runStartedAt),
|
|
234
|
+
});
|
|
166
235
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, `provider_failed:${error.failureReason || 'unknown'}`));
|
|
167
236
|
recordLlmInputUsage({
|
|
168
237
|
taskDir,
|
|
@@ -205,8 +274,17 @@ async function runMain() {
|
|
|
205
274
|
packMeta: finalPack.meta,
|
|
206
275
|
attempts: llmInputAttempts,
|
|
207
276
|
rerunCount,
|
|
277
|
+
timing: buildTiming(runStartedAt),
|
|
208
278
|
});
|
|
209
279
|
}
|
|
280
|
+
appendVerifyTimeline(taskDir, {
|
|
281
|
+
event: 'verify_completed',
|
|
282
|
+
verdict: verifyResultJson.verdict,
|
|
283
|
+
verifierRunId,
|
|
284
|
+
finalMode: finalPack?.meta?.mode || null,
|
|
285
|
+
finalEstimatedTokens: finalPack?.meta?.estimatedTokens || null,
|
|
286
|
+
timing: buildTiming(runStartedAt),
|
|
287
|
+
});
|
|
210
288
|
appendOrchestrationLog(taskDir, `external CLI verifier completed via ${verifierConfig.provider}; verdict=${verifyResultJson.verdict}; runId=${verifierRunId}`);
|
|
211
289
|
console.log(`Verifier run completed for ${taskId}: ${verifyResultJson.verdict}`);
|
|
212
290
|
console.log(`- verifierRunId: ${verifierRunId}`);
|
|
@@ -216,6 +294,34 @@ async function runMain() {
|
|
|
216
294
|
}
|
|
217
295
|
}
|
|
218
296
|
|
|
297
|
+
/**
 * Describes an interval as ISO timestamps plus a non-negative duration.
 *
 * @param {Date} startedAt - Start of the interval.
 * @param {Date} [completedAt] - End of the interval; defaults to "now".
 * @returns {{startedAt: string, completedAt: string, durationMs: number}}
 */
function buildTiming(startedAt, completedAt = new Date()) {
  const startedIso = startedAt.toISOString();
  const completedIso = completedAt.toISOString();
  // Clamp so a completion time earlier than the start never yields a negative duration.
  const elapsedMs = completedAt.getTime() - startedAt.getTime();

  return {
    startedAt: startedIso,
    completedAt: completedIso,
    durationMs: Math.max(0, elapsedMs),
  };
}
|
|
304
|
+
|
|
305
|
+
/**
 * Appends one timestamped event to `verify-timeline.json` in the task directory.
 *
 * The existing file is reused only when it parses to a JSON array. A missing,
 * unreadable or non-array file is treated as an empty timeline, so the new
 * event becomes the first entry.
 *
 * @param {string} taskDir - Task directory holding the timeline file.
 * @param {object} event - Event fields, spread after the generated `at`
 *   timestamp, so an `at` key on the event takes precedence.
 */
function appendVerifyTimeline(taskDir, event) {
  const timelineFile = 'verify-timeline.json';
  const timelinePath = path.join(taskDir, timelineFile);

  let previousEntries = [];
  if (fs.existsSync(timelinePath)) {
    try {
      const stored = JSON.parse(fs.readFileSync(timelinePath, 'utf8'));
      previousEntries = Array.isArray(stored) ? stored : [];
    } catch {
      // Best effort: a corrupt timeline is restarted rather than failing the run.
      previousEntries = [];
    }
  }

  const entry = { at: new Date().toISOString(), ...event };
  const timeline = [...previousEntries, entry];
  writeTaskFile(taskDir, timelineFile, JSON.stringify(timeline, null, 2));
}
|
|
324
|
+
|
|
219
325
|
function buildAttemptRecord(packMeta, outcome) {
|
|
220
326
|
return {
|
|
221
327
|
mode: packMeta.mode,
|