@besales/ops-framework 0.1.20 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.21
|
|
4
|
+
|
|
5
|
+
- Added `verify-timeline.json` telemetry for Verify runs, including deterministic blocks, LLM input sizing, context-mode escalation, provider duration/failure and final verdict.
|
|
6
|
+
- Prevented Verify artifact-growth loops by compacting generated review/log artifacts (`verify.md`, `check.md`, `check-resolution.md`, `orchestration-log.md`) even in strict verifier context while preserving full execution evidence.
|
|
7
|
+
- Compacted `execution-ledger.json` in verifier input and marked files mentioned in `execution.md` so task-scope files do not remain in `unrelatedDirtyFiles`.
|
|
8
|
+
|
|
3
9
|
## 0.1.20
|
|
4
10
|
|
|
5
11
|
- Added deterministic task Check gates for schema/migration plans: a real disposable/scratch database apply path is required before external Check, and Verify/Human Gate evidence must show a successful apply/migrate/psql run rather than static SQL review only.
|
|
@@ -33,6 +33,11 @@ export function buildExecutionLedger({
|
|
|
33
33
|
}) {
|
|
34
34
|
const git = collectGitExecutionState({ repoRoot, taskDir });
|
|
35
35
|
const taskArtifacts = listTaskArtifacts(taskDir);
|
|
36
|
+
const executionMentionedFiles = readExecutionMentionedFiles(taskDir);
|
|
37
|
+
const changedFiles = git.changedFiles.map((file) => ({
|
|
38
|
+
...file,
|
|
39
|
+
isExecutionMentioned: executionMentionedFiles.has(file.path),
|
|
40
|
+
}));
|
|
36
41
|
|
|
37
42
|
return {
|
|
38
43
|
schemaVersion: 1,
|
|
@@ -42,8 +47,8 @@ export function buildExecutionLedger({
|
|
|
42
47
|
executionSha,
|
|
43
48
|
git: {
|
|
44
49
|
taskRelativePath: git.taskRelativePath,
|
|
45
|
-
changedFiles: compactLedgerFiles(
|
|
46
|
-
unrelatedDirtyFiles: compactLedgerFiles(
|
|
50
|
+
changedFiles: compactLedgerFiles(changedFiles),
|
|
51
|
+
unrelatedDirtyFiles: compactLedgerFiles(changedFiles.filter((file) => !file.isTaskArtifact && !file.isOpsFrameworkFile && !file.isExecutionMentioned)),
|
|
47
52
|
},
|
|
48
53
|
taskArtifacts,
|
|
49
54
|
notes: [
|
|
@@ -59,6 +64,7 @@ function compactLedgerFiles(files) {
|
|
|
59
64
|
status: file.status,
|
|
60
65
|
isTaskArtifact: file.isTaskArtifact,
|
|
61
66
|
isOpsFrameworkFile: file.isOpsFrameworkFile,
|
|
67
|
+
isExecutionMentioned: Boolean(file.isExecutionMentioned),
|
|
62
68
|
}));
|
|
63
69
|
}
|
|
64
70
|
|
|
@@ -145,6 +151,20 @@ function listTaskArtifacts(taskDir) {
|
|
|
145
151
|
}));
|
|
146
152
|
}
|
|
147
153
|
|
|
154
|
+
/**
 * Collects the path-like inline code spans mentioned in a task's `execution.md`.
 *
 * A span qualifies when it is wrapped in backticks, stays on a single line and
 * contains at least one `/` with text on both sides. Each qualifying span is
 * trimmed and passed through `normalizePath` before being stored
 * (NOTE(review): `normalizePath` is defined outside this block — presumably it
 * yields repo-relative, forward-slash paths; confirm against its definition).
 *
 * @param {string} taskDir - Directory expected to contain `execution.md`.
 * @returns {Set<string>} Normalized mentions; empty when `execution.md` is absent.
 */
function readExecutionMentionedFiles(taskDir) {
  const mentioned = new Set();
  const executionFile = path.join(taskDir, 'execution.md');

  if (fs.existsSync(executionFile)) {
    const markdown = fs.readFileSync(executionFile, 'utf8');
    // Bare file names without a slash (e.g. `package.json`) are intentionally not matched.
    const spans = markdown.matchAll(/`([^`\n]+\/[^`\n]+)`/g);
    for (const [, candidate] of spans) {
      mentioned.add(normalizePath(candidate.trim()));
    }
  }

  return mentioned;
}
|
|
167
|
+
|
|
148
168
|
function runGitLines(repoRoot, args) {
|
|
149
169
|
const result = spawnSync('git', args, {
|
|
150
170
|
cwd: repoRoot,
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { spawnSync } from 'node:child_process';
|
|
2
6
|
import {
|
|
7
|
+
buildExecutionLedger,
|
|
3
8
|
mergeChangedFiles,
|
|
4
9
|
parseGitStatusLine,
|
|
5
10
|
} from './execution-ledger-utils.mjs';
|
|
@@ -71,4 +76,59 @@ describe('execution ledger utils', () => {
|
|
|
71
76
|
}),
|
|
72
77
|
]);
|
|
73
78
|
});
|
|
79
|
+
|
|
80
|
+
it('does not classify execution-mentioned files as unrelated dirty files', () => {
  // Build a throwaway git repository so the ledger sees real `git` state.
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ops-ledger-repo-'));
  try {
    run(repoRoot, ['init']);
    run(repoRoot, ['config', 'user.email', 'test@example.com']);
    run(repoRoot, ['config', 'user.name', 'Test User']);
    fs.mkdirSync(path.join(repoRoot, 'apps', 'api'), { recursive: true });
    fs.mkdirSync(path.join(repoRoot, 'docs'), { recursive: true });
    fs.writeFileSync(path.join(repoRoot, 'apps', 'api', 'package.json'), '{}\n');
    fs.writeFileSync(path.join(repoRoot, 'docs', 'source.md'), 'source\n');
    run(repoRoot, ['add', '.']);
    run(repoRoot, ['commit', '-m', 'initial']);

    // Dirty both files after the initial commit; only one is mentioned in execution.md below.
    fs.writeFileSync(path.join(repoRoot, 'apps', 'api', 'package.json'), '{"type":"module"}\n');
    fs.writeFileSync(path.join(repoRoot, 'docs', 'source.md'), 'dirty source\n');

    const taskDir = path.join(repoRoot, 'ops', 'agent-pipeline', 'tasks', 'TASK-999-example');
    fs.mkdirSync(taskDir, { recursive: true });
    fs.writeFileSync(path.join(taskDir, 'execution.md'), [
      '# Execution',
      '',
      '## Измененные файлы',
      '',
      '| File | Change summary | Planned item / reason |',
      '| --- | --- | --- |',
      '| `apps/api/package.json` | package setup | planned |',
    ].join('\n'));

    const ledger = buildExecutionLedger({
      taskId: 'TASK-999-example',
      taskDir,
      repoRoot,
      planSha: 'sha256:plan',
      executionSha: 'sha256:execution',
    });

    expect(ledger.git.changedFiles).toContainEqual(expect.objectContaining({
      path: 'apps/api/package.json',
      isExecutionMentioned: true,
    }));
    const unrelatedPaths = ledger.git.unrelatedDirtyFiles.map((file) => file.path);
    expect(unrelatedPaths).not.toContain('apps/api/package.json');
    expect(unrelatedPaths).toContain('docs/source.md');
  } finally {
    // Always remove the temp repository, even when a git step or an assertion fails.
    fs.rmSync(repoRoot, { recursive: true, force: true });
  }
});
|
|
74
124
|
});
|
|
125
|
+
|
|
126
|
+
/**
 * Runs `git` synchronously with the given arguments and throws when it does not
 * exit with status 0.
 *
 * When the process could not be spawned at all (for example `git` is not
 * installed or `cwd` does not exist), `spawnSync` reports the problem through
 * `result.error` and leaves `status`/`stdout`/`stderr` as `null`; that error is
 * surfaced in the message and attached as `cause` instead of printing "nullnull".
 *
 * @param {string} cwd - Working directory for the git process.
 * @param {string[]} args - Arguments passed to `git`.
 * @throws {Error} When git fails to start, is terminated by a signal, or exits non-zero.
 */
function run(cwd, args) {
  const result = spawnSync('git', args, {
    cwd,
    encoding: 'utf8',
  });
  if (result.status === 0) {
    return;
  }

  const output = `${result.stdout ?? ''}${result.stderr ?? ''}`;
  let detail = output;
  if (result.error) {
    detail = output ? `${result.error.message}\n${output}` : result.error.message;
  }
  const signalNote = result.signal ? ` (signal ${result.signal})` : '';
  throw new Error(
    `git ${args.join(' ')} failed${signalNote}: ${detail}`,
    result.error ? { cause: result.error } : undefined,
  );
}
|
|
@@ -254,23 +254,27 @@ export function buildVerifierLlmInputPack({
|
|
|
254
254
|
}) {
|
|
255
255
|
const selectedMode = normalizeLlmContextMode(mode) || 'standard';
|
|
256
256
|
const taskArtifacts = selectedMode === 'strict'
|
|
257
|
-
?
|
|
258
|
-
'brief.md',
|
|
259
|
-
'research.md',
|
|
260
|
-
'plan.md',
|
|
261
|
-
'task-manifest.json',
|
|
262
|
-
'check.result.json',
|
|
263
|
-
'check.md'
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
'
|
|
269
|
-
'
|
|
270
|
-
'
|
|
271
|
-
'execution-
|
|
272
|
-
'
|
|
273
|
-
|
|
257
|
+
? {
|
|
258
|
+
'brief.md': readTaskFile(taskDir, 'brief.md'),
|
|
259
|
+
'research.md': readTaskFile(taskDir, 'research.md'),
|
|
260
|
+
'plan.md': readTaskFile(taskDir, 'plan.md'),
|
|
261
|
+
'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
|
|
262
|
+
'check.result.json': readTaskFile(taskDir, 'check.result.json'),
|
|
263
|
+
'check.md': compactCheckMarkdown({
|
|
264
|
+
checkMarkdown: readTaskFile(taskDir, 'check.md'),
|
|
265
|
+
checkResult: readOptionalJson(taskDir, 'check.result.json'),
|
|
266
|
+
mode: 'standard',
|
|
267
|
+
}),
|
|
268
|
+
'check-resolution.md': compactArtifact(taskDir, 'check-resolution.md', 'standard', ['structured resolution', 'root cause', 'resolution']),
|
|
269
|
+
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
|
|
270
|
+
'execution.md': readTaskFile(taskDir, 'execution.md'),
|
|
271
|
+
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), 'strict'),
|
|
272
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'standard'),
|
|
273
|
+
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
274
|
+
'feedback.md': compactArtifact(taskDir, 'feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
275
|
+
'execution-feedback.md': compactArtifact(taskDir, 'execution-feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
276
|
+
'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), 'standard'),
|
|
277
|
+
}
|
|
274
278
|
: {
|
|
275
279
|
'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
|
|
276
280
|
'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo']),
|
|
@@ -285,8 +289,8 @@ export function buildVerifierLlmInputPack({
|
|
|
285
289
|
'check-resolution.md': truncateMiddle(readTaskFile(taskDir, 'check-resolution.md'), charLimitForMode(selectedMode, 1500, 3500)),
|
|
286
290
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
|
|
287
291
|
'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
|
|
288
|
-
'execution-ledger.json':
|
|
289
|
-
'verify.md':
|
|
292
|
+
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), selectedMode),
|
|
293
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), selectedMode),
|
|
290
294
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
291
295
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
|
|
292
296
|
'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), selectedMode),
|
|
@@ -483,6 +487,72 @@ function compactOrchestrationLog(log, mode) {
|
|
|
483
487
|
return markCompacted('orchestration-log.md', log, compacted);
|
|
484
488
|
}
|
|
485
489
|
|
|
490
|
+
/**
 * Builds a compact excerpt of a previous `verify.md` for the verifier input pack.
 *
 * The excerpt consists of a summary header (verdict, verifier run id, finding
 * count), the selected sections of the markdown limited via
 * `charLimitForMode(mode, 1600, 3200)`, and one block per structured finding
 * from `verifyResult.findings` with truncated fields.
 *
 * @param {string} verifyMarkdown - Full contents of `verify.md`.
 * @param {object|null|undefined} verifyResult - Parsed `verify.result.json`, if any.
 * @param {string} mode - Context mode forwarded to `charLimitForMode`.
 * @returns {string} Empty string for blank markdown; otherwise the result of
 *   `markCompacted('verify.md', ...)` (NOTE(review): `markCompacted` is defined
 *   elsewhere — presumably it returns the compacted text and records the artifact).
 */
function compactVerifierMarkdown(verifyMarkdown, verifyResult, mode) {
  if (verifyMarkdown.trim() === '') {
    return '';
  }

  const reported = verifyResult?.findings;
  const findings = Array.isArray(reported) ? reported : [];

  const header = [
    '# Verify compact excerpt',
    '',
    `Verdict: ${verifyResult?.verdict || 'unknown'}`,
    `Verifier run: ${verifyResult?.verifierRunId || 'unknown'}`,
    `Findings: ${findings.length}`,
    '',
  ];

  const sectionExcerpt = compactMarkdownSections(
    verifyMarkdown,
    ['verdict', 'findings', 'residual risks', 'recommended next step'],
    charLimitForMode(mode, 1600, 3200),
  );

  // Renders one structured finding as a small markdown section.
  const renderFinding = (finding) => {
    const affected = truncateEnd(JSON.stringify(finding.affectedArtifacts || []), 250);
    const refLines = formatRefs(finding.evidenceRefs || [], 220);
    const claim = truncateEnd(finding.claim || '', 700);
    const correction = truncateEnd(finding.expectedCorrection || '', 700);
    return [
      `## ${finding.id || 'finding'}`,
      `- Severity: ${finding.severity || 'unknown'}`,
      `- Category: ${finding.claimCategory || 'unknown'}`,
      `- Affected artifacts: ${affected}`,
      '- Evidence refs:',
      ...refLines,
      `- Claim: ${claim}`,
      `- Expected correction: ${correction}`,
    ].join('\n');
  };

  const findingBlocks = findings.map((finding) => renderFinding(finding));
  const compacted = [...header, sectionExcerpt, '', ...findingBlocks].join('\n').trim();
  return markCompacted('verify.md', verifyMarkdown, compacted);
}
|
|
517
|
+
|
|
518
|
+
/**
 * Produces a size-bounded JSON rendering of an execution ledger for the verifier input.
 *
 * The `changedFiles` and `unrelatedDirtyFiles` lists are cut to a per-mode
 * limit (40 for `fast`, 90 for `standard`, 160 for any other mode); the full
 * counts and the number of omitted entries are reported alongside them.
 *
 * @param {object|null|undefined} ledger - Parsed `execution-ledger.json`.
 * @param {string} mode - Context mode selecting the per-list entry limit.
 * @returns {string} `'{}'` when `ledger` is not a non-array object; otherwise the
 *   result of `markCompacted('execution-ledger.json', ...)` (NOTE(review):
 *   `markCompacted` is defined elsewhere — confirm what it returns).
 */
function compactExecutionLedger(ledger, mode) {
  const isRecord = Boolean(ledger) && typeof ledger === 'object' && !Array.isArray(ledger);
  if (!isRecord) {
    return '{}';
  }

  const gitState = ledger.git;
  const changed = Array.isArray(gitState?.changedFiles) ? gitState.changedFiles : [];
  const unrelated = Array.isArray(gitState?.unrelatedDirtyFiles) ? gitState.unrelatedDirtyFiles : [];

  let maxEntries;
  switch (mode) {
    case 'fast':
      maxEntries = 40;
      break;
    case 'standard':
      maxEntries = 90;
      break;
    default:
      maxEntries = 160;
  }

  const gitSummary = {
    taskRelativePath: gitState?.taskRelativePath || null,
    changedFileCount: changed.length,
    unrelatedDirtyFileCount: unrelated.length,
    changedFiles: changed.slice(0, maxEntries).map((entry) => compactLedgerFile(entry)),
    unrelatedDirtyFiles: unrelated.slice(0, maxEntries).map((entry) => compactLedgerFile(entry)),
    truncatedChangedFiles: Math.max(0, changed.length - maxEntries),
    truncatedUnrelatedDirtyFiles: Math.max(0, unrelated.length - maxEntries),
  };

  const summary = {
    schemaVersion: ledger.schemaVersion,
    taskId: ledger.taskId,
    createdAt: ledger.createdAt,
    planSha: ledger.planSha,
    executionSha: ledger.executionSha,
    git: gitSummary,
    taskArtifacts: Array.isArray(ledger.taskArtifacts) ? ledger.taskArtifacts : [],
    notes: ledger.notes || [],
  };

  const fullText = JSON.stringify(ledger, null, 2);
  const compactText = JSON.stringify(summary, null, 2);
  return markCompacted('execution-ledger.json', fullText, compactText);
}
|
|
545
|
+
|
|
546
|
+
/**
 * Reduces one ledger file entry to the fields included in the compact ledger.
 *
 * The three classification flags are coerced to booleans so absent flags are
 * emitted as `false`. A `null`/`undefined` entry (possible when the ledger JSON
 * on disk is malformed) yields an entry with undefined `path`/`status` and all
 * flags `false` instead of throwing.
 *
 * @param {object|null|undefined} file - Ledger entry, expected to carry `path`,
 *   `status` and the `is*` classification flags.
 * @returns {{path: *, status: *, isTaskArtifact: boolean, isOpsFrameworkFile: boolean, isExecutionMentioned: boolean}}
 */
function compactLedgerFile(file) {
  // Only null/undefined need the fallback; destructuring any other value just yields undefined fields.
  const {
    path: filePath,
    status,
    isTaskArtifact,
    isOpsFrameworkFile,
    isExecutionMentioned,
  } = file ?? {};

  return {
    path: filePath,
    status,
    isTaskArtifact: Boolean(isTaskArtifact),
    isOpsFrameworkFile: Boolean(isOpsFrameworkFile),
    isExecutionMentioned: Boolean(isExecutionMentioned),
  };
}
|
|
555
|
+
|
|
486
556
|
function compactProjectMemory(projectMemory, mode) {
|
|
487
557
|
if (mode === 'strict') {
|
|
488
558
|
return projectMemory;
|
|
@@ -62,9 +62,10 @@ describe('llm input pack utilities', () => {
|
|
|
62
62
|
expect(pack.meta.compactedArtifacts).toContain('orchestration-log.md');
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
it('
|
|
66
|
-
const taskDir = createTask({ orchestrationEvents:
|
|
65
|
+
it('compacts generated review artifacts in strict verifier pack', () => {
|
|
66
|
+
const taskDir = createTask({ orchestrationEvents: 80 });
|
|
67
67
|
const fullLog = fs.readFileSync(path.join(taskDir, 'orchestration-log.md'), 'utf8');
|
|
68
|
+
const fullExecution = fs.readFileSync(path.join(taskDir, 'execution.md'), 'utf8');
|
|
68
69
|
const pack = buildVerifierLlmInputPack({
|
|
69
70
|
taskDir,
|
|
70
71
|
taskId: 'TASK-999-token-pack',
|
|
@@ -74,8 +75,12 @@ describe('llm input pack utilities', () => {
|
|
|
74
75
|
mode: 'strict',
|
|
75
76
|
});
|
|
76
77
|
|
|
77
|
-
expect(pack.input.taskArtifacts['
|
|
78
|
-
expect(pack.
|
|
78
|
+
expect(pack.input.taskArtifacts['execution.md']).toBe(fullExecution);
|
|
79
|
+
expect(pack.input.taskArtifacts['orchestration-log.md']).toContain('# Orchestration Log Compact');
|
|
80
|
+
expect(pack.input.taskArtifacts['orchestration-log.md'].length).toBeLessThan(fullLog.length);
|
|
81
|
+
expect(pack.input.taskArtifacts['verify.md']).toContain('# Verify compact excerpt');
|
|
82
|
+
expect(pack.meta.compactedArtifacts).toContain('orchestration-log.md');
|
|
83
|
+
expect(pack.meta.compactedArtifacts).toContain('verify.md');
|
|
79
84
|
});
|
|
80
85
|
|
|
81
86
|
it('builds bounded fallback mode sequence for context insufficient results', () => {
|
|
@@ -246,6 +251,8 @@ function createTask({ orchestrationEvents = 40 } = {}) {
|
|
|
246
251
|
write(taskDir, 'status.md', '# Status\n\n## Текущий этап\n\nverify\n\n## Следующий шаг\n\nRun verify.');
|
|
247
252
|
write(taskDir, 'check.result.json', JSON.stringify({ verdict: 'ready_for_human_gate', findings: [] }, null, 2));
|
|
248
253
|
write(taskDir, 'check.md', '# Check\n\n## Итоговая оценка\n\nReady.');
|
|
254
|
+
write(taskDir, 'verify.result.json', JSON.stringify({ verdict: 'return_to_execute', findings: [] }, null, 2));
|
|
255
|
+
write(taskDir, 'verify.md', '# Verify\n\n## verdict\n\nreturn_to_execute\n\n## findings\n\nPrevious finding.');
|
|
249
256
|
write(taskDir, 'execution-ledger.json', JSON.stringify({ git: { changedFiles: [] } }, null, 2));
|
|
250
257
|
write(taskDir, 'task-manifest.json', JSON.stringify({ context: { riskTriggers: ['panel-ui'] } }, null, 2));
|
|
251
258
|
write(taskDir, 'orchestration-log.md', [
|
package/bin/run-verify.mjs
CHANGED
|
@@ -45,6 +45,13 @@ async function runMain() {
|
|
|
45
45
|
const taskDir = resolveTaskDir(taskArg);
|
|
46
46
|
const taskId = path.basename(taskDir);
|
|
47
47
|
const verifierConfig = resolveVerifierConfig(args);
|
|
48
|
+
const runStartedAt = new Date();
|
|
49
|
+
appendVerifyTimeline(taskDir, {
|
|
50
|
+
event: 'verify_started',
|
|
51
|
+
mode: verifierConfig.mode,
|
|
52
|
+
provider: verifierConfig.provider,
|
|
53
|
+
model: verifierConfig.model,
|
|
54
|
+
});
|
|
48
55
|
const planSha = hashTaskMarkdown(taskDir, 'plan.md');
|
|
49
56
|
const executionSha = hashTaskMarkdown(taskDir, 'execution.md');
|
|
50
57
|
const taskManifest = readOptionalJson(taskDir, 'task-manifest.json');
|
|
@@ -66,6 +73,12 @@ async function runMain() {
|
|
|
66
73
|
executionSha,
|
|
67
74
|
evidenceIssues,
|
|
68
75
|
});
|
|
76
|
+
appendVerifyTimeline(taskDir, {
|
|
77
|
+
event: 'deterministic_preverify_blocked',
|
|
78
|
+
verdict: 'return_to_execute',
|
|
79
|
+
issues: evidenceIssues.map((issue) => issue.message),
|
|
80
|
+
timing: buildTiming(runStartedAt),
|
|
81
|
+
});
|
|
69
82
|
console.log(`Verifier preflight blocked ${taskId}: return_to_execute`);
|
|
70
83
|
console.log(`- evidenceIssues: ${evidenceIssues.length}`);
|
|
71
84
|
return;
|
|
@@ -79,6 +92,11 @@ async function runMain() {
|
|
|
79
92
|
planSha,
|
|
80
93
|
executionSha,
|
|
81
94
|
});
|
|
95
|
+
appendVerifyTimeline(taskDir, {
|
|
96
|
+
event: 'internal_supervisor_completed',
|
|
97
|
+
verdict: 'pass_with_notes',
|
|
98
|
+
timing: buildTiming(runStartedAt),
|
|
99
|
+
});
|
|
82
100
|
console.log(`Internal supervisor Verify artifact written for ${taskId}: pass_with_notes`);
|
|
83
101
|
return;
|
|
84
102
|
}
|
|
@@ -109,12 +127,26 @@ async function runMain() {
|
|
|
109
127
|
contextMode,
|
|
110
128
|
});
|
|
111
129
|
finalPack = promptPayload.pack;
|
|
130
|
+
appendVerifyTimeline(taskDir, {
|
|
131
|
+
event: 'llm_input_built',
|
|
132
|
+
contextMode,
|
|
133
|
+
verifierRunId,
|
|
134
|
+
packMeta: promptPayload.pack.meta,
|
|
135
|
+
timing: buildTiming(runStartedAt),
|
|
136
|
+
});
|
|
112
137
|
console.log(`Verifier LLM input for ${taskId}`);
|
|
113
138
|
for (const line of summarizePackForConsole(promptPayload.pack)) {
|
|
114
139
|
console.log(line);
|
|
115
140
|
}
|
|
116
141
|
if (promptPayload.pack.meta.overCap && contextMode !== 'strict') {
|
|
117
142
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, 'skipped_over_cap'));
|
|
143
|
+
appendVerifyTimeline(taskDir, {
|
|
144
|
+
event: 'llm_input_over_cap_escalating',
|
|
145
|
+
contextMode,
|
|
146
|
+
verifierRunId,
|
|
147
|
+
packMeta: promptPayload.pack.meta,
|
|
148
|
+
timing: buildTiming(runStartedAt),
|
|
149
|
+
});
|
|
118
150
|
appendOrchestrationLog(taskDir, `verifier LLM input exceeded ${contextMode} cap; rerunning pack builder with expanded context`);
|
|
119
151
|
continue;
|
|
120
152
|
}
|
|
@@ -131,6 +163,13 @@ async function runMain() {
|
|
|
131
163
|
message: `Strict LLM input pack exceeds cap: estimatedTokens=${promptPayload.pack.meta.estimatedTokens}, capTokens=${promptPayload.pack.meta.capTokens}`,
|
|
132
164
|
rawOutput: null,
|
|
133
165
|
});
|
|
166
|
+
appendVerifyTimeline(taskDir, {
|
|
167
|
+
event: 'context_overflow',
|
|
168
|
+
contextMode,
|
|
169
|
+
verifierRunId,
|
|
170
|
+
packMeta: promptPayload.pack.meta,
|
|
171
|
+
timing: buildTiming(runStartedAt),
|
|
172
|
+
});
|
|
134
173
|
recordLlmInputUsage({
|
|
135
174
|
taskDir,
|
|
136
175
|
stage: 'verify',
|
|
@@ -143,6 +182,17 @@ async function runMain() {
|
|
|
143
182
|
}
|
|
144
183
|
|
|
145
184
|
try {
|
|
185
|
+
const providerStartedAt = new Date();
|
|
186
|
+
appendVerifyTimeline(taskDir, {
|
|
187
|
+
event: 'provider_started',
|
|
188
|
+
provider: verifierConfig.provider,
|
|
189
|
+
model: verifierConfig.model,
|
|
190
|
+
reasoningEffort: verifierConfig.reasoningEffort,
|
|
191
|
+
contextMode,
|
|
192
|
+
verifierRunId,
|
|
193
|
+
packMeta: promptPayload.pack.meta,
|
|
194
|
+
timing: buildTiming(runStartedAt),
|
|
195
|
+
});
|
|
146
196
|
output = await runExternalCliChecker({
|
|
147
197
|
providerName: verifierConfig.provider,
|
|
148
198
|
providerConfig: verifierConfig.providerConfig,
|
|
@@ -151,6 +201,15 @@ async function runMain() {
|
|
|
151
201
|
prompt: promptPayload.prompt,
|
|
152
202
|
cwd: repoRoot,
|
|
153
203
|
});
|
|
204
|
+
appendVerifyTimeline(taskDir, {
|
|
205
|
+
event: 'provider_completed',
|
|
206
|
+
provider: verifierConfig.provider,
|
|
207
|
+
model: verifierConfig.model,
|
|
208
|
+
contextMode,
|
|
209
|
+
verifierRunId,
|
|
210
|
+
providerTiming: buildTiming(providerStartedAt),
|
|
211
|
+
timing: buildTiming(runStartedAt),
|
|
212
|
+
});
|
|
154
213
|
} catch (error) {
|
|
155
214
|
writeVerifierFailure({
|
|
156
215
|
taskDir,
|
|
@@ -163,6 +222,16 @@ async function runMain() {
|
|
|
163
222
|
message: error.message,
|
|
164
223
|
rawOutput: error.rawOutput || null,
|
|
165
224
|
});
|
|
225
|
+
appendVerifyTimeline(taskDir, {
|
|
226
|
+
event: 'provider_failed',
|
|
227
|
+
provider: verifierConfig.provider,
|
|
228
|
+
model: verifierConfig.model,
|
|
229
|
+
contextMode,
|
|
230
|
+
verifierRunId,
|
|
231
|
+
failureReason: error.failureReason || 'unknown',
|
|
232
|
+
message: error.message,
|
|
233
|
+
timing: buildTiming(runStartedAt),
|
|
234
|
+
});
|
|
166
235
|
llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, `provider_failed:${error.failureReason || 'unknown'}`));
|
|
167
236
|
recordLlmInputUsage({
|
|
168
237
|
taskDir,
|
|
@@ -205,8 +274,17 @@ async function runMain() {
|
|
|
205
274
|
packMeta: finalPack.meta,
|
|
206
275
|
attempts: llmInputAttempts,
|
|
207
276
|
rerunCount,
|
|
277
|
+
timing: buildTiming(runStartedAt),
|
|
208
278
|
});
|
|
209
279
|
}
|
|
280
|
+
appendVerifyTimeline(taskDir, {
|
|
281
|
+
event: 'verify_completed',
|
|
282
|
+
verdict: verifyResultJson.verdict,
|
|
283
|
+
verifierRunId,
|
|
284
|
+
finalMode: finalPack?.meta?.mode || null,
|
|
285
|
+
finalEstimatedTokens: finalPack?.meta?.estimatedTokens || null,
|
|
286
|
+
timing: buildTiming(runStartedAt),
|
|
287
|
+
});
|
|
210
288
|
appendOrchestrationLog(taskDir, `external CLI verifier completed via ${verifierConfig.provider}; verdict=${verifyResultJson.verdict}; runId=${verifierRunId}`);
|
|
211
289
|
console.log(`Verifier run completed for ${taskId}: ${verifyResultJson.verdict}`);
|
|
212
290
|
console.log(`- verifierRunId: ${verifierRunId}`);
|
|
@@ -216,6 +294,34 @@ async function runMain() {
|
|
|
216
294
|
}
|
|
217
295
|
}
|
|
218
296
|
|
|
297
|
+
/**
 * Describes an elapsed interval as ISO timestamps plus a duration in milliseconds.
 *
 * @param {Date} startedAt - Start of the interval.
 * @param {Date} [completedAt=new Date()] - End of the interval; defaults to the current time.
 * @returns {{startedAt: string, completedAt: string, durationMs: number}} The
 *   duration is clamped to 0 when `completedAt` precedes `startedAt`.
 */
function buildTiming(startedAt, completedAt = new Date()) {
  const startIso = startedAt.toISOString();
  const endIso = completedAt.toISOString();
  const elapsedMs = completedAt.getTime() - startedAt.getTime();

  return {
    startedAt: startIso,
    completedAt: endIso,
    durationMs: Math.max(0, elapsedMs),
  };
}
|
|
304
|
+
|
|
305
|
+
/**
 * Appends one event to the task's `verify-timeline.json` telemetry file.
 *
 * The existing file is read on a best-effort basis: when it is missing,
 * unreadable, not valid JSON, or not a JSON array, the timeline restarts from
 * an empty list. The new entry carries an `at` ISO timestamp followed by the
 * event's own fields (an `at` field on `event` overrides the generated one).
 * The whole list is then written back through `writeTaskFile`
 * (NOTE(review): `writeTaskFile` is defined outside this block — presumably it
 * writes the file inside `taskDir`; confirm).
 *
 * @param {string} taskDir - Task directory holding `verify-timeline.json`.
 * @param {object} event - Event fields to record.
 */
function appendVerifyTimeline(taskDir, event) {
  const timelineFile = path.join(taskDir, 'verify-timeline.json');

  // Loads previously recorded entries, falling back to an empty list on any problem.
  const loadEntries = () => {
    if (!fs.existsSync(timelineFile)) {
      return [];
    }
    try {
      const stored = JSON.parse(fs.readFileSync(timelineFile, 'utf8'));
      return Array.isArray(stored) ? stored : [];
    } catch {
      // Deliberate best-effort: a corrupt timeline must not abort the Verify run.
      return [];
    }
  };

  const entries = loadEntries();
  const stamped = { at: new Date().toISOString(), ...event };
  entries.push(stamped);
  writeTaskFile(taskDir, 'verify-timeline.json', JSON.stringify(entries, null, 2));
}
|
|
324
|
+
|
|
219
325
|
function buildAttemptRecord(packMeta, outcome) {
|
|
220
326
|
return {
|
|
221
327
|
mode: packMeta.mode,
|