@besales/ops-framework 0.1.20 → 0.1.22

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.22
4
+
5
+ - Added a compact `standard_plus` LLM context mode between `standard` and `strict` so near-cap Check/Verify runs avoid full strict context when compact evidence is sufficient.
6
+ - Refreshed `task-manifest.json` after a successful Check run and after Checker cache hits so `lastCheckResult` reflects the latest verdict instead of a stale `return_to_plan` result.
7
+
8
+ ## 0.1.21
9
+
10
+ - Added `verify-timeline.json` telemetry for Verify runs, including deterministic blocks, LLM input sizing, context-mode escalation, provider duration/failure and final verdict.
11
+ - Prevented Verify artifact-growth loops by compacting generated review/log artifacts (`verify.md`, `check.md`, `check-resolution.md`, `orchestration-log.md`) even in strict verifier context while preserving full execution evidence.
12
+ - Compacted `execution-ledger.json` in verifier input and flagged changed files referenced in `execution.md` (backtick-quoted paths containing a `/`) as `isExecutionMentioned` so task-scope files do not remain in `unrelatedDirtyFiles`.
13
+
3
14
  ## 0.1.20
4
15
 
5
16
  - Added deterministic task Check gates for schema/migration plans: a real disposable/scratch database apply path is required before external Check, and Verify/Human Gate evidence must show a successful apply/migrate/psql run rather than static SQL review only.
@@ -33,6 +33,11 @@ export function buildExecutionLedger({
33
33
  }) {
34
34
  const git = collectGitExecutionState({ repoRoot, taskDir });
35
35
  const taskArtifacts = listTaskArtifacts(taskDir);
36
+ const executionMentionedFiles = readExecutionMentionedFiles(taskDir);
37
+ const changedFiles = git.changedFiles.map((file) => ({
38
+ ...file,
39
+ isExecutionMentioned: executionMentionedFiles.has(file.path),
40
+ }));
36
41
 
37
42
  return {
38
43
  schemaVersion: 1,
@@ -42,8 +47,8 @@ export function buildExecutionLedger({
42
47
  executionSha,
43
48
  git: {
44
49
  taskRelativePath: git.taskRelativePath,
45
- changedFiles: compactLedgerFiles(git.changedFiles),
46
- unrelatedDirtyFiles: compactLedgerFiles(git.changedFiles.filter((file) => !file.isTaskArtifact && !file.isOpsFrameworkFile)),
50
+ changedFiles: compactLedgerFiles(changedFiles),
51
+ unrelatedDirtyFiles: compactLedgerFiles(changedFiles.filter((file) => !file.isTaskArtifact && !file.isOpsFrameworkFile && !file.isExecutionMentioned)),
47
52
  },
48
53
  taskArtifacts,
49
54
  notes: [
@@ -59,6 +64,7 @@ function compactLedgerFiles(files) {
59
64
  status: file.status,
60
65
  isTaskArtifact: file.isTaskArtifact,
61
66
  isOpsFrameworkFile: file.isOpsFrameworkFile,
67
+ isExecutionMentioned: Boolean(file.isExecutionMentioned),
62
68
  }));
63
69
  }
64
70
 
@@ -145,6 +151,20 @@ function listTaskArtifacts(taskDir) {
145
151
  }));
146
152
  }
147
153
 
154
+ function readExecutionMentionedFiles(taskDir) {
155
+ const executionPath = path.join(taskDir, 'execution.md');
156
+ if (!fs.existsSync(executionPath)) {
157
+ return new Set();
158
+ }
159
+ const content = fs.readFileSync(executionPath, 'utf8');
160
+ const refs = new Set();
161
+ const pathPattern = /`([^`\n]+\/[^`\n]+)`/g;
162
+ for (const match of content.matchAll(pathPattern)) {
163
+ refs.add(normalizePath(match[1].trim()));
164
+ }
165
+ return refs;
166
+ }
167
+
148
168
  function runGitLines(repoRoot, args) {
149
169
  const result = spawnSync('git', args, {
150
170
  cwd: repoRoot,
@@ -1,5 +1,10 @@
1
1
  import { describe, expect, it } from 'vitest';
2
+ import fs from 'node:fs';
3
+ import os from 'node:os';
4
+ import path from 'node:path';
5
+ import { spawnSync } from 'node:child_process';
2
6
  import {
7
+ buildExecutionLedger,
3
8
  mergeChangedFiles,
4
9
  parseGitStatusLine,
5
10
  } from './execution-ledger-utils.mjs';
@@ -71,4 +76,59 @@ describe('execution ledger utils', () => {
71
76
  }),
72
77
  ]);
73
78
  });
79
+
80
+ it('does not classify execution-mentioned files as unrelated dirty files', () => {
81
+ const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ops-ledger-repo-'));
82
+ run(repoRoot, ['init']);
83
+ run(repoRoot, ['config', 'user.email', 'test@example.com']);
84
+ run(repoRoot, ['config', 'user.name', 'Test User']);
85
+ fs.mkdirSync(path.join(repoRoot, 'apps', 'api'), { recursive: true });
86
+ fs.mkdirSync(path.join(repoRoot, 'docs'), { recursive: true });
87
+ fs.writeFileSync(path.join(repoRoot, 'apps', 'api', 'package.json'), '{}\n');
88
+ fs.writeFileSync(path.join(repoRoot, 'docs', 'source.md'), 'source\n');
89
+ run(repoRoot, ['add', '.']);
90
+ run(repoRoot, ['commit', '-m', 'initial']);
91
+
92
+ fs.writeFileSync(path.join(repoRoot, 'apps', 'api', 'package.json'), '{"type":"module"}\n');
93
+ fs.writeFileSync(path.join(repoRoot, 'docs', 'source.md'), 'dirty source\n');
94
+
95
+ const taskDir = path.join(repoRoot, 'ops', 'agent-pipeline', 'tasks', 'TASK-999-example');
96
+ fs.mkdirSync(taskDir, { recursive: true });
97
+ fs.writeFileSync(path.join(taskDir, 'execution.md'), [
98
+ '# Execution',
99
+ '',
100
+ '## Измененные файлы',
101
+ '',
102
+ '| File | Change summary | Planned item / reason |',
103
+ '| --- | --- | --- |',
104
+ '| `apps/api/package.json` | package setup | planned |',
105
+ ].join('\n'));
106
+
107
+ const ledger = buildExecutionLedger({
108
+ taskId: 'TASK-999-example',
109
+ taskDir,
110
+ repoRoot,
111
+ planSha: 'sha256:plan',
112
+ executionSha: 'sha256:execution',
113
+ });
114
+
115
+ expect(ledger.git.changedFiles).toContainEqual(expect.objectContaining({
116
+ path: 'apps/api/package.json',
117
+ isExecutionMentioned: true,
118
+ }));
119
+ expect(ledger.git.unrelatedDirtyFiles.map((file) => file.path)).not.toContain('apps/api/package.json');
120
+ expect(ledger.git.unrelatedDirtyFiles.map((file) => file.path)).toContain('docs/source.md');
121
+
122
+ fs.rmSync(repoRoot, { recursive: true, force: true });
123
+ });
74
124
  });
125
+
126
+ function run(cwd, args) {
127
+ const result = spawnSync('git', args, {
128
+ cwd,
129
+ encoding: 'utf8',
130
+ });
131
+ if (result.status !== 0) {
132
+ throw new Error(`git ${args.join(' ')} failed: ${result.stdout}${result.stderr}`);
133
+ }
134
+ }
@@ -8,10 +8,11 @@ import {
8
8
  renderRelevantPlaybooks,
9
9
  } from './check-context-utils.mjs';
10
10
 
11
- export const LLM_CONTEXT_MODES = ['fast', 'standard', 'strict'];
11
+ export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict'];
12
12
  export const LLM_CONTEXT_CAPS = {
13
13
  fast: 8000,
14
14
  standard: 20000,
15
+ standard_plus: 26000,
15
16
  strict: 45000,
16
17
  };
17
18
 
@@ -20,6 +21,7 @@ const PACK_CAP_SAFETY_MULTIPLIER = 1.15;
20
21
  const MEMORY_MAX_CHARS = {
21
22
  fast: 3000,
22
23
  standard: 3500,
24
+ standard_plus: 4500,
23
25
  strict: Infinity,
24
26
  };
25
27
 
@@ -163,6 +165,9 @@ export function nextLlmContextMode(mode) {
163
165
  return 'standard';
164
166
  }
165
167
  if (mode === 'standard') {
168
+ return 'standard_plus';
169
+ }
170
+ if (mode === 'standard_plus') {
166
171
  return 'strict';
167
172
  }
168
173
  return null;
@@ -215,8 +220,8 @@ export function buildCheckerLlmInputPack({
215
220
  fullContextAvailableViaStrict: selectedMode !== 'strict',
216
221
  contextInsufficientFallback: selectedMode === 'strict' ? 'stop_and_report' : `rerun_${nextLlmContextMode(selectedMode)}`,
217
222
  },
218
- checkEvidence: compactGeneratedMarkdown('check-evidence.md', checkEvidence, selectedMode, { fast: 2800, standard: 4000 }),
219
- checkerContextPack: compactGeneratedMarkdown('checker-context-pack.md', checkerContextPack, selectedMode, { fast: 3300, standard: 4600 }),
223
+ checkEvidence: compactGeneratedMarkdown('check-evidence.md', checkEvidence, selectedMode, { fast: 2800, standard: 4000, standard_plus: 5600 }),
224
+ checkerContextPack: compactGeneratedMarkdown('checker-context-pack.md', checkerContextPack, selectedMode, { fast: 3300, standard: 4600, standard_plus: 6200 }),
220
225
  relevantPlaybooks: selectedMode === 'strict'
221
226
  ? renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'strict' })
222
227
  : renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'compact' }),
@@ -254,23 +259,27 @@ export function buildVerifierLlmInputPack({
254
259
  }) {
255
260
  const selectedMode = normalizeLlmContextMode(mode) || 'standard';
256
261
  const taskArtifacts = selectedMode === 'strict'
257
- ? readArtifacts(taskDir, [
258
- 'brief.md',
259
- 'research.md',
260
- 'plan.md',
261
- 'task-manifest.json',
262
- 'check.result.json',
263
- 'check.md',
264
- 'check-resolution.md',
265
- 'human-gate-summary.md',
266
- 'execution.md',
267
- 'execution-ledger.json',
268
- 'verify.md',
269
- 'status.md',
270
- 'feedback.md',
271
- 'execution-feedback.md',
272
- 'orchestration-log.md',
273
- ], 'full')
262
+ ? {
263
+ 'brief.md': readTaskFile(taskDir, 'brief.md'),
264
+ 'research.md': readTaskFile(taskDir, 'research.md'),
265
+ 'plan.md': readTaskFile(taskDir, 'plan.md'),
266
+ 'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
267
+ 'check.result.json': readTaskFile(taskDir, 'check.result.json'),
268
+ 'check.md': compactCheckMarkdown({
269
+ checkMarkdown: readTaskFile(taskDir, 'check.md'),
270
+ checkResult: readOptionalJson(taskDir, 'check.result.json'),
271
+ mode: 'standard',
272
+ }),
273
+ 'check-resolution.md': compactArtifact(taskDir, 'check-resolution.md', 'standard', ['structured resolution', 'root cause', 'resolution']),
274
+ 'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
275
+ 'execution.md': readTaskFile(taskDir, 'execution.md'),
276
+ 'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), 'strict'),
277
+ 'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'standard'),
278
+ 'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
279
+ 'feedback.md': compactArtifact(taskDir, 'feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
280
+ 'execution-feedback.md': compactArtifact(taskDir, 'execution-feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
281
+ 'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), 'standard'),
282
+ }
274
283
  : {
275
284
  'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
276
285
  'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo']),
@@ -285,8 +294,8 @@ export function buildVerifierLlmInputPack({
285
294
  'check-resolution.md': truncateMiddle(readTaskFile(taskDir, 'check-resolution.md'), charLimitForMode(selectedMode, 1500, 3500)),
286
295
  'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
287
296
  'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
288
- 'execution-ledger.json': readTaskFile(taskDir, 'execution-ledger.json'),
289
- 'verify.md': compactArtifact(taskDir, 'verify.md', selectedMode, ['verdict', 'findings', 'residual risks', 'recommended next step']),
297
+ 'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), selectedMode),
298
+ 'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), selectedMode),
290
299
  'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
291
300
  'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
292
301
  'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), selectedMode),
@@ -483,6 +492,72 @@ function compactOrchestrationLog(log, mode) {
483
492
  return markCompacted('orchestration-log.md', log, compacted);
484
493
  }
485
494
 
495
+ function compactVerifierMarkdown(verifyMarkdown, verifyResult, mode) {
496
+ if (!verifyMarkdown.trim()) {
497
+ return '';
498
+ }
499
+ const findings = Array.isArray(verifyResult?.findings) ? verifyResult.findings : [];
500
+ const lines = [
501
+ '# Verify compact excerpt',
502
+ '',
503
+ `Verdict: ${verifyResult?.verdict || 'unknown'}`,
504
+ `Verifier run: ${verifyResult?.verifierRunId || 'unknown'}`,
505
+ `Findings: ${findings.length}`,
506
+ '',
507
+ compactMarkdownSections(verifyMarkdown, ['verdict', 'findings', 'residual risks', 'recommended next step'], charLimitForMode(mode, 1600, 3200)),
508
+ '',
509
+ ...findings.map((finding) => [
510
+ `## ${finding.id || 'finding'}`,
511
+ `- Severity: ${finding.severity || 'unknown'}`,
512
+ `- Category: ${finding.claimCategory || 'unknown'}`,
513
+ `- Affected artifacts: ${truncateEnd(JSON.stringify(finding.affectedArtifacts || []), 250)}`,
514
+ '- Evidence refs:',
515
+ ...formatRefs(finding.evidenceRefs || [], 220),
516
+ `- Claim: ${truncateEnd(finding.claim || '', 700)}`,
517
+ `- Expected correction: ${truncateEnd(finding.expectedCorrection || '', 700)}`,
518
+ ].join('\n')),
519
+ ];
520
+ return markCompacted('verify.md', verifyMarkdown, lines.join('\n').trim());
521
+ }
522
+
523
+ function compactExecutionLedger(ledger, mode) {
524
+ if (!ledger || typeof ledger !== 'object' || Array.isArray(ledger)) {
525
+ return '{}';
526
+ }
527
+ const changedFiles = Array.isArray(ledger.git?.changedFiles) ? ledger.git.changedFiles : [];
528
+ const unrelatedDirtyFiles = Array.isArray(ledger.git?.unrelatedDirtyFiles) ? ledger.git.unrelatedDirtyFiles : [];
529
+ const limit = mode === 'fast' ? 40 : mode === 'standard' ? 90 : mode === 'standard_plus' ? 120 : 160;
530
+ const compact = {
531
+ schemaVersion: ledger.schemaVersion,
532
+ taskId: ledger.taskId,
533
+ createdAt: ledger.createdAt,
534
+ planSha: ledger.planSha,
535
+ executionSha: ledger.executionSha,
536
+ git: {
537
+ taskRelativePath: ledger.git?.taskRelativePath || null,
538
+ changedFileCount: changedFiles.length,
539
+ unrelatedDirtyFileCount: unrelatedDirtyFiles.length,
540
+ changedFiles: changedFiles.slice(0, limit).map(compactLedgerFile),
541
+ unrelatedDirtyFiles: unrelatedDirtyFiles.slice(0, limit).map(compactLedgerFile),
542
+ truncatedChangedFiles: Math.max(0, changedFiles.length - limit),
543
+ truncatedUnrelatedDirtyFiles: Math.max(0, unrelatedDirtyFiles.length - limit),
544
+ },
545
+ taskArtifacts: Array.isArray(ledger.taskArtifacts) ? ledger.taskArtifacts : [],
546
+ notes: ledger.notes || [],
547
+ };
548
+ return markCompacted('execution-ledger.json', JSON.stringify(ledger, null, 2), JSON.stringify(compact, null, 2));
549
+ }
550
+
551
+ function compactLedgerFile(file) {
552
+ return {
553
+ path: file.path,
554
+ status: file.status,
555
+ isTaskArtifact: Boolean(file.isTaskArtifact),
556
+ isOpsFrameworkFile: Boolean(file.isOpsFrameworkFile),
557
+ isExecutionMentioned: Boolean(file.isExecutionMentioned),
558
+ };
559
+ }
560
+
486
561
  function compactProjectMemory(projectMemory, mode) {
487
562
  if (mode === 'strict') {
488
563
  return projectMemory;
@@ -510,7 +585,11 @@ function compactGeneratedMarkdown(fileName, content, mode, limits) {
510
585
  if (mode === 'strict' || !content) {
511
586
  return content;
512
587
  }
513
- const limit = mode === 'fast' ? limits.fast : limits.standard;
588
+ const limit = mode === 'fast'
589
+ ? limits.fast
590
+ : mode === 'standard_plus'
591
+ ? limits.standard_plus || Math.ceil(limits.standard * 1.35)
592
+ : limits.standard;
514
593
  return markCompacted(fileName, content, truncateMiddle(content, limit));
515
594
  }
516
595
 
@@ -637,7 +716,13 @@ function isProtectedSection(value) {
637
716
  }
638
717
 
639
718
  function charLimitForMode(mode, fastChars, standardChars) {
640
- return mode === 'fast' ? fastChars : standardChars;
719
+ if (mode === 'fast') {
720
+ return fastChars;
721
+ }
722
+ if (mode === 'standard_plus') {
723
+ return Math.ceil(standardChars * 1.25);
724
+ }
725
+ return standardChars;
641
726
  }
642
727
 
643
728
  function readOptionalJson(taskDir, fileName) {
@@ -62,9 +62,10 @@ describe('llm input pack utilities', () => {
62
62
  expect(pack.meta.compactedArtifacts).toContain('orchestration-log.md');
63
63
  });
64
64
 
65
- it('keeps full artifacts in strict verifier pack', () => {
66
- const taskDir = createTask({ orchestrationEvents: 8 });
65
+ it('compacts generated review artifacts in strict verifier pack', () => {
66
+ const taskDir = createTask({ orchestrationEvents: 80 });
67
67
  const fullLog = fs.readFileSync(path.join(taskDir, 'orchestration-log.md'), 'utf8');
68
+ const fullExecution = fs.readFileSync(path.join(taskDir, 'execution.md'), 'utf8');
68
69
  const pack = buildVerifierLlmInputPack({
69
70
  taskDir,
70
71
  taskId: 'TASK-999-token-pack',
@@ -74,17 +75,49 @@ describe('llm input pack utilities', () => {
74
75
  mode: 'strict',
75
76
  });
76
77
 
77
- expect(pack.input.taskArtifacts['orchestration-log.md']).toBe(fullLog);
78
- expect(pack.meta.compactedArtifacts).not.toContain('orchestration-log.md');
78
+ expect(pack.input.taskArtifacts['execution.md']).toBe(fullExecution);
79
+ expect(pack.input.taskArtifacts['orchestration-log.md']).toContain('# Orchestration Log Compact');
80
+ expect(pack.input.taskArtifacts['orchestration-log.md'].length).toBeLessThan(fullLog.length);
81
+ expect(pack.input.taskArtifacts['verify.md']).toContain('# Verify compact excerpt');
82
+ expect(pack.meta.compactedArtifacts).toContain('orchestration-log.md');
83
+ expect(pack.meta.compactedArtifacts).toContain('verify.md');
79
84
  });
80
85
 
81
86
  it('builds bounded fallback mode sequence for context insufficient results', () => {
82
- expect(buildContextModeSequence('fast')).toEqual(['fast', 'standard', 'strict']);
83
- expect(buildContextModeSequence('standard')).toEqual(['standard', 'strict']);
87
+ expect(buildContextModeSequence('fast')).toEqual(['fast', 'standard', 'standard_plus', 'strict']);
88
+ expect(buildContextModeSequence('standard')).toEqual(['standard', 'standard_plus', 'strict']);
89
+ expect(buildContextModeSequence('standard_plus')).toEqual(['standard_plus', 'strict']);
84
90
  expect(buildContextModeSequence('strict')).toEqual(['strict']);
85
91
  expect(isContextInsufficientResult({ verdict: 'context_insufficient' })).toBe(true);
86
92
  });
87
93
 
94
+ it('provides a compact standard_plus check mode before strict', () => {
95
+ const taskDir = createTask();
96
+ const pack = buildCheckerLlmInputPack({
97
+ taskDir,
98
+ taskId: 'TASK-999-token-pack',
99
+ checkerPromptSha: 'sha256:test',
100
+ cacheKey: { test: true },
101
+ checkContext: {
102
+ planSha: 'sha256:plan',
103
+ memorySha: 'sha256:memory',
104
+ riskProfile: 'high',
105
+ riskTriggers: ['source-sync-provider', 'prisma-schema'],
106
+ },
107
+ checkEvidence: '# Evidence\n\nok\n'.repeat(900),
108
+ checkerContextPack: '# Checker Context Pack\n\nok\n'.repeat(900),
109
+ taskManifest: '{}',
110
+ projectMemory: [],
111
+ mode: 'standard_plus',
112
+ });
113
+
114
+ expect(pack.meta.mode).toBe('standard_plus');
115
+ expect(pack.meta.capTokens).toBe(26000);
116
+ expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
117
+ expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
118
+ expect(pack.meta.compactedArtifacts).toContain('plan.md');
119
+ });
120
+
88
121
  it('preserves protected verification sections when compacting long plans', () => {
89
122
  const taskDir = createTask();
90
123
  const longPlan = [
@@ -246,6 +279,8 @@ function createTask({ orchestrationEvents = 40 } = {}) {
246
279
  write(taskDir, 'status.md', '# Status\n\n## Текущий этап\n\nverify\n\n## Следующий шаг\n\nRun verify.');
247
280
  write(taskDir, 'check.result.json', JSON.stringify({ verdict: 'ready_for_human_gate', findings: [] }, null, 2));
248
281
  write(taskDir, 'check.md', '# Check\n\n## Итоговая оценка\n\nReady.');
282
+ write(taskDir, 'verify.result.json', JSON.stringify({ verdict: 'return_to_execute', findings: [] }, null, 2));
283
+ write(taskDir, 'verify.md', '# Verify\n\n## verdict\n\nreturn_to_execute\n\n## findings\n\nPrevious finding.');
249
284
  write(taskDir, 'execution-ledger.json', JSON.stringify({ git: { changedFiles: [] } }, null, 2));
250
285
  write(taskDir, 'task-manifest.json', JSON.stringify({ context: { riskTriggers: ['panel-ui'] } }, null, 2));
251
286
  write(taskDir, 'orchestration-log.md', [
package/bin/run-check.mjs CHANGED
@@ -226,6 +226,7 @@ async function runMain() {
226
226
  rerunCount,
227
227
  timing: buildTiming(runStartedAt),
228
228
  });
229
+ refreshTaskManifestAfterCheck(taskDir);
229
230
  console.log(`Checker cache hit for ${taskId}: ${cacheKeySha}`);
230
231
  return;
231
232
  }
@@ -329,12 +330,22 @@ async function runMain() {
329
330
  rerunCount,
330
331
  timing: buildTiming(runStartedAt),
331
332
  });
333
+ refreshTaskManifestAfterCheck(taskDir);
332
334
  runValidator(taskArg);
333
335
  console.log(`Checker run completed for ${taskId}: ${providerOutput.checkResultJson?.verdict}`);
334
336
  console.log(`- finalLlmInputMode: ${promptPayload.pack.meta.mode}`);
335
337
  console.log(`- finalEstimatedInputTokens: ${promptPayload.pack.meta.estimatedTokens}`);
336
338
  }
337
339
 
340
+ function refreshTaskManifestAfterCheck(taskDir) {
341
+ const manifest = buildTaskManifest({ taskDir });
342
+ writeTaskManifest(taskDir, manifest);
343
+ appendCheckTimeline(taskDir, {
344
+ event: 'task_manifest_refreshed_after_check',
345
+ lastCheckResult: manifest.lastCheckResult,
346
+ });
347
+ }
348
+
338
349
  function appendCheckTimeline(taskDir, event) {
339
350
  const timelinePath = path.join(taskDir, 'check-timeline.json');
340
351
  let existing = [];
@@ -45,6 +45,13 @@ async function runMain() {
45
45
  const taskDir = resolveTaskDir(taskArg);
46
46
  const taskId = path.basename(taskDir);
47
47
  const verifierConfig = resolveVerifierConfig(args);
48
+ const runStartedAt = new Date();
49
+ appendVerifyTimeline(taskDir, {
50
+ event: 'verify_started',
51
+ mode: verifierConfig.mode,
52
+ provider: verifierConfig.provider,
53
+ model: verifierConfig.model,
54
+ });
48
55
  const planSha = hashTaskMarkdown(taskDir, 'plan.md');
49
56
  const executionSha = hashTaskMarkdown(taskDir, 'execution.md');
50
57
  const taskManifest = readOptionalJson(taskDir, 'task-manifest.json');
@@ -66,6 +73,12 @@ async function runMain() {
66
73
  executionSha,
67
74
  evidenceIssues,
68
75
  });
76
+ appendVerifyTimeline(taskDir, {
77
+ event: 'deterministic_preverify_blocked',
78
+ verdict: 'return_to_execute',
79
+ issues: evidenceIssues.map((issue) => issue.message),
80
+ timing: buildTiming(runStartedAt),
81
+ });
69
82
  console.log(`Verifier preflight blocked ${taskId}: return_to_execute`);
70
83
  console.log(`- evidenceIssues: ${evidenceIssues.length}`);
71
84
  return;
@@ -79,6 +92,11 @@ async function runMain() {
79
92
  planSha,
80
93
  executionSha,
81
94
  });
95
+ appendVerifyTimeline(taskDir, {
96
+ event: 'internal_supervisor_completed',
97
+ verdict: 'pass_with_notes',
98
+ timing: buildTiming(runStartedAt),
99
+ });
82
100
  console.log(`Internal supervisor Verify artifact written for ${taskId}: pass_with_notes`);
83
101
  return;
84
102
  }
@@ -109,12 +127,26 @@ async function runMain() {
109
127
  contextMode,
110
128
  });
111
129
  finalPack = promptPayload.pack;
130
+ appendVerifyTimeline(taskDir, {
131
+ event: 'llm_input_built',
132
+ contextMode,
133
+ verifierRunId,
134
+ packMeta: promptPayload.pack.meta,
135
+ timing: buildTiming(runStartedAt),
136
+ });
112
137
  console.log(`Verifier LLM input for ${taskId}`);
113
138
  for (const line of summarizePackForConsole(promptPayload.pack)) {
114
139
  console.log(line);
115
140
  }
116
141
  if (promptPayload.pack.meta.overCap && contextMode !== 'strict') {
117
142
  llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, 'skipped_over_cap'));
143
+ appendVerifyTimeline(taskDir, {
144
+ event: 'llm_input_over_cap_escalating',
145
+ contextMode,
146
+ verifierRunId,
147
+ packMeta: promptPayload.pack.meta,
148
+ timing: buildTiming(runStartedAt),
149
+ });
118
150
  appendOrchestrationLog(taskDir, `verifier LLM input exceeded ${contextMode} cap; rerunning pack builder with expanded context`);
119
151
  continue;
120
152
  }
@@ -131,6 +163,13 @@ async function runMain() {
131
163
  message: `Strict LLM input pack exceeds cap: estimatedTokens=${promptPayload.pack.meta.estimatedTokens}, capTokens=${promptPayload.pack.meta.capTokens}`,
132
164
  rawOutput: null,
133
165
  });
166
+ appendVerifyTimeline(taskDir, {
167
+ event: 'context_overflow',
168
+ contextMode,
169
+ verifierRunId,
170
+ packMeta: promptPayload.pack.meta,
171
+ timing: buildTiming(runStartedAt),
172
+ });
134
173
  recordLlmInputUsage({
135
174
  taskDir,
136
175
  stage: 'verify',
@@ -143,6 +182,17 @@ async function runMain() {
143
182
  }
144
183
 
145
184
  try {
185
+ const providerStartedAt = new Date();
186
+ appendVerifyTimeline(taskDir, {
187
+ event: 'provider_started',
188
+ provider: verifierConfig.provider,
189
+ model: verifierConfig.model,
190
+ reasoningEffort: verifierConfig.reasoningEffort,
191
+ contextMode,
192
+ verifierRunId,
193
+ packMeta: promptPayload.pack.meta,
194
+ timing: buildTiming(runStartedAt),
195
+ });
146
196
  output = await runExternalCliChecker({
147
197
  providerName: verifierConfig.provider,
148
198
  providerConfig: verifierConfig.providerConfig,
@@ -151,6 +201,15 @@ async function runMain() {
151
201
  prompt: promptPayload.prompt,
152
202
  cwd: repoRoot,
153
203
  });
204
+ appendVerifyTimeline(taskDir, {
205
+ event: 'provider_completed',
206
+ provider: verifierConfig.provider,
207
+ model: verifierConfig.model,
208
+ contextMode,
209
+ verifierRunId,
210
+ providerTiming: buildTiming(providerStartedAt),
211
+ timing: buildTiming(runStartedAt),
212
+ });
154
213
  } catch (error) {
155
214
  writeVerifierFailure({
156
215
  taskDir,
@@ -163,6 +222,16 @@ async function runMain() {
163
222
  message: error.message,
164
223
  rawOutput: error.rawOutput || null,
165
224
  });
225
+ appendVerifyTimeline(taskDir, {
226
+ event: 'provider_failed',
227
+ provider: verifierConfig.provider,
228
+ model: verifierConfig.model,
229
+ contextMode,
230
+ verifierRunId,
231
+ failureReason: error.failureReason || 'unknown',
232
+ message: error.message,
233
+ timing: buildTiming(runStartedAt),
234
+ });
166
235
  llmInputAttempts.push(buildAttemptRecord(promptPayload.pack.meta, `provider_failed:${error.failureReason || 'unknown'}`));
167
236
  recordLlmInputUsage({
168
237
  taskDir,
@@ -205,8 +274,17 @@ async function runMain() {
205
274
  packMeta: finalPack.meta,
206
275
  attempts: llmInputAttempts,
207
276
  rerunCount,
277
+ timing: buildTiming(runStartedAt),
208
278
  });
209
279
  }
280
+ appendVerifyTimeline(taskDir, {
281
+ event: 'verify_completed',
282
+ verdict: verifyResultJson.verdict,
283
+ verifierRunId,
284
+ finalMode: finalPack?.meta?.mode || null,
285
+ finalEstimatedTokens: finalPack?.meta?.estimatedTokens || null,
286
+ timing: buildTiming(runStartedAt),
287
+ });
210
288
  appendOrchestrationLog(taskDir, `external CLI verifier completed via ${verifierConfig.provider}; verdict=${verifyResultJson.verdict}; runId=${verifierRunId}`);
211
289
  console.log(`Verifier run completed for ${taskId}: ${verifyResultJson.verdict}`);
212
290
  console.log(`- verifierRunId: ${verifierRunId}`);
@@ -216,6 +294,34 @@ async function runMain() {
216
294
  }
217
295
  }
218
296
 
297
+ function buildTiming(startedAt, completedAt = new Date()) {
298
+ return {
299
+ startedAt: startedAt.toISOString(),
300
+ completedAt: completedAt.toISOString(),
301
+ durationMs: Math.max(0, completedAt.getTime() - startedAt.getTime()),
302
+ };
303
+ }
304
+
305
+ function appendVerifyTimeline(taskDir, event) {
306
+ const timelinePath = path.join(taskDir, 'verify-timeline.json');
307
+ let existing = [];
308
+ if (fs.existsSync(timelinePath)) {
309
+ try {
310
+ const parsed = JSON.parse(fs.readFileSync(timelinePath, 'utf8'));
311
+ if (Array.isArray(parsed)) {
312
+ existing = parsed;
313
+ }
314
+ } catch {
315
+ existing = [];
316
+ }
317
+ }
318
+ existing.push({
319
+ at: new Date().toISOString(),
320
+ ...event,
321
+ });
322
+ writeTaskFile(taskDir, 'verify-timeline.json', JSON.stringify(existing, null, 2));
323
+ }
324
+
219
325
  function buildAttemptRecord(packMeta, outcome) {
220
326
  return {
221
327
  mode: packMeta.mode,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@besales/ops-framework",
3
- "version": "0.1.20",
3
+ "version": "0.1.22",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "ops-agent": "bin/ops-agent.mjs"