@weldr/runr 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +216 -0
- package/LICENSE +190 -0
- package/NOTICE +4 -0
- package/README.md +200 -0
- package/dist/cli.js +464 -0
- package/dist/commands/__tests__/report.test.js +202 -0
- package/dist/commands/compare.js +168 -0
- package/dist/commands/doctor.js +124 -0
- package/dist/commands/follow.js +251 -0
- package/dist/commands/gc.js +161 -0
- package/dist/commands/guards-only.js +89 -0
- package/dist/commands/metrics.js +441 -0
- package/dist/commands/orchestrate.js +800 -0
- package/dist/commands/paths.js +31 -0
- package/dist/commands/preflight.js +152 -0
- package/dist/commands/report.js +478 -0
- package/dist/commands/resume.js +149 -0
- package/dist/commands/run.js +538 -0
- package/dist/commands/status.js +189 -0
- package/dist/commands/summarize.js +220 -0
- package/dist/commands/version.js +82 -0
- package/dist/commands/wait.js +170 -0
- package/dist/config/__tests__/presets.test.js +104 -0
- package/dist/config/load.js +66 -0
- package/dist/config/schema.js +160 -0
- package/dist/context/__tests__/artifact.test.js +130 -0
- package/dist/context/__tests__/pack.test.js +191 -0
- package/dist/context/artifact.js +67 -0
- package/dist/context/index.js +2 -0
- package/dist/context/pack.js +273 -0
- package/dist/diagnosis/analyzer.js +678 -0
- package/dist/diagnosis/formatter.js +136 -0
- package/dist/diagnosis/index.js +6 -0
- package/dist/diagnosis/types.js +7 -0
- package/dist/env/__tests__/fingerprint.test.js +116 -0
- package/dist/env/fingerprint.js +111 -0
- package/dist/orchestrator/__tests__/policy.test.js +185 -0
- package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
- package/dist/orchestrator/artifacts.js +405 -0
- package/dist/orchestrator/state-machine.js +646 -0
- package/dist/orchestrator/types.js +88 -0
- package/dist/ownership/normalize.js +45 -0
- package/dist/repo/context.js +90 -0
- package/dist/repo/git.js +13 -0
- package/dist/repo/worktree.js +239 -0
- package/dist/store/run-store.js +107 -0
- package/dist/store/run-utils.js +69 -0
- package/dist/store/runs-root.js +126 -0
- package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
- package/dist/supervisor/__tests__/ownership.test.js +103 -0
- package/dist/supervisor/__tests__/state-machine.test.js +290 -0
- package/dist/supervisor/collision.js +240 -0
- package/dist/supervisor/evidence-gate.js +98 -0
- package/dist/supervisor/planner.js +18 -0
- package/dist/supervisor/runner.js +1562 -0
- package/dist/supervisor/scope-guard.js +55 -0
- package/dist/supervisor/state-machine.js +121 -0
- package/dist/supervisor/verification-policy.js +64 -0
- package/dist/tasks/task-metadata.js +72 -0
- package/dist/types/schemas.js +1 -0
- package/dist/verification/engine.js +49 -0
- package/dist/workers/__tests__/claude.test.js +88 -0
- package/dist/workers/__tests__/codex.test.js +81 -0
- package/dist/workers/claude.js +119 -0
- package/dist/workers/codex.js +162 -0
- package/dist/workers/json.js +22 -0
- package/dist/workers/mock.js +193 -0
- package/dist/workers/prompts.js +98 -0
- package/dist/workers/schemas.js +39 -0
- package/package.json +47 -0
- package/templates/prompts/implementer.md +70 -0
- package/templates/prompts/planner.md +62 -0
- package/templates/prompts/reviewer.md +77 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* runr paths command - Display canonical runr directory paths.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for all runr paths. Use this in scripts,
|
|
5
|
+
* CI, and external tools instead of hardcoding paths.
|
|
6
|
+
*/
|
|
7
|
+
import { getRunrPaths } from '../store/runs-root.js';
|
|
8
|
+
/**
 * Execute the paths command.
 *
 * Looks up the runr paths for `options.repo` and prints them: as
 * pretty-printed JSON when `options.json` is truthy, otherwise as a short
 * human-readable listing followed by a yellow warning when `using_legacy`
 * is set. Always finishes by setting `process.exitCode` to 0.
 *
 * @param {{ repo: *, json?: boolean }} options - `repo` is forwarded to
 *   `getRunrPaths`; `json` selects the output format.
 * @returns {Promise<void>}
 */
export async function pathsCommand(options) {
    const paths = getRunrPaths(options.repo);
    if (!options.json) {
        const report = [
            'Runr Paths',
            '==========',
            '',
            `repo_root: ${paths.repo_root}`,
            `runr_root: ${paths.runr_root}`,
            `runs_dir: ${paths.runs_dir}`,
            `worktrees_dir: ${paths.worktrees_dir}`,
            `orchestrations_dir: ${paths.orchestrations_dir}`
        ];
        if (paths.using_legacy) {
            // Blank separator, then the warning wrapped in ANSI yellow.
            report.push('', '\x1b[33m⚠ Using legacy .agent/ directory. Consider migrating to .runr/\x1b[0m');
        }
        // One console.log call per line, as before.
        for (const line of report) {
            console.log(line);
        }
    }
    else {
        console.log(JSON.stringify(paths, null, 2));
    }
    process.exitCode = 0;
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { execa } from 'execa';
|
|
4
|
+
import { buildRepoContext } from '../repo/context.js';
|
|
5
|
+
import { checkLockfiles, checkScope, partitionChangedFiles } from '../supervisor/scope-guard.js';
|
|
6
|
+
import { selectTiersWithReasons } from '../supervisor/verification-policy.js';
|
|
7
|
+
import { pingClaude } from '../workers/claude.js';
|
|
8
|
+
import { pingCodex } from '../workers/codex.js';
|
|
9
|
+
/**
 * Check worker binary exists and can report version.
 * This is cheaper than ping (no API call).
 *
 * The process is started with `reject: false`, so a failed run (non-zero
 * exit, missing executable, timeout) is normally *returned* as the result
 * rather than thrown. Both the returned result and any thrown error are
 * therefore inspected for a missing-binary condition.
 *
 * @param {string} name - Worker label copied into the result.
 * @param {{ bin: string }} worker - Worker config; only `bin` is read here.
 * @returns {Promise<{worker: string, bin: string, ok: boolean, version?: string, error?: string}>}
 *   `version` is the first line of stdout on success; `error` is set on failure.
 */
async function checkWorkerBinary(name, worker) {
    const timeoutMs = 5000;
    // True when the failure describes a spawn ENOENT (executable not found).
    // Only the spawn-level fields are consulted so that a tool which merely
    // prints "ENOENT" on stderr is not misreported as missing.
    const isMissingBinary = (failure) => failure?.code === 'ENOENT'
        || (typeof failure?.originalMessage === 'string' && failure.originalMessage.includes('ENOENT'));
    const failed = (error) => ({ worker: name, bin: worker.bin, ok: false, error });
    try {
        const result = await execa(worker.bin, ['--version'], {
            timeout: timeoutMs,
            reject: false
        });
        if (result.exitCode === 0) {
            const version = String(result.stdout ?? '').trim().split('\n')[0];
            return { worker: name, bin: worker.bin, ok: true, version };
        }
        if (isMissingBinary(result)) {
            return failed(`Command not found: ${worker.bin}`);
        }
        if (result.timedOut) {
            return failed(`Version check timed out after ${timeoutMs}ms`);
        }
        return failed(result.stderr || 'Version check failed');
    }
    catch (err) {
        // Reached only for failures that still throw despite `reject: false`.
        const message = typeof err?.message === 'string' ? err.message : String(err);
        if (isMissingBinary(err) || message.includes('ENOENT')) {
            return failed(`Command not found: ${worker.bin}`);
        }
        return failed(message);
    }
}
|
|
42
|
+
/**
 * Run the pre-run checks for a repository and collect them into one report.
 *
 * Steps, in order: build the repo context; split changed files into env
 * noise vs semantic changes; run scope and lockfile checks on the semantic
 * changes; verify the configured verification cwd exists; check each
 * configured worker binary; optionally ping the workers; select
 * verification tiers. Every failed check appends a reason string, and
 * `guard.ok` is true only when no reasons were collected.
 *
 * @param {object} options - Reads `repoPath`, `runId`, `slug`, `config`,
 *   `allowDirty`, `allowDeps`, `skipPing` and `milestoneRiskLevel`.
 * @returns {Promise<object>} `{ repo_context, guard, binary, ping, tiers, tier_reasons }`.
 */
export async function runPreflight(options) {
    const { config, repoPath } = options;
    const repoContext = await buildRepoContext(repoPath, options.runId, options.slug, config.repo.default_branch ?? 'main');
    // Env artifacts (node_modules, .next, .agent, etc.) are allowed noise.
    // The built-in agent patterns are always merged in (deduplicated) so
    // agent artifacts never trigger guard failures.
    const effectiveEnvAllowlist = [
        ...new Set([...(config.scope.env_allowlist ?? []), '.agent/**', '.agent-worktrees/**'])
    ];
    const { env_touched, semantic_changed } = partitionChangedFiles(repoContext.changed_files, effectiveEnvAllowlist);
    const dirty_files = repoContext.changed_files;
    // "Dirty" means semantic dirty, not env noise.
    const dirty = semantic_changed.length > 0;
    const dirty_is_env_only = dirty_files.length > 0 && !dirty;
    // Scope/lockfile checks only consider semantic changes.
    const scopeCheck = checkScope(semantic_changed, config.scope.allowlist, config.scope.denylist);
    const lockfileCheck = checkLockfiles(semantic_changed, config.scope.lockfiles, options.allowDeps);
    const reasons = [];
    if (dirty && !options.allowDirty) {
        reasons.push('dirty_worktree');
    }
    if (!scopeCheck.ok) {
        reasons.push('scope_violation');
    }
    if (!lockfileCheck.ok) {
        reasons.push('lockfile_violation');
    }
    // The verification cwd, when configured, must exist under the repo.
    const verificationCwd = config.verification.cwd;
    if (verificationCwd && !fs.existsSync(path.join(repoPath, verificationCwd))) {
        reasons.push(`verification_cwd_missing:${verificationCwd}`);
    }
    // Configured workers, in the fixed order claude -> codex.
    const workers = config.workers;
    const configured = [
        ['claude', workers.claude, pingClaude],
        ['codex', workers.codex, pingCodex]
    ].filter(([, workerConfig]) => workerConfig);
    // Binary checks are cheaper than pings and catch "command not found".
    const binaryResults = await Promise.all(configured.map(([workerName, workerConfig]) => checkWorkerBinary(workerName, workerConfig)));
    for (const outcome of binaryResults) {
        if (!outcome.ok) {
            reasons.push(`binary_missing:${outcome.worker}:${outcome.error}`);
        }
    }
    const binaryStatus = {
        ok: binaryResults.every((outcome) => outcome.ok),
        results: binaryResults
    };
    // Ping workers to verify auth/connectivity, unless pings were skipped
    // explicitly or a binary check already failed.
    let pingStatus;
    if (!options.skipPing && binaryStatus.ok) {
        const pingResults = await Promise.all(configured.map(([, workerConfig, ping]) => ping(workerConfig)));
        for (const outcome of pingResults) {
            if (!outcome.ok) {
                reasons.push(`ping_failed:${outcome.worker}:${outcome.category || 'unknown'}`);
            }
        }
        pingStatus = {
            ok: pingResults.every((outcome) => outcome.ok),
            skipped: false,
            results: pingResults
        };
    }
    else {
        pingStatus = { ok: true, skipped: true, results: [] };
    }
    const selection = selectTiersWithReasons(config.verification, {
        changed_files: repoContext.changed_files,
        risk_level: options.milestoneRiskLevel,
        is_milestone_end: false,
        is_run_end: false
    });
    const guard = {
        ok: reasons.length === 0,
        reasons,
        dirty,
        scope_violations: scopeCheck.violations,
        lockfile_violations: lockfileCheck.violations,
        dirty_files,
        env_touched,
        dirty_is_env_only
    };
    return {
        repo_context: repoContext,
        guard,
        binary: binaryStatus,
        ping: pingStatus,
        tiers: selection.tiers,
        tier_reasons: selection.reasons
    };
}
|
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import readline from 'node:readline';
|
|
4
|
+
import { readContextPackArtifact, formatContextPackStatus } from '../context/index.js';
|
|
5
|
+
import { findLatestRunId } from '../store/run-utils.js';
|
|
6
|
+
import { getRunsRoot } from '../store/runs-root.js';
|
|
7
|
+
// Re-export for backward compatibility with cli.ts
|
|
8
|
+
export { findLatestRunId };
|
|
9
|
+
/**
 * Print a report for a single run.
 *
 * Reads `state.json` from the run directory, scans `timeline.jsonl` when it
 * exists (otherwise an all-unknown KPI record is used), and prints either a
 * one-line KPI summary (`options.kpiOnly`) or the full report: header,
 * KPIs, last events and file pointers.
 *
 * @param {object} options - Reads `repo`, `runId`, `tail` and `kpiOnly`.
 * @returns {Promise<void>}
 * @throws {Error} When the run directory or its `state.json` is missing.
 */
export async function reportCommand(options) {
    const { runId } = options;
    const runDir = path.join(getRunsRoot(options.repo), runId);
    if (!fs.existsSync(runDir)) {
        throw new Error(missingRunMessage(runDir));
    }
    const statePath = path.join(runDir, 'state.json');
    if (!fs.existsSync(statePath)) {
        throw new Error(`State not found: ${statePath}`);
    }
    const state = JSON.parse(fs.readFileSync(statePath, 'utf-8'));
    const timelinePath = path.join(runDir, 'timeline.jsonl');
    let scan;
    if (fs.existsSync(timelinePath)) {
        scan = await scanTimeline(timelinePath, options.tail);
    }
    else {
        // No timeline yet: report every KPI as unknown/zero.
        scan = {
            tailEvents: [],
            kpi: {
                version: 1,
                total_duration_ms: null,
                unattributed_ms: null,
                started_at: null,
                ended_at: null,
                phases: {},
                workers: { claude: 'unknown', codex: 'unknown' },
                verify: { attempts: 0, retries: 0, total_duration_ms: 0 },
                milestones: { completed: 0 },
                reliability: {
                    infra_retries: 0,
                    fallback_used: false,
                    fallback_count: 0,
                    stalls_triggered: 0,
                    late_results_ignored: 0
                },
                outcome: 'unknown',
                stop_reason: null
            }
        };
    }
    const { kpi } = scan;
    const flags = readFlags(scan.runStarted);
    const contextPackArtifact = readContextPackArtifact(runDir);
    const headerLines = [
        'Run',
        `run_id: ${runId}`,
        `repo: ${state.repo_path}`,
        `run_dir: ${runDir}`,
        `current_phase: ${state.phase}`,
        `milestone_index: ${state.milestone_index}`,
        `phase_attempt: ${state.phase_attempt ?? 0}`,
        `last_error: ${state.last_error ?? 'none'}`,
        `dry_run: ${flags.dry_run ?? 'unknown'}`,
        `no_branch: ${flags.no_branch ?? 'unknown'}`,
        `allow_dirty: ${flags.allow_dirty ?? 'unknown'}`,
        `allow_deps: ${flags.allow_deps ?? 'unknown'}`
    ];
    const header = headerLines.join('\n');
    const kpiBlock = formatKpiBlock(kpi, contextPackArtifact);
    if (options.kpiOnly) {
        // Compact output: just run_id and KPIs
        const duration = formatDuration(kpi.total_duration_ms);
        console.log(`${runId}: ${kpi.outcome} ${duration} milestones=${kpi.milestones.completed}`);
        return;
    }
    const events = formatEvents(scan.tailEvents);
    const pointers = formatPointers({
        statePath,
        timelinePath,
        runDir,
        checkpoint: state.checkpoint_commit_sha
    });
    const sections = [header, '', 'KPIs', kpiBlock, '', 'Last events', events, '', 'Pointers', pointers];
    console.log(sections.join('\n'));
}
|
|
74
|
+
/**
 * Format a signed duration in milliseconds for display.
 *
 * @param {number|null|undefined} ms - Duration in milliseconds.
 * @returns {string} `'unknown'` for null, undefined, NaN or infinite input;
 *   otherwise the formatted magnitude, prefixed with `-` when negative.
 */
function formatDuration(ms) {
    // undefined / NaN used to fall through and render as "NaNh".
    if (ms == null || (typeof ms === 'number' && !Number.isFinite(ms))) {
        return 'unknown';
    }
    if (ms < 0) {
        return `-${formatPositiveDuration(Math.abs(ms))}`;
    }
    return formatPositiveDuration(ms);
}
/**
 * Format a non-negative duration using the coarsest fitting units:
 * `Nms` below one second, `Ns` below one minute, `NmNs` below one hour and
 * `NhNm` above that. A zero trailing unit is omitted (`2m`, `1h`); seconds
 * are dropped entirely once the value reaches an hour.
 *
 * @param {number} ms - Non-negative duration in milliseconds.
 * @returns {string} Formatted duration, or `'unknown'` for NaN/infinite input.
 */
function formatPositiveDuration(ms) {
    if (typeof ms === 'number' && !Number.isFinite(ms)) {
        return 'unknown';
    }
    if (ms < 1000) {
        return `${ms}ms`;
    }
    const seconds = Math.floor(ms / 1000);
    if (seconds < 60) {
        return `${seconds}s`;
    }
    const minutes = Math.floor(seconds / 60);
    if (minutes < 60) {
        const remainingSeconds = seconds % 60;
        return remainingSeconds > 0 ? `${minutes}m${remainingSeconds}s` : `${minutes}m`;
    }
    const hours = Math.floor(minutes / 60);
    const remainingMinutes = minutes % 60;
    return remainingMinutes > 0 ? `${hours}h${remainingMinutes}m` : `${hours}h`;
}
|
|
96
|
+
/**
 * Build the "run not found" error text for a missing run directory.
 *
 * Lists up to five sibling directory names from the parent of `runDir`,
 * in reverse default-sort order, or `none` when the parent does not exist
 * or contains no directories.
 *
 * @param {string} runDir - Path of the run directory that was not found.
 * @returns {string} Message of the form `Run not found: <dir>. Known runs: <hint>.`
 */
function missingRunMessage(runDir) {
    const runsRoot = path.dirname(runDir);
    let hint = 'none';
    if (fs.existsSync(runsRoot)) {
        const names = [];
        for (const entry of fs.readdirSync(runsRoot, { withFileTypes: true })) {
            if (entry.isDirectory()) {
                names.push(entry.name);
            }
        }
        names.sort();
        names.reverse();
        const shown = names.slice(0, 5);
        if (shown.length) {
            hint = shown.join(', ');
        }
    }
    return `Run not found: ${runDir}. Known runs: ${hint}.`;
}
|
|
111
|
+
/**
 * Pull the run flags out of a `run_started` event.
 *
 * @param {{ payload?: object }|undefined} runStarted - The event, if any.
 * @returns {object} `{ dry_run, no_branch, allow_dirty, allow_deps }` copied
 *   from the payload (absent flags stay `undefined`), or `{}` when there is
 *   no object payload.
 */
function readFlags(runStarted) {
    const payload = runStarted?.payload;
    if (!payload || typeof payload !== 'object') {
        return {};
    }
    const { dry_run, no_branch, allow_dirty, allow_deps } = payload;
    return { dry_run, no_branch, allow_dirty, allow_deps };
}
|
|
123
|
+
/**
 * Render the KPI section of a run report as newline-separated text.
 *
 * Emits, in order: total duration (with unattributed time), outcome,
 * completed milestones, worker call counts, per-phase durations,
 * verification stats, reliability counters and the context pack status.
 *
 * @param {object} kpi - KPI record with the shape returned by
 *   `computeKpiFromEvents`.
 * @param {object|null|undefined} contextPackArtifact - Passed to
 *   `formatContextPackStatus` (null when absent).
 * @returns {string} The formatted block.
 */
function formatKpiBlock(kpi, contextPackArtifact) {
    // Total duration + unattributed
    const totalText = kpi.total_duration_ms === null ? 'unknown' : formatDuration(kpi.total_duration_ms);
    const unattributed = kpi.unattributed_ms;
    let unattributedText = 'unknown';
    if (unattributed !== null) {
        // Negative = phases exceed tracked time (e.g., resumed runs with gaps)
        unattributedText = unattributed < 0
            ? `-${formatPositiveDuration(Math.abs(unattributed))} (resume/gap)`
            : formatPositiveDuration(unattributed);
    }
    const outcomeText = kpi.stop_reason ? `${kpi.outcome} (${kpi.stop_reason})` : kpi.outcome;
    // Phases: known phases first in their typical order, then any others
    // alphabetically.
    const typicalOrder = ['plan', 'implement', 'review', 'verify'];
    const byTypicalOrder = ([left], [right]) => {
        const leftIdx = typicalOrder.indexOf(left);
        const rightIdx = typicalOrder.indexOf(right);
        const leftKnown = leftIdx !== -1;
        const rightKnown = rightIdx !== -1;
        if (leftKnown && rightKnown) {
            return leftIdx - rightIdx;
        }
        if (leftKnown) {
            return -1;
        }
        if (rightKnown) {
            return 1;
        }
        return left.localeCompare(right);
    };
    const phaseEntries = Object.entries(kpi.phases);
    let phasesLine = 'phases: (no phase data)';
    if (phaseEntries.length > 0) {
        const parts = phaseEntries
            .sort(byTypicalOrder)
            .map(([phase, data]) => `${phase}=${formatDuration(data.duration_ms)}(x${data.count})`);
        phasesLine = `phases: ${parts.join(' ')}`;
    }
    // Verification stats
    const { verify } = kpi;
    const verifyLine = verify.attempts > 0
        ? `verify: attempts=${verify.attempts} retries=${verify.retries} duration=${formatDuration(verify.total_duration_ms)}`
        : 'verify: (no verification data)';
    // Reliability: only non-zero counters are listed; "clean" when none.
    const rel = kpi.reliability;
    const relParts = [
        [rel.infra_retries > 0, `retries=${rel.infra_retries}`],
        [rel.fallback_used, `fallback=${rel.fallback_count}`],
        [rel.stalls_triggered > 0, `stalls=${rel.stalls_triggered}`],
        [rel.late_results_ignored > 0, `late_ignored=${rel.late_results_ignored}`]
    ].filter(([show]) => show).map(([, text]) => text);
    return [
        `total_duration: ${totalText} (unattributed: ${unattributedText})`,
        `outcome: ${outcomeText}`,
        `milestones_completed: ${kpi.milestones.completed}`,
        `worker_calls: claude=${kpi.workers.claude} codex=${kpi.workers.codex}`,
        phasesLine,
        verifyLine,
        `reliability: ${relParts.length ? relParts.join(' ') : 'clean'}`,
        formatContextPackStatus(contextPackArtifact ?? null)
    ].join('\n');
}
|
|
195
|
+
// Exported for testing - computes KPIs from an array of timeline events
/**
 * Fold timeline events into a KPI record.
 *
 * Tracks run start/end, per-phase durations and counts, worker call stats,
 * verification attempts/retries/duration, completed milestones and
 * reliability counters.
 *
 * Phase accounting: a phase runs from its `phase_start` until the next
 * `phase_start`, `stop` or `run_complete`. After `stop`/`run_complete` no
 * phase is considered open, so a later `phase_start` (e.g. on a resumed
 * run) does not close the previous phase a second time or charge it for
 * the idle gap. Events that are not objects, and timestamps that do not
 * parse to a finite time, are ignored rather than producing NaN durations.
 *
 * @param {Array<object>} events - Timeline events in order; each may carry
 *   `type`, `timestamp` and `payload`.
 * @returns {object} KPI record (`version: 1`) with `total_duration_ms`,
 *   `unattributed_ms`, `started_at`, `ended_at`, `phases`, `workers`,
 *   `verify`, `milestones`, `reliability`, `outcome` and `stop_reason`.
 */
export function computeKpiFromEvents(events) {
    // Parse a timestamp to epoch ms; null when missing or unparseable.
    const toMillis = (timestamp) => {
        if (!timestamp) {
            return null;
        }
        const ms = new Date(timestamp).getTime();
        return Number.isFinite(ms) ? ms : null;
    };
    const phases = {};
    // Get (creating on first use) the accumulator for a phase name.
    const phaseBucket = (phaseName) => {
        if (!Object.prototype.hasOwnProperty.call(phases, phaseName)) {
            phases[phaseName] = { duration_ms: 0, count: 0 };
        }
        return phases[phaseName];
    };
    let startedAt = null;
    let endedAt = null;
    let currentPhase = null;
    let currentPhaseStart = null;
    let workersClaude = 'unknown';
    let workersCodex = 'unknown';
    let verifyAttempts = 0;
    let verifyRetries = 0;
    let verifyDurationMs = 0;
    let milestonesCompleted = 0;
    let outcome = 'unknown';
    let stopReason = null;
    // Reliability metrics
    let infraRetries = 0;
    let fallbackCount = 0;
    let stallsTriggered = 0;
    let lateResultsIgnored = 0;
    // Charge the open phase (if any) with the time up to `endMs`.
    const closeCurrentPhase = (endMs) => {
        if (currentPhase && currentPhaseStart !== null && endMs !== null) {
            phaseBucket(currentPhase).duration_ms += endMs - currentPhaseStart;
        }
    };
    for (const event of events) {
        // Skip malformed entries (e.g. a JSON `null` line) instead of crashing.
        if (event === null || typeof event !== 'object') {
            continue;
        }
        const eventType = event.type;
        const timestamp = event.timestamp;
        const eventMs = toMillis(timestamp);
        const payload = event.payload && typeof event.payload === 'object'
            ? event.payload
            : {};
        // Track run_started (first one with a usable slot wins)
        if (eventType === 'run_started' && startedAt === null) {
            startedAt = timestamp ?? null;
            outcome = 'running';
        }
        // Track phase_start events for phase durations
        if (eventType === 'phase_start' && payload.phase) {
            closeCurrentPhase(eventMs);
            currentPhase = payload.phase;
            currentPhaseStart = eventMs;
            phaseBucket(currentPhase).count += 1;
        }
        // Track worker_stats event (emitted at finalize)
        if (eventType === 'worker_stats' && payload.stats) {
            const stats = payload.stats;
            if (typeof stats.claude === 'number') {
                workersClaude = stats.claude;
            }
            if (typeof stats.codex === 'number') {
                workersCodex = stats.codex;
            }
        }
        // Track verification events, including their retry counts
        if (eventType === 'verification') {
            verifyAttempts += 1;
            if (payload.duration_ms && typeof payload.duration_ms === 'number') {
                verifyDurationMs += payload.duration_ms;
            }
            if (typeof payload.retry === 'number') {
                verifyRetries += payload.retry;
            }
        }
        // Track milestone completion
        if (eventType === 'milestone_complete') {
            milestonesCompleted += 1;
        }
        // Track stop / run_complete: both end the run and the open phase
        if (eventType === 'stop' || eventType === 'run_complete') {
            endedAt = timestamp ?? null;
            if (eventType === 'stop') {
                outcome = 'stopped';
                stopReason = payload.reason ?? null;
                if (payload.reason === 'stalled_timeout') {
                    stallsTriggered += 1;
                }
            }
            else {
                outcome = 'complete';
            }
            closeCurrentPhase(eventMs);
            // Nothing is running after the run ends; forget the phase so a
            // later phase_start cannot close it again.
            currentPhase = null;
            currentPhaseStart = null;
        }
        // Track remaining reliability metrics
        if (eventType === 'parse_failed') {
            infraRetries += payload.retry_count ?? 0;
        }
        if (eventType === 'worker_fallback') {
            fallbackCount += 1;
        }
        if (eventType === 'late_worker_result_ignored') {
            lateResultsIgnored += 1;
        }
    }
    // Compute total duration
    const startMs = toMillis(startedAt);
    const endMs = toMillis(endedAt);
    const totalDurationMs = startMs !== null && endMs !== null ? endMs - startMs : null;
    // Compute unattributed time (total - sum of phase durations)
    let unattributedMs = null;
    if (totalDurationMs !== null) {
        const attributedMs = Object.values(phases).reduce((sum, p) => sum + p.duration_ms, 0);
        unattributedMs = totalDurationMs - attributedMs;
    }
    return {
        version: 1,
        total_duration_ms: totalDurationMs,
        unattributed_ms: unattributedMs,
        started_at: startedAt,
        ended_at: endedAt,
        phases,
        workers: {
            claude: workersClaude,
            codex: workersCodex
        },
        verify: {
            attempts: verifyAttempts,
            retries: verifyRetries,
            total_duration_ms: verifyDurationMs
        },
        milestones: {
            completed: milestonesCompleted
        },
        reliability: {
            infra_retries: infraRetries,
            fallback_used: fallbackCount > 0,
            fallback_count: fallbackCount,
            stalls_triggered: stallsTriggered,
            late_results_ignored: lateResultsIgnored
        },
        outcome,
        stop_reason: stopReason
    };
}
|
|
355
|
+
/**
 * Stream a `timeline.jsonl` file and collect what the report needs.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects (e.g. `null`, numbers) are skipped: the scan is best-effort, and
 * a non-object entry must not reach the KPI computation, where a `null`
 * event would throw.
 *
 * @param {string} timelinePath - Path of the JSON-lines timeline file.
 * @param {number} tailCount - Maximum number of trailing events to keep.
 * @returns {Promise<{runStarted: object|undefined, tailEvents: object[], kpi: object}>}
 *   The first `run_started` event (if any), the last `tailCount` events and
 *   the KPIs computed over all events.
 */
async function scanTimeline(timelinePath, tailCount) {
    const allEvents = [];
    const tailEvents = [];
    let runStarted;
    const stream = fs.createReadStream(timelinePath, { encoding: 'utf-8' });
    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
    for await (const line of rl) {
        const trimmed = line.trim();
        if (!trimmed) {
            continue;
        }
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch {
            // Deliberate best-effort: a corrupt line is skipped, not fatal.
            continue;
        }
        // Validate before recording so nothing malformed is half-added.
        if (event === null || typeof event !== 'object') {
            continue;
        }
        allEvents.push(event);
        if (!runStarted && event.type === 'run_started') {
            runStarted = event;
        }
        // Keep a sliding window of the most recent events.
        tailEvents.push(event);
        if (tailEvents.length > tailCount) {
            tailEvents.shift();
        }
    }
    const kpi = computeKpiFromEvents(allEvents);
    return { runStarted, tailEvents, kpi };
}
|
|
384
|
+
/**
 * Render timeline events as one line each:
 * `<seq> <timestamp> <type> <source> <summary>`, trimmed.
 *
 * Missing `seq`/`timestamp` show as `?`; missing `type`/`source` show as
 * `unknown`.
 *
 * @param {Array<object>} events - Events to render.
 * @returns {string} Newline-joined lines, or `(no events)` for an empty list.
 */
function formatEvents(events) {
    if (events.length === 0) {
        return '(no events)';
    }
    const describe = (event) => {
        const fields = {
            seq: event.seq ?? '?',
            ts: event.timestamp ?? '?',
            type: event.type ?? 'unknown',
            source: event.source ?? 'unknown'
        };
        const summary = summarizeEvent(event);
        return `${fields.seq} ${fields.ts} ${fields.type} ${fields.source} ${summary}`.trim();
    };
    return events.map(describe).join('\n');
}
|
|
398
|
+
/**
 * Produce a short, type-specific summary of a timeline event.
 *
 * Known event types get a tailored summary. When a type's required payload
 * field is missing, or the type is not recognised, the summary falls back
 * to the first four payload keys (`keys=a,b,c,d`), or an empty string when
 * the payload has none.
 *
 * @param {object} event - Timeline event; `type` and `payload` are read.
 * @returns {string} The summary text (possibly empty).
 */
function summarizeEvent(event) {
    const hasObjectPayload = event.payload && typeof event.payload === 'object';
    const payload = hasObjectPayload ? event.payload : {};
    switch (event.type) {
        case 'phase_start':
            if (payload.phase) {
                return `phase=${payload.phase}`;
            }
            break;
        case 'verification':
            if (payload.tier) {
                return `tier=${payload.tier} ok=${payload.ok}`;
            }
            break;
        case 'verify_complete':
            if (Array.isArray(payload.results)) {
                return `results=${payload.results.join('; ')}`;
            }
            break;
        case 'guard_violation': {
            const guard = payload.guard;
            if (!guard || typeof guard !== 'object') {
                return 'guard_violation';
            }
            const reasons = Array.isArray(guard.reasons) ? guard.reasons.join(',') : '';
            return `guard=${reasons || 'violation'}`;
        }
        case 'stop':
            if (payload.reason) {
                return `reason=${payload.reason}`;
            }
            break;
        case 'parse_failed': {
            const context = payload.parser_context ?? 'unknown';
            const retry = payload.retry_count ?? 0;
            // Output snippets are clipped to 120 characters.
            const snippet = payload.output_snippet ? clip(String(payload.output_snippet), 120) : '';
            const snippetPart = snippet ? `snippet="${snippet}"` : '';
            return `context=${context} retry=${retry} ${snippetPart}`.trim();
        }
        case 'run_started':
            return [
                `dry_run=${payload.dry_run}`,
                `no_branch=${payload.no_branch}`,
                `allow_dirty=${payload.allow_dirty}`,
                `allow_deps=${payload.allow_deps}`
            ].join(' ');
        case 'run_resumed':
            return `max_ticks=${payload.max_ticks ?? '?'} time=${payload.time ?? '?'}`;
        default:
            break;
    }
    // Generic fallback: name up to four payload keys.
    const shownKeys = Object.keys(payload).slice(0, 4);
    return shownKeys.length ? `keys=${shownKeys.join(',')}` : '';
}
|
|
446
|
+
/**
 * Render the "Pointers" section: where to find the run's state file,
 * timeline, latest verification log and checkpoint commit.
 *
 * @param {{statePath: string, timelinePath: string, runDir: string, checkpoint?: string|null}} input
 * @returns {string} Four newline-joined `label: value` lines; a missing log
 *   or checkpoint is shown as `none`.
 */
function formatPointers(input) {
    const { statePath, timelinePath, runDir, checkpoint } = input;
    // Verification logs are looked up under <runDir>/artifacts.
    const latestLog = findLatestVerifyLog(path.join(runDir, 'artifacts'));
    return [
        `state: ${statePath}`,
        `timeline: ${timelinePath}`,
        `last_verification_log: ${latestLog ?? 'none'}`,
        `checkpoint_sha: ${checkpoint ?? 'none'}`
    ].join('\n');
}
|
|
457
|
+
/**
 * Find the most recently modified verification log in a directory.
 *
 * Considers files named `tests_*.log` and compares their mtimes; on a tie
 * the entry listed first by `readdirSync` wins.
 *
 * @param {string} artifactsDir - Directory to search.
 * @returns {string|null} Full path of the newest log, or null when the
 *   directory does not exist or holds no matching file.
 */
function findLatestVerifyLog(artifactsDir) {
    if (!fs.existsSync(artifactsDir)) {
        return null;
    }
    let newestFile = null;
    let newestMtime = -Infinity;
    for (const entryName of fs.readdirSync(artifactsDir)) {
        const isVerifyLog = entryName.startsWith('tests_') && entryName.endsWith('.log');
        if (!isVerifyLog) {
            continue;
        }
        const candidate = path.join(artifactsDir, entryName);
        const { mtimeMs } = fs.statSync(candidate);
        // Strict comparison keeps the earliest-listed file on equal mtimes.
        if (newestFile === null || mtimeMs > newestMtime) {
            newestFile = candidate;
            newestMtime = mtimeMs;
        }
    }
    return newestFile;
}
|
|
473
|
+
/**
 * Truncate a string to at most `max` characters, appending `...` when it
 * had to be shortened.
 *
 * @param {string} value - Text to clip.
 * @param {number} max - Maximum number of characters kept from `value`.
 * @returns {string} `value` unchanged when it fits, otherwise its first
 *   `max` characters followed by `...`.
 */
function clip(value, max) {
    return value.length > max ? `${value.slice(0, max)}...` : value;
}
|