@weldr/runr 0.3.0
This diff shows the contents of publicly released versions of this package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in that registry.
- package/CHANGELOG.md +216 -0
- package/LICENSE +190 -0
- package/NOTICE +4 -0
- package/README.md +200 -0
- package/dist/cli.js +464 -0
- package/dist/commands/__tests__/report.test.js +202 -0
- package/dist/commands/compare.js +168 -0
- package/dist/commands/doctor.js +124 -0
- package/dist/commands/follow.js +251 -0
- package/dist/commands/gc.js +161 -0
- package/dist/commands/guards-only.js +89 -0
- package/dist/commands/metrics.js +441 -0
- package/dist/commands/orchestrate.js +800 -0
- package/dist/commands/paths.js +31 -0
- package/dist/commands/preflight.js +152 -0
- package/dist/commands/report.js +478 -0
- package/dist/commands/resume.js +149 -0
- package/dist/commands/run.js +538 -0
- package/dist/commands/status.js +189 -0
- package/dist/commands/summarize.js +220 -0
- package/dist/commands/version.js +82 -0
- package/dist/commands/wait.js +170 -0
- package/dist/config/__tests__/presets.test.js +104 -0
- package/dist/config/load.js +66 -0
- package/dist/config/schema.js +160 -0
- package/dist/context/__tests__/artifact.test.js +130 -0
- package/dist/context/__tests__/pack.test.js +191 -0
- package/dist/context/artifact.js +67 -0
- package/dist/context/index.js +2 -0
- package/dist/context/pack.js +273 -0
- package/dist/diagnosis/analyzer.js +678 -0
- package/dist/diagnosis/formatter.js +136 -0
- package/dist/diagnosis/index.js +6 -0
- package/dist/diagnosis/types.js +7 -0
- package/dist/env/__tests__/fingerprint.test.js +116 -0
- package/dist/env/fingerprint.js +111 -0
- package/dist/orchestrator/__tests__/policy.test.js +185 -0
- package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
- package/dist/orchestrator/artifacts.js +405 -0
- package/dist/orchestrator/state-machine.js +646 -0
- package/dist/orchestrator/types.js +88 -0
- package/dist/ownership/normalize.js +45 -0
- package/dist/repo/context.js +90 -0
- package/dist/repo/git.js +13 -0
- package/dist/repo/worktree.js +239 -0
- package/dist/store/run-store.js +107 -0
- package/dist/store/run-utils.js +69 -0
- package/dist/store/runs-root.js +126 -0
- package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
- package/dist/supervisor/__tests__/ownership.test.js +103 -0
- package/dist/supervisor/__tests__/state-machine.test.js +290 -0
- package/dist/supervisor/collision.js +240 -0
- package/dist/supervisor/evidence-gate.js +98 -0
- package/dist/supervisor/planner.js +18 -0
- package/dist/supervisor/runner.js +1562 -0
- package/dist/supervisor/scope-guard.js +55 -0
- package/dist/supervisor/state-machine.js +121 -0
- package/dist/supervisor/verification-policy.js +64 -0
- package/dist/tasks/task-metadata.js +72 -0
- package/dist/types/schemas.js +1 -0
- package/dist/verification/engine.js +49 -0
- package/dist/workers/__tests__/claude.test.js +88 -0
- package/dist/workers/__tests__/codex.test.js +81 -0
- package/dist/workers/claude.js +119 -0
- package/dist/workers/codex.js +162 -0
- package/dist/workers/json.js +22 -0
- package/dist/workers/mock.js +193 -0
- package/dist/workers/prompts.js +98 -0
- package/dist/workers/schemas.js +39 -0
- package/package.json +47 -0
- package/templates/prompts/implementer.md +70 -0
- package/templates/prompts/planner.md +62 -0
- package/templates/prompts/reviewer.md +77 -0

package/dist/supervisor/scope-guard.js
@@ -0,0 +1,55 @@
import path from 'node:path';
import picomatch from 'picomatch';
function toPosix(filePath) {
    return filePath.split(path.sep).join('/');
}
function compile(patterns) {
    return patterns.map((p) => picomatch(p));
}
function matchesAny(matchers, s) {
    return matchers.some((m) => m(s));
}
/**
 * Partition changed files into env artifacts vs semantic changes.
 * Env artifacts (matching env_allowlist) are allowed noise that shouldn't
 * trigger dirty_worktree or scope_violation.
 */
export function partitionChangedFiles(changedFiles, envAllowlist) {
    const envMatchers = compile(envAllowlist);
    const env_touched = [];
    const semantic_changed = [];
    for (const file of changedFiles) {
        const posixFile = toPosix(file);
        if (envMatchers.length > 0 && matchesAny(envMatchers, posixFile)) {
            env_touched.push(file);
        }
        else {
            semantic_changed.push(file);
        }
    }
    return { env_touched, semantic_changed };
}
export function checkScope(changedFiles, allowlist, denylist) {
    const allowMatchers = allowlist.map((pattern) => picomatch(pattern));
    const denyMatchers = denylist.map((pattern) => picomatch(pattern));
    const violations = [];
    for (const file of changedFiles) {
        const posixFile = toPosix(file);
        if (denyMatchers.some((match) => match(posixFile))) {
            violations.push(file);
            continue;
        }
        if (allowMatchers.length > 0 && !allowMatchers.some((match) => match(posixFile))) {
            violations.push(file);
        }
    }
    return { ok: violations.length === 0, violations };
}
export function checkLockfiles(changedFiles, lockfiles, allowDeps) {
    if (allowDeps) {
        return { ok: true, violations: [] };
    }
    const lockfileSet = new Set(lockfiles);
    const violations = changedFiles.filter((file) => lockfileSet.has(file));
    return { ok: violations.length === 0, violations };
}
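
A minimal usage sketch of the scope guard above; the patterns and file names here are illustrative, not taken from the package:

import { partitionChangedFiles, checkScope, checkLockfiles } from './scope-guard.js';

// Hypothetical changed files reported after a worker step.
const changed = ['src/api/users.js', 'package-lock.json', '.cache/tsbuildinfo'];

// Env artifacts (e.g. build caches) are split off so they don't count as semantic edits.
const { env_touched, semantic_changed } = partitionChangedFiles(changed, ['.cache/**']);

// Scope check: only files under src/api/** are allowed, docs/** is denied.
const scope = checkScope(semantic_changed, ['src/api/**'], ['docs/**']);

// Lockfile check: flags package-lock.json unless dependency changes are allowed.
const locks = checkLockfiles(semantic_changed, ['package-lock.json'], false);

console.log(scope.ok, scope.violations, locks.violations, env_touched);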

package/dist/supervisor/state-machine.js
@@ -0,0 +1,121 @@
import { buildMilestonesFromTask } from './planner.js';
/**
 * Creates initial run state with all timestamps set.
 *
 * Progress Tracking Design:
 * - `started_at`: Run start time (never changes)
 * - `updated_at`: Last state mutation (any write)
 * - `phase_started_at`: Current phase start time
 * - `last_progress_at`: Last meaningful work (for stall detection)
 *
 * The supervisor's `recordProgress()` is the canonical way to mark progress.
 * It updates both `last_progress_at` and `updated_at` together.
 */
export function createInitialState(input) {
    const now = new Date().toISOString();
    const milestones = buildMilestonesFromTask(input.task_text);
    return {
        run_id: input.run_id,
        repo_path: input.repo_path,
        phase: 'INIT',
        milestone_index: 0,
        milestones,
        owned_paths: input.owned_paths,
        scope_lock: {
            allowlist: input.allowlist,
            denylist: input.denylist
        },
        risk_score: 0,
        retries: 0,
        milestone_retries: 0,
        resume_token: input.run_id,
        phase_started_at: now,
        phase_attempt: 0,
        started_at: now,
        updated_at: now,
        last_progress_at: now,
        worker_stats: {
            claude: 0,
            codex: 0,
            by_phase: {
                plan: { claude: 0, codex: 0 },
                implement: { claude: 0, codex: 0 },
                review: { claude: 0, codex: 0 }
            }
        }
    };
}
export function updatePhase(state, phase) {
    const now = new Date().toISOString();
    const phaseAttempt = state.phase === phase ? state.phase_attempt + 1 : 1;
    return {
        ...state,
        phase,
        last_successful_phase: state.phase,
        phase_started_at: now,
        phase_attempt: phaseAttempt,
        updated_at: now
    };
}
export function stopRun(state, reason) {
    const now = new Date().toISOString();
    return {
        ...state,
        phase: 'STOPPED',
        stop_reason: reason,
        updated_at: now,
        phase_started_at: now
    };
}
/**
 * Canonical phase order for determining resume target.
 * Used by both manual resume command and auto-resume.
 */
const PHASE_ORDER = ['INIT', 'PLAN', 'IMPLEMENT', 'VERIFY', 'REVIEW', 'CHECKPOINT', 'FINALIZE'];
/**
 * Compute the target phase to resume from based on last successful phase.
 * Shared between resume command and auto-resume to prevent drift.
 *
 * Logic:
 * - If STOPPED with last_successful_phase, resume from phase after that
 * - If at FINALIZE, stay at FINALIZE
 * - Otherwise use current phase (for non-STOPPED states)
 */
export function computeResumeTargetPhase(state) {
    // If not stopped, just use current phase
    if (state.phase !== 'STOPPED') {
        return state.phase;
    }
    // If we have a last successful phase, resume from the next one
    if (state.last_successful_phase) {
        const lastIdx = PHASE_ORDER.indexOf(state.last_successful_phase);
        if (lastIdx >= 0 && lastIdx < PHASE_ORDER.length - 1) {
            return PHASE_ORDER[lastIdx + 1];
        }
        // At FINALIZE or beyond, stay there
        return state.last_successful_phase;
    }
    // No last successful phase, start from INIT
    return 'INIT';
}
/**
 * Prepare state for resumption (manual or auto).
 * Clears stop state, sets resume phase, optionally increments auto_resume_count.
 */
export function prepareForResume(state, options = {}) {
    const now = new Date().toISOString();
    const targetPhase = computeResumeTargetPhase(state);
    const autoResumeCount = state.auto_resume_count ?? 0;
    return {
        ...state,
        phase: targetPhase,
        stop_reason: undefined,
        last_error: undefined,
        resume_token: options.resumeToken ?? state.run_id,
        updated_at: now,
        phase_started_at: now,
        auto_resume_count: options.incrementAutoResumeCount
            ? autoResumeCount + 1
            : autoResumeCount
    };
}
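
A sketch of the stop/resume flow these helpers support; the input object follows the fields `createInitialState` reads above, and the concrete values are illustrative only:

import { createInitialState, updatePhase, stopRun, prepareForResume } from './state-machine.js';

// Illustrative input; real runs are created by the supervisor.
let state = createInitialState({
    run_id: 'run-123',
    repo_path: '/tmp/repo',
    task_text: 'Add a health endpoint',
    owned_paths: ['src/**'],
    allowlist: ['src/**'],
    denylist: ['docs/**']
});

state = updatePhase(state, 'PLAN'); // phase_attempt = 1, last_successful_phase = 'INIT'
state = stopRun(state, 'stalled');  // phase = 'STOPPED', stop_reason recorded

// Auto-resume: picks the phase after last_successful_phase and bumps auto_resume_count.
state = prepareForResume(state, { incrementAutoResumeCount: true });
console.log(state.phase, state.auto_resume_count); // 'PLAN', 1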

package/dist/supervisor/verification-policy.js
@@ -0,0 +1,64 @@
import picomatch from 'picomatch';
const ORDERED_TIERS = ['tier0', 'tier1', 'tier2'];
export function selectTiersWithReasons(policy, context) {
    const tiers = new Set();
    const reasons = [];
    tiers.add('tier0');
    reasons.push('tier0_always');
    const matches = triggerMatches(policy, context.changed_files);
    const highRiskMatches = matches.filter((match) => match.tier !== 'tier0');
    if (highRiskMatches.length > 0) {
        tiers.add('tier1');
        for (const match of highRiskMatches) {
            reasons.push(`risk_trigger:${match.name}`);
        }
    }
    if (context.is_milestone_end) {
        tiers.add('tier1');
        reasons.push('milestone_end');
    }
    if (context.risk_level === 'high') {
        tiers.add('tier1');
        reasons.push('risk_level_high');
    }
    if (context.is_run_end) {
        tiers.add('tier2');
        reasons.push('run_end');
    }
    const selected = ORDERED_TIERS.filter((tier) => tiers.has(tier));
    return { tiers: selected, reasons };
}
export function selectTiers(policy, context) {
    return selectTiersWithReasons(policy, context).tiers;
}
export function triggerTiers(policy, changedFiles) {
    const tiers = new Set();
    for (const match of triggerMatches(policy, changedFiles)) {
        tiers.add(match.tier);
    }
    return Array.from(tiers);
}
export function triggerMatches(policy, changedFiles) {
    if (!policy.risk_triggers.length) {
        return [];
    }
    const matches = [];
    const files = changedFiles.map((file) => file.replace(/\\/g, '/'));
    for (const trigger of policy.risk_triggers) {
        const matcher = picomatch(trigger.patterns);
        if (files.some((file) => matcher(file))) {
            const tier = trigger.tier === 'tier2' ? 'tier1' : trigger.tier;
            matches.push({ name: trigger.name, tier });
        }
    }
    return matches;
}
export function commandsForTier(policy, tier) {
    if (tier === 'tier0') {
        return policy.tier0;
    }
    if (tier === 'tier1') {
        return policy.tier1;
    }
    return policy.tier2;
}
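
A sketch of how tier selection composes, using a hypothetical policy object with only the fields the functions above read (`tier0`/`tier1`/`tier2` command lists and `risk_triggers`); the commands and trigger are invented for illustration:

import { selectTiersWithReasons, commandsForTier } from './verification-policy.js';

// Hypothetical policy; field names follow what the functions above read.
const policy = {
    tier0: ['npm run lint'],
    tier1: ['npm test'],
    tier2: ['npm run e2e'],
    risk_triggers: [
        { name: 'migrations', patterns: ['db/migrations/**'], tier: 'tier1' }
    ]
};

const { tiers, reasons } = selectTiersWithReasons(policy, {
    changed_files: ['db/migrations/0001_init.sql'],
    is_milestone_end: false,
    risk_level: 'low',
    is_run_end: false
});

// tiers -> ['tier0', 'tier1']; reasons -> ['tier0_always', 'risk_trigger:migrations']
for (const tier of tiers) {
    console.log(tier, commandsForTier(policy, tier));
}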

package/dist/tasks/task-metadata.js
@@ -0,0 +1,72 @@
import fs from 'node:fs';
import yaml from 'yaml';
import { normalizeOwnsPatterns } from '../ownership/normalize.js';
// Re-export for backward compatibility
export { normalizeOwnsPatterns } from '../ownership/normalize.js';
function hasFrontmatter(raw) {
    const trimmed = raw.startsWith('\ufeff') ? raw.slice(1) : raw;
    return trimmed.startsWith('---');
}
function splitFrontmatter(raw) {
    const trimmed = raw.startsWith('\ufeff') ? raw.slice(1) : raw;
    if (!hasFrontmatter(trimmed)) {
        return { frontmatterText: null, body: raw };
    }
    const lines = trimmed.split(/\r?\n/);
    if (lines[0].trim() !== '---') {
        return { frontmatterText: null, body: raw };
    }
    const endIdx = lines.findIndex((line, idx) => idx > 0 && line.trim() === '---');
    if (endIdx === -1) {
        return { frontmatterText: null, body: raw };
    }
    const frontmatterText = lines.slice(1, endIdx).join('\n');
    const body = lines.slice(endIdx + 1).join('\n');
    return { frontmatterText, body };
}
function coerceOwns(value, taskPath) {
    if (value === undefined || value === null) {
        return [];
    }
    if (typeof value === 'string') {
        return [value];
    }
    if (Array.isArray(value)) {
        const nonStrings = value.filter((item) => typeof item !== 'string');
        if (nonStrings.length > 0) {
            throw new Error(`Invalid owns entry in ${taskPath}: must be string or string[]`);
        }
        return value;
    }
    throw new Error(`Invalid owns entry in ${taskPath}: must be string or string[]`);
}
export function loadTaskMetadata(taskPath) {
    const raw = fs.readFileSync(taskPath, 'utf-8');
    const { frontmatterText, body } = splitFrontmatter(raw);
    let frontmatter = null;
    let ownsRaw = [];
    if (frontmatterText !== null) {
        try {
            const parsed = yaml.parse(frontmatterText);
            if (parsed && typeof parsed === 'object') {
                frontmatter = parsed;
            }
            else {
                frontmatter = {};
            }
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to parse task frontmatter in ${taskPath}: ${message}`);
        }
        ownsRaw = coerceOwns(frontmatter.owns, taskPath);
    }
    const ownsNormalized = normalizeOwnsPatterns(ownsRaw);
    return {
        raw,
        body,
        owns_raw: ownsRaw,
        owns_normalized: ownsNormalized,
        frontmatter
    };
}
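
A sketch of the task file shape this loader accepts: optional YAML frontmatter with an `owns` field (string or string list), then the task body. The path, patterns, and task text below are illustrative:

import fs from 'node:fs';
import { loadTaskMetadata } from './task-metadata.js';

// Illustrative task file with `owns` frontmatter.
fs.writeFileSync('/tmp/task.md', [
    '---',
    'owns:',
    '  - src/api/**',
    '  - src/db/schema.sql',
    '---',
    'Implement the /health endpoint.'
].join('\n'));

const task = loadTaskMetadata('/tmp/task.md');
console.log(task.owns_raw);        // ['src/api/**', 'src/db/schema.sql']
console.log(task.owns_normalized); // result of normalizeOwnsPatterns
console.log(task.body.trim());     // 'Implement the /health endpoint.'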

package/dist/types/schemas.js
@@ -0,0 +1 @@
export {};

package/dist/verification/engine.js
@@ -0,0 +1,49 @@
import { execa } from 'execa';
export async function runVerification(tier, commands, cwd, timeoutSeconds) {
    const started = Date.now();
    let output = '';
    let ok = true;
    const commandResults = [];
    for (const command of commands) {
        try {
            const result = await execa(command, {
                cwd,
                shell: true,
                timeout: timeoutSeconds * 1000,
                all: true
            });
            const cmdOutput = result.all ? `${result.all}\n` : '';
            output += cmdOutput;
            commandResults.push({
                command,
                exit_code: 0,
                output: cmdOutput
            });
        }
        catch (error) {
            ok = false;
            const errWithCode = error;
            const exitCode = typeof errWithCode.exitCode === 'number' ? errWithCode.exitCode : 1;
            const errorOutput = typeof errWithCode.all === 'string'
                ? errWithCode.all
                : error instanceof Error
                    ? error.message
                    : 'Verification command failed';
            output += `${errorOutput}\n`;
            commandResults.push({
                command,
                exit_code: exitCode,
                output: errorOutput
            });
            break;
        }
    }
    return {
        tier,
        commands,
        command_results: commandResults,
        ok,
        duration_ms: Date.now() - started,
        output
    };
}
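
A minimal invocation sketch: `runVerification` runs each command through a shell in `cwd`, stops at the first failure, and returns per-command results. The commands and timeout below are placeholders:

import { runVerification } from './engine.js';

// Hypothetical tier0 commands; each runs via `shell: true` in the given cwd.
const result = await runVerification('tier0', ['node --version', 'npm run lint'], process.cwd(), 120);

console.log(result.ok, result.duration_ms);
for (const cmd of result.command_results) {
    console.log(cmd.command, cmd.exit_code);
}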

package/dist/workers/__tests__/claude.test.js
@@ -0,0 +1,88 @@
import { describe, it, expect } from 'vitest';
function extractTextFromClaudeJson(output) {
    try {
        const parsed = JSON.parse(output);
        return parsed.result || parsed.content || parsed.message || output;
    }
    catch {
        // If not valid JSON, return raw output
        return output;
    }
}
describe('extractTextFromClaudeJson', () => {
    it('extracts result field from valid JSON', () => {
        const input = '{"result":"Hello World"}';
        expect(extractTextFromClaudeJson(input)).toBe('Hello World');
    });
    it('extracts content field when result is missing', () => {
        const input = '{"content":"Content text","other":"ignored"}';
        expect(extractTextFromClaudeJson(input)).toBe('Content text');
    });
    it('extracts message field as fallback', () => {
        const input = '{"message":"Message text"}';
        expect(extractTextFromClaudeJson(input)).toBe('Message text');
    });
    it('returns raw output when not valid JSON', () => {
        const input = 'This is plain text, not JSON';
        expect(extractTextFromClaudeJson(input)).toBe('This is plain text, not JSON');
    });
    it('returns raw output for truncated JSON', () => {
        const input = '{"result":"trun';
        expect(extractTextFromClaudeJson(input)).toBe('{"result":"trun');
    });
    it('returns raw output for empty object with no known fields', () => {
        const input = '{"unknown":"field"}';
        // Returns the original input since result/content/message are all falsy
        expect(extractTextFromClaudeJson(input)).toBe('{"unknown":"field"}');
    });
    it('handles empty string result', () => {
        const input = '{"result":"","content":"fallback"}';
        // Empty string is falsy, so falls through to content
        expect(extractTextFromClaudeJson(input)).toBe('fallback');
    });
    it('handles null values', () => {
        const input = '{"result":null,"content":"actual content"}';
        expect(extractTextFromClaudeJson(input)).toBe('actual content');
    });
    it('handles nested JSON in result', () => {
        const input = '{"result":"{\\"nested\\":\\"json\\"}"}';
        expect(extractTextFromClaudeJson(input)).toBe('{"nested":"json"}');
    });
    it('handles array response gracefully', () => {
        const input = '["array","response"]';
        // Arrays don't have result/content/message, returns original
        expect(extractTextFromClaudeJson(input)).toBe('["array","response"]');
    });
    it('handles error field in response', () => {
        const input = '{"error":"Something went wrong","result":""}';
        // Empty result is falsy, but we don't currently use error field
        // This returns the raw input since result/content/message are empty/missing
        expect(extractTextFromClaudeJson(input)).toBe('{"error":"Something went wrong","result":""}');
    });
    it('handles whitespace in output', () => {
        const input = ' {"result":"with spaces"} ';
        // JSON.parse handles leading/trailing whitespace
        expect(extractTextFromClaudeJson(input)).toBe('with spaces');
    });
    it('handles newlines in result text', () => {
        const input = '{"result":"line1\\nline2\\nline3"}';
        expect(extractTextFromClaudeJson(input)).toBe('line1\nline2\nline3');
    });
});
describe('Claude JSON error handling', () => {
    it('fails loud on completely invalid input', () => {
        // Current implementation returns the raw output, which is reasonable
        // but we might want to distinguish "valid text" from "parse error" later
        const input = '}{invalid';
        const result = extractTextFromClaudeJson(input);
        // Currently returns raw - this is acceptable behavior
        expect(result).toBe('}{invalid');
    });
    it('handles BOM characters', () => {
        const input = '\uFEFF{"result":"with BOM"}';
        // JSON.parse may fail with BOM prefix
        const result = extractTextFromClaudeJson(input);
        // Should either parse successfully or return raw
        expect(result.includes('with BOM') || result.includes('\uFEFF')).toBe(true);
    });
});

package/dist/workers/__tests__/codex.test.js
@@ -0,0 +1,81 @@
import { describe, it, expect } from 'vitest';
function extractTextFromCodexJsonl(output) {
    const lines = output.trim().split('\n').filter(Boolean);
    const texts = [];
    for (const line of lines) {
        try {
            const event = JSON.parse(line);
            if (event.type === 'item.completed' && event.item?.type === 'agent_message' && event.item.text) {
                texts.push(event.item.text);
            }
        }
        catch {
            // Skip malformed lines
        }
    }
    return texts.join('\n');
}
describe('extractTextFromCodexJsonl', () => {
    it('extracts agent_message text from valid JSONL', () => {
        const input = `{"type":"thread.started","thread_id":"abc123"}
{"type":"turn.started"}
{"type":"item.completed","item":{"id":"item_0","type":"reasoning","text":"thinking..."}}
{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"HELLO WORLD"}}
{"type":"turn.completed","usage":{"input_tokens":100,"output_tokens":10}}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('HELLO WORLD');
    });
    it('concatenates multiple agent_message items', () => {
        const input = `{"type":"item.completed","item":{"type":"agent_message","text":"First message"}}
{"type":"item.completed","item":{"type":"agent_message","text":"Second message"}}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('First message\nSecond message');
    });
    it('ignores reasoning and command_execution items', () => {
        const input = `{"type":"item.completed","item":{"type":"reasoning","text":"I should run git status"}}
{"type":"item.completed","item":{"type":"command_execution","command":"git status","aggregated_output":"clean"}}
{"type":"item.completed","item":{"type":"agent_message","text":"Done!"}}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('Done!');
    });
    it('handles malformed JSON lines gracefully', () => {
        const input = `{"type":"item.completed","item":{"type":"agent_message","text":"Valid"}}
this is not json
{"broken json
{"type":"item.completed","item":{"type":"agent_message","text":"Also valid"}}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('Valid\nAlso valid');
    });
    it('returns empty string when no agent_message found', () => {
        const input = `{"type":"thread.started","thread_id":"abc"}
{"type":"turn.started"}
{"type":"item.completed","item":{"type":"reasoning","text":"thinking"}}
{"type":"turn.completed"}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('');
    });
    it('handles empty input', () => {
        expect(extractTextFromCodexJsonl('')).toBe('');
        expect(extractTextFromCodexJsonl(' ')).toBe('');
        expect(extractTextFromCodexJsonl('\n\n')).toBe('');
    });
    it('handles missing item.text gracefully', () => {
        const input = `{"type":"item.completed","item":{"type":"agent_message"}}
{"type":"item.completed","item":{"type":"agent_message","text":""}}
{"type":"item.completed","item":{"type":"agent_message","text":"Has text"}}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('Has text');
    });
    it('handles control characters and unicode', () => {
        const input = `{"type":"item.completed","item":{"type":"agent_message","text":"Hello\\nWorld\\twith\\ttabs"}}`;
        const result = extractTextFromCodexJsonl(input);
        expect(result).toBe('Hello\nWorld\twith\ttabs');
    });
    it('handles carriage returns in output', () => {
        const input = `{"type":"item.completed","item":{"type":"agent_message","text":"Line1"}}\r\n{"type":"item.completed","item":{"type":"agent_message","text":"Line2"}}`;
        const result = extractTextFromCodexJsonl(input);
        // After trim and split, we should still get both messages
        expect(result).toContain('Line1');
        expect(result).toContain('Line2');
    });
});

package/dist/workers/claude.js
@@ -0,0 +1,119 @@
import { execa } from 'execa';
function extractTextFromClaudeJson(output) {
    try {
        const parsed = JSON.parse(output);
        return parsed.result || parsed.content || parsed.message || output;
    }
    catch {
        // If not valid JSON, return raw output
        return output;
    }
}
export async function runClaude(input) {
    const { bin, args } = input.worker;
    try {
        const result = await execa(bin, args, {
            cwd: input.repo_path,
            input: input.prompt,
            stdout: 'pipe',
            stderr: 'pipe',
            timeout: 300000 // 5 min timeout
        });
        const rawOutput = result.stdout;
        const text = input.worker.output === 'json'
            ? extractTextFromClaudeJson(rawOutput)
            : rawOutput;
        return {
            status: result.exitCode === 0 ? 'ok' : 'failed',
            commands_run: [`${bin} ${args.join(' ')}`],
            observations: [text || rawOutput]
        };
    }
    catch (error) {
        const err = error;
        const output = err.stdout || err.stderr || err.message || 'Claude command failed';
        return {
            status: 'failed',
            commands_run: [`${bin} ${args.join(' ')}`],
            observations: [output]
        };
    }
}
/**
 * Classify error output into categories for preflight reporting.
 */
function classifyError(output) {
    const lower = output.toLowerCase();
    // Auth errors
    if (lower.includes('oauth') || lower.includes('token expired') ||
        lower.includes('authentication') || lower.includes('login') ||
        lower.includes('401') || lower.includes('unauthorized') ||
        lower.includes('not authenticated') || lower.includes('sign in')) {
        return 'auth';
    }
    // Network errors
    if (lower.includes('enotfound') || lower.includes('econnrefused') ||
        lower.includes('network') || lower.includes('timeout') ||
        lower.includes('econnreset') || lower.includes('socket')) {
        return 'network';
    }
    // Rate limit errors
    if (lower.includes('rate limit') || lower.includes('429') ||
        lower.includes('too many requests') || lower.includes('quota')) {
        return 'rate_limit';
    }
    return 'unknown';
}
/**
 * Ping Claude to verify auth and connectivity.
 * Success = process exits 0 within timeout.
 */
export async function pingClaude(worker) {
    const { bin, args } = worker;
    const start = Date.now();
    const pingPrompt = 'Respond with exactly: ok';
    try {
        const result = await execa(bin, args, {
            input: pingPrompt,
            stdout: 'pipe',
            stderr: 'pipe',
            timeout: 15000 // 15s timeout for ping
        });
        const ms = Date.now() - start;
        // Success = exit code 0
        if (result.exitCode === 0) {
            return { ok: true, worker: 'claude', ms };
        }
        // Non-zero exit
        const output = result.stderr || result.stdout || '';
        return {
            ok: false,
            worker: 'claude',
            ms,
            category: classifyError(output),
            message: output.slice(0, 200)
        };
    }
    catch (error) {
        const ms = Date.now() - start;
        const err = error;
        const output = err.stderr || err.stdout || err.message || 'Ping failed';
        // Check for timeout
        if (err.code === 'ETIMEDOUT' || (err.message && err.message.includes('timed out'))) {
            return {
                ok: false,
                worker: 'claude',
                ms,
                category: 'network',
                message: 'Ping timed out'
            };
        }
        return {
            ok: false,
            worker: 'claude',
            ms,
            category: classifyError(output),
            message: output.slice(0, 200)
        };
    }
}
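
A preflight sketch using `pingClaude` and `runClaude`; the worker config shape (`bin`, `args`, `output`) follows what the functions above read, and the binary name and empty args are placeholders rather than a documented CLI invocation:

import { pingClaude, runClaude } from './claude.js';

// Placeholder worker config; bin/args depend on the local CLI setup.
const worker = { bin: 'claude', args: [], output: 'text' };

const ping = await pingClaude(worker);
if (!ping.ok) {
    console.error(`claude ping failed (${ping.category}): ${ping.message}`);
}
else {
    const result = await runClaude({
        worker,
        repo_path: process.cwd(),
        prompt: 'Respond with exactly: ok'
    });
    console.log(result.status, result.observations[0]);
}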