agentboss 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,265 +1,316 @@
1
- /**
2
- * LLM judge runner — spawns a local AI CLI for evaluation tasks.
3
- *
4
- * Detection order (first one found wins):
5
- * 1. `opencode run -p "<prompt>"`
6
- * 2. `claude -p "<prompt>"`
7
- *
8
- * Returns parsed JSON. Failures (CLI missing / timeout / non-JSON
9
- * output) resolve to `null` so callers can fall back to rule-based
10
- * heuristics. Never throws.
11
- *
12
- * @author Felix
13
- */
14
-
15
- 'use strict';
16
-
17
- const { spawn } = require('child_process');
18
- const { JUDGE_SENTINEL } = require('./judge-prompts');
19
-
20
- /**
21
- * Prepend the JUDGE_SENTINEL to the prompt if it isn't already the very
22
- * first line. This is the last-line defence that guarantees *every*
23
- * LLM call originating from aboss is recognisable when its session
24
- * later gets re-imported by the ETL (see server/etl/judge-filter.js).
25
- *
26
- * Callers (e.g. buildE1Prompt / buildO1Prompt) already prepend the
27
- * sentinel, but enforcing it here means any future caller — or any
28
- * accidentally-omitted sentinel — still produces a tagged session
29
- * rather than polluting the user's own work.
30
- */
31
- function ensureSentinel(prompt) {
32
- if (typeof prompt !== 'string') return prompt;
33
- if (prompt.startsWith(JUDGE_SENTINEL)) return prompt;
34
- return `${JUDGE_SENTINEL}(内部标记,忽略本行)\n${prompt}`;
35
- }
36
-
37
- // ---------------------------------------------------------------------------
38
- // Detection
39
- // ---------------------------------------------------------------------------
40
-
41
- /**
42
- * CLI candidates.
43
- *
44
- * `argv` builds the command-line args. When `stdinPrompt: true`, the
45
- * prompt is fed on STDIN instead of being inlined into argv — this is
46
- * essential on Windows where the command-line cap is ~8 KB and our
47
- * judge prompts run 10 KB+.
48
- */
49
- const CANDIDATES = [
50
- // opencode reads stdin when no positional arg is given (after `run`)
51
- { name: 'opencode', bin: 'opencode', argv: () => ['run'], stdinPrompt: true },
52
- // claude -p reads stdin when -p is used without an inline prompt
53
- { name: 'claude', bin: 'claude', argv: () => ['-p'], stdinPrompt: true },
54
- ];
55
-
56
- let _cachedCli = undefined; // null = detected none; obj = found
57
-
58
- /**
59
- * Detect which CLI is available. Tries `bin --version` for each candidate.
60
- * Caches the result for the process lifetime.
61
- *
62
- * @returns {Promise<{name:string, bin:string, argv:Function}|null>}
63
- */
64
- async function detectAvailableCli() {
65
- if (_cachedCli !== undefined) return _cachedCli;
66
- for (const c of CANDIDATES) {
67
- if (await canSpawn(c.bin)) {
68
- _cachedCli = c;
69
- return c;
70
- }
71
- }
72
- _cachedCli = null;
73
- return null;
74
- }
75
-
76
- /** Reset the detection cache. Mostly useful in tests / settings reload. */
77
- function _resetCache() { _cachedCli = undefined; }
78
-
79
- /**
80
- * Try to spawn `bin --version`. Resolves true on exit code 0. Cross-
81
- * platform: on Windows `bin` is resolved via PATH automatically by spawn.
82
- */
83
- function canSpawn(bin) {
84
- return new Promise((resolve) => {
85
- let resolved = false;
86
- const settle = (v) => { if (!resolved) { resolved = true; resolve(v); } };
87
-
88
- try {
89
- const proc = spawn(bin, ['--version'], {
90
- stdio: 'ignore',
91
- shell: process.platform === 'win32',
92
- });
93
- proc.on('error', () => settle(false));
94
- proc.on('exit', (code) => settle(code === 0));
95
- // hard timeout
96
- setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} settle(false); }, 5000);
97
- } catch {
98
- settle(false);
99
- }
100
- });
101
- }
102
-
103
- // ---------------------------------------------------------------------------
104
- // Runner
105
- // ---------------------------------------------------------------------------
106
-
107
- /**
108
- * Spawn the chosen CLI with the prompt, capture stdout, and try to parse
109
- * it as JSON. The caller's prompt should *demand* JSON output.
110
- *
111
- * Options:
112
- * timeoutMs (default 30_000)
113
- * maxBytes (default 256 KB) — guard against runaway output
114
- *
115
- * Resolves:
116
- * { ok: true, data: any, raw: string, cli: 'opencode'|'claude' }
117
- * { ok: false, reason: 'no-cli' | 'timeout' | 'exit-non-zero' | 'bad-json' | 'spawn-error', raw?: string, error?: string }
118
- *
119
- * @param {Object} opts
120
- * @returns {Promise<Object>}
121
- */
122
- async function runJudge(opts = {}) {
123
- const { prompt: rawPrompt, timeoutMs = 30_000, maxBytes = 256 * 1024 } = opts;
124
- if (!rawPrompt || typeof rawPrompt !== 'string') {
125
- return { ok: false, reason: 'no-prompt' };
126
- }
127
- // Stamp the sentinel onto every outbound prompt so the ETL can later
128
- // recognise and discard the session this CLI call will create.
129
- const prompt = ensureSentinel(rawPrompt);
130
-
131
- const cli = await detectAvailableCli();
132
- if (!cli) return { ok: false, reason: 'no-cli' };
133
-
134
- return new Promise((resolve) => {
135
- let resolved = false;
136
- const settle = (v) => { if (!resolved) { resolved = true; resolve(v); } };
137
-
138
- let proc;
139
- try {
140
- const useStdin = cli.stdinPrompt === true;
141
- proc = spawn(cli.bin, cli.argv(prompt), {
142
- stdio: [useStdin ? 'pipe' : 'ignore', 'pipe', 'pipe'],
143
- shell: process.platform === 'win32',
144
- windowsHide: true,
145
- });
146
- if (useStdin && proc.stdin) {
147
- proc.stdin.on('error', () => {}); // EPIPE if CLI exits early
148
- proc.stdin.end(prompt, 'utf8');
149
- }
150
- } catch (err) {
151
- return settle({ ok: false, reason: 'spawn-error', error: err.message });
152
- }
153
-
154
- let stdout = '';
155
- let stderr = '';
156
- let truncated = false;
157
-
158
- proc.stdout.on('data', (chunk) => {
159
- if (truncated) return;
160
- stdout += chunk.toString('utf8');
161
- if (stdout.length > maxBytes) {
162
- stdout = stdout.slice(0, maxBytes);
163
- truncated = true;
164
- try { proc.kill('SIGKILL'); } catch {}
165
- }
166
- });
167
- proc.stderr.on('data', (chunk) => { stderr += chunk.toString('utf8'); });
168
-
169
- proc.on('error', (err) => settle({ ok: false, reason: 'spawn-error', error: err.message }));
170
-
171
- proc.on('exit', (code) => {
172
- if (code !== 0 && !truncated) {
173
- return settle({ ok: false, reason: 'exit-non-zero', raw: stdout, error: stderr.slice(0, 500) });
174
- }
175
- const parsed = extractJson(stdout);
176
- if (parsed === undefined) {
177
- // Include a snippet of stderr too — when claude / opencode
178
- // print a warning ("not logged in", "rate-limited", "use
179
- // --print"), the stdout JSON parse fails but the actual cause
180
- // lives in stderr.
181
- return settle({
182
- ok: false,
183
- reason: 'bad-json',
184
- raw: stdout.slice(0, 500),
185
- error: stderr ? stderr.slice(0, 500) : undefined,
186
- });
187
- }
188
- settle({ ok: true, data: parsed, raw: stdout, cli: cli.name });
189
- });
190
-
191
- const t = setTimeout(() => {
192
- try { proc.kill('SIGKILL'); } catch {}
193
- // Surface stderr on timeout — usually has "waiting for input" or
194
- // a prompt that explains why the CLI is hanging.
195
- settle({
196
- ok: false,
197
- reason: 'timeout',
198
- error: stderr ? stderr.slice(0, 500) : undefined,
199
- });
200
- }, timeoutMs);
201
- proc.on('exit', () => clearTimeout(t));
202
- });
203
- }
204
-
205
- /**
206
- * Try to find a JSON value in raw stdout. Tolerates leading log lines
207
- * by scanning for the first { or [. Returns the parsed value or
208
- * undefined on failure.
209
- */
210
- function extractJson(raw) {
211
- if (!raw) return undefined;
212
- // common case: stdout is pure JSON
213
- const trimmed = raw.trim();
214
- try { return JSON.parse(trimmed); } catch {}
215
- // fall back: find first { or [
216
- const i1 = trimmed.indexOf('{');
217
- const i2 = trimmed.indexOf('[');
218
- let start = -1;
219
- if (i1 >= 0 && i2 >= 0) start = Math.min(i1, i2);
220
- else if (i1 >= 0) start = i1;
221
- else if (i2 >= 0) start = i2;
222
- if (start < 0) return undefined;
223
-
224
- // find matching last brace/bracket of the same kind
225
- const open = trimmed[start];
226
- const close = open === '{' ? '}' : ']';
227
- const end = trimmed.lastIndexOf(close);
228
- if (end < start) return undefined;
229
- try { return JSON.parse(trimmed.slice(start, end + 1)); } catch {}
230
- return undefined;
231
- }
232
-
233
- // ---------------------------------------------------------------------------
234
- // Concurrency guard
235
- // ---------------------------------------------------------------------------
236
-
237
- let _inFlight = 0;
238
- const _waiters = [];
239
- const MAX_CONCURRENT = 2;
240
-
241
- /** Run `fn` under a 2-wide semaphore so we don't fork-bomb the CLI. */
242
- function withSlot(fn) {
243
- return new Promise((resolve) => {
244
- const start = async () => {
245
- _inFlight++;
246
- try { resolve(await fn()); }
247
- finally {
248
- _inFlight--;
249
- const next = _waiters.shift();
250
- if (next) next();
251
- }
252
- };
253
- if (_inFlight < MAX_CONCURRENT) start();
254
- else _waiters.push(start);
255
- });
256
- }
257
-
258
- module.exports = {
259
- detectAvailableCli,
260
- runJudge,
261
- withSlot,
262
- // exported for tests
263
- _resetCache,
264
- extractJson,
265
- };
1
+ /**
2
+ * LLM judge runner — spawns a local AI CLI for evaluation tasks.
3
+ *
4
+ * Detection order (first one found wins):
5
+ * 1. `opencode run` (prompt piped on stdin)
6
+ * 2. `claude -p` (prompt piped on stdin)
7
+ *
8
+ * Resolves `{ ok: true, data }` with the parsed JSON. Failures (CLI
9
+ * missing / timeout / non-JSON output) resolve to `{ ok: false, reason }`
10
+ * so callers can fall back to rule-based heuristics. Never throws.
11
+ *
12
+ * @author Felix
13
+ */
14
+
15
+ 'use strict';
16
+
17
+ const { spawn } = require('child_process');
18
+ const { JUDGE_SENTINEL } = require('./judge-prompts');
19
+
20
+ /**
21
+ * Prepend the JUDGE_SENTINEL to the prompt if it isn't already the very
22
+ * first line. This is the last-line defence that guarantees *every*
23
+ * LLM call originating from aboss is recognisable when its session
24
+ * later gets re-imported by the ETL (see server/etl/judge-filter.js).
25
+ *
26
+ * Callers (e.g. buildE1Prompt / buildO1Prompt) already prepend the
27
+ * sentinel, but enforcing it here means any future caller — or any
28
+ * accidentally-omitted sentinel — still produces a tagged session
29
+ * rather than polluting the user's own work.
30
+ */
31
+ function ensureSentinel(prompt) {
32
+ if (typeof prompt !== 'string') return prompt;
33
+ if (prompt.startsWith(JUDGE_SENTINEL)) return prompt;
34
+ return `${JUDGE_SENTINEL}(内部标记,忽略本行)\n${prompt}`;
35
+ }
36
+
37
+ // ---------------------------------------------------------------------------
38
+ // Detection
39
+ // ---------------------------------------------------------------------------
40
+
41
+ /**
42
+ * CLI candidates.
43
+ *
44
+ * `argv` builds the command-line args. When `stdinPrompt: true`, the
45
+ * prompt is fed on STDIN instead of being inlined into argv — this is
46
+ * essential on Windows where the command-line cap is ~8 KB and our
47
+ * judge prompts run 10 KB+.
48
+ */
49
+ const CANDIDATES = [
50
+ // opencode reads stdin when no positional arg is given (after `run`)
51
+ { name: 'opencode', bin: 'opencode', argv: () => ['run'], stdinPrompt: true },
52
+ // claude -p reads stdin when -p is used without an inline prompt
53
+ { name: 'claude', bin: 'claude', argv: () => ['-p'], stdinPrompt: true },
54
+ ];
55
+
56
+ let _cachedCli = undefined; // null = detected none; obj = found (default order)
57
+ let _cachedAll = undefined; // Array<{name, bin, available}> | undefined
58
+
59
+ /**
60
+ * Detect which CLI to use.
61
+ *
62
+ * @param {string} [preferred] one of 'auto' | 'opencode' | 'claude'.
63
+ * - 'opencode' / 'claude': return that CLI iff it's available; otherwise
64
+ * fall back to the first other available CLI (so a user pick never
65
+ * silently disables LLM features when their preferred CLI vanishes).
66
+ * - 'auto' (default): first available in CANDIDATES order.
67
+ *
68
+ * Caches the *default-order* result for the process lifetime; preferred
69
+ * picks are computed against `detectAllCli()` (also cached) so the result
70
+ * is consistent across calls without re-spawning.
71
+ *
72
+ * @returns {Promise<{name:string, bin:string, argv:Function, stdinPrompt:boolean}|null>}
73
+ */
74
+ async function detectAvailableCli(preferred = 'auto') {
75
+ const pref = String(preferred || 'auto').toLowerCase();
76
+
77
+ if (pref === 'auto') {
78
+ if (_cachedCli !== undefined) return _cachedCli;
79
+ for (const c of CANDIDATES) {
80
+ if (await canSpawn(c.bin)) {
81
+ _cachedCli = c;
82
+ return c;
83
+ }
84
+ }
85
+ _cachedCli = null;
86
+ return null;
87
+ }
88
+
89
+ // Explicit preference: consult the full availability map and try the
90
+ // preferred one first, then fall back to any other available CLI.
91
+ const all = await detectAllCli();
92
+ const wanted = all.find((x) => x.name === pref && x.available);
93
+ if (wanted) return CANDIDATES.find((c) => c.name === wanted.name) || null;
94
+ // Preferred not installed — fall back to whatever IS available
95
+ for (const c of CANDIDATES) {
96
+ const entry = all.find((x) => x.name === c.name);
97
+ if (entry && entry.available) return c;
98
+ }
99
+ return null;
100
+ }
101
+
102
+ /**
103
+ * Detect every candidate CLI in parallel. Used by the Settings page to
104
+ * show all options the user can pick from.
105
+ *
106
+ * @returns {Promise<Array<{name:string, bin:string, available:boolean}>>}
107
+ */
108
+ async function detectAllCli() {
109
+ if (_cachedAll !== undefined) return _cachedAll;
110
+ const results = await Promise.all(
111
+ CANDIDATES.map(async (c) => ({
112
+ name: c.name,
113
+ bin: c.bin,
114
+ available: await canSpawn(c.bin),
115
+ }))
116
+ );
117
+ _cachedAll = results;
118
+ return results;
119
+ }
120
+
121
+ /** Reset detection caches. Used by tests and after settings reload. */
122
+ function _resetCache() { _cachedCli = undefined; _cachedAll = undefined; }
123
+
124
+ /**
125
+ * Try to spawn `bin --version`. Resolves true on exit code 0, and false on a
126
+ * spawn error, a non-zero exit, or the 5 s timeout. On Windows the command is run through a shell.
127
+ */
128
+ function canSpawn(bin) {
129
+ return new Promise((resolve) => {
130
+ let resolved = false;
131
+ const settle = (v) => { if (!resolved) { resolved = true; resolve(v); } };
132
+
133
+ try {
134
+ const proc = spawn(bin, ['--version'], {
135
+ stdio: 'ignore',
136
+ shell: process.platform === 'win32',
137
+ });
138
+ proc.on('error', () => settle(false));
139
+ proc.on('exit', (code) => settle(code === 0));
140
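+ // hard timeout (5 s) — NOTE(review): the timer is never cleared, so it stays pending even after the probe has already settled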
141
+ setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} settle(false); }, 5000);
142
+ } catch {
143
+ settle(false);
144
+ }
145
+ });
146
+ }
147
+
148
+ // ---------------------------------------------------------------------------
149
+ // Runner
150
+ // ---------------------------------------------------------------------------
151
+
152
+ /**
153
+ * Spawn the chosen CLI with the prompt, capture stdout, and try to parse
154
+ * it as JSON. The caller's prompt should *demand* JSON output.
155
+ *
156
+ * Options:
157
+ * timeoutMs (default 30_000)
158
+ * maxBytes (default 256 KB) — guard against runaway output
159
+ * preferredCli (default 'auto') — passed through to detectAvailableCli()
160
+ * Resolves:
161
+ * { ok: true, data: any, raw: string, cli: 'opencode'|'claude' }
162
+ * { ok: false, reason: 'no-prompt' | 'no-cli' | 'timeout' | 'exit-non-zero' | 'bad-json' | 'spawn-error', raw?: string, error?: string }
163
+ *
164
+ * @param {Object} opts
165
+ * @returns {Promise<Object>}
166
+ */
167
+ async function runJudge(opts = {}) {
168
+ const {
169
+ prompt: rawPrompt,
170
+ timeoutMs = 30_000,
171
+ maxBytes = 256 * 1024,
172
+ preferredCli = 'auto',
173
+ } = opts;
174
+ if (!rawPrompt || typeof rawPrompt !== 'string') {
175
+ return { ok: false, reason: 'no-prompt' };
176
+ }
177
+ // Stamp the sentinel onto every outbound prompt so the ETL can later
178
+ // recognise and discard the session this CLI call will create.
179
+ const prompt = ensureSentinel(rawPrompt);
180
+
181
+ const cli = await detectAvailableCli(preferredCli);
182
+ if (!cli) return { ok: false, reason: 'no-cli' };
183
+
184
+ return new Promise((resolve) => {
185
+ let resolved = false;
186
+ const settle = (v) => { if (!resolved) { resolved = true; resolve(v); } };
187
+
188
+ let proc;
189
+ try {
190
+ const useStdin = cli.stdinPrompt === true;
191
+ proc = spawn(cli.bin, cli.argv(prompt), {
192
+ stdio: [useStdin ? 'pipe' : 'ignore', 'pipe', 'pipe'],
193
+ shell: process.platform === 'win32',
194
+ windowsHide: true,
195
+ });
196
+ if (useStdin && proc.stdin) {
197
+ proc.stdin.on('error', () => {}); // EPIPE if CLI exits early
198
+ proc.stdin.end(prompt, 'utf8');
199
+ }
200
+ } catch (err) {
201
+ return settle({ ok: false, reason: 'spawn-error', error: err.message });
202
+ }
203
+
204
+ let stdout = '';
205
+ let stderr = '';
206
+ let truncated = false;
207
+
208
+ proc.stdout.on('data', (chunk) => {
209
+ if (truncated) return;
210
+ stdout += chunk.toString('utf8');
211
+ if (stdout.length > maxBytes) {
212
+ stdout = stdout.slice(0, maxBytes);
213
+ truncated = true;
214
+ try { proc.kill('SIGKILL'); } catch {}
215
+ }
216
+ });
217
+ proc.stderr.on('data', (chunk) => { stderr += chunk.toString('utf8'); });
218
+
219
+ proc.on('error', (err) => settle({ ok: false, reason: 'spawn-error', error: err.message }));
220
+
221
+ proc.on('exit', (code) => {
222
+ if (code !== 0 && !truncated) {
223
+ return settle({ ok: false, reason: 'exit-non-zero', raw: stdout, error: stderr.slice(0, 500) });
224
+ }
225
+ const parsed = extractJson(stdout);
226
+ if (parsed === undefined) {
227
+ // Include a snippet of stderr too — when claude / opencode
228
+ // print a warning ("not logged in", "rate-limited", "use
229
+ // --print"), the stdout JSON parse fails but the actual cause
230
+ // lives in stderr.
231
+ return settle({
232
+ ok: false,
233
+ reason: 'bad-json',
234
+ raw: stdout.slice(0, 500),
235
+ error: stderr ? stderr.slice(0, 500) : undefined,
236
+ });
237
+ }
238
+ settle({ ok: true, data: parsed, raw: stdout, cli: cli.name });
239
+ });
240
+
241
+ const t = setTimeout(() => {
242
+ try { proc.kill('SIGKILL'); } catch {}
243
+ // Surface stderr on timeout — usually has "waiting for input" or
244
+ // a prompt that explains why the CLI is hanging.
245
+ settle({
246
+ ok: false,
247
+ reason: 'timeout',
248
+ error: stderr ? stderr.slice(0, 500) : undefined,
249
+ });
250
+ }, timeoutMs);
251
+ proc.on('exit', () => clearTimeout(t));
252
+ });
253
+ }
254
+
255
+ /**
256
+ * Try to find a JSON value in raw stdout. Tolerates leading log lines
257
+ * by scanning for the first { or [. Returns the parsed value or
258
+ * undefined on failure.
259
+ */
260
+ function extractJson(raw) {
261
+ if (!raw) return undefined;
262
+ // common case: stdout is pure JSON
263
+ const trimmed = raw.trim();
264
+ try { return JSON.parse(trimmed); } catch {}
265
+ // fall back: find first { or [
266
+ const i1 = trimmed.indexOf('{');
267
+ const i2 = trimmed.indexOf('[');
268
+ let start = -1;
269
+ if (i1 >= 0 && i2 >= 0) start = Math.min(i1, i2);
270
+ else if (i1 >= 0) start = i1;
271
+ else if (i2 >= 0) start = i2;
272
+ if (start < 0) return undefined;
273
+
274
+ // find matching last brace/bracket of the same kind
275
+ const open = trimmed[start];
276
+ const close = open === '{' ? '}' : ']';
277
+ const end = trimmed.lastIndexOf(close);
278
+ if (end < start) return undefined;
279
+ try { return JSON.parse(trimmed.slice(start, end + 1)); } catch {}
280
+ return undefined;
281
+ }
282
+
283
+ // ---------------------------------------------------------------------------
284
+ // Concurrency guard
285
+ // ---------------------------------------------------------------------------
286
+
287
+ let _inFlight = 0;
288
+ const _waiters = [];
289
+ const MAX_CONCURRENT = 2;
290
+
291
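+ /** Run `fn` under a 2-wide (MAX_CONCURRENT) semaphore so we don't fork-bomb the CLI. NOTE(review): only `resolve` is wired up, so if `fn` rejects the returned promise never settles. */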
292
+ function withSlot(fn) {
293
+ return new Promise((resolve) => {
294
+ const start = async () => {
295
+ _inFlight++;
296
+ try { resolve(await fn()); }
297
+ finally {
298
+ _inFlight--;
299
+ const next = _waiters.shift();
300
+ if (next) next();
301
+ }
302
+ };
303
+ if (_inFlight < MAX_CONCURRENT) start();
304
+ else _waiters.push(start);
305
+ });
306
+ }
307
+
308
+ module.exports = {
309
+ detectAvailableCli,
310
+ detectAllCli,
311
+ runJudge,
312
+ withSlot,
313
+ // exported for tests
314
+ _resetCache,
315
+ extractJson,
316
+ };