ralph-prd 3.0.3 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/ralph/lib/transport.mjs +68 -9
- package/ralph/test/transport.test.mjs +42 -3
package/package.json
CHANGED
package/ralph/lib/transport.mjs
CHANGED
|
@@ -26,7 +26,7 @@ import { relative } from 'path';
|
|
|
26
26
|
/** Timeout for the preflight check only — should always be fast. */
|
|
27
27
|
const PREFLIGHT_TIMEOUT_MS = 30_000;
|
|
28
28
|
|
|
29
|
-
/** Default timeout for send() — 20 minutes per CLI session. */
|
|
29
|
+
/** Default timeout for send() — 20 minutes of inactivity per CLI session. */
|
|
30
30
|
const SEND_TIMEOUT_MS = 20 * 60 * 1000;
|
|
31
31
|
|
|
32
32
|
const CLI_FLAGS = [
|
|
@@ -60,6 +60,29 @@ export class TransportError extends Error {
|
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
+
// ─── Retryable stderr detection ───────────────────────────────────────────────
|
|
64
|
+
//
|
|
65
|
+
// Claude CLI writes structured JSON error types to stderr on API failures.
|
|
66
|
+
// These are machine-readable strings that never appear in response prose,
|
|
67
|
+
// so matching them on stderr is safe.
|
|
68
|
+
// Source: https://docs.anthropic.com/en/api/errors
|
|
69
|
+
|
|
70
|
+
// Substrings that mark a failure as transient when they appear in the CLI's
// stderr. Entries must be lowercase: isRetryableStderr() lowercases its input
// before matching, so a mixed-case entry here would never match.
const RETRYABLE_STDERR_TYPES = [
  'rate_limit_error',     // 429 — account rate limit
  'overloaded_error',     // 529 — API overloaded
  // NOTE(review): because this entry is in the list, preflight() reports an
  // authentication_error on stderr as 'rate_limit' rather than 'auth' —
  // confirm that retrying on credential failures is intended.
  'authentication_error', // 401 — auth/token issue
  'api_error',            // 500 — internal API error
  'timeout_error',        // 504 — request timed out
  'econnrefused',
  'econnreset',
  'socket hang up',
];

/**
 * Reports whether captured stderr contains one of the known retryable markers.
 *
 * Matching is a case-insensitive substring search against
 * RETRYABLE_STDERR_TYPES.
 *
 * @param {string | Buffer | null | undefined} text - Captured stderr output.
 *   Nullish values are treated as empty. Any other non-string value (for
 *   example a raw Buffer chunk) is converted with String() before matching
 *   instead of throwing on the missing toLowerCase() method.
 * @returns {boolean} true when at least one marker is present, else false.
 */
export function isRetryableStderr(text) {
  const lower = String(text ?? '').toLowerCase();
  return RETRYABLE_STDERR_TYPES.some((marker) => lower.includes(marker));
}
|
|
85
|
+
|
|
63
86
|
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
64
87
|
|
|
65
88
|
function resolveCLI() {
|
|
@@ -285,10 +308,15 @@ export async function preflight() {
|
|
|
285
308
|
if (code === 0 || code === null) {
|
|
286
309
|
resolve();
|
|
287
310
|
} else {
|
|
311
|
+
// Distinguish rate limit / transient API errors from auth failures so
|
|
312
|
+
// callers can retry instead of treating it as a credential problem.
|
|
313
|
+
const errorType = isRetryableStderr(stderr) ? 'rate_limit' : 'auth';
|
|
314
|
+
const detail = stderr.trim()
|
|
315
|
+
? `stderr: ${stderr.trim()}`
|
|
316
|
+
: (errorType === 'rate_limit' ? 'rate limit or transient API error' : 'Is it installed and authenticated?');
|
|
288
317
|
reject(new TransportError(
|
|
289
|
-
`\`claude\` CLI preflight exited with code ${code}.
|
|
290
|
-
|
|
291
|
-
'auth'
|
|
318
|
+
`\`claude\` CLI preflight exited with code ${code}. ${detail}`,
|
|
319
|
+
errorType
|
|
292
320
|
));
|
|
293
321
|
}
|
|
294
322
|
});
|
|
@@ -343,16 +371,24 @@ export async function send(prompt, { onChunk, signal, timeoutMs } = {}) {
|
|
|
343
371
|
const child = spawn(cliBin, CLI_FLAGS, { stdio: ['pipe', 'pipe', 'pipe'] });
|
|
344
372
|
|
|
345
373
|
// ── Timeout: kills the CLI if no output arrives within the timeout window ──
|
|
346
|
-
|
|
374
|
+
// This is a no-activity timeout, not a hard cap — it resets on every stdout
|
|
375
|
+
// chunk so long-running tasks that keep producing output are never killed.
|
|
376
|
+
let timer = setTimeout(onTimeout, timeout);
|
|
377
|
+
function onTimeout() {
|
|
347
378
|
child.kill();
|
|
348
379
|
done(reject, new TransportError(
|
|
349
|
-
`\`claude\` CLI timed out after ${(timeout / 1000).toFixed(0)}s with no
|
|
380
|
+
`\`claude\` CLI timed out after ${(timeout / 1000).toFixed(0)}s with no output. ` +
|
|
350
381
|
'The session may have hung or lost connectivity.',
|
|
351
382
|
'timeout'
|
|
352
383
|
));
|
|
353
|
-
}
|
|
384
|
+
}
|
|
385
|
+
function resetTimer() {
|
|
386
|
+
clearTimeout(timer);
|
|
387
|
+
timer = setTimeout(onTimeout, timeout);
|
|
388
|
+
}
|
|
354
389
|
|
|
355
390
|
child.stdout.on('data', (chunk) => {
|
|
391
|
+
resetTimer();
|
|
356
392
|
lineBuffer += chunk.toString();
|
|
357
393
|
const lines = lineBuffer.split('\n');
|
|
358
394
|
lineBuffer = lines.pop() ?? '';
|
|
@@ -423,13 +459,36 @@ export async function send(prompt, { onChunk, signal, timeoutMs } = {}) {
|
|
|
423
459
|
if (code !== 0 && code !== null) {
|
|
424
460
|
const errMsg = stderr.trim() || `exited with code ${code}`;
|
|
425
461
|
const hadResult = resultText !== null;
|
|
462
|
+
// Check stderr for known transient API error types (rate limit, overload, etc.)
|
|
463
|
+
// and surface them as 'rate_limit' so callers can distinguish retryable
|
|
464
|
+
// failures from hard errors.
|
|
465
|
+
const errorType = isRetryableStderr(stderr) ? 'rate_limit' : 'response';
|
|
426
466
|
done(reject, new TransportError(
|
|
427
467
|
`\`claude\` CLI failed (exit ${code}${hadResult ? ', partial result received' : ''}): ${errMsg}`,
|
|
428
|
-
|
|
468
|
+
errorType
|
|
429
469
|
));
|
|
430
470
|
return;
|
|
431
471
|
}
|
|
432
|
-
|
|
472
|
+
|
|
473
|
+
// Exit code 0 with no output means the CLI quit before producing any
|
|
474
|
+
// response — this happens when a rate limit or transient API error causes
|
|
475
|
+
// the process to exit cleanly without emitting a result event.
|
|
476
|
+
// Write a recognisable token to stderr so ralph-afk's retry detector
|
|
477
|
+
// can mark this run as retryable and restart after a backoff.
|
|
478
|
+
const finalText = resultText ?? accumulatedText;
|
|
479
|
+
if (!finalText) {
|
|
480
|
+
process.stderr.write(
|
|
481
|
+
'[ralph] rate_limit_error: `claude` CLI exited cleanly with no output — ' +
|
|
482
|
+
'likely a rate limit or transient API error; will retry.\n'
|
|
483
|
+
);
|
|
484
|
+
done(reject, new TransportError(
|
|
485
|
+
'`claude` CLI exited cleanly but produced no output — possible rate limit or transient API error.',
|
|
486
|
+
'empty_response'
|
|
487
|
+
));
|
|
488
|
+
return;
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
done(resolve, finalText);
|
|
433
492
|
});
|
|
434
493
|
|
|
435
494
|
child.on('error', (err) => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { test, describe } from 'node:test';
|
|
2
2
|
import assert from 'node:assert/strict';
|
|
3
|
-
import { TransportError, getCumulativeCost, _addCost, _resetCost } from '../lib/transport.mjs';
|
|
3
|
+
import { TransportError, getCumulativeCost, _addCost, _resetCost, isRetryableStderr } from '../lib/transport.mjs';
|
|
4
4
|
|
|
5
5
|
describe('TransportError', () => {
|
|
6
6
|
|
|
@@ -20,8 +20,8 @@ describe('TransportError', () => {
|
|
|
20
20
|
assert.equal(err.type, 'auth');
|
|
21
21
|
});
|
|
22
22
|
|
|
23
|
-
test('type can be auth | timeout | response | network | parse', () => {
|
|
24
|
-
for (const type of ['auth', 'timeout', 'response', 'network', 'parse']) {
|
|
23
|
+
test('type can be auth | timeout | response | network | parse | empty_response | rate_limit', () => {
|
|
24
|
+
for (const type of ['auth', 'timeout', 'response', 'network', 'parse', 'empty_response', 'rate_limit']) {
|
|
25
25
|
const err = new TransportError('msg', type);
|
|
26
26
|
assert.equal(err.type, type);
|
|
27
27
|
}
|
|
@@ -30,6 +30,45 @@ describe('TransportError', () => {
|
|
|
30
30
|
});
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
// Unit tests for isRetryableStderr(): each case feeds a representative stderr
// string and checks whether it is classified as retryable.
describe('isRetryableStderr', () => {

  test('returns true for rate_limit_error', () => {
    assert.ok(isRetryableStderr('{"type":"error","error":{"type":"rate_limit_error"}}'));
  });

  test('returns true for overloaded_error', () => {
    assert.ok(isRetryableStderr('overloaded_error: API is temporarily overloaded'));
  });

  // The next three markers are in the retryable list but previously had no
  // coverage.
  test('returns true for api_error', () => {
    assert.ok(isRetryableStderr('{"type":"error","error":{"type":"api_error"}}'));
  });

  test('returns true for timeout_error', () => {
    assert.ok(isRetryableStderr('timeout_error: request timed out'));
  });

  test('returns true for econnrefused', () => {
    assert.ok(isRetryableStderr('Error: connect ECONNREFUSED 127.0.0.1:443'));
  });

  test('returns true for econnreset', () => {
    assert.ok(isRetryableStderr('Error: read ECONNRESET'));
  });

  test('returns true for socket hang up', () => {
    assert.ok(isRetryableStderr('Error: socket hang up'));
  });

  test('returns false for unrelated stderr', () => {
    assert.ok(!isRetryableStderr('SyntaxError: Unexpected token'));
  });

  test('returns false for empty string', () => {
    assert.ok(!isRetryableStderr(''));
  });

  test('returns false for null/undefined', () => {
    assert.ok(!isRetryableStderr(null));
    assert.ok(!isRetryableStderr(undefined));
  });

  // Input is lowercased before matching, so casing of the marker is irrelevant.
  test('is case-insensitive', () => {
    assert.ok(isRetryableStderr('RATE_LIMIT_ERROR'));
    assert.ok(isRetryableStderr('Socket Hang Up'));
  });

});
|
|
70
|
+
|
|
71
|
+
|
|
33
72
|
describe('getCumulativeCost', () => {
|
|
34
73
|
|
|
35
74
|
test('initial cost is 0 (after reset)', () => {
|