ralph-prd 3.0.3 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-prd",
3
- "version": "3.0.3",
3
+ "version": "3.0.5",
4
4
  "type": "module",
5
5
  "description": "AI-powered phased implementation runner for Claude Code — from PRD to shipped code",
6
6
  "bin": {
@@ -26,7 +26,7 @@ import { relative } from 'path';
26
26
  /** Timeout for the preflight check only — should always be fast. */
27
27
  const PREFLIGHT_TIMEOUT_MS = 30_000;
28
28
 
29
- /** Default timeout for send() — 20 minutes per CLI session. */
29
+ /** Default timeout for send() — 20 minutes of inactivity per CLI session. */
30
30
  const SEND_TIMEOUT_MS = 20 * 60 * 1000;
31
31
 
32
32
  const CLI_FLAGS = [
@@ -60,6 +60,29 @@ export class TransportError extends Error {
60
60
  }
61
61
  }
62
62
 
63
+ // ─── Retryable stderr detection ───────────────────────────────────────────────
64
+ //
65
+ // Claude CLI writes structured JSON error types to stderr on API failures.
66
+ // These are machine-readable strings that never appear in response prose,
67
+ // so matching them on stderr is safe.
68
+ // Source: https://docs.anthropic.com/en/api/errors
69
+
70
/**
 * Lower-case substrings that mark a stderr payload as a retryable failure.
 *
 * The first five entries are API error `type` strings; the last three are
 * network-level error fragments. Matching is a plain substring search against
 * the lower-cased stderr text, so every entry here must itself be lower-case.
 * Frozen so the list cannot be mutated at runtime.
 */
const RETRYABLE_STDERR_TYPES = Object.freeze([
  'rate_limit_error',     // 429 — account rate limit
  'overloaded_error',     // 529 — API overloaded
  // NOTE(review): because this entry is listed, preflight() reports an
  // authentication_error on stderr as 'rate_limit' rather than 'auth'.
  // Confirm that retrying on auth failures is intended.
  'authentication_error', // 401 — auth/token issue
  'api_error',            // 500 — internal API error
  'timeout_error',        // 504 — request timed out
  'econnrefused',
  'econnreset',
  'socket hang up',
]);

/**
 * Report whether stderr output contains one of the known retryable markers.
 *
 * The check is case-insensitive and substring-based. Non-string input (for
 * example a raw Buffer chunk) is coerced with String() instead of throwing;
 * null and undefined are treated as empty output.
 *
 * @param {unknown} text - Captured stderr content; may be null or undefined.
 * @returns {boolean} true when any entry of RETRYABLE_STDERR_TYPES occurs in the text.
 */
export function isRetryableStderr(text) {
  const lower = String(text ?? '').toLowerCase();
  return RETRYABLE_STDERR_TYPES.some((marker) => lower.includes(marker));
}
85
+
63
86
  // ─── Helpers ──────────────────────────────────────────────────────────────────
64
87
 
65
88
  function resolveCLI() {
@@ -285,10 +308,15 @@ export async function preflight() {
285
308
  if (code === 0 || code === null) {
286
309
  resolve();
287
310
  } else {
311
+ // Distinguish rate limit / transient API errors from auth failures so
312
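+ // callers can retry instead of treating it as a credential problem. NOTE: stderr containing `authentication_error` also matches isRetryableStderr, so it is reported as 'rate_limit', not 'auth'.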
313
+ const errorType = isRetryableStderr(stderr) ? 'rate_limit' : 'auth';
314
+ const detail = stderr.trim()
315
+ ? `stderr: ${stderr.trim()}`
316
+ : (errorType === 'rate_limit' ? 'rate limit or transient API error' : 'Is it installed and authenticated?');
288
317
  reject(new TransportError(
289
- `\`claude\` CLI preflight exited with code ${code}. ` +
290
- (stderr.trim() ? `stderr: ${stderr.trim()}` : 'Is it installed and authenticated?'),
291
- 'auth'
318
+ `\`claude\` CLI preflight exited with code ${code}. ${detail}`,
319
+ errorType
292
320
  ));
293
321
  }
294
322
  });
@@ -343,16 +371,24 @@ export async function send(prompt, { onChunk, signal, timeoutMs } = {}) {
343
371
  const child = spawn(cliBin, CLI_FLAGS, { stdio: ['pipe', 'pipe', 'pipe'] });
344
372
 
345
373
  // ── Timeout: kills the CLI if no output arrives within the timeout window ──
346
- const timer = setTimeout(() => {
374
+ // This is a no-activity timeout, not a hard cap — it resets on every stdout
375
+ // chunk so long-running tasks that keep producing output are never killed.
376
+ let timer = setTimeout(onTimeout, timeout);
377
+ function onTimeout() {
347
378
  child.kill();
348
379
  done(reject, new TransportError(
349
- `\`claude\` CLI timed out after ${(timeout / 1000).toFixed(0)}s with no response. ` +
380
+ `\`claude\` CLI timed out after ${(timeout / 1000).toFixed(0)}s with no output. ` +
350
381
  'The session may have hung or lost connectivity.',
351
382
  'timeout'
352
383
  ));
353
- }, timeout);
384
+ }
385
+ function resetTimer() {
386
+ clearTimeout(timer);
387
+ timer = setTimeout(onTimeout, timeout);
388
+ }
354
389
 
355
390
  child.stdout.on('data', (chunk) => {
391
+ resetTimer();
356
392
  lineBuffer += chunk.toString();
357
393
  const lines = lineBuffer.split('\n');
358
394
  lineBuffer = lines.pop() ?? '';
@@ -423,13 +459,36 @@ export async function send(prompt, { onChunk, signal, timeoutMs } = {}) {
423
459
  if (code !== 0 && code !== null) {
424
460
  const errMsg = stderr.trim() || `exited with code ${code}`;
425
461
  const hadResult = resultText !== null;
462
+ // Check stderr for known transient API error types (rate limit, overload, etc.)
463
+ // and surface them as 'rate_limit' so callers can distinguish retryable
464
+ // failures from hard errors.
465
+ const errorType = isRetryableStderr(stderr) ? 'rate_limit' : 'response';
426
466
  done(reject, new TransportError(
427
467
  `\`claude\` CLI failed (exit ${code}${hadResult ? ', partial result received' : ''}): ${errMsg}`,
428
- 'response'
468
+ errorType
429
469
  ));
430
470
  return;
431
471
  }
432
- done(resolve, resultText ?? accumulatedText);
472
+
473
+ // Exit code 0 (or a null exit code) with no output means the CLI quit before producing any
474
+ // response — this happens when a rate limit or transient API error causes
475
+ // the process to exit cleanly without emitting a result event.
476
+ // Write a recognisable token to stderr so ralph-afk's retry detector
477
+ // can mark this run as retryable and restart after a backoff.
478
+ const finalText = resultText ?? accumulatedText;
479
+ if (!finalText) {
480
+ process.stderr.write(
481
+ '[ralph] rate_limit_error: `claude` CLI exited cleanly with no output — ' +
482
+ 'likely a rate limit or transient API error; will retry.\n'
483
+ );
484
+ done(reject, new TransportError(
485
+ '`claude` CLI exited cleanly but produced no output — possible rate limit or transient API error.',
486
+ 'empty_response'
487
+ ));
488
+ return;
489
+ }
490
+
491
+ done(resolve, finalText);
433
492
  });
434
493
 
435
494
  child.on('error', (err) => {
@@ -1,6 +1,6 @@
1
1
  import { test, describe } from 'node:test';
2
2
  import assert from 'node:assert/strict';
3
- import { TransportError, getCumulativeCost, _addCost, _resetCost } from '../lib/transport.mjs';
3
+ import { TransportError, getCumulativeCost, _addCost, _resetCost, isRetryableStderr } from '../lib/transport.mjs';
4
4
 
5
5
  describe('TransportError', () => {
6
6
 
@@ -20,8 +20,8 @@ describe('TransportError', () => {
20
20
  assert.equal(err.type, 'auth');
21
21
  });
22
22
 
23
- test('type can be auth | timeout | response | network | parse', () => {
24
- for (const type of ['auth', 'timeout', 'response', 'network', 'parse']) {
23
+ test('type can be auth | timeout | response | network | parse | empty_response | rate_limit', () => {
24
+ for (const type of ['auth', 'timeout', 'response', 'network', 'parse', 'empty_response', 'rate_limit']) {
25
25
  const err = new TransportError('msg', type);
26
26
  assert.equal(err.type, type);
27
27
  }
@@ -30,6 +30,45 @@ describe('TransportError', () => {
30
30
  });
31
31
 
32
32
 
33
// Unit tests for isRetryableStderr: one positive case per marker family,
// negative cases for unrelated/empty/nullish input, and case-insensitivity.
describe('isRetryableStderr', () => {

  test('returns true for rate_limit_error', () => {
    assert.ok(isRetryableStderr('{"type":"error","error":{"type":"rate_limit_error"}}'));
  });

  test('returns true for overloaded_error', () => {
    assert.ok(isRetryableStderr('overloaded_error: API is temporarily overloaded'));
  });

  test('returns true for api_error', () => {
    assert.ok(isRetryableStderr('{"type":"error","error":{"type":"api_error"}}'));
  });

  test('returns true for timeout_error', () => {
    assert.ok(isRetryableStderr('{"type":"error","error":{"type":"timeout_error"}}'));
  });

  test('returns true for econnrefused', () => {
    assert.ok(isRetryableStderr('Error: connect ECONNREFUSED 127.0.0.1:443'));
  });

  test('returns true for econnreset', () => {
    assert.ok(isRetryableStderr('Error: read ECONNRESET'));
  });

  test('returns true for socket hang up', () => {
    assert.ok(isRetryableStderr('Error: socket hang up'));
  });

  test('matches a marker embedded in multi-line stderr', () => {
    // The detector is a substring search, so surrounding lines must not matter.
    assert.ok(isRetryableStderr('warning: slow start\nrate_limit_error\ntrailing line'));
  });

  test('returns false for unrelated stderr', () => {
    assert.ok(!isRetryableStderr('SyntaxError: Unexpected token'));
  });

  test('returns false for empty string', () => {
    assert.ok(!isRetryableStderr(''));
  });

  test('returns false for null/undefined', () => {
    assert.ok(!isRetryableStderr(null));
    assert.ok(!isRetryableStderr(undefined));
  });

  test('is case-insensitive', () => {
    assert.ok(isRetryableStderr('RATE_LIMIT_ERROR'));
    assert.ok(isRetryableStderr('Socket Hang Up'));
  });

});
70
+
71
+
33
72
  describe('getCumulativeCost', () => {
34
73
 
35
74
  test('initial cost is 0 (after reset)', () => {