@a1hvdy/cc-openclaw 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,6 +63,7 @@ export class PersistentClaudeSession extends EventEmitter {
63
63
  history: [],
64
64
  retries: 0,
65
65
  lastRetryError: undefined,
66
+ lastStopReason: undefined,
66
67
  lastTurnContextTokens: 0,
67
68
  };
68
69
  }
@@ -624,6 +625,15 @@ export class PersistentClaudeSession extends EventEmitter {
624
625
  (usage.cache_creation_input_tokens || 0);
625
626
  this._updateCost();
626
627
  }
628
+ // v0.32.0 — record the turn's stop_reason BEFORE TURN_COMPLETE is
629
+ // emitted (that emit resolves the streaming handler's await). The
630
+ // handler reads stats.lastStopReason to tell a hard rate_limit/error
631
+ // (→ non-200 so OpenClaw fails over to Sonnet) apart from an
632
+ // empty-but-successful turn (→ the "Done." backstop).
633
+ {
634
+ const sr = event.stop_reason;
635
+ this.stats.lastStopReason = typeof sr === 'string' ? sr : undefined;
636
+ }
627
637
  this.emit(SESSION_EVENT.RESULT, event);
628
638
  this.emit(SESSION_EVENT.TURN_COMPLETE, event);
629
639
  // v0.27.6 — liveness watchdog (Killer #1): turn is over; clear any
@@ -826,6 +836,7 @@ export class PersistentClaudeSession extends EventEmitter {
826
836
  100)),
827
837
  retries: this.stats.retries,
828
838
  lastRetryError: this.stats.lastRetryError,
839
+ lastStopReason: this.stats.lastStopReason,
829
840
  sessionId: this.sessionId,
830
841
  uptime: this.stats.startTime ? Math.round((Date.now() - new Date(this.stats.startTime).getTime()) / 1000) : 0,
831
842
  };
@@ -87,12 +87,36 @@ onFinalText) {
87
87
  // #4 dual-surface seam — hoist once per turn (read off the hot delta loop).
88
88
  // Default OFF: card is the activity pane, gateway draft is the answer pane.
89
89
  const mirrorAnswerToCard = getCardAnswerMirrorEnabled();
90
- res.writeHead(200, {
90
+ // v0.32.0 — DEFERRED header commit. Previously res.writeHead(200) fired here
91
+ // eagerly (before any model output), permanently locking the status to 200.
92
+ // On an Anthropic weekly-cap rate_limit the turn then resolved as a 200-empty
93
+ // SSE stream that OpenClaw read as a successful (empty) turn — so its model
94
+ // fallback chain (opus → sonnet → haiku) never advanced. By holding the
95
+ // header until the first real SSE byte, a pre-stream rate_limit/error can
96
+ // instead return HTTP 429/5xx, which OpenClaw's status-code failover acts on.
97
+ // Once a byte has streamed the 200 is committed and we fall back to an in-band
98
+ // SSE error (can't retract a 200) — but a hard cap almost always trips before
99
+ // the first byte. For a normal turn the only change is that the role-opener
100
+ // chunk now flushes immediately before the first content delta (sub-second,
101
+ // immaterial to the SSE client) instead of at handler entry.
102
+ const SSE_HEADERS = {
91
103
  'Content-Type': 'text/event-stream',
92
104
  'Cache-Control': 'no-cache',
93
105
  Connection: 'keep-alive',
94
106
  'X-Accel-Buffering': 'no',
95
- });
107
+ };
108
+ let streamOpened = false;
109
+ // Opens the SSE stream exactly once: commits the 200 header + the role-opener
110
+ // chunk. Called lazily from writeSSE on the first content write. Until it
111
+ // runs, the response status is still mutable (so a pre-stream failure can
112
+ // return a non-200).
113
+ const openStream = () => {
114
+ if (streamOpened)
115
+ return;
116
+ streamOpened = true;
117
+ res.writeHead(200, SSE_HEADERS);
118
+ res.write(`data: ${JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null))}\n\n`);
119
+ };
96
120
  // v0.14.0 turn-trace probe: track wall-clock + accumulate streamed text
97
121
  // for the final per-turn diagnostic emit. accumulatedText mirrors what
98
122
  // the client actually received (text deltas only — tool_calls are tracked
@@ -123,6 +147,7 @@ onFinalText) {
123
147
  });
124
148
  const writeSSE = (data) => {
125
149
  if (!clientDisconnected) {
150
+ openStream(); // commit 200 + role opener on the first real byte
126
151
  try {
127
152
  res.write(`data: ${data}\n\n`);
128
153
  }
@@ -131,8 +156,10 @@ onFinalText) {
131
156
  }
132
157
  }
133
158
  };
134
- // Initial chunk with role
135
- writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null)));
159
+ // v0.32.0 — the role-opener chunk is now emitted by openStream() on the first
160
+ // content write (see the deferred-header note above), so the eager write that
161
+ // lived here is gone. This keeps the HTTP status mutable until real content
162
+ // exists, enabling the pre-stream 429/5xx failover path below.
136
163
  // SSE keepalive heartbeat. v0.27.5: 30s → 15s. A long quiet phase (Claude
137
164
  // CLI thinking, a slow Bash/tool step) with no SSE write can make OpenClaw's
138
165
  // HTTP client perceive the stream as dead and fire a RETRY — which the
@@ -142,7 +169,11 @@ onFinalText) {
142
169
  // retries at the source. (The request-coalescer is the second line of
143
170
  // defense for the retries that still slip through.)
144
171
  const heartbeatTimer = setInterval(() => {
145
- if (!clientDisconnected) {
172
+ // v0.32.0 — only heartbeat once the stream is actually open. Before the
173
+ // first byte the response status is still mutable (so a pre-stream
174
+ // rate_limit can 429); a keepalive write here would implicitly commit a 200
175
+ // and lock that failover option out.
176
+ if (!clientDisconnected && streamOpened) {
146
177
  try {
147
178
  res.write(': keepalive\n\n');
148
179
  }
@@ -376,6 +407,11 @@ onFinalText) {
376
407
  }
377
408
  // Get token usage for final chunk
378
409
  let usage;
410
+ // v0.32.0 — captured from the same getStatus() call to drive the no-output
411
+ // failover branch below. persistent-session sets stats.lastStopReason on the
412
+ // CLI `result` event BEFORE TURN_COMPLETE resolves this await, so it is
413
+ // populated by the time we read it here.
414
+ let lastStopReason;
379
415
  try {
380
416
  const status = manager.getStatus(sessionName);
381
417
  usage = {
@@ -383,6 +419,7 @@ onFinalText) {
383
419
  completion_tokens: status.stats.tokensOut,
384
420
  total_tokens: status.stats.tokensIn + status.stats.tokensOut,
385
421
  };
422
+ lastStopReason = status.stats.lastStopReason;
386
423
  }
387
424
  catch {
388
425
  /* best effort */
@@ -393,6 +430,68 @@ onFinalText) {
393
430
  // payload. Skip when tool_calls were emitted — those are openai-spec
394
431
  // valid as the only payload (multi-turn tool-use sessions).
395
432
  const noVisiblePayload = !streamedAnything && bufferedText.length === 0 && toolCallsEmitted === 0;
433
+ // v0.32.0 — quota/error fast-fail. A no-output turn whose stop_reason is a
434
+ // hard failure (rate_limit = Anthropic weekly cap reached; error = upstream
435
+ // fault) must NOT be masked as a "Done." 200 below — that is exactly what
436
+ // made OpenClaw accept a capped turn as an empty success and skip its
437
+ // opus → sonnet → haiku fallback chain. While no byte has streamed the HTTP
438
+ // status is still mutable, so return a non-200 (429 for rate_limit, 502 for
439
+ // error) with an OpenAI-shaped error body; OpenClaw's status-code failover
440
+ // then advances to the next model in the chain. Once a byte has streamed we
441
+ // cannot retract the 200 — fall through to the normal finalize in that rare
442
+ // partial-output case (the catch path / card ❌ still surface the failure).
443
+ const isFailureStop = lastStopReason === 'rate_limit' || lastStopReason === 'error';
444
+ if (noVisiblePayload && isFailureStop && !streamOpened && !clientDisconnected) {
445
+ clearInterval(heartbeatTimer);
446
+ const isRateLimit = lastStopReason === 'rate_limit';
447
+ const httpStatus = isRateLimit ? 429 : 502;
448
+ reportStatus('idle', isRateLimit ? 'Rate limited' : 'Upstream error');
449
+ // Flip the Telegram card to ❌ <reason> so it never finalizes a misleading
450
+ // "✓ Done" on a capped/failed turn. Mirrors the catch-path pattern; the
451
+ // finally block's finalize respects the already-failed card state.
452
+ try {
453
+ await mirrorFailActiveCards(isRateLimit ? 'rate limited — model quota reached' : 'upstream error');
454
+ }
455
+ catch {
456
+ /* card fail is cosmetic */
457
+ }
458
+ const errBody = {
459
+ error: {
460
+ message: isRateLimit
461
+ ? `Model ${model} is rate limited (quota reached).`
462
+ : `Model ${model} returned an upstream error.`,
463
+ type: isRateLimit ? 'rate_limit_error' : 'server_error',
464
+ code: isRateLimit ? 'rate_limited' : 'upstream_error',
465
+ },
466
+ };
467
+ formatError(new Error(errBody.error.message), {
468
+ code: ERROR_CODES.SESSION_ERROR,
469
+ sessionId: sessionName,
470
+ details: { phase: 'handleStreaming', stopReason: lastStopReason, httpStatus },
471
+ });
472
+ emitTurnTrace({
473
+ path: 'streaming',
474
+ model,
475
+ userTextPreview: userText.slice(0, 500),
476
+ userTextLen: userText.length,
477
+ hasTools,
478
+ useToolStream,
479
+ toolCallCount: 0,
480
+ outputTextPreview: '',
481
+ outputTextLen: 0,
482
+ finishReason: 'error',
483
+ doneBackstopFired: false,
484
+ voiceIntent,
485
+ tokensIn: usage?.prompt_tokens ?? 0,
486
+ tokensOut: usage?.completion_tokens ?? 0,
487
+ durationMs: Date.now() - turnStartMs,
488
+ errorMessage: errBody.error.message,
489
+ errorName: isRateLimit ? 'RateLimitError' : 'UpstreamError',
490
+ }, sessionName);
491
+ res.writeHead(httpStatus, { 'Content-Type': 'application/json' });
492
+ res.end(JSON.stringify(errBody));
493
+ return;
494
+ }
396
495
  if (noVisiblePayload) {
397
496
  markFirstByte();
398
497
  writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { content: 'Done.' }, null)));
@@ -580,8 +679,21 @@ onFinalText) {
580
679
  }
581
680
  // v0.4.3: route through formatError for errors_total + trajectory error.
582
681
  formatError(err, { code: ERROR_CODES.SESSION_ERROR, sessionId: sessionName, details: { phase: 'handleStreaming' } });
583
- writeSSE(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
584
- writeSSE('[DONE]');
682
+ // v0.32.0 — if nothing has streamed yet the status is still mutable: return
683
+ // a 502 so OpenClaw's failover advances the model chain. OpenClaw does not
684
+ // treat an in-band SSE error on a committed 200 as a provider failure, so
685
+ // the old SSE-error path (kept for the already-streaming case) could not
686
+ // trigger a fallback.
687
+ if (!streamOpened && !clientDisconnected) {
688
+ res.writeHead(502, { 'Content-Type': 'application/json' });
689
+ res.end(JSON.stringify({
690
+ error: { message: err.message, type: 'server_error', code: 'upstream_error' },
691
+ }));
692
+ }
693
+ else {
694
+ writeSSE(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
695
+ writeSSE('[DONE]');
696
+ }
585
697
  // v0.15.0 Slice 1: turn-trace probe now ALSO fires on error-exit so
586
698
  // broken turns (claude CLI crash, timeout, stalled session kill) get
587
699
  // captured in the same trajectory stream as successful turns. Without
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@a1hvdy/cc-openclaw",
3
- "version": "0.31.0",
3
+ "version": "0.32.0",
4
4
  "description": "A1xAI's Anthropic CLI bridge plugin for OpenClaw",
5
5
  "author": "@a1cy",
6
6
  "license": "MIT",