npm - @a1hvdy/cc-openclaw - Versions diffs - 0.31.0 → 0.32.0 - Mend

@a1hvdy/cc-openclaw 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/src/engines/persistent-session.js +11 -0
package/dist/src/openai-compat/streaming-handler.js +119 -7
package/package.json +1 -1

package/dist/src/engines/persistent-session.js CHANGED Viewed

@@ -63,6 +63,7 @@ export class PersistentClaudeSession extends EventEmitter {
             history: [],
             retries: 0,
             lastRetryError: undefined,
+            lastStopReason: undefined,
             lastTurnContextTokens: 0,
         };
     }
@@ -624,6 +625,15 @@ export class PersistentClaudeSession extends EventEmitter {
                             (usage.cache_creation_input_tokens || 0);
                     this._updateCost();
                 }
+                // v0.32.0 — record the turn's stop_reason BEFORE TURN_COMPLETE is
+                // emitted (that emit resolves the streaming handler's await). The
+                // handler reads stats.lastStopReason to tell a hard rate_limit/error
+                // (→ non-200 so OpenClaw fails over to Sonnet) apart from an
+                // empty-but-successful turn (→ the "Done." backstop).
+                {
+                    const sr = event.stop_reason;
+                    this.stats.lastStopReason = typeof sr === 'string' ? sr : undefined;
+                }
                 this.emit(SESSION_EVENT.RESULT, event);
                 this.emit(SESSION_EVENT.TURN_COMPLETE, event);
                 // v0.27.6 — liveness watchdog (Killer #1): turn is over; clear any
@@ -826,6 +836,7 @@ export class PersistentClaudeSession extends EventEmitter {
                 100)),
             retries: this.stats.retries,
             lastRetryError: this.stats.lastRetryError,
+            lastStopReason: this.stats.lastStopReason,
             sessionId: this.sessionId,
             uptime: this.stats.startTime ? Math.round((Date.now() - new Date(this.stats.startTime).getTime()) / 1000) : 0,
         };

package/dist/src/openai-compat/streaming-handler.js CHANGED Viewed

@@ -87,12 +87,36 @@ onFinalText) {
     // #4 dual-surface seam — hoist once per turn (read off the hot delta loop).
     // Default OFF: card is the activity pane, gateway draft is the answer pane.
     const mirrorAnswerToCard = getCardAnswerMirrorEnabled();
-    res.writeHead(200, {
+    // v0.32.0 — DEFERRED header commit. Previously res.writeHead(200) fired here
+    // eagerly (before any model output), permanently locking the status to 200.
+    // On an Anthropic weekly-cap rate_limit the turn then resolved as a 200-empty
+    // SSE stream that OpenClaw read as a successful (empty) turn — so its model
+    // fallback chain (opus → sonnet → haiku) never advanced. By holding the
+    // header until the first real SSE byte, a pre-stream rate_limit/error can
+    // instead return HTTP 429/5xx, which OpenClaw's status-code failover acts on.
+    // Once a byte has streamed the 200 is committed and we fall back to an in-band
+    // SSE error (can't retract a 200) — but a hard cap almost always trips before
+    // the first byte. For a normal turn the only change is that the role-opener
+    // chunk now flushes immediately before the first content delta (sub-second,
+    // immaterial to the SSE client) instead of at handler entry.
+    const SSE_HEADERS = {
         'Content-Type': 'text/event-stream',
         'Cache-Control': 'no-cache',
         Connection: 'keep-alive',
         'X-Accel-Buffering': 'no',
-    });
+    };
+    let streamOpened = false;
+    // Opens the SSE stream exactly once: commits the 200 header + the role-opener
+    // chunk. Called lazily from writeSSE on the first content write. Until it
+    // runs, the response status is still mutable (so a pre-stream failure can
+    // return a non-200).
+    const openStream = () => {
+        if (streamOpened)
+            return;
+        streamOpened = true;
+        res.writeHead(200, SSE_HEADERS);
+        res.write(`data: ${JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null))}\n\n`);
+    };
     // v0.14.0 turn-trace probe: track wall-clock + accumulate streamed text
     // for the final per-turn diagnostic emit. accumulatedText mirrors what
     // the client actually received (text deltas only — tool_calls are tracked
@@ -123,6 +147,7 @@ onFinalText) {
     });
     const writeSSE = (data) => {
         if (!clientDisconnected) {
+            openStream(); // commit 200 + role opener on the first real byte
             try {
                 res.write(`data: ${data}\n\n`);
             }
@@ -131,8 +156,10 @@ onFinalText) {
             }
         }
     };
-    // Initial chunk with role
-    writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null)));
+    // v0.32.0 — the role-opener chunk is now emitted by openStream() on the first
+    // content write (see the deferred-header note above), so the eager write that
+    // lived here is gone. This keeps the HTTP status mutable until real content
+    // exists, enabling the pre-stream 429/5xx failover path below.
     // SSE keepalive heartbeat. v0.27.5: 30s → 15s. A long quiet phase (Claude
     // CLI thinking, a slow Bash/tool step) with no SSE write can make OpenClaw's
     // HTTP client perceive the stream as dead and fire a RETRY — which the
@@ -142,7 +169,11 @@ onFinalText) {
     // retries at the source. (The request-coalescer is the second line of
     // defense for the retries that still slip through.)
     const heartbeatTimer = setInterval(() => {
-        if (!clientDisconnected) {
+        // v0.32.0 — only heartbeat once the stream is actually open. Before the
+        // first byte the response status is still mutable (so a pre-stream
+        // rate_limit can 429); a keepalive write here would implicitly commit a 200
+        // and lock that failover option out.
+        if (!clientDisconnected && streamOpened) {
             try {
                 res.write(': keepalive\n\n');
             }
@@ -376,6 +407,11 @@ onFinalText) {
         }
         // Get token usage for final chunk
         let usage;
+        // v0.32.0 — captured from the same getStatus() call to drive the no-output
+        // failover branch below. persistent-session sets stats.lastStopReason on the
+        // CLI `result` event BEFORE TURN_COMPLETE resolves this await, so it is
+        // populated by the time we read it here.
+        let lastStopReason;
         try {
             const status = manager.getStatus(sessionName);
             usage = {
@@ -383,6 +419,7 @@ onFinalText) {
                 completion_tokens: status.stats.tokensOut,
                 total_tokens: status.stats.tokensIn + status.stats.tokensOut,
             };
+            lastStopReason = status.stats.lastStopReason;
         }
         catch {
             /* best effort */
@@ -393,6 +430,68 @@ onFinalText) {
         // payload. Skip when tool_calls were emitted — those are openai-spec
         // valid as the only payload (multi-turn tool-use sessions).
         const noVisiblePayload = !streamedAnything && bufferedText.length === 0 && toolCallsEmitted === 0;
+        // v0.32.0 — quota/error fast-fail. A no-output turn whose stop_reason is a
+        // hard failure (rate_limit = Anthropic weekly cap reached; error = upstream
+        // fault) must NOT be masked as a "Done." 200 below — that is exactly what
+        // made OpenClaw accept a capped turn as an empty success and skip its
+        // opus → sonnet → haiku fallback chain. While no byte has streamed the HTTP
+        // status is still mutable, so return a non-200 (429 for rate_limit, 502 for
+        // error) with an OpenAI-shaped error body; OpenClaw's status-code failover
+        // then advances to the next model in the chain. Once a byte has streamed we
+        // cannot retract the 200 — fall through to the normal finalize in that rare
+        // partial-output case (the catch path / card ❌ still surface the failure).
+        const isFailureStop = lastStopReason === 'rate_limit' || lastStopReason === 'error';
+        if (noVisiblePayload && isFailureStop && !streamOpened && !clientDisconnected) {
+            clearInterval(heartbeatTimer);
+            const isRateLimit = lastStopReason === 'rate_limit';
+            const httpStatus = isRateLimit ? 429 : 502;
+            reportStatus('idle', isRateLimit ? 'Rate limited' : 'Upstream error');
+            // Flip the Telegram card to ❌ <reason> so it never finalizes a misleading
+            // "✓ Done" on a capped/failed turn. Mirrors the catch-path pattern; the
+            // finally block's finalize respects the already-failed card state.
+            try {
+                await mirrorFailActiveCards(isRateLimit ? 'rate limited — model quota reached' : 'upstream error');
+            }
+            catch {
+                /* card fail is cosmetic */
+            }
+            const errBody = {
+                error: {
+                    message: isRateLimit
+                        ? `Model ${model} is rate limited (quota reached).`
+                        : `Model ${model} returned an upstream error.`,
+                    type: isRateLimit ? 'rate_limit_error' : 'server_error',
+                    code: isRateLimit ? 'rate_limited' : 'upstream_error',
+                },
+            };
+            formatError(new Error(errBody.error.message), {
+                code: ERROR_CODES.SESSION_ERROR,
+                sessionId: sessionName,
+                details: { phase: 'handleStreaming', stopReason: lastStopReason, httpStatus },
+            });
+            emitTurnTrace({
+                path: 'streaming',
+                model,
+                userTextPreview: userText.slice(0, 500),
+                userTextLen: userText.length,
+                hasTools,
+                useToolStream,
+                toolCallCount: 0,
+                outputTextPreview: '',
+                outputTextLen: 0,
+                finishReason: 'error',
+                doneBackstopFired: false,
+                voiceIntent,
+                tokensIn: usage?.prompt_tokens ?? 0,
+                tokensOut: usage?.completion_tokens ?? 0,
+                durationMs: Date.now() - turnStartMs,
+                errorMessage: errBody.error.message,
+                errorName: isRateLimit ? 'RateLimitError' : 'UpstreamError',
+            }, sessionName);
+            res.writeHead(httpStatus, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify(errBody));
+            return;
+        }
         if (noVisiblePayload) {
             markFirstByte();
             writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { content: 'Done.' }, null)));
@@ -580,8 +679,21 @@ onFinalText) {
         }
         // v0.4.3: route through formatError for errors_total + trajectory error.
         formatError(err, { code: ERROR_CODES.SESSION_ERROR, sessionId: sessionName, details: { phase: 'handleStreaming' } });
-        writeSSE(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
-        writeSSE('[DONE]');
+        // v0.32.0 — if nothing has streamed yet the status is still mutable: return
+        // a 502 so OpenClaw's failover advances the model chain. OpenClaw does not
+        // treat an in-band SSE error on a committed 200 as a provider failure, so
+        // the old SSE-error path (kept for the already-streaming case) could not
+        // trigger a fallback.
+        if (!streamOpened && !clientDisconnected) {
+            res.writeHead(502, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({
+                error: { message: err.message, type: 'server_error', code: 'upstream_error' },
+            }));
+        }
+        else {
+            writeSSE(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
+            writeSSE('[DONE]');
+        }
         // v0.15.0 Slice 1: turn-trace probe now ALSO fires on error-exit so
         // broken turns (claude CLI crash, timeout, stalled session kill) get
         // captured in the same trajectory stream as successful turns. Without

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@a1hvdy/cc-openclaw",
-  "version": "0.31.0",
+  "version": "0.32.0",
   "description": "A1xAI's Anthropic CLI bridge plugin for OpenClaw",
   "author": "@a1cy",
   "license": "MIT",