@a1hvdy/cc-openclaw 0.31.0 → 0.32.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
|
@@ -63,6 +63,7 @@ export class PersistentClaudeSession extends EventEmitter {
|
|
|
63
63
|
history: [],
|
|
64
64
|
retries: 0,
|
|
65
65
|
lastRetryError: undefined,
|
|
66
|
+
lastStopReason: undefined,
|
|
66
67
|
lastTurnContextTokens: 0,
|
|
67
68
|
};
|
|
68
69
|
}
|
|
@@ -624,6 +625,15 @@ export class PersistentClaudeSession extends EventEmitter {
|
|
|
624
625
|
(usage.cache_creation_input_tokens || 0);
|
|
625
626
|
this._updateCost();
|
|
626
627
|
}
|
|
628
|
+
// v0.32.0 — record the turn's stop_reason BEFORE TURN_COMPLETE is
|
|
629
|
+
// emitted (that emit resolves the streaming handler's await). The
|
|
630
|
+
// handler reads stats.lastStopReason to tell a hard rate_limit/error
|
|
631
|
+
// (→ non-200 so OpenClaw fails over to Sonnet) apart from an
|
|
632
|
+
// empty-but-successful turn (→ the "Done." backstop).
|
|
633
|
+
{
|
|
634
|
+
const sr = event.stop_reason;
|
|
635
|
+
this.stats.lastStopReason = typeof sr === 'string' ? sr : undefined;
|
|
636
|
+
}
|
|
627
637
|
this.emit(SESSION_EVENT.RESULT, event);
|
|
628
638
|
this.emit(SESSION_EVENT.TURN_COMPLETE, event);
|
|
629
639
|
// v0.27.6 — liveness watchdog (Killer #1): turn is over; clear any
|
|
@@ -826,6 +836,7 @@ export class PersistentClaudeSession extends EventEmitter {
|
|
|
826
836
|
100)),
|
|
827
837
|
retries: this.stats.retries,
|
|
828
838
|
lastRetryError: this.stats.lastRetryError,
|
|
839
|
+
lastStopReason: this.stats.lastStopReason,
|
|
829
840
|
sessionId: this.sessionId,
|
|
830
841
|
uptime: this.stats.startTime ? Math.round((Date.now() - new Date(this.stats.startTime).getTime()) / 1000) : 0,
|
|
831
842
|
};
|
|
@@ -87,12 +87,36 @@ onFinalText) {
|
|
|
87
87
|
// #4 dual-surface seam — hoist once per turn (read off the hot delta loop).
|
|
88
88
|
// Default OFF: card is the activity pane, gateway draft is the answer pane.
|
|
89
89
|
const mirrorAnswerToCard = getCardAnswerMirrorEnabled();
|
|
90
|
-
res.writeHead(200
|
|
90
|
+
// v0.32.0 — DEFERRED header commit. Previously res.writeHead(200) fired here
|
|
91
|
+
// eagerly (before any model output), permanently locking the status to 200.
|
|
92
|
+
// On an Anthropic weekly-cap rate_limit the turn then resolved as a 200-empty
|
|
93
|
+
// SSE stream that OpenClaw read as a successful (empty) turn — so its model
|
|
94
|
+
// fallback chain (opus → sonnet → haiku) never advanced. By holding the
|
|
95
|
+
// header until the first real SSE byte, a pre-stream rate_limit/error can
|
|
96
|
+
// instead return HTTP 429/5xx, which OpenClaw's status-code failover acts on.
|
|
97
|
+
// Once a byte has streamed the 200 is committed and we fall back to an in-band
|
|
98
|
+
// SSE error (can't retract a 200) — but a hard cap almost always trips before
|
|
99
|
+
// the first byte. For a normal turn the only change is that the role-opener
|
|
100
|
+
// chunk now flushes immediately before the first content delta (sub-second,
|
|
101
|
+
// immaterial to the SSE client) instead of at handler entry.
|
|
102
|
+
const SSE_HEADERS = {
|
|
91
103
|
'Content-Type': 'text/event-stream',
|
|
92
104
|
'Cache-Control': 'no-cache',
|
|
93
105
|
Connection: 'keep-alive',
|
|
94
106
|
'X-Accel-Buffering': 'no',
|
|
95
|
-
}
|
|
107
|
+
};
|
|
108
|
+
let streamOpened = false;
|
|
109
|
+
// Opens the SSE stream exactly once: commits the 200 header + the role-opener
|
|
110
|
+
// chunk. Called lazily from writeSSE on the first content write. Until it
|
|
111
|
+
// runs, the response status is still mutable (so a pre-stream failure can
|
|
112
|
+
// return a non-200).
|
|
113
|
+
const openStream = () => {
|
|
114
|
+
if (streamOpened)
|
|
115
|
+
return;
|
|
116
|
+
streamOpened = true;
|
|
117
|
+
res.writeHead(200, SSE_HEADERS);
|
|
118
|
+
res.write(`data: ${JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null))}\n\n`);
|
|
119
|
+
};
|
|
96
120
|
// v0.14.0 turn-trace probe: track wall-clock + accumulate streamed text
|
|
97
121
|
// for the final per-turn diagnostic emit. accumulatedText mirrors what
|
|
98
122
|
// the client actually received (text deltas only — tool_calls are tracked
|
|
@@ -123,6 +147,7 @@ onFinalText) {
|
|
|
123
147
|
});
|
|
124
148
|
const writeSSE = (data) => {
|
|
125
149
|
if (!clientDisconnected) {
|
|
150
|
+
openStream(); // commit 200 + role opener on the first real byte
|
|
126
151
|
try {
|
|
127
152
|
res.write(`data: ${data}\n\n`);
|
|
128
153
|
}
|
|
@@ -131,8 +156,10 @@ onFinalText) {
|
|
|
131
156
|
}
|
|
132
157
|
}
|
|
133
158
|
};
|
|
134
|
-
//
|
|
135
|
-
|
|
159
|
+
// v0.32.0 — the role-opener chunk is now emitted by openStream() on the first
|
|
160
|
+
// content write (see the deferred-header note above), so the eager write that
|
|
161
|
+
// lived here is gone. This keeps the HTTP status mutable until real content
|
|
162
|
+
// exists, enabling the pre-stream 429/5xx failover path below.
|
|
136
163
|
// SSE keepalive heartbeat. v0.27.5: 30s → 15s. A long quiet phase (Claude
|
|
137
164
|
// CLI thinking, a slow Bash/tool step) with no SSE write can make OpenClaw's
|
|
138
165
|
// HTTP client perceive the stream as dead and fire a RETRY — which the
|
|
@@ -142,7 +169,11 @@ onFinalText) {
|
|
|
142
169
|
// retries at the source. (The request-coalescer is the second line of
|
|
143
170
|
// defense for the retries that still slip through.)
|
|
144
171
|
const heartbeatTimer = setInterval(() => {
|
|
145
|
-
|
|
172
|
+
// v0.32.0 — only heartbeat once the stream is actually open. Before the
|
|
173
|
+
// first byte the response status is still mutable (so a pre-stream
|
|
174
|
+
// rate_limit can 429); a keepalive write here would implicitly commit a 200
|
|
175
|
+
// and lock that failover option out.
|
|
176
|
+
if (!clientDisconnected && streamOpened) {
|
|
146
177
|
try {
|
|
147
178
|
res.write(': keepalive\n\n');
|
|
148
179
|
}
|
|
@@ -376,6 +407,11 @@ onFinalText) {
|
|
|
376
407
|
}
|
|
377
408
|
// Get token usage for final chunk
|
|
378
409
|
let usage;
|
|
410
|
+
// v0.32.0 — captured from the same getStatus() call to drive the no-output
|
|
411
|
+
// failover branch below. persistent-session sets stats.lastStopReason on the
|
|
412
|
+
// CLI `result` event BEFORE TURN_COMPLETE resolves this await, so it is
|
|
413
|
+
// populated by the time we read it here.
|
|
414
|
+
let lastStopReason;
|
|
379
415
|
try {
|
|
380
416
|
const status = manager.getStatus(sessionName);
|
|
381
417
|
usage = {
|
|
@@ -383,6 +419,7 @@ onFinalText) {
|
|
|
383
419
|
completion_tokens: status.stats.tokensOut,
|
|
384
420
|
total_tokens: status.stats.tokensIn + status.stats.tokensOut,
|
|
385
421
|
};
|
|
422
|
+
lastStopReason = status.stats.lastStopReason;
|
|
386
423
|
}
|
|
387
424
|
catch {
|
|
388
425
|
/* best effort */
|
|
@@ -393,6 +430,68 @@ onFinalText) {
|
|
|
393
430
|
// payload. Skip when tool_calls were emitted — those are openai-spec
|
|
394
431
|
// valid as the only payload (multi-turn tool-use sessions).
|
|
395
432
|
const noVisiblePayload = !streamedAnything && bufferedText.length === 0 && toolCallsEmitted === 0;
|
|
433
|
+
// v0.32.0 — quota/error fast-fail. A no-output turn whose stop_reason is a
|
|
434
|
+
// hard failure (rate_limit = Anthropic weekly cap reached; error = upstream
|
|
435
|
+
// fault) must NOT be masked as a "Done." 200 below — that is exactly what
|
|
436
|
+
// made OpenClaw accept a capped turn as an empty success and skip its
|
|
437
|
+
// opus → sonnet → haiku fallback chain. While no byte has streamed the HTTP
|
|
438
|
+
// status is still mutable, so return a non-200 (429 for rate_limit, 502 for
|
|
439
|
+
// error) with an OpenAI-shaped error body; OpenClaw's status-code failover
|
|
440
|
+
// then advances to the next model in the chain. Once a byte has streamed we
|
|
441
|
+
// cannot retract the 200 — fall through to the normal finalize in that rare
|
|
442
|
+
// partial-output case (the catch path / card ❌ still surface the failure).
|
|
443
|
+
const isFailureStop = lastStopReason === 'rate_limit' || lastStopReason === 'error';
|
|
444
|
+
if (noVisiblePayload && isFailureStop && !streamOpened && !clientDisconnected) {
|
|
445
|
+
clearInterval(heartbeatTimer);
|
|
446
|
+
const isRateLimit = lastStopReason === 'rate_limit';
|
|
447
|
+
const httpStatus = isRateLimit ? 429 : 502;
|
|
448
|
+
reportStatus('idle', isRateLimit ? 'Rate limited' : 'Upstream error');
|
|
449
|
+
// Flip the Telegram card to ❌ <reason> so it never finalizes a misleading
|
|
450
|
+
// "✓ Done" on a capped/failed turn. Mirrors the catch-path pattern; the
|
|
451
|
+
// finally block's finalize respects the already-failed card state.
|
|
452
|
+
try {
|
|
453
|
+
await mirrorFailActiveCards(isRateLimit ? 'rate limited — model quota reached' : 'upstream error');
|
|
454
|
+
}
|
|
455
|
+
catch {
|
|
456
|
+
/* card fail is cosmetic */
|
|
457
|
+
}
|
|
458
|
+
const errBody = {
|
|
459
|
+
error: {
|
|
460
|
+
message: isRateLimit
|
|
461
|
+
? `Model ${model} is rate limited (quota reached).`
|
|
462
|
+
: `Model ${model} returned an upstream error.`,
|
|
463
|
+
type: isRateLimit ? 'rate_limit_error' : 'server_error',
|
|
464
|
+
code: isRateLimit ? 'rate_limited' : 'upstream_error',
|
|
465
|
+
},
|
|
466
|
+
};
|
|
467
|
+
formatError(new Error(errBody.error.message), {
|
|
468
|
+
code: ERROR_CODES.SESSION_ERROR,
|
|
469
|
+
sessionId: sessionName,
|
|
470
|
+
details: { phase: 'handleStreaming', stopReason: lastStopReason, httpStatus },
|
|
471
|
+
});
|
|
472
|
+
emitTurnTrace({
|
|
473
|
+
path: 'streaming',
|
|
474
|
+
model,
|
|
475
|
+
userTextPreview: userText.slice(0, 500),
|
|
476
|
+
userTextLen: userText.length,
|
|
477
|
+
hasTools,
|
|
478
|
+
useToolStream,
|
|
479
|
+
toolCallCount: 0,
|
|
480
|
+
outputTextPreview: '',
|
|
481
|
+
outputTextLen: 0,
|
|
482
|
+
finishReason: 'error',
|
|
483
|
+
doneBackstopFired: false,
|
|
484
|
+
voiceIntent,
|
|
485
|
+
tokensIn: usage?.prompt_tokens ?? 0,
|
|
486
|
+
tokensOut: usage?.completion_tokens ?? 0,
|
|
487
|
+
durationMs: Date.now() - turnStartMs,
|
|
488
|
+
errorMessage: errBody.error.message,
|
|
489
|
+
errorName: isRateLimit ? 'RateLimitError' : 'UpstreamError',
|
|
490
|
+
}, sessionName);
|
|
491
|
+
res.writeHead(httpStatus, { 'Content-Type': 'application/json' });
|
|
492
|
+
res.end(JSON.stringify(errBody));
|
|
493
|
+
return;
|
|
494
|
+
}
|
|
396
495
|
if (noVisiblePayload) {
|
|
397
496
|
markFirstByte();
|
|
398
497
|
writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { content: 'Done.' }, null)));
|
|
@@ -580,8 +679,21 @@ onFinalText) {
|
|
|
580
679
|
}
|
|
581
680
|
// v0.4.3: route through formatError for errors_total + trajectory error.
|
|
582
681
|
formatError(err, { code: ERROR_CODES.SESSION_ERROR, sessionId: sessionName, details: { phase: 'handleStreaming' } });
|
|
583
|
-
|
|
584
|
-
|
|
682
|
+
// v0.32.0 — if nothing has streamed yet the status is still mutable: return
|
|
683
|
+
// a 502 so OpenClaw's failover advances the model chain. OpenClaw does not
|
|
684
|
+
// treat an in-band SSE error on a committed 200 as a provider failure, so
|
|
685
|
+
// the old SSE-error path (kept for the already-streaming case) could not
|
|
686
|
+
// trigger a fallback.
|
|
687
|
+
if (!streamOpened && !clientDisconnected) {
|
|
688
|
+
res.writeHead(502, { 'Content-Type': 'application/json' });
|
|
689
|
+
res.end(JSON.stringify({
|
|
690
|
+
error: { message: err.message, type: 'server_error', code: 'upstream_error' },
|
|
691
|
+
}));
|
|
692
|
+
}
|
|
693
|
+
else {
|
|
694
|
+
writeSSE(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
|
|
695
|
+
writeSSE('[DONE]');
|
|
696
|
+
}
|
|
585
697
|
// v0.15.0 Slice 1: turn-trace probe now ALSO fires on error-exit so
|
|
586
698
|
// broken turns (claude CLI crash, timeout, stalled session kill) get
|
|
587
699
|
// captured in the same trajectory stream as successful turns. Without
|