alvin-bot 4.18.2 → 4.18.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/providers/claude-sdk-provider.js +73 -18
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,38 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Alvin Bot are documented here.
|
|
4
4
|
|
|
5
|
+
## [4.18.4] — 2026-04-23
|
|
6
|
+
|
|
7
|
+
### π Critical fix: detect Anthropic quota-exhausted responses
|
|
8
|
+
|
|
9
|
+
**Problem:** When a Claude Max subscription runs out of its weekly limit or extra-usage credits, Anthropic's gateway responds to every query with a short text chunk like *"You're out of extra usage · resets 9pm (Europe/Berlin)"* — delivered with `output_tokens=0`. The SDK surfaces it as a normal assistant text message. The bot has no way to distinguish it from a real Claude response, so one of two things happens:
|
|
10
|
+
|
|
11
|
+
1. The text passes through unchanged and the user sees the raw quota message as if it were Claude's reply.
|
|
12
|
+
2. The text is filtered downstream (some legacy paths) and the user sees `"(Keine Antwort)"` with zero explanation.
|
|
13
|
+
|
|
14
|
+
Both outcomes hide the real cause (credits) and every retry attempt wastes more credits on nothing.
|
|
15
|
+
|
|
16
|
+
**Symptoms observed on 2026-04-23:**
|
|
17
|
+
- User activates `/extra-usage`, sends query → `(Keine Antwort)` or raw limit text.
|
|
18
|
+
- Assumes bot / workspace / token is broken, spends hours debugging.
|
|
19
|
+
- Actual cause: extra-usage quota silently exhausted mid-debug-session.
|
|
20
|
+
|
|
21
|
+
**Fix** (`src/providers/claude-sdk-provider.ts`):
|
|
22
|
+
|
|
23
|
+
- New `isQuotaLimitOutput(text)` detects the Anthropic-gateway quota signatures (multiple English/German variants: "out of extra usage", "weekly usage limit", "rate limit exceeded", "quota exceeded", etc.).
|
|
24
|
+
- In the SDK stream loop: when the first text chunk matches this pattern, rewrite it as a clear actionable hint (*"⚠️ …Top up the plan or wait for the reset…"*) AND invalidate the availability cache so the next heartbeat re-probes — but do NOT yield an `error` chunk (that would trigger fallback-cascade to Ollama and waste more credits on retries).
|
|
25
|
+
- In `isAvailable()`: the heartbeat probe now treats quota-exhausted output as "unavailable" in the same way it treats auth errors. Provider is marked unhealthy, bot stops trying until the next probe succeeds.
|
|
26
|
+
|
|
27
|
+
**Net effect:** bot no longer silently wastes credits after a quota limit is hit. Users see a plain, actionable message pointing at the right fix.
|
|
28
|
+
|
|
29
|
+
## [4.18.3] — 2026-04-23
|
|
30
|
+
|
|
31
|
+
### π Hotfix: 4.18.2 triggered unwanted failover to Ollama
|
|
32
|
+
|
|
33
|
+
**Bug in 4.18.2:** The empty-stream detector yielded an `error` chunk, which the registry's `queryWithFallback()` interprets as "primary provider failed" and immediately switches to the fallback (Ollama/Gemma 4). User saw `⚡ Claude (Agent SDK) unavailable — switching to Gemma 4 E4B` after every token rotation — the opposite of the intended behavior.
|
|
34
|
+
|
|
35
|
+
**Fix:** yield a `text` chunk instead of `error`. Same user-visible message, same cache-invalidation, but no failover cascade. The next CLI subprocess spawns with the fresh Keychain token automatically, and claude-sdk stays selected.
|
|
36
|
+
|
|
5
37
|
## [4.18.2] — 2026-04-23
|
|
6
38
|
|
|
7
39
|
### π Fix: silent empty-stream after OAuth-token rotation
|
|
@@ -25,6 +25,34 @@ export function isAuthErrorOutput(text) {
|
|
|
25
25
|
return false;
|
|
26
26
|
return /^\s*not logged in\b/i.test(text);
|
|
27
27
|
}
|
|
28
|
+
/**
 * Detects Anthropic's rate-limit / quota-exhausted gateway responses.
 *
 * These are NOT model outputs — they come back as a single text chunk with
 * output_tokens = 0 before the model even sees the prompt. Without this
 * detection, the bot would forward the gateway message as if it were the
 * assistant's reply ("(Keine Antwort)" or the raw quota text), masking the
 * real cause and wasting more calls on retries.
 *
 * Covers the observed variants:
 *   - "You're out of extra usage · resets 9pm (Europe/Berlin)"
 *     (with either a straight ' or a typographic ’ apostrophe)
 *   - "You've reached your weekly usage limit. …"
 *   - "Rate limit exceeded"
 *   - "Quota exceeded", "Usage limit reached", "Limit hit for this week"
 *   - any text that combines a reset time ("resets 9pm", "reset 21:00")
 *     with the word "usage" or "limit"
 *
 * All patterns are English phrases matched case-insensitively anywhere in
 * the trimmed text; a message in another language is only detected if it
 * contains one of these phrases.
 *
 * @param {string} text - Candidate output (first text chunk or probe stdout).
 * @returns {boolean} true when the text looks like a quota / rate-limit
 *   message; false for empty, whitespace-only, or non-string input.
 */
export function isQuotaLimitOutput(text) {
    // Anything that is not a string (undefined, null, Buffer, number, …)
    // cannot be a quota message; bail out instead of throwing on .trim().
    if (typeof text !== "string")
        return false;
    const t = text.trim();
    if (t.length === 0)
        return false;
    // \u2019 is the typographic apostrophe (’) the gateway may use in "You’re".
    // Written as an escape so the pattern survives encoding round-trips.
    return (/you['\u2019]re out of extra usage/i.test(t) ||
        /reached (your |the )?(weekly |monthly |daily )?(usage|rate) limit/i.test(t) ||
        /rate[- ]?limit(ed)? (exceeded|reached)/i.test(t) ||
        /quota exceeded/i.test(t) ||
        /usage limit reached/i.test(t) ||
        /limit (reached|hit) for (this|your) (week|month|day)/i.test(t) ||
        // A bare reset time is too weak a signal on its own; require that the
        // text also mentions usage or a limit. Parenthesised to make the
        // (already effective) && -before-|| grouping explicit.
        (/resets? \d{1,2}(am|pm|:)/i.test(t) && /usage|limit/i.test(t)));
}
|
|
28
56
|
const BOT_PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
|
|
29
57
|
// Load CLAUDE.md once at startup
|
|
30
58
|
let botClaudeMd = "";
|
|
@@ -198,6 +226,24 @@ export class ClaudeSDKProvider {
|
|
|
198
226
|
};
|
|
199
227
|
return;
|
|
200
228
|
}
|
|
229
|
+
// v4.18.4 — Guard against Anthropic rate-limit / quota-exhausted
|
|
230
|
+
// gateway messages that also arrive as a single text chunk (with
|
|
231
|
+
// output_tokens = 0). Pass them through as a friendly text chunk
|
|
232
|
+
// (NOT an error — would trigger fallback cascade to Ollama) and
|
|
233
|
+
// mark the provider as degraded so the next heartbeat re-checks.
|
|
234
|
+
if (!accumulatedText && isQuotaLimitOutput(block.text)) {
|
|
235
|
+
const hint = "β οΈ " + block.text.trim() +
|
|
236
|
+
"\n\nTop up the plan or wait for the reset. No message was sent to Claude.";
|
|
237
|
+
this.invalidateAvailabilityCache();
|
|
238
|
+
yield {
|
|
239
|
+
type: "text",
|
|
240
|
+
text: hint,
|
|
241
|
+
delta: hint,
|
|
242
|
+
sessionId: capturedSessionId,
|
|
243
|
+
};
|
|
244
|
+
accumulatedText = hint;
|
|
245
|
+
continue;
|
|
246
|
+
}
|
|
201
247
|
accumulatedText += block.text;
|
|
202
248
|
yield {
|
|
203
249
|
type: "text",
|
|
@@ -309,33 +355,40 @@ export class ClaudeSDKProvider {
|
|
|
309
355
|
? (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0) + (usage.cache_read_input_tokens || 0)
|
|
310
356
|
: 0;
|
|
311
357
|
const outputTok = usage?.output_tokens || 0;
|
|
312
|
-
// v4.18.
|
|
358
|
+
// v4.18.3 — Silent-empty-stream detection (replaces 4.18.2 approach).
|
|
313
359
|
//
|
|
314
360
|
// If the stream terminated cleanly but produced ZERO text chunks,
|
|
315
|
-
// something went wrong that the SDK didn't surface as an error
|
|
316
|
-
//
|
|
317
|
-
//
|
|
318
|
-
// still
|
|
319
|
-
// 401, emits no text, and we complete
|
|
320
|
-
// accumulatedText === "".
|
|
361
|
+
// something went wrong that the SDK didn't surface as an error.
|
|
362
|
+
// Most common cause: the OAuth token in the Keychain was rotated
|
|
363
|
+
// (e.g. right after /extra-usage or /login) while our in-memory
|
|
364
|
+
// SDK client still held the old one — the CLI subprocess silently
|
|
365
|
+
// gets a 401, emits no text, and we complete with
|
|
366
|
+
// accumulatedText === "".
|
|
321
367
|
//
|
|
322
|
-
//
|
|
323
|
-
//
|
|
324
|
-
//
|
|
325
|
-
//
|
|
368
|
+
// CRITICAL: we must NOT yield an "error" chunk here — the registry's
|
|
369
|
+
// queryWithFallback() treats that as "primary failed" and kicks off
|
|
370
|
+
// a full failover to the next provider (Ollama). That's exactly
|
|
371
|
+
// wrong: the next CLI subprocess would have picked up the fresh
|
|
372
|
+
// token by itself. Instead we:
|
|
373
|
+
// 1. Invalidate the availability cache so the next heartbeat
|
|
374
|
+
// re-probes `claude auth status` with a fresh subprocess.
|
|
375
|
+
// 2. Return a friendly "text" chunk explaining what happened,
|
|
376
|
+
// so the user sees a clear message (not "(Keine Antwort)")
|
|
377
|
+
// and knows to resend — without tripping the failover.
|
|
326
378
|
if (accumulatedText === "" && outputTok === 0) {
|
|
327
379
|
this.invalidateAvailabilityCache();
|
|
380
|
+
const hint = "β οΈ Claude antwortete mit leerem Stream (meist nach /extra-usage, /login oder Token-Refresh). " +
|
|
381
|
+
"Der SDK-Token-Cache wurde geleert β bitte schick die Nachricht einfach nochmal.";
|
|
328
382
|
yield {
|
|
329
|
-
type: "
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
383
|
+
type: "text",
|
|
384
|
+
text: hint,
|
|
385
|
+
delta: hint,
|
|
386
|
+
sessionId: resultMsg.session_id || capturedSessionId,
|
|
333
387
|
};
|
|
334
|
-
return;
|
|
335
388
|
}
|
|
336
389
|
yield {
|
|
337
390
|
type: "done",
|
|
338
|
-
text: accumulatedText,
|
|
391
|
+
text: accumulatedText || "",
|
|
339
392
|
sessionId: resultMsg.session_id || capturedSessionId,
|
|
340
393
|
costUsd: "total_cost_usd" in resultMsg ? resultMsg.total_cost_usd : 0,
|
|
341
394
|
inputTokens: inputTok,
|
|
@@ -404,7 +457,9 @@ export class ClaudeSDKProvider {
|
|
|
404
457
|
// sniff-stdout approach for backward compat.
|
|
405
458
|
try {
|
|
406
459
|
const { stdout: probeOut } = await execFileAsync(claudePath, ["-p", "ping", "--output-format", "text"], { timeout: 15000 });
|
|
407
|
-
|
|
460
|
+
// v4.18.4 — treat quota-exhausted as "unavailable" so heartbeat
|
|
461
|
+
// surfaces it and stops wasting extra-usage credits on retries.
|
|
462
|
+
return cache(!isAuthErrorOutput(probeOut) && !isQuotaLimitOutput(probeOut));
|
|
408
463
|
}
|
|
409
464
|
catch {
|
|
410
465
|
// Both checks failed — treat as unavailable
|