npm - @askalf/dario - Versions diffs - 3.7.2 → 3.8.0 - Mend

@askalf/dario 3.7.2 → 3.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/pool.d.ts CHANGED Viewed

@@ -50,8 +50,8 @@ export declare class AccountPool {
     get size(): number;
     /** Select the best account for the next request. */
     select(): PoolAccount | null;
-    /** Select the next-best account, excluding the given alias. */
-    selectExcluding(excludeAlias: string): PoolAccount | null;
+    /** Select the next-best account, excluding the given set of aliases. */
+    selectExcluding(excluded: Set<string>): PoolAccount | null;
     updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
     markRejected(alias: string, snapshot: RateLimitSnapshot): void;
     updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;

package/dist/pool.js CHANGED Viewed

@@ -82,12 +82,12 @@ export class AccountPool {
         // No rate-limit data at all — least-used first
         return all.reduce((a, b) => a.requestCount < b.requestCount ? a : b);
     }
-    /** Select the next-best account, excluding the given alias. */
-    selectExcluding(excludeAlias) {
+    /** Select the next-best account, excluding the given set of aliases. */
+    selectExcluding(excluded) {
         if (this.accounts.size <= 1)
             return null;
         const now = Date.now();
-        const candidates = [...this.accounts.values()].filter(a => a.alias !== excludeAlias);
+        const candidates = [...this.accounts.values()].filter(a => !excluded.has(a.alias));
         const eligible = candidates.filter(a => a.rateLimit.status !== 'rejected' &&
             a.expiresAt > now + 30_000);
         if (eligible.length > 0) {

package/dist/proxy.js CHANGED Viewed

@@ -731,65 +731,147 @@ export async function startProxy(opts = {}) {
                 }
             };
             req.on('close', onClientClose);
-            let upstream = await fetch(targetBase, {
-                method: req.method ?? 'POST',
-                headers,
-                body: finalBody ? new Uint8Array(finalBody) : undefined,
-                signal: upstreamAbort.signal,
-            });
-            // Pool mode: capture rate-limit snapshot from the response. parseRateLimits
-            // returns status='rejected' on 429, which makes the next `select()` call
-            // route traffic away from this account until it resets.
-            if (pool && poolAccount) {
-                const snapshot = parseRateLimits(upstream.headers);
-                if (upstream.status === 429) {
-                    pool.markRejected(poolAccount.alias, snapshot);
-                }
-                else {
-                    pool.updateRateLimits(poolAccount.alias, snapshot);
-                }
-            }
-            // Auto-retry without context-1m if it triggers a long-context billing error.
-            // Anthropic returns this as either 400 ("long context beta is not yet available
-            // for this subscription") or 429 ("Extra usage is required for long context
-            // requests") depending on the endpoint — we handle both.
-            //
-            // Note: `upstream.text()` consumes the body, so once we peek we MUST
-            // handle the response here (can't fall through to the normal forwarder).
+            const startTime = Date.now();
+            // Tracks which accounts we've already tried this request — used by the
+            // inside-request 429 failover loop to avoid re-hitting exhausted accounts.
+            const triedAliases = new Set();
+            if (poolAccount)
+                triedAliases.add(poolAccount.alias);
+            let upstream;
             let peekedBody = null;
-            if ((upstream.status === 400 || upstream.status === 429) && !passthrough) {
-                peekedBody = await upstream.text().catch(() => '');
-                const isLongContextError = peekedBody.includes('long context')
-                    || peekedBody.includes('Extra usage is required')
-                    || peekedBody.includes('long_context');
-                if (isLongContextError) {
-                    if (verbose)
-                        console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it`);
-                    const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
-                    const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
-                    const retry = await fetch(targetBase, {
-                        method: req.method ?? 'POST',
-                        headers: retryHeaders,
-                        body: finalBody ? new Uint8Array(finalBody) : undefined,
-                        signal: upstreamAbort.signal,
-                    });
-                    // Use the retry response from here on — peeked body is now stale
-                    upstream = retry;
-                    peekedBody = null;
-                    // Pool mode: re-capture after the context-1m retry as the snapshot may have changed.
-                    if (pool && poolAccount) {
-                        const retrySnapshot = parseRateLimits(upstream.headers);
-                        if (upstream.status === 429) {
-                            pool.markRejected(poolAccount.alias, retrySnapshot);
+            // Inside-request 429 failover loop (v3.8.0). On a 429, pool mode tries
+            // the next-best account before surfacing the error to the client.
+            // Bounded to pool.size iterations; breaks immediately on any non-429.
+            dispatchLoop: while (true) {
+                upstream = await fetch(targetBase, {
+                    method: req.method ?? 'POST',
+                    headers,
+                    body: finalBody ? new Uint8Array(finalBody) : undefined,
+                    signal: upstreamAbort.signal,
+                });
+                // Pool mode: capture rate-limit snapshot from the response. parseRateLimits
+                // returns status='rejected' on 429, which makes the next `select()` call
+                // route traffic away from this account until it resets.
+                if (pool && poolAccount) {
+                    const snapshot = parseRateLimits(upstream.headers);
+                    if (upstream.status === 429) {
+                        pool.markRejected(poolAccount.alias, snapshot);
+                    }
+                    else {
+                        pool.updateRateLimits(poolAccount.alias, snapshot);
+                    }
+                }
+                // Auto-retry without context-1m if it triggers a long-context billing error.
+                // Anthropic returns this as either 400 ("long context beta is not yet available
+                // for this subscription") or 429 ("Extra usage is required for long context
+                // requests") depending on the endpoint — we handle both.
+                //
+                // Note: `upstream.text()` consumes the body, so once we peek we MUST
+                // handle the response here (can't fall through to the normal forwarder).
+                peekedBody = null;
+                if ((upstream.status === 400 || upstream.status === 429) && !passthrough) {
+                    peekedBody = await upstream.text().catch(() => '');
+                    const isLongContextError = peekedBody.includes('long context')
+                        || peekedBody.includes('Extra usage is required')
+                        || peekedBody.includes('long_context');
+                    if (isLongContextError) {
+                        if (verbose)
+                            console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it`);
+                        const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
+                        const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
+                        const retry = await fetch(targetBase, {
+                            method: req.method ?? 'POST',
+                            headers: retryHeaders,
+                            body: finalBody ? new Uint8Array(finalBody) : undefined,
+                            signal: upstreamAbort.signal,
+                        });
+                        // Use the retry response from here on — peeked body is now stale
+                        upstream = retry;
+                        peekedBody = null;
+                        // Pool mode: re-capture after the context-1m retry as the snapshot may have changed.
+                        if (pool && poolAccount) {
+                            const retrySnapshot = parseRateLimits(upstream.headers);
+                            if (upstream.status === 429) {
+                                pool.markRejected(poolAccount.alias, retrySnapshot);
+                            }
+                            else {
+                                pool.updateRateLimits(poolAccount.alias, retrySnapshot);
+                            }
                         }
-                        else {
-                            pool.updateRateLimits(poolAccount.alias, retrySnapshot);
+                    }
+                    else if (upstream.status === 429) {
+                        // Not a context-1m issue — try pool failover before surfacing to client
+                        if (pool && poolAccount) {
+                            const nextAccount = pool.selectExcluding(triedAliases);
+                            if (nextAccount) {
+                                triedAliases.add(nextAccount.alias);
+                                poolAccount = nextAccount;
+                                accessToken = nextAccount.accessToken;
+                                headers['Authorization'] = `Bearer ${accessToken}`;
+                                headers['x-claude-code-session-id'] = nextAccount.identity.sessionId;
+                                peekedBody = null;
+                                continue dispatchLoop;
+                            }
+                        }
+                        const enriched = enrich429(peekedBody, upstream.headers);
+                        const responseHeaders = {
+                            'Content-Type': 'application/json',
+                            'Access-Control-Allow-Origin': corsOrigin,
+                            ...SECURITY_HEADERS,
+                        };
+                        for (const [key, value] of upstream.headers.entries()) {
+                            if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
+                                responseHeaders[key] = value;
+                            }
+                        }
+                        requestCount++;
+                        if (analytics && poolAccount) {
+                            analytics.record({
+                                timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
+                                inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreateTokens: 0, thinkingTokens: 0,
+                                claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
+                                util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
+                                latencyMs: Date.now() - startTime, status: 429, isStream: false, isOpenAI,
+                            });
                         }
+                        res.writeHead(429, responseHeaders);
+                        res.end(enriched);
+                        return;
+                    }
+                    else if (upstream.status === 400) {
+                        // Non-long-context 400 — forward upstream error directly.
+                        // The body is already consumed, so we write it straight out.
+                        const responseHeaders = {
+                            'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
+                            'Access-Control-Allow-Origin': corsOrigin,
+                            ...SECURITY_HEADERS,
+                        };
+                        for (const [key, value] of upstream.headers.entries()) {
+                            if (key === 'request-id')
+                                responseHeaders[key] = value;
+                        }
+                        requestCount++;
+                        res.writeHead(400, responseHeaders);
+                        res.end(peekedBody);
+                        return;
                     }
                 }
-                else if (upstream.status === 429) {
-                    // Not a context-1m issue — return enriched 429 directly
-                    const enriched = enrich429(peekedBody, upstream.headers);
+                // Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
+                if (upstream.status === 429) {
+                    // Try pool failover before surfacing to client
+                    if (pool && poolAccount) {
+                        const nextAccount = pool.selectExcluding(triedAliases);
+                        if (nextAccount) {
+                            triedAliases.add(nextAccount.alias);
+                            poolAccount = nextAccount;
+                            accessToken = nextAccount.accessToken;
+                            headers['Authorization'] = `Bearer ${accessToken}`;
+                            headers['x-claude-code-session-id'] = nextAccount.identity.sessionId;
+                            continue dispatchLoop;
+                        }
+                    }
+                    const errBody = await upstream.text().catch(() => '');
+                    const enriched = enrich429(errBody, upstream.headers);
                     const responseHeaders = {
                         'Content-Type': 'application/json',
                         'Access-Control-Allow-Origin': corsOrigin,
@@ -801,47 +883,22 @@ export async function startProxy(opts = {}) {
                         }
                     }
                     requestCount++;
+                    if (analytics && poolAccount) {
+                        analytics.record({
+                            timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
+                            inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreateTokens: 0, thinkingTokens: 0,
+                            claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
+                            util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
+                            latencyMs: Date.now() - startTime, status: 429, isStream: false, isOpenAI,
+                        });
+                    }
                     res.writeHead(429, responseHeaders);
                     res.end(enriched);
                     return;
                 }
-                else if (upstream.status === 400) {
-                    // Non-long-context 400 — forward upstream error directly.
-                    // The body is already consumed, so we write it straight out.
-                    const responseHeaders = {
-                        'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
-                        'Access-Control-Allow-Origin': corsOrigin,
-                        ...SECURITY_HEADERS,
-                    };
-                    for (const [key, value] of upstream.headers.entries()) {
-                        if (key === 'request-id')
-                            responseHeaders[key] = value;
-                    }
-                    requestCount++;
-                    res.writeHead(400, responseHeaders);
-                    res.end(peekedBody);
-                    return;
-                }
-            }
-            // Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
-            if (upstream.status === 429) {
-                const errBody = await upstream.text().catch(() => '');
-                const enriched = enrich429(errBody, upstream.headers);
-                const responseHeaders = {
-                    'Content-Type': 'application/json',
-                    'Access-Control-Allow-Origin': corsOrigin,
-                    ...SECURITY_HEADERS,
-                };
-                for (const [key, value] of upstream.headers.entries()) {
-                    if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
-                        responseHeaders[key] = value;
-                    }
-                }
-                requestCount++;
-                res.writeHead(429, responseHeaders);
-                res.end(enriched);
-                return;
-            }
+                // Non-429 — exit dispatch loop and forward the response to client.
+                break;
+            } // end dispatchLoop: while (true)
             // Detect streaming from content-type (reliable) or body (fallback)
             const contentType = upstream.headers.get('content-type') ?? '';
             const isStream = contentType.includes('text/event-stream');
@@ -869,6 +926,14 @@ export async function startProxy(opts = {}) {
             }
             res.writeHead(upstream.status, responseHeaders);
             if (isStream && upstream.body) {
+                // Analytics accumulators for streaming responses — filled by parsing
+                // message_start / message_delta SSE events as they flow through.
+                let streamInputTokens = 0;
+                let streamOutputTokens = 0;
+                let streamCacheReadTokens = 0;
+                let streamCacheCreateTokens = 0;
+                const analyticsDecoder = (analytics && poolAccount) ? new TextDecoder() : null;
+                let analyticsBuffer = '';
                 // Stream SSE chunks through
                 const reader = upstream.body.getReader();
                 const decoder = new TextDecoder();
@@ -888,6 +953,34 @@ export async function startProxy(opts = {}) {
                         const { done, value } = await reader.read();
                         if (done)
                             break;
+                        // Parse SSE events for analytics regardless of routing branch
+                        if (analyticsDecoder && value) {
+                            analyticsBuffer += analyticsDecoder.decode(value, { stream: true });
+                            const parts = analyticsBuffer.split('\n\n');
+                            analyticsBuffer = parts.pop() ?? '';
+                            for (const part of parts) {
+                                const dataLine = part.split('\n').find(l => l.startsWith('data: '));
+                                if (!dataLine)
+                                    continue;
+                                try {
+                                    const e = JSON.parse(dataLine.slice(6));
+                                    if (e.type === 'message_start') {
+                                        const u = e.message?.usage;
+                                        if (u) {
+                                            streamInputTokens = u.input_tokens ?? 0;
+                                            streamCacheReadTokens = u.cache_read_input_tokens ?? 0;
+                                            streamCacheCreateTokens = u.cache_creation_input_tokens ?? 0;
+                                        }
+                                    }
+                                    else if (e.type === 'message_delta') {
+                                        const u = e.usage;
+                                        if (u?.output_tokens)
+                                            streamOutputTokens = u.output_tokens;
+                                    }
+                                }
+                                catch { /* ignore malformed SSE events */ }
+                            }
+                        }
                         if (isOpenAI) {
                             // Translate Anthropic SSE → OpenAI SSE
                             buffer += decoder.decode(value, { stream: true });
@@ -929,6 +1022,17 @@ export async function startProxy(opts = {}) {
                         console.error('[dario] Stream error:', sanitizeError(err));
                 }
                 res.end();
+                if (analytics && poolAccount) {
+                    analytics.record({
+                        timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
+                        inputTokens: streamInputTokens, outputTokens: streamOutputTokens,
+                        cacheReadTokens: streamCacheReadTokens, cacheCreateTokens: streamCacheCreateTokens,
+                        thinkingTokens: 0,
+                        claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
+                        util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
+                        latencyMs: Date.now() - startTime, status: upstream.status, isStream: true, isOpenAI,
+                    });
+                }
             }
             else {
                 // Buffer and forward
@@ -948,6 +1052,23 @@ export async function startProxy(opts = {}) {
                 else {
                     res.end(responseBody);
                 }
+                if (analytics && poolAccount) {
+                    try {
+                        const parsed = JSON.parse(responseBody);
+                        const usage = Analytics.parseUsage(parsed);
+                        analytics.record({
+                            timestamp: Date.now(), account: poolAccount.alias,
+                            model: usage.model || requestModel,
+                            inputTokens: usage.inputTokens, outputTokens: usage.outputTokens,
+                            cacheReadTokens: usage.cacheReadTokens, cacheCreateTokens: usage.cacheCreateTokens,
+                            thinkingTokens: usage.thinkingTokens,
+                            claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
+                            util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
+                            latencyMs: Date.now() - startTime, status: upstream.status, isStream: false, isOpenAI,
+                        });
+                    }
+                    catch { /* don't let analytics errors break responses */ }
+                }
                 if (verbose)
                     console.log(`[dario] #${requestCount} ${upstream.status}`);
             }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@askalf/dario",
-  "version": "3.7.2",
+  "version": "3.8.0",
   "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
   "type": "module",
   "bin": {
@@ -21,7 +21,7 @@
   ],
   "scripts": {
     "build": "tsc && cp src/cc-template-data.json dist/",
-    "test": "node test/issue-29-tool-translation.mjs",
+    "test": "node test/issue-29-tool-translation.mjs && node test/analytics-recording.mjs && node test/failover-429.mjs",
     "audit": "npm audit --production --audit-level=high",
     "prepublishOnly": "npm run build",
     "start": "node dist/cli.js",