@askalf/dario 3.7.2 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/pool.d.ts CHANGED
@@ -50,8 +50,8 @@ export declare class AccountPool {
50
50
  get size(): number;
51
51
  /** Select the best account for the next request. */
52
52
  select(): PoolAccount | null;
53
- /** Select the next-best account, excluding the given alias. */
54
- selectExcluding(excludeAlias: string): PoolAccount | null;
53
+ /** Select the next-best account, excluding the given set of aliases. */
54
+ selectExcluding(excluded: Set<string>): PoolAccount | null;
55
55
  updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
56
56
  markRejected(alias: string, snapshot: RateLimitSnapshot): void;
57
57
  updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
package/dist/pool.js CHANGED
@@ -82,12 +82,12 @@ export class AccountPool {
82
82
  // No rate-limit data at all — least-used first
83
83
  return all.reduce((a, b) => a.requestCount < b.requestCount ? a : b);
84
84
  }
85
- /** Select the next-best account, excluding the given alias. */
86
- selectExcluding(excludeAlias) {
85
+ /** Select the next-best account, excluding the given set of aliases. */
86
+ selectExcluding(excluded) {
87
87
  if (this.accounts.size <= 1)
88
88
  return null;
89
89
  const now = Date.now();
90
- const candidates = [...this.accounts.values()].filter(a => a.alias !== excludeAlias);
90
+ const candidates = [...this.accounts.values()].filter(a => !excluded.has(a.alias));
91
91
  const eligible = candidates.filter(a => a.rateLimit.status !== 'rejected' &&
92
92
  a.expiresAt > now + 30_000);
93
93
  if (eligible.length > 0) {
package/dist/proxy.js CHANGED
@@ -731,65 +731,147 @@ export async function startProxy(opts = {}) {
731
731
  }
732
732
  };
733
733
  req.on('close', onClientClose);
734
- let upstream = await fetch(targetBase, {
735
- method: req.method ?? 'POST',
736
- headers,
737
- body: finalBody ? new Uint8Array(finalBody) : undefined,
738
- signal: upstreamAbort.signal,
739
- });
740
- // Pool mode: capture rate-limit snapshot from the response. parseRateLimits
741
- // returns status='rejected' on 429, which makes the next `select()` call
742
- // route traffic away from this account until it resets.
743
- if (pool && poolAccount) {
744
- const snapshot = parseRateLimits(upstream.headers);
745
- if (upstream.status === 429) {
746
- pool.markRejected(poolAccount.alias, snapshot);
747
- }
748
- else {
749
- pool.updateRateLimits(poolAccount.alias, snapshot);
750
- }
751
- }
752
- // Auto-retry without context-1m if it triggers a long-context billing error.
753
- // Anthropic returns this as either 400 ("long context beta is not yet available
754
- // for this subscription") or 429 ("Extra usage is required for long context
755
- // requests") depending on the endpoint — we handle both.
756
- //
757
- // Note: `upstream.text()` consumes the body, so once we peek we MUST
758
- // handle the response here (can't fall through to the normal forwarder).
734
+ const startTime = Date.now();
735
+ // Tracks which accounts we've already tried this request — used by the
736
+ // inside-request 429 failover loop to avoid re-hitting exhausted accounts.
737
+ const triedAliases = new Set();
738
+ if (poolAccount)
739
+ triedAliases.add(poolAccount.alias);
740
+ let upstream;
759
741
  let peekedBody = null;
760
- if ((upstream.status === 400 || upstream.status === 429) && !passthrough) {
761
- peekedBody = await upstream.text().catch(() => '');
762
- const isLongContextError = peekedBody.includes('long context')
763
- || peekedBody.includes('Extra usage is required')
764
- || peekedBody.includes('long_context');
765
- if (isLongContextError) {
766
- if (verbose)
767
- console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) retrying without it`);
768
- const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
769
- const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
770
- const retry = await fetch(targetBase, {
771
- method: req.method ?? 'POST',
772
- headers: retryHeaders,
773
- body: finalBody ? new Uint8Array(finalBody) : undefined,
774
- signal: upstreamAbort.signal,
775
- });
776
- // Use the retry response from here on — peeked body is now stale
777
- upstream = retry;
778
- peekedBody = null;
779
- // Pool mode: re-capture after the context-1m retry as the snapshot may have changed.
780
- if (pool && poolAccount) {
781
- const retrySnapshot = parseRateLimits(upstream.headers);
782
- if (upstream.status === 429) {
783
- pool.markRejected(poolAccount.alias, retrySnapshot);
742
+ // Inside-request 429 failover loop (v3.8.0). On a 429, pool mode tries
743
+ // the next-best account before surfacing the error to the client.
744
+ // Bounded to pool.size iterations; breaks immediately on any non-429.
745
+ dispatchLoop: while (true) {
746
+ upstream = await fetch(targetBase, {
747
+ method: req.method ?? 'POST',
748
+ headers,
749
+ body: finalBody ? new Uint8Array(finalBody) : undefined,
750
+ signal: upstreamAbort.signal,
751
+ });
752
+ // Pool mode: capture rate-limit snapshot from the response. parseRateLimits
753
+ // returns status='rejected' on 429, which makes the next `select()` call
754
+ // route traffic away from this account until it resets.
755
+ if (pool && poolAccount) {
756
+ const snapshot = parseRateLimits(upstream.headers);
757
+ if (upstream.status === 429) {
758
+ pool.markRejected(poolAccount.alias, snapshot);
759
+ }
760
+ else {
761
+ pool.updateRateLimits(poolAccount.alias, snapshot);
762
+ }
763
+ }
764
+ // Auto-retry without context-1m if it triggers a long-context billing error.
765
+ // Anthropic returns this as either 400 ("long context beta is not yet available
766
+ // for this subscription") or 429 ("Extra usage is required for long context
767
+ // requests") depending on the endpoint — we handle both.
768
+ //
769
+ // Note: `upstream.text()` consumes the body, so once we peek we MUST
770
+ // handle the response here (can't fall through to the normal forwarder).
771
+ peekedBody = null;
772
+ if ((upstream.status === 400 || upstream.status === 429) && !passthrough) {
773
+ peekedBody = await upstream.text().catch(() => '');
774
+ const isLongContextError = peekedBody.includes('long context')
775
+ || peekedBody.includes('Extra usage is required')
776
+ || peekedBody.includes('long_context');
777
+ if (isLongContextError) {
778
+ if (verbose)
779
+ console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it`);
780
+ const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
781
+ const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
782
+ const retry = await fetch(targetBase, {
783
+ method: req.method ?? 'POST',
784
+ headers: retryHeaders,
785
+ body: finalBody ? new Uint8Array(finalBody) : undefined,
786
+ signal: upstreamAbort.signal,
787
+ });
788
+ // Use the retry response from here on — peeked body is now stale
789
+ upstream = retry;
790
+ peekedBody = null;
791
+ // Pool mode: re-capture after the context-1m retry as the snapshot may have changed.
792
+ if (pool && poolAccount) {
793
+ const retrySnapshot = parseRateLimits(upstream.headers);
794
+ if (upstream.status === 429) {
795
+ pool.markRejected(poolAccount.alias, retrySnapshot);
796
+ }
797
+ else {
798
+ pool.updateRateLimits(poolAccount.alias, retrySnapshot);
799
+ }
784
800
  }
785
- else {
786
- pool.updateRateLimits(poolAccount.alias, retrySnapshot);
801
+ }
802
+ else if (upstream.status === 429) {
803
+ // Not a context-1m issue — try pool failover before surfacing to client
804
+ if (pool && poolAccount) {
805
+ const nextAccount = pool.selectExcluding(triedAliases);
806
+ if (nextAccount) {
807
+ triedAliases.add(nextAccount.alias);
808
+ poolAccount = nextAccount;
809
+ accessToken = nextAccount.accessToken;
810
+ headers['Authorization'] = `Bearer ${accessToken}`;
811
+ headers['x-claude-code-session-id'] = nextAccount.identity.sessionId;
812
+ peekedBody = null;
813
+ continue dispatchLoop;
814
+ }
815
+ }
816
+ const enriched = enrich429(peekedBody, upstream.headers);
817
+ const responseHeaders = {
818
+ 'Content-Type': 'application/json',
819
+ 'Access-Control-Allow-Origin': corsOrigin,
820
+ ...SECURITY_HEADERS,
821
+ };
822
+ for (const [key, value] of upstream.headers.entries()) {
823
+ if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
824
+ responseHeaders[key] = value;
825
+ }
826
+ }
827
+ requestCount++;
828
+ if (analytics && poolAccount) {
829
+ analytics.record({
830
+ timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
831
+ inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreateTokens: 0, thinkingTokens: 0,
832
+ claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
833
+ util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
834
+ latencyMs: Date.now() - startTime, status: 429, isStream: false, isOpenAI,
835
+ });
787
836
  }
837
+ res.writeHead(429, responseHeaders);
838
+ res.end(enriched);
839
+ return;
840
+ }
841
+ else if (upstream.status === 400) {
842
+ // Non-long-context 400 — forward upstream error directly.
843
+ // The body is already consumed, so we write it straight out.
844
+ const responseHeaders = {
845
+ 'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
846
+ 'Access-Control-Allow-Origin': corsOrigin,
847
+ ...SECURITY_HEADERS,
848
+ };
849
+ for (const [key, value] of upstream.headers.entries()) {
850
+ if (key === 'request-id')
851
+ responseHeaders[key] = value;
852
+ }
853
+ requestCount++;
854
+ res.writeHead(400, responseHeaders);
855
+ res.end(peekedBody);
856
+ return;
788
857
  }
789
858
  }
790
- else if (upstream.status === 429) {
791
- // Not a context-1m issue — return enriched 429 directly
792
- const enriched = enrich429(peekedBody, upstream.headers);
859
+ // Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
860
+ if (upstream.status === 429) {
861
+ // Try pool failover before surfacing to client
862
+ if (pool && poolAccount) {
863
+ const nextAccount = pool.selectExcluding(triedAliases);
864
+ if (nextAccount) {
865
+ triedAliases.add(nextAccount.alias);
866
+ poolAccount = nextAccount;
867
+ accessToken = nextAccount.accessToken;
868
+ headers['Authorization'] = `Bearer ${accessToken}`;
869
+ headers['x-claude-code-session-id'] = nextAccount.identity.sessionId;
870
+ continue dispatchLoop;
871
+ }
872
+ }
873
+ const errBody = await upstream.text().catch(() => '');
874
+ const enriched = enrich429(errBody, upstream.headers);
793
875
  const responseHeaders = {
794
876
  'Content-Type': 'application/json',
795
877
  'Access-Control-Allow-Origin': corsOrigin,
@@ -801,47 +883,22 @@ export async function startProxy(opts = {}) {
801
883
  }
802
884
  }
803
885
  requestCount++;
886
+ if (analytics && poolAccount) {
887
+ analytics.record({
888
+ timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
889
+ inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreateTokens: 0, thinkingTokens: 0,
890
+ claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
891
+ util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
892
+ latencyMs: Date.now() - startTime, status: 429, isStream: false, isOpenAI,
893
+ });
894
+ }
804
895
  res.writeHead(429, responseHeaders);
805
896
  res.end(enriched);
806
897
  return;
807
898
  }
808
- else if (upstream.status === 400) {
809
- // Non-long-context 400 — forward upstream error directly.
810
- // The body is already consumed, so we write it straight out.
811
- const responseHeaders = {
812
- 'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
813
- 'Access-Control-Allow-Origin': corsOrigin,
814
- ...SECURITY_HEADERS,
815
- };
816
- for (const [key, value] of upstream.headers.entries()) {
817
- if (key === 'request-id')
818
- responseHeaders[key] = value;
819
- }
820
- requestCount++;
821
- res.writeHead(400, responseHeaders);
822
- res.end(peekedBody);
823
- return;
824
- }
825
- }
826
- // Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
827
- if (upstream.status === 429) {
828
- const errBody = await upstream.text().catch(() => '');
829
- const enriched = enrich429(errBody, upstream.headers);
830
- const responseHeaders = {
831
- 'Content-Type': 'application/json',
832
- 'Access-Control-Allow-Origin': corsOrigin,
833
- ...SECURITY_HEADERS,
834
- };
835
- for (const [key, value] of upstream.headers.entries()) {
836
- if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
837
- responseHeaders[key] = value;
838
- }
839
- }
840
- requestCount++;
841
- res.writeHead(429, responseHeaders);
842
- res.end(enriched);
843
- return;
844
- }
899
+ // Non-429 — exit dispatch loop and forward the response to client.
900
+ break;
901
+ } // end dispatchLoop: while (true)
845
902
  // Detect streaming from content-type (reliable) or body (fallback)
846
903
  const contentType = upstream.headers.get('content-type') ?? '';
847
904
  const isStream = contentType.includes('text/event-stream');
@@ -869,6 +926,14 @@ export async function startProxy(opts = {}) {
869
926
  }
870
927
  res.writeHead(upstream.status, responseHeaders);
871
928
  if (isStream && upstream.body) {
929
+ // Analytics accumulators for streaming responses — filled by parsing
930
+ // message_start / message_delta SSE events as they flow through.
931
+ let streamInputTokens = 0;
932
+ let streamOutputTokens = 0;
933
+ let streamCacheReadTokens = 0;
934
+ let streamCacheCreateTokens = 0;
935
+ const analyticsDecoder = (analytics && poolAccount) ? new TextDecoder() : null;
936
+ let analyticsBuffer = '';
872
937
  // Stream SSE chunks through
873
938
  const reader = upstream.body.getReader();
874
939
  const decoder = new TextDecoder();
@@ -888,6 +953,34 @@ export async function startProxy(opts = {}) {
888
953
  const { done, value } = await reader.read();
889
954
  if (done)
890
955
  break;
956
+ // Parse SSE events for analytics regardless of routing branch
957
+ if (analyticsDecoder && value) {
958
+ analyticsBuffer += analyticsDecoder.decode(value, { stream: true });
959
+ const parts = analyticsBuffer.split('\n\n');
960
+ analyticsBuffer = parts.pop() ?? '';
961
+ for (const part of parts) {
962
+ const dataLine = part.split('\n').find(l => l.startsWith('data: '));
963
+ if (!dataLine)
964
+ continue;
965
+ try {
966
+ const e = JSON.parse(dataLine.slice(6));
967
+ if (e.type === 'message_start') {
968
+ const u = e.message?.usage;
969
+ if (u) {
970
+ streamInputTokens = u.input_tokens ?? 0;
971
+ streamCacheReadTokens = u.cache_read_input_tokens ?? 0;
972
+ streamCacheCreateTokens = u.cache_creation_input_tokens ?? 0;
973
+ }
974
+ }
975
+ else if (e.type === 'message_delta') {
976
+ const u = e.usage;
977
+ if (u?.output_tokens)
978
+ streamOutputTokens = u.output_tokens;
979
+ }
980
+ }
981
+ catch { /* ignore malformed SSE events */ }
982
+ }
983
+ }
891
984
  if (isOpenAI) {
892
985
  // Translate Anthropic SSE → OpenAI SSE
893
986
  buffer += decoder.decode(value, { stream: true });
@@ -929,6 +1022,17 @@ export async function startProxy(opts = {}) {
929
1022
  console.error('[dario] Stream error:', sanitizeError(err));
930
1023
  }
931
1024
  res.end();
1025
+ if (analytics && poolAccount) {
1026
+ analytics.record({
1027
+ timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
1028
+ inputTokens: streamInputTokens, outputTokens: streamOutputTokens,
1029
+ cacheReadTokens: streamCacheReadTokens, cacheCreateTokens: streamCacheCreateTokens,
1030
+ thinkingTokens: 0,
1031
+ claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
1032
+ util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
1033
+ latencyMs: Date.now() - startTime, status: upstream.status, isStream: true, isOpenAI,
1034
+ });
1035
+ }
932
1036
  }
933
1037
  else {
934
1038
  // Buffer and forward
@@ -948,6 +1052,23 @@ export async function startProxy(opts = {}) {
948
1052
  else {
949
1053
  res.end(responseBody);
950
1054
  }
1055
+ if (analytics && poolAccount) {
1056
+ try {
1057
+ const parsed = JSON.parse(responseBody);
1058
+ const usage = Analytics.parseUsage(parsed);
1059
+ analytics.record({
1060
+ timestamp: Date.now(), account: poolAccount.alias,
1061
+ model: usage.model || requestModel,
1062
+ inputTokens: usage.inputTokens, outputTokens: usage.outputTokens,
1063
+ cacheReadTokens: usage.cacheReadTokens, cacheCreateTokens: usage.cacheCreateTokens,
1064
+ thinkingTokens: usage.thinkingTokens,
1065
+ claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
1066
+ util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
1067
+ latencyMs: Date.now() - startTime, status: upstream.status, isStream: false, isOpenAI,
1068
+ });
1069
+ }
1070
+ catch { /* don't let analytics errors break responses */ }
1071
+ }
951
1072
  if (verbose)
952
1073
  console.log(`[dario] #${requestCount} ${upstream.status}`);
953
1074
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.7.2",
3
+ "version": "3.8.0",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,7 +21,7 @@
21
21
  ],
22
22
  "scripts": {
23
23
  "build": "tsc && cp src/cc-template-data.json dist/",
24
- "test": "node test/issue-29-tool-translation.mjs",
24
+ "test": "node test/issue-29-tool-translation.mjs && node test/analytics-recording.mjs && node test/failover-429.mjs",
25
25
  "audit": "npm audit --production --audit-level=high",
26
26
  "prepublishOnly": "npm run build",
27
27
  "start": "node dist/cli.js",