@askalf/dario 3.9.3 → 3.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/proxy.js +23 -4
  2. package/package.json +1 -1
package/dist/proxy.js CHANGED
@@ -420,6 +420,14 @@ export async function startProxy(opts = {}) {
420
420
  };
421
421
  let requestCount = 0;
422
422
  const semaphore = new Semaphore(MAX_CONCURRENT);
423
+ // Cache context-1m beta unavailability. Recorded once per account (or process
424
+ // in single-account mode) after the first "long context" rejection, so we
425
+ // skip sending context-1m on every subsequent request instead of paying the
426
+ // round-trip + retry cost each time. Keyed by account alias; `__default__`
427
+ // is the single-account slot. Reported by @boeingchoco in dario#36 — the
428
+ // retry loop was firing on every POST with hybrid-tools + OC.
429
+ const context1mUnavailable = new Set();
430
+ const ACCOUNT_KEY_SINGLE = '__default__';
423
431
  // Rate governor — minimum 500ms between requests. Fast enough for agents,
424
432
  // slow enough to not look like a scripted flood of identical traffic.
425
433
  let lastRequestTime = 0;
@@ -693,8 +701,14 @@ export async function startProxy(opts = {}) {
693
701
  }
694
702
  else {
695
703
  // CC v2.1.104 beta set — 8 flags in the order Claude Code sends them.
696
- // context-1m requires Extra Usage — if it 400s, we auto-retry without it.
697
- beta = 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
704
+ // context-1m requires Extra Usage — if it 400s, we auto-retry without
705
+ // it, and cache the rejection so subsequent requests on this account
706
+ // skip context-1m entirely (dario#36).
707
+ const acctKey = poolAccount?.alias ?? ACCOUNT_KEY_SINGLE;
708
+ const skipContext1m = context1mUnavailable.has(acctKey);
709
+ beta = skipContext1m
710
+ ? 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24'
711
+ : 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
698
712
  if (clientBeta) {
699
713
  const baseSet = new Set(beta.split(','));
700
714
  const filtered = filterBillableBetas(clientBeta)
@@ -792,8 +806,13 @@ export async function startProxy(opts = {}) {
792
806
  || peekedBody.includes('Extra usage is required')
793
807
  || peekedBody.includes('long_context');
794
808
  if (isLongContextError) {
795
- if (verbose)
796
- console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) retrying without it`);
809
+ // Cache the rejection so future requests on this account skip
810
+ // context-1m up front instead of re-paying the 400/429 round-trip.
811
+ const acctKey = poolAccount?.alias ?? ACCOUNT_KEY_SINGLE;
812
+ const firstRejection = !context1mUnavailable.has(acctKey);
813
+ context1mUnavailable.add(acctKey);
814
+ if (verbose && firstRejection)
815
+ console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it (cached for session)`);
797
816
  const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
798
817
  const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
799
818
  const retry = await fetch(targetBase, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.9.3",
3
+ "version": "3.9.4",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {