@askalf/dario 3.31.16 → 3.31.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/pool.d.ts CHANGED
@@ -19,6 +19,20 @@ export interface RateLimitSnapshot {
19
19
  status: string;
20
20
  util5h: number;
21
21
  util7d: number;
22
+ /**
23
+ * Per-model 7-day utilization buckets — Anthropic carves separate
24
+ * weekly windows for some model families. As of 2026-04-25 the live
25
+ * API emits `anthropic-ratelimit-unified-7d_sonnet-utilization` on
26
+ * Sonnet responses (corresponds to the "Sonnet only" line on the user
27
+ * dashboard); other families do not yet have dedicated buckets but
28
+ * the parser scans the header set generically so any future
29
+ * `7d_<family>` header is captured automatically.
30
+ *
31
+ * Keyed by the family suffix taken from the header name, normalized
32
+ * to lowercase (e.g. `sonnet` / `opus` / `haiku`). Empty when no
33
+ * per-model headers were on the response.
34
+ */
35
+ perModel7d: Record<string, number>;
22
36
  overageUtil: number;
23
37
  claim: string;
24
38
  reset: number;
@@ -45,6 +59,30 @@ export interface PoolStatus {
45
59
  }
46
60
  /** Parse an Anthropic response's rate-limit headers into a snapshot. */
47
61
  export declare function parseRateLimits(headers: Headers): RateLimitSnapshot;
62
+ /**
63
+ * Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
64
+ * model id. Used to look up the per-model 7d bucket in
65
+ * `RateLimitSnapshot.perModel7d` during routing decisions. Returns null
66
+ * for non-Claude models or model ids that don't carry a recognizable
67
+ * family token (those requests just use the unified buckets).
68
+ *
69
+ * Generous on input shape: matches `claude-opus-4-7`, `opus`, `claude-3-7-sonnet-…`,
70
+ * `claude-haiku-4-5`, anything containing the family token. Lowercase-normalized
71
+ * so it pairs cleanly with `parseRateLimits`'s lowercase family keys.
72
+ */
73
+ export declare function modelFamily(modelId: string | null | undefined): string | null;
74
+ /**
75
+ * Compute headroom for a single account given its rate-limit snapshot.
76
+ * Headroom is the slack between the most-saturated relevant bucket and
77
+ * full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
78
+ *
79
+ * When `family` is supplied AND the snapshot has a corresponding per-
80
+ * model 7d bucket, that bucket is included in the max. When the family
81
+ * isn't represented in the snapshot (e.g. account hasn't seen a Sonnet
82
+ * request yet so `7d_sonnet` is unknown), headroom is computed from the
83
+ * unified buckets only — best-effort, populated on the next response.
84
+ */
85
+ export declare function computeHeadroom(snapshot: RateLimitSnapshot, family?: string | null): number;
48
86
  export declare class AccountPool {
49
87
  private accounts;
50
88
  private queue;
@@ -61,8 +99,14 @@ export declare class AccountPool {
61
99
  }): void;
62
100
  remove(alias: string): boolean;
63
101
  get size(): number;
64
- /** Select the best account for the next request. */
65
- select(): PoolAccount | null;
102
+ /**
103
+ * Select the best account for the next request. `family` (when supplied)
104
+ * is the request's model family (`opus` / `sonnet` / `haiku`); when
105
+ * present and the account has a matching per-model 7d bucket, that
106
+ * bucket joins the headroom max. Family-less calls fall back to the
107
+ * unified-buckets-only headroom (the pre-3.31.17 behavior).
108
+ */
109
+ select(family?: string | null): PoolAccount | null;
66
110
  /**
67
111
  * Select with session stickiness. If `stickyKey` is already bound to a
68
112
  * healthy account (not rejected, token not near expiry, headroom > 2%),
@@ -79,7 +123,7 @@ export declare class AccountPool {
79
123
  *
80
124
  * Also performs lazy cleanup of expired bindings (TTL or size cap).
81
125
  */
82
- selectSticky(stickyKey: string | null): PoolAccount | null;
126
+ selectSticky(stickyKey: string | null, family?: string | null): PoolAccount | null;
83
127
  /**
84
128
  * Rebind a sticky key to a different account — called by proxy after an
85
129
  * in-request 429 failover moves to the next-best account. Without this
@@ -99,7 +143,7 @@ export declare class AccountPool {
99
143
  /** Test/inspection helper — current alias bound to a key, or null. */
100
144
  stickyAliasFor(stickyKey: string): string | null;
101
145
  /** Select the next-best account, excluding the given set of aliases. */
102
- selectExcluding(excluded: Set<string>): PoolAccount | null;
146
+ selectExcluding(excluded: Set<string>, family?: string | null): PoolAccount | null;
103
147
  updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
104
148
  markRejected(alias: string, snapshot: RateLimitSnapshot): void;
105
149
  updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
package/dist/pool.js CHANGED
@@ -28,19 +28,44 @@ export const EMPTY_SNAPSHOT = {
28
28
  status: 'unknown',
29
29
  util5h: 0,
30
30
  util7d: 0,
31
+ perModel7d: {},
31
32
  overageUtil: 0,
32
33
  claim: 'unknown',
33
34
  reset: 0,
34
35
  fallbackPct: 0,
35
36
  updatedAt: 0,
36
37
  };
38
+ /**
39
+ * Match `anthropic-ratelimit-unified-7d_<family>-utilization`. Generic on
40
+ * `<family>` so a future `7d_opus` / `7d_haiku` (or anything Anthropic
41
+ * adds without notice) is captured automatically. The family is
42
+ * normalized to lowercase to match `modelFamily()` output.
43
+ */
44
+ const PER_MODEL_7D_HEADER = /^anthropic-ratelimit-unified-7d_([a-z0-9-]+)-utilization$/i;
37
45
  /** Parse an Anthropic response's rate-limit headers into a snapshot. */
38
46
  export function parseRateLimits(headers) {
39
47
  const get = (key) => headers.get(`anthropic-ratelimit-unified-${key}`) ?? '';
48
+ const perModel7d = {};
49
+ // Iterate the full header set — `headers.get` only retrieves known
50
+ // keys, but Anthropic can add new `7d_<family>-utilization` shapes
51
+ // unannounced. Scanning the iterator means the parser is automatically
52
+ // forward-compatible. Real `Headers` instances and test-side mocks
53
+ // (which implement `.entries()` but not direct iteration) both work
54
+ // through the explicit `.entries()` call.
55
+ const entries = (typeof headers.entries === 'function')
56
+ ? headers.entries()
57
+ : headers;
58
+ for (const [k, v] of entries) {
59
+ const m = k.match(PER_MODEL_7D_HEADER);
60
+ if (m && m[1]) {
61
+ perModel7d[m[1].toLowerCase()] = parseFloat(v) || 0;
62
+ }
63
+ }
40
64
  return {
41
65
  status: get('status') || 'unknown',
42
66
  util5h: parseFloat(get('5h-utilization')) || 0,
43
67
  util7d: parseFloat(get('7d-utilization')) || 0,
68
+ perModel7d,
44
69
  overageUtil: parseFloat(get('overage-utilization')) || 0,
45
70
  claim: get('representative-claim') || 'unknown',
46
71
  reset: parseInt(get('reset')) || 0,
@@ -48,6 +73,49 @@ export function parseRateLimits(headers) {
48
73
  updatedAt: Date.now(),
49
74
  };
50
75
  }
76
+ /**
77
+ * Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
78
+ * model id. Used to look up the per-model 7d bucket in
79
+ * `RateLimitSnapshot.perModel7d` during routing decisions. Returns null
80
+ * for non-Claude models or model ids that don't carry a recognizable
81
+ * family token (those requests just use the unified buckets).
82
+ *
83
+ * Generous on input shape: matches `claude-opus-4-7`, `opus`, `claude-3-7-sonnet-…`,
84
+ * `claude-haiku-4-5`, anything containing the family token. Lowercase-normalized
85
+ * so it pairs cleanly with `parseRateLimits`'s lowercase family keys.
86
+ */
87
+ export function modelFamily(modelId) {
88
+ if (!modelId)
89
+ return null;
90
+ const m = modelId.toLowerCase();
91
+ if (m.includes('opus'))
92
+ return 'opus';
93
+ if (m.includes('sonnet'))
94
+ return 'sonnet';
95
+ if (m.includes('haiku'))
96
+ return 'haiku';
97
+ return null;
98
+ }
99
+ /**
100
+ * Compute headroom for a single account given its rate-limit snapshot.
101
+ * Headroom is the slack between the most-saturated relevant bucket and
102
+ * full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
103
+ *
104
+ * When `family` is supplied AND the snapshot has a corresponding per-
105
+ * model 7d bucket, that bucket is included in the max. When the family
106
+ * isn't represented in the snapshot (e.g. account hasn't seen a Sonnet
107
+ * request yet so `7d_sonnet` is unknown), headroom is computed from the
108
+ * unified buckets only — best-effort, populated on the next response.
109
+ */
110
+ export function computeHeadroom(snapshot, family) {
111
+ const utils = [snapshot.util5h, snapshot.util7d];
112
+ if (family) {
113
+ const perModel = snapshot.perModel7d[family];
114
+ if (perModel !== undefined)
115
+ utils.push(perModel);
116
+ }
117
+ return 1 - Math.max(...utils);
118
+ }
51
119
  const STICKY_TTL_MS = 6 * 60 * 60 * 1000; // 6h
52
120
  const STICKY_MAX_ENTRIES = 2_000; // lazy cleanup cap
53
121
  /**
@@ -87,8 +155,14 @@ export class AccountPool {
87
155
  get size() {
88
156
  return this.accounts.size;
89
157
  }
90
- /** Select the best account for the next request. */
91
- select() {
158
+ /**
159
+ * Select the best account for the next request. `family` (when supplied)
160
+ * is the request's model family (`opus` / `sonnet` / `haiku`); when
161
+ * present and the account has a matching per-model 7d bucket, that
162
+ * bucket joins the headroom max. Family-less calls fall back to the
163
+ * unified-buckets-only headroom (the pre-3.31.17 behavior).
164
+ */
165
+ select(family) {
92
166
  if (this.accounts.size === 0)
93
167
  return null;
94
168
  const now = Date.now();
@@ -97,8 +171,8 @@ export class AccountPool {
97
171
  a.expiresAt > now + 30_000);
98
172
  if (eligible.length > 0) {
99
173
  return eligible.reduce((best, curr) => {
100
- const bestHeadroom = 1 - Math.max(best.rateLimit.util5h, best.rateLimit.util7d);
101
- const currHeadroom = 1 - Math.max(curr.rateLimit.util5h, curr.rateLimit.util7d);
174
+ const bestHeadroom = computeHeadroom(best.rateLimit, family);
175
+ const currHeadroom = computeHeadroom(curr.rateLimit, family);
102
176
  return currHeadroom > bestHeadroom ? curr : best;
103
177
  });
104
178
  }
@@ -126,9 +200,9 @@ export class AccountPool {
126
200
  *
127
201
  * Also performs lazy cleanup of expired bindings (TTL or size cap).
128
202
  */
129
- selectSticky(stickyKey) {
203
+ selectSticky(stickyKey, family) {
130
204
  if (!stickyKey)
131
- return this.select();
205
+ return this.select(family);
132
206
  this.cleanupSticky();
133
207
  const binding = this.sticky.get(stickyKey);
134
208
  if (binding) {
@@ -137,11 +211,11 @@ export class AccountPool {
137
211
  if (bound
138
212
  && bound.rateLimit.status !== 'rejected'
139
213
  && bound.expiresAt > now + 30_000
140
- && (1 - Math.max(bound.rateLimit.util5h, bound.rateLimit.util7d)) > POOL_HEADROOM_FLOOR) {
214
+ && computeHeadroom(bound.rateLimit, family) > POOL_HEADROOM_FLOOR) {
141
215
  return bound;
142
216
  }
143
217
  }
144
- const picked = this.select();
218
+ const picked = this.select(family);
145
219
  if (picked) {
146
220
  this.sticky.set(stickyKey, { alias: picked.alias, boundAt: Date.now() });
147
221
  }
@@ -189,7 +263,7 @@ export class AccountPool {
189
263
  return this.sticky.get(stickyKey)?.alias ?? null;
190
264
  }
191
265
  /** Select the next-best account, excluding the given set of aliases. */
192
- selectExcluding(excluded) {
266
+ selectExcluding(excluded, family) {
193
267
  if (this.accounts.size <= 1)
194
268
  return null;
195
269
  const now = Date.now();
@@ -198,8 +272,8 @@ export class AccountPool {
198
272
  a.expiresAt > now + 30_000);
199
273
  if (eligible.length > 0) {
200
274
  return eligible.reduce((best, curr) => {
201
- const bestHeadroom = 1 - Math.max(best.rateLimit.util5h, best.rateLimit.util7d);
202
- const currHeadroom = 1 - Math.max(curr.rateLimit.util5h, curr.rateLimit.util7d);
275
+ const bestHeadroom = computeHeadroom(best.rateLimit, family);
276
+ const currHeadroom = computeHeadroom(curr.rateLimit, family);
203
277
  return currHeadroom > bestHeadroom ? curr : best;
204
278
  });
205
279
  }
@@ -240,7 +314,10 @@ export class AccountPool {
240
314
  const now = Date.now();
241
315
  const healthy = all.filter(a => a.rateLimit.status !== 'rejected' &&
242
316
  a.expiresAt > now + 30_000);
243
- const headrooms = all.map(a => 1 - Math.max(a.rateLimit.util5h, a.rateLimit.util7d));
317
+ // Status is a pool-wide aggregate; family-agnostic. Per-model
318
+ // headroom is request-context-specific and only meaningful at
319
+ // select() time.
320
+ const headrooms = all.map(a => computeHeadroom(a.rateLimit));
244
321
  const avgHeadroom = headrooms.length > 0 ? headrooms.reduce((a, b) => a + b, 0) / headrooms.length : 0;
245
322
  const best = this.select();
246
323
  return {
@@ -260,7 +337,7 @@ export class AccountPool {
260
337
  async waitForAccount() {
261
338
  const immediate = this.select();
262
339
  if (immediate) {
263
- const headroom = 1 - Math.max(immediate.rateLimit.util5h, immediate.rateLimit.util7d);
340
+ const headroom = computeHeadroom(immediate.rateLimit);
264
341
  if (headroom > POOL_HEADROOM_FLOOR)
265
342
  return immediate;
266
343
  }
@@ -303,7 +380,7 @@ export class AccountPool {
303
380
  const account = this.select();
304
381
  if (!account)
305
382
  break;
306
- const headroom = 1 - Math.max(account.rateLimit.util5h, account.rateLimit.util7d);
383
+ const headroom = computeHeadroom(account.rateLimit);
307
384
  if (headroom <= POOL_HEADROOM_FLOOR)
308
385
  break;
309
386
  const entry = this.queue.shift();
package/dist/proxy.js CHANGED
@@ -8,7 +8,7 @@ import { arch, platform } from 'node:process';
8
8
  import { getAccessToken, getStatus } from './oauth.js';
9
9
  import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
10
10
  import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
11
- import { AccountPool, computeStickyKey, parseRateLimits } from './pool.js';
11
+ import { AccountPool, computeStickyKey, parseRateLimits, modelFamily } from './pool.js';
12
12
  import { Analytics, billingBucketFromClaim } from './analytics.js';
13
13
  import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
14
14
  import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
@@ -460,6 +460,11 @@ export async function startProxy(opts = {}) {
460
460
  // Single-account dario keeps its existing code path unchanged.
461
461
  const accountsList = await loadAllAccounts();
462
462
  const pool = accountsList.length >= 2 ? new AccountPool() : null;
463
+ // Per-model rate-limit bucket families seen during this proxy run. First-
464
+ // sight is logged once when verbose so a new Anthropic bucket (e.g. an
465
+ // eventual `7d_opus`) doesn't slip past unnoticed. Pure observability —
466
+ // routing already handles unknown families generically.
467
+ const seenPerModelBuckets = new Set();
463
468
  const analytics = pool ? new Analytics() : null;
464
469
  let status;
465
470
  if (pool) {
@@ -964,7 +969,7 @@ export async function startProxy(opts = {}) {
964
969
  // Rotating off mid-session costs cache-create on every turn.
965
970
  stickyKey = computeStickyKey(userMsg);
966
971
  if (pool && stickyKey) {
967
- const preferred = pool.selectSticky(stickyKey);
972
+ const preferred = pool.selectSticky(stickyKey, modelFamily(requestModel));
968
973
  if (preferred && preferred.alias !== poolAccount?.alias) {
969
974
  poolAccount = preferred;
970
975
  accessToken = preferred.accessToken;
@@ -1185,6 +1190,20 @@ export async function startProxy(opts = {}) {
1185
1190
  else {
1186
1191
  pool.updateRateLimits(poolAccount.alias, snapshot);
1187
1192
  }
1193
+ // First-sight detector for per-model rate-limit buckets. Anthropic
1194
+ // ships these unannounced — e.g. `7d_sonnet-utilization` appeared
1195
+ // around 2026-04-25 — and verbose-mode users want a heads-up the
1196
+ // first time a new family shows up so they can decide whether to
1197
+ // bump dario's expectations. Pure logging; the routing path
1198
+ // already handles arbitrary family keys (see pool.computeHeadroom).
1199
+ for (const family of Object.keys(snapshot.perModel7d)) {
1200
+ if (!seenPerModelBuckets.has(family)) {
1201
+ seenPerModelBuckets.add(family);
1202
+ if (verbose) {
1203
+ console.log(`[dario] new per-model rate-limit bucket observed: 7d_${family} (util=${snapshot.perModel7d[family]?.toFixed(2)})`);
1204
+ }
1205
+ }
1206
+ }
1188
1207
  }
1189
1208
  // Auto-retry without context-1m if it triggers a long-context billing error.
1190
1209
  // Anthropic returns this as either 400 ("long context beta is not yet available
@@ -1287,7 +1306,7 @@ export async function startProxy(opts = {}) {
1287
1306
  else if (upstream.status === 429) {
1288
1307
  // Not a context-1m issue — try pool failover before surfacing to client
1289
1308
  if (pool && poolAccount) {
1290
- const nextAccount = pool.selectExcluding(triedAliases);
1309
+ const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
1291
1310
  if (nextAccount) {
1292
1311
  triedAliases.add(nextAccount.alias);
1293
1312
  poolAccount = nextAccount;
@@ -1346,7 +1365,7 @@ export async function startProxy(opts = {}) {
1346
1365
  if (upstream.status === 429) {
1347
1366
  // Try pool failover before surfacing to client
1348
1367
  if (pool && poolAccount) {
1349
- const nextAccount = pool.selectExcluding(triedAliases);
1368
+ const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
1350
1369
  if (nextAccount) {
1351
1370
  triedAliases.add(nextAccount.alias);
1352
1371
  poolAccount = nextAccount;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.31.16",
3
+ "version": "3.31.17",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {