@askalf/dario 3.31.16 → 3.31.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pool.d.ts +48 -4
- package/dist/pool.js +91 -14
- package/dist/proxy.js +23 -4
- package/package.json +1 -1
package/dist/pool.d.ts
CHANGED
|
@@ -19,6 +19,20 @@ export interface RateLimitSnapshot {
|
|
|
19
19
|
status: string;
|
|
20
20
|
util5h: number;
|
|
21
21
|
util7d: number;
|
|
22
|
+
/**
|
|
23
|
+
* Per-model 7-day utilization buckets — Anthropic carves separate
|
|
24
|
+
* weekly windows for some model families. As of 2026-04-25 the live
|
|
25
|
+
* API emits `anthropic-ratelimit-unified-7d_sonnet-utilization` on
|
|
26
|
+
* Sonnet responses (corresponds to the "Sonnet only" line on the user
|
|
27
|
+
* dashboard); other families do not yet have dedicated buckets but
|
|
28
|
+
* the parser scans the header set generically so any future
|
|
29
|
+
* `7d_<family>` header is captured automatically.
|
|
30
|
+
*
|
|
31
|
+
* Keyed by the family suffix as it arrived on the wire (lowercase,
|
|
32
|
+
* e.g. `sonnet` / `opus` / `haiku`). Empty when no per-model headers
|
|
33
|
+
* were on the response.
|
|
34
|
+
*/
|
|
35
|
+
perModel7d: Record<string, number>;
|
|
22
36
|
overageUtil: number;
|
|
23
37
|
claim: string;
|
|
24
38
|
reset: number;
|
|
@@ -45,6 +59,30 @@ export interface PoolStatus {
|
|
|
45
59
|
}
|
|
46
60
|
/** Parse an Anthropic response's rate-limit headers into a snapshot. */
|
|
47
61
|
export declare function parseRateLimits(headers: Headers): RateLimitSnapshot;
|
|
62
|
+
/**
|
|
63
|
+
* Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
|
|
64
|
+
* model id. Used to look up the per-model 7d bucket in
|
|
65
|
+
* `RateLimitSnapshot.perModel7d` during routing decisions. Returns null
|
|
66
|
+
* for non-Claude models or model ids that don't carry a recognizable
|
|
67
|
+
* family token (those requests just use the unified buckets).
|
|
68
|
+
*
|
|
69
|
+
* Generous on input shape: matches `claude-opus-4-7`, `opus`, `claude-3-7-sonnet-…`,
|
|
70
|
+
* `claude-haiku-4-5`, anything containing the family token. Lowercase-normalized
|
|
71
|
+
* so it pairs cleanly with `parseRateLimits`'s lowercase family keys.
|
|
72
|
+
*/
|
|
73
|
+
export declare function modelFamily(modelId: string | null | undefined): string | null;
|
|
74
|
+
/**
|
|
75
|
+
* Compute headroom for a single account given its rate-limit snapshot.
|
|
76
|
+
* Headroom is the slack between the most-saturated relevant bucket and
|
|
77
|
+
* full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
|
|
78
|
+
*
|
|
79
|
+
* When `family` is supplied AND the snapshot has a corresponding per-
|
|
80
|
+
* model 7d bucket, that bucket is included in the max. When the family
|
|
81
|
+
* isn't represented in the snapshot (e.g. account hasn't seen a Sonnet
|
|
82
|
+
* request yet so `7d_sonnet` is unknown), headroom is computed from the
|
|
83
|
+
* unified buckets only — best-effort, populated on the next response.
|
|
84
|
+
*/
|
|
85
|
+
export declare function computeHeadroom(snapshot: RateLimitSnapshot, family?: string | null): number;
|
|
48
86
|
export declare class AccountPool {
|
|
49
87
|
private accounts;
|
|
50
88
|
private queue;
|
|
@@ -61,8 +99,14 @@ export declare class AccountPool {
|
|
|
61
99
|
}): void;
|
|
62
100
|
remove(alias: string): boolean;
|
|
63
101
|
get size(): number;
|
|
64
|
-
/**
|
|
65
|
-
|
|
102
|
+
/**
|
|
103
|
+
* Select the best account for the next request. `family` (when supplied)
|
|
104
|
+
* is the request's model family (`opus` / `sonnet` / `haiku`); when
|
|
105
|
+
* present and the account has a matching per-model 7d bucket, that
|
|
106
|
+
* bucket joins the headroom max. Family-less calls fall back to the
|
|
107
|
+
* unified-buckets-only headroom — same behavior as before this PR.
|
|
108
|
+
*/
|
|
109
|
+
select(family?: string | null): PoolAccount | null;
|
|
66
110
|
/**
|
|
67
111
|
* Select with session stickiness. If `stickyKey` is already bound to a
|
|
68
112
|
* healthy account (not rejected, token not near expiry, headroom > 2%),
|
|
@@ -79,7 +123,7 @@ export declare class AccountPool {
|
|
|
79
123
|
*
|
|
80
124
|
* Also performs lazy cleanup of expired bindings (TTL or size cap).
|
|
81
125
|
*/
|
|
82
|
-
selectSticky(stickyKey: string | null): PoolAccount | null;
|
|
126
|
+
selectSticky(stickyKey: string | null, family?: string | null): PoolAccount | null;
|
|
83
127
|
/**
|
|
84
128
|
* Rebind a sticky key to a different account — called by proxy after an
|
|
85
129
|
* in-request 429 failover moves to the next-best account. Without this
|
|
@@ -99,7 +143,7 @@ export declare class AccountPool {
|
|
|
99
143
|
/** Test/inspection helper — current alias bound to a key, or null. */
|
|
100
144
|
stickyAliasFor(stickyKey: string): string | null;
|
|
101
145
|
/** Select the next-best account, excluding the given set of aliases. */
|
|
102
|
-
selectExcluding(excluded: Set<string>): PoolAccount | null;
|
|
146
|
+
selectExcluding(excluded: Set<string>, family?: string | null): PoolAccount | null;
|
|
103
147
|
updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
|
|
104
148
|
markRejected(alias: string, snapshot: RateLimitSnapshot): void;
|
|
105
149
|
updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
|
package/dist/pool.js
CHANGED
|
@@ -28,19 +28,44 @@ export const EMPTY_SNAPSHOT = {
|
|
|
28
28
|
status: 'unknown',
|
|
29
29
|
util5h: 0,
|
|
30
30
|
util7d: 0,
|
|
31
|
+
perModel7d: {},
|
|
31
32
|
overageUtil: 0,
|
|
32
33
|
claim: 'unknown',
|
|
33
34
|
reset: 0,
|
|
34
35
|
fallbackPct: 0,
|
|
35
36
|
updatedAt: 0,
|
|
36
37
|
};
|
|
38
|
+
/**
|
|
39
|
+
* Match `anthropic-ratelimit-unified-7d_<family>-utilization`. Generic on
|
|
40
|
+
* `<family>` so a future `7d_opus` / `7d_haiku` (or anything Anthropic
|
|
41
|
+
* adds without notice) is captured automatically. The family is
|
|
42
|
+
* normalized to lowercase to match `modelFamily()` output.
|
|
43
|
+
*/
|
|
44
|
+
const PER_MODEL_7D_HEADER = /^anthropic-ratelimit-unified-7d_([a-z0-9-]+)-utilization$/i;
|
|
37
45
|
/** Parse an Anthropic response's rate-limit headers into a snapshot. */
|
|
38
46
|
export function parseRateLimits(headers) {
|
|
39
47
|
const get = (key) => headers.get(`anthropic-ratelimit-unified-${key}`) ?? '';
|
|
48
|
+
const perModel7d = {};
|
|
49
|
+
// Iterate the full header set — `headers.get` only retrieves known
|
|
50
|
+
// keys, but Anthropic can add new `7d_<family>-utilization` shapes
|
|
51
|
+
// unannounced. Scanning the iterator means the parser is automatically
|
|
52
|
+
// forward-compatible. Real `Headers` instances and test-side mocks
|
|
53
|
+
// (which implement `.entries()` but not direct iteration) both work
|
|
54
|
+
// through the explicit `.entries()` call.
|
|
55
|
+
const entries = (typeof headers.entries === 'function')
|
|
56
|
+
? headers.entries()
|
|
57
|
+
: headers;
|
|
58
|
+
for (const [k, v] of entries) {
|
|
59
|
+
const m = k.match(PER_MODEL_7D_HEADER);
|
|
60
|
+
if (m && m[1]) {
|
|
61
|
+
perModel7d[m[1].toLowerCase()] = parseFloat(v) || 0;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
40
64
|
return {
|
|
41
65
|
status: get('status') || 'unknown',
|
|
42
66
|
util5h: parseFloat(get('5h-utilization')) || 0,
|
|
43
67
|
util7d: parseFloat(get('7d-utilization')) || 0,
|
|
68
|
+
perModel7d,
|
|
44
69
|
overageUtil: parseFloat(get('overage-utilization')) || 0,
|
|
45
70
|
claim: get('representative-claim') || 'unknown',
|
|
46
71
|
reset: parseInt(get('reset')) || 0,
|
|
@@ -48,6 +73,49 @@ export function parseRateLimits(headers) {
|
|
|
48
73
|
updatedAt: Date.now(),
|
|
49
74
|
};
|
|
50
75
|
}
|
|
76
|
+
/**
|
|
77
|
+
* Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
|
|
78
|
+
* model id. Used to look up the per-model 7d bucket in
|
|
79
|
+
* `RateLimitSnapshot.perModel7d` during routing decisions. Returns null
|
|
80
|
+
* for non-Claude models or model ids that don't carry a recognizable
|
|
81
|
+
* family token (those requests just use the unified buckets).
|
|
82
|
+
*
|
|
83
|
+
* Generous on input shape: matches `claude-opus-4-7`, `opus`, `claude-3-7-sonnet-…`,
|
|
84
|
+
* `claude-haiku-4-5`, anything containing the family token. Lowercase-normalized
|
|
85
|
+
* so it pairs cleanly with `parseRateLimits`'s lowercase family keys.
|
|
86
|
+
*/
|
|
87
|
+
export function modelFamily(modelId) {
|
|
88
|
+
if (!modelId)
|
|
89
|
+
return null;
|
|
90
|
+
const m = modelId.toLowerCase();
|
|
91
|
+
if (m.includes('opus'))
|
|
92
|
+
return 'opus';
|
|
93
|
+
if (m.includes('sonnet'))
|
|
94
|
+
return 'sonnet';
|
|
95
|
+
if (m.includes('haiku'))
|
|
96
|
+
return 'haiku';
|
|
97
|
+
return null;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Compute headroom for a single account given its rate-limit snapshot.
|
|
101
|
+
* Headroom is the slack between the most-saturated relevant bucket and
|
|
102
|
+
* full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
|
|
103
|
+
*
|
|
104
|
+
* When `family` is supplied AND the snapshot has a corresponding per-
|
|
105
|
+
* model 7d bucket, that bucket is included in the max. When the family
|
|
106
|
+
* isn't represented in the snapshot (e.g. account hasn't seen a Sonnet
|
|
107
|
+
* request yet so `7d_sonnet` is unknown), headroom is computed from the
|
|
108
|
+
* unified buckets only — best-effort, populated on the next response.
|
|
109
|
+
*/
|
|
110
|
+
export function computeHeadroom(snapshot, family) {
|
|
111
|
+
const utils = [snapshot.util5h, snapshot.util7d];
|
|
112
|
+
if (family) {
|
|
113
|
+
const perModel = snapshot.perModel7d[family];
|
|
114
|
+
if (perModel !== undefined)
|
|
115
|
+
utils.push(perModel);
|
|
116
|
+
}
|
|
117
|
+
return 1 - Math.max(...utils);
|
|
118
|
+
}
|
|
51
119
|
const STICKY_TTL_MS = 6 * 60 * 60 * 1000; // 6h
|
|
52
120
|
const STICKY_MAX_ENTRIES = 2_000; // lazy cleanup cap
|
|
53
121
|
/**
|
|
@@ -87,8 +155,14 @@ export class AccountPool {
|
|
|
87
155
|
get size() {
|
|
88
156
|
return this.accounts.size;
|
|
89
157
|
}
|
|
90
|
-
/**
|
|
91
|
-
|
|
158
|
+
/**
|
|
159
|
+
* Select the best account for the next request. `family` (when supplied)
|
|
160
|
+
* is the request's model family (`opus` / `sonnet` / `haiku`); when
|
|
161
|
+
* present and the account has a matching per-model 7d bucket, that
|
|
162
|
+
* bucket joins the headroom max. Family-less calls fall back to the
|
|
163
|
+
* unified-buckets-only headroom — same behavior as before this PR.
|
|
164
|
+
*/
|
|
165
|
+
select(family) {
|
|
92
166
|
if (this.accounts.size === 0)
|
|
93
167
|
return null;
|
|
94
168
|
const now = Date.now();
|
|
@@ -97,8 +171,8 @@ export class AccountPool {
|
|
|
97
171
|
a.expiresAt > now + 30_000);
|
|
98
172
|
if (eligible.length > 0) {
|
|
99
173
|
return eligible.reduce((best, curr) => {
|
|
100
|
-
const bestHeadroom =
|
|
101
|
-
const currHeadroom =
|
|
174
|
+
const bestHeadroom = computeHeadroom(best.rateLimit, family);
|
|
175
|
+
const currHeadroom = computeHeadroom(curr.rateLimit, family);
|
|
102
176
|
return currHeadroom > bestHeadroom ? curr : best;
|
|
103
177
|
});
|
|
104
178
|
}
|
|
@@ -126,9 +200,9 @@ export class AccountPool {
|
|
|
126
200
|
*
|
|
127
201
|
* Also performs lazy cleanup of expired bindings (TTL or size cap).
|
|
128
202
|
*/
|
|
129
|
-
selectSticky(stickyKey) {
|
|
203
|
+
selectSticky(stickyKey, family) {
|
|
130
204
|
if (!stickyKey)
|
|
131
|
-
return this.select();
|
|
205
|
+
return this.select(family);
|
|
132
206
|
this.cleanupSticky();
|
|
133
207
|
const binding = this.sticky.get(stickyKey);
|
|
134
208
|
if (binding) {
|
|
@@ -137,11 +211,11 @@ export class AccountPool {
|
|
|
137
211
|
if (bound
|
|
138
212
|
&& bound.rateLimit.status !== 'rejected'
|
|
139
213
|
&& bound.expiresAt > now + 30_000
|
|
140
|
-
&& (
|
|
214
|
+
&& computeHeadroom(bound.rateLimit, family) > POOL_HEADROOM_FLOOR) {
|
|
141
215
|
return bound;
|
|
142
216
|
}
|
|
143
217
|
}
|
|
144
|
-
const picked = this.select();
|
|
218
|
+
const picked = this.select(family);
|
|
145
219
|
if (picked) {
|
|
146
220
|
this.sticky.set(stickyKey, { alias: picked.alias, boundAt: Date.now() });
|
|
147
221
|
}
|
|
@@ -189,7 +263,7 @@ export class AccountPool {
|
|
|
189
263
|
return this.sticky.get(stickyKey)?.alias ?? null;
|
|
190
264
|
}
|
|
191
265
|
/** Select the next-best account, excluding the given set of aliases. */
|
|
192
|
-
selectExcluding(excluded) {
|
|
266
|
+
selectExcluding(excluded, family) {
|
|
193
267
|
if (this.accounts.size <= 1)
|
|
194
268
|
return null;
|
|
195
269
|
const now = Date.now();
|
|
@@ -198,8 +272,8 @@ export class AccountPool {
|
|
|
198
272
|
a.expiresAt > now + 30_000);
|
|
199
273
|
if (eligible.length > 0) {
|
|
200
274
|
return eligible.reduce((best, curr) => {
|
|
201
|
-
const bestHeadroom =
|
|
202
|
-
const currHeadroom =
|
|
275
|
+
const bestHeadroom = computeHeadroom(best.rateLimit, family);
|
|
276
|
+
const currHeadroom = computeHeadroom(curr.rateLimit, family);
|
|
203
277
|
return currHeadroom > bestHeadroom ? curr : best;
|
|
204
278
|
});
|
|
205
279
|
}
|
|
@@ -240,7 +314,10 @@ export class AccountPool {
|
|
|
240
314
|
const now = Date.now();
|
|
241
315
|
const healthy = all.filter(a => a.rateLimit.status !== 'rejected' &&
|
|
242
316
|
a.expiresAt > now + 30_000);
|
|
243
|
-
|
|
317
|
+
// Status is a pool-wide aggregate; family-agnostic. Per-model
|
|
318
|
+
// headroom is request-context-specific and only meaningful at
|
|
319
|
+
// select() time.
|
|
320
|
+
const headrooms = all.map(a => computeHeadroom(a.rateLimit));
|
|
244
321
|
const avgHeadroom = headrooms.length > 0 ? headrooms.reduce((a, b) => a + b, 0) / headrooms.length : 0;
|
|
245
322
|
const best = this.select();
|
|
246
323
|
return {
|
|
@@ -260,7 +337,7 @@ export class AccountPool {
|
|
|
260
337
|
async waitForAccount() {
|
|
261
338
|
const immediate = this.select();
|
|
262
339
|
if (immediate) {
|
|
263
|
-
const headroom =
|
|
340
|
+
const headroom = computeHeadroom(immediate.rateLimit);
|
|
264
341
|
if (headroom > POOL_HEADROOM_FLOOR)
|
|
265
342
|
return immediate;
|
|
266
343
|
}
|
|
@@ -303,7 +380,7 @@ export class AccountPool {
|
|
|
303
380
|
const account = this.select();
|
|
304
381
|
if (!account)
|
|
305
382
|
break;
|
|
306
|
-
const headroom =
|
|
383
|
+
const headroom = computeHeadroom(account.rateLimit);
|
|
307
384
|
if (headroom <= POOL_HEADROOM_FLOOR)
|
|
308
385
|
break;
|
|
309
386
|
const entry = this.queue.shift();
|
package/dist/proxy.js
CHANGED
|
@@ -8,7 +8,7 @@ import { arch, platform } from 'node:process';
|
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
9
|
import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
10
10
|
import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
|
|
11
|
-
import { AccountPool, computeStickyKey, parseRateLimits } from './pool.js';
|
|
11
|
+
import { AccountPool, computeStickyKey, parseRateLimits, modelFamily } from './pool.js';
|
|
12
12
|
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
13
13
|
import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
|
|
14
14
|
import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
|
|
@@ -460,6 +460,11 @@ export async function startProxy(opts = {}) {
|
|
|
460
460
|
// Single-account dario keeps its existing code path unchanged.
|
|
461
461
|
const accountsList = await loadAllAccounts();
|
|
462
462
|
const pool = accountsList.length >= 2 ? new AccountPool() : null;
|
|
463
|
+
// Per-model rate-limit bucket families seen during this proxy run. First-
|
|
464
|
+
// sight is logged once when verbose so a new Anthropic bucket (e.g. an
|
|
465
|
+
// eventual `7d_opus`) doesn't slip past unnoticed. Pure observability —
|
|
466
|
+
// routing already handles unknown families generically.
|
|
467
|
+
const seenPerModelBuckets = new Set();
|
|
463
468
|
const analytics = pool ? new Analytics() : null;
|
|
464
469
|
let status;
|
|
465
470
|
if (pool) {
|
|
@@ -964,7 +969,7 @@ export async function startProxy(opts = {}) {
|
|
|
964
969
|
// Rotating off mid-session costs cache-create on every turn.
|
|
965
970
|
stickyKey = computeStickyKey(userMsg);
|
|
966
971
|
if (pool && stickyKey) {
|
|
967
|
-
const preferred = pool.selectSticky(stickyKey);
|
|
972
|
+
const preferred = pool.selectSticky(stickyKey, modelFamily(requestModel));
|
|
968
973
|
if (preferred && preferred.alias !== poolAccount?.alias) {
|
|
969
974
|
poolAccount = preferred;
|
|
970
975
|
accessToken = preferred.accessToken;
|
|
@@ -1185,6 +1190,20 @@ export async function startProxy(opts = {}) {
|
|
|
1185
1190
|
else {
|
|
1186
1191
|
pool.updateRateLimits(poolAccount.alias, snapshot);
|
|
1187
1192
|
}
|
|
1193
|
+
// First-sight detector for per-model rate-limit buckets. Anthropic
|
|
1194
|
+
// ships these unannounced — e.g. `7d_sonnet-utilization` appeared
|
|
1195
|
+
// around 2026-04-25 — and verbose-mode users want a heads-up the
|
|
1196
|
+
// first time a new family shows up so they can decide whether to
|
|
1197
|
+
// bump dario's expectations. Pure logging; the routing path
|
|
1198
|
+
// already handles arbitrary family keys (see pool.computeHeadroom).
|
|
1199
|
+
for (const family of Object.keys(snapshot.perModel7d)) {
|
|
1200
|
+
if (!seenPerModelBuckets.has(family)) {
|
|
1201
|
+
seenPerModelBuckets.add(family);
|
|
1202
|
+
if (verbose) {
|
|
1203
|
+
console.log(`[dario] new per-model rate-limit bucket observed: 7d_${family} (util=${snapshot.perModel7d[family]?.toFixed(2)})`);
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1188
1207
|
}
|
|
1189
1208
|
// Auto-retry without context-1m if it triggers a long-context billing error.
|
|
1190
1209
|
// Anthropic returns this as either 400 ("long context beta is not yet available
|
|
@@ -1287,7 +1306,7 @@ export async function startProxy(opts = {}) {
|
|
|
1287
1306
|
else if (upstream.status === 429) {
|
|
1288
1307
|
// Not a context-1m issue — try pool failover before surfacing to client
|
|
1289
1308
|
if (pool && poolAccount) {
|
|
1290
|
-
const nextAccount = pool.selectExcluding(triedAliases);
|
|
1309
|
+
const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
|
|
1291
1310
|
if (nextAccount) {
|
|
1292
1311
|
triedAliases.add(nextAccount.alias);
|
|
1293
1312
|
poolAccount = nextAccount;
|
|
@@ -1346,7 +1365,7 @@ export async function startProxy(opts = {}) {
|
|
|
1346
1365
|
if (upstream.status === 429) {
|
|
1347
1366
|
// Try pool failover before surfacing to client
|
|
1348
1367
|
if (pool && poolAccount) {
|
|
1349
|
-
const nextAccount = pool.selectExcluding(triedAliases);
|
|
1368
|
+
const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
|
|
1350
1369
|
if (nextAccount) {
|
|
1351
1370
|
triedAliases.add(nextAccount.alias);
|
|
1352
1371
|
poolAccount = nextAccount;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.31.16",
|
|
3
|
+
"version": "3.31.17",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|