@askalf/dario 3.31.16 → 3.31.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +8 -1
- package/dist/doctor.d.ts +10 -0
- package/dist/doctor.js +129 -0
- package/dist/pool.d.ts +48 -4
- package/dist/pool.js +91 -14
- package/dist/proxy.js +23 -4
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -652,6 +652,12 @@ async function help() {
|
|
|
652
652
|
the server's verdict — the single reliable
|
|
653
653
|
signal for scope-policy drift (dario#42/#71
|
|
654
654
|
class). One GET to claude.ai; no PII.
|
|
655
|
+
dario doctor --usage Fire one minimal Haiku request through your
|
|
656
|
+
OAuth and surface the rate-limit snapshot:
|
|
657
|
+
All-models 5h/7d, per-model 7d buckets
|
|
658
|
+
(Sonnet only, Opus only when Anthropic ships
|
|
659
|
+
them), overage. Mirrors the user-dashboard
|
|
660
|
+
usage page. Costs ~1 subscription request.
|
|
655
661
|
dario doctor --json Emit the check report as structured JSON
|
|
656
662
|
for machine consumption (claude-bridge
|
|
657
663
|
/status, CI scripts, etc.) instead of the
|
|
@@ -972,6 +978,7 @@ async function mcp() {
|
|
|
972
978
|
async function doctor() {
|
|
973
979
|
const { runChecks, formatChecks, formatChecksJson, exitCodeFor, runAuthCheck } = await import('./doctor.js');
|
|
974
980
|
const probe = args.includes('--probe');
|
|
981
|
+
const usage = args.includes('--usage');
|
|
975
982
|
const asJson = args.includes('--json');
|
|
976
983
|
const authCheck = args.includes('--auth-check');
|
|
977
984
|
if (authCheck) {
|
|
@@ -1008,7 +1015,7 @@ async function doctor() {
|
|
|
1008
1015
|
console.log('');
|
|
1009
1016
|
process.exit(result.verdict === 'match' ? 0 : 1);
|
|
1010
1017
|
}
|
|
1011
|
-
const checks = await runChecks({ probe });
|
|
1018
|
+
const checks = await runChecks({ probe, usage });
|
|
1012
1019
|
if (asJson) {
|
|
1013
1020
|
// JSON mode is meant for machine consumption (claude-bridge /status,
|
|
1014
1021
|
// deepdive health checks, CI scripts) — no decorative header, no
|
package/dist/doctor.d.ts
CHANGED
|
@@ -50,6 +50,16 @@ export interface RunChecksOptions {
|
|
|
50
50
|
* GET to `claude.ai` and runs in parallel with the other checks.
|
|
51
51
|
*/
|
|
52
52
|
probe?: boolean;
|
|
53
|
+
/**
|
|
54
|
+
* Opt-in: fire a minimal `POST /v1/messages` through the user's OAuth
|
|
55
|
+
* (Haiku, `max_tokens=1`) to capture the current rate-limit snapshot,
|
|
56
|
+
* including the unified buckets AND the per-model buckets Anthropic
|
|
57
|
+
* started carving in late April 2026 (`7d_sonnet-utilization` etc).
|
|
58
|
+
* Surfaces "All models X%, Sonnet only Y%" the way the user dashboard
|
|
59
|
+
* does. Enable with `dario doctor --usage`; costs ~1 subscription
|
|
60
|
+
* request.
|
|
61
|
+
*/
|
|
62
|
+
usage?: boolean;
|
|
53
63
|
}
|
|
54
64
|
/**
|
|
55
65
|
* Run every available health check. Never throws — each check is
|
package/dist/doctor.js
CHANGED
|
@@ -282,6 +282,135 @@ export async function runChecks(opts = {}) {
|
|
|
282
282
|
});
|
|
283
283
|
}
|
|
284
284
|
}
|
|
285
|
+
// ---- Usage snapshot (opt-in, --usage).
|
|
286
|
+
// Fires one `POST /v1/messages` via the loaded OAuth (Haiku, max_tokens=1)
|
|
287
|
+
// to capture the current rate-limit snapshot including the per-model
|
|
288
|
+
// buckets Anthropic started carving around 2026-04-25. Surfaces the
|
|
289
|
+
// `All models` vs `Sonnet only` split the way the user dashboard does.
|
|
290
|
+
// Direct-to-Anthropic, not through the proxy — the proxy doesn't need
|
|
291
|
+
// to be running for `dario doctor --usage`.
|
|
292
|
+
if (opts.usage) {
|
|
293
|
+
try {
|
|
294
|
+
const { parseRateLimits } = await import('./pool.js');
|
|
295
|
+
const { billingBucketFromClaim } = await import('./analytics.js');
|
|
296
|
+
// Probe routing decision: Anthropic's subscription path rejects
|
|
297
|
+
// non-CC-shaped requests on Sonnet/Opus (returns 429 with no
|
|
298
|
+
// rate-limit headers). Haiku accepts the raw shape. So:
|
|
299
|
+
// - If a local `dario proxy` is listening, route through it —
|
|
300
|
+
// the proxy injects the full CC template and all three families
|
|
301
|
+
// succeed, giving us the _sonnet / _opus / _haiku per-model
|
|
302
|
+
// bucket headers on a single round trip each.
|
|
303
|
+
// - Else fall back to direct-to-Anthropic with Haiku only.
|
|
304
|
+
// Unified buckets surface but per-model buckets won't.
|
|
305
|
+
const dario_base = process.env.DARIO_TEST_URL || 'http://127.0.0.1:3456';
|
|
306
|
+
let probeEndpoint = `${dario_base}/v1/messages`;
|
|
307
|
+
let probeHeaders = {
|
|
308
|
+
'content-type': 'application/json',
|
|
309
|
+
'anthropic-version': '2023-06-01',
|
|
310
|
+
'authorization': 'Bearer dario',
|
|
311
|
+
};
|
|
312
|
+
let proxyAvailable = false;
|
|
313
|
+
try {
|
|
314
|
+
const healthRes = await fetch(`${dario_base}/health`, { signal: AbortSignal.timeout(800) });
|
|
315
|
+
proxyAvailable = healthRes.ok;
|
|
316
|
+
}
|
|
317
|
+
catch { /* proxy not running */ }
|
|
318
|
+
if (!proxyAvailable) {
|
|
319
|
+
const { getAccessToken } = await import('./oauth.js');
|
|
320
|
+
const token = await getAccessToken();
|
|
321
|
+
probeEndpoint = 'https://api.anthropic.com/v1/messages';
|
|
322
|
+
probeHeaders = {
|
|
323
|
+
'content-type': 'application/json',
|
|
324
|
+
'anthropic-version': '2023-06-01',
|
|
325
|
+
'anthropic-beta': 'oauth-2025-04-20',
|
|
326
|
+
'authorization': `Bearer ${token}`,
|
|
327
|
+
};
|
|
328
|
+
checks.push({
|
|
329
|
+
status: 'info',
|
|
330
|
+
label: 'Usage probe',
|
|
331
|
+
detail: 'dario proxy not running — probing direct. Per-model buckets visible only when probing through a running proxy (start `dario proxy` in another terminal and re-run).',
|
|
332
|
+
});
|
|
333
|
+
}
|
|
334
|
+
// Probe each family in parallel. Anthropic only returns the
|
|
335
|
+
// per-model 7d bucket header on a request TO that family.
|
|
336
|
+
const families = [
|
|
337
|
+
{ family: 'haiku', model: 'claude-haiku-4-5' },
|
|
338
|
+
{ family: 'sonnet', model: 'claude-sonnet-4-6' },
|
|
339
|
+
{ family: 'opus', model: 'claude-opus-4-7' },
|
|
340
|
+
];
|
|
341
|
+
const probe = async (model) => {
|
|
342
|
+
const res = await fetch(probeEndpoint, {
|
|
343
|
+
method: 'POST',
|
|
344
|
+
headers: probeHeaders,
|
|
345
|
+
body: JSON.stringify({
|
|
346
|
+
model,
|
|
347
|
+
max_tokens: 1,
|
|
348
|
+
messages: [{ role: 'user', content: 'ok' }],
|
|
349
|
+
}),
|
|
350
|
+
signal: AbortSignal.timeout(15_000),
|
|
351
|
+
});
|
|
352
|
+
// Consume the body so the socket releases; we only care about headers.
|
|
353
|
+
await res.text().catch(() => '');
|
|
354
|
+
// Ignore 429/4xx snapshots without useful rate-limit headers.
|
|
355
|
+
if (!res.headers.get('anthropic-ratelimit-unified-status'))
|
|
356
|
+
return null;
|
|
357
|
+
return parseRateLimits(res.headers);
|
|
358
|
+
};
|
|
359
|
+
const results = await Promise.all(families.map(f => probe(f.model).catch(() => null)));
|
|
360
|
+
// Use the first non-null snapshot for the unified view — they
|
|
361
|
+
// should all agree on the unified buckets (same account, same moment).
|
|
362
|
+
const firstOk = results.find(s => s !== null);
|
|
363
|
+
if (!firstOk)
|
|
364
|
+
throw new Error('all probe requests failed');
|
|
365
|
+
const bucket = billingBucketFromClaim(firstOk.claim);
|
|
366
|
+
const pct = (n) => `${(n * 100).toFixed(1)}%`;
|
|
367
|
+
checks.push({
|
|
368
|
+
status: firstOk.util5h >= 0.90 ? 'warn' : 'ok',
|
|
369
|
+
label: 'Usage 5h (all)',
|
|
370
|
+
detail: `${pct(firstOk.util5h)} used • status=${firstOk.status} • claim=${firstOk.claim} (${bucket})`,
|
|
371
|
+
});
|
|
372
|
+
checks.push({
|
|
373
|
+
status: firstOk.util7d >= 0.90 ? 'warn' : 'ok',
|
|
374
|
+
label: 'Usage 7d (all)',
|
|
375
|
+
detail: `${pct(firstOk.util7d)} used`,
|
|
376
|
+
});
|
|
377
|
+
// Merge per-model buckets across all probes — each probe's response
|
|
378
|
+
// carries at most its own family bucket; union them for display.
|
|
379
|
+
const mergedPerModel = {};
|
|
380
|
+
for (const s of results) {
|
|
381
|
+
if (!s)
|
|
382
|
+
continue;
|
|
383
|
+
for (const [family, util] of Object.entries(s.perModel7d)) {
|
|
384
|
+
mergedPerModel[family] = util;
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
for (const [family, util] of Object.entries(mergedPerModel).sort()) {
|
|
388
|
+
const divergence = util - firstOk.util7d;
|
|
389
|
+
const marker = Math.abs(divergence) > 0.05
|
|
390
|
+
? ` • Δ vs 7d(all): ${divergence >= 0 ? '+' : ''}${(divergence * 100).toFixed(1)}pp`
|
|
391
|
+
: '';
|
|
392
|
+
checks.push({
|
|
393
|
+
status: util >= 0.90 ? 'warn' : 'ok',
|
|
394
|
+
label: `Usage 7d (${family} only)`,
|
|
395
|
+
detail: `${pct(util)} used${marker}`,
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
if (firstOk.overageUtil > 0) {
|
|
399
|
+
checks.push({
|
|
400
|
+
status: firstOk.overageUtil >= 0.90 ? 'warn' : 'info',
|
|
401
|
+
label: 'Usage overage',
|
|
402
|
+
detail: `${pct(firstOk.overageUtil)} of configured monthly spend`,
|
|
403
|
+
});
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
catch (err) {
|
|
407
|
+
checks.push({
|
|
408
|
+
status: 'warn',
|
|
409
|
+
label: 'Usage snapshot',
|
|
410
|
+
detail: `probe failed: ${err.message}`,
|
|
411
|
+
});
|
|
412
|
+
}
|
|
413
|
+
}
|
|
285
414
|
// ---- Account pool
|
|
286
415
|
try {
|
|
287
416
|
const { listAccountAliases, loadAllAccounts } = await import('./accounts.js');
|
package/dist/pool.d.ts
CHANGED
|
@@ -19,6 +19,20 @@ export interface RateLimitSnapshot {
|
|
|
19
19
|
status: string;
|
|
20
20
|
util5h: number;
|
|
21
21
|
util7d: number;
|
|
22
|
+
/**
|
|
23
|
+
* Per-model 7-day utilization buckets — Anthropic carves separate
|
|
24
|
+
* weekly windows for some model families. As of 2026-04-25 the live
|
|
25
|
+
* API emits `anthropic-ratelimit-unified-7d_sonnet-utilization` on
|
|
26
|
+
* Sonnet responses (corresponds to the "Sonnet only" line on the user
|
|
27
|
+
* dashboard); other families do not yet have dedicated buckets but
|
|
28
|
+
* the parser scans the header set generically so any future
|
|
29
|
+
* `7d_<family>` header is captured automatically.
|
|
30
|
+
*
|
|
31
|
+
* Keyed by the family suffix as it arrived on the wire (lowercase,
|
|
32
|
+
* e.g. `sonnet` / `opus` / `haiku`). Empty when no per-model headers
|
|
33
|
+
* were on the response.
|
|
34
|
+
*/
|
|
35
|
+
perModel7d: Record<string, number>;
|
|
22
36
|
overageUtil: number;
|
|
23
37
|
claim: string;
|
|
24
38
|
reset: number;
|
|
@@ -45,6 +59,30 @@ export interface PoolStatus {
|
|
|
45
59
|
}
|
|
46
60
|
/** Parse an Anthropic response's rate-limit headers into a snapshot. */
|
|
47
61
|
export declare function parseRateLimits(headers: Headers): RateLimitSnapshot;
|
|
62
|
+
/**
|
|
63
|
+
* Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
|
|
64
|
+
* model id. Used to look up the per-model 7d bucket in
|
|
65
|
+
* `RateLimitSnapshot.perModel7d` during routing decisions. Returns null
|
|
66
|
+
* for non-Claude models or model ids that don't carry a recognizable
|
|
67
|
+
* family token (those requests just use the unified buckets).
|
|
68
|
+
*
|
|
69
|
+
* Generous on input shape: matches `claude-opus-4-7`, `opus`, `claude-3-7-sonnet-…`,
|
|
70
|
+
* `claude-haiku-4-5`, anything containing the family token. Lowercase-normalized
|
|
71
|
+
* so it pairs cleanly with `parseRateLimits`'s lowercase family keys.
|
|
72
|
+
*/
|
|
73
|
+
export declare function modelFamily(modelId: string | null | undefined): string | null;
|
|
74
|
+
/**
|
|
75
|
+
* Compute headroom for a single account given its rate-limit snapshot.
|
|
76
|
+
* Headroom is the slack between the most-saturated relevant bucket and
|
|
77
|
+
* full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
|
|
78
|
+
*
|
|
79
|
+
* When `family` is supplied AND the snapshot has a corresponding per-
|
|
80
|
+
* model 7d bucket, that bucket is included in the max. When the family
|
|
81
|
+
* isn't represented in the snapshot (e.g. account hasn't seen a Sonnet
|
|
82
|
+
* request yet so `7d_sonnet` is unknown), headroom is computed from the
|
|
83
|
+
* unified buckets only — best-effort, populated on the next response.
|
|
84
|
+
*/
|
|
85
|
+
export declare function computeHeadroom(snapshot: RateLimitSnapshot, family?: string | null): number;
|
|
48
86
|
export declare class AccountPool {
|
|
49
87
|
private accounts;
|
|
50
88
|
private queue;
|
|
@@ -61,8 +99,14 @@ export declare class AccountPool {
|
|
|
61
99
|
}): void;
|
|
62
100
|
remove(alias: string): boolean;
|
|
63
101
|
get size(): number;
|
|
64
|
-
/**
|
|
65
|
-
|
|
102
|
+
/**
|
|
103
|
+
* Select the best account for the next request. `family` (when supplied)
|
|
104
|
+
* is the request's model family (`opus` / `sonnet` / `haiku`); when
|
|
105
|
+
* present and the account has a matching per-model 7d bucket, that
|
|
106
|
+
* bucket joins the headroom max. Family-less calls fall back to the
|
|
107
|
+
* unified-buckets-only headroom — same behavior as before this PR.
|
|
108
|
+
*/
|
|
109
|
+
select(family?: string | null): PoolAccount | null;
|
|
66
110
|
/**
|
|
67
111
|
* Select with session stickiness. If `stickyKey` is already bound to a
|
|
68
112
|
* healthy account (not rejected, token not near expiry, headroom > 2%),
|
|
@@ -79,7 +123,7 @@ export declare class AccountPool {
|
|
|
79
123
|
*
|
|
80
124
|
* Also performs lazy cleanup of expired bindings (TTL or size cap).
|
|
81
125
|
*/
|
|
82
|
-
selectSticky(stickyKey: string | null): PoolAccount | null;
|
|
126
|
+
selectSticky(stickyKey: string | null, family?: string | null): PoolAccount | null;
|
|
83
127
|
/**
|
|
84
128
|
* Rebind a sticky key to a different account — called by proxy after an
|
|
85
129
|
* in-request 429 failover moves to the next-best account. Without this
|
|
@@ -99,7 +143,7 @@ export declare class AccountPool {
|
|
|
99
143
|
/** Test/inspection helper — current alias bound to a key, or null. */
|
|
100
144
|
stickyAliasFor(stickyKey: string): string | null;
|
|
101
145
|
/** Select the next-best account, excluding the given set of aliases. */
|
|
102
|
-
selectExcluding(excluded: Set<string>): PoolAccount | null;
|
|
146
|
+
selectExcluding(excluded: Set<string>, family?: string | null): PoolAccount | null;
|
|
103
147
|
updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
|
|
104
148
|
markRejected(alias: string, snapshot: RateLimitSnapshot): void;
|
|
105
149
|
updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
|
package/dist/pool.js
CHANGED
|
@@ -28,19 +28,44 @@ export const EMPTY_SNAPSHOT = {
|
|
|
28
28
|
status: 'unknown',
|
|
29
29
|
util5h: 0,
|
|
30
30
|
util7d: 0,
|
|
31
|
+
perModel7d: {},
|
|
31
32
|
overageUtil: 0,
|
|
32
33
|
claim: 'unknown',
|
|
33
34
|
reset: 0,
|
|
34
35
|
fallbackPct: 0,
|
|
35
36
|
updatedAt: 0,
|
|
36
37
|
};
|
|
38
|
+
/**
|
|
39
|
+
* Match `anthropic-ratelimit-unified-7d_<family>-utilization`. Generic on
|
|
40
|
+
* `<family>` so a future `7d_opus` / `7d_haiku` (or anything Anthropic
|
|
41
|
+
* adds without notice) is captured automatically. The family is
|
|
42
|
+
* normalized to lowercase to match `modelFamily()` output.
|
|
43
|
+
*/
|
|
44
|
+
const PER_MODEL_7D_HEADER = /^anthropic-ratelimit-unified-7d_([a-z0-9-]+)-utilization$/i;
|
|
37
45
|
/** Parse an Anthropic response's rate-limit headers into a snapshot. */
|
|
38
46
|
export function parseRateLimits(headers) {
|
|
39
47
|
const get = (key) => headers.get(`anthropic-ratelimit-unified-${key}`) ?? '';
|
|
48
|
+
const perModel7d = {};
|
|
49
|
+
// Iterate the full header set — `headers.get` only retrieves known
|
|
50
|
+
// keys, but Anthropic can add new `7d_<family>-utilization` shapes
|
|
51
|
+
// unannounced. Scanning the iterator means the parser is automatically
|
|
52
|
+
// forward-compatible. Real `Headers` instances and test-side mocks
|
|
53
|
+
// (which implement `.entries()` but not direct iteration) both work
|
|
54
|
+
// through the explicit `.entries()` call.
|
|
55
|
+
const entries = (typeof headers.entries === 'function')
|
|
56
|
+
? headers.entries()
|
|
57
|
+
: headers;
|
|
58
|
+
for (const [k, v] of entries) {
|
|
59
|
+
const m = k.match(PER_MODEL_7D_HEADER);
|
|
60
|
+
if (m && m[1]) {
|
|
61
|
+
perModel7d[m[1].toLowerCase()] = parseFloat(v) || 0;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
40
64
|
return {
|
|
41
65
|
status: get('status') || 'unknown',
|
|
42
66
|
util5h: parseFloat(get('5h-utilization')) || 0,
|
|
43
67
|
util7d: parseFloat(get('7d-utilization')) || 0,
|
|
68
|
+
perModel7d,
|
|
44
69
|
overageUtil: parseFloat(get('overage-utilization')) || 0,
|
|
45
70
|
claim: get('representative-claim') || 'unknown',
|
|
46
71
|
reset: parseInt(get('reset')) || 0,
|
|
@@ -48,6 +73,49 @@ export function parseRateLimits(headers) {
|
|
|
48
73
|
updatedAt: Date.now(),
|
|
49
74
|
};
|
|
50
75
|
}
|
|
76
|
+
/**
 * Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
 * model id. The result is used as the lookup key into
 * `RateLimitSnapshot.perModel7d` during routing decisions.
 *
 * Matching is a case-insensitive substring test, so `claude-opus-4-7`,
 * `opus`, `claude-3-7-sonnet-…` and `claude-haiku-4-5` all resolve. The
 * returned token is always lowercase so it pairs with the lowercase family
 * keys produced by `parseRateLimits`.
 *
 * @param {string | null | undefined} modelId - Model id from the request.
 * @returns {string | null} `'opus'`, `'sonnet'`, `'haiku'`, or `null` when
 *   the id is missing, is not a string, or carries no recognizable family
 *   token (such requests fall back to the unified buckets).
 */
export function modelFamily(modelId) {
    // The model id originates from a client-supplied request body, so it is
    // not guaranteed to be a string. A number/object/array must resolve to
    // "no family" rather than throw inside the routing path.
    if (typeof modelId !== 'string' || modelId === '')
        return null;
    const normalized = modelId.toLowerCase();
    // Checked in the same precedence order as before: opus, sonnet, haiku.
    for (const family of ['opus', 'sonnet', 'haiku']) {
        if (normalized.includes(family))
            return family;
    }
    return null;
}
|
|
99
|
+
/**
 * Compute headroom for a single account given its rate-limit snapshot.
 * Headroom is the slack between the most-saturated relevant bucket and
 * full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
 *
 * When `family` is supplied AND the snapshot has its own numeric entry for
 * that family in `perModel7d`, that bucket joins the max. When the family
 * is not represented (or the snapshot carries no `perModel7d` map at all),
 * headroom is computed from the unified buckets only.
 *
 * @param {{ util5h: number, util7d: number, perModel7d?: Record<string, number> }} snapshot
 *   Rate-limit snapshot for the account.
 * @param {string | null} [family] - Lowercase model family key, if known.
 * @returns {number} `1 - max(...)` over the relevant utilization values.
 */
export function computeHeadroom(snapshot, family) {
    const utils = [snapshot.util5h, snapshot.util7d];
    const buckets = snapshot.perModel7d;
    // Own-property check: a plain-object map would otherwise answer lookups
    // such as `constructor` / `toString` with inherited functions, which
    // turn Math.max into NaN. The `buckets` guard tolerates snapshots that
    // were built without the per-model field.
    if (family && buckets && Object.hasOwn(buckets, family)) {
        const perModel = buckets[family];
        if (typeof perModel === 'number' && !Number.isNaN(perModel))
            utils.push(perModel);
    }
    return 1 - Math.max(...utils);
}
|
|
51
119
|
const STICKY_TTL_MS = 6 * 60 * 60 * 1000; // 6h
|
|
52
120
|
const STICKY_MAX_ENTRIES = 2_000; // lazy cleanup cap
|
|
53
121
|
/**
|
|
@@ -87,8 +155,14 @@ export class AccountPool {
|
|
|
87
155
|
get size() {
|
|
88
156
|
return this.accounts.size;
|
|
89
157
|
}
|
|
90
|
-
/**
|
|
91
|
-
|
|
158
|
+
/**
|
|
159
|
+
* Select the best account for the next request. `family` (when supplied)
|
|
160
|
+
* is the request's model family (`opus` / `sonnet` / `haiku`); when
|
|
161
|
+
* present and the account has a matching per-model 7d bucket, that
|
|
162
|
+
* bucket joins the headroom max. Family-less calls fall back to the
|
|
163
|
+
* unified-buckets-only headroom — same behavior as before this PR.
|
|
164
|
+
*/
|
|
165
|
+
select(family) {
|
|
92
166
|
if (this.accounts.size === 0)
|
|
93
167
|
return null;
|
|
94
168
|
const now = Date.now();
|
|
@@ -97,8 +171,8 @@ export class AccountPool {
|
|
|
97
171
|
a.expiresAt > now + 30_000);
|
|
98
172
|
if (eligible.length > 0) {
|
|
99
173
|
return eligible.reduce((best, curr) => {
|
|
100
|
-
const bestHeadroom =
|
|
101
|
-
const currHeadroom =
|
|
174
|
+
const bestHeadroom = computeHeadroom(best.rateLimit, family);
|
|
175
|
+
const currHeadroom = computeHeadroom(curr.rateLimit, family);
|
|
102
176
|
return currHeadroom > bestHeadroom ? curr : best;
|
|
103
177
|
});
|
|
104
178
|
}
|
|
@@ -126,9 +200,9 @@ export class AccountPool {
|
|
|
126
200
|
*
|
|
127
201
|
* Also performs lazy cleanup of expired bindings (TTL or size cap).
|
|
128
202
|
*/
|
|
129
|
-
selectSticky(stickyKey) {
|
|
203
|
+
selectSticky(stickyKey, family) {
|
|
130
204
|
if (!stickyKey)
|
|
131
|
-
return this.select();
|
|
205
|
+
return this.select(family);
|
|
132
206
|
this.cleanupSticky();
|
|
133
207
|
const binding = this.sticky.get(stickyKey);
|
|
134
208
|
if (binding) {
|
|
@@ -137,11 +211,11 @@ export class AccountPool {
|
|
|
137
211
|
if (bound
|
|
138
212
|
&& bound.rateLimit.status !== 'rejected'
|
|
139
213
|
&& bound.expiresAt > now + 30_000
|
|
140
|
-
&& (
|
|
214
|
+
&& computeHeadroom(bound.rateLimit, family) > POOL_HEADROOM_FLOOR) {
|
|
141
215
|
return bound;
|
|
142
216
|
}
|
|
143
217
|
}
|
|
144
|
-
const picked = this.select();
|
|
218
|
+
const picked = this.select(family);
|
|
145
219
|
if (picked) {
|
|
146
220
|
this.sticky.set(stickyKey, { alias: picked.alias, boundAt: Date.now() });
|
|
147
221
|
}
|
|
@@ -189,7 +263,7 @@ export class AccountPool {
|
|
|
189
263
|
return this.sticky.get(stickyKey)?.alias ?? null;
|
|
190
264
|
}
|
|
191
265
|
/** Select the next-best account, excluding the given set of aliases. */
|
|
192
|
-
selectExcluding(excluded) {
|
|
266
|
+
selectExcluding(excluded, family) {
|
|
193
267
|
if (this.accounts.size <= 1)
|
|
194
268
|
return null;
|
|
195
269
|
const now = Date.now();
|
|
@@ -198,8 +272,8 @@ export class AccountPool {
|
|
|
198
272
|
a.expiresAt > now + 30_000);
|
|
199
273
|
if (eligible.length > 0) {
|
|
200
274
|
return eligible.reduce((best, curr) => {
|
|
201
|
-
const bestHeadroom =
|
|
202
|
-
const currHeadroom =
|
|
275
|
+
const bestHeadroom = computeHeadroom(best.rateLimit, family);
|
|
276
|
+
const currHeadroom = computeHeadroom(curr.rateLimit, family);
|
|
203
277
|
return currHeadroom > bestHeadroom ? curr : best;
|
|
204
278
|
});
|
|
205
279
|
}
|
|
@@ -240,7 +314,10 @@ export class AccountPool {
|
|
|
240
314
|
const now = Date.now();
|
|
241
315
|
const healthy = all.filter(a => a.rateLimit.status !== 'rejected' &&
|
|
242
316
|
a.expiresAt > now + 30_000);
|
|
243
|
-
|
|
317
|
+
// Status is a pool-wide aggregate; family-agnostic. Per-model
|
|
318
|
+
// headroom is request-context-specific and only meaningful at
|
|
319
|
+
// select() time.
|
|
320
|
+
const headrooms = all.map(a => computeHeadroom(a.rateLimit));
|
|
244
321
|
const avgHeadroom = headrooms.length > 0 ? headrooms.reduce((a, b) => a + b, 0) / headrooms.length : 0;
|
|
245
322
|
const best = this.select();
|
|
246
323
|
return {
|
|
@@ -260,7 +337,7 @@ export class AccountPool {
|
|
|
260
337
|
async waitForAccount() {
|
|
261
338
|
const immediate = this.select();
|
|
262
339
|
if (immediate) {
|
|
263
|
-
const headroom =
|
|
340
|
+
const headroom = computeHeadroom(immediate.rateLimit);
|
|
264
341
|
if (headroom > POOL_HEADROOM_FLOOR)
|
|
265
342
|
return immediate;
|
|
266
343
|
}
|
|
@@ -303,7 +380,7 @@ export class AccountPool {
|
|
|
303
380
|
const account = this.select();
|
|
304
381
|
if (!account)
|
|
305
382
|
break;
|
|
306
|
-
const headroom =
|
|
383
|
+
const headroom = computeHeadroom(account.rateLimit);
|
|
307
384
|
if (headroom <= POOL_HEADROOM_FLOOR)
|
|
308
385
|
break;
|
|
309
386
|
const entry = this.queue.shift();
|
package/dist/proxy.js
CHANGED
|
@@ -8,7 +8,7 @@ import { arch, platform } from 'node:process';
|
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
9
|
import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
10
10
|
import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
|
|
11
|
-
import { AccountPool, computeStickyKey, parseRateLimits } from './pool.js';
|
|
11
|
+
import { AccountPool, computeStickyKey, parseRateLimits, modelFamily } from './pool.js';
|
|
12
12
|
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
13
13
|
import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
|
|
14
14
|
import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
|
|
@@ -460,6 +460,11 @@ export async function startProxy(opts = {}) {
|
|
|
460
460
|
// Single-account dario keeps its existing code path unchanged.
|
|
461
461
|
const accountsList = await loadAllAccounts();
|
|
462
462
|
const pool = accountsList.length >= 2 ? new AccountPool() : null;
|
|
463
|
+
// Per-model rate-limit bucket families seen during this proxy run. First-
|
|
464
|
+
// sight is logged once when verbose so a new Anthropic bucket (e.g. an
|
|
465
|
+
// eventual `7d_opus`) doesn't slip past unnoticed. Pure observability —
|
|
466
|
+
// routing already handles unknown families generically.
|
|
467
|
+
const seenPerModelBuckets = new Set();
|
|
463
468
|
const analytics = pool ? new Analytics() : null;
|
|
464
469
|
let status;
|
|
465
470
|
if (pool) {
|
|
@@ -964,7 +969,7 @@ export async function startProxy(opts = {}) {
|
|
|
964
969
|
// Rotating off mid-session costs cache-create on every turn.
|
|
965
970
|
stickyKey = computeStickyKey(userMsg);
|
|
966
971
|
if (pool && stickyKey) {
|
|
967
|
-
const preferred = pool.selectSticky(stickyKey);
|
|
972
|
+
const preferred = pool.selectSticky(stickyKey, modelFamily(requestModel));
|
|
968
973
|
if (preferred && preferred.alias !== poolAccount?.alias) {
|
|
969
974
|
poolAccount = preferred;
|
|
970
975
|
accessToken = preferred.accessToken;
|
|
@@ -1185,6 +1190,20 @@ export async function startProxy(opts = {}) {
|
|
|
1185
1190
|
else {
|
|
1186
1191
|
pool.updateRateLimits(poolAccount.alias, snapshot);
|
|
1187
1192
|
}
|
|
1193
|
+
// First-sight detector for per-model rate-limit buckets. Anthropic
|
|
1194
|
+
// ships these unannounced — e.g. `7d_sonnet-utilization` appeared
|
|
1195
|
+
// around 2026-04-25 — and verbose-mode users want a heads-up the
|
|
1196
|
+
// first time a new family shows up so they can decide whether to
|
|
1197
|
+
// bump dario's expectations. Pure logging; the routing path
|
|
1198
|
+
// already handles arbitrary family keys (see pool.computeHeadroom).
|
|
1199
|
+
for (const family of Object.keys(snapshot.perModel7d)) {
|
|
1200
|
+
if (!seenPerModelBuckets.has(family)) {
|
|
1201
|
+
seenPerModelBuckets.add(family);
|
|
1202
|
+
if (verbose) {
|
|
1203
|
+
console.log(`[dario] new per-model rate-limit bucket observed: 7d_${family} (util=${snapshot.perModel7d[family]?.toFixed(2)})`);
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1188
1207
|
}
|
|
1189
1208
|
// Auto-retry without context-1m if it triggers a long-context billing error.
|
|
1190
1209
|
// Anthropic returns this as either 400 ("long context beta is not yet available
|
|
@@ -1287,7 +1306,7 @@ export async function startProxy(opts = {}) {
|
|
|
1287
1306
|
else if (upstream.status === 429) {
|
|
1288
1307
|
// Not a context-1m issue — try pool failover before surfacing to client
|
|
1289
1308
|
if (pool && poolAccount) {
|
|
1290
|
-
const nextAccount = pool.selectExcluding(triedAliases);
|
|
1309
|
+
const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
|
|
1291
1310
|
if (nextAccount) {
|
|
1292
1311
|
triedAliases.add(nextAccount.alias);
|
|
1293
1312
|
poolAccount = nextAccount;
|
|
@@ -1346,7 +1365,7 @@ export async function startProxy(opts = {}) {
|
|
|
1346
1365
|
if (upstream.status === 429) {
|
|
1347
1366
|
// Try pool failover before surfacing to client
|
|
1348
1367
|
if (pool && poolAccount) {
|
|
1349
|
-
const nextAccount = pool.selectExcluding(triedAliases);
|
|
1368
|
+
const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
|
|
1350
1369
|
if (nextAccount) {
|
|
1351
1370
|
triedAliases.add(nextAccount.alias);
|
|
1352
1371
|
poolAccount = nextAccount;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.31.16",
|
|
3
|
+
"version": "3.31.18",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|