@askalf/dario 3.31.16 → 3.31.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -652,6 +652,12 @@ async function help() {
652
652
  the server's verdict — the single reliable
653
653
  signal for scope-policy drift (dario#42/#71
654
654
  class). One GET to claude.ai; no PII.
655
+ dario doctor --usage Fire one minimal Haiku request through your
656
+ OAuth and surface the rate-limit snapshot:
657
+ All-models 5h/7d, per-model 7d buckets
658
+ (Sonnet only, Opus only when Anthropic ships
659
+ them), overage. Mirrors the user-dashboard
660
+ usage page. Costs ~1 subscription request.
655
661
  dario doctor --json Emit the check report as structured JSON
656
662
  for machine consumption (claude-bridge
657
663
  /status, CI scripts, etc.) instead of the
@@ -972,6 +978,7 @@ async function mcp() {
972
978
  async function doctor() {
973
979
  const { runChecks, formatChecks, formatChecksJson, exitCodeFor, runAuthCheck } = await import('./doctor.js');
974
980
  const probe = args.includes('--probe');
981
+ const usage = args.includes('--usage');
975
982
  const asJson = args.includes('--json');
976
983
  const authCheck = args.includes('--auth-check');
977
984
  if (authCheck) {
@@ -1008,7 +1015,7 @@ async function doctor() {
1008
1015
  console.log('');
1009
1016
  process.exit(result.verdict === 'match' ? 0 : 1);
1010
1017
  }
1011
- const checks = await runChecks({ probe });
1018
+ const checks = await runChecks({ probe, usage });
1012
1019
  if (asJson) {
1013
1020
  // JSON mode is meant for machine consumption (claude-bridge /status,
1014
1021
  // deepdive health checks, CI scripts) — no decorative header, no
package/dist/doctor.d.ts CHANGED
@@ -50,6 +50,16 @@ export interface RunChecksOptions {
50
50
  * GET to `claude.ai` and runs in parallel with the other checks.
51
51
  */
52
52
  probe?: boolean;
53
+ /**
54
+ * Opt-in: fire a minimal `POST /v1/messages` (`max_tokens=1`) for each
55
+ * model family (Haiku, Sonnet, Opus) to capture the current rate-limit
56
+ * snapshot, including the unified buckets AND the per-model buckets
57
+ * Anthropic started carving in late April 2026 (`7d_sonnet-utilization`
58
+ * etc). Surfaces "All models X%, Sonnet only Y%" the way the user
59
+ * dashboard does. Enable with `dario doctor --usage`; sends three
60
+ * requests, via a running `dario proxy` or else direct through OAuth.
61
+ */
62
+ usage?: boolean;
53
63
  }
54
64
  /**
55
65
  * Run every available health check. Never throws — each check is
package/dist/doctor.js CHANGED
@@ -282,6 +282,135 @@ export async function runChecks(opts = {}) {
282
282
  });
283
283
  }
284
284
  }
285
+ // ---- Usage snapshot (opt-in, --usage).
286
+ // Fires one minimal `POST /v1/messages` per model family (Haiku, Sonnet,
287
+ // Opus; max_tokens=1) to capture the current rate-limit snapshot,
288
+ // including the per-model buckets Anthropic started carving around
289
+ // 2026-04-25. Surfaces the `All models` vs `Sonnet only` split the way
290
+ // the user dashboard does. Routed through a local `dario proxy` when
291
+ // one answers /health; otherwise sent direct to Anthropic via OAuth.
292
+ if (opts.usage) {
293
+ try {
294
+ const { parseRateLimits } = await import('./pool.js');
295
+ const { billingBucketFromClaim } = await import('./analytics.js');
296
+ // Probe routing decision: Anthropic's subscription path rejects
297
+ // non-CC-shaped requests on Sonnet/Opus (returns 429 with no
298
+ // rate-limit headers). Haiku accepts the raw shape. So:
299
+ // - If a local `dario proxy` is listening, route through it —
300
+ // the proxy injects the full CC template and all three families
301
+ // succeed, giving us the _sonnet / _opus / _haiku per-model
302
+ // bucket headers on a single round trip each.
303
+ // - Else fall back to direct-to-Anthropic. All three families are still
304
+ // sent, but only Haiku is expected to return rate-limit headers, so unified buckets surface and per-model buckets won't.
305
+ const dario_base = process.env.DARIO_TEST_URL || 'http://127.0.0.1:3456';
306
+ let probeEndpoint = `${dario_base}/v1/messages`;
307
+ let probeHeaders = {
308
+ 'content-type': 'application/json',
309
+ 'anthropic-version': '2023-06-01',
310
+ 'authorization': 'Bearer dario',
311
+ };
312
+ let proxyAvailable = false;
313
+ try {
314
+ const healthRes = await fetch(`${dario_base}/health`, { signal: AbortSignal.timeout(800) });
315
+ proxyAvailable = healthRes.ok;
316
+ }
317
+ catch { /* proxy not running */ }
318
+ if (!proxyAvailable) {
319
+ const { getAccessToken } = await import('./oauth.js');
320
+ const token = await getAccessToken();
321
+ probeEndpoint = 'https://api.anthropic.com/v1/messages';
322
+ probeHeaders = {
323
+ 'content-type': 'application/json',
324
+ 'anthropic-version': '2023-06-01',
325
+ 'anthropic-beta': 'oauth-2025-04-20',
326
+ 'authorization': `Bearer ${token}`,
327
+ };
328
+ checks.push({
329
+ status: 'info',
330
+ label: 'Usage probe',
331
+ detail: 'dario proxy not running — probing direct. Per-model buckets visible only when probing through a running proxy (start `dario proxy` in another terminal and re-run).',
332
+ });
333
+ }
334
+ // Probe each family in parallel. Anthropic only returns the
335
+ // per-model 7d bucket header on a request TO that family.
336
+ const families = [
337
+ { family: 'haiku', model: 'claude-haiku-4-5' },
338
+ { family: 'sonnet', model: 'claude-sonnet-4-6' },
339
+ { family: 'opus', model: 'claude-opus-4-7' },
340
+ ];
341
+ const probe = async (model) => {
342
+ const res = await fetch(probeEndpoint, {
343
+ method: 'POST',
344
+ headers: probeHeaders,
345
+ body: JSON.stringify({
346
+ model,
347
+ max_tokens: 1,
348
+ messages: [{ role: 'user', content: 'ok' }],
349
+ }),
350
+ signal: AbortSignal.timeout(15_000),
351
+ });
352
+ // Consume the body so the socket releases; we only care about headers.
353
+ await res.text().catch(() => '');
354
+ // Ignore 429/4xx snapshots without useful rate-limit headers.
355
+ if (!res.headers.get('anthropic-ratelimit-unified-status'))
356
+ return null;
357
+ return parseRateLimits(res.headers);
358
+ };
359
+ const results = await Promise.all(families.map(f => probe(f.model).catch(() => null)));
360
+ // Use the first non-null snapshot for the unified view — they
361
+ // should all agree on the unified buckets (same account, same moment).
362
+ const firstOk = results.find(s => s !== null);
363
+ if (!firstOk)
364
+ throw new Error('all probe requests failed');
365
+ const bucket = billingBucketFromClaim(firstOk.claim);
366
+ const pct = (n) => `${(n * 100).toFixed(1)}%`;
367
+ checks.push({
368
+ status: firstOk.util5h >= 0.90 ? 'warn' : 'ok',
369
+ label: 'Usage 5h (all)',
370
+ detail: `${pct(firstOk.util5h)} used • status=${firstOk.status} • claim=${firstOk.claim} (${bucket})`,
371
+ });
372
+ checks.push({
373
+ status: firstOk.util7d >= 0.90 ? 'warn' : 'ok',
374
+ label: 'Usage 7d (all)',
375
+ detail: `${pct(firstOk.util7d)} used`,
376
+ });
377
+ // Merge per-model buckets across all probes — each probe's response
378
+ // carries at most its own family bucket; union them for display.
379
+ const mergedPerModel = {};
380
+ for (const s of results) {
381
+ if (!s)
382
+ continue;
383
+ for (const [family, util] of Object.entries(s.perModel7d)) {
384
+ mergedPerModel[family] = util;
385
+ }
386
+ }
387
+ for (const [family, util] of Object.entries(mergedPerModel).sort()) {
388
+ const divergence = util - firstOk.util7d;
389
+ const marker = Math.abs(divergence) > 0.05
390
+ ? ` • Δ vs 7d(all): ${divergence >= 0 ? '+' : ''}${(divergence * 100).toFixed(1)}pp`
391
+ : '';
392
+ checks.push({
393
+ status: util >= 0.90 ? 'warn' : 'ok',
394
+ label: `Usage 7d (${family} only)`,
395
+ detail: `${pct(util)} used${marker}`,
396
+ });
397
+ }
398
+ if (firstOk.overageUtil > 0) {
399
+ checks.push({
400
+ status: firstOk.overageUtil >= 0.90 ? 'warn' : 'info',
401
+ label: 'Usage overage',
402
+ detail: `${pct(firstOk.overageUtil)} of configured monthly spend`,
403
+ });
404
+ }
405
+ }
406
+ catch (err) {
407
+ checks.push({
408
+ status: 'warn',
409
+ label: 'Usage snapshot',
410
+ detail: `probe failed: ${err.message}`,
411
+ });
412
+ }
413
+ }
285
414
  // ---- Account pool
286
415
  try {
287
416
  const { listAccountAliases, loadAllAccounts } = await import('./accounts.js');
package/dist/pool.d.ts CHANGED
@@ -19,6 +19,20 @@ export interface RateLimitSnapshot {
19
19
  status: string;
20
20
  util5h: number;
21
21
  util7d: number;
22
+ /**
23
+ * Per-model 7-day utilization buckets — Anthropic carves separate
24
+ * weekly windows for some model families. As of 2026-04-25 the live
25
+ * API emits `anthropic-ratelimit-unified-7d_sonnet-utilization` on
26
+ * Sonnet responses (corresponds to the "Sonnet only" line on the user
27
+ * dashboard); other families do not yet have dedicated buckets but
28
+ * the parser scans the header set generically so any future
29
+ * `7d_<family>` header is captured automatically.
30
+ *
31
+ * Keyed by the family suffix as it arrived on the wire (lowercase,
32
+ * e.g. `sonnet` / `opus` / `haiku`). Empty when no per-model headers
33
+ * were on the response.
34
+ */
35
+ perModel7d: Record<string, number>;
22
36
  overageUtil: number;
23
37
  claim: string;
24
38
  reset: number;
@@ -45,6 +59,30 @@ export interface PoolStatus {
45
59
  }
46
60
  /** Parse an Anthropic response's rate-limit headers into a snapshot. */
47
61
  export declare function parseRateLimits(headers: Headers): RateLimitSnapshot;
62
+ /**
63
+ * Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
64
+ * model id. Used to look up the per-model 7d bucket in
65
+ * `RateLimitSnapshot.perModel7d` during routing decisions. Returns null
66
+ * for non-Claude models or model ids that don't carry a recognizable
67
+ * family token (those requests just use the unified buckets).
68
+ *
69
+ * Generous on input shape: matches `claude-opus-4-7`, `opus`, `claude-3-7-sonnet-…`,
70
+ * `claude-haiku-4-5`, anything containing the family token. Lowercase-normalized
71
+ * so it pairs cleanly with `parseRateLimits`'s lowercase family keys.
72
+ */
73
+ export declare function modelFamily(modelId: string | null | undefined): string | null;
74
+ /**
75
+ * Compute headroom for a single account given its rate-limit snapshot.
76
+ * Headroom is the slack between the most-saturated relevant bucket and
77
+ * full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
78
+ *
79
+ * When `family` is supplied AND the snapshot has a corresponding per-
80
+ * model 7d bucket, that bucket is included in the max. When the family
81
+ * isn't represented in the snapshot (e.g. account hasn't seen a Sonnet
82
+ * request yet so `7d_sonnet` is unknown), headroom is computed from the
83
+ * unified buckets only — best-effort, populated on the next response.
84
+ */
85
+ export declare function computeHeadroom(snapshot: RateLimitSnapshot, family?: string | null): number;
48
86
  export declare class AccountPool {
49
87
  private accounts;
50
88
  private queue;
@@ -61,8 +99,14 @@ export declare class AccountPool {
61
99
  }): void;
62
100
  remove(alias: string): boolean;
63
101
  get size(): number;
64
- /** Select the best account for the next request. */
65
- select(): PoolAccount | null;
102
+ /**
103
+ * Select the best account for the next request. `family` (when supplied)
104
+ * is the request's model family (`opus` / `sonnet` / `haiku`); when
105
+ * present and the account has a matching per-model 7d bucket, that
106
+ * bucket joins the headroom max. Family-less calls fall back to the
107
+ * unified-buckets-only headroom — same behavior as before this PR.
108
+ */
109
+ select(family?: string | null): PoolAccount | null;
66
110
  /**
67
111
  * Select with session stickiness. If `stickyKey` is already bound to a
68
112
  * healthy account (not rejected, token not near expiry, headroom > 2%),
@@ -79,7 +123,7 @@ export declare class AccountPool {
79
123
  *
80
124
  * Also performs lazy cleanup of expired bindings (TTL or size cap).
81
125
  */
82
- selectSticky(stickyKey: string | null): PoolAccount | null;
126
+ selectSticky(stickyKey: string | null, family?: string | null): PoolAccount | null;
83
127
  /**
84
128
  * Rebind a sticky key to a different account — called by proxy after an
85
129
  * in-request 429 failover moves to the next-best account. Without this
@@ -99,7 +143,7 @@ export declare class AccountPool {
99
143
  /** Test/inspection helper — current alias bound to a key, or null. */
100
144
  stickyAliasFor(stickyKey: string): string | null;
101
145
  /** Select the next-best account, excluding the given set of aliases. */
102
- selectExcluding(excluded: Set<string>): PoolAccount | null;
146
+ selectExcluding(excluded: Set<string>, family?: string | null): PoolAccount | null;
103
147
  updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
104
148
  markRejected(alias: string, snapshot: RateLimitSnapshot): void;
105
149
  updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
package/dist/pool.js CHANGED
@@ -28,19 +28,44 @@ export const EMPTY_SNAPSHOT = {
28
28
  status: 'unknown',
29
29
  util5h: 0,
30
30
  util7d: 0,
31
+ perModel7d: {},
31
32
  overageUtil: 0,
32
33
  claim: 'unknown',
33
34
  reset: 0,
34
35
  fallbackPct: 0,
35
36
  updatedAt: 0,
36
37
  };
38
+ /**
39
+ * Match `anthropic-ratelimit-unified-7d_<family>-utilization`. Generic on
40
+ * `<family>` so a future `7d_opus` / `7d_haiku` (or anything Anthropic
41
+ * adds without notice) is captured automatically. The family is
42
+ * normalized to lowercase to match `modelFamily()` output.
43
+ */
44
+ const PER_MODEL_7D_HEADER = /^anthropic-ratelimit-unified-7d_([a-z0-9-]+)-utilization$/i;
37
45
  /** Parse an Anthropic response's rate-limit headers into a snapshot. */
38
46
  export function parseRateLimits(headers) {
39
47
  const get = (key) => headers.get(`anthropic-ratelimit-unified-${key}`) ?? '';
48
+ const perModel7d = {};
49
+ // Iterate the full header set — `headers.get` only retrieves known
50
+ // keys, but Anthropic can add new `7d_<family>-utilization` shapes
51
+ // unannounced. Scanning the iterator means the parser is automatically
52
+ // forward-compatible. Real `Headers` instances and test-side mocks
53
+ // (which implement `.entries()` but not direct iteration) both work
54
+ // through the explicit `.entries()` call.
55
+ const entries = (typeof headers.entries === 'function')
56
+ ? headers.entries()
57
+ : headers;
58
+ for (const [k, v] of entries) {
59
+ const m = k.match(PER_MODEL_7D_HEADER);
60
+ if (m && m[1]) {
61
+ perModel7d[m[1].toLowerCase()] = parseFloat(v) || 0;
62
+ }
63
+ }
40
64
  return {
41
65
  status: get('status') || 'unknown',
42
66
  util5h: parseFloat(get('5h-utilization')) || 0,
43
67
  util7d: parseFloat(get('7d-utilization')) || 0,
68
+ perModel7d,
44
69
  overageUtil: parseFloat(get('overage-utilization')) || 0,
45
70
  claim: get('representative-claim') || 'unknown',
46
71
  reset: parseInt(get('reset')) || 0,
@@ -48,6 +73,49 @@ export function parseRateLimits(headers) {
48
73
  updatedAt: Date.now(),
49
74
  };
50
75
  }
76
/**
 * Extract the model family (`opus` / `sonnet` / `haiku`) from a request's
 * model id. Used to look up the per-model 7d bucket in
 * `RateLimitSnapshot.perModel7d` during routing decisions.
 *
 * Generous on input shape: matches `claude-opus-4-7`, `opus`,
 * `claude-3-7-sonnet-…`, `claude-haiku-4-5` — anything containing the family
 * token, case-insensitively. The returned token is always lowercase so it
 * pairs with the lowercase family keys produced by `parseRateLimits`.
 *
 * @param {string | null | undefined} modelId - Model id taken from the request.
 * @returns {string | null} `'opus'`, `'sonnet'` or `'haiku'`; `null` when the
 *   id is missing, empty, not a string, or carries no recognizable family
 *   token (those requests just use the unified buckets).
 */
export function modelFamily(modelId) {
    // Guard on type, not just truthiness: a numeric or object `model` value
    // would otherwise make `.toLowerCase()` throw inside the routing path.
    if (typeof modelId !== 'string' || modelId.length === 0)
        return null;
    const normalized = modelId.toLowerCase();
    // Checked in this fixed order; the first family token found wins.
    for (const family of ['opus', 'sonnet', 'haiku']) {
        if (normalized.includes(family))
            return family;
    }
    return null;
}
99
/**
 * Compute headroom for a single account given its rate-limit snapshot.
 * Headroom is the slack between the most-saturated relevant bucket and
 * full utilization: `1 - max(util5h, util7d, util_per_model_if_known)`.
 *
 * When `family` is supplied AND the snapshot has a corresponding per-model
 * 7d bucket, that bucket is included in the max. When the family isn't
 * represented in the snapshot (no `perModel7d` map at all, or no entry for
 * that family), headroom is computed from the unified buckets only.
 *
 * @param {{ util5h: number, util7d: number, perModel7d?: Record<string, number> }} snapshot
 *   Rate-limit snapshot for one account.
 * @param {string | null} [family] - Lowercase model family (`opus` /
 *   `sonnet` / `haiku`), or null/omitted for a family-agnostic result.
 * @returns {number} `1 - max(...)` over the buckets that were considered.
 */
export function computeHeadroom(snapshot, family) {
    const utils = [snapshot.util5h, snapshot.util7d];
    const perModel7d = snapshot.perModel7d;
    // Own-property check: a plain-object map would otherwise resolve inherited
    // keys such as `constructor` to a non-undefined value and poison the max
    // with NaN. The `perModel7d &&` guard tolerates snapshots that were built
    // without the per-model map.
    if (family && perModel7d && Object.hasOwn(perModel7d, family)) {
        const perModel = perModel7d[family];
        // Only finite numbers may join the max; anything else would turn the
        // headroom into NaN and make every `>` comparison against it false.
        if (Number.isFinite(perModel))
            utils.push(perModel);
    }
    return 1 - Math.max(...utils);
}
51
119
  const STICKY_TTL_MS = 6 * 60 * 60 * 1000; // 6h
52
120
  const STICKY_MAX_ENTRIES = 2_000; // lazy cleanup cap
53
121
  /**
@@ -87,8 +155,14 @@ export class AccountPool {
87
155
  get size() {
88
156
  return this.accounts.size;
89
157
  }
90
- /** Select the best account for the next request. */
91
- select() {
158
+ /**
159
+ * Select the best account for the next request. `family` (when supplied)
160
+ * is the request's model family (`opus` / `sonnet` / `haiku`); when
161
+ * present and the account has a matching per-model 7d bucket, that
162
+ * bucket joins the headroom max. Family-less calls fall back to the
163
+ * unified-buckets-only headroom — same behavior as before this PR.
164
+ */
165
+ select(family) {
92
166
  if (this.accounts.size === 0)
93
167
  return null;
94
168
  const now = Date.now();
@@ -97,8 +171,8 @@ export class AccountPool {
97
171
  a.expiresAt > now + 30_000);
98
172
  if (eligible.length > 0) {
99
173
  return eligible.reduce((best, curr) => {
100
- const bestHeadroom = 1 - Math.max(best.rateLimit.util5h, best.rateLimit.util7d);
101
- const currHeadroom = 1 - Math.max(curr.rateLimit.util5h, curr.rateLimit.util7d);
174
+ const bestHeadroom = computeHeadroom(best.rateLimit, family);
175
+ const currHeadroom = computeHeadroom(curr.rateLimit, family);
102
176
  return currHeadroom > bestHeadroom ? curr : best;
103
177
  });
104
178
  }
@@ -126,9 +200,9 @@ export class AccountPool {
126
200
  *
127
201
  * Also performs lazy cleanup of expired bindings (TTL or size cap).
128
202
  */
129
- selectSticky(stickyKey) {
203
+ selectSticky(stickyKey, family) {
130
204
  if (!stickyKey)
131
- return this.select();
205
+ return this.select(family);
132
206
  this.cleanupSticky();
133
207
  const binding = this.sticky.get(stickyKey);
134
208
  if (binding) {
@@ -137,11 +211,11 @@ export class AccountPool {
137
211
  if (bound
138
212
  && bound.rateLimit.status !== 'rejected'
139
213
  && bound.expiresAt > now + 30_000
140
- && (1 - Math.max(bound.rateLimit.util5h, bound.rateLimit.util7d)) > POOL_HEADROOM_FLOOR) {
214
+ && computeHeadroom(bound.rateLimit, family) > POOL_HEADROOM_FLOOR) {
141
215
  return bound;
142
216
  }
143
217
  }
144
- const picked = this.select();
218
+ const picked = this.select(family);
145
219
  if (picked) {
146
220
  this.sticky.set(stickyKey, { alias: picked.alias, boundAt: Date.now() });
147
221
  }
@@ -189,7 +263,7 @@ export class AccountPool {
189
263
  return this.sticky.get(stickyKey)?.alias ?? null;
190
264
  }
191
265
  /** Select the next-best account, excluding the given set of aliases. */
192
- selectExcluding(excluded) {
266
+ selectExcluding(excluded, family) {
193
267
  if (this.accounts.size <= 1)
194
268
  return null;
195
269
  const now = Date.now();
@@ -198,8 +272,8 @@ export class AccountPool {
198
272
  a.expiresAt > now + 30_000);
199
273
  if (eligible.length > 0) {
200
274
  return eligible.reduce((best, curr) => {
201
- const bestHeadroom = 1 - Math.max(best.rateLimit.util5h, best.rateLimit.util7d);
202
- const currHeadroom = 1 - Math.max(curr.rateLimit.util5h, curr.rateLimit.util7d);
275
+ const bestHeadroom = computeHeadroom(best.rateLimit, family);
276
+ const currHeadroom = computeHeadroom(curr.rateLimit, family);
203
277
  return currHeadroom > bestHeadroom ? curr : best;
204
278
  });
205
279
  }
@@ -240,7 +314,10 @@ export class AccountPool {
240
314
  const now = Date.now();
241
315
  const healthy = all.filter(a => a.rateLimit.status !== 'rejected' &&
242
316
  a.expiresAt > now + 30_000);
243
- const headrooms = all.map(a => 1 - Math.max(a.rateLimit.util5h, a.rateLimit.util7d));
317
+ // Status is a pool-wide aggregate; family-agnostic. Per-model
318
+ // headroom is request-context-specific and only meaningful at
319
+ // select() time.
320
+ const headrooms = all.map(a => computeHeadroom(a.rateLimit));
244
321
  const avgHeadroom = headrooms.length > 0 ? headrooms.reduce((a, b) => a + b, 0) / headrooms.length : 0;
245
322
  const best = this.select();
246
323
  return {
@@ -260,7 +337,7 @@ export class AccountPool {
260
337
  async waitForAccount() {
261
338
  const immediate = this.select();
262
339
  if (immediate) {
263
- const headroom = 1 - Math.max(immediate.rateLimit.util5h, immediate.rateLimit.util7d);
340
+ const headroom = computeHeadroom(immediate.rateLimit);
264
341
  if (headroom > POOL_HEADROOM_FLOOR)
265
342
  return immediate;
266
343
  }
@@ -303,7 +380,7 @@ export class AccountPool {
303
380
  const account = this.select();
304
381
  if (!account)
305
382
  break;
306
- const headroom = 1 - Math.max(account.rateLimit.util5h, account.rateLimit.util7d);
383
+ const headroom = computeHeadroom(account.rateLimit);
307
384
  if (headroom <= POOL_HEADROOM_FLOOR)
308
385
  break;
309
386
  const entry = this.queue.shift();
package/dist/proxy.js CHANGED
@@ -8,7 +8,7 @@ import { arch, platform } from 'node:process';
8
8
  import { getAccessToken, getStatus } from './oauth.js';
9
9
  import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
10
10
  import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
11
- import { AccountPool, computeStickyKey, parseRateLimits } from './pool.js';
11
+ import { AccountPool, computeStickyKey, parseRateLimits, modelFamily } from './pool.js';
12
12
  import { Analytics, billingBucketFromClaim } from './analytics.js';
13
13
  import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
14
14
  import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
@@ -460,6 +460,11 @@ export async function startProxy(opts = {}) {
460
460
  // Single-account dario keeps its existing code path unchanged.
461
461
  const accountsList = await loadAllAccounts();
462
462
  const pool = accountsList.length >= 2 ? new AccountPool() : null;
463
+ // Per-model rate-limit bucket families seen during this proxy run. First-
464
+ // sight is logged once when verbose so a new Anthropic bucket (e.g. an
465
+ // eventual `7d_opus`) doesn't slip past unnoticed. Pure observability —
466
+ // routing already handles unknown families generically.
467
+ const seenPerModelBuckets = new Set();
463
468
  const analytics = pool ? new Analytics() : null;
464
469
  let status;
465
470
  if (pool) {
@@ -964,7 +969,7 @@ export async function startProxy(opts = {}) {
964
969
  // Rotating off mid-session costs cache-create on every turn.
965
970
  stickyKey = computeStickyKey(userMsg);
966
971
  if (pool && stickyKey) {
967
- const preferred = pool.selectSticky(stickyKey);
972
+ const preferred = pool.selectSticky(stickyKey, modelFamily(requestModel));
968
973
  if (preferred && preferred.alias !== poolAccount?.alias) {
969
974
  poolAccount = preferred;
970
975
  accessToken = preferred.accessToken;
@@ -1185,6 +1190,20 @@ export async function startProxy(opts = {}) {
1185
1190
  else {
1186
1191
  pool.updateRateLimits(poolAccount.alias, snapshot);
1187
1192
  }
1193
+ // First-sight detector for per-model rate-limit buckets. Anthropic
1194
+ // ships these unannounced — e.g. `7d_sonnet-utilization` appeared
1195
+ // around 2026-04-25 — and verbose-mode users want a heads-up the
1196
+ // first time a new family shows up so they can decide whether to
1197
+ // bump dario's expectations. Pure logging; the routing path
1198
+ // already handles arbitrary family keys (see pool.computeHeadroom).
1199
+ for (const family of Object.keys(snapshot.perModel7d)) {
1200
+ if (!seenPerModelBuckets.has(family)) {
1201
+ seenPerModelBuckets.add(family);
1202
+ if (verbose) {
1203
+ console.log(`[dario] new per-model rate-limit bucket observed: 7d_${family} (util=${snapshot.perModel7d[family]?.toFixed(2)})`);
1204
+ }
1205
+ }
1206
+ }
1188
1207
  }
1189
1208
  // Auto-retry without context-1m if it triggers a long-context billing error.
1190
1209
  // Anthropic returns this as either 400 ("long context beta is not yet available
@@ -1287,7 +1306,7 @@ export async function startProxy(opts = {}) {
1287
1306
  else if (upstream.status === 429) {
1288
1307
  // Not a context-1m issue — try pool failover before surfacing to client
1289
1308
  if (pool && poolAccount) {
1290
- const nextAccount = pool.selectExcluding(triedAliases);
1309
+ const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
1291
1310
  if (nextAccount) {
1292
1311
  triedAliases.add(nextAccount.alias);
1293
1312
  poolAccount = nextAccount;
@@ -1346,7 +1365,7 @@ export async function startProxy(opts = {}) {
1346
1365
  if (upstream.status === 429) {
1347
1366
  // Try pool failover before surfacing to client
1348
1367
  if (pool && poolAccount) {
1349
- const nextAccount = pool.selectExcluding(triedAliases);
1368
+ const nextAccount = pool.selectExcluding(triedAliases, modelFamily(requestModel));
1350
1369
  if (nextAccount) {
1351
1370
  triedAliases.add(nextAccount.alias);
1352
1371
  poolAccount = nextAccount;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.31.16",
3
+ "version": "3.31.18",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {