npm - @askalf/dario - Versions diffs - 4.8.56 → 4.8.58 - Mend

@askalf/dario 4.8.56 → 4.8.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md CHANGED Viewed

@@ -191,7 +191,7 @@ You point every tool at one URL. dario reads each request, decides which backend
 The tool doesn't know. The backend doesn't know. dario is the seam.
-**The full Claude lineup, kept current.** Claude Fable 5 (CC's flagship), Opus 4.8, Sonnet 4.6, and Haiku 4.5 — plus `[1m]` long-context variants — by full id (`claude-fable-5`, `claude-opus-4-8`) or shortcut (`fable` / `opus` / `sonnet` / `haiku` / `fable1m`). dario adopts each new model the day it lands and applies its model-specific wire shape (effort level, beta set, thinking config) automatically; `GET /v1/models` always reflects the live set.
+**The full Claude lineup, autodetected.** Claude Fable 5 (CC's flagship), Opus 4.8, Sonnet 4.6, and Haiku 4.5 — plus `[1m]` long-context variants, generated by one rule for every family — by full id (`claude-fable-5`, `claude-opus-4-8`) or shortcut (`fable` / `opus` / `sonnet` / `haiku`, append `1m` for the long-context form). `GET /v1/models` asks Anthropic's live catalog (TTL-cached, baked fallback when offline), and the family shortcuts track it — a new model shows up and resolves the day it lands, no dario release needed; the model-specific wire shape (effort level, beta set, thinking config) is applied automatically.
 ---

package/dist/model-catalog.d.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * model-catalog.ts — upstream model autodetection with a baked fallback.
+ *
+ * Single source of truth for "which models does dario advertise". Two
+ * problems this solves (operator direction, 2026-06-10):
+ *
+ *  1. AUTODETECTION. `GET /v1/models` used to serve a hardcoded list that
+ *     went stale every time Anthropic shipped a model (fable-5 needed a
+ *     manual PR; `opus` was bumped to 4-8 in #389 while `opus1m` silently
+ *     stayed on 4-7). The catalog now asks api.anthropic.com/v1/models
+ *     what actually exists, TTL-cached, falling back to the baked list
+ *     whenever upstream is unreachable — startup, offline, auth-broken,
+ *     all serve the same baked set as before.
+ *
+ *  2. ONE METHOD FOR CONTEXT WINDOWS. The `[1m]` long-context variant was
+ *     hand-sprinkled: the listing carried `claude-fable-5[1m]` but no
+ *     opus/sonnet variants, while the alias map pinned each `<family>1m`
+ *     to a hand-picked id. Now every family goes through the same two
+ *     rules: `longContextEligible()` decides which bases take a `[1m]`
+ *     variant (everything except haiku — real CC never offers 1M haiku),
+ *     and `<family>1m` is DERIVED as `resolve(<family>) + '[1m]'`, so the
+ *     pair can never drift apart again.
+ *
+ * The wire mechanics are unchanged and already uniform: `[1m]` is a
+ * client-side label — proxy.ts strips it and rides `context-1m-2025-08-07`
+ * on the request (see stripContext1mTag / betaForModel).
+ */
+/**
+ * Baked fallback — the catalog served when upstream has never answered.
+ * Base ids only ([1m] variants are generated, never stored). Order is the
+ * advertised order: family rank (fable, opus, sonnet, haiku), version desc
+ * — the same ordering normalizeUpstreamIds() produces for live data.
+ */
+export declare const BAKED_BASE_MODELS: readonly string[];
+/**
+ * THE long-context rule — applied identically to every family. A base id
+ * takes a `[1m]` variant unless it's the haiku family (CC's picker never
+ * offers 1M haiku; it's also the family CC strips the effort and
+ * mid-conversation betas from). Already-tagged and non-Claude ids are
+ * never eligible.
+ */
+export declare function longContextEligible(id: string): boolean;
+/**
+ * Expand base ids into the advertised list: each eligible base is followed
+ * by its `[1m]` variant (matching the historical fable-5 / fable-5[1m]
+ * adjacency), ineligible bases pass through alone.
+ */
+export declare function withLongContextVariants(bases: readonly string[]): string[];
+/** Numeric segments of a model id (`claude-opus-4-8` → [4, 8]) for version ordering. */
+export declare function modelVersionKey(id: string): number[];
+/**
+ * Normalize a raw upstream id listing into dario's advertised base set:
+ *  - keep `claude-*` ids only (no [1m] tags — those are ours to generate)
+ *  - drop legacy generations of known families (< 4; fable exempt)
+ *  - prefer the CC-style short id when upstream lists both `claude-opus-4-8`
+ *    and a dated `claude-opus-4-8-YYYYMMDD`; keep the dated id when it's the
+ *    only form
+ *  - deterministic order: family rank, then version desc, unknown families last
+ */
+export declare function normalizeUpstreamIds(ids: readonly string[]): string[];
+/** Newest base id of a family within a base set, or null if absent. */
+export declare function resolveFamilyBase(family: string, bases: readonly string[]): string | null;
+/**
+ * Resolve a family shorthand against a base set. `<family>` → the newest
+ * base of that family; `<family>1m` → the SAME base + `[1m]` (one
+ * derivation rule for every family — `opus` and `opus1m` can't disagree).
+ * Returns null when the name isn't a family shorthand or the family is
+ * absent/ineligible — callers fall back to their static map.
+ */
+export declare function resolveAliasAgainst(model: string, bases: readonly string[]): string | null;
+/** OpenAI-shape /v1/models payload for a list of advertised ids. */
+export declare function buildOpenAIModelsList(ids: readonly string[]): {
+    object: string;
+    data: Array<{
+        id: string;
+        object: string;
+        created: number;
+        owned_by: string;
+    }>;
+};
+export interface ModelCatalog {
+    bases: readonly string[];
+    source: 'upstream' | 'baked';
+    fetchedAt: number;
+}
+export interface CatalogDeps {
+    fetchImpl?: typeof fetch;
+    /** OAuth bearer source (single-account getAccessToken). Ignored when upstreamApiKey is set. */
+    getToken?: () => Promise<string>;
+    /** Per-token API pool mode — forwarded as x-api-key, mirroring request-path auth. */
+    upstreamApiKey?: string;
+    now?: () => number;
+    log?: (msg: string) => void;
+    ttlMs?: number;
+    retryMs?: number;
+    timeoutMs?: number;
+}
+export declare const DEFAULT_CATALOG_TTL_MS = 3600000;
+export declare const DEFAULT_CATALOG_RETRY_MS = 300000;
+/**
+ * The catalog, stale-while-revalidate. Warm cache returns immediately
+ * (kicking an async refresh when past TTL); a cold start tries upstream
+ * once (bounded by timeoutMs) and falls back to the baked list. Never
+ * throws — /v1/models must always answer.
+ */
+export declare function getModelCatalog(deps?: CatalogDeps): Promise<ModelCatalog>;
+/**
+ * Synchronous view for request-path alias resolution — whatever the last
+ * catalog produced, or the baked set before the first fetch completes.
+ * Never blocks the hot path on the network.
+ */
+export declare function getCachedBases(): readonly string[];
+/** Fire-and-forget warmup so the first client /v1/models call is served warm. */
+export declare function prewarmModelCatalog(deps?: CatalogDeps): void;
+export declare function _resetModelCatalogForTest(): void;

package/dist/model-catalog.js ADDED Viewed

@@ -0,0 +1,264 @@
+/**
+ * model-catalog.ts — upstream model autodetection with a baked fallback.
+ *
+ * Single source of truth for "which models does dario advertise". Two
+ * problems this solves (operator direction, 2026-06-10):
+ *
+ *  1. AUTODETECTION. `GET /v1/models` used to serve a hardcoded list that
+ *     went stale every time Anthropic shipped a model (fable-5 needed a
+ *     manual PR; `opus` was bumped to 4-8 in #389 while `opus1m` silently
+ *     stayed on 4-7). The catalog now asks api.anthropic.com/v1/models
+ *     what actually exists, TTL-cached, falling back to the baked list
+ *     whenever upstream is unreachable — startup, offline, auth-broken,
+ *     all serve the same baked set as before.
+ *
+ *  2. ONE METHOD FOR CONTEXT WINDOWS. The `[1m]` long-context variant was
+ *     hand-sprinkled: the listing carried `claude-fable-5[1m]` but no
+ *     opus/sonnet variants, while the alias map pinned each `<family>1m`
+ *     to a hand-picked id. Now every family goes through the same two
+ *     rules: `longContextEligible()` decides which bases take a `[1m]`
+ *     variant (everything except haiku — real CC never offers 1M haiku),
+ *     and `<family>1m` is DERIVED as `resolve(<family>) + '[1m]'`, so the
+ *     pair can never drift apart again.
+ *
+ * The wire mechanics are unchanged and already uniform: `[1m]` is a
+ * client-side label — proxy.ts strips it and rides `context-1m-2025-08-07`
+ * on the request (see stripContext1mTag / betaForModel).
+ */
+import { modelFamily } from './pool.js';
+const ANTHROPIC_API = 'https://api.anthropic.com';
+const ANTHROPIC_VERSION = '2023-06-01';
+const OAUTH_BETA = 'oauth-2025-04-20';
+/**
+ * Baked fallback — the catalog served when upstream has never answered.
+ * Base ids only ([1m] variants are generated, never stored). Order is the
+ * advertised order: family rank (fable, opus, sonnet, haiku), version desc
+ * — the same ordering normalizeUpstreamIds() produces for live data.
+ */
+export const BAKED_BASE_MODELS = [
+    'claude-fable-5',
+    'claude-opus-4-8',
+    'claude-opus-4-7',
+    'claude-opus-4-6',
+    'claude-sonnet-4-6',
+    'claude-haiku-4-5',
+];
+/**
+ * THE long-context rule — applied identically to every family. A base id
+ * takes a `[1m]` variant unless it's the haiku family (CC's picker never
+ * offers 1M haiku; it's also the family CC strips the effort and
+ * mid-conversation betas from). Already-tagged and non-Claude ids are
+ * never eligible.
+ */
+export function longContextEligible(id) {
+    const m = id.toLowerCase();
+    return m.startsWith('claude-') && !m.includes('haiku') && !m.endsWith('[1m]');
+}
+/**
+ * Expand base ids into the advertised list: each eligible base is followed
+ * by its `[1m]` variant (matching the historical fable-5 / fable-5[1m]
+ * adjacency), ineligible bases pass through alone.
+ */
+export function withLongContextVariants(bases) {
+    return bases.flatMap((b) => (longContextEligible(b) ? [b, `${b}[1m]`] : [b]));
+}
+/** Numeric segments of a model id (`claude-opus-4-8` → [4, 8]) for version ordering. */
+export function modelVersionKey(id) {
+    const nums = id.match(/\d+/g);
+    return nums ? nums.map(Number) : [];
+}
+/** Descending version compare on modelVersionKey output. */
+function cmpVersionDesc(a, b) {
+    const n = Math.max(a.length, b.length);
+    for (let i = 0; i < n; i++) {
+        const d = (b[i] ?? -1) - (a[i] ?? -1);
+        if (d !== 0)
+            return d;
+    }
+    return 0;
+}
+// Advertised order: CC lists the flagship first, then the big families.
+// Unknown future families rank last (still advertised — a brand-new family
+// shows up on the next catalog refresh without a dario release).
+const FAMILY_RANK = { fable: 0, opus: 1, sonnet: 2, haiku: 3 };
+// Known families older than this generation are dropped from the advertised
+// list (claude-3-x etc. — not what a CC-shaped proxy should offer). fable is
+// exempt: its versioning is its own line (fable-5).
+const MIN_GENERATION = 4;
+/**
+ * Normalize a raw upstream id listing into dario's advertised base set:
+ *  - keep `claude-*` ids only (no [1m] tags — those are ours to generate)
+ *  - drop legacy generations of known families (< 4; fable exempt)
+ *  - prefer the CC-style short id when upstream lists both `claude-opus-4-8`
+ *    and a dated `claude-opus-4-8-YYYYMMDD`; keep the dated id when it's the
+ *    only form
+ *  - deterministic order: family rank, then version desc, unknown families last
+ */
+export function normalizeUpstreamIds(ids) {
+    let list = ids.filter((id) => typeof id === 'string' && /^claude-/i.test(id) && !id.includes('['));
+    list = list.filter((id) => {
+        const fam = modelFamily(id);
+        if (fam === null || fam === 'fable')
+            return true;
+        return (modelVersionKey(id)[0] ?? 0) >= MIN_GENERATION;
+    });
+    const byKey = new Map();
+    for (const id of list) {
+        const key = id.replace(/-\d{8}$/, '').toLowerCase();
+        const existing = byKey.get(key);
+        if (existing === undefined) {
+            byKey.set(key, id);
+        }
+        else if (id.toLowerCase() === key && existing.toLowerCase() !== key) {
+            byKey.set(key, id); // short form wins over dated duplicate
+        }
+    }
+    return [...byKey.values()].sort((a, b) => {
+        const ra = FAMILY_RANK[modelFamily(a) ?? ''] ?? 99;
+        const rb = FAMILY_RANK[modelFamily(b) ?? ''] ?? 99;
+        if (ra !== rb)
+            return ra - rb;
+        return cmpVersionDesc(modelVersionKey(a), modelVersionKey(b));
+    });
+}
+/** Newest base id of a family within a base set, or null if absent. */
+export function resolveFamilyBase(family, bases) {
+    const candidates = bases.filter((b) => modelFamily(b) === family && !b.includes('['));
+    if (candidates.length === 0)
+        return null;
+    return [...candidates].sort((a, b) => cmpVersionDesc(modelVersionKey(a), modelVersionKey(b)))[0];
+}
+const FAMILY_ALIASES = new Set(['fable', 'opus', 'sonnet', 'haiku']);
+/**
+ * Resolve a family shorthand against a base set. `<family>` → the newest
+ * base of that family; `<family>1m` → the SAME base + `[1m]` (one
+ * derivation rule for every family — `opus` and `opus1m` can't disagree).
+ * Returns null when the name isn't a family shorthand or the family is
+ * absent/ineligible — callers fall back to their static map.
+ */
+export function resolveAliasAgainst(model, bases) {
+    const m = model.toLowerCase().trim();
+    if (FAMILY_ALIASES.has(m))
+        return resolveFamilyBase(m, bases);
+    const match = m.match(/^([a-z]+)1m$/);
+    if (match !== null && FAMILY_ALIASES.has(match[1])) {
+        const base = resolveFamilyBase(match[1], bases);
+        return base !== null && longContextEligible(base) ? `${base}[1m]` : null;
+    }
+    return null;
+}
+/** OpenAI-shape /v1/models payload for a list of advertised ids. */
+export function buildOpenAIModelsList(ids) {
+    return {
+        object: 'list',
+        data: ids.map((id) => ({ id, object: 'model', created: 1700000000, owned_by: 'anthropic' })),
+    };
+}
+export const DEFAULT_CATALOG_TTL_MS = 3_600_000; // 1h — model launches are rare
+export const DEFAULT_CATALOG_RETRY_MS = 300_000; // failed-fetch backoff: 5min
+const DEFAULT_FETCH_TIMEOUT_MS = 4_000;
+let cache = null;
+let lastAttempt = 0;
+let inflight = null;
+function envInt(name, dflt) {
+    const v = Number(process.env[name]);
+    return Number.isFinite(v) && v > 0 ? v : dflt;
+}
+/**
+ * Fetch the upstream model listing once and return it as normalized bases.
+ *
+ * Auth: `deps.upstreamApiKey`, when set, is sent as `x-api-key`; otherwise
+ * `deps.getToken()` supplies an OAuth bearer and the OAuth beta header is
+ * added. A single `GET /v1/models?limit=100` is issued — no further pages
+ * are requested.
+ *
+ * @param deps Catalog dependencies (fetch impl, auth source, timeout).
+ * @returns Non-empty array of normalized base ids.
+ * @throws When no token source is available, the response is not ok, the
+ *         listing normalizes to zero ids, or the fetch / JSON parse fails
+ *         (including an abort on timeout).
+ */
+async function fetchUpstreamBases(deps) {
+    const f = deps.fetchImpl ?? fetch;
+    const headers = {
+        accept: 'application/json',
+        'anthropic-version': ANTHROPIC_VERSION,
+    };
+    if (deps.upstreamApiKey) {
+        headers['x-api-key'] = deps.upstreamApiKey;
+    }
+    else {
+        if (!deps.getToken)
+            throw new Error('no token source for catalog fetch');
+        // NOTE: the token is awaited before the abort timer below is armed,
+        // so time spent in getToken() is not counted against timeoutMs.
+        headers['authorization'] = `Bearer ${await deps.getToken()}`;
+        headers['anthropic-beta'] = OAUTH_BETA;
+    }
+    // Abort the request (and the body read, which shares the signal's
+    // lifetime) once the timeout elapses.
+    const ctl = new AbortController();
+    const timer = setTimeout(() => ctl.abort(), deps.timeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS);
+    try {
+        const res = await f(`${ANTHROPIC_API}/v1/models?limit=100`, { headers, signal: ctl.signal });
+        if (!res.ok)
+            throw new Error(`upstream /v1/models ${res.status}`);
+        const json = (await res.json());
+        // Tolerate a missing `data` array and entries without a string id.
+        const ids = (json.data ?? [])
+            .map((d) => d?.id)
+            .filter((x) => typeof x === 'string');
+        const bases = normalizeUpstreamIds(ids);
+        // An empty result is treated as a failed fetch so the caller keeps
+        // whatever list it already has.
+        if (bases.length === 0)
+            throw new Error('upstream /v1/models returned no usable claude ids');
+        return bases;
+    }
+    finally {
+        // Always disarm the timer, on success and on every throw path.
+        clearTimeout(timer);
+    }
+}
+/**
+ * Run one upstream fetch and, on success, replace the module-level cache.
+ *
+ * `lastAttempt` is stamped BEFORE the fetch, so a failed attempt still
+ * counts toward the retry backoff checked by the callers. Nothing is
+ * caught here: a fetch failure rejects and leaves `cache` untouched.
+ *
+ * @param deps Catalog dependencies; `deps.now` overrides the clock.
+ */
+async function refresh(deps) {
+    const now = deps.now ?? Date.now;
+    lastAttempt = now();
+    const bases = await fetchUpstreamBases(deps);
+    cache = { bases, source: 'upstream', fetchedAt: now() };
+    deps.log?.(`[dario] model catalog: autodetected ${bases.length} base models upstream`);
+}
+/**
+ * Start a background refresh if one is due; returns immediately either way.
+ *
+ * A refresh is skipped when any of these holds:
+ *  - the cache came from upstream and is younger than the TTL
+ *    (`deps.ttlMs`, else DARIO_MODEL_CATALOG_TTL_MS, else the default)
+ *  - a refresh is already in flight
+ *  - the last attempt was less than the retry interval ago
+ *
+ * A baked cache is never considered fresh, so it is retried on the retry
+ * interval rather than the TTL.
+ *
+ * @param deps Catalog dependencies.
+ */
+function maybeRefreshInBackground(deps) {
+    const now = (deps.now ?? Date.now)();
+    const ttl = deps.ttlMs ?? envInt('DARIO_MODEL_CATALOG_TTL_MS', DEFAULT_CATALOG_TTL_MS);
+    const retry = deps.retryMs ?? DEFAULT_CATALOG_RETRY_MS;
+    const fresh = cache !== null && cache.source === 'upstream' && now - cache.fetchedAt < ttl;
+    if (fresh || inflight !== null || now - lastAttempt < retry)
+        return;
+    // Failures are logged and swallowed: the existing cache keeps serving.
+    inflight = refresh(deps)
+        .catch((err) => {
+        deps.log?.(`[dario] model catalog refresh failed: ${err.message} — keeping ${cache?.source ?? 'baked'} list`);
+    })
+        .finally(() => {
+        // Clear the marker so a later call can start the next refresh.
+        inflight = null;
+    });
+}
+/**
+ * The catalog, stale-while-revalidate. Warm cache returns immediately
+ * (kicking an async refresh when past TTL); a cold start tries upstream
+ * once (bounded by timeoutMs) and falls back to the baked list. Never
+ * throws — /v1/models must always answer.
+ *
+ * @param deps Catalog dependencies (auth source, clock, logger, timings).
+ * @returns The current catalog; `source` tells whether it is the upstream
+ *          listing or the baked fallback.
+ */
+export async function getModelCatalog(deps = {}) {
+    // Warm path: serve what we have and let a refresh run in the background.
+    if (cache !== null) {
+        maybeRefreshInBackground(deps);
+        return cache;
+    }
+    const now = (deps.now ?? Date.now)();
+    const retry = deps.retryMs ?? DEFAULT_CATALOG_RETRY_MS;
+    // Cold path: attempt upstream only if no attempt was stamped within the
+    // retry interval. refresh() stamps lastAttempt synchronously before its
+    // first await, so a second cold caller arriving meanwhile skips the
+    // fetch and falls through to the baked list below.
+    if (now - lastAttempt >= retry) {
+        try {
+            await refresh(deps);
+        }
+        catch (err) {
+            deps.log?.(`[dario] model catalog fetch failed: ${err.message} — serving baked list`);
+        }
+    }
+    // Still nothing cached (fetch skipped or failed): install the baked
+    // fallback. fetchedAt 0 plus source 'baked' means it is never "fresh".
+    if (cache === null)
+        cache = { bases: [...BAKED_BASE_MODELS], source: 'baked', fetchedAt: 0 };
+    return cache;
+}
+/**
+ * Synchronous view for request-path alias resolution — whatever the last
+ * catalog produced, or the baked set before the first fetch completes.
+ * Never blocks the hot path on the network.
+ */
+export function getCachedBases() {
+    return cache?.bases ?? BAKED_BASE_MODELS;
+}
+/**
+ * Fire-and-forget warmup so the first client /v1/models call is served warm.
+ * Starts getModelCatalog() and deliberately discards the promise (`void`):
+ * the caller is never blocked on the fetch.
+ *
+ * @param deps Catalog dependencies, forwarded unchanged.
+ */
+export function prewarmModelCatalog(deps = {}) {
+    void getModelCatalog(deps);
+}
+/**
+ * Test hook: return the module-level catalog state (cache, last-attempt
+ * stamp, in-flight marker) to its initial values. A refresh already
+ * running is not cancelled — only the marker is cleared.
+ */
+export function _resetModelCatalogForTest() {
+    cache = null;
+    lastAttempt = 0;
+    inflight = null;
+}

package/dist/proxy.d.ts CHANGED Viewed

@@ -2,11 +2,19 @@ import { type IncomingMessage } from 'node:http';
 import { type WriteStream } from 'node:fs';
 import { type EffortValue } from './cc-template.js';
 /**
- * Resolve a Claude-side model name through MODEL_ALIASES if it's a short
- * alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through unchanged.
+ * Resolve a Claude-side model name through the family-alias rules if it's a
+ * short alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through
+ * unchanged.
+ *
+ * Family shorthands resolve against the live model catalog: `<family>` is
+ * the newest base of that family, and `<family>1m` DERIVES from that same
+ * base + `[1m]` — one rule for every family, so the pair can't drift apart
+ * (pre-catalog, #389 bumped `opus` to 4-8 while `opus1m` silently stayed on
+ * 4-7). Before the first catalog fetch the baked set produces the same
+ * answers as the static map; the map stays as the last-resort fallback.
  *
  * Used at request time on the provider-prefix path so `claude:opus` arrives
- * upstream as `claude-opus-4-6` rather than the bare `opus` (which Anthropic
+ * upstream as a full model id rather than the bare `opus` (which Anthropic
  * 400's). Critical for Cursor BYOK setups (dario#190) where users have to
  * pick a colon-prefixed model name to dodge Cursor's built-in `claude-*`
  * name collision — which means the natural shorthand is `claude:opus`, and
@@ -71,6 +79,32 @@ export declare function betaForModel(base: string, model: string | null | undefi
  * very end of the id. Exported for tests.
  */
 export declare function stripContext1mTag(model: string): string;
+/**
+ * Parse upstream's effort-capability rejection:
+ *
+ *   400 {"type":"invalid_request_error","message":"This model does not
+ *        support effort level 'max'. Supported levels: high, low, medium."}
+ *
+ * Observed live 2026-06-10 on `claude-opus-4-5-20251101` — the autodetected
+ * catalog exposes models that predate the newer effort tiers, and a pinned
+ * DARIO_EFFORT (the box pins `max`) hard-400s on them. Returns the rejected
+ * level plus the model's supported set, or null when the body is some other
+ * 400. NOTE: fable's effort intolerance is different in kind — a SOFT
+ * refusal (200 + stop_reason:"refusal"), invisible to this machinery — and
+ * stays handled by its measured clamp in resolveEffort.
+ * Exported for tests.
+ */
+export declare function parseEffortRejection(body: string): {
+    rejected: string;
+    supported: string[];
+} | null;
+/**
+ * Pick the strongest effort level a model says it supports. Preference is
+ * descending capability — the caller asked for more than the model can do,
+ * so degrade as little as possible. Exported for tests.
+ */
+export declare const EFFORT_PREFERENCE: readonly string[];
+export declare function bestSupportedEffort(supported: readonly string[]): string;
 /**
  * Resolve an inbound API path to its upstream target + forwarding mode.
  * Allowlist semantics — anything unlisted is 403'd (prevents SSRF through
@@ -113,12 +147,12 @@ export declare function buildOrchestrationPatterns(preserveTags?: Set<string>):
 export declare function sanitizeMessages(body: Record<string, unknown>, preserveTags?: Set<string>): void;
 export declare const OPENAI_MODELS_LIST: {
     object: string;
-    data: {
+    data: Array<{
         id: string;
         object: string;
         created: number;
         owned_by: string;
-    }[];
+    }>;
 };
 interface ProxyOptions {
     port?: number;

package/dist/proxy.js CHANGED Viewed

@@ -17,6 +17,7 @@ import { loadAllAccounts, loadAccount, refreshAccountToken, resyncLoginFromCrede
 import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
 import { RequestQueue, QueueFullError, QueueTimeoutError, DEFAULT_MAX_CONCURRENT, DEFAULT_MAX_QUEUED, DEFAULT_QUEUE_TIMEOUT_MS } from './request-queue.js';
 import { redactSecrets } from './redact.js';
+import { BAKED_BASE_MODELS, withLongContextVariants, buildOpenAIModelsList, getModelCatalog, getCachedBases, resolveAliasAgainst, prewarmModelCatalog } from './model-catalog.js';
 const ANTHROPIC_API = 'https://api.anthropic.com';
 const DEFAULT_PORT = 3456;
 const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MB — generous for large prompts, prevents abuse
@@ -133,31 +134,42 @@ function loadClaudeIdentity() {
     }
     return { deviceId: '', accountUuid: '' };
 }
-// Model shortcuts — users can pass short names
+// Model shortcuts — users can pass short names. Family shorthands
+// (`opus`, `opus1m`, …) resolve DYNAMICALLY against the model catalog in
+// resolveClaudeAlias — this static map is the offline fallback plus the
+// deliberate legacy version pins (`opus47`/`opus46`), which never float.
 const MODEL_ALIASES = {
     'fable': 'claude-fable-5',
     'fable1m': 'claude-fable-5[1m]',
     'opus': 'claude-opus-4-8',
     'opus47': 'claude-opus-4-7',
     'opus46': 'claude-opus-4-6',
-    'opus1m': 'claude-opus-4-7[1m]',
+    'opus1m': 'claude-opus-4-8[1m]',
     'sonnet': 'claude-sonnet-4-6',
     'sonnet1m': 'claude-sonnet-4-6[1m]',
     'haiku': 'claude-haiku-4-5',
 };
 /**
- * Resolve a Claude-side model name through MODEL_ALIASES if it's a short
- * alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through unchanged.
+ * Resolve a Claude-side model name through the family-alias rules if it's a
+ * short alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through
+ * unchanged.
+ *
+ * Family shorthands resolve against the live model catalog: `<family>` is
+ * the newest base of that family, and `<family>1m` DERIVES from that same
+ * base + `[1m]` — one rule for every family, so the pair can't drift apart
+ * (pre-catalog, #389 bumped `opus` to 4-8 while `opus1m` silently stayed on
+ * 4-7). Before the first catalog fetch the baked set produces the same
+ * answers as the static map; the map stays as the last-resort fallback.
  *
  * Used at request time on the provider-prefix path so `claude:opus` arrives
- * upstream as `claude-opus-4-6` rather than the bare `opus` (which Anthropic
+ * upstream as a full model id rather than the bare `opus` (which Anthropic
  * 400's). Critical for Cursor BYOK setups (dario#190) where users have to
  * pick a colon-prefixed model name to dodge Cursor's built-in `claude-*`
  * name collision — which means the natural shorthand is `claude:opus`, and
  * that needs to Just Work.
  */
 export function resolveClaudeAlias(model) {
-    return MODEL_ALIASES[model] ?? model;
+    return resolveAliasAgainst(model, getCachedBases()) ?? MODEL_ALIASES[model] ?? model;
 }
 // Provider prefix in the `model` field — `<provider>:<model>`. Forces
 // routing regardless of model-name regex. Only recognized prefixes are
@@ -279,6 +291,40 @@ export function stripContext1mTag(model) {
         return model;
     return model.replace(/\[1m\]$/i, '');
 }
+/**
+ * Parse upstream's effort-capability rejection:
+ *
+ *   400 {"type":"invalid_request_error","message":"This model does not
+ *        support effort level 'max'. Supported levels: high, low, medium."}
+ *
+ * Observed live 2026-06-10 on `claude-opus-4-5-20251101` — the autodetected
+ * catalog exposes models that predate the newer effort tiers, and a pinned
+ * DARIO_EFFORT (the box pins `max`) hard-400s on them. Returns the rejected
+ * level plus the model's supported set, or null when the body is some other
+ * 400. NOTE: fable's effort intolerance is different in kind — a SOFT
+ * refusal (200 + stop_reason:"refusal"), invisible to this machinery — and
+ * stays handled by its measured clamp in resolveEffort.
+ * Exported for tests.
+ */
+export function parseEffortRejection(body) {
+    const m = body.match(/does not support effort level '([^']+)'\.?\s*Supported levels:\s*([a-z,\s]+)/i);
+    if (!m)
+        return null;
+    const supported = m[2].split(',').map((s) => s.trim().toLowerCase()).filter((s) => s.length > 0);
+    return supported.length > 0 ? { rejected: m[1], supported } : null;
+}
+/**
+ * Pick the strongest effort level a model says it supports. Preference is
+ * descending capability — the caller asked for more than the model can do,
+ * so degrade as little as possible. Exported for tests.
+ */
+export const EFFORT_PREFERENCE = ['xhigh', 'max', 'high', 'medium', 'low'];
+export function bestSupportedEffort(supported) {
+    for (const e of EFFORT_PREFERENCE)
+        if (supported.includes(e))
+            return e;
+    return supported[0] ?? 'high';
+}
 /**
  * Resolve an inbound API path to its upstream target + forwarding mode.
  * Allowlist semantics — anything unlisted is 403'd (prevents SSRF through
@@ -477,7 +523,11 @@ function translateStreamChunk(line) {
     catch { }
     return null;
 }
-export const OPENAI_MODELS_LIST = { object: 'list', data: ['claude-fable-5', 'claude-fable-5[1m]', 'claude-opus-4-8', 'claude-opus-4-7', 'claude-opus-4-6', 'claude-sonnet-4-6', 'claude-haiku-4-5'].map(id => ({ id, object: 'model', created: 1700000000, owned_by: 'anthropic' })) };
+// Baked /v1/models payload — what the proxy advertises before (or without)
+// a successful upstream catalog fetch. The live route serves the
+// autodetected catalog (model-catalog.ts); `[1m]` variants are GENERATED by
+// the one shared long-context rule, never hand-listed per model.
+export const OPENAI_MODELS_LIST = buildOpenAIModelsList(withLongContextVariants(BAKED_BASE_MODELS));
 /**
  * Append a JSON-ND line to the proxy log file. No-op when stream is
  * null (logFile not configured). Errors are swallowed — log writes
@@ -832,7 +882,7 @@ export async function startProxy(opts = {}) {
     const modelPrefix = opts.model ? parseProviderPrefix(opts.model) : null;
     const cliModelRaw = modelPrefix ? modelPrefix.model : opts.model;
     const cliProviderOverride = modelPrefix ? modelPrefix.provider : null;
-    const modelOverride = cliModelRaw ? (MODEL_ALIASES[cliModelRaw] ?? cliModelRaw) : null;
+    const modelOverride = cliModelRaw ? resolveClaudeAlias(cliModelRaw) : null;
     const identity = loadClaudeIdentity();
     if (identity.deviceId) {
         console.log('  Device identity: detected');
@@ -903,6 +953,12 @@ export async function startProxy(opts = {}) {
     // re-pay the 400 round-trip. Keyed by account alias (pool) or `__default__`.
     const unavailableBetas = new Map();
     const ACCOUNT_KEY_SINGLE = '__default__';
+    // Per-model effort capability cache — same pay-the-round-trip-once pattern
+    // as context1mUnavailable, but keyed by WIRE MODEL id: effort support is a
+    // model property, not an account property. Populated from upstream's
+    // "does not support effort level" 400 (see parseEffortRejection); consulted
+    // up front at body-build time so capped models never re-pay the rejection.
+    const effortSupportByModel = new Map();
     // Beta flag set — sourced from the live template when the capture recorded
     // one (schema v2+), else falls back to the v2.1.104 bundled default. Same
     // fallback string shim/runtime.cjs uses (kept in sync so proxy and shim
@@ -1020,7 +1076,17 @@ export async function startProxy(opts = {}) {
         ...SECURITY_HEADERS,
     };
     const JSON_HEADERS = { 'Content-Type': 'application/json', ...SECURITY_HEADERS };
-    const MODELS_JSON = JSON.stringify(OPENAI_MODELS_LIST);
+    // Model catalog wiring — /v1/models serves the upstream-autodetected set,
+    // authenticated the same way the request path is (per-token API key when
+    // ANTHROPIC_UPSTREAM_API_KEY is set, OAuth bearer otherwise). Prewarmed so
+    // the first client call is answered from cache; every failure path inside
+    // getModelCatalog falls back to the baked list, so the route always 200s.
+    const catalogDeps = {
+        upstreamApiKey: upstreamApiKey || undefined,
+        getToken: getAccessToken,
+        log: verbose ? (m) => console.log(m) : undefined,
+    };
+    prewarmModelCatalog(catalogDeps);
     const ERR_UNAUTH = JSON.stringify({ error: 'Unauthorized', message: 'Invalid or missing API key' });
     const ERR_FORBIDDEN = JSON.stringify({ error: 'Forbidden', message: 'Path not allowed. Supported paths: POST /v1/messages, POST /v1/messages/count_tokens, POST /v1/chat/completions, GET /v1/models' });
     const ERR_METHOD = JSON.stringify({ error: 'Method not allowed' });
@@ -1246,8 +1312,13 @@ export async function startProxy(opts = {}) {
         }
         if (urlPath === '/v1/models' && req.method === 'GET') {
             requestCount++;
+            // Upstream-autodetected catalog (TTL-cached, baked fallback — never
+            // throws). [1m] variants come from the shared long-context rule, so
+            // every family advertises its 1M form the same way.
+            const catalog = await getModelCatalog(catalogDeps);
+            const body = JSON.stringify(buildOpenAIModelsList(withLongContextVariants(catalog.bases)));
             res.writeHead(200, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
-            res.end(MODELS_JSON);
+            res.end(body);
             return;
         }
         // Detect OpenAI-format requests
@@ -1651,6 +1722,18 @@ export async function startProxy(opts = {}) {
                         // does on /v1/messages.
                         r.model = stripContext1mTag(r.model);
                     }
+                    // Effort capability clamp — when a prior request taught us this
+                    // model's supported effort set (autodetected catalogs expose
+                    // models that predate newer tiers), rewrite output_config.effort
+                    // up front instead of re-paying the 400 round-trip. In-place value
+                    // mutation: field order (a fingerprint surface) is untouched.
+                    if (typeof r.model === 'string') {
+                        const supportedEfforts = effortSupportByModel.get(r.model);
+                        const oc = r.output_config;
+                        if (supportedEfforts && oc && typeof oc.effort === 'string' && !supportedEfforts.includes(oc.effort)) {
+                            oc.effort = bestSupportedEffort(supportedEfforts);
+                        }
+                    }
                     finalBody = Buffer.from(JSON.stringify(r));
                 }
                 catch { /* not JSON, send as-is */ }
@@ -1928,6 +2011,67 @@ export async function startProxy(opts = {}) {
                             }
                         }
                     }
+                    else if (upstream.status === 400 && parseEffortRejection(peekedBody) && finalBody) {
+                        // Effort-capability rejection — the model predates the requested
+                        // effort tier (e.g. opus-4-5 + a DARIO_EFFORT=max pin; surfaced by
+                        // the autodetected catalog). Clamp output_config.effort to the
+                        // strongest level the error says the model supports, retry once,
+                        // and cache the supported set per model so the up-front clamp
+                        // handles every later request without the round-trip.
+                        const rejection = parseEffortRejection(peekedBody);
+                        const clamped = bestSupportedEffort(rejection.supported);
+                        let retried = false;
+                        try {
+                            const rb = JSON.parse(finalBody.toString('utf8'));
+                            const wireModel = typeof rb.model === 'string' ? rb.model : '';
+                            const oc = rb.output_config;
+                            if (wireModel && oc && typeof oc.effort === 'string') {
+                                const firstRejection = !effortSupportByModel.has(wireModel);
+                                effortSupportByModel.set(wireModel, rejection.supported);
+                                if (verbose && firstRejection)
+                                    console.log(`[dario] #${requestCount} effort '${rejection.rejected}' rejected by ${wireModel} — retrying with '${clamped}' (supported set cached per model)`);
+                                oc.effort = clamped; // in-place value mutation — field order untouched
+                                finalBody = Buffer.from(JSON.stringify(rb));
+                                const retry = await fetch(targetBase, {
+                                    method: req.method ?? 'POST',
+                                    headers: passthrough ? headers : orderHeadersForOutbound(headers),
+                                    body: new Uint8Array(finalBody),
+                                    signal: upstreamAbort.signal,
+                                });
+                                upstream = retry;
+                                peekedBody = null;
+                                retried = true;
+                                if (pool && poolAccount) {
+                                    const retrySnapshot = parseRateLimits(upstream.headers);
+                                    if (upstream.status === 429) {
+                                        pool.markRejected(poolAccount.alias, retrySnapshot);
+                                    }
+                                    else {
+                                        pool.updateRateLimits(poolAccount.alias, retrySnapshot);
+                                    }
+                                }
+                            }
+                        }
+                        catch { /* body not JSON — forward the original 400 below */ }
+                        if (!retried) {
+                            // Couldn't retry (missing model or output_config.effort, body not
+                            // JSON, or the retry fetch itself threw) — the upstream body is
+                            // already consumed, so forward it here; the chain's terminal 400
+                            // branch won't run for us.
+                            const responseHeaders = {
+                                'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
+                                'Access-Control-Allow-Origin': corsOrigin,
+                                ...SECURITY_HEADERS,
+                            };
+                            for (const [key, value] of upstream.headers.entries()) {
+                                if (key === 'request-id')
+                                    responseHeaders[key] = value;
+                            }
+                            requestCount++;
+                            res.writeHead(400, responseHeaders);
+                            res.end(peekedBody);
+                            return;
+                        }
+                    }
                     else if (isLongContextError) {
                         // Cache the rejection so future requests on this account skip
                         // context-1m up front instead of re-paying the 400/429 round-trip.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@askalf/dario",
-  "version": "4.8.56",
+  "version": "4.8.58",
   "description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
   "type": "module",
   "bin": {