@askalf/dario 4.8.56 → 4.8.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/model-catalog.d.ts +115 -0
- package/dist/model-catalog.js +264 -0
- package/dist/proxy.d.ts +13 -5
- package/dist/proxy.js +41 -10
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -191,7 +191,7 @@ You point every tool at one URL. dario reads each request, decides which backend
|
|
|
191
191
|
|
|
192
192
|
The tool doesn't know. The backend doesn't know. dario is the seam.
|
|
193
193
|
|
|
194
|
-
**The full Claude lineup,
|
|
194
|
+
**The full Claude lineup, autodetected.** Claude Fable 5 (CC's flagship), Opus 4.8, Sonnet 4.6, and Haiku 4.5 — plus `[1m]` long-context variants, generated by one rule for every family — by full id (`claude-fable-5`, `claude-opus-4-8`) or shortcut (`fable` / `opus` / `sonnet` / `haiku`, append `1m` for the long-context form). `GET /v1/models` asks Anthropic's live catalog (TTL-cached, baked fallback when offline), and the family shortcuts track it — a new model shows up and resolves the day it lands, no dario release needed; the model-specific wire shape (effort level, beta set, thinking config) is applied automatically.
|
|
195
195
|
|
|
196
196
|
---
|
|
197
197
|
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* model-catalog.ts — upstream model autodetection with a baked fallback.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "which models does dario advertise". Two
|
|
5
|
+
* problems this solves (operator direction, 2026-06-10):
|
|
6
|
+
*
|
|
7
|
+
* 1. AUTODETECTION. `GET /v1/models` used to serve a hardcoded list that
|
|
8
|
+
* went stale every time Anthropic shipped a model (fable-5 needed a
|
|
9
|
+
* manual PR; `opus` was bumped to 4-8 in #389 while `opus1m` silently
|
|
10
|
+
* stayed on 4-7). The catalog now asks api.anthropic.com/v1/models
|
|
11
|
+
* what actually exists, TTL-cached, falling back to the baked list
|
|
12
|
+
* whenever upstream is unreachable — startup, offline, auth-broken,
|
|
13
|
+
* all serve the same baked set as before.
|
|
14
|
+
*
|
|
15
|
+
* 2. ONE METHOD FOR CONTEXT WINDOWS. The `[1m]` long-context variant was
|
|
16
|
+
* hand-sprinkled: the listing carried `claude-fable-5[1m]` but no
|
|
17
|
+
* opus/sonnet variants, while the alias map pinned each `<family>1m`
|
|
18
|
+
* to a hand-picked id. Now every family goes through the same two
|
|
19
|
+
* rules: `longContextEligible()` decides which bases take a `[1m]`
|
|
20
|
+
* variant (everything except haiku — real CC never offers 1M haiku),
|
|
21
|
+
* and `<family>1m` is DERIVED as `resolve(<family>) + '[1m]'`, so the
|
|
22
|
+
* pair can never drift apart again.
|
|
23
|
+
*
|
|
24
|
+
* The wire mechanics are unchanged and already uniform: `[1m]` is a
|
|
25
|
+
* client-side label — proxy.ts strips it and rides `context-1m-2025-08-07`
|
|
26
|
+
* on the request (see stripContext1mTag / betaForModel).
|
|
27
|
+
*/
|
|
28
|
+
/**
|
|
29
|
+
* Baked fallback — the catalog served when upstream has never answered.
|
|
30
|
+
* Base ids only ([1m] variants are generated, never stored). Order is the
|
|
31
|
+
* advertised order: family rank (fable, opus, sonnet, haiku), version desc
|
|
32
|
+
* — the same ordering normalizeUpstreamIds() produces for live data.
|
|
33
|
+
*/
|
|
34
|
+
export declare const BAKED_BASE_MODELS: readonly string[];
|
|
35
|
+
/**
|
|
36
|
+
* THE long-context rule — applied identically to every family. A base id
|
|
37
|
+
* takes a `[1m]` variant unless it's the haiku family (CC's picker never
|
|
38
|
+
* offers 1M haiku; it's also the family CC strips the effort and
|
|
39
|
+
* mid-conversation betas from). Already-tagged and non-Claude ids are
|
|
40
|
+
* never eligible.
|
|
41
|
+
*/
|
|
42
|
+
export declare function longContextEligible(id: string): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* Expand base ids into the advertised list: each eligible base is followed
|
|
45
|
+
* by its `[1m]` variant (matching the historical fable-5 / fable-5[1m]
|
|
46
|
+
* adjacency), ineligible bases pass through alone.
|
|
47
|
+
*/
|
|
48
|
+
export declare function withLongContextVariants(bases: readonly string[]): string[];
|
|
49
|
+
/** Numeric segments of a model id (`claude-opus-4-8` → [4, 8]) for version ordering. */
|
|
50
|
+
export declare function modelVersionKey(id: string): number[];
|
|
51
|
+
/**
|
|
52
|
+
* Normalize a raw upstream id listing into dario's advertised base set:
|
|
53
|
+
* - keep `claude-*` ids only (no [1m] tags — those are ours to generate)
|
|
54
|
+
* - drop legacy generations of known families (< 4; fable exempt)
|
|
55
|
+
* - prefer the CC-style short id when upstream lists both `claude-opus-4-8`
|
|
56
|
+
* and a dated `claude-opus-4-8-YYYYMMDD`; keep the dated id when it's the
|
|
57
|
+
* only form
|
|
58
|
+
* - deterministic order: family rank, then version desc, unknown families last
|
|
59
|
+
*/
|
|
60
|
+
export declare function normalizeUpstreamIds(ids: readonly string[]): string[];
|
|
61
|
+
/** Newest base id of a family within a base set, or null if absent. */
|
|
62
|
+
export declare function resolveFamilyBase(family: string, bases: readonly string[]): string | null;
|
|
63
|
+
/**
|
|
64
|
+
* Resolve a family shorthand against a base set. `<family>` → the newest
|
|
65
|
+
* base of that family; `<family>1m` → the SAME base + `[1m]` (one
|
|
66
|
+
* derivation rule for every family — `opus` and `opus1m` can't disagree).
|
|
67
|
+
* Returns null when the name isn't a family shorthand or the family is
|
|
68
|
+
* absent/ineligible — callers fall back to their static map.
|
|
69
|
+
*/
|
|
70
|
+
export declare function resolveAliasAgainst(model: string, bases: readonly string[]): string | null;
|
|
71
|
+
/** OpenAI-shape /v1/models payload for a list of advertised ids. */
|
|
72
|
+
export declare function buildOpenAIModelsList(ids: readonly string[]): {
|
|
73
|
+
object: string;
|
|
74
|
+
data: Array<{
|
|
75
|
+
id: string;
|
|
76
|
+
object: string;
|
|
77
|
+
created: number;
|
|
78
|
+
owned_by: string;
|
|
79
|
+
}>;
|
|
80
|
+
};
|
|
81
|
+
export interface ModelCatalog {
|
|
82
|
+
bases: readonly string[];
|
|
83
|
+
source: 'upstream' | 'baked';
|
|
84
|
+
fetchedAt: number;
|
|
85
|
+
}
|
|
86
|
+
export interface CatalogDeps {
|
|
87
|
+
fetchImpl?: typeof fetch;
|
|
88
|
+
/** OAuth bearer source (single-account getAccessToken). Ignored when upstreamApiKey is set. */
|
|
89
|
+
getToken?: () => Promise<string>;
|
|
90
|
+
/** Per-token API pool mode — forwarded as x-api-key, mirroring request-path auth. */
|
|
91
|
+
upstreamApiKey?: string;
|
|
92
|
+
now?: () => number;
|
|
93
|
+
log?: (msg: string) => void;
|
|
94
|
+
ttlMs?: number;
|
|
95
|
+
retryMs?: number;
|
|
96
|
+
timeoutMs?: number;
|
|
97
|
+
}
|
|
98
|
+
export declare const DEFAULT_CATALOG_TTL_MS = 3600000;
|
|
99
|
+
export declare const DEFAULT_CATALOG_RETRY_MS = 300000;
|
|
100
|
+
/**
|
|
101
|
+
* The catalog, stale-while-revalidate. Warm cache returns immediately
|
|
102
|
+
* (kicking an async refresh when past TTL); a cold start tries upstream
|
|
103
|
+
* once (bounded by timeoutMs) and falls back to the baked list. Never
|
|
104
|
+
* throws — /v1/models must always answer.
|
|
105
|
+
*/
|
|
106
|
+
export declare function getModelCatalog(deps?: CatalogDeps): Promise<ModelCatalog>;
|
|
107
|
+
/**
|
|
108
|
+
* Synchronous view for request-path alias resolution — whatever the last
|
|
109
|
+
* catalog produced, or the baked set before the first fetch completes.
|
|
110
|
+
* Never blocks the hot path on the network.
|
|
111
|
+
*/
|
|
112
|
+
export declare function getCachedBases(): readonly string[];
|
|
113
|
+
/** Fire-and-forget warmup so the first client /v1/models call is served warm. */
|
|
114
|
+
export declare function prewarmModelCatalog(deps?: CatalogDeps): void;
|
|
115
|
+
export declare function _resetModelCatalogForTest(): void;
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* model-catalog.ts — upstream model autodetection with a baked fallback.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "which models does dario advertise". Two
|
|
5
|
+
* problems this solves (operator direction, 2026-06-10):
|
|
6
|
+
*
|
|
7
|
+
* 1. AUTODETECTION. `GET /v1/models` used to serve a hardcoded list that
|
|
8
|
+
* went stale every time Anthropic shipped a model (fable-5 needed a
|
|
9
|
+
* manual PR; `opus` was bumped to 4-8 in #389 while `opus1m` silently
|
|
10
|
+
* stayed on 4-7). The catalog now asks api.anthropic.com/v1/models
|
|
11
|
+
* what actually exists, TTL-cached, falling back to the baked list
|
|
12
|
+
* whenever upstream is unreachable — startup, offline, auth-broken,
|
|
13
|
+
* all serve the same baked set as before.
|
|
14
|
+
*
|
|
15
|
+
* 2. ONE METHOD FOR CONTEXT WINDOWS. The `[1m]` long-context variant was
|
|
16
|
+
* hand-sprinkled: the listing carried `claude-fable-5[1m]` but no
|
|
17
|
+
* opus/sonnet variants, while the alias map pinned each `<family>1m`
|
|
18
|
+
* to a hand-picked id. Now every family goes through the same two
|
|
19
|
+
* rules: `longContextEligible()` decides which bases take a `[1m]`
|
|
20
|
+
* variant (everything except haiku — real CC never offers 1M haiku),
|
|
21
|
+
* and `<family>1m` is DERIVED as `resolve(<family>) + '[1m]'`, so the
|
|
22
|
+
* pair can never drift apart again.
|
|
23
|
+
*
|
|
24
|
+
* The wire mechanics are unchanged and already uniform: `[1m]` is a
|
|
25
|
+
* client-side label — proxy.ts strips it and rides `context-1m-2025-08-07`
|
|
26
|
+
* on the request (see stripContext1mTag / betaForModel).
|
|
27
|
+
*/
|
|
28
|
+
import { modelFamily } from './pool.js';
|
|
29
|
+
const ANTHROPIC_API = 'https://api.anthropic.com';
|
|
30
|
+
const ANTHROPIC_VERSION = '2023-06-01';
|
|
31
|
+
const OAUTH_BETA = 'oauth-2025-04-20';
|
|
32
|
+
/**
 * Baked fallback — the catalog served when upstream has never answered.
 * Base ids only ([1m] variants are generated, never stored). Order is the
 * advertised order: family rank (fable, opus, sonnet, haiku), version desc
 * — the same ordering normalizeUpstreamIds() produces for live data.
 *
 * getCachedBases() returns this very array (not a copy) while no catalog
 * has been cached, so callers must treat it as read-only.
 */
export const BAKED_BASE_MODELS = [
    'claude-fable-5',
    'claude-opus-4-8',
    'claude-opus-4-7',
    'claude-opus-4-6',
    'claude-sonnet-4-6',
    'claude-haiku-4-5',
];
|
|
46
|
+
/**
 * The one long-context rule, shared by every model family.
 *
 * An id gets a generated `[1m]` variant only when all three hold:
 *   - it is a Claude id (`claude-` prefix),
 *   - it is not a haiku model (CC's picker never offers 1M haiku),
 *   - it does not already end in the `[1m]` tag.
 * Matching is case-insensitive.
 *
 * @param {string} id - Model id to test.
 * @returns {boolean} `true` when a `[1m]` variant should be offered for `id`.
 */
export function longContextEligible(id) {
    const lowered = id.toLowerCase();
    if (!lowered.startsWith('claude-')) {
        return false; // non-Claude ids never take the tag
    }
    if (lowered.includes('haiku')) {
        return false; // haiku is the one excluded family
    }
    // Already-tagged ids must not be tagged twice.
    return !lowered.endsWith('[1m]');
}
|
|
57
|
+
/**
 * Build the advertised id list from a list of base ids.
 *
 * Every base is emitted in its original position; when
 * `longContextEligible()` accepts it, the `[1m]` variant follows directly
 * after (the historical `fable-5` / `fable-5[1m]` adjacency). Bases that are
 * not eligible appear alone.
 *
 * @param {readonly string[]} bases - Base model ids, in advertised order.
 * @returns {string[]} A new array; `bases` is not modified.
 */
export function withLongContextVariants(bases) {
    return bases.reduce((advertised, base) => {
        advertised.push(base);
        if (longContextEligible(base)) {
            advertised.push(`${base}[1m]`);
        }
        return advertised;
    }, []);
}
|
|
65
|
+
/**
 * Version key of a model id, used for version ordering: the numeric
 * segments of the id, with a trailing `-YYYYMMDD` snapshot stamp left out.
 *
 *   claude-opus-4-8           → [4, 8]
 *   claude-opus-4-1-20250805  → [4, 1]
 *   claude-sonnet-4-20250514  → [4]
 *
 * The stamp is excluded because keys are compared position by position. If
 * it were kept, a dated id without a minor version
 * (`claude-sonnet-4-20250514` → [4, 20250514]) would put its date in the
 * minor slot and outrank every real minor release (`claude-sonnet-4-6` →
 * [4, 6]), making the family shorthand resolve to the older model.
 *
 * @param {string} id - Model id.
 * @returns {number[]} Version segments, most significant first; empty when
 *   the id carries no digits outside the date stamp.
 */
export function modelVersionKey(id) {
    // Same date-suffix pattern normalizeUpstreamIds() uses to pair short and dated ids.
    const undated = id.replace(/-\d{8}$/, '');
    const segments = undated.match(/\d+/g);
    return segments === null ? [] : segments.map(Number);
}
|
|
70
|
+
/**
 * Comparator that orders `modelVersionKey()` outputs newest-first.
 *
 * Keys are walked position by position; a missing segment counts as -1, so
 * a shorter key sorts after a longer key that shares its prefix
 * ([4] comes after [4, 0]).
 *
 * @param {number[]} a - Version key of the first id.
 * @param {number[]} b - Version key of the second id.
 * @returns {number} Negative when `a` is the newer version, positive when
 *   `b` is, 0 when the keys are equal.
 */
function cmpVersionDesc(a, b) {
    const span = Math.max(a.length, b.length);
    let pos = 0;
    while (pos < span) {
        const left = a[pos] ?? -1;
        const right = b[pos] ?? -1;
        if (left !== right) {
            return right - left;
        }
        pos += 1;
    }
    return 0;
}
|
|
80
|
+
// Position of each known family in the advertised list: flagship first,
// then the big families. A family missing from this table sorts after all
// of them (rank 99 below) but is still advertised, so a brand-new family
// appears on the next catalog refresh without a dario release.
const FAMILY_RANK = { fable: 0, opus: 1, sonnet: 2, haiku: 3 };
// Oldest generation of a known family that is still advertised; older ids
// (claude-3-x etc.) are dropped. fable is exempt because its version line
// is independent (fable-5).
const MIN_GENERATION = 4;
/**
 * Turn a raw upstream id listing into dario's advertised base set.
 *
 * Per id, in input order:
 *   1. keep only strings that start with `claude-` (case-insensitive) and
 *      contain no `[` — tagged variants are generated, never taken from upstream;
 *   2. for ids whose family is known and not fable, drop those whose leading
 *      version segment is below MIN_GENERATION;
 *   3. pair the short id with its `-YYYYMMDD` dated form: the short form wins
 *      when both exist, otherwise the first form seen is kept.
 * The survivors are then sorted by family rank, then version descending.
 *
 * Family lookup is delegated to `modelFamily` from ./pool.js; this code
 * treats a `null` result as "unknown family".
 *
 * @param {readonly string[]} ids - Ids as listed by upstream.
 * @returns {string[]} Normalized base ids in advertised order.
 */
export function normalizeUpstreamIds(ids) {
    const chosenByKey = new Map();
    for (const id of ids) {
        const looksLikeBase = typeof id === 'string' && /^claude-/i.test(id) && !id.includes('[');
        if (!looksLikeBase) {
            continue;
        }
        const family = modelFamily(id);
        const generationChecked = family !== null && family !== 'fable';
        if (generationChecked && (modelVersionKey(id)[0] ?? 0) < MIN_GENERATION) {
            continue;
        }
        // Key = lower-cased id without the date stamp, shared by the short and dated forms.
        const key = id.replace(/-\d{8}$/, '').toLowerCase();
        const current = chosenByKey.get(key);
        const idIsShortForm = id.toLowerCase() === key;
        if (current === undefined || (idIsShortForm && current.toLowerCase() !== key)) {
            chosenByKey.set(key, id); // first sighting, or short form replacing a dated duplicate
        }
    }
    const rankOf = (id) => FAMILY_RANK[modelFamily(id) ?? ''] ?? 99;
    return Array.from(chosenByKey.values()).sort((a, b) => {
        const byFamily = rankOf(a) - rankOf(b);
        return byFamily !== 0 ? byFamily : cmpVersionDesc(modelVersionKey(a), modelVersionKey(b));
    });
}
|
|
124
|
+
/**
 * Pick the newest base id of one family out of a base set.
 *
 * Only untagged ids (no `[`) whose `modelFamily()` equals `family` are
 * considered. "Newest" is decided by `cmpVersionDesc` over
 * `modelVersionKey`; when several candidates compare equal, the one that
 * appears first in `bases` is returned.
 *
 * @param {string} family - Family name, e.g. `opus`.
 * @param {readonly string[]} bases - Base ids to search.
 * @returns {string | null} The newest matching id, or `null` when the family is absent.
 */
export function resolveFamilyBase(family, bases) {
    const sameFamily = bases.filter((id) => modelFamily(id) === family && !id.includes('['));
    if (sameFamily.length === 0) {
        return null;
    }
    return sameFamily.reduce((newest, candidate) => {
        // Replace only on a strictly newer version so earlier entries win ties.
        const candidateIsNewer = cmpVersionDesc(modelVersionKey(candidate), modelVersionKey(newest)) < 0;
        return candidateIsNewer ? candidate : newest;
    });
}
|
|
131
|
+
const FAMILY_ALIASES = new Set(['fable', 'opus', 'sonnet', 'haiku']);
|
|
132
|
+
/**
|
|
133
|
+
* Resolve a family shorthand against a base set. `<family>` → the newest
|
|
134
|
+
* base of that family; `<family>1m` → the SAME base + `[1m]` (one
|
|
135
|
+
* derivation rule for every family — `opus` and `opus1m` can't disagree).
|
|
136
|
+
* Returns null when the name isn't a family shorthand or the family is
|
|
137
|
+
* absent/ineligible — callers fall back to their static map.
|
|
138
|
+
*/
|
|
139
|
+
export function resolveAliasAgainst(model, bases) {
|
|
140
|
+
const m = model.toLowerCase().trim();
|
|
141
|
+
if (FAMILY_ALIASES.has(m))
|
|
142
|
+
return resolveFamilyBase(m, bases);
|
|
143
|
+
const match = m.match(/^([a-z]+)1m$/);
|
|
144
|
+
if (match !== null && FAMILY_ALIASES.has(match[1])) {
|
|
145
|
+
const base = resolveFamilyBase(match[1], bases);
|
|
146
|
+
return base !== null && longContextEligible(base) ? `${base}[1m]` : null;
|
|
147
|
+
}
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
150
|
+
/**
 * Wrap advertised ids in the OpenAI-style `/v1/models` list payload.
 *
 * Each id becomes one `model` entry with a fixed `created` timestamp
 * (1700000000) and `owned_by: 'anthropic'`; entries keep the order of `ids`.
 *
 * @param {readonly string[]} ids - Advertised model ids.
 * @returns {{ object: string, data: Array<{ id: string, object: string, created: number, owned_by: string }> }}
 */
export function buildOpenAIModelsList(ids) {
    const toEntry = (id) => {
        return { id: id, object: 'model', created: 1700000000, owned_by: 'anthropic' };
    };
    const payload = { object: 'list', data: ids.map(toEntry) };
    return payload;
}
|
|
157
|
+
// How long an upstream catalog counts as fresh: 1h — model launches are rare.
export const DEFAULT_CATALOG_TTL_MS = 3_600_000;
// Minimum gap between fetch attempts (failed-fetch backoff): 5min.
export const DEFAULT_CATALOG_RETRY_MS = 300_000;
// Abort an upstream catalog request after this long unless deps.timeoutMs says otherwise.
const DEFAULT_FETCH_TIMEOUT_MS = 4_000;
// Module-level catalog state, cleared by _resetModelCatalogForTest():
let cache = null; // last catalog produced (upstream or baked); null until the first getModelCatalog()
let lastAttempt = 0; // clock reading taken when the most recent refresh started
let inflight = null; // promise of the background refresh currently running, if any
/**
 * Read a positive number from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} dflt - Value used when the variable is unset, empty,
 *   not numeric, not finite, or not greater than zero.
 * @returns {number} The parsed value (not rounded) or `dflt`.
 */
function envInt(name, dflt) {
    const parsed = Number(process.env[name]);
    if (!Number.isFinite(parsed) || parsed <= 0) {
        return dflt;
    }
    return parsed;
}
|
|
167
|
+
async function fetchUpstreamBases(deps) {
|
|
168
|
+
const f = deps.fetchImpl ?? fetch;
|
|
169
|
+
const headers = {
|
|
170
|
+
accept: 'application/json',
|
|
171
|
+
'anthropic-version': ANTHROPIC_VERSION,
|
|
172
|
+
};
|
|
173
|
+
if (deps.upstreamApiKey) {
|
|
174
|
+
headers['x-api-key'] = deps.upstreamApiKey;
|
|
175
|
+
}
|
|
176
|
+
else {
|
|
177
|
+
if (!deps.getToken)
|
|
178
|
+
throw new Error('no token source for catalog fetch');
|
|
179
|
+
headers['authorization'] = `Bearer ${await deps.getToken()}`;
|
|
180
|
+
headers['anthropic-beta'] = OAUTH_BETA;
|
|
181
|
+
}
|
|
182
|
+
const ctl = new AbortController();
|
|
183
|
+
const timer = setTimeout(() => ctl.abort(), deps.timeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS);
|
|
184
|
+
try {
|
|
185
|
+
const res = await f(`${ANTHROPIC_API}/v1/models?limit=100`, { headers, signal: ctl.signal });
|
|
186
|
+
if (!res.ok)
|
|
187
|
+
throw new Error(`upstream /v1/models ${res.status}`);
|
|
188
|
+
const json = (await res.json());
|
|
189
|
+
const ids = (json.data ?? [])
|
|
190
|
+
.map((d) => d?.id)
|
|
191
|
+
.filter((x) => typeof x === 'string');
|
|
192
|
+
const bases = normalizeUpstreamIds(ids);
|
|
193
|
+
if (bases.length === 0)
|
|
194
|
+
throw new Error('upstream /v1/models returned no usable claude ids');
|
|
195
|
+
return bases;
|
|
196
|
+
}
|
|
197
|
+
finally {
|
|
198
|
+
clearTimeout(timer);
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
/**
 * Run one upstream fetch and, on success, install the result as the cache.
 *
 * `lastAttempt` is stamped before the fetch starts, so a failed attempt
 * still counts toward the retry backoff. On failure the error from
 * `fetchUpstreamBases` propagates and the previous cache is left untouched.
 *
 * @param {object} deps - Catalog dependencies (`now`, `log`, plus everything
 *   `fetchUpstreamBases` reads).
 * @returns {Promise<void>}
 */
async function refresh(deps) {
    const clock = deps.now ?? Date.now;
    lastAttempt = clock();
    const fetched = await fetchUpstreamBases(deps);
    const fetchedAt = clock();
    cache = { bases: fetched, source: 'upstream', fetchedAt };
    deps.log?.(`[dario] model catalog: autodetected ${fetched.length} base models upstream`);
}
|
|
208
|
+
/**
 * Start a background refresh of the catalog when one is due.
 *
 * Nothing is started when any of these hold:
 *   - the cache came from upstream and is younger than the TTL
 *     (`deps.ttlMs`, else `DARIO_MODEL_CATALOG_TTL_MS`, else the default);
 *   - a background refresh is already in flight;
 *   - the last attempt is more recent than the retry backoff.
 * A failed refresh is logged and the current cache is kept.
 *
 * @param {object} deps - Catalog dependencies.
 * @returns {void} Never waits for the refresh.
 */
function maybeRefreshInBackground(deps) {
    const currentTime = (deps.now ?? Date.now)();
    const ttl = deps.ttlMs ?? envInt('DARIO_MODEL_CATALOG_TTL_MS', DEFAULT_CATALOG_TTL_MS);
    const backoff = deps.retryMs ?? DEFAULT_CATALOG_RETRY_MS;
    const stillFresh = cache !== null && cache.source === 'upstream' && currentTime - cache.fetchedAt < ttl;
    if (stillFresh) {
        return;
    }
    if (inflight !== null) {
        return;
    }
    if (currentTime - lastAttempt < backoff) {
        return;
    }
    // refresh() is invoked synchronously here; the marker is cleared once it settles either way.
    inflight = (async () => {
        try {
            await refresh(deps);
        }
        catch (err) {
            deps.log?.(`[dario] model catalog refresh failed: ${err.message} — keeping ${cache?.source ?? 'baked'} list`);
        }
        finally {
            inflight = null;
        }
    })();
}
|
|
223
|
+
/**
 * Return the model catalog, stale-while-revalidate.
 *
 * Warm path: an existing cache is returned at once, after giving
 * `maybeRefreshInBackground` the chance to start an async refresh.
 *
 * Cold path: if the retry backoff allows, one upstream refresh is awaited;
 * a failure is logged, not thrown. If there is still no cache afterwards,
 * the baked list is installed (with `fetchedAt: 0`) and returned, so
 * `/v1/models` always has an answer.
 *
 * @param {object} [deps] - Catalog dependencies.
 * @returns {Promise<{ bases: readonly string[], source: 'upstream' | 'baked', fetchedAt: number }>}
 */
export async function getModelCatalog(deps = {}) {
    if (cache === null) {
        const startedAt = (deps.now ?? Date.now)();
        const backoff = deps.retryMs ?? DEFAULT_CATALOG_RETRY_MS;
        const attemptAllowed = startedAt - lastAttempt >= backoff;
        if (attemptAllowed) {
            try {
                await refresh(deps);
            }
            catch (err) {
                deps.log?.(`[dario] model catalog fetch failed: ${err.message} — serving baked list`);
            }
        }
        // Still empty (refresh skipped or failed): fall back to a copy of the baked list.
        if (cache === null) {
            cache = { bases: [...BAKED_BASE_MODELS], source: 'baked', fetchedAt: 0 };
        }
        return cache;
    }
    maybeRefreshInBackground(deps);
    return cache;
}
|
|
248
|
+
/**
 * Synchronous, network-free view of the current base ids, for alias
 * resolution on the request path.
 *
 * @returns {readonly string[]} The bases of the last catalog produced, or
 *   `BAKED_BASE_MODELS` itself while no catalog has been cached.
 */
export function getCachedBases() {
    if (cache === null || cache === undefined) {
        return BAKED_BASE_MODELS;
    }
    return cache.bases ?? BAKED_BASE_MODELS;
}
|
|
256
|
+
/** Fire-and-forget warmup so the first client /v1/models call is served warm. */
|
|
257
|
+
export function prewarmModelCatalog(deps = {}) {
|
|
258
|
+
void getModelCatalog(deps);
|
|
259
|
+
}
|
|
260
|
+
/**
 * Test hook: put the module-level catalog state back to its initial values
 * (no cache, no recorded attempt, no refresh in flight). A refresh that is
 * already running is not cancelled; only the reference to it is dropped.
 */
export function _resetModelCatalogForTest() {
    inflight = null;
    lastAttempt = 0;
    cache = null;
}
|
package/dist/proxy.d.ts
CHANGED
|
@@ -2,11 +2,19 @@ import { type IncomingMessage } from 'node:http';
|
|
|
2
2
|
import { type WriteStream } from 'node:fs';
|
|
3
3
|
import { type EffortValue } from './cc-template.js';
|
|
4
4
|
/**
|
|
5
|
-
* Resolve a Claude-side model name through
|
|
6
|
-
* alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through
|
|
5
|
+
* Resolve a Claude-side model name through the family-alias rules if it's a
|
|
6
|
+
* short alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through
|
|
7
|
+
* unchanged.
|
|
8
|
+
*
|
|
9
|
+
* Family shorthands resolve against the live model catalog: `<family>` is
|
|
10
|
+
* the newest base of that family, and `<family>1m` DERIVES from that same
|
|
11
|
+
* base + `[1m]` — one rule for every family, so the pair can't drift apart
|
|
12
|
+
* (pre-catalog, #389 bumped `opus` to 4-8 while `opus1m` silently stayed on
|
|
13
|
+
* 4-7). Before the first catalog fetch the baked set produces the same
|
|
14
|
+
* answers as the static map; the map stays as the last-resort fallback.
|
|
7
15
|
*
|
|
8
16
|
* Used at request time on the provider-prefix path so `claude:opus` arrives
|
|
9
|
-
* upstream as
|
|
17
|
+
* upstream as a full model id rather than the bare `opus` (which Anthropic
|
|
10
18
|
* 400's). Critical for Cursor BYOK setups (dario#190) where users have to
|
|
11
19
|
* pick a colon-prefixed model name to dodge Cursor's built-in `claude-*`
|
|
12
20
|
* name collision — which means the natural shorthand is `claude:opus`, and
|
|
@@ -113,12 +121,12 @@ export declare function buildOrchestrationPatterns(preserveTags?: Set<string>):
|
|
|
113
121
|
export declare function sanitizeMessages(body: Record<string, unknown>, preserveTags?: Set<string>): void;
|
|
114
122
|
export declare const OPENAI_MODELS_LIST: {
|
|
115
123
|
object: string;
|
|
116
|
-
data: {
|
|
124
|
+
data: Array<{
|
|
117
125
|
id: string;
|
|
118
126
|
object: string;
|
|
119
127
|
created: number;
|
|
120
128
|
owned_by: string;
|
|
121
|
-
}
|
|
129
|
+
}>;
|
|
122
130
|
};
|
|
123
131
|
interface ProxyOptions {
|
|
124
132
|
port?: number;
|
package/dist/proxy.js
CHANGED
|
@@ -17,6 +17,7 @@ import { loadAllAccounts, loadAccount, refreshAccountToken, resyncLoginFromCrede
|
|
|
17
17
|
import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
|
|
18
18
|
import { RequestQueue, QueueFullError, QueueTimeoutError, DEFAULT_MAX_CONCURRENT, DEFAULT_MAX_QUEUED, DEFAULT_QUEUE_TIMEOUT_MS } from './request-queue.js';
|
|
19
19
|
import { redactSecrets } from './redact.js';
|
|
20
|
+
import { BAKED_BASE_MODELS, withLongContextVariants, buildOpenAIModelsList, getModelCatalog, getCachedBases, resolveAliasAgainst, prewarmModelCatalog } from './model-catalog.js';
|
|
20
21
|
const ANTHROPIC_API = 'https://api.anthropic.com';
|
|
21
22
|
const DEFAULT_PORT = 3456;
|
|
22
23
|
const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MB — generous for large prompts, prevents abuse
|
|
@@ -133,31 +134,42 @@ function loadClaudeIdentity() {
|
|
|
133
134
|
}
|
|
134
135
|
return { deviceId: '', accountUuid: '' };
|
|
135
136
|
}
|
|
136
|
-
// Model shortcuts — users can pass short names
|
|
137
|
+
// Model shortcuts — users can pass short names. Family shorthands
|
|
138
|
+
// (`opus`, `opus1m`, …) resolve DYNAMICALLY against the model catalog in
|
|
139
|
+
// resolveClaudeAlias — this static map is the offline fallback plus the
|
|
140
|
+
// deliberate legacy version pins (`opus47`/`opus46`), which never float.
|
|
137
141
|
const MODEL_ALIASES = {
|
|
138
142
|
'fable': 'claude-fable-5',
|
|
139
143
|
'fable1m': 'claude-fable-5[1m]',
|
|
140
144
|
'opus': 'claude-opus-4-8',
|
|
141
145
|
'opus47': 'claude-opus-4-7',
|
|
142
146
|
'opus46': 'claude-opus-4-6',
|
|
143
|
-
'opus1m': 'claude-opus-4-7[1m]',
|
|
147
|
+
'opus1m': 'claude-opus-4-8[1m]',
|
|
144
148
|
'sonnet': 'claude-sonnet-4-6',
|
|
145
149
|
'sonnet1m': 'claude-sonnet-4-6[1m]',
|
|
146
150
|
'haiku': 'claude-haiku-4-5',
|
|
147
151
|
};
|
|
148
152
|
/**
|
|
149
|
-
* Resolve a Claude-side model name through
|
|
150
|
-
* alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through
|
|
153
|
+
* Resolve a Claude-side model name through the family-alias rules if it's a
|
|
154
|
+
* short alias (`opus`/`sonnet`/`haiku`/etc.), otherwise pass through
|
|
155
|
+
* unchanged.
|
|
156
|
+
*
|
|
157
|
+
* Family shorthands resolve against the live model catalog: `<family>` is
|
|
158
|
+
* the newest base of that family, and `<family>1m` DERIVES from that same
|
|
159
|
+
* base + `[1m]` — one rule for every family, so the pair can't drift apart
|
|
160
|
+
* (pre-catalog, #389 bumped `opus` to 4-8 while `opus1m` silently stayed on
|
|
161
|
+
* 4-7). Before the first catalog fetch the baked set produces the same
|
|
162
|
+
* answers as the static map; the map stays as the last-resort fallback.
|
|
151
163
|
*
|
|
152
164
|
* Used at request time on the provider-prefix path so `claude:opus` arrives
|
|
153
|
-
* upstream as
|
|
165
|
+
* upstream as a full model id rather than the bare `opus` (which Anthropic
|
|
154
166
|
* 400's). Critical for Cursor BYOK setups (dario#190) where users have to
|
|
155
167
|
* pick a colon-prefixed model name to dodge Cursor's built-in `claude-*`
|
|
156
168
|
* name collision — which means the natural shorthand is `claude:opus`, and
|
|
157
169
|
* that needs to Just Work.
|
|
158
170
|
*/
|
|
159
171
|
export function resolveClaudeAlias(model) {
|
|
160
|
-
return MODEL_ALIASES[model] ?? model;
|
|
172
|
+
return resolveAliasAgainst(model, getCachedBases()) ?? MODEL_ALIASES[model] ?? model;
|
|
161
173
|
}
|
|
162
174
|
// Provider prefix in the `model` field — `<provider>:<model>`. Forces
|
|
163
175
|
// routing regardless of model-name regex. Only recognized prefixes are
|
|
@@ -477,7 +489,11 @@ function translateStreamChunk(line) {
|
|
|
477
489
|
catch { }
|
|
478
490
|
return null;
|
|
479
491
|
}
|
|
480
|
-
|
|
492
|
+
// Baked /v1/models payload — what the proxy advertises before (or without)
|
|
493
|
+
// a successful upstream catalog fetch. The live route serves the
|
|
494
|
+
// autodetected catalog (model-catalog.ts); `[1m]` variants are GENERATED by
|
|
495
|
+
// the one shared long-context rule, never hand-listed per model.
|
|
496
|
+
export const OPENAI_MODELS_LIST = buildOpenAIModelsList(withLongContextVariants(BAKED_BASE_MODELS));
|
|
481
497
|
/**
|
|
482
498
|
* Append a JSON-ND line to the proxy log file. No-op when stream is
|
|
483
499
|
* null (logFile not configured). Errors are swallowed — log writes
|
|
@@ -832,7 +848,7 @@ export async function startProxy(opts = {}) {
|
|
|
832
848
|
const modelPrefix = opts.model ? parseProviderPrefix(opts.model) : null;
|
|
833
849
|
const cliModelRaw = modelPrefix ? modelPrefix.model : opts.model;
|
|
834
850
|
const cliProviderOverride = modelPrefix ? modelPrefix.provider : null;
|
|
835
|
-
const modelOverride = cliModelRaw ? (
|
|
851
|
+
const modelOverride = cliModelRaw ? resolveClaudeAlias(cliModelRaw) : null;
|
|
836
852
|
const identity = loadClaudeIdentity();
|
|
837
853
|
if (identity.deviceId) {
|
|
838
854
|
console.log(' Device identity: detected');
|
|
@@ -1020,7 +1036,17 @@ export async function startProxy(opts = {}) {
|
|
|
1020
1036
|
...SECURITY_HEADERS,
|
|
1021
1037
|
};
|
|
1022
1038
|
const JSON_HEADERS = { 'Content-Type': 'application/json', ...SECURITY_HEADERS };
|
|
1023
|
-
|
|
1039
|
+
// Model catalog wiring — /v1/models serves the upstream-autodetected set,
|
|
1040
|
+
// authenticated the same way the request path is (per-token API key when
|
|
1041
|
+
// ANTHROPIC_UPSTREAM_API_KEY is set, OAuth bearer otherwise). Prewarmed so
|
|
1042
|
+
// the first client call is answered from cache; every failure path inside
|
|
1043
|
+
// getModelCatalog falls back to the baked list, so the route always 200s.
|
|
1044
|
+
const catalogDeps = {
|
|
1045
|
+
upstreamApiKey: upstreamApiKey || undefined,
|
|
1046
|
+
getToken: getAccessToken,
|
|
1047
|
+
log: verbose ? (m) => console.log(m) : undefined,
|
|
1048
|
+
};
|
|
1049
|
+
prewarmModelCatalog(catalogDeps);
|
|
1024
1050
|
const ERR_UNAUTH = JSON.stringify({ error: 'Unauthorized', message: 'Invalid or missing API key' });
|
|
1025
1051
|
const ERR_FORBIDDEN = JSON.stringify({ error: 'Forbidden', message: 'Path not allowed. Supported paths: POST /v1/messages, POST /v1/messages/count_tokens, POST /v1/chat/completions, GET /v1/models' });
|
|
1026
1052
|
const ERR_METHOD = JSON.stringify({ error: 'Method not allowed' });
|
|
@@ -1246,8 +1272,13 @@ export async function startProxy(opts = {}) {
|
|
|
1246
1272
|
}
|
|
1247
1273
|
if (urlPath === '/v1/models' && req.method === 'GET') {
|
|
1248
1274
|
requestCount++;
|
|
1275
|
+
// Upstream-autodetected catalog (TTL-cached, baked fallback — never
|
|
1276
|
+
// throws). [1m] variants come from the shared long-context rule, so
|
|
1277
|
+
// every family advertises its 1M form the same way.
|
|
1278
|
+
const catalog = await getModelCatalog(catalogDeps);
|
|
1279
|
+
const body = JSON.stringify(buildOpenAIModelsList(withLongContextVariants(catalog.bases)));
|
|
1249
1280
|
res.writeHead(200, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
|
|
1250
|
-
res.end(
|
|
1281
|
+
res.end(body);
|
|
1251
1282
|
return;
|
|
1252
1283
|
}
|
|
1253
1284
|
// Detect OpenAI-format requests
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "4.8.56",
|
|
3
|
+
"version": "4.8.57",
|
|
4
4
|
"description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|