@askalf/dario 4.8.57 → 4.8.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/proxy.d.ts +26 -0
- package/dist/proxy.js +113 -0
- package/package.json +1 -1
package/dist/proxy.d.ts
CHANGED
|
@@ -79,6 +79,32 @@ export declare function betaForModel(base: string, model: string | null | undefi
|
|
|
79
79
|
* very end of the id. Exported for tests.
|
|
80
80
|
*/
|
|
81
81
|
export declare function stripContext1mTag(model: string): string;
|
|
82
|
+
/**
|
|
83
|
+
* Parse upstream's effort-capability rejection:
|
|
84
|
+
*
|
|
85
|
+
* 400 {"type":"invalid_request_error","message":"This model does not
|
|
86
|
+
* support effort level 'max'. Supported levels: high, low, medium."}
|
|
87
|
+
*
|
|
88
|
+
* Observed live 2026-06-10 on `claude-opus-4-5-20251101` — the autodetected
|
|
89
|
+
* catalog exposes models that predate the newer effort tiers, and a pinned
|
|
90
|
+
* DARIO_EFFORT (the box pins `max`) hard-400s on them. Returns the rejected
|
|
91
|
+
* level plus the model's supported set, or null when the body is some other
|
|
92
|
+
* 400. NOTE: fable's effort intolerance is different in kind — a SOFT
|
|
93
|
+
* refusal (200 + stop_reason:"refusal"), invisible to this machinery — and
|
|
94
|
+
* stays handled by its measured clamp in resolveEffort.
|
|
95
|
+
* Exported for tests.
*
* @param body Raw text of the upstream response body; it is pattern-matched
* as text rather than parsed as JSON.
* @returns `rejected` is the level quoted in the error message; `supported`
* is the comma-separated list from the message, each entry trimmed and
* lower-cased. `null` when the text does not match or the list is empty.
|
|
96
|
+
*/
|
|
97
|
+
export declare function parseEffortRejection(body: string): {
|
|
98
|
+
rejected: string;
|
|
99
|
+
supported: string[];
|
|
100
|
+
} | null;
|
|
101
|
+
/**
|
|
102
|
+
* Pick the strongest effort level a model says it supports. Preference is
|
|
103
|
+
* descending capability — the caller asked for more than the model can do,
|
|
104
|
+
* so degrade as little as possible. Exported for tests.
|
|
105
|
+
*/
|
|
106
|
+
export declare const EFFORT_PREFERENCE: readonly string[];
|
|
107
|
+
/**
 * Returns the first entry of EFFORT_PREFERENCE that also appears in
 * `supported`. When none of the known levels is present it falls back to
 * `supported[0]`, and to 'high' when `supported` is empty.
 *
 * @param supported Effort levels a model reported as supported.
 * @returns The effort level to send instead of the rejected one.
 */
export declare function bestSupportedEffort(supported: readonly string[]): string;
|
|
82
108
|
/**
|
|
83
109
|
* Resolve an inbound API path to its upstream target + forwarding mode.
|
|
84
110
|
* Allowlist semantics — anything unlisted is 403'd (prevents SSRF through
|
package/dist/proxy.js
CHANGED
|
@@ -291,6 +291,40 @@ export function stripContext1mTag(model) {
|
|
|
291
291
|
return model;
|
|
292
292
|
return model.replace(/\[1m\]$/i, '');
|
|
293
293
|
}
|
|
294
|
+
/**
 * Parse upstream's effort-capability rejection:
 *
 *   400 {"type":"invalid_request_error","message":"This model does not
 *        support effort level 'max'. Supported levels: high, low, medium."}
 *
 * Observed live 2026-06-10 on `claude-opus-4-5-20251101` — the autodetected
 * catalog exposes models that predate the newer effort tiers, and a pinned
 * DARIO_EFFORT (the box pins `max`) hard-400s on them. NOTE: fable's effort
 * intolerance is different in kind — a SOFT refusal (200 +
 * stop_reason:"refusal"), invisible to this machinery — and stays handled by
 * its measured clamp in resolveEffort.
 *
 * The body is pattern-matched as raw text, not parsed as JSON, so the same
 * check works whether or not the payload is well-formed JSON.
 *
 * @param body Raw text of the upstream response body. Anything that is not a
 *   string (for example a body variable that currently holds null) is
 *   treated as "not an effort rejection" rather than raising a TypeError.
 * @returns `{ rejected, supported }` where `rejected` is the level quoted in
 *   the message (as written) and `supported` is the advertised list, each
 *   entry trimmed and lower-cased; `null` when the body is some other error
 *   or the advertised list is empty.
 *
 * Exported for tests.
 */
export function parseEffortRejection(body) {
    // This runs inside a status-check condition on the proxy path; a missing
    // body must read as "no match", not crash the request handler.
    if (typeof body !== 'string')
        return null;
    const m = body.match(/does not support effort level '([^']+)'\.?\s*Supported levels:\s*([a-z,\s]+)/i);
    if (!m)
        return null;
    const supported = m[2]
        .split(',')
        .map((s) => s.trim().toLowerCase())
        // Drop empty entries left by stray or trailing commas.
        .filter((s) => s.length > 0);
    // A matching sentence with no usable levels gives the caller nothing to
    // clamp to, so report it as "not parseable".
    return supported.length > 0 ? { rejected: m[1], supported } : null;
}
|
|
316
|
+
/**
 * Effort levels in the order bestSupportedEffort tries them, most preferred
 * first. The intent is descending capability — the caller asked for more
 * than the model can do, so degrade as little as possible.
 *
 * Frozen so the runtime value matches the `readonly string[]` type published
 * in proxy.d.ts; consumers cannot reorder the shared list by accident.
 *
 * NOTE(review): 'xhigh' is ranked ahead of 'max' — confirm this matches
 * upstream's capability ordering of those two tiers.
 *
 * Exported for tests.
 */
export const EFFORT_PREFERENCE = Object.freeze(['xhigh', 'max', 'high', 'medium', 'low']);
/**
 * Pick the strongest effort level a model says it supports.
 *
 * @param supported Effort levels the model reported as supported (expected
 *   lower-case, as produced by parseEffortRejection).
 * @returns The first EFFORT_PREFERENCE entry found in `supported`; when none
 *   of the known levels is present, the first entry of `supported`; 'high'
 *   when `supported` is empty.
 *
 * Exported for tests.
 */
export function bestSupportedEffort(supported) {
    const preferred = EFFORT_PREFERENCE.find((level) => supported.includes(level));
    // Unknown-only lists still yield something the model advertised; the
    // literal 'high' is the last resort for an empty list.
    return preferred ?? supported[0] ?? 'high';
}
|
|
294
328
|
/**
|
|
295
329
|
* Resolve an inbound API path to its upstream target + forwarding mode.
|
|
296
330
|
* Allowlist semantics — anything unlisted is 403'd (prevents SSRF through
|
|
@@ -919,6 +953,12 @@ export async function startProxy(opts = {}) {
|
|
|
919
953
|
// re-pay the 400 round-trip. Keyed by account alias (pool) or `__default__`.
|
|
920
954
|
const unavailableBetas = new Map();
|
|
921
955
|
const ACCOUNT_KEY_SINGLE = '__default__';
|
|
956
|
+
// Per-model effort capability cache — same pay-the-round-trip-once pattern
|
|
957
|
+
// as context1mUnavailable, but keyed by WIRE MODEL id: effort support is a
|
|
958
|
+
// model property, not an account property. Populated from upstream's
|
|
959
|
+
// "does not support effort level" 400 (see parseEffortRejection); consulted
|
|
960
|
+
// up front at body-build time so capped models never re-pay the rejection.
|
|
961
|
+
const effortSupportByModel = new Map();
|
|
922
962
|
// Beta flag set — sourced from the live template when the capture recorded
|
|
923
963
|
// one (schema v2+), else falls back to the v2.1.104 bundled default. Same
|
|
924
964
|
// fallback string shim/runtime.cjs uses (kept in sync so proxy and shim
|
|
@@ -1682,6 +1722,18 @@ export async function startProxy(opts = {}) {
|
|
|
1682
1722
|
// does on /v1/messages.
|
|
1683
1723
|
r.model = stripContext1mTag(r.model);
|
|
1684
1724
|
}
|
|
1725
|
+
// Effort capability clamp — when a prior request taught us this
|
|
1726
|
+
// model's supported effort set (autodetected catalogs expose
|
|
1727
|
+
// models that predate newer tiers), rewrite output_config.effort
|
|
1728
|
+
// up front instead of re-paying the 400 round-trip. In-place value
|
|
1729
|
+
// mutation: field order (a fingerprint surface) is untouched.
|
|
1730
|
+
if (typeof r.model === 'string') {
|
|
1731
|
+
const supportedEfforts = effortSupportByModel.get(r.model);
|
|
1732
|
+
const oc = r.output_config;
|
|
1733
|
+
if (supportedEfforts && oc && typeof oc.effort === 'string' && !supportedEfforts.includes(oc.effort)) {
|
|
1734
|
+
oc.effort = bestSupportedEffort(supportedEfforts);
|
|
1735
|
+
}
|
|
1736
|
+
}
|
|
1685
1737
|
finalBody = Buffer.from(JSON.stringify(r));
|
|
1686
1738
|
}
|
|
1687
1739
|
catch { /* not JSON, send as-is */ }
|
|
@@ -1959,6 +2011,67 @@ export async function startProxy(opts = {}) {
|
|
|
1959
2011
|
}
|
|
1960
2012
|
}
|
|
1961
2013
|
}
|
|
2014
|
+
else if (upstream.status === 400 && parseEffortRejection(peekedBody) && finalBody) {
|
|
2015
|
+
// Effort-capability rejection — the model predates the requested
|
|
2016
|
+
// effort tier (e.g. opus-4-5 + a DARIO_EFFORT=max pin; surfaced by
|
|
2017
|
+
// the autodetected catalog). Clamp output_config.effort to the
|
|
2018
|
+
// strongest level the error says the model supports, retry once,
|
|
2019
|
+
// and cache the supported set per model so the up-front clamp
|
|
2020
|
+
// handles every later request without the round-trip.
|
|
2021
|
+
const rejection = parseEffortRejection(peekedBody);
|
|
2022
|
+
const clamped = bestSupportedEffort(rejection.supported);
|
|
2023
|
+
let retried = false;
|
|
2024
|
+
try {
|
|
2025
|
+
const rb = JSON.parse(finalBody.toString('utf8'));
|
|
2026
|
+
const wireModel = typeof rb.model === 'string' ? rb.model : '';
|
|
2027
|
+
const oc = rb.output_config;
|
|
2028
|
+
if (wireModel && oc && typeof oc.effort === 'string') {
|
|
2029
|
+
const firstRejection = !effortSupportByModel.has(wireModel);
|
|
2030
|
+
effortSupportByModel.set(wireModel, rejection.supported);
|
|
2031
|
+
if (verbose && firstRejection)
|
|
2032
|
+
console.log(`[dario] #${requestCount} effort '${rejection.rejected}' rejected by ${wireModel} — retrying with '${clamped}' (supported set cached per model)`);
|
|
2033
|
+
oc.effort = clamped; // in-place value mutation — field order untouched
|
|
2034
|
+
finalBody = Buffer.from(JSON.stringify(rb));
|
|
2035
|
+
const retry = await fetch(targetBase, {
|
|
2036
|
+
method: req.method ?? 'POST',
|
|
2037
|
+
headers: passthrough ? headers : orderHeadersForOutbound(headers),
|
|
2038
|
+
body: new Uint8Array(finalBody),
|
|
2039
|
+
signal: upstreamAbort.signal,
|
|
2040
|
+
});
|
|
2041
|
+
upstream = retry;
|
|
2042
|
+
peekedBody = null;
|
|
2043
|
+
retried = true;
|
|
2044
|
+
if (pool && poolAccount) {
|
|
2045
|
+
const retrySnapshot = parseRateLimits(upstream.headers);
|
|
2046
|
+
if (upstream.status === 429) {
|
|
2047
|
+
pool.markRejected(poolAccount.alias, retrySnapshot);
|
|
2048
|
+
}
|
|
2049
|
+
else {
|
|
2050
|
+
pool.updateRateLimits(poolAccount.alias, retrySnapshot);
|
|
2051
|
+
}
|
|
2052
|
+
}
|
|
2053
|
+
}
|
|
2054
|
+
}
|
|
2055
|
+
catch { /* body not JSON — forward the original 400 below */ }
|
|
2056
|
+
if (!retried) {
|
|
2057
|
+
// Couldn't rebuild the body (no output_config.effort / not JSON)
|
|
2058
|
+
// — the upstream body is already consumed, so forward it here;
|
|
2059
|
+
// the chain's terminal 400 branch won't run for us.
|
|
2060
|
+
const responseHeaders = {
|
|
2061
|
+
'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
|
|
2062
|
+
'Access-Control-Allow-Origin': corsOrigin,
|
|
2063
|
+
...SECURITY_HEADERS,
|
|
2064
|
+
};
|
|
2065
|
+
for (const [key, value] of upstream.headers.entries()) {
|
|
2066
|
+
if (key === 'request-id')
|
|
2067
|
+
responseHeaders[key] = value;
|
|
2068
|
+
}
|
|
2069
|
+
requestCount++;
|
|
2070
|
+
res.writeHead(400, responseHeaders);
|
|
2071
|
+
res.end(peekedBody);
|
|
2072
|
+
return;
|
|
2073
|
+
}
|
|
2074
|
+
}
|
|
1962
2075
|
else if (isLongContextError) {
|
|
1963
2076
|
// Cache the rejection so future requests on this account skip
|
|
1964
2077
|
// context-1m up front instead of re-paying the 400/429 round-trip.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "4.8.57",
|
|
3
|
+
"version": "4.8.58",
|
|
4
4
|
"description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|