@askalf/dario 4.8.57 → 4.8.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -282,7 +282,7 @@ export declare function _resetInstalledVersionProbeForTest(): void;
282
282
  */
283
283
  export declare const SUPPORTED_CC_RANGE: {
284
284
  readonly min: "1.0.0";
285
- readonly maxTested: "2.1.170";
285
+ readonly maxTested: "2.1.172";
286
286
  };
287
287
  /**
288
288
  * Compare two dotted-numeric version strings. Returns negative if `a<b`,
@@ -786,7 +786,7 @@ export function _resetInstalledVersionProbeForTest() {
786
786
  */
787
787
  export const SUPPORTED_CC_RANGE = {
788
788
  min: '1.0.0',
789
- maxTested: '2.1.170',
789
+ maxTested: '2.1.172',
790
790
  };
791
791
  /**
792
792
  * Compare two dotted-numeric version strings. Returns negative if `a<b`,
package/dist/proxy.d.ts CHANGED
@@ -79,6 +79,32 @@ export declare function betaForModel(base: string, model: string | null | undefi
79
79
  * very end of the id. Exported for tests.
80
80
  */
81
81
  export declare function stripContext1mTag(model: string): string;
82
+ /**
83
+ * Parse upstream's effort-capability rejection:
84
+ *
85
+ * 400 {"type":"invalid_request_error","message":"This model does not
86
+ * support effort level 'max'. Supported levels: high, low, medium."}
87
+ *
88
+ * Observed live 2026-06-10 on `claude-opus-4-5-20251101` — the autodetected
89
+ * catalog exposes models that predate the newer effort tiers, and a pinned
90
+ * DARIO_EFFORT (the box pins `max`) hard-400s on them. Returns the rejected
91
+ * level plus the model's supported set, or null when the body is some other
92
+ * 400. NOTE: fable's effort intolerance is different in kind — a SOFT
93
+ * refusal (200 + stop_reason:"refusal"), invisible to this machinery — and
94
+ * stays handled by its measured clamp in resolveEffort.
95
+ * Exported for tests.
96
+ */
97
+ export declare function parseEffortRejection(body: string): {
98
+ rejected: string;
99
+ supported: string[];
100
+ } | null;
101
+ /**
102
+ * Pick the strongest effort level a model says it supports. Preference is
103
+ * descending capability — the caller asked for more than the model can do,
104
+ * so degrade as little as possible. Exported for tests.
105
+ */
106
+ export declare const EFFORT_PREFERENCE: readonly string[];
107
+ export declare function bestSupportedEffort(supported: readonly string[]): string;
82
108
  /**
83
109
  * Resolve an inbound API path to its upstream target + forwarding mode.
84
110
  * Allowlist semantics — anything unlisted is 403'd (prevents SSRF through
package/dist/proxy.js CHANGED
@@ -291,6 +291,40 @@ export function stripContext1mTag(model) {
291
291
  return model;
292
292
  return model.replace(/\[1m\]$/i, '');
293
293
  }
294
/**
 * Parse upstream's effort-capability rejection:
 *
 *   400 {"type":"invalid_request_error","message":"This model does not
 *   support effort level 'max'. Supported levels: high, low, medium."}
 *
 * Observed live 2026-06-10 on `claude-opus-4-5-20251101` — the autodetected
 * catalog exposes models that predate the newer effort tiers, and a pinned
 * DARIO_EFFORT (the box pins `max`) hard-400s on them.
 * NOTE: fable's effort intolerance is different in kind — a SOFT
 * refusal (200 + stop_reason:"refusal"), invisible to this machinery — and
 * stays handled by its measured clamp in resolveEffort.
 * Exported for tests.
 *
 * @param body Raw text of the upstream response body. Anything that is not a
 *   string (e.g. a body that was never peeked) is treated as "not this error".
 * @returns The rejected level exactly as quoted by upstream plus the model's
 *   supported levels (trimmed, lower-cased, in upstream's order), or null when
 *   the body is some other 400 or no usable level could be extracted.
 */
export function parseEffortRejection(body) {
    // A missing / non-text body cannot be the effort rejection; returning null
    // here keeps callers from crashing on `.match` of null.
    if (typeof body !== 'string')
        return null;
    // The level list is captured on a single line only (spaces/tabs, no
    // newlines) so prose that follows on the next line is not glued onto the
    // last level.
    const m = body.match(/does not support effort level '([^']+)'\.?\s*Supported levels:\s*([a-z0-9_,\- \t]+)/i);
    if (!m)
        return null;
    const supported = m[2]
        .split(',')
        .map((s) => s.trim().toLowerCase())
        // Keep only single-word tokens: drops empties and any trailing
        // same-line prose that is not a level name.
        .filter((s) => /^[a-z0-9_-]+$/.test(s));
    return supported.length > 0 ? { rejected: m[1], supported } : null;
}
316
/**
 * Effort levels in descending capability order. `max` is placed ahead of
 * `xhigh` because upstream's effort documentation describes `max` as the top
 * tier, with `xhigh` sitting between `high` and `max`.
 * Exported for tests.
 */
export const EFFORT_PREFERENCE = ['max', 'xhigh', 'high', 'medium', 'low'];
/**
 * Pick the strongest effort level a model says it supports. Preference is
 * descending capability — the caller asked for more than the model can do,
 * so degrade as little as possible. Exported for tests.
 *
 * @param supported Effort levels the model reported as supported.
 * @returns The first entry of EFFORT_PREFERENCE present in `supported`; if
 *   none of the known levels is present, the first reported level; if the
 *   list is empty, `'high'`.
 */
export function bestSupportedEffort(supported) {
    const strongestKnown = EFFORT_PREFERENCE.find((level) => supported.includes(level));
    if (strongestKnown !== undefined)
        return strongestKnown;
    // Unknown vocabulary: trust upstream's first listed level, else fall back
    // to 'high'.
    return supported[0] ?? 'high';
}
294
328
  /**
295
329
  * Resolve an inbound API path to its upstream target + forwarding mode.
296
330
  * Allowlist semantics — anything unlisted is 403'd (prevents SSRF through
@@ -919,6 +953,12 @@ export async function startProxy(opts = {}) {
919
953
  // re-pay the 400 round-trip. Keyed by account alias (pool) or `__default__`.
920
954
  const unavailableBetas = new Map();
921
955
  const ACCOUNT_KEY_SINGLE = '__default__';
956
+ // Per-model effort capability cache — same pay-the-round-trip-once pattern
957
+ // as context1mUnavailable, but keyed by WIRE MODEL id: effort support is a
958
+ // model property, not an account property. Populated from upstream's
959
+ // "does not support effort level" 400 (see parseEffortRejection); consulted
960
+ // up front at body-build time so capped models never re-pay the rejection.
961
+ const effortSupportByModel = new Map();
922
962
  // Beta flag set — sourced from the live template when the capture recorded
923
963
  // one (schema v2+), else falls back to the v2.1.104 bundled default. Same
924
964
  // fallback string shim/runtime.cjs uses (kept in sync so proxy and shim
@@ -1682,6 +1722,18 @@ export async function startProxy(opts = {}) {
1682
1722
  // does on /v1/messages.
1683
1723
  r.model = stripContext1mTag(r.model);
1684
1724
  }
1725
+ // Effort capability clamp — when a prior request taught us this
1726
+ // model's supported effort set (autodetected catalogs expose
1727
+ // models that predate newer tiers), rewrite output_config.effort
1728
+ // up front instead of re-paying the 400 round-trip. In-place value
1729
+ // mutation: field order (a fingerprint surface) is untouched.
1730
+ if (typeof r.model === 'string') {
1731
+ const supportedEfforts = effortSupportByModel.get(r.model);
1732
+ const oc = r.output_config;
1733
+ if (supportedEfforts && oc && typeof oc.effort === 'string' && !supportedEfforts.includes(oc.effort)) {
1734
+ oc.effort = bestSupportedEffort(supportedEfforts);
1735
+ }
1736
+ }
1685
1737
  finalBody = Buffer.from(JSON.stringify(r));
1686
1738
  }
1687
1739
  catch { /* not JSON, send as-is */ }
@@ -1959,6 +2011,67 @@ export async function startProxy(opts = {}) {
1959
2011
  }
1960
2012
  }
1961
2013
  }
2014
+ else if (upstream.status === 400 && parseEffortRejection(peekedBody) && finalBody) {
2015
+ // Effort-capability rejection — the model predates the requested
2016
+ // effort tier (e.g. opus-4-5 + a DARIO_EFFORT=max pin; surfaced by
2017
+ // the autodetected catalog). Clamp output_config.effort to the
2018
+ // strongest level the error says the model supports, retry once,
2019
+ // and cache the supported set per model so the up-front clamp
2020
+ // handles every later request without the round-trip.
2021
+ const rejection = parseEffortRejection(peekedBody);
2022
+ const clamped = bestSupportedEffort(rejection.supported);
2023
+ let retried = false;
2024
+ try {
2025
+ const rb = JSON.parse(finalBody.toString('utf8'));
2026
+ const wireModel = typeof rb.model === 'string' ? rb.model : '';
2027
+ const oc = rb.output_config;
2028
+ if (wireModel && oc && typeof oc.effort === 'string') {
2029
+ const firstRejection = !effortSupportByModel.has(wireModel);
2030
+ effortSupportByModel.set(wireModel, rejection.supported);
2031
+ if (verbose && firstRejection)
2032
+ console.log(`[dario] #${requestCount} effort '${rejection.rejected}' rejected by ${wireModel} — retrying with '${clamped}' (supported set cached per model)`);
2033
+ oc.effort = clamped; // in-place value mutation — field order untouched
2034
+ finalBody = Buffer.from(JSON.stringify(rb));
2035
+ const retry = await fetch(targetBase, {
2036
+ method: req.method ?? 'POST',
2037
+ headers: passthrough ? headers : orderHeadersForOutbound(headers),
2038
+ body: new Uint8Array(finalBody),
2039
+ signal: upstreamAbort.signal,
2040
+ });
2041
+ upstream = retry;
2042
+ peekedBody = null;
2043
+ retried = true;
2044
+ if (pool && poolAccount) {
2045
+ const retrySnapshot = parseRateLimits(upstream.headers);
2046
+ if (upstream.status === 429) {
2047
+ pool.markRejected(poolAccount.alias, retrySnapshot);
2048
+ }
2049
+ else {
2050
+ pool.updateRateLimits(poolAccount.alias, retrySnapshot);
2051
+ }
2052
+ }
2053
+ }
2054
+ }
2055
+ catch { /* body not JSON — forward the original 400 below */ }
2056
+ if (!retried) {
2057
+ // Couldn't rebuild the body (no output_config.effort / not JSON)
2058
+ // — the upstream body is already consumed, so forward it here;
2059
+ // the chain's terminal 400 branch won't run for us.
2060
+ const responseHeaders = {
2061
+ 'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
2062
+ 'Access-Control-Allow-Origin': corsOrigin,
2063
+ ...SECURITY_HEADERS,
2064
+ };
2065
+ for (const [key, value] of upstream.headers.entries()) {
2066
+ if (key === 'request-id')
2067
+ responseHeaders[key] = value;
2068
+ }
2069
+ requestCount++;
2070
+ res.writeHead(400, responseHeaders);
2071
+ res.end(peekedBody);
2072
+ return;
2073
+ }
2074
+ }
1962
2075
  else if (isLongContextError) {
1963
2076
  // Cache the rejection so future requests on this account skip
1964
2077
  // context-1m up front instead of re-paying the 400/429 round-trip.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "4.8.57",
3
+ "version": "4.8.59",
4
4
  "description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
5
5
  "type": "module",
6
6
  "bin": {