@askalf/dario 4.8.39 → 4.8.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -323,6 +323,23 @@ export declare function resolveEffort(flag: EffortValue | undefined, clientBody:
323
323
  * — never a broken request.
324
324
  */
325
325
  export declare function supportsAdaptiveThinking(modelId: string): boolean;
326
+ /**
327
+ * Place CC-style prompt-cache breakpoints on the tools array and the
328
+ * conversation. The system prompt is already cached at build time (2 system
329
+ * breakpoints); this adds the last tool + a single rolling breakpoint on the
330
+ * last message — total 4, the Anthropic max, mirroring Claude Code.
331
+ *
332
+ * Why: dario previously cached ONLY the system prompt and stripped every
333
+ * message breakpoint, so the tools schema (10-20KB) and the entire growing
334
+ * conversation re-billed as FRESH input every turn. Fleet cache-read ran ~1.9%
335
+ * vs CC's ~70-90%, draining the Max 5h/7d token window 10-50x faster — which is
336
+ * exactly why long agentic sessions hit a wall through dario that real CC
337
+ * sails through. CC genuinely caches tools + conversation, so NOT caching them
338
+ * was itself a wire divergence from CC. Exported for unit testing.
339
+ */
340
+ export declare function applyCcPromptCaching(ccRequest: Record<string, unknown>, cacheControl: {
341
+ type: 'ephemeral';
342
+ }): void;
326
343
  export declare function buildCCRequest(clientBody: Record<string, unknown>, billingTag: string, cacheControl: {
327
344
  type: 'ephemeral';
328
345
  }, identity: {
@@ -1053,6 +1053,49 @@ export function supportsAdaptiveThinking(modelId) {
1053
1053
  return true;
1054
1054
  return false;
1055
1055
  }
1056
+ /**
1057
+ * Place CC-style prompt-cache breakpoints on the tools array and the
1058
+ * conversation. The system prompt is already cached at build time (2 system
1059
+ * breakpoints); this adds the last tool + a single rolling breakpoint on the
1060
+ * last message — total 4, the Anthropic max, mirroring Claude Code.
1061
+ *
1062
+ * Why: dario previously cached ONLY the system prompt and stripped every
1063
+ * message breakpoint, so the tools schema (10-20KB) and the entire growing
1064
+ * conversation re-billed as FRESH input every turn. Fleet cache-read ran ~1.9%
1065
+ * vs CC's ~70-90%, draining the Max 5h/7d token window 10-50x faster — which is
1066
+ * exactly why long agentic sessions hit a wall through dario that real CC
1067
+ * sails through. CC genuinely caches tools + conversation, so NOT caching them
1068
+ * was itself a wire divergence from CC. Exported for unit testing.
1069
+ */
1070
+ export function applyCcPromptCaching(ccRequest, cacheControl) {
1071
+ // Tools — clone (CC_TOOL_DEFINITIONS is a shared module constant), strip any
1072
+ // stray breakpoints, cache the LAST tool (caches the whole tools prefix).
1073
+ const tools = ccRequest.tools;
1074
+ if (Array.isArray(tools) && tools.length > 0) {
1075
+ const cloned = tools.map((t) => {
1076
+ const copy = { ...t };
1077
+ delete copy.cache_control;
1078
+ return copy;
1079
+ });
1080
+ cloned[cloned.length - 1] = { ...cloned[cloned.length - 1], cache_control: cacheControl };
1081
+ ccRequest.tools = cloned;
1082
+ }
1083
+ // Conversation — cache up to and including the last message so the NEXT turn
1084
+ // reads the whole prefix from cache. Client breakpoints were already stripped
1085
+ // upstream; this is the single rolling breakpoint CC uses.
1086
+ const msgs = ccRequest.messages;
1087
+ if (Array.isArray(msgs) && msgs.length > 0) {
1088
+ const last = msgs[msgs.length - 1];
1089
+ // Only block-array content gets a breakpoint. String content (some SDK
1090
+ // clients) is left untouched — wrapping it would change the wire shape, and
1091
+ // a bare string user turn is tiny anyway, so system+tools caching is the
1092
+ // win. Real CC / agentic sessions use block arrays, which DO get cached.
1093
+ if (Array.isArray(last.content) && last.content.length > 0) {
1094
+ const blocks = last.content;
1095
+ blocks[blocks.length - 1] = { ...blocks[blocks.length - 1], cache_control: cacheControl };
1096
+ }
1097
+ }
1098
+ }
1056
1099
  export function buildCCRequest(clientBody, billingTag, cacheControl, identity, opts = {}) {
1057
1100
  const model = clientBody.model || 'claude-sonnet-4-6';
1058
1101
  const isHaiku = model.toLowerCase().includes('haiku');
package/dist/proxy.d.ts CHANGED
@@ -193,6 +193,17 @@ interface ProxyOptions {
193
193
  * Sourced from `--system-prompt=<value>` or DARIO_SYSTEM_PROMPT.
194
194
  */
195
195
  systemPrompt?: string;
196
+ /**
197
+ * Upstream auth override: forward to api.anthropic.com using `x-api-key:
198
+ * <this>` (the per-token API pool) instead of the Pro/Max OAuth bearer.
199
+ * When set, OAuth/getAccessToken and the account pool are bypassed entirely
200
+ * — dario becomes a thin per-token Anthropic proxy. Default (unset) keeps
201
+ * the subscription-OAuth behavior. Used by the self-hosted compat workflow
202
+ * so it can route the suite THROUGH dario without tripping the subscription
203
+ * pool's ~3/min cap. Sourced from ANTHROPIC_UPSTREAM_API_KEY (env-only — never
204
+ * a CLI flag, so the key never lands in `ps`/argv).
205
+ */
206
+ upstreamApiKey?: string;
196
207
  /**
197
208
  * Overage-guard — halt the proxy on the first response carrying
198
209
  * `representative-claim: overage`. Subscribers should never see a
@@ -253,5 +264,11 @@ export declare function authenticateRequest(headers: IncomingMessage['headers'],
253
264
  * user's real credential for some other provider. Pure over inputs (dario#97).
254
265
  */
255
266
  export declare function describeAuthReject(headers: IncomingMessage['headers']): string;
267
+ /**
268
+ * Build the upstream auth header for the request to api.anthropic.com.
269
+ * `upstreamApiKey` set → per-token API pool (`x-api-key`); otherwise the
270
+ * Pro/Max OAuth bearer. Pure + exported for unit testing.
271
+ */
272
+ export declare function upstreamAuthHeaders(upstreamApiKey: string, accessToken: string): Record<string, string>;
256
273
  export declare function startProxy(opts?: ProxyOptions): Promise<void>;
257
274
  export {};
package/dist/proxy.js CHANGED
@@ -7,7 +7,7 @@ import { homedir } from 'node:os';
7
7
  import { setDefaultResultOrder } from 'node:dns';
8
8
  import { arch, platform } from 'node:process';
9
9
  import { getAccessToken, getStatus } from './oauth.js';
10
- import { buildCCRequest, parseEffortSuffix, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
10
+ import { buildCCRequest, applyCcPromptCaching, parseEffortSuffix, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
11
11
  import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
12
12
  import { AccountPool, computeStickyKey, parseRateLimits, modelFamily, isInAuthCooldown, authCooldownMs } from './pool.js';
13
13
  import { Analytics, billingBucketFromClaim } from './analytics.js';
@@ -448,11 +448,27 @@ function enrich429(body, headers) {
448
448
  return body;
449
449
  }
450
450
  }
451
/**
 * Pick the auth header for the outbound request to api.anthropic.com.
 *
 * A non-empty `upstreamApiKey` selects API-key auth and yields
 * `{ 'x-api-key': <key> }`; any falsy value falls back to the OAuth bearer
 * and yields `{ Authorization: 'Bearer <accessToken>' }`. Exactly one of the
 * two headers is returned, never both. Pure (no I/O, no shared state) and
 * exported for unit testing.
 *
 * @param upstreamApiKey Per-token API key, or an empty string when unset.
 * @param accessToken    OAuth access token, used only when no key is given.
 * @returns A single-entry header map.
 */
export function upstreamAuthHeaders(upstreamApiKey, accessToken) {
    if (!upstreamApiKey) {
        // No override configured — subscription OAuth bearer.
        return { 'Authorization': `Bearer ${accessToken}` };
    }
    return { 'x-api-key': upstreamApiKey };
}
451
461
  export async function startProxy(opts = {}) {
452
462
  const port = opts.port ?? DEFAULT_PORT;
453
463
  const host = opts.host ?? process.env.DARIO_HOST ?? DEFAULT_HOST;
454
464
  const verbose = opts.verbose ?? false;
455
465
  const passthrough = opts.passthrough ?? false;
466
+ // Upstream auth override: a per-token API key forwards to the standard API
467
+ // pool via `x-api-key`, bypassing OAuth/Max + the account pool entirely.
468
+ // Env-only so the key never lands in `ps`/argv. Default (empty) = OAuth/Max.
469
+ const upstreamApiKey = (opts.upstreamApiKey ?? process.env.ANTHROPIC_UPSTREAM_API_KEY ?? '').trim();
470
+ if (upstreamApiKey)
471
+ console.error('[dario] upstream auth: per-token API key (x-api-key) — OAuth/Max + account pool bypassed');
456
472
  // DNS result order — prefer IPv4 for the Anthropic upstream by default.
457
473
  // api.anthropic.com publishes both A and AAAA records. In a container with
458
474
  // no IPv6 egress (e.g. a default Docker bridge network), Node's `verbatim`
@@ -1218,7 +1234,13 @@ export async function startProxy(opts = {}) {
1218
1234
  // requests, not within a single 429 retry.
1219
1235
  let poolAccount = null;
1220
1236
  let accessToken;
1221
- if (pool) {
1237
+ if (upstreamApiKey) {
1238
+ // Per-token API-key mode: no OAuth, no pool. `poolAccount` stays null,
1239
+ // so every pool-failover retry below is skipped; the x-api-key is set
1240
+ // on the outbound headers instead of an Authorization bearer.
1241
+ accessToken = '';
1242
+ }
1243
+ else if (pool) {
1222
1244
  poolAccount = pool.select();
1223
1245
  if (!poolAccount) {
1224
1246
  res.writeHead(503, JSON_HEADERS);
@@ -1445,6 +1467,15 @@ export async function startProxy(opts = {}) {
1445
1467
  skipFields,
1446
1468
  honorClientThinking: opts.honorClientThinking ?? false,
1447
1469
  });
1470
+ // Prompt-cache the tools + conversation prefix (the system prompt
1471
+ // is already cached in ccBody's system blocks). Mirrors CC's cache
1472
+ // breakpoints so a long session doesn't re-bill them as fresh input
1473
+ // every turn and burn the Max 5h/7d window — the cause of the
1474
+ // "sessions wall in minutes through dario but not CC" report.
1475
+ // Opt-out: DARIO_SKIP_FIELDS=prompt_cache.
1476
+ if (!skipFields?.has('prompt_cache')) {
1477
+ applyCcPromptCaching(ccBody, CACHE_EPHEMERAL);
1478
+ }
1448
1479
  detectedClientForLog = detectedClient;
1449
1480
  preserveToolsEffective = Boolean(opts.preserveTools)
1450
1481
  || (Boolean(detectedClient) && !opts.hybridTools && !opts.mergeTools);
@@ -1612,7 +1643,7 @@ export async function startProxy(opts = {}) {
1612
1643
  }
1613
1644
  const headers = {
1614
1645
  ...staticHeaders,
1615
- 'Authorization': `Bearer ${accessToken}`,
1646
+ ...upstreamAuthHeaders(upstreamApiKey, accessToken),
1616
1647
  'x-claude-code-session-id': outboundSessionId,
1617
1648
  'anthropic-version': passthrough ? (req.headers['anthropic-version'] || '2023-06-01') : '2023-06-01',
1618
1649
  'anthropic-beta': beta,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "4.8.39",
3
+ "version": "4.8.41",
4
4
  "description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
5
5
  "type": "module",
6
6
  "bin": {