@askalf/dario 4.8.39 → 4.8.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cc-template.d.ts +17 -0
- package/dist/cc-template.js +43 -0
- package/dist/proxy.d.ts +17 -0
- package/dist/proxy.js +34 -3
- package/package.json +1 -1
package/dist/cc-template.d.ts
CHANGED
|
@@ -323,6 +323,23 @@ export declare function resolveEffort(flag: EffortValue | undefined, clientBody:
|
|
|
323
323
|
* — never a broken request.
|
|
324
324
|
*/
|
|
325
325
|
export declare function supportsAdaptiveThinking(modelId: string): boolean;
|
|
326
|
+
/**
|
|
327
|
+
* Place CC-style prompt-cache breakpoints on the tools array and the
|
|
328
|
+
* conversation. The system prompt is already cached at build time (2 system
|
|
329
|
+
* breakpoints); this adds the last tool + a single rolling breakpoint on the
|
|
330
|
+
* last message — total 4, the Anthropic max, mirroring Claude Code.
|
|
331
|
+
*
|
|
332
|
+
* Why: dario previously cached ONLY the system prompt and stripped every
|
|
333
|
+
* message breakpoint, so the tools schema (10-20KB) and the entire growing
|
|
334
|
+
* conversation re-billed as FRESH input every turn. Fleet cache-read ran ~1.9%
|
|
335
|
+
* vs CC's ~70-90%, draining the Max 5h/7d token window 10-50x faster — which is
|
|
336
|
+
* exactly why long agentic sessions hit a wall through dario that real CC
|
|
337
|
+
* sails through. CC genuinely caches tools + conversation, so NOT caching them
|
|
338
|
+
* was itself a wire divergence from CC. Exported for unit testing.
|
|
339
|
+
*/
|
|
340
|
+
export declare function applyCcPromptCaching(ccRequest: Record<string, unknown>, cacheControl: {
|
|
341
|
+
type: 'ephemeral';
|
|
342
|
+
}): void;
|
|
326
343
|
export declare function buildCCRequest(clientBody: Record<string, unknown>, billingTag: string, cacheControl: {
|
|
327
344
|
type: 'ephemeral';
|
|
328
345
|
}, identity: {
|
package/dist/cc-template.js
CHANGED
|
@@ -1053,6 +1053,49 @@ export function supportsAdaptiveThinking(modelId) {
|
|
|
1053
1053
|
return true;
|
|
1054
1054
|
return false;
|
|
1055
1055
|
}
|
|
1056
|
+
/**
|
|
1057
|
+
* Place CC-style prompt-cache breakpoints on the tools array and the
|
|
1058
|
+
* conversation. The system prompt is already cached at build time (2 system
|
|
1059
|
+
* breakpoints); this adds the last tool + a single rolling breakpoint on the
|
|
1060
|
+
* last message — total 4, the Anthropic max, mirroring Claude Code.
|
|
1061
|
+
*
|
|
1062
|
+
* Why: dario previously cached ONLY the system prompt and stripped every
|
|
1063
|
+
* message breakpoint, so the tools schema (10-20KB) and the entire growing
|
|
1064
|
+
* conversation re-billed as FRESH input every turn. Fleet cache-read ran ~1.9%
|
|
1065
|
+
* vs CC's ~70-90%, draining the Max 5h/7d token window 10-50x faster — which is
|
|
1066
|
+
* exactly why long agentic sessions hit a wall through dario that real CC
|
|
1067
|
+
* sails through. CC genuinely caches tools + conversation, so NOT caching them
|
|
1068
|
+
* was itself a wire divergence from CC. Exported for unit testing.
|
|
1069
|
+
*/
|
|
1070
|
+
export function applyCcPromptCaching(ccRequest, cacheControl) {
|
|
1071
|
+
// Tools — clone (CC_TOOL_DEFINITIONS is a shared module constant), strip any
|
|
1072
|
+
// stray breakpoints, cache the LAST tool (caches the whole tools prefix).
|
|
1073
|
+
const tools = ccRequest.tools;
|
|
1074
|
+
if (Array.isArray(tools) && tools.length > 0) {
|
|
1075
|
+
const cloned = tools.map((t) => {
|
|
1076
|
+
const copy = { ...t };
|
|
1077
|
+
delete copy.cache_control;
|
|
1078
|
+
return copy;
|
|
1079
|
+
});
|
|
1080
|
+
cloned[cloned.length - 1] = { ...cloned[cloned.length - 1], cache_control: cacheControl };
|
|
1081
|
+
ccRequest.tools = cloned;
|
|
1082
|
+
}
|
|
1083
|
+
// Conversation — cache up to and including the last message so the NEXT turn
|
|
1084
|
+
// reads the whole prefix from cache. Client breakpoints were already stripped
|
|
1085
|
+
// upstream; this is the single rolling breakpoint CC uses.
|
|
1086
|
+
const msgs = ccRequest.messages;
|
|
1087
|
+
if (Array.isArray(msgs) && msgs.length > 0) {
|
|
1088
|
+
const last = msgs[msgs.length - 1];
|
|
1089
|
+
// Only block-array content gets a breakpoint. String content (some SDK
|
|
1090
|
+
// clients) is left untouched — wrapping it would change the wire shape, and
|
|
1091
|
+
// a bare string user turn is tiny anyway, so system+tools caching is the
|
|
1092
|
+
// win. Real CC / agentic sessions use block arrays, which DO get cached.
|
|
1093
|
+
if (Array.isArray(last.content) && last.content.length > 0) {
|
|
1094
|
+
const blocks = last.content;
|
|
1095
|
+
blocks[blocks.length - 1] = { ...blocks[blocks.length - 1], cache_control: cacheControl };
|
|
1096
|
+
}
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1056
1099
|
export function buildCCRequest(clientBody, billingTag, cacheControl, identity, opts = {}) {
|
|
1057
1100
|
const model = clientBody.model || 'claude-sonnet-4-6';
|
|
1058
1101
|
const isHaiku = model.toLowerCase().includes('haiku');
|
package/dist/proxy.d.ts
CHANGED
|
@@ -193,6 +193,17 @@ interface ProxyOptions {
|
|
|
193
193
|
* Sourced from `--system-prompt=<value>` or DARIO_SYSTEM_PROMPT.
|
|
194
194
|
*/
|
|
195
195
|
systemPrompt?: string;
|
|
196
|
+
/**
|
|
197
|
+
* Upstream auth override: forward to api.anthropic.com using `x-api-key:
|
|
198
|
+
* <this>` (the per-token API pool) instead of the Pro/Max OAuth bearer.
|
|
199
|
+
* When set, OAuth/getAccessToken and the account pool are bypassed entirely
|
|
200
|
+
* — dario becomes a thin per-token Anthropic proxy. Default (unset) keeps
|
|
201
|
+
* the subscription-OAuth behavior. Used by the self-hosted compat workflow
|
|
202
|
+
* so it can route the suite THROUGH dario without tripping the subscription
|
|
203
|
+
* pool's ~3/min cap. Sourced from ANTHROPIC_UPSTREAM_API_KEY (env-only — never
|
|
204
|
+
* a CLI flag, so the key never lands in `ps`/argv).
|
|
205
|
+
*/
|
|
206
|
+
upstreamApiKey?: string;
|
|
196
207
|
/**
|
|
197
208
|
* Overage-guard — halt the proxy on the first response carrying
|
|
198
209
|
* `representative-claim: overage`. Subscribers should never see a
|
|
@@ -253,5 +264,11 @@ export declare function authenticateRequest(headers: IncomingMessage['headers'],
|
|
|
253
264
|
* user's real credential for some other provider. Pure over inputs (dario#97).
|
|
254
265
|
*/
|
|
255
266
|
export declare function describeAuthReject(headers: IncomingMessage['headers']): string;
|
|
267
|
+
/**
|
|
268
|
+
* Build the upstream auth header for the request to api.anthropic.com.
|
|
269
|
+
* `upstreamApiKey` set → per-token API pool (`x-api-key`); otherwise the
|
|
270
|
+
* Pro/Max OAuth bearer. Pure + exported for unit testing.
|
|
271
|
+
*/
|
|
272
|
+
export declare function upstreamAuthHeaders(upstreamApiKey: string, accessToken: string): Record<string, string>;
|
|
256
273
|
export declare function startProxy(opts?: ProxyOptions): Promise<void>;
|
|
257
274
|
export {};
|
package/dist/proxy.js
CHANGED
|
@@ -7,7 +7,7 @@ import { homedir } from 'node:os';
|
|
|
7
7
|
import { setDefaultResultOrder } from 'node:dns';
|
|
8
8
|
import { arch, platform } from 'node:process';
|
|
9
9
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
10
|
-
import { buildCCRequest, parseEffortSuffix, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
10
|
+
import { buildCCRequest, applyCcPromptCaching, parseEffortSuffix, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
11
11
|
import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
|
|
12
12
|
import { AccountPool, computeStickyKey, parseRateLimits, modelFamily, isInAuthCooldown, authCooldownMs } from './pool.js';
|
|
13
13
|
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
@@ -448,11 +448,27 @@ function enrich429(body, headers) {
|
|
|
448
448
|
return body;
|
|
449
449
|
}
|
|
450
450
|
}
|
|
451
|
+
/**
 * Pick the credential header for the outbound request to api.anthropic.com.
 *
 * A non-empty `upstreamApiKey` selects per-token API-key auth and is sent as
 * `x-api-key`; an empty one falls back to the Pro/Max OAuth token, sent as an
 * `Authorization: Bearer …` header. Exactly one of the two headers is
 * returned, never both. Pure function, exported for unit testing.
 *
 * @param upstreamApiKey API key override; any falsy value means "not set".
 * @param accessToken    OAuth access token, used only when no key is set.
 * @returns A single-entry header map.
 */
export function upstreamAuthHeaders(upstreamApiKey, accessToken) {
    // API-key mode takes precedence; the access token is ignored entirely.
    if (upstreamApiKey) {
        return { 'x-api-key': upstreamApiKey };
    }
    return { 'Authorization': `Bearer ${accessToken}` };
}
|
|
451
461
|
export async function startProxy(opts = {}) {
|
|
452
462
|
const port = opts.port ?? DEFAULT_PORT;
|
|
453
463
|
const host = opts.host ?? process.env.DARIO_HOST ?? DEFAULT_HOST;
|
|
454
464
|
const verbose = opts.verbose ?? false;
|
|
455
465
|
const passthrough = opts.passthrough ?? false;
|
|
466
|
+
// Upstream auth override: a per-token API key forwards to the standard API
|
|
467
|
+
// pool via `x-api-key`, bypassing OAuth/Max + the account pool entirely.
|
|
468
|
+
// Env-only so the key never lands in `ps`/argv. Default (empty) = OAuth/Max.
|
|
469
|
+
const upstreamApiKey = (opts.upstreamApiKey ?? process.env.ANTHROPIC_UPSTREAM_API_KEY ?? '').trim();
|
|
470
|
+
if (upstreamApiKey)
|
|
471
|
+
console.error('[dario] upstream auth: per-token API key (x-api-key) — OAuth/Max + account pool bypassed');
|
|
456
472
|
// DNS result order — prefer IPv4 for the Anthropic upstream by default.
|
|
457
473
|
// api.anthropic.com publishes both A and AAAA records. In a container with
|
|
458
474
|
// no IPv6 egress (e.g. a default Docker bridge network), Node's `verbatim`
|
|
@@ -1218,7 +1234,13 @@ export async function startProxy(opts = {}) {
|
|
|
1218
1234
|
// requests, not within a single 429 retry.
|
|
1219
1235
|
let poolAccount = null;
|
|
1220
1236
|
let accessToken;
|
|
1221
|
-
if (pool) {
|
|
1237
|
+
if (upstreamApiKey) {
|
|
1238
|
+
// Per-token API-key mode: no OAuth, no pool. `poolAccount` stays null,
|
|
1239
|
+
// so every pool-failover retry below is skipped; the x-api-key is set
|
|
1240
|
+
// on the outbound headers instead of an Authorization bearer.
|
|
1241
|
+
accessToken = '';
|
|
1242
|
+
}
|
|
1243
|
+
else if (pool) {
|
|
1222
1244
|
poolAccount = pool.select();
|
|
1223
1245
|
if (!poolAccount) {
|
|
1224
1246
|
res.writeHead(503, JSON_HEADERS);
|
|
@@ -1445,6 +1467,15 @@ export async function startProxy(opts = {}) {
|
|
|
1445
1467
|
skipFields,
|
|
1446
1468
|
honorClientThinking: opts.honorClientThinking ?? false,
|
|
1447
1469
|
});
|
|
1470
|
+
// Prompt-cache the tools + conversation prefix (the system prompt
|
|
1471
|
+
// is already cached in ccBody's system blocks). Mirrors CC's cache
|
|
1472
|
+
// breakpoints so a long session doesn't re-bill them as fresh input
|
|
1473
|
+
// every turn and burn the Max 5h/7d window — the cause of the
|
|
1474
|
+
// "sessions wall in minutes through dario but not CC" report.
|
|
1475
|
+
// Opt-out: DARIO_SKIP_FIELDS=prompt_cache.
|
|
1476
|
+
if (!skipFields?.has('prompt_cache')) {
|
|
1477
|
+
applyCcPromptCaching(ccBody, CACHE_EPHEMERAL);
|
|
1478
|
+
}
|
|
1448
1479
|
detectedClientForLog = detectedClient;
|
|
1449
1480
|
preserveToolsEffective = Boolean(opts.preserveTools)
|
|
1450
1481
|
|| (Boolean(detectedClient) && !opts.hybridTools && !opts.mergeTools);
|
|
@@ -1612,7 +1643,7 @@ export async function startProxy(opts = {}) {
|
|
|
1612
1643
|
}
|
|
1613
1644
|
const headers = {
|
|
1614
1645
|
...staticHeaders,
|
|
1615
|
-
'Authorization': `Bearer ${accessToken}`,
|
|
1646
|
+
...upstreamAuthHeaders(upstreamApiKey, accessToken),
|
|
1616
1647
|
'x-claude-code-session-id': outboundSessionId,
|
|
1617
1648
|
'anthropic-version': passthrough ? (req.headers['anthropic-version'] || '2023-06-01') : '2023-06-01',
|
|
1618
1649
|
'anthropic-beta': beta,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "4.8.39",
|
|
3
|
+
"version": "4.8.41",
|
|
4
4
|
"description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|