@askalf/dario 4.8.40 → 4.8.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cc-template.d.ts +17 -0
- package/dist/cc-template.js +43 -0
- package/dist/proxy.js +10 -1
- package/package.json +1 -1
package/dist/cc-template.d.ts
CHANGED
|
@@ -323,6 +323,23 @@ export declare function resolveEffort(flag: EffortValue | undefined, clientBody:
|
|
|
323
323
|
* — never a broken request.
|
|
324
324
|
*/
|
|
325
325
|
export declare function supportsAdaptiveThinking(modelId: string): boolean;
|
|
326
|
+
/**
|
|
327
|
+
* Place CC-style prompt-cache breakpoints on the tools array and the
|
|
328
|
+
* conversation. The system prompt is already cached at build time (2 system
|
|
329
|
+
* breakpoints); this adds the last tool + a single rolling breakpoint on the
|
|
330
|
+
* last message — total 4, the Anthropic max, mirroring Claude Code.
|
|
331
|
+
*
|
|
332
|
+
* Why: dario previously cached ONLY the system prompt and stripped every
|
|
333
|
+
* message breakpoint, so the tools schema (10-20KB) and the entire growing
|
|
334
|
+
* conversation re-billed as FRESH input every turn. Fleet cache-read ran ~1.9%
|
|
335
|
+
* vs CC's ~70-90%, draining the Max 5h/7d token window 10-50x faster — which is
|
|
336
|
+
* exactly why long agentic sessions hit a wall through dario that real CC
|
|
337
|
+
* sails through. CC genuinely caches tools + conversation, so NOT caching them
|
|
338
|
+
* was itself a wire divergence from CC. Exported for unit testing.
|
|
339
|
+
*/
|
|
340
|
+
export declare function applyCcPromptCaching(ccRequest: Record<string, unknown>, cacheControl: {
|
|
341
|
+
type: 'ephemeral';
|
|
342
|
+
}): void;
|
|
326
343
|
export declare function buildCCRequest(clientBody: Record<string, unknown>, billingTag: string, cacheControl: {
|
|
327
344
|
type: 'ephemeral';
|
|
328
345
|
}, identity: {
|
package/dist/cc-template.js
CHANGED
|
@@ -1053,6 +1053,49 @@ export function supportsAdaptiveThinking(modelId) {
|
|
|
1053
1053
|
return true;
|
|
1054
1054
|
return false;
|
|
1055
1055
|
}
|
|
1056
|
+
/**
 * Place CC-style prompt-cache breakpoints on the tools array and the
 * conversation. The system prompt is already cached at build time (2 system
 * breakpoints); this adds the last tool + a single rolling breakpoint on the
 * last message — total 4, the Anthropic max, mirroring Claude Code.
 *
 * Why: dario previously cached ONLY the system prompt and stripped every
 * message breakpoint, so the tools schema (10-20KB) and the entire growing
 * conversation re-billed as FRESH input every turn. Fleet cache-read ran ~1.9%
 * vs CC's ~70-90%, draining the Max 5h/7d token window 10-50x faster — which is
 * exactly why long agentic sessions hit a wall through dario that real CC
 * sails through. CC genuinely caches tools + conversation, so NOT caching them
 * was itself a wire divergence from CC. Exported for unit testing.
 *
 * Mutates `ccRequest` in place:
 *  - `ccRequest.tools` is replaced by a shallow-cloned array in which every
 *    tool has its own `cache_control` removed and only the last tool carries
 *    `cacheControl`. The original tool objects are never written to.
 *  - In the last message, one entry of its `content` array is replaced by a
 *    shallow copy carrying `cacheControl`. String content is left untouched.
 *
 * The message breakpoint goes on the last content block that can legally
 * carry one: entries that are not objects, and `thinking` /
 * `redacted_thinking` blocks (which the API does not accept `cache_control`
 * on), are skipped in favour of the nearest earlier block. If the last message
 * is missing, is not an object, or has no eligible block, the conversation is
 * simply left without a breakpoint rather than throwing.
 *
 * @param ccRequest    Outbound request body; `tools` and `messages` are read
 *                     and updated when they are non-empty arrays.
 * @param cacheControl Breakpoint marker to attach (e.g. `{ type: 'ephemeral' }`).
 *                     The same object reference is used for every breakpoint.
 */
export function applyCcPromptCaching(ccRequest, cacheControl) {
    // Tools — clone (CC_TOOL_DEFINITIONS is a shared module constant), strip any
    // stray breakpoints, cache the LAST tool (caches the whole tools prefix).
    const tools = ccRequest.tools;
    if (Array.isArray(tools) && tools.length > 0) {
        const cloned = tools.map((t) => {
            const copy = { ...t };
            delete copy.cache_control;
            return copy;
        });
        cloned[cloned.length - 1] = { ...cloned[cloned.length - 1], cache_control: cacheControl };
        ccRequest.tools = cloned;
    }
    // Conversation — cache up to and including the last message so the NEXT turn
    // reads the whole prefix from cache. Client breakpoints were already stripped
    // upstream; this is the single rolling breakpoint CC uses.
    const msgs = ccRequest.messages;
    if (!Array.isArray(msgs) || msgs.length === 0) {
        return;
    }
    const last = msgs[msgs.length - 1];
    // A null / primitive trailing entry has no content to mark; bail out
    // instead of throwing on the property access below.
    if (last === null || typeof last !== 'object') {
        return;
    }
    // Only block-array content gets a breakpoint. String content (some SDK
    // clients) is left untouched — wrapping it would change the wire shape, and
    // a bare string user turn is tiny anyway, so system+tools caching is the
    // win. Real CC / agentic sessions use block arrays, which DO get cached.
    const blocks = last.content;
    if (!Array.isArray(blocks)) {
        return;
    }
    // Walk backwards to the last block that may carry cache_control. In the
    // common case this is simply the final block.
    for (let i = blocks.length - 1; i >= 0; i--) {
        const block = blocks[i];
        if (block === null || typeof block !== 'object') {
            continue;
        }
        if (block.type === 'thinking' || block.type === 'redacted_thinking') {
            continue;
        }
        blocks[i] = { ...block, cache_control: cacheControl };
        return;
    }
}
|
|
1056
1099
|
export function buildCCRequest(clientBody, billingTag, cacheControl, identity, opts = {}) {
|
|
1057
1100
|
const model = clientBody.model || 'claude-sonnet-4-6';
|
|
1058
1101
|
const isHaiku = model.toLowerCase().includes('haiku');
|
package/dist/proxy.js
CHANGED
|
@@ -7,7 +7,7 @@ import { homedir } from 'node:os';
|
|
|
7
7
|
import { setDefaultResultOrder } from 'node:dns';
|
|
8
8
|
import { arch, platform } from 'node:process';
|
|
9
9
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
10
|
-
import { buildCCRequest, parseEffortSuffix, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
10
|
+
import { buildCCRequest, applyCcPromptCaching, parseEffortSuffix, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
11
11
|
import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
|
|
12
12
|
import { AccountPool, computeStickyKey, parseRateLimits, modelFamily, isInAuthCooldown, authCooldownMs } from './pool.js';
|
|
13
13
|
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
@@ -1467,6 +1467,15 @@ export async function startProxy(opts = {}) {
|
|
|
1467
1467
|
skipFields,
|
|
1468
1468
|
honorClientThinking: opts.honorClientThinking ?? false,
|
|
1469
1469
|
});
|
|
1470
|
+
// Prompt-cache the tools + conversation prefix (the system prompt
|
|
1471
|
+
// is already cached in ccBody's system blocks). Mirrors CC's cache
|
|
1472
|
+
// breakpoints so a long session doesn't re-bill them as fresh input
|
|
1473
|
+
// every turn and burn the Max 5h/7d window — the cause of the
|
|
1474
|
+
// "sessions wall in minutes through dario but not CC" report.
|
|
1475
|
+
// Opt-out: DARIO_SKIP_FIELDS=prompt_cache.
|
|
1476
|
+
if (!skipFields?.has('prompt_cache')) {
|
|
1477
|
+
applyCcPromptCaching(ccBody, CACHE_EPHEMERAL);
|
|
1478
|
+
}
|
|
1470
1479
|
detectedClientForLog = detectedClient;
|
|
1471
1480
|
preserveToolsEffective = Boolean(opts.preserveTools)
|
|
1472
1481
|
|| (Boolean(detectedClient) && !opts.hybridTools && !opts.mergeTools);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "4.8.40",
|
|
3
|
+
"version": "4.8.41",
|
|
4
4
|
"description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|