@askalf/dario 3.16.0 → 3.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +171 -169
- package/dist/accounts.d.ts +2 -0
- package/dist/accounts.js +54 -4
- package/dist/cc-template-data.json +1 -0
- package/dist/cc-template.d.ts +3 -0
- package/dist/cc-template.js +95 -35
- package/dist/cli.js +19 -0
- package/dist/doctor.d.ts +43 -0
- package/dist/doctor.js +208 -0
- package/dist/live-fingerprint.d.ts +137 -0
- package/dist/live-fingerprint.js +375 -9
- package/dist/openai-backend.js +24 -3
- package/dist/proxy.js +108 -16
- package/package.json +2 -2
package/dist/proxy.js
CHANGED
|
@@ -6,7 +6,8 @@ import { join } from 'node:path';
|
|
|
6
6
|
import { homedir } from 'node:os';
|
|
7
7
|
import { arch, platform } from 'node:process';
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
|
-
import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound } from './cc-template.js';
|
|
9
|
+
import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
|
|
10
|
+
import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
|
|
10
11
|
import { AccountPool, computeStickyKey, parseRateLimits } from './pool.js';
|
|
11
12
|
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
12
13
|
import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
|
|
@@ -89,9 +90,18 @@ function extractFirstUserMessage(body) {
|
|
|
89
90
|
}
|
|
90
91
|
return '';
|
|
91
92
|
}
|
|
92
|
-
// Session ID
|
|
93
|
-
//
|
|
93
|
+
// Session ID behavior (single-account mode):
|
|
94
|
+
// v3.18 rotated per request — which was itself a fingerprint. Real CC
|
|
95
|
+
// rotates roughly once per conversation, not per call. A user who has
|
|
96
|
+
// distinct session-ids for every request looks nothing like a CC user.
|
|
97
|
+
//
|
|
98
|
+
// v3.19 keeps the id stable through a conversation window and rotates
|
|
99
|
+
// only after an idle gap long enough to credibly indicate a new
|
|
100
|
+
// conversation (SESSION_IDLE_ROTATE_MS). Pool mode still uses the
|
|
101
|
+
// per-account identity.sessionId (stable across the account's lifetime).
|
|
94
102
|
let SESSION_ID = randomUUID();
|
|
103
|
+
let SESSION_LAST_USED = 0;
|
|
104
|
+
const SESSION_IDLE_ROTATE_MS = 15 * 60 * 1000;
|
|
95
105
|
const OS_NAME = platform === 'win32' ? 'Windows' : platform === 'darwin' ? 'MacOS' : 'Linux';
|
|
96
106
|
// Claude Code device identity — required for Max plan billing classification.
|
|
97
107
|
// Without metadata.user_id, Anthropic classifies requests as third-party and
|
|
@@ -476,6 +486,18 @@ export async function startProxy(opts = {}) {
|
|
|
476
486
|
// Claude Code runs on Bun which reports v24.3.0 as Node compat version
|
|
477
487
|
'x-stainless-runtime-version': 'v24.3.0',
|
|
478
488
|
};
|
|
489
|
+
// Overlay captured header values from the live template (schema v2). This
|
|
490
|
+
// replaces the hardcoded stainless/user-agent constants with whatever CC
|
|
491
|
+
// actually emitted on the capture, so a CC release that nudges any of those
|
|
492
|
+
// values gets reflected automatically on the next template refresh.
|
|
493
|
+
// Excludes auth + body-framing + session-scoped keys by construction (see
|
|
494
|
+
// extractStaticHeaderValues in live-fingerprint.ts). No-op when the loaded
|
|
495
|
+
// template predates v2 or the bundled snapshot is in use.
|
|
496
|
+
if (!passthrough && CC_TEMPLATE.header_values) {
|
|
497
|
+
for (const [k, v] of Object.entries(CC_TEMPLATE.header_values)) {
|
|
498
|
+
staticHeaders[k] = v;
|
|
499
|
+
}
|
|
500
|
+
}
|
|
479
501
|
let requestCount = 0;
|
|
480
502
|
const semaphore = new Semaphore(MAX_CONCURRENT);
|
|
481
503
|
// Cache context-1m beta availability. Set false once per account (or process
|
|
@@ -486,6 +508,14 @@ export async function startProxy(opts = {}) {
|
|
|
486
508
|
// retry loop was firing on every POST with hybrid-tools + OC.
|
|
487
509
|
const context1mUnavailable = new Set();
|
|
488
510
|
const ACCOUNT_KEY_SINGLE = '__default__';
|
|
511
|
+
// Beta flag set — sourced from the live template when the capture recorded
|
|
512
|
+
// one (schema v2+), else falls back to the v2.1.104 bundled default. Same
|
|
513
|
+
// fallback string shim/runtime.cjs uses (kept in sync so proxy and shim
|
|
514
|
+
// never diverge on the wire). Computed once per proxy because it's a
|
|
515
|
+
// function of the loaded template, not of the request.
|
|
516
|
+
const BETA_FALLBACK = 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
|
|
517
|
+
const betaBase = CC_TEMPLATE.anthropic_beta || BETA_FALLBACK;
|
|
518
|
+
const betaWithoutContext1m = betaBase.split(',').filter((t) => t !== 'context-1m-2025-08-07').join(',');
|
|
489
519
|
// Rate governor — minimum 500ms between requests. Fast enough for agents,
|
|
490
520
|
// slow enough to not look like a scripted flood of identical traffic.
|
|
491
521
|
let lastRequestTime = 0;
|
|
@@ -591,6 +621,19 @@ export async function startProxy(opts = {}) {
|
|
|
591
621
|
res.end(JSON.stringify({ error: 'malformed borrow envelope' }));
|
|
592
622
|
return;
|
|
593
623
|
}
|
|
624
|
+
// Envelope shape guard — envelope.request is `unknown` on the wire.
|
|
625
|
+
// We stringify it and forward to Anthropic under the lender's identity,
|
|
626
|
+
// so a borrower could otherwise waste the lender's rate-limit slot with
|
|
627
|
+
// a body Anthropic will reject. Minimum: must be a plain object with
|
|
628
|
+
// `model` (string) and `messages` (array). Anthropic validates the rest.
|
|
629
|
+
const br = envelope.request;
|
|
630
|
+
if (!br || typeof br !== 'object' || Array.isArray(br) ||
|
|
631
|
+
typeof br.model !== 'string' ||
|
|
632
|
+
!Array.isArray(br.messages)) {
|
|
633
|
+
res.writeHead(400, JSON_HEADERS);
|
|
634
|
+
res.end(JSON.stringify({ error: 'envelope.request must be an Anthropic /v1/messages body' }));
|
|
635
|
+
return;
|
|
636
|
+
}
|
|
594
637
|
if (envelope.groupId !== groupLender.groupId) {
|
|
595
638
|
res.writeHead(403, JSON_HEADERS);
|
|
596
639
|
res.end(JSON.stringify({ error: 'unknown_group', expected: groupLender.groupId }));
|
|
@@ -926,15 +969,15 @@ export async function startProxy(opts = {}) {
|
|
|
926
969
|
beta += ',' + clientBeta;
|
|
927
970
|
}
|
|
928
971
|
else {
|
|
929
|
-
//
|
|
972
|
+
// Beta set sourced from the live template (schema v2). Bundled
|
|
973
|
+
// snapshots predating v3.19 leave anthropic_beta undefined, so fall
|
|
974
|
+
// back to the v2.1.104 flag set — matches shim/runtime.cjs's fallback.
|
|
930
975
|
// context-1m requires Extra Usage — if it 400s, we auto-retry without
|
|
931
976
|
// it, and cache the rejection so subsequent requests on this account
|
|
932
977
|
// skip context-1m entirely (dario#36).
|
|
933
978
|
const acctKey = poolAccount?.alias ?? ACCOUNT_KEY_SINGLE;
|
|
934
979
|
const skipContext1m = context1mUnavailable.has(acctKey);
|
|
935
|
-
beta = skipContext1m
|
|
936
|
-
? 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24'
|
|
937
|
-
: 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
|
|
980
|
+
beta = skipContext1m ? betaWithoutContext1m : betaBase;
|
|
938
981
|
if (clientBeta) {
|
|
939
982
|
const baseSet = new Set(beta.split(','));
|
|
940
983
|
const filtered = filterBillableBetas(clientBeta)
|
|
@@ -950,11 +993,18 @@ export async function startProxy(opts = {}) {
|
|
|
950
993
|
await new Promise(r => setTimeout(r, MIN_REQUEST_INTERVAL_MS - elapsed));
|
|
951
994
|
}
|
|
952
995
|
lastRequestTime = Date.now();
|
|
953
|
-
//
|
|
954
|
-
//
|
|
955
|
-
//
|
|
956
|
-
|
|
957
|
-
|
|
996
|
+
// Session ID: pool mode uses the per-account identity.sessionId (stable
|
|
997
|
+
// per account). Single-account mode keeps SESSION_ID stable through
|
|
998
|
+
// active conversations and rotates only after an idle gap that looks
|
|
999
|
+
// like a new conversation — matches CC's observed cadence (see note
|
|
1000
|
+
// at SESSION_ID declaration).
|
|
1001
|
+
if (!poolAccount) {
|
|
1002
|
+
const nowTs = Date.now();
|
|
1003
|
+
if (SESSION_LAST_USED === 0 || nowTs - SESSION_LAST_USED > SESSION_IDLE_ROTATE_MS) {
|
|
1004
|
+
SESSION_ID = randomUUID();
|
|
1005
|
+
}
|
|
1006
|
+
SESSION_LAST_USED = nowTs;
|
|
1007
|
+
}
|
|
958
1008
|
const outboundSessionId = poolAccount ? poolAccount.identity.sessionId : SESSION_ID;
|
|
959
1009
|
const headers = {
|
|
960
1010
|
...staticHeaders,
|
|
@@ -1044,7 +1094,13 @@ export async function startProxy(opts = {}) {
|
|
|
1044
1094
|
context1mUnavailable.add(acctKey);
|
|
1045
1095
|
if (verbose && firstRejection)
|
|
1046
1096
|
console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it (cached for session)`);
|
|
1047
|
-
|
|
1097
|
+
// Rebuild via array filter instead of string replace so the output
|
|
1098
|
+
// is byte-identical to a request that started without context-1m
|
|
1099
|
+
// (skipContext1m path above). A deterministic string-replace would
|
|
1100
|
+
// leave the retry indistinguishable on content but divergent on
|
|
1101
|
+
// whitespace/structure if betaBase ever gains non-context-1m tokens
|
|
1102
|
+
// at the same position — keep the two paths funneled through one filter.
|
|
1103
|
+
const reducedBeta = beta.split(',').filter((t) => t !== 'context-1m-2025-08-07').join(',');
|
|
1048
1104
|
const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
|
|
1049
1105
|
const retry = await fetch(targetBase, {
|
|
1050
1106
|
method: req.method ?? 'POST',
|
|
@@ -1282,9 +1338,26 @@ export async function startProxy(opts = {}) {
|
|
|
1282
1338
|
if (isOpenAI) {
|
|
1283
1339
|
// Translate Anthropic SSE → OpenAI SSE
|
|
1284
1340
|
buffer += decoder.decode(value, { stream: true });
|
|
1285
|
-
//
|
|
1341
|
+
// Reject oversized SSE lines instead of silently truncating.
|
|
1342
|
+
// Truncation hid protocol bugs (a runaway upstream event would
|
|
1343
|
+
// stream indefinitely with the tail rewritten each chunk) and
|
|
1344
|
+
// guaranteed a malformed JSON parse at the client. Since we've
|
|
1345
|
+
// already sent 200 and an SSE content-type, the cleanest exit
|
|
1346
|
+
// is an error event in OpenAI shape + [DONE] sentinel + abort.
|
|
1286
1347
|
if (buffer.length > MAX_LINE_LENGTH) {
|
|
1287
|
-
|
|
1348
|
+
if (verbose)
|
|
1349
|
+
console.warn(`[dario] #${requestCount} SSE line exceeded ${MAX_LINE_LENGTH}B — aborting stream`);
|
|
1350
|
+
const errPayload = JSON.stringify({
|
|
1351
|
+
error: {
|
|
1352
|
+
message: `Upstream SSE line exceeded ${MAX_LINE_LENGTH} bytes`,
|
|
1353
|
+
type: 'upstream_protocol_error',
|
|
1354
|
+
},
|
|
1355
|
+
});
|
|
1356
|
+
res.write(`data: ${errPayload}\n\n`);
|
|
1357
|
+
res.write('data: [DONE]\n\n');
|
|
1358
|
+
upstreamAbortReason = 'sse_overflow';
|
|
1359
|
+
upstreamAbort.abort();
|
|
1360
|
+
break;
|
|
1288
1361
|
}
|
|
1289
1362
|
const lines = buffer.split('\n');
|
|
1290
1363
|
buffer = lines.pop() ?? '';
|
|
@@ -1420,11 +1493,30 @@ export async function startProxy(opts = {}) {
|
|
|
1420
1493
|
}
|
|
1421
1494
|
process.exit(1);
|
|
1422
1495
|
});
|
|
1496
|
+
// One-line template summary so users can tell at a glance whether they
|
|
1497
|
+
// booted on a fresh live capture or a stale bundled fallback.
|
|
1498
|
+
console.log(`[dario] template: ${describeTemplate(CC_TEMPLATE)}`);
|
|
1499
|
+
// Drift check: compare captured CC version to the installed binary. If
|
|
1500
|
+
// they differ, force the background refresh to bypass TTL so the next
|
|
1501
|
+
// startup picks up the new capture. Drifted caches still serve the
|
|
1502
|
+
// current request — the shape is usually compatible — but we flag it.
|
|
1503
|
+
const drift = detectDrift(CC_TEMPLATE);
|
|
1504
|
+
if (drift.drifted) {
|
|
1505
|
+
console.log(`[dario] ⚠ template drift: ${drift.message}`);
|
|
1506
|
+
}
|
|
1507
|
+
// Compat check: is the installed CC inside the range this dario
|
|
1508
|
+
// release has been tested against? Only log when non-OK so the happy
|
|
1509
|
+
// path stays quiet. `unknown` (no CC on PATH) is also quiet — bundled
|
|
1510
|
+
// template will serve.
|
|
1511
|
+
const compat = checkCCCompat();
|
|
1512
|
+
if (compat.status === 'below-min' || compat.status === 'untested-above') {
|
|
1513
|
+
console.log(`[dario] ⚠ CC compat: ${compat.message}`);
|
|
1514
|
+
}
|
|
1423
1515
|
// Kick off a live fingerprint refresh in the background. Re-captures the
|
|
1424
1516
|
// user's own CC binary request shape and updates ~/.dario/cc-template.live.json
|
|
1425
1517
|
// for the next startup. No-op if CC isn't installed or the cache is fresh.
|
|
1426
1518
|
// Never blocks proxy startup; never throws.
|
|
1427
|
-
void import('./live-fingerprint.js').then(({ refreshLiveFingerprintAsync }) => refreshLiveFingerprintAsync({ silent: false }).catch(() => { }));
|
|
1519
|
+
void import('./live-fingerprint.js').then(({ refreshLiveFingerprintAsync }) => refreshLiveFingerprintAsync({ silent: false, force: drift.drifted }).catch(() => { }));
|
|
1428
1520
|
server.listen(port, host, () => {
|
|
1429
1521
|
const modeLine = passthrough
|
|
1430
1522
|
? 'Mode: passthrough (OAuth swap only, no injection)'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.16.0",
|
|
3
|
+
"version": "3.19.1",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
],
|
|
22
22
|
"scripts": {
|
|
23
23
|
"build": "tsc && cp src/cc-template-data.json dist/ && node -e \"require('fs').mkdirSync('dist/shim',{recursive:true})\" && cp src/shim/runtime.cjs dist/shim/",
|
|
24
|
-
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs",
|
|
24
|
+
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs",
|
|
25
25
|
"audit": "npm audit --production --audit-level=high",
|
|
26
26
|
"prepublishOnly": "npm run build",
|
|
27
27
|
"start": "node dist/cli.js",
|