@askalf/dario 3.16.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/proxy.js CHANGED
@@ -6,7 +6,8 @@ import { join } from 'node:path';
6
6
  import { homedir } from 'node:os';
7
7
  import { arch, platform } from 'node:process';
8
8
  import { getAccessToken, getStatus } from './oauth.js';
9
- import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound } from './cc-template.js';
9
+ import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, orderHeadersForOutbound, CC_TEMPLATE } from './cc-template.js';
10
+ import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
10
11
  import { AccountPool, computeStickyKey, parseRateLimits } from './pool.js';
11
12
  import { Analytics, billingBucketFromClaim } from './analytics.js';
12
13
  import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
@@ -89,9 +90,18 @@ function extractFirstUserMessage(body) {
89
90
  }
90
91
  return '';
91
92
  }
92
- // Session ID rotates per request — fresh UUID per invocation.
93
- // A persistent session ID across many requests is a behavioral fingerprint.
93
+ // Session ID behavior (single-account mode):
94
+ // v3.18 rotated per request, which was itself a fingerprint. Real CC
95
+ // rotates roughly once per conversation, not per call. A user who has
96
+ // distinct session-ids for every request looks nothing like a CC user.
97
+ //
98
+ // v3.19 keeps the id stable through a conversation window and rotates
99
+ // only after an idle gap long enough to credibly indicate a new
100
+ // conversation (SESSION_IDLE_ROTATE_MS). Pool mode still uses the
101
+ // per-account identity.sessionId (stable across the account's lifetime).
94
102
  let SESSION_ID = randomUUID();
103
+ let SESSION_LAST_USED = 0;
104
+ const SESSION_IDLE_ROTATE_MS = 15 * 60 * 1000;
95
105
  const OS_NAME = platform === 'win32' ? 'Windows' : platform === 'darwin' ? 'MacOS' : 'Linux';
96
106
  // Claude Code device identity — required for Max plan billing classification.
97
107
  // Without metadata.user_id, Anthropic classifies requests as third-party and
@@ -476,6 +486,18 @@ export async function startProxy(opts = {}) {
476
486
  // Claude Code runs on Bun which reports v24.3.0 as Node compat version
477
487
  'x-stainless-runtime-version': 'v24.3.0',
478
488
  };
489
+ // Overlay captured header values from the live template (schema v2). This
490
+ // replaces the hardcoded stainless/user-agent constants with whatever CC
491
+ // actually emitted on the capture, so a CC release that nudges any of those
492
+ // values gets reflected automatically on the next template refresh.
493
+ // Excludes auth + body-framing + session-scoped keys by construction (see
494
+ // extractStaticHeaderValues in live-fingerprint.ts). No-op when the loaded
495
+ // template predates v2 or the bundled snapshot is in use.
496
+ if (!passthrough && CC_TEMPLATE.header_values) {
497
+ for (const [k, v] of Object.entries(CC_TEMPLATE.header_values)) {
498
+ staticHeaders[k] = v;
499
+ }
500
+ }
479
501
  let requestCount = 0;
480
502
  const semaphore = new Semaphore(MAX_CONCURRENT);
481
503
  // Cache context-1m beta availability. Set false once per account (or process
@@ -486,6 +508,14 @@ export async function startProxy(opts = {}) {
486
508
  // retry loop was firing on every POST with hybrid-tools + OC.
487
509
  const context1mUnavailable = new Set();
488
510
  const ACCOUNT_KEY_SINGLE = '__default__';
511
+ // Beta flag set — sourced from the live template when the capture recorded
512
+ // one (schema v2+), else falls back to the v2.1.104 bundled default. Same
513
+ // fallback string shim/runtime.cjs uses (kept in sync so proxy and shim
514
+ // never diverge on the wire). Computed once per proxy because it's a
515
+ // function of the loaded template, not of the request.
516
+ const BETA_FALLBACK = 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
517
+ const betaBase = CC_TEMPLATE.anthropic_beta || BETA_FALLBACK;
518
+ const betaWithoutContext1m = betaBase.split(',').filter((t) => t !== 'context-1m-2025-08-07').join(',');
489
519
  // Rate governor — minimum 500ms between requests. Fast enough for agents,
490
520
  // slow enough to not look like a scripted flood of identical traffic.
491
521
  let lastRequestTime = 0;
@@ -591,6 +621,19 @@ export async function startProxy(opts = {}) {
591
621
  res.end(JSON.stringify({ error: 'malformed borrow envelope' }));
592
622
  return;
593
623
  }
624
+ // Envelope shape guard — envelope.request is `unknown` on the wire.
625
+ // We stringify it and forward to Anthropic under the lender's identity,
626
+ // so a borrower could otherwise waste the lender's rate-limit slot with
627
+ // a body Anthropic will reject. Minimum: must be a plain object with
628
+ // `model` (string) and `messages` (array). Anthropic validates the rest.
629
+ const br = envelope.request;
630
+ if (!br || typeof br !== 'object' || Array.isArray(br) ||
631
+ typeof br.model !== 'string' ||
632
+ !Array.isArray(br.messages)) {
633
+ res.writeHead(400, JSON_HEADERS);
634
+ res.end(JSON.stringify({ error: 'envelope.request must be an Anthropic /v1/messages body' }));
635
+ return;
636
+ }
594
637
  if (envelope.groupId !== groupLender.groupId) {
595
638
  res.writeHead(403, JSON_HEADERS);
596
639
  res.end(JSON.stringify({ error: 'unknown_group', expected: groupLender.groupId }));
@@ -926,15 +969,15 @@ export async function startProxy(opts = {}) {
926
969
  beta += ',' + clientBeta;
927
970
  }
928
971
  else {
929
- // CC v2.1.104 beta set 8 flags in the order Claude Code sends them.
972
+ // Beta set sourced from the live template (schema v2). Bundled
973
+ // snapshots predating v3.19 leave anthropic_beta undefined, so fall
974
+ // back to the v2.1.104 flag set — matches shim/runtime.cjs's fallback.
930
975
  // context-1m requires Extra Usage — if it 400s, we auto-retry without
931
976
  // it, and cache the rejection so subsequent requests on this account
932
977
  // skip context-1m entirely (dario#36).
933
978
  const acctKey = poolAccount?.alias ?? ACCOUNT_KEY_SINGLE;
934
979
  const skipContext1m = context1mUnavailable.has(acctKey);
935
- beta = skipContext1m
936
- ? 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24'
937
- : 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
980
+ beta = skipContext1m ? betaWithoutContext1m : betaBase;
938
981
  if (clientBeta) {
939
982
  const baseSet = new Set(beta.split(','));
940
983
  const filtered = filterBillableBetas(clientBeta)
@@ -950,11 +993,18 @@ export async function startProxy(opts = {}) {
950
993
  await new Promise(r => setTimeout(r, MIN_REQUEST_INTERVAL_MS - elapsed));
951
994
  }
952
995
  lastRequestTime = Date.now();
953
- // Rotate session ID per request fresh UUID avoids persistent-session fingerprinting.
954
- // Pool mode uses the per-account identity.sessionId which is stable across
955
- // a given account's lifetime; single-account mode rotates per request.
956
- if (!poolAccount)
957
- SESSION_ID = randomUUID();
996
+ // Session ID: pool mode uses the per-account identity.sessionId (stable
997
+ // per account). Single-account mode keeps SESSION_ID stable through
998
+ // active conversations and rotates only after an idle gap that looks
999
+ // like a new conversation — matches CC's observed cadence (see note
1000
+ // at SESSION_ID declaration).
1001
+ if (!poolAccount) {
1002
+ const nowTs = Date.now();
1003
+ if (SESSION_LAST_USED === 0 || nowTs - SESSION_LAST_USED > SESSION_IDLE_ROTATE_MS) {
1004
+ SESSION_ID = randomUUID();
1005
+ }
1006
+ SESSION_LAST_USED = nowTs;
1007
+ }
958
1008
  const outboundSessionId = poolAccount ? poolAccount.identity.sessionId : SESSION_ID;
959
1009
  const headers = {
960
1010
  ...staticHeaders,
@@ -1044,7 +1094,13 @@ export async function startProxy(opts = {}) {
1044
1094
  context1mUnavailable.add(acctKey);
1045
1095
  if (verbose && firstRejection)
1046
1096
  console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it (cached for session)`);
1047
- const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
1097
+ // Rebuild via array filter instead of string replace so the output
1098
+ // is byte-identical to a request that started without context-1m
1099
+ // (skipContext1m path above). The old chained replace() calls relied
1100
+ // on the flag sitting next to a comma and matched substrings rather
1101
+ // than whole tokens; filtering the split list removes exactly that
1102
+ // token wherever it appears and funnels both paths through one filter.
1103
+ const reducedBeta = beta.split(',').filter((t) => t !== 'context-1m-2025-08-07').join(',');
1048
1104
  const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
1049
1105
  const retry = await fetch(targetBase, {
1050
1106
  method: req.method ?? 'POST',
@@ -1282,9 +1338,26 @@ export async function startProxy(opts = {}) {
1282
1338
  if (isOpenAI) {
1283
1339
  // Translate Anthropic SSE → OpenAI SSE
1284
1340
  buffer += decoder.decode(value, { stream: true });
1285
- // Guard against unbounded buffer growth
1341
+ // Reject oversized SSE lines instead of silently truncating.
1342
+ // Truncation hid protocol bugs (a runaway upstream event would
1343
+ // stream indefinitely with the tail rewritten each chunk) and
1344
+ // guaranteed a malformed JSON parse at the client. Since we've
1345
+ // already sent 200 and an SSE content-type, the cleanest exit
1346
+ // is an error event in OpenAI shape + [DONE] sentinel + abort.
1286
1347
  if (buffer.length > MAX_LINE_LENGTH) {
1287
- buffer = buffer.slice(-MAX_LINE_LENGTH);
1348
+ if (verbose)
1349
+ console.warn(`[dario] #${requestCount} SSE line exceeded ${MAX_LINE_LENGTH}B — aborting stream`);
1350
+ const errPayload = JSON.stringify({
1351
+ error: {
1352
+ message: `Upstream SSE line exceeded ${MAX_LINE_LENGTH} bytes`,
1353
+ type: 'upstream_protocol_error',
1354
+ },
1355
+ });
1356
+ res.write(`data: ${errPayload}\n\n`);
1357
+ res.write('data: [DONE]\n\n');
1358
+ upstreamAbortReason = 'sse_overflow';
1359
+ upstreamAbort.abort();
1360
+ break;
1288
1361
  }
1289
1362
  const lines = buffer.split('\n');
1290
1363
  buffer = lines.pop() ?? '';
@@ -1420,11 +1493,30 @@ export async function startProxy(opts = {}) {
1420
1493
  }
1421
1494
  process.exit(1);
1422
1495
  });
1496
+ // One-line template summary so users can tell at a glance whether they
1497
+ // booted on a fresh live capture or a stale bundled fallback.
1498
+ console.log(`[dario] template: ${describeTemplate(CC_TEMPLATE)}`);
1499
+ // Drift check: compare captured CC version to the installed binary. If
1500
+ // they differ, force the background refresh to bypass TTL so the next
1501
+ // startup picks up the new capture. Drifted caches still serve the
1502
+ // current request — the shape is usually compatible — but we flag it.
1503
+ const drift = detectDrift(CC_TEMPLATE);
1504
+ if (drift.drifted) {
1505
+ console.log(`[dario] ⚠ template drift: ${drift.message}`);
1506
+ }
1507
+ // Compat check: is the installed CC inside the range this dario
1508
+ // release has been tested against? Only log when non-OK so the happy
1509
+ // path stays quiet. `unknown` (no CC on PATH) is also quiet — bundled
1510
+ // template will serve.
1511
+ const compat = checkCCCompat();
1512
+ if (compat.status === 'below-min' || compat.status === 'untested-above') {
1513
+ console.log(`[dario] ⚠ CC compat: ${compat.message}`);
1514
+ }
1423
1515
  // Kick off a live fingerprint refresh in the background. Re-captures the
1424
1516
  // user's own CC binary request shape and updates ~/.dario/cc-template.live.json
1425
1517
  // for the next startup. No-op if CC isn't installed or the cache is fresh.
1426
1518
  // Never blocks proxy startup; never throws.
1427
- void import('./live-fingerprint.js').then(({ refreshLiveFingerprintAsync }) => refreshLiveFingerprintAsync({ silent: false }).catch(() => { }));
1519
+ void import('./live-fingerprint.js').then(({ refreshLiveFingerprintAsync }) => refreshLiveFingerprintAsync({ silent: false, force: drift.drifted }).catch(() => { }));
1428
1520
  server.listen(port, host, () => {
1429
1521
  const modeLine = passthrough
1430
1522
  ? 'Mode: passthrough (OAuth swap only, no injection)'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.16.0",
3
+ "version": "3.19.0",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,7 +21,7 @@
21
21
  ],
22
22
  "scripts": {
23
23
  "build": "tsc && cp src/cc-template-data.json dist/ && node -e \"require('fs').mkdirSync('dist/shim',{recursive:true})\" && cp src/shim/runtime.cjs dist/shim/",
24
- "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs",
24
+ "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs",
25
25
  "audit": "npm audit --production --audit-level=high",
26
26
  "prepublishOnly": "npm run build",
27
27
  "start": "node dist/cli.js",