@askalf/dario 3.24.0 → 3.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -214,7 +214,13 @@ async function proxy() {
214
214
  // calc lives in src/pacing.ts; the flags just feed it.
215
215
  const pacingMinMs = parsePositiveIntFlag('--pace-min=');
216
216
  const pacingJitterMs = parsePositiveIntFlag('--pace-jitter=');
217
- await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs });
217
+ // --drain-on-close (v3.25, direction #5). When set, a client
218
+ // disconnect no longer aborts the upstream SSE — dario keeps
219
+ // draining the stream to EOF so Anthropic sees the CC-shaped
220
+ // read-to-completion pattern. Costs tokens (the response is fully
221
+ // generated even if nobody reads it), so it's opt-in.
222
+ const drainOnClose = args.includes('--drain-on-close') || undefined;
223
+ await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose });
218
224
  }
219
225
  function parsePositiveIntFlag(prefix) {
220
226
  const found = args.find(a => a.startsWith(prefix));
@@ -486,6 +492,14 @@ async function help() {
486
492
  Default: 0 (off). Set to e.g. 300 to hide
487
493
  the floor from long-run inter-arrival
488
494
  statistics. (v3.24)
495
+ --drain-on-close When the client disconnects mid-stream,
496
+ keep consuming the upstream SSE to EOF
497
+ so Anthropic sees the same read-to-
498
+ completion pattern native Claude Code
499
+ produces. Trades tokens (the response
500
+ is fully generated even if nobody reads
501
+ it) for fingerprint fidelity. Bounded by
502
+ the 5-minute upstream timeout. (v3.25)
489
503
  --port=PORT Port to listen on (default: 3456)
490
504
  --host=ADDRESS Address to bind to (default: 127.0.0.1)
491
505
  Use 0.0.0.0 for LAN; see README for DARIO_API_KEY
package/dist/proxy.d.ts CHANGED
@@ -15,6 +15,7 @@ interface ProxyOptions {
15
15
  strictTls?: boolean;
16
16
  pacingMinMs?: number;
17
17
  pacingJitterMs?: number;
18
+ drainOnClose?: boolean;
18
19
  }
19
20
  export declare function sanitizeError(err: unknown): string;
20
21
  export declare function startProxy(opts?: ProxyOptions): Promise<void>;
package/dist/proxy.js CHANGED
@@ -584,6 +584,15 @@ export async function startProxy(opts = {}) {
584
584
  if (verbose) {
585
585
  console.log(`[dario] pacing: min=${pacingCfg.minGapMs}ms jitter=${pacingCfg.jitterMs}ms`);
586
586
  }
587
+ // Stream-consumption replay (v3.25, direction #5). When on, a client
588
+ // disconnect no longer aborts the upstream fetch — we keep consuming
589
+ // the SSE so Anthropic sees a CC-shaped read-to-EOF pattern. See
590
+ // src/stream-drain.ts for the rationale + tradeoff.
591
+ const { decideOnClientClose, resolveDrainOnClose } = await import('./stream-drain.js');
592
+ const drainOnClose = resolveDrainOnClose(opts.drainOnClose);
593
+ if (verbose) {
594
+ console.log(`[dario] drain-on-close: ${drainOnClose ? 'enabled' : 'disabled'}`);
595
+ }
587
596
  // Optional proxy authentication — pre-encode key buffer for performance
588
597
  const apiKey = process.env.DARIO_API_KEY;
589
598
  const apiKeyBuf = apiKey ? Buffer.from(apiKey) : null;
@@ -1116,11 +1125,15 @@ export async function startProxy(opts = {}) {
1116
1125
  'x-stainless-timeout': '600',
1117
1126
  };
1118
1127
  // Client-disconnect abort: if the client drops the connection before
1119
- // we've finished sending the response, we abort the upstream fetch so
1120
- // Anthropic stops generating (and billing) a response nobody will
1121
- // read. Also carries the 5-minute upstream timeout via the same
1122
- // controller, so a single signal covers both cancellation reasons.
1128
+ // we've finished sending the response, we default to aborting the
1129
+ // upstream fetch so Anthropic stops generating (and billing) a
1130
+ // response nobody will read. With `--drain-on-close` set, we
1131
+ // instead keep the reader spinning to consume the full SSE — see
1132
+ // src/stream-drain.ts for the fingerprint rationale. The 5-minute
1133
+ // upstream timeout shares the same controller, so a hung upstream
1134
+ // still gets cut off regardless of drain mode.
1123
1135
  const upstreamAbort = new AbortController();
1136
+ let clientDisconnected = false;
1124
1137
  upstreamTimeout = setTimeout(() => {
1125
1138
  if (!upstreamAbort.signal.aborted) {
1126
1139
  upstreamAbortReason = 'timeout';
@@ -1128,13 +1141,18 @@ export async function startProxy(opts = {}) {
1128
1141
  }
1129
1142
  }, UPSTREAM_TIMEOUT_MS);
1130
1143
  onClientClose = () => {
1131
- // 'close' fires on both normal teardown and client disconnect.
1132
- // We only want to abort if we haven't finished our response yet —
1133
- // normal teardown happens AFTER res.writableEnded becomes true.
1134
- if (!res.writableEnded && !upstreamAbort.signal.aborted) {
1144
+ const action = decideOnClientClose(res.writableEnded, upstreamAbort.signal.aborted, drainOnClose);
1145
+ if (action === 'abort') {
1135
1146
  upstreamAbortReason = 'client_closed';
1136
1147
  upstreamAbort.abort();
1137
1148
  }
1149
+ else if (action === 'drain') {
1150
+ clientDisconnected = true;
1151
+ if (verbose)
1152
+ console.log(`[dario] #${requestCount} client disconnected — draining upstream to EOF`);
1153
+ }
1154
+ // noop: either res is already ended (normal teardown) or upstream
1155
+ // is already aborted for another reason.
1138
1156
  };
1139
1157
  req.on('close', onClientClose);
1140
1158
  const startTime = Date.now();
@@ -1448,6 +1466,14 @@ export async function startProxy(opts = {}) {
1448
1466
  const streamMapper = ccToolMap && !isOpenAI
1449
1467
  ? createStreamingReverseMapper(ccToolMap, reqCtx)
1450
1468
  : null;
1469
+ // Gated writer — a no-op once the downstream client has gone away
1470
+ // in drain-on-close mode. The read loop keeps consuming so the
1471
+ // upstream sees a full-length read; writes to a closed socket are
1472
+ // suppressed to avoid EPIPE/warnings and pointless work.
1473
+ const writeToClient = (chunk) => {
1474
+ if (!clientDisconnected)
1475
+ res.write(chunk);
1476
+ };
1451
1477
  try {
1452
1478
  let buffer = '';
1453
1479
  const MAX_LINE_LENGTH = 1_000_000; // 1MB max per SSE line
@@ -1501,8 +1527,8 @@ export async function startProxy(opts = {}) {
1501
1527
  type: 'upstream_protocol_error',
1502
1528
  },
1503
1529
  });
1504
- res.write(`data: ${errPayload}\n\n`);
1505
- res.write('data: [DONE]\n\n');
1530
+ writeToClient(`data: ${errPayload}\n\n`);
1531
+ writeToClient('data: [DONE]\n\n');
1506
1532
  upstreamAbortReason = 'sse_overflow';
1507
1533
  upstreamAbort.abort();
1508
1534
  break;
@@ -1512,28 +1538,28 @@ export async function startProxy(opts = {}) {
1512
1538
  for (const line of lines) {
1513
1539
  const translated = translateStreamChunk(line);
1514
1540
  if (translated)
1515
- res.write(translated);
1541
+ writeToClient(translated);
1516
1542
  }
1517
1543
  }
1518
1544
  else if (streamMapper) {
1519
1545
  const out = streamMapper.feed(value);
1520
1546
  if (out.length > 0)
1521
- res.write(out);
1547
+ writeToClient(out);
1522
1548
  }
1523
1549
  else {
1524
- res.write(value);
1550
+ writeToClient(value);
1525
1551
  }
1526
1552
  }
1527
1553
  // Flush remaining buffer
1528
1554
  if (isOpenAI && buffer.trim()) {
1529
1555
  const translated = translateStreamChunk(buffer);
1530
1556
  if (translated)
1531
- res.write(translated);
1557
+ writeToClient(translated);
1532
1558
  }
1533
1559
  if (streamMapper) {
1534
1560
  const tail = streamMapper.end();
1535
1561
  if (tail.length > 0)
1536
- res.write(tail);
1562
+ writeToClient(tail);
1537
1563
  }
1538
1564
  }
1539
1565
  catch (err) {
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
3
+ *
4
+ * Native Claude Code, when it streams a response from `/v1/messages`, reads
5
+ * the SSE to its final event before closing the socket — even when the
6
+ * consumer logically already has enough. Third-party consumers routed
7
+ * through dario's proxy often abort mid-stream (close their request the
8
+ * instant they see the tool-use content block they wanted). Dario's
9
+ * default has been to propagate that abort upstream by triggering
10
+ * `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
11
+ * a billing standpoint (Anthropic stops generating, stops billing), but a
12
+ * fingerprint axis: "connection closed mid-stream" vs CC's "connection
13
+ * read to EOF" is visible on Anthropic's side.
14
+ *
15
+ * `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
16
+ * the downstream client disconnects, dario suppresses the upstream abort
17
+ * and keeps the reader loop spinning until the upstream emits its final
18
+ * event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
19
+ * linger on dead upstreams). Writes to the closed `res` are gated off;
20
+ * the reads and any accumulator state (analytics, tool-map) continue so
21
+ * the captured usage numbers are complete rather than truncated.
22
+ *
23
+ * This has a real cost — you pay tokens for a response your consumer
24
+ * isn't going to read — so it's deliberately opt-in. Users on an
25
+ * unmetered subscription who care more about fingerprint than wasted
26
+ * generation can flip it on globally.
27
+ *
28
+ * This module exposes the *decision* as a pure function so the test
29
+ * suite can exercise every branch without spinning up a socket. The
30
+ * proxy wires the decision into its existing `onClientClose` handler.
31
+ */
32
+ export type ClientCloseAction = 'abort' | 'drain' | 'noop';
33
+ /**
34
+ * Decide what `onClientClose` should do when the client's `req.on('close')`
35
+ * fires. Pure over its three inputs.
36
+ *
37
+ * `writableEnded` — `res.writableEnded` at the moment the handler
38
+ * runs. `true` means the response is already
39
+ * finished (the 'close' event is a normal
40
+ * teardown notification after res.end()) — no
41
+ * action needed.
42
+ * `upstreamAborted` — whether upstream has already been aborted for
43
+ * some other reason (timeout, overflow, pool
44
+ * failover). Don't double-abort.
45
+ * `drainOnClose` — the runtime-configured knob.
46
+ *
47
+ * Returns:
48
+ * `'noop'` — already finished / already aborted; handler should return.
49
+ * `'abort'` — fire `upstreamAbort.abort()` (the v3.24-and-earlier default).
50
+ * `'drain'` — leave upstream alive; gate off client writes; let the
51
+ * read loop consume to EOF (bounded by UPSTREAM_TIMEOUT_MS).
52
+ */
53
+ export declare function decideOnClientClose(writableEnded: boolean, upstreamAborted: boolean, drainOnClose: boolean): ClientCloseAction;
54
+ /**
55
+ * Resolve the `drainOnClose` effective setting from explicit options +
56
+ * `DARIO_DRAIN_ON_CLOSE` env var. Truthy env values: `'1'`, `'true'`,
57
+ * `'yes'` (case-insensitive). Anything else (including unset) is false.
58
+ * Explicit `true`/`false` on the options object always wins.
59
+ */
60
+ export declare function resolveDrainOnClose(explicit: boolean | undefined, env?: NodeJS.ProcessEnv): boolean;
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
3
+ *
4
+ * Native Claude Code, when it streams a response from `/v1/messages`, reads
5
+ * the SSE to its final event before closing the socket — even when the
6
+ * consumer logically already has enough. Third-party consumers routed
7
+ * through dario's proxy often abort mid-stream (close their request the
8
+ * instant they see the tool-use content block they wanted). Dario's
9
+ * default has been to propagate that abort upstream by triggering
10
+ * `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
11
+ * a billing standpoint (Anthropic stops generating, stops billing), but a
12
+ * fingerprint axis: "connection closed mid-stream" vs CC's "connection
13
+ * read to EOF" is visible on Anthropic's side.
14
+ *
15
+ * `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
16
+ * the downstream client disconnects, dario suppresses the upstream abort
17
+ * and keeps the reader loop spinning until the upstream emits its final
18
+ * event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
19
+ * linger on dead upstreams). Writes to the closed `res` are gated off;
20
+ * the reads and any accumulator state (analytics, tool-map) continue so
21
+ * the captured usage numbers are complete rather than truncated.
22
+ *
23
+ * This has a real cost — you pay tokens for a response your consumer
24
+ * isn't going to read — so it's deliberately opt-in. Users on an
25
+ * unmetered subscription who care more about fingerprint than wasted
26
+ * generation can flip it on globally.
27
+ *
28
+ * This module exposes the *decision* as a pure function so the test
29
+ * suite can exercise every branch without spinning up a socket. The
30
+ * proxy wires the decision into its existing `onClientClose` handler.
31
+ */
32
/**
 * Pick the action the proxy's `onClientClose` handler should take when the
 * client request emits 'close'. The result depends only on the three
 * arguments; nothing else is read or modified.
 *
 * Arguments:
 *   `writableEnded`   — value of `res.writableEnded` when the handler runs.
 *                       Truthy means the response has already been ended,
 *                       so 'close' is just the normal teardown signal.
 *   `upstreamAborted` — truthy when the upstream request was already
 *                       aborted for some other reason; there is nothing
 *                       left to abort a second time.
 *   `drainOnClose`    — the configured drain-on-close switch.
 *
 * Result:
 *   `'noop'`  — response finished or upstream already aborted; do nothing.
 *   `'drain'` — keep upstream alive and let the read loop run to EOF while
 *               client writes are suppressed.
 *   `'abort'` — abort the upstream request (the pre-3.25 behaviour).
 */
export function decideOnClientClose(writableEnded, upstreamAborted, drainOnClose) {
    // Guard: normal teardown, or somebody else already cancelled upstream.
    const nothingLeftToDo = writableEnded || upstreamAborted;
    if (nothingLeftToDo) {
        return 'noop';
    }
    // A genuine mid-stream disconnect: the knob selects drain vs abort.
    if (drainOnClose) {
        return 'drain';
    }
    return 'abort';
}
57
/**
 * Resolve the effective drain-on-close setting from the explicit option and
 * the `DARIO_DRAIN_ON_CLOSE` environment variable.
 *
 * Precedence:
 *   1. An explicit boolean (`true` or `false`) always wins.
 *   2. Otherwise the env value is consulted. After trimming surrounding
 *      whitespace and lower-casing, `'1'`, `'true'` and `'yes'` enable
 *      draining; anything else — including unset — disables it.
 *
 * Surrounding whitespace is ignored so that a value such as `"1 "` (easy to
 * produce by accident in a shell or env file) is not silently treated as
 * "off". A non-string value in an injected `env` object is treated as unset
 * instead of throwing.
 *
 * @param explicit - The option value; only a real boolean is honoured.
 * @param env - Environment map to read from; defaults to `process.env`.
 * @returns `true` when drain-on-close is enabled, `false` otherwise.
 */
export function resolveDrainOnClose(explicit, env = process.env) {
    if (typeof explicit === 'boolean')
        return explicit;
    const raw = env.DARIO_DRAIN_ON_CLOSE;
    // Unset (or not a string, for injected env objects) means disabled.
    if (typeof raw !== 'string')
        return false;
    const v = raw.trim().toLowerCase();
    return v === '1' || v === 'true' || v === 'yes';
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.24.0",
3
+ "version": "3.25.0",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,7 +21,7 @@
21
21
  ],
22
22
  "scripts": {
23
23
  "build": "tsc && cp src/cc-template-data.json dist/ && node -e \"require('fs').mkdirSync('dist/shim',{recursive:true})\" && cp src/shim/runtime.cjs dist/shim/",
24
- "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/pacing.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
24
+ "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/pacing.mjs && node test/stream-drain.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
25
25
  "audit": "npm audit --production --audit-level=high",
26
26
  "prepublishOnly": "npm run build",
27
27
  "start": "node dist/cli.js",