@askalf/dario 3.23.0 → 3.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -208,7 +208,31 @@ async function proxy() {
208
208
  const strictTls = args.includes('--strict-tls');
209
209
  const modelArg = args.find(a => a.startsWith('--model='));
210
210
  const model = modelArg ? modelArg.split('=')[1] : undefined;
211
- await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls });
211
+ // --pace-min=MS / --pace-jitter=MS (v3.24, direction #6 behavioral
212
+ // smoothing). Inter-request gap floor + optional uniform-random jitter.
213
+ // Defaults preserve v3.23 behavior (500ms floor, no jitter). The pure
214
+ // calc lives in src/pacing.ts; the flags just feed it.
215
+ const pacingMinMs = parsePositiveIntFlag('--pace-min=');
216
+ const pacingJitterMs = parsePositiveIntFlag('--pace-jitter=');
217
+ // --drain-on-close (v3.25, direction #5). When set, a client
218
+ // disconnect no longer aborts the upstream SSE — dario keeps
219
+ // draining the stream to EOF so Anthropic sees the CC-shaped
220
+ // read-to-completion pattern. Costs tokens (the response is fully
221
+ // generated even if nobody reads it), so it's opt-in.
222
+ const drainOnClose = args.includes('--drain-on-close') || undefined;
223
+ await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose });
224
+ }
225
+ function parsePositiveIntFlag(prefix) {
226
+ const found = args.find(a => a.startsWith(prefix));
227
+ if (!found)
228
+ return undefined;
229
+ const raw = found.slice(prefix.length);
230
+ const n = parseInt(raw, 10);
231
+ if (!Number.isFinite(n) || n < 0) {
232
+ console.error(`[dario] Invalid ${prefix.replace(/=$/, '')} value: ${JSON.stringify(raw)}. Must be a non-negative integer (ms).`);
233
+ process.exit(1);
234
+ }
235
+ return n;
212
236
  }
213
237
  async function accounts() {
214
238
  const sub = args[1];
@@ -459,6 +483,23 @@ async function help() {
459
483
  from a stock CC request. Install Bun
460
484
  (https://bun.sh) so dario auto-relaunches
461
485
  under it, or use shim mode. (v3.23)
486
+ --pace-min=MS Minimum ms between upstream requests
487
+ (default: 500). Prevents request floods
488
+ that are distinguishable from human-paced
489
+ CC traffic.
490
+ --pace-jitter=MS Max additional uniform-random jitter (ms)
491
+ added on top of --pace-min per request.
492
+ Default: 0 (off). Set to e.g. 300 to hide
493
+ the floor from long-run inter-arrival
494
+ statistics. (v3.24)
495
+ --drain-on-close When the client disconnects mid-stream,
496
+ keep consuming the upstream SSE to EOF
497
+ so Anthropic sees the same read-to-
498
+ completion pattern native Claude Code
499
+ produces. Trades tokens (the response
500
+ is fully generated even if nobody reads
501
+ it) for fingerprint fidelity. Bounded by
502
+ the 5-minute upstream timeout. (v3.25)
462
503
  --port=PORT Port to listen on (default: 3456)
463
504
  --host=ADDRESS Address to bind to (default: 127.0.0.1)
464
505
  Use 0.0.0.0 for LAN; see README for DARIO_API_KEY
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Inter-request pacing (v3.24, direction #6 — behavioral smoothing).
3
+ *
4
+ * Real CC traffic has human-paced gaps between requests — sub-second when
5
+ * the model is streaming tool-loop output, multi-second when the user is
6
+ * typing the next message. A proxy that fires requests at machine speed
7
+ * with perfectly uniform spacing stands out against that rhythm.
8
+ *
9
+ * This module supplies the pure gap-calculation function the proxy's
10
+ * rate governor calls before every outbound fetch. Two knobs:
11
+ *
12
+ * minGapMs — lower bound on the wall-clock distance between requests.
13
+ * Was a hardcoded 500ms through v3.23; keep 500 as default
14
+ * so back-compat is exact when both knobs stay at defaults.
15
+ *
16
+ * jitterMs — uniform random addition on top of minGap. The *effective*
17
+ * gap for a given request is minGap + U(0, jitter). Adds
18
+ * non-uniformity so an observer can't infer the floor from
19
+ * the long-run minimum of inter-arrival times.
20
+ *
21
+ * Pure over (now, lastRequestTime, minGap, jitter, rng) so the tests can
22
+ * exercise every edge without spawning timers. The proxy passes
23
+ * `Math.random` as the rng at runtime; tests pass a deterministic stub.
24
+ *
25
+ * The first request in a session (lastRequestTime === 0) is never paced —
26
+ * the purpose is smoothing the *gap between* requests, not delaying the
27
+ * first one from whenever the consumer happens to connect.
28
+ */
29
+ export interface PacingConfig {
30
+ /** Minimum wall-clock milliseconds between the start of one upstream request and the start of the next. */
31
+ minGapMs: number;
32
+ /** Max additional uniform-random jitter (ms) added on top of minGap. Pass 0 to disable. */
33
+ jitterMs: number;
34
+ }
35
+ /**
36
+ * How many milliseconds to sleep before the next upstream fetch.
37
+ *
38
+ * Returns 0 when no delay is required — either because this is the first
39
+ * request of the session, or enough wall-clock time has already elapsed
40
+ * since `lastRequestTime`.
41
+ *
42
+ * `rng` defaults to Math.random; tests inject a deterministic stub.
43
+ * Negative configuration values are clamped to 0 (lenient, not an error).
44
+ */
45
+ export declare function computePacingDelay(now: number, lastRequestTime: number, cfg: PacingConfig, rng?: () => number): number;
46
+ /**
47
+ * Resolve a PacingConfig from explicit options, env vars, and defaults.
48
+ *
49
+ * Precedence (highest first):
50
+ * 1. Explicit argument (typically from CLI flag)
51
+ * 2. DARIO_PACE_MIN_MS / DARIO_PACE_JITTER_MS env vars
52
+ * 3. Legacy DARIO_MIN_INTERVAL_MS env var (minGap only — matches v3.23
53
+ * behavior so existing setups don't regress silently)
54
+ * 4. Defaults: minGap=500, jitter=0
55
+ *
56
+ * Invalid strings (non-numeric, negative) are ignored and fall through to
57
+ * the next source — a typoed env var shouldn't fail-loud at startup.
58
+ */
59
+ export declare function resolvePacingConfig(explicit?: {
60
+ minGapMs?: number;
61
+ jitterMs?: number;
62
+ }, env?: NodeJS.ProcessEnv): PacingConfig;
package/dist/pacing.js ADDED
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Inter-request pacing (v3.24, direction #6 — behavioral smoothing).
3
+ *
4
+ * Real CC traffic has human-paced gaps between requests — sub-second when
5
+ * the model is streaming tool-loop output, multi-second when the user is
6
+ * typing the next message. A proxy that fires requests at machine speed
7
+ * with perfectly uniform spacing stands out against that rhythm.
8
+ *
9
+ * This module supplies the pure gap-calculation function the proxy's
10
+ * rate governor calls before every outbound fetch. Two knobs:
11
+ *
12
+ * minGapMs — lower bound on the wall-clock distance between requests.
13
+ * Was a hardcoded 500ms through v3.23; keep 500 as default
14
+ * so back-compat is exact when both knobs stay at defaults.
15
+ *
16
+ * jitterMs — uniform random addition on top of minGap. The *effective*
17
+ * gap for a given request is minGap + U(0, jitter). Adds
18
+ * non-uniformity so an observer can't infer the floor from
19
+ * the long-run minimum of inter-arrival times.
20
+ *
21
+ * Pure over (now, lastRequestTime, minGap, jitter, rng) so the tests can
22
+ * exercise every edge without spawning timers. The proxy passes
23
+ * `Math.random` as the rng at runtime; tests pass a deterministic stub.
24
+ *
25
+ * The first request in a session (lastRequestTime === 0) is never paced —
26
+ * the purpose is smoothing the *gap between* requests, not delaying the
27
+ * first one from whenever the consumer happens to connect.
28
+ */
29
/**
 * How many milliseconds to sleep before the next upstream fetch.
 *
 * Returns 0 when no delay is required — either because this is the first
 * request of the session (`lastRequestTime` is not a positive timestamp),
 * or enough wall-clock time has already elapsed since `lastRequestTime`.
 *
 * The effective gap is `minGapMs + floor(rng() * jitterMs)`. The returned
 * delay never exceeds that gap: if the wall clock stepped backwards
 * (`now < lastRequestTime`), the elapsed time is treated as 0 rather than
 * letting a negative value inflate the sleep.
 *
 * `rng` defaults to Math.random; tests inject a deterministic stub.
 * Negative or non-finite configuration values are treated as 0 (lenient,
 * not an error).
 *
 * @param now             Current timestamp in ms.
 * @param lastRequestTime Timestamp in ms of the previous request, or 0 if none.
 * @param cfg             Object with `minGapMs` and `jitterMs`.
 * @param rng             Source of uniform randoms in [0, 1).
 * @returns Non-negative number of milliseconds to wait.
 */
export function computePacingDelay(now, lastRequestTime, cfg, rng = Math.random) {
    // Written as a negated `>` so a NaN timestamp also counts as "no
    // previous request" instead of poisoning the arithmetic below.
    if (!(lastRequestTime > 0))
        return 0;
    const clampMs = (v) => (Number.isFinite(v) && v > 0 ? v : 0);
    const minGap = clampMs(cfg.minGapMs);
    const jitter = clampMs(cfg.jitterMs);
    const jitterAdd = jitter > 0 ? Math.floor(rng() * jitter) : 0;
    const effectiveGap = minGap + jitterAdd;
    // Date.now() is not monotonic. A backwards clock step would make the
    // raw difference negative and the resulting sleep arbitrarily long.
    const rawElapsed = now - lastRequestTime;
    const elapsed = rawElapsed > 0 ? rawElapsed : 0;
    if (elapsed >= effectiveGap)
        return 0;
    return effectiveGap - elapsed;
}
51
/**
 * Resolve a PacingConfig from explicit options, env vars, and defaults.
 *
 * Precedence (highest first):
 *   1. Explicit argument (typically from CLI flag)
 *   2. DARIO_PACE_MIN_MS / DARIO_PACE_JITTER_MS env vars
 *   3. Legacy DARIO_MIN_INTERVAL_MS env var (minGap only)
 *   4. Defaults: minGap=500, jitter=0
 *
 * Invalid values (non-numeric, trailing garbage such as "12abc" or "1e3",
 * negative, non-finite) are ignored and fall through to the next source —
 * a typoed env var shouldn't fail-loud at startup, but it also must not be
 * half-parsed into a surprising number.
 *
 * @param explicit Optional `{ minGapMs, jitterMs }` overrides.
 * @param env      Environment map to read from; defaults to `process.env`.
 * @returns `{ minGapMs, jitterMs }`, both non-negative integers.
 */
export function resolvePacingConfig(explicit = {}, env = process.env) {
    const minGap = pickNonNegativeInt(explicit.minGapMs, env.DARIO_PACE_MIN_MS, env.DARIO_MIN_INTERVAL_MS) ?? 500;
    const jitter = pickNonNegativeInt(explicit.jitterMs, env.DARIO_PACE_JITTER_MS) ?? 0;
    return { minGapMs: minGap, jitterMs: jitter };
}
/**
 * Return the first candidate that is a valid non-negative number, floored
 * to an integer, or `undefined` when none qualifies. Candidates may be
 * numbers or strings; `undefined`, `null` and `''` are skipped.
 */
function pickNonNegativeInt(...candidates) {
    for (const c of candidates) {
        if (c === undefined || c === null || c === '')
            continue;
        let n;
        if (typeof c === 'number') {
            n = c;
        }
        else {
            // Match the whole (trimmed) string. parseInt() would accept
            // "12abc" as 12 and read "1e3" as 1.
            const text = String(c).trim();
            n = /^\+?\d+(\.\d+)?$/.test(text) ? Number(text) : NaN;
        }
        if (Number.isFinite(n) && n >= 0)
            return Math.floor(n);
    }
    return undefined;
}
package/dist/proxy.d.ts CHANGED
@@ -13,6 +13,9 @@ interface ProxyOptions {
13
13
  hybridTools?: boolean;
14
14
  noAutoDetect?: boolean;
15
15
  strictTls?: boolean;
16
+ pacingMinMs?: number;
17
+ pacingJitterMs?: number;
18
+ drainOnClose?: boolean;
16
19
  }
17
20
  export declare function sanitizeError(err: unknown): string;
18
21
  export declare function startProxy(opts?: ProxyOptions): Promise<void>;
package/dist/proxy.js CHANGED
@@ -571,10 +571,28 @@ export async function startProxy(opts = {}) {
571
571
  betaBase = betaBase ? `${betaBase},oauth-2025-04-20` : 'oauth-2025-04-20';
572
572
  }
573
573
  const betaWithoutContext1m = betaBase.split(',').filter((t) => t !== 'context-1m-2025-08-07').join(',');
574
- // Rate governor — minimum 500ms between requests. Fast enough for agents,
575
- // slow enough to not look like a scripted flood of identical traffic.
574
+ // Rate governor — floor + optional jitter between requests. A default
575
+ // 500ms floor keeps the default behavior identical to v3.23; `--pace-min`
576
+ // and `--pace-jitter` let callers tune the distribution. Pure calc lives
577
+ // in src/pacing.ts so the edge cases are unit-tested without timers.
578
+ const { computePacingDelay, resolvePacingConfig } = await import('./pacing.js');
576
579
  let lastRequestTime = 0;
577
- const MIN_REQUEST_INTERVAL_MS = parseInt(process.env.DARIO_MIN_INTERVAL_MS || '500', 10);
580
+ const pacingCfg = resolvePacingConfig({
581
+ minGapMs: opts.pacingMinMs,
582
+ jitterMs: opts.pacingJitterMs,
583
+ });
584
+ if (verbose) {
585
+ console.log(`[dario] pacing: min=${pacingCfg.minGapMs}ms jitter=${pacingCfg.jitterMs}ms`);
586
+ }
587
+ // Stream-consumption replay (v3.25, direction #5). When on, a client
588
+ // disconnect no longer aborts the upstream fetch — we keep consuming
589
+ // the SSE so Anthropic sees a CC-shaped read-to-EOF pattern. See
590
+ // src/stream-drain.ts for the rationale + tradeoff.
591
+ const { decideOnClientClose, resolveDrainOnClose } = await import('./stream-drain.js');
592
+ const drainOnClose = resolveDrainOnClose(opts.drainOnClose);
593
+ if (verbose) {
594
+ console.log(`[dario] drain-on-close: ${drainOnClose ? 'enabled' : 'disabled'}`);
595
+ }
578
596
  // Optional proxy authentication — pre-encode key buffer for performance
579
597
  const apiKey = process.env.DARIO_API_KEY;
580
598
  const apiKeyBuf = apiKey ? Buffer.from(apiKey) : null;
@@ -1076,11 +1094,11 @@ export async function startProxy(opts = {}) {
1076
1094
  beta = beta.split(',').filter((t) => t.length > 0 && !rejectedSet.has(t)).join(',');
1077
1095
  }
1078
1096
  }
1079
- // Rate governor — prevent inhuman request cadence
1080
- const now = Date.now();
1081
- const elapsed = now - lastRequestTime;
1082
- if (elapsed < MIN_REQUEST_INTERVAL_MS && lastRequestTime > 0) {
1083
- await new Promise(r => setTimeout(r, MIN_REQUEST_INTERVAL_MS - elapsed));
1097
+ // Rate governor — prevent inhuman request cadence. See src/pacing.ts
1098
+ // for the pure delay calculator (floor + uniform jitter).
1099
+ const pacingDelay = computePacingDelay(Date.now(), lastRequestTime, pacingCfg);
1100
+ if (pacingDelay > 0) {
1101
+ await new Promise(r => setTimeout(r, pacingDelay));
1084
1102
  }
1085
1103
  lastRequestTime = Date.now();
1086
1104
  // Session ID: pool mode uses the per-account identity.sessionId (stable
@@ -1107,11 +1125,15 @@ export async function startProxy(opts = {}) {
1107
1125
  'x-stainless-timeout': '600',
1108
1126
  };
1109
1127
  // Client-disconnect abort: if the client drops the connection before
1110
- // we've finished sending the response, we abort the upstream fetch so
1111
- // Anthropic stops generating (and billing) a response nobody will
1112
- // read. Also carries the 5-minute upstream timeout via the same
1113
- // controller, so a single signal covers both cancellation reasons.
1128
+ // we've finished sending the response, we default to aborting the
1129
+ // upstream fetch so Anthropic stops generating (and billing) a
1130
+ // response nobody will read. With `--drain-on-close` set, we
1131
+ // instead keep the reader spinning to consume the full SSE — see
1132
+ // src/stream-drain.ts for the fingerprint rationale. The 5-minute
1133
+ // upstream timeout shares the same controller, so a hung upstream
1134
+ // still gets cut off regardless of drain mode.
1114
1135
  const upstreamAbort = new AbortController();
1136
+ let clientDisconnected = false;
1115
1137
  upstreamTimeout = setTimeout(() => {
1116
1138
  if (!upstreamAbort.signal.aborted) {
1117
1139
  upstreamAbortReason = 'timeout';
@@ -1119,13 +1141,18 @@ export async function startProxy(opts = {}) {
1119
1141
  }
1120
1142
  }, UPSTREAM_TIMEOUT_MS);
1121
1143
  onClientClose = () => {
1122
- // 'close' fires on both normal teardown and client disconnect.
1123
- // We only want to abort if we haven't finished our response yet —
1124
- // normal teardown happens AFTER res.writableEnded becomes true.
1125
- if (!res.writableEnded && !upstreamAbort.signal.aborted) {
1144
+ const action = decideOnClientClose(res.writableEnded, upstreamAbort.signal.aborted, drainOnClose);
1145
+ if (action === 'abort') {
1126
1146
  upstreamAbortReason = 'client_closed';
1127
1147
  upstreamAbort.abort();
1128
1148
  }
1149
+ else if (action === 'drain') {
1150
+ clientDisconnected = true;
1151
+ if (verbose)
1152
+ console.log(`[dario] #${requestCount} client disconnected — draining upstream to EOF`);
1153
+ }
1154
+ // noop: either res is already ended (normal teardown) or upstream
1155
+ // is already aborted for another reason.
1129
1156
  };
1130
1157
  req.on('close', onClientClose);
1131
1158
  const startTime = Date.now();
@@ -1439,6 +1466,14 @@ export async function startProxy(opts = {}) {
1439
1466
  const streamMapper = ccToolMap && !isOpenAI
1440
1467
  ? createStreamingReverseMapper(ccToolMap, reqCtx)
1441
1468
  : null;
1469
+ // Gated writer — a no-op once the downstream client has gone away
1470
+ // in drain-on-close mode. The read loop keeps consuming so the
1471
+ // upstream sees a full-length read; writes to a closed socket are
1472
+ // suppressed to avoid EPIPE/warnings and pointless work.
1473
+ const writeToClient = (chunk) => {
1474
+ if (!clientDisconnected)
1475
+ res.write(chunk);
1476
+ };
1442
1477
  try {
1443
1478
  let buffer = '';
1444
1479
  const MAX_LINE_LENGTH = 1_000_000; // 1MB max per SSE line
@@ -1492,8 +1527,8 @@ export async function startProxy(opts = {}) {
1492
1527
  type: 'upstream_protocol_error',
1493
1528
  },
1494
1529
  });
1495
- res.write(`data: ${errPayload}\n\n`);
1496
- res.write('data: [DONE]\n\n');
1530
+ writeToClient(`data: ${errPayload}\n\n`);
1531
+ writeToClient('data: [DONE]\n\n');
1497
1532
  upstreamAbortReason = 'sse_overflow';
1498
1533
  upstreamAbort.abort();
1499
1534
  break;
@@ -1503,28 +1538,28 @@ export async function startProxy(opts = {}) {
1503
1538
  for (const line of lines) {
1504
1539
  const translated = translateStreamChunk(line);
1505
1540
  if (translated)
1506
- res.write(translated);
1541
+ writeToClient(translated);
1507
1542
  }
1508
1543
  }
1509
1544
  else if (streamMapper) {
1510
1545
  const out = streamMapper.feed(value);
1511
1546
  if (out.length > 0)
1512
- res.write(out);
1547
+ writeToClient(out);
1513
1548
  }
1514
1549
  else {
1515
- res.write(value);
1550
+ writeToClient(value);
1516
1551
  }
1517
1552
  }
1518
1553
  // Flush remaining buffer
1519
1554
  if (isOpenAI && buffer.trim()) {
1520
1555
  const translated = translateStreamChunk(buffer);
1521
1556
  if (translated)
1522
- res.write(translated);
1557
+ writeToClient(translated);
1523
1558
  }
1524
1559
  if (streamMapper) {
1525
1560
  const tail = streamMapper.end();
1526
1561
  if (tail.length > 0)
1527
- res.write(tail);
1562
+ writeToClient(tail);
1528
1563
  }
1529
1564
  }
1530
1565
  catch (err) {
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
3
+ *
4
+ * Native Claude Code, when it streams a response from `/v1/messages`, reads
5
+ * the SSE to its final event before closing the socket — even when the
6
+ * consumer logically already has enough. Third-party consumers routed
7
+ * through dario's proxy often abort mid-stream (close their request the
8
+ * instant they see the tool-use content block they wanted). Dario's
9
+ * default has been to propagate that abort upstream by triggering
10
+ * `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
11
+ * a billing standpoint (Anthropic stops generating, stops billing), but a
12
+ * fingerprint axis: "connection closed mid-stream" vs CC's "connection
13
+ * read to EOF" is visible on Anthropic's side.
14
+ *
15
+ * `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
16
+ * the downstream client disconnects, dario suppresses the upstream abort
17
+ * and keeps the reader loop spinning until the upstream emits its final
18
+ * event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
19
+ * linger on dead upstreams). Writes to the closed `res` are gated off;
20
+ * the reads and any accumulator state (analytics, tool-map) continue so
21
+ * the captured usage numbers are complete rather than truncated.
22
+ *
23
+ * This has a real cost — you pay tokens for a response your consumer
24
+ * isn't going to read — so it's deliberately opt-in. Users on an
25
+ * unmetered subscription who care more about fingerprint than wasted
26
+ * generation can flip it on globally.
27
+ *
28
+ * This module exposes the *decision* as a pure function so the test
29
+ * suite can exercise every branch without spinning up a socket. The
30
+ * proxy wires the decision into its existing `onClientClose` handler.
31
+ */
32
+ export type ClientCloseAction = 'abort' | 'drain' | 'noop';
33
+ /**
34
+ * Decide what `onClientClose` should do when the client's `req.on('close')`
35
+ * fires. Pure over its three inputs.
36
+ *
37
+ * `writableEnded` — `res.writableEnded` at the moment the handler
38
+ * runs. `true` means the response is already
39
+ * finished (the 'close' event is a normal
40
+ * teardown notification after res.end()) — no
41
+ * action needed.
42
+ * `upstreamAborted` — whether upstream has already been aborted for
43
+ * some other reason (timeout, overflow, pool
44
+ * failover). Don't double-abort.
45
+ * `drainOnClose` — the runtime-configured knob.
46
+ *
47
+ * Returns:
48
+ * `'noop'` — already finished / already aborted; handler should return.
49
+ * `'abort'` — fire `upstreamAbort.abort()` (the default; the only behavior through v3.24).
50
+ * `'drain'` — leave upstream alive; gate off client writes; let the
51
+ * read loop consume to EOF (bounded by UPSTREAM_TIMEOUT_MS).
52
+ */
53
+ export declare function decideOnClientClose(writableEnded: boolean, upstreamAborted: boolean, drainOnClose: boolean): ClientCloseAction;
54
+ /**
55
+ * Resolve the `drainOnClose` effective setting from explicit options +
56
+ * `DARIO_DRAIN_ON_CLOSE` env var. Truthy env values: `'1'`, `'true'`,
57
+ * `'yes'` (case-insensitive). Anything else (including unset) is false.
58
+ * Explicit `true`/`false` on the options object always wins.
59
+ */
60
+ export declare function resolveDrainOnClose(explicit: boolean | undefined, env?: NodeJS.ProcessEnv): boolean;
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
3
+ *
4
+ * Native Claude Code, when it streams a response from `/v1/messages`, reads
5
+ * the SSE to its final event before closing the socket — even when the
6
+ * consumer logically already has enough. Third-party consumers routed
7
+ * through dario's proxy often abort mid-stream (close their request the
8
+ * instant they see the tool-use content block they wanted). Dario's
9
+ * default has been to propagate that abort upstream by triggering
10
+ * `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
11
+ * a billing standpoint (Anthropic stops generating, stops billing), but a
12
+ * fingerprint axis: "connection closed mid-stream" vs CC's "connection
13
+ * read to EOF" is visible on Anthropic's side.
14
+ *
15
+ * `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
16
+ * the downstream client disconnects, dario suppresses the upstream abort
17
+ * and keeps the reader loop spinning until the upstream emits its final
18
+ * event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
19
+ * linger on dead upstreams). Writes to the closed `res` are gated off;
20
+ * the reads and any accumulator state (analytics, tool-map) continue so
21
+ * the captured usage numbers are complete rather than truncated.
22
+ *
23
+ * This has a real cost — you pay tokens for a response your consumer
24
+ * isn't going to read — so it's deliberately opt-in. Users on an
25
+ * unmetered subscription who care more about fingerprint than wasted
26
+ * generation can flip it on globally.
27
+ *
28
+ * This module exposes the *decision* as a pure function so the test
29
+ * suite can exercise every branch without spinning up a socket. The
30
+ * proxy wires the decision into its existing `onClientClose` handler.
31
+ */
32
/**
 * Pick the action the proxy's `onClientClose` handler should take when the
 * client's `req.on('close')` fires. Pure over its three inputs.
 *
 * @param writableEnded   `res.writableEnded` when the handler runs; `true`
 *                        means the response was already finished.
 * @param upstreamAborted Whether the upstream was already aborted.
 * @param drainOnClose    The runtime-configured drain knob.
 * @returns `'noop'` when the response is already ended or the upstream is
 *          already aborted; otherwise `'drain'` if `drainOnClose` is truthy,
 *          else `'abort'`.
 */
export function decideOnClientClose(writableEnded, upstreamAborted, drainOnClose) {
    const nothingLeftToDo = writableEnded || upstreamAborted;
    if (nothingLeftToDo) {
        return 'noop';
    }
    if (drainOnClose) {
        return 'drain';
    }
    return 'abort';
}
57
/**
 * Resolve the effective `drainOnClose` setting from explicit options plus
 * the `DARIO_DRAIN_ON_CLOSE` env var.
 *
 * An explicit boolean always wins. Otherwise the env value is trimmed and
 * compared case-insensitively against `'1'`, `'true'` and `'yes'`; anything
 * else (including unset) is false. Trimming matters because some shells
 * keep stray whitespace in the value — e.g. Windows cmd's
 * `set DARIO_DRAIN_ON_CLOSE=1 && dario proxy` stores `"1 "`.
 *
 * @param explicit Explicit option value, or `undefined` to defer to env.
 * @param env      Environment map to read from; defaults to `process.env`.
 * @returns Whether drain-on-close is enabled.
 */
export function resolveDrainOnClose(explicit, env = process.env) {
    if (typeof explicit === 'boolean')
        return explicit;
    const v = (env.DARIO_DRAIN_ON_CLOSE ?? '').trim().toLowerCase();
    return v === '1' || v === 'true' || v === 'yes';
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.23.0",
3
+ "version": "3.25.0",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,7 +21,7 @@
21
21
  ],
22
22
  "scripts": {
23
23
  "build": "tsc && cp src/cc-template-data.json dist/ && node -e \"require('fs').mkdirSync('dist/shim',{recursive:true})\" && cp src/shim/runtime.cjs dist/shim/",
24
- "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
24
+ "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/pacing.mjs && node test/stream-drain.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
25
25
  "audit": "npm audit --production --audit-level=high",
26
26
  "prepublishOnly": "npm run build",
27
27
  "start": "node dist/cli.js",