@askalf/dario 3.23.0 → 3.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +42 -1
- package/dist/pacing.d.ts +62 -0
- package/dist/pacing.js +78 -0
- package/dist/proxy.d.ts +3 -0
- package/dist/proxy.js +58 -23
- package/dist/stream-drain.d.ts +60 -0
- package/dist/stream-drain.js +68 -0
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -208,7 +208,31 @@ async function proxy() {
|
|
|
208
208
|
const strictTls = args.includes('--strict-tls');
|
|
209
209
|
const modelArg = args.find(a => a.startsWith('--model='));
|
|
210
210
|
const model = modelArg ? modelArg.split('=')[1] : undefined;
|
|
211
|
-
|
|
211
|
+
// --pace-min=MS / --pace-jitter=MS (v3.24, direction #6 — behavioral
|
|
212
|
+
// smoothing). Inter-request gap floor + optional uniform-random jitter.
|
|
213
|
+
// Defaults preserve v3.23 behavior (500ms floor, no jitter). The pure
|
|
214
|
+
// calc lives in src/pacing.ts; the flags just feed it.
|
|
215
|
+
const pacingMinMs = parsePositiveIntFlag('--pace-min=');
|
|
216
|
+
const pacingJitterMs = parsePositiveIntFlag('--pace-jitter=');
|
|
217
|
+
// --drain-on-close (v3.25, direction #5). When set, a client
|
|
218
|
+
// disconnect no longer aborts the upstream SSE — dario keeps
|
|
219
|
+
// draining the stream to EOF so Anthropic sees the CC-shaped
|
|
220
|
+
// read-to-completion pattern. Costs tokens (the response is fully
|
|
221
|
+
// generated even if nobody reads it), so it's opt-in.
|
|
222
|
+
const drainOnClose = args.includes('--drain-on-close') || undefined;
|
|
223
|
+
await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose });
|
|
224
|
+
}
|
|
225
|
+
function parsePositiveIntFlag(prefix) {
|
|
226
|
+
const found = args.find(a => a.startsWith(prefix));
|
|
227
|
+
if (!found)
|
|
228
|
+
return undefined;
|
|
229
|
+
const raw = found.slice(prefix.length);
|
|
230
|
+
const n = parseInt(raw, 10);
|
|
231
|
+
if (!Number.isFinite(n) || n < 0) {
|
|
232
|
+
console.error(`[dario] Invalid ${prefix.replace(/=$/, '')} value: ${JSON.stringify(raw)}. Must be a non-negative integer (ms).`);
|
|
233
|
+
process.exit(1);
|
|
234
|
+
}
|
|
235
|
+
return n;
|
|
212
236
|
}
|
|
213
237
|
async function accounts() {
|
|
214
238
|
const sub = args[1];
|
|
@@ -459,6 +483,23 @@ async function help() {
|
|
|
459
483
|
from a stock CC request. Install Bun
|
|
460
484
|
(https://bun.sh) so dario auto-relaunches
|
|
461
485
|
under it, or use shim mode. (v3.23)
|
|
486
|
+
--pace-min=MS Minimum ms between upstream requests
|
|
487
|
+
(default: 500). Prevents request floods
|
|
488
|
+
that are distinguishable from human-paced
|
|
489
|
+
CC traffic.
|
|
490
|
+
--pace-jitter=MS Max additional uniform-random jitter (ms)
|
|
491
|
+
added on top of --pace-min per request.
|
|
492
|
+
Default: 0 (off). Set to e.g. 300 to hide
|
|
493
|
+
the floor from long-run inter-arrival
|
|
494
|
+
statistics. (v3.24)
|
|
495
|
+
--drain-on-close When the client disconnects mid-stream,
|
|
496
|
+
keep consuming the upstream SSE to EOF
|
|
497
|
+
so Anthropic sees the same read-to-
|
|
498
|
+
completion pattern native Claude Code
|
|
499
|
+
produces. Trades tokens (the response
|
|
500
|
+
is fully generated even if nobody reads
|
|
501
|
+
it) for fingerprint fidelity. Bounded by
|
|
502
|
+
the 5-minute upstream timeout. (v3.25)
|
|
462
503
|
--port=PORT Port to listen on (default: 3456)
|
|
463
504
|
--host=ADDRESS Address to bind to (default: 127.0.0.1)
|
|
464
505
|
Use 0.0.0.0 for LAN; see README for DARIO_API_KEY
|
package/dist/pacing.d.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inter-request pacing (v3.24, direction #6 — behavioral smoothing).
|
|
3
|
+
*
|
|
4
|
+
* Real CC traffic has human-paced gaps between requests — sub-second when
|
|
5
|
+
* the model is streaming tool-loop output, multi-second when the user is
|
|
6
|
+
* typing the next message. A proxy that fires requests at machine speed
|
|
7
|
+
* with perfectly uniform spacing stands out against that rhythm.
|
|
8
|
+
*
|
|
9
|
+
* This module supplies the pure gap-calculation function the proxy's
|
|
10
|
+
* rate governor calls before every outbound fetch. Two knobs:
|
|
11
|
+
*
|
|
12
|
+
* minGapMs — lower bound on the wall-clock distance between requests.
|
|
13
|
+
* Was a hardcoded 500ms through v3.23; keep 500 as default
|
|
14
|
+
* so back-compat is exact when both knobs stay at defaults.
|
|
15
|
+
*
|
|
16
|
+
* jitterMs — uniform random addition on top of minGap. The *effective*
|
|
17
|
+
* gap for a given request is minGap + U(0, jitter). Adds
|
|
18
|
+
* non-uniformity so an observer can't infer the floor from
|
|
19
|
+
* the long-run minimum of inter-arrival times.
|
|
20
|
+
*
|
|
21
|
+
* Pure over (now, lastRequestTime, minGap, jitter, rng) so the tests can
|
|
22
|
+
* exercise every edge without spawning timers. The proxy passes
|
|
23
|
+
* `Math.random` as the rng at runtime; tests pass a deterministic stub.
|
|
24
|
+
*
|
|
25
|
+
* The first request in a session (lastRequestTime === 0) is never paced —
|
|
26
|
+
* the purpose is smoothing the *gap between* requests, not delaying the
|
|
27
|
+
* first one from whenever the consumer happens to connect.
|
|
28
|
+
*/
|
|
29
|
+
export interface PacingConfig {
|
|
30
|
+
/** Minimum wall-clock milliseconds between the dispatch of one upstream request and the dispatch of the next. */
|
|
31
|
+
minGapMs: number;
|
|
32
|
+
/** Max additional uniform-random jitter (ms) added on top of minGap. Pass 0 to disable. */
|
|
33
|
+
jitterMs: number;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* How many milliseconds to sleep before the next upstream fetch.
|
|
37
|
+
*
|
|
38
|
+
* Returns 0 when no delay is required — either because this is the first
|
|
39
|
+
* request of the session, or enough wall-clock time has already elapsed
|
|
40
|
+
* since `lastRequestTime`.
|
|
41
|
+
*
|
|
42
|
+
* `rng` defaults to Math.random; tests inject a deterministic stub.
|
|
43
|
+
* Negative configuration values are clamped to 0 (lenient, not an error).
|
|
44
|
+
*/
|
|
45
|
+
export declare function computePacingDelay(now: number, lastRequestTime: number, cfg: PacingConfig, rng?: () => number): number;
|
|
46
|
+
/**
|
|
47
|
+
* Resolve a PacingConfig from explicit options, env vars, and defaults.
|
|
48
|
+
*
|
|
49
|
+
* Precedence (highest first):
|
|
50
|
+
* 1. Explicit argument (typically from CLI flag)
|
|
51
|
+
* 2. DARIO_PACE_MIN_MS / DARIO_PACE_JITTER_MS env vars
|
|
52
|
+
* 3. Legacy DARIO_MIN_INTERVAL_MS env var (minGap only — matches v3.23
|
|
53
|
+
* behavior so existing setups don't regress silently)
|
|
54
|
+
* 4. Defaults: minGap=500, jitter=0
|
|
55
|
+
*
|
|
56
|
+
* Invalid strings (non-numeric, negative) are ignored and fall through to
|
|
57
|
+
* the next source — a typoed env var shouldn't fail-loud at startup.
|
|
58
|
+
*/
|
|
59
|
+
export declare function resolvePacingConfig(explicit?: {
|
|
60
|
+
minGapMs?: number;
|
|
61
|
+
jitterMs?: number;
|
|
62
|
+
}, env?: NodeJS.ProcessEnv): PacingConfig;
|
package/dist/pacing.js
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inter-request pacing (v3.24, direction #6 — behavioral smoothing).
|
|
3
|
+
*
|
|
4
|
+
* Real CC traffic has human-paced gaps between requests — sub-second when
|
|
5
|
+
* the model is streaming tool-loop output, multi-second when the user is
|
|
6
|
+
* typing the next message. A proxy that fires requests at machine speed
|
|
7
|
+
* with perfectly uniform spacing stands out against that rhythm.
|
|
8
|
+
*
|
|
9
|
+
* This module supplies the pure gap-calculation function the proxy's
|
|
10
|
+
* rate governor calls before every outbound fetch. Two knobs:
|
|
11
|
+
*
|
|
12
|
+
* minGapMs — lower bound on the wall-clock distance between requests.
|
|
13
|
+
* Was a hardcoded 500ms through v3.23; keep 500 as default
|
|
14
|
+
* so back-compat is exact when both knobs stay at defaults.
|
|
15
|
+
*
|
|
16
|
+
* jitterMs — uniform random addition on top of minGap. The *effective*
|
|
17
|
+
* gap for a given request is minGap + U(0, jitter). Adds
|
|
18
|
+
* non-uniformity so an observer can't infer the floor from
|
|
19
|
+
* the long-run minimum of inter-arrival times.
|
|
20
|
+
*
|
|
21
|
+
* Pure over (now, lastRequestTime, minGap, jitter, rng) so the tests can
|
|
22
|
+
* exercise every edge without spawning timers. The proxy passes
|
|
23
|
+
* `Math.random` as the rng at runtime; tests pass a deterministic stub.
|
|
24
|
+
*
|
|
25
|
+
* The first request in a session (lastRequestTime === 0) is never paced —
|
|
26
|
+
* the purpose is smoothing the *gap between* requests, not delaying the
|
|
27
|
+
* first one from whenever the consumer happens to connect.
|
|
28
|
+
*/
|
|
29
|
+
/**
|
|
30
|
+
* How many milliseconds to sleep before the next upstream fetch.
|
|
31
|
+
*
|
|
32
|
+
* Returns 0 when no delay is required — either because this is the first
|
|
33
|
+
* request of the session, or enough wall-clock time has already elapsed
|
|
34
|
+
* since `lastRequestTime`.
|
|
35
|
+
*
|
|
36
|
+
* `rng` defaults to Math.random; tests inject a deterministic stub.
|
|
37
|
+
* Negative configuration values are clamped to 0 (lenient, not an error).
|
|
38
|
+
*/
|
|
39
|
+
export function computePacingDelay(now, lastRequestTime, cfg, rng = Math.random) {
|
|
40
|
+
if (lastRequestTime <= 0)
|
|
41
|
+
return 0;
|
|
42
|
+
const minGap = Math.max(0, cfg.minGapMs);
|
|
43
|
+
const jitter = Math.max(0, cfg.jitterMs);
|
|
44
|
+
const jitterAdd = jitter > 0 ? Math.floor(rng() * jitter) : 0;
|
|
45
|
+
const effectiveGap = minGap + jitterAdd;
|
|
46
|
+
const elapsed = now - lastRequestTime;
|
|
47
|
+
if (elapsed >= effectiveGap)
|
|
48
|
+
return 0;
|
|
49
|
+
return effectiveGap - elapsed;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Resolve a PacingConfig from explicit options, env vars, and defaults.
|
|
53
|
+
*
|
|
54
|
+
* Precedence (highest first):
|
|
55
|
+
* 1. Explicit argument (typically from CLI flag)
|
|
56
|
+
* 2. DARIO_PACE_MIN_MS / DARIO_PACE_JITTER_MS env vars
|
|
57
|
+
* 3. Legacy DARIO_MIN_INTERVAL_MS env var (minGap only — matches v3.23
|
|
58
|
+
* behavior so existing setups don't regress silently)
|
|
59
|
+
* 4. Defaults: minGap=500, jitter=0
|
|
60
|
+
*
|
|
61
|
+
* Invalid strings (non-numeric, negative) are ignored and fall through to
|
|
62
|
+
* the next source — a typoed env var shouldn't fail-loud at startup.
|
|
63
|
+
*/
|
|
64
|
+
export function resolvePacingConfig(explicit = {}, env = process.env) {
|
|
65
|
+
const minGap = pickNonNegativeInt(explicit.minGapMs, env.DARIO_PACE_MIN_MS, env.DARIO_MIN_INTERVAL_MS) ?? 500;
|
|
66
|
+
const jitter = pickNonNegativeInt(explicit.jitterMs, env.DARIO_PACE_JITTER_MS) ?? 0;
|
|
67
|
+
return { minGapMs: minGap, jitterMs: jitter };
|
|
68
|
+
}
|
|
69
|
+
function pickNonNegativeInt(...candidates) {
|
|
70
|
+
for (const c of candidates) {
|
|
71
|
+
if (c === undefined || c === null || c === '')
|
|
72
|
+
continue;
|
|
73
|
+
const n = typeof c === 'number' ? c : parseInt(c, 10);
|
|
74
|
+
if (Number.isFinite(n) && n >= 0)
|
|
75
|
+
return Math.floor(n);
|
|
76
|
+
}
|
|
77
|
+
return undefined;
|
|
78
|
+
}
|
package/dist/proxy.d.ts
CHANGED
|
@@ -13,6 +13,9 @@ interface ProxyOptions {
|
|
|
13
13
|
hybridTools?: boolean;
|
|
14
14
|
noAutoDetect?: boolean;
|
|
15
15
|
strictTls?: boolean;
|
|
16
|
+
pacingMinMs?: number;
|
|
17
|
+
pacingJitterMs?: number;
|
|
18
|
+
drainOnClose?: boolean;
|
|
16
19
|
}
|
|
17
20
|
export declare function sanitizeError(err: unknown): string;
|
|
18
21
|
export declare function startProxy(opts?: ProxyOptions): Promise<void>;
|
package/dist/proxy.js
CHANGED
|
@@ -571,10 +571,28 @@ export async function startProxy(opts = {}) {
|
|
|
571
571
|
betaBase = betaBase ? `${betaBase},oauth-2025-04-20` : 'oauth-2025-04-20';
|
|
572
572
|
}
|
|
573
573
|
const betaWithoutContext1m = betaBase.split(',').filter((t) => t !== 'context-1m-2025-08-07').join(',');
|
|
574
|
-
// Rate governor —
|
|
575
|
-
//
|
|
574
|
+
// Rate governor — floor + optional jitter between requests. A configurable
|
|
575
|
+
// 500ms floor keeps the default behavior identical to v3.23; `--pace-min`
|
|
576
|
+
// and `--pace-jitter` let callers tune the distribution. Pure calc lives
|
|
577
|
+
// in src/pacing.ts so the edge cases are unit-tested without timers.
|
|
578
|
+
const { computePacingDelay, resolvePacingConfig } = await import('./pacing.js');
|
|
576
579
|
let lastRequestTime = 0;
|
|
577
|
-
const
|
|
580
|
+
const pacingCfg = resolvePacingConfig({
|
|
581
|
+
minGapMs: opts.pacingMinMs,
|
|
582
|
+
jitterMs: opts.pacingJitterMs,
|
|
583
|
+
});
|
|
584
|
+
if (verbose) {
|
|
585
|
+
console.log(`[dario] pacing: min=${pacingCfg.minGapMs}ms jitter=${pacingCfg.jitterMs}ms`);
|
|
586
|
+
}
|
|
587
|
+
// Stream-consumption replay (v3.25, direction #5). When on, a client
|
|
588
|
+
// disconnect no longer aborts the upstream fetch — we keep consuming
|
|
589
|
+
// the SSE so Anthropic sees a CC-shaped read-to-EOF pattern. See
|
|
590
|
+
// src/stream-drain.ts for the rationale + tradeoff.
|
|
591
|
+
const { decideOnClientClose, resolveDrainOnClose } = await import('./stream-drain.js');
|
|
592
|
+
const drainOnClose = resolveDrainOnClose(opts.drainOnClose);
|
|
593
|
+
if (verbose) {
|
|
594
|
+
console.log(`[dario] drain-on-close: ${drainOnClose ? 'enabled' : 'disabled'}`);
|
|
595
|
+
}
|
|
578
596
|
// Optional proxy authentication — pre-encode key buffer for performance
|
|
579
597
|
const apiKey = process.env.DARIO_API_KEY;
|
|
580
598
|
const apiKeyBuf = apiKey ? Buffer.from(apiKey) : null;
|
|
@@ -1076,11 +1094,11 @@ export async function startProxy(opts = {}) {
|
|
|
1076
1094
|
beta = beta.split(',').filter((t) => t.length > 0 && !rejectedSet.has(t)).join(',');
|
|
1077
1095
|
}
|
|
1078
1096
|
}
|
|
1079
|
-
// Rate governor — prevent inhuman request cadence
|
|
1080
|
-
|
|
1081
|
-
const
|
|
1082
|
-
if (
|
|
1083
|
-
await new Promise(r => setTimeout(r,
|
|
1097
|
+
// Rate governor — prevent inhuman request cadence. See src/pacing.ts
|
|
1098
|
+
// for the pure delay calculator (floor + uniform jitter).
|
|
1099
|
+
const pacingDelay = computePacingDelay(Date.now(), lastRequestTime, pacingCfg);
|
|
1100
|
+
if (pacingDelay > 0) {
|
|
1101
|
+
await new Promise(r => setTimeout(r, pacingDelay));
|
|
1084
1102
|
}
|
|
1085
1103
|
lastRequestTime = Date.now();
|
|
1086
1104
|
// Session ID: pool mode uses the per-account identity.sessionId (stable
|
|
@@ -1107,11 +1125,15 @@ export async function startProxy(opts = {}) {
|
|
|
1107
1125
|
'x-stainless-timeout': '600',
|
|
1108
1126
|
};
|
|
1109
1127
|
// Client-disconnect abort: if the client drops the connection before
|
|
1110
|
-
// we've finished sending the response, we
|
|
1111
|
-
// Anthropic stops generating (and billing) a
|
|
1112
|
-
// read.
|
|
1113
|
-
//
|
|
1128
|
+
// we've finished sending the response, we default to aborting the
|
|
1129
|
+
// upstream fetch so Anthropic stops generating (and billing) a
|
|
1130
|
+
// response nobody will read. With `--drain-on-close` set, we
|
|
1131
|
+
// instead keep the reader spinning to consume the full SSE — see
|
|
1132
|
+
// src/stream-drain.ts for the fingerprint rationale. The 5-minute
|
|
1133
|
+
// upstream timeout shares the same controller, so a hung upstream
|
|
1134
|
+
// still gets cut off regardless of drain mode.
|
|
1114
1135
|
const upstreamAbort = new AbortController();
|
|
1136
|
+
let clientDisconnected = false;
|
|
1115
1137
|
upstreamTimeout = setTimeout(() => {
|
|
1116
1138
|
if (!upstreamAbort.signal.aborted) {
|
|
1117
1139
|
upstreamAbortReason = 'timeout';
|
|
@@ -1119,13 +1141,18 @@ export async function startProxy(opts = {}) {
|
|
|
1119
1141
|
}
|
|
1120
1142
|
}, UPSTREAM_TIMEOUT_MS);
|
|
1121
1143
|
onClientClose = () => {
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
// normal teardown happens AFTER res.writableEnded becomes true.
|
|
1125
|
-
if (!res.writableEnded && !upstreamAbort.signal.aborted) {
|
|
1144
|
+
const action = decideOnClientClose(res.writableEnded, upstreamAbort.signal.aborted, drainOnClose);
|
|
1145
|
+
if (action === 'abort') {
|
|
1126
1146
|
upstreamAbortReason = 'client_closed';
|
|
1127
1147
|
upstreamAbort.abort();
|
|
1128
1148
|
}
|
|
1149
|
+
else if (action === 'drain') {
|
|
1150
|
+
clientDisconnected = true;
|
|
1151
|
+
if (verbose)
|
|
1152
|
+
console.log(`[dario] #${requestCount} client disconnected — draining upstream to EOF`);
|
|
1153
|
+
}
|
|
1154
|
+
// noop: either res is already ended (normal teardown) or upstream
|
|
1155
|
+
// is already aborted for another reason.
|
|
1129
1156
|
};
|
|
1130
1157
|
req.on('close', onClientClose);
|
|
1131
1158
|
const startTime = Date.now();
|
|
@@ -1439,6 +1466,14 @@ export async function startProxy(opts = {}) {
|
|
|
1439
1466
|
const streamMapper = ccToolMap && !isOpenAI
|
|
1440
1467
|
? createStreamingReverseMapper(ccToolMap, reqCtx)
|
|
1441
1468
|
: null;
|
|
1469
|
+
// Gated writer — a no-op once the downstream client has gone away
|
|
1470
|
+
// in drain-on-close mode. The read loop keeps consuming so the
|
|
1471
|
+
// upstream sees a full-length read; writes to a closed socket are
|
|
1472
|
+
// suppressed to avoid EPIPE/warnings and pointless work.
|
|
1473
|
+
const writeToClient = (chunk) => {
|
|
1474
|
+
if (!clientDisconnected)
|
|
1475
|
+
res.write(chunk);
|
|
1476
|
+
};
|
|
1442
1477
|
try {
|
|
1443
1478
|
let buffer = '';
|
|
1444
1479
|
const MAX_LINE_LENGTH = 1_000_000; // 1MB max per SSE line
|
|
@@ -1492,8 +1527,8 @@ export async function startProxy(opts = {}) {
|
|
|
1492
1527
|
type: 'upstream_protocol_error',
|
|
1493
1528
|
},
|
|
1494
1529
|
});
|
|
1495
|
-
|
|
1496
|
-
|
|
1530
|
+
writeToClient(`data: ${errPayload}\n\n`);
|
|
1531
|
+
writeToClient('data: [DONE]\n\n');
|
|
1497
1532
|
upstreamAbortReason = 'sse_overflow';
|
|
1498
1533
|
upstreamAbort.abort();
|
|
1499
1534
|
break;
|
|
@@ -1503,28 +1538,28 @@ export async function startProxy(opts = {}) {
|
|
|
1503
1538
|
for (const line of lines) {
|
|
1504
1539
|
const translated = translateStreamChunk(line);
|
|
1505
1540
|
if (translated)
|
|
1506
|
-
|
|
1541
|
+
writeToClient(translated);
|
|
1507
1542
|
}
|
|
1508
1543
|
}
|
|
1509
1544
|
else if (streamMapper) {
|
|
1510
1545
|
const out = streamMapper.feed(value);
|
|
1511
1546
|
if (out.length > 0)
|
|
1512
|
-
|
|
1547
|
+
writeToClient(out);
|
|
1513
1548
|
}
|
|
1514
1549
|
else {
|
|
1515
|
-
|
|
1550
|
+
writeToClient(value);
|
|
1516
1551
|
}
|
|
1517
1552
|
}
|
|
1518
1553
|
// Flush remaining buffer
|
|
1519
1554
|
if (isOpenAI && buffer.trim()) {
|
|
1520
1555
|
const translated = translateStreamChunk(buffer);
|
|
1521
1556
|
if (translated)
|
|
1522
|
-
|
|
1557
|
+
writeToClient(translated);
|
|
1523
1558
|
}
|
|
1524
1559
|
if (streamMapper) {
|
|
1525
1560
|
const tail = streamMapper.end();
|
|
1526
1561
|
if (tail.length > 0)
|
|
1527
|
-
|
|
1562
|
+
writeToClient(tail);
|
|
1528
1563
|
}
|
|
1529
1564
|
}
|
|
1530
1565
|
catch (err) {
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
|
|
3
|
+
*
|
|
4
|
+
* Native Claude Code, when it streams a response from `/v1/messages`, reads
|
|
5
|
+
* the SSE to its final event before closing the socket — even when the
|
|
6
|
+
* consumer logically already has enough. Third-party consumers routed
|
|
7
|
+
* through dario's proxy often abort mid-stream (close their request the
|
|
8
|
+
* instant they see the tool-use content block they wanted). Dario's
|
|
9
|
+
* default has been to propagate that abort upstream by triggering
|
|
10
|
+
* `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
|
|
11
|
+
* a billing standpoint (Anthropic stops generating, stops billing), but a
|
|
12
|
+
* fingerprint axis: "connection closed mid-stream" vs CC's "connection
|
|
13
|
+
* read to EOF" is visible on Anthropic's side.
|
|
14
|
+
*
|
|
15
|
+
* `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
|
|
16
|
+
* the downstream client disconnects, dario suppresses the upstream abort
|
|
17
|
+
* and keeps the reader loop spinning until the upstream emits its final
|
|
18
|
+
* event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
|
|
19
|
+
* linger on dead upstreams). Writes to the closed `res` are gated off;
|
|
20
|
+
* the reads and any accumulator state (analytics, tool-map) continue so
|
|
21
|
+
* the captured usage numbers are complete rather than truncated.
|
|
22
|
+
*
|
|
23
|
+
* This has a real cost — you pay tokens for a response your consumer
|
|
24
|
+
* isn't going to read — so it's deliberately opt-in. Users on an
|
|
25
|
+
* unmetered subscription who care more about fingerprint than wasted
|
|
26
|
+
* generation can flip it on globally.
|
|
27
|
+
*
|
|
28
|
+
* This module exposes the *decision* as a pure function so the test
|
|
29
|
+
* suite can exercise every branch without spinning up a socket. The
|
|
30
|
+
* proxy wires the decision into its existing `onClientClose` handler.
|
|
31
|
+
*/
|
|
32
|
+
export type ClientCloseAction = 'abort' | 'drain' | 'noop';
|
|
33
|
+
/**
|
|
34
|
+
* Decide what `onClientClose` should do when the client's `req.on('close')`
|
|
35
|
+
* fires. Pure over its three inputs.
|
|
36
|
+
*
|
|
37
|
+
* `writableEnded` — `res.writableEnded` at the moment the handler
|
|
38
|
+
* runs. `true` means the response is already
|
|
39
|
+
* finished (the 'close' event is a normal
|
|
40
|
+
* teardown notification after res.end()) — no
|
|
41
|
+
* action needed.
|
|
42
|
+
* `upstreamAborted` — whether upstream has already been aborted for
|
|
43
|
+
* some other reason (timeout, overflow, pool
|
|
44
|
+
* failover). Don't double-abort.
|
|
45
|
+
* `drainOnClose` — the runtime-configured knob.
|
|
46
|
+
*
|
|
47
|
+
* Returns:
|
|
48
|
+
* `'noop'` — already finished / already aborted; handler should return.
|
|
49
|
+
* `'abort'` — fire `upstreamAbort.abort()` (still the default; the only behavior through v3.24).
|
|
50
|
+
* `'drain'` — leave upstream alive; gate off client writes; let the
|
|
51
|
+
* read loop consume to EOF (bounded by UPSTREAM_TIMEOUT_MS).
|
|
52
|
+
*/
|
|
53
|
+
export declare function decideOnClientClose(writableEnded: boolean, upstreamAborted: boolean, drainOnClose: boolean): ClientCloseAction;
|
|
54
|
+
/**
|
|
55
|
+
* Resolve the `drainOnClose` effective setting from explicit options +
|
|
56
|
+
* `DARIO_DRAIN_ON_CLOSE` env var. Truthy env values: `'1'`, `'true'`,
|
|
57
|
+
* `'yes'` (case-insensitive). Anything else (including unset) is false.
|
|
58
|
+
* Explicit `true`/`false` on the options object always wins.
|
|
59
|
+
*/
|
|
60
|
+
export declare function resolveDrainOnClose(explicit: boolean | undefined, env?: NodeJS.ProcessEnv): boolean;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
|
|
3
|
+
*
|
|
4
|
+
* Native Claude Code, when it streams a response from `/v1/messages`, reads
|
|
5
|
+
* the SSE to its final event before closing the socket — even when the
|
|
6
|
+
* consumer logically already has enough. Third-party consumers routed
|
|
7
|
+
* through dario's proxy often abort mid-stream (close their request the
|
|
8
|
+
* instant they see the tool-use content block they wanted). Dario's
|
|
9
|
+
* default has been to propagate that abort upstream by triggering
|
|
10
|
+
* `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
|
|
11
|
+
* a billing standpoint (Anthropic stops generating, stops billing), but a
|
|
12
|
+
* fingerprint axis: "connection closed mid-stream" vs CC's "connection
|
|
13
|
+
* read to EOF" is visible on Anthropic's side.
|
|
14
|
+
*
|
|
15
|
+
* `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
|
|
16
|
+
* the downstream client disconnects, dario suppresses the upstream abort
|
|
17
|
+
* and keeps the reader loop spinning until the upstream emits its final
|
|
18
|
+
* event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
|
|
19
|
+
* linger on dead upstreams). Writes to the closed `res` are gated off;
|
|
20
|
+
* the reads and any accumulator state (analytics, tool-map) continue so
|
|
21
|
+
* the captured usage numbers are complete rather than truncated.
|
|
22
|
+
*
|
|
23
|
+
* This has a real cost — you pay tokens for a response your consumer
|
|
24
|
+
* isn't going to read — so it's deliberately opt-in. Users on an
|
|
25
|
+
* unmetered subscription who care more about fingerprint than wasted
|
|
26
|
+
* generation can flip it on globally.
|
|
27
|
+
*
|
|
28
|
+
* This module exposes the *decision* as a pure function so the test
|
|
29
|
+
* suite can exercise every branch without spinning up a socket. The
|
|
30
|
+
* proxy wires the decision into its existing `onClientClose` handler.
|
|
31
|
+
*/
|
|
32
|
+
/**
|
|
33
|
+
* Decide what `onClientClose` should do when the client's `req.on('close')`
|
|
34
|
+
* fires. Pure over its three inputs.
|
|
35
|
+
*
|
|
36
|
+
* `writableEnded` — `res.writableEnded` at the moment the handler
|
|
37
|
+
* runs. `true` means the response is already
|
|
38
|
+
* finished (the 'close' event is a normal
|
|
39
|
+
* teardown notification after res.end()) — no
|
|
40
|
+
* action needed.
|
|
41
|
+
* `upstreamAborted` — whether upstream has already been aborted for
|
|
42
|
+
* some other reason (timeout, overflow, pool
|
|
43
|
+
* failover). Don't double-abort.
|
|
44
|
+
* `drainOnClose` — the runtime-configured knob.
|
|
45
|
+
*
|
|
46
|
+
* Returns:
|
|
47
|
+
* `'noop'` — already finished / already aborted; handler should return.
|
|
48
|
+
* `'abort'` — fire `upstreamAbort.abort()` (still the default; the only behavior through v3.24).
|
|
49
|
+
* `'drain'` — leave upstream alive; gate off client writes; let the
|
|
50
|
+
* read loop consume to EOF (bounded by UPSTREAM_TIMEOUT_MS).
|
|
51
|
+
*/
|
|
52
|
+
export function decideOnClientClose(writableEnded, upstreamAborted, drainOnClose) {
|
|
53
|
+
if (writableEnded || upstreamAborted)
|
|
54
|
+
return 'noop';
|
|
55
|
+
return drainOnClose ? 'drain' : 'abort';
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Resolve the `drainOnClose` effective setting from explicit options +
|
|
59
|
+
* `DARIO_DRAIN_ON_CLOSE` env var. Truthy env values: `'1'`, `'true'`,
|
|
60
|
+
* `'yes'` (case-insensitive). Anything else (including unset) is false.
|
|
61
|
+
* Explicit `true`/`false` on the options object always wins.
|
|
62
|
+
*/
|
|
63
|
+
export function resolveDrainOnClose(explicit, env = process.env) {
|
|
64
|
+
if (typeof explicit === 'boolean')
|
|
65
|
+
return explicit;
|
|
66
|
+
const v = (env.DARIO_DRAIN_ON_CLOSE ?? '').toLowerCase();
|
|
67
|
+
return v === '1' || v === 'true' || v === 'yes';
|
|
68
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.25.0",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
],
|
|
22
22
|
"scripts": {
|
|
23
23
|
"build": "tsc && cp src/cc-template-data.json dist/ && node -e \"require('fs').mkdirSync('dist/shim',{recursive:true})\" && cp src/shim/runtime.cjs dist/shim/",
|
|
24
|
-
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
|
|
24
|
+
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/pacing.mjs && node test/stream-drain.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
|
|
25
25
|
"audit": "npm audit --production --audit-level=high",
|
|
26
26
|
"prepublishOnly": "npm run build",
|
|
27
27
|
"start": "node dist/cli.js",
|