@askalf/dario 3.24.0 → 3.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +15 -1
- package/dist/proxy.d.ts +1 -0
- package/dist/proxy.js +41 -15
- package/dist/stream-drain.d.ts +60 -0
- package/dist/stream-drain.js +68 -0
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -214,7 +214,13 @@ async function proxy() {
|
|
|
214
214
|
// calc lives in src/pacing.ts; the flags just feed it.
|
|
215
215
|
const pacingMinMs = parsePositiveIntFlag('--pace-min=');
|
|
216
216
|
const pacingJitterMs = parsePositiveIntFlag('--pace-jitter=');
|
|
217
|
-
|
|
217
|
+
// --drain-on-close (v3.25, direction #5). When set, a client
|
|
218
|
+
// disconnect no longer aborts the upstream SSE — dario keeps
|
|
219
|
+
// draining the stream to EOF so Anthropic sees the CC-shaped
|
|
220
|
+
// read-to-completion pattern. Costs tokens (the response is fully
|
|
221
|
+
// generated even if nobody reads it), so it's opt-in.
|
|
222
|
+
const drainOnClose = args.includes('--drain-on-close') || undefined;
|
|
223
|
+
await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose });
|
|
218
224
|
}
|
|
219
225
|
function parsePositiveIntFlag(prefix) {
|
|
220
226
|
const found = args.find(a => a.startsWith(prefix));
|
|
@@ -486,6 +492,14 @@ async function help() {
|
|
|
486
492
|
Default: 0 (off). Set to e.g. 300 to hide
|
|
487
493
|
the floor from long-run inter-arrival
|
|
488
494
|
statistics. (v3.24)
|
|
495
|
+
--drain-on-close When the client disconnects mid-stream,
|
|
496
|
+
keep consuming the upstream SSE to EOF
|
|
497
|
+
so Anthropic sees the same read-to-
|
|
498
|
+
completion pattern native Claude Code
|
|
499
|
+
produces. Trades tokens (the response
|
|
500
|
+
is fully generated even if nobody reads
|
|
501
|
+
it) for fingerprint fidelity. Bounded by
|
|
502
|
+
the 5-minute upstream timeout. (v3.25)
|
|
489
503
|
--port=PORT Port to listen on (default: 3456)
|
|
490
504
|
--host=ADDRESS Address to bind to (default: 127.0.0.1)
|
|
491
505
|
Use 0.0.0.0 for LAN; see README for DARIO_API_KEY
|
package/dist/proxy.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ interface ProxyOptions {
|
|
|
15
15
|
strictTls?: boolean;
|
|
16
16
|
pacingMinMs?: number;
|
|
17
17
|
pacingJitterMs?: number;
|
|
18
|
+
drainOnClose?: boolean;
|
|
18
19
|
}
|
|
19
20
|
export declare function sanitizeError(err: unknown): string;
|
|
20
21
|
export declare function startProxy(opts?: ProxyOptions): Promise<void>;
|
package/dist/proxy.js
CHANGED
|
@@ -584,6 +584,15 @@ export async function startProxy(opts = {}) {
|
|
|
584
584
|
if (verbose) {
|
|
585
585
|
console.log(`[dario] pacing: min=${pacingCfg.minGapMs}ms jitter=${pacingCfg.jitterMs}ms`);
|
|
586
586
|
}
|
|
587
|
+
// Stream-consumption replay (v3.25, direction #5). When on, a client
|
|
588
|
+
// disconnect no longer aborts the upstream fetch — we keep consuming
|
|
589
|
+
// the SSE so Anthropic sees a CC-shaped read-to-EOF pattern. See
|
|
590
|
+
// src/stream-drain.ts for the rationale + tradeoff.
|
|
591
|
+
const { decideOnClientClose, resolveDrainOnClose } = await import('./stream-drain.js');
|
|
592
|
+
const drainOnClose = resolveDrainOnClose(opts.drainOnClose);
|
|
593
|
+
if (verbose) {
|
|
594
|
+
console.log(`[dario] drain-on-close: ${drainOnClose ? 'enabled' : 'disabled'}`);
|
|
595
|
+
}
|
|
587
596
|
// Optional proxy authentication — pre-encode key buffer for performance
|
|
588
597
|
const apiKey = process.env.DARIO_API_KEY;
|
|
589
598
|
const apiKeyBuf = apiKey ? Buffer.from(apiKey) : null;
|
|
@@ -1116,11 +1125,15 @@ export async function startProxy(opts = {}) {
|
|
|
1116
1125
|
'x-stainless-timeout': '600',
|
|
1117
1126
|
};
|
|
1118
1127
|
// Client-disconnect abort: if the client drops the connection before
|
|
1119
|
-
// we've finished sending the response, we
|
|
1120
|
-
// Anthropic stops generating (and billing) a
|
|
1121
|
-
// read.
|
|
1122
|
-
//
|
|
1128
|
+
// we've finished sending the response, we default to aborting the
|
|
1129
|
+
// upstream fetch so Anthropic stops generating (and billing) a
|
|
1130
|
+
// response nobody will read. With `--drain-on-close` set, we
|
|
1131
|
+
// instead keep the reader spinning to consume the full SSE — see
|
|
1132
|
+
// src/stream-drain.ts for the fingerprint rationale. The 5-minute
|
|
1133
|
+
// upstream timeout shares the same controller, so a hung upstream
|
|
1134
|
+
// still gets cut off regardless of drain mode.
|
|
1123
1135
|
const upstreamAbort = new AbortController();
|
|
1136
|
+
let clientDisconnected = false;
|
|
1124
1137
|
upstreamTimeout = setTimeout(() => {
|
|
1125
1138
|
if (!upstreamAbort.signal.aborted) {
|
|
1126
1139
|
upstreamAbortReason = 'timeout';
|
|
@@ -1128,13 +1141,18 @@ export async function startProxy(opts = {}) {
|
|
|
1128
1141
|
}
|
|
1129
1142
|
}, UPSTREAM_TIMEOUT_MS);
|
|
1130
1143
|
onClientClose = () => {
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
// normal teardown happens AFTER res.writableEnded becomes true.
|
|
1134
|
-
if (!res.writableEnded && !upstreamAbort.signal.aborted) {
|
|
1144
|
+
const action = decideOnClientClose(res.writableEnded, upstreamAbort.signal.aborted, drainOnClose);
|
|
1145
|
+
if (action === 'abort') {
|
|
1135
1146
|
upstreamAbortReason = 'client_closed';
|
|
1136
1147
|
upstreamAbort.abort();
|
|
1137
1148
|
}
|
|
1149
|
+
else if (action === 'drain') {
|
|
1150
|
+
clientDisconnected = true;
|
|
1151
|
+
if (verbose)
|
|
1152
|
+
console.log(`[dario] #${requestCount} client disconnected — draining upstream to EOF`);
|
|
1153
|
+
}
|
|
1154
|
+
// noop: either res is already ended (normal teardown) or upstream
|
|
1155
|
+
// is already aborted for another reason.
|
|
1138
1156
|
};
|
|
1139
1157
|
req.on('close', onClientClose);
|
|
1140
1158
|
const startTime = Date.now();
|
|
@@ -1448,6 +1466,14 @@ export async function startProxy(opts = {}) {
|
|
|
1448
1466
|
const streamMapper = ccToolMap && !isOpenAI
|
|
1449
1467
|
? createStreamingReverseMapper(ccToolMap, reqCtx)
|
|
1450
1468
|
: null;
|
|
1469
|
+
// Gated writer — a no-op once the downstream client has gone away
|
|
1470
|
+
// in drain-on-close mode. The read loop keeps consuming so the
|
|
1471
|
+
// upstream sees a full-length read; writes to a closed socket are
|
|
1472
|
+
// suppressed to avoid EPIPE/warnings and pointless work.
|
|
1473
|
+
const writeToClient = (chunk) => {
|
|
1474
|
+
if (!clientDisconnected)
|
|
1475
|
+
res.write(chunk);
|
|
1476
|
+
};
|
|
1451
1477
|
try {
|
|
1452
1478
|
let buffer = '';
|
|
1453
1479
|
const MAX_LINE_LENGTH = 1_000_000; // 1MB max per SSE line
|
|
@@ -1501,8 +1527,8 @@ export async function startProxy(opts = {}) {
|
|
|
1501
1527
|
type: 'upstream_protocol_error',
|
|
1502
1528
|
},
|
|
1503
1529
|
});
|
|
1504
|
-
|
|
1505
|
-
|
|
1530
|
+
writeToClient(`data: ${errPayload}\n\n`);
|
|
1531
|
+
writeToClient('data: [DONE]\n\n');
|
|
1506
1532
|
upstreamAbortReason = 'sse_overflow';
|
|
1507
1533
|
upstreamAbort.abort();
|
|
1508
1534
|
break;
|
|
@@ -1512,28 +1538,28 @@ export async function startProxy(opts = {}) {
|
|
|
1512
1538
|
for (const line of lines) {
|
|
1513
1539
|
const translated = translateStreamChunk(line);
|
|
1514
1540
|
if (translated)
|
|
1515
|
-
|
|
1541
|
+
writeToClient(translated);
|
|
1516
1542
|
}
|
|
1517
1543
|
}
|
|
1518
1544
|
else if (streamMapper) {
|
|
1519
1545
|
const out = streamMapper.feed(value);
|
|
1520
1546
|
if (out.length > 0)
|
|
1521
|
-
|
|
1547
|
+
writeToClient(out);
|
|
1522
1548
|
}
|
|
1523
1549
|
else {
|
|
1524
|
-
|
|
1550
|
+
writeToClient(value);
|
|
1525
1551
|
}
|
|
1526
1552
|
}
|
|
1527
1553
|
// Flush remaining buffer
|
|
1528
1554
|
if (isOpenAI && buffer.trim()) {
|
|
1529
1555
|
const translated = translateStreamChunk(buffer);
|
|
1530
1556
|
if (translated)
|
|
1531
|
-
|
|
1557
|
+
writeToClient(translated);
|
|
1532
1558
|
}
|
|
1533
1559
|
if (streamMapper) {
|
|
1534
1560
|
const tail = streamMapper.end();
|
|
1535
1561
|
if (tail.length > 0)
|
|
1536
|
-
|
|
1562
|
+
writeToClient(tail);
|
|
1537
1563
|
}
|
|
1538
1564
|
}
|
|
1539
1565
|
catch (err) {
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
|
|
3
|
+
*
|
|
4
|
+
* Native Claude Code, when it streams a response from `/v1/messages`, reads
|
|
5
|
+
* the SSE to its final event before closing the socket — even when the
|
|
6
|
+
* consumer logically already has enough. Third-party consumers routed
|
|
7
|
+
* through dario's proxy often abort mid-stream (close their request the
|
|
8
|
+
* instant they see the tool-use content block they wanted). Dario's
|
|
9
|
+
* default has been to propagate that abort upstream by triggering
|
|
10
|
+
* `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
|
|
11
|
+
* a billing standpoint (Anthropic stops generating, stops billing), but a
|
|
12
|
+
* fingerprint axis: "connection closed mid-stream" vs CC's "connection
|
|
13
|
+
* read to EOF" is visible on Anthropic's side.
|
|
14
|
+
*
|
|
15
|
+
* `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
|
|
16
|
+
* the downstream client disconnects, dario suppresses the upstream abort
|
|
17
|
+
* and keeps the reader loop spinning until the upstream emits its final
|
|
18
|
+
* event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
|
|
19
|
+
* linger on dead upstreams). Writes to the closed `res` are gated off;
|
|
20
|
+
* the reads and any accumulator state (analytics, tool-map) continue so
|
|
21
|
+
* the captured usage numbers are complete rather than truncated.
|
|
22
|
+
*
|
|
23
|
+
* This has a real cost — you pay tokens for a response your consumer
|
|
24
|
+
* isn't going to read — so it's deliberately opt-in. Users on an
|
|
25
|
+
* unmetered subscription who care more about fingerprint than wasted
|
|
26
|
+
* generation can flip it on globally.
|
|
27
|
+
*
|
|
28
|
+
* This module exposes the *decision* as a pure function so the test
|
|
29
|
+
* suite can exercise every branch without spinning up a socket. The
|
|
30
|
+
* proxy wires the decision into its existing `onClientClose` handler.
|
|
31
|
+
*/
|
|
32
|
+
export type ClientCloseAction = 'abort' | 'drain' | 'noop';
|
|
33
|
+
/**
|
|
34
|
+
* Decide what `onClientClose` should do when the client's `req.on('close')`
|
|
35
|
+
* fires. Pure over its three inputs.
|
|
36
|
+
*
|
|
37
|
+
* `writableEnded` — `res.writableEnded` at the moment the handler
|
|
38
|
+
* runs. `true` means the response is already
|
|
39
|
+
* finished (the 'close' event is a normal
|
|
40
|
+
* teardown notification after res.end()) — no
|
|
41
|
+
* action needed.
|
|
42
|
+
* `upstreamAborted` — whether upstream has already been aborted for
|
|
43
|
+
* some other reason (timeout, overflow, pool
|
|
44
|
+
* failover). Don't double-abort.
|
|
45
|
+
* `drainOnClose` — the runtime-configured knob.
|
|
46
|
+
*
|
|
47
|
+
* Returns:
|
|
48
|
+
* `'noop'` — already finished / already aborted; handler should return.
|
|
49
|
+
* `'abort'` — fire `upstreamAbort.abort()` (the v3.24-and-earlier default).
|
|
50
|
+
* `'drain'` — leave upstream alive; gate off client writes; let the
|
|
51
|
+
* read loop consume to EOF (bounded by UPSTREAM_TIMEOUT_MS).
|
|
52
|
+
*/
|
|
53
|
+
export declare function decideOnClientClose(writableEnded: boolean, upstreamAborted: boolean, drainOnClose: boolean): ClientCloseAction;
|
|
54
|
+
/**
|
|
55
|
+
* Resolve the `drainOnClose` effective setting from explicit options +
|
|
56
|
+
* `DARIO_DRAIN_ON_CLOSE` env var. Truthy env values: `'1'`, `'true'`,
|
|
57
|
+
* `'yes'` (case-insensitive). Anything else (including unset) is false.
|
|
58
|
+
* Explicit `true`/`false` on the options object always wins.
|
|
59
|
+
*/
|
|
60
|
+
export declare function resolveDrainOnClose(explicit: boolean | undefined, env?: NodeJS.ProcessEnv): boolean;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stream-consumption replay (v3.25, direction #5 — behavioral fidelity).
|
|
3
|
+
*
|
|
4
|
+
* Native Claude Code, when it streams a response from `/v1/messages`, reads
|
|
5
|
+
* the SSE to its final event before closing the socket — even when the
|
|
6
|
+
* consumer logically already has enough. Third-party consumers routed
|
|
7
|
+
* through dario's proxy often abort mid-stream (close their request the
|
|
8
|
+
* instant they see the tool-use content block they wanted). Dario's
|
|
9
|
+
* default has been to propagate that abort upstream by triggering
|
|
10
|
+
* `upstreamAbort.abort()` from the `req.on('close')` handler — clean from
|
|
11
|
+
* a billing standpoint (Anthropic stops generating, stops billing), but a
|
|
12
|
+
* fingerprint axis: "connection closed mid-stream" vs CC's "connection
|
|
13
|
+
* read to EOF" is visible on Anthropic's side.
|
|
14
|
+
*
|
|
15
|
+
* `--drain-on-close` / `DARIO_DRAIN_ON_CLOSE=1` flips the tradeoff: when
|
|
16
|
+
* the downstream client disconnects, dario suppresses the upstream abort
|
|
17
|
+
* and keeps the reader loop spinning until the upstream emits its final
|
|
18
|
+
* event (or `UPSTREAM_TIMEOUT_MS` fires as a hard ceiling — we don't
|
|
19
|
+
* linger on dead upstreams). Writes to the closed `res` are gated off;
|
|
20
|
+
* the reads and any accumulator state (analytics, tool-map) continue so
|
|
21
|
+
* the captured usage numbers are complete rather than truncated.
|
|
22
|
+
*
|
|
23
|
+
* This has a real cost — you pay tokens for a response your consumer
|
|
24
|
+
* isn't going to read — so it's deliberately opt-in. Users on an
|
|
25
|
+
* unmetered subscription who care more about fingerprint than wasted
|
|
26
|
+
* generation can flip it on globally.
|
|
27
|
+
*
|
|
28
|
+
* This module exposes the *decision* as a pure function so the test
|
|
29
|
+
* suite can exercise every branch without spinning up a socket. The
|
|
30
|
+
* proxy wires the decision into its existing `onClientClose` handler.
|
|
31
|
+
*/
|
|
32
|
+
/**
|
|
33
|
+
* Decide what `onClientClose` should do when the client's `req.on('close')`
|
|
34
|
+
* fires. Pure over its three inputs.
|
|
35
|
+
*
|
|
36
|
+
* `writableEnded` — `res.writableEnded` at the moment the handler
|
|
37
|
+
* runs. `true` means the response is already
|
|
38
|
+
* finished (the 'close' event is a normal
|
|
39
|
+
* teardown notification after res.end()) — no
|
|
40
|
+
* action needed.
|
|
41
|
+
* `upstreamAborted` — whether upstream has already been aborted for
|
|
42
|
+
* some other reason (timeout, overflow, pool
|
|
43
|
+
* failover). Don't double-abort.
|
|
44
|
+
* `drainOnClose` — the runtime-configured knob.
|
|
45
|
+
*
|
|
46
|
+
* Returns:
|
|
47
|
+
* `'noop'` — already finished / already aborted; handler should return.
|
|
48
|
+
* `'abort'` — fire `upstreamAbort.abort()` (the v3.24-and-earlier default).
|
|
49
|
+
* `'drain'` — leave upstream alive; gate off client writes; let the
|
|
50
|
+
* read loop consume to EOF (bounded by UPSTREAM_TIMEOUT_MS).
|
|
51
|
+
*/
|
|
52
|
+
export function decideOnClientClose(writableEnded, upstreamAborted, drainOnClose) {
|
|
53
|
+
if (writableEnded || upstreamAborted)
|
|
54
|
+
return 'noop';
|
|
55
|
+
return drainOnClose ? 'drain' : 'abort';
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Resolve the `drainOnClose` effective setting from explicit options +
|
|
59
|
+
* `DARIO_DRAIN_ON_CLOSE` env var. Truthy env values: `'1'`, `'true'`,
|
|
60
|
+
* `'yes'` (case-insensitive). Anything else (including unset) is false.
|
|
61
|
+
* Explicit `true`/`false` on the options object always wins.
|
|
62
|
+
*/
|
|
63
|
+
export function resolveDrainOnClose(explicit, env = process.env) {
|
|
64
|
+
if (typeof explicit === 'boolean')
|
|
65
|
+
return explicit;
|
|
66
|
+
const v = (env.DARIO_DRAIN_ON_CLOSE ?? '').toLowerCase();
|
|
67
|
+
return v === '1' || v === 'true' || v === 'yes';
|
|
68
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.24.0",
|
|
3
|
+
"version": "3.25.0",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
],
|
|
22
22
|
"scripts": {
|
|
23
23
|
"build": "tsc && cp src/cc-template-data.json dist/ && node -e \"require('fs').mkdirSync('dist/shim',{recursive:true})\" && cp src/shim/runtime.cjs dist/shim/",
|
|
24
|
-
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/pacing.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
|
|
24
|
+
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/tool-schema-contract.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/pool-sticky.mjs && node test/sealed-pool.mjs && node test/live-fingerprint.mjs && node test/shim-runtime.mjs && node test/shim-e2e.mjs && node test/proxy-header-order.mjs && node test/proxy-body-order.mjs && node test/runtime-fingerprint.mjs && node test/pacing.mjs && node test/stream-drain.mjs && node test/drift-detection.mjs && node test/compat-range.mjs && node test/doctor-formatter.mjs && node test/atomic-write.mjs && node test/account-refresh-singleflight.mjs && node test/streaming-edge-cases.mjs && node test/client-detection.mjs && node test/manual-oauth-flow.mjs && node test/scrub-template.mjs",
|
|
25
25
|
"audit": "npm audit --production --audit-level=high",
|
|
26
26
|
"prepublishOnly": "npm run build",
|
|
27
27
|
"start": "node dist/cli.js",
|