@askalf/dario 3.7.2 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -1
- package/dist/pool.d.ts +2 -2
- package/dist/pool.js +3 -3
- package/dist/proxy.js +212 -91
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -245,7 +245,7 @@ curl http://localhost:3456/analytics # per-account / per-model stats, burn ra
|
|
|
245
245
|
| Flag / env | Description | Default |
|
|
246
246
|
|---|---|---|
|
|
247
247
|
| `--passthrough` / `--thin` | Thin proxy for the Claude backend — OAuth swap only, no template injection | off |
|
|
248
|
-
| `--preserve-tools` / `--keep-tools` | Keep client tool schemas instead of remapping to CC tools (
|
|
248
|
+
| `--preserve-tools` / `--keep-tools` | Keep client tool schemas instead of remapping to CC's `Bash/Read/Grep/Glob/WebSearch/WebFetch`. Required for clients whose tools have fields CC doesn't (`sessionId`, custom ids, etc.) — see [Custom tool schemas](#custom-tool-schemas). Trade-off: drops the CC request fingerprint. | off |
|
|
249
249
|
| `--model=<name>` | Force a model (`opus`, `sonnet`, `haiku`, or full ID). Applies to the Claude backend. | passthrough |
|
|
250
250
|
| `--port=<n>` | Port to listen on | `3456` |
|
|
251
251
|
| `--host=<addr>` / `DARIO_HOST` | Bind address. Use `0.0.0.0` for LAN, or a specific IP (e.g. a Tailscale interface). When non-loopback, also set `DARIO_API_KEY`. | `127.0.0.1` |
|
|
@@ -347,6 +347,24 @@ curl http://localhost:3456/v1/chat/completions \
|
|
|
347
347
|
|
|
348
348
|
All supported. Claude backend: full Anthropic SSE format plus OpenAI-SSE translation for tool_use streaming. OpenAI-compat backend: streaming body forwarded byte-for-byte.
|
|
349
349
|
|
|
350
|
+
### Custom tool schemas
|
|
351
|
+
|
|
352
|
+
By default, on the Claude backend, dario replaces your client's tool definitions with the real Claude Code tools (`Bash`, `Read`, `Grep`, `Glob`, `WebSearch`, `WebFetch`) and translates parameters back and forth. That's how dario looks like CC on the wire, which is what lets your request bill against your Claude subscription instead of API pricing.
|
|
353
|
+
|
|
354
|
+
The trade-off: if your client's tools carry fields CC's schema doesn't have — a `sessionId`, a custom request id, a channel-bound context token, anything — those fields don't survive the round trip. The model only ever sees `Bash({command})`, responds with `Bash({command})`, and dario's reverse map rebuilds your tool call without the fields the model never saw. Your validator then rejects the call for a missing required field.
|
|
355
|
+
|
|
356
|
+
Symptom: your tool calls come back looking stripped-down, or your runtime complains about a required field being absent *only when routed through dario's Claude backend*, while the same tools work fine against a direct API key or the OpenAI-compat backend.
|
|
357
|
+
|
|
358
|
+
Fix: run dario with `--preserve-tools` (or `--keep-tools`). That skips the CC tool remap entirely, passes your client's tool definitions through to the model unchanged, and lets the model populate every field your schema expects.
|
|
359
|
+
|
|
360
|
+
```bash
|
|
361
|
+
dario proxy --preserve-tools
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
The cost: requests no longer look like CC on the wire, so the CC subscription fingerprint is gone. On a Max/Pro plan, that means the request may be counted against your API usage rather than your subscription quota. If you're on API-key billing already, `--preserve-tools` is free; if you're using dario specifically to route against a subscription, decide whether your custom-schema workload is worth the fingerprint loss on that endpoint. (A hybrid mode that keeps the fingerprint and also passes through unmapped client fields is on the roadmap.)
|
|
365
|
+
|
|
366
|
+
The openai-compat backend (OpenRouter, OpenAI, Groq, local LiteLLM, etc.) is unaffected — it forwards tool definitions byte-for-byte and doesn't need this flag.
|
|
367
|
+
|
|
350
368
|
### Library mode
|
|
351
369
|
|
|
352
370
|
```typescript
|
package/dist/pool.d.ts
CHANGED
|
@@ -50,8 +50,8 @@ export declare class AccountPool {
|
|
|
50
50
|
get size(): number;
|
|
51
51
|
/** Select the best account for the next request. */
|
|
52
52
|
select(): PoolAccount | null;
|
|
53
|
-
/** Select the next-best account, excluding the given
|
|
54
|
-
selectExcluding(
|
|
53
|
+
/** Select the next-best account, excluding the given set of aliases. */
|
|
54
|
+
selectExcluding(excluded: Set<string>): PoolAccount | null;
|
|
55
55
|
updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
|
|
56
56
|
markRejected(alias: string, snapshot: RateLimitSnapshot): void;
|
|
57
57
|
updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
|
package/dist/pool.js
CHANGED
|
@@ -82,12 +82,12 @@ export class AccountPool {
|
|
|
82
82
|
// No rate-limit data at all — least-used first
|
|
83
83
|
return all.reduce((a, b) => a.requestCount < b.requestCount ? a : b);
|
|
84
84
|
}
|
|
85
|
-
/** Select the next-best account, excluding the given
|
|
86
|
-
selectExcluding(
|
|
85
|
+
/** Select the next-best account, excluding the given set of aliases. */
|
|
86
|
+
selectExcluding(excluded) {
|
|
87
87
|
if (this.accounts.size <= 1)
|
|
88
88
|
return null;
|
|
89
89
|
const now = Date.now();
|
|
90
|
-
const candidates = [...this.accounts.values()].filter(a => a.alias
|
|
90
|
+
const candidates = [...this.accounts.values()].filter(a => !excluded.has(a.alias));
|
|
91
91
|
const eligible = candidates.filter(a => a.rateLimit.status !== 'rejected' &&
|
|
92
92
|
a.expiresAt > now + 30_000);
|
|
93
93
|
if (eligible.length > 0) {
|
package/dist/proxy.js
CHANGED
|
@@ -731,65 +731,147 @@ export async function startProxy(opts = {}) {
|
|
|
731
731
|
}
|
|
732
732
|
};
|
|
733
733
|
req.on('close', onClientClose);
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
// returns status='rejected' on 429, which makes the next `select()` call
|
|
742
|
-
// route traffic away from this account until it resets.
|
|
743
|
-
if (pool && poolAccount) {
|
|
744
|
-
const snapshot = parseRateLimits(upstream.headers);
|
|
745
|
-
if (upstream.status === 429) {
|
|
746
|
-
pool.markRejected(poolAccount.alias, snapshot);
|
|
747
|
-
}
|
|
748
|
-
else {
|
|
749
|
-
pool.updateRateLimits(poolAccount.alias, snapshot);
|
|
750
|
-
}
|
|
751
|
-
}
|
|
752
|
-
// Auto-retry without context-1m if it triggers a long-context billing error.
|
|
753
|
-
// Anthropic returns this as either 400 ("long context beta is not yet available
|
|
754
|
-
// for this subscription") or 429 ("Extra usage is required for long context
|
|
755
|
-
// requests") depending on the endpoint — we handle both.
|
|
756
|
-
//
|
|
757
|
-
// Note: `upstream.text()` consumes the body, so once we peek we MUST
|
|
758
|
-
// handle the response here (can't fall through to the normal forwarder).
|
|
734
|
+
const startTime = Date.now();
|
|
735
|
+
// Tracks which accounts we've already tried this request — used by the
|
|
736
|
+
// inside-request 429 failover loop to avoid re-hitting exhausted accounts.
|
|
737
|
+
const triedAliases = new Set();
|
|
738
|
+
if (poolAccount)
|
|
739
|
+
triedAliases.add(poolAccount.alias);
|
|
740
|
+
let upstream;
|
|
759
741
|
let peekedBody = null;
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
742
|
+
// Inside-request 429 failover loop (v3.8.0). On a 429, pool mode tries
|
|
743
|
+
// the next-best account before surfacing the error to the client.
|
|
744
|
+
// Bounded to pool.size iterations; breaks immediately on any non-429.
|
|
745
|
+
dispatchLoop: while (true) {
|
|
746
|
+
upstream = await fetch(targetBase, {
|
|
747
|
+
method: req.method ?? 'POST',
|
|
748
|
+
headers,
|
|
749
|
+
body: finalBody ? new Uint8Array(finalBody) : undefined,
|
|
750
|
+
signal: upstreamAbort.signal,
|
|
751
|
+
});
|
|
752
|
+
// Pool mode: capture rate-limit snapshot from the response. parseRateLimits
|
|
753
|
+
// returns status='rejected' on 429, which makes the next `select()` call
|
|
754
|
+
// route traffic away from this account until it resets.
|
|
755
|
+
if (pool && poolAccount) {
|
|
756
|
+
const snapshot = parseRateLimits(upstream.headers);
|
|
757
|
+
if (upstream.status === 429) {
|
|
758
|
+
pool.markRejected(poolAccount.alias, snapshot);
|
|
759
|
+
}
|
|
760
|
+
else {
|
|
761
|
+
pool.updateRateLimits(poolAccount.alias, snapshot);
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
// Auto-retry without context-1m if it triggers a long-context billing error.
|
|
765
|
+
// Anthropic returns this as either 400 ("long context beta is not yet available
|
|
766
|
+
// for this subscription") or 429 ("Extra usage is required for long context
|
|
767
|
+
// requests") depending on the endpoint — we handle both.
|
|
768
|
+
//
|
|
769
|
+
// Note: `upstream.text()` consumes the body, so once we peek we MUST
|
|
770
|
+
// handle the response here (can't fall through to the normal forwarder).
|
|
771
|
+
peekedBody = null;
|
|
772
|
+
if ((upstream.status === 400 || upstream.status === 429) && !passthrough) {
|
|
773
|
+
peekedBody = await upstream.text().catch(() => '');
|
|
774
|
+
const isLongContextError = peekedBody.includes('long context')
|
|
775
|
+
|| peekedBody.includes('Extra usage is required')
|
|
776
|
+
|| peekedBody.includes('long_context');
|
|
777
|
+
if (isLongContextError) {
|
|
778
|
+
if (verbose)
|
|
779
|
+
console.log(`[dario] #${requestCount} context-1m rejected (${upstream.status}) — retrying without it`);
|
|
780
|
+
const reducedBeta = beta.replace(',context-1m-2025-08-07', '').replace('context-1m-2025-08-07,', '');
|
|
781
|
+
const retryHeaders = { ...headers, 'anthropic-beta': reducedBeta };
|
|
782
|
+
const retry = await fetch(targetBase, {
|
|
783
|
+
method: req.method ?? 'POST',
|
|
784
|
+
headers: retryHeaders,
|
|
785
|
+
body: finalBody ? new Uint8Array(finalBody) : undefined,
|
|
786
|
+
signal: upstreamAbort.signal,
|
|
787
|
+
});
|
|
788
|
+
// Use the retry response from here on — peeked body is now stale
|
|
789
|
+
upstream = retry;
|
|
790
|
+
peekedBody = null;
|
|
791
|
+
// Pool mode: re-capture after the context-1m retry as the snapshot may have changed.
|
|
792
|
+
if (pool && poolAccount) {
|
|
793
|
+
const retrySnapshot = parseRateLimits(upstream.headers);
|
|
794
|
+
if (upstream.status === 429) {
|
|
795
|
+
pool.markRejected(poolAccount.alias, retrySnapshot);
|
|
796
|
+
}
|
|
797
|
+
else {
|
|
798
|
+
pool.updateRateLimits(poolAccount.alias, retrySnapshot);
|
|
799
|
+
}
|
|
784
800
|
}
|
|
785
|
-
|
|
786
|
-
|
|
801
|
+
}
|
|
802
|
+
else if (upstream.status === 429) {
|
|
803
|
+
// Not a context-1m issue — try pool failover before surfacing to client
|
|
804
|
+
if (pool && poolAccount) {
|
|
805
|
+
const nextAccount = pool.selectExcluding(triedAliases);
|
|
806
|
+
if (nextAccount) {
|
|
807
|
+
triedAliases.add(nextAccount.alias);
|
|
808
|
+
poolAccount = nextAccount;
|
|
809
|
+
accessToken = nextAccount.accessToken;
|
|
810
|
+
headers['Authorization'] = `Bearer ${accessToken}`;
|
|
811
|
+
headers['x-claude-code-session-id'] = nextAccount.identity.sessionId;
|
|
812
|
+
peekedBody = null;
|
|
813
|
+
continue dispatchLoop;
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
const enriched = enrich429(peekedBody, upstream.headers);
|
|
817
|
+
const responseHeaders = {
|
|
818
|
+
'Content-Type': 'application/json',
|
|
819
|
+
'Access-Control-Allow-Origin': corsOrigin,
|
|
820
|
+
...SECURITY_HEADERS,
|
|
821
|
+
};
|
|
822
|
+
for (const [key, value] of upstream.headers.entries()) {
|
|
823
|
+
if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
|
|
824
|
+
responseHeaders[key] = value;
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
requestCount++;
|
|
828
|
+
if (analytics && poolAccount) {
|
|
829
|
+
analytics.record({
|
|
830
|
+
timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
|
|
831
|
+
inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreateTokens: 0, thinkingTokens: 0,
|
|
832
|
+
claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
|
|
833
|
+
util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
|
|
834
|
+
latencyMs: Date.now() - startTime, status: 429, isStream: false, isOpenAI,
|
|
835
|
+
});
|
|
787
836
|
}
|
|
837
|
+
res.writeHead(429, responseHeaders);
|
|
838
|
+
res.end(enriched);
|
|
839
|
+
return;
|
|
840
|
+
}
|
|
841
|
+
else if (upstream.status === 400) {
|
|
842
|
+
// Non-long-context 400 — forward upstream error directly.
|
|
843
|
+
// The body is already consumed, so we write it straight out.
|
|
844
|
+
const responseHeaders = {
|
|
845
|
+
'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
|
|
846
|
+
'Access-Control-Allow-Origin': corsOrigin,
|
|
847
|
+
...SECURITY_HEADERS,
|
|
848
|
+
};
|
|
849
|
+
for (const [key, value] of upstream.headers.entries()) {
|
|
850
|
+
if (key === 'request-id')
|
|
851
|
+
responseHeaders[key] = value;
|
|
852
|
+
}
|
|
853
|
+
requestCount++;
|
|
854
|
+
res.writeHead(400, responseHeaders);
|
|
855
|
+
res.end(peekedBody);
|
|
856
|
+
return;
|
|
788
857
|
}
|
|
789
858
|
}
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
859
|
+
// Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
|
|
860
|
+
if (upstream.status === 429) {
|
|
861
|
+
// Try pool failover before surfacing to client
|
|
862
|
+
if (pool && poolAccount) {
|
|
863
|
+
const nextAccount = pool.selectExcluding(triedAliases);
|
|
864
|
+
if (nextAccount) {
|
|
865
|
+
triedAliases.add(nextAccount.alias);
|
|
866
|
+
poolAccount = nextAccount;
|
|
867
|
+
accessToken = nextAccount.accessToken;
|
|
868
|
+
headers['Authorization'] = `Bearer ${accessToken}`;
|
|
869
|
+
headers['x-claude-code-session-id'] = nextAccount.identity.sessionId;
|
|
870
|
+
continue dispatchLoop;
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
const errBody = await upstream.text().catch(() => '');
|
|
874
|
+
const enriched = enrich429(errBody, upstream.headers);
|
|
793
875
|
const responseHeaders = {
|
|
794
876
|
'Content-Type': 'application/json',
|
|
795
877
|
'Access-Control-Allow-Origin': corsOrigin,
|
|
@@ -801,47 +883,22 @@ export async function startProxy(opts = {}) {
|
|
|
801
883
|
}
|
|
802
884
|
}
|
|
803
885
|
requestCount++;
|
|
886
|
+
if (analytics && poolAccount) {
|
|
887
|
+
analytics.record({
|
|
888
|
+
timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
|
|
889
|
+
inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreateTokens: 0, thinkingTokens: 0,
|
|
890
|
+
claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
|
|
891
|
+
util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
|
|
892
|
+
latencyMs: Date.now() - startTime, status: 429, isStream: false, isOpenAI,
|
|
893
|
+
});
|
|
894
|
+
}
|
|
804
895
|
res.writeHead(429, responseHeaders);
|
|
805
896
|
res.end(enriched);
|
|
806
897
|
return;
|
|
807
898
|
}
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
const responseHeaders = {
|
|
812
|
-
'Content-Type': upstream.headers.get('content-type') ?? 'application/json',
|
|
813
|
-
'Access-Control-Allow-Origin': corsOrigin,
|
|
814
|
-
...SECURITY_HEADERS,
|
|
815
|
-
};
|
|
816
|
-
for (const [key, value] of upstream.headers.entries()) {
|
|
817
|
-
if (key === 'request-id')
|
|
818
|
-
responseHeaders[key] = value;
|
|
819
|
-
}
|
|
820
|
-
requestCount++;
|
|
821
|
-
res.writeHead(400, responseHeaders);
|
|
822
|
-
res.end(peekedBody);
|
|
823
|
-
return;
|
|
824
|
-
}
|
|
825
|
-
}
|
|
826
|
-
// Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
|
|
827
|
-
if (upstream.status === 429) {
|
|
828
|
-
const errBody = await upstream.text().catch(() => '');
|
|
829
|
-
const enriched = enrich429(errBody, upstream.headers);
|
|
830
|
-
const responseHeaders = {
|
|
831
|
-
'Content-Type': 'application/json',
|
|
832
|
-
'Access-Control-Allow-Origin': corsOrigin,
|
|
833
|
-
...SECURITY_HEADERS,
|
|
834
|
-
};
|
|
835
|
-
for (const [key, value] of upstream.headers.entries()) {
|
|
836
|
-
if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
|
|
837
|
-
responseHeaders[key] = value;
|
|
838
|
-
}
|
|
839
|
-
}
|
|
840
|
-
requestCount++;
|
|
841
|
-
res.writeHead(429, responseHeaders);
|
|
842
|
-
res.end(enriched);
|
|
843
|
-
return;
|
|
844
|
-
}
|
|
899
|
+
// Non-429 — exit dispatch loop and forward the response to client.
|
|
900
|
+
break;
|
|
901
|
+
} // end dispatchLoop: while (true)
|
|
845
902
|
// Detect streaming from content-type (reliable) or body (fallback)
|
|
846
903
|
const contentType = upstream.headers.get('content-type') ?? '';
|
|
847
904
|
const isStream = contentType.includes('text/event-stream');
|
|
@@ -869,6 +926,14 @@ export async function startProxy(opts = {}) {
|
|
|
869
926
|
}
|
|
870
927
|
res.writeHead(upstream.status, responseHeaders);
|
|
871
928
|
if (isStream && upstream.body) {
|
|
929
|
+
// Analytics accumulators for streaming responses — filled by parsing
|
|
930
|
+
// message_start / message_delta SSE events as they flow through.
|
|
931
|
+
let streamInputTokens = 0;
|
|
932
|
+
let streamOutputTokens = 0;
|
|
933
|
+
let streamCacheReadTokens = 0;
|
|
934
|
+
let streamCacheCreateTokens = 0;
|
|
935
|
+
const analyticsDecoder = (analytics && poolAccount) ? new TextDecoder() : null;
|
|
936
|
+
let analyticsBuffer = '';
|
|
872
937
|
// Stream SSE chunks through
|
|
873
938
|
const reader = upstream.body.getReader();
|
|
874
939
|
const decoder = new TextDecoder();
|
|
@@ -888,6 +953,34 @@ export async function startProxy(opts = {}) {
|
|
|
888
953
|
const { done, value } = await reader.read();
|
|
889
954
|
if (done)
|
|
890
955
|
break;
|
|
956
|
+
// Parse SSE events for analytics regardless of routing branch
|
|
957
|
+
if (analyticsDecoder && value) {
|
|
958
|
+
analyticsBuffer += analyticsDecoder.decode(value, { stream: true });
|
|
959
|
+
const parts = analyticsBuffer.split('\n\n');
|
|
960
|
+
analyticsBuffer = parts.pop() ?? '';
|
|
961
|
+
for (const part of parts) {
|
|
962
|
+
const dataLine = part.split('\n').find(l => l.startsWith('data: '));
|
|
963
|
+
if (!dataLine)
|
|
964
|
+
continue;
|
|
965
|
+
try {
|
|
966
|
+
const e = JSON.parse(dataLine.slice(6));
|
|
967
|
+
if (e.type === 'message_start') {
|
|
968
|
+
const u = e.message?.usage;
|
|
969
|
+
if (u) {
|
|
970
|
+
streamInputTokens = u.input_tokens ?? 0;
|
|
971
|
+
streamCacheReadTokens = u.cache_read_input_tokens ?? 0;
|
|
972
|
+
streamCacheCreateTokens = u.cache_creation_input_tokens ?? 0;
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
else if (e.type === 'message_delta') {
|
|
976
|
+
const u = e.usage;
|
|
977
|
+
if (u?.output_tokens)
|
|
978
|
+
streamOutputTokens = u.output_tokens;
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
catch { /* ignore malformed SSE events */ }
|
|
982
|
+
}
|
|
983
|
+
}
|
|
891
984
|
if (isOpenAI) {
|
|
892
985
|
// Translate Anthropic SSE → OpenAI SSE
|
|
893
986
|
buffer += decoder.decode(value, { stream: true });
|
|
@@ -929,6 +1022,17 @@ export async function startProxy(opts = {}) {
|
|
|
929
1022
|
console.error('[dario] Stream error:', sanitizeError(err));
|
|
930
1023
|
}
|
|
931
1024
|
res.end();
|
|
1025
|
+
if (analytics && poolAccount) {
|
|
1026
|
+
analytics.record({
|
|
1027
|
+
timestamp: Date.now(), account: poolAccount.alias, model: requestModel,
|
|
1028
|
+
inputTokens: streamInputTokens, outputTokens: streamOutputTokens,
|
|
1029
|
+
cacheReadTokens: streamCacheReadTokens, cacheCreateTokens: streamCacheCreateTokens,
|
|
1030
|
+
thinkingTokens: 0,
|
|
1031
|
+
claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
|
|
1032
|
+
util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
|
|
1033
|
+
latencyMs: Date.now() - startTime, status: upstream.status, isStream: true, isOpenAI,
|
|
1034
|
+
});
|
|
1035
|
+
}
|
|
932
1036
|
}
|
|
933
1037
|
else {
|
|
934
1038
|
// Buffer and forward
|
|
@@ -948,6 +1052,23 @@ export async function startProxy(opts = {}) {
|
|
|
948
1052
|
else {
|
|
949
1053
|
res.end(responseBody);
|
|
950
1054
|
}
|
|
1055
|
+
if (analytics && poolAccount) {
|
|
1056
|
+
try {
|
|
1057
|
+
const parsed = JSON.parse(responseBody);
|
|
1058
|
+
const usage = Analytics.parseUsage(parsed);
|
|
1059
|
+
analytics.record({
|
|
1060
|
+
timestamp: Date.now(), account: poolAccount.alias,
|
|
1061
|
+
model: usage.model || requestModel,
|
|
1062
|
+
inputTokens: usage.inputTokens, outputTokens: usage.outputTokens,
|
|
1063
|
+
cacheReadTokens: usage.cacheReadTokens, cacheCreateTokens: usage.cacheCreateTokens,
|
|
1064
|
+
thinkingTokens: usage.thinkingTokens,
|
|
1065
|
+
claim: poolAccount.rateLimit.claim, util5h: poolAccount.rateLimit.util5h,
|
|
1066
|
+
util7d: poolAccount.rateLimit.util7d, overageUtil: poolAccount.rateLimit.overageUtil,
|
|
1067
|
+
latencyMs: Date.now() - startTime, status: upstream.status, isStream: false, isOpenAI,
|
|
1068
|
+
});
|
|
1069
|
+
}
|
|
1070
|
+
catch { /* don't let analytics errors break responses */ }
|
|
1071
|
+
}
|
|
951
1072
|
if (verbose)
|
|
952
1073
|
console.log(`[dario] #${requestCount} ${upstream.status}`);
|
|
953
1074
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.8.1",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
],
|
|
22
22
|
"scripts": {
|
|
23
23
|
"build": "tsc && cp src/cc-template-data.json dist/",
|
|
24
|
-
"test": "node test/issue-29-tool-translation.mjs",
|
|
24
|
+
"test": "node test/issue-29-tool-translation.mjs && node test/analytics-recording.mjs && node test/failover-429.mjs",
|
|
25
25
|
"audit": "npm audit --production --audit-level=high",
|
|
26
26
|
"prepublishOnly": "npm run build",
|
|
27
27
|
"start": "node dist/cli.js",
|