@askalf/dario 3.4.6 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -16
- package/dist/accounts.d.ts +23 -0
- package/dist/accounts.js +253 -0
- package/dist/analytics.d.ts +99 -0
- package/dist/analytics.js +198 -0
- package/dist/cli.js +209 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +11 -0
- package/dist/openai-backend.d.ts +19 -0
- package/dist/openai-backend.js +170 -0
- package/dist/pool.d.ts +68 -0
- package/dist/pool.js +212 -0
- package/dist/proxy.js +174 -10
- package/package.json +1 -1
package/dist/pool.d.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
 * Identity values carried by each pooled account.
 */
export interface AccountIdentity {
    /** Device identifier supplied by the caller when the account is added to the pool. */
    deviceId: string;
    /** Account UUID supplied by the caller when the account is added to the pool. */
    accountUuid: string;
    /** Session identifier; the pool assigns a random UUID the first time an alias is added. */
    sessionId: string;
}
|
|
6
|
+
/**
 * Point-in-time view of one account's rate-limit state, as read from the
 * `anthropic-ratelimit-unified-*` response headers.
 */
export interface RateLimitSnapshot {
    /** `status` header value, or 'unknown' when absent. The pool sets this to 'rejected' via `markRejected`. */
    status: string;
    /** `5h-utilization` header parsed as a float; 0 when missing or unparseable. */
    util5h: number;
    /** `7d-utilization` header parsed as a float; 0 when missing or unparseable. */
    util7d: number;
    /** `overage-utilization` header parsed as a float; 0 when missing or unparseable. */
    overageUtil: number;
    /** `representative-claim` header value, or 'unknown' when absent. */
    claim: string;
    /**
     * `reset` header parsed as an integer; 0 when missing or unparseable.
     * NOTE(review): presumably a Unix timestamp — confirm the units upstream.
     */
    reset: number;
    /** `fallback-percentage` header parsed as a float; 0 when missing or unparseable. */
    fallbackPct: number;
    /** `Date.now()` at the moment the snapshot was parsed; 0 in `EMPTY_SNAPSHOT`. */
    updatedAt: number;
}
|
|
16
|
+
/**
 * Baseline snapshot for an account with no rate-limit data yet: both string
 * fields are 'unknown' and every numeric field is 0. The pool copies it when
 * an alias is added for the first time.
 */
export declare const EMPTY_SNAPSHOT: RateLimitSnapshot;
|
|
17
|
+
/**
 * One account tracked by the pool: its credentials, identity and the most
 * recent rate-limit snapshot recorded for it.
 */
export interface PoolAccount {
    /** Key under which the account is stored in the pool. */
    alias: string;
    /** Current access token; replaced by `updateTokens` or by re-adding the alias. */
    accessToken: string;
    /** Current refresh token; replaced together with the access token. */
    refreshToken: string;
    /**
     * Token expiry, compared against `Date.now()` (milliseconds). The pool
     * stops selecting an account as eligible within 30 s of this time.
     */
    expiresAt: number;
    /** Identity values; preserved when an existing alias is re-added. */
    identity: AccountIdentity;
    /** Latest snapshot stored by `updateRateLimits` or `markRejected`. */
    rateLimit: RateLimitSnapshot;
    /** Incremented on every `updateRateLimits` call (not by `markRejected`). */
    requestCount: number;
}
|
|
26
|
+
/**
 * Aggregate view of the pool as returned by `AccountPool.status()`.
 */
export interface PoolStatus {
    /** Total number of accounts in the pool. */
    accounts: number;
    /** Accounts that are not 'rejected' and whose token expires more than 30 s from now. */
    healthy: number;
    /** `accounts - healthy`. */
    exhausted: number;
    /**
     * Despite the name, this is the *average* headroom across all accounts
     * (1 minus the larger of the 5h/7d utilization), as a rounded percentage.
     */
    totalHeadroom: number;
    /** Alias that `select()` would return right now, or 'none' for an empty pool. */
    bestAccount: string;
    /** Number of callers currently waiting in the `waitForAccount` queue. */
    queued: number;
}
|
|
34
|
+
/**
 * Build a rate-limit snapshot from a response's
 * `anthropic-ratelimit-unified-*` headers.
 *
 * @param headers Response headers to read from.
 * @returns A snapshot stamped with the current time; absent string headers
 *          become 'unknown' and absent numeric headers become 0.
 */
export declare function parseRateLimits(headers: Headers): RateLimitSnapshot;
|
|
36
|
+
/**
 * In-memory set of accounts keyed by alias, with headroom-based selection
 * and a bounded wait queue for callers that arrive while every account is
 * out of headroom.
 */
export declare class AccountPool {
    private accounts;
    private queue;
    private queueMaxSize;
    private queueTimeoutMs;
    private drainTimer;
    /**
     * Insert an account, or replace the tokens of an existing alias. Re-adding
     * an alias keeps its identity, rate-limit snapshot and request count.
     */
    add(alias: string, opts: {
        accessToken: string;
        refreshToken: string;
        expiresAt: number;
        deviceId: string;
        accountUuid: string;
    }): void;
    /** Remove an account; returns whether the alias was present. */
    remove(alias: string): boolean;
    /** Number of accounts currently in the pool. */
    get size(): number;
    /**
     * Pick the account for the next request: the eligible account with the
     * most headroom, otherwise the one with the earliest reset, otherwise the
     * least-used one. Returns null only when the pool is empty.
     */
    select(): PoolAccount | null;
    /**
     * Like `select()`, but never returns `excludeAlias`. Returns null when
     * the pool holds at most one account.
     */
    selectExcluding(excludeAlias: string): PoolAccount | null;
    /** Store a fresh snapshot for the alias and count the request. */
    updateRateLimits(alias: string, snapshot: RateLimitSnapshot): void;
    /** Store the snapshot with its status forced to 'rejected'. */
    markRejected(alias: string, snapshot: RateLimitSnapshot): void;
    /** Replace the stored tokens and expiry for the alias. */
    updateTokens(alias: string, accessToken: string, refreshToken: string, expiresAt: number): void;
    /** Look up an account by alias. */
    get(alias: string): PoolAccount | undefined;
    /** Snapshot array of every account in the pool. */
    all(): PoolAccount[];
    /** Aggregate counts and headroom for the whole pool. */
    status(): PoolStatus;
    /**
     * Resolve with an account that has more than 2% headroom. When none does,
     * the caller is queued and served by a periodic sweep once a later
     * `updateRateLimits` call has restored headroom. Rejects when the queue
     * is already full (50 waiters) or after waiting 60 s.
     */
    waitForAccount(): Promise<PoolAccount>;
    private drainQueue;
}
|
package/dist/pool.js
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Account pool — rate limit tracking, headroom routing, failover.
|
|
3
|
+
*
|
|
4
|
+
* Activated automatically when `~/.dario/accounts/` contains 2+ accounts.
|
|
5
|
+
* Single-account dario (`~/.dario/credentials.json`) keeps the same code
|
|
6
|
+
* path it has always had; the pool only runs when there are multiple
|
|
7
|
+
* accounts to distribute against.
|
|
8
|
+
*/
|
|
9
|
+
import { randomUUID } from 'node:crypto';
|
|
10
|
+
export const EMPTY_SNAPSHOT = {
|
|
11
|
+
status: 'unknown',
|
|
12
|
+
util5h: 0,
|
|
13
|
+
util7d: 0,
|
|
14
|
+
overageUtil: 0,
|
|
15
|
+
claim: 'unknown',
|
|
16
|
+
reset: 0,
|
|
17
|
+
fallbackPct: 0,
|
|
18
|
+
updatedAt: 0,
|
|
19
|
+
};
|
|
20
|
+
/** Parse an Anthropic response's rate-limit headers into a snapshot. */
|
|
21
|
+
export function parseRateLimits(headers) {
|
|
22
|
+
const get = (key) => headers.get(`anthropic-ratelimit-unified-${key}`) ?? '';
|
|
23
|
+
return {
|
|
24
|
+
status: get('status') || 'unknown',
|
|
25
|
+
util5h: parseFloat(get('5h-utilization')) || 0,
|
|
26
|
+
util7d: parseFloat(get('7d-utilization')) || 0,
|
|
27
|
+
overageUtil: parseFloat(get('overage-utilization')) || 0,
|
|
28
|
+
claim: get('representative-claim') || 'unknown',
|
|
29
|
+
reset: parseInt(get('reset')) || 0,
|
|
30
|
+
fallbackPct: parseFloat(get('fallback-percentage')) || 0,
|
|
31
|
+
updatedAt: Date.now(),
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
export class AccountPool {
|
|
35
|
+
accounts = new Map();
|
|
36
|
+
queue = [];
|
|
37
|
+
queueMaxSize = 50;
|
|
38
|
+
queueTimeoutMs = 60_000;
|
|
39
|
+
drainTimer = null;
|
|
40
|
+
add(alias, opts) {
|
|
41
|
+
const existing = this.accounts.get(alias);
|
|
42
|
+
this.accounts.set(alias, {
|
|
43
|
+
alias,
|
|
44
|
+
accessToken: opts.accessToken,
|
|
45
|
+
refreshToken: opts.refreshToken,
|
|
46
|
+
expiresAt: opts.expiresAt,
|
|
47
|
+
identity: existing?.identity ?? {
|
|
48
|
+
deviceId: opts.deviceId,
|
|
49
|
+
accountUuid: opts.accountUuid,
|
|
50
|
+
sessionId: randomUUID(),
|
|
51
|
+
},
|
|
52
|
+
rateLimit: existing?.rateLimit ?? { ...EMPTY_SNAPSHOT },
|
|
53
|
+
requestCount: existing?.requestCount ?? 0,
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
remove(alias) {
|
|
57
|
+
return this.accounts.delete(alias);
|
|
58
|
+
}
|
|
59
|
+
get size() {
|
|
60
|
+
return this.accounts.size;
|
|
61
|
+
}
|
|
62
|
+
/** Select the best account for the next request. */
|
|
63
|
+
select() {
|
|
64
|
+
if (this.accounts.size === 0)
|
|
65
|
+
return null;
|
|
66
|
+
const now = Date.now();
|
|
67
|
+
const all = [...this.accounts.values()];
|
|
68
|
+
const eligible = all.filter(a => a.rateLimit.status !== 'rejected' &&
|
|
69
|
+
a.expiresAt > now + 30_000);
|
|
70
|
+
if (eligible.length > 0) {
|
|
71
|
+
return eligible.reduce((best, curr) => {
|
|
72
|
+
const bestHeadroom = 1 - Math.max(best.rateLimit.util5h, best.rateLimit.util7d);
|
|
73
|
+
const currHeadroom = 1 - Math.max(curr.rateLimit.util5h, curr.rateLimit.util7d);
|
|
74
|
+
return currHeadroom > bestHeadroom ? curr : best;
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
// All accounts exhausted — return the one with the earliest reset
|
|
78
|
+
const withReset = all.filter(a => a.rateLimit.reset > 0);
|
|
79
|
+
if (withReset.length > 0) {
|
|
80
|
+
return withReset.reduce((a, b) => a.rateLimit.reset < b.rateLimit.reset ? a : b);
|
|
81
|
+
}
|
|
82
|
+
// No rate-limit data at all — least-used first
|
|
83
|
+
return all.reduce((a, b) => a.requestCount < b.requestCount ? a : b);
|
|
84
|
+
}
|
|
85
|
+
/** Select the next-best account, excluding the given alias. */
|
|
86
|
+
selectExcluding(excludeAlias) {
|
|
87
|
+
if (this.accounts.size <= 1)
|
|
88
|
+
return null;
|
|
89
|
+
const now = Date.now();
|
|
90
|
+
const candidates = [...this.accounts.values()].filter(a => a.alias !== excludeAlias);
|
|
91
|
+
const eligible = candidates.filter(a => a.rateLimit.status !== 'rejected' &&
|
|
92
|
+
a.expiresAt > now + 30_000);
|
|
93
|
+
if (eligible.length > 0) {
|
|
94
|
+
return eligible.reduce((best, curr) => {
|
|
95
|
+
const bestHeadroom = 1 - Math.max(best.rateLimit.util5h, best.rateLimit.util7d);
|
|
96
|
+
const currHeadroom = 1 - Math.max(curr.rateLimit.util5h, curr.rateLimit.util7d);
|
|
97
|
+
return currHeadroom > bestHeadroom ? curr : best;
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
if (candidates.length > 0) {
|
|
101
|
+
return candidates.reduce((a, b) => a.requestCount < b.requestCount ? a : b);
|
|
102
|
+
}
|
|
103
|
+
return null;
|
|
104
|
+
}
|
|
105
|
+
updateRateLimits(alias, snapshot) {
|
|
106
|
+
const account = this.accounts.get(alias);
|
|
107
|
+
if (!account)
|
|
108
|
+
return;
|
|
109
|
+
account.rateLimit = snapshot;
|
|
110
|
+
account.requestCount++;
|
|
111
|
+
}
|
|
112
|
+
markRejected(alias, snapshot) {
|
|
113
|
+
const account = this.accounts.get(alias);
|
|
114
|
+
if (!account)
|
|
115
|
+
return;
|
|
116
|
+
account.rateLimit = { ...snapshot, status: 'rejected' };
|
|
117
|
+
}
|
|
118
|
+
updateTokens(alias, accessToken, refreshToken, expiresAt) {
|
|
119
|
+
const account = this.accounts.get(alias);
|
|
120
|
+
if (!account)
|
|
121
|
+
return;
|
|
122
|
+
account.accessToken = accessToken;
|
|
123
|
+
account.refreshToken = refreshToken;
|
|
124
|
+
account.expiresAt = expiresAt;
|
|
125
|
+
}
|
|
126
|
+
get(alias) {
|
|
127
|
+
return this.accounts.get(alias);
|
|
128
|
+
}
|
|
129
|
+
all() {
|
|
130
|
+
return [...this.accounts.values()];
|
|
131
|
+
}
|
|
132
|
+
status() {
|
|
133
|
+
const all = this.all();
|
|
134
|
+
const now = Date.now();
|
|
135
|
+
const healthy = all.filter(a => a.rateLimit.status !== 'rejected' &&
|
|
136
|
+
a.expiresAt > now + 30_000);
|
|
137
|
+
const headrooms = all.map(a => 1 - Math.max(a.rateLimit.util5h, a.rateLimit.util7d));
|
|
138
|
+
const avgHeadroom = headrooms.length > 0 ? headrooms.reduce((a, b) => a + b, 0) / headrooms.length : 0;
|
|
139
|
+
const best = this.select();
|
|
140
|
+
return {
|
|
141
|
+
accounts: all.length,
|
|
142
|
+
healthy: healthy.length,
|
|
143
|
+
exhausted: all.length - healthy.length,
|
|
144
|
+
totalHeadroom: Math.round(avgHeadroom * 100),
|
|
145
|
+
bestAccount: best?.alias ?? 'none',
|
|
146
|
+
queued: this.queue.length,
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Wait for an available account. If all accounts are exhausted, queues
|
|
151
|
+
* the request and resolves when an account becomes available via
|
|
152
|
+
* updateRateLimits reducing utilization below threshold.
|
|
153
|
+
*/
|
|
154
|
+
async waitForAccount() {
|
|
155
|
+
const immediate = this.select();
|
|
156
|
+
if (immediate) {
|
|
157
|
+
const headroom = 1 - Math.max(immediate.rateLimit.util5h, immediate.rateLimit.util7d);
|
|
158
|
+
if (headroom > 0.02)
|
|
159
|
+
return immediate;
|
|
160
|
+
}
|
|
161
|
+
if (this.queue.length >= this.queueMaxSize) {
|
|
162
|
+
throw new Error('Queue full — all accounts exhausted');
|
|
163
|
+
}
|
|
164
|
+
if (!this.drainTimer) {
|
|
165
|
+
this.drainTimer = setInterval(() => this.drainQueue(), 5_000);
|
|
166
|
+
this.drainTimer.unref();
|
|
167
|
+
}
|
|
168
|
+
return new Promise((resolve, reject) => {
|
|
169
|
+
const entry = { resolve, reject, enqueuedAt: Date.now() };
|
|
170
|
+
this.queue.push(entry);
|
|
171
|
+
setTimeout(() => {
|
|
172
|
+
const idx = this.queue.indexOf(entry);
|
|
173
|
+
if (idx >= 0) {
|
|
174
|
+
this.queue.splice(idx, 1);
|
|
175
|
+
reject(new Error('Queue timeout — no accounts available within 60s'));
|
|
176
|
+
}
|
|
177
|
+
}, this.queueTimeoutMs);
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
drainQueue() {
|
|
181
|
+
if (this.queue.length === 0) {
|
|
182
|
+
if (this.drainTimer) {
|
|
183
|
+
clearInterval(this.drainTimer);
|
|
184
|
+
this.drainTimer = null;
|
|
185
|
+
}
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
const now = Date.now();
|
|
189
|
+
this.queue = this.queue.filter(entry => {
|
|
190
|
+
if (now - entry.enqueuedAt > this.queueTimeoutMs) {
|
|
191
|
+
entry.reject(new Error('Queue timeout — no accounts available within 60s'));
|
|
192
|
+
return false;
|
|
193
|
+
}
|
|
194
|
+
return true;
|
|
195
|
+
});
|
|
196
|
+
while (this.queue.length > 0) {
|
|
197
|
+
const account = this.select();
|
|
198
|
+
if (!account)
|
|
199
|
+
break;
|
|
200
|
+
const headroom = 1 - Math.max(account.rateLimit.util5h, account.rateLimit.util7d);
|
|
201
|
+
if (headroom <= 0.02)
|
|
202
|
+
break;
|
|
203
|
+
const entry = this.queue.shift();
|
|
204
|
+
if (entry)
|
|
205
|
+
entry.resolve(account);
|
|
206
|
+
}
|
|
207
|
+
if (this.queue.length === 0 && this.drainTimer) {
|
|
208
|
+
clearInterval(this.drainTimer);
|
|
209
|
+
this.drainTimer = null;
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
package/dist/proxy.js
CHANGED
|
@@ -7,6 +7,10 @@ import { homedir } from 'node:os';
|
|
|
7
7
|
import { arch, platform } from 'node:process';
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
9
|
import { buildCCRequest, reverseMapResponse } from './cc-template.js';
|
|
10
|
+
import { AccountPool, parseRateLimits } from './pool.js';
|
|
11
|
+
import { Analytics } from './analytics.js';
|
|
12
|
+
import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
|
|
13
|
+
import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
|
|
10
14
|
const ANTHROPIC_API = 'https://api.anthropic.com';
|
|
11
15
|
const DEFAULT_PORT = 3456;
|
|
12
16
|
const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MB — generous for large prompts, prevents abuse
|
|
@@ -321,11 +325,69 @@ export async function startProxy(opts = {}) {
|
|
|
321
325
|
const host = opts.host ?? process.env.DARIO_HOST ?? DEFAULT_HOST;
|
|
322
326
|
const verbose = opts.verbose ?? false;
|
|
323
327
|
const passthrough = opts.passthrough ?? false;
|
|
324
|
-
//
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
328
|
+
// Multi-provider backends (v3.6.0+). Loaded once at startup; the CLI
|
|
329
|
+
// `dario backend add openai --key=…` writes to ~/.dario/backends/.
|
|
330
|
+
// Routing: a GPT-family model arriving on /v1/chat/completions is
|
|
331
|
+
// dispatched to the openai-compat backend when one is configured,
|
|
332
|
+
// otherwise it falls through to the existing Claude-side handling
|
|
333
|
+
// (which used to map gpt-* names to Claude equivalents).
|
|
334
|
+
let openaiBackend = await getOpenAIBackend();
|
|
335
|
+
if (openaiBackend) {
|
|
336
|
+
console.log(` OpenAI-compat backend: ${openaiBackend.name} → ${openaiBackend.baseUrl}`);
|
|
337
|
+
}
|
|
338
|
+
// Multi-account pool — activated when ~/.dario/accounts/ has 2+ entries.
|
|
339
|
+
// Single-account dario keeps its existing code path unchanged.
|
|
340
|
+
const accountsList = await loadAllAccounts();
|
|
341
|
+
const pool = accountsList.length >= 2 ? new AccountPool() : null;
|
|
342
|
+
const analytics = pool ? new Analytics() : null;
|
|
343
|
+
let status;
|
|
344
|
+
if (pool) {
|
|
345
|
+
for (const acc of accountsList) {
|
|
346
|
+
pool.add(acc.alias, {
|
|
347
|
+
accessToken: acc.accessToken,
|
|
348
|
+
refreshToken: acc.refreshToken,
|
|
349
|
+
expiresAt: acc.expiresAt,
|
|
350
|
+
deviceId: acc.deviceId,
|
|
351
|
+
accountUuid: acc.accountUuid,
|
|
352
|
+
});
|
|
353
|
+
}
|
|
354
|
+
console.log(` Pool mode: ${accountsList.length} accounts loaded`);
|
|
355
|
+
// Background refresh — keep every account's token fresh without blocking requests
|
|
356
|
+
const refreshInterval = setInterval(async () => {
|
|
357
|
+
for (const acc of pool.all()) {
|
|
358
|
+
if (acc.expiresAt < Date.now() + 45 * 60 * 1000) {
|
|
359
|
+
try {
|
|
360
|
+
const saved = await loadAccount(acc.alias);
|
|
361
|
+
if (!saved)
|
|
362
|
+
continue;
|
|
363
|
+
const refreshed = await refreshAccountToken(saved);
|
|
364
|
+
pool.updateTokens(acc.alias, refreshed.accessToken, refreshed.refreshToken, refreshed.expiresAt);
|
|
365
|
+
}
|
|
366
|
+
catch (err) {
|
|
367
|
+
console.error(`[dario] Background refresh failed for ${acc.alias}: ${err instanceof Error ? err.message : err}`);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}, 15 * 60 * 1000);
|
|
372
|
+
refreshInterval.unref();
|
|
373
|
+
// Pool mode doesn't check single-account status — compute a placeholder
|
|
374
|
+
// for the startup banner using the pool's earliest expiry.
|
|
375
|
+
const earliest = Math.min(...pool.all().map(a => a.expiresAt));
|
|
376
|
+
const msLeft = Math.max(0, earliest - Date.now());
|
|
377
|
+
status = {
|
|
378
|
+
authenticated: true,
|
|
379
|
+
status: 'healthy',
|
|
380
|
+
expiresAt: earliest,
|
|
381
|
+
expiresIn: `${Math.floor(msLeft / 3600000)}h ${Math.floor((msLeft % 3600000) / 60000)}m`,
|
|
382
|
+
};
|
|
383
|
+
}
|
|
384
|
+
else {
|
|
385
|
+
// Single-account mode — existing auth check
|
|
386
|
+
status = await getStatus();
|
|
387
|
+
if (!status.authenticated) {
|
|
388
|
+
console.error('[dario] Not authenticated. Run `dario login` first.');
|
|
389
|
+
process.exit(1);
|
|
390
|
+
}
|
|
329
391
|
}
|
|
330
392
|
const cliVersion = detectCliVersion();
|
|
331
393
|
const modelOverride = opts.model ? (MODEL_ALIASES[opts.model] ?? opts.model) : null;
|
|
@@ -433,6 +495,39 @@ export async function startProxy(opts = {}) {
|
|
|
433
495
|
res.end(JSON.stringify(s));
|
|
434
496
|
return;
|
|
435
497
|
}
|
|
498
|
+
// Pool status endpoint — shows loaded accounts, headroom, and the
|
|
499
|
+
// account that would be selected next. Read-only; mutation flows through
|
|
500
|
+
// the `dario accounts` CLI, not HTTP.
|
|
501
|
+
if (urlPath === '/accounts' && req.method === 'GET') {
|
|
502
|
+
if (!pool) {
|
|
503
|
+
res.writeHead(200, JSON_HEADERS);
|
|
504
|
+
res.end(JSON.stringify({ mode: 'single-account', accounts: 0 }));
|
|
505
|
+
return;
|
|
506
|
+
}
|
|
507
|
+
const accounts = pool.all().map(a => ({
|
|
508
|
+
alias: a.alias,
|
|
509
|
+
util5h: a.rateLimit.util5h,
|
|
510
|
+
util7d: a.rateLimit.util7d,
|
|
511
|
+
claim: a.rateLimit.claim,
|
|
512
|
+
status: a.rateLimit.status,
|
|
513
|
+
requestCount: a.requestCount,
|
|
514
|
+
expiresInMs: Math.max(0, a.expiresAt - Date.now()),
|
|
515
|
+
}));
|
|
516
|
+
res.writeHead(200, JSON_HEADERS);
|
|
517
|
+
res.end(JSON.stringify({ mode: 'pool', ...pool.status(), accounts }));
|
|
518
|
+
return;
|
|
519
|
+
}
|
|
520
|
+
// Analytics endpoint — request history + burn-rate summary (pool mode only).
|
|
521
|
+
if (urlPath === '/analytics' && req.method === 'GET') {
|
|
522
|
+
if (!analytics) {
|
|
523
|
+
res.writeHead(200, JSON_HEADERS);
|
|
524
|
+
res.end(JSON.stringify({ mode: 'single-account', note: 'Analytics are only collected in pool mode.' }));
|
|
525
|
+
return;
|
|
526
|
+
}
|
|
527
|
+
res.writeHead(200, JSON_HEADERS);
|
|
528
|
+
res.end(JSON.stringify(analytics.summary()));
|
|
529
|
+
return;
|
|
530
|
+
}
|
|
436
531
|
if (urlPath === '/v1/models' && req.method === 'GET') {
|
|
437
532
|
requestCount++;
|
|
438
533
|
res.writeHead(200, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
|
|
@@ -465,7 +560,26 @@ export async function startProxy(opts = {}) {
|
|
|
465
560
|
let onClientClose = null;
|
|
466
561
|
let upstreamAbortReason = null;
|
|
467
562
|
try {
|
|
468
|
-
|
|
563
|
+
// Pool mode: select an account by headroom. Single-account mode:
|
|
564
|
+
// fall through to getAccessToken() exactly as before. Request-path
|
|
565
|
+
// 429 failover (retry with the next-best account before returning a
|
|
566
|
+
// rate-limit error to the client) lands in v3.5.1 — this release
|
|
567
|
+
// ships the pool scaffolding and headroom-aware selection across
|
|
568
|
+
// requests, not within a single 429 retry.
|
|
569
|
+
let poolAccount = null;
|
|
570
|
+
let accessToken;
|
|
571
|
+
if (pool) {
|
|
572
|
+
poolAccount = pool.select();
|
|
573
|
+
if (!poolAccount) {
|
|
574
|
+
res.writeHead(503, JSON_HEADERS);
|
|
575
|
+
res.end(JSON.stringify({ error: 'No accounts available in pool' }));
|
|
576
|
+
return;
|
|
577
|
+
}
|
|
578
|
+
accessToken = poolAccount.accessToken;
|
|
579
|
+
}
|
|
580
|
+
else {
|
|
581
|
+
accessToken = await getAccessToken();
|
|
582
|
+
}
|
|
469
583
|
// Read request body with size limit and timeout (prevents slow-loris)
|
|
470
584
|
const chunks = [];
|
|
471
585
|
let totalBytes = 0;
|
|
@@ -487,6 +601,27 @@ export async function startProxy(opts = {}) {
|
|
|
487
601
|
clearTimeout(bodyTimeout);
|
|
488
602
|
}
|
|
489
603
|
const body = Buffer.concat(chunks);
|
|
604
|
+
// Multi-provider routing (v3.6.0+). When an OpenAI-compat backend is
|
|
605
|
+
// configured and the request is on /v1/chat/completions with a
|
|
606
|
+
// GPT-family model, forward it straight through to the backend
|
|
607
|
+
// instead of running it through the Claude template path. Requests
|
|
608
|
+
// on /v1/messages or with Claude-family models fall through to
|
|
609
|
+
// existing behavior.
|
|
610
|
+
if (openaiBackend && isOpenAI && body.length > 0) {
|
|
611
|
+
try {
|
|
612
|
+
const peek = JSON.parse(body.toString());
|
|
613
|
+
const rawModel = (peek.model || '').toString();
|
|
614
|
+
if (rawModel && isOpenAIModel(rawModel)) {
|
|
615
|
+
if (verbose) {
|
|
616
|
+
console.log(`[dario] #${requestCount} ${req.method} ${urlPath} (model: ${rawModel}) → openai backend`);
|
|
617
|
+
}
|
|
618
|
+
requestCount++;
|
|
619
|
+
await forwardToOpenAI(req, res, body, openaiBackend, corsOrigin, SECURITY_HEADERS, UPSTREAM_TIMEOUT_MS, verbose);
|
|
620
|
+
return;
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
catch { /* not JSON — fall through to existing path */ }
|
|
624
|
+
}
|
|
490
625
|
// Parse body once, apply OpenAI translation, model override, and sanitization
|
|
491
626
|
let finalBody = body.length > 0 ? body : undefined;
|
|
492
627
|
let ccToolMap = null;
|
|
@@ -511,7 +646,10 @@ export async function startProxy(opts = {}) {
|
|
|
511
646
|
const fullVersion = `${cliVersion}.${buildTag}`;
|
|
512
647
|
const billingTag = `x-anthropic-billing-header: cc_version=${fullVersion}; cc_entrypoint=cli; cch=${cch};`;
|
|
513
648
|
const CACHE_1H = { type: 'ephemeral', ttl: '1h' };
|
|
514
|
-
const
|
|
649
|
+
const bodyIdentity = poolAccount
|
|
650
|
+
? poolAccount.identity
|
|
651
|
+
: { deviceId: identity.deviceId, accountUuid: identity.accountUuid, sessionId: SESSION_ID };
|
|
652
|
+
const { body: ccBody, toolMap } = buildCCRequest(r, billingTag, CACHE_1H, bodyIdentity, { preserveTools: opts.preserveTools ?? false });
|
|
515
653
|
// Store tool map for response reverse-mapping
|
|
516
654
|
ccToolMap = toolMap;
|
|
517
655
|
// Replace request body entirely with CC template
|
|
@@ -555,12 +693,16 @@ export async function startProxy(opts = {}) {
|
|
|
555
693
|
await new Promise(r => setTimeout(r, MIN_REQUEST_INTERVAL_MS - elapsed));
|
|
556
694
|
}
|
|
557
695
|
lastRequestTime = Date.now();
|
|
558
|
-
// Rotate session ID per request — fresh UUID avoids persistent-session fingerprinting
|
|
559
|
-
|
|
696
|
+
// Rotate session ID per request — fresh UUID avoids persistent-session fingerprinting.
|
|
697
|
+
// Pool mode uses the per-account identity.sessionId which is stable across
|
|
698
|
+
// a given account's lifetime; single-account mode rotates per request.
|
|
699
|
+
if (!poolAccount)
|
|
700
|
+
SESSION_ID = randomUUID();
|
|
701
|
+
const outboundSessionId = poolAccount ? poolAccount.identity.sessionId : SESSION_ID;
|
|
560
702
|
const headers = {
|
|
561
703
|
...staticHeaders,
|
|
562
704
|
'Authorization': `Bearer ${accessToken}`,
|
|
563
|
-
'x-claude-code-session-id':
|
|
705
|
+
'x-claude-code-session-id': outboundSessionId,
|
|
564
706
|
'anthropic-version': passthrough ? (req.headers['anthropic-version'] || '2023-06-01') : '2023-06-01',
|
|
565
707
|
'anthropic-beta': beta,
|
|
566
708
|
'x-client-request-id': randomUUID(),
|
|
@@ -595,6 +737,18 @@ export async function startProxy(opts = {}) {
|
|
|
595
737
|
body: finalBody ? new Uint8Array(finalBody) : undefined,
|
|
596
738
|
signal: upstreamAbort.signal,
|
|
597
739
|
});
|
|
740
|
+
// Pool mode: capture rate-limit snapshot from the response. parseRateLimits
|
|
741
|
+
// returns status='rejected' on 429, which makes the next `select()` call
|
|
742
|
+
// route traffic away from this account until it resets.
|
|
743
|
+
if (pool && poolAccount) {
|
|
744
|
+
const snapshot = parseRateLimits(upstream.headers);
|
|
745
|
+
if (upstream.status === 429) {
|
|
746
|
+
pool.markRejected(poolAccount.alias, snapshot);
|
|
747
|
+
}
|
|
748
|
+
else {
|
|
749
|
+
pool.updateRateLimits(poolAccount.alias, snapshot);
|
|
750
|
+
}
|
|
751
|
+
}
|
|
598
752
|
// Auto-retry without context-1m if it triggers a long-context billing error.
|
|
599
753
|
// Anthropic returns this as either 400 ("long context beta is not yet available
|
|
600
754
|
// for this subscription") or 429 ("Extra usage is required for long context
|
|
@@ -622,6 +776,16 @@ export async function startProxy(opts = {}) {
|
|
|
622
776
|
// Use the retry response from here on — peeked body is now stale
|
|
623
777
|
upstream = retry;
|
|
624
778
|
peekedBody = null;
|
|
779
|
+
// Pool mode: re-capture after the context-1m retry as the snapshot may have changed.
|
|
780
|
+
if (pool && poolAccount) {
|
|
781
|
+
const retrySnapshot = parseRateLimits(upstream.headers);
|
|
782
|
+
if (upstream.status === 429) {
|
|
783
|
+
pool.markRejected(poolAccount.alias, retrySnapshot);
|
|
784
|
+
}
|
|
785
|
+
else {
|
|
786
|
+
pool.updateRateLimits(poolAccount.alias, retrySnapshot);
|
|
787
|
+
}
|
|
788
|
+
}
|
|
625
789
|
}
|
|
626
790
|
else if (upstream.status === 429) {
|
|
627
791
|
// Not a context-1m issue — return enriched 429 directly
|