@askalf/dario 2.7.1 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +3 -1
- package/dist/proxy.d.ts +1 -0
- package/dist/proxy.js +169 -70
- package/package.json +3 -2
package/dist/cli.js
CHANGED
|
@@ -112,9 +112,10 @@ async function proxy() {
|
|
|
112
112
|
}
|
|
113
113
|
const verbose = args.includes('--verbose') || args.includes('-v');
|
|
114
114
|
const cliBackend = args.includes('--cli');
|
|
115
|
+
const passthrough = args.includes('--passthrough') || args.includes('--thin');
|
|
115
116
|
const modelArg = args.find(a => a.startsWith('--model='));
|
|
116
117
|
const model = modelArg ? modelArg.split('=')[1] : undefined;
|
|
117
|
-
await startProxy({ port, verbose, model, cliBackend });
|
|
118
|
+
await startProxy({ port, verbose, model, cliBackend, passthrough });
|
|
118
119
|
}
|
|
119
120
|
async function help() {
|
|
120
121
|
console.log(`
|
|
@@ -133,6 +134,7 @@ async function help() {
|
|
|
133
134
|
Full IDs: claude-opus-4-6, claude-sonnet-4-6
|
|
134
135
|
Default: passthrough (client decides)
|
|
135
136
|
--cli Use Claude CLI as backend (bypasses rate limits)
|
|
137
|
+
--passthrough Thin proxy — OAuth swap only, no injection
|
|
136
138
|
--port=PORT Port to listen on (default: 3456)
|
|
137
139
|
--verbose, -v Log all requests
|
|
138
140
|
|
package/dist/proxy.d.ts
CHANGED
package/dist/proxy.js
CHANGED
|
@@ -301,6 +301,37 @@ export function sanitizeError(err) {
|
|
|
301
301
|
.replace(/eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g, '[REDACTED_JWT]')
|
|
302
302
|
.replace(/Bearer\s+[^\s,;]+/gi, 'Bearer [REDACTED]');
|
|
303
303
|
}
|
|
304
|
+
/**
 * Enrich Anthropic's unhelpful 429 "Error" body with rate limit details from headers.
 *
 * Only rewrites `error.message` when it is empty or the literal string "Error";
 * any other message is left as the upstream sent it.
 *
 * @param {string} body - Raw upstream response body, expected to be JSON with an `error` object.
 * @param {{ get(name: string): (string|null|undefined) }} headers - Upstream response headers
 *   (any object exposing a `get(name)` lookup).
 * @returns {string} The re-serialized JSON body (with the enriched message when applicable),
 *   or `body` unchanged when it cannot be parsed or processed.
 */
function enrich429(body, headers) {
    // Best-effort by design: any failure (invalid JSON, `null` body, header lookup
    // throwing) falls back to returning the upstream body untouched.
    try {
        const parsed = JSON.parse(body);
        const err = parsed.error;
        if (err && (err.message === 'Error' || !err.message)) {
            const claim = headers.get('anthropic-ratelimit-unified-representative-claim') || 'unknown';
            const status = headers.get('anthropic-ratelimit-unified-status') || 'rejected';
            const parts = [`Rate limited (${status}). Limiting window: ${claim}`];
            // Utilization headers are treated as fractions (value * 100 => percent).
            // Skip values that do not parse so the message never contains "NaN%".
            const windows = [
                ['5h', 'anthropic-ratelimit-unified-5h-utilization'],
                ['7d', 'anthropic-ratelimit-unified-7d-utilization'],
            ];
            for (const [label, headerName] of windows) {
                const raw = headers.get(headerName);
                const ratio = raw ? Number.parseFloat(raw) : Number.NaN;
                if (Number.isFinite(ratio)) {
                    parts.push(`${label} utilization: ${Math.round(ratio * 100)}%`);
                }
            }
            const reset = headers.get('anthropic-ratelimit-unified-reset');
            if (reset) {
                // Numeric values are interpreted as epoch seconds (as before).
                // NOTE(review): a non-numeric value is assumed to be a date string and
                // parsed with Date.parse; an unparseable value is skipped instead of
                // producing "resets in NaNm".
                const epochSeconds = Number(reset);
                const resetMs = Number.isFinite(epochSeconds) ? epochSeconds * 1000 : Date.parse(reset);
                if (Number.isFinite(resetMs)) {
                    // Clamp at 0 so a reset time already in the past is not reported as negative.
                    const mins = Math.max(0, Math.round((resetMs - Date.now()) / 60000));
                    parts.push(`resets in ${mins}m`);
                }
            }
            err.message = parts.join('. ');
        }
        return JSON.stringify(parsed);
    }
    catch {
        return body;
    }
}
|
|
304
335
|
/**
|
|
305
336
|
* CLI Backend: route requests through `claude --print` instead of direct API.
|
|
306
337
|
* This bypasses rate limiting because Claude Code's binary has priority routing.
|
|
@@ -398,6 +429,7 @@ async function handleViaCli(body, model, verbose) {
|
|
|
398
429
|
export async function startProxy(opts = {}) {
|
|
399
430
|
const port = opts.port ?? DEFAULT_PORT;
|
|
400
431
|
const verbose = opts.verbose ?? false;
|
|
432
|
+
const passthrough = opts.passthrough ?? false;
|
|
401
433
|
// Verify auth before starting
|
|
402
434
|
const status = await getStatus();
|
|
403
435
|
if (!status.authenticated) {
|
|
@@ -415,8 +447,11 @@ export async function startProxy(opts = {}) {
|
|
|
415
447
|
console.warn('[dario] WARNING: No Claude Code device identity found. Requests may be billed as Extra Usage.');
|
|
416
448
|
console.warn('[dario] Run Claude Code at least once to generate ~/.claude/.claude.json');
|
|
417
449
|
}
|
|
418
|
-
// Pre-build static headers
|
|
419
|
-
const staticHeaders = {
|
|
450
|
+
// Pre-build static headers
|
|
451
|
+
const staticHeaders = passthrough ? {
|
|
452
|
+
'accept': 'application/json',
|
|
453
|
+
'Content-Type': 'application/json',
|
|
454
|
+
} : {
|
|
420
455
|
'accept': 'application/json',
|
|
421
456
|
'Content-Type': 'application/json',
|
|
422
457
|
'anthropic-dangerous-direct-browser-access': 'true',
|
|
@@ -556,30 +591,60 @@ export async function startProxy(opts = {}) {
|
|
|
556
591
|
// CLI backend mode: route through claude --print (works for both Anthropic and OpenAI endpoints)
|
|
557
592
|
if (useCli && req.method === 'POST' && body.length > 0) {
|
|
558
593
|
let cliBody = body;
|
|
594
|
+
let clientWantsStream = false;
|
|
559
595
|
// Translate OpenAI format before passing to CLI
|
|
560
596
|
if (isOpenAI) {
|
|
561
597
|
try {
|
|
562
598
|
const parsed = JSON.parse(body.toString());
|
|
599
|
+
clientWantsStream = !!parsed.stream;
|
|
563
600
|
cliBody = Buffer.from(JSON.stringify(openaiToAnthropic(parsed, modelOverride)));
|
|
564
601
|
}
|
|
565
602
|
catch { /* send as-is */ }
|
|
566
603
|
}
|
|
604
|
+
else {
|
|
605
|
+
try {
|
|
606
|
+
const parsed = JSON.parse(body.toString());
|
|
607
|
+
clientWantsStream = !!parsed.stream;
|
|
608
|
+
}
|
|
609
|
+
catch { }
|
|
610
|
+
}
|
|
567
611
|
const cliResult = await handleViaCli(cliBody, modelOverride, verbose);
|
|
568
612
|
requestCount++;
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
613
|
+
if (cliResult.status >= 200 && cliResult.status < 300 && clientWantsStream) {
|
|
614
|
+
// Client requested streaming — convert CLI JSON to SSE
|
|
615
|
+
if (isOpenAI) {
|
|
616
|
+
try {
|
|
617
|
+
const parsed = JSON.parse(cliResult.body);
|
|
618
|
+
const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
|
|
619
|
+
const ts = Math.floor(Date.now() / 1000);
|
|
620
|
+
let sseData = `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n`;
|
|
621
|
+
sseData += `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
|
|
622
|
+
res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
623
|
+
res.end(sseData);
|
|
624
|
+
}
|
|
625
|
+
catch {
|
|
626
|
+
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
627
|
+
res.end(cliResult.body);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
else {
|
|
631
|
+
const sseData = jsonToSse(cliResult.body);
|
|
632
|
+
res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
633
|
+
res.end(sseData);
|
|
574
634
|
}
|
|
575
|
-
catch { /* send as-is */ }
|
|
576
635
|
}
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
636
|
+
else {
|
|
637
|
+
// Non-streaming or error — translate and return as JSON
|
|
638
|
+
if (isOpenAI && cliResult.status >= 200 && cliResult.status < 300) {
|
|
639
|
+
try {
|
|
640
|
+
const parsed = JSON.parse(cliResult.body);
|
|
641
|
+
cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
|
|
642
|
+
}
|
|
643
|
+
catch { /* send as-is */ }
|
|
644
|
+
}
|
|
645
|
+
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
646
|
+
res.end(cliResult.body);
|
|
647
|
+
}
|
|
583
648
|
return;
|
|
584
649
|
}
|
|
585
650
|
// Parse body once, apply OpenAI translation, model override, and sanitization
|
|
@@ -595,54 +660,61 @@ export async function startProxy(opts = {}) {
|
|
|
595
660
|
}
|
|
596
661
|
const result = isOpenAI ? openaiToAnthropic(parsed, modelOverride) : (modelOverride ? { ...parsed, model: modelOverride } : parsed);
|
|
597
662
|
const r = result;
|
|
598
|
-
//
|
|
599
|
-
if (
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
// Haiku 4.5 does not support thinking at all
|
|
610
|
-
const modelName = (r.model || '').toLowerCase();
|
|
611
|
-
const supportsThinking = !modelName.includes('haiku');
|
|
612
|
-
if (supportsThinking && !r.thinking) {
|
|
613
|
-
r.thinking = { type: 'adaptive' };
|
|
614
|
-
// Ensure max_tokens is reasonable for thinking models
|
|
615
|
-
const clientMax = r.max_tokens || 8192;
|
|
616
|
-
r.max_tokens = Math.max(clientMax, 16000);
|
|
617
|
-
}
|
|
618
|
-
// Request priority capacity when available
|
|
619
|
-
if (!r.service_tier) {
|
|
620
|
-
r.service_tier = 'auto';
|
|
621
|
-
}
|
|
622
|
-
// Enable context management (matches Claude Code default)
|
|
623
|
-
// Requires thinking to be enabled — skip for models without thinking support (e.g. Haiku)
|
|
624
|
-
if (supportsThinking && !r.context_management) {
|
|
625
|
-
r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
|
|
626
|
-
}
|
|
627
|
-
// Inject Claude Code billing header into system prompt.
|
|
628
|
-
// Anthropic uses this to route requests through priority rate limiting
|
|
629
|
-
// instead of the general API quota. Without it, Opus/Sonnet get 429
|
|
630
|
-
// when overall utilization is high, even though model-specific limits
|
|
631
|
-
// have headroom. The CLI binary embeds this in its system prompt.
|
|
632
|
-
const billingTag = `x-anthropic-billing-header: cc_version=${cliVersion}; cc_entrypoint=cli; cch=98638;`;
|
|
633
|
-
if (typeof r.system === 'string') {
|
|
634
|
-
if (!r.system.includes('x-anthropic-billing-header:')) {
|
|
635
|
-
r.system = billingTag + '\n' + r.system;
|
|
663
|
+
// In passthrough mode, skip all Claude-specific injection — OAuth swap only
|
|
664
|
+
if (!passthrough) {
|
|
665
|
+
// Inject device identity metadata for session tracking
|
|
666
|
+
if (identity.deviceId) {
|
|
667
|
+
r.metadata = {
|
|
668
|
+
user_id: JSON.stringify({
|
|
669
|
+
device_id: identity.deviceId,
|
|
670
|
+
account_uuid: identity.accountUuid,
|
|
671
|
+
session_id: SESSION_ID,
|
|
672
|
+
}),
|
|
673
|
+
};
|
|
636
674
|
}
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
const
|
|
640
|
-
|
|
641
|
-
|
|
675
|
+
// Enable adaptive thinking for models that support it (Opus/Sonnet 4.6+)
|
|
676
|
+
// Haiku 4.5 does not support thinking at all
|
|
677
|
+
const modelName = (r.model || '').toLowerCase();
|
|
678
|
+
const supportsThinking = !modelName.includes('haiku');
|
|
679
|
+
if (supportsThinking && !r.thinking) {
|
|
680
|
+
r.thinking = { type: 'adaptive' };
|
|
681
|
+
// Ensure max_tokens is reasonable for thinking models
|
|
682
|
+
const clientMax = r.max_tokens || 8192;
|
|
683
|
+
r.max_tokens = Math.max(clientMax, 16000);
|
|
684
|
+
}
|
|
685
|
+
// Request priority capacity when available
|
|
686
|
+
if (!r.service_tier) {
|
|
687
|
+
r.service_tier = 'auto';
|
|
688
|
+
}
|
|
689
|
+
// Set reasoning effort (pass through client value or default)
|
|
690
|
+
if (!r.output_config) {
|
|
691
|
+
r.output_config = { effort: 'high' };
|
|
692
|
+
}
|
|
693
|
+
// Enable context management (matches Claude Code default)
|
|
694
|
+
// Requires thinking to be enabled — skip for models without thinking support (e.g. Haiku)
|
|
695
|
+
if (supportsThinking && !r.context_management) {
|
|
696
|
+
r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
|
|
697
|
+
}
|
|
698
|
+
// Inject Claude Code billing header into system prompt.
|
|
699
|
+
// Anthropic uses this to route requests through priority rate limiting
|
|
700
|
+
// instead of the general API quota. Without it, Opus/Sonnet get 429
|
|
701
|
+
// when overall utilization is high, even though model-specific limits
|
|
702
|
+
// have headroom. The CLI binary embeds this in its system prompt.
|
|
703
|
+
const billingTag = `x-anthropic-billing-header: cc_version=${cliVersion}; cc_entrypoint=cli; cch=98638;`;
|
|
704
|
+
if (typeof r.system === 'string') {
|
|
705
|
+
if (!r.system.includes('x-anthropic-billing-header:')) {
|
|
706
|
+
r.system = billingTag + '\n' + r.system;
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
else if (Array.isArray(r.system)) {
|
|
710
|
+
const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
|
|
711
|
+
if (!hasTag) {
|
|
712
|
+
r.system.unshift({ type: 'text', text: billingTag });
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
else {
|
|
716
|
+
r.system = billingTag;
|
|
642
717
|
}
|
|
643
|
-
}
|
|
644
|
-
else {
|
|
645
|
-
r.system = billingTag;
|
|
646
718
|
}
|
|
647
719
|
finalBody = Buffer.from(JSON.stringify(r));
|
|
648
720
|
}
|
|
@@ -652,15 +724,23 @@ export async function startProxy(opts = {}) {
|
|
|
652
724
|
const modelInfo = modelOverride ? ` (model: ${modelOverride})` : '';
|
|
653
725
|
console.log(`[dario] #${requestCount} ${req.method} ${urlPath}${modelInfo}`);
|
|
654
726
|
}
|
|
655
|
-
// Beta
|
|
656
|
-
// Billing classification is determined by the OAuth token alone, not beta flags.
|
|
657
|
-
// context-management and prompt-caching-scope are safe for all subscription types.
|
|
727
|
+
// Beta headers
|
|
658
728
|
const clientBeta = req.headers['anthropic-beta'];
|
|
659
|
-
let beta
|
|
660
|
-
if (
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
729
|
+
let beta;
|
|
730
|
+
if (passthrough) {
|
|
731
|
+
// Passthrough: only add oauth beta, forward client betas as-is
|
|
732
|
+
beta = 'oauth-2025-04-20';
|
|
733
|
+
if (clientBeta)
|
|
734
|
+
beta += ',' + clientBeta;
|
|
735
|
+
}
|
|
736
|
+
else {
|
|
737
|
+
// Claude-optimized: full beta set matching CLI v2.1.100
|
|
738
|
+
beta = 'oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219,advisor-tool-2026-03-01,effort-2025-11-24';
|
|
739
|
+
if (clientBeta) {
|
|
740
|
+
const filtered = filterBillableBetas(clientBeta);
|
|
741
|
+
if (filtered)
|
|
742
|
+
beta += ',' + filtered;
|
|
743
|
+
}
|
|
664
744
|
}
|
|
665
745
|
const headers = {
|
|
666
746
|
...staticHeaders,
|
|
@@ -675,6 +755,25 @@ export async function startProxy(opts = {}) {
|
|
|
675
755
|
body: finalBody ? new Uint8Array(finalBody) : undefined,
|
|
676
756
|
signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
|
|
677
757
|
});
|
|
758
|
+
// Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
|
|
759
|
+
if (upstream.status === 429 && !(cliAvailable && !useCli)) {
|
|
760
|
+
const errBody = await upstream.text().catch(() => '');
|
|
761
|
+
const enriched = enrich429(errBody, upstream.headers);
|
|
762
|
+
const responseHeaders = {
|
|
763
|
+
'Content-Type': 'application/json',
|
|
764
|
+
'Access-Control-Allow-Origin': corsOrigin,
|
|
765
|
+
...SECURITY_HEADERS,
|
|
766
|
+
};
|
|
767
|
+
for (const [key, value] of upstream.headers.entries()) {
|
|
768
|
+
if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
|
|
769
|
+
responseHeaders[key] = value;
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
requestCount++;
|
|
773
|
+
res.writeHead(429, responseHeaders);
|
|
774
|
+
res.end(enriched);
|
|
775
|
+
return;
|
|
776
|
+
}
|
|
678
777
|
// Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary.
|
|
679
778
|
// The CLI gets priority routing from Anthropic's server — a separate rate limit pool
|
|
680
779
|
// that continues working when the direct API quota is exhausted for expensive models.
|
|
@@ -869,7 +968,7 @@ export async function startProxy(opts = {}) {
|
|
|
869
968
|
process.exit(1);
|
|
870
969
|
});
|
|
871
970
|
server.listen(port, LOCALHOST, () => {
|
|
872
|
-
const
|
|
971
|
+
const modeLine = passthrough ? 'Mode: passthrough (OAuth swap only, no injection)' : useCli ? 'Backend: Claude CLI (bypasses rate limits)' : `OAuth: ${status.status} (expires in ${status.expiresIn})`;
|
|
873
972
|
const modelLine = modelOverride ? `Model: ${modelOverride} (all requests)` : 'Model: passthrough (client decides)';
|
|
874
973
|
console.log('');
|
|
875
974
|
console.log(` dario — http://localhost:${port}`);
|
|
@@ -880,7 +979,7 @@ export async function startProxy(opts = {}) {
|
|
|
880
979
|
console.log(` ANTHROPIC_BASE_URL=http://localhost:${port}`);
|
|
881
980
|
console.log(' ANTHROPIC_API_KEY=dario');
|
|
882
981
|
console.log('');
|
|
883
|
-
console.log(` ${
|
|
982
|
+
console.log(` ${modeLine}`);
|
|
884
983
|
console.log(` ${modelLine}`);
|
|
885
984
|
console.log('');
|
|
886
985
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "2.7.1",
|
|
3
|
+
"version": "2.8.0",
|
|
4
4
|
"description": "Use your Claude subscription as an API. No API key needed. Local proxy for Claude Max/Pro subscriptions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -24,7 +24,8 @@
|
|
|
24
24
|
"audit": "npm audit --production --audit-level=high",
|
|
25
25
|
"prepublishOnly": "npm run build",
|
|
26
26
|
"start": "node dist/cli.js",
|
|
27
|
-
"dev": "tsx src/cli.ts"
|
|
27
|
+
"dev": "tsx src/cli.ts",
|
|
28
|
+
"e2e": "node test/e2e.mjs"
|
|
28
29
|
},
|
|
29
30
|
"keywords": [
|
|
30
31
|
"claude",
|