@askalf/dario 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -112,9 +112,10 @@ async function proxy() {
112
112
  }
113
113
  const verbose = args.includes('--verbose') || args.includes('-v');
114
114
  const cliBackend = args.includes('--cli');
115
+ const passthrough = args.includes('--passthrough') || args.includes('--thin');
115
116
  const modelArg = args.find(a => a.startsWith('--model='));
116
117
  const model = modelArg ? modelArg.split('=')[1] : undefined;
117
- await startProxy({ port, verbose, model, cliBackend });
118
+ await startProxy({ port, verbose, model, cliBackend, passthrough });
118
119
  }
119
120
  async function help() {
120
121
  console.log(`
@@ -133,6 +134,7 @@ async function help() {
133
134
  Full IDs: claude-opus-4-6, claude-sonnet-4-6
134
135
  Default: passthrough (client decides)
135
136
  --cli Use Claude CLI as backend (bypasses rate limits)
137
+ --passthrough Thin proxy — OAuth swap only, no injection
136
138
  --port=PORT Port to listen on (default: 3456)
137
139
  --verbose, -v Log all requests
138
140
 
package/dist/proxy.d.ts CHANGED
@@ -3,6 +3,7 @@ interface ProxyOptions {
3
3
  verbose?: boolean;
4
4
  model?: string;
5
5
  cliBackend?: boolean;
6
+ passthrough?: boolean;
6
7
  }
7
8
  export declare function sanitizeError(err: unknown): string;
8
9
  export declare function startProxy(opts?: ProxyOptions): Promise<void>;
package/dist/proxy.js CHANGED
@@ -301,6 +301,37 @@ export function sanitizeError(err) {
301
301
  .replace(/eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g, '[REDACTED_JWT]')
302
302
  .replace(/Bearer\s+[^\s,;]+/gi, 'Bearer [REDACTED]');
303
303
  }
304
+ /**
305
+ * Enrich Anthropic's unhelpful 429 "Error" body with rate limit details from headers.
306
+ */
307
+ function enrich429(body, headers) {
308
+ try {
309
+ const parsed = JSON.parse(body);
310
+ const err = parsed.error;
311
+ if (err && (err.message === 'Error' || !err.message)) {
312
+ const claim = headers.get('anthropic-ratelimit-unified-representative-claim') || 'unknown';
313
+ const status = headers.get('anthropic-ratelimit-unified-status') || 'rejected';
314
+ const util5h = headers.get('anthropic-ratelimit-unified-5h-utilization');
315
+ const util7d = headers.get('anthropic-ratelimit-unified-7d-utilization');
316
+ const reset = headers.get('anthropic-ratelimit-unified-reset');
317
+ const parts = [`Rate limited (${status}). Limiting window: ${claim}`];
318
+ if (util5h)
319
+ parts.push(`5h utilization: ${Math.round(parseFloat(util5h) * 100)}%`);
320
+ if (util7d)
321
+ parts.push(`7d utilization: ${Math.round(parseFloat(util7d) * 100)}%`);
322
+ if (reset) {
323
+ const resetDate = new Date(parseInt(reset) * 1000);
324
+ const mins = Math.max(0, Math.round((resetDate.getTime() - Date.now()) / 60000));
325
+ parts.push(`resets in ${mins}m`);
326
+ }
327
+ err.message = parts.join('. ');
328
+ }
329
+ return JSON.stringify(parsed);
330
+ }
331
+ catch {
332
+ return body;
333
+ }
334
+ }
304
335
  /**
305
336
  * CLI Backend: route requests through `claude --print` instead of direct API.
306
337
  * This bypasses rate limiting because Claude Code's binary has priority routing.
@@ -398,6 +429,7 @@ async function handleViaCli(body, model, verbose) {
398
429
  export async function startProxy(opts = {}) {
399
430
  const port = opts.port ?? DEFAULT_PORT;
400
431
  const verbose = opts.verbose ?? false;
432
+ const passthrough = opts.passthrough ?? false;
401
433
  // Verify auth before starting
402
434
  const status = await getStatus();
403
435
  if (!status.authenticated) {
@@ -415,8 +447,11 @@ export async function startProxy(opts = {}) {
415
447
  console.warn('[dario] WARNING: No Claude Code device identity found. Requests may be billed as Extra Usage.');
416
448
  console.warn('[dario] Run Claude Code at least once to generate ~/.claude/.claude.json');
417
449
  }
418
- // Pre-build static headers (only auth, version, beta, request-id change per request)
419
- const staticHeaders = {
450
+ // Pre-build static headers
451
+ const staticHeaders = passthrough ? {
452
+ 'accept': 'application/json',
453
+ 'Content-Type': 'application/json',
454
+ } : {
420
455
  'accept': 'application/json',
421
456
  'Content-Type': 'application/json',
422
457
  'anthropic-dangerous-direct-browser-access': 'true',
@@ -556,30 +591,60 @@ export async function startProxy(opts = {}) {
556
591
  // CLI backend mode: route through claude --print (works for both Anthropic and OpenAI endpoints)
557
592
  if (useCli && req.method === 'POST' && body.length > 0) {
558
593
  let cliBody = body;
594
+ let clientWantsStream = false;
559
595
  // Translate OpenAI format before passing to CLI
560
596
  if (isOpenAI) {
561
597
  try {
562
598
  const parsed = JSON.parse(body.toString());
599
+ clientWantsStream = !!parsed.stream;
563
600
  cliBody = Buffer.from(JSON.stringify(openaiToAnthropic(parsed, modelOverride)));
564
601
  }
565
602
  catch { /* send as-is */ }
566
603
  }
604
+ else {
605
+ try {
606
+ const parsed = JSON.parse(body.toString());
607
+ clientWantsStream = !!parsed.stream;
608
+ }
609
+ catch { }
610
+ }
567
611
  const cliResult = await handleViaCli(cliBody, modelOverride, verbose);
568
612
  requestCount++;
569
- // Translate CLI response back to OpenAI format if needed
570
- if (isOpenAI && cliResult.status >= 200 && cliResult.status < 300) {
571
- try {
572
- const parsed = JSON.parse(cliResult.body);
573
- cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
613
+ if (cliResult.status >= 200 && cliResult.status < 300 && clientWantsStream) {
614
+ // Client requested streaming convert CLI JSON to SSE
615
+ if (isOpenAI) {
616
+ try {
617
+ const parsed = JSON.parse(cliResult.body);
618
+ const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
619
+ const ts = Math.floor(Date.now() / 1000);
620
+ let sseData = `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n`;
621
+ sseData += `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
622
+ res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
623
+ res.end(sseData);
624
+ }
625
+ catch {
626
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
627
+ res.end(cliResult.body);
628
+ }
629
+ }
630
+ else {
631
+ const sseData = jsonToSse(cliResult.body);
632
+ res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
633
+ res.end(sseData);
574
634
  }
575
- catch { /* send as-is */ }
576
635
  }
577
- res.writeHead(cliResult.status, {
578
- 'Content-Type': cliResult.contentType,
579
- 'Access-Control-Allow-Origin': corsOrigin,
580
- ...SECURITY_HEADERS,
581
- });
582
- res.end(cliResult.body);
636
+ else {
637
+ // Non-streaming or error — translate and return as JSON
638
+ if (isOpenAI && cliResult.status >= 200 && cliResult.status < 300) {
639
+ try {
640
+ const parsed = JSON.parse(cliResult.body);
641
+ cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
642
+ }
643
+ catch { /* send as-is */ }
644
+ }
645
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
646
+ res.end(cliResult.body);
647
+ }
583
648
  return;
584
649
  }
585
650
  // Parse body once, apply OpenAI translation, model override, and sanitization
@@ -595,54 +660,61 @@ export async function startProxy(opts = {}) {
595
660
  }
596
661
  const result = isOpenAI ? openaiToAnthropic(parsed, modelOverride) : (modelOverride ? { ...parsed, model: modelOverride } : parsed);
597
662
  const r = result;
598
- // Inject device identity metadata for session tracking
599
- if (identity.deviceId) {
600
- r.metadata = {
601
- user_id: JSON.stringify({
602
- device_id: identity.deviceId,
603
- account_uuid: identity.accountUuid,
604
- session_id: SESSION_ID,
605
- }),
606
- };
607
- }
608
- // Enable adaptive thinking for models that support it (Opus/Sonnet 4.6+)
609
- // Haiku 4.5 does not support thinking at all
610
- const modelName = (r.model || '').toLowerCase();
611
- const supportsThinking = !modelName.includes('haiku');
612
- if (supportsThinking && !r.thinking) {
613
- r.thinking = { type: 'adaptive' };
614
- // Ensure max_tokens is reasonable for thinking models
615
- const clientMax = r.max_tokens || 8192;
616
- r.max_tokens = Math.max(clientMax, 16000);
617
- }
618
- // Request priority capacity when available
619
- if (!r.service_tier) {
620
- r.service_tier = 'auto';
621
- }
622
- // Enable context management (matches Claude Code default)
623
- // Requires thinking to be enabled — skip for models without thinking support (e.g. Haiku)
624
- if (supportsThinking && !r.context_management) {
625
- r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
626
- }
627
- // Inject Claude Code billing header into system prompt.
628
- // Anthropic uses this to route requests through priority rate limiting
629
- // instead of the general API quota. Without it, Opus/Sonnet get 429
630
- // when overall utilization is high, even though model-specific limits
631
- // have headroom. The CLI binary embeds this in its system prompt.
632
- const billingTag = `x-anthropic-billing-header: cc_version=${cliVersion}; cc_entrypoint=cli; cch=98638;`;
633
- if (typeof r.system === 'string') {
634
- if (!r.system.includes('x-anthropic-billing-header:')) {
635
- r.system = billingTag + '\n' + r.system;
663
+ // In passthrough mode, skip all Claude-specific injection — OAuth swap only
664
+ if (!passthrough) {
665
+ // Inject device identity metadata for session tracking
666
+ if (identity.deviceId) {
667
+ r.metadata = {
668
+ user_id: JSON.stringify({
669
+ device_id: identity.deviceId,
670
+ account_uuid: identity.accountUuid,
671
+ session_id: SESSION_ID,
672
+ }),
673
+ };
636
674
  }
637
- }
638
- else if (Array.isArray(r.system)) {
639
- const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
640
- if (!hasTag) {
641
- r.system.unshift({ type: 'text', text: billingTag });
675
+ // Enable adaptive thinking for models that support it (Opus/Sonnet 4.6+)
676
+ // Haiku 4.5 does not support thinking at all
677
+ const modelName = (r.model || '').toLowerCase();
678
+ const supportsThinking = !modelName.includes('haiku');
679
+ if (supportsThinking && !r.thinking) {
680
+ r.thinking = { type: 'adaptive' };
681
+ // Ensure max_tokens is reasonable for thinking models
682
+ const clientMax = r.max_tokens || 8192;
683
+ r.max_tokens = Math.max(clientMax, 16000);
684
+ }
685
+ // Request priority capacity when available
686
+ if (!r.service_tier) {
687
+ r.service_tier = 'auto';
688
+ }
689
+ // Set reasoning effort (pass through client value or default)
690
+ if (!r.output_config) {
691
+ r.output_config = { effort: 'high' };
692
+ }
693
+ // Enable context management (matches Claude Code default)
694
+ // Requires thinking to be enabled — skip for models without thinking support (e.g. Haiku)
695
+ if (supportsThinking && !r.context_management) {
696
+ r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
697
+ }
698
+ // Inject Claude Code billing header into system prompt.
699
+ // Anthropic uses this to route requests through priority rate limiting
700
+ // instead of the general API quota. Without it, Opus/Sonnet get 429
701
+ // when overall utilization is high, even though model-specific limits
702
+ // have headroom. The CLI binary embeds this in its system prompt.
703
+ const billingTag = `x-anthropic-billing-header: cc_version=${cliVersion}; cc_entrypoint=cli; cch=98638;`;
704
+ if (typeof r.system === 'string') {
705
+ if (!r.system.includes('x-anthropic-billing-header:')) {
706
+ r.system = billingTag + '\n' + r.system;
707
+ }
708
+ }
709
+ else if (Array.isArray(r.system)) {
710
+ const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
711
+ if (!hasTag) {
712
+ r.system.unshift({ type: 'text', text: billingTag });
713
+ }
714
+ }
715
+ else {
716
+ r.system = billingTag;
642
717
  }
643
- }
644
- else {
645
- r.system = billingTag;
646
718
  }
647
719
  finalBody = Buffer.from(JSON.stringify(r));
648
720
  }
@@ -652,15 +724,23 @@ export async function startProxy(opts = {}) {
652
724
  const modelInfo = modelOverride ? ` (model: ${modelOverride})` : '';
653
725
  console.log(`[dario] #${requestCount} ${req.method} ${urlPath}${modelInfo}`);
654
726
  }
655
- // Beta defaults — matches native Claude Code v2.1.98 headers exactly.
656
- // Billing classification is determined by the OAuth token alone, not beta flags.
657
- // context-management and prompt-caching-scope are safe for all subscription types.
727
+ // Beta headers
658
728
  const clientBeta = req.headers['anthropic-beta'];
659
- let beta = 'oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219,advisor-tool-2026-03-01,effort-2025-11-24';
660
- if (clientBeta) {
661
- const filtered = filterBillableBetas(clientBeta);
662
- if (filtered)
663
- beta += ',' + filtered;
729
+ let beta;
730
+ if (passthrough) {
731
+ // Passthrough: only add oauth beta, forward client betas as-is
732
+ beta = 'oauth-2025-04-20';
733
+ if (clientBeta)
734
+ beta += ',' + clientBeta;
735
+ }
736
+ else {
737
+ // Claude-optimized: full beta set matching CLI v2.1.100
738
+ beta = 'oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219,advisor-tool-2026-03-01,effort-2025-11-24';
739
+ if (clientBeta) {
740
+ const filtered = filterBillableBetas(clientBeta);
741
+ if (filtered)
742
+ beta += ',' + filtered;
743
+ }
664
744
  }
665
745
  const headers = {
666
746
  ...staticHeaders,
@@ -675,6 +755,25 @@ export async function startProxy(opts = {}) {
675
755
  body: finalBody ? new Uint8Array(finalBody) : undefined,
676
756
  signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
677
757
  });
758
+ // Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
759
+ if (upstream.status === 429 && !(cliAvailable && !useCli)) {
760
+ const errBody = await upstream.text().catch(() => '');
761
+ const enriched = enrich429(errBody, upstream.headers);
762
+ const responseHeaders = {
763
+ 'Content-Type': 'application/json',
764
+ 'Access-Control-Allow-Origin': corsOrigin,
765
+ ...SECURITY_HEADERS,
766
+ };
767
+ for (const [key, value] of upstream.headers.entries()) {
768
+ if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
769
+ responseHeaders[key] = value;
770
+ }
771
+ }
772
+ requestCount++;
773
+ res.writeHead(429, responseHeaders);
774
+ res.end(enriched);
775
+ return;
776
+ }
678
777
  // Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary.
679
778
  // The CLI gets priority routing from Anthropic's server — a separate rate limit pool
680
779
  // that continues working when the direct API quota is exhausted for expensive models.
@@ -869,7 +968,7 @@ export async function startProxy(opts = {}) {
869
968
  process.exit(1);
870
969
  });
871
970
  server.listen(port, LOCALHOST, () => {
872
- const oauthLine = useCli ? 'Backend: Claude CLI (bypasses rate limits)' : `OAuth: ${status.status} (expires in ${status.expiresIn})`;
971
+ const modeLine = passthrough ? 'Mode: passthrough (OAuth swap only, no injection)' : useCli ? 'Backend: Claude CLI (bypasses rate limits)' : `OAuth: ${status.status} (expires in ${status.expiresIn})`;
873
972
  const modelLine = modelOverride ? `Model: ${modelOverride} (all requests)` : 'Model: passthrough (client decides)';
874
973
  console.log('');
875
974
  console.log(` dario — http://localhost:${port}`);
@@ -880,7 +979,7 @@ export async function startProxy(opts = {}) {
880
979
  console.log(` ANTHROPIC_BASE_URL=http://localhost:${port}`);
881
980
  console.log(' ANTHROPIC_API_KEY=dario');
882
981
  console.log('');
883
- console.log(` ${oauthLine}`);
982
+ console.log(` ${modeLine}`);
884
983
  console.log(` ${modelLine}`);
885
984
  console.log('');
886
985
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "2.7.1",
3
+ "version": "2.8.0",
4
4
  "description": "Use your Claude subscription as an API. No API key needed. Local proxy for Claude Max/Pro subscriptions.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -24,7 +24,8 @@
24
24
  "audit": "npm audit --production --audit-level=high",
25
25
  "prepublishOnly": "npm run build",
26
26
  "start": "node dist/cli.js",
27
- "dev": "tsx src/cli.ts"
27
+ "dev": "tsx src/cli.ts",
28
+ "e2e": "node test/e2e.mjs"
28
29
  },
29
30
  "keywords": [
30
31
  "claude",