@askalf/dario 2.5.1 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +8 -1
  2. package/dist/proxy.js +142 -7
  3. package/package.json +4 -4
package/README.md CHANGED
@@ -387,7 +387,7 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
387
387
  - Tool use / function calling
388
388
  - System prompts and multi-turn conversations
389
389
  - Prompt caching and extended thinking
390
- - **Billable beta filtering** — strips `extended-cache-ttl`, `context-management`, `prompt-caching-scope` from client betas to prevent surprise Extra Usage charges
390
+ - **Billable beta filtering** — strips `extended-cache-ttl` from client betas (the only prefix requiring Extra Usage)
391
391
  - **Orchestration tag sanitization** — strips agent-injected XML (`<system-reminder>`, `<env>`, `<task_metadata>`, etc.) before forwarding
392
392
  - **Token anomaly detection** — warns on context spike (>60% input growth) or output explosion (>2x previous)
393
393
  - Concurrency control (max 10 concurrent upstream requests)
@@ -531,6 +531,13 @@ npm install
531
531
  npm run dev # runs with tsx (no build needed)
532
532
  ```
533
533
 
534
+ ## Contributors
535
+
536
+ | Who | Contributions |
537
+ |-----|---------------|
538
+ | [@GodsBoy](https://github.com/GodsBoy) | Proxy authentication, token redaction, error sanitization ([#2](https://github.com/askalf/dario/pull/2)) |
539
+ | [@belangertrading](https://github.com/belangertrading) | Billing classification investigation — reported, tested 5 versions, confirmed fix via response header analysis ([#4](https://github.com/askalf/dario/issues/4)) |
540
+
534
541
  ## Also by AskAlf
535
542
 
536
543
  | Project | What it does |
package/dist/proxy.js CHANGED
@@ -35,7 +35,7 @@ class Semaphore {
35
35
  next();
36
36
  }
37
37
  }
38
- // Detect installed Claude Code version at startup
38
+ // Detect installed Claude Code binary at startup
39
39
  function detectClaudeVersion() {
40
40
  try {
41
41
  const out = execSync('claude --version', { timeout: 5000, stdio: 'pipe' }).toString().trim();
@@ -46,6 +46,46 @@ function detectClaudeVersion() {
46
46
  return '2.1.96';
47
47
  }
48
48
  }
// Module-level flag: assigned from detectCliAvailable() inside startProxy and
// consulted before attempting the CLI fallback on an upstream 429.
let cliAvailable = false;
/**
 * Probe whether the `claude` CLI can be executed on this machine.
 *
 * Runs `claude --version` (5 second timeout, output piped rather than
 * inherited) purely for its success/failure; the output itself is discarded.
 *
 * @returns {boolean} true when the command completed without throwing,
 *   false when execSync threw for any reason.
 */
function detectCliAvailable() {
    let probeSucceeded = true;
    try {
        execSync('claude --version', { timeout: 5000, stdio: 'pipe' });
    }
    catch {
        // Any failure is reported as "CLI not available".
        probeSucceeded = false;
    }
    return probeSucceeded;
}
/**
 * Convert a non-streaming Messages API response into an SSE event stream.
 *
 * Emits, in order: `message_start`, then for every content block a
 * `content_block_start` / optional `content_block_delta`(s) /
 * `content_block_stop` triple, then `message_delta` (carrying the final
 * stop_reason, stop_sequence and output token count) and `message_stop`.
 *
 * Block handling:
 *  - `text`      -> empty text start + one `text_delta` (skipped when empty)
 *  - `thinking`  -> empty thinking start + one `thinking_delta`, plus a
 *                   `signature_delta` when the block carries a signature
 *  - `tool_use`  -> start with id/name and empty input + one
 *                   `input_json_delta` holding the serialized input
 *  - anything else is forwarded unchanged as the `content_block` of its
 *    start event, with no deltas, so no data is silently dropped.
 *
 * @param {string} jsonBody - Serialized Messages API response object.
 * @returns {string} Concatenated SSE events, or '' when jsonBody is not
 *   valid JSON or does not decode to a plain object.
 */
function jsonToSse(jsonBody) {
    // Each event repeats its name as the `type` field of the data payload.
    const sse = (type, payload = {}) => `event: ${type}\ndata: ${JSON.stringify({ type, ...payload })}\n\n`;
    try {
        const msg = JSON.parse(jsonBody);
        if (msg === null || typeof msg !== 'object' || Array.isArray(msg)) {
            return '';
        }
        const events = [];
        // message_start: the message envelope with content and stop fields blanked;
        // the real values are delivered by the later events.
        events.push(sse('message_start', { message: { ...msg, content: [], stop_reason: null, stop_sequence: null } }));
        // content blocks (ignore a content field that is not a list of blocks)
        const content = Array.isArray(msg.content) ? msg.content : [];
        content.forEach((block, index) => {
            if (block === null || typeof block !== 'object') {
                return;
            }
            if (block.type === 'text') {
                events.push(sse('content_block_start', { index, content_block: { type: 'text', text: '' } }));
                if (block.text) {
                    events.push(sse('content_block_delta', { index, delta: { type: 'text_delta', text: block.text } }));
                }
            }
            else if (block.type === 'thinking') {
                events.push(sse('content_block_start', { index, content_block: { type: 'thinking', thinking: '' } }));
                if (block.thinking) {
                    events.push(sse('content_block_delta', { index, delta: { type: 'thinking_delta', thinking: block.thinking } }));
                }
                if (typeof block.signature === 'string' && block.signature) {
                    events.push(sse('content_block_delta', { index, delta: { type: 'signature_delta', signature: block.signature } }));
                }
            }
            else if (block.type === 'tool_use') {
                // Tool input is streamed as JSON text; the start event carries an empty input.
                events.push(sse('content_block_start', { index, content_block: { type: 'tool_use', id: block.id, name: block.name, input: {} } }));
                events.push(sse('content_block_delta', { index, delta: { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) } }));
            }
            else {
                // Unknown block type: forward it whole rather than mislabel it.
                events.push(sse('content_block_start', { index, content_block: block }));
            }
            events.push(sse('content_block_stop', { index }));
        });
        // message_delta: final stop information and output token count
        events.push(sse('message_delta', {
            delta: { stop_reason: msg.stop_reason ?? null, stop_sequence: msg.stop_sequence ?? null },
            usage: { output_tokens: msg.usage?.output_tokens ?? 0 },
        }));
        // message_stop
        events.push(sse('message_stop'));
        return events.join('');
    }
    catch {
        // Unparseable body: callers treat the empty string as "no stream".
        return '';
    }
}
49
89
  const SESSION_ID = randomUUID();
50
90
  const OS_NAME = platform === 'win32' ? 'Windows' : platform === 'darwin' ? 'MacOS' : 'Linux';
51
91
  // Claude Code device identity — required for Max plan billing classification.
@@ -365,6 +405,7 @@ export async function startProxy(opts = {}) {
365
405
  process.exit(1);
366
406
  }
367
407
  const cliVersion = detectClaudeVersion();
408
+ cliAvailable = detectCliAvailable();
368
409
  const modelOverride = opts.model ? (MODEL_ALIASES[opts.model] ?? opts.model) : null;
369
410
  const identity = loadClaudeIdentity();
370
411
  if (identity.deviceId) {
@@ -564,18 +605,42 @@ export async function startProxy(opts = {}) {
564
605
  }),
565
606
  };
566
607
  }
567
- // Enable extended thinking (matches Claude Code default)
568
- // budget_tokens must be >= 1024, and max_tokens must accommodate it
608
+ // Enable adaptive thinking (matches Claude Code default)
609
+ // adaptive lets the model decide when/how much to think — preferred for Opus/Sonnet 4.6
569
610
  if (!r.thinking) {
611
+ r.thinking = { type: 'adaptive' };
612
+ // Ensure max_tokens is reasonable for thinking models
570
613
  const clientMax = r.max_tokens || 8192;
571
- const maxTokens = Math.max(clientMax, 16000);
572
- r.max_tokens = maxTokens;
573
- r.thinking = { budget_tokens: maxTokens - 1, type: 'enabled' };
614
+ r.max_tokens = Math.max(clientMax, 16000);
615
+ }
616
+ // Request priority capacity when available
617
+ if (!r.service_tier) {
618
+ r.service_tier = 'auto';
574
619
  }
575
620
  // Enable context management (matches Claude Code default)
576
621
  if (!r.context_management) {
577
622
  r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
578
623
  }
624
+ // Inject Claude Code billing header into system prompt.
625
+ // Anthropic uses this to route requests through priority rate limiting
626
+ // instead of the general API quota. Without it, Opus/Sonnet get 429
627
+ // when overall utilization is high, even though model-specific limits
628
+ // have headroom. The CLI binary embeds this in its system prompt.
629
+ const billingTag = `x-anthropic-billing-header: cc_version=${cliVersion}; cc_entrypoint=cli; cch=98638;`;
630
+ if (typeof r.system === 'string') {
631
+ if (!r.system.includes('x-anthropic-billing-header:')) {
632
+ r.system = billingTag + '\n' + r.system;
633
+ }
634
+ }
635
+ else if (Array.isArray(r.system)) {
636
+ const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
637
+ if (!hasTag) {
638
+ r.system.unshift({ type: 'text', text: billingTag });
639
+ }
640
+ }
641
+ else {
642
+ r.system = billingTag;
643
+ }
579
644
  finalBody = Buffer.from(JSON.stringify(r));
580
645
  }
581
646
  catch { /* not JSON, send as-is */ }
@@ -588,7 +653,7 @@ export async function startProxy(opts = {}) {
588
653
  // Billing classification is determined by the OAuth token alone, not beta flags.
589
654
  // context-management and prompt-caching-scope are safe for all subscription types.
590
655
  const clientBeta = req.headers['anthropic-beta'];
591
- let beta = 'oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219,advisor-tool-2026-03-01';
656
+ let beta = 'oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219,advisor-tool-2026-03-01,effort-2025-11-24';
592
657
  if (clientBeta) {
593
658
  const filtered = filterBillableBetas(clientBeta);
594
659
  if (filtered)
@@ -607,6 +672,76 @@ export async function startProxy(opts = {}) {
607
672
  body: finalBody ? new Uint8Array(finalBody) : undefined,
608
673
  signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
609
674
  });
675
+ // Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary.
676
+ // The CLI gets priority routing from Anthropic's server — a separate rate limit pool
677
+ // that continues working when the direct API quota is exhausted for expensive models.
678
+ if (upstream.status === 429 && cliAvailable && !useCli) {
679
+ // Drain the upstream response
680
+ await upstream.text().catch(() => { });
681
+ if (verbose)
682
+ console.log(`[dario] #${requestCount} 429 from API — falling back to CLI`);
683
+ // Determine if the client requested streaming
684
+ let clientWantsStream = false;
685
+ if (body.length > 0) {
686
+ try {
687
+ const p = JSON.parse(body.toString());
688
+ clientWantsStream = !!p.stream;
689
+ }
690
+ catch { }
691
+ }
692
+ const cliResult = await handleViaCli(body, modelOverride, verbose);
693
+ requestCount++;
694
+ if (cliResult.status >= 200 && cliResult.status < 300) {
695
+ if (isOpenAI) {
696
+ // Translate to OpenAI format
697
+ try {
698
+ const parsed = JSON.parse(cliResult.body);
699
+ cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
700
+ }
701
+ catch { }
702
+ }
703
+ if (clientWantsStream && !isOpenAI) {
704
+ // Client requested SSE streaming — convert CLI JSON to SSE events
705
+ const sseData = jsonToSse(cliResult.body);
706
+ res.writeHead(200, {
707
+ 'Content-Type': 'text/event-stream',
708
+ 'Access-Control-Allow-Origin': corsOrigin,
709
+ ...SECURITY_HEADERS,
710
+ });
711
+ res.end(sseData);
712
+ }
713
+ else if (clientWantsStream && isOpenAI) {
714
+ // OpenAI streaming — convert Anthropic JSON to OpenAI SSE
715
+ try {
716
+ const parsed = JSON.parse(cliResult.body);
717
+ const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
718
+ const ts = Math.floor(Date.now() / 1000);
719
+ let sseData = `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n`;
720
+ sseData += `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
721
+ res.writeHead(200, {
722
+ 'Content-Type': 'text/event-stream',
723
+ 'Access-Control-Allow-Origin': corsOrigin,
724
+ ...SECURITY_HEADERS,
725
+ });
726
+ res.end(sseData);
727
+ }
728
+ catch {
729
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
730
+ res.end(cliResult.body);
731
+ }
732
+ }
733
+ else {
734
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
735
+ res.end(cliResult.body);
736
+ }
737
+ }
738
+ else {
739
+ // CLI also failed — return the CLI error
740
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
741
+ res.end(cliResult.body);
742
+ }
743
+ return;
744
+ }
610
745
  // Detect streaming from content-type (reliable) or body (fallback)
611
746
  const contentType = upstream.headers.get('content-type') ?? '';
612
747
  const isStream = contentType.includes('text/event-stream');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "2.5.1",
3
+ "version": "2.7.0",
4
4
  "description": "Use your Claude subscription as an API. No API key needed. Local proxy for Claude Max/Pro subscriptions.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -55,11 +55,11 @@
55
55
  "node": ">=18.0.0"
56
56
  },
57
57
  "dependencies": {
58
- "@anthropic-ai/sdk": "^0.39.0"
58
+ "@anthropic-ai/sdk": "^0.81.0"
59
59
  },
60
60
  "devDependencies": {
61
- "typescript": "^5.7.0",
61
+ "@types/node": "^22.0.0",
62
62
  "tsx": "^4.19.0",
63
- "@types/node": "^22.0.0"
63
+ "typescript": "^5.7.0"
64
64
  }
65
65
  }