@askalf/dario 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +8 -1
  2. package/dist/proxy.js +132 -1
  3. package/package.json +4 -4
package/README.md CHANGED
@@ -387,7 +387,7 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
387
387
  - Tool use / function calling
388
388
  - System prompts and multi-turn conversations
389
389
  - Prompt caching and extended thinking
390
- - **Billable beta filtering** — strips `extended-cache-ttl`, `context-management`, `prompt-caching-scope` from client betas to prevent surprise Extra Usage charges
390
+ - **Billable beta filtering** — strips `extended-cache-ttl` from client betas (the only prefix requiring Extra Usage)
391
391
  - **Orchestration tag sanitization** — strips agent-injected XML (`<system-reminder>`, `<env>`, `<task_metadata>`, etc.) before forwarding
392
392
  - **Token anomaly detection** — warns on context spike (>60% input growth) or output explosion (>2x previous)
393
393
  - Concurrency control (max 10 concurrent upstream requests)
@@ -531,6 +531,13 @@ npm install
531
531
  npm run dev # runs with tsx (no build needed)
532
532
  ```
533
533
 
534
+ ## Contributors
535
+
536
+ | Who | Contributions |
537
+ |-----|---------------|
538
+ | [@GodsBoy](https://github.com/GodsBoy) | Proxy authentication, token redaction, error sanitization ([#2](https://github.com/askalf/dario/pull/2)) |
539
+ | [@belangertrading](https://github.com/belangertrading) | Billing classification investigation — reported, tested 5 versions, confirmed fix via response header analysis ([#4](https://github.com/askalf/dario/issues/4)) |
540
+
534
541
  ## Also by AskAlf
535
542
 
536
543
  | Project | What it does |
package/dist/proxy.js CHANGED
@@ -35,7 +35,7 @@ class Semaphore {
35
35
  next();
36
36
  }
37
37
  }
38
- // Detect installed Claude Code version at startup
38
+ // Detect installed Claude Code binary at startup
39
39
  function detectClaudeVersion() {
40
40
  try {
41
41
  const out = execSync('claude --version', { timeout: 5000, stdio: 'pipe' }).toString().trim();
@@ -46,6 +46,46 @@ function detectClaudeVersion() {
46
46
  return '2.1.96';
47
47
  }
48
48
  }
49
+ let cliAvailable = false;
50
/**
 * Probe whether the `claude` command can be launched.
 *
 * Runs `claude --version` synchronously with a 5 second timeout and piped
 * stdio; the command's output is discarded. Any exception thrown by
 * `execSync` is treated as "not available".
 *
 * @returns {boolean} true when the command ran without throwing, false otherwise.
 */
function detectCliAvailable() {
    let launched = true;
    try {
        execSync('claude --version', { timeout: 5000, stdio: 'pipe' });
    }
    catch {
        // Any failure to run the command counts as "CLI not available".
        launched = false;
    }
    return launched;
}
59
/**
 * Convert a non-streaming Messages API response to an SSE event stream.
 *
 * The emitted sequence is: `message_start`, then for every content block
 * `content_block_start` / zero or more `content_block_delta` /
 * `content_block_stop`, then `message_delta` (carrying the final
 * `stop_reason`, `stop_sequence` and output token count) and `message_stop`.
 *
 * Block handling:
 *  - `text`      → empty text block, then one `text_delta` when text is non-empty.
 *  - `thinking`  → empty thinking block, then `thinking_delta` and, when the
 *                  block carries a signature, `signature_delta`.
 *  - `tool_use`  → block with its own id/name and an empty `input`, then one
 *                  `input_json_delta` holding the serialized input.
 *  - anything else is passed through unchanged in `content_block_start`
 *    with no deltas, so no data is dropped.
 *
 * @param {string} jsonBody - JSON text of a complete (non-streamed) message.
 * @returns {string} The concatenated SSE events, or '' when `jsonBody` is not
 *   valid JSON, is not a JSON object, or contains a malformed content block.
 */
function jsonToSse(jsonBody) {
    // Every event repeats its name as the `type` field of the data payload.
    const sse = (type, payload = {}) => `event: ${type}\ndata: ${JSON.stringify({ type, ...payload })}\n\n`;
    try {
        const msg = JSON.parse(jsonBody);
        if (msg === null || typeof msg !== 'object' || Array.isArray(msg)) {
            return '';
        }
        const events = [];
        // message_start: the message shell; content and stop_reason arrive in later events.
        events.push(sse('message_start', { message: { ...msg, content: [], stop_reason: null } }));
        // content blocks (a missing or non-array content is treated as empty)
        const content = Array.isArray(msg.content) ? msg.content : [];
        for (const [index, block] of content.entries()) {
            let startBlock;
            const deltas = [];
            if (block.type === 'text') {
                startBlock = { type: 'text', text: '' };
                if (block.text) {
                    deltas.push({ type: 'text_delta', text: block.text });
                }
            }
            else if (block.type === 'thinking') {
                startBlock = { type: 'thinking', thinking: '' };
                if (block.thinking) {
                    deltas.push({ type: 'thinking_delta', thinking: block.thinking });
                }
                if (block.signature) {
                    deltas.push({ type: 'signature_delta', signature: block.signature });
                }
            }
            else if (block.type === 'tool_use') {
                // Keep id/name; the input itself is delivered as a JSON delta.
                startBlock = { ...block, input: {} };
                deltas.push({ type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) });
            }
            else {
                // Unknown block kinds are forwarded whole rather than rewritten.
                startBlock = block;
            }
            events.push(sse('content_block_start', { index, content_block: startBlock }));
            for (const delta of deltas) {
                events.push(sse('content_block_delta', { index, delta }));
            }
            events.push(sse('content_block_stop', { index }));
        }
        // message_delta: final stop information that message_start deliberately nulled out.
        events.push(sse('message_delta', {
            delta: { stop_reason: msg.stop_reason ?? null, stop_sequence: msg.stop_sequence ?? null },
            usage: { output_tokens: msg.usage?.output_tokens ?? 0 },
        }));
        // message_stop
        events.push(sse('message_stop'));
        return events.join('');
    }
    catch {
        // Unparseable or malformed input: callers receive an empty stream body.
        return '';
    }
}
49
89
  const SESSION_ID = randomUUID();
50
90
  const OS_NAME = platform === 'win32' ? 'Windows' : platform === 'darwin' ? 'MacOS' : 'Linux';
51
91
  // Claude Code device identity — required for Max plan billing classification.
@@ -365,6 +405,7 @@ export async function startProxy(opts = {}) {
365
405
  process.exit(1);
366
406
  }
367
407
  const cliVersion = detectClaudeVersion();
408
+ cliAvailable = detectCliAvailable();
368
409
  const modelOverride = opts.model ? (MODEL_ALIASES[opts.model] ?? opts.model) : null;
369
410
  const identity = loadClaudeIdentity();
370
411
  if (identity.deviceId) {
@@ -576,6 +617,26 @@ export async function startProxy(opts = {}) {
576
617
  if (!r.context_management) {
577
618
  r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
578
619
  }
620
+ // Inject Claude Code billing header into system prompt.
621
+ // Anthropic uses this to route requests through priority rate limiting
622
+ // instead of the general API quota. Without it, Opus/Sonnet get 429
623
+ // when overall utilization is high, even though model-specific limits
624
+ // have headroom. The CLI binary embeds this in its system prompt.
625
+ const billingTag = `x-anthropic-billing-header: cc_version=${cliVersion}; cc_entrypoint=cli; cch=98638;`;
626
+ if (typeof r.system === 'string') {
627
+ if (!r.system.includes('x-anthropic-billing-header:')) {
628
+ r.system = billingTag + '\n' + r.system;
629
+ }
630
+ }
631
+ else if (Array.isArray(r.system)) {
632
+ const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
633
+ if (!hasTag) {
634
+ r.system.unshift({ type: 'text', text: billingTag });
635
+ }
636
+ }
637
+ else {
638
+ r.system = billingTag;
639
+ }
579
640
  finalBody = Buffer.from(JSON.stringify(r));
580
641
  }
581
642
  catch { /* not JSON, send as-is */ }
@@ -607,6 +668,76 @@ export async function startProxy(opts = {}) {
607
668
  body: finalBody ? new Uint8Array(finalBody) : undefined,
608
669
  signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
609
670
  });
671
+ // Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary.
672
+ // The CLI gets priority routing from Anthropic's server — a separate rate limit pool
673
+ // that continues working when the direct API quota is exhausted for expensive models.
674
+ if (upstream.status === 429 && cliAvailable && !useCli) {
675
+ // Drain the upstream response
676
+ await upstream.text().catch(() => { });
677
+ if (verbose)
678
+ console.log(`[dario] #${requestCount} 429 from API — falling back to CLI`);
679
+ // Determine if the client requested streaming
680
+ let clientWantsStream = false;
681
+ if (body.length > 0) {
682
+ try {
683
+ const p = JSON.parse(body.toString());
684
+ clientWantsStream = !!p.stream;
685
+ }
686
+ catch { }
687
+ }
688
+ const cliResult = await handleViaCli(body, modelOverride, verbose);
689
+ requestCount++;
690
+ if (cliResult.status >= 200 && cliResult.status < 300) {
691
+ if (isOpenAI) {
692
+ // Translate to OpenAI format
693
+ try {
694
+ const parsed = JSON.parse(cliResult.body);
695
+ cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
696
+ }
697
+ catch { }
698
+ }
699
+ if (clientWantsStream && !isOpenAI) {
700
+ // Client requested SSE streaming — convert CLI JSON to SSE events
701
+ const sseData = jsonToSse(cliResult.body);
702
+ res.writeHead(200, {
703
+ 'Content-Type': 'text/event-stream',
704
+ 'Access-Control-Allow-Origin': corsOrigin,
705
+ ...SECURITY_HEADERS,
706
+ });
707
+ res.end(sseData);
708
+ }
709
+ else if (clientWantsStream && isOpenAI) {
710
+ // OpenAI streaming — convert Anthropic JSON to OpenAI SSE
711
+ try {
712
+ const parsed = JSON.parse(cliResult.body);
713
+ const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
714
+ const ts = Math.floor(Date.now() / 1000);
715
+ let sseData = `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n`;
716
+ sseData += `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
717
+ res.writeHead(200, {
718
+ 'Content-Type': 'text/event-stream',
719
+ 'Access-Control-Allow-Origin': corsOrigin,
720
+ ...SECURITY_HEADERS,
721
+ });
722
+ res.end(sseData);
723
+ }
724
+ catch {
725
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
726
+ res.end(cliResult.body);
727
+ }
728
+ }
729
+ else {
730
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
731
+ res.end(cliResult.body);
732
+ }
733
+ }
734
+ else {
735
+ // CLI also failed — return the CLI error
736
+ res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
737
+ res.end(cliResult.body);
738
+ }
739
+ return;
740
+ }
610
741
  // Detect streaming from content-type (reliable) or body (fallback)
611
742
  const contentType = upstream.headers.get('content-type') ?? '';
612
743
  const isStream = contentType.includes('text/event-stream');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "2.5.0",
3
+ "version": "2.6.0",
4
4
  "description": "Use your Claude subscription as an API. No API key needed. Local proxy for Claude Max/Pro subscriptions.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -55,11 +55,11 @@
55
55
  "node": ">=18.0.0"
56
56
  },
57
57
  "dependencies": {
58
- "@anthropic-ai/sdk": "^0.39.0"
58
+ "@anthropic-ai/sdk": "^0.81.0"
59
59
  },
60
60
  "devDependencies": {
61
- "typescript": "^5.7.0",
61
+ "@types/node": "^22.0.0",
62
62
  "tsx": "^4.19.0",
63
- "@types/node": "^22.0.0"
63
+ "typescript": "^5.7.0"
64
64
  }
65
65
  }