@askalf/dario 2.8.3 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/proxy.js +76 -22
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -68,7 +68,7 @@ Opus, Sonnet, Haiku — all models, streaming, tool use. Works with Cursor, Cont
|
|
|
68
68
|
<tr>
|
|
69
69
|
<td colspan="3" valign="top">
|
|
70
70
|
|
|
71
|
-
*"The 429s were driving us crazy running a multi-agent stack on Claude Max
|
|
71
|
+
*"The 429s were driving us crazy running a multi-agent stack on Claude Max. You found the billing tag, fixed the checksum, reverse-engineered the per-request hash from the binary — v2.8.5 running clean, zero reclassification."* — [@belangertrading](https://github.com/belangertrading), multi-agent stack on Claude Max
|
|
72
72
|
|
|
73
73
|
</td>
|
|
74
74
|
</tr>
|
|
@@ -80,7 +80,7 @@ Opus, Sonnet, Haiku — all models, streaming, tool use. Works with Cursor, Cont
|
|
|
80
80
|
|
|
81
81
|
Most Claude subscription proxies have a critical billing problem: **Anthropic classifies their requests as third-party and routes all usage to Extra Usage billing** — even when you have Max plan limits available. You're paying for your subscription twice.
|
|
82
82
|
|
|
83
|
-
dario is the only proxy that solves this. It injects native Claude Code device identity, billing
|
|
83
|
+
dario is the only proxy that solves this. It injects native Claude Code device identity, per-request billing checksums (reverse-engineered from the Claude Code binary), and priority routing into every request — so Anthropic's billing system treats your requests exactly like Claude Code itself. Your Max plan limits work correctly, and Opus/Sonnet stay available even at high utilization.
|
|
84
84
|
|
|
85
85
|
| | dario | Other proxies |
|
|
86
86
|
|---|---|---|
|
|
@@ -88,6 +88,7 @@ dario is the only proxy that solves this. It injects native Claude Code device i
|
|
|
88
88
|
| **Max plan limits** | Used correctly | Bypassed — billed separately |
|
|
89
89
|
| **Device identity** | Injected automatically | Missing |
|
|
90
90
|
| **Priority routing** | Billing tag + service_tier auto | Missing |
|
|
91
|
+
| **Billing tag fingerprint** | Per-request SHA-256 matching binary RE | Static or missing |
|
|
91
92
|
| **Beta flags** | Match Claude Code v2.1.100 | Outdated or missing |
|
|
92
93
|
| **Billable beta filtering** | Strips surprise charges | Passes everything through |
|
|
93
94
|
|
|
@@ -415,7 +416,7 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
|
|
|
415
416
|
|
|
416
417
|
### Direct API Mode
|
|
417
418
|
- All Claude models (Opus 4.6, Sonnet 4.6, Haiku 4.5) + 1M extended context aliases (`opus1m`, `sonnet1m`)
|
|
418
|
-
- **Native billing classification** — device identity
|
|
419
|
+
- **Native billing classification** — device identity, per-request billing tag with SHA-256 checksums matching real Claude Code (extracted via binary RE), ensures Max plan limits work correctly
|
|
419
420
|
- **Priority routing** — billing tag injection + `service_tier: 'auto'` activates per-model rate limits, keeping Opus/Sonnet available even at 100% overall utilization
|
|
420
421
|
- **Adaptive thinking** — matches Claude Code's `{ type: 'adaptive' }` mode for optimal reasoning (auto-skipped for Haiku 4.5)
|
|
421
422
|
- **Effort control** — injects `output_config: { effort: 'medium' }` by default (matching Claude Code), or passes through the client-specified effort level
|
|
@@ -585,7 +586,7 @@ npm run dev # runs with tsx (no build needed)
|
|
|
585
586
|
| Who | Contributions |
|
|
586
587
|
|-----|---------------|
|
|
587
588
|
| [@GodsBoy](https://github.com/GodsBoy) | Proxy authentication, token redaction, error sanitization ([#2](https://github.com/askalf/dario/pull/2)) |
|
|
588
|
-
| [@belangertrading](https://github.com/belangertrading) | Billing classification investigation ([#4](https://github.com/askalf/dario/issues/4)), Opus/Sonnet 429 diagnosis + CLI fallback workaround ([#6](https://github.com/askalf/dario/issues/6)) |
|
|
589
|
+
| [@belangertrading](https://github.com/belangertrading) | Billing classification investigation ([#4](https://github.com/askalf/dario/issues/4)), Opus/Sonnet 429 diagnosis + CLI fallback workaround ([#6](https://github.com/askalf/dario/issues/6)), billing reclassification root cause ([#7](https://github.com/askalf/dario/issues/7)) |
|
|
589
590
|
|
|
590
591
|
## Also by AskAlf
|
|
591
592
|
|
package/dist/proxy.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { createServer } from 'node:http';
|
|
2
|
-
import { randomUUID, timingSafeEqual } from 'node:crypto';
|
|
2
|
+
import { randomUUID, timingSafeEqual, createHash } from 'node:crypto';
|
|
3
3
|
import { execSync, spawn } from 'node:child_process';
|
|
4
4
|
import { readFileSync, readdirSync, writeFileSync, unlinkSync } from 'node:fs';
|
|
5
5
|
import { join } from 'node:path';
|
|
6
6
|
import { homedir, tmpdir } from 'node:os';
|
|
7
|
-
import { arch, platform
|
|
7
|
+
import { arch, platform } from 'node:process';
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
9
|
const ANTHROPIC_API = 'https://api.anthropic.com';
|
|
10
10
|
const DEFAULT_PORT = 3456;
|
|
@@ -35,17 +35,33 @@ class Semaphore {
|
|
|
35
35
|
next();
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
|
+
// Billing tag hash seed — extracted from Claude Code binary (constant XGA)
const BILLING_SEED = '59cf53e54c78';
/**
 * Sample the three characters of the user message that feed both billing hashes.
 *
 * Characters are taken at string indexes 4, 7 and 20; any index past the end of
 * the message (or a missing message) contributes the placeholder '0'.
 *
 * @param {string | null | undefined} userMessage - Text of the first user message.
 * @returns {string} Exactly three sampled characters, in index order.
 */
function sampleBillingChars(userMessage) {
    // A missing message is treated like an empty one instead of throwing on index access.
    const text = userMessage ?? '';
    return [4, 7, 20].map(i => text[i] || '0').join('');
}
/**
 * Compute the per-request build tag matching Claude Code's Oz$ algorithm:
 * SHA-256(seed + chars[4,7,20] of user message + version).slice(0,3)
 *
 * @param {string | null | undefined} userMessage - Text of the first user message.
 * @param {string} version - Claude Code version string (e.g. '2.1.100').
 * @returns {string} First 3 hex characters of the digest.
 */
function computeBuildTag(userMessage, version) {
    const chars = sampleBillingChars(userMessage);
    return createHash('sha256').update(`${BILLING_SEED}${chars}${version}`).digest('hex').slice(0, 3);
}
/**
 * Compute the per-request cch checksum:
 * SHA-256(seed + version + chars[4,7,20] of user message).slice(0,5)
 *
 * Note the input order differs from computeBuildTag: the version comes BEFORE
 * the sampled characters here.
 * Real Claude Code uses a similar but separate computation that produces 5 hex chars.
 *
 * @param {string | null | undefined} userMessage - Text of the first user message.
 * @param {string} version - Claude Code version string (e.g. '2.1.100').
 * @returns {string} First 5 hex characters of the digest.
 */
function computeCch(userMessage, version) {
    const chars = sampleBillingChars(userMessage);
    return createHash('sha256').update(`${BILLING_SEED}${version}${chars}`).digest('hex').slice(0, 5);
}
|
|
38
53
|
// Detect installed Claude Code binary at startup (single exec for both version + availability)
|
|
39
54
|
let cliAvailable = false;
|
|
40
55
|
function detectCli() {
|
|
41
56
|
try {
|
|
42
57
|
const out = execSync('claude --version', { timeout: 5000, stdio: 'pipe' }).toString().trim();
|
|
43
58
|
cliAvailable = true;
|
|
44
|
-
|
|
59
|
+
// Capture the full semantic version (major.minor.patch, e.g., 2.1.100) — build tag is computed per-request
|
|
60
|
+
return out.match(/^([\d]+\.[\d]+\.[\d]+)/)?.[1] ?? '2.1.100';
|
|
45
61
|
}
|
|
46
62
|
catch {
|
|
47
63
|
cliAvailable = false;
|
|
48
|
-
return '2.1.
|
|
64
|
+
return '2.1.100';
|
|
49
65
|
}
|
|
50
66
|
}
|
|
51
67
|
/** Convert a non-streaming Messages API response to SSE event stream. */
|
|
@@ -78,6 +94,22 @@ function jsonToSse(jsonBody) {
|
|
|
78
94
|
return '';
|
|
79
95
|
}
|
|
80
96
|
}
|
|
97
|
+
/**
 * Extract the text of the first user message from a request body, for billing
 * tag computation.
 *
 * The body is client-supplied JSON, so every level is validated; any malformed
 * or missing piece yields '' rather than throwing.
 *
 * @param {object | null | undefined} body - Parsed request body; `body.messages`
 *   is expected to be an array of `{ role, content }` entries.
 * @returns {string} The first user message's string content, or the `text` of its
 *   first `type: 'text'` content block; '' when none is available.
 */
function extractFirstUserMessage(body) {
    const messages = body?.messages;
    // A non-array `messages` (string, object, number) has no usable entries.
    if (!Array.isArray(messages))
        return '';
    // Optional chaining skips null/primitive entries instead of throwing on them.
    const userMsg = messages.find(m => m?.role === 'user');
    if (!userMsg)
        return '';
    const { content } = userMsg;
    if (typeof content === 'string')
        return content;
    if (Array.isArray(content)) {
        const textBlock = content.find(b => b?.type === 'text');
        // Only a string is returned so callers can index into the result safely.
        return typeof textBlock?.text === 'string' ? textBlock.text : '';
    }
    return '';
}
|
|
81
113
|
/** Convert CLI JSON response to OpenAI SSE format. */
|
|
82
114
|
function jsonToOpenaiSse(jsonBody) {
|
|
83
115
|
try {
|
|
@@ -463,7 +495,7 @@ export async function startProxy(opts = {}) {
|
|
|
463
495
|
console.warn('[dario] WARNING: No Claude Code device identity found. Requests may be billed as Extra Usage.');
|
|
464
496
|
console.warn('[dario] Run Claude Code at least once to generate ~/.claude/.claude.json');
|
|
465
497
|
}
|
|
466
|
-
// Pre-build static headers
|
|
498
|
+
// Pre-build static headers (matches real Claude Code captured via MITM)
|
|
467
499
|
const staticHeaders = passthrough ? {
|
|
468
500
|
'accept': 'application/json',
|
|
469
501
|
'Content-Type': 'application/json',
|
|
@@ -471,7 +503,6 @@ export async function startProxy(opts = {}) {
|
|
|
471
503
|
'accept': 'application/json',
|
|
472
504
|
'Content-Type': 'application/json',
|
|
473
505
|
'anthropic-dangerous-direct-browser-access': 'true',
|
|
474
|
-
'anthropic-client-platform': 'cli',
|
|
475
506
|
'user-agent': `claude-cli/${cliVersion} (external, cli)`,
|
|
476
507
|
'x-app': 'cli',
|
|
477
508
|
'x-claude-code-session-id': SESSION_ID,
|
|
@@ -481,8 +512,8 @@ export async function startProxy(opts = {}) {
|
|
|
481
512
|
'x-stainless-package-version': '0.81.0',
|
|
482
513
|
'x-stainless-retry-count': '0',
|
|
483
514
|
'x-stainless-runtime': 'node',
|
|
484
|
-
|
|
485
|
-
'x-stainless-
|
|
515
|
+
// Claude Code runs on Bun which reports v24.3.0 as Node compat version
|
|
516
|
+
'x-stainless-runtime-version': 'v24.3.0',
|
|
486
517
|
};
|
|
487
518
|
const useCli = opts.cliBackend ?? false;
|
|
488
519
|
let requestCount = 0;
|
|
@@ -656,18 +687,15 @@ export async function startProxy(opts = {}) {
|
|
|
656
687
|
const supportsThinking = !modelName.includes('haiku');
|
|
657
688
|
if (supportsThinking && !r.thinking) {
|
|
658
689
|
r.thinking = { type: 'adaptive' };
|
|
659
|
-
// Ensure max_tokens is reasonable for thinking models
|
|
660
|
-
const clientMax = r.max_tokens || 8192;
|
|
661
|
-
r.max_tokens = Math.max(clientMax, 16000);
|
|
662
690
|
}
|
|
663
|
-
//
|
|
664
|
-
if (!r.
|
|
665
|
-
r.
|
|
691
|
+
// Match Claude Code's default max_tokens (64000) when the client omits max_tokens or sends a value below 16000
|
|
692
|
+
if (!r.max_tokens || r.max_tokens < 16000) {
|
|
693
|
+
r.max_tokens = 64000;
|
|
666
694
|
}
|
|
667
|
-
// Set reasoning effort (pass through client value or default)
|
|
695
|
+
// Set reasoning effort (pass through client value or default to 'medium' matching Claude Code)
|
|
668
696
|
// Haiku does not support the effort parameter
|
|
669
697
|
if (supportsThinking && !r.output_config) {
|
|
670
|
-
r.output_config = { effort: '
|
|
698
|
+
r.output_config = { effort: 'medium' };
|
|
671
699
|
}
|
|
672
700
|
// Enable context management (matches Claude Code default)
|
|
673
701
|
// Requires thinking to be enabled — skip for models without thinking support (e.g. Haiku)
|
|
@@ -679,20 +707,44 @@ export async function startProxy(opts = {}) {
|
|
|
679
707
|
// instead of the general API quota. Without it, Opus/Sonnet get 429
|
|
680
708
|
// when overall utilization is high, even though model-specific limits
|
|
681
709
|
// have headroom. The CLI binary embeds this in its system prompt.
|
|
682
|
-
|
|
710
|
+
//
|
|
711
|
+
// Build tag and cch are computed per-request using the same algorithm
|
|
712
|
+
// as the real Claude Code binary (Oz$ function):
|
|
713
|
+
// - build tag = SHA-256(seed + msg_chars[4,7,20] + version).slice(0,3)
|
|
714
|
+
// - cch = SHA-256(seed + version + msg_chars[4,7,20]).slice(0,5)
|
|
715
|
+
// Build per-request billing tag matching Claude Code binary
|
|
716
|
+
const userMsg = extractFirstUserMessage(r);
|
|
717
|
+
const buildTag = computeBuildTag(userMsg, cliVersion);
|
|
718
|
+
const cch = computeCch(userMsg, cliVersion);
|
|
719
|
+
const fullVersion = `${cliVersion}.${buildTag}`;
|
|
720
|
+
const billingTag = `x-anthropic-billing-header: cc_version=${fullVersion}; cc_entrypoint=cli; cch=${cch};`;
|
|
721
|
+
// Structure system prompt as 3 blocks matching real Claude Code:
|
|
722
|
+
// [0] billing tag (no cache_control)
|
|
723
|
+
// [1] agent identity string (cache 1h)
|
|
724
|
+
// [2] actual system prompt (cache 1h)
|
|
725
|
+
const AGENT_IDENTITY = 'You are a Claude agent, built on Anthropic\'s Claude Agent SDK.';
|
|
726
|
+
const CACHE_1H = { type: 'ephemeral', ttl: '1h' };
|
|
683
727
|
if (typeof r.system === 'string') {
|
|
684
728
|
if (!r.system.includes('x-anthropic-billing-header:')) {
|
|
685
|
-
r.system =
|
|
729
|
+
r.system = [
|
|
730
|
+
{ type: 'text', text: billingTag },
|
|
731
|
+
{ type: 'text', text: AGENT_IDENTITY, cache_control: CACHE_1H },
|
|
732
|
+
{ type: 'text', text: r.system, cache_control: CACHE_1H },
|
|
733
|
+
];
|
|
686
734
|
}
|
|
687
735
|
}
|
|
688
736
|
else if (Array.isArray(r.system)) {
|
|
689
737
|
const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
|
|
690
738
|
if (!hasTag) {
|
|
691
|
-
|
|
739
|
+
// Prepend billing tag and agent identity before existing blocks
|
|
740
|
+
r.system.unshift({ type: 'text', text: billingTag }, { type: 'text', text: AGENT_IDENTITY, cache_control: CACHE_1H });
|
|
692
741
|
}
|
|
693
742
|
}
|
|
694
743
|
else {
|
|
695
|
-
r.system =
|
|
744
|
+
r.system = [
|
|
745
|
+
{ type: 'text', text: billingTag },
|
|
746
|
+
{ type: 'text', text: AGENT_IDENTITY, cache_control: CACHE_1H },
|
|
747
|
+
];
|
|
696
748
|
}
|
|
697
749
|
}
|
|
698
750
|
finalBody = Buffer.from(JSON.stringify(r));
|
|
@@ -713,8 +765,8 @@ export async function startProxy(opts = {}) {
|
|
|
713
765
|
beta += ',' + clientBeta;
|
|
714
766
|
}
|
|
715
767
|
else {
|
|
716
|
-
// Claude-optimized: full beta set matching
|
|
717
|
-
beta = 'oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,
|
|
768
|
+
// Claude-optimized: full beta set matching real Claude Code (exact order from MITM capture)
|
|
769
|
+
beta = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
|
|
718
770
|
if (clientBeta) {
|
|
719
771
|
const filtered = filterBillableBetas(clientBeta);
|
|
720
772
|
if (filtered)
|
|
@@ -727,6 +779,8 @@ export async function startProxy(opts = {}) {
|
|
|
727
779
|
'anthropic-version': req.headers['anthropic-version'] || '2023-06-01',
|
|
728
780
|
'anthropic-beta': beta,
|
|
729
781
|
'x-client-request-id': randomUUID(),
|
|
782
|
+
// Real Claude Code sends 600 on first request, 300 on subsequent
|
|
783
|
+
'x-stainless-timeout': requestCount <= 1 ? '600' : '300',
|
|
730
784
|
};
|
|
731
785
|
const upstream = await fetch(targetBase, {
|
|
732
786
|
method: req.method ?? 'POST',
|