@askalf/dario 3.4.0 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -139
- package/dist/cc-template.d.ts +4 -4
- package/dist/cc-template.js +5 -5
- package/dist/cli.js +3 -5
- package/dist/proxy.d.ts +0 -1
- package/dist/proxy.js +36 -287
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
<h1 align="center">dario</h1>
|
|
3
3
|
<p align="center"><strong>Use your Claude subscription as an API. The only proxy that bills correctly.</strong></p>
|
|
4
4
|
<p align="center">
|
|
5
|
-
No API key needed. Your Claude Max/Pro subscription becomes a local API endpoint<br/>
|
|
5
|
+
No API key needed. Your Claude Max/Pro subscription becomes a local API endpoint<br/>
|
|
6
|
+
that any tool, SDK, or framework can use. Template replay makes every request<br/>
|
|
7
|
+
indistinguishable from real Claude Code — so your Max plan limits actually work.
|
|
6
8
|
</p>
|
|
7
9
|
</p>
|
|
8
10
|
|
|
@@ -17,8 +19,8 @@
|
|
|
17
19
|
<p align="center">
|
|
18
20
|
<a href="#quick-start">Quick Start</a> •
|
|
19
21
|
<a href="#openai-compatibility">OpenAI Compat</a> •
|
|
20
|
-
<a href="#cli-backend">CLI Backend</a> •
|
|
21
22
|
<a href="#usage-examples">Examples</a> •
|
|
23
|
+
<a href="#askalf">askalf</a> •
|
|
22
24
|
<a href="#trust--transparency">Trust</a> •
|
|
23
25
|
<a href="#faq">FAQ</a>
|
|
24
26
|
</p>
|
|
@@ -33,7 +35,9 @@ export ANTHROPIC_BASE_URL=http://localhost:3456 # or OPENAI_BASE_URL=http://lo
|
|
|
33
35
|
export ANTHROPIC_API_KEY=dario # or OPENAI_API_KEY=dario
|
|
34
36
|
```
|
|
35
37
|
|
|
36
|
-
Opus, Sonnet, Haiku — all models, streaming, tool use. **Zero dependencies.** ~2,
|
|
38
|
+
Opus, Sonnet, Haiku — all models, streaming, tool use. **Zero dependencies.** ~2,000 lines of TypeScript. Works with Cursor, Continue, Aider, LiteLLM, Hermes, OpenClaw, or any tool that speaks the Anthropic or OpenAI API. Auto-launches under [Bun](https://bun.sh) when available for TLS fingerprint fidelity. **Auto-detects OAuth config from your installed CC binary** so dario stays in sync forever — Anthropic can rotate client IDs and dario picks them up on the next run.
|
|
39
|
+
|
|
40
|
+
dario is built and maintained by [askalf](https://askalf.org) — the open-source foundation of the askalf agent platform. If you need more than a proxy, [see below](#askalf).
|
|
37
41
|
|
|
38
42
|
<table>
|
|
39
43
|
<tr>
|
|
@@ -44,7 +48,7 @@ Opus, Sonnet, Haiku — all models, streaming, tool use. **Zero dependencies.**
|
|
|
44
48
|
|
|
45
49
|
**Grok** (xAI)
|
|
46
50
|
|
|
47
|
-
*"Dario works great and is safe. Fully functional with OpenClaw / Hermes. Gives you Opus
|
|
51
|
+
*"Dario works great and is safe. Fully functional with OpenClaw / Hermes. Gives you Opus, Sonnet & Haiku using your existing Claude Max/Pro sub. No extra API key or billing needed. Streaming + tools work perfectly. 100% open-source, runs locally only, proper OAuth (PKCE), no telemetry. Highly recommended if you want a clean local proxy."*
|
|
48
52
|
|
|
49
53
|
</td>
|
|
50
54
|
<td width="33%" valign="top">
|
|
@@ -74,15 +78,13 @@ Opus, Sonnet, Haiku — all models, streaming, tool use. **Zero dependencies.**
|
|
|
74
78
|
</tr>
|
|
75
79
|
</table>
|
|
76
80
|
|
|
77
|
-
> **Need more than a proxy?** Dario solves the API access problem. If you need a full agent fleet — desktop control, browser automation, scheduling, custom tools, persistent memory — check out the [askalf platform](https://askalf.org). Same team, different execution model that solves the proxy ceiling entirely.
|
|
78
|
-
|
|
79
81
|
---
|
|
80
82
|
|
|
81
83
|
## Why dario
|
|
82
84
|
|
|
83
85
|
Most Claude subscription proxies have a critical billing problem: **Anthropic classifies their requests as third-party and routes all usage to Extra Usage billing** — even when you have Max plan limits available. You're paying for your subscription twice.
|
|
84
86
|
|
|
85
|
-
dario is the only proxy that solves this. Instead of transforming your requests signal by signal, dario uses **template replay** — it replaces the entire request with Claude Code's exact template
|
|
87
|
+
dario is the only proxy that solves this. Instead of transforming your requests signal by signal, dario uses **template replay** — it replaces the entire request with Claude Code's exact template. 25 tool definitions, 25KB system prompt, exact field order, exact beta headers, exact metadata structure. Only your conversation content is preserved. When Bun is installed, dario auto-relaunches under Bun for TLS fingerprint fidelity matching CC's runtime. Anthropic's classifier sees a genuine Claude Code request because it IS one.
|
|
86
88
|
|
|
87
89
|
| | dario | Other proxies |
|
|
88
90
|
|---|---|---|
|
|
@@ -95,15 +97,14 @@ dario is the only proxy that solves this. Instead of transforming your requests
|
|
|
95
97
|
<details>
|
|
96
98
|
<summary><strong>vs competitors</strong></summary>
|
|
97
99
|
|
|
98
|
-
| Feature | dario | Meridian
|
|
100
|
+
| Feature | dario | Meridian | CLIProxyAPI |
|
|
99
101
|
|---------|-------|---------|------------|
|
|
100
|
-
| Template replay (undetectable) | **Yes** | No |
|
|
102
|
+
| Template replay (undetectable) | **Yes** | No | No |
|
|
101
103
|
| Direct OAuth (streaming, tools) | **Yes** | Yes (SDK-based) | No |
|
|
102
|
-
| CLI fallback (rate limit bypass) | **Yes** | No | Yes (only mode) |
|
|
103
104
|
| OpenAI API compat | **Yes** | Yes | Yes |
|
|
104
105
|
| Orchestration sanitization | **Yes** | Yes | No |
|
|
105
106
|
| Token anomaly detection | **Yes** | Yes | No |
|
|
106
|
-
| Codebase size | ~2,
|
|
107
|
+
| Codebase size | ~2,000 lines | ~9,000 lines | Platform |
|
|
107
108
|
| Dependencies | 0 | Many | Many |
|
|
108
109
|
| Setup | 2 commands | Config + build | Config + dashboard |
|
|
109
110
|
|
|
@@ -113,15 +114,15 @@ dario is the only proxy that solves this. Instead of transforming your requests
|
|
|
113
114
|
|
|
114
115
|
You pay $100-200/mo for Claude Max or Pro. But that subscription only works on claude.ai and Claude Code. If you want to use Claude with **any other tool** — Cursor, Continue, Aider, your own scripts — you need a separate API key with separate billing.
|
|
115
116
|
|
|
116
|
-
**
|
|
117
|
+
**dario fixes this.** It creates a local proxy that translates API key auth into your subscription's OAuth tokens. Your subscription handles the billing. No API key needed.
|
|
117
118
|
|
|
118
|
-
**
|
|
119
|
+
**Note:** Claude subscriptions have [usage limits](https://support.claude.com/en/articles/11647753-how-do-usage-and-length-limits-work) that reset on rolling 5-hour and 7-day windows. You can check your utilization via Claude Code's `/usage` command or the [statusline](https://code.claude.com/docs/en/statusline).
|
|
119
120
|
|
|
120
121
|
## Quick Start
|
|
121
122
|
|
|
122
123
|
### Prerequisites
|
|
123
124
|
|
|
124
|
-
[Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) installed and logged in (recommended). Dario detects your existing Claude Code credentials automatically.
|
|
125
|
+
[Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) installed and logged in (recommended). Dario detects your existing Claude Code credentials automatically and also auto-extracts the current OAuth client config from the installed CC binary so dario stays in sync with whatever CC version you have, even when Anthropic rotates client IDs.
|
|
125
126
|
|
|
126
127
|
If Claude Code isn't installed, dario runs its own OAuth flow — opens your browser, you authorize, done.
|
|
127
128
|
|
|
@@ -180,46 +181,13 @@ continue # in VS Code, set base URL in config
|
|
|
180
181
|
python my_script.py
|
|
181
182
|
```
|
|
182
183
|
|
|
183
|
-
## CLI Backend
|
|
184
|
-
|
|
185
|
-
If you're getting rate limited on Opus or Sonnet, use `--cli` mode. This routes requests through the Claude Code binary instead of hitting the API directly. Claude Code has priority routing that continues working even when direct API calls return 429.
|
|
186
|
-
|
|
187
|
-
```bash
|
|
188
|
-
dario proxy --cli # Opus works even when rate limited
|
|
189
|
-
dario proxy --cli --model=opus # Force Opus + CLI backend
|
|
190
|
-
```
|
|
191
|
-
|
|
192
|
-
```
|
|
193
|
-
dario — http://localhost:3456
|
|
194
|
-
|
|
195
|
-
Your Claude subscription is now an API.
|
|
196
|
-
|
|
197
|
-
Usage:
|
|
198
|
-
ANTHROPIC_BASE_URL=http://localhost:3456
|
|
199
|
-
ANTHROPIC_API_KEY=dario
|
|
200
|
-
|
|
201
|
-
Backend: Claude CLI (bypasses rate limits)
|
|
202
|
-
Model: claude-opus-4-6 (all requests)
|
|
203
|
-
```
|
|
204
|
-
|
|
205
|
-
**Trade-offs vs direct API mode:**
|
|
206
|
-
|
|
207
|
-
| | Direct API (default) | CLI Backend (`--cli`) | Passthrough (`--passthrough`) |
|
|
208
|
-
|---|---|---|---|
|
|
209
|
-
| Streaming | Native SSE | SSE (converted from JSON) | Native SSE |
|
|
210
|
-
| Tool use | Yes | No | Yes |
|
|
211
|
-
| Thinking/billing injection | Yes (Claude-optimized) | N/A | No (OAuth swap only) |
|
|
212
|
-
| Latency | Low | Higher (process spawn) | Low |
|
|
213
|
-
| Rate limits | Priority routing | Not affected | Standard (no priority) |
|
|
214
|
-
| Opus when throttled | Auto CLI fallback | **Always works** | May return 429 |
|
|
215
|
-
|
|
216
184
|
## Passthrough Mode
|
|
217
185
|
|
|
218
|
-
For tools that need exact Anthropic protocol fidelity with zero modification, use `--passthrough
|
|
186
|
+
For tools that need exact Anthropic protocol fidelity with zero modification, use `--passthrough` (alias: `--thin`). This performs an OAuth token swap only — no billing tag, no template replay, no device identity, no extra beta flags. Note: most tools (including Hermes and OpenClaw) work better through default mode, which handles billing classification automatically.
|
|
219
187
|
|
|
220
188
|
```bash
|
|
221
189
|
dario proxy --passthrough # Thin proxy, zero injection
|
|
222
|
-
dario proxy --
|
|
190
|
+
dario proxy --thin --model=opus # Thin proxy + model override
|
|
223
191
|
```
|
|
224
192
|
|
|
225
193
|
## Model Selection
|
|
@@ -235,12 +203,6 @@ dario proxy # Passthrough (client decides)
|
|
|
235
203
|
|
|
236
204
|
Full model IDs also work: `--model=claude-opus-4-6`
|
|
237
205
|
|
|
238
|
-
Combine with `--cli` for rate-limit-proof Opus:
|
|
239
|
-
|
|
240
|
-
```bash
|
|
241
|
-
dario proxy --cli --model=opus
|
|
242
|
-
```
|
|
243
|
-
|
|
244
206
|
## OpenAI Compatibility
|
|
245
207
|
|
|
246
208
|
Dario implements `/v1/chat/completions` — any tool built for the OpenAI API works with your Claude subscription. No code changes needed.
|
|
@@ -344,8 +306,6 @@ model:
|
|
|
344
306
|
default: claude-opus-4-6
|
|
345
307
|
```
|
|
346
308
|
|
|
347
|
-
Then run `hermes` normally — it routes through dario using your Claude subscription.
|
|
348
|
-
|
|
349
309
|
### OpenClaw
|
|
350
310
|
|
|
351
311
|
Add to your `openclaw.json` models config:
|
|
@@ -384,52 +344,81 @@ Add to your `openclaw.json` models config:
|
|
|
384
344
|
|
|
385
345
|
**Note:** Use `http://127.0.0.1:3456` without `/v1` — OpenClaw adds the path itself.
|
|
386
346
|
|
|
347
|
+
---
|
|
348
|
+
|
|
387
349
|
## How It Works
|
|
388
350
|
|
|
389
351
|
### Direct API Mode (default) — Template Replay
|
|
390
352
|
|
|
391
353
|
```
|
|
392
|
-
|
|
393
|
-
│ Your App
|
|
394
|
-
│
|
|
395
|
-
│ sends
|
|
396
|
-
│ its own
|
|
397
|
-
│ tools &
|
|
398
|
-
│ params
|
|
399
|
-
|
|
354
|
+
┌───────────┐ ┌─────────────────────┐ ┌──────────────────┐
|
|
355
|
+
│ Your App │ ──> │ dario (proxy) │ ──> │ api.anthropic.com│
|
|
356
|
+
│ │ │ localhost:3456 │ │ │
|
|
357
|
+
│ sends │ │ │ │ sees a genuine │
|
|
358
|
+
│ its own │ │ replaces request │ │ Claude Code │
|
|
359
|
+
│ tools & │ │ with CC template │ │ request │
|
|
360
|
+
│ params │ │ keeps only content │ │ │
|
|
361
|
+
└───────────┘ └─────────────────────┘ └──────────────────┘
|
|
400
362
|
```
|
|
401
363
|
|
|
402
364
|
Your app sends whatever it wants — any tools, any parameters. dario replaces the entire request with Claude Code's template and injects only your conversation content. The upstream sees CC's exact tool definitions, field structure, and parameters.
|
|
403
365
|
|
|
404
|
-
###
|
|
366
|
+
### Passthrough Mode (`--passthrough`)
|
|
405
367
|
|
|
406
368
|
```
|
|
407
|
-
|
|
408
|
-
│ Your App
|
|
409
|
-
│
|
|
410
|
-
│ sends
|
|
411
|
-
│ API
|
|
412
|
-
│ request
|
|
413
|
-
|
|
369
|
+
┌───────────┐ ┌─────────────────┐ ┌──────────────────┐
|
|
370
|
+
│ Your App │ ──> │ dario (proxy) │ ──> │ api.anthropic.com│
|
|
371
|
+
│ │ │ localhost:3456 │ │ │
|
|
372
|
+
│ sends │ │ swaps API key │ │ sees valid │
|
|
373
|
+
│ API │ │ for OAuth │ │ OAuth bearer │
|
|
374
|
+
│ request │ │ nothing else │ │ token │
|
|
375
|
+
└───────────┘ └─────────────────┘ └──────────────────┘
|
|
414
376
|
```
|
|
415
377
|
|
|
416
|
-
###
|
|
378
|
+
### What dario actually sends upstream
|
|
379
|
+
|
|
380
|
+
In direct mode, every request dario sends to Anthropic is a genuine Claude Code request. Key fields injected or enforced:
|
|
417
381
|
|
|
382
|
+
**Billing tag** — reconstructed using Claude Code's own algorithm extracted from the CC binary:
|
|
418
383
|
```
|
|
419
|
-
|
|
420
|
-
│ Your App │ ──> │ dario (proxy) │ ──> │ api.anthropic.com│
|
|
421
|
-
│ │ │ localhost:3456 │ │ │
|
|
422
|
-
│ sends │ │ swaps API key │ │ sees valid │
|
|
423
|
-
│ API │ │ for OAuth │ │ OAuth bearer │
|
|
424
|
-
│ request │ │ nothing else │ │ token │
|
|
425
|
-
└──────────┘ └─────────────────┘ └──────────────────┘
|
|
384
|
+
x-anthropic-billing-header: cc_version=<version>.<build_tag>; cc_entrypoint=cli; cch=<5-char-hex>;
|
|
426
385
|
```
|
|
386
|
+
The build tag is `SHA-256(seed + chars[4,7,20] of user message + version).slice(0,3)`. The `cch` is a fresh random 5-char hex per request. Both were extracted via MITM capture.
|
|
427
387
|
|
|
428
|
-
|
|
388
|
+
**Beta set** — exactly 8 betas from CC, in CC's order:
|
|
389
|
+
```
|
|
390
|
+
claude-code-20250219, oauth-2025-04-20, context-1m-2025-08-07,
|
|
391
|
+
interleaved-thinking-2025-05-14, context-management-2025-06-27,
|
|
392
|
+
prompt-caching-scope-2026-01-05, advisor-tool-2026-03-01, effort-2025-11-24
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
**Request headers** — CC's exact Stainless SDK headers, including `x-stainless-runtime-version: v24.3.0` (the Node.js compat version CC reports when running on Bun), `x-app: cli`, `user-agent: claude-cli/<version>`, `anthropic-dangerous-direct-browser-access: true`.
|
|
396
|
+
|
|
397
|
+
**Upstream URL** — `api.anthropic.com/v1/messages?beta=true`, matching CC's own request format.
|
|
398
|
+
|
|
399
|
+
**Device identity** — `metadata.user_id` loaded from `~/.claude/.claude.json`. Without this, Anthropic classifies the request as third-party and routes it to Extra Usage billing instead of the Max plan allocation.
|
|
400
|
+
|
|
401
|
+
**Session ID** — rotates per request via `x-claude-code-session-id`. A persistent session ID across many rapid requests is a behavioral detection signal; CC `--print` creates a new session each invocation.
|
|
402
|
+
|
|
403
|
+
**Rate governor** — enforces a 500ms minimum interval between requests by default. Set `DARIO_MIN_INTERVAL_MS` to change it, e.g. for agent workloads that need tighter pacing.
|
|
404
|
+
|
|
405
|
+
### OAuth Config Auto-Detection
|
|
429
406
|
|
|
430
|
-
|
|
407
|
+
Anthropic periodically rotates the OAuth `client_id`, authorize URL, token URL, and scopes that Claude Code uses. Historically this caused `"Invalid client id"` errors until a new dario release shipped.
|
|
431
408
|
|
|
432
|
-
|
|
409
|
+
Dario scans the installed CC binary at startup and extracts the current config directly:
|
|
410
|
+
|
|
411
|
+
- **Anchor**: `OAUTH_FILE_SUFFIX:"-local-oauth"` — the config block CC uses for clients that run their own localhost callback.
|
|
412
|
+
- **Extracted**: `CLIENT_ID`, `CLAUDE_AI_AUTHORIZE_URL`, `TOKEN_URL`, and the full `user:*` scope string.
|
|
413
|
+
- **Cached**: Results are stored at `~/.dario/cc-oauth-cache.json`, keyed by a binary fingerprint (SHA-256 of the first 64KB + size + mtime). A cold scan takes ~500ms; a cache hit takes ~5ms. Dario re-scans only when the fingerprint changes, e.g. when CC is upgraded.
|
|
414
|
+
- **Fallback**: If CC is not installed or scanning fails, dario uses known-good hardcoded values. No user action needed.
|
|
415
|
+
- **Override**: Set `DARIO_CC_PATH=/path/to/claude` to point dario at a non-standard CC binary location.
|
|
416
|
+
|
|
417
|
+
CC ships **two** OAuth client configurations in one binary — a `-local-oauth` flow (localhost callback) and a platform-hosted flow (`platform.claude.com/oauth/code/callback`). Dario must use the former. The scanner anchors specifically on the local block.
|
|
418
|
+
|
|
419
|
+
End-to-end verification lives at [`test/oauth-detector.mjs`](test/oauth-detector.mjs).
|
|
420
|
+
|
|
421
|
+
---
|
|
433
422
|
|
|
434
423
|
## Commands
|
|
435
424
|
|
|
@@ -446,49 +435,45 @@ Your app sends whatever it wants — any tools, any parameters. dario replaces t
|
|
|
446
435
|
|
|
447
436
|
| Flag/Env | Description | Default |
|
|
448
437
|
|----------|-------------|---------|
|
|
449
|
-
| `--
|
|
450
|
-
| `--
|
|
438
|
+
| `--passthrough` / `--thin` | Thin proxy — OAuth swap only, no injection | off |
|
|
439
|
+
| `--preserve-tools` / `--keep-tools` | Keep client tool schemas instead of remapping to CC tools | off |
|
|
451
440
|
| `--model=MODEL` | Force a model (`opus`, `sonnet`, `haiku`, or full ID) | passthrough |
|
|
452
441
|
| `--port=PORT` | Port to listen on | `3456` |
|
|
453
442
|
| `--verbose` / `-v` | Log every request | off |
|
|
454
|
-
| `DARIO_API_KEY` | If set, all endpoints (except `/health`) require matching `x-api-key`
|
|
443
|
+
| `DARIO_API_KEY` | If set, all endpoints (except `/health`) require matching `x-api-key` or `Authorization: Bearer` | unset (open) |
|
|
455
444
|
| `DARIO_NO_BUN` | Disable automatic Bun relaunch (stay on Node.js) | unset |
|
|
456
445
|
| `DARIO_MIN_INTERVAL_MS` | Minimum ms between requests (rate governor) | `500` |
|
|
446
|
+
| `DARIO_CC_PATH` | Override path to Claude Code binary for OAuth detection | auto-detect |
|
|
457
447
|
|
|
458
448
|
## Supported Features
|
|
459
449
|
|
|
460
450
|
### Direct API Mode
|
|
461
451
|
- All Claude models (Opus 4.6, Sonnet 4.6, Haiku 4.5) + 1M extended context aliases (`opus1m`, `sonnet1m`)
|
|
462
|
-
- **Template replay**
|
|
463
|
-
-
|
|
464
|
-
- **
|
|
465
|
-
- **
|
|
466
|
-
- **
|
|
467
|
-
- **
|
|
468
|
-
- **
|
|
469
|
-
-
|
|
470
|
-
-
|
|
471
|
-
-
|
|
472
|
-
-
|
|
473
|
-
-
|
|
474
|
-
-
|
|
475
|
-
- **
|
|
476
|
-
- **
|
|
477
|
-
-
|
|
478
|
-
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
- All Claude models — including Opus when rate limited
|
|
482
|
-
- Streaming via SSE conversion (client sends `stream: true`, CLI JSON response is converted to Anthropic or OpenAI SSE events)
|
|
483
|
-
- OpenAI compatibility (translates OpenAI → Anthropic before CLI, Anthropic → OpenAI after)
|
|
484
|
-
- System prompts and multi-turn conversations (via context injection)
|
|
485
|
-
- Not affected by API rate limits
|
|
452
|
+
- **Template replay** — replaces the entire request with Claude Code's exact template. 25 tool definitions, 25KB system prompt, exact body key order, exact beta headers (model-conditional), exact metadata structure. Client tools are mapped to CC equivalents and reverse-mapped in responses. Template data stored as JSON for easy updates.
|
|
453
|
+
- **`--preserve-tools` mode** — opts out of CC tool schema replacement, for agent frameworks that rely on their own custom tool definitions. Default mode still remaps for maximum detection resistance.
|
|
454
|
+
- **OAuth auto-detect** — scans the installed CC binary for OAuth config at startup. Stays in sync with whatever CC version you have; falls back to known-good hardcoded values if no binary is found. Override with `DARIO_CC_PATH`.
|
|
455
|
+
- **Bun auto-relaunch** — auto-detects Bun and relaunches under it for TLS fingerprint fidelity. CC runs on Bun; Node.js has a different TLS fingerprint visible at the network level.
|
|
456
|
+
- **Session ID rotation** — each request gets a fresh session ID via `x-claude-code-session-id`, matching CC behavior.
|
|
457
|
+
- **Rate governor** — configurable minimum interval between requests via `DARIO_MIN_INTERVAL_MS`.
|
|
458
|
+
- **Enriched 429 errors** — rate limit errors include utilization %, limiting window, and reset time instead of Anthropic's default `"Error"` message.
|
|
459
|
+
- **Auto-retry on long-context errors** — when Anthropic returns 400 or 429 with `"long context beta is not yet available"` or `"Extra usage is required"`, dario transparently retries without the `context-1m-2025-08-07` beta flag.
|
|
460
|
+
- **OpenAI-compatible** (`/v1/chat/completions`) — works with any OpenAI SDK or tool.
|
|
461
|
+
- Streaming and non-streaming (both Anthropic and OpenAI SSE formats, including tool_use streaming).
|
|
462
|
+
- Tool use / function calling.
|
|
463
|
+
- System prompts and multi-turn conversations.
|
|
464
|
+
- Prompt caching and extended thinking.
|
|
465
|
+
- **Billable beta filtering** — strips `extended-cache-ttl` from client betas (the only beta requiring Extra Usage enabled on the account).
|
|
466
|
+
- **Beta deduplication** — client-provided betas are deduplicated against the base set before appending.
|
|
467
|
+
- **Orchestration tag sanitization** — strips agent-injected XML (`<system-reminder>`, `<env>`, `<task_metadata>`, etc.) before forwarding.
|
|
468
|
+
- **Token anomaly detection** — warns on context spike (>60% input growth) or output explosion (>2x previous).
|
|
469
|
+
- Concurrency control (max 10 concurrent upstream requests).
|
|
470
|
+
- CORS enabled (works from browser apps on localhost).
|
|
486
471
|
|
|
487
472
|
### Passthrough Mode
|
|
488
|
-
- All Claude models with native streaming and tool use
|
|
489
|
-
- OAuth token swap only — no billing tag,
|
|
490
|
-
- Minimal beta flags (`oauth-2025-04-20` + client betas only)
|
|
491
|
-
- For tools that need exact Anthropic protocol fidelity with zero modification
|
|
473
|
+
- All Claude models with native streaming and tool use.
|
|
474
|
+
- OAuth token swap only — no billing tag, no template injection, no device identity.
|
|
475
|
+
- Minimal beta flags (`oauth-2025-04-20` + client betas only).
|
|
476
|
+
- For tools that need exact Anthropic protocol fidelity with zero modification.
|
|
492
477
|
|
|
493
478
|
## Endpoints
|
|
494
479
|
|
|
@@ -521,17 +506,45 @@ curl http://localhost:3456/health
|
|
|
521
506
|
|---------|---------------------|
|
|
522
507
|
| Credential storage | Reads from Claude Code (`~/.claude/.credentials.json`) or its own store (`~/.dario/credentials.json`) with `0600` permissions |
|
|
523
508
|
| OAuth flow | PKCE (Proof Key for Code Exchange) — no client secret needed |
|
|
524
|
-
|
|
|
525
|
-
|
|
|
526
|
-
|
|
|
527
|
-
|
|
|
528
|
-
|
|
|
529
|
-
|
|
|
509
|
+
| OAuth config source | Auto-detected from local CC binary at runtime; cached at `~/.dario/cc-oauth-cache.json`. The detector opens the binary read-only and never modifies it. |
|
|
510
|
+
| Token exposure | Tokens never logged; redacted from all error messages. |
|
|
511
|
+
| Network binding | Binds exclusively to `127.0.0.1`. Upstream traffic goes only to `api.anthropic.com` over HTTPS. |
|
|
512
|
+
| Auth timing | `timingSafeEqual` used for `DARIO_API_KEY` comparison. |
|
|
513
|
+
| SSRF protection | Only `/v1/messages` and `/v1/complete` are proxied upstream — hardcoded allowlist. |
|
|
514
|
+
| Body size | 10MB hard cap per request. 30s read timeout prevents slow-loris. |
|
|
515
|
+
| Token refresh | Auto-refreshes 30 minutes before expiry. Refresh tokens rotate on each use. Mutex prevents concurrent refresh races. |
|
|
516
|
+
| Telemetry | None. Zero analytics, tracking, or data collection of any kind. |
|
|
517
|
+
|
|
518
|
+
---
|
|
519
|
+
|
|
520
|
+
## askalf
|
|
521
|
+
|
|
522
|
+
dario solves the API access problem — your $200/mo subscription, usable everywhere, billed correctly.
|
|
523
|
+
|
|
524
|
+
But a proxy has a ceiling. Every request still runs on your single account, with your subscription's rate limits, on your machine. When you need to scale beyond that — multiple accounts, persistent browser sessions, desktop control, scheduled workflows, a fleet of agents that can run while you sleep — that's what [askalf](https://askalf.org) is built for.
|
|
525
|
+
|
|
526
|
+
**askalf** is the agent platform built on top of the same OAuth and billing infrastructure that powers dario:
|
|
527
|
+
|
|
528
|
+
| | dario | askalf |
|
|
529
|
+
|---|---|---|
|
|
530
|
+
| **What it is** | Local proxy, single account | Hosted agent fleet, multi-account |
|
|
531
|
+
| **Rate limits** | Your subscription's limits | Distributed across fleet, near-zero 429s |
|
|
532
|
+
| **Browser / desktop** | No | Yes — full computer use |
|
|
533
|
+
| **Scheduling** | No | Yes — cron, webhooks, triggers |
|
|
534
|
+
| **Persistent memory** | No | Yes — per-agent memory and context |
|
|
535
|
+
| **Custom tools** | Via `--preserve-tools` | Native MCP tool server |
|
|
536
|
+
| **Setup** | 2 commands | Waitlist → dashboard |
|
|
537
|
+
|
|
538
|
+
If you're running multi-agent workflows, hitting rate limits on Claude Max, or want agents that run 24/7 without babysitting, **[join the waitlist at askalf.org](https://askalf.org)**.
|
|
539
|
+
|
|
540
|
+
dario will always be open-source and free. askalf is the hosted tier for teams who need more.
|
|
541
|
+
|
|
542
|
+
---
|
|
530
543
|
|
|
531
544
|
## FAQ
|
|
532
545
|
|
|
533
546
|
**Does this violate Anthropic's terms of service?**
|
|
534
|
-
Dario uses your existing Claude Code credentials with the same OAuth tokens. It authenticates you as you, with your subscription, through Anthropic's official API.
|
|
547
|
+
Dario uses your existing Claude Code credentials with the same OAuth tokens. It authenticates you as you, with your subscription, through Anthropic's official API.
|
|
535
548
|
|
|
536
549
|
**What subscription plans work?**
|
|
537
550
|
Claude Max and Claude Pro. Any plan that lets you use Claude Code.
|
|
@@ -540,7 +553,7 @@ Claude Max and Claude Pro. Any plan that lets you use Claude Code.
|
|
|
540
553
|
Should work if your plan includes Claude Code access. Not tested yet — please open an issue with results.
|
|
541
554
|
|
|
542
555
|
**Do I need Claude Code installed?**
|
|
543
|
-
Recommended but not required. If Claude Code is installed and logged in, `dario login` picks up your credentials automatically. Without Claude Code, dario runs its own OAuth flow to authenticate directly.
|
|
556
|
+
Recommended but not required. If Claude Code is installed and logged in, `dario login` picks up your credentials automatically. Without Claude Code, dario runs its own OAuth flow to authenticate directly.
|
|
544
557
|
|
|
545
558
|
**First time setup — account priming**
|
|
546
559
|
If dario is the first thing you use with a new Claude account, run a few real Claude Code commands first to establish a session baseline:
|
|
@@ -557,14 +570,19 @@ Optional but recommended. If [Bun](https://bun.sh) is installed, dario auto-rela
|
|
|
557
570
|
**What happens when my token expires?**
|
|
558
571
|
Dario auto-refreshes tokens 30 minutes before expiry. You should never see an auth error in normal use. If something goes wrong, `dario refresh` forces an immediate refresh.
|
|
559
572
|
|
|
560
|
-
**
|
|
561
|
-
|
|
573
|
+
**What happens when Anthropic rotates the OAuth client_id or URL?**
|
|
574
|
+
Dario auto-detects OAuth config from your installed Claude Code binary. When CC ships a new version with rotated values, dario picks them up on the next startup — no dario release needed. The detected config is cached at `~/.dario/cc-oauth-cache.json`, and dario only re-scans when the binary fingerprint changes. If CC isn't installed, dario falls back to known-good hardcoded values.
|
|
575
|
+
|
|
576
|
+
**I'm hitting rate limits. What do I do?**
|
|
577
|
+
Claude subscriptions have rolling 5-hour and 7-day usage windows. Check your utilization with Claude Code's `/usage` command or the [statusline](https://code.claude.com/docs/en/statusline). Rate limit errors from dario include utilization percentages and reset times so you can see exactly when capacity returns.
|
|
578
|
+
|
|
579
|
+
If you're running a multi-agent workload and consistently hitting limits, [askalf](https://askalf.org) distributes load across multiple accounts automatically.
|
|
562
580
|
|
|
563
581
|
**What are the usage limits?**
|
|
564
|
-
Claude subscriptions have rolling 5-hour and 7-day usage windows shared across claude.ai and Claude Code. See [Anthropic's docs](https://support.claude.com/en/articles/11647753-how-do-usage-and-length-limits-work) for details.
|
|
582
|
+
Claude subscriptions have rolling 5-hour and 7-day usage windows shared across claude.ai and Claude Code. See [Anthropic's docs](https://support.claude.com/en/articles/11647753-how-do-usage-and-length-limits-work) for details.
|
|
565
583
|
|
|
566
584
|
**Can I run this on a server?**
|
|
567
|
-
Dario binds to localhost by default. For server use,
|
|
585
|
+
Dario binds to localhost by default. For server use, handle the initial login on a machine with a browser, then copy `~/.claude/.credentials.json` (or `~/.dario/credentials.json`) to your server. Auto-refresh will keep it alive from there.
|
|
568
586
|
|
|
569
587
|
**Why "dario"?**
|
|
570
588
|
Named after [Dario Amodei](https://en.wikipedia.org/wiki/Dario_Amodei), CEO of Anthropic.
|
|
@@ -579,12 +597,12 @@ import { startProxy, getAccessToken, getStatus } from "@askalf/dario";
|
|
|
579
597
|
// Start the proxy programmatically
|
|
580
598
|
await startProxy({ port: 3456, verbose: true });
|
|
581
599
|
|
|
582
|
-
// CLI backend mode
|
|
583
|
-
await startProxy({ port: 3456, cliBackend: true, model: "opus" });
|
|
584
|
-
|
|
585
600
|
// Passthrough mode (OAuth swap only, no injection)
|
|
586
601
|
await startProxy({ port: 3456, passthrough: true });
|
|
587
602
|
|
|
603
|
+
// Preserve-tools mode (keep client tool schemas)
|
|
604
|
+
await startProxy({ port: 3456, preserveTools: true });
|
|
605
|
+
|
|
588
606
|
// Or just get a raw access token
|
|
589
607
|
const token = await getAccessToken();
|
|
590
608
|
|
|
@@ -599,7 +617,7 @@ Dario handles your OAuth tokens. Here's why you can trust it:
|
|
|
599
617
|
|
|
600
618
|
| Signal | Status |
|
|
601
619
|
|--------|--------|
|
|
602
|
-
| **Source code** | ~2,
|
|
620
|
+
| **Source code** | ~2,000 lines of TypeScript — small enough to audit in one sitting |
|
|
603
621
|
| **Dependencies** | 0 runtime dependencies. Verify: `npm ls --production` |
|
|
604
622
|
| **npm provenance** | Every release is [SLSA attested](https://www.npmjs.com/package/@askalf/dario) via GitHub Actions |
|
|
605
623
|
| **Security scanning** | [CodeQL](https://github.com/askalf/dario/actions/workflows/codeql.yml) runs on every push and weekly |
|
|
@@ -625,21 +643,22 @@ cd $(npm root -g)/@askalf/dario && npm ls --production
|
|
|
625
643
|
|-------|------|
|
|
626
644
|
| v3.0 Template Replay — why we stopped matching signals | [Discussion 14](https://github.com/askalf/dario/discussions/14) |
|
|
627
645
|
| Claude Code defaults are detection signals, not optimizations | [Discussion 13](https://github.com/askalf/dario/discussions/13) |
|
|
628
|
-
| Why Opus
|
|
646
|
+
| Why Opus feels worse through other proxies and how to fix it | [Discussion 9](https://github.com/askalf/dario/discussions/9) |
|
|
629
647
|
| Billing tag algorithm and fingerprint analysis | [Discussion 8](https://github.com/askalf/dario/discussions/8) |
|
|
630
648
|
| Rate limit header analysis | [Discussion 1](https://github.com/askalf/dario/discussions/1) |
|
|
631
649
|
|
|
632
650
|
## Contributing
|
|
633
651
|
|
|
634
|
-
PRs welcome. The codebase is ~2,
|
|
652
|
+
PRs welcome. The codebase is ~2,000 lines of TypeScript (plus one JSON template data file) across 7 files:
|
|
635
653
|
|
|
636
654
|
| File | Purpose |
|
|
637
655
|
|------|---------|
|
|
638
|
-
| `src/proxy.ts` | HTTP proxy server,
|
|
639
|
-
| `src/cc-template.ts` |
|
|
640
|
-
| `src/cc-template-data.json` |
|
|
641
|
-
| `src/oauth.ts` |
|
|
642
|
-
| `src/
|
|
656
|
+
| `src/proxy.ts` | HTTP proxy server, rate governor, billing tag, response forwarding |
|
|
657
|
+
| `src/cc-template.ts` | Template engine, tool mapping, orchestration sanitization |
|
|
658
|
+
| `src/cc-template-data.json` | CC request template data (25 tools, 25KB system prompt) |
|
|
659
|
+
| `src/cc-oauth-detect.ts` | Auto-detect OAuth config from the installed CC binary |
|
|
660
|
+
| `src/oauth.ts` | Token storage, PKCE flow, auto-refresh, credential detection |
|
|
661
|
+
| `src/cli.ts` | CLI entry point, command routing, Bun auto-relaunch |
|
|
643
662
|
| `src/index.ts` | Library exports |
|
|
644
663
|
|
|
645
664
|
```bash
|
|
@@ -654,7 +673,7 @@ npm run dev # runs with tsx (no build needed)
|
|
|
654
673
|
| Who | Contributions |
|
|
655
674
|
|-----|---------------|
|
|
656
675
|
| [@GodsBoy](https://github.com/GodsBoy) | Proxy authentication, token redaction, error sanitization ([#2](https://github.com/askalf/dario/pull/2)) |
|
|
657
|
-
| [@belangertrading](https://github.com/belangertrading) | Billing classification investigation ([#4](https://github.com/askalf/dario/issues/4)),
|
|
676
|
+
| [@belangertrading](https://github.com/belangertrading) | Billing classification investigation ([#4](https://github.com/askalf/dario/issues/4)), billing reclassification root cause ([#7](https://github.com/askalf/dario/issues/7)) |
|
|
658
677
|
|
|
659
678
|
## License
|
|
660
679
|
|
package/dist/cc-template.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Claude Code request template
|
|
2
|
+
* Claude Code request template.
|
|
3
3
|
*
|
|
4
4
|
* Tool definitions, system prompt, and request structure are loaded from
|
|
5
|
-
* cc-template-data.json
|
|
6
|
-
*
|
|
5
|
+
* cc-template-data.json and sent verbatim — this gives byte-level fidelity
|
|
6
|
+
* with the shape of a real Claude Code request.
|
|
7
7
|
*/
|
|
8
|
-
/** CC's exact tool definitions — loaded from
|
|
8
|
+
/** CC's exact tool definitions — loaded from the template JSON. */
|
|
9
9
|
export declare const CC_TOOL_DEFINITIONS: {
|
|
10
10
|
name: string;
|
|
11
11
|
description: string;
|
package/dist/cc-template.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Claude Code request template
|
|
2
|
+
* Claude Code request template.
|
|
3
3
|
*
|
|
4
4
|
* Tool definitions, system prompt, and request structure are loaded from
|
|
5
|
-
* cc-template-data.json
|
|
6
|
-
*
|
|
5
|
+
* cc-template-data.json and sent verbatim — this gives byte-level fidelity
|
|
6
|
+
* with the shape of a real Claude Code request.
|
|
7
7
|
*/
|
|
8
8
|
import { readFileSync } from 'node:fs';
|
|
9
9
|
import { join, dirname } from 'node:path';
|
|
@@ -11,7 +11,7 @@ import { fileURLToPath } from 'node:url';
|
|
|
11
11
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
12
12
|
// Load template data at module init — fail fast if missing
|
|
13
13
|
const TEMPLATE = JSON.parse(readFileSync(join(__dirname, 'cc-template-data.json'), 'utf-8'));
|
|
14
|
-
/** CC's exact tool definitions — loaded from
|
|
14
|
+
/** CC's exact tool definitions — loaded from the template JSON. */
|
|
15
15
|
export const CC_TOOL_DEFINITIONS = TEMPLATE.tools;
|
|
16
16
|
/** CC's static system prompt (~25KB). */
|
|
17
17
|
export const CC_SYSTEM_PROMPT = TEMPLATE.system_prompt;
|
|
@@ -182,7 +182,7 @@ export function buildCCRequest(clientBody, billingTag, cache1h, identity, opts =
|
|
|
182
182
|
systemText = systemText.replace(pattern, '');
|
|
183
183
|
}
|
|
184
184
|
// ── Build the CC request from template ──
|
|
185
|
-
// Key order matches CC v2.1.104
|
|
185
|
+
// Key order matches CC v2.1.104 exactly:
|
|
186
186
|
// model, messages, system, tools, metadata, max_tokens, thinking, context_management, output_config, stream
|
|
187
187
|
//
|
|
188
188
|
// System prompt structure (3 blocks, matching real CC):
|
package/dist/cli.js
CHANGED
|
@@ -126,12 +126,11 @@ async function proxy() {
|
|
|
126
126
|
process.exit(1);
|
|
127
127
|
}
|
|
128
128
|
const verbose = args.includes('--verbose') || args.includes('-v');
|
|
129
|
-
const cliBackend = args.includes('--cli');
|
|
130
129
|
const passthrough = args.includes('--passthrough') || args.includes('--thin');
|
|
131
130
|
const preserveTools = args.includes('--preserve-tools') || args.includes('--keep-tools');
|
|
132
131
|
const modelArg = args.find(a => a.startsWith('--model='));
|
|
133
132
|
const model = modelArg ? modelArg.split('=')[1] : undefined;
|
|
134
|
-
await startProxy({ port, verbose, model,
|
|
133
|
+
await startProxy({ port, verbose, model, passthrough, preserveTools });
|
|
135
134
|
}
|
|
136
135
|
async function help() {
|
|
137
136
|
console.log(`
|
|
@@ -149,15 +148,14 @@ async function help() {
|
|
|
149
148
|
Shortcuts: opus, sonnet, haiku
|
|
150
149
|
Full IDs: claude-opus-4-6, claude-sonnet-4-6
|
|
151
150
|
Default: passthrough (client decides)
|
|
152
|
-
--
|
|
153
|
-
--passthrough Thin proxy — OAuth swap only, no injection
|
|
151
|
+
--passthrough, --thin Thin proxy — OAuth swap only, no injection
|
|
154
152
|
--preserve-tools Keep client tool schemas (for agents with custom tools)
|
|
155
153
|
--port=PORT Port to listen on (default: 3456)
|
|
156
154
|
--verbose, -v Log all requests
|
|
157
155
|
|
|
158
156
|
Quick start:
|
|
159
157
|
dario login # auto-detects Claude Code credentials
|
|
160
|
-
dario proxy
|
|
158
|
+
dario proxy --model=opus # or: dario proxy --passthrough
|
|
161
159
|
|
|
162
160
|
Then point any Anthropic SDK at http://localhost:3456:
|
|
163
161
|
export ANTHROPIC_BASE_URL=http://localhost:3456
|
package/dist/proxy.d.ts
CHANGED
package/dist/proxy.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { createServer } from 'node:http';
|
|
2
2
|
import { randomUUID, randomBytes, timingSafeEqual, createHash } from 'node:crypto';
|
|
3
|
-
import { execSync
|
|
4
|
-
import { readFileSync, readdirSync
|
|
3
|
+
import { execSync } from 'node:child_process';
|
|
4
|
+
import { readFileSync, readdirSync } from 'node:fs';
|
|
5
5
|
import { join } from 'node:path';
|
|
6
|
-
import { homedir
|
|
6
|
+
import { homedir } from 'node:os';
|
|
7
7
|
import { arch, platform } from 'node:process';
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
9
|
import { buildCCRequest, reverseMapResponse } from './cc-template.js';
|
|
@@ -36,63 +36,29 @@ class Semaphore {
|
|
|
36
36
|
next();
|
|
37
37
|
}
|
|
38
38
|
}
|
|
39
|
-
// Billing tag hash seed —
|
|
39
|
+
// Billing tag hash seed — matches Claude Code's value
|
|
40
40
|
const BILLING_SEED = '59cf53e54c78';
|
|
41
|
-
// Compute per-request build tag
|
|
41
|
+
// Compute per-request build tag:
|
|
42
42
|
// first 3 hex chars of SHA-256(seed + chars[4,7,20] of user message ('0' where missing) + version)
|
|
43
43
|
function computeBuildTag(userMessage, version) {
|
|
44
44
|
const chars = [4, 7, 20].map(i => userMessage[i] || '0').join('');
|
|
45
45
|
return createHash('sha256').update(`${BILLING_SEED}${chars}${version}`).digest('hex').slice(0, 3);
|
|
46
46
|
}
|
|
47
|
-
// Per-request cch:
|
|
48
|
-
// Confirmed via MITM: 10 identical requests → 10 unique cch values, no deterministic pattern.
|
|
47
|
+
// Per-request cch: random 5-char hex value each request (Claude Code does the same).
|
|
49
48
|
function computeCch() {
|
|
50
49
|
return randomBytes(3).toString('hex').slice(0, 5);
|
|
51
50
|
}
|
|
52
|
-
// Detect installed Claude Code
|
|
53
|
-
|
|
54
|
-
function
|
|
51
|
+
// Detect installed Claude Code version for the build-tag computation.
|
|
52
|
+
// Falls back to a known-good version if `claude` can't be run (missing, errors, or times out) or its output has no version number.
|
|
53
|
+
function detectCliVersion() {
|
|
55
54
|
try {
|
|
56
55
|
const out = execSync('claude --version', { timeout: 5000, stdio: 'pipe' }).toString().trim();
|
|
57
|
-
cliAvailable = true;
|
|
58
|
-
// Capture major version (e.g., 2.1.100) — build tag is computed per-request
|
|
59
56
|
return out.match(/^([\d]+\.[\d]+\.[\d]+)/)?.[1] ?? '2.1.100';
|
|
60
57
|
}
|
|
61
58
|
catch {
|
|
62
|
-
cliAvailable = false;
|
|
63
59
|
return '2.1.100';
|
|
64
60
|
}
|
|
65
61
|
}
|
|
66
|
-
/** Convert a non-streaming Messages API response to SSE event stream. */
|
|
67
|
-
function jsonToSse(jsonBody) {
|
|
68
|
-
try {
|
|
69
|
-
const msg = JSON.parse(jsonBody);
|
|
70
|
-
const events = [];
|
|
71
|
-
// message_start
|
|
72
|
-
events.push(`event: message_start\ndata: ${JSON.stringify({ type: 'message_start', message: { ...msg, content: [], stop_reason: null } })}\n\n`);
|
|
73
|
-
// content blocks
|
|
74
|
-
const content = msg.content;
|
|
75
|
-
if (content) {
|
|
76
|
-
for (let i = 0; i < content.length; i++) {
|
|
77
|
-
const block = content[i];
|
|
78
|
-
events.push(`event: content_block_start\ndata: ${JSON.stringify({ type: 'content_block_start', index: i, content_block: { type: block.type, ...(block.type === 'text' ? { text: '' } : { thinking: '' }) } })}\n\n`);
|
|
79
|
-
if (block.type === 'text' && block.text) {
|
|
80
|
-
events.push(`event: content_block_delta\ndata: ${JSON.stringify({ type: 'content_block_delta', index: i, delta: { type: 'text_delta', text: block.text } })}\n\n`);
|
|
81
|
-
}
|
|
82
|
-
else if (block.type === 'thinking' && block.thinking) {
|
|
83
|
-
events.push(`event: content_block_delta\ndata: ${JSON.stringify({ type: 'content_block_delta', index: i, delta: { type: 'thinking_delta', thinking: block.thinking } })}\n\n`);
|
|
84
|
-
}
|
|
85
|
-
events.push(`event: content_block_stop\ndata: ${JSON.stringify({ type: 'content_block_stop', index: i })}\n\n`);
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
// message_stop
|
|
89
|
-
events.push(`event: message_stop\ndata: ${JSON.stringify({ type: 'message_stop' })}\n\n`);
|
|
90
|
-
return events.join('');
|
|
91
|
-
}
|
|
92
|
-
catch {
|
|
93
|
-
return '';
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
62
|
/** Extract first user message text from a request body for billing tag computation. */
|
|
97
63
|
function extractFirstUserMessage(body) {
|
|
98
64
|
const messages = body.messages;
|
|
@@ -109,42 +75,8 @@ function extractFirstUserMessage(body) {
|
|
|
109
75
|
}
|
|
110
76
|
return '';
|
|
111
77
|
}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
try {
|
|
115
|
-
const parsed = JSON.parse(jsonBody);
|
|
116
|
-
const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
|
|
117
|
-
const ts = Math.floor(Date.now() / 1000);
|
|
118
|
-
return `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n` +
|
|
119
|
-
`data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
|
|
120
|
-
}
|
|
121
|
-
catch {
|
|
122
|
-
return '';
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
/** Send a CLI result to the client, handling streaming/format translation. */
|
|
126
|
-
function sendCliResponse(res, cliResult, clientWantsStream, isOpenAI, corsOrigin, securityHeaders) {
|
|
127
|
-
const headers = { 'Access-Control-Allow-Origin': corsOrigin, ...securityHeaders };
|
|
128
|
-
const ok = cliResult.status >= 200 && cliResult.status < 300;
|
|
129
|
-
if (ok && clientWantsStream) {
|
|
130
|
-
const sseData = isOpenAI ? jsonToOpenaiSse(cliResult.body) : jsonToSse(cliResult.body);
|
|
131
|
-
if (sseData) {
|
|
132
|
-
res.writeHead(200, { 'Content-Type': 'text/event-stream', ...headers });
|
|
133
|
-
res.end(sseData);
|
|
134
|
-
return;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
if (ok && isOpenAI) {
|
|
138
|
-
try {
|
|
139
|
-
cliResult.body = JSON.stringify(anthropicToOpenai(JSON.parse(cliResult.body)));
|
|
140
|
-
}
|
|
141
|
-
catch { }
|
|
142
|
-
}
|
|
143
|
-
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, ...headers });
|
|
144
|
-
res.end(cliResult.body);
|
|
145
|
-
}
|
|
146
|
-
// Session ID rotates per request — each CC --print invocation creates a new session.
|
|
147
|
-
// A persistent session ID across many requests is a detection signal.
|
|
78
|
+
// Session ID rotates per request — fresh UUID per invocation.
|
|
79
|
+
// A persistent session ID across many requests is a behavioral fingerprint.
|
|
148
80
|
let SESSION_ID = randomUUID();
|
|
149
81
|
const OS_NAME = platform === 'win32' ? 'Windows' : platform === 'darwin' ? 'MacOS' : 'Linux';
|
|
150
82
|
// Claude Code device identity — required for Max plan billing classification.
|
|
@@ -205,8 +137,7 @@ function filterBillableBetas(betas) {
|
|
|
205
137
|
const ORCHESTRATION_TAG_NAMES = [
|
|
206
138
|
'system-reminder', 'env', 'system_information', 'current_working_directory',
|
|
207
139
|
'operating_system', 'default_shell', 'home_directory', 'task_metadata',
|
|
208
|
-
'
|
|
209
|
-
'directories', 'available_skills', 'thinking',
|
|
140
|
+
'directories', 'thinking',
|
|
210
141
|
];
|
|
211
142
|
const ORCHESTRATION_PATTERNS = ORCHESTRATION_TAG_NAMES.flatMap(tag => [
|
|
212
143
|
new RegExp(`<${tag}\\b[^>]*>[\\s\\S]*?<\\/${tag}>`, 'gi'),
|
|
@@ -375,123 +306,6 @@ function enrich429(body, headers) {
|
|
|
375
306
|
return body;
|
|
376
307
|
}
|
|
377
308
|
}
|
|
378
|
-
/**
|
|
379
|
-
* CLI Backend: route requests through `claude --print` instead of direct API.
|
|
380
|
-
* This bypasses rate limiting because Claude Code's binary has priority routing.
|
|
381
|
-
*/
|
|
382
|
-
async function handleViaCli(body, model, verbose) {
|
|
383
|
-
try {
|
|
384
|
-
const parsed = JSON.parse(body.toString());
|
|
385
|
-
// Extract the last user message as the prompt
|
|
386
|
-
const messages = parsed.messages ?? [];
|
|
387
|
-
const lastUser = [...messages].reverse().find(m => m.role === 'user');
|
|
388
|
-
if (!lastUser) {
|
|
389
|
-
return { status: 400, body: JSON.stringify({ error: 'No user message' }), contentType: 'application/json' };
|
|
390
|
-
}
|
|
391
|
-
const rawModel = model ?? parsed.model ?? 'claude-opus-4-6';
|
|
392
|
-
// Validate model name — only allow alphanumeric, hyphens, dots, underscores
|
|
393
|
-
const effectiveModel = /^[a-zA-Z0-9._-]+$/.test(rawModel) ? rawModel : 'claude-opus-4-6';
|
|
394
|
-
const prompt = typeof lastUser.content === 'string'
|
|
395
|
-
? lastUser.content
|
|
396
|
-
: JSON.stringify(lastUser.content);
|
|
397
|
-
// Build claude --print command
|
|
398
|
-
const args = ['--print', '--model', effectiveModel];
|
|
399
|
-
// Flatten system prompt — API accepts string or array of content blocks,
|
|
400
|
-
// but claude --print only accepts a string
|
|
401
|
-
let systemPrompt = '';
|
|
402
|
-
if (typeof parsed.system === 'string') {
|
|
403
|
-
systemPrompt = parsed.system;
|
|
404
|
-
}
|
|
405
|
-
else if (Array.isArray(parsed.system)) {
|
|
406
|
-
systemPrompt = parsed.system
|
|
407
|
-
.filter(b => b.text)
|
|
408
|
-
.map(b => b.text)
|
|
409
|
-
.join('\n\n');
|
|
410
|
-
}
|
|
411
|
-
// Include conversation history as context
|
|
412
|
-
const history = messages.slice(0, -1);
|
|
413
|
-
if (history.length > 0) {
|
|
414
|
-
const historyText = history.map(m => `${m.role}: ${typeof m.content === 'string' ? m.content : JSON.stringify(m.content)}`).join('\n');
|
|
415
|
-
systemPrompt = systemPrompt ? `${systemPrompt}\n\nConversation history:\n${historyText}` : `Conversation history:\n${historyText}`;
|
|
416
|
-
}
|
|
417
|
-
// Write system prompt to temp file instead of passing as arg to avoid E2BIG
|
|
418
|
-
// on large conversation contexts (OS arg size limit ~2MB)
|
|
419
|
-
let systemPromptFile = null;
|
|
420
|
-
if (systemPrompt) {
|
|
421
|
-
systemPromptFile = join(tmpdir(), `dario-sysprompt-${randomUUID()}.txt`);
|
|
422
|
-
writeFileSync(systemPromptFile, systemPrompt, { mode: 0o600 });
|
|
423
|
-
args.push('--append-system-prompt-file', systemPromptFile);
|
|
424
|
-
}
|
|
425
|
-
if (verbose) {
|
|
426
|
-
console.log(`[dario:cli] model=${effectiveModel} prompt=${prompt.substring(0, 60)}...`);
|
|
427
|
-
}
|
|
428
|
-
// Spawn claude --print
|
|
429
|
-
return new Promise((resolve) => {
|
|
430
|
-
// Cleanup temp file when done
|
|
431
|
-
const cleanup = () => { if (systemPromptFile)
|
|
432
|
-
try {
|
|
433
|
-
unlinkSync(systemPromptFile);
|
|
434
|
-
}
|
|
435
|
-
catch { } };
|
|
436
|
-
const child = spawn('claude', args, {
|
|
437
|
-
stdio: ['pipe', 'pipe', 'pipe'],
|
|
438
|
-
timeout: 300_000,
|
|
439
|
-
});
|
|
440
|
-
let stdout = '';
|
|
441
|
-
let stderr = '';
|
|
442
|
-
const MAX_CLI_OUTPUT = 5_000_000; // 5MB cap per stream — prevents OOM from runaway CLI
|
|
443
|
-
child.stdout.on('data', (d) => { if (stdout.length < MAX_CLI_OUTPUT)
|
|
444
|
-
stdout += d.toString(); });
|
|
445
|
-
child.stderr.on('data', (d) => { if (stderr.length < MAX_CLI_OUTPUT)
|
|
446
|
-
stderr += d.toString(); });
|
|
447
|
-
child.stdin.write(prompt);
|
|
448
|
-
child.stdin.end();
|
|
449
|
-
child.on('close', (code) => {
|
|
450
|
-
cleanup();
|
|
451
|
-
if (code !== 0 || !stdout.trim()) {
|
|
452
|
-
resolve({
|
|
453
|
-
status: 502,
|
|
454
|
-
body: JSON.stringify({ type: 'error', error: { type: 'api_error', message: sanitizeError(stderr.substring(0, 200)) || 'CLI backend failed' } }),
|
|
455
|
-
contentType: 'application/json',
|
|
456
|
-
});
|
|
457
|
-
return;
|
|
458
|
-
}
|
|
459
|
-
// Build a proper Messages API response
|
|
460
|
-
const text = stdout.trim();
|
|
461
|
-
const estimatedTokens = Math.ceil(text.length / 4);
|
|
462
|
-
const response = {
|
|
463
|
-
id: `msg_${randomUUID().replace(/-/g, '').substring(0, 24)}`,
|
|
464
|
-
type: 'message',
|
|
465
|
-
role: 'assistant',
|
|
466
|
-
model: effectiveModel,
|
|
467
|
-
content: [{ type: 'text', text }],
|
|
468
|
-
stop_reason: 'end_turn',
|
|
469
|
-
stop_sequence: null,
|
|
470
|
-
usage: {
|
|
471
|
-
input_tokens: Math.ceil(prompt.length / 4),
|
|
472
|
-
output_tokens: estimatedTokens,
|
|
473
|
-
},
|
|
474
|
-
};
|
|
475
|
-
resolve({ status: 200, body: JSON.stringify(response), contentType: 'application/json' });
|
|
476
|
-
});
|
|
477
|
-
child.on('error', (err) => {
|
|
478
|
-
cleanup();
|
|
479
|
-
resolve({
|
|
480
|
-
status: 502,
|
|
481
|
-
body: JSON.stringify({ type: 'error', error: { type: 'api_error', message: 'Claude CLI not found. Install Claude Code first.' } }),
|
|
482
|
-
contentType: 'application/json',
|
|
483
|
-
});
|
|
484
|
-
});
|
|
485
|
-
});
|
|
486
|
-
}
|
|
487
|
-
catch (err) {
|
|
488
|
-
return {
|
|
489
|
-
status: 400,
|
|
490
|
-
body: JSON.stringify({ type: 'error', error: { type: 'invalid_request_error', message: 'Invalid request body' } }),
|
|
491
|
-
contentType: 'application/json',
|
|
492
|
-
};
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
309
|
export async function startProxy(opts = {}) {
|
|
496
310
|
const port = opts.port ?? DEFAULT_PORT;
|
|
497
311
|
const verbose = opts.verbose ?? false;
|
|
@@ -502,7 +316,7 @@ export async function startProxy(opts = {}) {
|
|
|
502
316
|
console.error('[dario] Not authenticated. Run `dario login` first.');
|
|
503
317
|
process.exit(1);
|
|
504
318
|
}
|
|
505
|
-
const cliVersion =
|
|
319
|
+
const cliVersion = detectCliVersion();
|
|
506
320
|
const modelOverride = opts.model ? (MODEL_ALIASES[opts.model] ?? opts.model) : null;
|
|
507
321
|
const identity = loadClaudeIdentity();
|
|
508
322
|
if (identity.deviceId) {
|
|
@@ -512,7 +326,7 @@ export async function startProxy(opts = {}) {
|
|
|
512
326
|
console.warn('[dario] WARNING: No Claude Code device identity found. Requests may be billed as Extra Usage.');
|
|
513
327
|
console.warn('[dario] Run Claude Code at least once to generate ~/.claude/.claude.json');
|
|
514
328
|
}
|
|
515
|
-
// Pre-build static headers
|
|
329
|
+
// Pre-build static headers — matches the set a real Claude Code client sends.
|
|
516
330
|
const staticHeaders = passthrough ? {
|
|
517
331
|
'accept': 'application/json',
|
|
518
332
|
'Content-Type': 'application/json',
|
|
@@ -531,12 +345,10 @@ export async function startProxy(opts = {}) {
|
|
|
531
345
|
// Claude Code runs on Bun which reports v24.3.0 as Node compat version
|
|
532
346
|
'x-stainless-runtime-version': 'v24.3.0',
|
|
533
347
|
};
|
|
534
|
-
const useCli = opts.cliBackend ?? false;
|
|
535
348
|
let requestCount = 0;
|
|
536
349
|
const semaphore = new Semaphore(MAX_CONCURRENT);
|
|
537
|
-
// Rate governor
|
|
538
|
-
//
|
|
539
|
-
// Minimum 500ms between requests — fast enough for agents, slow enough to not flag.
|
|
350
|
+
// Rate governor — minimum gap between requests (500ms unless DARIO_MIN_INTERVAL_MS overrides it). Fast enough for agents,
|
|
351
|
+
// slow enough to not look like a scripted flood of identical traffic.
|
|
540
352
|
let lastRequestTime = 0;
|
|
541
353
|
const MIN_REQUEST_INTERVAL_MS = parseInt(process.env.DARIO_MIN_INTERVAL_MS || '500', 10);
|
|
542
354
|
// Optional proxy authentication — pre-encode key buffer for performance
|
|
@@ -656,31 +468,6 @@ export async function startProxy(opts = {}) {
|
|
|
656
468
|
clearTimeout(bodyTimeout);
|
|
657
469
|
}
|
|
658
470
|
const body = Buffer.concat(chunks);
|
|
659
|
-
// CLI backend mode: route through claude --print (works for both Anthropic and OpenAI endpoints)
|
|
660
|
-
if (useCli && req.method === 'POST' && body.length > 0) {
|
|
661
|
-
let cliBody = body;
|
|
662
|
-
let clientWantsStream = false;
|
|
663
|
-
// Translate OpenAI format before passing to CLI
|
|
664
|
-
if (isOpenAI) {
|
|
665
|
-
try {
|
|
666
|
-
const parsed = JSON.parse(body.toString());
|
|
667
|
-
clientWantsStream = !!parsed.stream;
|
|
668
|
-
cliBody = Buffer.from(JSON.stringify(openaiToAnthropic(parsed, modelOverride)));
|
|
669
|
-
}
|
|
670
|
-
catch { /* send as-is */ }
|
|
671
|
-
}
|
|
672
|
-
else {
|
|
673
|
-
try {
|
|
674
|
-
const parsed = JSON.parse(body.toString());
|
|
675
|
-
clientWantsStream = !!parsed.stream;
|
|
676
|
-
}
|
|
677
|
-
catch { }
|
|
678
|
-
}
|
|
679
|
-
const cliResult = await handleViaCli(cliBody, modelOverride, verbose);
|
|
680
|
-
requestCount++;
|
|
681
|
-
sendCliResponse(res, cliResult, clientWantsStream, isOpenAI, corsOrigin, SECURITY_HEADERS);
|
|
682
|
-
return;
|
|
683
|
-
}
|
|
684
471
|
// Parse body once, apply OpenAI translation, model override, and sanitization
|
|
685
472
|
let finalBody = body.length > 0 ? body : undefined;
|
|
686
473
|
let ccToolMap = null;
|
|
@@ -731,7 +518,7 @@ export async function startProxy(opts = {}) {
|
|
|
731
518
|
beta += ',' + clientBeta;
|
|
732
519
|
}
|
|
733
520
|
else {
|
|
734
|
-
// CC v2.1.104 beta set
|
|
521
|
+
// CC v2.1.104 beta set — 8 flags in the order Claude Code sends them.
|
|
735
522
|
// context-1m requires Extra Usage — if it 400s, we auto-retry without it.
|
|
736
523
|
beta = 'claude-code-20250219,oauth-2025-04-20,context-1m-2025-08-07,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
|
|
737
524
|
if (clientBeta) {
|
|
@@ -749,7 +536,7 @@ export async function startProxy(opts = {}) {
|
|
|
749
536
|
await new Promise(r => setTimeout(r, MIN_REQUEST_INTERVAL_MS - elapsed));
|
|
750
537
|
}
|
|
751
538
|
lastRequestTime = Date.now();
|
|
752
|
-
// Rotate session ID per request —
|
|
539
|
+
// Rotate session ID per request — fresh UUID avoids persistent-session fingerprinting
|
|
753
540
|
SESSION_ID = randomUUID();
|
|
754
541
|
const headers = {
|
|
755
542
|
...staticHeaders,
|
|
@@ -798,24 +585,20 @@ export async function startProxy(opts = {}) {
|
|
|
798
585
|
else if (upstream.status === 429) {
|
|
799
586
|
// Not a context-1m issue — return enriched 429 directly
|
|
800
587
|
const enriched = enrich429(peekedBody, upstream.headers);
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
responseHeaders[key] = value;
|
|
810
|
-
}
|
|
588
|
+
const responseHeaders = {
|
|
589
|
+
'Content-Type': 'application/json',
|
|
590
|
+
'Access-Control-Allow-Origin': corsOrigin,
|
|
591
|
+
...SECURITY_HEADERS,
|
|
592
|
+
};
|
|
593
|
+
for (const [key, value] of upstream.headers.entries()) {
|
|
594
|
+
if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
|
|
595
|
+
responseHeaders[key] = value;
|
|
811
596
|
}
|
|
812
|
-
requestCount++;
|
|
813
|
-
res.writeHead(429, responseHeaders);
|
|
814
|
-
res.end(enriched);
|
|
815
|
-
return;
|
|
816
597
|
}
|
|
817
|
-
|
|
818
|
-
|
|
598
|
+
requestCount++;
|
|
599
|
+
res.writeHead(429, responseHeaders);
|
|
600
|
+
res.end(enriched);
|
|
601
|
+
return;
|
|
819
602
|
}
|
|
820
603
|
else if (upstream.status === 400) {
|
|
821
604
|
// Non-long-context 400 — forward upstream error directly.
|
|
@@ -836,7 +619,7 @@ export async function startProxy(opts = {}) {
|
|
|
836
619
|
}
|
|
837
620
|
}
|
|
838
621
|
// Enrich 429 errors with rate limit details from headers (Anthropic only returns "Error")
|
|
839
|
-
if (upstream.status === 429
|
|
622
|
+
if (upstream.status === 429) {
|
|
840
623
|
const errBody = await upstream.text().catch(() => '');
|
|
841
624
|
const enriched = enrich429(errBody, upstream.headers);
|
|
842
625
|
const responseHeaders = {
|
|
@@ -854,42 +637,6 @@ export async function startProxy(opts = {}) {
|
|
|
854
637
|
res.end(enriched);
|
|
855
638
|
return;
|
|
856
639
|
}
|
|
857
|
-
// Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary
|
|
858
|
-
if (upstream.status === 429 && cliAvailable && !useCli) {
|
|
859
|
-
const errBody429 = await upstream.text().catch(() => '');
|
|
860
|
-
if (verbose)
|
|
861
|
-
console.log(`[dario] #${requestCount} 429 from API — falling back to CLI`);
|
|
862
|
-
let clientWantsStream = false;
|
|
863
|
-
try {
|
|
864
|
-
clientWantsStream = !!JSON.parse(body.toString()).stream;
|
|
865
|
-
}
|
|
866
|
-
catch { }
|
|
867
|
-
const cliResult = await handleViaCli(body, modelOverride, verbose);
|
|
868
|
-
// If CLI fallback also failed, return the original 429 with enriched details
|
|
869
|
-
// instead of a cryptic 502 from CLI failure
|
|
870
|
-
if (cliResult.status >= 500) {
|
|
871
|
-
if (verbose)
|
|
872
|
-
console.log(`[dario] #${requestCount} CLI fallback failed (${cliResult.status}) — returning original 429`);
|
|
873
|
-
const enriched = enrich429(errBody429, upstream.headers);
|
|
874
|
-
const responseHeaders = {
|
|
875
|
-
'Content-Type': 'application/json',
|
|
876
|
-
'Access-Control-Allow-Origin': corsOrigin,
|
|
877
|
-
...SECURITY_HEADERS,
|
|
878
|
-
};
|
|
879
|
-
for (const [key, value] of upstream.headers.entries()) {
|
|
880
|
-
if (key.startsWith('x-ratelimit') || key.startsWith('anthropic-ratelimit') || key === 'request-id') {
|
|
881
|
-
responseHeaders[key] = value;
|
|
882
|
-
}
|
|
883
|
-
}
|
|
884
|
-
requestCount++;
|
|
885
|
-
res.writeHead(429, responseHeaders);
|
|
886
|
-
res.end(enriched);
|
|
887
|
-
return;
|
|
888
|
-
}
|
|
889
|
-
requestCount++;
|
|
890
|
-
sendCliResponse(res, cliResult, clientWantsStream, isOpenAI, corsOrigin, SECURITY_HEADERS);
|
|
891
|
-
return;
|
|
892
|
-
}
|
|
893
640
|
// Detect streaming from content-type (reliable) or body (fallback)
|
|
894
641
|
const contentType = upstream.headers.get('content-type') ?? '';
|
|
895
642
|
const isStream = contentType.includes('text/event-stream');
|
|
@@ -1008,7 +755,9 @@ export async function startProxy(opts = {}) {
|
|
|
1008
755
|
process.exit(1);
|
|
1009
756
|
});
|
|
1010
757
|
server.listen(port, LOCALHOST, () => {
|
|
1011
|
-
const modeLine = passthrough
|
|
758
|
+
const modeLine = passthrough
|
|
759
|
+
? 'Mode: passthrough (OAuth swap only, no injection)'
|
|
760
|
+
: `OAuth: ${status.status} (expires in ${status.expiresIn})`;
|
|
1012
761
|
const modelLine = modelOverride ? `Model: ${modelOverride} (all requests)` : 'Model: passthrough (client decides)';
|
|
1013
762
|
console.log('');
|
|
1014
763
|
console.log(` dario — http://localhost:${port}`);
|
|
@@ -1023,8 +772,8 @@ export async function startProxy(opts = {}) {
|
|
|
1023
772
|
console.log(` ${modelLine}`);
|
|
1024
773
|
console.log('');
|
|
1025
774
|
});
|
|
1026
|
-
// Session presence heartbeat —
|
|
1027
|
-
//
|
|
775
|
+
// Session presence heartbeat — keeps the OAuth session marked active
|
|
776
|
+
// (matches the ~5s cadence of a real Claude Code session).
|
|
1028
777
|
const clientId = randomUUID();
|
|
1029
778
|
const connectedAt = new Date().toISOString();
|
|
1030
779
|
let lastPresencePulse = 0;
|