@askalf/dario 4.8.11 → 4.8.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +99 -260
- package/dist/analytics.js +2 -0
- package/dist/cc-template.d.ts +1 -0
- package/dist/cc-template.js +1 -0
- package/dist/doctor.js +1 -1
- package/dist/live-fingerprint.d.ts +1 -1
- package/dist/live-fingerprint.js +1 -1
- package/dist/proxy.js +4 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,10 +10,9 @@
|
|
|
10
10
|
<a href="https://github.com/askalf/dario/blob/master/LICENSE"><img src="https://img.shields.io/npm/l/@askalf/dario" alt="License"></a>
|
|
11
11
|
<a href="https://www.npmjs.com/package/@askalf/dario"><img src="https://img.shields.io/npm/dm/@askalf/dario" alt="Downloads"></a>
|
|
12
12
|
<a href="https://x.com/ask_alf"><img src="https://img.shields.io/badge/follow-@ask_alf-1da1f2?style=flat-square" alt="Follow on X"></a>
|
|
13
|
-
<!-- <a href="https://discord.gg/fENVZpdYcX"><img src="https://img.shields.io/badge/discord-join-5865f2?style=flat-square&logo=discord&logoColor=white" alt="Join Discord"></a> -->
|
|
14
13
|
</p>
|
|
15
14
|
|
|
16
|
-
<p align="center"><em>Zero runtime dependencies · <a href="https://www.npmjs.com/package/@askalf/dario">SLSA-attested</a> every release · nothing phones home · ~
|
|
15
|
+
<p align="center"><em>Zero runtime dependencies · <a href="https://www.npmjs.com/package/@askalf/dario">SLSA-attested</a> every release · nothing phones home · ~18.5k lines you can read in a weekend · independent, unofficial, third-party (<a href="DISCLAIMER.md">DISCLAIMER.md</a>)</em></p>
|
|
17
16
|
|
|
18
17
|
---
|
|
19
18
|
|
|
@@ -22,28 +21,50 @@ You're already paying $20, $100, or $200 a month for Claude. Then Cursor wants a
|
|
|
22
21
|
**dario is one local endpoint that routes all of them through the Claude subscription you already pay for.** Point any Anthropic- or OpenAI-compatible tool at `http://localhost:3456` and you're done. No per-tool config, no second bill.
|
|
23
22
|
|
|
24
23
|
```bash
|
|
24
|
+
# 1. Install
|
|
25
25
|
npm install -g @askalf/dario
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
|
|
27
|
+
# 2. Log in to your Claude subscription (Pro, Max 5x, or Max 20x)
|
|
28
|
+
dario login # or `dario login --manual` for SSH / headless
|
|
29
|
+
|
|
30
|
+
# 3. Start the local proxy
|
|
31
|
+
dario proxy # separate terminal or background
|
|
32
|
+
|
|
33
|
+
# 4. Point any Anthropic-compat tool at it
|
|
28
34
|
export ANTHROPIC_BASE_URL=http://localhost:3456
|
|
29
35
|
export ANTHROPIC_API_KEY=dario
|
|
30
36
|
```
|
|
31
37
|
|
|
32
38
|
That's the whole setup. Every tool that honors those env vars now runs on your subscription.
|
|
33
39
|
|
|
34
|
-
**
|
|
40
|
+
**Works with:** Claude Code, Cursor, Aider, Cline, Roo Code, Continue.dev, Zed, Windsurf, OpenHands, OpenClaw, Hermes, Codex CLI, the [Claude Agent SDK](https://www.npmjs.com/package/@anthropic-ai/claude-agent-sdk), your own scripts.
|
|
41
|
+
|
|
42
|
+
Add other providers and reuse the same proxy:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
dario backend add openai --key=sk-proj-...
|
|
46
|
+
dario backend add groq --key=gsk_... --base-url=https://api.groq.com/openai/v1
|
|
47
|
+
dario backend add openrouter --key=sk-or-... --base-url=https://openrouter.ai/api/v1
|
|
48
|
+
dario backend add local --key=anything --base-url=http://127.0.0.1:11434/v1
|
|
49
|
+
|
|
50
|
+
export OPENAI_BASE_URL=http://localhost:3456/v1
|
|
51
|
+
export OPENAI_API_KEY=dario
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Force a specific backend with a model prefix: `openai:gpt-4o`, `claude:opus`, `groq:llama-3.3-70b`, `local:qwen-coder`. Prefer Docker? `ghcr.io/askalf/dario:latest` — multi-arch (`amd64`+`arm64`), published every release ([guide](./docs/docker.md)). Something off? `dario doctor` prints one paste-ready health report.
|
|
55
|
+
|
|
56
|
+
### The interactive TUI
|
|
35
57
|
|
|
36
|
-
|
|
58
|
+
Type `dario` with no args (in another terminal) to open a full-screen control panel — live request stream, per-model burn-rate, rate-limit utilization, billing-bucket breakdown, and an in-place config editor that writes to `~/.dario/config.json`. It turns subscription accounting from "log files" into "watch it happen." Pure ANSI, zero new runtime deps. Migrating from v3? See [MIGRATION.md](MIGRATION.md).
|
|
37
59
|
|
|
38
60
|
```
|
|
39
|
-
┌─ dario v4 ──────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
61
|
+
┌─ dario ─────────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
40
62
|
│ Status Config ▎Analytics▎ Hits Accounts Backends │
|
|
41
63
|
├─────────────────────────────────────────────────────────────────────┤
|
|
42
64
|
│ ANALYTICS — last 60 min │
|
|
43
65
|
│ │
|
|
44
66
|
│ Requests: 247 (4.1/min) Tokens in: 142,830 │
|
|
45
67
|
│ Tokens out: 38,200 Subscription %: 98% │
|
|
46
|
-
│ Avg latency: 1,234 ms │
|
|
47
68
|
│ │
|
|
48
69
|
│ Per-model: │
|
|
49
70
|
│ opus-4-7 ████████████████████░ 72% (178 req) │
|
|
@@ -56,27 +77,6 @@ That's the whole setup. Every tool that honors those env vars now runs on your s
|
|
|
56
77
|
└─────────────────────────────────────────────────────────────────────┘
|
|
57
78
|
```
|
|
58
79
|
|
|
59
|
-
```
|
|
60
|
-
┌─ dario v4 ──────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
61
|
-
│ Status Config Analytics ▎Hits▎ Accounts Backends │
|
|
62
|
-
├─────────────────────────────────────────────────────────────────────┤
|
|
63
|
-
│ HITS — live · 3,142 buffered │
|
|
64
|
-
│ │
|
|
65
|
-
│ time model in out lat st │
|
|
66
|
-
│ ▎18:42:01 opus-4-7 842 216 1.2s 200 │
|
|
67
|
-
│ 18:42:03 sonnet-4-6 1.2k 480 0.8s 200 │
|
|
68
|
-
│ 18:42:05 haiku-4-5 120 24 0.3s 200 │
|
|
69
|
-
│ 18:42:07 opus-4-7 2.4k 900 3.1s 200 │
|
|
70
|
-
│ ─────────────────────────────────────────────────────────────────── │
|
|
71
|
-
│ Selected: 18:42:01 req_011Cb52VKMBsB6z6w28NvMn │
|
|
72
|
-
│ Account: default │
|
|
73
|
-
│ Model: claude-opus-4-7 │
|
|
74
|
-
│ Billing bucket: subscription │
|
|
75
|
-
│ Tokens: in 842 / out 216 / cache-read 6.2k │
|
|
76
|
-
│ Util at request: 5h 18% · 7d 8% │
|
|
77
|
-
└─────────────────────────────────────────────────────────────────────┘
|
|
78
|
-
```
|
|
79
|
-
|
|
80
80
|
---
|
|
81
81
|
|
|
82
82
|
## The money
|
|
@@ -87,7 +87,7 @@ That's the whole setup. Every tool that honors those env vars now runs on your s
|
|
|
87
87
|
| Multi-tool heavy use (Cursor + Aider + Cline + Continue), per-token | **$200–$600+** |
|
|
88
88
|
| **Any of the above + dario** | **$20–$200 flat** — your existing Pro/Max plan, nothing extra |
|
|
89
89
|
|
|
90
|
-
Switching providers is a model-name change, not a reconfigure
|
|
90
|
+
Switching providers is a model-name change, not a reconfigure. Add a backend once and the same `localhost:3456` speaks OpenAI, Groq, OpenRouter, or a local Ollama too.
|
|
91
91
|
|
|
92
92
|
---
|
|
93
93
|
|
|
@@ -103,7 +103,7 @@ On **2026-06-15**, Anthropic splits Claude billing in two. Agentic traffic — A
|
|
|
103
103
|
|
|
104
104
|
A sustained Cline or Aider session burns $100 of API-rate tokens in an evening. **Any proxy that forwards requests in their original `claude -p` / Agent-SDK shape — which is most of them — dumps your agentic traffic into that small credit bucket, then onto metered pricing.**
|
|
105
105
|
|
|
106
|
-
dario doesn't. Every outbound request is rebuilt into **interactive Claude Code wire-shape** before it leaves your machine — headers, body key order, TLS stack, session-id lifecycle, and (
|
|
106
|
+
dario doesn't. Every outbound request is rebuilt into **interactive Claude Code wire-shape** before it leaves your machine — headers, body key order, TLS stack, session-id lifecycle, and (`--stealth`) the temporal axis: response-correlated think-time and session-start latency. Anthropic's billing classifier sees an interactive Claude Code session. Your traffic stays in the subscription pool you already pay for.
|
|
107
107
|
|
|
108
108
|
| Your setup | After 2026-06-15 |
|
|
109
109
|
|---|---|
|
|
@@ -121,94 +121,76 @@ Same install, same `localhost:3456`, no config change for the cliff. Verify on y
|
|
|
121
121
|
Two layers, separated:
|
|
122
122
|
|
|
123
123
|
1. **Tiered pricing is fine.** Anthropic can charge differently for first-party use vs. third-party use. Every SaaS does this.
|
|
124
|
-
2. **Hiding the tier from the customer is not.** When the public docs
|
|
124
|
+
2. **Hiding the tier from the customer is not.** When the public docs say "1M context available on Sonnet/Opus" but the auth layer rejects every attempt to access it on the OAuth path most subscribers use — when the billing classifier silently flips your request to overage without saying which signal triggered it — that's information asymmetry weaponized into product design.
|
|
125
125
|
|
|
126
126
|
OpenAI does this cleanly: ChatGPT Plus is a chat product, the API is a separate metered product, you choose. Anthropic uses one URL and a hidden classifier. **dario's job is to make the classifier visible.**
|
|
127
127
|
|
|
128
128
|
We don't bypass auth. We don't fake who you are. We replay the exact wire shape Claude Code emits — captured live from your installed binary — so the classifier sees what it expects. That's a transparency tool, not a circumvention tool. Your subscription is doing what your subscription does; you're authenticating as you.
|
|
129
129
|
|
|
130
|
-
This is also why every dario release ships receipts: the [eight-signal classifier table](https://github.com/askalf/dario/discussions/13), the [drift watch records](.github/workflows/cc-drift-watch.yml), the auto-PR history. Anthropic doesn't publish what their classifier reads. dario does.
|
|
131
|
-
|
|
132
130
|
---
|
|
133
131
|
|
|
134
|
-
##
|
|
135
|
-
|
|
136
|
-
The 2026-06-15 split is announced. The wire-shape changes that arrive between releases are not. This is the cadence:
|
|
137
|
-
|
|
138
|
-
**Claude Code v2.1.142 ([changelog](https://code.claude.com/docs/en/changelog), 2026-05-14)** — itemizes a Fast-mode default change, MCP timeout fixes, plugin path fixes, terminal display tweaks, a stale model-name removal. Says **nothing** about these three wire-shape changes that landed in the same release:
|
|
139
|
-
|
|
140
|
-
| What changed in v2.1.142 (silent) | Effect on subscribers | dario detected | dario shipped |
|
|
141
|
-
|---|---|---|---|
|
|
142
|
-
| `context-1m-2025-08-07` dropped from default `anthropic-beta` header set, and the beta is categorically rejected on OAuth subscription auth | Subscription users lose >200K context on Sonnet/Opus. Anthropic docs at [platform.claude.com/docs/en/build-with-claude/context-windows](https://platform.claude.com/docs/en/build-with-claude/context-windows) still list 1M context as available for these models with no OAuth caveat. | hourly drift watcher | v3.38.3 (re-bake) + v3.38.4 (compat range) — 2026-05-14/15 |
|
|
143
|
-
| `thinking: {type: "adaptive"}` gated per-model server-side — only Opus/Sonnet 4-6+ accept; older 4-5 models 400 with `"adaptive thinking is not supported on this model"` | Anyone targeting Sonnet 4-5 or Opus 4-5 through any proxy 400s every request until they remove the field | live-probe matrix (this session) | **v3.38.5** — published 2026-05-15T21:20:22Z ([PR #273](https://github.com/askalf/dario/pull/273)) |
|
|
144
|
-
| `TodoWrite` / `TodoRead` removed from the tool catalog; replaced by the `Task*` family (TaskCreate, TaskGet, TaskList, TaskOutput, TaskStop, TaskUpdate) — no migration note | Any client that hardcoded the `todo_*` names now sends tools the server doesn't recognize | template re-bake | **v3.38.6** — published 2026-05-15T21:33:44Z ([PR #274](https://github.com/askalf/dario/pull/274)) |
|
|
145
|
-
|
|
146
|
-
**Three undisclosed wire-shape changes in one CC release. Three dario releases the same evening, 13 minutes apart.** The interactive Claude Code TUI is what makes this visible to you in real time — Hits tab shows the request shape going out, the rate-limit bucket coming back, and dario's auto-retry decision in between. v4.0.0 went live 2026-05-16T11:46:01Z; the drift cadence is the same as it's been since v3.22.
|
|
147
|
-
|
|
148
|
-
**Then it got worse.** v4.2.1 (2026-05-17) shipped receipts for a more aggressive class: **same CC binary, different wire output 24 hours apart.** Same `claude.exe` on disk that produced template A yesterday produces template B today — three slot diffs in one 24h window (`output_config.effort` default flipped `medium` → `high` → `xhigh`, `context-1m-2025-08-07` beta back in the header set, system_prompt -354 chars), zero changelog entries from Anthropic, no npm version bump anywhere. Anthropic ships wire-shape changes through CC's **remote configuration**, not just through CC npm releases.
|
|
149
|
-
|
|
150
|
-
**Three classes of drift, three watchers, all auto-detecting and auto-PR'ing.**
|
|
132
|
+
## How it works, and how it stays working
|
|
151
133
|
|
|
152
|
-
|
|
153
|
-
- **Class B — same-binary remote-config drift** *(v4.2.2)*. [`cc-drift-template-watch.yml`](./.github/workflows/cc-drift-template-watch.yml) (cron `*/30 * * * *`) runs on a self-hosted runner with an authenticated CC install, captures live every 30 min. On detection, **opens an auto-rebake PR** *(v4.4.0)* with the freshly-baked template and a unified-line diff inline *(v4.5.0)* — a reviewer sees ship-or-investigate in one screen. This is the only way to catch the class — github-hosted has no Pro/Max session, no OAuth credential, no way to capture from real CC. Setup: [`docs/drift-monitor.md`](./docs/drift-monitor.md).
|
|
154
|
-
- **Class C — classifier-rule drift** *(v4.6.0)*. [`cc-billing-classifier-canary.yml`](./.github/workflows/cc-billing-classifier-canary.yml) sends one live request through dario's canonical-rebuild mode daily, asserts the `representative-claim` response header still maps to a subscription bucket. Catches the orthogonal failure mode where CC's wire shape is unchanged but Anthropic's classifier rules shifted underneath.
|
|
155
|
-
- **PR-time compat gate** *(v4.3.0)*. [`compat-test-self-hosted.yml`](./.github/workflows/compat-test-self-hosted.yml) runs the full compat suite against a live `dario proxy` on every PR that touches the wire-shape surface — regressions fail the merge check **before** they ship.
|
|
156
|
-
- **Liveness alarm** *(v4.4.2)*. [`cc-drift-watcher-liveness.yml`](./.github/workflows/cc-drift-watcher-liveness.yml) runs on github-hosted infra every 2 hours, alerts if the self-hosted class-B watcher hasn't had a successful run in 3 hours. Watches the watcher; survives the exact failure modes it's designed to detect.
|
|
157
|
-
|
|
158
|
-
**Anthropic doesn't publish a wire-level changelog for subscribers. dario is one.**
|
|
159
|
-
|
|
160
|
-
---
|
|
161
|
-
|
|
162
|
-
## What the billing classifier actually does
|
|
134
|
+
dario doesn't *guess* Claude Code's request shape — it captures it live from your installed `claude` binary on every startup, drift-detects against each upstream CC release, and replays it byte-for-byte. That's why the billing classifier can't tell the difference. Deep dive: [`docs/wire-fidelity.md`](./docs/wire-fidelity.md).
|
|
163
135
|
|
|
164
|
-
[Discussion #13](https://github.com/askalf/dario/discussions/13) documents eight binary signals identified via MITM capture + binary RE + controlled A/B testing with a real Max 5x subscriber.
|
|
136
|
+
**What the classifier reads.** [Discussion #13](https://github.com/askalf/dario/discussions/13) documents eight binary signals identified via MITM capture + binary RE + controlled A/B testing with a real Max 5x subscriber. The classifier is rule-based, not ML — transitions are sharp, and the same input produced the same classification 100% of the time across 6 A/B trials:
|
|
165
137
|
|
|
166
138
|
| Signal | Claude Code value | Non-CC value |
|
|
167
139
|
|---|---|---|
|
|
168
140
|
| `output_config.effort` | `medium` (CC default) | other → reclassified |
|
|
169
141
|
| `max_tokens` | `64000` | other → reclassified |
|
|
170
|
-
| `thinking` shape | `{type: "adaptive"}` *(per-model
|
|
142
|
+
| `thinking` shape | `{type: "adaptive"}` *(per-model)* | `{enabled, budget_tokens: N}` → reclassified |
|
|
171
143
|
| System prompt block count | exactly 3 | other → reclassified |
|
|
172
|
-
| Tool names | `Bash`, `Read`, `Write`, `Edit`, …
|
|
144
|
+
| Tool names | `Bash`, `Read`, `Write`, `Edit`, … | non-CC names → reclassified |
|
|
173
145
|
| Per-request billing tag | rolling SHA-256 | missing/static → reclassified |
|
|
174
146
|
| JSON field order | specific stable order | different → reclassified |
|
|
175
147
|
| Non-CC body fields (`temperature`, `top_p`, `service_tier`) | absent | present → reclassified |
|
|
176
148
|
|
|
177
|
-
[Discussion #178](https://github.com/askalf/dario/discussions/178) reproduces
|
|
149
|
+
[Discussion #178](https://github.com/askalf/dario/discussions/178) reproduces a ninth fingerprint operating on commit metadata: the classifier fires on the literal namespaced string `openclaw.inbound_meta.v1` in recent git commits. dario's template replay protects you because that git context never reaches `api.anthropic.com` — only dario's captured CC template does.
|
|
150
|
+
|
|
151
|
+
**Why this needs constant maintenance.** The 2026-06-15 split is announced; the wire-shape changes that arrive between releases are not. CC v2.1.142 ([changelog](https://code.claude.com/docs/en/changelog), 2026-05-14) itemized a Fast-mode tweak and some fixes — and said **nothing** about three wire-shape changes in the same release:
|
|
152
|
+
|
|
153
|
+
| Silent change in v2.1.142 | Effect on subscribers | dario shipped |
|
|
154
|
+
|---|---|---|
|
|
155
|
+
| `context-1m-2025-08-07` dropped from the default beta set + rejected on OAuth auth | Subscription users lose >200K context on Sonnet/Opus | v3.38.3–4 (2026-05-14/15) |
|
|
156
|
+
| `thinking: {type:"adaptive"}` gated per-model server-side | Every request to Sonnet 4-5 or Opus 4-5 through any proxy returns a 400 until the field is removed | [v3.38.5](https://github.com/askalf/dario/pull/273) — 2026-05-15 |
|
|
157
|
+
| `TodoWrite`/`TodoRead` replaced by the `Task*` family, no migration note | Clients hardcoding `todo_*` send unrecognized tools | [v3.38.6](https://github.com/askalf/dario/pull/274) — 13 min later |
|
|
178
158
|
|
|
179
|
-
|
|
159
|
+
And it gets subtler: v4.2.1 (2026-05-17) shipped receipts for **same CC binary, different wire output 24 hours apart** — Anthropic ships changes through CC's *remote configuration*, not just npm releases. So dario runs **three classes of drift detection**, all auto-detecting and auto-PR'ing:
|
|
160
|
+
|
|
161
|
+
- **Class A — npm-release drift.** [`cc-drift-watch.yml`](./.github/workflows/cc-drift-watch.yml) (hourly, github-hosted) catches each new CC npm release; [`cc-drift-auto-release.yml`](./.github/workflows/cc-drift-auto-release.yml) auto-drafts, merges, and ships within minutes.
|
|
162
|
+
- **Class B — same-binary remote-config drift** *(v4.2.2)*. [`cc-drift-template-watch.yml`](./.github/workflows/cc-drift-template-watch.yml) (every 30 min, self-hosted runner with an authenticated CC install) captures live and **opens an auto-rebake PR** with a unified-line diff inline. The only way to catch this class — github-hosted has no Pro/Max session to capture from.
|
|
163
|
+
- **Class C — classifier-rule drift** *(v4.6.0)*. [`cc-billing-classifier-canary.yml`](./.github/workflows/cc-billing-classifier-canary.yml) sends one live request daily and asserts the `representative-claim` header still maps to a subscription bucket — catches Anthropic changing the *rules* while the wire shape is unchanged.
|
|
164
|
+
- **Guards on the guards.** A [PR-time compat gate](./.github/workflows/compat-test-self-hosted.yml) *(v4.3.0)* runs the full compat suite against a live proxy before any wire-shape PR merges; a [liveness alarm](./.github/workflows/cc-drift-watcher-liveness.yml) *(v4.4.2)* fires if the Class-B watcher goes quiet for 8h. Setup + walkthrough: [`docs/drift-monitor.md`](./docs/drift-monitor.md).
|
|
165
|
+
|
|
166
|
+
**Anthropic doesn't publish a wire-level changelog for subscribers. dario is one.**
|
|
180
167
|
|
|
181
168
|
---
|
|
182
169
|
|
|
183
|
-
## What
|
|
170
|
+
## What it routes
|
|
184
171
|
|
|
185
|
-
|
|
172
|
+
You point every tool at one URL. dario reads each request, decides which backend owns it, forwards in that backend's native protocol.
|
|
186
173
|
|
|
187
|
-
|
|
174
|
+
| Client speaks | Model | Routes to | What happens |
|
|
175
|
+
|---|---|---|---|
|
|
176
|
+
| Anthropic Messages | `claude-*` / `opus` / `sonnet` / `haiku` | Claude backend | OAuth swap + CC template replay → `api.anthropic.com` |
|
|
177
|
+
| Anthropic Messages | `gpt-*`, `llama-*`, … | OpenAI-compat backend | Anthropic→OpenAI translation, forwarded |
|
|
178
|
+
| OpenAI Chat | `gpt-*` / `o1-*` / `o3-*` | OpenAI-compat backend | Auth swap, body forwarded byte-for-byte |
|
|
179
|
+
| OpenAI Chat | `claude-*` | Claude backend | OpenAI→Anthropic translation, then Claude path |
|
|
180
|
+
| Either | `<provider>:<model>` | Forced by prefix | Explicit override |
|
|
188
181
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
"error": {
|
|
193
|
-
"type": "dario_overage_guard",
|
|
194
|
-
"message": "dario halted to prevent API-rate bleed. A request was classified as 'overage' (per-token billing) instead of your subscription pool. To resume: run `dario resume` in another terminal, or wait until <ISO ts> for the cooldown to auto-clear. Details: github.com/askalf/dario/issues/288"
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
```
|
|
182
|
+
The tool doesn't know. The backend doesn't know. dario is the seam.
|
|
183
|
+
|
|
184
|
+
---
|
|
198
185
|
|
|
199
|
-
|
|
186
|
+
## Overage guard
|
|
200
187
|
|
|
201
|
-
|
|
202
|
-
|---|---|---|
|
|
203
|
-
| **Status** | What's happening RIGHT NOW? | `⚠ HALTED` banner with triggering request, cause, live countdown to auto-resume, manual-resume hint |
|
|
204
|
-
| **Hits** | Which specific request triggered it? | Pinned banner across the top + red `!` marker + red row on the triggering request in the live buffer + 503-status row for any blocked-while-halted requests |
|
|
205
|
-
| **Analytics** | How often is this happening across my traffic? | New "Overage" bar in the rate-limit cluster, alongside 5h/7d — red the moment count is non-zero |
|
|
206
|
-
| **Config** | How do I tune this? | Four in-place-editable fields: `overageGuard.enabled`, `.behavior` (enum-validated halt/warn), `.cooldownMs`, `.notifyOs` |
|
|
188
|
+
A subscriber should never see a single `representative-claim: overage` response during normal operation. One means something is wrong — wire-shape drift, a classifier change, an account misconfig — and continuing to forward requests in the same shape bleeds real money (accounts with extra-usage enabled) or returns a wall of rejections (accounts without it). The first hit is the signal; the second through hundredth are damage.
|
|
207
189
|
|
|
208
|
-
Status and Hits during an active halt:
|
|
190
|
+
So the moment any upstream response carries `representative-claim: overage`, dario **halts the proxy**. Every subsequent request returns `503` with an Anthropic-shaped error body the client surfaces verbatim, until you run `dario resume`, press `R` on the TUI, or the cooldown clears (default 30 min). The halt is visible across the TUI's Status, Hits, and Analytics tabs, fires a best-effort native OS notification, and emits named SSE events.
|
|
209
191
|
|
|
210
192
|
```
|
|
211
|
-
┌─ dario v4.1 ────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
193
|
+
┌─ dario ─────────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
212
194
|
│ ▎Status▎ Config Analytics Hits Accounts Backends │
|
|
213
195
|
├─────────────────────────────────────────────────────────────────────┤
|
|
214
196
|
│ Overage-guard │
|
|
@@ -217,144 +199,22 @@ Status and Hits during an active halt:
|
|
|
217
199
|
│ Cause: representative-claim = overage │
|
|
218
200
|
│ Auto-resume in 29m 48s │
|
|
219
201
|
│ Manual resume press R here, or `dario resume` from any shell │
|
|
220
|
-
│ │
|
|
221
|
-
│ Last refresh: just now. r refresh · R resume. │
|
|
222
|
-
└─────────────────────────────────────────────────────────────────────┘
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
```
|
|
226
|
-
┌─ dario v4.1 ────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
227
|
-
│ Status Config Analytics ▎Hits▎ Accounts Backends │
|
|
228
|
-
├─────────────────────────────────────────────────────────────────────┤
|
|
229
|
-
│ Hits 248 buffered · live │
|
|
230
|
-
│ │
|
|
231
|
-
│ ⚠ HALTED overage detected at 15:54:28 on opus-4-7 acct=work │
|
|
232
|
-
│ → New /v1/messages return 503 until R here, or `dario resume` │
|
|
233
|
-
│ │
|
|
234
|
-
│ time model in out lat st │
|
|
235
|
-
│ ▎15:54:31 opus-4-7 2.1k — — 503 │
|
|
236
|
-
│ 15:54:29 haiku-4-5 120 24 0.3s 200 │
|
|
237
|
-
│ ! 15:54:28 opus-4-7 1.4k 216 1.2s 200 ◀ red row │
|
|
238
|
-
│ 15:54:25 sonnet-4-6 1.2k 480 0.8s 200 │
|
|
239
|
-
│ 15:54:20 opus-4-7 842 216 1.2s 200 │
|
|
240
|
-
│ ────────────────────────────────────────────────────────────── │
|
|
241
|
-
│ Selected: 15:54:31 req_011Cb52VKMBsB6z6w28NvMn │
|
|
242
|
-
│ Account: work │
|
|
243
|
-
│ Model: claude-opus-4-7 │
|
|
244
|
-
│ Billing bucket: (halted before upstream — no claim) │
|
|
245
|
-
│ Status: 503 dario_overage_guard │
|
|
246
|
-
└─────────────────────────────────────────────────────────────────────┘
|
|
247
|
-
```
|
|
248
|
-
|
|
249
|
-
Analytics — the burn-rate view, with the new Overage bar at the bottom of the rate-limit cluster (here showing one overage hit out of 248 — which is enough to halt by default):
|
|
250
|
-
|
|
251
|
-
```
|
|
252
|
-
┌─ dario v4.1 ────────────────────────[ q quit · Tab next · ? help ]──┐
|
|
253
|
-
│ Status Config ▎Analytics▎ Hits Accounts Backends │
|
|
254
|
-
├─────────────────────────────────────────────────────────────────────┤
|
|
255
|
-
│ Analytics — last 60 min │
|
|
256
|
-
│ │
|
|
257
|
-
│ Requests: 248 (4.1/min) │
|
|
258
|
-
│ Tokens in: 142,830 │
|
|
259
|
-
│ Tokens out: 38,200 │
|
|
260
|
-
│ Subscription %: 99% │
|
|
261
|
-
│ │
|
|
262
|
-
│ Rate-limit │
|
|
263
|
-
│ 5h ████░░░░░░░░░░░░░░░░░░░░░░░░ 18% │
|
|
264
|
-
│ 7d ██░░░░░░░░░░░░░░░░░░░░░░░░░░ 8% │
|
|
265
|
-
│ Overage █░░░░░░░░░░░░░░░░░░░░░░░░░░░ 1 req of 248 │
|
|
266
|
-
│ ⮤ red — the moment count is non-zero │
|
|
267
|
-
│ │
|
|
268
|
-
│ Billing │
|
|
269
|
-
│ subscription 247 req │
|
|
270
|
-
│ extra_usage 1 req │
|
|
271
202
|
└─────────────────────────────────────────────────────────────────────┘
|
|
272
203
|
```
|
|
273
204
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
**Resume paths** — `dario resume` from any shell, `R` on the TUI Status tab, or the cooldown timer (default 30 min). **Configuration** — `~/.dario/config.json` → `overageGuard`, or CLI flags (`--overage-behavior=warn` for visibility-only, `--no-overage-guard` to disable, `--overage-cooldown=<ms>` to tune). **OS notification** — best-effort native toast (osascript / notify-send / BurntToast) plus terminal BEL as the unconditional floor. See [#288](https://github.com/askalf/dario/issues/288).
|
|
277
|
-
|
|
278
|
-
**Verified end-to-end live.** [`test/overage-guard-e2e-live.mjs`](./test/overage-guard-e2e-live.mjs) patches `globalThis.fetch` to mock the upstream, starts a real dario proxy in-process, and drives the five-stage halt cycle through real HTTP: subscription request flows → upstream returns overage → guard halts → next request returns 503 with the `dario_overage_guard` body → `POST /admin/resume` clears state → requests flow again. 20/20 assertions, 3 upstream calls intercepted (the halted request short-circuited at the request handler, never touched the upstream). Run with `node test/overage-guard-e2e-live.mjs`.
|
|
279
|
-
|
|
280
|
-
---
|
|
281
|
-
|
|
282
|
-
## How it actually works
|
|
283
|
-
|
|
284
|
-
dario doesn't *guess* Claude Code's request shape — it captures it live from your installed `claude` binary on every startup, drift-detects against each upstream CC release, and replays it byte-for-byte. That's why the billing classifier can't tell the difference. Deep dive: [`docs/wire-fidelity.md`](./docs/wire-fidelity.md).
|
|
285
|
-
|
|
286
|
-
---
|
|
287
|
-
|
|
288
|
-
## 30 seconds, in full
|
|
289
|
-
|
|
290
|
-
```bash
|
|
291
|
-
# 1. Install
|
|
292
|
-
npm install -g @askalf/dario
|
|
293
|
-
|
|
294
|
-
# 2. Log in to your Claude subscription (Pro, Max 5x, or Max 20x)
|
|
295
|
-
dario login # or `dario login --manual` for SSH / headless
|
|
296
|
-
|
|
297
|
-
# 3. Start the local proxy in one terminal
|
|
298
|
-
dario proxy
|
|
299
|
-
|
|
300
|
-
# 4. (Optional, recommended) Open the interactive TUI in another terminal
|
|
301
|
-
dario # tabs: Status / Config / Analytics / Hits / Accounts / Backends
|
|
302
|
-
|
|
303
|
-
# 5. Point any Anthropic-compat tool at the proxy
|
|
304
|
-
export ANTHROPIC_BASE_URL=http://localhost:3456
|
|
305
|
-
export ANTHROPIC_API_KEY=dario
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
Works with: Claude Code, Cursor, Aider, Cline, Roo Code, Continue.dev, Zed, Windsurf, OpenHands, OpenClaw, Hermes, Codex CLI, the [Claude Agent SDK](https://www.npmjs.com/package/@anthropic-ai/claude-agent-sdk), your own scripts.
|
|
309
|
-
|
|
310
|
-
Add other providers and reuse the same proxy:
|
|
311
|
-
|
|
312
|
-
```bash
|
|
313
|
-
dario backend add openai --key=sk-proj-...
|
|
314
|
-
dario backend add groq --key=gsk_... --base-url=https://api.groq.com/openai/v1
|
|
315
|
-
dario backend add openrouter --key=sk-or-... --base-url=https://openrouter.ai/api/v1
|
|
316
|
-
dario backend add local --key=anything --base-url=http://127.0.0.1:11434/v1
|
|
317
|
-
|
|
318
|
-
export OPENAI_BASE_URL=http://localhost:3456/v1
|
|
319
|
-
export OPENAI_API_KEY=dario
|
|
320
|
-
```
|
|
321
|
-
|
|
322
|
-
Force a specific backend with a model prefix: `openai:gpt-4o`, `claude:opus`, `groq:llama-3.3-70b`, `local:qwen-coder`.
|
|
323
|
-
|
|
324
|
-
Prefer Docker? `ghcr.io/askalf/dario:latest` — multi-arch (`amd64`+`arm64`), published every release. Guide: [`docs/docker.md`](./docs/docker.md).
|
|
325
|
-
|
|
326
|
-
Something off? `dario doctor` prints one paste-ready health report. Or open the TUI's Status tab.
|
|
327
|
-
|
|
328
|
-
---
|
|
329
|
-
|
|
330
|
-
## What it actually does
|
|
331
|
-
|
|
332
|
-
You point every tool at one URL. Dario reads each request, decides which backend owns it, forwards in that backend's native protocol.
|
|
333
|
-
|
|
334
|
-
| Client speaks | Model | Routes to | What happens |
|
|
335
|
-
|---|---|---|---|
|
|
336
|
-
| Anthropic Messages | `claude-*` / `opus` / `sonnet` / `haiku` | Claude backend | OAuth swap + CC template replay → `api.anthropic.com` |
|
|
337
|
-
| Anthropic Messages | `gpt-*`, `llama-*`, … | OpenAI-compat backend | Anthropic→OpenAI translation, forwarded |
|
|
338
|
-
| OpenAI Chat | `gpt-*` / `o1-*` / `o3-*` | OpenAI-compat backend | Auth swap, body forwarded byte-for-byte |
|
|
339
|
-
| OpenAI Chat | `claude-*` | Claude backend | OpenAI→Anthropic translation, then Claude path |
|
|
340
|
-
| Either | `<provider>:<model>` | Forced by prefix | Explicit override |
|
|
341
|
-
|
|
342
|
-
The tool doesn't know. The backend doesn't know. Dario is the seam.
|
|
205
|
+
Tune via `~/.dario/config.json` → `overageGuard`, or CLI flags: `--overage-behavior=warn` (visibility-only), `--no-overage-guard` (off), `--overage-cooldown=<ms>`. Verified end-to-end by [`test/overage-guard-e2e-live.mjs`](./test/overage-guard-e2e-live.mjs) — a real in-process proxy driven through the five-stage halt cycle over real HTTP. Background: [#288](https://github.com/askalf/dario/issues/288).
|
|
343
206
|
|
|
344
207
|
---
|
|
345
208
|
|
|
346
209
|
## Capabilities
|
|
347
210
|
|
|
348
|
-
- **
|
|
349
|
-
- **Three-class drift detection.** [`cc-drift-watch.yml`](./.github/workflows/cc-drift-watch.yml) catches **Class A** (new CC npm releases) hourly on a github-hosted runner — auto-drafts + auto-merges a fix PR; median CC release → dario release under one hour. [`cc-drift-template-watch.yml`](./.github/workflows/cc-drift-template-watch.yml) *(v4.2.2)* catches **Class B** (same-binary remote-config drift — Anthropic changing wire output *without* bumping the npm version, first documented in [CHANGELOG v4.2.1](./CHANGELOG.md#421---2026-05-17)) every 30 min on a self-hosted runner with authenticated CC, **opens an auto-rebake PR** *(v4.4.0)* with a unified-line diff inline *(v4.5.0)*. [`cc-billing-classifier-canary.yml`](./.github/workflows/cc-billing-classifier-canary.yml) *(v4.6.0)* catches **Class C** (Anthropic changing classifier rules — same wire shape, different billing bucket) via a daily 1-request canary. [`compat-test-self-hosted.yml`](./.github/workflows/compat-test-self-hosted.yml) *(v4.3.0)* runs the full compat suite against a live proxy on every PR that touches the wire-shape surface. [`cc-drift-watcher-liveness.yml`](./.github/workflows/cc-drift-watcher-liveness.yml) *(v4.4.2)* alarms if the class-B watcher itself goes offline. Walkthrough: [`docs/drift-monitor.md`](./docs/drift-monitor.md).
|
|
350
|
-
- **Multi-account pool.** Drop 2+ Claude accounts in `~/.dario/accounts/` and pool mode auto-activates: every request routes to the account with the most headroom, multi-turn sessions pin to one account so the prompt cache survives, in-flight 429s fail over to a peer before your client sees an error. `dario accounts add work` / `dario accounts add personal`. → [`docs/multi-account-pool.md`](./docs/multi-account-pool.md)
|
|
211
|
+
- **Multi-account pool.** Drop 2+ Claude accounts in `~/.dario/accounts/` and pool mode auto-activates: every request routes to the account with the most headroom, multi-turn sessions pin to one account so the prompt cache survives, in-flight 429s fail over to a peer before your client sees an error. → [`docs/multi-account-pool.md`](./docs/multi-account-pool.md)
|
|
351
212
|
- **Behavioral stealth (`--stealth`).** Static wire fidelity covers *what* the request looks like; `--stealth` adds *when* it arrives — response-length-correlated think time and 1.2–4.2s session-start latency, the inter-arrival pattern real interactive sessions have and agent loops don't. → [`docs/wire-fidelity.md`](./docs/wire-fidelity.md)
|
|
352
213
|
- **Runs any non-Claude-Code agent.** A 64-entry schema-verified `TOOL_MAP` pre-maps Cline, Roo, Kilo, Cursor, Windsurf, Continue, Copilot, OpenHands, OpenClaw, Hermes, [hands](https://github.com/askalf/hands) tool names to CC's native set. No flag, no validator errors. → [`docs/integrations/agent-compat.md`](./docs/integrations/agent-compat.md)
|
|
353
|
-
- **Shim mode** *(deprecated in v4.2; removal scheduled for v5.x)*. The original "no HTTP hop" path that patched `globalThis.fetch` inside a `dario shim -- <cmd>` child process. Empirically only matches 3 of the 8 wire-shape axes the billing classifier inspects (system blocks, agent identity, header order) and falls back to total passthrough when the client sends a 1-block system — which `claude -p` and Agent-SDK both do. Use **proxy mode** for any non-CC client; that's the only mode that rebuilds every request to CC's full canonical shape. Shim emits a deprecation banner on every invocation. See [CHANGELOG v4.2.0](./CHANGELOG.md) for the side-by-side fingerprint diff that drove this call.
|
|
354
214
|
- **Recover output capability.** `dario proxy --system-prompt=partial` strips CC's tone/verbosity/no-comments constraints for 1.2–2.8× more output on open-ended work — empirically without flipping billing (the classifier doesn't read that slot). [Discussion #183](https://github.com/askalf/dario/discussions/183) has the per-prompt receipts. → [`docs/system-prompt.md`](./docs/system-prompt.md)
|
|
355
|
-
- **Honor client thinking (`--honor-client-thinking
|
|
215
|
+
- **Honor client thinking (`--honor-client-thinking`).** By default dario rebuilds the outbound request with CC's interactive thinking shape regardless of what the client sent. Pass this flag (or `DARIO_HONOR_CLIENT_THINKING=1`) to pass a non-CC client's own `thinking` block through unchanged. Off by default; the rebuild-to-CC path is what keeps the subscription pool routing.
|
|
356
216
|
- **Reachable from inside CC / any MCP client.** `dario subagent install` registers a CC sub-agent for in-session diagnostics; `dario mcp` exposes dario as a read-only MCP server. → [`docs/sub-agent.md`](./docs/sub-agent.md) · [`docs/mcp-server.md`](./docs/mcp-server.md)
|
|
357
|
-
- **
|
|
217
|
+
- **Shim mode** *(deprecated v4.2, removal scheduled v5.x)*. The original "no HTTP hop" path empirically matched only 3 of the 8 classifier axes and fell back to passthrough for the 1-block system prompts `claude -p` and the Agent SDK both send. Use **proxy mode** for any non-CC client — it's the only mode that rebuilds every request to CC's full canonical shape.
|
|
358
218
|
|
|
359
219
|
---
|
|
360
220
|
|
|
@@ -364,10 +224,10 @@ The tool doesn't know. The backend doesn't know. Dario is the seam.
|
|
|
364
224
|
|---|---|
|
|
365
225
|
| Source | **~18.5k** lines of TypeScript across **44** files — auditable in a weekend |
|
|
366
226
|
| Dependencies | **0 runtime.** Verify: `npm ls --production` |
|
|
367
|
-
| Provenance | Every release [SLSA-attested](https://www.npmjs.com/package/@askalf/dario) via GitHub Actions + Sigstore
|
|
227
|
+
| Provenance | Every release [SLSA-attested](https://www.npmjs.com/package/@askalf/dario) via GitHub Actions + Sigstore |
|
|
368
228
|
| Scanning | [CodeQL](https://github.com/askalf/dario/actions/workflows/codeql.yml) on every push and weekly |
|
|
369
|
-
| Tests | **
|
|
370
|
-
| Drift response | [`cc-drift-watch.yml`](./.github/workflows/cc-drift-watch.yml)
|
|
229
|
+
| Tests | **84 test files**, **77 in the default `npm test` suite** (`test/all.test.mjs`) — green on every release |
|
|
230
|
+
| Drift response | hourly [`cc-drift-watch.yml`](./.github/workflows/cc-drift-watch.yml) + auto-publish on merge — median CC-release → dario-release under one hour |
|
|
371
231
|
| Credentials | Never logged, redacted from errors, `0600` on disk in `0700` dirs; MCP server redacts at the tool boundary |
|
|
372
232
|
| Network | Binds `127.0.0.1` by default; upstream only to configured backends over HTTPS; hardcoded SSRF allowlist |
|
|
373
233
|
| Telemetry | **None.** No analytics, no tracking, no data collection |
|
|
@@ -382,26 +242,17 @@ cd $(npm root -g)/@askalf/dario && npm ls --production
|
|
|
382
242
|
|
|
383
243
|
## Project status — maintenance mode
|
|
384
244
|
|
|
385
|
-
As of **2026-05-19**, dario is in maintenance mode. New feature work has stopped; what runs unattended is the part that
|
|
386
|
-
|
|
387
|
-
- **Drift watchers** open auto-rebake PRs within ~30 min of each new Claude Code release (Class A) and within ~30 min of in-version remote-config drift (Class B).
|
|
388
|
-
- **Compat suite** ([`compat-test-self-hosted.yml`](./.github/workflows/compat-test-self-hosted.yml)) gates every wire-shape-affecting PR against a live Anthropic call before merge — green compat means the rebake didn't break the subscription path.
|
|
389
|
-
- **Auto-release pipeline** publishes to npm + ghcr the moment a rebake PR merges; an [idempotency gate](./.github/workflows/cc-drift-auto-release.yml) backfills any registry that lagged so partial releases self-heal.
|
|
390
|
-
- **NPM_TOKEN health monitor** runs daily and opens a GitHub issue if the token rotates, expires, or has its scopes changed — token rot becomes a 24h-detection issue instead of an invisible release failure.
|
|
391
|
-
- **Billing-classifier canary** fires a single daily probe to detect when Anthropic shifts classifier rules (Class C), so you find out before traffic silently moves to the wrong pool.
|
|
392
|
-
- **Recovery runbook** ([`docs/recovery.md`](./docs/recovery.md)) covers the residual manual cases — OAuth credential rotation, runner re-registration, ghcr backfill — sorted by how often they actually come up.
|
|
245
|
+
As of **2026-05-19**, dario is in maintenance mode. New feature work has stopped; what runs unattended is the part that keeps your subscription routing intact — the [three drift watchers](#how-it-works-and-how-it-stays-working), the PR-time compat gate, the auto-release pipeline (with an idempotency gate that backfills any registry that lagged), the daily NPM_TOKEN health monitor, and the billing-classifier canary. Residual manual cases — OAuth credential rotation, runner re-registration, ghcr backfill — are in the [recovery runbook](./docs/recovery.md).
|
|
393
246
|
|
|
394
|
-
The proxy, TUI, multi-account pool, overage guard,
|
|
395
|
-
|
|
396
|
-
Feature velocity moved to the [askalf platform](https://askalf.org) — a self-hosted AI workforce that uses dario as its LLM substrate. Same engineering rigor (drift detection, SLSA provenance, zero-telemetry), aimed at the workforce layer above the proxy.
|
|
247
|
+
The proxy, TUI, multi-account pool, overage guard, and 2026-06-15 cliff protection are all stable surface. If Anthropic ships something new, the watchers + compat suite catch it within a release cycle and the maintainer reviews the bot-PR. Feature velocity moved to the [askalf platform](https://askalf.org) — a self-hosted AI workforce that uses dario as its LLM substrate.
|
|
397
248
|
|
|
398
249
|
---
|
|
399
250
|
|
|
400
251
|
## Who it's for
|
|
401
252
|
|
|
402
|
-
**Best fit:** developers juggling multiple LLM tools and per-tool API keys · Claude Pro/Max subscribers who want their plan usable everywhere, not just in Claude Code · teams running local/hosted OpenAI-compat servers who want one stable local endpoint · Agent SDK users who want OAuth-subscription routing with zero code change (`baseURL: 'http://localhost:3456'`) · power users wanting multi-account pooling + 429 failover
|
|
253
|
+
**Best fit:** developers juggling multiple LLM tools and per-tool API keys · Claude Pro/Max subscribers who want their plan usable everywhere, not just in Claude Code · teams running local/hosted OpenAI-compat servers who want one stable local endpoint · Agent SDK users who want OAuth-subscription routing with zero code change (`baseURL: 'http://localhost:3456'`) · power users wanting multi-account pooling + 429 failover.
|
|
403
254
|
|
|
404
|
-
**Not a fit:** you need vendor-managed production SLAs (use the provider APIs) · you want a hosted, multi-tenant team platform with dashboard / SSO / audit logs (that's
|
|
255
|
+
**Not a fit:** you need vendor-managed production SLAs (use the provider APIs) · you want a hosted, multi-tenant team platform with dashboard / SSO / audit logs (that's the [askalf platform](https://askalf.org), shipping soon) · you want a chat UI (use claude.ai).
|
|
405
256
|
|
|
406
257
|
---
|
|
407
258
|
|
|
@@ -418,12 +269,6 @@ Full flag/env reference: [`docs/commands.md`](./docs/commands.md) · SDK example
|
|
|
418
269
|
**Does this violate Anthropic's terms?**
|
|
419
270
|
Mechanically, dario uses your existing Claude Code OAuth tokens — it authenticates you as you, with your subscription, through Anthropic's official endpoints. Whether any particular use complies with current terms is between you and Anthropic; consult their terms and your agreement. Independent, unofficial, third-party — see [DISCLAIMER.md](DISCLAIMER.md).
|
|
420
271
|
|
|
421
|
-
**What does the v4 TUI actually do?**
|
|
422
|
-
Open `dario` with no args. Six tabs: **Status** shows proxy health + OAuth expiry + config source + overage-guard state (v4.1: halt banner with countdown + `R` to resume); **Config** edits `~/.dario/config.json` in place (bool toggles inline, numbers/strings open a prompt, `s` saves); **Analytics** polls `/analytics` every 2s and renders per-model bars + rate-limit utilization + an Overage bar that's red the moment count is non-zero (v4.1); **Hits** subscribes to `/analytics/stream` SSE for the live request feed with per-record detail drilldown and a pinned halt banner when overage is detected (v4.1); **Accounts** lists the pool; **Backends** lists OpenAI-compat backends. Pure ANSI, zero new runtime deps. Migration from v3: [MIGRATION.md](MIGRATION.md).
|
|
423
|
-
|
|
424
|
-
**What if a request lands in `overage` despite the wire-shape replay?**
|
|
425
|
-
v4.1+ halts the proxy on the first overage response and returns 503 to subsequent requests until you investigate. See [What dario does when overage lands](#what-dario-does-when-overage-lands-v41). The TUI Status tab shows the triggering request + countdown to auto-resume; `dario resume` from any shell clears the halt immediately; `--overage-behavior=warn` switches to visibility-only mode if you'd rather see the signal than block traffic.
|
|
426
|
-
|
|
427
272
|
**Do I need Claude Code installed?**
|
|
428
273
|
Recommended, not required. With CC, `dario login` picks up credentials automatically and the live template extractor reads your binary on every startup. Without it, dario runs its own OAuth flow and falls back to the bundled (scrubbed) template snapshot.
|
|
429
274
|
|
|
@@ -431,16 +276,13 @@ Recommended, not required. With CC, `dario login` picks up credentials automatic
|
|
|
431
276
|
Optional, recommended — Bun's TLS ClientHello matches CC's runtime. Without it dario works fine; `dario doctor` flags the mismatch and `--strict-tls` hard-fails until resolved.
|
|
432
277
|
|
|
433
278
|
**Can I use dario without a Claude subscription?**
|
|
434
|
-
Yes. Skip `dario login`, `dario backend add openai --key=…`, and you have a local OpenAI-compat router with no Claude involvement.
|
|
279
|
+
Yes. Skip `dario login`, run `dario backend add openai --key=…`, and you have a local OpenAI-compat router with no Claude involvement.
|
|
435
280
|
|
|
436
|
-
**`representative-claim: seven_day` in my
|
|
281
|
+
**`representative-claim: seven_day` in my headers — am I downgraded?**
|
|
437
282
|
No. `five_hour` and `seven_day` are both subscription billing — different accounting buckets, same mode. `overage` is the one that flips you to per-token. [Discussion #1](https://github.com/askalf/dario/discussions/1).
|
|
438
283
|
|
|
439
|
-
**Will the 2026-06-15 split break my
|
|
440
|
-
No — see [The deadline](#the-deadline-2026-06-15)
|
|
441
|
-
|
|
442
|
-
**What if Anthropic ships another silent change tomorrow?**
|
|
443
|
-
The three-class drift watcher picks it up — npm-release changes hourly on a github-hosted runner, in-version remote-config changes every 30 min on a self-hosted runner with real CC, classifier-rule changes via a daily live canary. Class A auto-drafts + auto-merges; Class B auto-rebakes the bundled template and opens a PR with a unified diff inline; Class C opens a labeled alert with diagnosis hints. v3.38.5 + v3.38.6 (13 min apart, same-day fix for v2.1.142's silent drops) and v4.2.1's same-binary remote-config receipts are the prior art. The TUI's Hits tab shows you the request shape in real time, so you'll see drift the moment it happens on your machine.
|
|
284
|
+
**Will the 2026-06-15 split break my setup? / What if Anthropic ships another silent change?**
|
|
285
|
+
No, and it's caught automatically — see [The deadline](#the-deadline-2026-06-15) and [How it stays working](#how-it-works-and-how-it-stays-working). dario rewrites every request to interactive-CC shape before it reaches `api.anthropic.com`, and the three-class drift watcher picks up new changes (npm-release hourly, remote-config every 30 min, classifier-rule daily). v3.38.5 + v3.38.6 — 13 minutes apart, same day as v2.1.142's silent drops — are the prior art.
|
|
444
286
|
|
|
445
287
|
Full FAQ: [`docs/faq.md`](./docs/faq.md)
|
|
446
288
|
|
|
@@ -466,7 +308,7 @@ PRs welcome. Small TypeScript codebase, zero runtime deps. Architecture + file-b
|
|
|
466
308
|
git clone https://github.com/askalf/dario && cd dario
|
|
467
309
|
npm install
|
|
468
310
|
npm run dev # tsx, no build step
|
|
469
|
-
npm test #
|
|
311
|
+
npm test # 77 test files via test/all.test.mjs, green on every release
|
|
470
312
|
npm run e2e # live proxy + OAuth (needs a working Claude backend)
|
|
471
313
|
```
|
|
472
314
|
|
|
@@ -489,15 +331,12 @@ npm run e2e # live proxy + OAuth (needs a working Claude backend)
|
|
|
489
331
|
|
|
490
332
|
Anthropic doesn't publish a wire-level changelog for subscribers. The dario repo is the closest thing that exists. Every silent change Anthropic ships, the drift watcher catches; every fix dario ships, the public record gets longer. That accumulating record is what makes the asymmetry visible to the next subscriber who can't explain why their burn rate spiked.
|
|
491
333
|
|
|
492
|
-
|
|
334
|
+
- **Star the repo.** The most legible public signal that this matters.
|
|
335
|
+
- **Install + run.** Every active install is one more subscriber routing their already-paid-for plan through their own infrastructure.
|
|
336
|
+
- **File drift.** Open an issue when your rate-limit header flips, when a tool that worked yesterday breaks today, when a CC release lands without a wire-level note. It gets documented in public alongside the fix.
|
|
337
|
+
- **Share the install line.** Pass it to the next Cursor/Aider/Cline user who's quietly paying their second bill.
|
|
493
338
|
|
|
494
|
-
|
|
495
|
-
- **Install + run.** Every active install is one more subscriber routing their already-paid-for plan through their own infrastructure instead of through whatever the next silent change does.
|
|
496
|
-
- **Run a pool.** Two accounts in `~/.dario/accounts/`, headroom-aware routing, 429 failover. Subscriptions are designed for one user; pool mode makes them resilient.
|
|
497
|
-
- **File drift.** Open an issue when your rate-limit header flips, when a tool you used yesterday breaks today, when a CC release lands without a wire-level note. We document it in public alongside the fix.
|
|
498
|
-
- **Share the install line.** Slack channel, group chat, the next Cursor/Aider/Cline user who's quietly paying their second bill. Pricing-aware proxying is a baseline subscriber capability, not a privilege.
|
|
499
|
-
|
|
500
|
-
Follow [@ask_alf](https://x.com/ask_alf) for drift bulletins as they happen. The [askalf platform](https://askalf.org) — a self-hosted AI workforce that builds on dario — is shipping soon.
|
|
339
|
+
Follow [@ask_alf](https://x.com/ask_alf) for drift bulletins as they happen.
|
|
501
340
|
|
|
502
341
|
---
|
|
503
342
|
|
|
@@ -511,13 +350,13 @@ MIT — see [LICENSE](LICENSE) and [DISCLAIMER.md](DISCLAIMER.md).
|
|
|
511
350
|
|
|
512
351
|
## Also by askalf
|
|
513
352
|
|
|
514
|
-
Ordered by relevance to a dario reader — projects that route through dario first, Claude Code ecosystem second, supporting infrastructure last.
|
|
515
|
-
|
|
516
353
|
| Project | What it does |
|
|
517
354
|
|---|---|
|
|
518
|
-
| [askalf platform](https://askalf.org) | Self-hosted AI workforce — agents that run real business + life work
|
|
519
|
-
| [hands](https://github.com/askalf/hands) | Cross-platform computer-use agent — your LLM on your mouse, keyboard, and screen.
|
|
520
|
-
| [deepdive](https://github.com/askalf/deepdive) | Local research agent. One command, cited answer. Plan → search → headless fetch → extract → synthesize.
|
|
521
|
-
| [
|
|
522
|
-
| [
|
|
523
|
-
| [
|
|
355
|
+
| [askalf platform](https://askalf.org) | Self-hosted AI workforce — agents that run real business + life work. Uses dario as its LLM substrate. *Shipping soon.* |
|
|
356
|
+
| [hands](https://github.com/askalf/hands) | Cross-platform computer-use agent — your LLM on your mouse, keyboard, and screen. Routes through dario or any Anthropic-compatible endpoint. |
|
|
357
|
+
| [deepdive](https://github.com/askalf/deepdive) | Local research agent. One command, cited answer. Plan → search → headless fetch → extract → synthesize. |
|
|
358
|
+
| [agent](https://github.com/askalf/agent) | Connect any device to an askalf fleet — runs the shell or Claude Code tasks the fleet dispatches. |
|
|
359
|
+
| [browser-bridge](https://github.com/askalf/browser-bridge) | Stealth headless Chromium in a container, CDP on 9222. Playwright / Puppeteer / MCP-compatible. |
|
|
360
|
+
| [claude-sync](https://github.com/askalf/claude-sync) | Sync Claude Code sessions across machines via a portable `.ccsync` file. |
|
|
361
|
+
| [pgflex](https://github.com/askalf/pgflex) | One Postgres API, two modes — real PostgreSQL for production, PGlite (WASM) for dev. |
|
|
362
|
+
| [redisflex](https://github.com/askalf/redisflex) | One Redis API, two modes — ioredis for production, in-process for dev. Includes a BullMQ-shaped in-memory queue. |
|
package/dist/analytics.js
CHANGED
|
@@ -40,6 +40,8 @@ export function billingBucketFromClaim(claim) {
|
|
|
40
40
|
// Anthropic pricing (per 1M tokens, USD). Not authoritative — used for
|
|
41
41
|
// rough burn-rate display in the /analytics summary.
|
|
42
42
|
const PRICING = {
|
|
43
|
+
'claude-opus-4-8': { input: 5, output: 25, cacheRead: 0.5, cacheCreate: 6.25 },
|
|
44
|
+
'claude-opus-4-7': { input: 5, output: 25, cacheRead: 0.5, cacheCreate: 6.25 },
|
|
43
45
|
'claude-opus-4-6': { input: 15, output: 75, cacheRead: 1.5, cacheCreate: 18.75 },
|
|
44
46
|
'claude-sonnet-4-6': { input: 3, output: 15, cacheRead: 0.3, cacheCreate: 3.75 },
|
|
45
47
|
'claude-haiku-4-5': { input: 0.8, output: 4, cacheRead: 0.08, cacheCreate: 1 },
|
package/dist/cc-template.d.ts
CHANGED
|
@@ -285,6 +285,7 @@ export declare function resolveEffort(flag: EffortValue | undefined, clientBody:
|
|
|
285
285
|
*
|
|
286
286
|
* Empirical results (2026-05-15, live OAuth-subscription probes against
|
|
287
287
|
* api.anthropic.com — see dario#NNN for the probe matrix):
|
|
288
|
+
* claude-opus-4-8 ✓ accepts adaptive (verified 2026-05-28)
|
|
288
289
|
* claude-opus-4-7 ✓ accepts adaptive
|
|
289
290
|
* claude-opus-4-6 ✓ accepts adaptive
|
|
290
291
|
* claude-sonnet-4-6 ✓ accepts adaptive
|
package/dist/cc-template.js
CHANGED
|
@@ -925,6 +925,7 @@ export function resolveEffort(flag, clientBody) {
|
|
|
925
925
|
*
|
|
926
926
|
* Empirical results (2026-05-15, live OAuth-subscription probes against
|
|
927
927
|
* api.anthropic.com — see dario#NNN for the probe matrix):
|
|
928
|
+
* claude-opus-4-8 ✓ accepts adaptive (verified 2026-05-28)
|
|
928
929
|
* claude-opus-4-7 ✓ accepts adaptive
|
|
929
930
|
* claude-opus-4-6 ✓ accepts adaptive
|
|
930
931
|
* claude-sonnet-4-6 ✓ accepts adaptive
|
package/dist/doctor.js
CHANGED
|
@@ -478,7 +478,7 @@ export async function runChecks(opts = {}) {
|
|
|
478
478
|
const families = [
|
|
479
479
|
{ family: 'haiku', model: 'claude-haiku-4-5' },
|
|
480
480
|
{ family: 'sonnet', model: 'claude-sonnet-4-6' },
|
|
481
|
-
{ family: 'opus', model: 'claude-opus-4-
|
|
481
|
+
{ family: 'opus', model: 'claude-opus-4-8' },
|
|
482
482
|
];
|
|
483
483
|
const probe = async (model) => {
|
|
484
484
|
const res = await fetch(probeEndpoint, {
|
package/dist/live-fingerprint.d.ts
CHANGED
|
@@ -282,7 +282,7 @@ export declare function _resetInstalledVersionProbeForTest(): void;
|
|
|
282
282
|
*/
|
|
283
283
|
export declare const SUPPORTED_CC_RANGE: {
|
|
284
284
|
readonly min: "1.0.0";
|
|
285
|
-
readonly maxTested: "2.1.
|
|
285
|
+
readonly maxTested: "2.1.154";
|
|
286
286
|
};
|
|
287
287
|
/**
|
|
288
288
|
* Compare two dotted-numeric version strings. Returns negative if `a<b`,
|
package/dist/live-fingerprint.js
CHANGED
|
@@ -777,7 +777,7 @@ export function _resetInstalledVersionProbeForTest() {
|
|
|
777
777
|
*/
|
|
778
778
|
export const SUPPORTED_CC_RANGE = {
|
|
779
779
|
min: '1.0.0',
|
|
780
|
-
maxTested: '2.1.
|
|
780
|
+
maxTested: '2.1.154',
|
|
781
781
|
};
|
|
782
782
|
/**
|
|
783
783
|
* Compare two dotted-numeric version strings. Returns negative if `a<b`,
|
package/dist/proxy.js
CHANGED
|
@@ -127,7 +127,8 @@ function loadClaudeIdentity() {
|
|
|
127
127
|
}
|
|
128
128
|
// Model shortcuts — users can pass short names
|
|
129
129
|
const MODEL_ALIASES = {
|
|
130
|
-
'opus': 'claude-opus-4-
|
|
130
|
+
'opus': 'claude-opus-4-8',
|
|
131
|
+
'opus47': 'claude-opus-4-7',
|
|
131
132
|
'opus46': 'claude-opus-4-6',
|
|
132
133
|
'opus1m': 'claude-opus-4-7[1m]',
|
|
133
134
|
'sonnet': 'claude-sonnet-4-6',
|
|
@@ -273,7 +274,7 @@ export function sanitizeMessages(body, preserveTags) {
|
|
|
273
274
|
*/
|
|
274
275
|
// OpenAI model names → Anthropic (fallback if client sends GPT names)
|
|
275
276
|
const OPENAI_MODEL_MAP = {
|
|
276
|
-
'gpt-5.4': 'claude-opus-4-
|
|
277
|
+
'gpt-5.4': 'claude-opus-4-8',
|
|
277
278
|
'gpt-5.4-mini': 'claude-sonnet-4-6',
|
|
278
279
|
'gpt-5.4-nano': 'claude-haiku-4-5',
|
|
279
280
|
'gpt-5.3': 'claude-opus-4-6',
|
|
@@ -361,7 +362,7 @@ function translateStreamChunk(line) {
|
|
|
361
362
|
catch { }
|
|
362
363
|
return null;
|
|
363
364
|
}
|
|
364
|
-
const OPENAI_MODELS_LIST = { object: 'list', data: ['claude-opus-4-7', 'claude-opus-4-6', 'claude-sonnet-4-6', 'claude-haiku-4-5'].map(id => ({ id, object: 'model', created: 1700000000, owned_by: 'anthropic' })) };
|
|
365
|
+
const OPENAI_MODELS_LIST = { object: 'list', data: ['claude-opus-4-8', 'claude-opus-4-7', 'claude-opus-4-6', 'claude-sonnet-4-6', 'claude-haiku-4-5'].map(id => ({ id, object: 'model', created: 1700000000, owned_by: 'anthropic' })) };
|
|
365
366
|
/**
|
|
366
367
|
* Append a JSON-ND line to the proxy log file. No-op when stream is
|
|
367
368
|
* null (logFile not configured). Errors are swallowed — log writes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "4.8.11",
|
|
3
|
+
"version": "4.8.13",
|
|
4
4
|
"description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|