@askalf/dario 2.8.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -12
- package/dist/cli.js +13 -22
- package/dist/oauth.js +2 -7
- package/dist/proxy.js +51 -155
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -193,13 +193,23 @@ Model: claude-opus-4-6 (all requests)
|
|
|
193
193
|
|
|
194
194
|
**Trade-offs vs direct API mode:**
|
|
195
195
|
|
|
196
|
-
| | Direct API (default) | CLI Backend (`--cli`) |
|
|
197
|
-
|
|
198
|
-
| Streaming |
|
|
199
|
-
| Tool use
|
|
200
|
-
|
|
|
201
|
-
|
|
|
202
|
-
|
|
|
196
|
+
| | Direct API (default) | CLI Backend (`--cli`) | Passthrough (`--passthrough`) |
|
|
197
|
+
|---|---|---|---|
|
|
198
|
+
| Streaming | Native SSE | SSE (converted from JSON) | Native SSE |
|
|
199
|
+
| Tool use | Yes | No | Yes |
|
|
200
|
+
| Thinking/billing injection | Yes (Claude-optimized) | N/A | No (OAuth swap only) |
|
|
201
|
+
| Latency | Low | Higher (process spawn) | Low |
|
|
202
|
+
| Rate limits | Priority routing | Not affected | Standard (no priority) |
|
|
203
|
+
| Opus when throttled | Auto CLI fallback | **Always works** | May return 429 |
|
|
204
|
+
|
|
205
|
+
## Passthrough Mode
|
|
206
|
+
|
|
207
|
+
For tools like Hermes or OpenClaw that need exact Anthropic protocol fidelity, use `--passthrough`. This mode performs the OAuth token swap only — no billing tag, no thinking injection, no device identity, no extra beta flags.
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
dario proxy --passthrough # Thin proxy, zero injection
|
|
211
|
+
dario proxy --passthrough --model=opus # Thin proxy + model override
|
|
212
|
+
```
|
|
203
213
|
|
|
204
214
|
## Model Selection
|
|
205
215
|
|
|
@@ -285,7 +295,7 @@ const message = await client.messages.create({
|
|
|
285
295
|
});
|
|
286
296
|
```
|
|
287
297
|
|
|
288
|
-
### Streaming
|
|
298
|
+
### Streaming
|
|
289
299
|
|
|
290
300
|
```bash
|
|
291
301
|
curl http://localhost:3456/v1/messages \
|
|
@@ -351,6 +361,18 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
|
|
|
351
361
|
└──────────┘ └─────────────────┘ └──────────────────┘
|
|
352
362
|
```
|
|
353
363
|
|
|
364
|
+
### Passthrough Mode (`--passthrough`)
|
|
365
|
+
|
|
366
|
+
```
|
|
367
|
+
┌──────────┐ ┌─────────────────┐ ┌──────────────────┐
|
|
368
|
+
│ Your App │ ──> │ dario (proxy) │ ──> │ api.anthropic.com│
|
|
369
|
+
│ │ │ localhost:3456 │ │ │
|
|
370
|
+
│ sends │ │ swaps API key │ │ sees valid │
|
|
371
|
+
│ API │ │ for OAuth │ │ OAuth bearer │
|
|
372
|
+
│ request │ │ nothing else │ │ token │
|
|
373
|
+
└──────────┘ └─────────────────┘ └──────────────────┘
|
|
374
|
+
```
|
|
375
|
+
|
|
354
376
|
1. **`dario login`** — Detects your existing Claude Code credentials (`~/.claude/.credentials.json`) and starts the proxy automatically. If Claude Code isn't installed, runs a PKCE OAuth flow with a local callback server to capture the token automatically.
|
|
355
377
|
|
|
356
378
|
2. **`dario proxy`** — Starts an HTTP server on localhost that implements the Anthropic Messages API. In direct mode, it swaps your API key for an OAuth bearer token. In CLI mode, it routes through the Claude Code binary. In passthrough mode, it performs only the OAuth swap, with no other request injection.
|
|
@@ -373,6 +395,7 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
|
|
|
373
395
|
| Flag/Env | Description | Default |
|
|
374
396
|
|----------|-------------|---------|
|
|
375
397
|
| `--cli` | Use Claude CLI as backend (bypasses rate limits) | off |
|
|
398
|
+
| `--passthrough` | Thin proxy — OAuth swap only, no injection | off |
|
|
376
399
|
| `--model=MODEL` | Force a model (`opus`, `sonnet`, `haiku`, or full ID) | none (client's model is used) |
|
|
377
400
|
| `--port=PORT` | Port to listen on | `3456` |
|
|
378
401
|
| `--verbose` / `-v` | Log every request | off |
|
|
@@ -383,8 +406,10 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
|
|
|
383
406
|
### Direct API Mode
|
|
384
407
|
- All Claude models (Opus 4.6, Sonnet 4.6, Haiku 4.5) + 1M extended context aliases (`opus1m`, `sonnet1m`)
|
|
385
408
|
- **Native billing classification** — device identity metadata ensures Max plan limits work correctly
|
|
386
|
-
- **Priority routing** — billing tag injection + `service_tier: auto` activates per-model rate limits, keeping Opus/Sonnet available even at 100% overall utilization
|
|
387
|
-
- **Adaptive thinking** — matches Claude Code's `{ type: 'adaptive' }` mode for optimal reasoning
|
|
409
|
+
- **Priority routing** — billing tag injection + `service_tier: 'auto'` activates per-model rate limits, keeping Opus/Sonnet available even at 100% overall utilization
|
|
410
|
+
- **Adaptive thinking** — matches Claude Code's `{ type: 'adaptive' }` mode for optimal reasoning (auto-skipped for Haiku 4.5)
|
|
411
|
+
- **Effort control** — injects `output_config: { effort: 'high' }` by default (skipped for Haiku, which does not support the effort parameter), or passes through a client-specified effort level
|
|
412
|
+
- **Enriched 429 errors** — rate limit errors include utilization %, limiting window, and reset time instead of Anthropic's default `"Error"` message
|
|
388
413
|
- **Auto CLI fallback** — if the API returns 429 and Claude Code is installed, transparently retries through `claude --print` with SSE conversion
|
|
389
414
|
- **OpenAI-compatible** (`/v1/chat/completions`) — works with any OpenAI SDK or tool
|
|
390
415
|
- Streaming and non-streaming (both Anthropic and OpenAI SSE formats, including tool_use streaming)
|
|
@@ -399,10 +424,17 @@ Then run `hermes` normally — it routes through dario using your Claude subscri
|
|
|
399
424
|
|
|
400
425
|
### CLI Backend Mode
|
|
401
426
|
- All Claude models — including Opus when rate limited
|
|
402
|
-
-
|
|
427
|
+
- Streaming via SSE conversion (when the client sends `stream: true`, the CLI's JSON response is converted to Anthropic or OpenAI SSE events)
|
|
428
|
+
- OpenAI compatibility (translates OpenAI → Anthropic before CLI, Anthropic → OpenAI after)
|
|
403
429
|
- System prompts and multi-turn conversations (via context injection)
|
|
404
430
|
- Not affected by API rate limits
|
|
405
431
|
|
|
432
|
+
### Passthrough Mode
|
|
433
|
+
- All Claude models with native streaming and tool use
|
|
434
|
+
- OAuth token swap only — no billing tag, thinking, effort, service_tier, or device identity injection
|
|
435
|
+
- Minimal beta flags (`oauth-2025-04-20` + client betas only)
|
|
436
|
+
- For tools like Hermes or OpenClaw that need exact Anthropic protocol fidelity
|
|
437
|
+
|
|
406
438
|
## Endpoints
|
|
407
439
|
|
|
408
440
|
| Path | Description |
|
|
@@ -459,7 +491,7 @@ Recommended but not required. If Claude Code is installed and logged in, `dario
|
|
|
459
491
|
Dario auto-refreshes tokens 30 minutes before expiry. You should never see an auth error in normal use. If something goes wrong, `dario refresh` forces an immediate refresh.
|
|
460
492
|
|
|
461
493
|
**I'm getting rate limited on Opus. What do I do?**
|
|
462
|
-
Use `--cli` mode: `dario proxy --cli`. This routes through the Claude Code binary, which continues working when direct API calls are rate limited. You can also enable [extra usage](https://support.claude.com/en/articles/12429409-manage-extra-usage-for-paid-claude-plans) in your Anthropic account settings to extend your limits at API rates.
|
|
494
|
+
Use `--cli` mode: `dario proxy --cli`. This routes through the Claude Code binary, which continues working when direct API calls are rate limited. In default mode, dario automatically falls back to CLI when it detects a 429 (if Claude Code is installed). Rate limit errors include utilization percentages and reset times so you can see exactly when capacity returns. You can also enable [extra usage](https://support.claude.com/en/articles/12429409-manage-extra-usage-for-paid-claude-plans) in your Anthropic account settings to extend your limits at API rates.
|
|
463
495
|
|
|
464
496
|
**What are the usage limits?**
|
|
465
497
|
Claude subscriptions have rolling 5-hour and 7-day usage windows shared across claude.ai and Claude Code. See [Anthropic's docs](https://support.claude.com/en/articles/11647753-how-do-usage-and-length-limits-work) for details. In Claude Code, use `/usage` to check your current limits, or configure the [statusline](https://code.claude.com/docs/en/statusline) to show real-time 5h and 7d utilization percentages.
|
|
@@ -483,6 +515,9 @@ await startProxy({ port: 3456, verbose: true });
|
|
|
483
515
|
// CLI backend mode
|
|
484
516
|
await startProxy({ port: 3456, cliBackend: true, model: "opus" });
|
|
485
517
|
|
|
518
|
+
// Passthrough mode (OAuth swap only, no injection)
|
|
519
|
+
await startProxy({ port: 3456, passthrough: true });
|
|
520
|
+
|
|
486
521
|
// Or just get a raw access token
|
|
487
522
|
const token = await getAccessToken();
|
|
488
523
|
|
package/dist/cli.js
CHANGED
|
@@ -9,10 +9,10 @@
|
|
|
9
9
|
* dario refresh — Force token refresh
|
|
10
10
|
* dario logout — Remove saved credentials
|
|
11
11
|
*/
|
|
12
|
-
import {
|
|
12
|
+
import { unlink } from 'node:fs/promises';
|
|
13
13
|
import { join } from 'node:path';
|
|
14
14
|
import { homedir } from 'node:os';
|
|
15
|
-
import { startAutoOAuthFlow, getStatus, refreshTokens } from './oauth.js';
|
|
15
|
+
import { startAutoOAuthFlow, getStatus, refreshTokens, loadCredentials } from './oauth.js';
|
|
16
16
|
import { startProxy, sanitizeError } from './proxy.js';
|
|
17
17
|
const args = process.argv.slice(2);
|
|
18
18
|
const command = args[0] ?? 'proxy';
|
|
@@ -21,22 +21,14 @@ async function login() {
|
|
|
21
21
|
console.log(' dario — Claude Login');
|
|
22
22
|
console.log(' ───────────────────');
|
|
23
23
|
console.log('');
|
|
24
|
-
// Check
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
if (expiresAt > Date.now()) {
|
|
32
|
-
console.log(' Found Claude Code credentials. Starting proxy...');
|
|
33
|
-
console.log('');
|
|
34
|
-
await proxy();
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
}
|
|
24
|
+
// Check for existing credentials (Claude Code or dario's own)
|
|
25
|
+
const creds = await loadCredentials();
|
|
26
|
+
if (creds?.claudeAiOauth?.accessToken && creds.claudeAiOauth.expiresAt > Date.now()) {
|
|
27
|
+
console.log(' Found credentials. Starting proxy...');
|
|
28
|
+
console.log('');
|
|
29
|
+
await proxy();
|
|
30
|
+
return;
|
|
38
31
|
}
|
|
39
|
-
catch { /* no Claude Code credentials, fall through to OAuth */ }
|
|
40
32
|
console.log(' No Claude Code credentials found. Starting OAuth flow...');
|
|
41
33
|
console.log('');
|
|
42
34
|
try {
|
|
@@ -157,12 +149,11 @@ async function help() {
|
|
|
157
149
|
`);
|
|
158
150
|
}
|
|
159
151
|
async function version() {
|
|
160
|
-
const { readFile } = await import('node:fs/promises');
|
|
161
|
-
const { fileURLToPath } = await import('node:url');
|
|
162
|
-
const { dirname, join } = await import('node:path');
|
|
163
152
|
try {
|
|
164
|
-
const
|
|
165
|
-
const
|
|
153
|
+
const { fileURLToPath } = await import('node:url');
|
|
154
|
+
const { readFile: rf } = await import('node:fs/promises');
|
|
155
|
+
const dir = join(fileURLToPath(import.meta.url), '..', '..');
|
|
156
|
+
const pkg = JSON.parse(await rf(join(dir, 'package.json'), 'utf-8'));
|
|
166
157
|
console.log(pkg.version);
|
|
167
158
|
}
|
|
168
159
|
catch {
|
package/dist/oauth.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Handles authorization, token exchange, storage, and auto-refresh.
|
|
6
6
|
*/
|
|
7
7
|
import { randomBytes, createHash } from 'node:crypto';
|
|
8
|
-
import { readFile, writeFile, mkdir,
|
|
8
|
+
import { readFile, writeFile, mkdir, rename } from 'node:fs/promises';
|
|
9
9
|
import { dirname, join } from 'node:path';
|
|
10
10
|
import { homedir } from 'node:os';
|
|
11
11
|
// Claude Code's public OAuth client (PKCE, no secret needed)
|
|
@@ -62,11 +62,6 @@ async function saveCredentials(creds) {
|
|
|
62
62
|
const tmpPath = `${path}.tmp.${Date.now()}`;
|
|
63
63
|
await writeFile(tmpPath, JSON.stringify(creds, null, 2), { mode: 0o600 });
|
|
64
64
|
await rename(tmpPath, path);
|
|
65
|
-
// Set permissions (best-effort — no-op on Windows where mode is ignored)
|
|
66
|
-
try {
|
|
67
|
-
await chmod(path, 0o600);
|
|
68
|
-
}
|
|
69
|
-
catch { /* Windows ignores file modes */ }
|
|
70
65
|
// Invalidate cache so next read picks up the new tokens
|
|
71
66
|
credentialsCache = creds;
|
|
72
67
|
credentialsCacheTime = Date.now();
|
|
@@ -222,10 +217,10 @@ async function doRefreshTokens() {
|
|
|
222
217
|
}
|
|
223
218
|
const data = await res.json();
|
|
224
219
|
const tokens = {
|
|
225
|
-
...oauth,
|
|
226
220
|
accessToken: data.access_token,
|
|
227
221
|
refreshToken: data.refresh_token,
|
|
228
222
|
expiresAt: Date.now() + data.expires_in * 1000,
|
|
223
|
+
scopes: oauth.scopes,
|
|
229
224
|
};
|
|
230
225
|
await saveCredentials({ claudeAiOauth: tokens });
|
|
231
226
|
return tokens;
|
package/dist/proxy.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createServer } from 'node:http';
|
|
2
2
|
import { randomUUID, timingSafeEqual } from 'node:crypto';
|
|
3
3
|
import { execSync, spawn } from 'node:child_process';
|
|
4
|
-
import { readFileSync } from 'node:fs';
|
|
4
|
+
import { readFileSync, readdirSync } from 'node:fs';
|
|
5
5
|
import { join } from 'node:path';
|
|
6
6
|
import { homedir } from 'node:os';
|
|
7
7
|
import { arch, platform, version as nodeVersion } from 'node:process';
|
|
@@ -35,27 +35,19 @@ class Semaphore {
|
|
|
35
35
|
next();
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
|
-
// Detect installed Claude Code binary at startup
|
|
39
|
-
|
|
38
|
+
// Detect installed Claude Code binary at startup (single exec for both version + availability)
|
|
39
|
+
let cliAvailable = false;
|
|
40
|
+
function detectCli() {
|
|
40
41
|
try {
|
|
41
42
|
const out = execSync('claude --version', { timeout: 5000, stdio: 'pipe' }).toString().trim();
|
|
42
|
-
|
|
43
|
-
return match?.[1] ?? '2.1.96';
|
|
43
|
+
cliAvailable = true;
|
|
44
|
+
return out.match(/^([\d.]+)/)?.[1] ?? '2.1.96';
|
|
44
45
|
}
|
|
45
46
|
catch {
|
|
47
|
+
cliAvailable = false;
|
|
46
48
|
return '2.1.96';
|
|
47
49
|
}
|
|
48
50
|
}
|
|
49
|
-
let cliAvailable = false;
|
|
50
|
-
function detectCliAvailable() {
|
|
51
|
-
try {
|
|
52
|
-
execSync('claude --version', { timeout: 5000, stdio: 'pipe' });
|
|
53
|
-
return true;
|
|
54
|
-
}
|
|
55
|
-
catch {
|
|
56
|
-
return false;
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
51
|
/** Convert a non-streaming Messages API response to SSE event stream. */
|
|
60
52
|
function jsonToSse(jsonBody) {
|
|
61
53
|
try {
|
|
@@ -86,6 +78,40 @@ function jsonToSse(jsonBody) {
|
|
|
86
78
|
return '';
|
|
87
79
|
}
|
|
88
80
|
}
|
|
81
|
+
/** Convert CLI JSON response to OpenAI SSE format. */
|
|
82
|
+
function jsonToOpenaiSse(jsonBody) {
|
|
83
|
+
try {
|
|
84
|
+
const parsed = JSON.parse(jsonBody);
|
|
85
|
+
const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
|
|
86
|
+
const ts = Math.floor(Date.now() / 1000);
|
|
87
|
+
return `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n` +
|
|
88
|
+
`data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
return '';
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
/** Send a CLI result to the client, handling streaming/format translation. */
|
|
95
|
+
function sendCliResponse(res, cliResult, clientWantsStream, isOpenAI, corsOrigin, securityHeaders) {
|
|
96
|
+
const headers = { 'Access-Control-Allow-Origin': corsOrigin, ...securityHeaders };
|
|
97
|
+
const ok = cliResult.status >= 200 && cliResult.status < 300;
|
|
98
|
+
if (ok && clientWantsStream) {
|
|
99
|
+
const sseData = isOpenAI ? jsonToOpenaiSse(cliResult.body) : jsonToSse(cliResult.body);
|
|
100
|
+
if (sseData) {
|
|
101
|
+
res.writeHead(200, { 'Content-Type': 'text/event-stream', ...headers });
|
|
102
|
+
res.end(sseData);
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
if (ok && isOpenAI) {
|
|
107
|
+
try {
|
|
108
|
+
cliResult.body = JSON.stringify(anthropicToOpenai(JSON.parse(cliResult.body)));
|
|
109
|
+
}
|
|
110
|
+
catch { }
|
|
111
|
+
}
|
|
112
|
+
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, ...headers });
|
|
113
|
+
res.end(cliResult.body);
|
|
114
|
+
}
|
|
89
115
|
const SESSION_ID = randomUUID();
|
|
90
116
|
const OS_NAME = platform === 'win32' ? 'Windows' : platform === 'darwin' ? 'MacOS' : 'Linux';
|
|
91
117
|
// Claude Code device identity — required for Max plan billing classification.
|
|
@@ -100,7 +126,7 @@ function loadClaudeIdentity() {
|
|
|
100
126
|
// Also check backup files as fallback
|
|
101
127
|
try {
|
|
102
128
|
const backupDir = join(homedir(), '.claude', 'backups');
|
|
103
|
-
const files =
|
|
129
|
+
const files = readdirSync(backupDir);
|
|
104
130
|
const backups = files
|
|
105
131
|
.filter((f) => f.startsWith('.claude.json.backup.'))
|
|
106
132
|
.sort()
|
|
@@ -180,28 +206,6 @@ function sanitizeMessages(body) {
|
|
|
180
206
|
}
|
|
181
207
|
}
|
|
182
208
|
}
|
|
183
|
-
let lastTokenSnapshot = null;
|
|
184
|
-
function checkTokenAnomalies(usage, requestId) {
|
|
185
|
-
const current = {
|
|
186
|
-
inputTokens: usage.input_tokens ?? 0,
|
|
187
|
-
outputTokens: usage.output_tokens ?? 0,
|
|
188
|
-
cacheRead: usage.cache_read_input_tokens ?? 0,
|
|
189
|
-
};
|
|
190
|
-
if (lastTokenSnapshot && lastTokenSnapshot.inputTokens > 0) {
|
|
191
|
-
const growth = (current.inputTokens - lastTokenSnapshot.inputTokens) / lastTokenSnapshot.inputTokens;
|
|
192
|
-
if (growth > 0.6) {
|
|
193
|
-
const pct = Math.round(growth * 100);
|
|
194
|
-
console.warn(`[dario] TOKEN WARN ${requestId}: Input grew ${pct}% (${lastTokenSnapshot.inputTokens} → ${current.inputTokens}). Possible full replay.`);
|
|
195
|
-
}
|
|
196
|
-
if (current.outputTokens > lastTokenSnapshot.outputTokens * 2 && current.outputTokens > 2000) {
|
|
197
|
-
console.warn(`[dario] TOKEN WARN ${requestId}: Output explosion ${current.outputTokens} tokens (${Math.round(current.outputTokens / lastTokenSnapshot.outputTokens)}x previous).`);
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
lastTokenSnapshot = current;
|
|
201
|
-
}
|
|
202
|
-
// Extended context fallback — cooldown after 1M context failure
|
|
203
|
-
let extendedContextUnavailableAt = 0;
|
|
204
|
-
const EXTENDED_CONTEXT_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour
|
|
205
209
|
// OpenAI model names → Anthropic (fallback if client sends GPT names)
|
|
206
210
|
const OPENAI_MODEL_MAP = {
|
|
207
211
|
'gpt-5.4': 'claude-opus-4-6',
|
|
@@ -436,8 +440,7 @@ export async function startProxy(opts = {}) {
|
|
|
436
440
|
console.error('[dario] Not authenticated. Run `dario login` first.');
|
|
437
441
|
process.exit(1);
|
|
438
442
|
}
|
|
439
|
-
const cliVersion =
|
|
440
|
-
cliAvailable = detectCliAvailable();
|
|
443
|
+
const cliVersion = detectCli();
|
|
441
444
|
const modelOverride = opts.model ? (MODEL_ALIASES[opts.model] ?? opts.model) : null;
|
|
442
445
|
const identity = loadClaudeIdentity();
|
|
443
446
|
if (identity.deviceId) {
|
|
@@ -610,41 +613,7 @@ export async function startProxy(opts = {}) {
|
|
|
610
613
|
}
|
|
611
614
|
const cliResult = await handleViaCli(cliBody, modelOverride, verbose);
|
|
612
615
|
requestCount++;
|
|
613
|
-
|
|
614
|
-
// Client requested streaming — convert CLI JSON to SSE
|
|
615
|
-
if (isOpenAI) {
|
|
616
|
-
try {
|
|
617
|
-
const parsed = JSON.parse(cliResult.body);
|
|
618
|
-
const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
|
|
619
|
-
const ts = Math.floor(Date.now() / 1000);
|
|
620
|
-
let sseData = `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n`;
|
|
621
|
-
sseData += `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
|
|
622
|
-
res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
623
|
-
res.end(sseData);
|
|
624
|
-
}
|
|
625
|
-
catch {
|
|
626
|
-
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
627
|
-
res.end(cliResult.body);
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
|
-
else {
|
|
631
|
-
const sseData = jsonToSse(cliResult.body);
|
|
632
|
-
res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
633
|
-
res.end(sseData);
|
|
634
|
-
}
|
|
635
|
-
}
|
|
636
|
-
else {
|
|
637
|
-
// Non-streaming or error — translate and return as JSON
|
|
638
|
-
if (isOpenAI && cliResult.status >= 200 && cliResult.status < 300) {
|
|
639
|
-
try {
|
|
640
|
-
const parsed = JSON.parse(cliResult.body);
|
|
641
|
-
cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
|
|
642
|
-
}
|
|
643
|
-
catch { /* send as-is */ }
|
|
644
|
-
}
|
|
645
|
-
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
646
|
-
res.end(cliResult.body);
|
|
647
|
-
}
|
|
616
|
+
sendCliResponse(res, cliResult, clientWantsStream, isOpenAI, corsOrigin, SECURITY_HEADERS);
|
|
648
617
|
return;
|
|
649
618
|
}
|
|
650
619
|
// Parse body once, apply OpenAI translation, model override, and sanitization
|
|
@@ -654,10 +623,6 @@ export async function startProxy(opts = {}) {
|
|
|
654
623
|
const parsed = JSON.parse(body.toString());
|
|
655
624
|
// Strip orchestration tags from messages (Aider, Cursor, etc.)
|
|
656
625
|
sanitizeMessages(parsed);
|
|
657
|
-
// Handle 1M context: strip [1m] suffix if in cooldown
|
|
658
|
-
if (modelOverride?.includes('[1m]') && extendedContextUnavailableAt > 0 && Date.now() - extendedContextUnavailableAt < EXTENDED_CONTEXT_COOLDOWN_MS) {
|
|
659
|
-
parsed.model = modelOverride.replace('[1m]', '');
|
|
660
|
-
}
|
|
661
626
|
const result = isOpenAI ? openaiToAnthropic(parsed, modelOverride) : (modelOverride ? { ...parsed, model: modelOverride } : parsed);
|
|
662
627
|
const r = result;
|
|
663
628
|
// In passthrough mode, skip all Claude-specific injection — OAuth swap only
|
|
@@ -687,7 +652,8 @@ export async function startProxy(opts = {}) {
|
|
|
687
652
|
r.service_tier = 'auto';
|
|
688
653
|
}
|
|
689
654
|
// Set reasoning effort (pass through client value or default)
|
|
690
|
-
|
|
655
|
+
// Haiku does not support the effort parameter
|
|
656
|
+
if (supportsThinking && !r.output_config) {
|
|
691
657
|
r.output_config = { effort: 'high' };
|
|
692
658
|
}
|
|
693
659
|
// Enable context management (matches Claude Code default)
|
|
@@ -774,74 +740,19 @@ export async function startProxy(opts = {}) {
|
|
|
774
740
|
res.end(enriched);
|
|
775
741
|
return;
|
|
776
742
|
}
|
|
777
|
-
// Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary
|
|
778
|
-
// The CLI gets priority routing from Anthropic's server — a separate rate limit pool
|
|
779
|
-
// that continues working when the direct API quota is exhausted for expensive models.
|
|
743
|
+
// Auto-fallback: if API returns 429 and CLI is available, retry through CLI binary
|
|
780
744
|
if (upstream.status === 429 && cliAvailable && !useCli) {
|
|
781
|
-
// Drain the upstream response
|
|
782
745
|
await upstream.text().catch(() => { });
|
|
783
746
|
if (verbose)
|
|
784
747
|
console.log(`[dario] #${requestCount} 429 from API — falling back to CLI`);
|
|
785
|
-
// Determine if the client requested streaming
|
|
786
748
|
let clientWantsStream = false;
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
const p = JSON.parse(body.toString());
|
|
790
|
-
clientWantsStream = !!p.stream;
|
|
791
|
-
}
|
|
792
|
-
catch { }
|
|
749
|
+
try {
|
|
750
|
+
clientWantsStream = !!JSON.parse(body.toString()).stream;
|
|
793
751
|
}
|
|
752
|
+
catch { }
|
|
794
753
|
const cliResult = await handleViaCli(body, modelOverride, verbose);
|
|
795
754
|
requestCount++;
|
|
796
|
-
|
|
797
|
-
if (isOpenAI) {
|
|
798
|
-
// Translate to OpenAI format
|
|
799
|
-
try {
|
|
800
|
-
const parsed = JSON.parse(cliResult.body);
|
|
801
|
-
cliResult.body = JSON.stringify(anthropicToOpenai(parsed));
|
|
802
|
-
}
|
|
803
|
-
catch { }
|
|
804
|
-
}
|
|
805
|
-
if (clientWantsStream && !isOpenAI) {
|
|
806
|
-
// Client requested SSE streaming — convert CLI JSON to SSE events
|
|
807
|
-
const sseData = jsonToSse(cliResult.body);
|
|
808
|
-
res.writeHead(200, {
|
|
809
|
-
'Content-Type': 'text/event-stream',
|
|
810
|
-
'Access-Control-Allow-Origin': corsOrigin,
|
|
811
|
-
...SECURITY_HEADERS,
|
|
812
|
-
});
|
|
813
|
-
res.end(sseData);
|
|
814
|
-
}
|
|
815
|
-
else if (clientWantsStream && isOpenAI) {
|
|
816
|
-
// OpenAI streaming — convert Anthropic JSON to OpenAI SSE
|
|
817
|
-
try {
|
|
818
|
-
const parsed = JSON.parse(cliResult.body);
|
|
819
|
-
const text = parsed.content?.find(c => c.type === 'text')?.text ?? '';
|
|
820
|
-
const ts = Math.floor(Date.now() / 1000);
|
|
821
|
-
let sseData = `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: { content: text }, finish_reason: null }] })}\n\n`;
|
|
822
|
-
sseData += `data: ${JSON.stringify({ id: 'chatcmpl-dario', object: 'chat.completion.chunk', created: ts, model: 'claude', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\ndata: [DONE]\n\n`;
|
|
823
|
-
res.writeHead(200, {
|
|
824
|
-
'Content-Type': 'text/event-stream',
|
|
825
|
-
'Access-Control-Allow-Origin': corsOrigin,
|
|
826
|
-
...SECURITY_HEADERS,
|
|
827
|
-
});
|
|
828
|
-
res.end(sseData);
|
|
829
|
-
}
|
|
830
|
-
catch {
|
|
831
|
-
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
832
|
-
res.end(cliResult.body);
|
|
833
|
-
}
|
|
834
|
-
}
|
|
835
|
-
else {
|
|
836
|
-
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
837
|
-
res.end(cliResult.body);
|
|
838
|
-
}
|
|
839
|
-
}
|
|
840
|
-
else {
|
|
841
|
-
// CLI also failed — return the CLI error
|
|
842
|
-
res.writeHead(cliResult.status, { 'Content-Type': cliResult.contentType, 'Access-Control-Allow-Origin': corsOrigin, ...SECURITY_HEADERS });
|
|
843
|
-
res.end(cliResult.body);
|
|
844
|
-
}
|
|
755
|
+
sendCliResponse(res, cliResult, clientWantsStream, isOpenAI, corsOrigin, SECURITY_HEADERS);
|
|
845
756
|
return;
|
|
846
757
|
}
|
|
847
758
|
// Detect streaming from content-type (reliable) or body (fallback)
|
|
@@ -916,21 +827,6 @@ export async function startProxy(opts = {}) {
|
|
|
916
827
|
else {
|
|
917
828
|
// Buffer and forward
|
|
918
829
|
const responseBody = await upstream.text();
|
|
919
|
-
// Check for extended context failure — cooldown to avoid repeated failures
|
|
920
|
-
if (upstream.status === 400 && responseBody.includes('extra_usage') && modelOverride?.includes('[1m]')) {
|
|
921
|
-
extendedContextUnavailableAt = Date.now();
|
|
922
|
-
console.warn('[dario] 1M context requires Extra Usage — falling back to standard context for 1 hour');
|
|
923
|
-
}
|
|
924
|
-
// Token anomaly detection on non-streaming responses
|
|
925
|
-
if (upstream.status >= 200 && upstream.status < 300) {
|
|
926
|
-
try {
|
|
927
|
-
const parsed = JSON.parse(responseBody);
|
|
928
|
-
const usage = parsed.usage;
|
|
929
|
-
if (usage)
|
|
930
|
-
checkTokenAnomalies(usage, responseHeaders['request-id'] ?? '');
|
|
931
|
-
}
|
|
932
|
-
catch { /* ignore parse errors */ }
|
|
933
|
-
}
|
|
934
830
|
if (isOpenAI && upstream.status >= 200 && upstream.status < 300) {
|
|
935
831
|
// Translate Anthropic response → OpenAI format
|
|
936
832
|
try {
|