@askalf/dario 4.8.1 → 4.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -4
- package/dist/cc-template.d.ts +1 -0
- package/dist/cc-template.js +24 -1
- package/dist/cli.js +19 -1
- package/dist/proxy.d.ts +19 -0
- package/dist/proxy.js +22 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -509,12 +509,8 @@ Ordered by relevance to a dario reader — projects that route through dario fir
|
|
|
509
509
|
|
|
510
510
|
| Project | What it does |
|
|
511
511
|
|---|---|
|
|
512
|
-
| [arnie](https://github.com/askalf/arnie) | Portable IT troubleshooting agent — networking, AD, package managers, log triage. Routes through dario for subscription billing. |
|
|
513
512
|
| [hands](https://github.com/askalf/hands) | Cross-platform computer-use agent — your LLM on your mouse, keyboard, and screen. Windows + macOS + Linux. Routes through dario or any Anthropic-compat. |
|
|
514
513
|
| [deepdive](https://github.com/askalf/deepdive) | Local research agent. One command, cited answer. Plan → search → headless fetch → extract → synthesize. Every LLM call through your own router. |
|
|
515
|
-
| [claude-sync](https://github.com/askalf/claude-sync) | Sync Claude Code sessions across machines. Pack a CC session into a portable `.ccsync` file, ship it via Dropbox / iCloud / USB, unpack on the other side. |
|
|
516
514
|
| [browser-bridge](https://github.com/askalf/browser-bridge) | Stealth headless Chromium in a container, CDP on 9222. Connect from Playwright, Puppeteer, MCP browser tools, any agent that wants a remote browser. |
|
|
517
|
-
| [install-kit](https://github.com/askalf/install-kit) | curl-pipe-bash template for self-hosted Docker apps — banner, prereq probes, `.env` scaffolding with crypto-rand secrets, healthcheck wait loop. |
|
|
518
515
|
| [pgflex](https://github.com/askalf/pgflex) | One Postgres API, two modes — real PostgreSQL for production, PGlite (in-process WASM) for standalone / dev. Same SQL, drop the server when you don't need it. |
|
|
519
516
|
| [redisflex](https://github.com/askalf/redisflex) | One Redis API, two modes — ioredis for production, in-process Map+EventEmitter for dev. Includes a BullMQ-shaped in-memory queue. |
|
|
520
|
-
| [git-providers](https://github.com/askalf/git-providers) | One `GitProvider` interface for GitHub + GitLab + Bitbucket Cloud, plus a 44-entry api-key-provider taxonomy (cloud / CI / monitoring / analytics / ...). |
|
package/dist/cc-template.d.ts
CHANGED
|
@@ -318,6 +318,7 @@ export declare function buildCCRequest(clientBody: Record<string, unknown>, bill
|
|
|
318
318
|
maxTokens?: number | 'client';
|
|
319
319
|
systemPrompt?: string;
|
|
320
320
|
skipFields?: ReadonlySet<string>;
|
|
321
|
+
honorClientThinking?: boolean;
|
|
321
322
|
}): {
|
|
322
323
|
body: Record<string, unknown>;
|
|
323
324
|
toolMap: Map<string, ToolMapping>;
|
package/dist/cc-template.js
CHANGED
|
@@ -1306,7 +1306,30 @@ export function buildCCRequest(clientBody, billingTag, cacheControl, identity, o
|
|
|
1306
1306
|
// absent). See dario#87.
|
|
1307
1307
|
if (!isHaiku) {
|
|
1308
1308
|
const skip = opts.skipFields;
|
|
1309
|
-
|
|
1309
|
+
// Client-supplied thinking shape takes precedence when honorClientThinking
|
|
1310
|
+
// is enabled. SDK clients (vs CC) sometimes need explicit control over
|
|
1311
|
+
// budget_tokens or the type='enabled' vs type='adaptive' choice — e.g.
|
|
1312
|
+
// an agent that wants 8k thinking tokens for hard problems, or a model
|
|
1313
|
+
// that supports thinking but not the 4.6-era adaptive variant. dario's
|
|
1314
|
+
// default builds the CC-style adaptive shape, which is fine for CC
|
|
1315
|
+
// clients but doesn't expose the budget knob to others.
|
|
1316
|
+
//
|
|
1317
|
+
// When honored, we also suppress dario's clear_thinking_* context-edit
|
|
1318
|
+
// pair — that edit is tuned for type='adaptive' and the client's shape
|
|
1319
|
+
// takes responsibility for the request as a whole. Effort still ships.
|
|
1320
|
+
const clientThinking = (clientBody.thinking ?? null);
|
|
1321
|
+
const honoredClientThinking = Boolean(opts.honorClientThinking
|
|
1322
|
+
&& clientThinking
|
|
1323
|
+
&& typeof clientThinking === 'object'
|
|
1324
|
+
&& typeof clientThinking['type'] === 'string');
|
|
1325
|
+
if (honoredClientThinking) {
|
|
1326
|
+
if (!skip || !skip.has('thinking')) {
|
|
1327
|
+
ccRequest.thinking = clientThinking;
|
|
1328
|
+
}
|
|
1329
|
+
// Intentionally do NOT inject context_management.clear_thinking_*
|
|
1330
|
+
// when honoring client thinking — the pairing is shape-specific.
|
|
1331
|
+
}
|
|
1332
|
+
else if (supportsAdaptiveThinking(model)) {
|
|
1310
1333
|
if (!skip || !skip.has('thinking')) {
|
|
1311
1334
|
ccRequest.thinking = { type: 'adaptive' };
|
|
1312
1335
|
}
|
package/dist/cli.js
CHANGED
|
@@ -518,6 +518,11 @@ async function proxy() {
|
|
|
518
518
|
// Falls back to DARIO_SKIP_FIELDS env var. See ProxyOptions.skipFields
|
|
519
519
|
// for rationale.
|
|
520
520
|
const skipFields = parseSkipFieldsFlag(args, process.env['DARIO_SKIP_FIELDS']);
|
|
521
|
+
// --honor-client-thinking — pass through the client body's `thinking`
|
|
522
|
+
// field instead of dario's default CC-style `{type:'adaptive'}`. See
|
|
523
|
+
// ProxyOptions.honorClientThinking for rationale.
|
|
524
|
+
const honorClientThinking = args.includes('--honor-client-thinking')
|
|
525
|
+
|| ['1', 'true', 'yes', 'on'].includes((process.env['DARIO_HONOR_CLIENT_THINKING'] ?? '').toLowerCase());
|
|
521
526
|
// Non-loopback bind without DARIO_API_KEY turns dario into an open
|
|
522
527
|
// OAuth-subscription relay for anyone on the reachable network. Refuse
|
|
523
528
|
// to start rather than rely on the operator to read the startup banner.
|
|
@@ -537,7 +542,7 @@ async function proxy() {
|
|
|
537
542
|
console.error(`[dario] Override (not recommended): pass --unsafe-no-auth if you have out-of-band network controls and accept the risk.`);
|
|
538
543
|
process.exit(1);
|
|
539
544
|
}
|
|
540
|
-
await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, mergeTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, thinkTimeBaseMs, thinkTimePerTokenMs, thinkTimeJitterMs, thinkTimeMaxMs, sessionStartMinMs, sessionStartJitterMs, stealth, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort, maxTokens, logFile, passthroughBetas, skipFields, systemPrompt, overageGuardEnabled, overageGuardBehavior, overageGuardCooldownMs, overageGuardNotifyOs });
|
|
545
|
+
await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, mergeTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, thinkTimeBaseMs, thinkTimePerTokenMs, thinkTimeJitterMs, thinkTimeMaxMs, sessionStartMinMs, sessionStartJitterMs, stealth, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort, maxTokens, logFile, passthroughBetas, skipFields, systemPrompt, overageGuardEnabled, overageGuardBehavior, overageGuardCooldownMs, overageGuardNotifyOs, honorClientThinking });
|
|
541
546
|
}
|
|
542
547
|
/**
|
|
543
548
|
* Parse `--system-prompt=<verbatim|partial|aggressive|filepath>` (or the
|
|
@@ -1384,6 +1389,19 @@ async function help() {
|
|
|
1384
1389
|
that rejects the field despite the beta
|
|
1385
1390
|
header. Env: DARIO_SKIP_FIELDS.
|
|
1386
1391
|
|
|
1392
|
+
--honor-client-thinking Pass the client body's \`thinking\` field
|
|
1393
|
+
through to upstream instead of dario's
|
|
1394
|
+
default \`{type:"adaptive"}\`. Lets SDK
|
|
1395
|
+
clients explicitly enable extended
|
|
1396
|
+
thinking with their own budget (e.g.
|
|
1397
|
+
\`{type:"enabled", budget_tokens:8000}\`).
|
|
1398
|
+
When honored, the paired
|
|
1399
|
+
\`context_management.clear_thinking_*\`
|
|
1400
|
+
edit is suppressed (shape-specific).
|
|
1401
|
+
No effect on Haiku or when client omits
|
|
1402
|
+
\`thinking\`. Env:
|
|
1403
|
+
DARIO_HONOR_CLIENT_THINKING.
|
|
1404
|
+
|
|
1387
1405
|
--upstream-proxy=URL / --via=URL
|
|
1388
1406
|
Route all of dario's outbound fetch
|
|
1389
1407
|
calls (api.anthropic.com, OpenAI-compat
|
package/dist/proxy.d.ts
CHANGED
|
@@ -155,6 +155,25 @@ interface ProxyOptions {
|
|
|
155
155
|
* so Max billing pool routing is unchanged.
|
|
156
156
|
*/
|
|
157
157
|
skipFields?: string[];
|
|
158
|
+
/**
|
|
159
|
+
* When set, an inbound client body's `thinking` field (e.g.
|
|
160
|
+
* `{type:"enabled", budget_tokens:N}` or `{type:"adaptive"}`) is passed
|
|
161
|
+
* through to the upstream INSTEAD of dario's default CC-style
|
|
162
|
+
* `{type:"adaptive"}`. SDK clients hitting dario can therefore explicitly
|
|
163
|
+
* enable extended thinking with their own budget, rather than being
|
|
164
|
+
* locked to CC's default adaptive shape.
|
|
165
|
+
*
|
|
166
|
+
* Side effect: when honored, dario also suppresses its
|
|
167
|
+
* `context_management.clear_thinking_*` edit — that edit is tuned for
|
|
168
|
+
* `type:"adaptive"` and pairing it with `type:"enabled"` 400s upstream.
|
|
169
|
+
* The client takes responsibility for the request shape as a whole.
|
|
170
|
+
*
|
|
171
|
+
* No effect on Haiku (which skips thinking by construction) or when the
|
|
172
|
+
* client doesn't supply a `thinking` field. CC clients are unaffected.
|
|
173
|
+
*
|
|
174
|
+
* Env: DARIO_HONOR_CLIENT_THINKING=1.
|
|
175
|
+
*/
|
|
176
|
+
honorClientThinking?: boolean;
|
|
158
177
|
/**
|
|
159
178
|
* System-prompt mode for the Claude backend. Empirically validated as
|
|
160
179
|
* unfingerprinted by the billing classifier in docs/research/system-prompt-classifier-study.md.
|
package/dist/proxy.js
CHANGED
|
@@ -1391,6 +1391,7 @@ export async function startProxy(opts = {}) {
|
|
|
1391
1391
|
maxTokens: opts.maxTokens,
|
|
1392
1392
|
systemPrompt: opts.systemPrompt,
|
|
1393
1393
|
skipFields,
|
|
1394
|
+
honorClientThinking: opts.honorClientThinking ?? false,
|
|
1394
1395
|
});
|
|
1395
1396
|
detectedClientForLog = detectedClient;
|
|
1396
1397
|
preserveToolsEffective = Boolean(opts.preserveTools)
|
|
@@ -1945,12 +1946,19 @@ export async function startProxy(opts = {}) {
|
|
|
1945
1946
|
res.writeHead(upstream.status, responseHeaders);
|
|
1946
1947
|
if (isStream && upstream.body) {
|
|
1947
1948
|
// Analytics accumulators for streaming responses — filled by parsing
|
|
1948
|
-
// message_start / message_delta SSE events as they flow through.
|
|
1949
|
+
// message_start / message_delta / content_block_delta SSE events as
|
|
1950
|
+
// they flow through. Token capture must run regardless of pool mode:
|
|
1951
|
+
// gating on `poolAccount` (non-null only in multi-account installs)
|
|
1952
|
+
// skipped the parser entirely on single-account setups, so the
|
|
1953
|
+
// analytics.record() call below persisted zeros for input/output
|
|
1954
|
+
// tokens. SDK streaming clients on single-account installs had their
|
|
1955
|
+
// token usage invisible in /analytics until this fix.
|
|
1949
1956
|
let streamInputTokens = 0;
|
|
1950
1957
|
let streamOutputTokens = 0;
|
|
1951
1958
|
let streamCacheReadTokens = 0;
|
|
1952
1959
|
let streamCacheCreateTokens = 0;
|
|
1953
|
-
|
|
1960
|
+
let streamThinkingChars = 0;
|
|
1961
|
+
const analyticsDecoder = analytics ? new TextDecoder() : null;
|
|
1954
1962
|
let analyticsBuffer = '';
|
|
1955
1963
|
// Stream SSE chunks through
|
|
1956
1964
|
const reader = upstream.body.getReader();
|
|
@@ -2003,6 +2011,17 @@ export async function startProxy(opts = {}) {
|
|
|
2003
2011
|
if (u?.output_tokens)
|
|
2004
2012
|
streamOutputTokens = u.output_tokens;
|
|
2005
2013
|
}
|
|
2014
|
+
else if (e.type === 'content_block_delta') {
|
|
2015
|
+
// Mirror the non-streaming parseUsage thinking-token
|
|
2016
|
+
// heuristic: ~4 characters per token across thinking_delta
|
|
2017
|
+
// events. Closer than 0, and the same formula the parser
|
|
2018
|
+
// applies for buffered responses, so streaming + non-
|
|
2019
|
+
// streaming numbers stay comparable.
|
|
2020
|
+
const d = e.delta;
|
|
2021
|
+
if (d?.type === 'thinking_delta' && typeof d.thinking === 'string') {
|
|
2022
|
+
streamThinkingChars += d.thinking.length;
|
|
2023
|
+
}
|
|
2024
|
+
}
|
|
2006
2025
|
}
|
|
2007
2026
|
catch { /* ignore malformed SSE events */ }
|
|
2008
2027
|
}
|
|
@@ -2082,7 +2101,7 @@ export async function startProxy(opts = {}) {
|
|
|
2082
2101
|
model: requestModel,
|
|
2083
2102
|
inputTokens: streamInputTokens, outputTokens: streamOutputTokens,
|
|
2084
2103
|
cacheReadTokens: streamCacheReadTokens, cacheCreateTokens: streamCacheCreateTokens,
|
|
2085
|
-
thinkingTokens:
|
|
2104
|
+
thinkingTokens: Math.round(streamThinkingChars / 4),
|
|
2086
2105
|
claim: rl.claim, util5h: rl.util5h, util7d: rl.util7d, overageUtil: rl.overageUtil,
|
|
2087
2106
|
latencyMs: Date.now() - startTime, status: upstream.status, isStream: true, isOpenAI,
|
|
2088
2107
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "4.8.
|
|
3
|
+
"version": "4.8.3",
|
|
4
4
|
"description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|