@askalf/dario 4.8.0 → 4.8.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -509,12 +509,8 @@ Ordered by relevance to a dario reader — projects that route through dario fir
509
509
 
510
510
  | Project | What it does |
511
511
  |---|---|
512
- | [arnie](https://github.com/askalf/arnie) | Portable IT troubleshooting agent — networking, AD, package managers, log triage. Routes through dario for subscription billing. |
513
512
  | [hands](https://github.com/askalf/hands) | Cross-platform computer-use agent — your LLM on your mouse, keyboard, and screen. Windows + macOS + Linux. Routes through dario or any Anthropic-compat. |
514
513
  | [deepdive](https://github.com/askalf/deepdive) | Local research agent. One command, cited answer. Plan → search → headless fetch → extract → synthesize. Every LLM call through your own router. |
515
- | [claude-sync](https://github.com/askalf/claude-sync) | Sync Claude Code sessions across machines. Pack a CC session into a portable `.ccsync` file, ship it via Dropbox / iCloud / USB, unpack on the other side. |
516
514
  | [browser-bridge](https://github.com/askalf/browser-bridge) | Stealth headless Chromium in a container, CDP on 9222. Connect from Playwright, Puppeteer, MCP browser tools, any agent that wants a remote browser. |
517
- | [install-kit](https://github.com/askalf/install-kit) | curl-pipe-bash template for self-hosted Docker apps — banner, prereq probes, `.env` scaffolding with crypto-rand secrets, healthcheck wait loop. |
518
515
  | [pgflex](https://github.com/askalf/pgflex) | One Postgres API, two modes — real PostgreSQL for production, PGlite (in-process WASM) for standalone / dev. Same SQL, drop the server when you don't need it. |
519
516
  | [redisflex](https://github.com/askalf/redisflex) | One Redis API, two modes — ioredis for production, in-process Map+EventEmitter for dev. Includes a BullMQ-shaped in-memory queue. |
520
- | [git-providers](https://github.com/askalf/git-providers) | One `GitProvider` interface for GitHub + GitLab + Bitbucket Cloud, plus a 44-entry api-key-provider taxonomy (cloud / CI / monitoring / analytics / ...). |
@@ -318,6 +318,7 @@ export declare function buildCCRequest(clientBody: Record<string, unknown>, bill
318
318
  maxTokens?: number | 'client';
319
319
  systemPrompt?: string;
320
320
  skipFields?: ReadonlySet<string>;
321
+ honorClientThinking?: boolean;
321
322
  }): {
322
323
  body: Record<string, unknown>;
323
324
  toolMap: Map<string, ToolMapping>;
@@ -1306,7 +1306,30 @@ export function buildCCRequest(clientBody, billingTag, cacheControl, identity, o
1306
1306
  // absent). See dario#87.
1307
1307
  if (!isHaiku) {
1308
1308
  const skip = opts.skipFields;
1309
- if (supportsAdaptiveThinking(model)) {
1309
+ // Client-supplied thinking shape takes precedence when honorClientThinking
1310
+ // is enabled. SDK clients (vs CC) sometimes need explicit control over
1311
+ // budget_tokens or the type='enabled' vs type='adaptive' choice — e.g.
1312
+ // an agent that wants 8k thinking tokens for hard problems, or a model
1313
+ // that supports thinking but not the 4.6-era adaptive variant. dario's
1314
+ // default builds the CC-style adaptive shape, which is fine for CC
1315
+ // clients but doesn't expose the budget knob to others.
1316
+ //
1317
+ // When honored, we also suppress dario's clear_thinking_* context-edit
1318
+ // pair — that edit is tuned for type='adaptive' and the client's shape
1319
+ // takes responsibility for the request as a whole. Effort still ships.
1320
+ const clientThinking = (clientBody.thinking ?? null);
1321
+ const honoredClientThinking = Boolean(opts.honorClientThinking
1322
+ && clientThinking
1323
+ && typeof clientThinking === 'object'
1324
+ && typeof clientThinking['type'] === 'string');
1325
+ if (honoredClientThinking) {
1326
+ if (!skip || !skip.has('thinking')) {
1327
+ ccRequest.thinking = clientThinking;
1328
+ }
1329
+ // Intentionally do NOT inject context_management.clear_thinking_*
1330
+ // when honoring client thinking — the pairing is shape-specific.
1331
+ }
1332
+ else if (supportsAdaptiveThinking(model)) {
1310
1333
  if (!skip || !skip.has('thinking')) {
1311
1334
  ccRequest.thinking = { type: 'adaptive' };
1312
1335
  }
package/dist/cli.js CHANGED
@@ -518,6 +518,11 @@ async function proxy() {
518
518
  // Falls back to DARIO_SKIP_FIELDS env var. See ProxyOptions.skipFields
519
519
  // for rationale.
520
520
  const skipFields = parseSkipFieldsFlag(args, process.env['DARIO_SKIP_FIELDS']);
521
+ // --honor-client-thinking — pass through the client body's `thinking`
522
+ // field instead of dario's default CC-style `{type:'adaptive'}`. See
523
+ // ProxyOptions.honorClientThinking for rationale.
524
+ const honorClientThinking = args.includes('--honor-client-thinking')
525
+ || ['1', 'true', 'yes', 'on'].includes((process.env['DARIO_HONOR_CLIENT_THINKING'] ?? '').toLowerCase());
521
526
  // Non-loopback bind without DARIO_API_KEY turns dario into an open
522
527
  // OAuth-subscription relay for anyone on the reachable network. Refuse
523
528
  // to start rather than rely on the operator to read the startup banner.
@@ -537,7 +542,7 @@ async function proxy() {
537
542
  console.error(`[dario] Override (not recommended): pass --unsafe-no-auth if you have out-of-band network controls and accept the risk.`);
538
543
  process.exit(1);
539
544
  }
540
- await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, mergeTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, thinkTimeBaseMs, thinkTimePerTokenMs, thinkTimeJitterMs, thinkTimeMaxMs, sessionStartMinMs, sessionStartJitterMs, stealth, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort, maxTokens, logFile, passthroughBetas, skipFields, systemPrompt, overageGuardEnabled, overageGuardBehavior, overageGuardCooldownMs, overageGuardNotifyOs });
545
+ await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, mergeTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, thinkTimeBaseMs, thinkTimePerTokenMs, thinkTimeJitterMs, thinkTimeMaxMs, sessionStartMinMs, sessionStartJitterMs, stealth, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort, maxTokens, logFile, passthroughBetas, skipFields, systemPrompt, overageGuardEnabled, overageGuardBehavior, overageGuardCooldownMs, overageGuardNotifyOs, honorClientThinking });
541
546
  }
542
547
  /**
543
548
  * Parse `--system-prompt=<verbatim|partial|aggressive|filepath>` (or the
@@ -1384,6 +1389,19 @@ async function help() {
1384
1389
  that rejects the field despite the beta
1385
1390
  header. Env: DARIO_SKIP_FIELDS.
1386
1391
 
1392
+ --honor-client-thinking Pass the client body's \`thinking\` field
1393
+ through to upstream instead of dario's
1394
+ default \`{type:"adaptive"}\`. Lets SDK
1395
+ clients explicitly enable extended
1396
+ thinking with their own budget (e.g.
1397
+ \`{type:"enabled", budget_tokens:8000}\`).
1398
+ When honored, the paired
1399
+ \`context_management.clear_thinking_*\`
1400
+ edit is suppressed (shape-specific).
1401
+ No effect on Haiku or when client omits
1402
+ \`thinking\`. Env:
1403
+ DARIO_HONOR_CLIENT_THINKING.
1404
+
1387
1405
  --upstream-proxy=URL / --via=URL
1388
1406
  Route all of dario's outbound fetch
1389
1407
  calls (api.anthropic.com, OpenAI-compat
@@ -282,7 +282,7 @@ export declare function _resetInstalledVersionProbeForTest(): void;
282
282
  */
283
283
  export declare const SUPPORTED_CC_RANGE: {
284
284
  readonly min: "1.0.0";
285
- readonly maxTested: "2.1.143";
285
+ readonly maxTested: "2.1.144";
286
286
  };
287
287
  /**
288
288
  * Compare two dotted-numeric version strings. Returns negative if `a<b`,
@@ -777,7 +777,7 @@ export function _resetInstalledVersionProbeForTest() {
777
777
  */
778
778
  export const SUPPORTED_CC_RANGE = {
779
779
  min: '1.0.0',
780
- maxTested: '2.1.143',
780
+ maxTested: '2.1.144',
781
781
  };
782
782
  /**
783
783
  * Compare two dotted-numeric version strings. Returns negative if `a<b`,
package/dist/proxy.d.ts CHANGED
@@ -155,6 +155,25 @@ interface ProxyOptions {
155
155
  * so Max billing pool routing is unchanged.
156
156
  */
157
157
  skipFields?: string[];
158
+ /**
159
+ * When set, an inbound client body's `thinking` field (e.g.
160
+ * `{type:"enabled", budget_tokens:N}` or `{type:"adaptive"}`) is passed
161
+ * through to the upstream INSTEAD of dario's default CC-style
162
+ * `{type:"adaptive"}`. SDK clients hitting dario can therefore explicitly
163
+ * enable extended thinking with their own budget, rather than being
164
+ * locked to CC's default adaptive shape.
165
+ *
166
+ * Side effect: when honored, dario also suppresses its
167
+ * `context_management.clear_thinking_*` edit — that edit is tuned for
168
+ * `type:"adaptive"` and pairing it with `type:"enabled"` 400s upstream.
169
+ * The client takes responsibility for the request shape as a whole.
170
+ *
171
+ * No effect on Haiku (which skips thinking by construction) or when the
172
+ * client doesn't supply a `thinking` field. CC clients are unaffected.
173
+ *
174
+ * Env: DARIO_HONOR_CLIENT_THINKING=1.
175
+ */
176
+ honorClientThinking?: boolean;
158
177
  /**
159
178
  * System-prompt mode for the Claude backend. Empirically validated as
160
179
  * unfingerprinted by the billing classifier in docs/research/system-prompt-classifier-study.md.
package/dist/proxy.js CHANGED
@@ -1391,6 +1391,7 @@ export async function startProxy(opts = {}) {
1391
1391
  maxTokens: opts.maxTokens,
1392
1392
  systemPrompt: opts.systemPrompt,
1393
1393
  skipFields,
1394
+ honorClientThinking: opts.honorClientThinking ?? false,
1394
1395
  });
1395
1396
  detectedClientForLog = detectedClient;
1396
1397
  preserveToolsEffective = Boolean(opts.preserveTools)
@@ -1945,12 +1946,19 @@ export async function startProxy(opts = {}) {
1945
1946
  res.writeHead(upstream.status, responseHeaders);
1946
1947
  if (isStream && upstream.body) {
1947
1948
  // Analytics accumulators for streaming responses — filled by parsing
1948
- // message_start / message_delta SSE events as they flow through.
1949
+ // message_start / message_delta / content_block_delta SSE events as
1950
+ // they flow through. Token capture must run regardless of pool mode:
1951
+ // gating on `poolAccount` (non-null only in multi-account installs)
1952
+ // skipped the parser entirely on single-account setups, so the
1953
+ // analytics.record() call below persisted zeros for input/output
1954
+ // tokens. SDK streaming clients on single-account installs had their
1955
+ // token usage invisible in /analytics until this fix.
1949
1956
  let streamInputTokens = 0;
1950
1957
  let streamOutputTokens = 0;
1951
1958
  let streamCacheReadTokens = 0;
1952
1959
  let streamCacheCreateTokens = 0;
1953
- const analyticsDecoder = (analytics && poolAccount) ? new TextDecoder() : null;
1960
+ let streamThinkingChars = 0;
1961
+ const analyticsDecoder = analytics ? new TextDecoder() : null;
1954
1962
  let analyticsBuffer = '';
1955
1963
  // Stream SSE chunks through
1956
1964
  const reader = upstream.body.getReader();
@@ -2003,6 +2011,17 @@ export async function startProxy(opts = {}) {
2003
2011
  if (u?.output_tokens)
2004
2012
  streamOutputTokens = u.output_tokens;
2005
2013
  }
2014
+ else if (e.type === 'content_block_delta') {
2015
+ // Mirror the non-streaming parseUsage thinking-token
2016
+ // heuristic: ~4 characters per token across thinking_delta
2017
+ // events. Closer than 0, and the same formula the parser
2018
+ // applies for buffered responses, so streaming + non-
2019
+ // streaming numbers stay comparable.
2020
+ const d = e.delta;
2021
+ if (d?.type === 'thinking_delta' && typeof d.thinking === 'string') {
2022
+ streamThinkingChars += d.thinking.length;
2023
+ }
2024
+ }
2006
2025
  }
2007
2026
  catch { /* ignore malformed SSE events */ }
2008
2027
  }
@@ -2082,7 +2101,7 @@ export async function startProxy(opts = {}) {
2082
2101
  model: requestModel,
2083
2102
  inputTokens: streamInputTokens, outputTokens: streamOutputTokens,
2084
2103
  cacheReadTokens: streamCacheReadTokens, cacheCreateTokens: streamCacheCreateTokens,
2085
- thinkingTokens: 0,
2104
+ thinkingTokens: Math.round(streamThinkingChars / 4),
2086
2105
  claim: rl.claim, util5h: rl.util5h, util7d: rl.util7d, overageUtil: rl.overageUtil,
2087
2106
  latencyMs: Date.now() - startTime, status: upstream.status, isStream: true, isOpenAI,
2088
2107
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "4.8.0",
3
+ "version": "4.8.3",
4
4
  "description": "Use your Claude Pro/Max subscription in any tool — Cursor, Cline, Aider, the Agent SDK, your scripts — at subscription pricing, not per-token API bills. One local Anthropic + OpenAI-compatible endpoint.",
5
5
  "type": "module",
6
6
  "bin": {