@askalf/dario 3.31.20 → 3.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/doctor.js CHANGED
@@ -210,6 +210,38 @@ export async function runChecks(opts = {}) {
210
210
  catch (err) {
211
211
  checks.push({ status: 'fail', label: 'Template', detail: `load failed: ${err.message}` });
212
212
  }
213
+ // ---- Per-request overhead surfacing.
214
+ // The CC system prompt + tool definitions are injected into every
215
+ // non-passthrough request and dominate the input-token cost on small
216
+ // turns. Anthropic caches them after the first hit (cache_creation
217
+ // tokens on call 1, then cache_read on subsequent calls within the
218
+ // 5-min/1-hr TTL), but non-CC users routing heavy tooling get
219
+ // surprised by the first-request charge. Surface the size up front
220
+ // so they can plan.
221
+ //
222
+ // No token estimate — char counts and tool count are factual; the
223
+ // tokenizer ratio varies enough between prose and tool-schema JSON
224
+ // (compressible structural keys) that any single divisor is
225
+ // misleading. Operators who want the exact number can read it off
226
+ // their first request's `cache_creation_input_tokens` once the proxy
227
+ // is warm. `--usage` adds the live snapshot for those who want it.
228
+ try {
229
+ const promptChars = CC_TEMPLATE.system_prompt?.length ?? 0;
230
+ const toolCount = (CC_TEMPLATE.tools ?? []).length;
231
+ const toolChars = JSON.stringify(CC_TEMPLATE.tools ?? []).length;
232
+ if (promptChars > 0 || toolCount > 0) {
233
+ checks.push({
234
+ status: 'info',
235
+ label: 'Overhead',
236
+ detail: `${promptChars.toLocaleString()} chars system prompt + ${toolCount} tool defs ` +
237
+ `(${toolChars.toLocaleString()} chars JSON-serialized) injected per non-passthrough ` +
238
+ `request. Cached after first hit; read-cost only on subsequent calls within ` +
239
+ `the 5-min/1-hr TTL. Exact token count surfaces as cache_creation_input_tokens ` +
240
+ `on the first response (or run \`dario doctor --usage\`).`,
241
+ });
242
+ }
243
+ }
244
+ catch { /* don't let overhead reporting break the doctor */ }
213
245
  // ---- Template drift
214
246
  try {
215
247
  const drift = detectDrift(CC_TEMPLATE);
@@ -429,6 +461,43 @@ export async function runChecks(opts = {}) {
429
461
  (expired > 0 ? `, ${expired} expired` : '') +
430
462
  (aliases.length < 2 ? ' (pool activates at 2+)' : ''),
431
463
  });
464
+ // Next-account-in-rotation surfacing. The proxy's per-request
465
+ // selector picks by max headroom (with 7d_<family> per-model
466
+ // bucket considered when a request's model family is known);
467
+ // doctor doesn't know the next request's model so it reports
468
+ // the family-agnostic pick. That's still the right preview for
469
+ // operators wondering "if I send a request right now, which
470
+ // account gets it?" — it matches `pool.select()` with no family
471
+ // hint, the same call the proxy uses when no model is parsed
472
+ // yet (e.g. on misshapen requests). Bypassed when only one
473
+ // account is loaded since "rotation" doesn't apply.
474
+ if (aliases.length >= 2) {
475
+ try {
476
+ const { AccountPool } = await import('./pool.js');
477
+ const pool = new AccountPool();
478
+ for (const acc of loaded) {
479
+ pool.add(acc.alias, {
480
+ accessToken: acc.accessToken,
481
+ refreshToken: acc.refreshToken,
482
+ expiresAt: acc.expiresAt,
483
+ deviceId: acc.deviceId,
484
+ accountUuid: acc.accountUuid,
485
+ });
486
+ }
487
+ const next = pool.select();
488
+ const ps = pool.status();
489
+ checks.push({
490
+ status: 'info',
491
+ label: 'Pool routing',
492
+ detail: next
493
+ ? `next: ${next.alias} (max-headroom select; ${ps.healthy}/${ps.accounts} healthy)`
494
+ : `no eligible account — all rejected or near-expiry (${ps.exhausted}/${ps.accounts} exhausted)`,
495
+ });
496
+ }
497
+ catch (err) {
498
+ checks.push({ status: 'warn', label: 'Pool routing', detail: `check failed: ${err.message}` });
499
+ }
500
+ }
432
501
  }
433
502
  }
434
503
  catch (err) {
@@ -282,7 +282,7 @@ export declare function _resetInstalledVersionProbeForTest(): void;
282
282
  */
283
283
  export declare const SUPPORTED_CC_RANGE: {
284
284
  readonly min: "1.0.0";
285
- readonly maxTested: "2.1.120";
285
+ readonly maxTested: "2.1.121";
286
286
  };
287
287
  /**
288
288
  * Compare two dotted-numeric version strings. Returns negative if `a<b`,
@@ -777,7 +777,7 @@ export function _resetInstalledVersionProbeForTest() {
777
777
  */
778
778
  export const SUPPORTED_CC_RANGE = {
779
779
  min: '1.0.0',
780
- maxTested: '2.1.120',
780
+ maxTested: '2.1.121',
781
781
  };
782
782
  /**
783
783
  * Compare two dotted-numeric version strings. Returns negative if `a<b`,
@@ -22,6 +22,32 @@ import type { McpTool } from './protocol.js';
22
22
  * tests can substitute pure synthetic data to avoid touching network /
23
23
  * filesystem / OAuth state.
24
24
  */
25
+ /**
26
+ * Shape returned by the `usage` data source. Mirrors the public subset
27
+ * of /analytics — keeps the MCP tool decoupled from internal Analytics
28
+ * record fields. Single-account mode returns `{ mode: 'single-account' }`
29
+ * with no stats since Analytics only collects in pool mode.
30
+ */
31
+ export interface UsageSummary {
32
+ mode: 'pool' | 'single-account';
33
+ reachable: boolean;
34
+ port?: number;
35
+ detail?: string;
36
+ window?: {
37
+ minutes: number;
38
+ requests: number;
39
+ totalInputTokens: number;
40
+ totalOutputTokens: number;
41
+ avgLatencyMs: number;
42
+ errorRate: number;
43
+ subscriptionPercent: number;
44
+ estimatedCost: number;
45
+ };
46
+ perAccount?: Record<string, {
47
+ requests: number;
48
+ subscriptionPercent: number;
49
+ }>;
50
+ }
25
51
  export interface ToolDataSources {
26
52
  doctor: () => Promise<Array<{
27
53
  status: string;
@@ -58,6 +84,8 @@ export interface ToolDataSources {
58
84
  templateSource: string;
59
85
  templateSchema: number | null;
60
86
  }>;
87
+ /** Burn-rate / consumption summary; see UsageSummary for the shape. */
88
+ usage: () => Promise<UsageSummary>;
61
89
  darioVersion: () => string;
62
90
  }
63
91
  export declare function buildToolRegistry(data: ToolDataSources): McpTool[];
package/dist/mcp/tools.js CHANGED
@@ -131,6 +131,56 @@ export function buildToolRegistry(data) {
131
131
  return textResult(lines.join('\n'));
132
132
  },
133
133
  },
134
+ {
135
+ name: 'usage',
136
+ description: 'Burn-rate summary of the running dario proxy\'s traffic over the last 60 minutes: requests, token totals, subscription % vs. extra-usage, per-account rotation if pool mode is on. Read-only — fetches /analytics from the local proxy. Returns a compact text summary; pair with the `dario usage --json` CLI for the full /analytics payload.',
137
+ inputSchema: emptyObjectSchema,
138
+ handler: async () => {
139
+ const u = await data.usage();
140
+ if (!u.reachable) {
141
+ const lines = [];
142
+ lines.push(`Proxy not reachable on port ${u.port ?? 3456}.`);
143
+ if (u.detail)
144
+ lines.push(`Detail: ${u.detail}`);
145
+ lines.push('');
146
+ lines.push('`usage` summarizes traffic from a running proxy. Start `dario proxy`, then re-call.');
147
+ lines.push('For a one-off rate-limit snapshot, run `dario doctor --usage` (~1 subscription request).');
148
+ return textResult(lines.join('\n'), true);
149
+ }
150
+ if (u.mode === 'single-account') {
151
+ return textResult([
152
+ 'Mode: single-account',
153
+ '',
154
+ 'Analytics history is collected only in pool mode (2+ accounts in ~/.dario/accounts/).',
155
+ 'For a one-off rate-limit snapshot from Anthropic, run `dario doctor --usage`.',
156
+ ].join('\n'));
157
+ }
158
+ const lines = [];
159
+ const w = u.window;
160
+ lines.push('Mode: pool');
161
+ lines.push(`Window: last ${w?.minutes ?? 60} minutes`);
162
+ lines.push(`Requests: ${w?.requests ?? 0}`);
163
+ if (w && w.requests > 0) {
164
+ lines.push(`Input tokens: ${w.totalInputTokens.toLocaleString()}`);
165
+ lines.push(`Output tokens: ${w.totalOutputTokens.toLocaleString()}`);
166
+ lines.push(`Avg latency: ${w.avgLatencyMs} ms`);
167
+ if (w.errorRate > 0)
168
+ lines.push(`Error rate: ${(w.errorRate * 100).toFixed(1)}%`);
169
+ lines.push(`Subscription %: ${w.subscriptionPercent}%`);
170
+ if (w.estimatedCost > 0)
171
+ lines.push(`Est. cost: $${w.estimatedCost.toFixed(4)} (would-be API cost)`);
172
+ }
173
+ if (u.perAccount && Object.keys(u.perAccount).length > 0) {
174
+ lines.push('');
175
+ lines.push('Per-account:');
176
+ const aliasWidth = Math.max(...Object.keys(u.perAccount).map((a) => a.length));
177
+ for (const [alias, stats] of Object.entries(u.perAccount)) {
178
+ lines.push(` ${alias.padEnd(aliasWidth)} ${stats.requests} req${stats.requests === 1 ? '' : 's'} (${stats.subscriptionPercent}% subscription)`);
179
+ }
180
+ }
181
+ return textResult(lines.join('\n'));
182
+ },
183
+ },
134
184
  ];
135
185
  }
136
186
  /**
@@ -187,9 +237,63 @@ export async function buildDefaultToolRegistry() {
187
237
  templateSchema: tmpl._schemaVersion ?? null,
188
238
  };
189
239
  },
240
+ usage: async () => fetchUsage(),
190
241
  darioVersion: () => pkgVersion,
191
242
  });
192
243
  }
244
+ /**
245
+ * Fetch the local proxy's `/analytics` endpoint and shape it into the
246
+ * MCP-tool surface. Port resolution mirrors `dario usage`:
247
+ * DARIO_USAGE_PORT, then DARIO_PORT (proxy's own default-port env), then
248
+ * 3456. 3-second timeout — we don't block the MCP client on a slow
249
+ * proxy.
250
+ *
251
+ * Failure modes are returned as `{ reachable: false, detail }` rather
252
+ * than thrown, so the tool handler can present a helpful message
253
+ * instead of a generic protocol error.
254
+ */
255
+ async function fetchUsage() {
256
+ const port = process.env.DARIO_USAGE_PORT
257
+ ? parseInt(process.env.DARIO_USAGE_PORT, 10)
258
+ : process.env.DARIO_PORT
259
+ ? parseInt(process.env.DARIO_PORT, 10)
260
+ : 3456;
261
+ try {
262
+ const res = await fetch(`http://127.0.0.1:${port}/analytics`, { signal: AbortSignal.timeout(3000) });
263
+ if (!res.ok) {
264
+ return { mode: 'pool', reachable: false, port, detail: `HTTP ${res.status}` };
265
+ }
266
+ const body = await res.json();
267
+ if (body.mode === 'single-account') {
268
+ return { mode: 'single-account', reachable: true, port };
269
+ }
270
+ const w = body.window;
271
+ return {
272
+ mode: 'pool',
273
+ reachable: true,
274
+ port,
275
+ window: {
276
+ minutes: w?.minutes ?? 60,
277
+ requests: w?.requests ?? 0,
278
+ totalInputTokens: w?.totalInputTokens ?? 0,
279
+ totalOutputTokens: w?.totalOutputTokens ?? 0,
280
+ avgLatencyMs: w?.avgLatencyMs ?? 0,
281
+ errorRate: w?.errorRate ?? 0,
282
+ subscriptionPercent: w?.subscriptionPercent ?? 0,
283
+ estimatedCost: w?.estimatedCost ?? 0,
284
+ },
285
+ perAccount: body.perAccount,
286
+ };
287
+ }
288
+ catch (err) {
289
+ return {
290
+ mode: 'pool',
291
+ reachable: false,
292
+ port,
293
+ detail: err instanceof Error ? err.message : String(err),
294
+ };
295
+ }
296
+ }
193
297
  async function readDarioVersion() {
194
298
  try {
195
299
  const { readFileSync } = await import('node:fs');
package/dist/proxy.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { type IncomingMessage } from 'node:http';
2
+ import { type WriteStream } from 'node:fs';
2
3
  import { type EffortValue } from './cc-template.js';
3
4
  export declare function parseProviderPrefix(model: string): {
4
5
  provider: 'openai' | 'claude';
@@ -36,6 +37,15 @@ interface ProxyOptions {
36
37
  passthrough?: boolean;
37
38
  preserveTools?: boolean;
38
39
  hybridTools?: boolean;
40
+ /**
41
+ * Merge mode: send CC's canonical tools first, append client tools after
42
+ * (deduped by name). Mutually exclusive with preserveTools and hybridTools
43
+ * — proxy startup enforces the mutex. Experimental: Anthropic's billing
44
+ * classifier may treat the appended tail as a divergence from CC's wire
45
+ * shape and flip routing. Verify locally with `--verbose` and watch the
46
+ * billing-bucket line on the first 1-2 requests before relying on it.
47
+ */
48
+ mergeTools?: boolean;
39
49
  noAutoDetect?: boolean;
40
50
  strictTls?: boolean;
41
51
  pacingMinMs?: number;
@@ -90,7 +100,64 @@ interface ProxyOptions {
90
100
  * their output capacity. dario#88 (Hermes compat).
91
101
  */
92
102
  maxTokens?: number | 'client';
103
+ /**
104
+ * Append-only request log file. One JSON line per completed request,
105
+ * with secrets scrubbed via redactSecrets. Useful for backgrounded
106
+ * proxies where stdout is unobserved — `verbose` only helps when you
107
+ * can watch the foreground. Off by default; opt in with `--log-file`
108
+ * or `DARIO_LOG_FILE`. Write errors are swallowed (never crash the
109
+ * request path on a log mishap). dario#XYZ.
110
+ */
111
+ logFile?: string;
112
+ /**
113
+ * Beta flags to ALWAYS forward upstream regardless of CC's captured
114
+ * set or the client's anthropic-beta header. Operator declaration
115
+ * that "I know I want these survived through dario's substitution."
116
+ * Bypasses `filterBillableBetas`; still respects the per-account
117
+ * rejected-beta cache (so a flag the upstream 400's gets dropped on
118
+ * the retry rather than re-sent forever). dario passthrough-betas.
119
+ *
120
+ * Sourced from `--passthrough-betas=name1,name2` or
121
+ * DARIO_PASSTHROUGH_BETAS. Empty / undefined leaves current behavior
122
+ * unchanged. Surfaced at startup so operators can see exactly which
123
+ * flags are pinned-on; surfaced again per request when one of the
124
+ * pinned flags has been rejected and is therefore being dropped.
125
+ */
126
+ passthroughBetas?: string[];
93
127
  }
128
+ /**
129
+ * One JSON-ND record per completed request. Field set kept narrow to
130
+ * stay grep-friendly and avoid leaking content. No request bodies, no
131
+ * tool args, no headers — those still go through `--verbose-bodies` /
132
+ * DARIO_LOG_BODIES (which has its own opt-in and is foreground-only).
133
+ */
134
+ export interface ProxyLogEntry {
135
+ ts: string;
136
+ req: number;
137
+ method: string;
138
+ path: string;
139
+ model?: string;
140
+ status?: number;
141
+ latency_ms?: number;
142
+ in_tokens?: number;
143
+ out_tokens?: number;
144
+ cache_read?: number;
145
+ cache_create?: number;
146
+ claim?: string;
147
+ bucket?: string;
148
+ account?: string;
149
+ client?: string;
150
+ preserve_tools?: boolean;
151
+ stream?: boolean;
152
+ reject?: string;
153
+ error?: string;
154
+ }
155
+ /**
156
+ * Append a JSON-ND line to the proxy log file. No-op when stream is
157
+ * null (logFile not configured). Errors are swallowed — log writes
158
+ * must never break the request path.
159
+ */
160
+ export declare function writeLogLine(stream: WriteStream | null, entry: ProxyLogEntry): void;
94
161
  export declare function sanitizeError(err: unknown): string;
95
162
  /**
96
163
  * API-key auth via DARIO_API_KEY (x-api-key or Authorization: Bearer).