@askalf/dario 3.10.3 → 3.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,26 @@ export interface RequestRecord {
23
23
  isStream: boolean;
24
24
  isOpenAI: boolean;
25
25
  }
26
+ /**
27
+ * The four billing buckets a request can land in (plus `unknown`), derived from the
28
+ * `anthropic-ratelimit-unified-representative-claim` response header.
29
+ *
30
+ * - `subscription` — request billed against the user's 5h subscription window (Max/Pro)
31
+ * - `subscription_fallback` — server-side fallback subscription bucket (rare, still covered)
32
+ * - `extra_usage` — overage / pay-as-you-go, paid on top of subscription
33
+ * - `api` — pure API key billing, no subscription involved
34
+ * - `unknown` — header absent or unparseable (non-200 responses, stream aborts)
35
+ *
36
+ * Exposed in `/analytics` summaries and in verbose per-request logs so
37
+ * users can see at a glance which bucket their traffic is actually hitting.
38
+ * See #34 for background.
39
+ */
40
+ export type BillingBucket = 'subscription' | 'subscription_fallback' | 'extra_usage' | 'api' | 'unknown';
41
+ /**
42
+ * Map the raw `representative-claim` header value to a human-friendly
43
+ * billing bucket. Pure function; no state; safe to call from any context.
44
+ */
45
+ export declare function billingBucketFromClaim(claim: string | null | undefined): BillingBucket;
26
46
  export declare class Analytics {
27
47
  private records;
28
48
  private maxRecords;
@@ -60,27 +80,30 @@ interface PerModelStat {
60
80
  avgThinkingTokens: number;
61
81
  estimatedCost: number;
62
82
  }
83
/**
 * Aggregate statistics computed over a set of request records. Shared by
 * the rolling-window and all-time sections of `AnalyticsSummary`.
 */
interface WindowStats {
    /** Total input tokens across the records. */
    totalInputTokens: number;
    /** Total output tokens across the records. */
    totalOutputTokens: number;
    /** Total thinking tokens across the records. */
    totalThinkingTokens: number;
    /** Rough cost estimate in USD; derived from a non-authoritative price table. */
    estimatedCost: number;
    /** Mean request latency in milliseconds, rounded to an integer. */
    avgLatencyMs: number;
    /** Fraction (0–1) of records with HTTP status >= 400, rounded to 4 decimals. */
    errorRate: number;
    /** Request count keyed by the raw `representative-claim` value. */
    claimBreakdown: Record<string, number>;
    /** Request count per derived billing bucket. See #34. */
    billingBucketBreakdown: Record<BillingBucket, number>;
    /**
     * Share of *classified* requests (everything except `unknown`) that
     * landed in `subscription` or `subscription_fallback`, as a percentage
     * rounded to two decimals; 0 when no request was classified. A clean
     * subscription setup should read 100. See #34.
     */
    subscriptionPercent: number;
}
63
100
  export interface AnalyticsSummary {
64
- window: {
101
+ window: WindowStats & {
65
102
  minutes: number;
66
103
  requests: number;
67
- totalInputTokens: number;
68
- totalOutputTokens: number;
69
- totalThinkingTokens: number;
70
- estimatedCost: number;
71
- avgLatencyMs: number;
72
- errorRate: number;
73
- claimBreakdown: Record<string, number>;
74
104
  };
75
- allTime: {
105
+ allTime: WindowStats & {
76
106
  requests: number;
77
- totalInputTokens: number;
78
- totalOutputTokens: number;
79
- totalThinkingTokens: number;
80
- estimatedCost: number;
81
- avgLatencyMs: number;
82
- errorRate: number;
83
- claimBreakdown: Record<string, number>;
84
107
  };
85
108
  perAccount: Record<string, PerAccountStat>;
86
109
  perModel: Record<string, PerModelStat>;
package/dist/analytics.js CHANGED
@@ -5,6 +5,24 @@
5
5
  * In-memory rolling window; exposed via the /analytics endpoint when
6
6
  * pool mode is active.
7
7
  */
8
/** Raw `representative-claim` header values and the billing bucket each one denotes. */
const CLAIM_TO_BUCKET = new Map([
    ['five_hour', 'subscription'],
    ['five_hour_fallback', 'subscription_fallback'],
    ['overage', 'extra_usage'],
    ['api', 'api'],
]);
/**
 * Translate the raw `representative-claim` header value into its
 * human-friendly billing bucket.
 *
 * Matching is exact (case- and whitespace-sensitive). Anything that is not
 * one of the known claim strings — including `null` and `undefined` —
 * yields `'unknown'`. Stateless and side-effect free.
 */
export function billingBucketFromClaim(claim) {
    return CLAIM_TO_BUCKET.get(claim) ?? 'unknown';
}
8
26
  // Anthropic pricing (per 1M tokens, USD). Not authoritative — used for
9
27
  // rough burn-rate display in the /analytics summary.
10
28
  const PRICING = {
@@ -74,6 +92,14 @@ export class Analytics {
74
92
  totalInputTokens: 0, totalOutputTokens: 0, totalThinkingTokens: 0,
75
93
  estimatedCost: 0, avgLatencyMs: 0, errorRate: 0,
76
94
  claimBreakdown: {},
95
+ billingBucketBreakdown: {
96
+ subscription: 0,
97
+ subscription_fallback: 0,
98
+ extra_usage: 0,
99
+ api: 0,
100
+ unknown: 0,
101
+ },
102
+ subscriptionPercent: 0,
77
103
  };
78
104
  }
79
105
  const totalInput = records.reduce((s, r) => s + r.inputTokens, 0);
@@ -83,9 +109,22 @@ export class Analytics {
83
109
  const avgLatency = records.reduce((s, r) => s + r.latencyMs, 0) / records.length;
84
110
  const errors = records.filter(r => r.status >= 400).length;
85
111
  const claims = {};
112
+ const buckets = {
113
+ subscription: 0,
114
+ subscription_fallback: 0,
115
+ extra_usage: 0,
116
+ api: 0,
117
+ unknown: 0,
118
+ };
86
119
  for (const r of records) {
87
120
  claims[r.claim] = (claims[r.claim] ?? 0) + 1;
121
+ buckets[billingBucketFromClaim(r.claim)]++;
88
122
  }
123
+ const subscriptionHits = buckets.subscription + buckets.subscription_fallback;
124
+ const billedRequests = records.length - buckets.unknown;
125
+ const subscriptionPct = billedRequests > 0
126
+ ? Math.round((subscriptionHits / billedRequests) * 10000) / 100
127
+ : 0;
89
128
  return {
90
129
  totalInputTokens: totalInput,
91
130
  totalOutputTokens: totalOutput,
@@ -94,6 +133,8 @@ export class Analytics {
94
133
  avgLatencyMs: Math.round(avgLatency),
95
134
  errorRate: Math.round((errors / records.length) * 10000) / 10000,
96
135
  claimBreakdown: claims,
136
+ billingBucketBreakdown: buckets,
137
+ subscriptionPercent: subscriptionPct,
97
138
  };
98
139
  }
99
140
  perAccountStats(records) {
@@ -2,8 +2,10 @@
2
2
  * Claude Code request template.
3
3
  *
4
4
  * Tool definitions, system prompt, and request structure are loaded from
5
- * cc-template-data.json and sent verbatim this gives byte-level fidelity
6
- * with the shape of a real Claude Code request.
5
+ * the live fingerprint cache (captured from the user's own CC install at
6
+ * dario startup) or from the bundled cc-template-data.json snapshot. The
7
+ * live cache self-heals when Anthropic ships a new CC version — no user
8
+ * action required. See src/live-fingerprint.ts for the capture pipeline.
7
9
  */
8
10
  /** CC's exact tool definitions — loaded from the template JSON. */
9
11
  export declare const CC_TOOL_DEFINITIONS: {
@@ -2,15 +2,14 @@
2
2
  * Claude Code request template.
3
3
  *
4
4
  * Tool definitions, system prompt, and request structure are loaded from
5
- * cc-template-data.json and sent verbatim this gives byte-level fidelity
6
- * with the shape of a real Claude Code request.
5
+ * the live fingerprint cache (captured from the user's own CC install at
6
+ * dario startup) or from the bundled cc-template-data.json snapshot. The
7
+ * live cache self-heals when Anthropic ships a new CC version — no user
8
+ * action required. See src/live-fingerprint.ts for the capture pipeline.
7
9
  */
8
- import { readFileSync } from 'node:fs';
9
- import { join, dirname } from 'node:path';
10
- import { fileURLToPath } from 'node:url';
11
- const __dirname = dirname(fileURLToPath(import.meta.url));
12
- // Load template data at module init — fail fast if missing
13
- const TEMPLATE = JSON.parse(readFileSync(join(__dirname, 'cc-template-data.json'), 'utf-8'));
10
+ import { loadTemplate } from './live-fingerprint.js';
11
+ // Load template at module init — prefer live cache, fall back to bundled.
12
+ const TEMPLATE = loadTemplate({ silent: true });
14
13
  /** CC's exact tool definitions — loaded from the template JSON. */
15
14
  export const CC_TOOL_DEFINITIONS = TEMPLATE.tools;
16
15
  /** CC's static system prompt (~25KB). */
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Live fingerprint extraction.
3
+ *
4
+ * At dario startup, spawn the user's actual `claude` binary against a
5
+ * loopback MITM endpoint, capture the outbound /v1/messages request, and
6
+ * use the captured system prompt / tools / agent identity as the template
7
+ * replay source — instead of shipping a stale snapshot in
8
+ * `cc-template-data.json`.
9
+ *
10
+ * The bundled snapshot remains as a fallback for users without CC installed
11
+ * or when live capture fails. Template replay auto-heals on CC updates
12
+ * without any user action.
13
+ *
14
+ * Security: the MITM endpoint only accepts connections from 127.0.0.1 and
15
+ * only runs long enough to capture a single request. CC's OAuth token
16
+ * never leaves the machine — we send CC to a loopback URL that CC itself
17
+ * trusts because we set ANTHROPIC_BASE_URL in the child's environment.
18
+ */
19
/**
 * The pieces of a Claude Code request that are replayed as the template,
 * whether they come from the bundled snapshot or from a live capture.
 */
export interface TemplateData {
    /** CC version the template came from; `'unknown'` when a live capture could not determine it. */
    _version: string;
    /** When the template was captured (an ISO-8601 timestamp for live captures). */
    _captured: string;
    /** Origin of the data, stamped by whichever loader produced it. */
    _source?: 'bundled' | 'live';
    /** Text of the agent-identity system block. */
    agent_identity: string;
    /** Text of the main system prompt block. */
    system_prompt: string;
    /** Tool definitions to replay. */
    tools: {
        name: string;
        description: string;
        input_schema: Record<string, unknown>;
    }[];
    /** Tool names (for live captures: the names of `tools`, in the same order). */
    tool_names: string[];
}
32
+ /**
33
+ * Load the template synchronously. Prefers the live cache (fresh capture
34
+ * from the user's own CC install) and falls back to the bundled snapshot.
35
+ *
36
+ * This is intentionally sync and fast — cc-template calls it once, at
37
+ * module init. The actual capture is async and runs in the
38
+ * background via refreshLiveFingerprintAsync(); its results are written
39
+ * to the cache file and picked up on the next dario startup.
40
+ */
41
+ export declare function loadTemplate(_options?: {
42
+ silent?: boolean;
43
+ }): TemplateData;
44
+ /**
45
+ * Kick off a background live fingerprint capture. Safe to call on every
46
+ * dario proxy startup — no-ops if CC isn't installed, if the cache is
47
+ * already fresh, or if another refresh is in flight. Never throws.
48
+ *
49
+ * Result is written to ~/.dario/cc-template.live.json and picked up on
50
+ * the next dario startup (cc-template.ts loads the cache synchronously
51
+ * at module init).
52
+ */
53
+ export declare function refreshLiveFingerprintAsync(options?: {
54
+ force?: boolean;
55
+ silent?: boolean;
56
+ timeoutMs?: number;
57
+ }): Promise<TemplateData | null>;
58
/** One HTTP request as recorded by the loopback capture server. */
interface CapturedRequest {
    /** HTTP method of the request. */
    method: string;
    /** Request URL as received by the capture server. */
    path: string;
    /** Request headers, flattened to one string per header name. */
    headers: Record<string, string>;
    /** Request body parsed from JSON. */
    body: Record<string, unknown>;
}
64
+ /**
65
+ * Run a loopback MITM server on a random port, spawn CC with
66
+ * ANTHROPIC_BASE_URL pointed at it, wait for one request, respond with a
67
+ * minimal valid SSE stream, and return the captured request.
68
+ *
69
+ * Returns null on timeout or spawn failure. Does not throw.
70
+ */
71
+ export declare function captureLiveTemplateAsync(timeoutMs?: number): Promise<TemplateData | null>;
72
+ /**
73
+ * Given a captured /v1/messages request body, pull out the fields that
74
+ * matter for template replay: agent identity, system prompt, tool list,
75
+ * and CC version (from the billing header or user-agent).
76
+ */
77
+ export declare function extractTemplate(captured: CapturedRequest): TemplateData | null;
78
+ /**
79
+ * Test hook: given a captured request object (from a mocked server or a
80
+ * synthetic fixture), run it through the same extraction path. Exposed so
81
+ * test/live-fingerprint.mjs doesn't need to spawn a real process.
82
+ */
83
+ export declare function _extractTemplateForTest(captured: CapturedRequest): TemplateData | null;
84
+ export {};
@@ -0,0 +1,369 @@
1
+ /**
2
+ * Live fingerprint extraction.
3
+ *
4
+ * At dario startup, spawn the user's actual `claude` binary against a
5
+ * loopback MITM endpoint, capture the outbound /v1/messages request, and
6
+ * use the captured system prompt / tools / agent identity as the template
7
+ * replay source — instead of shipping a stale snapshot in
8
+ * `cc-template-data.json`.
9
+ *
10
+ * The bundled snapshot remains as a fallback for users without CC installed
11
+ * or when live capture fails. Template replay auto-heals on CC updates
12
+ * without any user action.
13
+ *
14
+ * Security: the MITM endpoint only accepts connections from 127.0.0.1 and
15
+ * only runs long enough to capture a single request. CC's OAuth token
16
+ * never leaves the machine — we send CC to a loopback URL that CC itself
17
+ * trusts because we set ANTHROPIC_BASE_URL in the child's environment.
18
+ */
19
+ import { spawn } from 'node:child_process';
20
+ import { createServer } from 'node:http';
21
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
22
+ import { homedir } from 'node:os';
23
+ import { join, dirname } from 'node:path';
24
+ import { fileURLToPath } from 'node:url';
25
+ const __dirname = dirname(fileURLToPath(import.meta.url));
26
+ const LIVE_CACHE = join(homedir(), '.dario', 'cc-template.live.json');
27
+ const LIVE_TTL_MS = 24 * 60 * 60 * 1000; // re-extract once a day
28
/**
 * Load the request template synchronously.
 *
 * Returns the live cache (a capture from the user's own CC install)
 * whenever a valid one exists on disk, and the bundled snapshot otherwise.
 *
 * Deliberately sync and cheap: cc-template calls this once, at module
 * init. The capture itself is async and runs in the background via
 * refreshLiveFingerprintAsync(); whatever it writes to the cache file is
 * picked up on the next dario startup.
 *
 * @param _options Accepted for interface compatibility; currently unused
 *   (`silent` has no effect here).
 * @returns The live template if a usable cache exists, else the bundled one.
 */
export function loadTemplate(_options) {
    // Cache age is intentionally NOT checked here. A live capture older than
    // the refresh TTL is still served in preference to the bundled snapshot;
    // the background refresh is what replaces it for the next startup.
    return readLiveCache() ?? loadBundledTemplate();
}
51
/** Promise of the refresh currently running, or null when none is in flight. */
let liveRefreshInFlight = null;
/**
 * Kick off a background live fingerprint capture. Safe to call on every
 * dario proxy startup: it resolves without capturing when the cache is
 * still fresh (unless `force` is set) or when no CC binary can be found,
 * and concurrent callers share the single refresh already in flight.
 *
 * A successful capture is written to ~/.dario/cc-template.live.json and
 * picked up on the next dario startup (cc-template loads the cache
 * synchronously at module init).
 *
 * @param options.force     Skip the freshness check and always re-capture.
 * @param options.silent    Suppress the `[dario]` progress log lines.
 * @param options.timeoutMs Capture timeout in milliseconds (default 10 000).
 * @returns The fresh cached or newly captured template, or null when no
 *   capture could be made. A caller that joins an in-flight refresh gets
 *   that refresh's result, whatever options it was started with.
 */
export async function refreshLiveFingerprintAsync(options) {
    // Coalesce overlapping calls so we never spawn two CC processes that
    // race to write the same cache file.
    if (liveRefreshInFlight)
        return liveRefreshInFlight;
    liveRefreshInFlight = runLiveFingerprintRefresh(options).finally(() => {
        liveRefreshInFlight = null;
    });
    return liveRefreshInFlight;
}
/** Perform one refresh attempt: freshness check, binary lookup, capture, cache write. */
async function runLiveFingerprintRefresh(options) {
    const silent = options?.silent ?? false;
    const log = (msg) => {
        if (!silent)
            console.log(`[dario] ${msg}`);
    };
    if (!options?.force) {
        const cached = readLiveCache();
        if (cached) {
            // An unparseable `_captured` yields NaN, which fails the comparison
            // and therefore falls through to a re-capture.
            const age = Date.now() - new Date(cached._captured).getTime();
            if (age < LIVE_TTL_MS)
                return cached;
        }
    }
    if (!findClaudeBinary())
        return null;
    try {
        const live = await captureLiveTemplateAsync(options?.timeoutMs ?? 10_000);
        if (!live) {
            log('live fingerprint refresh: capture returned null (CC did not send a /v1/messages request within the timeout)');
            return null;
        }
        writeLiveCache(live);
        log(`live fingerprint refreshed from CC ${live._version}`);
        return live;
    }
    catch (err) {
        // Anything can be thrown; only Error instances carry `.message`.
        const reason = err instanceof Error ? err.message : String(err);
        log(`live fingerprint refresh failed: ${reason}`);
        return null;
    }
}
89
/**
 * Read and parse the `cc-template-data.json` snapshot that sits next to
 * this module, tagging the result as `'bundled'`. Read and parse errors
 * are not caught here.
 */
function loadBundledTemplate() {
    const snapshotPath = join(__dirname, 'cc-template-data.json');
    const template = JSON.parse(readFileSync(snapshotPath, 'utf-8'));
    template._source = 'bundled';
    return template;
}
94
/**
 * Read the live cache file, if there is a usable one.
 *
 * Returns the parsed template tagged as `'live'`, or null when the file is
 * missing, cannot be read or parsed, or lacks a system prompt or a
 * non-empty tool list.
 */
function readLiveCache() {
    if (!existsSync(LIVE_CACHE))
        return null;
    try {
        const parsed = JSON.parse(readFileSync(LIVE_CACHE, 'utf-8'));
        const hasPrompt = Boolean(parsed.system_prompt);
        const hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
        if (!(hasPrompt && hasTools))
            return null;
        parsed._source = 'live';
        return parsed;
    }
    catch {
        // Unreadable or malformed cache: treat it as absent.
        return null;
    }
}
108
/**
 * Persist a captured template to the live cache file as pretty-printed
 * JSON, creating the containing directory first if it does not exist.
 */
function writeLiveCache(data) {
    const cacheDir = dirname(LIVE_CACHE);
    mkdirSync(cacheDir, { recursive: true });
    const serialized = JSON.stringify(data, null, 2);
    writeFileSync(LIVE_CACHE, serialized);
}
112
/**
 * Capture one request from the user's CC install and turn it into a
 * template.
 *
 * Delegates the loopback capture to runCapture() and the field extraction
 * to extractTemplate(). Resolves to null when nothing was captured or when
 * the captured request does not yield a template.
 *
 * @param timeoutMs How long to wait for CC's request, in milliseconds.
 */
export async function captureLiveTemplateAsync(timeoutMs = 10_000) {
    const request = await runCapture(timeoutMs);
    return request ? extractTemplate(request) : null;
}
125
/**
 * Run a single capture attempt.
 *
 * Starts an HTTP server on a random 127.0.0.1 port, spawns CC with
 * ANTHROPIC_BASE_URL pointed at it, records the first request whose URL
 * contains `/v1/messages`, answers it with a minimal SSE stream, and then
 * shuts the server down and terminates the child.
 *
 * The returned promise only ever resolves: with the captured request, or
 * with null on timeout, server error, missing binary, or spawn failure.
 *
 * @param timeoutMs Hard upper bound on the whole attempt, in milliseconds.
 */
async function runCapture(timeoutMs) {
    return new Promise((resolve) => {
        let captured = null;
        let settled = false;
        // Declared before `settle` so the closure can never observe them
        // uninitialised, whichever callback fires first.
        let child;
        let hardTimeout;
        const settle = (result) => {
            if (settled)
                return;
            settled = true;
            // Without this the hard-timeout timer stays armed for the full
            // `timeoutMs` after an early finish.
            clearTimeout(hardTimeout);
            try {
                server.close();
            }
            catch { /* noop */ }
            try {
                child?.kill('SIGTERM');
            }
            catch { /* noop */ }
            resolve(result);
        };
        const server = createServer((req, res) => {
            // Only handle /v1/messages — everything else gets a 404 so CC doesn't
            // accidentally think /v1/models is live.
            if (!req.url?.includes('/v1/messages')) {
                res.writeHead(404, { 'content-type': 'application/json' });
                res.end('{"type":"error","error":{"type":"not_found_error","message":"not found"}}');
                return;
            }
            const chunks = [];
            req.on('data', (c) => chunks.push(c));
            req.on('end', () => {
                try {
                    const raw = Buffer.concat(chunks).toString('utf-8');
                    const body = raw ? JSON.parse(raw) : {};
                    // Flatten multi-valued headers to a single comma-joined string.
                    const headers = {};
                    for (const [k, v] of Object.entries(req.headers)) {
                        if (typeof v === 'string')
                            headers[k] = v;
                        else if (Array.isArray(v))
                            headers[k] = v.join(',');
                    }
                    captured = {
                        method: req.method ?? 'POST',
                        path: req.url ?? '/v1/messages',
                        headers,
                        body,
                    };
                }
                catch {
                    // Captured body was not JSON — leave captured null, respond anyway.
                }
                // Send a minimal valid SSE stream so CC doesn't hang retrying.
                res.writeHead(200, {
                    'content-type': 'text/event-stream',
                    'cache-control': 'no-cache',
                    connection: 'keep-alive',
                    'anthropic-ratelimit-unified-representative-claim': 'five_hour',
                    'anthropic-ratelimit-unified-status': 'allowed',
                    'anthropic-ratelimit-unified-5h-utilization': '0',
                    'anthropic-ratelimit-unified-7d-utilization': '0',
                    'anthropic-ratelimit-unified-reset': String(Math.floor(Date.now() / 1000) + 18000),
                });
                const sse = [
                    `event: message_start\ndata: ${JSON.stringify({
                        type: 'message_start',
                        message: {
                            id: 'msg_live_capture',
                            type: 'message',
                            role: 'assistant',
                            model: 'claude-opus-4-5',
                            content: [],
                            stop_reason: null,
                            stop_sequence: null,
                            usage: { input_tokens: 1, output_tokens: 1 },
                        },
                    })}\n\n`,
                    `event: content_block_start\ndata: ${JSON.stringify({
                        type: 'content_block_start',
                        index: 0,
                        content_block: { type: 'text', text: '' },
                    })}\n\n`,
                    `event: content_block_delta\ndata: ${JSON.stringify({
                        type: 'content_block_delta',
                        index: 0,
                        delta: { type: 'text_delta', text: 'ok' },
                    })}\n\n`,
                    `event: content_block_stop\ndata: ${JSON.stringify({ type: 'content_block_stop', index: 0 })}\n\n`,
                    `event: message_delta\ndata: ${JSON.stringify({
                        type: 'message_delta',
                        delta: { stop_reason: 'end_turn', stop_sequence: null },
                        usage: { output_tokens: 1 },
                    })}\n\n`,
                    `event: message_stop\ndata: ${JSON.stringify({ type: 'message_stop' })}\n\n`,
                ].join('');
                res.end(sse);
                // Give CC a beat to read the response before we kill it.
                setTimeout(() => settle(captured), 500);
            });
        });
        server.on('error', () => settle(null));
        server.listen(0, '127.0.0.1', () => {
            const address = server.address();
            if (!address || typeof address === 'string') {
                settle(null);
                return;
            }
            const url = `http://127.0.0.1:${address.port}`;
            // Spawn CC with ANTHROPIC_BASE_URL pointed at our MITM.
            const claudeBin = findClaudeBinary();
            if (!claudeBin) {
                settle(null);
                return;
            }
            try {
                child = spawn(claudeBin, ['--print', '-p', 'hi'], {
                    env: {
                        ...process.env,
                        ANTHROPIC_BASE_URL: url,
                        // Keep the caller's key if one is set; otherwise use a placeholder.
                        ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY ?? 'sk-dario-fingerprint-capture',
                        // Prevent CC from launching its own interactive UI or OAuth flow.
                        CLAUDE_NONINTERACTIVE: '1',
                    },
                    stdio: ['ignore', 'ignore', 'ignore'],
                    windowsHide: true,
                });
                child.on('error', () => settle(null));
                child.on('exit', () => {
                    // Give the server a brief moment to finish reading the body in case
                    // exit and request-end race.
                    setTimeout(() => settle(captured), 200);
                });
            }
            catch {
                settle(null);
                return;
            }
        });
        // Hard timeout: resolve with whatever has been captured so far.
        hardTimeout = setTimeout(() => settle(captured), timeoutMs);
    });
}
265
/**
 * Work out which Claude Code executable to spawn.
 *
 * An explicit `DARIO_CLAUDE_BIN` always wins and is returned as-is without
 * any existence check — useful for tests and non-standard installs.
 * Otherwise the well-known file names are looked up on PATH.
 *
 * @returns The override path, the first candidate file name found on PATH,
 *   or null when none is found.
 */
function findClaudeBinary() {
    if (process.env.DARIO_CLAUDE_BIN)
        return process.env.DARIO_CLAUDE_BIN;
    // The result is handed to spawn() without a shell (we avoid shell:true
    // for safety). On Windows a `.cmd` shim cannot be launched that way, so
    // a real `claude.exe` is preferred whenever one is present; the shim
    // names are kept as lower-priority fallbacks.
    const candidates = process.platform === 'win32'
        ? ['claude.exe', 'claude.cmd', 'claude']
        : ['claude'];
    return candidates.find((name) => existsOnPath(name)) ?? null;
}
/**
 * Report whether a file called `name` exists in any directory listed in the
 * PATH environment variable. This is a plain existence check on the exact
 * file name: PATHEXT is not consulted and executability is not verified.
 */
function existsOnPath(name) {
    const sep = process.platform === 'win32' ? ';' : ':';
    const dirs = (process.env.PATH ?? '').split(sep).filter(Boolean);
    return dirs.some((dir) => {
        try {
            return existsSync(join(dir, name));
        }
        catch {
            // A malformed PATH entry must not abort the search.
            return false;
        }
    });
}
295
/**
 * Given a captured /v1/messages request, pull out the fields that matter
 * for template replay: agent identity, system prompt, tool list, and CC
 * version (from the billing header or user-agent).
 *
 * Malformed input is reported by returning null rather than by throwing,
 * so callers can treat "nothing usable captured" uniformly.
 *
 * @param captured The recorded request (method, path, headers, parsed body).
 * @returns The extracted template stamped with the current time and
 *   `_source: 'live'`, or null when the body is not an object, the system
 *   blocks are missing or not text, or no tool has a string name.
 */
export function extractTemplate(captured) {
    const body = captured?.body;
    // JSON bodies such as `null` or a bare string parse fine but are not usable.
    if (!body || typeof body !== 'object')
        return null;
    const systemBlocks = body.system;
    // CC's system is a 3-block structure:
    //   [0] billing tag (no cache_control, tiny)
    //   [1] agent identity ("You are Claude Code..."), cache_control 1h
    //   [2] system prompt (~25KB), cache_control 1h
    // Billing tag is per-request — we never cache it. Identity + prompt are
    // what we want, so all three slots must be present.
    if (!Array.isArray(systemBlocks) || systemBlocks.length < 3)
        return null;
    const agentIdentity = pickTextBlock(systemBlocks[1]);
    const systemPrompt = pickTextBlock(systemBlocks[2]);
    if (!agentIdentity || !systemPrompt)
        return null;
    const tools = Array.isArray(body.tools)
        ? body.tools
            // Skip null / non-object entries instead of crashing on `.name`.
            .filter((t) => t !== null && typeof t === 'object' && typeof t.name === 'string')
            .map((t) => ({
                name: t.name,
                description: t.description ?? '',
                input_schema: t.input_schema ?? {},
            }))
        : [];
    if (tools.length === 0)
        return null;
    const version = extractCCVersion(captured.headers ?? {}) ?? 'unknown';
    return {
        _version: version,
        _captured: new Date().toISOString(),
        _source: 'live',
        agent_identity: agentIdentity,
        system_prompt: systemPrompt,
        tools,
        tool_names: tools.map((t) => t.name),
    };
}
/** Return the `text` of a `{ type: 'text', text: string }` block, or null for anything else. */
function pickTextBlock(block) {
    if (!block || typeof block !== 'object')
        return null;
    const b = block;
    if (b.type === 'text' && typeof b.text === 'string')
        return b.text;
    return null;
}
/**
 * Find the CC version in the request headers, or null if neither header
 * carries one. Expects lower-cased header names.
 */
function extractCCVersion(headers) {
    // Preferred: x-anthropic-billing-header carries cc_version=X.Y.Z
    const billing = headers['x-anthropic-billing-header'];
    if (billing) {
        const m = /cc_version=([\w.\-]+)/.exec(billing);
        if (m)
            return m[1];
    }
    // Fallback: user-agent often carries claude-cli/X.Y.Z
    const ua = headers['user-agent'];
    if (ua) {
        const m = /claude-cli\/([\w.\-]+)/.exec(ua);
        if (m)
            return m[1];
    }
    return null;
}
/**
 * Test hook: given a captured request object (from a mocked server or a
 * synthetic fixture), run it through the same extraction path. Exposed so
 * test/live-fingerprint.mjs doesn't need to spawn a real process.
 */
export function _extractTemplateForTest(captured) {
    return extractTemplate(captured);
}
package/dist/proxy.js CHANGED
@@ -8,7 +8,7 @@ import { arch, platform } from 'node:process';
8
8
  import { getAccessToken, getStatus } from './oauth.js';
9
9
  import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper } from './cc-template.js';
10
10
  import { AccountPool, parseRateLimits } from './pool.js';
11
- import { Analytics } from './analytics.js';
11
+ import { Analytics, billingBucketFromClaim } from './analytics.js';
12
12
  import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
13
13
  import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
14
14
  const ANTHROPIC_API = 'https://api.anthropic.com';
@@ -1039,7 +1039,13 @@ export async function startProxy(opts = {}) {
1039
1039
  else {
1040
1040
  overagePct = 'n/a';
1041
1041
  }
1042
- console.log(`[dario] #${requestCount} billing: ${billingClaim} (overage: ${overagePct})`);
1042
+ // Show the derived billing bucket as the headline, with the raw
1043
+ // claim value in parens so power users still see the header as-is.
1044
+ // See #34 — users want "am I actually on subscription?" answered
1045
+ // at a glance instead of having to memorize that `five_hour` means
1046
+ // "yes, subscription."
1047
+ const bucket = billingBucketFromClaim(billingClaim);
1048
+ console.log(`[dario] #${requestCount} billing: ${bucket} (${billingClaim}, overage: ${overagePct})`);
1043
1049
  }
1044
1050
  else if (verbose) {
1045
1051
  console.log(`[dario] #${requestCount} billing: headers absent (status=${upstream.status})`);
@@ -1243,6 +1249,11 @@ export async function startProxy(opts = {}) {
1243
1249
  }
1244
1250
  process.exit(1);
1245
1251
  });
1252
// Kick off a live fingerprint refresh in the background. Re-captures the
// user's own CC binary request shape and updates ~/.dario/cc-template.live.json
// for the next startup. No-op if CC isn't installed or the cache is fresh.
// Never blocks proxy startup. The single trailing catch covers both a
// failed dynamic import and a rejected refresh, so this best-effort step
// can never surface as an unhandled rejection.
void import('./live-fingerprint.js')
    .then(({ refreshLiveFingerprintAsync }) => refreshLiveFingerprintAsync({ silent: false }))
    .catch(() => { });
1246
1257
  server.listen(port, host, () => {
1247
1258
  const modeLine = passthrough
1248
1259
  ? 'Mode: passthrough (OAuth swap only, no injection)'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.10.3",
3
+ "version": "3.11.1",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,7 +21,7 @@
21
21
  ],
22
22
  "scripts": {
23
23
  "build": "tsc && cp src/cc-template-data.json dist/",
24
- "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/failover-429.mjs",
24
+ "test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/live-fingerprint.mjs",
25
25
  "audit": "npm audit --production --audit-level=high",
26
26
  "prepublishOnly": "npm run build",
27
27
  "start": "node dist/cli.js",