@askalf/dario 3.10.3 → 3.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analytics.d.ts +39 -16
- package/dist/analytics.js +41 -0
- package/dist/cc-template.d.ts +4 -2
- package/dist/cc-template.js +7 -8
- package/dist/live-fingerprint.d.ts +84 -0
- package/dist/live-fingerprint.js +369 -0
- package/dist/proxy.js +13 -2
- package/package.json +2 -2
package/dist/analytics.d.ts
CHANGED
|
@@ -23,6 +23,26 @@ export interface RequestRecord {
|
|
|
23
23
|
isStream: boolean;
|
|
24
24
|
isOpenAI: boolean;
|
|
25
25
|
}
|
|
26
|
+
/**
 * Billing bucket a request is classified into, derived from the
 * `anthropic-ratelimit-unified-representative-claim` response header.
 *
 * Four real buckets plus one sentinel:
 *
 * - `subscription` — billed against the user's 5h subscription window (Max/Pro)
 * - `subscription_fallback` — server-side fallback subscription bucket (rare, still covered)
 * - `extra_usage` — overage / pay-as-you-go, paid on top of the subscription
 * - `api` — pure API key billing, no subscription involved
 * - `unknown` — sentinel: header absent or unparseable (non-200 responses, stream aborts)
 *
 * Surfaced in `/analytics` summaries and in verbose per-request logs so
 * users can tell at a glance which bucket their traffic lands in.
 * See #34 for background.
 */
export type BillingBucket =
    | 'subscription'
    | 'subscription_fallback'
    | 'extra_usage'
    | 'api'
    | 'unknown';
|
|
41
|
+
/**
|
|
42
|
+
* Map the raw `representative-claim` header value to a human-friendly
|
|
43
|
+
* billing bucket. Pure function; no state; safe to call from any context.
|
|
44
|
+
*/
|
|
45
|
+
export declare function billingBucketFromClaim(claim: string | null | undefined): BillingBucket;
|
|
26
46
|
export declare class Analytics {
|
|
27
47
|
private records;
|
|
28
48
|
private maxRecords;
|
|
@@ -60,27 +80,30 @@ interface PerModelStat {
|
|
|
60
80
|
avgThinkingTokens: number;
|
|
61
81
|
estimatedCost: number;
|
|
62
82
|
}
|
|
83
|
+
/**
 * Aggregate statistics shared by the rolling-window and all-time sections
 * of the analytics summary.
 */
interface WindowStats {
    /** Sum of input tokens across the aggregated records. */
    totalInputTokens: number;
    totalOutputTokens: number;
    totalThinkingTokens: number;
    /** Rough cost estimate; the pricing table it is built from is not authoritative. */
    estimatedCost: number;
    /** Mean request latency, rounded to whole milliseconds. */
    avgLatencyMs: number;
    /** Fraction (0–1, four decimals) of records whose status is >= 400. */
    errorRate: number;
    /** Request count keyed by the raw claim value recorded on each request. */
    claimBreakdown: {
        [claim: string]: number;
    };
    /** Request count per derived billing bucket. See #34. */
    billingBucketBreakdown: Record<BillingBucket, number>;
    /**
     * Share of *classified* requests (everything except `unknown`) that
     * landed in `subscription` or `subscription_fallback`, expressed as a
     * percentage with two decimals. It is 0 when no request could be
     * classified. This is the headline answer to "is dario routing me
     * through my subscription?" — expected to be 100 on a clean setup.
     * See #34.
     */
    subscriptionPercent: number;
}
|
|
63
100
|
export interface AnalyticsSummary {
|
|
64
|
-
window: {
|
|
101
|
+
window: WindowStats & {
|
|
65
102
|
minutes: number;
|
|
66
103
|
requests: number;
|
|
67
|
-
totalInputTokens: number;
|
|
68
|
-
totalOutputTokens: number;
|
|
69
|
-
totalThinkingTokens: number;
|
|
70
|
-
estimatedCost: number;
|
|
71
|
-
avgLatencyMs: number;
|
|
72
|
-
errorRate: number;
|
|
73
|
-
claimBreakdown: Record<string, number>;
|
|
74
104
|
};
|
|
75
|
-
allTime: {
|
|
105
|
+
allTime: WindowStats & {
|
|
76
106
|
requests: number;
|
|
77
|
-
totalInputTokens: number;
|
|
78
|
-
totalOutputTokens: number;
|
|
79
|
-
totalThinkingTokens: number;
|
|
80
|
-
estimatedCost: number;
|
|
81
|
-
avgLatencyMs: number;
|
|
82
|
-
errorRate: number;
|
|
83
|
-
claimBreakdown: Record<string, number>;
|
|
84
107
|
};
|
|
85
108
|
perAccount: Record<string, PerAccountStat>;
|
|
86
109
|
perModel: Record<string, PerModelStat>;
|
package/dist/analytics.js
CHANGED
|
@@ -5,6 +5,24 @@
|
|
|
5
5
|
* In-memory rolling window; exposed via the /analytics endpoint when
|
|
6
6
|
* pool mode is active.
|
|
7
7
|
*/
|
|
8
|
+
/**
 * Translate the raw `representative-claim` header value into the
 * human-friendly billing bucket shown in analytics and logs.
 *
 * Stateless and side-effect free. Matching is exact and case-sensitive;
 * `null`, `undefined`, and every value not listed below yield `'unknown'`.
 *
 * NOTE(review): only the four claim values below are recognised — confirm
 * upstream has no other subscription-window claim that should count as
 * `subscription` rather than `unknown`.
 *
 * @param claim Raw header value, or null/undefined when the header is absent.
 * @returns One of 'subscription' | 'subscription_fallback' | 'extra_usage' | 'api' | 'unknown'.
 */
export function billingBucketFromClaim(claim) {
    if (claim === 'five_hour') {
        return 'subscription';
    }
    if (claim === 'five_hour_fallback') {
        return 'subscription_fallback';
    }
    if (claim === 'overage') {
        return 'extra_usage';
    }
    if (claim === 'api') {
        return 'api';
    }
    return 'unknown';
}
|
|
8
26
|
// Anthropic pricing (per 1M tokens, USD). Not authoritative — used for
|
|
9
27
|
// rough burn-rate display in the /analytics summary.
|
|
10
28
|
const PRICING = {
|
|
@@ -74,6 +92,14 @@ export class Analytics {
|
|
|
74
92
|
totalInputTokens: 0, totalOutputTokens: 0, totalThinkingTokens: 0,
|
|
75
93
|
estimatedCost: 0, avgLatencyMs: 0, errorRate: 0,
|
|
76
94
|
claimBreakdown: {},
|
|
95
|
+
billingBucketBreakdown: {
|
|
96
|
+
subscription: 0,
|
|
97
|
+
subscription_fallback: 0,
|
|
98
|
+
extra_usage: 0,
|
|
99
|
+
api: 0,
|
|
100
|
+
unknown: 0,
|
|
101
|
+
},
|
|
102
|
+
subscriptionPercent: 0,
|
|
77
103
|
};
|
|
78
104
|
}
|
|
79
105
|
const totalInput = records.reduce((s, r) => s + r.inputTokens, 0);
|
|
@@ -83,9 +109,22 @@ export class Analytics {
|
|
|
83
109
|
const avgLatency = records.reduce((s, r) => s + r.latencyMs, 0) / records.length;
|
|
84
110
|
const errors = records.filter(r => r.status >= 400).length;
|
|
85
111
|
const claims = {};
|
|
112
|
+
const buckets = {
|
|
113
|
+
subscription: 0,
|
|
114
|
+
subscription_fallback: 0,
|
|
115
|
+
extra_usage: 0,
|
|
116
|
+
api: 0,
|
|
117
|
+
unknown: 0,
|
|
118
|
+
};
|
|
86
119
|
for (const r of records) {
|
|
87
120
|
claims[r.claim] = (claims[r.claim] ?? 0) + 1;
|
|
121
|
+
buckets[billingBucketFromClaim(r.claim)]++;
|
|
88
122
|
}
|
|
123
|
+
const subscriptionHits = buckets.subscription + buckets.subscription_fallback;
|
|
124
|
+
const billedRequests = records.length - buckets.unknown;
|
|
125
|
+
const subscriptionPct = billedRequests > 0
|
|
126
|
+
? Math.round((subscriptionHits / billedRequests) * 10000) / 100
|
|
127
|
+
: 0;
|
|
89
128
|
return {
|
|
90
129
|
totalInputTokens: totalInput,
|
|
91
130
|
totalOutputTokens: totalOutput,
|
|
@@ -94,6 +133,8 @@ export class Analytics {
|
|
|
94
133
|
avgLatencyMs: Math.round(avgLatency),
|
|
95
134
|
errorRate: Math.round((errors / records.length) * 10000) / 10000,
|
|
96
135
|
claimBreakdown: claims,
|
|
136
|
+
billingBucketBreakdown: buckets,
|
|
137
|
+
subscriptionPercent: subscriptionPct,
|
|
97
138
|
};
|
|
98
139
|
}
|
|
99
140
|
perAccountStats(records) {
|
package/dist/cc-template.d.ts
CHANGED
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
* Claude Code request template.
|
|
3
3
|
*
|
|
4
4
|
* Tool definitions, system prompt, and request structure are loaded from
|
|
5
|
-
*
|
|
6
|
-
*
|
|
5
|
+
* the live fingerprint cache (captured from the user's own CC install at
|
|
6
|
+
* dario startup) or from the bundled cc-template-data.json snapshot. The
|
|
7
|
+
* live cache self-heals when Anthropic ships a new CC version — no user
|
|
8
|
+
* action required. See src/live-fingerprint.ts for the capture pipeline.
|
|
7
9
|
*/
|
|
8
10
|
/** CC's exact tool definitions — loaded from the template JSON. */
|
|
9
11
|
export declare const CC_TOOL_DEFINITIONS: {
|
package/dist/cc-template.js
CHANGED
|
@@ -2,15 +2,14 @@
|
|
|
2
2
|
* Claude Code request template.
|
|
3
3
|
*
|
|
4
4
|
* Tool definitions, system prompt, and request structure are loaded from
|
|
5
|
-
*
|
|
6
|
-
*
|
|
5
|
+
* the live fingerprint cache (captured from the user's own CC install at
|
|
6
|
+
* dario startup) or from the bundled cc-template-data.json snapshot. The
|
|
7
|
+
* live cache self-heals when Anthropic ships a new CC version — no user
|
|
8
|
+
* action required. See src/live-fingerprint.ts for the capture pipeline.
|
|
7
9
|
*/
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
12
|
-
// Load template data at module init — fail fast if missing
|
|
13
|
-
const TEMPLATE = JSON.parse(readFileSync(join(__dirname, 'cc-template-data.json'), 'utf-8'));
|
|
10
|
+
import { loadTemplate } from './live-fingerprint.js';
|
|
11
|
+
// Load template at module init — prefer live cache, fall back to bundled.
|
|
12
|
+
const TEMPLATE = loadTemplate({ silent: true });
|
|
14
13
|
/** CC's exact tool definitions — loaded from the template JSON. */
|
|
15
14
|
export const CC_TOOL_DEFINITIONS = TEMPLATE.tools;
|
|
16
15
|
/** CC's static system prompt (~25KB). */
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live fingerprint extraction.
|
|
3
|
+
*
|
|
4
|
+
* At dario startup, spawn the user's actual `claude` binary against a
|
|
5
|
+
* loopback MITM endpoint, capture the outbound /v1/messages request, and
|
|
6
|
+
* use the captured system prompt / tools / agent identity as the template
|
|
7
|
+
* replay source — instead of shipping a stale snapshot in
|
|
8
|
+
* `cc-template-data.json`.
|
|
9
|
+
*
|
|
10
|
+
* The bundled snapshot remains as a fallback for users without CC installed
|
|
11
|
+
* or when live capture fails. Template replay auto-heals on CC updates
|
|
12
|
+
* without any user action.
|
|
13
|
+
*
|
|
14
|
+
* Security: the MITM endpoint only accepts connections from 127.0.0.1 and
|
|
15
|
+
* only runs long enough to capture a single request. CC's OAuth token
|
|
16
|
+
* never leaves the machine — we send CC to a loopback URL that CC itself
|
|
17
|
+
* trusts because we set ANTHROPIC_BASE_URL in the child's environment.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Shape of a Claude Code request template, whether it comes from the
 * bundled snapshot or from a live capture.
 */
export interface TemplateData {
    /** CC version string recorded with the template. */
    _version: string;
    /** Timestamp string recording when the template was captured. */
    _captured: string;
    /** Where this template came from, when known. */
    _source?: 'bundled' | 'live';
    /** Text of the agent-identity system block. */
    agent_identity: string;
    /** Text of the main system-prompt block. */
    system_prompt: string;
    /** Tool definitions replayed with each request. */
    tools: {
        name: string;
        description: string;
        input_schema: Record<string, unknown>;
    }[];
    /** Tool names listed alongside `tools`. */
    tool_names: string[];
}
|
|
32
|
+
/**
|
|
33
|
+
* Load the template synchronously. Prefers the live cache (fresh capture
|
|
34
|
+
* from the user's own CC install) and falls back to the bundled snapshot.
|
|
35
|
+
*
|
|
36
|
+
* This is intentionally sync and fast — it runs at module init on every
|
|
37
|
+
* dario request handler. The actual capture is async and runs in the
|
|
38
|
+
* background via refreshLiveFingerprintAsync(); its results are written
|
|
39
|
+
* to the cache file and picked up on the next dario startup.
|
|
40
|
+
*/
|
|
41
|
+
export declare function loadTemplate(_options?: {
|
|
42
|
+
silent?: boolean;
|
|
43
|
+
}): TemplateData;
|
|
44
|
+
/**
|
|
45
|
+
* Kick off a background live fingerprint capture. Safe to call on every
|
|
46
|
+
* dario proxy startup — no-ops if CC isn't installed, if the cache is
|
|
47
|
+
* already fresh, or if another refresh is in flight. Never throws.
|
|
48
|
+
*
|
|
49
|
+
* Result is written to ~/.dario/cc-template.live.json and picked up on
|
|
50
|
+
* the next dario startup (cc-template.ts loads the cache synchronously
|
|
51
|
+
* at module init).
|
|
52
|
+
*/
|
|
53
|
+
export declare function refreshLiveFingerprintAsync(options?: {
|
|
54
|
+
force?: boolean;
|
|
55
|
+
silent?: boolean;
|
|
56
|
+
timeoutMs?: number;
|
|
57
|
+
}): Promise<TemplateData | null>;
|
|
58
|
+
/** One HTTP request as recorded by the loopback capture server. */
interface CapturedRequest {
    /** HTTP method of the captured request. */
    method: string;
    /** Request path (URL as received by the server). */
    path: string;
    /** Request headers, one string value per header name. */
    headers: {
        [name: string]: string;
    };
    /** Parsed JSON request body. */
    body: {
        [field: string]: unknown;
    };
}
|
|
64
|
+
/**
|
|
65
|
+
* Run a loopback MITM server on a random port, spawn CC with
|
|
66
|
+
* ANTHROPIC_BASE_URL pointed at it, wait for one request, respond with a
|
|
67
|
+
* minimal valid SSE stream, and return the captured request.
|
|
68
|
+
*
|
|
69
|
+
* Returns null on timeout or spawn failure. Does not throw.
|
|
70
|
+
*/
|
|
71
|
+
export declare function captureLiveTemplateAsync(timeoutMs?: number): Promise<TemplateData | null>;
|
|
72
|
+
/**
|
|
73
|
+
* Given a captured /v1/messages request body, pull out the fields that
|
|
74
|
+
* matter for template replay: agent identity, system prompt, tool list,
|
|
75
|
+
* and CC version (from the billing header or user-agent).
|
|
76
|
+
*/
|
|
77
|
+
export declare function extractTemplate(captured: CapturedRequest): TemplateData | null;
|
|
78
|
+
/**
|
|
79
|
+
* Test hook: given a captured request object (from a mocked server or a
|
|
80
|
+
* synthetic fixture), run it through the same extraction path. Exposed so
|
|
81
|
+
* test/live-fingerprint.mjs doesn't need to spawn a real process.
|
|
82
|
+
*/
|
|
83
|
+
export declare function _extractTemplateForTest(captured: CapturedRequest): TemplateData | null;
|
|
84
|
+
export {};
|
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live fingerprint extraction.
|
|
3
|
+
*
|
|
4
|
+
* At dario startup, spawn the user's actual `claude` binary against a
|
|
5
|
+
* loopback MITM endpoint, capture the outbound /v1/messages request, and
|
|
6
|
+
* use the captured system prompt / tools / agent identity as the template
|
|
7
|
+
* replay source — instead of shipping a stale snapshot in
|
|
8
|
+
* `cc-template-data.json`.
|
|
9
|
+
*
|
|
10
|
+
* The bundled snapshot remains as a fallback for users without CC installed
|
|
11
|
+
* or when live capture fails. Template replay auto-heals on CC updates
|
|
12
|
+
* without any user action.
|
|
13
|
+
*
|
|
14
|
+
* Security: the MITM endpoint only accepts connections from 127.0.0.1 and
|
|
15
|
+
* only runs long enough to capture a single request. CC's OAuth token
|
|
16
|
+
* never leaves the machine — we send CC to a loopback URL that CC itself
|
|
17
|
+
* trusts because we set ANTHROPIC_BASE_URL in the child's environment.
|
|
18
|
+
*/
|
|
19
|
+
import { spawn } from 'node:child_process';
|
|
20
|
+
import { createServer } from 'node:http';
|
|
21
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
22
|
+
import { homedir } from 'node:os';
|
|
23
|
+
import { join, dirname } from 'node:path';
|
|
24
|
+
import { fileURLToPath } from 'node:url';
|
|
25
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
26
|
+
const LIVE_CACHE = join(homedir(), '.dario', 'cc-template.live.json');
|
|
27
|
+
const LIVE_TTL_MS = 24 * 60 * 60 * 1000; // re-extract once a day
|
|
28
|
+
/**
 * Load the request template synchronously.
 *
 * Returns the live cache whenever a usable one exists on disk, and the
 * bundled snapshot otherwise. The age of the live cache is deliberately
 * NOT considered here: a stale live capture is still returned, and the
 * background refresh (refreshLiveFingerprintAsync) is what replaces it
 * for the next startup.
 *
 * Intended to be cheap enough to call at module init; the actual capture
 * is asynchronous and happens elsewhere.
 *
 * @param _options Accepted for API compatibility; currently not read, so
 *                 `silent` has no effect in this function.
 * @returns The live-cache template if usable, else the bundled template.
 */
export function loadTemplate(_options) {
    // Fresh or stale, a usable live cache wins; the previous freshness
    // check returned the same value on both branches, so it is dropped.
    return readLiveCache() ?? loadBundledTemplate();
}
|
|
51
|
+
/** Refresh currently running, shared by concurrent callers; null when idle. */
let inFlightLiveRefresh = null;
/**
 * Kick off a live fingerprint capture and persist the result to the live
 * cache file for the next startup.
 *
 * Resolves without doing any capture work when:
 *   - `force` is not set and the live cache is younger than LIVE_TTL_MS
 *     (resolves to that cached template);
 *   - another refresh started by this function is still running (resolves
 *     to that refresh's result; this call's own options are not applied);
 *   - no `claude` binary can be located (resolves to null).
 *
 * Never rejects: capture or cache-write failures are logged (unless
 * `silent`) and resolve to null.
 *
 * @param options.force     Skip the cache-freshness short-circuit.
 * @param options.silent    Suppress `[dario]` log lines.
 * @param options.timeoutMs Capture timeout in milliseconds (default 10 000).
 * @returns The fresh (or still-fresh cached) template, or null.
 */
export async function refreshLiveFingerprintAsync(options) {
    const silent = options?.silent ?? false;
    const log = (msg) => {
        if (!silent) {
            console.log(`[dario] ${msg}`);
        }
    };
    if (!options?.force) {
        const cached = readLiveCache();
        if (cached) {
            // An unparseable `_captured` yields NaN, which fails the
            // comparison and therefore falls through to a re-capture.
            const age = Date.now() - new Date(cached._captured).getTime();
            if (age < LIVE_TTL_MS) {
                return cached;
            }
        }
    }
    // Honor the documented contract: piggy-back on a running refresh
    // instead of spawning a second capture.
    if (inFlightLiveRefresh) {
        return inFlightLiveRefresh;
    }
    if (!findClaudeBinary()) {
        return null;
    }
    inFlightLiveRefresh = (async () => {
        try {
            const live = await captureLiveTemplateAsync(options?.timeoutMs ?? 10_000);
            if (!live) {
                log('live fingerprint refresh: capture returned null (CC did not send a /v1/messages request within the timeout)');
                return null;
            }
            writeLiveCache(live);
            log(`live fingerprint refreshed from CC ${live._version}`);
            return live;
        }
        catch (err) {
            // A thrown value is not guaranteed to be an Error (or even an
            // object); reading `.message` blindly could itself throw.
            const reason = err instanceof Error ? err.message : String(err);
            log(`live fingerprint refresh failed: ${reason}`);
            return null;
        }
        finally {
            inFlightLiveRefresh = null;
        }
    })();
    return inFlightLiveRefresh;
}
|
|
89
|
+
/**
 * Read the `cc-template-data.json` snapshot that sits next to this module
 * and tag it as coming from the bundle.
 *
 * Throws if the file is missing or is not valid JSON.
 */
function loadBundledTemplate() {
    const snapshotPath = join(__dirname, 'cc-template-data.json');
    const snapshotText = readFileSync(snapshotPath, 'utf-8');
    const template = JSON.parse(snapshotText);
    template._source = 'bundled';
    return template;
}
|
|
94
|
+
/**
 * Read the live-capture cache file, if there is a usable one.
 *
 * A cache is usable when it parses as JSON, has a truthy `system_prompt`,
 * and carries a non-empty `tools` array. Anything else — missing file,
 * read error, malformed JSON, incomplete template — yields null.
 *
 * @returns The cached template tagged with `_source: 'live'`, or null.
 */
function readLiveCache() {
    if (!existsSync(LIVE_CACHE)) {
        return null;
    }
    try {
        const template = JSON.parse(readFileSync(LIVE_CACHE, 'utf-8'));
        const hasPrompt = Boolean(template.system_prompt);
        const hasTools = Array.isArray(template.tools) && template.tools.length > 0;
        if (hasPrompt && hasTools) {
            template._source = 'live';
            return template;
        }
        return null;
    }
    catch {
        // Unreadable or malformed cache is treated the same as no cache.
        return null;
    }
}
|
|
108
|
+
/**
 * Persist a captured template to the live cache file as pretty-printed
 * JSON, creating the parent directory first if needed.
 *
 * Throws on filesystem or serialization errors; callers handle them.
 */
function writeLiveCache(data) {
    const cacheDir = dirname(LIVE_CACHE);
    mkdirSync(cacheDir, { recursive: true });
    const serialized = JSON.stringify(data, null, 2);
    writeFileSync(LIVE_CACHE, serialized);
}
|
|
112
|
+
/**
 * Capture one request from the user's `claude` binary via the loopback
 * capture server and turn it into a template.
 *
 * Resolves to null when nothing was captured (timeout, spawn failure),
 * when the captured request does not contain a usable template, or when
 * capture/extraction fails unexpectedly. Does not throw or reject.
 *
 * @param timeoutMs Upper bound for the capture, in milliseconds.
 * @returns The extracted template, or null.
 */
export async function captureLiveTemplateAsync(timeoutMs = 10_000) {
    try {
        const captured = await runCapture(timeoutMs);
        return captured ? extractTemplate(captured) : null;
    }
    catch {
        // The captured body is arbitrary JSON from a local client; keep the
        // documented "does not throw" contract even if extraction trips on it.
        return null;
    }
}
|
|
125
|
+
/**
 * Start an HTTP server on 127.0.0.1 (OS-assigned port), spawn the `claude`
 * binary with ANTHROPIC_BASE_URL pointing at it, and record the request it
 * sends to a `/v1/messages` URL.
 *
 * The returned promise always resolves (never rejects) exactly once, with
 * the captured request or null, when the first of these happens:
 *   - 500 ms after a matching request has been answered;
 *   - 200 ms after the child process exits;
 *   - `timeoutMs` elapses;
 *   - the server or the child reports an error, or no binary is found.
 * Settling closes the server, sends SIGTERM to the child and cancels the
 * hard timeout.
 *
 * NOTE(review): the path check is a substring match, so any URL containing
 * `/v1/messages` (e.g. a sub-path) is treated as the request to capture, and
 * a later matching request overwrites an earlier one until settle runs.
 *
 * @param timeoutMs Hard upper bound for the whole capture, in milliseconds.
 * @returns Promise of `{ method, path, headers, body }` or null.
 */
async function runCapture(timeoutMs) {
    return new Promise((resolve) => {
        let captured = null;
        let settled = false;
        // Declared ahead of every closure that reads them so no callback can
        // ever observe them in the temporal dead zone.
        let child;
        let hardTimeout;
        const settle = (result) => {
            if (settled) {
                return;
            }
            settled = true;
            // Without this the pending hard timeout keeps the event loop
            // alive for up to `timeoutMs` after the capture already finished.
            clearTimeout(hardTimeout);
            try {
                server.close();
            }
            catch { /* noop */ }
            try {
                child?.kill('SIGTERM');
            }
            catch { /* noop */ }
            resolve(result);
        };
        const server = createServer((req, res) => {
            // Only handle /v1/messages — everything else gets a 404 so CC doesn't
            // accidentally think /v1/models is live.
            if (!req.url?.includes('/v1/messages')) {
                res.writeHead(404, { 'content-type': 'application/json' });
                res.end('{"type":"error","error":{"type":"not_found_error","message":"not found"}}');
                return;
            }
            const chunks = [];
            req.on('data', (c) => chunks.push(c));
            req.on('end', () => {
                try {
                    const raw = Buffer.concat(chunks).toString('utf-8');
                    const body = raw ? JSON.parse(raw) : {};
                    // Flatten header values to plain strings; repeated headers
                    // arrive as arrays and are comma-joined.
                    const headers = {};
                    for (const [k, v] of Object.entries(req.headers)) {
                        if (typeof v === 'string') {
                            headers[k] = v;
                        }
                        else if (Array.isArray(v)) {
                            headers[k] = v.join(',');
                        }
                    }
                    captured = {
                        method: req.method ?? 'POST',
                        path: req.url ?? '/v1/messages',
                        headers,
                        body,
                    };
                }
                catch {
                    // Captured body was not JSON — leave captured null, respond anyway.
                }
                // Send a minimal valid SSE stream so CC doesn't hang retrying.
                res.writeHead(200, {
                    'content-type': 'text/event-stream',
                    'cache-control': 'no-cache',
                    connection: 'keep-alive',
                    'anthropic-ratelimit-unified-representative-claim': 'five_hour',
                    'anthropic-ratelimit-unified-status': 'allowed',
                    'anthropic-ratelimit-unified-5h-utilization': '0',
                    'anthropic-ratelimit-unified-7d-utilization': '0',
                    'anthropic-ratelimit-unified-reset': String(Math.floor(Date.now() / 1000) + 18000),
                });
                const sse = [
                    `event: message_start\ndata: ${JSON.stringify({
                        type: 'message_start',
                        message: {
                            id: 'msg_live_capture',
                            type: 'message',
                            role: 'assistant',
                            model: 'claude-opus-4-5',
                            content: [],
                            stop_reason: null,
                            stop_sequence: null,
                            usage: { input_tokens: 1, output_tokens: 1 },
                        },
                    })}\n\n`,
                    `event: content_block_start\ndata: ${JSON.stringify({
                        type: 'content_block_start',
                        index: 0,
                        content_block: { type: 'text', text: '' },
                    })}\n\n`,
                    `event: content_block_delta\ndata: ${JSON.stringify({
                        type: 'content_block_delta',
                        index: 0,
                        delta: { type: 'text_delta', text: 'ok' },
                    })}\n\n`,
                    `event: content_block_stop\ndata: ${JSON.stringify({ type: 'content_block_stop', index: 0 })}\n\n`,
                    `event: message_delta\ndata: ${JSON.stringify({
                        type: 'message_delta',
                        delta: { stop_reason: 'end_turn', stop_sequence: null },
                        usage: { output_tokens: 1 },
                    })}\n\n`,
                    `event: message_stop\ndata: ${JSON.stringify({ type: 'message_stop' })}\n\n`,
                ].join('');
                res.end(sse);
                // Give CC a beat to read the response before we kill it.
                setTimeout(() => settle(captured), 500);
            });
        });
        server.on('error', () => settle(null));
        server.listen(0, '127.0.0.1', () => {
            const address = server.address();
            if (!address || typeof address === 'string') {
                settle(null);
                return;
            }
            const url = `http://127.0.0.1:${address.port}`;
            // Spawn CC with ANTHROPIC_BASE_URL pointed at our MITM.
            const claudeBin = findClaudeBinary();
            if (!claudeBin) {
                settle(null);
                return;
            }
            try {
                child = spawn(claudeBin, ['--print', '-p', 'hi'], {
                    env: {
                        ...process.env,
                        ANTHROPIC_BASE_URL: url,
                        ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY ?? 'sk-dario-fingerprint-capture',
                        // Prevent CC from launching its own interactive UI or OAuth flow.
                        CLAUDE_NONINTERACTIVE: '1',
                    },
                    stdio: ['ignore', 'ignore', 'ignore'],
                    windowsHide: true,
                });
                child.on('error', () => settle(null));
                child.on('exit', () => {
                    // Give the server a brief moment to finish reading the body in case
                    // exit and request-end race.
                    setTimeout(() => settle(captured), 200);
                });
            }
            catch {
                settle(null);
                return;
            }
        });
        // Hard timeout.
        hardTimeout = setTimeout(() => settle(captured), timeoutMs);
    });
}
|
|
265
|
+
/**
 * Decide which command to spawn for Claude Code.
 *
 * Resolution order:
 *   1. `DARIO_CLAUDE_BIN`, returned verbatim when set to a non-empty value
 *      (no existence check) — handy for tests and non-standard installs.
 *   2. The first well-known file name found in a PATH directory:
 *      `claude.cmd`, `claude.exe`, `claude` on win32; `claude` elsewhere.
 *
 * The bare file name is returned, not the full path. The lookup uses
 * literal names only and does not consult PATHEXT.
 *
 * NOTE(review): on Windows this can return `claude.cmd`, which the caller
 * spawns without `shell: true`; Node documents that `.cmd`/`.bat` files
 * need a shell to be spawned — confirm live capture works there.
 *
 * @returns The command to spawn, or null when nothing was found.
 */
function findClaudeBinary() {
    const override = process.env.DARIO_CLAUDE_BIN;
    if (override) {
        return override;
    }
    const wellKnownNames = process.platform === 'win32'
        ? ['claude.cmd', 'claude.exe', 'claude']
        : ['claude'];
    const found = wellKnownNames.find((candidate) => existsOnPath(candidate));
    return found ?? null;
}
|
|
282
|
+
/**
 * Report whether a file system entry called `name` exists directly inside
 * any directory listed in the PATH environment variable.
 *
 * PATH is split on `;` on win32 and `:` elsewhere; empty entries are
 * skipped. A directory that cannot be probed counts as "not found".
 *
 * @param name File name to look for (no extension guessing is done).
 * @returns true as soon as one PATH directory contains `name`, else false.
 */
function existsOnPath(name) {
    const separator = process.platform === 'win32' ? ';' : ':';
    const searchDirs = (process.env.PATH ?? '').split(separator).filter(Boolean);
    return searchDirs.some((dir) => {
        try {
            return existsSync(join(dir, name));
        }
        catch {
            return false;
        }
    });
}
|
|
295
|
+
/**
|
|
296
|
+
* Given a captured /v1/messages request body, pull out the fields that
|
|
297
|
+
* matter for template replay: agent identity, system prompt, tool list,
|
|
298
|
+
* and CC version (from the billing header or user-agent).
|
|
299
|
+
*/
|
|
300
|
+
export function extractTemplate(captured) {
|
|
301
|
+
const body = captured.body;
|
|
302
|
+
const systemBlocks = body.system;
|
|
303
|
+
if (!Array.isArray(systemBlocks) || systemBlocks.length < 2)
|
|
304
|
+
return null;
|
|
305
|
+
// CC's system is a 3-block structure:
|
|
306
|
+
// [0] billing tag (no cache_control, tiny)
|
|
307
|
+
// [1] agent identity ("You are Claude Code..."), cache_control 1h
|
|
308
|
+
// [2] system prompt (~25KB), cache_control 1h
|
|
309
|
+
// Billing tag is per-request — we never cache it. Identity + prompt are
|
|
310
|
+
// what we want.
|
|
311
|
+
const agentIdentity = pickTextBlock(systemBlocks[1]);
|
|
312
|
+
const systemPrompt = pickTextBlock(systemBlocks[2]);
|
|
313
|
+
if (!agentIdentity || !systemPrompt)
|
|
314
|
+
return null;
|
|
315
|
+
const tools = Array.isArray(body.tools)
|
|
316
|
+
? body.tools
|
|
317
|
+
.filter((t) => typeof t.name === 'string')
|
|
318
|
+
.map((t) => ({
|
|
319
|
+
name: t.name,
|
|
320
|
+
description: t.description ?? '',
|
|
321
|
+
input_schema: t.input_schema ?? {},
|
|
322
|
+
}))
|
|
323
|
+
: [];
|
|
324
|
+
if (tools.length === 0)
|
|
325
|
+
return null;
|
|
326
|
+
const version = extractCCVersion(captured.headers) ?? 'unknown';
|
|
327
|
+
return {
|
|
328
|
+
_version: version,
|
|
329
|
+
_captured: new Date().toISOString(),
|
|
330
|
+
_source: 'live',
|
|
331
|
+
agent_identity: agentIdentity,
|
|
332
|
+
system_prompt: systemPrompt,
|
|
333
|
+
tools,
|
|
334
|
+
tool_names: tools.map((t) => t.name),
|
|
335
|
+
};
|
|
336
|
+
}
|
|
337
|
+
function pickTextBlock(block) {
|
|
338
|
+
if (!block || typeof block !== 'object')
|
|
339
|
+
return null;
|
|
340
|
+
const b = block;
|
|
341
|
+
if (b.type === 'text' && typeof b.text === 'string')
|
|
342
|
+
return b.text;
|
|
343
|
+
return null;
|
|
344
|
+
}
|
|
345
|
+
function extractCCVersion(headers) {
|
|
346
|
+
// Preferred: x-anthropic-billing-header carries cc_version=X.Y.Z
|
|
347
|
+
const billing = headers['x-anthropic-billing-header'];
|
|
348
|
+
if (billing) {
|
|
349
|
+
const m = /cc_version=([\w.\-]+)/.exec(billing);
|
|
350
|
+
if (m)
|
|
351
|
+
return m[1];
|
|
352
|
+
}
|
|
353
|
+
// Fallback: user-agent often carries claude-cli/X.Y.Z
|
|
354
|
+
const ua = headers['user-agent'];
|
|
355
|
+
if (ua) {
|
|
356
|
+
const m = /claude-cli\/([\w.\-]+)/.exec(ua);
|
|
357
|
+
if (m)
|
|
358
|
+
return m[1];
|
|
359
|
+
}
|
|
360
|
+
return null;
|
|
361
|
+
}
|
|
362
|
+
/**
 * Test-only entry point into template extraction.
 *
 * Feeds a captured-request object (from a mocked server or a synthetic
 * fixture) through exactly the same extraction code as a real capture, so
 * tests do not have to spawn a process.
 *
 * @param captured Captured request to extract from.
 * @returns Whatever extractTemplate returns for that request.
 */
export function _extractTemplateForTest(captured) {
    const template = extractTemplate(captured);
    return template;
}
|
package/dist/proxy.js
CHANGED
|
@@ -8,7 +8,7 @@ import { arch, platform } from 'node:process';
|
|
|
8
8
|
import { getAccessToken, getStatus } from './oauth.js';
|
|
9
9
|
import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper } from './cc-template.js';
|
|
10
10
|
import { AccountPool, parseRateLimits } from './pool.js';
|
|
11
|
-
import { Analytics } from './analytics.js';
|
|
11
|
+
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
12
12
|
import { loadAllAccounts, loadAccount, refreshAccountToken } from './accounts.js';
|
|
13
13
|
import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
|
|
14
14
|
const ANTHROPIC_API = 'https://api.anthropic.com';
|
|
@@ -1039,7 +1039,13 @@ export async function startProxy(opts = {}) {
|
|
|
1039
1039
|
else {
|
|
1040
1040
|
overagePct = 'n/a';
|
|
1041
1041
|
}
|
|
1042
|
-
|
|
1042
|
+
// Show the derived billing bucket as the headline, with the raw
|
|
1043
|
+
// claim value in parens so power users still see the header as-is.
|
|
1044
|
+
// See #34 — users want "am I actually on subscription?" answered
|
|
1045
|
+
// at a glance instead of having to memorize that `five_hour` means
|
|
1046
|
+
// "yes, subscription."
|
|
1047
|
+
const bucket = billingBucketFromClaim(billingClaim);
|
|
1048
|
+
console.log(`[dario] #${requestCount} billing: ${bucket} (${billingClaim}, overage: ${overagePct})`);
|
|
1043
1049
|
}
|
|
1044
1050
|
else if (verbose) {
|
|
1045
1051
|
console.log(`[dario] #${requestCount} billing: headers absent (status=${upstream.status})`);
|
|
@@ -1243,6 +1249,11 @@ export async function startProxy(opts = {}) {
|
|
|
1243
1249
|
}
|
|
1244
1250
|
process.exit(1);
|
|
1245
1251
|
});
|
|
1252
|
+
// Kick off a live fingerprint refresh in the background. Re-captures the
|
|
1253
|
+
// user's own CC binary request shape and updates ~/.dario/cc-template.live.json
|
|
1254
|
+
// for the next startup. No-op if CC isn't installed or the cache is fresh.
|
|
1255
|
+
// Never blocks proxy startup; never throws.
|
|
1256
|
+
void import('./live-fingerprint.js').then(({ refreshLiveFingerprintAsync }) => refreshLiveFingerprintAsync({ silent: false }).catch(() => { }));
|
|
1246
1257
|
server.listen(port, host, () => {
|
|
1247
1258
|
const modeLine = passthrough
|
|
1248
1259
|
? 'Mode: passthrough (OAuth swap only, no injection)'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.11.1",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
],
|
|
22
22
|
"scripts": {
|
|
23
23
|
"build": "tsc && cp src/cc-template-data.json dist/",
|
|
24
|
-
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/failover-429.mjs",
|
|
24
|
+
"test": "node test/issue-29-tool-translation.mjs && node test/hybrid-tools.mjs && node test/scrub-paths.mjs && node test/provider-prefix.mjs && node test/analytics-recording.mjs && node test/analytics-billing-bucket.mjs && node test/failover-429.mjs && node test/live-fingerprint.mjs",
|
|
25
25
|
"audit": "npm audit --production --audit-level=high",
|
|
26
26
|
"prepublishOnly": "npm run build",
|
|
27
27
|
"start": "node dist/cli.js",
|