@curatedmcp/tokenshield-core 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +15 -0
- package/dist/index.js +11 -0
- package/dist/index.js.map +1 -0
- package/dist/ledger.d.ts +33 -0
- package/dist/ledger.js +141 -0
- package/dist/ledger.js.map +1 -0
- package/dist/pricing.d.ts +5 -0
- package/dist/pricing.js +83 -0
- package/dist/pricing.js.map +1 -0
- package/dist/processors/conversation-dedup.d.ts +23 -0
- package/dist/processors/conversation-dedup.js +71 -0
- package/dist/processors/conversation-dedup.js.map +1 -0
- package/dist/processors/pipeline.d.ts +10 -0
- package/dist/processors/pipeline.js +89 -0
- package/dist/processors/pipeline.js.map +1 -0
- package/dist/processors/response-cache.d.ts +53 -0
- package/dist/processors/response-cache.js +129 -0
- package/dist/processors/response-cache.js.map +1 -0
- package/dist/processors/types.d.ts +54 -0
- package/dist/processors/types.js +2 -0
- package/dist/processors/types.js.map +1 -0
- package/dist/providers/anthropic.d.ts +6 -0
- package/dist/providers/anthropic.js +216 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/registry.d.ts +4 -0
- package/dist/providers/registry.js +7 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/providers/types.d.ts +79 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/proxy/anthropic-passthrough.d.ts +13 -0
- package/dist/proxy/anthropic-passthrough.js +363 -0
- package/dist/proxy/anthropic-passthrough.js.map +1 -0
- package/dist/proxy/sse.d.ts +20 -0
- package/dist/proxy/sse.js +59 -0
- package/dist/proxy/sse.js.map +1 -0
- package/dist/proxy/usage.d.ts +25 -0
- package/dist/proxy/usage.js +82 -0
- package/dist/proxy/usage.js.map +1 -0
- package/dist/server.d.ts +18 -0
- package/dist/server.js +130 -0
- package/dist/server.js.map +1 -0
- package/dist/types.d.ts +36 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +38 -0
- package/src/index.ts +31 -0
- package/src/ledger.ts +232 -0
- package/src/pricing.ts +93 -0
- package/src/processors/conversation-dedup.ts +77 -0
- package/src/processors/pipeline.ts +104 -0
- package/src/processors/response-cache.ts +161 -0
- package/src/processors/types.ts +58 -0
- package/src/providers/anthropic.ts +236 -0
- package/src/providers/registry.ts +10 -0
- package/src/providers/types.ts +87 -0
- package/src/proxy/anthropic-passthrough.ts +393 -0
- package/src/proxy/sse.ts +58 -0
- package/src/proxy/usage.ts +98 -0
- package/src/server.ts +154 -0
- package/src/types.ts +47 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
import { IncomingMessage, ServerResponse } from "node:http";
|
|
2
|
+
import { request as httpsRequest } from "node:https";
|
|
3
|
+
import { request as httpRequest } from "node:http";
|
|
4
|
+
import { URL } from "node:url";
|
|
5
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
6
|
+
import type { RequestRecord, ProxyConfig } from "../types.js";
|
|
7
|
+
import { emptyUsage, dollarsFor } from "../pricing.js";
|
|
8
|
+
import { SSEParser } from "./sse.js";
|
|
9
|
+
import { providerForPath } from "../providers/registry.js";
|
|
10
|
+
import { Pipeline } from "../processors/pipeline.js";
|
|
11
|
+
import type { Processor } from "../processors/types.js";
|
|
12
|
+
import { conversationDedup } from "../processors/conversation-dedup.js";
|
|
13
|
+
import { ResponseCache } from "../processors/response-cache.js";
|
|
14
|
+
|
|
15
|
+
/** Callback that receives the accounting record produced for one handled request. */
type RecordSink = (record: RequestRecord) => void;

/**
 * Lowercase header names that are dropped when headers are copied between the
 * client-facing and upstream sides of the proxy. Besides the per-connection
 * (hop-by-hop) headers this also lists `host`, which the request handler sets
 * itself from the upstream URL.
 */
const HOP_BY_HOP = new Set<string>([
  // connection management
  "connection",
  "keep-alive",
  // proxy authentication
  "proxy-authenticate",
  "proxy-authorization",
  // transfer framing
  "te",
  "trailer",
  "transfer-encoding",
  "upgrade",
  // rewritten by the proxy rather than forwarded
  "host",
]);
|
|
28
|
+
|
|
29
|
+
/**
 * Build the flat header map that is forwarded to the other side of the proxy.
 *
 * Entries whose value is `undefined` and entries whose lowercased name is in
 * `HOP_BY_HOP` are skipped. Multi-valued headers are collapsed into a single
 * string joined with ", ". Header names are kept exactly as given.
 *
 * @param src - Headers as exposed by an incoming message.
 * @returns A new object; `src` is not modified.
 */
function copyHeaders(src: IncomingMessage["headers"]): Record<string, string> {
  const forwarded: Record<string, string> = {};
  Object.keys(src).forEach((key) => {
    const raw = src[key];
    if (raw === undefined || HOP_BY_HOP.has(key.toLowerCase())) return;
    forwarded[key] = typeof raw === "string" ? raw : raw.join(", ");
  });
  return forwarded;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Drain the request stream into memory and try to decode it as JSON.
 *
 * @param req - Incoming request; its body is read to the end.
 * @param limitBytes - Largest accepted body size in bytes (default 64 MiB).
 * @returns The raw bytes plus the decoded value. `parsed` is `null` when the
 *   body is empty or is not valid JSON; the raw bytes are returned either way.
 * @throws Error as soon as the accumulated size exceeds `limitBytes`.
 */
async function readJsonBody(
  req: IncomingMessage,
  limitBytes = 64 * 1024 * 1024,
): Promise<{ raw: Buffer; parsed: unknown }> {
  const pieces: Buffer[] = [];
  let received = 0;
  for await (const piece of req) {
    const bytes = piece as Buffer;
    received += bytes.length;
    if (received > limitBytes) {
      throw new Error(`Request body exceeds ${limitBytes} bytes`);
    }
    pieces.push(bytes);
  }

  const raw = Buffer.concat(pieces);
  if (raw.length === 0) return { raw, parsed: null };

  try {
    const decoded: unknown = JSON.parse(raw.toString("utf8"));
    return { raw, parsed: decoded };
  } catch {
    // Not JSON: the caller still gets the raw bytes and can pass them through.
    return { raw, parsed: null };
  }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Derive a short identifier for the conversation a request body belongs to.
 *
 * The identifier is the first 16 hex characters of the SHA-256 of a JSON
 * document holding the body's `system` value and the first entry of
 * `messages` whose `role` is "user". Bodies that are not objects yield "_".
 *
 * @param parsed - Decoded request body (any JSON value, or `null`).
 * @returns A 16-character hex string, or "_" for non-object input.
 */
function conversationFingerprint(parsed: unknown): string {
  if (typeof parsed !== "object" || parsed === null) return "_";

  const fields = parsed as Record<string, unknown>;
  const sys = fields["system"];

  let firstUser: unknown;
  const list = fields["messages"];
  if (Array.isArray(list)) {
    for (const entry of list) {
      const isUserTurn =
        typeof entry === "object" &&
        entry !== null &&
        (entry as Record<string, unknown>)["role"] === "user";
      if (isUserTurn) {
        firstUser = entry;
        break;
      }
    }
  }

  const hasher = createHash("sha256");
  hasher.update(JSON.stringify({ sys, firstUser }));
  return hasher.digest("hex").slice(0, 16);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Size in bytes of a value once serialized as UTF-8 JSON.
 * `null` and `undefined` are both measured as the JSON text `null`.
 */
function bodySize(json: unknown): number {
  const wireText = JSON.stringify(json ?? null);
  return Buffer.byteLength(wireText, "utf8");
}
|
|
82
|
+
|
|
83
|
+
/**
 * Module-level pipeline and response cache: one process shares one set of
 * state. `ENABLED` starts with every processor flagged `enabledByDefault`, and
 * the same set instance is handed to the pipeline.
 *
 * Future: per-license configuration once cloud-tier gating ships.
 */
const PROCESSORS: Processor[] = [conversationDedup];

const ENABLED = new Set<string>();
for (const processor of PROCESSORS) {
  if (processor.enabledByDefault) ENABLED.add(processor.id);
}

const PIPELINE = new Pipeline({ processors: PROCESSORS, enabled: ENABLED });
const RESPONSE_CACHE = new ResponseCache();
|
|
91
|
+
|
|
92
|
+
/**
 * Add a processor id to, or remove it from, the module-level `ENABLED` set.
 *
 * @param id - Processor identifier; it is not checked against the registered processors.
 * @param enabled - `true` to add the id, `false` to remove it.
 */
export function setProcessorEnabled(id: string, enabled: boolean): void {
  if (!enabled) {
    ENABLED.delete(id);
    return;
  }
  ENABLED.add(id);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Snapshot of the ids currently in the module-level `ENABLED` set.
 *
 * @returns A fresh array; mutating it does not affect the set.
 */
export function getProcessorEnabledIds(): string[] {
  return [...ENABLED];
}
|
|
100
|
+
|
|
101
|
+
/**
 * Report the counters exposed by the module-level response cache.
 *
 * @returns Whatever `RESPONSE_CACHE.stats()` returns: hits, misses, entries and bytes.
 */
export function getResponseCacheStats(): {
  hits: number;
  misses: number;
  entries: number;
  bytes: number;
} {
  const counters = RESPONSE_CACHE.stats();
  return counters;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Proxy one client request to the configured upstream and record its usage.
 *
 * Flow:
 *  1. Buffer the request body; any failure while reading is answered with 413.
 *  2. Resolve the request target against `config.upstreamBaseUrl` and refuse
 *     targets that resolve to a different origin.
 *  3. When a provider matches the path, read the model / stream flag and run
 *     the request-side processor pipeline. A rewritten body is only sent when
 *     it is strictly smaller than the original bytes.
 *  4. Serve from the response cache when the lookup hits.
 *  5. Otherwise forward upstream, relaying response bytes unchanged while a
 *     copy is parsed for usage accounting.
 *  6. Emit one record to `sink` and, for complete non-streamed responses,
 *     offer the body to the response cache.
 *
 * @param req - Client request.
 * @param res - Client response; always ended by this function on the paths it handles.
 * @param config - Proxy configuration; only `upstreamBaseUrl` is read here.
 * @param sink - Receives the accounting record. Exceptions it throws are swallowed.
 * @returns A promise that resolves once the response has been ended.
 */
export async function handleAnthropicRequest(
  req: IncomingMessage,
  res: ServerResponse,
  config: ProxyConfig,
  sink: RecordSink,
): Promise<void> {
  const startedAt = Date.now();
  const requestId = randomUUID();

  let body: { raw: Buffer; parsed: unknown };
  try {
    body = await readJsonBody(req);
  } catch (err) {
    res.statusCode = 413;
    res.setHeader("content-type", "text/plain");
    res.end(`tokenshield: ${(err as Error).message}`);
    return;
  }

  // A request target such as "//other-host/v1/messages" or an absolute URL
  // would otherwise replace the upstream host and forward the client's
  // credential headers to it. Only same-origin targets are forwarded.
  const base = new URL(config.upstreamBaseUrl);
  const upstream = new URL(req.url ?? "/", base);
  if (upstream.origin !== base.origin) {
    res.statusCode = 400;
    res.setHeader("content-type", "application/json");
    res.end(
      JSON.stringify({
        type: "error",
        error: {
          type: "tokenshield_invalid_request",
          message: "Request target must be a path on the configured upstream",
        },
      }),
    );
    return;
  }

  const provider = providerForPath(upstream.pathname);
  const isHttps = upstream.protocol === "https:";
  const requester = isHttps ? httpsRequest : httpRequest;

  // ── 1. Determine model + stream flag from raw body (before any rewrite) ──
  let model = "unknown";
  let streamed = false;
  if (provider !== null) {
    model = provider.extractModel(body.parsed);
    streamed = provider.isStreaming(body.parsed);
  }

  // ── 2. Run request-side processors ──────────────────────────────────────
  // NOTE(review): the original comment calls this step "fail-open"; nothing
  // here catches exceptions, so that presumably happens inside Pipeline — confirm.
  let outboundBytes = body.raw;
  const effects: Array<{ name: string; bytesSaved: number; detail?: Record<string, unknown> }> = [];
  const bytesRaw = body.raw.length;
  let bytesSent = body.raw.length;

  if (provider !== null && body.parsed !== null) {
    const conv = provider.toConversation(body.parsed);
    if (conv !== null) {
      const ctx = {
        providerId: provider.id,
        conversationFingerprint: conversationFingerprint(body.parsed),
        inboundBytes: body.raw.length,
      };
      // sizeOf measures wire-format (after applyConversation), not in-memory shape
      const wireSize = (c: typeof conv): number =>
        bodySize(provider.applyConversation(body.parsed, c));
      const result = PIPELINE.run(conv, ctx, wireSize);
      for (const e of result.effects) effects.push(e);

      if (result.effects.length > 0) {
        const outboundParsed = provider.applyConversation(body.parsed, result.conversation);
        const serialized = Buffer.from(JSON.stringify(outboundParsed), "utf8");
        // Re-serialization can grow the body; keep the original bytes unless we really saved some.
        if (serialized.length < body.raw.length) {
          outboundBytes = serialized;
          bytesSent = serialized.length;
        }
      }
    }
  }

  // ── 3. Response cache: short-circuit if we have a fresh hit ─────────────
  if (provider !== null && body.parsed !== null) {
    const hit = RESPONSE_CACHE.lookup(body.parsed);
    if (hit !== null) {
      res.statusCode = hit.status;
      for (const [k, v] of Object.entries(hit.headers)) {
        if (HOP_BY_HOP.has(k.toLowerCase())) continue;
        res.setHeader(k, v);
      }
      res.setHeader("x-tokenshield-cache", "hit");
      res.setHeader("x-tokenshield-cache-age-ms", String(hit.cachedAgoMs));
      res.end(hit.body);

      const cachedModel = hit.model || model;
      const cachedUsage = {
        inputTokens: hit.usage.inputTokens,
        outputTokens: hit.usage.outputTokens,
        cacheCreationInputTokens: 0,
        cacheReadInputTokens: 0,
      };
      const dollarsRaw = dollarsFor(cachedModel, cachedUsage);
      try {
        sink({
          id: requestId,
          timestamp: startedAt,
          model: cachedModel,
          endpoint: upstream.pathname,
          streamed: false,
          durationMs: Date.now() - startedAt,
          upstreamStatus: hit.status,
          upstreamError: null,
          usageRaw: { ...cachedUsage },
          // Cached: zero new tokens billed
          usageSent: emptyUsage(),
          dollarsRaw,
          dollarsSent: 0,
          dollarsSaved: dollarsRaw,
          processorsApplied: ["response-cache:hit", ...effects.map((e) => e.name)],
        });
      } catch {
        /* sink errors must never break the request */
      }
      return;
    }
  }

  // ── 4. Forward upstream ─────────────────────────────────────────────────
  const headers = copyHeaders(req.headers);
  headers["host"] = upstream.host;
  headers["content-length"] = String(outboundBytes.length);
  // Usage accounting parses the response body as plain UTF-8 text. If the
  // client's accept-encoding were forwarded, a compressed response would be
  // unreadable here and every such request would be recorded with zero usage.
  headers["accept-encoding"] = "identity";

  let upstreamStatus = 0;
  let upstreamError: string | null = null;
  let usage = emptyUsage();
  let modelFromResponse: string | null = null;
  const cachedHeaders: Record<string, string> = {};
  const responseBodyChunks: Buffer[] = [];

  // Several terminal events can fire for one exchange (for example a response
  // handler followed by the request's own "error"); only the first one records.
  let finalized = false;
  const finalize = (): void => {
    if (finalized) return;
    finalized = true;

    const effectiveModel = modelFromResponse ?? model;
    const dollarsSent = dollarsFor(effectiveModel, usage);

    // Estimate "raw" cost — what the bill would have been without compression.
    // Sent input tokens are scaled by the byte ratio raw/sent of the request
    // body. This is an estimate, not a measurement.
    const totalBytesSavedReq = Math.max(0, bytesRaw - bytesSent);
    const ratio = bytesSent > 0 ? totalBytesSavedReq / bytesSent : 0;
    const estimatedInputTokensRaw = Math.round(usage.inputTokens * (1 + ratio));
    const usageRaw = {
      inputTokens: estimatedInputTokensRaw,
      outputTokens: usage.outputTokens,
      cacheCreationInputTokens: usage.cacheCreationInputTokens,
      cacheReadInputTokens: usage.cacheReadInputTokens,
    };
    const dollarsRaw = dollarsFor(effectiveModel, usageRaw);
    const dollarsSaved = Math.max(0, dollarsRaw - dollarsSent);

    const record: RequestRecord = {
      id: requestId,
      timestamp: startedAt,
      model: effectiveModel,
      endpoint: upstream.pathname,
      streamed,
      durationMs: Date.now() - startedAt,
      upstreamStatus,
      upstreamError,
      usageRaw,
      usageSent: usage,
      dollarsRaw,
      dollarsSent,
      dollarsSaved,
      processorsApplied: effects.map((e) => e.name),
    };
    try {
      sink(record);
    } catch {
      /* sink errors must never break the request */
    }

    // Offer the JSON response to the cache. A response cut short by an
    // upstream error is skipped: replaying a truncated body would be wrong.
    if (
      !streamed &&
      provider !== null &&
      upstreamError === null &&
      responseBodyChunks.length > 0
    ) {
      try {
        RESPONSE_CACHE.store(body.parsed, {
          status: upstreamStatus,
          headers: cachedHeaders,
          body: Buffer.concat(responseBodyChunks),
          usage: { inputTokens: usage.inputTokens, outputTokens: usage.outputTokens },
          model: effectiveModel,
        });
      } catch {
        /* caching is best-effort; the response has already been delivered */
      }
    }
  };

  await new Promise<void>((resolve) => {
    const settle = (): void => {
      finalize();
      resolve();
    };

    const upstreamReq = requester(
      {
        method: req.method ?? "POST",
        hostname: upstream.hostname,
        port: upstream.port || (isHttps ? 443 : 80),
        path: upstream.pathname + upstream.search,
        headers,
      },
      (upstreamRes) => {
        upstreamStatus = upstreamRes.statusCode ?? 0;
        res.statusCode = upstreamStatus;
        for (const [name, value] of Object.entries(upstreamRes.headers)) {
          if (value === undefined) continue;
          if (HOP_BY_HOP.has(name.toLowerCase())) continue;
          const strVal = Array.isArray(value) ? value.join(", ") : value;
          res.setHeader(name, strVal);
          cachedHeaders[name] = strVal;
        }
        if (effects.length > 0) {
          res.setHeader("x-tokenshield-processors", effects.map((e) => e.name).join(","));
        }

        const contentType = String(upstreamRes.headers["content-type"] ?? "");
        const isSse = contentType.includes("text/event-stream");

        if (isSse) {
          const parser = new SSEParser();
          const accum = provider !== null ? provider.createStreamAccumulator() : null;
          upstreamRes.on("data", (chunk: Buffer) => {
            // Relay first; the parsed copy below is only for accounting.
            res.write(chunk);
            if (accum === null) return;
            try {
              for (const ev of parser.push(chunk.toString("utf8"))) {
                accum.observe(ev);
              }
            } catch {
              /* accounting must never break the data path */
            }
          });
          upstreamRes.on("end", () => {
            if (accum !== null) {
              try {
                for (const ev of parser.flush()) accum.observe(ev);
                usage = accum.total();
                modelFromResponse = accum.model();
              } catch {
                /* ignore */
              }
            }
            res.end();
            settle();
          });
        } else {
          upstreamRes.on("data", (chunk: Buffer) => {
            responseBodyChunks.push(chunk);
            res.write(chunk);
          });
          upstreamRes.on("end", () => {
            try {
              const text = Buffer.concat(responseBodyChunks).toString("utf8");
              if (text.length > 0 && provider !== null) {
                const parsed: unknown = JSON.parse(text);
                const u = provider.usageFromResponseJson(parsed);
                usage = u.usage;
                modelFromResponse = u.model;
              }
            } catch {
              /* non-JSON or parse failure — leave usage at zero */
            }
            res.end();
            settle();
          });
        }

        // Same handling for both response kinds: note the error, close out the client.
        upstreamRes.on("error", (err) => {
          upstreamError = err.message;
          res.end();
          settle();
        });
      },
    );

    upstreamReq.on("error", (err) => {
      upstreamError = err.message;
      if (!res.headersSent) {
        res.statusCode = 502;
        res.setHeader("content-type", "application/json");
        res.end(
          JSON.stringify({
            type: "error",
            error: {
              type: "tokenshield_upstream_error",
              message: `Failed to reach Anthropic: ${err.message}`,
            },
          }),
        );
      } else {
        res.end();
      }
      settle();
    });

    upstreamReq.write(outboundBytes);
    upstreamReq.end();
  });
}
|
package/src/proxy/sse.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { SSEEvent } from "../types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Incremental Server-Sent Events parser. Accepts already-decoded text in
 * arbitrary chunks and yields complete events. An event is buffered until its
 * blank-line terminator is seen.
 *
 * Anthropic's SSE format emits lines of the form:
 *   event: message_start
 *   data: {"type":"message_start", ...}
 *
 * separated by blank lines. The parser never touches the bytes relayed to the
 * client; it only reads a copy for accounting. Lines end with "\n" or "\r\n".
 * Comment lines (leading ":") and fields other than `event` / `data` are ignored.
 */
export class SSEParser {
  private buffer = "";
  private eventName = "";
  private dataLines: string[] = [];

  /**
   * Feed the next chunk of text.
   *
   * @param chunk - Decoded text; it may end in the middle of a line.
   * @returns Events completed by this chunk, in order (possibly none).
   */
  push(chunk: string): SSEEvent[] {
    this.buffer += chunk;
    const events: SSEEvent[] = [];
    // Scan with an offset and trim the buffer once, instead of re-slicing per line.
    let start = 0;
    let newline = this.buffer.indexOf("\n", start);
    while (newline !== -1) {
      this.consumeLine(this.buffer.slice(start, newline), events);
      start = newline + 1;
      newline = this.buffer.indexOf("\n", start);
    }
    this.buffer = this.buffer.slice(start);
    return events;
  }

  /**
   * Signal end of stream and emit whatever is still pending.
   *
   * A final line that arrived without its newline is processed first, so a
   * stream ending in `data: {...}` with no trailing blank line still yields
   * that event with its data. The parser is empty afterwards.
   *
   * @returns Zero or more events.
   */
  flush(): SSEEvent[] {
    const events: SSEEvent[] = [];
    if (this.buffer !== "") {
      const tail = this.buffer;
      this.buffer = "";
      this.consumeLine(tail, events);
    }
    this.dispatchPending(events);
    return events;
  }

  /** Interpret one line (without its "\n") and append any completed event to `out`. */
  private consumeLine(rawLine: string, out: SSEEvent[]): void {
    const line = rawLine.replace(/\r$/, "");
    if (line === "") {
      this.dispatchPending(out);
    } else if (line.startsWith(":")) {
      // comment / keep-alive
    } else if (line.startsWith("event:")) {
      this.eventName = line.slice(6).trimStart();
    } else if (line.startsWith("data:")) {
      this.dataLines.push(line.slice(5).trimStart());
    }
    // ignore other field names (id:, retry:) — not used by Anthropic
  }

  /** Emit the buffered event, if any, and reset the per-event state. */
  private dispatchPending(out: SSEEvent[]): void {
    if (this.dataLines.length > 0 || this.eventName !== "") {
      out.push({
        event: this.eventName || "message",
        data: this.dataLines.join("\n"),
      });
    }
    this.eventName = "";
    this.dataLines = [];
  }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import type { SSEEvent, UsageCounts } from "../types.js";
|
|
2
|
+
import { emptyUsage, addUsage } from "../pricing.js";
|
|
3
|
+
|
|
4
|
+
/** Shape of the `usage` object as it appears in Anthropic responses (snake_case, all optional). */
interface AnthropicUsage {
  input_tokens?: number;
  output_tokens?: number;
  cache_creation_input_tokens?: number;
  cache_read_input_tokens?: number;
}

/**
 * Convert a wire-format usage object into the internal camelCase counts.
 *
 * The argument comes from unvalidated JSON, so each field is checked at
 * runtime: anything that is not a finite, non-negative number (missing,
 * `null`, a string, a negative value) is counted as 0 instead of flowing into
 * later cost arithmetic.
 *
 * @param u - Usage object taken from a response, or `undefined` when absent.
 * @returns Usage counts; `emptyUsage()` when `u` is falsy.
 */
function fromAnthropic(u: AnthropicUsage | undefined): UsageCounts {
  if (!u) return emptyUsage();
  const count = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : 0;
  return {
    inputTokens: count(u.input_tokens),
    outputTokens: count(u.output_tokens),
    cacheCreationInputTokens: count(u.cache_creation_input_tokens),
    cacheReadInputTokens: count(u.cache_read_input_tokens),
  };
}
|
|
20
|
+
|
|
21
|
+
/**
 * Tracks usage across the SSE events of one streamed Anthropic response.
 *
 * Only two event types are read:
 *   message_start — carries `message.usage` (and `message.model`); replaces
 *                   the running counts
 *   message_delta — carries `usage`; its output_tokens replaces the running
 *                   value, other counts only fill in values that are still 0
 *
 * Every other event, and any event whose data is not a JSON object, is
 * ignored. Non-streaming responses are handled by usageFromJson instead.
 */
export class StreamUsageAccumulator {
  private current: UsageCounts = emptyUsage();
  private modelFromEvent: string | null = null;

  /** Fold one parsed SSE event into the running totals. Never throws on bad data. */
  observe(event: SSEEvent): void {
    const kind = event.event;
    if (kind !== "message_start" && kind !== "message_delta") return;

    let payload: unknown;
    try {
      payload = JSON.parse(event.data);
    } catch {
      return;
    }
    if (typeof payload !== "object" || payload === null) return;
    const fields = payload as Record<string, unknown>;

    if (kind === "message_start") {
      const message = fields["message"] as Record<string, unknown> | undefined;
      if (!message) return;
      const modelName = message["model"];
      if (typeof modelName === "string") {
        this.modelFromEvent = modelName;
      }
      this.current = fromAnthropic(message["usage"] as AnthropicUsage | undefined);
      return;
    }

    // message_delta
    const deltaUsage = fields["usage"] as AnthropicUsage | undefined;
    if (!deltaUsage) return;
    const latest = fromAnthropic(deltaUsage);
    const previous = this.current;
    // `||` is deliberate: a count of 0 means "not seen yet", so the delta may fill it in.
    this.current = {
      inputTokens: previous.inputTokens || latest.inputTokens,
      outputTokens: latest.outputTokens,
      cacheCreationInputTokens:
        previous.cacheCreationInputTokens || latest.cacheCreationInputTokens,
      cacheReadInputTokens: previous.cacheReadInputTokens || latest.cacheReadInputTokens,
    };
  }

  /** Copy of the counts accumulated so far. */
  total(): UsageCounts {
    return Object.assign({}, this.current);
  }

  /** Model name from the most recent message_start that carried one, else `null`. */
  model(): string | null {
    return this.modelFromEvent;
  }
}
|
|
84
|
+
|
|
85
|
+
/**
 * Read usage and model from a complete (non-streamed) response body.
 *
 * @param body - Decoded JSON response; any value is accepted.
 * @returns The converted top-level `usage` and the top-level `model` when it
 *   is a string. Non-object bodies yield empty usage and a `null` model.
 */
export function usageFromJson(body: unknown): {
  usage: UsageCounts;
  model: string | null;
} {
  if (typeof body !== "object" || body === null) {
    return { usage: emptyUsage(), model: null };
  }
  const fields = body as Record<string, unknown>;
  const usage = fromAnthropic(fields["usage"] as AnthropicUsage | undefined);
  const modelField = fields["model"];
  return {
    usage,
    model: typeof modelField === "string" ? modelField : null,
  };
}
|
|
97
|
+
|
|
98
|
+
export { addUsage };
|
package/src/server.ts
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { createServer, IncomingMessage, ServerResponse, Server } from "node:http";
|
|
2
|
+
import type { ProxyConfig, RequestRecord } from "./types.js";
|
|
3
|
+
import { handleAnthropicRequest } from "./proxy/anthropic-passthrough.js";
|
|
4
|
+
import { Ledger } from "./ledger.js";
|
|
5
|
+
|
|
6
|
+
/** What `start()` returns: the two HTTP servers, the ledger, and a shutdown function. */
export interface ProxyServerHandle {
  /** Server that receives client API traffic and forwards it upstream. */
  proxy: Server;
  /** Server that exposes the dashboard page and its JSON endpoints. */
  dashboard: Server;
  /** Ledger that stores one record per handled request. */
  ledger: Ledger;
  /** Stops the retention timer, closes both servers, then closes the ledger. */
  close: () => Promise<void>;
}

/** Produces the dashboard HTML for the current ledger contents. */
type DashboardRenderer = (ledger: Ledger) => string;

/** Arguments accepted by `start()`. */
export interface StartOptions {
  /** Ports, bind address, upstream URL, ledger path and retention settings. */
  config: ProxyConfig;
  /** Optional observer called with every record after the ledger write was attempted. */
  onRecord?: (r: RequestRecord) => void;
  /** Optional HTML renderer; a placeholder page is served when omitted. */
  renderDashboard?: DashboardRenderer;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Build a complete proxy configuration, filling every field that `overrides`
 * leaves `undefined` or `null` with its default.
 *
 * The default ledger path lives under `$HOME` (or `%USERPROFILE%`, or the
 * current directory when neither variable is set).
 *
 * @param overrides - Any subset of the configuration fields.
 * @returns A new, fully populated configuration object.
 */
export function defaultConfig(overrides: Partial<ProxyConfig> = {}): ProxyConfig {
  const {
    upstreamBaseUrl,
    port,
    bind,
    dashboardPort,
    ledgerPath,
    enabledProcessors,
    retentionDays,
  } = overrides;
  const homeDir = process.env["HOME"] ?? process.env["USERPROFILE"] ?? ".";

  return {
    upstreamBaseUrl: upstreamBaseUrl ?? "https://api.anthropic.com",
    port: port ?? 7777,
    bind: bind ?? "127.0.0.1",
    dashboardPort: dashboardPort ?? 7778,
    ledgerPath: ledgerPath ?? `${homeDir}/.tokenshield/ledger.db`,
    enabledProcessors: enabledProcessors ?? ["token-accounting"],
    retentionDays: retentionDays ?? 7,
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Start listening and wait until the server is bound.
 *
 * @param server - Server to bind.
 * @param port - TCP port.
 * @param bind - Address to bind to.
 * @returns Resolves once the listen callback runs; rejects with the first
 *   "error" emitted before that. The temporary error listener is removed on success.
 */
async function listenOn(server: Server, port: number, bind: string): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const onListening = (): void => {
      server.removeListener("error", reject);
      resolve();
    };
    server.once("error", reject);
    server.listen(port, bind, onListening);
  });
}
|
|
43
|
+
|
|
44
|
+
/**
 * Close a server and wait for its close callback.
 * Any error passed to that callback is ignored, so this never rejects.
 */
async function closeServer(server: Server): Promise<void> {
  await new Promise<void>((done) => {
    server.close(() => {
      done();
    });
  });
}
|
|
49
|
+
|
|
50
|
+
/**
 * Open the ledger, start the proxy and dashboard servers, and schedule hourly
 * pruning of ledger records older than `config.retentionDays`.
 *
 * If either server fails to bind, everything opened so far is closed again
 * before the listen error is rethrown.
 *
 * @param opts - Configuration plus optional record observer and dashboard renderer.
 * @returns Handle exposing both servers, the ledger and an async `close()`.
 * @throws The listen error when a port cannot be bound.
 */
export async function start(opts: StartOptions): Promise<ProxyServerHandle> {
  const ledger = new Ledger(opts.config.ledgerPath);

  const sink = (r: RequestRecord): void => {
    try {
      ledger.record(r);
    } catch {
      // ledger errors must never break the request path
    }
    opts.onRecord?.(r);
  };

  // Route on the path only, so a query string does not defeat the match.
  const pathOf = (req: IncomingMessage): string => (req.url ?? "/").split("?", 1)[0] ?? "/";

  // Serialize before touching the response so a failure leaves it untouched.
  const sendJson = (
    res: ServerResponse,
    status: number,
    payload: unknown,
    noStore: boolean,
  ): void => {
    const text = JSON.stringify(payload);
    res.statusCode = status;
    res.setHeader("content-type", "application/json");
    if (noStore) res.setHeader("cache-control", "no-store");
    res.end(text);
  };

  const sendInternalError = (res: ServerResponse, err: unknown): void => {
    if (!res.headersSent) {
      sendJson(
        res,
        500,
        {
          type: "error",
          error: {
            type: "tokenshield_internal_error",
            message: err instanceof Error ? err.message : "unknown",
          },
        },
        false,
      );
      return;
    }
    try {
      res.end();
    } catch {
      // ignore
    }
  };

  const proxy = createServer((req: IncomingMessage, res: ServerResponse) => {
    if (pathOf(req) === "/__tokenshield/health") {
      // NOTE(review): this version literal is hard-coded and differs from the
      // published package version (0.2.0) — confirm which one is intended.
      sendJson(res, 200, { ok: true, version: "0.1.0" }, false);
      return;
    }
    handleAnthropicRequest(req, res, opts.config, sink).catch((err: unknown) => {
      sendInternalError(res, err);
    });
  });
  proxy.keepAliveTimeout = 65_000;
  proxy.headersTimeout = 70_000;
  proxy.requestTimeout = 0; // streaming responses can be long

  const dashboard = createServer((req: IncomingMessage, res: ServerResponse) => {
    // A throw from the ledger or the renderer inside this listener would
    // otherwise surface as an uncaught exception; answer with a 500 instead.
    try {
      switch (pathOf(req)) {
        case "/api/summary": {
          const since = Date.now() - 24 * 60 * 60 * 1000;
          sendJson(res, 200, ledger.summary(since), true);
          return;
        }
        case "/api/recent":
          sendJson(res, 200, ledger.recent(50), true);
          return;
        case "/health":
          sendJson(res, 200, { ok: true }, false);
          return;
        default: {
          const html = opts.renderDashboard?.(ledger) ?? defaultDashboardHtml();
          res.statusCode = 200;
          res.setHeader("content-type", "text/html; charset=utf-8");
          res.setHeader("cache-control", "no-store");
          res.end(html);
        }
      }
    } catch (err) {
      sendInternalError(res, err);
    }
  });

  try {
    await listenOn(proxy, opts.config.port, opts.config.bind);
    await listenOn(dashboard, opts.config.dashboardPort, opts.config.bind);
  } catch (err) {
    // Roll back a partial start: without this the bound server and the open
    // ledger would outlive the failed call with no handle to close them.
    const bound = [proxy, dashboard].filter((server) => server.listening);
    await Promise.all(bound.map((server) => closeServer(server)));
    try {
      ledger.close();
    } catch {
      // best effort; the listen error below is the one worth reporting
    }
    throw err;
  }

  const retentionInterval = setInterval(() => {
    const cutoff = Date.now() - opts.config.retentionDays * 24 * 60 * 60 * 1000;
    try {
      ledger.prune(cutoff);
    } catch {
      // ignore
    }
  }, 60 * 60 * 1000);
  // Do not let the retention timer keep the process alive on its own.
  retentionInterval.unref();

  return {
    proxy,
    dashboard,
    ledger,
    close: async () => {
      clearInterval(retentionInterval);
      await Promise.all([closeServer(proxy), closeServer(dashboard)]);
      ledger.close();
    },
  };
}
|
|
150
|
+
|
|
151
|
+
/** Placeholder page served when no dashboard renderer was supplied. */
function defaultDashboardHtml(): string {
  const head = `<!doctype html><meta charset="utf-8"><title>TokenShield</title>`;
  const page = `<body><h1>TokenShield</h1><p>Dashboard renderer not provided.</p></body>`;
  return `${head}\n${page}`;
}
|