claude-code-cache-fix 2.0.6 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -5
- package/bin/claude-via-proxy.mjs +113 -0
- package/package.json +8 -3
- package/proxy/config.mjs +23 -0
- package/proxy/extensions/cache-control-normalize.mjs +59 -0
- package/proxy/extensions/cache-telemetry.mjs +24 -0
- package/proxy/extensions/fingerprint-strip.mjs +105 -0
- package/proxy/extensions/fresh-session-sort.mjs +188 -0
- package/proxy/extensions/identity-normalization.mjs +129 -0
- package/proxy/extensions/request-log.mjs +35 -0
- package/proxy/extensions/sort-stabilization.mjs +62 -0
- package/proxy/extensions/ttl-management.mjs +49 -0
- package/proxy/extensions.json +10 -0
- package/proxy/pipeline.mjs +96 -0
- package/proxy/server.mjs +168 -0
- package/proxy/stream.mjs +110 -0
- package/proxy/upstream.mjs +93 -0
- package/proxy/watcher.mjs +42 -0
- package/tools/MANUAL-COMPACT.md +41 -2
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
// Last normalized text seen per block slot, keyed by the caller-chosen slot
// name; each value is `{ hash, text }` (see pinBlockContent).
const _pinnedBlocks = new Map();

// Matches the `resume` variant of the SessionStart hook banner, which
// normalizeSessionStartText rewrites to the `startup` variant. The pattern
// must NOT match the `startup` text itself: replacing a string with itself is
// a no-op and leaves resumed sessions with a different prefix.
const SESSION_START_RESUME_MARKER = /SessionStart:resume hook success:/g;

// A `<session-id>…</session-id>` tag, together with the newline before it if
// there is one.
const SESSION_START_ID_TAG = /\n?<session-id>[^<]*<\/session-id>/g;

// A whole "Last active: …" line, including the newline that precedes it.
const SESSION_START_LAST_ACTIVE_LINE = /\nLast active:[^\n]*/g;

// Exact text of the trailer block recognized by isContinueTrailerBlock.
const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";

// A text that is exactly one <system-reminder> wrapper; group 1 captures the
// inner content between the surrounding newlines.
const REMINDER_WRAP_REGEX = /^<system-reminder>\n([\s\S]*?)\n<\/system-reminder>\s*$/;

// Inner reminder contents treated as pure bookkeeping (token usage, output
// token counters, USD budget). None of these are global regexes, so `.test`
// on them is stateless.
const BOOKKEEPING_REMINDER_PATTERNS = [
  /^Token usage: \d+\/\d+; \d+ remaining\s*$/,
  /^Output tokens \u2014 turn: [^\n]+ \u00b7 session: [^\n]+\s*$/,
  /^USD budget: \$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining\s*$/,
];
|
|
16
|
+
|
|
17
|
+
/**
 * Canonicalize the whitespace around a trailing `</system-reminder>` tag and
 * remember the result for the given slot.
 *
 * @param {string} blockType - Slot key used in the `_pinnedBlocks` map.
 * @param {string} text - Block text to canonicalize.
 * @returns {string} The stored text when its hash equals the hash of the new
 *   canonical form; otherwise the new canonical form, which then replaces the
 *   stored entry.
 */
function pinBlockContent(blockType, text) {
  // Whatever whitespace precedes the final closing tag becomes exactly one
  // "\n", and anything after the tag is dropped.
  const canonical = text.replace(/\s+(<\/system-reminder>)\s*$/, "\n$1");
  const digest = createHash("sha256").update(canonical).digest("hex").slice(0, 16);

  const previous = _pinnedBlocks.get(blockType);
  const isUnchanged = Boolean(previous) && previous.hash === digest;
  if (isUnchanged) {
    return previous.text;
  }

  _pinnedBlocks.set(blockType, { hash: digest, text: canonical });
  return canonical;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Remove every `<session_knowledge …>…</session_knowledge>` section from the
 * text, along with the newline immediately before it. A section that is not
 * preceded by a newline is left in place.
 *
 * @param {string} text
 * @returns {string}
 */
function stripSessionKnowledge(text) {
  const sessionKnowledgeSection = /\n<session_knowledge[^>]*>[\s\S]*?<\/session_knowledge>/g;
  return text.replace(sessionKnowledgeSection, "");
}
|
|
36
|
+
|
|
37
|
+
/**
 * Apply the SessionStart rewrites to a text: marker rewrite, `<session-id>`
 * removal and "Last active:" line removal, in that order.
 *
 * @param {*} text - Returned untouched unless it is a string containing
 *   "SessionStart:".
 * @returns {[*, number]} The resulting text and how many of the three
 *   patterns matched.
 */
function normalizeSessionStartText(text) {
  if (typeof text !== "string" || !text.includes("SessionStart:")) return [text, 0];

  const rewrites = [
    [SESSION_START_RESUME_MARKER, "SessionStart:startup hook success:"],
    [SESSION_START_ID_TAG, ""],
    [SESSION_START_LAST_ACTIVE_LINE, ""],
  ];

  let result = text;
  let applied = 0;
  for (const [pattern, replacement] of rewrites) {
    if (!pattern.test(result)) continue;
    // The patterns are global: a successful test() leaves lastIndex advanced,
    // so rewind before reusing the same regex.
    pattern.lastIndex = 0;
    result = result.replace(pattern, replacement);
    applied += 1;
  }
  return [result, applied];
}
|
|
58
|
+
|
|
59
|
+
/**
 * Tell whether a content block is a text block whose text is exactly
 * CONTINUE_TRAILER_TEXT.
 *
 * @param {*} block
 * @returns {boolean}
 */
function isContinueTrailerBlock(block) {
  if (!block || typeof block !== "object") return false;
  return block.type === "text" && block.text === CONTINUE_TRAILER_TEXT;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Tell whether a text is a single `<system-reminder>` wrapper whose inner
 * content matches one of BOOKKEEPING_REMINDER_PATTERNS.
 *
 * @param {*} text
 * @returns {boolean} false for non-strings and for anything that is not
 *   exactly one wrapped reminder.
 */
function isBookkeepingReminder(text) {
  if (typeof text !== "string") return false;

  const wrapped = REMINDER_WRAP_REGEX.exec(text);
  if (wrapped === null) return false;

  const [, inner] = wrapped;
  return BOOKKEEPING_REMINDER_PATTERNS.some((pattern) => pattern.test(inner));
}
|
|
78
|
+
|
|
79
|
+
/**
 * Pipeline extension "identity-normalization" (default order 300).
 *
 * onRequest rewrites `ctx.body` in place:
 *  - system text blocks: `<session_knowledge>` sections are stripped, and
 *    blocks containing `<system-reminder>` are passed through pinBlockContent
 *    under the slot name `system_<index>`;
 *  - message text blocks: SessionStart text is normalized, except that a
 *    trailing "Continue" block of a user message and bookkeeping reminders
 *    are skipped untouched.
 * A block is replaced by a shallow copy only when its text actually changed.
 */
export default {
  name: "identity-normalization",
  description: "Normalize volatile identity fields (SessionStart, Continue trailers, bookkeeping) for cache stability",
  order: 300,

  async onRequest(ctx) {
    const { body } = ctx;

    if (Array.isArray(body.system)) {
      for (const [index, entry] of body.system.entries()) {
        const isTextBlock = entry.type === "text" && typeof entry.text === "string";
        if (!isTextBlock) continue;

        const withoutKnowledge = entry.text.includes("session_knowledge")
          ? stripSessionKnowledge(entry.text)
          : entry.text;
        const cleaned = withoutKnowledge.includes("<system-reminder>")
          ? pinBlockContent(`system_${index}`, withoutKnowledge)
          : withoutKnowledge;

        if (cleaned !== entry.text) {
          body.system[index] = { ...entry, text: cleaned };
        }
      }
    }

    if (!Array.isArray(body.messages)) return;

    for (const message of body.messages) {
      const parts = message.content;
      if (!Array.isArray(parts)) continue;

      // Walk from the last block to the first, as the original loop does.
      for (let idx = parts.length - 1; idx >= 0; idx -= 1) {
        const part = parts[idx];
        if (part.type !== "text" || typeof part.text !== "string") continue;

        const isTrailingUserContinue =
          message.role === "user" && idx === parts.length - 1 && isContinueTrailerBlock(part);
        if (isTrailingUserContinue || isBookkeepingReminder(part.text)) continue;

        const [rewritten] = normalizeSessionStartText(part.text);
        if (rewritten !== part.text) {
          parts[idx] = { ...part, text: rewritten };
        }
      }
    }
  },
};
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { appendFile } from "node:fs/promises";
|
|
2
|
+
|
|
3
|
+
// Destination of the NDJSON log; an empty string (variable unset) means no
// entry is ever written.
const LOG_PATH = process.env.CACHE_FIX_REQUEST_LOG || "";

/**
 * Pipeline extension "request-log" (default order 700, disabled by default).
 *
 * Records request/response timestamps in `ctx.meta` and, on each
 * `message_delta` stream event, appends one JSON line to LOG_PATH.
 */
export default {
  name: "request-log",
  description: "Optional NDJSON request timing log",
  enabled: false,
  order: 700,

  async onRequest(ctx) {
    const { meta } = ctx;
    meta._requestStart = Date.now();
    meta._requestModel = ctx.body.model || null;
  },

  async onResponseStart(ctx) {
    ctx.meta._responseStart = Date.now();
  },

  async onStreamEvent(ctx) {
    const isFinalDelta = ctx.event?.type === "message_delta";
    if (!isFinalDelta || !ctx.meta._requestStart || !LOG_PATH) return;

    const { meta, event } = ctx;
    const ts = new Date().toISOString();
    // Falls back to "now" when no response-start timestamp was recorded.
    const firstByteAt = meta._responseStart || Date.now();
    const record = {
      ts,
      model: meta._requestModel,
      latencyMs: firstByteAt - meta._requestStart,
      outputTokens: event.usage?.output_tokens || 0,
      cacheRead: meta.cacheStats?.cacheRead || 0,
      cacheCreation: meta.cacheStats?.cacheCreation || 0,
    };

    try {
      await appendFile(LOG_PATH, `${JSON.stringify(record)}\n`);
    } catch {
      // Best effort: a failed log write is ignored.
    }
  },
};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
 * Sort the "- " entries of a skills reminder block.
 *
 * The text is split into a header (everything up to the first blank line that
 * is followed by "- "), the entry list, and the closing `</system-reminder>`
 * footer. Entries start at each line beginning with "- " and are ordered with
 * the default Array sort.
 *
 * @param {string} text
 * @returns {string} The reordered text, or `text` itself when the layout does
 *   not match.
 */
function sortSkillsBlock(text) {
  const layout = /^([\s\S]*?\n\n)(- [\s\S]+?)(\n<\/system-reminder>\s*)$/;
  const parts = text.match(layout);
  if (parts === null) return text;

  const [, preamble, listing, closing] = parts;
  const sortedEntries = listing.split(/\n(?=- )/).sort();
  return preamble + sortedEntries.join("\n") + closing;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Sort the tool names listed in a "deferred tools are now available" reminder.
 *
 * Each line between the header line and the closing tag is trimmed, blank
 * lines are dropped, and the remaining names are ordered with the default
 * Array sort.
 *
 * @param {string} text
 * @returns {string} The reordered text, or `text` itself when the layout does
 *   not match.
 */
function sortDeferredToolsBlock(text) {
  const layout =
    /^(<system-reminder>\nThe following deferred tools are now available[^\n]*\n)([\s\S]+?)(\n<\/system-reminder>\s*)$/;
  const parts = text.match(layout);
  if (parts === null) return text;

  const [, banner, listing, closing] = parts;
  const names = [];
  for (const rawLine of listing.split("\n")) {
    const name = rawLine.trim();
    if (name) names.push(name);
  }
  names.sort();
  return banner + names.join("\n") + closing;
}
|
|
22
|
+
|
|
23
|
+
/**
 * @param {*} text
 * @returns {boolean} true when `text` is a string containing
 *   "User-invocable skills".
 */
function isSkillsBlock(text) {
  if (typeof text !== "string") return false;
  return text.includes("User-invocable skills");
}
|
|
26
|
+
|
|
27
|
+
/**
 * @param {*} text
 * @returns {boolean} true when `text` is a string containing
 *   "deferred tools are now available".
 */
function isDeferredToolsBlock(text) {
  if (typeof text !== "string") return false;
  return text.includes("deferred tools are now available");
}
|
|
30
|
+
|
|
31
|
+
/**
 * Pipeline extension "sort-stabilization" (default order 200).
 *
 * onRequest rewrites `ctx.body` in place: skills and deferred-tools reminder
 * blocks inside `system` get their entries sorted, and `tools` is sorted by
 * `name` (a missing name sorts as the empty string).
 */
export default {
  name: "sort-stabilization",
  description: "Deterministic ordering of skills, deferred tools, and tool definitions",
  order: 200,

  async onRequest(ctx) {
    const { body } = ctx;

    if (Array.isArray(body.system)) {
      for (const [index, entry] of body.system.entries()) {
        if (entry.type !== "text" || typeof entry.text !== "string") continue;

        // A block is treated as a skills block first; the deferred-tools
        // check only applies when it is not one.
        let reorder = null;
        if (isSkillsBlock(entry.text)) {
          reorder = sortSkillsBlock;
        } else if (isDeferredToolsBlock(entry.text)) {
          reorder = sortDeferredToolsBlock;
        }
        if (reorder === null) continue;

        const sorted = reorder(entry.text);
        if (sorted !== entry.text) {
          body.system[index] = { ...entry, text: sorted };
        }
      }
    }

    if (Array.isArray(body.tools)) {
      const nameOf = (tool) => tool.name || "";
      body.tools.sort((left, right) => nameOf(left).localeCompare(nameOf(right)));
    }
  },
};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// Configured TTLs, lower-cased; both fall back to "1h" when the environment
// variable is unset or empty.
const TTL_MAIN = (process.env.CACHE_FIX_TTL_MAIN || "1h").toLowerCase();
const TTL_SUBAGENT = (process.env.CACHE_FIX_TTL_SUBAGENT || "1h").toLowerCase();

// A system text block starting with this sentence marks the request as a
// subagent request.
const AGENT_SDK_PREFIX = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";

/**
 * Classify a request from its `system` field.
 *
 * @param {*} system - Anything that is not an array yields "main".
 * @returns {"main"|"subagent"} "subagent" when any text block's text starts
 *   with AGENT_SDK_PREFIX.
 */
function detectRequestType(system) {
  if (!Array.isArray(system)) return "main";

  for (const block of system) {
    const isText = block?.type === "text" && typeof block.text === "string";
    if (isText && block.text.startsWith(AGENT_SDK_PREFIX)) {
      return "subagent";
    }
  }
  return "main";
}
|
|
12
|
+
|
|
13
|
+
/**
 * Add a `ttl` to a block's ephemeral `cache_control` marker when it has none.
 *
 * @param {object} block - Content block; never mutated.
 * @param {string} ttlParam - TTL value to inject.
 * @returns {object} A shallow copy with the TTL set, or `block` itself when it
 *   has no ephemeral marker or the marker already carries a truthy `ttl`.
 */
function injectTtl(block, ttlParam) {
  const marker = block.cache_control;
  const needsTtl = marker?.type === "ephemeral" && !marker.ttl;
  if (!needsTtl) return block;

  return { ...block, cache_control: { ...marker, ttl: ttlParam } };
}
|
|
19
|
+
|
|
20
|
+
/**
 * Pipeline extension "ttl-management" (default order 500).
 *
 * onRequest does nothing when the body has no `system` field or when the TTL
 * configured for the detected request type is "none". Otherwise every system
 * block and every message content block goes through injectTtl with "5m"
 * (only when configured exactly so) or "1h" (any other value).
 */
export default {
  name: "ttl-management",
  description: "Inject correct TTL on cache_control markers based on detected tier",
  order: 500,

  async onRequest(ctx) {
    const { body } = ctx;
    if (!body.system) return;

    const isSubagent = detectRequestType(body.system) === "subagent";
    const configured = isSubagent ? TTL_SUBAGENT : TTL_MAIN;
    if (configured === "none") return;

    const ttlParam = configured === "5m" ? "5m" : "1h";

    if (Array.isArray(body.system)) {
      body.system = body.system.map((block) => injectTtl(block, ttlParam));
    }

    if (!Array.isArray(body.messages)) return;

    for (const message of body.messages) {
      const parts = message.content;
      if (!Array.isArray(parts)) continue;
      for (const [index, part] of parts.entries()) {
        parts[index] = injectTtl(part, ttlParam);
      }
    }
  },
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fingerprint-strip": { "enabled": true, "order": 100 },
|
|
3
|
+
"sort-stabilization": { "enabled": true, "order": 200 },
|
|
4
|
+
"fresh-session-sort": { "enabled": true, "order": 250 },
|
|
5
|
+
"identity-normalization": { "enabled": true, "order": 300 },
|
|
6
|
+
"cache-control-normalize": { "enabled": true, "order": 400 },
|
|
7
|
+
"ttl-management": { "enabled": true, "order": 500 },
|
|
8
|
+
"cache-telemetry": { "enabled": true, "order": 600 },
|
|
9
|
+
"request-log": { "enabled": false, "order": 700 }
|
|
10
|
+
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { readdir, readFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
// Extensions currently in effect, sorted by ascending `order`. Replaced
// wholesale by every loadExtensions call.
let registry = [];

/**
 * Load every `.mjs` file in `dir` as an extension and make the result the
 * active registry.
 *
 * A module is accepted when its default export is an object with a `name`.
 * `enabled` and `order` come from the config entry for that name, then from
 * the extension itself, then default to `true` and `1000`. A module that
 * fails to import is reported on stderr and skipped.
 *
 * @param {string} dir - Directory containing the extension modules.
 * @param {string} configPath - JSON file mapping extension names to
 *   `{ enabled, order }` overrides. A missing file means "no overrides".
 * @returns {Promise<object[]>} Enabled extensions sorted by `order`, each a
 *   shallow copy carrying the resolved `order` and the source `_file`.
 * @throws If `dir` cannot be read.
 */
export async function loadExtensions(dir, configPath) {
  // Imported here so the block does not depend on a new file-level import.
  const { pathToFileURL } = await import("node:url");

  let config = {};
  try {
    const parsedConfig = JSON.parse(await readFile(configPath, "utf8"));
    // Only an object can hold per-extension overrides; ignore `null`, numbers…
    if (parsedConfig !== null && typeof parsedConfig === "object") {
      config = parsedConfig;
    }
  } catch (err) {
    // A missing file is the normal "use defaults" case. Anything else
    // (malformed JSON, unreadable file) silently changes which extensions
    // run, so say so — but keep going with defaults.
    if (err?.code !== "ENOENT") {
      process.stderr.write(
        `[pipeline] ignoring extensions config ${configPath}: ${err?.message ?? String(err)}\n`
      );
    }
  }

  const files = await readdir(dir);
  const mjsFiles = files.filter((f) => f.endsWith(".mjs")).sort();

  const extensions = [];
  for (const file of mjsFiles) {
    try {
      // import() takes a URL-like specifier, not a filesystem path: a raw
      // path breaks on Windows drive letters, on relative directories, and on
      // any `#`, `?` or `%` in the path. The `t` query makes every load use a
      // fresh specifier so a changed file is re-evaluated.
      const moduleUrl = pathToFileURL(join(dir, file));
      moduleUrl.searchParams.set("t", String(Date.now()));
      const mod = await import(moduleUrl.href);

      const ext = mod.default;
      if (!ext || !ext.name) continue;

      const cfg = config[ext.name];
      const enabled = cfg?.enabled ?? ext.enabled ?? true;
      const order = cfg?.order ?? ext.order ?? 1000;

      if (enabled) {
        extensions.push({ ...ext, order, _file: file });
      }
    } catch (err) {
      process.stderr.write(
        `[pipeline] failed to load ${file}: ${err?.message ?? String(err)}\n`
      );
    }
  }

  extensions.sort((a, b) => a.order - b.order);
  registry = extensions;
  return extensions;
}
|
|
39
|
+
|
|
40
|
+
/**
 * @returns {object[]} The live registry array itself, not a copy.
 */
export function getRegistry() {
  return registry;
}
|
|
43
|
+
|
|
44
|
+
/**
 * @returns {object[]} A new array holding the same extension objects as the
 *   registry at the time of the call.
 */
export function snapshotRegistry() {
  return Array.from(registry);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Run every extension's `onRequest` hook in order.
 *
 * @param {object} ctx - Context object handed to each hook.
 * @param {object[]} [snapshot] - Extensions to run; the live registry is used
 *   when this is falsy.
 * @returns {Promise<object|undefined>} The first hook result with a truthy
 *   `skip` (later hooks are not run), otherwise `undefined`.
 *
 * A hook that throws is reported on stderr and the remaining hooks still run.
 */
export async function runOnRequest(ctx, snapshot) {
  const extensions = snapshot || registry;
  for (const ext of extensions) {
    if (!ext.onRequest) continue;
    try {
      const outcome = await ext.onRequest(ctx);
      if (outcome?.skip) return outcome;
    } catch (err) {
      // `err` can be anything, including null/undefined; reading `.message`
      // unguarded would throw from inside the handler and abort the pipeline.
      const reason = err?.message ?? String(err);
      process.stderr.write(`[pipeline] ${ext.name}.onRequest error: ${reason}\n`);
    }
  }
  return undefined;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Run every extension's `onResponseStart` hook in order.
 *
 * @param {object} ctx - Context object handed to each hook.
 * @param {object[]} [snapshot] - Extensions to run; the live registry is used
 *   when this is falsy.
 * @returns {Promise<void>}
 *
 * A hook that throws is reported on stderr and the remaining hooks still run.
 */
export async function runOnResponseStart(ctx, snapshot) {
  const extensions = snapshot || registry;
  for (const ext of extensions) {
    if (!ext.onResponseStart) continue;
    try {
      await ext.onResponseStart(ctx);
    } catch (err) {
      // `err` can be anything, including null/undefined; reading `.message`
      // unguarded would throw from inside the handler and abort the pipeline.
      const reason = err?.message ?? String(err);
      process.stderr.write(`[pipeline] ${ext.name}.onResponseStart error: ${reason}\n`);
    }
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Run every extension's `onStreamEvent` hook in order.
 *
 * @param {object} ctx - Context object handed to each hook; hooks may mutate it.
 * @param {object[]} [snapshot] - Extensions to run; the live registry is used
 *   when this is falsy.
 * @returns {Promise<void>}
 *
 * A hook that throws is reported on stderr and the remaining hooks still run.
 */
export async function runOnStreamEvent(ctx, snapshot) {
  const extensions = snapshot || registry;
  for (const ext of extensions) {
    if (!ext.onStreamEvent) continue;
    try {
      await ext.onStreamEvent(ctx);
    } catch (err) {
      // `err` can be anything, including null/undefined; reading `.message`
      // unguarded would throw from inside the handler and abort the pipeline.
      const reason = err?.message ?? String(err);
      process.stderr.write(`[pipeline] ${ext.name}.onStreamEvent error: ${reason}\n`);
    }
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Run every extension's `onResponse` hook in order.
 *
 * @param {object} ctx - Context object handed to each hook; hooks may mutate it.
 * @param {object[]} [snapshot] - Extensions to run; the live registry is used
 *   when this is falsy.
 * @returns {Promise<void>}
 *
 * A hook that throws is reported on stderr and the remaining hooks still run.
 */
export async function runOnResponse(ctx, snapshot) {
  const extensions = snapshot || registry;
  for (const ext of extensions) {
    if (!ext.onResponse) continue;
    try {
      await ext.onResponse(ctx);
    } catch (err) {
      // `err` can be anything, including null/undefined; reading `.message`
      // unguarded would throw from inside the handler and abort the pipeline.
      const reason = err?.message ?? String(err);
      process.stderr.write(`[pipeline] ${ext.name}.onResponse error: ${reason}\n`);
    }
  }
}
|
package/proxy/server.mjs
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import http from "node:http";
|
|
2
|
+
import config from "./config.mjs";
|
|
3
|
+
import { forwardRequest } from "./upstream.mjs";
|
|
4
|
+
import { streamResponse, createTelemetryRecord } from "./stream.mjs";
|
|
5
|
+
import { loadExtensions, snapshotRegistry, runOnRequest, runOnResponseStart, runOnResponse } from "./pipeline.mjs";
|
|
6
|
+
import { startWatcher } from "./watcher.mjs";
|
|
7
|
+
|
|
8
|
+
/**
 * Buffer an entire request stream.
 *
 * @param {object} req - Emitter of "data", "end" and "error" events.
 * @returns {Promise<Buffer>} Resolves with the concatenated chunks on "end";
 *   rejects with the emitted error on "error".
 */
function collectBody(req) {
  return new Promise((resolve, reject) => {
    const received = [];
    const onData = (piece) => {
      received.push(piece);
    };
    const onEnd = () => {
      resolve(Buffer.concat(received));
    };

    req.on("data", onData);
    req.on("end", onEnd);
    req.on("error", reject);
  });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Proxy one messages request: run request extensions, forward upstream, then
 * relay the response (buffered JSON or an event stream) back to the client.
 *
 * @param {object} clientReq - Incoming HTTP request.
 * @param {object} clientRes - HTTP response to the client.
 * @returns {Promise<void>}
 */
async function handleMessages(clientReq, clientRes) {
  const replyJson = (status, payload) => {
    clientRes.writeHead(status, { "content-type": "application/json" });
    clientRes.end(JSON.stringify(payload));
  };
  const parseJsonOrNull = (input) => {
    try {
      return JSON.parse(input);
    } catch {
      return null;
    }
  };
  const abortClient = (err) => {
    if (!clientRes.writableEnded) {
      clientRes.destroy(err);
    }
  };

  const abortController = new AbortController();
  // One snapshot per request, used for every hook of this request.
  const extSnapshot = snapshotRegistry();
  const hasExtensions = extSnapshot.length > 0;

  // A "close" before the response has ended aborts the upstream request.
  clientReq.on("close", () => {
    if (!clientRes.writableEnded) {
      abortController.abort();
    }
  });

  const rawBody = await collectBody(clientReq);
  const parsed = parseJsonOrNull(rawBody);
  const meta = {};

  // An unparseable body, or one no extension touched, is forwarded verbatim.
  let forwardBody = rawBody;

  if (parsed && hasExtensions) {
    const reqCtx = { body: parsed, headers: { ...clientReq.headers }, meta };
    const skipResult = await runOnRequest(reqCtx, extSnapshot);

    if (skipResult && skipResult.skip) {
      replyJson(skipResult.status || 400, skipResult.body || { error: "blocked_by_extension" });
      return;
    }

    forwardBody = Buffer.from(JSON.stringify(reqCtx.body));
  }

  const requestedModel = parsed?.model || null;

  let upstream;
  try {
    upstream = await forwardRequest(clientReq, forwardBody, abortController.signal);
  } catch (err) {
    // After an abort there is nothing left to answer.
    if (abortController.signal.aborted) return;
    replyJson(502, { error: "upstream_error", message: err.message });
    return;
  }
  const { upstreamRes, responseHeaders, statusCode } = upstream;

  if (hasExtensions) {
    await runOnResponseStart({ status: statusCode, headers: responseHeaders, meta }, extSnapshot);
  }

  const contentType = responseHeaders["content-type"] || "";
  const isStreaming = contentType.includes("text/event-stream");

  if (!isStreaming) {
    const pieces = [];
    for await (const piece of upstreamRes) pieces.push(piece);
    const rawResponse = Buffer.concat(pieces);

    // The body is only parsed when there are extensions to show it to.
    const responseBody = hasExtensions ? parseJsonOrNull(rawResponse.toString()) : null;

    if (responseBody) {
      const resCtx = { status: statusCode, headers: responseHeaders, body: responseBody, meta };
      await runOnResponse(resCtx, extSnapshot);
      clientRes.writeHead(statusCode, resCtx.headers);
      clientRes.end(JSON.stringify(resCtx.body));
    } else {
      clientRes.writeHead(statusCode, responseHeaders);
      clientRes.end(rawResponse);
    }
    return;
  }

  clientRes.writeHead(statusCode, responseHeaders);

  const telemetry = createTelemetryRecord();
  telemetry.requestedModel = requestedModel;

  upstreamRes.on("error", abortClient);

  try {
    await streamResponse(upstreamRes, clientRes, telemetry, extSnapshot, meta, responseHeaders);
  } catch (err) {
    abortClient(err);
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Answer the health probe with `200 {"status":"ok"}`.
 *
 * @param {object} _req - Unused.
 * @param {object} res - HTTP response.
 */
function handleHealth(_req, res) {
  const payload = JSON.stringify({ status: "ok" });
  res.writeHead(200, { "content-type": "application/json" });
  res.end(payload);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Answer an unrouted request with `404 {"error":"not_found"}`.
 *
 * @param {object} _req - Unused.
 * @param {object} res - HTTP response.
 */
function handleNotFound(_req, res) {
  const payload = JSON.stringify({ error: "not_found" });
  res.writeHead(404, { "content-type": "application/json" });
  res.end(payload);
}
|
|
135
|
+
|
|
136
|
+
/**
 * HTTP entry point. Routes:
 *  - `GET /health`            → handleHealth
 *  - `POST /v1/messages…`     → handleMessages (URL prefix match)
 *  - anything else            → handleNotFound
 */
const server = http.createServer((req, res) => {
  if (req.method === "GET" && req.url === "/health") {
    handleHealth(req, res);
    return;
  }

  if (req.method === "POST" && req.url?.startsWith("/v1/messages")) {
    // handleMessages is async and the request listener's return value is
    // discarded, so a rejection must be handled here. Left unhandled, one
    // failing request (client error during upload, upstream failing while the
    // body is read, …) becomes an unhandled rejection, which ends the process
    // under Node's default settings.
    handleMessages(req, res).catch((err) => {
      const reason = err?.message ?? String(err);
      process.stderr.write(`[server] request failed: ${reason}\n`);

      if (res.destroyed || res.writableEnded) return;
      if (res.headersSent) {
        // Part of the response is already out; dropping the connection is the
        // only remaining way to signal the failure.
        res.destroy();
        return;
      }
      res.writeHead(500, { "content-type": "application/json" });
      res.end(JSON.stringify({ error: "proxy_error", message: reason }));
    });
    return;
  }

  handleNotFound(req, res);
});
|
|
145
|
+
|
|
146
|
+
/**
 * Stop accepting connections and exit with code 0 once the server has closed;
 * exit with code 1 if that has not happened within five seconds.
 */
function shutdown() {
  const forceExitDelayMs = 5000;
  const exitCleanly = () => process.exit(0);
  const exitForced = () => process.exit(1);

  server.close(exitCleanly);
  setTimeout(exitForced, forceExitDelayMs);
}

// Both termination signals trigger the same shutdown.
for (const signal of ["SIGTERM", "SIGINT"]) {
  process.on(signal, shutdown);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Load the extensions and start the watcher, using the directory and config
 * path from `config`.
 *
 * Failure is not fatal: the returned promise never rejects, so the caller
 * proceeds either way. The failure is reported on stderr, because otherwise
 * the proxy would carry on without its extensions and give no sign of it.
 *
 * @returns {Promise<void>}
 */
async function initPipeline() {
  try {
    await loadExtensions(config.extensionsDir, config.extensionsConfig);
    startWatcher(config.extensionsDir, config.extensionsConfig);
  } catch (err) {
    const reason = err?.message ?? String(err);
    process.stderr.write(`[server] extension pipeline failed to initialize: ${reason}\n`);
  }
}
|
|
160
|
+
|
|
161
|
+
// Start listening only after the pipeline initialization has settled, then
// print the address the server actually bound to.
initPipeline().then(() => {
  const announce = () => {
    const addr = server.address();
    process.stdout.write(`proxy listening on ${addr.address}:${addr.port}\n`);
  };
  server.listen(config.port, config.bind, announce);
});
|
|
167
|
+
|
|
168
|
+
export { server };
|
package/proxy/stream.mjs
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { runOnStreamEvent } from "./pipeline.mjs";
|
|
2
|
+
|
|
3
|
+
/**
 * Create a blank telemetry record.
 *
 * @returns {{model: null, requestedModel: null, inputTokens: number,
 *   outputTokens: number, cacheRead: number, cacheCreation: number,
 *   stopReason: null}} A new object on every call, with null names and zeroed
 *   counters.
 */
export function createTelemetryRecord() {
  const counters = {
    inputTokens: 0,
    outputTokens: 0,
    cacheRead: 0,
    cacheCreation: 0,
  };
  return { model: null, requestedModel: null, ...counters, stopReason: null };
}
|
|
14
|
+
|
|
15
|
+
/**
 * Copy usage figures from a stream event into the telemetry record.
 *
 * - `message_start` with a `message`: sets `model`, and the input/cache
 *   counters when `usage` is present.
 * - `message_delta`: sets `outputTokens` when `usage` is present, and always
 *   sets `stopReason` (null when the delta carries none).
 * Every other event leaves the record untouched.
 *
 * @param {object} event - Parsed stream event.
 * @param {object} telemetry - Record to update in place.
 */
function extractTelemetry(event, telemetry) {
  switch (event.type) {
    case "message_start": {
      const { message } = event;
      if (!message) break;
      telemetry.model = message.model || null;
      const { usage } = message;
      if (usage) {
        telemetry.inputTokens = usage.input_tokens || 0;
        telemetry.cacheRead = usage.cache_read_input_tokens || 0;
        telemetry.cacheCreation = usage.cache_creation_input_tokens || 0;
      }
      break;
    }
    case "message_delta": {
      const { usage } = event;
      if (usage) {
        telemetry.outputTokens = usage.output_tokens || 0;
      }
      telemetry.stopReason = event.delta?.stop_reason || null;
      break;
    }
    default:
      break;
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Write to the client and, when the write reports a full buffer, wait for
 * "drain" before returning.
 *
 * @param {object} clientRes - Writable response.
 * @param {string} data - Text to write.
 * @returns {Promise<void>}
 */
async function writeWithBackpressure(clientRes, data) {
  if (clientRes.write(data)) return;
  await new Promise((resolve) => clientRes.once("drain", resolve));
}

/**
 * Relay one non-empty line of the event stream to the client.
 *
 * Lines that are not `data: ` lines, the `[DONE]` sentinel, payloads that are
 * not JSON, and JSON payloads that are not objects are passed through
 * verbatim. For a JSON object payload, telemetry is extracted and — when
 * extensions are present — the `onStreamEvent` hooks run; they may drop the
 * event (`ctx.drop`) or replace it (`ctx.event`).
 *
 * @param {string} line - One line without its trailing "\n".
 * @param {object} clientRes - Writable response.
 * @param {object} telemetry - Record updated from the event.
 * @param {object[]} extSnapshot - Extensions for this request.
 * @param {object} meta - Per-request metadata shared with the hooks.
 * @param {object} responseHeaders - Upstream headers, exposed to the hooks.
 * @returns {Promise<void>}
 */
async function processLine(line, clientRes, telemetry, extSnapshot, meta, responseHeaders) {
  const passThrough = () => writeWithBackpressure(clientRes, line + "\n");

  if (!line.startsWith("data: ")) return passThrough();

  const payload = line.slice(6);
  if (payload === "[DONE]") return passThrough();

  let event;
  try {
    event = JSON.parse(payload);
  } catch {
    return passThrough();
  }

  // Valid JSON is not necessarily an object: `data: null` would otherwise
  // make the property reads below throw and take the whole stream down.
  if (event === null || typeof event !== "object") return passThrough();

  extractTelemetry(event, telemetry);

  if (!extSnapshot || extSnapshot.length === 0) return passThrough();

  const ctx = { event, meta, telemetry, responseHeaders: responseHeaders || null, drop: false };
  await runOnStreamEvent(ctx, extSnapshot);

  if (ctx.drop) return undefined;

  // Re-serialize only when a hook swapped in a different event object; the
  // original bytes are kept otherwise, and also when serialization fails.
  let output = line + "\n";
  if (ctx.event !== event) {
    try {
      output = "data: " + JSON.stringify(ctx.event) + "\n";
    } catch {
      output = line + "\n";
    }
  }

  await writeWithBackpressure(clientRes, output);
  return undefined;
}

/**
 * Relay an upstream event stream to the client line by line, then end the
 * client response.
 *
 * @param {AsyncIterable<Buffer|string>} upstreamRes - Upstream response body.
 * @param {object} clientRes - Writable response.
 * @param {object} telemetry - Record updated as events pass.
 * @param {object[]} extSnapshot - Extensions for this request.
 * @param {object} meta - Per-request metadata shared with the hooks.
 * @param {object} responseHeaders - Upstream headers, exposed to the hooks.
 * @returns {Promise<object>} The same `telemetry` object.
 */
export async function streamResponse(upstreamRes, clientRes, telemetry, extSnapshot, meta, responseHeaders) {
  // Chunk boundaries fall at arbitrary bytes. Decoding each chunk on its own
  // turns a multi-byte character split across two chunks into replacement
  // characters, so a streaming decoder carries the partial bytes over.
  // `ignoreBOM: true` keeps a byte-order mark in the output instead of
  // stripping it, so the text relayed matches what was received.
  const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
  let buffer = "";

  for await (const chunk of upstreamRes) {
    buffer += typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });

    const lines = buffer.split("\n");
    // The last piece has no terminating "\n" yet; keep it for the next chunk.
    buffer = lines.pop();

    for (const line of lines) {
      if (line === "") {
        await writeWithBackpressure(clientRes, "\n");
      } else {
        await processLine(line, clientRes, telemetry, extSnapshot, meta, responseHeaders);
      }
    }
  }

  // Flush bytes the decoder may still hold from a truncated final character.
  buffer += decoder.decode();

  if (buffer.length > 0) {
    await processLine(buffer, clientRes, telemetry, extSnapshot, meta, responseHeaders);
  }

  clientRes.end();
  return telemetry;
}
|