proxitor 0.8.0 → 0.9.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/dist/cli.mjs +117 -48
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -250,6 +250,41 @@ provider:
|
|
|
250
250
|
p90: 3 # seconds (soft threshold)
|
|
251
251
|
```
|
|
252
252
|
|
|
253
|
+
### Prompt caching
|
|
254
|
+
|
|
255
|
+
By default, OpenRouter doesn't enable prompt caching — every request pays full token price. Proxitor can inject `cache_control` and `session_id` to make caching work automatically.
|
|
256
|
+
|
|
257
|
+
**`cacheControl`** — injects `cache_control: { "type": "ephemeral" }` into the request body. OpenRouter uses this to set cache breakpoints and advance them as conversations grow.
|
|
258
|
+
|
|
259
|
+
**`sessionId`** — injects `session_id` for provider sticky routing. Without it, OpenRouter only pins to a provider after detecting a cache hit. With it, routing sticks from the **first request** — critical for OpenAI models where delayed caching means 0 cached tokens on the first 1-2 requests.
|
|
260
|
+
|
|
261
|
+
Both support `auto` / `always` / `never` modes:
|
|
262
|
+
|
|
263
|
+
| Mode | `cacheControl` | `sessionId` |
|
|
264
|
+
|---|---|---|
|
|
265
|
+
| `auto` (default) | Anthropic models on `/v1/chat/completions`; all models on `/v1/messages` and `/v1/responses` | Use the `X-Claude-Code-Session-Id` header if present; otherwise fall back to a UUID generated once per proxy process |
|
|
266
|
+
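| `always` | All models, all endpoints | Sticky routing on every request; the id is currently resolved the same way as `auto` (the `X-Claude-Code-Session-Id` header if present, otherwise the per-process proxy UUID) |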
|
|
267
|
+
| `never` | Disabled | Disabled |
|
|
268
|
+
|
|
269
|
+
```yaml
|
|
270
|
+
cacheControl: auto # safe default — Anthropic and safe endpoints only
|
|
271
|
+
sessionId: auto # always ensures sticky routing (client header or proxy UUID)
|
|
272
|
+
|
|
273
|
+
# Force caching for all models (may cause 400 on non-Anthropic /v1/chat/completions)
|
|
274
|
+
# cacheControl: always
|
|
275
|
+
|
|
276
|
+
# Per-model overrides
|
|
277
|
+
modelOverrides:
|
|
278
|
+
"gpt-*":
|
|
279
|
+
cacheControl: never # OpenAI caches automatically, no injection needed
|
|
280
|
+
sessionId: always # but sticky routing still helps
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
**Why both matter:**
|
|
284
|
+
- **Anthropic models** — `cache_control` activates caching, `session_id` prevents provider flip-flopping that would invalidate it
|
|
285
|
+
- **OpenAI models** — caching is automatic (no `cache_control` needed), but `session_id` ensures sticky routing from request #1 instead of waiting for a cache hit
|
|
286
|
+
- **All models** — `session_id` prevents the provider switch that silently resets cache
|
|
287
|
+
|
|
253
288
|
### Health check
|
|
254
289
|
|
|
255
290
|
```sh
|
package/dist/cli.mjs
CHANGED
|
@@ -10580,9 +10580,16 @@ const providerConfigSchema = object({
|
|
|
10580
10580
|
preferredMinThroughput: union([number$1().positive(), percentileCutoffsSchema]).optional(),
|
|
10581
10581
|
preferredMaxLatency: union([number$1().positive(), percentileCutoffsSchema]).optional()
|
|
10582
10582
|
}).strict();
|
|
10583
|
+
/**
 * Three-way mode switch shared by the `cacheControl` and `sessionId` options.
 * Accepts exactly one of "auto", "always" or "never".
 */
const triStateSchema = _enum(["auto", "always", "never"]);
|
|
10583
10588
|
/**
 * Shape of a single `modelOverrides` entry. Every field is optional; a field
 * that is left out keeps the value from the top-level proxy config.
 */
const modelOverrideSchema = object({
	// Provider routing settings for matching models.
	provider: providerConfigSchema.optional(),
	// Extra string-to-string headers for matching models.
	headers: record(string$1(), string$1()).optional(),
	// Per-model mode for `cache_control` injection ("auto" | "always" | "never").
	cacheControl: triStateSchema.optional(),
	// Per-model mode for `session_id` injection ("auto" | "always" | "never").
	sessionId: triStateSchema.optional()
}).strict();
|
|
10587
10594
|
const proxyConfigSchema = object({
|
|
10588
10595
|
host: string$1().min(1).default("0.0.0.0"),
|
|
@@ -10597,6 +10604,8 @@ const proxyConfigSchema = object({
|
|
|
10597
10604
|
attributionReferer: string$1().min(1).default("https://github.com/neiromaster/proxitor"),
|
|
10598
10605
|
attributionTitle: string$1().min(1).default("proxitor"),
|
|
10599
10606
|
headers: record(string$1(), string$1()).optional(),
|
|
10607
|
+
cacheControl: triStateSchema.default("auto"),
|
|
10608
|
+
sessionId: triStateSchema.default("auto"),
|
|
10600
10609
|
modelOverrides: record(string$1().min(1), modelOverrideSchema).optional()
|
|
10601
10610
|
}).strict();
|
|
10602
10611
|
const DEFAULTS = proxyConfigSchema.parse({});
|
|
@@ -10716,27 +10725,36 @@ function matchScore(pattern, modelName) {
|
|
|
10716
10725
|
/**
 * Computes the effective settings for one request.
 *
 * Starts from the top-level config values and, when both a model name and
 * `modelOverrides` are present, layers the best-matching override on top.
 *
 * @param config - proxy config; reads `provider`, `headers`, `cacheControl`,
 *   `sessionId` and `modelOverrides`.
 * @param modelName - model name from the request body; may be undefined/empty.
 * @returns a new object `{ provider, headers, cacheControl, sessionId }`.
 */
function resolveModelConfig(config, modelName) {
	const { provider, headers, cacheControl, sessionId, modelOverrides } = config;
	const effective = {
		provider,
		// Shallow copy so merging override headers never mutates the config object.
		headers: headers ? { ...headers } : void 0,
		cacheControl,
		sessionId
	};
	if (modelName && modelOverrides) {
		const matchedPattern = findBestMatch(Object.keys(modelOverrides), modelName);
		if (matchedPattern) applyOverride(effective, modelOverrides[matchedPattern]);
	}
	return effective;
}
|
|
10737
|
+
/**
 * Picks the pattern with the highest `matchScore` for the given model name.
 *
 * A pattern only wins when its score is strictly greater than the best so far
 * (starting at -1), so on a tie the earliest pattern is kept.
 *
 * @param patterns - iterable of override patterns.
 * @param modelName - model name to score each pattern against.
 * @returns the winning pattern, or `null` when no score exceeds -1.
 */
function findBestMatch(patterns, modelName) {
	let winner = null;
	let topScore = -1;
	for (const candidate of patterns) {
		const candidateScore = matchScore(candidate, modelName);
		// Negated `>` (rather than `<=`) keeps the original handling of NaN scores.
		if (!(candidateScore > topScore)) continue;
		topScore = candidateScore;
		winner = candidate;
	}
	return winner;
}
|
|
10749
|
+
/**
 * Applies one model override onto `result`, mutating it in place.
 *
 * `provider`, `cacheControl` and `sessionId` replace the current value when
 * the override defines them. `headers` are merged key by key, with the
 * override winning on conflicts.
 *
 * @param result - settings object being built (mutated).
 * @param override - override entry; a falsy value is a no-op.
 */
function applyOverride(result, override) {
	if (!override) return;
	const { provider, headers, cacheControl, sessionId } = override;
	if (provider !== void 0) result.provider = provider;
	if (headers) {
		const current = result.headers ?? {};
		result.headers = { ...current, ...headers };
	}
	if (cacheControl !== void 0) result.cacheControl = cacheControl;
	if (sessionId !== void 0) result.sessionId = sessionId;
}
|
|
10741
10759
|
async function loadConfig(options) {
|
|
10742
10760
|
let fileConfig = {};
|
|
@@ -14606,7 +14624,9 @@ const STRIP_REQUEST = new Set([
|
|
|
14606
14624
|
"authorization",
|
|
14607
14625
|
"x-api-key",
|
|
14608
14626
|
"host",
|
|
14609
|
-
"content-length"
|
|
14627
|
+
"content-length",
|
|
14628
|
+
"x-claude-code-session-id",
|
|
14629
|
+
"x-session-id"
|
|
14610
14630
|
]);
|
|
14611
14631
|
/** Headers to strip from upstream response before forwarding */
|
|
14612
14632
|
const STRIP_RESPONSE = new Set(["content-length", "content-encoding"]);
|
|
@@ -14755,25 +14775,34 @@ function buildUpstreamResponseWithLogging(upstream, method, reqId) {
|
|
|
14755
14775
|
}
|
|
14756
14776
|
//#endregion
|
|
14757
14777
|
//#region src/proxy/inject.ts
|
|
14758
|
-
|
|
14778
|
+
/**
 * Reports whether a model name refers to an Anthropic Claude model.
 *
 * The check is a case-insensitive search for the substring "claude". Prefixed
 * forms such as "anthropic/claude-…" or "claude-…" contain that substring too,
 * so no separate prefix tests are needed.
 *
 * @param modelName - model name string (callers pass "" when unknown).
 * @returns true when the lower-cased name contains "claude".
 */
function isAnthropicModel(modelName) {
	return modelName.toLowerCase().includes("claude");
}
|
|
14759
14782
|
/**
 * Reads the `model` field from a raw request body.
 *
 * @param rawBody - raw request body handed to `tryParseBody`.
 * @returns the `model` value when it is a string, otherwise `undefined`
 *   (including when `tryParseBody` yields a nullish result).
 */
function extractModel(rawBody) {
	const parsed = tryParseBody(rawBody);
	if (typeof parsed?.model !== "string") return void 0;
	return parsed.model;
}
|
|
14763
|
-
|
|
14764
|
-
|
|
14765
|
-
if (rawBody.byteLength === 0) throw new Error("Request body is empty; cannot inject provider");
|
|
14786
|
+
/**
 * Parses a JSON request body, adds the requested routing/caching fields and
 * re-encodes it.
 *
 * Fields the client already set are respected: an existing `cache_control`
 * is left alone, and an existing `session_id` is kept and reported back as
 * the effective session id.
 *
 * @param rawBody - buffer holding the UTF-8 JSON body; must be non-empty.
 * @param params - `{ providerRouting?, cacheControl?, sessionId? }`:
 *   `providerRouting` is written to `provider` when defined; a truthy
 *   `cacheControl` adds `cache_control: { type: "ephemeral" }`; a truthy
 *   `sessionId` adds `session_id` unless the body already has one.
 * @returns `{ body, effectiveSessionId }` — the re-encoded body as an
 *   ArrayBuffer and the session id in effect (undefined when
 *   `params.sessionId` is falsy).
 * @throws {Error} when the body is empty, is not valid JSON, or parses to
 *   something other than a JSON object.
 */
function injectBodyFields(rawBody, params) {
	if (rawBody.byteLength === 0) throw new Error("Request body is empty; cannot inject");
	let json;
	try {
		json = JSON.parse(new TextDecoder().decode(rawBody));
	} catch (parseError) {
		throw new Error("Request body is not valid JSON; cannot inject", { cause: parseError });
	}
	// Valid JSON is not necessarily an object. Properties assigned to an array
	// are dropped by JSON.stringify, and null/primitives cannot take properties
	// at all, so fail explicitly instead of reporting an injection that never
	// reached the body.
	if (json === null || typeof json !== "object" || Array.isArray(json)) {
		throw new Error("Request body is not a JSON object; cannot inject");
	}
	if (params.providerRouting !== void 0) json.provider = params.providerRouting;
	if (params.cacheControl && !("cache_control" in json)) json.cache_control = { type: "ephemeral" };
	let effectiveSessionId;
	if (params.sessionId) {
		if ("session_id" in json) {
			// The client already chose a session id; keep it and report it back.
			effectiveSessionId = String(json.session_id);
		} else {
			json.session_id = params.sessionId;
			effectiveSessionId = params.sessionId;
		}
	}
	return {
		body: new TextEncoder().encode(JSON.stringify(json)).buffer,
		effectiveSessionId
	};
}
|
|
14778
14807
|
//#endregion
|
|
14779
14808
|
//#region src/proxy/paths.ts
|
|
@@ -14805,10 +14834,18 @@ function buildUpstreamUrl(requestUrl, config) {
|
|
|
14805
14834
|
}
|
|
14806
14835
|
//#endregion
|
|
14807
14836
|
//#region src/proxy.ts
|
|
14808
|
-
|
|
14809
|
-
|
|
14810
|
-
if (
|
|
14811
|
-
|
|
14837
|
+
/** Fallback session id, created once at module load and reused for every request without a client id. */
const PROXY_SESSION_ID = crypto.randomUUID();

/**
 * Chooses the session id to inject for a request.
 *
 * @param incomingHeaders - request headers; only `.get(name)` is used.
 * @param mode - "never" disables the session id; any other mode is treated
 *   the same way.
 * @returns `undefined` for "never"; otherwise the client's
 *   `x-claude-code-session-id` header, capped at 256 characters, or
 *   `PROXY_SESSION_ID` when that header is missing or empty.
 */
function deriveSessionId(incomingHeaders, mode) {
	if (mode === "never") return void 0;
	const clientSessionId = incomingHeaders.get("x-claude-code-session-id");
	return clientSessionId ? clientSessionId.slice(0, 256) : PROXY_SESSION_ID;
}
|
|
14844
|
+
/**
 * Decides whether `cache_control` should be added to a request body.
 *
 * @param mode - "never" always refuses, "always" always injects; any other
 *   value uses the automatic rule below.
 * @param modelName - model name from the body; may be undefined.
 * @param path - request path.
 * @returns in the automatic case: false only for `/v1/chat/completions` with a
 *   model that `isAnthropicModel` rejects; true for every other combination.
 */
function shouldInjectCacheControl(mode, modelName, path) {
	switch (mode) {
		case "never":
			return false;
		case "always":
			return true;
		default: {
			const isChatCompletions = path === "/v1/chat/completions";
			// The model is only consulted on the chat-completions endpoint.
			return !isChatCompletions || isAnthropicModel(modelName ?? "");
		}
	}
}
|
|
14813
14850
|
async function fetchUpstream(url, method, headers, body, signal) {
|
|
14814
14851
|
return fetch(url, {
|
|
@@ -14837,36 +14874,68 @@ async function readRawBody(request, reqId) {
|
|
|
14837
14874
|
};
|
|
14838
14875
|
}
|
|
14839
14876
|
}
|
|
14840
|
-
function
|
|
14877
|
+
/**
 * Reports whether an HTTP method is one of the two treated as body-less here.
 *
 * @param method - HTTP method name; compared case-sensitively.
 * @returns true for exactly "GET" or "HEAD", false for anything else.
 */
function isReadonlyMethod(method) {
	switch (method) {
		case "GET":
		case "HEAD":
			return true;
		default:
			return false;
	}
}
|
|
14880
|
+
/**
 * Works out which fields, if any, should be injected into a request body.
 *
 * @param rawBody - raw request body; the model name is extracted from it.
 * @param config - proxy config, resolved per model via `resolveModelConfig`.
 * @param path - request path.
 * @param incomingHeaders - client request headers (source of the session id).
 * @returns `{ modelName, resolved, inject: false }` when nothing should be
 *   injected, otherwise `{ modelName, resolved, inject: true, params,
 *   sessionId }`, where `params` is the argument for `injectBodyFields`.
 */
function computeInjection(rawBody, config, path, incomingHeaders) {
	const modelName = extractModel(rawBody);
	const resolved = resolveModelConfig(config, modelName);
	const providerRouting = buildProviderRouting(resolved.provider);
	const skip = {
		modelName,
		resolved,
		inject: false
	};
	// NOTE(review): the method is hard-coded to "POST" here rather than taken
	// from the request — confirm that is what `shouldInject` expects.
	if (!shouldInject("POST", path)) return skip;
	const injectCacheControl = shouldInjectCacheControl(resolved.cacheControl ?? "auto", modelName, path);
	const sessionId = deriveSessionId(incomingHeaders, resolved.sessionId ?? "auto");
	const nothingToAdd = providerRouting === void 0 && !injectCacheControl && sessionId === void 0;
	if (nothingToAdd) return skip;
	return {
		modelName,
		resolved,
		inject: true,
		params: {
			providerRouting,
			cacheControl: injectCacheControl,
			sessionId
		},
		sessionId
	};
}
|
|
14910
|
+
/**
 * Prepares the outgoing body and extra headers for one proxied request.
 *
 * When injection is requested and succeeds, the body is the rewritten JSON.
 * When injection fails, a warning is logged and the request proceeds anyway:
 * the original body is forwarded for non-GET/HEAD methods with a non-empty
 * payload, and no body is sent otherwise.
 *
 * NOTE(review): the caller still checks `.error` on the result, but this
 * function never sets it — confirm that is intentional.
 *
 * @param rawBody - raw request body buffer.
 * @param config - proxy config.
 * @param method - HTTP method of the incoming request.
 * @param path - request path.
 * @param reqId - request id used to tag log lines.
 * @param incomingHeaders - client request headers.
 * @returns `{ inject, body, modelName, headers }` — `inject` is true only when
 *   the body was actually rewritten; `headers` holds the resolved per-model
 *   headers plus `x-session-id` when a session id is in effect.
 */
function resolveRequest(rawBody, config, method, path, reqId, incomingHeaders) {
	const decision = computeInjection(rawBody, config, path, incomingHeaders);
	// Body to forward untouched: only non-GET/HEAD requests with a payload carry one.
	const passthroughBody = !isReadonlyMethod(method) && rawBody.byteLength > 0 ? rawBody : void 0;
	let body = passthroughBody;
	let bodyModified = false;
	let effectiveSessionId;
	if (decision.inject) {
		try {
			const injected = injectBodyFields(rawBody, decision.params);
			body = injected.body;
			bodyModified = true;
			effectiveSessionId = injected.effectiveSessionId;
		} catch (err) {
			const message = err instanceof Error ? err.message : "Failed to process request body";
			const note = passthroughBody !== void 0 ? `${message}; forwarding body as-is` : message;
			logger.warn(withReq(reqId, note));
		}
	}
	const finalHeaders = { ...(decision.resolved.headers ?? {}) };
	if (effectiveSessionId !== void 0) finalHeaders["x-session-id"] = effectiveSessionId;
	return {
		inject: bodyModified,
		body,
		modelName: decision.modelName,
		headers: finalHeaders
	};
}
|
|
14872
14941
|
/**
|
|
@@ -14956,7 +15025,7 @@ function createProxyServer(config, onReady) {
|
|
|
14956
15025
|
const reqId = requestId();
|
|
14957
15026
|
const raw = await readRawBody(c.req.raw, reqId);
|
|
14958
15027
|
if (!raw.ok) return raw.response;
|
|
14959
|
-
const resolved = resolveRequest(raw.body, config, method, path, reqId);
|
|
15028
|
+
const resolved = resolveRequest(raw.body, config, method, path, reqId, c.req.raw.headers);
|
|
14960
15029
|
if (resolved.error) return resolved.error;
|
|
14961
15030
|
const headers = buildRequestHeaders(c.req.raw.headers, config, resolved.inject, resolved.headers);
|
|
14962
15031
|
const controller = new AbortController();
|
|
@@ -14996,7 +15065,7 @@ function startProxyServer(config, onReady) {
|
|
|
14996
15065
|
}
|
|
14997
15066
|
//#endregion
|
|
14998
15067
|
//#region src/version.ts
|
|
14999
|
-
const version = "0.
|
|
15068
|
+
const version = "0.9.0-beta.0";
|
|
15000
15069
|
//#endregion
|
|
15001
15070
|
//#region src/cli.ts
|
|
15002
15071
|
const argv = process.argv.slice(2);
|