proxitor 0.8.0 → 0.9.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -250,6 +250,41 @@ provider:
250
250
  p90: 3 # seconds (soft threshold)
251
251
  ```
252
252
 
253
+ ### Prompt caching
254
+
255
+ By default, OpenRouter doesn't enable prompt caching — every request pays full token price. Proxitor can inject `cache_control` and `session_id` to make caching work automatically.
256
+
257
+ **`cacheControl`** — injects `cache_control: { "type": "ephemeral" }` into the request body. OpenRouter uses this to set cache breakpoints and advance them as conversations grow.
258
+
259
+ **`sessionId`** — injects `session_id` for provider sticky routing. Without it, OpenRouter only pins to a provider after detecting a cache hit. With it, routing sticks from the **first request** — critical for OpenAI models where delayed caching means 0 cached tokens on the first 1-2 requests.
260
+
261
+ Both support `auto` / `always` / `never` modes:
262
+
263
+ | Mode | `cacheControl` | `sessionId` |
264
+ |---|---|---|
265
+ | `auto` (default) | Anthropic models on `/v1/chat/completions`; all models on `/v1/messages` and `/v1/responses` | Use `X-Claude-Code-Session-Id` header if present; otherwise generate proxy UUID |
266
+ | `always` | All models, all endpoints | Always inject: use `X-Claude-Code-Session-Id` header if present; otherwise the proxy UUID |
267
+ | `never` | Disabled | Disabled |
268
+
269
+ ```yaml
270
+ cacheControl: auto # safe default — Anthropic and safe endpoints only
271
+ sessionId: auto # ensures sticky routing on every request (client header if present, otherwise proxy UUID)
272
+
273
+ # Force caching for all models (may cause 400 on non-Anthropic /v1/chat/completions)
274
+ # cacheControl: always
275
+
276
+ # Per-model overrides
277
+ modelOverrides:
278
+   "gpt-*":
279
+     cacheControl: never # OpenAI caches automatically, no injection needed
280
+     sessionId: always # but sticky routing still helps
281
+ ```
282
+
283
+ **Why both matter:**
284
+ - **Anthropic models** — `cache_control` activates caching, `session_id` prevents provider flip-flopping that would invalidate it
285
+ - **OpenAI models** — caching is automatic (no `cache_control` needed), but `session_id` ensures sticky routing from request #1 instead of waiting for a cache hit
286
+ - **All models** — `session_id` prevents the provider switch that silently resets cache
287
+
253
288
  ### Health check
254
289
 
255
290
  ```sh
package/dist/cli.mjs CHANGED
@@ -10580,9 +10580,16 @@ const providerConfigSchema = object({
10580
10580
  preferredMinThroughput: union([number$1().positive(), percentileCutoffsSchema]).optional(),
10581
10581
  preferredMaxLatency: union([number$1().positive(), percentileCutoffsSchema]).optional()
10582
10582
  }).strict();
10583
+ const triStateSchema = _enum([
10584
+ "auto",
10585
+ "always",
10586
+ "never"
10587
+ ]);
10583
10588
  const modelOverrideSchema = object({
10584
10589
  provider: providerConfigSchema.optional(),
10585
- headers: record(string$1(), string$1()).optional()
10590
+ headers: record(string$1(), string$1()).optional(),
10591
+ cacheControl: triStateSchema.optional(),
10592
+ sessionId: triStateSchema.optional()
10586
10593
  }).strict();
10587
10594
  const proxyConfigSchema = object({
10588
10595
  host: string$1().min(1).default("0.0.0.0"),
@@ -10597,6 +10604,8 @@ const proxyConfigSchema = object({
10597
10604
  attributionReferer: string$1().min(1).default("https://github.com/neiromaster/proxitor"),
10598
10605
  attributionTitle: string$1().min(1).default("proxitor"),
10599
10606
  headers: record(string$1(), string$1()).optional(),
10607
+ cacheControl: triStateSchema.default("auto"),
10608
+ sessionId: triStateSchema.default("auto"),
10600
10609
  modelOverrides: record(string$1().min(1), modelOverrideSchema).optional()
10601
10610
  }).strict();
10602
10611
  const DEFAULTS = proxyConfigSchema.parse({});
@@ -10716,27 +10725,36 @@ function matchScore(pattern, modelName) {
10716
10725
  function resolveModelConfig(config, modelName) {
10717
10726
  const result = {
10718
10727
  provider: config.provider,
10719
- headers: config.headers ? { ...config.headers } : void 0
10728
+ headers: config.headers ? { ...config.headers } : void 0,
10729
+ cacheControl: config.cacheControl,
10730
+ sessionId: config.sessionId
10720
10731
  };
10721
10732
  if (!modelName || !config.modelOverrides) return result;
10733
+ const bestPattern = findBestMatch(Object.keys(config.modelOverrides), modelName);
10734
+ if (bestPattern) applyOverride(result, config.modelOverrides[bestPattern]);
10735
+ return result;
10736
+ }
10737
+ function findBestMatch(patterns, modelName) {
10722
10738
  let bestPattern = null;
10723
10739
  let bestScore = -1;
10724
- for (const pattern of Object.keys(config.modelOverrides)) {
10740
+ for (const pattern of patterns) {
10725
10741
  const score = matchScore(pattern, modelName);
10726
10742
  if (score > bestScore) {
10727
10743
  bestScore = score;
10728
10744
  bestPattern = pattern;
10729
10745
  }
10730
10746
  }
10731
- if (bestPattern) {
10732
- const override = config.modelOverrides[bestPattern];
10733
- if (override?.provider !== void 0) result.provider = override.provider;
10734
- if (override?.headers) result.headers = {
10735
- ...result.headers ?? {},
10736
- ...override.headers
10737
- };
10738
- }
10739
- return result;
10747
+ return bestPattern;
10748
+ }
10749
+ function applyOverride(result, override) {
10750
+ if (!override) return;
10751
+ if (override.provider !== void 0) result.provider = override.provider;
10752
+ if (override.headers) result.headers = {
10753
+ ...result.headers ?? {},
10754
+ ...override.headers
10755
+ };
10756
+ if (override.cacheControl !== void 0) result.cacheControl = override.cacheControl;
10757
+ if (override.sessionId !== void 0) result.sessionId = override.sessionId;
10740
10758
  }
10741
10759
  async function loadConfig(options) {
10742
10760
  let fileConfig = {};
@@ -14606,7 +14624,9 @@ const STRIP_REQUEST = new Set([
14606
14624
  "authorization",
14607
14625
  "x-api-key",
14608
14626
  "host",
14609
- "content-length"
14627
+ "content-length",
14628
+ "x-claude-code-session-id",
14629
+ "x-session-id"
14610
14630
  ]);
14611
14631
  /** Headers to strip from upstream response before forwarding */
14612
14632
  const STRIP_RESPONSE = new Set(["content-length", "content-encoding"]);
@@ -14755,25 +14775,34 @@ function buildUpstreamResponseWithLogging(upstream, method, reqId) {
14755
14775
  }
14756
14776
  //#endregion
14757
14777
  //#region src/proxy/inject.ts
14758
- /** Extract the model name from a raw request body. Returns undefined if not parseable or absent. */
14778
+ function isAnthropicModel(modelName) {
14779
+ const lower = modelName.toLowerCase();
14780
+ return lower.startsWith("anthropic/claude") || lower.startsWith("claude-") || lower.includes("claude");
14781
+ }
14759
14782
  function extractModel(rawBody) {
14760
14783
  const json = tryParseBody(rawBody);
14761
14784
  return typeof json?.model === "string" ? json.model : void 0;
14762
14785
  }
14763
- /** Inject provider routing into request body, always overwriting existing value */
14764
- function injectProvider(rawBody, providerRouting) {
14765
- if (rawBody.byteLength === 0) throw new Error("Request body is empty; cannot inject provider");
14786
+ function injectBodyFields(rawBody, params) {
14787
+ if (rawBody.byteLength === 0) throw new Error("Request body is empty; cannot inject");
14766
14788
  let json;
14767
14789
  try {
14768
14790
  json = JSON.parse(new TextDecoder().decode(rawBody));
14769
14791
  } catch (parseError) {
14770
- throw new Error("Request body is not valid JSON; cannot inject provider", { cause: parseError });
14792
+ throw new Error("Request body is not valid JSON; cannot inject", { cause: parseError });
14793
+ }
14794
+ if (params.providerRouting !== void 0) json.provider = params.providerRouting;
14795
+ if (params.cacheControl && !("cache_control" in json)) json.cache_control = { type: "ephemeral" };
14796
+ let effectiveSessionId;
14797
+ if (params.sessionId) if ("session_id" in json) effectiveSessionId = String(json.session_id);
14798
+ else {
14799
+ json.session_id = params.sessionId;
14800
+ effectiveSessionId = params.sessionId;
14771
14801
  }
14772
- const modified = {
14773
- ...json,
14774
- provider: providerRouting
14802
+ return {
14803
+ body: new TextEncoder().encode(JSON.stringify(json)).buffer,
14804
+ effectiveSessionId
14775
14805
  };
14776
- return new TextEncoder().encode(JSON.stringify(modified)).buffer;
14777
14806
  }
14778
14807
  //#endregion
14779
14808
  //#region src/proxy/paths.ts
@@ -14805,10 +14834,18 @@ function buildUpstreamUrl(requestUrl, config) {
14805
14834
  }
14806
14835
  //#endregion
14807
14836
  //#region src/proxy.ts
14808
- function readRequestBody(method, raw, inject, providerRouting) {
14809
- if (["GET", "HEAD"].includes(method)) return void 0;
14810
- if (inject) return injectProvider(raw, providerRouting);
14811
- return raw.byteLength > 0 ? raw : void 0;
14837
+ const PROXY_SESSION_ID = crypto.randomUUID();
14838
+ function deriveSessionId(incomingHeaders, mode) {
14839
+ if (mode === "never") return void 0;
14840
+ const fromClient = incomingHeaders.get("x-claude-code-session-id");
14841
+ if (fromClient) return fromClient.slice(0, 256);
14842
+ return PROXY_SESSION_ID;
14843
+ }
14844
+ function shouldInjectCacheControl(mode, modelName, path) {
14845
+ if (mode === "never") return false;
14846
+ if (mode === "always") return true;
14847
+ if (path === "/v1/chat/completions" && !isAnthropicModel(modelName ?? "")) return false;
14848
+ return true;
14812
14849
  }
14813
14850
  async function fetchUpstream(url, method, headers, body, signal) {
14814
14851
  return fetch(url, {
@@ -14837,36 +14874,68 @@ async function readRawBody(request, reqId) {
14837
14874
  };
14838
14875
  }
14839
14876
  }
14840
- function resolveRequest(rawBody, config, method, path, reqId) {
14877
+ function isReadonlyMethod(method) {
14878
+ return method === "GET" || method === "HEAD";
14879
+ }
14880
+ function computeInjection(rawBody, config, path, incomingHeaders) {
14841
14881
  const modelName = extractModel(rawBody);
14842
14882
  const resolved = resolveModelConfig(config, modelName);
14843
14883
  const providerRouting = buildProviderRouting(resolved.provider);
14844
- const inject = shouldInject(method, path) && providerRouting !== void 0;
14884
+ if (!shouldInject("POST", path)) return {
14885
+ modelName,
14886
+ resolved,
14887
+ inject: false
14888
+ };
14889
+ const cacheControlMode = resolved.cacheControl ?? "auto";
14890
+ const sessionIdMode = resolved.sessionId ?? "auto";
14891
+ const injectCacheControl = shouldInjectCacheControl(cacheControlMode, modelName, path);
14892
+ const sessionId = deriveSessionId(incomingHeaders, sessionIdMode);
14893
+ if (providerRouting === void 0 && !injectCacheControl && sessionId === void 0) return {
14894
+ modelName,
14895
+ resolved,
14896
+ inject: false
14897
+ };
14898
+ return {
14899
+ modelName,
14900
+ resolved,
14901
+ inject: true,
14902
+ params: {
14903
+ providerRouting,
14904
+ cacheControl: injectCacheControl,
14905
+ sessionId
14906
+ },
14907
+ sessionId
14908
+ };
14909
+ }
14910
+ function resolveRequest(rawBody, config, method, path, reqId, incomingHeaders) {
14911
+ const decision = computeInjection(rawBody, config, path, incomingHeaders);
14845
14912
  let body;
14913
+ let bodyModified = false;
14914
+ let effectiveSessionId;
14846
14915
  try {
14847
- body = readRequestBody(method, rawBody, inject, providerRouting);
14916
+ if (decision.inject) {
14917
+ const result = injectBodyFields(rawBody, decision.params);
14918
+ body = result.body;
14919
+ bodyModified = true;
14920
+ effectiveSessionId = result.effectiveSessionId;
14921
+ } else if (!isReadonlyMethod(method)) body = rawBody.byteLength > 0 ? rawBody : void 0;
14848
14922
  } catch (err) {
14849
14923
  const message = err instanceof Error ? err.message : "Failed to process request body";
14850
- logger.error(withReq(reqId, message));
14851
- return {
14852
- inject,
14853
- body: void 0,
14854
- modelName,
14855
- headers: resolved.headers,
14856
- error: new Response(JSON.stringify({ error: {
14857
- message,
14858
- type: "proxy_request_error"
14859
- } }), {
14860
- status: 400,
14861
- headers: { "Content-Type": "application/json" }
14862
- })
14863
- };
14924
+ if (!isReadonlyMethod(method) && rawBody.byteLength > 0) {
14925
+ body = rawBody;
14926
+ logger.warn(withReq(reqId, `${message}; forwarding body as-is`));
14927
+ } else {
14928
+ body = void 0;
14929
+ logger.warn(withReq(reqId, message));
14930
+ }
14864
14931
  }
14932
+ const finalHeaders = { ...decision.resolved.headers ?? {} };
14933
+ if (effectiveSessionId !== void 0) finalHeaders["x-session-id"] = effectiveSessionId;
14865
14934
  return {
14866
- inject,
14935
+ inject: bodyModified,
14867
14936
  body,
14868
- modelName,
14869
- headers: resolved.headers
14937
+ modelName: decision.modelName,
14938
+ headers: finalHeaders
14870
14939
  };
14871
14940
  }
14872
14941
  /**
@@ -14956,7 +15025,7 @@ function createProxyServer(config, onReady) {
14956
15025
  const reqId = requestId();
14957
15026
  const raw = await readRawBody(c.req.raw, reqId);
14958
15027
  if (!raw.ok) return raw.response;
14959
- const resolved = resolveRequest(raw.body, config, method, path, reqId);
15028
+ const resolved = resolveRequest(raw.body, config, method, path, reqId, c.req.raw.headers);
14960
15029
  if (resolved.error) return resolved.error;
14961
15030
  const headers = buildRequestHeaders(c.req.raw.headers, config, resolved.inject, resolved.headers);
14962
15031
  const controller = new AbortController();
@@ -14996,7 +15065,7 @@ function startProxyServer(config, onReady) {
14996
15065
  }
14997
15066
  //#endregion
14998
15067
  //#region src/version.ts
14999
- const version = "0.8.0";
15068
+ const version = "0.9.0-beta.0";
15000
15069
  //#endregion
15001
15070
  //#region src/cli.ts
15002
15071
  const argv = process.argv.slice(2);