@ljoukov/llm 7.0.17 → 7.0.19

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -108,13 +108,22 @@ refresh-token rotation and serves short-lived access tokens.
108
108
  - `CHATGPT_AUTH_TOKEN_PROVIDER_URL` (example: `https://chatgpt-auth.<your-domain>`)
109
109
  - `CHATGPT_AUTH_API_KEY` (shared secret; sent as `Authorization: Bearer ...` and `x-chatgpt-auth: ...`)
110
110
  - `CHATGPT_AUTH_TOKEN_PROVIDER_STORE` (`kv` or `d1`, defaults to `kv`)
111
+ - `CHATGPT_CODEX_PROXY_URL` (optional Vercel proxy endpoint, for example `https://<project>.vercel.app/api/codex/responses`)
112
+ - `CHATGPT_CODEX_PROXY_API_KEY` (bearer token for `CHATGPT_CODEX_PROXY_URL`)
113
+ - `CHATGPT_CODEX_ENDPOINT` (optional direct endpoint override; defaults to `https://chatgpt.com/backend-api/codex/responses`)
111
114
  - `CHATGPT_RESPONSES_WEBSOCKET_MODE` (`auto` | `off` | `only`, default: `auto`)
112
115
 
113
116
  This repo includes a Cloudflare Workers token provider implementation in `workers/chatgpt-auth/`.
117
+ It also includes a minimal Vercel streaming proxy app in `vercel/codex-proxy/`.
114
118
 
115
119
  If `CHATGPT_AUTH_TOKEN_PROVIDER_URL` + `CHATGPT_AUTH_API_KEY` are set, `chatgpt-*` models will fetch tokens from the
116
120
  token provider and will not read the local Codex auth store.
117
121
 
122
+ If `CHATGPT_CODEX_PROXY_URL` + `CHATGPT_CODEX_PROXY_API_KEY` are set, `chatgpt-*` text requests are sent through that
123
+ proxy and the local process does not need access to the Codex auth store or token provider. The Vercel proxy fetches
124
+ short-lived ChatGPT access tokens from `workers/chatgpt-auth` and streams the upstream Codex response body back to the
125
+ caller.
126
+
118
127
  ### Responses transport
119
128
 
120
129
  For OpenAI and `chatgpt-*` model paths, this library now tries **Responses WebSocket transport first** and falls back
package/dist/index.cjs CHANGED
@@ -481,6 +481,7 @@ function resolveChatGptServiceTier(model) {
481
481
  // src/openai/pricing.ts
482
482
  var OPENAI_GPT_55_FAST_MODEL_IDS = ["gpt-5.5-fast", "chatgpt-gpt-5.5-fast"];
483
483
  var OPENAI_GPT_55_STANDARD_MODEL_IDS = ["gpt-5.5", "chatgpt-gpt-5.5"];
484
+ var OPENAI_GPT_55_CONCRETE_MODEL_ID_RE = /^(?:chatgpt-)?gpt-5\.5-\d{4}-\d{2}-\d{2}$/u;
484
485
  var OPENAI_GPT_54_FAST_MODEL_IDS = ["gpt-5.4-fast", "chatgpt-gpt-5.4-fast"];
485
486
  var OPENAI_GPT_54_MINI_MODEL_IDS = ["gpt-5.4-mini", "chatgpt-gpt-5.4-mini"];
486
487
  var OPENAI_GPT_54_NANO_MODEL_IDS = ["gpt-5.4-nano"];
@@ -547,7 +548,7 @@ function getOpenAiPricing(modelId) {
547
548
  if (OPENAI_GPT_55_FAST_MODEL_IDS.includes(modelId)) {
548
549
  return OPENAI_GPT_55_PRIORITY_PRICING;
549
550
  }
550
- if (OPENAI_GPT_55_STANDARD_MODEL_IDS.includes(modelId)) {
551
+ if (OPENAI_GPT_55_STANDARD_MODEL_IDS.includes(modelId) || OPENAI_GPT_55_CONCRETE_MODEL_ID_RE.test(modelId)) {
551
552
  return OPENAI_GPT_55_PRICING;
552
553
  }
553
554
  if (OPENAI_GPT_54_FAST_MODEL_IDS.includes(modelId)) {
@@ -580,12 +581,14 @@ function resolveUsageNumber(value) {
580
581
  }
581
582
  function estimateCallCostUsd({
582
583
  modelId,
584
+ pricingModelId,
583
585
  tokens,
584
586
  responseImages,
585
587
  imageSize,
586
588
  imageQuality
587
589
  }) {
588
- const openAiImagePricing = getOpenAiImagePricing(modelId);
590
+ const pricingModelIds = resolvePricingModelIds(modelId, pricingModelId);
591
+ const openAiImagePricing = resolvePricing(pricingModelIds, getOpenAiImagePricing);
589
592
  if (openAiImagePricing) {
590
593
  return estimateOpenAiImageCostUsd({
591
594
  pricing: openAiImagePricing,
@@ -605,7 +608,7 @@ function estimateCallCostUsd({
605
608
  const toolUsePromptTokens = resolveUsageNumber(tokens.toolUsePromptTokens);
606
609
  const promptTokenTotal = promptTokens + toolUsePromptTokens;
607
610
  const nonCachedPrompt = Math.max(0, promptTokenTotal - cachedTokens);
608
- const imagePreviewPricing = getGeminiImagePricing(modelId);
611
+ const imagePreviewPricing = resolvePricing(pricingModelIds, getGeminiImagePricing);
609
612
  if (imagePreviewPricing) {
610
613
  const resolvedImageSize = imageSize && imagePreviewPricing.imagePrices[imageSize] ? imageSize : "2K";
611
614
  const imageRate = imagePreviewPricing.imagePrices[resolvedImageSize] ?? 0;
@@ -625,7 +628,7 @@ function estimateCallCostUsd({
625
628
  const imageOutputCost = imageTokensForPricing * imagePreviewPricing.outputImageRate;
626
629
  return inputCost + cachedCost + textOutputCost + imageOutputCost;
627
630
  }
628
- const geminiPricing = getGeminiProPricing(modelId);
631
+ const geminiPricing = resolvePricing(pricingModelIds, getGeminiProPricing);
629
632
  if (geminiPricing) {
630
633
  const useHighTier = promptTokenTotal > geminiPricing.threshold;
631
634
  const inputRate = useHighTier ? geminiPricing.inputRateHigh : geminiPricing.inputRateLow;
@@ -637,7 +640,7 @@ function estimateCallCostUsd({
637
640
  const outputCost = outputTokens * outputRate;
638
641
  return inputCost + cachedCost + outputCost;
639
642
  }
640
- const fireworksPricing = getFireworksPricing(modelId);
643
+ const fireworksPricing = resolvePricing(pricingModelIds, getFireworksPricing);
641
644
  if (fireworksPricing) {
642
645
  const inputCost = nonCachedPrompt * fireworksPricing.inputRate;
643
646
  const cachedCost = cachedTokens * fireworksPricing.cachedRate;
@@ -645,7 +648,7 @@ function estimateCallCostUsd({
645
648
  const outputCost = outputTokens * fireworksPricing.outputRate;
646
649
  return inputCost + cachedCost + outputCost;
647
650
  }
648
- const openAiPricing = getOpenAiPricing(modelId);
651
+ const openAiPricing = resolvePricing(pricingModelIds, getOpenAiPricing);
649
652
  if (openAiPricing) {
650
653
  const inputCost = nonCachedPrompt * openAiPricing.inputRate;
651
654
  const cachedCost = cachedTokens * openAiPricing.cachedRate;
@@ -655,6 +658,21 @@ function estimateCallCostUsd({
655
658
  }
656
659
  return 0;
657
660
  }
661
+ function resolvePricingModelIds(modelId, pricingModelId) {
662
+ if (pricingModelId && pricingModelId !== modelId) {
663
+ return [pricingModelId, modelId];
664
+ }
665
+ return [modelId];
666
+ }
667
+ function resolvePricing(modelIds, resolve) {
668
+ for (const modelId of modelIds) {
669
+ const pricing = resolve(modelId);
670
+ if (pricing) {
671
+ return pricing;
672
+ }
673
+ }
674
+ return void 0;
675
+ }
658
676
  function estimateOpenAiImageCostUsd({
659
677
  pricing,
660
678
  responseImages,
@@ -694,6 +712,10 @@ function resolveOpenAiImagePriceResolution(imageSize) {
694
712
  var import_node_os2 = __toESM(require("os"), 1);
695
713
  var import_node_util = require("util");
696
714
 
715
+ // src/utils/env.ts
716
+ var import_node_fs = __toESM(require("fs"), 1);
717
+ var import_node_path = __toESM(require("path"), 1);
718
+
697
719
  // src/utils/runtimeSingleton.ts
698
720
  var runtimeSingletonStoreKey = /* @__PURE__ */ Symbol.for("@ljoukov/llm.runtimeSingletonStore");
699
721
  function getRuntimeSingletonStore() {
@@ -722,16 +744,7 @@ function getRuntimeSingleton(key, create) {
722
744
  return createdValue;
723
745
  }
724
746
 
725
- // src/openai/chatgpt-auth.ts
726
- var import_node_buffer = require("buffer");
727
- var import_node_fs2 = __toESM(require("fs"), 1);
728
- var import_node_os = __toESM(require("os"), 1);
729
- var import_node_path2 = __toESM(require("path"), 1);
730
- var import_zod = require("zod");
731
-
732
747
  // src/utils/env.ts
733
- var import_node_fs = __toESM(require("fs"), 1);
734
- var import_node_path = __toESM(require("path"), 1);
735
748
  var envState = getRuntimeSingleton(/* @__PURE__ */ Symbol.for("@ljoukov/llm.envState"), () => ({
736
749
  envLoaded: false
737
750
  }));
@@ -793,6 +806,11 @@ function parseEnvLine(line) {
793
806
  }
794
807
 
795
808
  // src/openai/chatgpt-auth.ts
809
+ var import_node_buffer = require("buffer");
810
+ var import_node_fs2 = __toESM(require("fs"), 1);
811
+ var import_node_os = __toESM(require("os"), 1);
812
+ var import_node_path2 = __toESM(require("path"), 1);
813
+ var import_zod = require("zod");
796
814
  var CHATGPT_AUTH_TOKEN_PROVIDER_URL_ENV = "CHATGPT_AUTH_TOKEN_PROVIDER_URL";
797
815
  var CHATGPT_AUTH_TOKEN_PROVIDER_STORE_ENV = "CHATGPT_AUTH_TOKEN_PROVIDER_STORE";
798
816
  var CHATGPT_AUTH_API_KEY_ENV = "CHATGPT_AUTH_API_KEY";
@@ -1700,19 +1718,21 @@ function createAbortError(reason) {
1700
1718
 
1701
1719
  // src/openai/chatgpt-codex.ts
1702
1720
  var CHATGPT_CODEX_ENDPOINT = "https://chatgpt.com/backend-api/codex/responses";
1721
+ var CHATGPT_CODEX_ENDPOINT_ENV = "CHATGPT_CODEX_ENDPOINT";
1722
+ var CHATGPT_CODEX_PROXY_URL_ENV = "CHATGPT_CODEX_PROXY_URL";
1723
+ var CHATGPT_CODEX_PROXY_API_KEY_ENV = "CHATGPT_CODEX_PROXY_API_KEY";
1703
1724
  var CHATGPT_RESPONSES_EXPERIMENTAL_HEADER = "responses=experimental";
1704
1725
  var chatGptCodexState = getRuntimeSingleton(/* @__PURE__ */ Symbol.for("@ljoukov/llm.chatGptCodexState"), () => ({
1705
1726
  cachedResponsesWebSocketMode: null,
1706
1727
  chatGptResponsesWebSocketDisabled: false
1707
1728
  }));
1708
1729
  async function streamChatGptCodexResponse(options) {
1709
- const { access, accountId } = await getChatGptAuthProfile();
1730
+ const endpointConfig = await resolveChatGptCodexEndpointConfig();
1710
1731
  const mode = resolveChatGptResponsesWebSocketMode();
1711
1732
  const fallbackStreamFactory = () => {
1712
1733
  const streamPromise = streamChatGptCodexResponseSse({
1713
1734
  request: options.request,
1714
- access,
1715
- accountId,
1735
+ endpointConfig,
1716
1736
  sessionId: options.sessionId,
1717
1737
  signal: options.signal
1718
1738
  });
@@ -1734,15 +1754,14 @@ async function streamChatGptCodexResponse(options) {
1734
1754
  return fallbackStreamFactory();
1735
1755
  }
1736
1756
  const websocketHeaders = buildChatGptCodexHeaders({
1737
- access,
1738
- accountId,
1757
+ endpointConfig,
1739
1758
  sessionId: options.sessionId,
1740
1759
  useWebSocket: true
1741
1760
  });
1742
1761
  return createAdaptiveResponsesStream({
1743
1762
  mode,
1744
1763
  createWebSocketStream: async () => await createResponsesWebSocketStream({
1745
- url: toWebSocketUrl(CHATGPT_CODEX_ENDPOINT),
1764
+ url: toWebSocketUrl(endpointConfig.url),
1746
1765
  headers: websocketHeaders,
1747
1766
  request: options.request,
1748
1767
  signal: options.signal
@@ -1755,14 +1774,13 @@ async function streamChatGptCodexResponse(options) {
1755
1774
  }
1756
1775
  async function streamChatGptCodexResponseSse(options) {
1757
1776
  const headers = buildChatGptCodexHeaders({
1758
- access: options.access,
1759
- accountId: options.accountId,
1777
+ endpointConfig: options.endpointConfig,
1760
1778
  sessionId: options.sessionId,
1761
1779
  useWebSocket: false
1762
1780
  });
1763
1781
  headers.Accept = "text/event-stream";
1764
1782
  headers["Content-Type"] = "application/json";
1765
- const response = await fetch(CHATGPT_CODEX_ENDPOINT, {
1783
+ const response = await fetch(options.endpointConfig.url, {
1766
1784
  method: "POST",
1767
1785
  headers,
1768
1786
  body: JSON.stringify(options.request),
@@ -1782,24 +1800,66 @@ function resolveChatGptResponsesWebSocketMode() {
1782
1800
  if (chatGptCodexState.cachedResponsesWebSocketMode) {
1783
1801
  return chatGptCodexState.cachedResponsesWebSocketMode;
1784
1802
  }
1803
+ const explicitMode = process.env.CHATGPT_RESPONSES_WEBSOCKET_MODE ?? process.env.OPENAI_RESPONSES_WEBSOCKET_MODE;
1804
+ const defaultMode = resolveChatGptCodexProxyConfig() ? "off" : "auto";
1785
1805
  chatGptCodexState.cachedResponsesWebSocketMode = resolveResponsesWebSocketMode(
1786
- process.env.CHATGPT_RESPONSES_WEBSOCKET_MODE ?? process.env.OPENAI_RESPONSES_WEBSOCKET_MODE,
1787
- "auto"
1806
+ explicitMode,
1807
+ defaultMode
1788
1808
  );
1789
1809
  return chatGptCodexState.cachedResponsesWebSocketMode;
1790
1810
  }
1811
+ async function resolveChatGptCodexEndpointConfig() {
1812
+ const proxy = resolveChatGptCodexProxyConfig();
1813
+ if (proxy) {
1814
+ return proxy;
1815
+ }
1816
+ const { access, accountId } = await getChatGptAuthProfile();
1817
+ return {
1818
+ kind: "direct",
1819
+ url: resolveChatGptCodexEndpoint(),
1820
+ access,
1821
+ accountId
1822
+ };
1823
+ }
1824
+ function resolveChatGptCodexEndpoint() {
1825
+ loadLocalEnv();
1826
+ return process.env[CHATGPT_CODEX_ENDPOINT_ENV]?.trim() || CHATGPT_CODEX_ENDPOINT;
1827
+ }
1828
+ function resolveChatGptCodexProxyConfig() {
1829
+ loadLocalEnv();
1830
+ const url = process.env[CHATGPT_CODEX_PROXY_URL_ENV]?.trim();
1831
+ if (!url) {
1832
+ return null;
1833
+ }
1834
+ const apiKey = process.env[CHATGPT_CODEX_PROXY_API_KEY_ENV]?.trim();
1835
+ if (!apiKey) {
1836
+ throw new Error(
1837
+ `${CHATGPT_CODEX_PROXY_API_KEY_ENV} must be provided when ${CHATGPT_CODEX_PROXY_URL_ENV} is set.`
1838
+ );
1839
+ }
1840
+ return {
1841
+ kind: "proxy",
1842
+ url,
1843
+ apiKey
1844
+ };
1845
+ }
1791
1846
  function buildChatGptCodexHeaders(options) {
1792
1847
  const openAiBeta = options.useWebSocket ? mergeOpenAiBetaHeader(
1793
1848
  CHATGPT_RESPONSES_EXPERIMENTAL_HEADER,
1794
1849
  OPENAI_BETA_RESPONSES_WEBSOCKETS_V2
1795
1850
  ) : CHATGPT_RESPONSES_EXPERIMENTAL_HEADER;
1796
1851
  const headers = {
1797
- Authorization: `Bearer ${options.access}`,
1798
- "chatgpt-account-id": options.accountId,
1799
1852
  "OpenAI-Beta": openAiBeta,
1800
1853
  originator: "llm",
1801
1854
  "User-Agent": buildUserAgent()
1802
1855
  };
1856
+ if (options.endpointConfig.kind === "proxy") {
1857
+ headers.Authorization = `Bearer ${options.endpointConfig.apiKey}`;
1858
+ headers["x-codex-proxy-auth"] = options.endpointConfig.apiKey;
1859
+ } else {
1860
+ headers.Authorization = `Bearer ${options.endpointConfig.access}`;
1861
+ headers["chatgpt-account-id"] = options.endpointConfig.accountId;
1862
+ }
1803
1863
  if (options.sessionId) {
1804
1864
  headers.session_id = options.sessionId;
1805
1865
  }
@@ -8466,6 +8526,7 @@ async function runTextCall(params) {
8466
8526
  const outputAttachments = collectLoggedAttachmentsFromLlmParts(mergedParts, "output");
8467
8527
  const costUsd = estimateCallCostUsd({
8468
8528
  modelId: modelVersion,
8529
+ pricingModelId: request.model,
8469
8530
  tokens: latestUsage,
8470
8531
  responseImages,
8471
8532
  imageSize: request.imageSize
@@ -9209,6 +9270,7 @@ async function runToolLoop(request) {
9209
9270
  const modelCompletedAtMs = Date.now();
9210
9271
  const stepCostUsd = estimateCallCostUsd({
9211
9272
  modelId: modelVersion,
9273
+ pricingModelId: request.model,
9212
9274
  tokens: usageTokens,
9213
9275
  responseImages: 0
9214
9276
  });
@@ -9540,6 +9602,7 @@ async function runToolLoop(request) {
9540
9602
  usageTokens = extractChatGptUsageTokens(response.usage);
9541
9603
  const stepCostUsd = estimateCallCostUsd({
9542
9604
  modelId: modelVersion,
9605
+ pricingModelId: request.model,
9543
9606
  tokens: usageTokens,
9544
9607
  responseImages: 0
9545
9608
  });
@@ -9868,6 +9931,7 @@ async function runToolLoop(request) {
9868
9931
  usageTokens = extractFireworksUsageTokens(response.usage);
9869
9932
  const stepCostUsd = estimateCallCostUsd({
9870
9933
  modelId: modelVersion,
9934
+ pricingModelId: request.model,
9871
9935
  tokens: usageTokens,
9872
9936
  responseImages: 0
9873
9937
  });
@@ -10230,6 +10294,7 @@ async function runToolLoop(request) {
10230
10294
  );
10231
10295
  const stepCostUsd = estimateCallCostUsd({
10232
10296
  modelId: modelVersion,
10297
+ pricingModelId: request.model,
10233
10298
  tokens: usageTokens,
10234
10299
  responseImages: 0
10235
10300
  });