@adaptic/lumic-utils 1.0.19 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. package/dist/{apollo-client.client-guxMwplM.js → apollo-client.client-ByADDB46.js} +3 -3
  2. package/dist/{apollo-client.client-guxMwplM.js.map → apollo-client.client-ByADDB46.js.map} +1 -1
  3. package/dist/{apollo-client.client-Dfi-rHW-.js → apollo-client.client-CUIakkzs.js} +4 -4
  4. package/dist/{apollo-client.client-Dfi-rHW-.js.map → apollo-client.client-CUIakkzs.js.map} +1 -1
  5. package/dist/{apollo-client.server-HwHIFnVk.js → apollo-client.server-BnZhh39o.js} +3 -3
  6. package/dist/{apollo-client.server-HwHIFnVk.js.map → apollo-client.server-BnZhh39o.js.map} +1 -1
  7. package/dist/{apollo-client.server-Blxbp1Gf.js → apollo-client.server-JucuAyrj.js} +3 -3
  8. package/dist/{apollo-client.server-Blxbp1Gf.js.map → apollo-client.server-JucuAyrj.js.map} +1 -1
  9. package/dist/{index-B4tfLvHx.js → index-BLXN1stF.js} +2 -2
  10. package/dist/{index-B4tfLvHx.js.map → index-BLXN1stF.js.map} +1 -1
  11. package/dist/{index-Dr85zRZC.js → index-Ca3x8X5U.js} +85 -31
  12. package/dist/{index-CSQmloZ-.js.map → index-Ca3x8X5U.js.map} +1 -1
  13. package/dist/{index-DollRUHQ.js → index-DT0dXUtn.js} +2 -2
  14. package/dist/{index-DollRUHQ.js.map → index-DT0dXUtn.js.map} +1 -1
  15. package/dist/{index-CSQmloZ-.js → index-DYehXKUX.js} +85 -31
  16. package/dist/{index-Dr85zRZC.js.map → index-DYehXKUX.js.map} +1 -1
  17. package/dist/index.cjs +1 -1
  18. package/dist/index.mjs +1 -1
  19. package/dist/test.cjs +1 -1
  20. package/dist/test.mjs +1 -1
  21. package/dist/types/functions/llm-config.d.ts +9 -1
  22. package/dist/types/types/openai-types.d.ts +8 -1
  23. package/dist/types/utils/llm-cost-tracker.d.ts +3 -0
  24. package/package.json +1 -1
@@ -768,58 +768,79 @@ const DEFAULT_DEVELOPER_PROMPT = `
768
768
  Present complete, high-confidence, final answers only. Do not rephrase to be more brief or omit parts of answers.
769
769
  Respond only with final content (e.g. code, a json or yaml object, a formatted string, or a markdown document) and nothing else. Do not reply with a preamble, introduction, or conclusion.
770
770
  `;
771
- /** Token costs in USD per token. Last updated Mar 2026. */
771
+ /**
772
+ * Token costs in USD per token. Last updated Apr 2026.
773
+ *
774
+ * `cacheHitCost` reflects OpenAI's cached-input billing rate (~50% of the
775
+ * standard input rate per OpenAI's prompt caching documentation). When set,
776
+ * `calculateCost` splits prompt tokens into cached vs non-cached buckets and
777
+ * applies the discount; when omitted, cached tokens are billed at full input
778
+ * rate (a silent ~50% cost overstatement for cache-friendly workloads).
779
+ */
772
780
  const openAiModelCosts = {
773
781
  'gpt-5.4': {
774
782
  inputCost: 2.5 / 1_000_000,
783
+ cacheHitCost: 1.25 / 1_000_000,
775
784
  outputCost: 15 / 1_000_000,
776
785
  },
777
786
  'gpt-5.4-mini': {
778
787
  inputCost: 0.75 / 1_000_000,
788
+ cacheHitCost: 0.375 / 1_000_000,
779
789
  outputCost: 4.5 / 1_000_000,
780
790
  },
781
791
  'gpt-5.4-nano': {
782
792
  inputCost: 0.2 / 1_000_000,
793
+ cacheHitCost: 0.1 / 1_000_000,
783
794
  outputCost: 1.25 / 1_000_000,
784
795
  },
785
796
  'gpt-5': {
786
797
  inputCost: 2.5 / 1_000_000,
798
+ cacheHitCost: 1.25 / 1_000_000,
787
799
  outputCost: 10 / 1_000_000,
788
800
  },
789
801
  'gpt-5-mini': {
790
802
  inputCost: 0.15 / 1_000_000,
803
+ cacheHitCost: 0.075 / 1_000_000,
791
804
  outputCost: 0.6 / 1_000_000,
792
805
  },
793
806
  'o1-mini': {
794
807
  inputCost: 1.1 / 1_000_000,
808
+ cacheHitCost: 0.55 / 1_000_000,
795
809
  outputCost: 4.4 / 1_000_000,
796
810
  },
797
811
  'o1': {
798
812
  inputCost: 15 / 1_000_000,
813
+ cacheHitCost: 7.5 / 1_000_000,
799
814
  outputCost: 60 / 1_000_000,
800
815
  },
801
816
  'o3-mini': {
802
817
  inputCost: 1.1 / 1_000_000,
818
+ cacheHitCost: 0.55 / 1_000_000,
803
819
  outputCost: 4.4 / 1_000_000,
804
820
  },
805
821
  'o3': {
806
822
  inputCost: 2 / 1_000_000,
823
+ cacheHitCost: 1 / 1_000_000,
807
824
  outputCost: 8 / 1_000_000,
808
825
  },
809
826
  'gpt-4.1': {
810
827
  inputCost: 2 / 1_000_000,
828
+ cacheHitCost: 1 / 1_000_000,
811
829
  outputCost: 8 / 1_000_000,
812
830
  },
813
831
  'gpt-4.1-mini': {
814
832
  inputCost: 0.4 / 1_000_000,
833
+ cacheHitCost: 0.2 / 1_000_000,
815
834
  outputCost: 1.6 / 1_000_000,
816
835
  },
817
836
  'gpt-4.1-nano': {
818
837
  inputCost: 0.1 / 1_000_000,
838
+ cacheHitCost: 0.05 / 1_000_000,
819
839
  outputCost: 0.4 / 1_000_000,
820
840
  },
821
841
  'o4-mini': {
822
842
  inputCost: 1.1 / 1_000_000,
843
+ cacheHitCost: 0.55 / 1_000_000,
823
844
  outputCost: 4.4 / 1_000_000,
824
845
  },
825
846
  };
@@ -1894,7 +1915,10 @@ class LLMCostTracker {
1894
1915
  timestamp: Date.now(),
1895
1916
  };
1896
1917
  this.usageRecords.push(record);
1897
- getLumicLogger().info(`LLM cost tracked: ${provider}/${model} - $${cost.toFixed(6)}`, { provider, model, inputTokens, outputTokens, cost });
1918
+ // Emit cachedTokens and reasoningTokens explicitly so operators can
1919
+ // verify cache effectiveness from logs alone (the prior log shape only
1920
+ // surfaced inputTokens and outputTokens, hiding the cache discount).
1921
+ getLumicLogger().info(`LLM cost tracked: ${provider}/${model} - $${cost.toFixed(6)}`, { provider, model, inputTokens, cachedTokens: cacheHitTokens, outputTokens, reasoningTokens, cost });
1898
1922
  }
1899
1923
  /**
1900
1924
  * Records usage from an image generation call.
@@ -1975,11 +1999,13 @@ class LLMCostTracker {
1975
1999
  const images = this.getImageCosts();
1976
2000
  let totalCost = 0;
1977
2001
  let totalInputTokens = 0;
2002
+ let totalCacheHitTokens = 0;
1978
2003
  let totalOutputTokens = 0;
1979
2004
  let totalReasoningTokens = 0;
1980
2005
  for (const summary of Object.values(byModel)) {
1981
2006
  totalCost += summary.totalCost;
1982
2007
  totalInputTokens += summary.totalInputTokens;
2008
+ totalCacheHitTokens += summary.totalCacheHitTokens;
1983
2009
  totalOutputTokens += summary.totalOutputTokens;
1984
2010
  totalReasoningTokens += summary.totalReasoningTokens;
1985
2011
  }
@@ -1996,6 +2022,7 @@ class LLMCostTracker {
1996
2022
  totalCost,
1997
2023
  totalCalls: this.usageRecords.length + this.imageRecords.length,
1998
2024
  totalInputTokens,
2025
+ totalCacheHitTokens,
1999
2026
  totalOutputTokens,
2000
2027
  totalReasoningTokens,
2001
2028
  byModel,
@@ -2018,7 +2045,9 @@ class LLMCostTracker {
2018
2045
  cost: `$${m.totalCost.toFixed(6)}`,
2019
2046
  calls: m.callCount,
2020
2047
  inputTokens: m.totalInputTokens,
2048
+ cachedTokens: m.totalCacheHitTokens,
2021
2049
  outputTokens: m.totalOutputTokens,
2050
+ reasoningTokens: m.totalReasoningTokens,
2022
2051
  }));
2023
2052
  const images = Object.values(summary.images).map((img) => ({
2024
2053
  model: img.model,
@@ -2374,14 +2403,20 @@ async function createCompletion(content, responseFormat, options = DEFAULT_OPTIO
2374
2403
  });
2375
2404
  throw error;
2376
2405
  }
2406
+ // OpenAI returns cached input tokens under `prompt_tokens_details.cached_tokens`
2407
+ // when prompts >1024 tokens hit the automatic prompt cache. We surface this
2408
+ // as a first-class field so cost tracking and dashboards reflect the real
2409
+ // (discounted) input cost rather than billing every input token at full rate.
2410
+ const cachedTokens = completion.usage?.prompt_tokens_details?.cached_tokens ?? 0;
2377
2411
  const response = {
2378
2412
  id: completion.id,
2379
2413
  content: completion.choices[0]?.message?.content || '',
2380
2414
  tool_calls: completion.choices[0]?.message?.tool_calls,
2381
- usage: completion.usage || {
2382
- prompt_tokens: 0,
2383
- completion_tokens: 0,
2384
- total_tokens: 0,
2415
+ usage: {
2416
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
2417
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
2418
+ total_tokens: completion.usage?.total_tokens ?? 0,
2419
+ cached_tokens: cachedTokens,
2385
2420
  },
2386
2421
  system_fingerprint: completion.system_fingerprint,
2387
2422
  service_tier: options.service_tier,
@@ -2404,8 +2439,10 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2404
2439
  ...options,
2405
2440
  };
2406
2441
  const completion = await createCompletion(content, responseFormat, mergedOptions);
2407
- // Track cost in the global cost tracker
2408
- getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens);
2442
+ // Track cost in the global cost tracker. Pass cached tokens through so the
2443
+ // tracker applies the discounted cached-input rate (typically ~50% of the
2444
+ // standard input rate) instead of billing every input token at full price.
2445
+ getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
2409
2446
  // Handle tool calls differently
2410
2447
  if (completion.tool_calls && completion.tool_calls.length > 0) {
2411
2448
  const toolCallResponse = {
@@ -2423,7 +2460,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2423
2460
  reasoning_tokens: 0,
2424
2461
  provider: 'openai',
2425
2462
  model: completion.model,
2426
- cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0),
2463
+ cached_tokens: completion.usage.cached_tokens,
2464
+ cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
2427
2465
  },
2428
2466
  tool_calls: completion.tool_calls,
2429
2467
  };
@@ -2441,7 +2479,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2441
2479
  reasoning_tokens: 0,
2442
2480
  provider: 'openai',
2443
2481
  model: completion.model,
2444
- cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0),
2482
+ cached_tokens: completion.usage.cached_tokens,
2483
+ cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
2445
2484
  },
2446
2485
  tool_calls: completion.tool_calls,
2447
2486
  };
@@ -2496,8 +2535,11 @@ const makeResponsesAPICall = async (input, options = {}) => {
2496
2535
  maxDelayMs: 30000,
2497
2536
  retryableErrors: isRetryableLLMError,
2498
2537
  }, `OpenAI-Responses:${normalizedModel}`);
2538
+ // Responses API exposes cached input tokens under `input_tokens_details.cached_tokens`
2539
+ // (the equivalent of Chat Completions' `prompt_tokens_details.cached_tokens`).
2540
+ const responsesCachedTokens = response.usage?.input_tokens_details?.cached_tokens || 0;
2499
2541
  // Track cost in the global cost tracker
2500
- getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0);
2542
+ getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens);
2501
2543
  // Extract tool calls from the output
2502
2544
  const toolCalls = response.output
2503
2545
  ?.filter((item) => item.type === 'function_call')
@@ -2538,7 +2580,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
2538
2580
  reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
2539
2581
  provider: 'openai',
2540
2582
  model: normalizedModel,
2541
- cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0),
2583
+ cached_tokens: responsesCachedTokens,
2584
+ cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
2542
2585
  },
2543
2586
  tool_calls: toolCalls,
2544
2587
  ...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
@@ -2570,7 +2613,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
2570
2613
  reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
2571
2614
  provider: 'openai',
2572
2615
  model: normalizedModel,
2573
- cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0),
2616
+ cached_tokens: responsesCachedTokens,
2617
+ cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
2574
2618
  },
2575
2619
  tool_calls: toolCalls,
2576
2620
  ...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
@@ -8762,14 +8806,25 @@ async function createDeepseekCompletion(content, responseFormat, options = {}) {
8762
8806
  maxDelayMs: 30000,
8763
8807
  retryableErrors: isRetryableDeepseekError,
8764
8808
  }, `Deepseek:${normalizedModel}`);
8809
+ // DeepSeek surfaces cached input tokens in two places on the usage object:
8810
+ // - `prompt_cache_hit_tokens` (DeepSeek-native field, see
8811
+ // https://api-docs.deepseek.com/guides/kv_cache)
8812
+ // - `prompt_tokens_details.cached_tokens` (OpenAI-compatible alias)
8813
+ // Prefer the OpenAI-compatible name so a single canonical field works for
8814
+ // both providers; fall back to the DeepSeek-native name if absent.
8815
+ const usageRaw = completion.usage;
8816
+ const cachedTokens = usageRaw?.prompt_tokens_details?.cached_tokens ??
8817
+ usageRaw?.prompt_cache_hit_tokens ??
8818
+ 0;
8765
8819
  return {
8766
8820
  id: completion.id,
8767
8821
  content: completion.choices[0]?.message?.content || '',
8768
8822
  tool_calls: completion.choices[0]?.message?.tool_calls,
8769
- usage: completion.usage || {
8770
- prompt_tokens: 0,
8771
- completion_tokens: 0,
8772
- total_tokens: 0,
8823
+ usage: {
8824
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
8825
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
8826
+ total_tokens: completion.usage?.total_tokens ?? 0,
8827
+ cached_tokens: cachedTokens,
8773
8828
  },
8774
8829
  system_fingerprint: completion.system_fingerprint,
8775
8830
  provider: 'deepseek',
@@ -8811,7 +8866,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8811
8866
  reasoning_tokens: 0,
8812
8867
  provider: 'deepseek',
8813
8868
  model: modelName,
8814
- cache_hit_tokens: 0,
8869
+ cached_tokens: 0,
8815
8870
  cost: 0,
8816
8871
  },
8817
8872
  tool_calls: undefined,
@@ -8830,7 +8885,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8830
8885
  reasoning_tokens: 0,
8831
8886
  provider: 'deepseek',
8832
8887
  model: modelName,
8833
- cache_hit_tokens: 0,
8888
+ cached_tokens: 0,
8834
8889
  cost: 0,
8835
8890
  },
8836
8891
  tool_calls: undefined,
@@ -8838,8 +8893,9 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8838
8893
  }
8839
8894
  try {
8840
8895
  const completion = await createDeepseekCompletion(content, responseFormat, mergedOptions);
8841
- // Track cost in the global cost tracker
8842
- getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens);
8896
+ // Track cost in the global cost tracker. Pass cached tokens through so the
8897
+ // discounted cached-input pricing tier is applied.
8898
+ getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
8843
8899
  // Handle tool calls similarly to OpenAI
8844
8900
  if (completion.tool_calls && completion.tool_calls.length > 0) {
8845
8901
  const toolCallResponse = {
@@ -8857,9 +8913,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8857
8913
  reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
8858
8914
  provider: 'deepseek',
8859
8915
  model: completion.model,
8860
- cache_hit_tokens: 0, // Not provided directly in API response
8861
- cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, 0 // Cache hit tokens (not provided in the response)
8862
- ),
8916
+ cached_tokens: completion.usage.cached_tokens,
8917
+ cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
8863
8918
  },
8864
8919
  tool_calls: completion.tool_calls,
8865
8920
  };
@@ -8877,9 +8932,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8877
8932
  reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
8878
8933
  provider: 'deepseek',
8879
8934
  model: completion.model,
8880
- cache_hit_tokens: 0, // Not provided directly in API response
8881
- cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, 0 // Cache hit tokens (not provided in the response)
8882
- ),
8935
+ cached_tokens: completion.usage.cached_tokens,
8936
+ cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
8883
8937
  },
8884
8938
  tool_calls: completion.tool_calls,
8885
8939
  };
@@ -8897,7 +8951,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8897
8951
  reasoning_tokens: 0,
8898
8952
  provider: 'deepseek',
8899
8953
  model: modelName,
8900
- cache_hit_tokens: 0,
8954
+ cached_tokens: 0,
8901
8955
  cost: 0,
8902
8956
  },
8903
8957
  tool_calls: undefined,
@@ -22796,11 +22850,11 @@ let poolConfig = DEFAULT_POOL_CONFIG;
22796
22850
  async function loadApolloModules() {
22797
22851
  if (typeof window === "undefined" || process.env.AWS_EXECUTION_ENV) {
22798
22852
  // Server-side (or Lambda): load the CommonJS‑based implementation.
22799
- return (await Promise.resolve().then(function () { return require('./apollo-client.server-HwHIFnVk.js'); }));
22853
+ return (await Promise.resolve().then(function () { return require('./apollo-client.server-BnZhh39o.js'); }));
22800
22854
  }
22801
22855
  else {
22802
22856
  // Client-side: load the ESM‑based implementation.
22803
- return (await Promise.resolve().then(function () { return require('./apollo-client.client-guxMwplM.js'); }));
22857
+ return (await Promise.resolve().then(function () { return require('./apollo-client.client-ByADDB46.js'); }));
22804
22858
  }
22805
22859
  }
22806
22860
  /**
@@ -81511,4 +81565,4 @@ exports.withCorrelationId = withCorrelationId;
81511
81565
  exports.withMetrics = withMetrics;
81512
81566
  exports.withRateLimit = withRateLimit;
81513
81567
  exports.withRetry = withRetry;
81514
- //# sourceMappingURL=index-Dr85zRZC.js.map
81568
+ //# sourceMappingURL=index-Ca3x8X5U.js.map