@adaptic/lumic-utils 1.0.19 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/dist/{apollo-client.client-Dfi-rHW-.js → apollo-client.client-DRk6kygw.js} +4 -4
  2. package/dist/{apollo-client.client-Dfi-rHW-.js.map → apollo-client.client-DRk6kygw.js.map} +1 -1
  3. package/dist/{apollo-client.client-guxMwplM.js → apollo-client.client-DVsbR05r.js} +3 -3
  4. package/dist/{apollo-client.client-guxMwplM.js.map → apollo-client.client-DVsbR05r.js.map} +1 -1
  5. package/dist/{apollo-client.server-HwHIFnVk.js → apollo-client.server-Djh4v__C.js} +3 -3
  6. package/dist/{apollo-client.server-HwHIFnVk.js.map → apollo-client.server-Djh4v__C.js.map} +1 -1
  7. package/dist/{apollo-client.server-Blxbp1Gf.js → apollo-client.server-L8JR2ko_.js} +3 -3
  8. package/dist/{apollo-client.server-Blxbp1Gf.js.map → apollo-client.server-L8JR2ko_.js.map} +1 -1
  9. package/dist/{index-Dr85zRZC.js → index-BVl0tRmx.js} +54 -29
  10. package/dist/{index-Dr85zRZC.js.map → index-BVl0tRmx.js.map} +1 -1
  11. package/dist/{index-CSQmloZ-.js → index-CSOg0U0R.js} +54 -29
  12. package/dist/{index-CSQmloZ-.js.map → index-CSOg0U0R.js.map} +1 -1
  13. package/dist/{index-DollRUHQ.js → index-Cs56Fq24.js} +2 -2
  14. package/dist/{index-DollRUHQ.js.map → index-Cs56Fq24.js.map} +1 -1
  15. package/dist/{index-B4tfLvHx.js → index-eU6Q74W8.js} +2 -2
  16. package/dist/{index-B4tfLvHx.js.map → index-eU6Q74W8.js.map} +1 -1
  17. package/dist/index.cjs +1 -1
  18. package/dist/index.mjs +1 -1
  19. package/dist/test.cjs +1 -1
  20. package/dist/test.mjs +1 -1
  21. package/dist/types/types/openai-types.d.ts +8 -1
  22. package/package.json +1 -1
@@ -2374,14 +2374,20 @@ async function createCompletion(content, responseFormat, options = DEFAULT_OPTIO
2374
2374
  });
2375
2375
  throw error;
2376
2376
  }
2377
+ // OpenAI returns cached input tokens under `prompt_tokens_details.cached_tokens`
2378
+ // when prompts of 1024 or more tokens hit the automatic prompt cache. We surface this
2379
+ // as a first-class field so cost tracking and dashboards reflect the real
2380
+ // (discounted) input cost rather than billing every input token at full rate.
2381
+ const cachedTokens = completion.usage?.prompt_tokens_details?.cached_tokens ?? 0;
2377
2382
  const response = {
2378
2383
  id: completion.id,
2379
2384
  content: completion.choices[0]?.message?.content || '',
2380
2385
  tool_calls: completion.choices[0]?.message?.tool_calls,
2381
- usage: completion.usage || {
2382
- prompt_tokens: 0,
2383
- completion_tokens: 0,
2384
- total_tokens: 0,
2386
+ usage: {
2387
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
2388
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
2389
+ total_tokens: completion.usage?.total_tokens ?? 0,
2390
+ cached_tokens: cachedTokens,
2385
2391
  },
2386
2392
  system_fingerprint: completion.system_fingerprint,
2387
2393
  service_tier: options.service_tier,
@@ -2404,8 +2410,10 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2404
2410
  ...options,
2405
2411
  };
2406
2412
  const completion = await createCompletion(content, responseFormat, mergedOptions);
2407
- // Track cost in the global cost tracker
2408
- getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens);
2413
+ // Track cost in the global cost tracker. Pass cached tokens through so the
2414
+ // tracker applies the discounted cached-input rate (a model-dependent fraction of the
2415
+ // standard input rate) instead of billing every input token at full price.
2416
+ getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
2409
2417
  // Handle tool calls differently
2410
2418
  if (completion.tool_calls && completion.tool_calls.length > 0) {
2411
2419
  const toolCallResponse = {
@@ -2423,7 +2431,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2423
2431
  reasoning_tokens: 0,
2424
2432
  provider: 'openai',
2425
2433
  model: completion.model,
2426
- cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0),
2434
+ cached_tokens: completion.usage.cached_tokens,
2435
+ cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
2427
2436
  },
2428
2437
  tool_calls: completion.tool_calls,
2429
2438
  };
@@ -2441,7 +2450,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2441
2450
  reasoning_tokens: 0,
2442
2451
  provider: 'openai',
2443
2452
  model: completion.model,
2444
- cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0),
2453
+ cached_tokens: completion.usage.cached_tokens,
2454
+ cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
2445
2455
  },
2446
2456
  tool_calls: completion.tool_calls,
2447
2457
  };
@@ -2496,8 +2506,11 @@ const makeResponsesAPICall = async (input, options = {}) => {
2496
2506
  maxDelayMs: 30000,
2497
2507
  retryableErrors: isRetryableLLMError,
2498
2508
  }, `OpenAI-Responses:${normalizedModel}`);
2509
+ // Responses API exposes cached input tokens under `input_tokens_details.cached_tokens`
2510
+ // (the equivalent of Chat Completions' `prompt_tokens_details.cached_tokens`).
2511
+ const responsesCachedTokens = response.usage?.input_tokens_details?.cached_tokens || 0;
2499
2512
  // Track cost in the global cost tracker
2500
- getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0);
2513
+ getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens);
2501
2514
  // Extract tool calls from the output
2502
2515
  const toolCalls = response.output
2503
2516
  ?.filter((item) => item.type === 'function_call')
@@ -2538,7 +2551,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
2538
2551
  reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
2539
2552
  provider: 'openai',
2540
2553
  model: normalizedModel,
2541
- cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0),
2554
+ cached_tokens: responsesCachedTokens,
2555
+ cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
2542
2556
  },
2543
2557
  tool_calls: toolCalls,
2544
2558
  ...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
@@ -2570,7 +2584,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
2570
2584
  reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
2571
2585
  provider: 'openai',
2572
2586
  model: normalizedModel,
2573
- cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0),
2587
+ cached_tokens: responsesCachedTokens,
2588
+ cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
2574
2589
  },
2575
2590
  tool_calls: toolCalls,
2576
2591
  ...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
@@ -8762,14 +8777,25 @@ async function createDeepseekCompletion(content, responseFormat, options = {}) {
8762
8777
  maxDelayMs: 30000,
8763
8778
  retryableErrors: isRetryableDeepseekError,
8764
8779
  }, `Deepseek:${normalizedModel}`);
8780
+ // DeepSeek surfaces cached input tokens in two places on the usage object:
8781
+ // - `prompt_cache_hit_tokens` (DeepSeek-native field, see
8782
+ // https://api-docs.deepseek.com/guides/kv_cache)
8783
+ // - `prompt_tokens_details.cached_tokens` (OpenAI-compatible alias)
8784
+ // Prefer the OpenAI-compatible name so a single canonical field works for
8785
+ // both providers; fall back to the DeepSeek-native name if absent.
8786
+ const usageRaw = completion.usage;
8787
+ const cachedTokens = usageRaw?.prompt_tokens_details?.cached_tokens ??
8788
+ usageRaw?.prompt_cache_hit_tokens ??
8789
+ 0;
8765
8790
  return {
8766
8791
  id: completion.id,
8767
8792
  content: completion.choices[0]?.message?.content || '',
8768
8793
  tool_calls: completion.choices[0]?.message?.tool_calls,
8769
- usage: completion.usage || {
8770
- prompt_tokens: 0,
8771
- completion_tokens: 0,
8772
- total_tokens: 0,
8794
+ usage: {
8795
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
8796
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
8797
+ total_tokens: completion.usage?.total_tokens ?? 0,
8798
+ cached_tokens: cachedTokens,
8773
8799
  },
8774
8800
  system_fingerprint: completion.system_fingerprint,
8775
8801
  provider: 'deepseek',
@@ -8811,7 +8837,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8811
8837
  reasoning_tokens: 0,
8812
8838
  provider: 'deepseek',
8813
8839
  model: modelName,
8814
- cache_hit_tokens: 0,
8840
+ cached_tokens: 0,
8815
8841
  cost: 0,
8816
8842
  },
8817
8843
  tool_calls: undefined,
@@ -8830,7 +8856,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8830
8856
  reasoning_tokens: 0,
8831
8857
  provider: 'deepseek',
8832
8858
  model: modelName,
8833
- cache_hit_tokens: 0,
8859
+ cached_tokens: 0,
8834
8860
  cost: 0,
8835
8861
  },
8836
8862
  tool_calls: undefined,
@@ -8838,8 +8864,9 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8838
8864
  }
8839
8865
  try {
8840
8866
  const completion = await createDeepseekCompletion(content, responseFormat, mergedOptions);
8841
- // Track cost in the global cost tracker
8842
- getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens);
8867
+ // Track cost in the global cost tracker. Pass cached tokens through so the
8868
+ // discounted cached-input pricing tier is applied.
8869
+ getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
8843
8870
  // Handle tool calls similarly to OpenAI
8844
8871
  if (completion.tool_calls && completion.tool_calls.length > 0) {
8845
8872
  const toolCallResponse = {
@@ -8857,9 +8884,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8857
8884
  reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
8858
8885
  provider: 'deepseek',
8859
8886
  model: completion.model,
8860
- cache_hit_tokens: 0, // Not provided directly in API response
8861
- cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, 0 // Cache hit tokens (not provided in the response)
8862
- ),
8887
+ cached_tokens: completion.usage.cached_tokens,
8888
+ cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
8863
8889
  },
8864
8890
  tool_calls: completion.tool_calls,
8865
8891
  };
@@ -8877,9 +8903,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8877
8903
  reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
8878
8904
  provider: 'deepseek',
8879
8905
  model: completion.model,
8880
- cache_hit_tokens: 0, // Not provided directly in API response
8881
- cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, 0 // Cache hit tokens (not provided in the response)
8882
- ),
8906
+ cached_tokens: completion.usage.cached_tokens,
8907
+ cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
8883
8908
  },
8884
8909
  tool_calls: completion.tool_calls,
8885
8910
  };
@@ -8897,7 +8922,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8897
8922
  reasoning_tokens: 0,
8898
8923
  provider: 'deepseek',
8899
8924
  model: modelName,
8900
- cache_hit_tokens: 0,
8925
+ cached_tokens: 0,
8901
8926
  cost: 0,
8902
8927
  },
8903
8928
  tool_calls: undefined,
@@ -22796,11 +22821,11 @@ let poolConfig = DEFAULT_POOL_CONFIG;
22796
22821
  async function loadApolloModules() {
22797
22822
  if (typeof window === "undefined" || process.env.AWS_EXECUTION_ENV) {
22798
22823
  // Server-side (or Lambda): load the CommonJS‑based implementation.
22799
- return (await Promise.resolve().then(function () { return require('./apollo-client.server-HwHIFnVk.js'); }));
22824
+ return (await Promise.resolve().then(function () { return require('./apollo-client.server-Djh4v__C.js'); }));
22800
22825
  }
22801
22826
  else {
22802
22827
  // Client-side: load the ESM‑based implementation.
22803
- return (await Promise.resolve().then(function () { return require('./apollo-client.client-guxMwplM.js'); }));
22828
+ return (await Promise.resolve().then(function () { return require('./apollo-client.client-DVsbR05r.js'); }));
22804
22829
  }
22805
22830
  }
22806
22831
  /**
@@ -81511,4 +81536,4 @@ exports.withCorrelationId = withCorrelationId;
81511
81536
  exports.withMetrics = withMetrics;
81512
81537
  exports.withRateLimit = withRateLimit;
81513
81538
  exports.withRetry = withRetry;
81514
- //# sourceMappingURL=index-Dr85zRZC.js.map
81539
+ //# sourceMappingURL=index-BVl0tRmx.js.map