@adaptic/lumic-utils 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/dist/{apollo-client.client-kEvzgHxw.js → apollo-client.client-DRk6kygw.js} +4 -4
  2. package/dist/{apollo-client.client-kEvzgHxw.js.map → apollo-client.client-DRk6kygw.js.map} +1 -1
  3. package/dist/{apollo-client.client-Cz-ZMwuK.js → apollo-client.client-DVsbR05r.js} +3 -3
  4. package/dist/{apollo-client.client-Cz-ZMwuK.js.map → apollo-client.client-DVsbR05r.js.map} +1 -1
  5. package/dist/{apollo-client.server-BAuFJqgR.js → apollo-client.server-Djh4v__C.js} +3 -3
  6. package/dist/{apollo-client.server-BAuFJqgR.js.map → apollo-client.server-Djh4v__C.js.map} +1 -1
  7. package/dist/{apollo-client.server-C2gZgUkR.js → apollo-client.server-L8JR2ko_.js} +3 -3
  8. package/dist/{apollo-client.server-C2gZgUkR.js.map → apollo-client.server-L8JR2ko_.js.map} +1 -1
  9. package/dist/{index-C3ihLNel.js → index-BVl0tRmx.js} +126 -38
  10. package/dist/{index-C3ihLNel.js.map → index-BVl0tRmx.js.map} +1 -1
  11. package/dist/{index-UQOI_SLD.js → index-CSOg0U0R.js} +126 -38
  12. package/dist/{index-UQOI_SLD.js.map → index-CSOg0U0R.js.map} +1 -1
  13. package/dist/{index-C_0vRRAD.js → index-Cs56Fq24.js} +2 -2
  14. package/dist/{index-C_0vRRAD.js.map → index-Cs56Fq24.js.map} +1 -1
  15. package/dist/{index-27SewDPi.js → index-eU6Q74W8.js} +2 -2
  16. package/dist/{index-27SewDPi.js.map → index-eU6Q74W8.js.map} +1 -1
  17. package/dist/index.cjs +1 -1
  18. package/dist/index.mjs +1 -1
  19. package/dist/test.cjs +1 -1
  20. package/dist/test.mjs +1 -1
  21. package/dist/types/types/openai-types.d.ts +8 -1
  22. package/package.json +1 -1
@@ -2106,15 +2106,33 @@ function resetLLMCostTracker() {
2106
2106
  // llm-openai.ts
2107
2107
  /**
2108
2108
  * Determines if an LLM error should be retried.
2109
- * Only retries on rate limit errors (429).
2109
+ *
2110
+ * Retries on:
2111
+ * - 429 / rate limit errors (transient capacity)
2112
+ * - "could not parse the JSON body" 400s — observed once in production for a
2113
+ * single symbol on the very first conversation turn (Wave 86, 2026-04-11).
2114
+ * The exact same call site succeeds millions of times before and after, and
2115
+ * the prior fix commit `6eaef52` in this repo already eliminated the only
2116
+ * known SDK-v5 cause (passing `tools: undefined/null`). The remaining cases
2117
+ * are virtually always proxy/network corruption of the request body in
2118
+ * flight (request truncated mid-flight, TLS renegotiation, edge proxy
2119
+ * buffer reset). Retrying once with a fresh connection has a high
2120
+ * probability of recovering, and a deterministic SDK-side defect would
2121
+ * re-fail on retry (so we still surface it).
2110
2122
  */
2111
2123
  const isRetryableLLMError = (error) => {
2112
2124
  if (error instanceof Error) {
2113
2125
  const message = error.message;
2114
- // Retry only on rate limits (429)
2126
+ // Retry on rate limits (429)
2115
2127
  if (message.includes('429') || message.includes('rate limit') || message.includes('Rate limit')) {
2116
2128
  return true;
2117
2129
  }
2130
+ // Retry on transient body-corruption 400s. Match the exact OpenAI error
2131
+ // string to avoid retrying genuine client-side validation 400s (which
2132
+ // would re-fail forever and waste retry budget).
2133
+ if (message.includes('could not parse the JSON body of your request')) {
2134
+ return true;
2135
+ }
2118
2136
  }
2119
2137
  return false;
2120
2138
  };
@@ -2310,20 +2328,66 @@ async function createCompletion(content, responseFormat, options = DEFAULT_OPTIO
2310
2328
  if (responseFormatOption.type !== 'text') {
2311
2329
  queryOptions.response_format = responseFormatOption;
2312
2330
  }
2313
- const completion = await withRetry(() => openai.chat.completions.create(queryOptions), {
2314
- maxRetries: 3,
2315
- baseDelayMs: 2000,
2316
- maxDelayMs: 30000,
2317
- retryableErrors: isRetryableLLMError,
2318
- }, `OpenAI:${normalizedModel}`);
2331
+ let completion;
2332
+ try {
2333
+ completion = await withRetry(() => openai.chat.completions.create(queryOptions), {
2334
+ maxRetries: 3,
2335
+ baseDelayMs: 2000,
2336
+ maxDelayMs: 30000,
2337
+ retryableErrors: isRetryableLLMError,
2338
+ }, `OpenAI:${normalizedModel}`);
2339
+ }
2340
+ catch (error) {
2341
+ // Defensive observability: when the OpenAI SDK rejects our request,
2342
+ // emit a structured snapshot of the queryOptions shape (NOT content) so
2343
+ // a future recurrence of the rare "could not parse JSON body" 400 can be
2344
+ // diagnosed without having to reproduce locally. We deliberately log
2345
+ // metadata only — no message content, no API key — so this is safe even
2346
+ // for production prompts containing sensitive context.
2347
+ const errorMessage = error instanceof Error ? error.message : String(error);
2348
+ const totalContentChars = messages.reduce((sum, msg) => {
2349
+ if (typeof msg.content === 'string')
2350
+ return sum + msg.content.length;
2351
+ if (Array.isArray(msg.content)) {
2352
+ return sum + msg.content.reduce((s, part) => {
2353
+ if (typeof part === 'object' && part !== null && 'text' in part && typeof part.text === 'string') {
2354
+ return s + part.text.length;
2355
+ }
2356
+ return s;
2357
+ }, 0);
2358
+ }
2359
+ return sum;
2360
+ }, 0);
2361
+ getLumicLogger().error(`OpenAI ChatCompletion call failed for model ${normalizedModel}`, {
2362
+ model: normalizedModel,
2363
+ errorMessage,
2364
+ messageCount: messages.length,
2365
+ roleBreakdown: messages.reduce((acc, msg) => {
2366
+ acc[msg.role] = (acc[msg.role] ?? 0) + 1;
2367
+ return acc;
2368
+ }, {}),
2369
+ totalContentChars,
2370
+ toolCount: queryOptions.tools?.length ?? 0,
2371
+ hasTemperature: queryOptions.temperature !== undefined,
2372
+ hasResponseFormat: queryOptions.response_format !== undefined,
2373
+ hasMaxCompletionTokens: queryOptions.max_completion_tokens !== undefined,
2374
+ });
2375
+ throw error;
2376
+ }
2377
+ // OpenAI returns cached input tokens under `prompt_tokens_details.cached_tokens`
2378
+ // when prompts >1024 tokens hit the automatic prompt cache. We surface this
2379
+ // as a first-class field so cost tracking and dashboards reflect the real
2380
+ // (discounted) input cost rather than billing every input token at full rate.
2381
+ const cachedTokens = completion.usage?.prompt_tokens_details?.cached_tokens ?? 0;
2319
2382
  const response = {
2320
2383
  id: completion.id,
2321
2384
  content: completion.choices[0]?.message?.content || '',
2322
2385
  tool_calls: completion.choices[0]?.message?.tool_calls,
2323
- usage: completion.usage || {
2324
- prompt_tokens: 0,
2325
- completion_tokens: 0,
2326
- total_tokens: 0,
2386
+ usage: {
2387
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
2388
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
2389
+ total_tokens: completion.usage?.total_tokens ?? 0,
2390
+ cached_tokens: cachedTokens,
2327
2391
  },
2328
2392
  system_fingerprint: completion.system_fingerprint,
2329
2393
  service_tier: options.service_tier,
@@ -2346,8 +2410,10 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2346
2410
  ...options,
2347
2411
  };
2348
2412
  const completion = await createCompletion(content, responseFormat, mergedOptions);
2349
- // Track cost in the global cost tracker
2350
- getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens);
2413
+ // Track cost in the global cost tracker. Pass cached tokens through so the
2414
+ // tracker applies the discounted cached-input rate (typically ~50% of the
2415
+ // standard input rate) instead of billing every input token at full price.
2416
+ getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
2351
2417
  // Handle tool calls differently
2352
2418
  if (completion.tool_calls && completion.tool_calls.length > 0) {
2353
2419
  const toolCallResponse = {
@@ -2365,7 +2431,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2365
2431
  reasoning_tokens: 0,
2366
2432
  provider: 'openai',
2367
2433
  model: completion.model,
2368
- cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0),
2434
+ cached_tokens: completion.usage.cached_tokens,
2435
+ cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
2369
2436
  },
2370
2437
  tool_calls: completion.tool_calls,
2371
2438
  };
@@ -2383,7 +2450,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
2383
2450
  reasoning_tokens: 0,
2384
2451
  provider: 'openai',
2385
2452
  model: completion.model,
2386
- cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0),
2453
+ cached_tokens: completion.usage.cached_tokens,
2454
+ cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
2387
2455
  },
2388
2456
  tool_calls: completion.tool_calls,
2389
2457
  };
@@ -2438,8 +2506,11 @@ const makeResponsesAPICall = async (input, options = {}) => {
2438
2506
  maxDelayMs: 30000,
2439
2507
  retryableErrors: isRetryableLLMError,
2440
2508
  }, `OpenAI-Responses:${normalizedModel}`);
2509
+ // Responses API exposes cached input tokens under `input_tokens_details.cached_tokens`
2510
+ // (the equivalent of Chat Completions' `prompt_tokens_details.cached_tokens`).
2511
+ const responsesCachedTokens = response.usage?.input_tokens_details?.cached_tokens || 0;
2441
2512
  // Track cost in the global cost tracker
2442
- getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0);
2513
+ getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens);
2443
2514
  // Extract tool calls from the output
2444
2515
  const toolCalls = response.output
2445
2516
  ?.filter((item) => item.type === 'function_call')
@@ -2480,7 +2551,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
2480
2551
  reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
2481
2552
  provider: 'openai',
2482
2553
  model: normalizedModel,
2483
- cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0),
2554
+ cached_tokens: responsesCachedTokens,
2555
+ cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
2484
2556
  },
2485
2557
  tool_calls: toolCalls,
2486
2558
  ...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
@@ -2512,7 +2584,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
2512
2584
  reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
2513
2585
  provider: 'openai',
2514
2586
  model: normalizedModel,
2515
- cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0),
2587
+ cached_tokens: responsesCachedTokens,
2588
+ cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
2516
2589
  },
2517
2590
  tool_calls: toolCalls,
2518
2591
  ...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
@@ -7942,7 +8015,12 @@ function translateContextToAnthropic(context) {
7942
8015
  /** Convert string or content block array to a uniform content block array. */
7943
8016
  function toContentBlocks(content) {
7944
8017
  if (typeof content === 'string') {
7945
- return [{ type: 'text', text: content }];
8018
+ const textBlock = {
8019
+ type: 'text',
8020
+ text: content,
8021
+ citations: null,
8022
+ };
8023
+ return [textBlock];
7946
8024
  }
7947
8025
  return content;
7948
8026
  }
@@ -8699,14 +8777,25 @@ async function createDeepseekCompletion(content, responseFormat, options = {}) {
8699
8777
  maxDelayMs: 30000,
8700
8778
  retryableErrors: isRetryableDeepseekError,
8701
8779
  }, `Deepseek:${normalizedModel}`);
8780
+ // DeepSeek surfaces cached input tokens in two places on the usage object:
8781
+ // - `prompt_cache_hit_tokens` (DeepSeek-native field, see
8782
+ // https://api-docs.deepseek.com/guides/kv_cache)
8783
+ // - `prompt_tokens_details.cached_tokens` (OpenAI-compatible alias)
8784
+ // Prefer the OpenAI-compatible name so a single canonical field works for
8785
+ // both providers; fall back to the DeepSeek-native name if absent.
8786
+ const usageRaw = completion.usage;
8787
+ const cachedTokens = usageRaw?.prompt_tokens_details?.cached_tokens ??
8788
+ usageRaw?.prompt_cache_hit_tokens ??
8789
+ 0;
8702
8790
  return {
8703
8791
  id: completion.id,
8704
8792
  content: completion.choices[0]?.message?.content || '',
8705
8793
  tool_calls: completion.choices[0]?.message?.tool_calls,
8706
- usage: completion.usage || {
8707
- prompt_tokens: 0,
8708
- completion_tokens: 0,
8709
- total_tokens: 0,
8794
+ usage: {
8795
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
8796
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
8797
+ total_tokens: completion.usage?.total_tokens ?? 0,
8798
+ cached_tokens: cachedTokens,
8710
8799
  },
8711
8800
  system_fingerprint: completion.system_fingerprint,
8712
8801
  provider: 'deepseek',
@@ -8748,7 +8837,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8748
8837
  reasoning_tokens: 0,
8749
8838
  provider: 'deepseek',
8750
8839
  model: modelName,
8751
- cache_hit_tokens: 0,
8840
+ cached_tokens: 0,
8752
8841
  cost: 0,
8753
8842
  },
8754
8843
  tool_calls: undefined,
@@ -8767,7 +8856,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8767
8856
  reasoning_tokens: 0,
8768
8857
  provider: 'deepseek',
8769
8858
  model: modelName,
8770
- cache_hit_tokens: 0,
8859
+ cached_tokens: 0,
8771
8860
  cost: 0,
8772
8861
  },
8773
8862
  tool_calls: undefined,
@@ -8775,8 +8864,9 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8775
8864
  }
8776
8865
  try {
8777
8866
  const completion = await createDeepseekCompletion(content, responseFormat, mergedOptions);
8778
- // Track cost in the global cost tracker
8779
- getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens);
8867
+ // Track cost in the global cost tracker. Pass cached tokens through so the
8868
+ // discounted cached-input pricing tier is applied.
8869
+ getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
8780
8870
  // Handle tool calls similarly to OpenAI
8781
8871
  if (completion.tool_calls && completion.tool_calls.length > 0) {
8782
8872
  const toolCallResponse = {
@@ -8794,9 +8884,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8794
8884
  reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
8795
8885
  provider: 'deepseek',
8796
8886
  model: completion.model,
8797
- cache_hit_tokens: 0, // Not provided directly in API response
8798
- cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, 0 // Cache hit tokens (not provided in the response)
8799
- ),
8887
+ cached_tokens: completion.usage.cached_tokens,
8888
+ cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
8800
8889
  },
8801
8890
  tool_calls: completion.tool_calls,
8802
8891
  };
@@ -8814,9 +8903,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8814
8903
  reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
8815
8904
  provider: 'deepseek',
8816
8905
  model: completion.model,
8817
- cache_hit_tokens: 0, // Not provided directly in API response
8818
- cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, 0 // Cache hit tokens (not provided in the response)
8819
- ),
8906
+ cached_tokens: completion.usage.cached_tokens,
8907
+ cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
8820
8908
  },
8821
8909
  tool_calls: completion.tool_calls,
8822
8910
  };
@@ -8834,7 +8922,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
8834
8922
  reasoning_tokens: 0,
8835
8923
  provider: 'deepseek',
8836
8924
  model: modelName,
8837
- cache_hit_tokens: 0,
8925
+ cached_tokens: 0,
8838
8926
  cost: 0,
8839
8927
  },
8840
8928
  tool_calls: undefined,
@@ -22733,11 +22821,11 @@ let poolConfig = DEFAULT_POOL_CONFIG;
22733
22821
  async function loadApolloModules() {
22734
22822
  if (typeof window === "undefined" || process.env.AWS_EXECUTION_ENV) {
22735
22823
  // Server-side (or Lambda): load the CommonJS‑based implementation.
22736
- return (await Promise.resolve().then(function () { return require('./apollo-client.server-BAuFJqgR.js'); }));
22824
+ return (await Promise.resolve().then(function () { return require('./apollo-client.server-Djh4v__C.js'); }));
22737
22825
  }
22738
22826
  else {
22739
22827
  // Client-side: load the ESM‑based implementation.
22740
- return (await Promise.resolve().then(function () { return require('./apollo-client.client-Cz-ZMwuK.js'); }));
22828
+ return (await Promise.resolve().then(function () { return require('./apollo-client.client-DVsbR05r.js'); }));
22741
22829
  }
22742
22830
  }
22743
22831
  /**
@@ -81448,4 +81536,4 @@ exports.withCorrelationId = withCorrelationId;
81448
81536
  exports.withMetrics = withMetrics;
81449
81537
  exports.withRateLimit = withRateLimit;
81450
81538
  exports.withRetry = withRetry;
81451
- //# sourceMappingURL=index-C3ihLNel.js.map
81539
+ //# sourceMappingURL=index-BVl0tRmx.js.map