@adaptic/lumic-utils 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{apollo-client.client-kEvzgHxw.js → apollo-client.client-DRk6kygw.js} +4 -4
- package/dist/{apollo-client.client-kEvzgHxw.js.map → apollo-client.client-DRk6kygw.js.map} +1 -1
- package/dist/{apollo-client.client-Cz-ZMwuK.js → apollo-client.client-DVsbR05r.js} +3 -3
- package/dist/{apollo-client.client-Cz-ZMwuK.js.map → apollo-client.client-DVsbR05r.js.map} +1 -1
- package/dist/{apollo-client.server-BAuFJqgR.js → apollo-client.server-Djh4v__C.js} +3 -3
- package/dist/{apollo-client.server-BAuFJqgR.js.map → apollo-client.server-Djh4v__C.js.map} +1 -1
- package/dist/{apollo-client.server-C2gZgUkR.js → apollo-client.server-L8JR2ko_.js} +3 -3
- package/dist/{apollo-client.server-C2gZgUkR.js.map → apollo-client.server-L8JR2ko_.js.map} +1 -1
- package/dist/{index-C3ihLNel.js → index-BVl0tRmx.js} +126 -38
- package/dist/{index-C3ihLNel.js.map → index-BVl0tRmx.js.map} +1 -1
- package/dist/{index-UQOI_SLD.js → index-CSOg0U0R.js} +126 -38
- package/dist/{index-UQOI_SLD.js.map → index-CSOg0U0R.js.map} +1 -1
- package/dist/{index-C_0vRRAD.js → index-Cs56Fq24.js} +2 -2
- package/dist/{index-C_0vRRAD.js.map → index-Cs56Fq24.js.map} +1 -1
- package/dist/{index-27SewDPi.js → index-eU6Q74W8.js} +2 -2
- package/dist/{index-27SewDPi.js.map → index-eU6Q74W8.js.map} +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.mjs +1 -1
- package/dist/test.cjs +1 -1
- package/dist/test.mjs +1 -1
- package/dist/types/types/openai-types.d.ts +8 -1
- package/package.json +1 -1
|
@@ -2106,15 +2106,33 @@ function resetLLMCostTracker() {
|
|
|
2106
2106
|
// llm-openai.ts
|
|
2107
2107
|
/**
|
|
2108
2108
|
* Determines if an LLM error should be retried.
|
|
2109
|
-
*
|
|
2109
|
+
*
|
|
2110
|
+
* Retries on:
|
|
2111
|
+
* - 429 / rate limit errors (transient capacity)
|
|
2112
|
+
* - "could not parse the JSON body" 400s — observed once in production for a
|
|
2113
|
+
* single symbol on the very first conversation turn (Wave 86, 2026-04-11).
|
|
2114
|
+
* The exact same call site succeeds millions of times before and after, and
|
|
2115
|
+
* the prior fix commit `6eaef52` in this repo already eliminated the only
|
|
2116
|
+
* known SDK-v5 cause (passing `tools: undefined/null`). The remaining cases
|
|
2117
|
+
* are virtually always proxy/network corruption of the request body in
|
|
2118
|
+
* flight (request truncated mid-flight, TLS renegotiation, edge proxy
|
|
2119
|
+
* buffer reset). Retrying once with a fresh connection has a high
|
|
2120
|
+
* probability of recovering, and a deterministic SDK-side defect would
|
|
2121
|
+
* re-fail on retry (so we still surface it).
|
|
2110
2122
|
*/
|
|
2111
2123
|
const isRetryableLLMError = (error) => {
|
|
2112
2124
|
if (error instanceof Error) {
|
|
2113
2125
|
const message = error.message;
|
|
2114
|
-
// Retry
|
|
2126
|
+
// Retry on rate limits (429)
|
|
2115
2127
|
if (message.includes('429') || message.includes('rate limit') || message.includes('Rate limit')) {
|
|
2116
2128
|
return true;
|
|
2117
2129
|
}
|
|
2130
|
+
// Retry on transient body-corruption 400s. Match the exact OpenAI error
|
|
2131
|
+
// string to avoid retrying genuine client-side validation 400s (which
|
|
2132
|
+
// would re-fail forever and waste retry budget).
|
|
2133
|
+
if (message.includes('could not parse the JSON body of your request')) {
|
|
2134
|
+
return true;
|
|
2135
|
+
}
|
|
2118
2136
|
}
|
|
2119
2137
|
return false;
|
|
2120
2138
|
};
|
|
@@ -2310,20 +2328,66 @@ async function createCompletion(content, responseFormat, options = DEFAULT_OPTIO
|
|
|
2310
2328
|
if (responseFormatOption.type !== 'text') {
|
|
2311
2329
|
queryOptions.response_format = responseFormatOption;
|
|
2312
2330
|
}
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
|
|
2317
|
-
|
|
2318
|
-
|
|
2331
|
+
let completion;
|
|
2332
|
+
try {
|
|
2333
|
+
completion = await withRetry(() => openai.chat.completions.create(queryOptions), {
|
|
2334
|
+
maxRetries: 3,
|
|
2335
|
+
baseDelayMs: 2000,
|
|
2336
|
+
maxDelayMs: 30000,
|
|
2337
|
+
retryableErrors: isRetryableLLMError,
|
|
2338
|
+
}, `OpenAI:${normalizedModel}`);
|
|
2339
|
+
}
|
|
2340
|
+
catch (error) {
|
|
2341
|
+
// Defensive observability: when the OpenAI SDK rejects our request,
|
|
2342
|
+
// emit a structured snapshot of the queryOptions shape (NOT content) so
|
|
2343
|
+
// a future recurrence of the rare "could not parse JSON body" 400 can be
|
|
2344
|
+
// diagnosed without having to reproduce locally. We deliberately log
|
|
2345
|
+
// metadata only — no message content, no API key — so this is safe even
|
|
2346
|
+
// for production prompts containing sensitive context.
|
|
2347
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
2348
|
+
const totalContentChars = messages.reduce((sum, msg) => {
|
|
2349
|
+
if (typeof msg.content === 'string')
|
|
2350
|
+
return sum + msg.content.length;
|
|
2351
|
+
if (Array.isArray(msg.content)) {
|
|
2352
|
+
return sum + msg.content.reduce((s, part) => {
|
|
2353
|
+
if (typeof part === 'object' && part !== null && 'text' in part && typeof part.text === 'string') {
|
|
2354
|
+
return s + part.text.length;
|
|
2355
|
+
}
|
|
2356
|
+
return s;
|
|
2357
|
+
}, 0);
|
|
2358
|
+
}
|
|
2359
|
+
return sum;
|
|
2360
|
+
}, 0);
|
|
2361
|
+
getLumicLogger().error(`OpenAI ChatCompletion call failed for model ${normalizedModel}`, {
|
|
2362
|
+
model: normalizedModel,
|
|
2363
|
+
errorMessage,
|
|
2364
|
+
messageCount: messages.length,
|
|
2365
|
+
roleBreakdown: messages.reduce((acc, msg) => {
|
|
2366
|
+
acc[msg.role] = (acc[msg.role] ?? 0) + 1;
|
|
2367
|
+
return acc;
|
|
2368
|
+
}, {}),
|
|
2369
|
+
totalContentChars,
|
|
2370
|
+
toolCount: queryOptions.tools?.length ?? 0,
|
|
2371
|
+
hasTemperature: queryOptions.temperature !== undefined,
|
|
2372
|
+
hasResponseFormat: queryOptions.response_format !== undefined,
|
|
2373
|
+
hasMaxCompletionTokens: queryOptions.max_completion_tokens !== undefined,
|
|
2374
|
+
});
|
|
2375
|
+
throw error;
|
|
2376
|
+
}
|
|
2377
|
+
// OpenAI returns cached input tokens under `prompt_tokens_details.cached_tokens`
|
|
2378
|
+
// when prompts >1024 tokens hit the automatic prompt cache. We surface this
|
|
2379
|
+
// as a first-class field so cost tracking and dashboards reflect the real
|
|
2380
|
+
// (discounted) input cost rather than billing every input token at full rate.
|
|
2381
|
+
const cachedTokens = completion.usage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
2319
2382
|
const response = {
|
|
2320
2383
|
id: completion.id,
|
|
2321
2384
|
content: completion.choices[0]?.message?.content || '',
|
|
2322
2385
|
tool_calls: completion.choices[0]?.message?.tool_calls,
|
|
2323
|
-
usage:
|
|
2324
|
-
prompt_tokens: 0,
|
|
2325
|
-
completion_tokens: 0,
|
|
2326
|
-
total_tokens: 0,
|
|
2386
|
+
usage: {
|
|
2387
|
+
prompt_tokens: completion.usage?.prompt_tokens ?? 0,
|
|
2388
|
+
completion_tokens: completion.usage?.completion_tokens ?? 0,
|
|
2389
|
+
total_tokens: completion.usage?.total_tokens ?? 0,
|
|
2390
|
+
cached_tokens: cachedTokens,
|
|
2327
2391
|
},
|
|
2328
2392
|
system_fingerprint: completion.system_fingerprint,
|
|
2329
2393
|
service_tier: options.service_tier,
|
|
@@ -2346,8 +2410,10 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
|
|
|
2346
2410
|
...options,
|
|
2347
2411
|
};
|
|
2348
2412
|
const completion = await createCompletion(content, responseFormat, mergedOptions);
|
|
2349
|
-
// Track cost in the global cost tracker
|
|
2350
|
-
|
|
2413
|
+
// Track cost in the global cost tracker. Pass cached tokens through so the
|
|
2414
|
+
// tracker applies the discounted cached-input rate (typically ~50% of the
|
|
2415
|
+
// standard input rate) instead of billing every input token at full price.
|
|
2416
|
+
getLLMCostTracker().trackUsage('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
|
|
2351
2417
|
// Handle tool calls differently
|
|
2352
2418
|
if (completion.tool_calls && completion.tool_calls.length > 0) {
|
|
2353
2419
|
const toolCallResponse = {
|
|
@@ -2365,7 +2431,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
|
|
|
2365
2431
|
reasoning_tokens: 0,
|
|
2366
2432
|
provider: 'openai',
|
|
2367
2433
|
model: completion.model,
|
|
2368
|
-
|
|
2434
|
+
cached_tokens: completion.usage.cached_tokens,
|
|
2435
|
+
cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
|
|
2369
2436
|
},
|
|
2370
2437
|
tool_calls: completion.tool_calls,
|
|
2371
2438
|
};
|
|
@@ -2383,7 +2450,8 @@ const makeOpenAIChatCompletionCall = async (content, responseFormat = 'text', op
|
|
|
2383
2450
|
reasoning_tokens: 0,
|
|
2384
2451
|
provider: 'openai',
|
|
2385
2452
|
model: completion.model,
|
|
2386
|
-
|
|
2453
|
+
cached_tokens: completion.usage.cached_tokens,
|
|
2454
|
+
cost: calculateCost('openai', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
|
|
2387
2455
|
},
|
|
2388
2456
|
tool_calls: completion.tool_calls,
|
|
2389
2457
|
};
|
|
@@ -2438,8 +2506,11 @@ const makeResponsesAPICall = async (input, options = {}) => {
|
|
|
2438
2506
|
maxDelayMs: 30000,
|
|
2439
2507
|
retryableErrors: isRetryableLLMError,
|
|
2440
2508
|
}, `OpenAI-Responses:${normalizedModel}`);
|
|
2509
|
+
// Responses API exposes cached input tokens under `input_tokens_details.cached_tokens`
|
|
2510
|
+
// (the equivalent of Chat Completions' `prompt_tokens_details.cached_tokens`).
|
|
2511
|
+
const responsesCachedTokens = response.usage?.input_tokens_details?.cached_tokens || 0;
|
|
2441
2512
|
// Track cost in the global cost tracker
|
|
2442
|
-
getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0);
|
|
2513
|
+
getLLMCostTracker().trackUsage('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens);
|
|
2443
2514
|
// Extract tool calls from the output
|
|
2444
2515
|
const toolCalls = response.output
|
|
2445
2516
|
?.filter((item) => item.type === 'function_call')
|
|
@@ -2480,7 +2551,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
|
|
|
2480
2551
|
reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
|
|
2481
2552
|
provider: 'openai',
|
|
2482
2553
|
model: normalizedModel,
|
|
2483
|
-
|
|
2554
|
+
cached_tokens: responsesCachedTokens,
|
|
2555
|
+
cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
|
|
2484
2556
|
},
|
|
2485
2557
|
tool_calls: toolCalls,
|
|
2486
2558
|
...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
|
|
@@ -2512,7 +2584,8 @@ const makeResponsesAPICall = async (input, options = {}) => {
|
|
|
2512
2584
|
reasoning_tokens: response.usage?.output_tokens_details?.reasoning_tokens || 0,
|
|
2513
2585
|
provider: 'openai',
|
|
2514
2586
|
model: normalizedModel,
|
|
2515
|
-
|
|
2587
|
+
cached_tokens: responsesCachedTokens,
|
|
2588
|
+
cost: calculateCost('openai', normalizedModel, response.usage?.input_tokens || 0, response.usage?.output_tokens || 0, response.usage?.output_tokens_details?.reasoning_tokens || 0, responsesCachedTokens),
|
|
2516
2589
|
},
|
|
2517
2590
|
tool_calls: toolCalls,
|
|
2518
2591
|
...(codeInterpreterOutputs ? { code_interpreter_outputs: codeInterpreterOutputs } : {}),
|
|
@@ -7942,7 +8015,12 @@ function translateContextToAnthropic(context) {
|
|
|
7942
8015
|
/** Convert string or content block array to a uniform content block array. */
|
|
7943
8016
|
function toContentBlocks(content) {
|
|
7944
8017
|
if (typeof content === 'string') {
|
|
7945
|
-
|
|
8018
|
+
const textBlock = {
|
|
8019
|
+
type: 'text',
|
|
8020
|
+
text: content,
|
|
8021
|
+
citations: null,
|
|
8022
|
+
};
|
|
8023
|
+
return [textBlock];
|
|
7946
8024
|
}
|
|
7947
8025
|
return content;
|
|
7948
8026
|
}
|
|
@@ -8699,14 +8777,25 @@ async function createDeepseekCompletion(content, responseFormat, options = {}) {
|
|
|
8699
8777
|
maxDelayMs: 30000,
|
|
8700
8778
|
retryableErrors: isRetryableDeepseekError,
|
|
8701
8779
|
}, `Deepseek:${normalizedModel}`);
|
|
8780
|
+
// DeepSeek surfaces cached input tokens in two places on the usage object:
|
|
8781
|
+
// - `prompt_cache_hit_tokens` (DeepSeek-native field, see
|
|
8782
|
+
// https://api-docs.deepseek.com/guides/kv_cache)
|
|
8783
|
+
// - `prompt_tokens_details.cached_tokens` (OpenAI-compatible alias)
|
|
8784
|
+
// Prefer the OpenAI-compatible name so a single canonical field works for
|
|
8785
|
+
// both providers; fall back to the DeepSeek-native name if absent.
|
|
8786
|
+
const usageRaw = completion.usage;
|
|
8787
|
+
const cachedTokens = usageRaw?.prompt_tokens_details?.cached_tokens ??
|
|
8788
|
+
usageRaw?.prompt_cache_hit_tokens ??
|
|
8789
|
+
0;
|
|
8702
8790
|
return {
|
|
8703
8791
|
id: completion.id,
|
|
8704
8792
|
content: completion.choices[0]?.message?.content || '',
|
|
8705
8793
|
tool_calls: completion.choices[0]?.message?.tool_calls,
|
|
8706
|
-
usage:
|
|
8707
|
-
prompt_tokens: 0,
|
|
8708
|
-
completion_tokens: 0,
|
|
8709
|
-
total_tokens: 0,
|
|
8794
|
+
usage: {
|
|
8795
|
+
prompt_tokens: completion.usage?.prompt_tokens ?? 0,
|
|
8796
|
+
completion_tokens: completion.usage?.completion_tokens ?? 0,
|
|
8797
|
+
total_tokens: completion.usage?.total_tokens ?? 0,
|
|
8798
|
+
cached_tokens: cachedTokens,
|
|
8710
8799
|
},
|
|
8711
8800
|
system_fingerprint: completion.system_fingerprint,
|
|
8712
8801
|
provider: 'deepseek',
|
|
@@ -8748,7 +8837,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
|
|
|
8748
8837
|
reasoning_tokens: 0,
|
|
8749
8838
|
provider: 'deepseek',
|
|
8750
8839
|
model: modelName,
|
|
8751
|
-
|
|
8840
|
+
cached_tokens: 0,
|
|
8752
8841
|
cost: 0,
|
|
8753
8842
|
},
|
|
8754
8843
|
tool_calls: undefined,
|
|
@@ -8767,7 +8856,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
|
|
|
8767
8856
|
reasoning_tokens: 0,
|
|
8768
8857
|
provider: 'deepseek',
|
|
8769
8858
|
model: modelName,
|
|
8770
|
-
|
|
8859
|
+
cached_tokens: 0,
|
|
8771
8860
|
cost: 0,
|
|
8772
8861
|
},
|
|
8773
8862
|
tool_calls: undefined,
|
|
@@ -8775,8 +8864,9 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
|
|
|
8775
8864
|
}
|
|
8776
8865
|
try {
|
|
8777
8866
|
const completion = await createDeepseekCompletion(content, responseFormat, mergedOptions);
|
|
8778
|
-
// Track cost in the global cost tracker
|
|
8779
|
-
|
|
8867
|
+
// Track cost in the global cost tracker. Pass cached tokens through so the
|
|
8868
|
+
// discounted cached-input pricing tier is applied.
|
|
8869
|
+
getLLMCostTracker().trackUsage('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens);
|
|
8780
8870
|
// Handle tool calls similarly to OpenAI
|
|
8781
8871
|
if (completion.tool_calls && completion.tool_calls.length > 0) {
|
|
8782
8872
|
const toolCallResponse = {
|
|
@@ -8794,9 +8884,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
|
|
|
8794
8884
|
reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
|
|
8795
8885
|
provider: 'deepseek',
|
|
8796
8886
|
model: completion.model,
|
|
8797
|
-
|
|
8798
|
-
cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0,
|
|
8799
|
-
),
|
|
8887
|
+
cached_tokens: completion.usage.cached_tokens,
|
|
8888
|
+
cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
|
|
8800
8889
|
},
|
|
8801
8890
|
tool_calls: completion.tool_calls,
|
|
8802
8891
|
};
|
|
@@ -8814,9 +8903,8 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
|
|
|
8814
8903
|
reasoning_tokens: 0, // Deepseek doesn't provide reasoning tokens separately
|
|
8815
8904
|
provider: 'deepseek',
|
|
8816
8905
|
model: completion.model,
|
|
8817
|
-
|
|
8818
|
-
cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0,
|
|
8819
|
-
),
|
|
8906
|
+
cached_tokens: completion.usage.cached_tokens,
|
|
8907
|
+
cost: calculateCost('deepseek', completion.model, completion.usage.prompt_tokens, completion.usage.completion_tokens, 0, completion.usage.cached_tokens),
|
|
8820
8908
|
},
|
|
8821
8909
|
tool_calls: completion.tool_calls,
|
|
8822
8910
|
};
|
|
@@ -8834,7 +8922,7 @@ const makeDeepseekCall = async (content, responseFormat = 'json', options = {})
|
|
|
8834
8922
|
reasoning_tokens: 0,
|
|
8835
8923
|
provider: 'deepseek',
|
|
8836
8924
|
model: modelName,
|
|
8837
|
-
|
|
8925
|
+
cached_tokens: 0,
|
|
8838
8926
|
cost: 0,
|
|
8839
8927
|
},
|
|
8840
8928
|
tool_calls: undefined,
|
|
@@ -22733,11 +22821,11 @@ let poolConfig = DEFAULT_POOL_CONFIG;
|
|
|
22733
22821
|
async function loadApolloModules() {
|
|
22734
22822
|
if (typeof window === "undefined" || process.env.AWS_EXECUTION_ENV) {
|
|
22735
22823
|
// Server-side (or Lambda): load the CommonJS‑based implementation.
|
|
22736
|
-
return (await Promise.resolve().then(function () { return require('./apollo-client.server-BAuFJqgR.js'); }));
|
|
22824
|
+
return (await Promise.resolve().then(function () { return require('./apollo-client.server-Djh4v__C.js'); }));
|
|
22737
22825
|
}
|
|
22738
22826
|
else {
|
|
22739
22827
|
// Client-side: load the ESM‑based implementation.
|
|
22740
|
-
return (await Promise.resolve().then(function () { return require('./apollo-client.client-Cz-ZMwuK.js'); }));
|
|
22828
|
+
return (await Promise.resolve().then(function () { return require('./apollo-client.client-DVsbR05r.js'); }));
|
|
22741
22829
|
}
|
|
22742
22830
|
}
|
|
22743
22831
|
/**
|
|
@@ -81448,4 +81536,4 @@ exports.withCorrelationId = withCorrelationId;
|
|
|
81448
81536
|
exports.withMetrics = withMetrics;
|
|
81449
81537
|
exports.withRateLimit = withRateLimit;
|
|
81450
81538
|
exports.withRetry = withRetry;
|
|
81451
|
-
//# sourceMappingURL=index-C3ihLNel.js.map
|
|
81539
|
+
//# sourceMappingURL=index-BVl0tRmx.js.map
|