@hyperspaceng/neural-ai 0.69.1 → 0.70.3
This diff shows the content of publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- package/README.md +4 -1
- package/dist/env-api-keys.d.ts +9 -0
- package/dist/env-api-keys.d.ts.map +1 -1
- package/dist/env-api-keys.js +42 -31
- package/dist/env-api-keys.js.map +1 -1
- package/dist/models.d.ts +2 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +298 -195
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +291 -200
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +7 -2
- package/dist/models.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +35 -15
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/azure-openai-responses.d.ts.map +1 -1
- package/dist/providers/azure-openai-responses.js +7 -4
- package/dist/providers/azure-openai-responses.js.map +1 -1
- package/dist/providers/google-vertex.d.ts.map +1 -1
- package/dist/providers/google-vertex.js +34 -13
- package/dist/providers/google-vertex.js.map +1 -1
- package/dist/providers/openai-codex-responses.d.ts.map +1 -1
- package/dist/providers/openai-codex-responses.js +9 -8
- package/dist/providers/openai-codex-responses.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +97 -45
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +26 -23
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/simple-options.d.ts.map +1 -1
- package/dist/providers/simple-options.js +2 -0
- package/dist/providers/simple-options.js.map +1 -1
- package/dist/types.d.ts +35 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/providers/openai-completions.js

@@ -78,20 +78,30 @@ export const streamOpenAICompletions = (model, context, options) => {
         if (nextParams !== undefined) {
             params = nextParams;
         }
+        const requestOptions = {
+            ...(options?.signal ? { signal: options.signal } : {}),
+            ...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
+            ...(options?.maxRetries !== undefined ? { maxRetries: options.maxRetries } : {}),
+        };
         const { data: openaiStream, response } = await client.chat.completions
-            .create(params,
+            .create(params, requestOptions)
             .withResponse();
         await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
         stream.push({ type: "start", partial: output });
         let currentBlock = null;
         const blocks = output.content;
-        const
+        const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
+        const currentContentIndex = () => getContentIndex(currentBlock);
         const finishCurrentBlock = (block) => {
             if (block) {
+                const contentIndex = getContentIndex(block);
+                if (contentIndex === -1) {
+                    return;
+                }
                 if (block.type === "text") {
                     stream.push({
                         type: "text_end",
-                        contentIndex
+                        contentIndex,
                         content: block.text,
                         partial: output,
                     });
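
The new `requestOptions` object relies on conditional spreads: each key is only present when the caller supplied a value, so the OpenAI SDK's own defaults apply otherwise. A minimal standalone sketch of the pattern (`callerOptions` is a hypothetical input, not a name from this package):

    // Only defined fields reach the SDK; absent keys fall back to SDK defaults.
    const callerOptions = { timeoutMs: 30000 };
    const requestOptions = {
        ...(callerOptions.signal ? { signal: callerOptions.signal } : {}),
        ...(callerOptions.timeoutMs !== undefined ? { timeout: callerOptions.timeoutMs } : {}),
        ...(callerOptions.maxRetries !== undefined ? { maxRetries: callerOptions.maxRetries } : {}),
    };
    console.log(requestOptions); // { timeout: 30000 } -- no signal or maxRetries key at all

Spreading `{}` in the negative case keeps the key absent entirely, which matters for clients that treat an explicit `undefined` differently from an unset option. The same hunk introduces `getContentIndex`, which derives event indices from a block's current position in `output.content` rather than a stored counter.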

@@ -99,19 +109,20 @@ export const streamOpenAICompletions = (model, context, options) => {
                 else if (block.type === "thinking") {
                     stream.push({
                         type: "thinking_end",
-                        contentIndex
+                        contentIndex,
                         content: block.thinking,
                         partial: output,
                     });
                 }
                 else if (block.type === "toolCall") {
                     block.arguments = parseStreamingJson(block.partialArgs);
-                    // Finalize in-place and strip the scratch
+                    // Finalize in-place and strip the scratch buffers so replay only
                     // carries parsed arguments.
                     delete block.partialArgs;
+                    delete block.streamIndex;
                     stream.push({
                         type: "toolcall_end",
-                        contentIndex
+                        contentIndex,
                         toolCall: block,
                         partial: output,
                     });

@@ -150,13 +161,13 @@ export const streamOpenAICompletions = (model, context, options) => {
                     finishCurrentBlock(currentBlock);
                     currentBlock = { type: "text", text: "" };
                     output.content.push(currentBlock);
-                    stream.push({ type: "text_start", contentIndex:
+                    stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
                 }
                 if (currentBlock.type === "text") {
                     currentBlock.text += choice.delta.content;
                     stream.push({
                         type: "text_delta",
-                        contentIndex:
+                        contentIndex: currentContentIndex(),
                         delta: choice.delta.content,
                         partial: output,
                     });

@@ -187,14 +198,14 @@ export const streamOpenAICompletions = (model, context, options) => {
                         thinkingSignature: foundReasoningField,
                     };
                     output.content.push(currentBlock);
-                    stream.push({ type: "thinking_start", contentIndex:
+                    stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
                 }
                 if (currentBlock.type === "thinking") {
                     const delta = choice.delta[foundReasoningField];
                     currentBlock.thinking += delta;
                     stream.push({
                         type: "thinking_delta",
-                        contentIndex:
+                        contentIndex: currentContentIndex(),
                         delta,
                         partial: output,
                     });

@@ -202,9 +213,11 @@ export const streamOpenAICompletions = (model, context, options) => {
                 }
                 if (choice?.delta?.tool_calls) {
                     for (const toolCall of choice.delta.tool_calls) {
-
-
-                        (
+                        const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
+                        const sameToolCall = currentBlock?.type === "toolCall" &&
+                            ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
+                                (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
+                        if (!sameToolCall) {
                             finishCurrentBlock(currentBlock);
                             currentBlock = {
                                 type: "toolCall",
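
The matching rule added here decides whether an incoming `tool_calls` delta continues the current block: the provider-supplied numeric `index` wins when present, and the tool call `id` is only consulted as a fallback. The same logic isolated as a predicate (block shape as in the diff; `belongsTo` is a hypothetical name):

    // True when a streamed tool_call delta belongs to `block`.
    function belongsTo(block, toolCall) {
        if (!block || block.type !== "toolCall") return false;
        const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
        if (streamIndex !== undefined) return block.streamIndex === streamIndex;
        return Boolean(toolCall.id) && block.id === toolCall.id;
    }

    const block = { type: "toolCall", id: "call_1", streamIndex: 0 };
    belongsTo(block, { index: 0 });               // true: matched by stream index
    belongsTo(block, { id: "call_1" });           // true: no index, matched by id
    belongsTo(block, { index: 1, id: "call_1" }); // false: an explicit index takes precedence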

@@ -212,24 +225,34 @@ export const streamOpenAICompletions = (model, context, options) => {
                                 name: toolCall.function?.name || "",
                                 arguments: {},
                                 partialArgs: "",
+                                streamIndex,
                             };
                             output.content.push(currentBlock);
-                            stream.push({
+                            stream.push({
+                                type: "toolcall_start",
+                                contentIndex: getContentIndex(currentBlock),
+                                partial: output,
+                            });
                         }
-
-
-
-
-
+                        const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
+                        if (currentToolCallBlock) {
+                            if (!currentToolCallBlock.id && toolCall.id)
+                                currentToolCallBlock.id = toolCall.id;
+                            if (!currentToolCallBlock.name && toolCall.function?.name) {
+                                currentToolCallBlock.name = toolCall.function.name;
+                            }
+                            if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
+                                currentToolCallBlock.streamIndex = streamIndex;
+                            }
                         let delta = "";
                         if (toolCall.function?.arguments) {
                             delta = toolCall.function.arguments;
-
-
+                            currentToolCallBlock.partialArgs += toolCall.function.arguments;
+                            currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
                         }
                         stream.push({
                             type: "toolcall_delta",
-                            contentIndex:
+                            contentIndex: getContentIndex(currentToolCallBlock),
                             delta,
                             partial: output,
                         });
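
`partialArgs` accumulates raw argument fragments across deltas, and `parseStreamingJson` (internal to this package) re-parses the buffer each time so `arguments` always reflects the latest parseable state. A deliberately simple stand-in, assuming only that incomplete fragments should not clobber the last good value (the real helper likely recovers partial objects as well):

    // Hypothetical stand-in for parseStreamingJson: parse complete JSON,
    // otherwise keep the previous result until more deltas arrive.
    function parsePartialJson(buffer, lastGood = {}) {
        try {
            return JSON.parse(buffer);
        } catch {
            return lastGood; // incomplete fragment
        }
    }

    let partialArgs = "";
    let args = {};
    for (const delta of ['{"city":', '"Berlin"', "}"]) {
        partialArgs += delta;
        args = parsePartialJson(partialArgs, args);
    }
    console.log(args); // { city: "Berlin" }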

@@ -265,8 +288,9 @@ export const streamOpenAICompletions = (model, context, options) => {
     catch (error) {
         for (const block of output.content) {
             delete block.index;
-            //
+            // Streaming scratch buffers are only used during parsing; never persist them.
             delete block.partialArgs;
+            delete block.streamIndex;
         }
         output.stopReason = options?.signal?.aborted ? "aborted" : "error";
         output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);

@@ -328,13 +352,16 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
 }
 function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
     const messages = convertMessages(model, context, compat);
-    const cacheControl = getCompatCacheControl(
+    const cacheControl = getCompatCacheControl(compat, cacheRetention);
     const params = {
         model: model.id,
         messages,
         stream: true,
-        prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none"
-
+        prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
+            (cacheRetention === "long" && compat.supportsLongCacheRetention)
+            ? options?.sessionId
+            : undefined,
+        prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
     };
     if (compat.supportsUsageInStreaming !== false) {
         params.stream_options = { include_usage: true };
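
The cache parameters now have two triggers: any non-"none" retention against api.openai.com, or `cacheRetention === "long"` on a compat profile that advertises `supportsLongCacheRetention`, with the latter also emitting `prompt_cache_retention: "24h"`. The decision in isolation, using the same names as the diff (argument shapes are assumed):

    function cacheParams(model, compat, options, cacheRetention) {
        const isOpenAI = model.baseUrl.includes("api.openai.com");
        const longRetention = cacheRetention === "long" && compat.supportsLongCacheRetention;
        return {
            prompt_cache_key: (isOpenAI && cacheRetention !== "none") || longRetention
                ? options?.sessionId
                : undefined,
            prompt_cache_retention: longRetention ? "24h" : undefined,
        };
    }

    cacheParams({ baseUrl: "https://api.openai.com/v1" },
        { supportsLongCacheRetention: true }, { sessionId: "abc" }, "long");
    // -> { prompt_cache_key: "abc", prompt_cache_retention: "24h" }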

@@ -353,7 +380,7 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
     if (options?.temperature !== undefined) {
         params.temperature = options.temperature;
     }
-    if (context.tools) {
+    if (context.tools && context.tools.length > 0) {
         params.tools = convertTools(context.tools, compat);
         if (compat.zaiToolStream) {
             params.tool_stream = true;

@@ -381,6 +408,12 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
             preserve_thinking: true,
         };
     }
+    else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
+        params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+        if (options?.reasoningEffort) {
+            params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
+        }
+    }
     else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
         // OpenRouter normalizes reasoning across providers via a nested reasoning object.
         const openRouterParams = params;
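
In the new DeepSeek branch, `thinking.type` toggles purely on whether a reasoning effort was requested, and the effort value is translated through the compat map. Extracted as a small function (hypothetical name, same logic):

    function deepseekThinkingParams(reasoningEffort, reasoningEffortMap) {
        const params = { thinking: { type: reasoningEffort ? "enabled" : "disabled" } };
        if (reasoningEffort) {
            params.reasoning_effort = reasoningEffortMap[reasoningEffort] ?? reasoningEffort;
        }
        return params;
    }

    deepseekThinkingParams("low", { low: "high" });
    // -> { thinking: { type: "enabled" }, reasoning_effort: "high" }
    deepseekThinkingParams(undefined, {});
    // -> { thinking: { type: "disabled" } }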

@@ -418,11 +451,11 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
 function mapReasoningEffort(effort, reasoningEffortMap) {
     return reasoningEffortMap[effort] ?? effort;
 }
-function getCompatCacheControl(
+function getCompatCacheControl(compat, cacheRetention) {
     if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
         return undefined;
     }
-    const ttl = cacheRetention === "long" &&
+    const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
     return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
 }
 function applyAnthropicCacheControl(messages, tools, cacheControl) {
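
With both parameters restored to the signature, `getCompatCacheControl` produces three possible results; a quick usage check under assumed compat shapes:

    getCompatCacheControl({ cacheControlFormat: "openai" }, "long");
    // -> undefined (not anthropic-format caching)
    getCompatCacheControl({ cacheControlFormat: "anthropic", supportsLongCacheRetention: false }, "long");
    // -> { type: "ephemeral" } (no ttl key without long-retention support)
    getCompatCacheControl({ cacheControlFormat: "anthropic", supportsLongCacheRetention: true }, "long");
    // -> { type: "ephemeral", ttl: "1h" }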

@@ -631,6 +664,11 @@ export function convertMessages(model, context, compat) {
                 assistantMsg.reasoning_details = reasoningDetails;
             }
         }
+        if (compat.requiresReasoningContentOnAssistantMessages &&
+            model.reasoning &&
+            assistantMsg.reasoning_content === undefined) {
+            assistantMsg.reasoning_content = "";
+        }
         // Skip assistant messages that have no content and no tool calls.
         // Some providers require "either content or tool_calls, but not none".
         // Other providers also don't accept empty assistant messages.
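
Some reasoning endpoints apparently reject assistant turns that lack a `reasoning_content` field outright, so the converter now backfills an empty string whenever the compat flag (added further down as `requiresReasoningContentOnAssistantMessages`) is set. Isolated, the transformation is:

    // Backfill sketch; condition identical to the diff.
    function backfillReasoningContent(assistantMsg, model, compat) {
        if (compat.requiresReasoningContentOnAssistantMessages &&
            model.reasoning &&
            assistantMsg.reasoning_content === undefined) {
            assistantMsg.reasoning_content = "";
        }
        return assistantMsg;
    }

    backfillReasoningContent({ role: "assistant", content: "Hi" },
        { reasoning: true }, { requiresReasoningContentOnAssistantMessages: true });
    // -> { role: "assistant", content: "Hi", reasoning_content: "" }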

@@ -725,7 +763,6 @@ function parseChunkUsage(rawUsage, model) {
     const promptTokens = rawUsage.prompt_tokens || 0;
     const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
     const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
-    const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
     // Normalize to pi-ai semantics:
     // - cacheRead: hits from cache created by previous requests only
     // - cacheWrite: tokens written to cache in this request

@@ -733,9 +770,8 @@ function parseChunkUsage(rawUsage, model) {
     // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
     const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
     const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
-    //
-
-    const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
+    // OpenAI completion_tokens already includes reasoning_tokens.
+    const outputTokens = rawUsage.completion_tokens || 0;
     const usage = {
         input,
         output: outputTokens,
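
The normalization is easiest to sanity-check with concrete numbers. Using the diff's formulas and hypothetical figures:

    // 1000 prompt tokens, 300 reported cached, 100 written to cache this request.
    const promptTokens = 1000, reportedCachedTokens = 300, cacheWriteTokens = 100;
    const cacheReadTokens = cacheWriteTokens > 0
        ? Math.max(0, reportedCachedTokens - cacheWriteTokens) // 200: drop this request's own writes
        : reportedCachedTokens;
    const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens); // 700

    // Output is now completion_tokens as-is: per the removed line above, adding
    // reasoning_tokens on top double-counted them, since OpenAI already includes
    // reasoning tokens inside completion_tokens.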

@@ -791,16 +827,25 @@ function detectCompat(model) {
     const useMaxTokens = baseUrl.includes("chutes.ai");
     const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
     const isGroq = provider === "groq" || baseUrl.includes("groq.com");
+    const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
     const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
-    const reasoningEffortMap =
+    const reasoningEffortMap = isDeepSeek
         ? {
-            minimal: "
-            low: "
-            medium: "
-            high: "
-            xhigh: "
+            minimal: "high",
+            low: "high",
+            medium: "high",
+            high: "high",
+            xhigh: "max",
         }
-        :
+        : isGroq && model.id === "qwen/qwen3-32b"
+            ? {
+                minimal: "default",
+                low: "default",
+                medium: "default",
+                high: "default",
+                xhigh: "default",
+            }
+            : {};
     return {
         supportsStore: !isNonStandard,
         supportsDeveloperRole: !isNonStandard,
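
Combined with `mapReasoningEffort` from the earlier hunk, these maps collapse the library's five effort levels into what each backend accepts, and pass efforts through unchanged when no map applies:

    function mapReasoningEffort(effort, reasoningEffortMap) {
        return reasoningEffortMap[effort] ?? effort;
    }

    const deepseekMap = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };
    mapReasoningEffort("medium", deepseekMap); // "high"
    mapReasoningEffort("xhigh", deepseekMap);  // "max"
    mapReasoningEffort("medium", {});          // "medium" -- identity when the map is empty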

@@ -811,17 +856,21 @@ function detectCompat(model) {
         requiresToolResultName: false,
         requiresAssistantAfterToolResult: false,
         requiresThinkingAsText: false,
-
-
-
-
-
+        requiresReasoningContentOnAssistantMessages: isDeepSeek,
+        thinkingFormat: isDeepSeek
+            ? "deepseek"
+            : isZai
+                ? "zai"
+                : provider === "openrouter" || baseUrl.includes("openrouter.ai")
+                    ? "openrouter"
+                    : "openai",
         openRouterRouting: {},
         vercelGatewayRouting: {},
         zaiToolStream: false,
         supportsStrictMode: true,
         cacheControlFormat,
         sendSessionAffinityHeaders: false,
+        supportsLongCacheRetention: true,
     };
 }
 /**

@@ -842,6 +891,8 @@ function getCompat(model) {
         requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
         requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
         requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
+        requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
+            detected.requiresReasoningContentOnAssistantMessages,
         thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
         openRouterRouting: model.compat.openRouterRouting ?? {},
         vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,

@@ -849,6 +900,7 @@ function getCompat(model) {
         supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
         cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
         sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
+        supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
     };
 }
 //# sourceMappingURL=openai-completions.js.map