@hyperspaceng/neural-ai 0.69.1 → 0.70.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +4 -1
  2. package/dist/env-api-keys.d.ts +9 -0
  3. package/dist/env-api-keys.d.ts.map +1 -1
  4. package/dist/env-api-keys.js +42 -31
  5. package/dist/env-api-keys.js.map +1 -1
  6. package/dist/models.d.ts +2 -1
  7. package/dist/models.d.ts.map +1 -1
  8. package/dist/models.generated.d.ts +298 -195
  9. package/dist/models.generated.d.ts.map +1 -1
  10. package/dist/models.generated.js +291 -200
  11. package/dist/models.generated.js.map +1 -1
  12. package/dist/models.js +7 -2
  13. package/dist/models.js.map +1 -1
  14. package/dist/providers/anthropic.d.ts.map +1 -1
  15. package/dist/providers/anthropic.js +35 -15
  16. package/dist/providers/anthropic.js.map +1 -1
  17. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  18. package/dist/providers/azure-openai-responses.js +7 -4
  19. package/dist/providers/azure-openai-responses.js.map +1 -1
  20. package/dist/providers/google-vertex.d.ts.map +1 -1
  21. package/dist/providers/google-vertex.js +34 -13
  22. package/dist/providers/google-vertex.js.map +1 -1
  23. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  24. package/dist/providers/openai-codex-responses.js +9 -8
  25. package/dist/providers/openai-codex-responses.js.map +1 -1
  26. package/dist/providers/openai-completions.d.ts.map +1 -1
  27. package/dist/providers/openai-completions.js +97 -45
  28. package/dist/providers/openai-completions.js.map +1 -1
  29. package/dist/providers/openai-responses.d.ts.map +1 -1
  30. package/dist/providers/openai-responses.js +26 -23
  31. package/dist/providers/openai-responses.js.map +1 -1
  32. package/dist/providers/simple-options.d.ts.map +1 -1
  33. package/dist/providers/simple-options.js +2 -0
  34. package/dist/providers/simple-options.js.map +1 -1
  35. package/dist/types.d.ts +35 -4
  36. package/dist/types.d.ts.map +1 -1
  37. package/dist/types.js.map +1 -1
  38. package/package.json +1 -1
@@ -78,20 +78,30 @@ export const streamOpenAICompletions = (model, context, options) => {
         if (nextParams !== undefined) {
             params = nextParams;
         }
+        const requestOptions = {
+            ...(options?.signal ? { signal: options.signal } : {}),
+            ...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
+            ...(options?.maxRetries !== undefined ? { maxRetries: options.maxRetries } : {}),
+        };
         const { data: openaiStream, response } = await client.chat.completions
-            .create(params, { signal: options?.signal })
+            .create(params, requestOptions)
             .withResponse();
         await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
         stream.push({ type: "start", partial: output });
         let currentBlock = null;
         const blocks = output.content;
-        const blockIndex = () => blocks.length - 1;
+        const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
+        const currentContentIndex = () => getContentIndex(currentBlock);
         const finishCurrentBlock = (block) => {
             if (block) {
+                const contentIndex = getContentIndex(block);
+                if (contentIndex === -1) {
+                    return;
+                }
                 if (block.type === "text") {
                     stream.push({
                         type: "text_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         content: block.text,
                         partial: output,
                     });
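A note on the `getContentIndex` change above: emitting `blocks.length - 1` assumes the block being finished is always the newest one in the content array, which stops holding once a new block can be pushed while events still refer to an older one. A minimal sketch of the failure mode (block shapes are simplified, names hypothetical):

```ts
// Sketch: why deriving the event index from array length can mislabel events.
type Block = { type: "text" | "toolCall" };
const blocks: Block[] = [];

const textBlock: Block = { type: "text" };
blocks.push(textBlock); // lives at index 0
const toolBlock: Block = { type: "toolCall" };
blocks.push(toolBlock); // lives at index 1

// Old approach: finishing textBlock now reports index 1 (the newest block),
// not 0. Resolving by identity reports the right slot, and -1 flags a block
// that was never added to the output at all, which the new guard skips.
const stale = blocks.length - 1;              // 1 (wrong for textBlock)
const byIdentity = blocks.indexOf(textBlock); // 0 (correct)
```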
@@ -99,19 +109,20 @@ export const streamOpenAICompletions = (model, context, options) => {
                 else if (block.type === "thinking") {
                     stream.push({
                         type: "thinking_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         content: block.thinking,
                         partial: output,
                     });
                 }
                 else if (block.type === "toolCall") {
                     block.arguments = parseStreamingJson(block.partialArgs);
-                    // Finalize in-place and strip the scratch buffer so replay only
+                    // Finalize in-place and strip the scratch buffers so replay only
                     // carries parsed arguments.
                     delete block.partialArgs;
+                    delete block.streamIndex;
                     stream.push({
                         type: "toolcall_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         toolCall: block,
                         partial: output,
                     });
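`parseStreamingJson`, used above to keep `arguments` usable while a tool call's JSON is still streaming in, is not shown in this diff. The sketch below illustrates the general best-effort technique such a helper typically uses; it is an assumption for illustration, not the package's actual implementation:

```ts
// Illustrative only: a best-effort parse of a truncated JSON prefix.
// The package's real parseStreamingJson may work differently.
function parsePartialJson(prefix: string): unknown {
    let inString = false;
    let escaped = false;
    const closers: string[] = [];
    for (const ch of prefix) {
        if (escaped) { escaped = false; continue; }
        if (inString && ch === "\\") { escaped = true; continue; }
        if (ch === '"') { inString = !inString; continue; }
        if (inString) continue;
        if (ch === "{") closers.push("}");
        else if (ch === "[") closers.push("]");
        else if (ch === "}" || ch === "]") closers.pop();
    }
    // Close an unterminated string, drop a trailing comma, unwind open containers.
    let candidate = prefix + (inString ? '"' : "");
    candidate = candidate.replace(/,\s*$/, "") + closers.reverse().join("");
    try {
        return JSON.parse(candidate);
    }
    catch {
        return {}; // not yet parseable; the caller retries on the next delta
    }
}

parsePartialJson('{"city": "Ber'); // { city: "Ber" }
```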
@@ -150,13 +161,13 @@ export const streamOpenAICompletions = (model, context, options) => {
                 finishCurrentBlock(currentBlock);
                 currentBlock = { type: "text", text: "" };
                 output.content.push(currentBlock);
-                stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+                stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
             }
             if (currentBlock.type === "text") {
                 currentBlock.text += choice.delta.content;
                 stream.push({
                     type: "text_delta",
-                    contentIndex: blockIndex(),
+                    contentIndex: currentContentIndex(),
                     delta: choice.delta.content,
                     partial: output,
                 });
@@ -187,14 +198,14 @@ export const streamOpenAICompletions = (model, context, options) => {
                     thinkingSignature: foundReasoningField,
                 };
                 output.content.push(currentBlock);
-                stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+                stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
             }
             if (currentBlock.type === "thinking") {
                 const delta = choice.delta[foundReasoningField];
                 currentBlock.thinking += delta;
                 stream.push({
                     type: "thinking_delta",
-                    contentIndex: blockIndex(),
+                    contentIndex: currentContentIndex(),
                     delta,
                     partial: output,
                 });
@@ -202,9 +213,11 @@ export const streamOpenAICompletions = (model, context, options) => {
             }
             if (choice?.delta?.tool_calls) {
                 for (const toolCall of choice.delta.tool_calls) {
-                    if (!currentBlock ||
-                        currentBlock.type !== "toolCall" ||
-                        (toolCall.id && currentBlock.id !== toolCall.id)) {
+                    const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
+                    const sameToolCall = currentBlock?.type === "toolCall" &&
+                        ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
+                            (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
+                    if (!sameToolCall) {
                         finishCurrentBlock(currentBlock);
                         currentBlock = {
                             type: "toolCall",
@@ -212,24 +225,34 @@ export const streamOpenAICompletions = (model, context, options) => {
                             name: toolCall.function?.name || "",
                             arguments: {},
                             partialArgs: "",
+                            streamIndex,
                         };
                         output.content.push(currentBlock);
-                        stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
+                        stream.push({
+                            type: "toolcall_start",
+                            contentIndex: getContentIndex(currentBlock),
+                            partial: output,
+                        });
                     }
-                    if (currentBlock.type === "toolCall") {
-                        if (toolCall.id)
-                            currentBlock.id = toolCall.id;
-                        if (toolCall.function?.name)
-                            currentBlock.name = toolCall.function.name;
+                    const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
+                    if (currentToolCallBlock) {
+                        if (!currentToolCallBlock.id && toolCall.id)
+                            currentToolCallBlock.id = toolCall.id;
+                        if (!currentToolCallBlock.name && toolCall.function?.name) {
+                            currentToolCallBlock.name = toolCall.function.name;
+                        }
+                        if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
+                            currentToolCallBlock.streamIndex = streamIndex;
+                        }
                         let delta = "";
                         if (toolCall.function?.arguments) {
                             delta = toolCall.function.arguments;
-                            currentBlock.partialArgs += toolCall.function.arguments;
-                            currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
+                            currentToolCallBlock.partialArgs += toolCall.function.arguments;
+                            currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
                         }
                         stream.push({
                             type: "toolcall_delta",
-                            contentIndex: blockIndex(),
+                            contentIndex: getContentIndex(currentToolCallBlock),
                             delta,
                             partial: output,
                         });
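The two hunks above switch tool-call correlation from `id` alone to the chunk's `index` field. In the OpenAI streaming format, `index` appears on every tool-call delta while `id` typically arrives only on a call's first chunk, so parallel calls can only be routed by index. A sketch of the chunk shapes this handles (values are illustrative):

```ts
// Illustrative delta chunks for two parallel tool calls.
const chunks = [
    { index: 0, id: "call_a", function: { name: "get_weather", arguments: "" } },
    { index: 1, id: "call_b", function: { name: "get_time", arguments: "" } },
    { index: 0, function: { arguments: '{"city":"Berlin"}' } }, // no id here
    { index: 1, function: { arguments: '{"zone":"CET"}' } },    // no id here
];
// Matching on id alone cannot route the last two chunks to the right call;
// matching on index assigns each argument fragment correctly.
```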
@@ -265,8 +288,9 @@ export const streamOpenAICompletions = (model, context, options) => {
     catch (error) {
         for (const block of output.content) {
             delete block.index;
-            // partialArgs is only a streaming scratch buffer; never persist it.
+            // Streaming scratch buffers are only used during parsing; never persist them.
             delete block.partialArgs;
+            delete block.streamIndex;
         }
         output.stopReason = options?.signal?.aborted ? "aborted" : "error";
         output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
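Taken together with the `requestOptions` change in the first hunk, callers can now forward cancellation, a timeout, and a retry budget end to end. A hypothetical caller sketch; the option names follow this diff, while the surrounding types are assumptions:

```ts
declare const model: unknown;   // a model descriptor, as used throughout this file
declare const context: unknown; // the conversation context
declare function streamOpenAICompletions(model: unknown, context: unknown, options?: {
    signal?: AbortSignal;
    timeoutMs?: number;
    maxRetries?: number;
}): unknown;

const controller = new AbortController();
const stream = streamOpenAICompletions(model, context, {
    signal: controller.signal, // forwarded to the SDK request options
    timeoutMs: 30_000,         // forwarded as the SDK's `timeout` option
    maxRetries: 2,             // forwarded as the SDK's `maxRetries` option
});
// controller.abort() ends the stream with stopReason "aborted" rather than "error".
```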
@@ -328,13 +352,16 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
 }
 function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
     const messages = convertMessages(model, context, compat);
-    const cacheControl = getCompatCacheControl(model, compat, cacheRetention);
+    const cacheControl = getCompatCacheControl(compat, cacheRetention);
     const params = {
         model: model.id,
         messages,
         stream: true,
-        prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" ? options?.sessionId : undefined,
-        prompt_cache_retention: model.baseUrl.includes("api.openai.com") && cacheRetention === "long" ? "24h" : undefined,
+        prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
+            (cacheRetention === "long" && compat.supportsLongCacheRetention)
+            ? options?.sessionId
+            : undefined,
+        prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
     };
     if (compat.supportsUsageInStreaming !== false) {
         params.stream_options = { include_usage: true };
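The `prompt_cache_key` / `prompt_cache_retention` refactor above replaces a hard-coded `api.openai.com` check with the new `compat.supportsLongCacheRetention` flag. A condensed, standalone restatement of the gating (the helper name is hypothetical):

```ts
// Condensed restatement of the gating above.
function cacheParams(baseUrl: string, cacheRetention: string, supportsLong: boolean, sessionId?: string) {
    const isOpenAI = baseUrl.includes("api.openai.com");
    const longOk = cacheRetention === "long" && supportsLong;
    return {
        prompt_cache_key: (isOpenAI && cacheRetention !== "none") || longOk ? sessionId : undefined,
        prompt_cache_retention: longOk ? ("24h" as const) : undefined,
    };
}

cacheParams("https://api.openai.com/v1", "long", true, "s1");
// → { prompt_cache_key: "s1", prompt_cache_retention: "24h" }
cacheParams("https://example.com/v1", "long", false, "s1");
// → both undefined: non-OpenAI hosts need the compat opt-in
```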
@@ -353,7 +380,7 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
     if (options?.temperature !== undefined) {
         params.temperature = options.temperature;
     }
-    if (context.tools) {
+    if (context.tools && context.tools.length > 0) {
         params.tools = convertTools(context.tools, compat);
         if (compat.zaiToolStream) {
             params.tool_stream = true;
@@ -381,6 +408,12 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
             preserve_thinking: true,
         };
     }
+    else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
+        params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+        if (options?.reasoningEffort) {
+            params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
+        }
+    }
     else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
         // OpenRouter normalizes reasoning across providers via a nested reasoning object.
         const openRouterParams = params;
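For the new `deepseek` thinking format above, a reasoning request would carry roughly the following extra fields; the model id and effort value here are illustrative, not taken from this diff:

```ts
// Illustrative params produced by the deepseek branch above.
const params: Record<string, unknown> = {
    model: "deepseek-reasoner",    // hypothetical model id
    thinking: { type: "enabled" }, // "disabled" when no reasoningEffort is set
    reasoning_effort: "high",      // via mapReasoningEffort; "max" only for xhigh
};
```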
@@ -418,11 +451,11 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
 function mapReasoningEffort(effort, reasoningEffortMap) {
     return reasoningEffortMap[effort] ?? effort;
 }
-function getCompatCacheControl(model, compat, cacheRetention) {
+function getCompatCacheControl(compat, cacheRetention) {
     if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
         return undefined;
     }
-    const ttl = cacheRetention === "long" && model.baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
+    const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
     return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
 }
 function applyAnthropicCacheControl(messages, tools, cacheControl) {
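With `getCompatCacheControl` now keyed off `compat.supportsLongCacheRetention` instead of the Anthropic hostname, the possible return shapes look like this (a sketch; only `"none"` and `"long"` retention values appear in this hunk):

```ts
declare function getCompatCacheControl(
    compat: { cacheControlFormat?: string; supportsLongCacheRetention?: boolean },
    cacheRetention: string,
): { type: "ephemeral"; ttl?: "1h" } | undefined;

const anthropicCompat = { cacheControlFormat: "anthropic", supportsLongCacheRetention: true };

getCompatCacheControl(anthropicCompat, "none"); // undefined: caching disabled entirely
getCompatCacheControl(anthropicCompat, "long"); // { type: "ephemeral", ttl: "1h" }
getCompatCacheControl({ ...anthropicCompat, supportsLongCacheRetention: false }, "long");
// { type: "ephemeral" }: long retention requested but unsupported, so no ttl
```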
@@ -631,6 +664,11 @@ export function convertMessages(model, context, compat) {
                 assistantMsg.reasoning_details = reasoningDetails;
             }
         }
+        if (compat.requiresReasoningContentOnAssistantMessages &&
+            model.reasoning &&
+            assistantMsg.reasoning_content === undefined) {
+            assistantMsg.reasoning_content = "";
+        }
         // Skip assistant messages that have no content and no tool calls.
         // Some providers require "either content or tool_calls, but not none".
         // Other providers also don't accept empty assistant messages.
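The backfill above runs just before the skip rule described in the trailing comments. A sketch of that rule as the comments state it (the helper name is hypothetical):

```ts
// Hypothetical restatement of the skip rule from the comments above.
const hasPayload = (m: { content?: string; tool_calls?: unknown[] }) =>
    Boolean(m.content) || (m.tool_calls?.length ?? 0) > 0;

hasPayload({ content: "Hello" }); // true, kept
hasPayload({ tool_calls: [{}] }); // true, kept
hasPayload({});                   // false, skipped; some providers reject it
```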
@@ -725,7 +763,6 @@ function parseChunkUsage(rawUsage, model) {
     const promptTokens = rawUsage.prompt_tokens || 0;
     const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
     const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
-    const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
     // Normalize to pi-ai semantics:
     // - cacheRead: hits from cache created by previous requests only
     // - cacheWrite: tokens written to cache in this request
@@ -733,9 +770,8 @@ function parseChunkUsage(rawUsage, model) {
     // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
     const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
     const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
-    // Compute totalTokens ourselves since we add reasoning_tokens to output
-    // and some providers (e.g., Groq) don't include them in total_tokens
-    const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
+    // OpenAI completion_tokens already includes reasoning_tokens.
+    const outputTokens = rawUsage.completion_tokens || 0;
     const usage = {
         input,
         output: outputTokens,
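A worked example of the normalization in the two usage hunks above, with illustrative numbers:

```ts
// Suppose a provider reports (values illustrative):
const rawUsage = {
    prompt_tokens: 1000,
    prompt_tokens_details: { cached_tokens: 800, cache_write_tokens: 200 },
    completion_tokens: 150, // per OpenAI semantics, already includes reasoning tokens
};
// cached_tokens may count previous hits plus current writes, so:
const cacheRead = Math.max(0, 800 - 200);    // 600 genuine cache hits
const input = Math.max(0, 1000 - 600 - 200); // 200 uncached input tokens
const output = 150; // no longer adds reasoning_tokens on top, fixing double counting
```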
@@ -791,16 +827,25 @@ function detectCompat(model) {
     const useMaxTokens = baseUrl.includes("chutes.ai");
     const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
     const isGroq = provider === "groq" || baseUrl.includes("groq.com");
+    const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
     const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
-    const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
+    const reasoningEffortMap = isDeepSeek
         ? {
-            minimal: "default",
-            low: "default",
-            medium: "default",
-            high: "default",
-            xhigh: "default",
+            minimal: "high",
+            low: "high",
+            medium: "high",
+            high: "high",
+            xhigh: "max",
         }
-        : {};
+        : isGroq && model.id === "qwen/qwen3-32b"
+            ? {
+                minimal: "default",
+                low: "default",
+                medium: "default",
+                high: "default",
+                xhigh: "default",
+            }
+            : {};
     return {
         supportsStore: !isNonStandard,
         supportsDeveloperRole: !isNonStandard,
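The new DeepSeek `reasoningEffortMap` above collapses every effort level below `xhigh` to `"high"` and reserves `"max"` for `xhigh`. Combined with `mapReasoningEffort` from earlier in this diff, the behavior is:

```ts
declare function mapReasoningEffort(effort: string, map: Record<string, string>): string;
const deepseekMap = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };

mapReasoningEffort("medium", deepseekMap); // "high"
mapReasoningEffort("xhigh", deepseekMap);  // "max"
mapReasoningEffort("medium", {});          // "medium" (unmapped efforts pass through)
```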
@@ -811,17 +856,21 @@ function detectCompat(model) {
         requiresToolResultName: false,
         requiresAssistantAfterToolResult: false,
         requiresThinkingAsText: false,
-        thinkingFormat: isZai
-            ? "zai"
-            : provider === "openrouter" || baseUrl.includes("openrouter.ai")
-                ? "openrouter"
-                : "openai",
+        requiresReasoningContentOnAssistantMessages: isDeepSeek,
+        thinkingFormat: isDeepSeek
+            ? "deepseek"
+            : isZai
+                ? "zai"
+                : provider === "openrouter" || baseUrl.includes("openrouter.ai")
+                    ? "openrouter"
+                    : "openai",
         openRouterRouting: {},
         vercelGatewayRouting: {},
         zaiToolStream: false,
         supportsStrictMode: true,
         cacheControlFormat,
         sendSessionAffinityHeaders: false,
+        supportsLongCacheRetention: true,
     };
 }
 /**
@@ -842,6 +891,8 @@ function getCompat(model) {
         requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
         requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
         requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
+        requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
+            detected.requiresReasoningContentOnAssistantMessages,
         thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
         openRouterRouting: model.compat.openRouterRouting ?? {},
         vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
@@ -849,6 +900,7 @@ function getCompat(model) {
         supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
         cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
         sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
+        supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
     };
 }
 //# sourceMappingURL=openai-completions.js.map
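A final note on the `getCompat` overrides in the last two hunks: they merge with `??` rather than `||`, so an explicit `false` in `model.compat` still beats the detected default. A minimal sketch:

```ts
const detected = { supportsLongCacheRetention: true };
const modelCompat: { supportsLongCacheRetention?: boolean } = { supportsLongCacheRetention: false };

const merged = modelCompat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention;
// merged === false; with || the falsy override would wrongly snap back to true
```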