@mariozechner/pi-ai 0.69.0 → 0.70.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +4 -1
  2. package/dist/env-api-keys.d.ts +9 -0
  3. package/dist/env-api-keys.d.ts.map +1 -1
  4. package/dist/env-api-keys.js +42 -31
  5. package/dist/env-api-keys.js.map +1 -1
  6. package/dist/models.d.ts +1 -1
  7. package/dist/models.d.ts.map +1 -1
  8. package/dist/models.generated.d.ts +282 -19
  9. package/dist/models.generated.d.ts.map +1 -1
  10. package/dist/models.generated.js +278 -47
  11. package/dist/models.generated.js.map +1 -1
  12. package/dist/models.js +5 -2
  13. package/dist/models.js.map +1 -1
  14. package/dist/providers/anthropic.d.ts.map +1 -1
  15. package/dist/providers/anthropic.js +33 -12
  16. package/dist/providers/anthropic.js.map +1 -1
  17. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  18. package/dist/providers/azure-openai-responses.js +5 -1
  19. package/dist/providers/azure-openai-responses.js.map +1 -1
  20. package/dist/providers/google-vertex.d.ts.map +1 -1
  21. package/dist/providers/google-vertex.js +34 -13
  22. package/dist/providers/google-vertex.js.map +1 -1
  23. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  24. package/dist/providers/openai-codex-responses.js +8 -7
  25. package/dist/providers/openai-codex-responses.js.map +1 -1
  26. package/dist/providers/openai-completions.d.ts.map +1 -1
  27. package/dist/providers/openai-completions.js +95 -44
  28. package/dist/providers/openai-completions.js.map +1 -1
  29. package/dist/providers/openai-responses.d.ts.map +1 -1
  30. package/dist/providers/openai-responses.js +24 -20
  31. package/dist/providers/openai-responses.js.map +1 -1
  32. package/dist/providers/simple-options.d.ts.map +1 -1
  33. package/dist/providers/simple-options.js +2 -0
  34. package/dist/providers/simple-options.js.map +1 -1
  35. package/dist/types.d.ts +35 -4
  36. package/dist/types.d.ts.map +1 -1
  37. package/dist/types.js.map +1 -1
  38. package/package.json +1 -1
@@ -79,19 +79,28 @@ export const streamOpenAICompletions = (model, context, options) => {
             params = nextParams;
         }
         const { data: openaiStream, response } = await client.chat.completions
-            .create(params, { signal: options?.signal })
+            .create(params, {
+                signal: options?.signal,
+                timeout: options?.timeoutMs,
+                maxRetries: options?.maxRetries,
+            })
             .withResponse();
         await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
         stream.push({ type: "start", partial: output });
         let currentBlock = null;
         const blocks = output.content;
-        const blockIndex = () => blocks.length - 1;
+        const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
+        const currentContentIndex = () => getContentIndex(currentBlock);
         const finishCurrentBlock = (block) => {
             if (block) {
+                const contentIndex = getContentIndex(block);
+                if (contentIndex === -1) {
+                    return;
+                }
                 if (block.type === "text") {
                     stream.push({
                         type: "text_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         content: block.text,
                         partial: output,
                     });
@@ -99,19 +108,20 @@ export const streamOpenAICompletions = (model, context, options) => {
                 else if (block.type === "thinking") {
                     stream.push({
                         type: "thinking_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         content: block.thinking,
                         partial: output,
                     });
                 }
                 else if (block.type === "toolCall") {
                     block.arguments = parseStreamingJson(block.partialArgs);
-                    // Finalize in-place and strip the scratch buffer so replay only
+                    // Finalize in-place and strip the scratch buffers so replay only
                     // carries parsed arguments.
                     delete block.partialArgs;
+                    delete block.streamIndex;
                     stream.push({
                         type: "toolcall_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         toolCall: block,
                         partial: output,
                     });
@@ -150,13 +160,13 @@ export const streamOpenAICompletions = (model, context, options) => {
                     finishCurrentBlock(currentBlock);
                     currentBlock = { type: "text", text: "" };
                     output.content.push(currentBlock);
-                    stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+                    stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
                 }
                 if (currentBlock.type === "text") {
                     currentBlock.text += choice.delta.content;
                     stream.push({
                         type: "text_delta",
-                        contentIndex: blockIndex(),
+                        contentIndex: currentContentIndex(),
                         delta: choice.delta.content,
                         partial: output,
                     });
@@ -187,14 +197,14 @@ export const streamOpenAICompletions = (model, context, options) => {
                         thinkingSignature: foundReasoningField,
                     };
                     output.content.push(currentBlock);
-                    stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+                    stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
                 }
                 if (currentBlock.type === "thinking") {
                     const delta = choice.delta[foundReasoningField];
                     currentBlock.thinking += delta;
                     stream.push({
                         type: "thinking_delta",
-                        contentIndex: blockIndex(),
+                        contentIndex: currentContentIndex(),
                         delta,
                         partial: output,
                     });
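A note on why the index change in the hunks above matters: the old `blockIndex()` always returned `blocks.length - 1`, which is only correct while the block being reported is still the last element of `output.content`. Deriving the index from the block's identity keeps events pointing at the right block even after another block has been appended. A minimal TypeScript sketch with hypothetical data, not pi-ai code:

```ts
// The last-element index goes stale as soon as a second block is appended
// before the first one is finished.
type Block = { type: "text" | "toolCall"; text?: string };

const blocks: Block[] = [];
const textBlock: Block = { type: "text", text: "hello" };
blocks.push(textBlock);
blocks.push({ type: "toolCall" }); // arrives before textBlock is finished

const lastIndex = blocks.length - 1;             // 1 — points at the tool call
const identityIndex = blocks.indexOf(textBlock); // 0 — the block we are ending

console.log(lastIndex, identityIndex); // 1 0
```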
@@ -202,9 +212,11 @@ export const streamOpenAICompletions = (model, context, options) => {
             }
             if (choice?.delta?.tool_calls) {
                 for (const toolCall of choice.delta.tool_calls) {
-                    if (!currentBlock ||
-                        currentBlock.type !== "toolCall" ||
-                        (toolCall.id && currentBlock.id !== toolCall.id)) {
+                    const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
+                    const sameToolCall = currentBlock?.type === "toolCall" &&
+                        ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
+                            (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
+                    if (!sameToolCall) {
                         finishCurrentBlock(currentBlock);
                         currentBlock = {
                             type: "toolCall",
@@ -212,24 +224,34 @@ export const streamOpenAICompletions = (model, context, options) => {
                             name: toolCall.function?.name || "",
                             arguments: {},
                             partialArgs: "",
+                            streamIndex,
                         };
                         output.content.push(currentBlock);
-                        stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
+                        stream.push({
+                            type: "toolcall_start",
+                            contentIndex: getContentIndex(currentBlock),
+                            partial: output,
+                        });
                     }
-                    if (currentBlock.type === "toolCall") {
-                        if (toolCall.id)
-                            currentBlock.id = toolCall.id;
-                        if (toolCall.function?.name)
-                            currentBlock.name = toolCall.function.name;
+                    const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
+                    if (currentToolCallBlock) {
+                        if (!currentToolCallBlock.id && toolCall.id)
+                            currentToolCallBlock.id = toolCall.id;
+                        if (!currentToolCallBlock.name && toolCall.function?.name) {
+                            currentToolCallBlock.name = toolCall.function.name;
+                        }
+                        if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
+                            currentToolCallBlock.streamIndex = streamIndex;
+                        }
                         let delta = "";
                         if (toolCall.function?.arguments) {
                             delta = toolCall.function.arguments;
-                            currentBlock.partialArgs += toolCall.function.arguments;
-                            currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
+                            currentToolCallBlock.partialArgs += toolCall.function.arguments;
+                            currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
                         }
                         stream.push({
                             type: "toolcall_delta",
-                            contentIndex: blockIndex(),
+                            contentIndex: getContentIndex(currentToolCallBlock),
                             delta,
                             partial: output,
                         });
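The new correlation key follows the OpenAI streaming format: each entry in `choice.delta.tool_calls` carries an `index` identifying which tool call a fragment belongs to, while `id` and `function.name` typically appear only on the first fragment of each call. Keying on `index` (with `id` as a fallback) therefore groups interleaved parallel tool calls correctly, where the old id-based check could misattribute id-less fragments. A standalone sketch of that accumulation, with made-up fragment data:

```ts
// Only the first fragment of each call carries id/name; later fragments
// carry index plus argument text, so index is the reliable grouping key.
interface ToolCallDelta {
    index: number;
    id?: string;
    function?: { name?: string; arguments?: string };
}

const fragments: ToolCallDelta[] = [
    { index: 0, id: "call_a", function: { name: "read", arguments: '{"pa' } },
    { index: 1, id: "call_b", function: { name: "write", arguments: '{"pa' } },
    { index: 0, function: { arguments: 'th":"a.txt"}' } }, // no id — index says call_a
    { index: 1, function: { arguments: 'th":"b.txt"}' } },
];

type Acc = { id?: string; name?: string; args: string };
const byIndex = new Map<number, Acc>();
for (const f of fragments) {
    const entry: Acc = byIndex.get(f.index) ?? { args: "" };
    entry.id ??= f.id;
    entry.name ??= f.function?.name;
    entry.args += f.function?.arguments ?? "";
    byIndex.set(f.index, entry);
}
console.log([...byIndex.values()]);
// [ { id: "call_a", name: "read", args: '{"path":"a.txt"}' },
//   { id: "call_b", name: "write", args: '{"path":"b.txt"}' } ]
```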
@@ -265,8 +287,9 @@ export const streamOpenAICompletions = (model, context, options) => {
     catch (error) {
         for (const block of output.content) {
             delete block.index;
-            // partialArgs is only a streaming scratch buffer; never persist it.
+            // Streaming scratch buffers are only used during parsing; never persist them.
             delete block.partialArgs;
+            delete block.streamIndex;
         }
         output.stopReason = options?.signal?.aborted ? "aborted" : "error";
         output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
@@ -328,13 +351,16 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
 }
 function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
     const messages = convertMessages(model, context, compat);
-    const cacheControl = getCompatCacheControl(model, compat, cacheRetention);
+    const cacheControl = getCompatCacheControl(compat, cacheRetention);
     const params = {
         model: model.id,
         messages,
         stream: true,
-        prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" ? options?.sessionId : undefined,
-        prompt_cache_retention: model.baseUrl.includes("api.openai.com") && cacheRetention === "long" ? "24h" : undefined,
+        prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
+            (cacheRetention === "long" && compat.supportsLongCacheRetention)
+            ? options?.sessionId
+            : undefined,
+        prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
     };
     if (compat.supportsUsageInStreaming !== false) {
         params.stream_options = { include_usage: true };
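The effect of the new gating: `prompt_cache_key` and `prompt_cache_retention` were previously sent only when the base URL was `api.openai.com`; the `supportsLongCacheRetention` compat flag now lets other OpenAI-compatible endpoints opt in. A condensed sketch of the same logic (types simplified; pi-ai's actual `cacheRetention` union may include more values than shown):

```ts
type CacheRetention = "none" | "long"; // simplified; the real union may be wider

function cacheParams(baseUrl: string, retention: CacheRetention, supportsLong: boolean, sessionId?: string) {
    const isOpenAI = baseUrl.includes("api.openai.com");
    const long = retention === "long" && supportsLong;
    return {
        prompt_cache_key: (isOpenAI && retention !== "none") || long ? sessionId : undefined,
        prompt_cache_retention: long ? "24h" : undefined,
    };
}

// With long retention on api.openai.com, the cache key and 24h retention are both sent:
console.log(cacheParams("https://api.openai.com/v1", "long", true, "session-1"));
// => { prompt_cache_key: "session-1", prompt_cache_retention: "24h" }
```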
@@ -381,6 +407,12 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
             preserve_thinking: true,
         };
     }
+    else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
+        params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+        if (options?.reasoningEffort) {
+            params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
+        }
+    }
     else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
         // OpenRouter normalizes reasoning across providers via a nested reasoning object.
         const openRouterParams = params;
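For reference, this is roughly what the new `deepseek` branch adds to a request for a reasoning model. The model id below is illustrative, and the effort values come from the `reasoningEffortMap` added later in this diff:

```ts
// Sketch of the request params produced by the deepseek thinking branch.
const params: Record<string, unknown> = { model: "deepseek-reasoner", stream: true };
const reasoningEffort: string | undefined = "xhigh";

// thinking is enabled only when an effort level was requested
params.thinking = { type: reasoningEffort ? "enabled" : "disabled" };
if (reasoningEffort) {
    const map: Record<string, string> = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };
    params.reasoning_effort = map[reasoningEffort] ?? reasoningEffort;
}
console.log(params);
// => { model: "deepseek-reasoner", stream: true, thinking: { type: "enabled" }, reasoning_effort: "max" }
```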
@@ -418,11 +450,11 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
 function mapReasoningEffort(effort, reasoningEffortMap) {
     return reasoningEffortMap[effort] ?? effort;
 }
-function getCompatCacheControl(model, compat, cacheRetention) {
+function getCompatCacheControl(compat, cacheRetention) {
     if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
         return undefined;
     }
-    const ttl = cacheRetention === "long" && model.baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
+    const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
     return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
 }
 function applyAnthropicCacheControl(messages, tools, cacheControl) {
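The same compat flag now drives the Anthropic-style cache marker. Since `cacheControlFormat` is only set to `"anthropic"` for OpenRouter-routed Anthropic models (per `detectCompat` below), the old `api.anthropic.com` base-URL check apparently could never match here, so the flag is what actually enables the extended TTL. The `{ type: "ephemeral", ttl: "1h" }` shape matches Anthropic's extended cache TTL format. A sketch of the resulting marker under each setting:

```ts
type CacheRetention = "none" | "long"; // simplified as above

function compatCacheControl(format: string | undefined, retention: CacheRetention, supportsLong: boolean) {
    if (format !== "anthropic" || retention === "none") return undefined;
    const ttl = retention === "long" && supportsLong ? "1h" : undefined;
    return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
}

console.log(compatCacheControl("anthropic", "long", true));  // { type: "ephemeral", ttl: "1h" }
console.log(compatCacheControl("anthropic", "long", false)); // { type: "ephemeral" }
console.log(compatCacheControl(undefined, "long", true));    // undefined
```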
@@ -631,6 +663,11 @@ export function convertMessages(model, context, compat) {
                 assistantMsg.reasoning_details = reasoningDetails;
             }
         }
+        if (compat.requiresReasoningContentOnAssistantMessages &&
+            model.reasoning &&
+            assistantMsg.reasoning_content === undefined) {
+            assistantMsg.reasoning_content = "";
+        }
         // Skip assistant messages that have no content and no tool calls.
         // Some providers require "either content or tool_calls, but not none".
         // Other providers also don't accept empty assistant messages.
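The backfill above exists because, per the compat detection later in this diff, some providers (DeepSeek here) require a `reasoning_content` field on replayed assistant messages when the model is a reasoning model; an empty string satisfies that requirement without inventing reasoning text. A simplified sketch of the rule:

```ts
// Only reasoning models on providers with the compat flag get the empty stub,
// and an existing value is never overwritten.
interface AssistantMsg { role: "assistant"; content: string; reasoning_content?: string }

function backfillReasoningContent(msg: AssistantMsg, required: boolean, isReasoningModel: boolean) {
    if (required && isReasoningModel && msg.reasoning_content === undefined) {
        msg.reasoning_content = "";
    }
    return msg;
}

console.log(backfillReasoningContent({ role: "assistant", content: "done" }, true, true));
// => { role: "assistant", content: "done", reasoning_content: "" }
```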
@@ -725,7 +762,6 @@ function parseChunkUsage(rawUsage, model) {
     const promptTokens = rawUsage.prompt_tokens || 0;
     const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
     const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
-    const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
     // Normalize to pi-ai semantics:
     // - cacheRead: hits from cache created by previous requests only
     // - cacheWrite: tokens written to cache in this request
@@ -733,9 +769,8 @@ function parseChunkUsage(rawUsage, model) {
     // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
     const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
     const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
-    // Compute totalTokens ourselves since we add reasoning_tokens to output
-    // and some providers (e.g., Groq) don't include them in total_tokens
-    const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
+    // OpenAI completion_tokens already includes reasoning_tokens.
+    const outputTokens = rawUsage.completion_tokens || 0;
     const usage = {
         input,
         output: outputTokens,
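The arithmetic behind this fix: in the OpenAI usage format, `completion_tokens` already includes reasoning tokens, and `completion_tokens_details.reasoning_tokens` is a breakdown of that total, not an addition to it. Summing the two double-counted reasoning output:

```ts
// Worked example: 500 completion tokens, of which 300 were reasoning.
const rawUsage = {
    completion_tokens: 500,
    completion_tokens_details: { reasoning_tokens: 300 },
};

const oldOutput = rawUsage.completion_tokens + rawUsage.completion_tokens_details.reasoning_tokens;
const newOutput = rawUsage.completion_tokens;
console.log(oldOutput, newOutput); // 800 (double-counted) vs 500 (what the provider billed)
```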
@@ -791,16 +826,25 @@ function detectCompat(model) {
     const useMaxTokens = baseUrl.includes("chutes.ai");
     const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
     const isGroq = provider === "groq" || baseUrl.includes("groq.com");
+    const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
     const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
-    const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
+    const reasoningEffortMap = isDeepSeek
         ? {
-            minimal: "default",
-            low: "default",
-            medium: "default",
-            high: "default",
-            xhigh: "default",
+            minimal: "high",
+            low: "high",
+            medium: "high",
+            high: "high",
+            xhigh: "max",
         }
-        : {};
+        : isGroq && model.id === "qwen/qwen3-32b"
+            ? {
+                minimal: "default",
+                low: "default",
+                medium: "default",
+                high: "default",
+                xhigh: "default",
+            }
+            : {};
     return {
         supportsStore: !isNonStandard,
         supportsDeveloperRole: !isNonStandard,
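Combined with `mapReasoningEffort` (shown in an earlier hunk), the new DeepSeek table collapses pi-ai's five effort levels into the two values this diff sends to the provider, with unknown inputs passing through unchanged via the `?? effort` fallback:

```ts
const deepseekMap: Record<string, string> = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };
const mapReasoningEffort = (effort: string, map: Record<string, string>) => map[effort] ?? effort;

console.log(["minimal", "medium", "xhigh", "custom"].map((e) => mapReasoningEffort(e, deepseekMap)));
// => ["high", "high", "max", "custom"]
```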
@@ -811,17 +855,21 @@ function detectCompat(model) {
         requiresToolResultName: false,
         requiresAssistantAfterToolResult: false,
         requiresThinkingAsText: false,
-        thinkingFormat: isZai
-            ? "zai"
-            : provider === "openrouter" || baseUrl.includes("openrouter.ai")
-                ? "openrouter"
-                : "openai",
+        requiresReasoningContentOnAssistantMessages: isDeepSeek,
+        thinkingFormat: isDeepSeek
+            ? "deepseek"
+            : isZai
+                ? "zai"
+                : provider === "openrouter" || baseUrl.includes("openrouter.ai")
+                    ? "openrouter"
+                    : "openai",
         openRouterRouting: {},
         vercelGatewayRouting: {},
         zaiToolStream: false,
         supportsStrictMode: true,
         cacheControlFormat,
         sendSessionAffinityHeaders: false,
+        supportsLongCacheRetention: true,
     };
 }
 /**
@@ -842,6 +890,8 @@ function getCompat(model) {
         requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
         requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
         requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
+        requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
+            detected.requiresReasoningContentOnAssistantMessages,
         thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
         openRouterRouting: model.compat.openRouterRouting ?? {},
         vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
@@ -849,6 +899,7 @@ function getCompat(model) {
         supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
         cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
         sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
+        supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
     };
 }
 //# sourceMappingURL=openai-completions.js.map