@mariozechner/pi-ai 0.69.0 → 0.70.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/env-api-keys.d.ts +9 -0
- package/dist/env-api-keys.d.ts.map +1 -1
- package/dist/env-api-keys.js +42 -31
- package/dist/env-api-keys.js.map +1 -1
- package/dist/models.d.ts +1 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +282 -19
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +278 -47
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +5 -2
- package/dist/models.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +33 -12
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/azure-openai-responses.d.ts.map +1 -1
- package/dist/providers/azure-openai-responses.js +5 -1
- package/dist/providers/azure-openai-responses.js.map +1 -1
- package/dist/providers/google-vertex.d.ts.map +1 -1
- package/dist/providers/google-vertex.js +34 -13
- package/dist/providers/google-vertex.js.map +1 -1
- package/dist/providers/openai-codex-responses.d.ts.map +1 -1
- package/dist/providers/openai-codex-responses.js +8 -7
- package/dist/providers/openai-codex-responses.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +95 -44
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +24 -20
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/simple-options.d.ts.map +1 -1
- package/dist/providers/simple-options.js +2 -0
- package/dist/providers/simple-options.js.map +1 -1
- package/dist/types.d.ts +35 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1

package/dist/providers/openai-completions.js (deleted lines cut off in the source view are marked with …):

@@ -79,19 +79,28 @@ export const streamOpenAICompletions = (model, context, options) => {
         params = nextParams;
     }
     const { data: openaiStream, response } = await client.chat.completions
-        .create(params, { …
+        .create(params, {
+            signal: options?.signal,
+            timeout: options?.timeoutMs,
+            maxRetries: options?.maxRetries,
+        })
         .withResponse();
     await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
     stream.push({ type: "start", partial: output });
     let currentBlock = null;
    const blocks = output.content;
-    const …
+    const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
+    const currentContentIndex = () => getContentIndex(currentBlock);
    const finishCurrentBlock = (block) => {
        if (block) {
+            const contentIndex = getContentIndex(block);
+            if (contentIndex === -1) {
+                return;
+            }
            if (block.type === "text") {
                stream.push({
                    type: "text_end",
-                    contentIndex …
+                    contentIndex,
                    content: block.text,
                    partial: output,
                });
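The `.create()` call now forwards the caller's abort signal, timeout, and retry budget as per-request OpenAI SDK options, and the positional content-index bookkeeping is replaced by `indexOf`-based helpers so events keep pointing at the right block even if a block is discarded. A minimal usage sketch, assuming `streamOpenAICompletions` is exported from the package entry point and that `model` and `context` are valid pi-ai values:

```js
import { streamOpenAICompletions } from "@mariozechner/pi-ai";

// Hypothetical caller: give up after 30 seconds. The three options below
// are passed straight through to client.chat.completions.create().
const controller = new AbortController();
setTimeout(() => controller.abort(), 30_000);

const stream = streamOpenAICompletions(model, context, {
    signal: controller.signal, // SDK per-request `signal`
    timeoutMs: 60_000,         // SDK per-request `timeout`
    maxRetries: 2,             // SDK per-request `maxRetries`
    onResponse: ({ status }) => console.log("HTTP", status),
});
```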
@@ -99,19 +108,20 @@ export const streamOpenAICompletions = (model, context, options) => {
            else if (block.type === "thinking") {
                stream.push({
                    type: "thinking_end",
-                    contentIndex …
+                    contentIndex,
                    content: block.thinking,
                    partial: output,
                });
            }
            else if (block.type === "toolCall") {
                block.arguments = parseStreamingJson(block.partialArgs);
-                // Finalize in-place and strip the scratch …
+                // Finalize in-place and strip the scratch buffers so replay only
                // carries parsed arguments.
                delete block.partialArgs;
+                delete block.streamIndex;
                stream.push({
                    type: "toolcall_end",
-                    contentIndex …
+                    contentIndex,
                    toolCall: block,
                    partial: output,
                });
@@ -150,13 +160,13 @@ export const streamOpenAICompletions = (model, context, options) => {
                finishCurrentBlock(currentBlock);
                currentBlock = { type: "text", text: "" };
                output.content.push(currentBlock);
-                stream.push({ type: "text_start", contentIndex: …
+                stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
            }
            if (currentBlock.type === "text") {
                currentBlock.text += choice.delta.content;
                stream.push({
                    type: "text_delta",
-                    contentIndex: …
+                    contentIndex: currentContentIndex(),
                    delta: choice.delta.content,
                    partial: output,
                });
@@ -187,14 +197,14 @@ export const streamOpenAICompletions = (model, context, options) => {
                    thinkingSignature: foundReasoningField,
                };
                output.content.push(currentBlock);
-                stream.push({ type: "thinking_start", contentIndex: …
+                stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
            }
            if (currentBlock.type === "thinking") {
                const delta = choice.delta[foundReasoningField];
                currentBlock.thinking += delta;
                stream.push({
                    type: "thinking_delta",
-                    contentIndex: …
+                    contentIndex: currentContentIndex(),
                    delta,
                    partial: output,
                });
@@ -202,9 +212,11 @@ export const streamOpenAICompletions = (model, context, options) => {
            }
            if (choice?.delta?.tool_calls) {
                for (const toolCall of choice.delta.tool_calls) {
-                    …
-                    …
-                    ( …
+                    const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
+                    const sameToolCall = currentBlock?.type === "toolCall" &&
+                        ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
+                            (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
+                    if (!sameToolCall) {
                        finishCurrentBlock(currentBlock);
                        currentBlock = {
                            type: "toolCall",
@@ -212,24 +224,34 @@ export const streamOpenAICompletions = (model, context, options) => {
                            name: toolCall.function?.name || "",
                            arguments: {},
                            partialArgs: "",
+                            streamIndex,
                        };
                        output.content.push(currentBlock);
-                        stream.push({ …
+                        stream.push({
+                            type: "toolcall_start",
+                            contentIndex: getContentIndex(currentBlock),
+                            partial: output,
+                        });
                    }
-                    …
-                    …
-                    …
-                    …
-                    …
+                    const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
+                    if (currentToolCallBlock) {
+                        if (!currentToolCallBlock.id && toolCall.id)
+                            currentToolCallBlock.id = toolCall.id;
+                        if (!currentToolCallBlock.name && toolCall.function?.name) {
+                            currentToolCallBlock.name = toolCall.function.name;
+                        }
+                        if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
+                            currentToolCallBlock.streamIndex = streamIndex;
+                        }
                    let delta = "";
                    if (toolCall.function?.arguments) {
                        delta = toolCall.function.arguments;
-                        …
-                        …
+                        currentToolCallBlock.partialArgs += toolCall.function.arguments;
+                        currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
                    }
                    stream.push({
                        type: "toolcall_delta",
-                        contentIndex: …
+                        contentIndex: getContentIndex(currentToolCallBlock),
                        delta,
                        partial: output,
                    });
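Tool-call deltas in OpenAI-compatible streams carry an `index` field identifying which call a fragment belongs to, but some gateways omit it and only repeat the call `id`. The rewritten loop in the two hunks above keys the current block on `streamIndex` first and falls back to `id`, instead of assuming every fragment extends the most recent block. A standalone sketch of the matching rule (names mirror the diff; this is not the package's public API):

```js
// Returns true when a streamed tool_call fragment belongs to `block`.
// Mirrors the sameToolCall predicate above; object shapes are assumed.
function belongsToBlock(block, toolCall) {
    if (block?.type !== "toolCall") return false;
    const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
    if (streamIndex !== undefined) return block.streamIndex === streamIndex;
    return Boolean(toolCall.id) && block.id === toolCall.id;
}
```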
@@ -265,8 +287,9 @@ export const streamOpenAICompletions = (model, context, options) => {
    catch (error) {
        for (const block of output.content) {
            delete block.index;
-            // …
+            // Streaming scratch buffers are only used during parsing; never persist them.
            delete block.partialArgs;
+            delete block.streamIndex;
        }
        output.stopReason = options?.signal?.aborted ? "aborted" : "error";
        output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
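`partialArgs` and the new `streamIndex` are streaming-only scratch fields; both the success path (`finishCurrentBlock`) and this error path now strip them, so persisted messages carry only the parsed `arguments`. Illustrative shapes, with invented field values:

```js
// While streaming, a tool-call block carries scratch state:
//   { type: "toolCall", id: "call_1", name: "search",
//     arguments: { q: "pi" }, partialArgs: '{"q":"pi"}', streamIndex: 0 }
// After finalization, only the durable fields remain:
//   { type: "toolCall", id: "call_1", name: "search", arguments: { q: "pi" } }
```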
@@ -328,13 +351,16 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
 }
 function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
    const messages = convertMessages(model, context, compat);
-    const cacheControl = getCompatCacheControl( …
+    const cacheControl = getCompatCacheControl(compat, cacheRetention);
    const params = {
        model: model.id,
        messages,
        stream: true,
-        prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" …
-        …
+        prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
+            (cacheRetention === "long" && compat.supportsLongCacheRetention)
+            ? options?.sessionId
+            : undefined,
+        prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
    };
    if (compat.supportsUsageInStreaming !== false) {
        params.stream_options = { include_usage: true };
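`prompt_cache_key` is now also sent when long cache retention is requested against a provider whose compat advertises `supportsLongCacheRetention`, and in that case the request additionally carries OpenAI's `prompt_cache_retention` parameter with the `"24h"` value. A sketch of the relevant slice of `params`, with invented values:

```js
// Assuming options.cacheRetention === "long" and a sessionId of "session-abc",
// buildParams would produce a request body shaped like this:
const params = {
    model: "gpt-5.1", // hypothetical model id
    messages: [{ role: "user", content: "Hello" }],
    stream: true,
    prompt_cache_key: "session-abc", // from options?.sessionId
    prompt_cache_retention: "24h",   // only when the compat supports long retention
};
```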
@@ -381,6 +407,12 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
            preserve_thinking: true,
        };
    }
+    else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
+        params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+        if (options?.reasoningEffort) {
+            params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
+        }
+    }
    else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
        // OpenRouter normalizes reasoning across providers via a nested reasoning object.
        const openRouterParams = params;
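The new branch translates pi-ai's `reasoningEffort` into DeepSeek's chat-completions extensions: a `thinking` on/off toggle plus a `reasoning_effort` string run through the compat's effort map (shown further down in this diff). For a reasoning-capable DeepSeek model this yields, for example:

```js
// options?.reasoningEffort === "low"  (the DeepSeek map collapses it to "high"):
params.thinking = { type: "enabled" };
params.reasoning_effort = "high";

// options?.reasoningEffort undefined: thinking is explicitly disabled
params.thinking = { type: "disabled" };
```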
@@ -418,11 +450,11 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
 function mapReasoningEffort(effort, reasoningEffortMap) {
    return reasoningEffortMap[effort] ?? effort;
 }
-function getCompatCacheControl( …
+function getCompatCacheControl(compat, cacheRetention) {
    if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
        return undefined;
    }
-    const ttl = cacheRetention === "long" && …
+    const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
    return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
 }
 function applyAnthropicCacheControl(messages, tools, cacheControl) {
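`getCompatCacheControl` now receives the compat record explicitly, and the 1-hour TTL is granted only when the provider supports long retention. Its outputs, derived directly from the function body above:

```js
// With compat.cacheControlFormat === "anthropic":
getCompatCacheControl(compat, "none");
// => undefined
getCompatCacheControl(compat, "long");
// => { type: "ephemeral", ttl: "1h" }   when compat.supportsLongCacheRetention
// => { type: "ephemeral" }              otherwise
```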
@@ -631,6 +663,11 @@ export function convertMessages(model, context, compat) {
                assistantMsg.reasoning_details = reasoningDetails;
            }
        }
+        if (compat.requiresReasoningContentOnAssistantMessages &&
+            model.reasoning &&
+            assistantMsg.reasoning_content === undefined) {
+            assistantMsg.reasoning_content = "";
+        }
        // Skip assistant messages that have no content and no tool calls.
        // Some providers require "either content or tool_calls, but not none".
        // Other providers also don't accept empty assistant messages.
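DeepSeek's endpoint appears to expect a `reasoning_content` field on prior assistant turns when a reasoning model is used (the motivation is inferred from the compat flag's name), so `convertMessages` now backfills an empty string. An illustrative converted message, with invented content:

```js
// Hypothetical assistant turn after conversion for a DeepSeek reasoning model:
const assistantMsg = {
    role: "assistant",
    content: "The answer is 42.",
    reasoning_content: "", // backfilled when the flag is set and the field was absent
};
```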
@@ -725,7 +762,6 @@ function parseChunkUsage(rawUsage, model) {
    const promptTokens = rawUsage.prompt_tokens || 0;
    const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
    const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
-    const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
    // Normalize to pi-ai semantics:
    // - cacheRead: hits from cache created by previous requests only
    // - cacheWrite: tokens written to cache in this request
@@ -733,9 +769,8 @@ function parseChunkUsage(rawUsage, model) {
    // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
    const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
    const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
-    // …
-    …
-    const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
+    // OpenAI completion_tokens already includes reasoning_tokens.
+    const outputTokens = rawUsage.completion_tokens || 0;
    const usage = {
        input,
        output: outputTokens,
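The usage change removes double counting: in OpenAI-style usage payloads, `completion_tokens` already includes `completion_tokens_details.reasoning_tokens`, so adding the detail field inflated the output count. A worked example with invented numbers:

```js
const rawUsage = {
    prompt_tokens: 1200,
    completion_tokens: 500, // already includes the 300 reasoning tokens
    prompt_tokens_details: { cached_tokens: 1000, cache_write_tokens: 200 },
    completion_tokens_details: { reasoning_tokens: 300 },
};
// cacheRead = max(0, 1000 - 200)       = 800
// input     = max(0, 1200 - 800 - 200) = 200
// output, before this fix: 500 + 300   = 800 (reasoning counted twice)
// output, after this fix:  500
```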
@@ -791,16 +826,25 @@ function detectCompat(model) {
    const useMaxTokens = baseUrl.includes("chutes.ai");
    const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
    const isGroq = provider === "groq" || baseUrl.includes("groq.com");
+    const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
    const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
-    const reasoningEffortMap = …
+    const reasoningEffortMap = isDeepSeek
        ? {
-            minimal: " …
-            low: " …
-            medium: " …
-            high: " …
-            xhigh: " …
+            minimal: "high",
+            low: "high",
+            medium: "high",
+            high: "high",
+            xhigh: "max",
        }
-        : …
+        : isGroq && model.id === "qwen/qwen3-32b"
+            ? {
+                minimal: "default",
+                low: "default",
+                medium: "default",
+                high: "default",
+                xhigh: "default",
+            }
+            : {};
    return {
        supportsStore: !isNonStandard,
        supportsDeveloperRole: !isNonStandard,
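`mapReasoningEffort` (shown unchanged in an earlier hunk) looks the effort up in the map and falls back to the effort itself, so these tables collapse pi-ai's five levels onto what each backend accepts; the values suggest DeepSeek distinguishes only `"high"` and `"max"`, while Groq's `qwen/qwen3-32b` takes only `"default"`. For example:

```js
const deepSeekMap = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };
mapReasoningEffort("low", deepSeekMap);   // => "high"
mapReasoningEffort("xhigh", deepSeekMap); // => "max"
mapReasoningEffort("medium", {});         // => "medium" (pass-through for other providers)
```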
@@ -811,17 +855,21 @@ function detectCompat(model) {
        requiresToolResultName: false,
        requiresAssistantAfterToolResult: false,
        requiresThinkingAsText: false,
-        …
-        …
-        …
-        …
-        …
+        requiresReasoningContentOnAssistantMessages: isDeepSeek,
+        thinkingFormat: isDeepSeek
+            ? "deepseek"
+            : isZai
+                ? "zai"
+                : provider === "openrouter" || baseUrl.includes("openrouter.ai")
+                    ? "openrouter"
+                    : "openai",
        openRouterRouting: {},
        vercelGatewayRouting: {},
        zaiToolStream: false,
        supportsStrictMode: true,
        cacheControlFormat,
        sendSessionAffinityHeaders: false,
+        supportsLongCacheRetention: true,
    };
 }
 /**
@@ -842,6 +890,8 @@ function getCompat(model) {
        requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
        requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
        requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
+        requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
+            detected.requiresReasoningContentOnAssistantMessages,
        thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
        openRouterRouting: model.compat.openRouterRouting ?? {},
        vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
@@ -849,6 +899,7 @@ function getCompat(model) {
        supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
        cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
        sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
+        supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
    };
 }
 //# sourceMappingURL=openai-completions.js.map
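`supportsLongCacheRetention` defaults to `true` in the detected compat and, like the other flags, can be pinned per model through `model.compat`, which `getCompat` merges with `??`. A hypothetical override for a gateway that ignores the retention parameter:

```js
// Hypothetical model definition; only the compat override is the point here.
const myModel = {
    ...someOpenAICompatibleModel,
    compat: { supportsLongCacheRetention: false },
};
// getCompat(myModel).supportsLongCacheRetention === false
```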