@mariozechner/pi-ai 0.69.0 → 0.70.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +4 -1
  2. package/dist/env-api-keys.d.ts +9 -0
  3. package/dist/env-api-keys.d.ts.map +1 -1
  4. package/dist/env-api-keys.js +42 -31
  5. package/dist/env-api-keys.js.map +1 -1
  6. package/dist/models.d.ts +1 -1
  7. package/dist/models.d.ts.map +1 -1
  8. package/dist/models.generated.d.ts +282 -19
  9. package/dist/models.generated.d.ts.map +1 -1
  10. package/dist/models.generated.js +278 -47
  11. package/dist/models.generated.js.map +1 -1
  12. package/dist/models.js +5 -2
  13. package/dist/models.js.map +1 -1
  14. package/dist/providers/anthropic.d.ts.map +1 -1
  15. package/dist/providers/anthropic.js +33 -12
  16. package/dist/providers/anthropic.js.map +1 -1
  17. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  18. package/dist/providers/azure-openai-responses.js +5 -1
  19. package/dist/providers/azure-openai-responses.js.map +1 -1
  20. package/dist/providers/google-vertex.d.ts.map +1 -1
  21. package/dist/providers/google-vertex.js +34 -13
  22. package/dist/providers/google-vertex.js.map +1 -1
  23. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  24. package/dist/providers/openai-codex-responses.js +8 -7
  25. package/dist/providers/openai-codex-responses.js.map +1 -1
  26. package/dist/providers/openai-completions.d.ts.map +1 -1
  27. package/dist/providers/openai-completions.js +95 -44
  28. package/dist/providers/openai-completions.js.map +1 -1
  29. package/dist/providers/openai-responses.d.ts.map +1 -1
  30. package/dist/providers/openai-responses.js +24 -20
  31. package/dist/providers/openai-responses.js.map +1 -1
  32. package/dist/providers/simple-options.d.ts.map +1 -1
  33. package/dist/providers/simple-options.js +2 -0
  34. package/dist/providers/simple-options.js.map +1 -1
  35. package/dist/types.d.ts +35 -4
  36. package/dist/types.d.ts.map +1 -1
  37. package/dist/types.js.map +1 -1
  38. package/package.json +1 -1
@@ -79,19 +79,28 @@ export const streamOpenAICompletions = (model, context, options) => {
             params = nextParams;
         }
         const { data: openaiStream, response } = await client.chat.completions
-            .create(params, { signal: options?.signal })
+            .create(params, {
+                signal: options?.signal,
+                timeout: options?.timeoutMs,
+                maxRetries: options?.maxRetries,
+            })
             .withResponse();
         await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
         stream.push({ type: "start", partial: output });
         let currentBlock = null;
         const blocks = output.content;
-        const blockIndex = () => blocks.length - 1;
+        const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
+        const currentContentIndex = () => getContentIndex(currentBlock);
         const finishCurrentBlock = (block) => {
             if (block) {
+                const contentIndex = getContentIndex(block);
+                if (contentIndex === -1) {
+                    return;
+                }
                 if (block.type === "text") {
                     stream.push({
                         type: "text_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         content: block.text,
                         partial: output,
                     });
@@ -99,19 +108,20 @@ export const streamOpenAICompletions = (model, context, options) => {
                 else if (block.type === "thinking") {
                     stream.push({
                         type: "thinking_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         content: block.thinking,
                         partial: output,
                     });
                 }
                 else if (block.type === "toolCall") {
                     block.arguments = parseStreamingJson(block.partialArgs);
-                    // Finalize in-place and strip the scratch buffer so replay only
+                    // Finalize in-place and strip the scratch buffers so replay only
                     // carries parsed arguments.
                     delete block.partialArgs;
+                    delete block.streamIndex;
                     stream.push({
                         type: "toolcall_end",
-                        contentIndex: blockIndex(),
+                        contentIndex,
                         toolCall: block,
                         partial: output,
                     });
@@ -150,13 +160,13 @@ export const streamOpenAICompletions = (model, context, options) => {
                     finishCurrentBlock(currentBlock);
                     currentBlock = { type: "text", text: "" };
                     output.content.push(currentBlock);
-                    stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+                    stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
                 }
                 if (currentBlock.type === "text") {
                     currentBlock.text += choice.delta.content;
                     stream.push({
                         type: "text_delta",
-                        contentIndex: blockIndex(),
+                        contentIndex: currentContentIndex(),
                         delta: choice.delta.content,
                         partial: output,
                     });
@@ -187,14 +197,14 @@ export const streamOpenAICompletions = (model, context, options) => {
                         thinkingSignature: foundReasoningField,
                     };
                     output.content.push(currentBlock);
-                    stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+                    stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
                 }
                 if (currentBlock.type === "thinking") {
                     const delta = choice.delta[foundReasoningField];
                     currentBlock.thinking += delta;
                     stream.push({
                         type: "thinking_delta",
-                        contentIndex: blockIndex(),
+                        contentIndex: currentContentIndex(),
                         delta,
                         partial: output,
                     });
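A note on why the index change in the hunks above matters: the old `blockIndex()` always returned `blocks.length - 1`, which is only correct while the block being reported is still the last element of `output.content`. Deriving the index from the block's identity keeps events pointing at the right block even after another block has been appended. A minimal TypeScript sketch with hypothetical data, not pi-ai code:

```ts
// The last-element index goes stale as soon as a second block is appended
// before the first one is finished.
type Block = { type: "text" | "toolCall"; text?: string };

const blocks: Block[] = [];
const textBlock: Block = { type: "text", text: "hello" };
blocks.push(textBlock);
blocks.push({ type: "toolCall" }); // arrives before textBlock is finished

const lastIndex = blocks.length - 1;             // 1 — points at the tool call
const identityIndex = blocks.indexOf(textBlock); // 0 — the block we are ending

console.log(lastIndex, identityIndex); // 1 0
```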
@@ -202,9 +212,11 @@ export const streamOpenAICompletions = (model, context, options) => {
             }
             if (choice?.delta?.tool_calls) {
                 for (const toolCall of choice.delta.tool_calls) {
-                    if (!currentBlock ||
-                        currentBlock.type !== "toolCall" ||
-                        (toolCall.id && currentBlock.id !== toolCall.id)) {
+                    const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
+                    const sameToolCall = currentBlock?.type === "toolCall" &&
+                        ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
+                            (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
+                    if (!sameToolCall) {
                         finishCurrentBlock(currentBlock);
                         currentBlock = {
                             type: "toolCall",
@@ -212,24 +224,34 @@ export const streamOpenAICompletions = (model, context, options) => {
                             name: toolCall.function?.name || "",
                             arguments: {},
                             partialArgs: "",
+                            streamIndex,
                         };
                         output.content.push(currentBlock);
-                        stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
+                        stream.push({
+                            type: "toolcall_start",
+                            contentIndex: getContentIndex(currentBlock),
+                            partial: output,
+                        });
                     }
-                    if (currentBlock.type === "toolCall") {
-                        if (toolCall.id)
-                            currentBlock.id = toolCall.id;
-                        if (toolCall.function?.name)
-                            currentBlock.name = toolCall.function.name;
+                    const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
+                    if (currentToolCallBlock) {
+                        if (!currentToolCallBlock.id && toolCall.id)
+                            currentToolCallBlock.id = toolCall.id;
+                        if (!currentToolCallBlock.name && toolCall.function?.name) {
+                            currentToolCallBlock.name = toolCall.function.name;
+                        }
+                        if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
+                            currentToolCallBlock.streamIndex = streamIndex;
+                        }
                         let delta = "";
                         if (toolCall.function?.arguments) {
                             delta = toolCall.function.arguments;
-                            currentBlock.partialArgs += toolCall.function.arguments;
-                            currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
+                            currentToolCallBlock.partialArgs += toolCall.function.arguments;
+                            currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
                         }
                         stream.push({
                             type: "toolcall_delta",
-                            contentIndex: blockIndex(),
+                            contentIndex: getContentIndex(currentToolCallBlock),
                             delta,
                             partial: output,
                         });
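The new correlation key follows the OpenAI streaming format: each entry in `choice.delta.tool_calls` carries an `index` identifying which tool call a fragment belongs to, while `id` and `function.name` typically appear only on the first fragment of each call. Keying on `index` (with `id` as a fallback) therefore groups interleaved parallel tool calls correctly, where the old id-based check could misattribute id-less fragments. A standalone sketch of that accumulation, with made-up fragment data:

```ts
// Only the first fragment of each call carries id/name; later fragments
// carry index plus argument text, so index is the reliable grouping key.
interface ToolCallDelta {
    index: number;
    id?: string;
    function?: { name?: string; arguments?: string };
}

const fragments: ToolCallDelta[] = [
    { index: 0, id: "call_a", function: { name: "read", arguments: '{"pa' } },
    { index: 1, id: "call_b", function: { name: "write", arguments: '{"pa' } },
    { index: 0, function: { arguments: 'th":"a.txt"}' } }, // no id — index says call_a
    { index: 1, function: { arguments: 'th":"b.txt"}' } },
];

type Acc = { id?: string; name?: string; args: string };
const byIndex = new Map<number, Acc>();
for (const f of fragments) {
    const entry: Acc = byIndex.get(f.index) ?? { args: "" };
    entry.id ??= f.id;
    entry.name ??= f.function?.name;
    entry.args += f.function?.arguments ?? "";
    byIndex.set(f.index, entry);
}
console.log([...byIndex.values()]);
// [ { id: "call_a", name: "read", args: '{"path":"a.txt"}' },
//   { id: "call_b", name: "write", args: '{"path":"b.txt"}' } ]
```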
@@ -265,8 +287,9 @@ export const streamOpenAICompletions = (model, context, options) => {
     catch (error) {
         for (const block of output.content) {
             delete block.index;
-            // partialArgs is only a streaming scratch buffer; never persist it.
+            // Streaming scratch buffers are only used during parsing; never persist them.
             delete block.partialArgs;
+            delete block.streamIndex;
         }
         output.stopReason = options?.signal?.aborted ? "aborted" : "error";
         output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
@@ -328,13 +351,16 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
 }
 function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
     const messages = convertMessages(model, context, compat);
-    const cacheControl = getCompatCacheControl(model, compat, cacheRetention);
+    const cacheControl = getCompatCacheControl(compat, cacheRetention);
     const params = {
         model: model.id,
         messages,
         stream: true,
-        prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" ? options?.sessionId : undefined,
-        prompt_cache_retention: model.baseUrl.includes("api.openai.com") && cacheRetention === "long" ? "24h" : undefined,
+        prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
+            (cacheRetention === "long" && compat.supportsLongCacheRetention)
+            ? options?.sessionId
+            : undefined,
+        prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
     };
     if (compat.supportsUsageInStreaming !== false) {
         params.stream_options = { include_usage: true };
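The effect of the new gating: `prompt_cache_key` and `prompt_cache_retention` were previously sent only when the base URL was `api.openai.com`; the `supportsLongCacheRetention` compat flag now lets other OpenAI-compatible endpoints opt in. A condensed sketch of the same logic (types simplified; pi-ai's actual `cacheRetention` union may include more values than shown):

```ts
type CacheRetention = "none" | "long"; // simplified; the real union may be wider

function cacheParams(baseUrl: string, retention: CacheRetention, supportsLong: boolean, sessionId?: string) {
    const isOpenAI = baseUrl.includes("api.openai.com");
    const long = retention === "long" && supportsLong;
    return {
        prompt_cache_key: (isOpenAI && retention !== "none") || long ? sessionId : undefined,
        prompt_cache_retention: long ? "24h" : undefined,
    };
}

// With long retention on api.openai.com, the cache key and 24h retention are both sent:
console.log(cacheParams("https://api.openai.com/v1", "long", true, "session-1"));
// => { prompt_cache_key: "session-1", prompt_cache_retention: "24h" }
```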
@@ -381,6 +407,12 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
             preserve_thinking: true,
         };
     }
+    else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
+        params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+        if (options?.reasoningEffort) {
+            params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
+        }
+    }
     else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
         // OpenRouter normalizes reasoning across providers via a nested reasoning object.
         const openRouterParams = params;
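For reference, this is roughly what the new `deepseek` branch adds to a request for a reasoning model. The model id below is illustrative, and the effort values come from the `reasoningEffortMap` added later in this diff:

```ts
// Sketch of the request params produced by the deepseek thinking branch.
const params: Record<string, unknown> = { model: "deepseek-reasoner", stream: true };
const reasoningEffort: string | undefined = "xhigh";

// thinking is enabled only when an effort level was requested
params.thinking = { type: reasoningEffort ? "enabled" : "disabled" };
if (reasoningEffort) {
    const map: Record<string, string> = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };
    params.reasoning_effort = map[reasoningEffort] ?? reasoningEffort;
}
console.log(params);
// => { model: "deepseek-reasoner", stream: true, thinking: { type: "enabled" }, reasoning_effort: "max" }
```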
@@ -418,11 +450,11 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
 function mapReasoningEffort(effort, reasoningEffortMap) {
     return reasoningEffortMap[effort] ?? effort;
 }
-function getCompatCacheControl(model, compat, cacheRetention) {
+function getCompatCacheControl(compat, cacheRetention) {
     if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
         return undefined;
     }
-    const ttl = cacheRetention === "long" && model.baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
+    const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
     return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
 }
 function applyAnthropicCacheControl(messages, tools, cacheControl) {
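The same compat flag now drives the Anthropic-style cache marker. Since `cacheControlFormat` is only set to `"anthropic"` for OpenRouter-routed Anthropic models (per `detectCompat` below), the old `api.anthropic.com` base-URL check apparently could never match here, so the flag is what actually enables the extended TTL. The `{ type: "ephemeral", ttl: "1h" }` shape matches Anthropic's extended cache TTL format. A sketch of the resulting marker under each setting:

```ts
type CacheRetention = "none" | "long"; // simplified as above

function compatCacheControl(format: string | undefined, retention: CacheRetention, supportsLong: boolean) {
    if (format !== "anthropic" || retention === "none") return undefined;
    const ttl = retention === "long" && supportsLong ? "1h" : undefined;
    return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
}

console.log(compatCacheControl("anthropic", "long", true));  // { type: "ephemeral", ttl: "1h" }
console.log(compatCacheControl("anthropic", "long", false)); // { type: "ephemeral" }
console.log(compatCacheControl(undefined, "long", true));    // undefined
```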
@@ -631,6 +663,11 @@ export function convertMessages(model, context, compat) {
                 assistantMsg.reasoning_details = reasoningDetails;
             }
         }
+        if (compat.requiresReasoningContentOnAssistantMessages &&
+            model.reasoning &&
+            assistantMsg.reasoning_content === undefined) {
+            assistantMsg.reasoning_content = "";
+        }
         // Skip assistant messages that have no content and no tool calls.
         // Some providers require "either content or tool_calls, but not none".
         // Other providers also don't accept empty assistant messages.
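The backfill above exists because, per the compat detection later in this diff, some providers (DeepSeek here) require a `reasoning_content` field on replayed assistant messages when the model is a reasoning model; an empty string satisfies that requirement without inventing reasoning text. A simplified sketch of the rule:

```ts
// Only reasoning models on providers with the compat flag get the empty stub,
// and an existing value is never overwritten.
interface AssistantMsg { role: "assistant"; content: string; reasoning_content?: string }

function backfillReasoningContent(msg: AssistantMsg, required: boolean, isReasoningModel: boolean) {
    if (required && isReasoningModel && msg.reasoning_content === undefined) {
        msg.reasoning_content = "";
    }
    return msg;
}

console.log(backfillReasoningContent({ role: "assistant", content: "done" }, true, true));
// => { role: "assistant", content: "done", reasoning_content: "" }
```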
@@ -725,7 +762,6 @@ function parseChunkUsage(rawUsage, model) {
     const promptTokens = rawUsage.prompt_tokens || 0;
     const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
     const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
-    const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
     // Normalize to pi-ai semantics:
     // - cacheRead: hits from cache created by previous requests only
     // - cacheWrite: tokens written to cache in this request
@@ -733,9 +769,8 @@ function parseChunkUsage(rawUsage, model) {
     // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
     const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
     const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
-    // Compute totalTokens ourselves since we add reasoning_tokens to output
-    // and some providers (e.g., Groq) don't include them in total_tokens
-    const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
+    // OpenAI completion_tokens already includes reasoning_tokens.
+    const outputTokens = rawUsage.completion_tokens || 0;
     const usage = {
         input,
         output: outputTokens,
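The arithmetic behind this fix: in the OpenAI usage format, `completion_tokens` already includes reasoning tokens, and `completion_tokens_details.reasoning_tokens` is a breakdown of that total, not an addition to it. Summing the two double-counted reasoning output:

```ts
// Worked example: 500 completion tokens, of which 300 were reasoning.
const rawUsage = {
    completion_tokens: 500,
    completion_tokens_details: { reasoning_tokens: 300 },
};

const oldOutput = rawUsage.completion_tokens + rawUsage.completion_tokens_details.reasoning_tokens;
const newOutput = rawUsage.completion_tokens;
console.log(oldOutput, newOutput); // 800 (double-counted) vs 500 (what the provider billed)
```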
@@ -791,16 +826,25 @@ function detectCompat(model) {
     const useMaxTokens = baseUrl.includes("chutes.ai");
     const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
     const isGroq = provider === "groq" || baseUrl.includes("groq.com");
+    const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
     const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
-    const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
+    const reasoningEffortMap = isDeepSeek
         ? {
-            minimal: "default",
-            low: "default",
-            medium: "default",
-            high: "default",
-            xhigh: "default",
+            minimal: "high",
+            low: "high",
+            medium: "high",
+            high: "high",
+            xhigh: "max",
         }
-        : {};
+        : isGroq && model.id === "qwen/qwen3-32b"
+            ? {
+                minimal: "default",
+                low: "default",
+                medium: "default",
+                high: "default",
+                xhigh: "default",
+            }
+            : {};
     return {
         supportsStore: !isNonStandard,
         supportsDeveloperRole: !isNonStandard,
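Combined with `mapReasoningEffort` (shown in an earlier hunk), the new DeepSeek table collapses pi-ai's five effort levels into the two values this diff sends to the provider, with unknown inputs passing through unchanged via the `?? effort` fallback:

```ts
const deepseekMap: Record<string, string> = { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" };
const mapReasoningEffort = (effort: string, map: Record<string, string>) => map[effort] ?? effort;

console.log(["minimal", "medium", "xhigh", "custom"].map((e) => mapReasoningEffort(e, deepseekMap)));
// => ["high", "high", "max", "custom"]
```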
@@ -811,17 +855,21 @@ function detectCompat(model) {
         requiresToolResultName: false,
         requiresAssistantAfterToolResult: false,
         requiresThinkingAsText: false,
-        thinkingFormat: isZai
-            ? "zai"
-            : provider === "openrouter" || baseUrl.includes("openrouter.ai")
-                ? "openrouter"
-                : "openai",
+        requiresReasoningContentOnAssistantMessages: isDeepSeek,
+        thinkingFormat: isDeepSeek
+            ? "deepseek"
+            : isZai
+                ? "zai"
+                : provider === "openrouter" || baseUrl.includes("openrouter.ai")
+                    ? "openrouter"
+                    : "openai",
         openRouterRouting: {},
         vercelGatewayRouting: {},
         zaiToolStream: false,
         supportsStrictMode: true,
         cacheControlFormat,
         sendSessionAffinityHeaders: false,
+        supportsLongCacheRetention: true,
     };
 }
 /**
@@ -842,6 +890,8 @@ function getCompat(model) {
         requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
         requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
         requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
+        requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
+            detected.requiresReasoningContentOnAssistantMessages,
         thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
         openRouterRouting: model.compat.openRouterRouting ?? {},
         vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
@@ -849,6 +899,7 @@ function getCompat(model) {
         supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
         cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
         sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
+        supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
     };
 }
 //# sourceMappingURL=openai-completions.js.map