npm - @oh-my-pi/pi-ai - Versions diffs - 12.4.0 → 12.5.0 - Mend

@oh-my-pi/pi-ai 12.4.0 → 12.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json +8 -8
package/src/providers/openai-completions.ts +141 -36
package/src/utils/overflow.ts +6 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@oh-my-pi/pi-ai",
-	"version": "12.4.0",
+	"version": "12.5.0",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"type": "module",
 	"main": "./src/index.ts",
@@ -57,19 +57,19 @@
 	},
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.74.0",
-		"@aws-sdk/client-bedrock-runtime": "^3.982.0",
+		"@aws-sdk/client-bedrock-runtime": "^3.990.0",
 		"@bufbuild/protobuf": "^2.11.0",
 		"@connectrpc/connect": "^2.1.1",
 		"@connectrpc/connect-node": "^2.1.1",
-		"@google/genai": "^1.39.0",
-		"@mistralai/mistralai": "^1.13.0",
-		"@oh-my-pi/pi-utils": "12.4.0",
+		"@google/genai": "^1.41.0",
+		"@mistralai/mistralai": "^1.14.0",
+		"@oh-my-pi/pi-utils": "12.5.0",
 		"@sinclair/typebox": "^0.34.48",
-		"@smithy/node-http-handler": "^4.4.9",
-		"ajv": "^8.17.1",
+		"@smithy/node-http-handler": "^4.4.10",
+		"ajv": "^8.18.0",
 		"ajv-formats": "^3.0.1",
 		"chalk": "^5.6.2",
-		"openai": "^6.17.0",
+		"openai": "^6.22.0",
 		"partial-json": "^0.1.7",
 		"zod-to-json-schema": "^3.25.1"
 	},

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -83,6 +83,44 @@ export interface OpenAICompletionsOptions extends StreamOptions {
 	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
 }
+// LIMITATION: The think tag parser uses naive string matching for <think>/<thinking> tags.
+// If MiniMax models output these literal strings in code blocks, XML examples, or explanations,
+// they will be incorrectly consumed as thinking delimiters, truncating visible output.
+// A streaming parser with arbitrary chunk boundaries cannot reliably detect code block context.
+// This is acceptable because: (1) only enabled for minimax-code providers, (2) MiniMax models
+// use these tags as their actual thinking format, and (3) false positives are rare in practice.
+const MINIMAX_THINK_OPEN_TAGS = ["<think>", "<thinking>"] as const;
+const MINIMAX_THINK_CLOSE_TAGS = ["</think>", "</thinking>"] as const;
+// Finds the earliest occurrence of any of `tags` inside `text`.
+// Returns the match position together with the tag found there, or undefined when
+// none of the tags occurs in `text`.
+function findFirstTag(text: string, tags: readonly string[]): { index: number; tag: string } | undefined {
+	let earliestIndex = Number.POSITIVE_INFINITY;
+	let earliestTag: string | undefined;
+	for (const tag of tags) {
+		const index = text.indexOf(tag);
+		// Strict `<` keeps the tag listed first in `tags` when two tags match at the same index.
+		if (index !== -1 && index < earliestIndex) {
+			earliestIndex = index;
+			earliestTag = tag;
+		}
+	}
+	if (!earliestTag) return undefined;
+	return { index: earliestIndex, tag: earliestTag };
+}
+// Returns the longest suffix of `text` that is a proper prefix of one of `tags`
+// (i.e. the beginning of a tag that has not fully arrived), or "" when there is none.
+function getTrailingPartialTag(text: string, tags: readonly string[]): string {
+	let maxLength = 0;
+	for (const tag of tags) {
+		// Only proper prefixes are considered (at most tag.length - 1 characters).
+		const maxCandidateLength = Math.min(tag.length - 1, text.length);
+		// Try the longest candidate first so the first hit is this tag's longest partial match.
+		for (let length = maxCandidateLength; length > 0; length--) {
+			if (text.endsWith(tag.slice(0, length))) {
+				if (length > maxLength) maxLength = length;
+				break;
+			}
+		}
+	}
+	if (maxLength === 0) return "";
+	return text.slice(-maxLength);
+}
 export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 	model: Model<"openai-completions">,
 	context: Context,
@@ -152,6 +190,93 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 				}
 			};
+			// Think-tag parsing is applied only when the provider is "minimax-code" or "minimax-code-cn".
+			const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
+			// Content received but not yet emitted; may end with the start of a tag split across chunks.
+			let taggedTextBuffer = "";
+			// True while the parser is between an opening think tag and its closing tag.
+			let insideTaggedThinking = false;
+			// Appends `delta` to the current text block and emits a "text_delta" event.
+			// Empty deltas are ignored. If the current block is missing or is not a text block,
+			// the current block is finished and a new text block is started ("text_start") first.
+			const appendTextDelta = (delta: string) => {
+				if (delta.length === 0) return;
+				if (!currentBlock || currentBlock.type !== "text") {
+					finishCurrentBlock(currentBlock);
+					currentBlock = { type: "text", text: "" };
+					output.content.push(currentBlock);
+					stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+				}
+				if (currentBlock.type === "text") {
+					currentBlock.text += delta;
+					stream.push({
+						type: "text_delta",
+						contentIndex: blockIndex(),
+						delta,
+						partial: output,
+					});
+				}
+			};
+			// Appends `delta` to the current thinking block and emits a "thinking_delta" event.
+			// Empty deltas are ignored. A new thinking block is started ("thinking_start") when there
+			// is no current block, the current block is not a thinking block, or a `signature` is
+			// supplied that differs from the current block's `thinkingSignature`.
+			// `signature` is optional; the think-tag parser calls this without one.
+			const appendThinkingDelta = (delta: string, signature?: string) => {
+				if (delta.length === 0) return;
+				if (
+					!currentBlock ||
+					currentBlock.type !== "thinking" ||
+					(signature !== undefined && currentBlock.thinkingSignature !== signature)
+				) {
+					finishCurrentBlock(currentBlock);
+					currentBlock = {
+						type: "thinking",
+						thinking: "",
+						thinkingSignature: signature,
+					};
+					output.content.push(currentBlock);
+					stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+				}
+				if (currentBlock.type === "thinking") {
+					// NOTE(review): a block lacking a signature while one is supplied already triggers a
+					// fresh block above, so this assignment looks redundant — confirm before removing.
+					if (signature !== undefined && !currentBlock.thinkingSignature) {
+						currentBlock.thinkingSignature = signature;
+					}
+					currentBlock.thinking += delta;
+					stream.push({
+						type: "thinking_delta",
+						contentIndex: blockIndex(),
+						delta,
+						partial: output,
+					});
+				}
+			};
+			// Drains `taggedTextBuffer`, routing content to text or thinking output depending on
+			// whether the parser is currently inside a think tag. Complete tags are consumed (not
+			// emitted) and toggle `insideTaggedThinking`. When no complete tag remains, everything
+			// except a trailing partial tag is emitted and that partial stays buffered.
+			const flushTaggedTextBuffer = () => {
+				while (taggedTextBuffer.length > 0) {
+					if (insideTaggedThinking) {
+						const closingTag = findFirstTag(taggedTextBuffer, MINIMAX_THINK_CLOSE_TAGS);
+						if (closingTag) {
+							// Content before the closing tag is thinking; drop the tag and switch to text mode.
+							appendThinkingDelta(taggedTextBuffer.slice(0, closingTag.index));
+							taggedTextBuffer = taggedTextBuffer.slice(closingTag.index + closingTag.tag.length);
+							insideTaggedThinking = false;
+							continue;
+						}
+						// No complete closing tag: emit all but a possible tag start at the end of the buffer.
+						const trailingPartialTag = getTrailingPartialTag(taggedTextBuffer, MINIMAX_THINK_CLOSE_TAGS);
+						const flushLength = taggedTextBuffer.length - trailingPartialTag.length;
+						appendThinkingDelta(taggedTextBuffer.slice(0, flushLength));
+						taggedTextBuffer = trailingPartialTag;
+						break;
+					}
+					const openingTag = findFirstTag(taggedTextBuffer, MINIMAX_THINK_OPEN_TAGS);
+					if (openingTag) {
+						// Content before the opening tag is visible text; drop the tag and switch to thinking mode.
+						appendTextDelta(taggedTextBuffer.slice(0, openingTag.index));
+						taggedTextBuffer = taggedTextBuffer.slice(openingTag.index + openingTag.tag.length);
+						insideTaggedThinking = true;
+						continue;
+					}
+					// No complete opening tag: emit all but a possible tag start at the end of the buffer.
+					const trailingPartialTag = getTrailingPartialTag(taggedTextBuffer, MINIMAX_THINK_OPEN_TAGS);
+					const flushLength = taggedTextBuffer.length - trailingPartialTag.length;
+					appendTextDelta(taggedTextBuffer.slice(0, flushLength));
+					taggedTextBuffer = trailingPartialTag;
+					break;
+				}
+			};
 			for await (const chunk of openaiStream) {
 				if (chunk.usage) {
 					// Check for cached_tokens at root level (Kimi) or in prompt_tokens_details (OpenAI)
@@ -196,21 +321,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 						choice.delta.content.length > 0
 					) {
 						if (!firstTokenTime) firstTokenTime = Date.now();
-						if (!currentBlock || currentBlock.type !== "text") {
-							finishCurrentBlock(currentBlock);
-							currentBlock = { type: "text", text: "" };
-							output.content.push(currentBlock);
-							stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
-						}
-						if (currentBlock.type === "text") {
-							currentBlock.text += choice.delta.content;
-							stream.push({
-								type: "text_delta",
-								contentIndex: blockIndex(),
-								delta: choice.delta.content,
-								partial: output,
-							});
+						if (parseMiniMaxThinkTags) {
+							taggedTextBuffer += choice.delta.content;
+							flushTaggedTextBuffer();
+						} else {
+							appendTextDelta(choice.delta.content);
 						}
 					}
@@ -234,27 +349,8 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 					}
 					if (foundReasoningField) {
-						if (!currentBlock || currentBlock.type !== "thinking") {
-							finishCurrentBlock(currentBlock);
-							currentBlock = {
-								type: "thinking",
-								thinking: "",
-								thinkingSignature: foundReasoningField,
-							};
-							output.content.push(currentBlock);
-							stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
-						}
-						if (currentBlock.type === "thinking") {
-							const delta = (choice.delta as any)[foundReasoningField];
-							currentBlock.thinking += delta;
-							stream.push({
-								type: "thinking_delta",
-								contentIndex: blockIndex(),
-								delta,
-								partial: output,
-							});
-						}
+						const delta = (choice.delta as any)[foundReasoningField];
+						appendThinkingDelta(delta, foundReasoningField);
 					}
 					if (choice?.delta?.tool_calls) {
@@ -311,6 +407,15 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 				}
 			}
+			if (parseMiniMaxThinkTags && taggedTextBuffer.length > 0) {
+				if (insideTaggedThinking) {
+					appendThinkingDelta(taggedTextBuffer);
+				} else {
+					appendTextDelta(taggedTextBuffer);
+				}
+				taggedTextBuffer = "";
+			}
 			finishCurrentBlock(currentBlock);
 			if (options?.signal?.aborted) {

package/src/utils/overflow.ts CHANGED Viewed

@@ -35,6 +35,10 @@ const OVERFLOW_PATTERNS = [
 	/maximum context length is \d+ tokens/i, // OpenRouter (all backends)
 	/exceeds the limit of \d+/i, // GitHub Copilot
 	/exceeds the available context size/i, // llama.cpp server
+	/requested tokens?.*exceed.*context (window|length|size)/i, // llama.cpp / OpenAI-compatible local servers
+	/context (window|length|size).*(exceeded|overflow|too small)/i, // Generic local server variants
+	/(prompt|input).*(too long|too large).*(context|n_ctx)/i, // llama.cpp phrasing variants
+	/requested tokens?.*(exceeds?|greater than).*(n_ctx|context)/i, // llama.cpp n_ctx variants
 	/greater than the context length/i, // LM Studio
 	/context window exceeds limit/i, // MiniMax
 	/exceeded model token limit/i, // Kimi For Coding
@@ -105,8 +109,8 @@ export function isContextOverflow(message: AssistantMessage, contextWindow?: num
 		}
 	}
-	// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context
-	if (contextWindow && message.stopReason === "stop") {
+	// Case 2: Usage-based overflow (silent or provider-specific)
+	if (contextWindow) {
 		const inputTokens = message.usage.input + message.usage.cacheRead;
 		if (inputTokens > contextWindow) {
 			return true;