@oh-my-pi/pi-ai 14.7.3 → 14.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.7.5] - 2026-05-07
6
+
7
+ ### Added
8
+
9
+ - Added `OpenAICompat.supportsMultipleSystemMessages` so chat-completions hosts can opt out of separate leading system blocks. Auto-detected as `true` for OpenAI, Azure, OpenRouter, Cerebras, Together, Fireworks, Groq, DeepSeek, Mistral, xAI, Z.ai, GitHub Copilot, and Zenmux; `false` for MiniMax, Alibaba Dashscope, and Qwen Portal whose chat templates reject follow-up system messages. Unknown OpenAI-compatible hosts (custom vLLM/local) default to `false`; users can opt back in via `compat.supportsMultipleSystemMessages: true`.
10
+
11
+ ### Fixed
12
+
13
+ - Fixed strict-template OpenAI-compatible hosts (e.g. Qwen 3.5+ via vLLM, MiniMax) rejecting follow-up `system`/`developer` messages by coalescing ordered system prompts into a single block joined by `\n\n` when `compat.supportsMultipleSystemMessages` is false. Canonical hosts continue to receive separate blocks so KV-cache reuse stays effective when only the trailing prompt changes ([#958](https://github.com/can1357/oh-my-pi/issues/958)).
14
+
5
15
  ## [14.7.2] - 2026-05-06
6
16
 
7
17
  ### Fixed
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.7.3",
4
+ "version": "14.7.5",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.39",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.52.0",
49
- "@oh-my-pi/pi-natives": "14.7.3",
50
- "@oh-my-pi/pi-utils": "14.7.3",
49
+ "@oh-my-pi/pi-natives": "14.7.5",
50
+ "@oh-my-pi/pi-utils": "14.7.5",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
package/src/(TypeScript source — file header missing from this diff rendering; not part of package.json) CHANGED
@@ -99,6 +99,52 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
99
99
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
100
100
  const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
101
101
 
102
+ // Hosts whose chat-completions endpoints are known to accept multiple
103
+ // leading `system`/`developer` messages (preferred for KV-cache reuse).
104
+ // Anything outside this allowlist defaults to coalescing because
105
+ // strict chat templates (Qwen 3.5+ via vLLM, MiniMax, etc.) reject
106
+ // follow-up system messages with a 400.
107
+ const isOpenAIHost = provider === "openai" || baseUrl.includes("api.openai.com");
108
+ const isAzureHost =
109
+ provider === "azure" ||
110
+ baseUrl.includes(".openai.azure.com") ||
111
+ baseUrl.includes("models.inference.ai.azure.com") ||
112
+ baseUrl.includes("azure.com/openai");
113
+ const isOpenRouter = provider === "openrouter" || baseUrl.includes("openrouter.ai");
114
+ const isTogether = provider === "together" || baseUrl.includes("api.together.xyz");
115
+ const isFireworks = baseUrl.includes("fireworks.ai");
116
+ const isGroqHost = provider === "groq" || baseUrl.includes("api.groq.com");
117
+ const isCopilotHost = provider === "github-copilot";
118
+ const isZenmuxHost = provider === "zenmux";
119
+ // Endpoints that MUST receive a single system block. MiniMax's OpenAI
120
+ // endpoint returns error 2013 on multiple system messages; Alibaba's
121
+ // Dashscope and Qwen Portal serve Qwen models whose chat template
122
+ // raises "System message must be at the beginning" if any system
123
+ // message appears past index 0.
124
+ const isMiniMaxHost =
125
+ provider === "minimax-code" ||
126
+ provider === "minimax-code-cn" ||
127
+ baseUrl.includes("api.minimax.io") ||
128
+ baseUrl.includes("api.minimaxi.com");
129
+ const isQwenPortal = provider === "qwen-portal" || baseUrl.includes("portal.qwen.ai");
130
+ const supportsMultipleSystemMessagesDefault =
131
+ !isMiniMaxHost &&
132
+ !isAlibaba &&
133
+ !isQwenPortal &&
134
+ (isOpenAIHost ||
135
+ isAzureHost ||
136
+ isOpenRouter ||
137
+ isCerebras ||
138
+ isTogether ||
139
+ isFireworks ||
140
+ isGroqHost ||
141
+ isDeepseekFamily ||
142
+ isMistral ||
143
+ isGrok ||
144
+ isZai ||
145
+ isCopilotHost ||
146
+ isZenmuxHost);
147
+
102
148
  const reasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
103
149
  provider === "groq" && model.id === "qwen/qwen3-32b"
104
150
  ? ({
@@ -115,6 +161,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
115
161
  return {
116
162
  supportsStore: !isNonStandard,
117
163
  supportsDeveloperRole: !isNonStandard,
164
+ supportsMultipleSystemMessages: supportsMultipleSystemMessagesDefault,
118
165
  supportsReasoningEffort: !isGrok && !isZai,
119
166
  reasoningEffortMap,
120
167
  supportsUsageInStreaming: !isCerebras,
@@ -175,6 +222,8 @@ export function resolveOpenAICompat(
175
222
  return {
176
223
  supportsStore: model.compat.supportsStore ?? detected.supportsStore,
177
224
  supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
225
+ supportsMultipleSystemMessages:
226
+ model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
178
227
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
179
228
  reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
180
229
  supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
@@ -1191,8 +1191,18 @@ export function convertMessages(
1191
1191
  if (systemPrompts.length > 0) {
1192
1192
  const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
1193
1193
  const role = useDeveloperRole ? "developer" : "system";
1194
- for (const systemPrompt of systemPrompts) {
1195
- params.push({ role, content: systemPrompt });
1194
+ // Default to one block per ordered system prompt so the leading prefix
1195
+ // stays byte-identical between turns and the provider's KV cache can
1196
+ // reuse it. Hosts whose chat templates reject follow-up system messages
1197
+ // (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
1198
+ // via `compat.supportsMultipleSystemMessages = false`; in that mode we
1199
+ // coalesce into a single message joined by `\n\n`.
1200
+ if (compat.supportsMultipleSystemMessages) {
1201
+ for (const systemPrompt of systemPrompts) {
1202
+ params.push({ role, content: systemPrompt });
1203
+ }
1204
+ } else {
1205
+ params.push({ role, content: systemPrompts.join("\n\n") });
1196
1206
  }
1197
1207
  }
1198
1208
 
package/src/types.ts CHANGED
@@ -540,6 +540,19 @@ export interface OpenAICompat {
540
540
  supportsStore?: boolean;
541
541
  /** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
542
542
  supportsDeveloperRole?: boolean;
543
+ /**
544
+ * Whether the provider's chat-completions endpoint accepts multiple
545
+ * leading `system`/`developer` messages. When false, ordered system
546
+ * prompts are coalesced into a single message joined by `\n\n` so
547
+ * strict chat templates (e.g. Qwen-served via vLLM, MiniMax) accept
548
+ * the request. Default: detected per provider/baseUrl. Canonical
549
+ * OpenAI/Azure/OpenRouter/Cerebras/Together/Fireworks/Groq/DeepSeek/
550
+ * Mistral/xAI/Z.ai/GitHub Copilot/Zenmux are treated as `true`;
551
+ * unknown or strict-template hosts default to `false`. Setting this
552
+ * to `true` preserves separate blocks, which is preferred for
553
+ * KV-cache reuse when the trailing prompt changes between calls.
554
+ */
555
+ supportsMultipleSystemMessages?: boolean;
543
556
  /** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
544
557
  supportsReasoningEffort?: boolean;
545
558
  /** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */