@oh-my-pi/pi-ai 14.7.3 → 14.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [14.7.5] - 2026-05-07
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added `OpenAICompat.supportsMultipleSystemMessages` so chat-completions hosts can opt out of separate leading system blocks. Auto-detected as `true` for OpenAI, Azure, OpenRouter, Cerebras, Together, Fireworks, Groq, DeepSeek, Mistral, xAI, Z.ai, GitHub Copilot, and Zenmux; `false` for MiniMax, Alibaba Dashscope, and Qwen Portal whose chat templates reject follow-up system messages. Unknown OpenAI-compatible hosts (custom vLLM/local) default to `false`; users can opt back in via `compat.supportsMultipleSystemMessages: true`.
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- Fixed strict-template OpenAI-compatible hosts (e.g. Qwen 3.5+ via vLLM, MiniMax) rejecting follow-up `system`/`developer` messages by coalescing ordered system prompts into a single block joined by `\n\n` when `compat.supportsMultipleSystemMessages` is false. Canonical hosts continue to receive separate blocks so KV-cache reuse stays effective when only the trailing prompt changes ([#958](https://github.com/can1357/oh-my-pi/issues/958)).
|
|
14
|
+
|
|
5
15
|
## [14.7.2] - 2026-05-06
|
|
6
16
|
|
|
7
17
|
### Fixed
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.7.3",
|
|
4
|
+
"version": "14.7.5",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "14.7.3",
|
|
50
|
-
"@oh-my-pi/pi-utils": "14.7.3",
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.7.5",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.7.5",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -99,6 +99,52 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
99
99
|
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
|
100
100
|
const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
|
|
101
101
|
|
|
102
|
+
// Hosts whose chat-completions endpoints are known to accept multiple
|
|
103
|
+
// leading `system`/`developer` messages (preferred for KV-cache reuse).
|
|
104
|
+
// Anything outside this allowlist defaults to coalescing because
|
|
105
|
+
// strict chat templates (Qwen 3.5+ via vLLM, MiniMax, etc.) reject
|
|
106
|
+
// follow-up system messages with a 400.
|
|
107
|
+
const isOpenAIHost = provider === "openai" || baseUrl.includes("api.openai.com");
|
|
108
|
+
const isAzureHost =
|
|
109
|
+
provider === "azure" ||
|
|
110
|
+
baseUrl.includes(".openai.azure.com") ||
|
|
111
|
+
baseUrl.includes("models.inference.ai.azure.com") ||
|
|
112
|
+
baseUrl.includes("azure.com/openai");
|
|
113
|
+
const isOpenRouter = provider === "openrouter" || baseUrl.includes("openrouter.ai");
|
|
114
|
+
const isTogether = provider === "together" || baseUrl.includes("api.together.xyz");
|
|
115
|
+
const isFireworks = baseUrl.includes("fireworks.ai");
|
|
116
|
+
const isGroqHost = provider === "groq" || baseUrl.includes("api.groq.com");
|
|
117
|
+
const isCopilotHost = provider === "github-copilot";
|
|
118
|
+
const isZenmuxHost = provider === "zenmux";
|
|
119
|
+
// Endpoints that MUST receive a single system block. MiniMax's OpenAI
|
|
120
|
+
// endpoint returns error 2013 on multiple system messages; Alibaba's
|
|
121
|
+
// Dashscope and Qwen Portal serve Qwen models whose chat template
|
|
122
|
+
// raises "System message must be at the beginning" if any system
|
|
123
|
+
// message appears past index 0.
|
|
124
|
+
const isMiniMaxHost =
|
|
125
|
+
provider === "minimax-code" ||
|
|
126
|
+
provider === "minimax-code-cn" ||
|
|
127
|
+
baseUrl.includes("api.minimax.io") ||
|
|
128
|
+
baseUrl.includes("api.minimaxi.com");
|
|
129
|
+
const isQwenPortal = provider === "qwen-portal" || baseUrl.includes("portal.qwen.ai");
|
|
130
|
+
const supportsMultipleSystemMessagesDefault =
|
|
131
|
+
!isMiniMaxHost &&
|
|
132
|
+
!isAlibaba &&
|
|
133
|
+
!isQwenPortal &&
|
|
134
|
+
(isOpenAIHost ||
|
|
135
|
+
isAzureHost ||
|
|
136
|
+
isOpenRouter ||
|
|
137
|
+
isCerebras ||
|
|
138
|
+
isTogether ||
|
|
139
|
+
isFireworks ||
|
|
140
|
+
isGroqHost ||
|
|
141
|
+
isDeepseekFamily ||
|
|
142
|
+
isMistral ||
|
|
143
|
+
isGrok ||
|
|
144
|
+
isZai ||
|
|
145
|
+
isCopilotHost ||
|
|
146
|
+
isZenmuxHost);
|
|
147
|
+
|
|
102
148
|
const reasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
|
|
103
149
|
provider === "groq" && model.id === "qwen/qwen3-32b"
|
|
104
150
|
? ({
|
|
@@ -115,6 +161,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
115
161
|
return {
|
|
116
162
|
supportsStore: !isNonStandard,
|
|
117
163
|
supportsDeveloperRole: !isNonStandard,
|
|
164
|
+
supportsMultipleSystemMessages: supportsMultipleSystemMessagesDefault,
|
|
118
165
|
supportsReasoningEffort: !isGrok && !isZai,
|
|
119
166
|
reasoningEffortMap,
|
|
120
167
|
supportsUsageInStreaming: !isCerebras,
|
|
@@ -175,6 +222,8 @@ export function resolveOpenAICompat(
|
|
|
175
222
|
return {
|
|
176
223
|
supportsStore: model.compat.supportsStore ?? detected.supportsStore,
|
|
177
224
|
supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
|
|
225
|
+
supportsMultipleSystemMessages:
|
|
226
|
+
model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
|
|
178
227
|
supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
|
179
228
|
reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
|
|
180
229
|
supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
|
|
@@ -1191,8 +1191,18 @@ export function convertMessages(
|
|
|
1191
1191
|
if (systemPrompts.length > 0) {
|
|
1192
1192
|
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
|
1193
1193
|
const role = useDeveloperRole ? "developer" : "system";
|
|
1194
|
-
|
|
1195
|
-
|
|
1194
|
+
// Default to one block per ordered system prompt so the leading prefix
|
|
1195
|
+
// stays byte-identical between turns and the provider's KV cache can
|
|
1196
|
+
// reuse it. Hosts whose chat templates reject follow-up system messages
|
|
1197
|
+
// (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
|
|
1198
|
+
// via `compat.supportsMultipleSystemMessages = false`; in that mode we
|
|
1199
|
+
// coalesce into a single message joined by `\n\n`.
|
|
1200
|
+
if (compat.supportsMultipleSystemMessages) {
|
|
1201
|
+
for (const systemPrompt of systemPrompts) {
|
|
1202
|
+
params.push({ role, content: systemPrompt });
|
|
1203
|
+
}
|
|
1204
|
+
} else {
|
|
1205
|
+
params.push({ role, content: systemPrompts.join("\n\n") });
|
|
1196
1206
|
}
|
|
1197
1207
|
}
|
|
1198
1208
|
|
package/src/types.ts
CHANGED
|
@@ -540,6 +540,19 @@ export interface OpenAICompat {
|
|
|
540
540
|
supportsStore?: boolean;
|
|
541
541
|
/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
|
|
542
542
|
supportsDeveloperRole?: boolean;
|
|
543
|
+
/**
|
|
544
|
+
* Whether the provider's chat-completions endpoint accepts multiple
|
|
545
|
+
* leading `system`/`developer` messages. When false, ordered system
|
|
546
|
+
* prompts are coalesced into a single message joined by `\n\n` so
|
|
547
|
+
* strict chat templates (e.g. Qwen-served via vLLM, MiniMax) accept
|
|
548
|
+
* the request. Default: detected per provider/baseUrl. Canonical
|
|
549
|
+
* OpenAI/Azure/OpenRouter/Cerebras/Together/Fireworks/Groq/DeepSeek/
|
|
550
|
+
* Mistral/xAI/Z.ai/GitHub Copilot/Zenmux are treated as `true`;
|
|
551
|
+
* unknown or strict-template hosts default to `false`. Setting this
|
|
552
|
+
* to `true` preserves separate blocks, which is preferred for
|
|
553
|
+
* KV-cache reuse when the trailing prompt changes between calls.
|
|
554
|
+
*/
|
|
555
|
+
supportsMultipleSystemMessages?: boolean;
|
|
543
556
|
/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
|
|
544
557
|
supportsReasoningEffort?: boolean;
|
|
545
558
|
/** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */
|