@oh-my-pi/pi-ai 14.7.4 → 14.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/package.json +3 -3
- package/src/providers/azure-openai-responses.ts +6 -3
- package/src/providers/openai-codex/request-transformer.ts +6 -3
- package/src/providers/openai-completions-compat.ts +49 -0
- package/src/providers/openai-completions.ts +12 -2
- package/src/providers/openai-responses.ts +6 -3
- package/src/stream.ts +9 -0
- package/src/types.ts +21 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,25 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [14.7.6] - 2026-05-07
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added `hideThinkingSummary` option to `SimpleStreamOptions`. When true, `streamSimple` requests that the underlying provider omit reasoning/thinking summaries: Anthropic receives `thinking.display = "omitted"` (where supported), and OpenAI Responses / Azure / Codex providers leave `reasoning.summary` unset so the server skips emitting the human-readable summary stream entirely.
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Changed OpenAI Responses, Azure OpenAI Responses, and OpenAI Codex providers to omit `reasoning.summary` from requests when `reasoningSummary` is explicitly `null` (previously fell back to `"auto"`).
|
|
14
|
+
## [14.7.5] - 2026-05-07
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
|
|
18
|
+
- Added `OpenAICompat.supportsMultipleSystemMessages` so chat-completions hosts can opt out of separate leading system blocks. Auto-detected as `true` for OpenAI, Azure, OpenRouter, Cerebras, Together, Fireworks, Groq, DeepSeek, Mistral, xAI, Z.ai, GitHub Copilot, and Zenmux; `false` for MiniMax, Alibaba Dashscope, and Qwen Portal whose chat templates reject follow-up system messages. Unknown OpenAI-compatible hosts (custom vLLM/local) default to `false`; users can opt back in via `compat.supportsMultipleSystemMessages: true`.
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
|
|
22
|
+
- Fixed strict-template OpenAI-compatible hosts (e.g. Qwen 3.5+ via vLLM, MiniMax) rejecting follow-up `system`/`developer` messages by coalescing ordered system prompts into a single block joined by `\n\n` when `compat.supportsMultipleSystemMessages` is false. Canonical hosts continue to receive separate blocks so KV-cache reuse stays effective when only the trailing prompt changes ([#958](https://github.com/can1357/oh-my-pi/issues/958)).
|
|
23
|
+
|
|
5
24
|
## [14.7.2] - 2026-05-06
|
|
6
25
|
|
|
7
26
|
### Fixed
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.7.4",
|
|
4
|
+
"version": "14.7.6",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "14.7.4",
|
|
50
|
-
"@oh-my-pi/pi-utils": "14.7.4",
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.7.6",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.7.6",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -317,11 +317,14 @@ function buildParams(
|
|
|
317
317
|
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
318
318
|
params.include = ["reasoning.encrypted_content"];
|
|
319
319
|
|
|
320
|
-
if (options?.reasoning || options?.reasoningSummary) {
|
|
321
|
-
params.reasoning = {
|
|
320
|
+
if (options?.reasoning || options?.reasoningSummary !== undefined) {
|
|
321
|
+
const reasoningParams: NonNullable<typeof params.reasoning> = {
|
|
322
322
|
effort: options?.reasoning || "medium",
|
|
323
|
-
summary: options?.reasoningSummary || "auto",
|
|
324
323
|
};
|
|
324
|
+
if (options?.reasoningSummary !== null) {
|
|
325
|
+
reasoningParams.summary = options?.reasoningSummary || "auto";
|
|
326
|
+
}
|
|
327
|
+
params.reasoning = reasoningParams;
|
|
325
328
|
} else {
|
|
326
329
|
if (model.name.toLowerCase().startsWith("gpt-5")) {
|
|
327
330
|
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
|
@@ -4,7 +4,7 @@ import type { Api, Model } from "../../types";
|
|
|
4
4
|
|
|
5
5
|
export interface ReasoningConfig {
|
|
6
6
|
effort: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
7
|
-
summary: "auto" | "concise" | "detailed";
|
|
7
|
+
summary?: "auto" | "concise" | "detailed";
|
|
8
8
|
}
|
|
9
9
|
|
|
10
10
|
export interface CodexRequestOptions {
|
|
@@ -52,11 +52,14 @@ export interface RequestBody {
|
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
function getReasoningConfig(model: Model<Api>, options: CodexRequestOptions): ReasoningConfig {
|
|
55
|
-
|
|
55
|
+
const config: ReasoningConfig = {
|
|
56
56
|
effort:
|
|
57
57
|
options.reasoningEffort === "none" ? "none" : requireSupportedEffort(model, options.reasoningEffort as Effort),
|
|
58
|
-
summary: options.reasoningSummary ?? "detailed",
|
|
59
58
|
};
|
|
59
|
+
if (options.reasoningSummary !== null) {
|
|
60
|
+
config.summary = options.reasoningSummary ?? "detailed";
|
|
61
|
+
}
|
|
62
|
+
return config;
|
|
60
63
|
}
|
|
61
64
|
|
|
62
65
|
function filterInput(input: InputItem[] | undefined): InputItem[] | undefined {
|
|
@@ -99,6 +99,52 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
99
99
|
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
|
100
100
|
const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
|
|
101
101
|
|
|
102
|
+
// Hosts whose chat-completions endpoints are known to accept multiple
|
|
103
|
+
// leading `system`/`developer` messages (preferred for KV-cache reuse).
|
|
104
|
+
// Anything outside this allowlist defaults to coalescing because
|
|
105
|
+
// strict chat templates (Qwen 3.5+ via vLLM, MiniMax, etc.) reject
|
|
106
|
+
// follow-up system messages with a 400.
|
|
107
|
+
const isOpenAIHost = provider === "openai" || baseUrl.includes("api.openai.com");
|
|
108
|
+
const isAzureHost =
|
|
109
|
+
provider === "azure" ||
|
|
110
|
+
baseUrl.includes(".openai.azure.com") ||
|
|
111
|
+
baseUrl.includes("models.inference.ai.azure.com") ||
|
|
112
|
+
baseUrl.includes("azure.com/openai");
|
|
113
|
+
const isOpenRouter = provider === "openrouter" || baseUrl.includes("openrouter.ai");
|
|
114
|
+
const isTogether = provider === "together" || baseUrl.includes("api.together.xyz");
|
|
115
|
+
const isFireworks = baseUrl.includes("fireworks.ai");
|
|
116
|
+
const isGroqHost = provider === "groq" || baseUrl.includes("api.groq.com");
|
|
117
|
+
const isCopilotHost = provider === "github-copilot";
|
|
118
|
+
const isZenmuxHost = provider === "zenmux";
|
|
119
|
+
// Endpoints that MUST receive a single system block. MiniMax's OpenAI
|
|
120
|
+
// endpoint returns error 2013 on multiple system messages; Alibaba's
|
|
121
|
+
// Dashscope and Qwen Portal serve Qwen models whose chat template
|
|
122
|
+
// raises "System message must be at the beginning" if any system
|
|
123
|
+
// message appears past index 0.
|
|
124
|
+
const isMiniMaxHost =
|
|
125
|
+
provider === "minimax-code" ||
|
|
126
|
+
provider === "minimax-code-cn" ||
|
|
127
|
+
baseUrl.includes("api.minimax.io") ||
|
|
128
|
+
baseUrl.includes("api.minimaxi.com");
|
|
129
|
+
const isQwenPortal = provider === "qwen-portal" || baseUrl.includes("portal.qwen.ai");
|
|
130
|
+
const supportsMultipleSystemMessagesDefault =
|
|
131
|
+
!isMiniMaxHost &&
|
|
132
|
+
!isAlibaba &&
|
|
133
|
+
!isQwenPortal &&
|
|
134
|
+
(isOpenAIHost ||
|
|
135
|
+
isAzureHost ||
|
|
136
|
+
isOpenRouter ||
|
|
137
|
+
isCerebras ||
|
|
138
|
+
isTogether ||
|
|
139
|
+
isFireworks ||
|
|
140
|
+
isGroqHost ||
|
|
141
|
+
isDeepseekFamily ||
|
|
142
|
+
isMistral ||
|
|
143
|
+
isGrok ||
|
|
144
|
+
isZai ||
|
|
145
|
+
isCopilotHost ||
|
|
146
|
+
isZenmuxHost);
|
|
147
|
+
|
|
102
148
|
const reasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
|
|
103
149
|
provider === "groq" && model.id === "qwen/qwen3-32b"
|
|
104
150
|
? ({
|
|
@@ -115,6 +161,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
115
161
|
return {
|
|
116
162
|
supportsStore: !isNonStandard,
|
|
117
163
|
supportsDeveloperRole: !isNonStandard,
|
|
164
|
+
supportsMultipleSystemMessages: supportsMultipleSystemMessagesDefault,
|
|
118
165
|
supportsReasoningEffort: !isGrok && !isZai,
|
|
119
166
|
reasoningEffortMap,
|
|
120
167
|
supportsUsageInStreaming: !isCerebras,
|
|
@@ -175,6 +222,8 @@ export function resolveOpenAICompat(
|
|
|
175
222
|
return {
|
|
176
223
|
supportsStore: model.compat.supportsStore ?? detected.supportsStore,
|
|
177
224
|
supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
|
|
225
|
+
supportsMultipleSystemMessages:
|
|
226
|
+
model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
|
|
178
227
|
supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
|
179
228
|
reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
|
|
180
229
|
supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
|
|
@@ -1191,8 +1191,18 @@ export function convertMessages(
|
|
|
1191
1191
|
if (systemPrompts.length > 0) {
|
|
1192
1192
|
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
|
1193
1193
|
const role = useDeveloperRole ? "developer" : "system";
|
|
1194
|
-
|
|
1195
|
-
|
|
1194
|
+
// Default to one block per ordered system prompt so the leading prefix
|
|
1195
|
+
// stays byte-identical between turns and the provider's KV cache can
|
|
1196
|
+
// reuse it. Hosts whose chat templates reject follow-up system messages
|
|
1197
|
+
// (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
|
|
1198
|
+
// via `compat.supportsMultipleSystemMessages = false`; in that mode we
|
|
1199
|
+
// coalesce into a single message joined by `\n\n`.
|
|
1200
|
+
if (compat.supportsMultipleSystemMessages) {
|
|
1201
|
+
for (const systemPrompt of systemPrompts) {
|
|
1202
|
+
params.push({ role, content: systemPrompt });
|
|
1203
|
+
}
|
|
1204
|
+
} else {
|
|
1205
|
+
params.push({ role, content: systemPrompts.join("\n\n") });
|
|
1196
1206
|
}
|
|
1197
1207
|
}
|
|
1198
1208
|
|
|
@@ -430,13 +430,16 @@ function buildParams(
|
|
|
430
430
|
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
431
431
|
params.include = ["reasoning.encrypted_content"];
|
|
432
432
|
|
|
433
|
-
if (options?.reasoning || options?.reasoningSummary) {
|
|
434
|
-
params.reasoning = {
|
|
433
|
+
if (options?.reasoning || options?.reasoningSummary !== undefined) {
|
|
434
|
+
const reasoningParams: NonNullable<typeof params.reasoning> = {
|
|
435
435
|
effort: mapReasoningEffort(options?.reasoning || "medium", model.compat?.reasoningEffortMap) as NonNullable<
|
|
436
436
|
OpenAIResponsesSamplingParams["reasoning"]
|
|
437
437
|
>["effort"],
|
|
438
|
-
summary: options?.reasoningSummary || "auto",
|
|
439
438
|
};
|
|
439
|
+
if (options?.reasoningSummary !== null) {
|
|
440
|
+
reasoningParams.summary = options?.reasoningSummary || "auto";
|
|
441
|
+
}
|
|
442
|
+
params.reasoning = reasoningParams;
|
|
440
443
|
} else if (model.name.startsWith("gpt-5")) {
|
|
441
444
|
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
|
442
445
|
messages.push({
|
package/src/stream.ts
CHANGED
|
@@ -462,6 +462,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
462
462
|
...base,
|
|
463
463
|
thinkingEnabled: false,
|
|
464
464
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
465
|
+
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
465
466
|
});
|
|
466
467
|
}
|
|
467
468
|
|
|
@@ -471,6 +472,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
471
472
|
...base,
|
|
472
473
|
thinkingEnabled: false,
|
|
473
474
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
475
|
+
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
474
476
|
});
|
|
475
477
|
}
|
|
476
478
|
|
|
@@ -483,6 +485,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
483
485
|
thinkingEnabled: true,
|
|
484
486
|
effort,
|
|
485
487
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
488
|
+
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
486
489
|
});
|
|
487
490
|
}
|
|
488
491
|
|
|
@@ -492,6 +495,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
492
495
|
thinkingEnabled: true,
|
|
493
496
|
thinkingBudgetTokens: thinkingBudget,
|
|
494
497
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
498
|
+
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
495
499
|
});
|
|
496
500
|
}
|
|
497
501
|
|
|
@@ -509,6 +513,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
509
513
|
...base,
|
|
510
514
|
thinkingEnabled: false,
|
|
511
515
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
516
|
+
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
512
517
|
});
|
|
513
518
|
} else {
|
|
514
519
|
return castApi<"anthropic-messages">({
|
|
@@ -517,6 +522,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
517
522
|
thinkingEnabled: true,
|
|
518
523
|
thinkingBudgetTokens: thinkingBudget,
|
|
519
524
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
525
|
+
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
520
526
|
});
|
|
521
527
|
}
|
|
522
528
|
}
|
|
@@ -564,6 +570,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
564
570
|
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
565
571
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
566
572
|
serviceTier: options?.serviceTier,
|
|
573
|
+
reasoningSummary: options?.hideThinkingSummary ? null : undefined,
|
|
567
574
|
});
|
|
568
575
|
|
|
569
576
|
case "azure-openai-responses":
|
|
@@ -572,6 +579,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
572
579
|
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
573
580
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
574
581
|
serviceTier: options?.serviceTier,
|
|
582
|
+
reasoningSummary: options?.hideThinkingSummary ? null : undefined,
|
|
575
583
|
});
|
|
576
584
|
|
|
577
585
|
case "openai-codex-responses":
|
|
@@ -581,6 +589,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
581
589
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
582
590
|
serviceTier: options?.serviceTier,
|
|
583
591
|
preferWebsockets: options?.preferWebsockets,
|
|
592
|
+
reasoningSummary: options?.hideThinkingSummary ? null : undefined,
|
|
584
593
|
});
|
|
585
594
|
|
|
586
595
|
case "google-generative-ai": {
|
package/src/types.ts
CHANGED
|
@@ -255,6 +255,14 @@ export interface SimpleStreamOptions extends StreamOptions {
|
|
|
255
255
|
* this way when `reasoning` is undefined.
|
|
256
256
|
*/
|
|
257
257
|
disableReasoning?: boolean;
|
|
258
|
+
/**
|
|
259
|
+
* If true, request that the provider omit thinking/reasoning summaries
|
|
260
|
+
* from the response (e.g. Anthropic `thinking.display = "omitted"`,
|
|
261
|
+
* OpenAI Responses `reasoning.summary` left unset). The model still
|
|
262
|
+
* reasons internally; only the human-readable summary stream is dropped.
|
|
263
|
+
* Useful when the UI hides thinking blocks anyway and the summary is wasted bandwidth.
|
|
264
|
+
*/
|
|
265
|
+
hideThinkingSummary?: boolean;
|
|
258
266
|
/** Custom token budgets for thinking levels (token-based providers only) */
|
|
259
267
|
thinkingBudgets?: ThinkingBudgets;
|
|
260
268
|
/** Cursor exec handlers for local tool execution */
|
|
@@ -540,6 +548,19 @@ export interface OpenAICompat {
|
|
|
540
548
|
supportsStore?: boolean;
|
|
541
549
|
/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
|
|
542
550
|
supportsDeveloperRole?: boolean;
|
|
551
|
+
/**
|
|
552
|
+
* Whether the provider's chat-completions endpoint accepts multiple
|
|
553
|
+
* leading `system`/`developer` messages. When false, ordered system
|
|
554
|
+
* prompts are coalesced into a single message joined by `\n\n` so
|
|
555
|
+
* strict chat templates (e.g. Qwen-served via vLLM, MiniMax) accept
|
|
556
|
+
* the request. Default: detected per provider/baseUrl. Canonical
|
|
557
|
+
* OpenAI/Azure/OpenRouter/Cerebras/Together/Fireworks/Groq/DeepSeek/
|
|
558
|
+
* Mistral/xAI/Z.ai/GitHub Copilot/Zenmux are treated as `true`;
|
|
559
|
+
* unknown or strict-template hosts default to `false`. Setting this
|
|
560
|
+
* to `true` preserves separate blocks, which is preferred for
|
|
561
|
+
* KV-cache reuse when the trailing prompt changes between calls.
|
|
562
|
+
*/
|
|
563
|
+
supportsMultipleSystemMessages?: boolean;
|
|
543
564
|
/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
|
|
544
565
|
supportsReasoningEffort?: boolean;
|
|
545
566
|
/** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */
|