@oh-my-pi/pi-ai 14.6.6 → 14.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,25 @@
 
 ## [Unreleased]
 
+## [14.7.0] - 2026-05-04
+### Breaking Changes
+
+- Changed `Context.systemPrompt` from a string to `string[]`; callers must now pass an array of prompts instead of a single string
+- Passing a non-array system prompt now throws at runtime, because request builders normalize system prompts as an array
+
+### Added
+
+- Added support for multiple system prompts by changing `Context.systemPrompt` to an ordered string array and preserving provider-appropriate instruction precedence
+
+### Changed
+
+- Changed request builders for Anthropic, OpenAI, Bedrock, Azure, Cursor, Google, and Ollama to propagate every non-empty system prompt entry without demoting durable instructions into ordinary conversation turns
+
+### Fixed
+
+- Filtered out empty normalized system prompts so blank entries are no longer sent to providers
+- Removed blank system prompt strings from provider payloads to avoid sending empty instruction messages
+
 ## [14.6.6] - 2026-05-04
 
 ### Added
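
For consumers, the breaking change amounts to wrapping the old string in an array. A minimal migration sketch (the `toSystemPromptArray` helper is hypothetical, not part of the package API):

```ts
import type { Context } from "@oh-my-pi/pi-ai";

// 14.6.x: systemPrompt was a single string.
// 14.7.0: systemPrompt is an ordered string array; blank entries are
// filtered out before being sent to any provider.
const context: Context = {
  systemPrompt: ["You are a helpful assistant.", "Answer concisely."],
  messages: [{ role: "user", content: "What time is it?" }],
};

// Hypothetical shim for call sites that may still hold a plain string:
function toSystemPromptArray(value: string | string[] | undefined): string[] | undefined {
  if (value === undefined) return undefined;
  return Array.isArray(value) ? value : [value];
}
```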
package/README.md CHANGED
@@ -107,7 +107,7 @@ const tools: Tool[] = [
 
 // Build a conversation context (easily serializable and transferable between models)
 const context: Context = {
-  systemPrompt: "You are a helpful assistant.",
+  systemPrompt: ["You are a helpful assistant."],
   messages: [{ role: "user", content: "What time is it?" }],
   tools,
 };
@@ -873,7 +873,7 @@ import { Context, getModel, complete } from "@oh-my-pi/pi-ai";
 
 // Create and use a context
 const context: Context = {
-  systemPrompt: "You are a helpful assistant.",
+  systemPrompt: ["You are a helpful assistant."],
   messages: [{ role: "user", content: "What is TypeScript?" }],
 };
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "@oh-my-pi/pi-ai",
-  "version": "14.6.6",
+  "version": "14.7.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "homepage": "https://github.com/can1357/oh-my-pi",
   "author": "Can Boluk",
@@ -46,8 +46,8 @@
   "@aws-sdk/credential-provider-node": "^3.972.36",
   "@bufbuild/protobuf": "^2.12.0",
   "@google/genai": "^1.50.1",
-  "@oh-my-pi/pi-natives": "14.6.6",
-  "@oh-my-pi/pi-utils": "14.6.6",
+  "@oh-my-pi/pi-natives": "14.7.0",
+  "@oh-my-pi/pi-utils": "14.7.0",
   "@sinclair/typebox": "^0.34.49",
   "@smithy/node-http-handler": "^4.6.1",
   "ajv": "^8.20.0",
package/src/models.json CHANGED
@@ -12212,8 +12212,8 @@
       "cacheRead": 0,
       "cacheWrite": 0
     },
-    "contextWindow": 262140,
-    "maxTokens": 262140,
+    "contextWindow": 262144,
+    "maxTokens": 262144,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -21575,6 +21575,25 @@
     "contextWindow": 222222,
     "maxTokens": 8888
   },
+  "deepseek/deepseek-latest": {
+    "id": "deepseek/deepseek-latest",
+    "name": "deepseek/deepseek-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "deepseek/deepseek-prover-v2-671b": {
     "id": "deepseek/deepseek-prover-v2-671b",
     "name": "deepseek/deepseek-prover-v2-671b",
@@ -25673,6 +25692,25 @@
     "contextWindow": 222222,
     "maxTokens": 8888
   },
+  "minimax/minimax-latest": {
+    "id": "minimax/minimax-latest",
+    "name": "minimax/minimax-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "minimax/minimax-m2-her": {
     "id": "minimax/minimax-m2-her",
     "name": "minimax/minimax-m2-her",
@@ -26487,6 +26525,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "moonshotai/kimi-latest": {
+    "id": "moonshotai/kimi-latest",
+    "name": "moonshotai/kimi-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "NeverSleep/Llama-3-Lumimaid-70B-v0.1": {
     "id": "NeverSleep/Llama-3-Lumimaid-70B-v0.1",
     "name": "NeverSleep/Llama-3-Lumimaid-70B-v0.1",
@@ -27631,6 +27688,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "openai/gpt-chat-latest": {
+    "id": "openai/gpt-chat-latest",
+    "name": "openai/gpt-chat-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "openai/gpt-latest": {
     "id": "openai/gpt-latest",
     "name": "openai/gpt-latest",
@@ -30360,7 +30436,7 @@
     "api": "openai-completions",
     "provider": "nanogpt",
     "baseUrl": "https://nano-gpt.com/api/v1",
-    "reasoning": false,
+    "reasoning": true,
     "input": [
       "text"
     ],
@@ -30371,7 +30447,12 @@
       "cacheWrite": 0
     },
     "contextWindow": 222222,
-    "maxTokens": 8888
+    "maxTokens": 8888,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "xhigh"
+    }
   },
   "TEE/glm-4.6": {
     "id": "TEE/glm-4.6",
@@ -31720,6 +31801,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "x-ai/grok-latest": {
+    "id": "x-ai/grok-latest",
+    "name": "x-ai/grok-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "xiaomi/mimo-v2-flash": {
     "id": "xiaomi/mimo-v2-flash",
     "name": "MiMo-V2-Flash",
@@ -32360,6 +32460,25 @@
       "minLevel": "minimal",
       "maxLevel": "xhigh"
     }
+  },
+  "zai-org/glm-latest": {
+    "id": "zai-org/glm-latest",
+    "name": "zai-org/glm-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
   }
 },
 "nvidia": {
@@ -33365,6 +33484,31 @@
       "maxLevel": "xhigh"
     }
   },
+  "moonshotai/kimi-k2.6": {
+    "id": "moonshotai/kimi-k2.6",
+    "name": "Kimi K2.6",
+    "api": "openai-completions",
+    "provider": "nvidia",
+    "baseUrl": "https://integrate.api.nvidia.com/v1",
+    "reasoning": true,
+    "input": [
+      "text",
+      "image"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 262144,
+    "maxTokens": 262144,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "xhigh"
+    }
+  },
   "nvidia/llama-3.1-nemotron-51b-instruct": {
     "id": "nvidia/llama-3.1-nemotron-51b-instruct",
     "name": "Llama 3.1 Nemotron 51b Instruct",
@@ -38354,8 +38498,8 @@
       "text"
     ],
     "cost": {
-      "input": 0.21,
-      "output": 0.7899999999999999,
+      "input": 0.27,
+      "output": 0.95,
       "cacheRead": 0.13,
       "cacheWrite": 0
     },
@@ -38479,8 +38623,8 @@
       "cacheRead": 0.003625,
      "cacheWrite": 0
     },
-    "contextWindow": 1048576,
-    "maxTokens": 384000,
+    "contextWindow": 131000,
+    "maxTokens": 131000,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -42272,13 +42416,13 @@
       "text"
     ],
     "cost": {
-      "input": 0.08,
-      "output": 0.28,
+      "input": 0.09,
+      "output": 0.44999999999999996,
       "cacheRead": 0,
       "cacheWrite": 0
     },
     "contextWindow": 40960,
-    "maxTokens": 16384,
+    "maxTokens": 20000,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -42884,13 +43028,13 @@
       "image"
     ],
     "cost": {
-      "input": 0.1625,
-      "output": 1.3,
-      "cacheRead": 0,
+      "input": 0.15,
+      "output": 1,
+      "cacheRead": 0.049999999999999996,
       "cacheWrite": 0
     },
     "contextWindow": 262144,
-    "maxTokens": 65536,
+    "maxTokens": 262144,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -43047,6 +43191,31 @@
       "maxLevel": "high"
     }
   },
+  "qwen/qwen3.6-35b-a3b": {
+    "id": "qwen/qwen3.6-35b-a3b",
+    "name": "Qwen: Qwen3.6 35B A3B",
+    "api": "openai-completions",
+    "provider": "openrouter",
+    "baseUrl": "https://openrouter.ai/api/v1",
+    "reasoning": true,
+    "input": [
+      "text",
+      "image"
+    ],
+    "cost": {
+      "input": 0.15,
+      "output": 1,
+      "cacheRead": 0.049999999999999996,
+      "cacheWrite": 0
+    },
+    "contextWindow": 262144,
+    "maxTokens": 262144,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "high"
+    }
+  },
   "qwen/qwen3.6-flash": {
     "id": "qwen/qwen3.6-flash",
     "name": "Qwen: Qwen3.6 Flash",
@@ -51988,7 +52157,7 @@
   },
   "glm-5v-turbo": {
     "id": "glm-5v-turbo",
-    "name": "glm-5v-turbo",
+    "name": "GLM-5V-Turbo",
     "api": "anthropic-messages",
     "provider": "zai",
     "baseUrl": "https://api.z.ai/api/anthropic",
@@ -464,13 +464,14 @@ function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boo
 }
 
 function buildSystemPrompt(
-  systemPrompt: string | undefined,
+  systemPrompt: readonly string[] | undefined,
   model: Model<"bedrock-converse-stream">,
   cacheRetention: CacheRetention,
 ): SystemContentBlock[] | undefined {
-  if (!systemPrompt) return undefined;
+  const prompts = systemPrompt?.map(prompt => prompt.toWellFormed()).filter(prompt => prompt.length > 0) ?? [];
+  if (prompts.length === 0) return undefined;
 
-  const blocks: SystemContentBlock[] = [{ text: systemPrompt.toWellFormed() }];
+  const blocks: SystemContentBlock[] = prompts.map(prompt => ({ text: prompt }));
 
   // Add cache point for supported Claude models
   if (cacheRetention !== "none" && supportsPromptCaching(model)) {
@@ -33,7 +33,13 @@ import type {
   ToolResultMessage,
   Usage,
 } from "../types";
-import { isAnthropicOAuthToken, isRecord, normalizeToolCallId, resolveCacheRetention } from "../utils";
+import {
+  isAnthropicOAuthToken,
+  isRecord,
+  normalizeSystemPrompts,
+  normalizeToolCallId,
+  resolveCacheRetention,
+} from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { isFoundryEnabled } from "../utils/foundry";
@@ -1417,18 +1423,18 @@ type SystemBlockOptions = {
 };
 
 export function buildAnthropicSystemBlocks(
-  systemPrompt: string | undefined,
+  systemPrompt: readonly string[] | undefined,
   options: SystemBlockOptions = {},
 ): AnthropicSystemBlock[] | undefined {
   const { includeClaudeCodeInstruction = false, extraInstructions = [], billingPayload, cacheControl } = options;
   const blocks: AnthropicSystemBlock[] = [];
-  const sanitizedPrompt = systemPrompt ? systemPrompt.toWellFormed() : "";
+  const sanitizedPrompts = normalizeSystemPrompts(systemPrompt);
   const trimmedInstructions = extraInstructions.map(instruction => instruction.trim()).filter(Boolean);
-  const hasBillingHeader = sanitizedPrompt.includes(CLAUDE_BILLING_HEADER_PREFIX);
+  const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.includes(CLAUDE_BILLING_HEADER_PREFIX));
 
   if (includeClaudeCodeInstruction && !hasBillingHeader) {
     const payloadSeed = billingPayload ?? {
-      system: sanitizedPrompt,
+      system: sanitizedPrompts,
       extraInstructions: trimmedInstructions,
     };
     blocks.push(
@@ -1441,19 +1447,19 @@ export function buildAnthropicSystemBlocks(
   }
 
   for (const instruction of trimmedInstructions) {
-    blocks.push({
-      type: "text",
-      text: instruction,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
-    });
+    blocks.push({ type: "text", text: instruction });
   }
 
-  if (systemPrompt) {
-    blocks.push({
-      type: "text",
-      text: sanitizedPrompt,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
-    });
+  for (const systemPrompt of sanitizedPrompts) {
+    blocks.push({ type: "text", text: systemPrompt });
+  }
+
+  // Attach cache_control to the LAST emitted block only. Anthropic breakpoints are cumulative
+  // prefix cuts, so a single trailing breakpoint covers every preceding block; spreading
+  // cache_control across N blocks wastes slots against the 4-breakpoint cap.
+  const lastIndex = blocks.length - 1;
+  if (cacheControl && lastIndex >= 0) {
+    blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
   }
 
   return blocks.length > 0 ? blocks : undefined;
@@ -1921,10 +1927,11 @@ function buildParams(
   }
 
   const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
+  const billingSystemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const billingPayload = shouldInjectClaudeCodeInstruction
     ? {
         ...params,
-        ...(context.systemPrompt ? { system: context.systemPrompt.toWellFormed() } : {}),
+        ...(billingSystemPrompts.length > 0 ? { system: billingSystemPrompts } : {}),
       }
     : undefined;
   const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
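
A sketch of the resulting block layout for two system prompts plus one extra instruction. The `{ type: "ephemeral" }` cache_control value is an assumption (it is Anthropic's standard breakpoint shape, but this diff never shows it):

```ts
// Hypothetical inputs:
//   extraInstructions = ["Use metric units."]
//   systemPrompt      = ["You are a helpful assistant.", "Answer tersely."]
// buildAnthropicSystemBlocks now yields:
const blocks = [
  { type: "text", text: "Use metric units." },
  { type: "text", text: "You are a helpful assistant." },
  // Only the last block carries the breakpoint; since Anthropic caching cuts
  // at cumulative prefixes, this one marker covers all three blocks.
  { type: "text", text: "Answer tersely.", cache_control: { type: "ephemeral" } },
];
```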
@@ -18,6 +18,7 @@ import {
   type Tool,
   type ToolChoice,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
@@ -28,7 +29,7 @@ import {
   iterateWithIdleTimeout,
 } from "../utils/idle-iterator";
 import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
-import { supportsDeveloperRole } from "./openai-responses";
+import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
 import {
   appendResponsesToolResultMessages,
   convertResponsesAssistantMessage,
@@ -273,7 +274,7 @@ function buildParams(
   model: deploymentName,
   input: messages,
   stream: true,
-  prompt_cache_key: options?.sessionId,
+  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
 };
 
 if (options?.maxTokens) {
@@ -350,12 +351,12 @@ function convertMessages(
   const transformedMessages = transformMessages(context.messages, model, normalizeResponsesToolCallIdForTransform);
   const knownCallIds = new Set<string>();
 
-  if (context.systemPrompt) {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
     const role = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
-    messages.push({
-      role,
-      content: context.systemPrompt.toWellFormed(),
-    });
+    for (const systemPrompt of systemPrompts) {
+      messages.push({ role, content: systemPrompt });
+    }
   }
 
   let msgIndex = 0;
@@ -26,6 +26,7 @@ import type {
   ToolCall,
   ToolResultMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson } from "../utils/json-parse";
 import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
@@ -2145,12 +2146,29 @@ function findLastUserMessageIndex(messages: Message[]): number {
  * only an empty placeholder where historical user turns should be.
  * The last user message is excluded because it is sent in the action.
  */
+/**
+ * Build one Cursor system-message JSON blob per ordered system prompt. Emitting separate blobs
+ * (rather than a single `\n\n`-joined string) lets Cursor's blob cache hit independently per
+ * entry: changing only the last prompt does not invalidate earlier blob ids, so the prefix
+ * up to the changed prompt remains cached on the server side.
+ *
+ * When no system prompts are provided, returns a single default greeting so we never emit
+ * an empty `rootPromptMessagesJson` head.
+ */
+export function buildCursorSystemPromptJsons(systemPrompt: readonly string[] | undefined): string[] {
+  const systemPrompts = normalizeSystemPrompts(systemPrompt);
+  if (systemPrompts.length === 0) {
+    return [JSON.stringify({ role: "system", content: "You are a helpful assistant." })];
+  }
+  return systemPrompts.map(content => JSON.stringify({ role: "system", content }));
+}
+
 function buildRootPromptMessagesJson(
   messages: Message[],
-  systemPromptId: Uint8Array,
+  systemPromptIds: Uint8Array[],
   blobStore: Map<string, Uint8Array>,
 ): Uint8Array[] {
-  const entries: Uint8Array[] = [systemPromptId];
+  const entries: Uint8Array[] = [...systemPromptIds];
   const lastUserIdx = findLastUserMessageIndex(messages);
 
   const pushJson = (obj: unknown) => {
@@ -2299,12 +2317,9 @@ function buildGrpcRequest(
 } {
   const blobStore = state.blobStore;
 
-  const systemPromptJson = JSON.stringify({
-    role: "system",
-    content: context.systemPrompt || "You are a helpful assistant.",
-  });
-  const systemPromptBytes = new TextEncoder().encode(systemPromptJson);
-  const systemPromptId = storeCursorBlob(blobStore, systemPromptBytes);
+  const systemPromptIds = buildCursorSystemPromptJsons(context.systemPrompt).map(json =>
+    storeCursorBlob(blobStore, new TextEncoder().encode(json)),
+  );
 
   const lastMessage = context.messages[context.messages.length - 1];
   const userText =
@@ -2339,18 +2354,19 @@ function buildGrpcRequest(
   // field (not `turns[]`) to construct the actual model prompt; if we only send the
   // system prompt here, multi-turn conversations lose prior context and the model
   // sees only the current user message.
-  const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptId, blobStore);
+  const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptIds, blobStore);
 
   // Preserve cached non-history state fields (todos, file states, summaries, etc.)
   // when the system prompt is unchanged; otherwise start fresh.
-  const hasMatchingPrompt = state.conversationState?.rootPromptMessagesJson?.some(entry =>
-    Buffer.from(entry).equals(systemPromptId),
-  );
+  const cachedPromptHead = state.conversationState?.rootPromptMessagesJson?.slice(0, systemPromptIds.length) ?? [];
+  const hasMatchingPrompt =
+    cachedPromptHead.length === systemPromptIds.length &&
+    systemPromptIds.every((id, idx) => Buffer.from(cachedPromptHead[idx]).equals(id));
   const baseState =
     state.conversationState && hasMatchingPrompt
       ? state.conversationState
       : create(ConversationStateStructureSchema, {
-          rootPromptMessagesJson: [systemPromptId],
+          rootPromptMessagesJson: systemPromptIds,
           turns: [],
           todos: [],
           pendingToolCalls: [],
@@ -18,6 +18,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
 import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
@@ -865,8 +866,8 @@ export function buildRequest(
   options: GoogleGeminiCliOptions = {},
   isAntigravity = false,
 ): CloudCodeAssistRequest {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
-
   const generationConfig: CloudCodeAssistRequest["request"]["generationConfig"] = {};
   if (options.temperature !== undefined) {
     generationConfig.temperature = options.temperature;
@@ -913,9 +914,9 @@
   }
 
   // System instruction must be object with parts, not plain string
-  if (context.systemPrompt) {
+  if (systemPrompts.length > 0) {
     request.systemInstruction = {
-      parts: [{ text: context.systemPrompt.toWellFormed() }],
+      parts: systemPrompts.map(text => ({ text })),
     };
   }
 
@@ -18,6 +18,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import type { GoogleThinkingLevel } from "./google-gemini-cli";
@@ -369,6 +370,7 @@ function buildParams(
   context: Context,
   options: GoogleVertexOptions = {},
 ): GenerateContentParameters {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
 
   const generationConfig: GoogleVertexSamplingConfig = {};
@@ -396,7 +398,7 @@
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
-    ...(context.systemPrompt && { systemInstruction: context.systemPrompt.toWellFormed() }),
+    ...(systemPrompts.length > 0 && { systemInstruction: { parts: systemPrompts.map(text => ({ text })) } }),
     ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools, model) }),
   };
 
@@ -17,6 +17,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import type { GoogleThinkingLevel } from "./google-gemini-cli";
@@ -313,6 +314,7 @@ function buildParams(
   context: Context,
   options: GoogleOptions = {},
 ): GenerateContentParameters {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
 
   const generationConfig: GoogleSamplingConfig = {};
@@ -340,7 +342,7 @@
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
-    ...(context.systemPrompt && { systemInstruction: context.systemPrompt.toWellFormed() }),
+    ...(systemPrompts.length > 0 && { systemInstruction: { parts: systemPrompts.map(text => ({ text })) } }),
     ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools, model) }),
   };
 
@@ -14,6 +14,7 @@ import type {
   ToolResultMessage,
   UserMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import { parseStreamingJson } from "../utils/json-parse";
@@ -186,10 +187,14 @@ function convertMessage(message: Message): OllamaMessage {
 
 function convertMessages(model: Model<"ollama-chat">, context: Context): OllamaMessage[] {
   const messages: Message[] = [];
-  if (context.systemPrompt) {
+  // Emit one developer message per ordered system prompt. The wire role is mapped to "system"
+  // by `convertMessage`, but keeping the prompts separate preserves prefix-cache stability:
+  // if only the trailing prompt changes between calls, the leading system messages keep
+  // their identical token prefix so KV-cache reuse covers them.
+  for (const systemPrompt of normalizeSystemPrompts(context.systemPrompt)) {
     messages.push({
       role: "developer",
-      content: context.systemPrompt,
+      content: systemPrompt,
       timestamp: Date.now(),
     });
   }
@@ -77,7 +77,7 @@ export async function transformRequestBody(
   body: RequestBody,
   model: Model<Api>,
   options: CodexRequestOptions = {},
-  prompt?: { instructions: string; developerMessages: string[] },
+  prompt?: { developerMessages: string[] },
 ): Promise<RequestBody> {
   body.store = false;
   body.stream = true;
@@ -36,6 +36,7 @@ import {
   getOpenAIResponsesHistoryItems,
   getOpenAIResponsesHistoryPayload,
   normalizeResponsesToolCallId,
+  normalizeSystemPrompts,
 } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
@@ -51,6 +52,7 @@ import {
   transformRequestBody,
 } from "./openai-codex/request-transformer";
 import { parseCodexError } from "./openai-codex/response-handler";
+import { normalizeOpenAIResponsesPromptCacheKey } from "./openai-responses";
 import {
   encodeResponsesToolCallId,
   encodeTextSignatureV1,
@@ -476,6 +478,7 @@ async function buildCodexRequestContext(
   const accountId = getAccountId(apiKey);
   const baseUrl = model.baseUrl || CODEX_BASE_URL;
   const url = resolveCodexResponsesUrl(baseUrl);
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
   const transformedBody = await buildTransformedCodexRequestBody(model, context, options);
   options?.onPayload?.(transformedBody);
 
@@ -490,8 +493,8 @@
   };
 
   const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
-  const sessionKey = getCodexWebSocketSessionKey(options?.sessionId, model, accountId, baseUrl);
-  const publicSessionKey = getCodexPublicSessionKey(options?.sessionId, model, baseUrl);
+  const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
+  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
   if (sessionKey && publicSessionKey) {
     providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
   }
@@ -520,7 +523,7 @@ async function buildTransformedCodexRequestBody(
   model: model.id,
   input: [...convertMessages(model, context)],
   stream: true,
-  prompt_cache_key: options?.sessionId,
+  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
 };
 
 if (options?.maxTokens) {
@@ -567,8 +570,11 @@
   }
 }
 
-params.instructions = context.systemPrompt;
-
+const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+if (systemPrompts.length > 0) {
+  params.instructions = systemPrompts[0];
+}
+const developerMessages = systemPrompts.slice(1);
 const codexOptions: CodexRequestOptions = {
   reasoningEffort: options?.reasoning,
   reasoningSummary: options?.reasoningSummary ?? "auto",
@@ -576,7 +582,7 @@
   include: options?.include,
 };
 
-return transformRequestBody(params, model, codexOptions);
+return transformRequestBody(params, model, codexOptions, { developerMessages });
 }
 
 async function openInitialCodexEventStream(
@@ -628,7 +634,7 @@
 async function openCodexWebSocketTransport(
   requestContext: CodexRequestContext,
   requestSetup: CodexRequestSetup,
-  options: OpenAICodexResponsesOptions | undefined,
+  _options: OpenAICodexResponsesOptions | undefined,
   websocketState: CodexWebSocketSessionState,
   retry: number,
 ): Promise<{
@@ -641,7 +647,7 @@
   requestContext.requestHeaders,
   requestContext.accountId,
   requestContext.apiKey,
-  options?.sessionId,
+  requestContext.transformedBody.prompt_cache_key,
   "websocket",
   websocketState,
 );
@@ -670,7 +676,7 @@
 async function openCodexSseTransport(
   requestContext: CodexRequestContext,
   requestSetup: CodexRequestSetup,
-  options: OpenAICodexResponsesOptions | undefined,
+  _options: OpenAICodexResponsesOptions | undefined,
   state: CodexWebSocketSessionState | undefined,
   body = requestContext.transformedBody,
 ): Promise<{
@@ -684,7 +690,7 @@
   requestContext.requestHeaders,
   requestContext.accountId,
   requestContext.apiKey,
-  options?.sessionId,
+  body.prompt_cache_key,
   body,
   state,
   requestSetup.requestSignal,
@@ -1559,9 +1565,10 @@ export async function prewarmOpenAICodexResponses(
   const accountId = getAccountId(apiKey);
   const baseUrl = model.baseUrl || CODEX_BASE_URL;
   const url = resolveCodexResponsesUrl(baseUrl);
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
   const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
-  const sessionKey = getCodexWebSocketSessionKey(options?.sessionId, model, accountId, baseUrl);
-  const publicSessionKey = getCodexPublicSessionKey(options?.sessionId, model, baseUrl);
+  const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
+  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
   if (publicSessionKey && sessionKey) {
     providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
   }
@@ -1574,7 +1581,7 @@
   { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
   accountId,
   apiKey,
-  options?.sessionId,
+  promptCacheKey,
   "websocket",
   state,
 );
@@ -1595,8 +1602,9 @@ function getCodexWebSocketSessionKey(
   accountId: string,
   baseUrl: string,
 ): string | undefined {
-  if (!sessionId || sessionId.length === 0) return undefined;
-  return `${accountId}:${baseUrl}:${model.id}:${sessionId}`;
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
+  if (!promptCacheKey) return undefined;
+  return `${accountId}:${baseUrl}:${model.id}:${promptCacheKey}`;
 }
 
 function getCodexPublicSessionKey(
@@ -1604,8 +1612,9 @@
   model: Model<"openai-codex-responses">,
   baseUrl: string,
 ): string | undefined {
-  if (!sessionId || sessionId.length === 0) return undefined;
-  return `${baseUrl}:${model.id}:${sessionId}`;
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
+  if (!promptCacheKey) return undefined;
+  return `${baseUrl}:${model.id}:${promptCacheKey}`;
 }
 
 function getCodexWebSocketSessionState(
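
A sketch of how the new prompt split above plays out; the names match the diff, and the prompt values are illustrative:

```ts
// context.systemPrompt = ["Main instructions.", "House style.", "Safety notes."]
const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
// params.instructions -> "Main instructions."  (first entry only)
// developerMessages   -> ["House style.", "Safety notes."]
// transformRequestBody(params, model, codexOptions, { developerMessages })
// then injects the remaining prompts as developer messages in the request body.
```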
@@ -33,6 +33,7 @@ import {
   type ToolChoice,
   type ToolResultMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { toFireworksWireModelId } from "../utils/fireworks-model-id";
@@ -1178,10 +1179,13 @@ export function convertMessages(
     return generateFallbackToolCallId(seed);
   };
 
-  if (context.systemPrompt) {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
     const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
     const role = useDeveloperRole ? "developer" : "system";
-    params.push({ role: role, content: context.systemPrompt.toWellFormed() });
+    for (const systemPrompt of systemPrompts) {
+      params.push({ role, content: systemPrompt });
+    }
   }
 
   let lastRole: string | null = null;
@@ -25,6 +25,7 @@ import {
   createOpenAIResponsesHistoryPayload,
   getOpenAIResponsesHistoryItems,
   getOpenAIResponsesHistoryPayload,
+  normalizeSystemPrompts,
   resolveCacheRetention,
   sanitizeOpenAIResponsesHistoryItemsForReplay,
 } from "../utils";
@@ -73,6 +74,13 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
   return undefined;
 }
 
+export function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined {
+  if (!sessionId || sessionId.length === 0) return undefined;
+  const wellFormed = sessionId.toWellFormed();
+  if (wellFormed.length <= 64) return wellFormed;
+  return `pc_${Bun.hash(wellFormed).toString(36)}`;
+}
+
 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
   reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -331,7 +339,9 @@ function createClient(
 function getOpenAIResponsesCacheSessionId(
   options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId"> | undefined,
 ): string | undefined {
-  return resolveCacheRetention(options?.cacheRetention) === "none" ? undefined : options?.sessionId;
+  return resolveCacheRetention(options?.cacheRetention) === "none"
+    ? undefined
+    : normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
 }
 
 function buildParams(
@@ -352,12 +362,11 @@
   );
   const messages: ResponseInput = [...conversationMessages];
 
-  if (context.systemPrompt) {
-    const role = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
-    messages.unshift({
-      role,
-      content: context.systemPrompt.toWellFormed(),
-    });
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
+    const role: "developer" | "system" =
+      model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
+    messages.unshift(...systemPrompts.map(systemPrompt => ({ role, content: systemPrompt })));
   }
 
   const cacheRetention = resolveCacheRetention(options?.cacheRetention);
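
`normalizeOpenAIResponsesPromptCacheKey` keeps well-formed session ids of 64 characters or fewer verbatim and compresses longer ones into a deterministic `pc_`-prefixed key via `Bun.hash`, so the same long session id always maps to the same cache key. A quick sketch of its behavior:

```ts
normalizeOpenAIResponsesPromptCacheKey(undefined);       // undefined
normalizeOpenAIResponsesPromptCacheKey("session-42");    // "session-42" (<= 64 chars, passed through)
normalizeOpenAIResponsesPromptCacheKey("x".repeat(100)); // "pc_..." (stable base-36 hash of the long id)
```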
package/src/types.ts CHANGED
@@ -502,7 +502,7 @@ export interface Tool<TParameters extends TSchema = TSchema> {
 }
 
 export interface Context {
-  systemPrompt?: string;
+  systemPrompt?: string[];
   messages: Message[];
   tools?: Tool[];
 }
package/src/utils.ts CHANGED
@@ -5,6 +5,9 @@ import type { CacheRetention, OpenAIResponsesHistoryPayload, ProviderPayload } f
 type OpenAIResponsesReplayItem = ResponseInput[number];
 
 export { isRecord } from "@oh-my-pi/pi-utils";
+export function normalizeSystemPrompts(systemPrompt: readonly string[] | undefined): string[] {
+  return systemPrompt?.map(prompt => prompt.toWellFormed()).filter(prompt => prompt.length > 0) ?? [];
+}
 
 export function toNumber(value: unknown): number | undefined {
   if (typeof value === "number" && Number.isFinite(value)) return value;
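
Every provider builder in this release funnels system prompts through `normalizeSystemPrompts`; its behavior, sketched:

```ts
normalizeSystemPrompts(undefined);      // []
normalizeSystemPrompts(["a", "", "b"]); // ["a", "b"] (order kept; zero-length entries dropped)
normalizeSystemPrompts(["  "]);         // ["  "]     (whitespace-only entries survive; only length 0 is filtered)
```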