@oh-my-pi/pi-ai 14.6.6 → 14.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,25 @@
 
 ## [Unreleased]
 
+## [14.7.0] - 2026-05-04
+### Breaking Changes
+
+- Changed `Context.systemPrompt` from a string to `string[]`; callers must now pass an array of prompts instead of a single string
+- Passing a non-array system prompt now throws at runtime, because request builders normalize system prompts as an array
+
+### Added
+
+- Added support for multiple system prompts by changing `Context.systemPrompt` to an ordered string array and preserving provider-appropriate instruction precedence
+
+### Changed
+
+- Changed request builders for Anthropic, OpenAI, Bedrock, Azure, Cursor, Google, and Ollama to propagate every non-empty system prompt entry without demoting durable instructions into ordinary conversation turns
+
+### Fixed
+
+- Filtered out empty normalized system prompts so blank entries are no longer sent to providers
+- Removed blank system prompt strings from provider payloads to avoid sending empty instruction messages
+
 ## [14.6.6] - 2026-05-04
 
 ### Added
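
For consumers, the breaking change amounts to wrapping the old string in an array. A minimal migration sketch (the `toSystemPromptArray` helper is hypothetical, not part of the package API):

```ts
import type { Context } from "@oh-my-pi/pi-ai";

// 14.6.x: systemPrompt was a single string.
// 14.7.0: systemPrompt is an ordered string array; blank entries are
// filtered out before being sent to any provider.
const context: Context = {
  systemPrompt: ["You are a helpful assistant.", "Answer concisely."],
  messages: [{ role: "user", content: "What time is it?" }],
};

// Hypothetical shim for call sites that may still hold a plain string:
function toSystemPromptArray(value: string | string[] | undefined): string[] | undefined {
  if (value === undefined) return undefined;
  return Array.isArray(value) ? value : [value];
}
```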
package/README.md CHANGED
@@ -107,7 +107,7 @@ const tools: Tool[] = [
 
 // Build a conversation context (easily serializable and transferable between models)
 const context: Context = {
-  systemPrompt: "You are a helpful assistant.",
+  systemPrompt: ["You are a helpful assistant."],
   messages: [{ role: "user", content: "What time is it?" }],
   tools,
 };
@@ -873,7 +873,7 @@ import { Context, getModel, complete } from "@oh-my-pi/pi-ai";
 
 // Create and use a context
 const context: Context = {
-  systemPrompt: "You are a helpful assistant.",
+  systemPrompt: ["You are a helpful assistant."],
   messages: [{ role: "user", content: "What is TypeScript?" }],
 };
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "@oh-my-pi/pi-ai",
-  "version": "14.6.6",
+  "version": "14.7.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "homepage": "https://github.com/can1357/oh-my-pi",
   "author": "Can Boluk",
@@ -46,8 +46,8 @@
   "@aws-sdk/credential-provider-node": "^3.972.36",
   "@bufbuild/protobuf": "^2.12.0",
   "@google/genai": "^1.50.1",
-  "@oh-my-pi/pi-natives": "14.6.6",
-  "@oh-my-pi/pi-utils": "14.6.6",
+  "@oh-my-pi/pi-natives": "14.7.0",
+  "@oh-my-pi/pi-utils": "14.7.0",
   "@sinclair/typebox": "^0.34.49",
   "@smithy/node-http-handler": "^4.6.1",
   "ajv": "^8.20.0",
package/src/models.json CHANGED
@@ -12212,8 +12212,8 @@
       "cacheRead": 0,
       "cacheWrite": 0
     },
-    "contextWindow": 262140,
-    "maxTokens": 262140,
+    "contextWindow": 262144,
+    "maxTokens": 262144,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -21575,6 +21575,25 @@
     "contextWindow": 222222,
     "maxTokens": 8888
   },
+  "deepseek/deepseek-latest": {
+    "id": "deepseek/deepseek-latest",
+    "name": "deepseek/deepseek-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "deepseek/deepseek-prover-v2-671b": {
     "id": "deepseek/deepseek-prover-v2-671b",
     "name": "deepseek/deepseek-prover-v2-671b",
@@ -25673,6 +25692,25 @@
     "contextWindow": 222222,
     "maxTokens": 8888
   },
+  "minimax/minimax-latest": {
+    "id": "minimax/minimax-latest",
+    "name": "minimax/minimax-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "minimax/minimax-m2-her": {
     "id": "minimax/minimax-m2-her",
     "name": "minimax/minimax-m2-her",
@@ -26487,6 +26525,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "moonshotai/kimi-latest": {
+    "id": "moonshotai/kimi-latest",
+    "name": "moonshotai/kimi-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "NeverSleep/Llama-3-Lumimaid-70B-v0.1": {
     "id": "NeverSleep/Llama-3-Lumimaid-70B-v0.1",
     "name": "NeverSleep/Llama-3-Lumimaid-70B-v0.1",
@@ -27631,6 +27688,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "openai/gpt-chat-latest": {
+    "id": "openai/gpt-chat-latest",
+    "name": "openai/gpt-chat-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "openai/gpt-latest": {
     "id": "openai/gpt-latest",
     "name": "openai/gpt-latest",
@@ -30360,7 +30436,7 @@
     "api": "openai-completions",
     "provider": "nanogpt",
     "baseUrl": "https://nano-gpt.com/api/v1",
-    "reasoning": false,
+    "reasoning": true,
     "input": [
       "text"
     ],
@@ -30371,7 +30447,12 @@
       "cacheWrite": 0
     },
     "contextWindow": 222222,
-    "maxTokens": 8888
+    "maxTokens": 8888,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "xhigh"
+    }
   },
   "TEE/glm-4.6": {
     "id": "TEE/glm-4.6",
@@ -31720,6 +31801,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "x-ai/grok-latest": {
+    "id": "x-ai/grok-latest",
+    "name": "x-ai/grok-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "xiaomi/mimo-v2-flash": {
     "id": "xiaomi/mimo-v2-flash",
     "name": "MiMo-V2-Flash",
@@ -32360,6 +32460,25 @@
       "minLevel": "minimal",
       "maxLevel": "xhigh"
     }
+  },
+  "zai-org/glm-latest": {
+    "id": "zai-org/glm-latest",
+    "name": "zai-org/glm-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
   }
 },
 "nvidia": {
@@ -33365,6 +33484,31 @@
       "maxLevel": "xhigh"
     }
   },
+  "moonshotai/kimi-k2.6": {
+    "id": "moonshotai/kimi-k2.6",
+    "name": "Kimi K2.6",
+    "api": "openai-completions",
+    "provider": "nvidia",
+    "baseUrl": "https://integrate.api.nvidia.com/v1",
+    "reasoning": true,
+    "input": [
+      "text",
+      "image"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 262144,
+    "maxTokens": 262144,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "xhigh"
+    }
+  },
   "nvidia/llama-3.1-nemotron-51b-instruct": {
     "id": "nvidia/llama-3.1-nemotron-51b-instruct",
     "name": "Llama 3.1 Nemotron 51b Instruct",
@@ -38354,8 +38498,8 @@
       "text"
     ],
     "cost": {
-      "input": 0.21,
-      "output": 0.7899999999999999,
+      "input": 0.27,
+      "output": 0.95,
       "cacheRead": 0.13,
       "cacheWrite": 0
     },
@@ -38479,8 +38623,8 @@
       "cacheRead": 0.003625,
      "cacheWrite": 0
     },
-    "contextWindow": 1048576,
-    "maxTokens": 384000,
+    "contextWindow": 131000,
+    "maxTokens": 131000,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -42272,13 +42416,13 @@
       "text"
     ],
     "cost": {
-      "input": 0.08,
-      "output": 0.28,
+      "input": 0.09,
+      "output": 0.44999999999999996,
       "cacheRead": 0,
       "cacheWrite": 0
     },
     "contextWindow": 40960,
-    "maxTokens": 16384,
+    "maxTokens": 20000,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -42884,13 +43028,13 @@
       "image"
     ],
     "cost": {
-      "input": 0.1625,
-      "output": 1.3,
-      "cacheRead": 0,
+      "input": 0.15,
+      "output": 1,
+      "cacheRead": 0.049999999999999996,
       "cacheWrite": 0
     },
     "contextWindow": 262144,
-    "maxTokens": 65536,
+    "maxTokens": 262144,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -43047,6 +43191,31 @@
       "maxLevel": "high"
     }
   },
+  "qwen/qwen3.6-35b-a3b": {
+    "id": "qwen/qwen3.6-35b-a3b",
+    "name": "Qwen: Qwen3.6 35B A3B",
+    "api": "openai-completions",
+    "provider": "openrouter",
+    "baseUrl": "https://openrouter.ai/api/v1",
+    "reasoning": true,
+    "input": [
+      "text",
+      "image"
+    ],
+    "cost": {
+      "input": 0.15,
+      "output": 1,
+      "cacheRead": 0.049999999999999996,
+      "cacheWrite": 0
+    },
+    "contextWindow": 262144,
+    "maxTokens": 262144,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "high"
+    }
+  },
   "qwen/qwen3.6-flash": {
     "id": "qwen/qwen3.6-flash",
     "name": "Qwen: Qwen3.6 Flash",
@@ -51988,7 +52157,7 @@
   },
   "glm-5v-turbo": {
     "id": "glm-5v-turbo",
-    "name": "glm-5v-turbo",
+    "name": "GLM-5V-Turbo",
     "api": "anthropic-messages",
     "provider": "zai",
     "baseUrl": "https://api.z.ai/api/anthropic",
@@ -464,13 +464,14 @@ function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boo
 }
 
 function buildSystemPrompt(
-  systemPrompt: string | undefined,
+  systemPrompt: readonly string[] | undefined,
   model: Model<"bedrock-converse-stream">,
   cacheRetention: CacheRetention,
 ): SystemContentBlock[] | undefined {
-  if (!systemPrompt) return undefined;
+  const prompts = systemPrompt?.map(prompt => prompt.toWellFormed()).filter(prompt => prompt.length > 0) ?? [];
+  if (prompts.length === 0) return undefined;
 
-  const blocks: SystemContentBlock[] = [{ text: systemPrompt.toWellFormed() }];
+  const blocks: SystemContentBlock[] = prompts.map(prompt => ({ text: prompt }));
 
   // Add cache point for supported Claude models
   if (cacheRetention !== "none" && supportsPromptCaching(model)) {
@@ -33,7 +33,13 @@ import type {
   ToolResultMessage,
   Usage,
 } from "../types";
-import { isAnthropicOAuthToken, isRecord, normalizeToolCallId, resolveCacheRetention } from "../utils";
+import {
+  isAnthropicOAuthToken,
+  isRecord,
+  normalizeSystemPrompts,
+  normalizeToolCallId,
+  resolveCacheRetention,
+} from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { isFoundryEnabled } from "../utils/foundry";
@@ -1417,18 +1423,18 @@ type SystemBlockOptions = {
 };
 
 export function buildAnthropicSystemBlocks(
-  systemPrompt: string | undefined,
+  systemPrompt: readonly string[] | undefined,
   options: SystemBlockOptions = {},
 ): AnthropicSystemBlock[] | undefined {
   const { includeClaudeCodeInstruction = false, extraInstructions = [], billingPayload, cacheControl } = options;
   const blocks: AnthropicSystemBlock[] = [];
-  const sanitizedPrompt = systemPrompt ? systemPrompt.toWellFormed() : "";
+  const sanitizedPrompts = normalizeSystemPrompts(systemPrompt);
   const trimmedInstructions = extraInstructions.map(instruction => instruction.trim()).filter(Boolean);
-  const hasBillingHeader = sanitizedPrompt.includes(CLAUDE_BILLING_HEADER_PREFIX);
+  const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.includes(CLAUDE_BILLING_HEADER_PREFIX));
 
   if (includeClaudeCodeInstruction && !hasBillingHeader) {
     const payloadSeed = billingPayload ?? {
-      system: sanitizedPrompt,
+      system: sanitizedPrompts,
       extraInstructions: trimmedInstructions,
     };
     blocks.push(
@@ -1441,19 +1447,19 @@ export function buildAnthropicSystemBlocks(
   }
 
   for (const instruction of trimmedInstructions) {
-    blocks.push({
-      type: "text",
-      text: instruction,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
-    });
+    blocks.push({ type: "text", text: instruction });
   }
 
-  if (systemPrompt) {
-    blocks.push({
-      type: "text",
-      text: sanitizedPrompt,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
-    });
+  for (const systemPrompt of sanitizedPrompts) {
+    blocks.push({ type: "text", text: systemPrompt });
+  }
+
+  // Attach cache_control to the LAST emitted block only. Anthropic breakpoints are cumulative
+  // prefix cuts, so a single trailing breakpoint covers every preceding block; spreading
+  // cache_control across N blocks wastes slots against the 4-breakpoint cap.
+  const lastIndex = blocks.length - 1;
+  if (cacheControl && lastIndex >= 0) {
+    blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
   }
 
   return blocks.length > 0 ? blocks : undefined;
@@ -1921,10 +1927,11 @@ function buildParams(
   }
 
   const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
+  const billingSystemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const billingPayload = shouldInjectClaudeCodeInstruction
     ? {
         ...params,
-        ...(context.systemPrompt ? { system: context.systemPrompt.toWellFormed() } : {}),
+        ...(billingSystemPrompts.length > 0 ? { system: billingSystemPrompts } : {}),
       }
     : undefined;
   const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
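
A sketch of the resulting block layout for two system prompts plus one extra instruction. The `{ type: "ephemeral" }` cache_control value is an assumption (it is Anthropic's standard breakpoint shape, but this diff never shows it):

```ts
// Hypothetical inputs:
//   extraInstructions = ["Use metric units."]
//   systemPrompt      = ["You are a helpful assistant.", "Answer tersely."]
// buildAnthropicSystemBlocks now yields:
const blocks = [
  { type: "text", text: "Use metric units." },
  { type: "text", text: "You are a helpful assistant." },
  // Only the last block carries the breakpoint; since Anthropic caching cuts
  // at cumulative prefixes, this one marker covers all three blocks.
  { type: "text", text: "Answer tersely.", cache_control: { type: "ephemeral" } },
];
```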
@@ -18,6 +18,7 @@ import {
   type Tool,
   type ToolChoice,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
@@ -28,7 +29,7 @@ import {
   iterateWithIdleTimeout,
 } from "../utils/idle-iterator";
 import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
-import { supportsDeveloperRole } from "./openai-responses";
+import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
 import {
   appendResponsesToolResultMessages,
   convertResponsesAssistantMessage,
@@ -273,7 +274,7 @@ function buildParams(
   model: deploymentName,
   input: messages,
   stream: true,
-  prompt_cache_key: options?.sessionId,
+  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
 };
 
 if (options?.maxTokens) {
@@ -350,12 +351,12 @@ function convertMessages(
   const transformedMessages = transformMessages(context.messages, model, normalizeResponsesToolCallIdForTransform);
   const knownCallIds = new Set<string>();
 
-  if (context.systemPrompt) {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
     const role = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
-    messages.push({
-      role,
-      content: context.systemPrompt.toWellFormed(),
-    });
+    for (const systemPrompt of systemPrompts) {
+      messages.push({ role, content: systemPrompt });
+    }
   }
 
   let msgIndex = 0;
@@ -26,6 +26,7 @@ import type {
   ToolCall,
   ToolResultMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson } from "../utils/json-parse";
 import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
@@ -2145,12 +2146,29 @@ function findLastUserMessageIndex(messages: Message[]): number {
  * only an empty placeholder where historical user turns should be.
  * The last user message is excluded because it is sent in the action.
  */
+/**
+ * Build one Cursor system-message JSON blob per ordered system prompt. Emitting separate blobs
+ * (rather than a single `\n\n`-joined string) lets Cursor's blob cache hit independently per
+ * entry: changing only the last prompt does not invalidate earlier blob ids, so the prefix
+ * up to the changed prompt remains cached on the server side.
+ *
+ * When no system prompts are provided, returns a single default greeting so we never emit
+ * an empty `rootPromptMessagesJson` head.
+ */
+export function buildCursorSystemPromptJsons(systemPrompt: readonly string[] | undefined): string[] {
+  const systemPrompts = normalizeSystemPrompts(systemPrompt);
+  if (systemPrompts.length === 0) {
+    return [JSON.stringify({ role: "system", content: "You are a helpful assistant." })];
+  }
+  return systemPrompts.map(content => JSON.stringify({ role: "system", content }));
+}
+
 function buildRootPromptMessagesJson(
   messages: Message[],
-  systemPromptId: Uint8Array,
+  systemPromptIds: Uint8Array[],
   blobStore: Map<string, Uint8Array>,
 ): Uint8Array[] {
-  const entries: Uint8Array[] = [systemPromptId];
+  const entries: Uint8Array[] = [...systemPromptIds];
   const lastUserIdx = findLastUserMessageIndex(messages);
 
   const pushJson = (obj: unknown) => {
@@ -2299,12 +2317,9 @@ function buildGrpcRequest(
 } {
   const blobStore = state.blobStore;
 
-  const systemPromptJson = JSON.stringify({
-    role: "system",
-    content: context.systemPrompt || "You are a helpful assistant.",
-  });
-  const systemPromptBytes = new TextEncoder().encode(systemPromptJson);
-  const systemPromptId = storeCursorBlob(blobStore, systemPromptBytes);
+  const systemPromptIds = buildCursorSystemPromptJsons(context.systemPrompt).map(json =>
+    storeCursorBlob(blobStore, new TextEncoder().encode(json)),
+  );
 
   const lastMessage = context.messages[context.messages.length - 1];
   const userText =
@@ -2339,18 +2354,19 @@ function buildGrpcRequest(
   // field (not `turns[]`) to construct the actual model prompt; if we only send the
   // system prompt here, multi-turn conversations lose prior context and the model
   // sees only the current user message.
-  const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptId, blobStore);
+  const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptIds, blobStore);
 
   // Preserve cached non-history state fields (todos, file states, summaries, etc.)
   // when the system prompt is unchanged; otherwise start fresh.
-  const hasMatchingPrompt = state.conversationState?.rootPromptMessagesJson?.some(entry =>
-    Buffer.from(entry).equals(systemPromptId),
-  );
+  const cachedPromptHead = state.conversationState?.rootPromptMessagesJson?.slice(0, systemPromptIds.length) ?? [];
+  const hasMatchingPrompt =
+    cachedPromptHead.length === systemPromptIds.length &&
+    systemPromptIds.every((id, idx) => Buffer.from(cachedPromptHead[idx]).equals(id));
   const baseState =
     state.conversationState && hasMatchingPrompt
       ? state.conversationState
       : create(ConversationStateStructureSchema, {
-          rootPromptMessagesJson: [systemPromptId],
+          rootPromptMessagesJson: systemPromptIds,
           turns: [],
           todos: [],
           pendingToolCalls: [],
@@ -18,6 +18,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
 import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
@@ -865,8 +866,8 @@ export function buildRequest(
   options: GoogleGeminiCliOptions = {},
   isAntigravity = false,
 ): CloudCodeAssistRequest {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
-
   const generationConfig: CloudCodeAssistRequest["request"]["generationConfig"] = {};
   if (options.temperature !== undefined) {
     generationConfig.temperature = options.temperature;
@@ -913,9 +914,9 @@
   }
 
   // System instruction must be object with parts, not plain string
-  if (context.systemPrompt) {
+  if (systemPrompts.length > 0) {
     request.systemInstruction = {
-      parts: [{ text: context.systemPrompt.toWellFormed() }],
+      parts: systemPrompts.map(text => ({ text })),
     };
   }
 
@@ -18,6 +18,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import type { GoogleThinkingLevel } from "./google-gemini-cli";
@@ -369,6 +370,7 @@ function buildParams(
   context: Context,
   options: GoogleVertexOptions = {},
 ): GenerateContentParameters {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
 
   const generationConfig: GoogleVertexSamplingConfig = {};
@@ -396,7 +398,7 @@
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
-    ...(context.systemPrompt && { systemInstruction: context.systemPrompt.toWellFormed() }),
+    ...(systemPrompts.length > 0 && { systemInstruction: { parts: systemPrompts.map(text => ({ text })) } }),
     ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools, model) }),
   };
 
@@ -17,6 +17,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import type { GoogleThinkingLevel } from "./google-gemini-cli";
@@ -313,6 +314,7 @@ function buildParams(
   context: Context,
   options: GoogleOptions = {},
 ): GenerateContentParameters {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
 
   const generationConfig: GoogleSamplingConfig = {};
@@ -340,7 +342,7 @@
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
-    ...(context.systemPrompt && { systemInstruction: context.systemPrompt.toWellFormed() }),
+    ...(systemPrompts.length > 0 && { systemInstruction: { parts: systemPrompts.map(text => ({ text })) } }),
     ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools, model) }),
   };
 
@@ -14,6 +14,7 @@ import type {
   ToolResultMessage,
   UserMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import { parseStreamingJson } from "../utils/json-parse";
@@ -186,10 +187,14 @@ function convertMessage(message: Message): OllamaMessage {
 
 function convertMessages(model: Model<"ollama-chat">, context: Context): OllamaMessage[] {
   const messages: Message[] = [];
-  if (context.systemPrompt) {
+  // Emit one developer message per ordered system prompt. The wire role is mapped to "system"
+  // by `convertMessage`, but keeping the prompts separate preserves prefix-cache stability:
+  // if only the trailing prompt changes between calls, the leading system messages keep
+  // their identical token prefix so KV-cache reuse covers them.
+  for (const systemPrompt of normalizeSystemPrompts(context.systemPrompt)) {
     messages.push({
       role: "developer",
-      content: context.systemPrompt,
+      content: systemPrompt,
       timestamp: Date.now(),
     });
   }
@@ -77,7 +77,7 @@ export async function transformRequestBody(
   body: RequestBody,
   model: Model<Api>,
   options: CodexRequestOptions = {},
-  prompt?: { instructions: string; developerMessages: string[] },
+  prompt?: { developerMessages: string[] },
 ): Promise<RequestBody> {
   body.store = false;
   body.stream = true;
@@ -36,6 +36,7 @@ import {
   getOpenAIResponsesHistoryItems,
   getOpenAIResponsesHistoryPayload,
   normalizeResponsesToolCallId,
+  normalizeSystemPrompts,
 } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
@@ -51,6 +52,7 @@ import {
   transformRequestBody,
 } from "./openai-codex/request-transformer";
 import { parseCodexError } from "./openai-codex/response-handler";
+import { normalizeOpenAIResponsesPromptCacheKey } from "./openai-responses";
 import {
   encodeResponsesToolCallId,
   encodeTextSignatureV1,
@@ -476,6 +478,7 @@ async function buildCodexRequestContext(
   const accountId = getAccountId(apiKey);
   const baseUrl = model.baseUrl || CODEX_BASE_URL;
   const url = resolveCodexResponsesUrl(baseUrl);
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
   const transformedBody = await buildTransformedCodexRequestBody(model, context, options);
   options?.onPayload?.(transformedBody);
 
@@ -490,8 +493,8 @@
   };
 
   const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
-  const sessionKey = getCodexWebSocketSessionKey(options?.sessionId, model, accountId, baseUrl);
-  const publicSessionKey = getCodexPublicSessionKey(options?.sessionId, model, baseUrl);
+  const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
+  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
   if (sessionKey && publicSessionKey) {
     providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
   }
@@ -520,7 +523,7 @@ async function buildTransformedCodexRequestBody(
   model: model.id,
   input: [...convertMessages(model, context)],
   stream: true,
-  prompt_cache_key: options?.sessionId,
+  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
 };
 
 if (options?.maxTokens) {
@@ -567,8 +570,11 @@
   }
 }
 
-params.instructions = context.systemPrompt;
-
+const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+if (systemPrompts.length > 0) {
+  params.instructions = systemPrompts[0];
+}
+const developerMessages = systemPrompts.slice(1);
 const codexOptions: CodexRequestOptions = {
   reasoningEffort: options?.reasoning,
   reasoningSummary: options?.reasoningSummary ?? "auto",
@@ -576,7 +582,7 @@
   include: options?.include,
 };
 
-return transformRequestBody(params, model, codexOptions);
+return transformRequestBody(params, model, codexOptions, { developerMessages });
 }
 
 async function openInitialCodexEventStream(
@@ -628,7 +634,7 @@
 async function openCodexWebSocketTransport(
   requestContext: CodexRequestContext,
   requestSetup: CodexRequestSetup,
-  options: OpenAICodexResponsesOptions | undefined,
+  _options: OpenAICodexResponsesOptions | undefined,
   websocketState: CodexWebSocketSessionState,
   retry: number,
 ): Promise<{
@@ -641,7 +647,7 @@
   requestContext.requestHeaders,
   requestContext.accountId,
   requestContext.apiKey,
-  options?.sessionId,
+  requestContext.transformedBody.prompt_cache_key,
   "websocket",
   websocketState,
 );
@@ -670,7 +676,7 @@
 async function openCodexSseTransport(
   requestContext: CodexRequestContext,
   requestSetup: CodexRequestSetup,
-  options: OpenAICodexResponsesOptions | undefined,
+  _options: OpenAICodexResponsesOptions | undefined,
   state: CodexWebSocketSessionState | undefined,
   body = requestContext.transformedBody,
 ): Promise<{
@@ -684,7 +690,7 @@
   requestContext.requestHeaders,
   requestContext.accountId,
   requestContext.apiKey,
-  options?.sessionId,
+  body.prompt_cache_key,
   body,
   state,
   requestSetup.requestSignal,
@@ -1559,9 +1565,10 @@ export async function prewarmOpenAICodexResponses(
   const accountId = getAccountId(apiKey);
   const baseUrl = model.baseUrl || CODEX_BASE_URL;
   const url = resolveCodexResponsesUrl(baseUrl);
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
   const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
-  const sessionKey = getCodexWebSocketSessionKey(options?.sessionId, model, accountId, baseUrl);
-  const publicSessionKey = getCodexPublicSessionKey(options?.sessionId, model, baseUrl);
+  const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
+  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
   if (publicSessionKey && sessionKey) {
     providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
   }
@@ -1574,7 +1581,7 @@
   { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
   accountId,
   apiKey,
-  options?.sessionId,
+  promptCacheKey,
   "websocket",
   state,
 );
@@ -1595,8 +1602,9 @@ function getCodexWebSocketSessionKey(
   accountId: string,
   baseUrl: string,
 ): string | undefined {
-  if (!sessionId || sessionId.length === 0) return undefined;
-  return `${accountId}:${baseUrl}:${model.id}:${sessionId}`;
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
+  if (!promptCacheKey) return undefined;
+  return `${accountId}:${baseUrl}:${model.id}:${promptCacheKey}`;
 }
 
 function getCodexPublicSessionKey(
@@ -1604,8 +1612,9 @@
   model: Model<"openai-codex-responses">,
   baseUrl: string,
 ): string | undefined {
-  if (!sessionId || sessionId.length === 0) return undefined;
-  return `${baseUrl}:${model.id}:${sessionId}`;
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
+  if (!promptCacheKey) return undefined;
+  return `${baseUrl}:${model.id}:${promptCacheKey}`;
 }
 
 function getCodexWebSocketSessionState(
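
A sketch of how the new prompt split above plays out; the names match the diff, and the prompt values are illustrative:

```ts
// context.systemPrompt = ["Main instructions.", "House style.", "Safety notes."]
const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
// params.instructions -> "Main instructions."  (first entry only)
// developerMessages   -> ["House style.", "Safety notes."]
// transformRequestBody(params, model, codexOptions, { developerMessages })
// then injects the remaining prompts as developer messages in the request body.
```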
@@ -33,6 +33,7 @@ import {
   type ToolChoice,
   type ToolResultMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { toFireworksWireModelId } from "../utils/fireworks-model-id";
@@ -1178,10 +1179,13 @@ export function convertMessages(
     return generateFallbackToolCallId(seed);
   };
 
-  if (context.systemPrompt) {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
     const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
     const role = useDeveloperRole ? "developer" : "system";
-    params.push({ role: role, content: context.systemPrompt.toWellFormed() });
+    for (const systemPrompt of systemPrompts) {
+      params.push({ role, content: systemPrompt });
+    }
   }
 
   let lastRole: string | null = null;
@@ -25,6 +25,7 @@ import {
   createOpenAIResponsesHistoryPayload,
   getOpenAIResponsesHistoryItems,
   getOpenAIResponsesHistoryPayload,
+  normalizeSystemPrompts,
   resolveCacheRetention,
   sanitizeOpenAIResponsesHistoryItemsForReplay,
 } from "../utils";
@@ -73,6 +74,13 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
   return undefined;
 }
 
+export function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined {
+  if (!sessionId || sessionId.length === 0) return undefined;
+  const wellFormed = sessionId.toWellFormed();
+  if (wellFormed.length <= 64) return wellFormed;
+  return `pc_${Bun.hash(wellFormed).toString(36)}`;
+}
+
 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
   reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -331,7 +339,9 @@ function createClient(
 function getOpenAIResponsesCacheSessionId(
   options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId"> | undefined,
 ): string | undefined {
-  return resolveCacheRetention(options?.cacheRetention) === "none" ? undefined : options?.sessionId;
+  return resolveCacheRetention(options?.cacheRetention) === "none"
+    ? undefined
+    : normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
 }
 
 function buildParams(
@@ -352,12 +362,11 @@
   );
   const messages: ResponseInput = [...conversationMessages];
 
-  if (context.systemPrompt) {
-    const role = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
-    messages.unshift({
-      role,
-      content: context.systemPrompt.toWellFormed(),
-    });
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
+    const role: "developer" | "system" =
+      model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
+    messages.unshift(...systemPrompts.map(systemPrompt => ({ role, content: systemPrompt })));
   }
 
   const cacheRetention = resolveCacheRetention(options?.cacheRetention);
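
`normalizeOpenAIResponsesPromptCacheKey` keeps well-formed session ids of 64 characters or fewer verbatim and compresses longer ones into a deterministic `pc_`-prefixed key via `Bun.hash`, so the same long session id always maps to the same cache key. A quick sketch of its behavior:

```ts
normalizeOpenAIResponsesPromptCacheKey(undefined);       // undefined
normalizeOpenAIResponsesPromptCacheKey("session-42");    // "session-42" (<= 64 chars, passed through)
normalizeOpenAIResponsesPromptCacheKey("x".repeat(100)); // "pc_..." (stable base-36 hash of the long id)
```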
package/src/types.ts CHANGED
@@ -502,7 +502,7 @@ export interface Tool<TParameters extends TSchema = TSchema> {
 }
 
 export interface Context {
-  systemPrompt?: string;
+  systemPrompt?: string[];
   messages: Message[];
   tools?: Tool[];
 }
package/src/utils.ts CHANGED
@@ -5,6 +5,9 @@ import type { CacheRetention, OpenAIResponsesHistoryPayload, ProviderPayload } f
 type OpenAIResponsesReplayItem = ResponseInput[number];
 
 export { isRecord } from "@oh-my-pi/pi-utils";
+export function normalizeSystemPrompts(systemPrompt: readonly string[] | undefined): string[] {
+  return systemPrompt?.map(prompt => prompt.toWellFormed()).filter(prompt => prompt.length > 0) ?? [];
+}
 
 export function toNumber(value: unknown): number | undefined {
   if (typeof value === "number" && Number.isFinite(value)) return value;
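
Every provider builder in this release funnels system prompts through `normalizeSystemPrompts`; its behavior, sketched:

```ts
normalizeSystemPrompts(undefined);      // []
normalizeSystemPrompts(["a", "", "b"]); // ["a", "b"] (order kept; zero-length entries dropped)
normalizeSystemPrompts(["  "]);         // ["  "]     (whitespace-only entries survive; only length 0 is filtered)
```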