@oh-my-pi/pi-ai 14.6.6 → 14.7.0
- package/CHANGELOG.md +19 -0
- package/README.md +2 -2
- package/package.json +3 -3
- package/src/models.json +185 -16
- package/src/providers/amazon-bedrock.ts +4 -3
- package/src/providers/anthropic.ts +24 -17
- package/src/providers/azure-openai-responses.ts +8 -7
- package/src/providers/cursor.ts +29 -13
- package/src/providers/google-gemini-cli.ts +4 -3
- package/src/providers/google-vertex.ts +3 -1
- package/src/providers/google.ts +3 -1
- package/src/providers/ollama.ts +7 -2
- package/src/providers/openai-codex/request-transformer.ts +1 -1
- package/src/providers/openai-codex-responses.ts +26 -17
- package/src/providers/openai-completions.ts +6 -2
- package/src/providers/openai-responses.ts +16 -7
- package/src/types.ts +1 -1
- package/src/utils.ts +3 -0
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,25 @@
 
 ## [Unreleased]
 
+## [14.7.0] - 2026-05-04
+### Breaking Changes
+
+- Changed `Context.systemPrompt` from a string to `string[]`, so callers must now pass an array of prompts instead of a single string
+- Changed behavior: non-array system prompts now throw at runtime, because request builders normalize the system prompt as an array
+
+### Added
+
+- Added support for multiple system prompts by changing `Context.systemPrompt` to an ordered string array and preserving provider-appropriate instruction precedence
+
+### Changed
+
+- Changed request builders for Anthropic, OpenAI, Bedrock, Azure, Cursor, Google, and Ollama to propagate every non-empty system prompt entry without demoting durable instructions into ordinary conversation turns
+
+### Fixed
+
+- Filtered out empty normalized system prompts so blank entries are no longer sent to providers
+- Removed blank system prompt strings from provider payloads to avoid unnecessary empty instruction messages
+
 ## [14.6.6] - 2026-05-04
 
 ### Added
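The `Context.systemPrompt` migration is mechanical for most callers. A minimal before/after sketch in TypeScript (shapes taken from the README diff below; the second prompt entry is illustrative):

```ts
import type { Context } from "@oh-my-pi/pi-ai";

// Before 14.7.0 — a single string:
//   systemPrompt: "You are a helpful assistant."

// From 14.7.0 — an ordered array; each entry stays a separate
// system/developer-level instruction instead of being folded into one string:
const context: Context = {
  systemPrompt: [
    "You are a helpful assistant.",       // durable base instruction
    "Answer in at most three sentences.", // additional prompt (illustrative)
  ],
  messages: [{ role: "user", content: "What time is it?" }],
};
```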
package/README.md
CHANGED
@@ -107,7 +107,7 @@ const tools: Tool[] = [
 
 // Build a conversation context (easily serializable and transferable between models)
 const context: Context = {
-  systemPrompt: "You are a helpful assistant.",
+  systemPrompt: ["You are a helpful assistant."],
   messages: [{ role: "user", content: "What time is it?" }],
   tools,
 };
@@ -873,7 +873,7 @@ import { Context, getModel, complete } from "@oh-my-pi/pi-ai";
 
 // Create and use a context
 const context: Context = {
-  systemPrompt: "You are a helpful assistant.",
+  systemPrompt: ["You are a helpful assistant."],
   messages: [{ role: "user", content: "What is TypeScript?" }],
 };
 
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "@oh-my-pi/pi-ai",
-  "version": "14.6.6",
+  "version": "14.7.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "homepage": "https://github.com/can1357/oh-my-pi",
   "author": "Can Boluk",
@@ -46,8 +46,8 @@
   "@aws-sdk/credential-provider-node": "^3.972.36",
   "@bufbuild/protobuf": "^2.12.0",
   "@google/genai": "^1.50.1",
-  "@oh-my-pi/pi-natives": "14.6.6",
-  "@oh-my-pi/pi-utils": "14.6.6",
+  "@oh-my-pi/pi-natives": "14.7.0",
+  "@oh-my-pi/pi-utils": "14.7.0",
   "@sinclair/typebox": "^0.34.49",
   "@smithy/node-http-handler": "^4.6.1",
   "ajv": "^8.20.0",
package/src/models.json
CHANGED
@@ -12212,8 +12212,8 @@
       "cacheRead": 0,
       "cacheWrite": 0
     },
-    "contextWindow":
-    "maxTokens":
+    "contextWindow": 262144,
+    "maxTokens": 262144,
     "thinking": {
       "mode": "effort",
      "minLevel": "minimal",
@@ -21575,6 +21575,25 @@
     "contextWindow": 222222,
     "maxTokens": 8888
   },
+  "deepseek/deepseek-latest": {
+    "id": "deepseek/deepseek-latest",
+    "name": "deepseek/deepseek-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "deepseek/deepseek-prover-v2-671b": {
     "id": "deepseek/deepseek-prover-v2-671b",
     "name": "deepseek/deepseek-prover-v2-671b",
@@ -25673,6 +25692,25 @@
     "contextWindow": 222222,
     "maxTokens": 8888
   },
+  "minimax/minimax-latest": {
+    "id": "minimax/minimax-latest",
+    "name": "minimax/minimax-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "minimax/minimax-m2-her": {
     "id": "minimax/minimax-m2-her",
     "name": "minimax/minimax-m2-her",
@@ -26487,6 +26525,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "moonshotai/kimi-latest": {
+    "id": "moonshotai/kimi-latest",
+    "name": "moonshotai/kimi-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "NeverSleep/Llama-3-Lumimaid-70B-v0.1": {
     "id": "NeverSleep/Llama-3-Lumimaid-70B-v0.1",
     "name": "NeverSleep/Llama-3-Lumimaid-70B-v0.1",
@@ -27631,6 +27688,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "openai/gpt-chat-latest": {
+    "id": "openai/gpt-chat-latest",
+    "name": "openai/gpt-chat-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "openai/gpt-latest": {
     "id": "openai/gpt-latest",
     "name": "openai/gpt-latest",
@@ -30360,7 +30436,7 @@
     "api": "openai-completions",
     "provider": "nanogpt",
     "baseUrl": "https://nano-gpt.com/api/v1",
-    "reasoning":
+    "reasoning": true,
     "input": [
       "text"
     ],
@@ -30371,7 +30447,12 @@
       "cacheWrite": 0
     },
     "contextWindow": 222222,
-    "maxTokens": 8888
+    "maxTokens": 8888,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "xhigh"
+    }
   },
   "TEE/glm-4.6": {
     "id": "TEE/glm-4.6",
@@ -31720,6 +31801,25 @@
       "maxLevel": "xhigh"
     }
   },
+  "x-ai/grok-latest": {
+    "id": "x-ai/grok-latest",
+    "name": "x-ai/grok-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
+  },
   "xiaomi/mimo-v2-flash": {
     "id": "xiaomi/mimo-v2-flash",
     "name": "MiMo-V2-Flash",
@@ -32360,6 +32460,25 @@
       "minLevel": "minimal",
       "maxLevel": "xhigh"
     }
+  },
+  "zai-org/glm-latest": {
+    "id": "zai-org/glm-latest",
+    "name": "zai-org/glm-latest",
+    "api": "openai-completions",
+    "provider": "nanogpt",
+    "baseUrl": "https://nano-gpt.com/api/v1",
+    "reasoning": false,
+    "input": [
+      "text"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 222222,
+    "maxTokens": 8888
   }
 },
 "nvidia": {
@@ -33365,6 +33484,31 @@
       "maxLevel": "xhigh"
     }
   },
+  "moonshotai/kimi-k2.6": {
+    "id": "moonshotai/kimi-k2.6",
+    "name": "Kimi K2.6",
+    "api": "openai-completions",
+    "provider": "nvidia",
+    "baseUrl": "https://integrate.api.nvidia.com/v1",
+    "reasoning": true,
+    "input": [
+      "text",
+      "image"
+    ],
+    "cost": {
+      "input": 0,
+      "output": 0,
+      "cacheRead": 0,
+      "cacheWrite": 0
+    },
+    "contextWindow": 262144,
+    "maxTokens": 262144,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "xhigh"
+    }
+  },
   "nvidia/llama-3.1-nemotron-51b-instruct": {
     "id": "nvidia/llama-3.1-nemotron-51b-instruct",
     "name": "Llama 3.1 Nemotron 51b Instruct",
@@ -38354,8 +38498,8 @@
       "text"
     ],
     "cost": {
-      "input": 0.
-      "output": 0.
+      "input": 0.27,
+      "output": 0.95,
       "cacheRead": 0.13,
       "cacheWrite": 0
     },
@@ -38479,8 +38623,8 @@
       "cacheRead": 0.003625,
       "cacheWrite": 0
     },
-    "contextWindow":
-    "maxTokens":
+    "contextWindow": 131000,
+    "maxTokens": 131000,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -42272,13 +42416,13 @@
       "text"
     ],
     "cost": {
-      "input": 0.
-      "output": 0.
+      "input": 0.09,
+      "output": 0.44999999999999996,
       "cacheRead": 0,
       "cacheWrite": 0
     },
     "contextWindow": 40960,
-    "maxTokens":
+    "maxTokens": 20000,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -42884,13 +43028,13 @@
       "image"
     ],
     "cost": {
-      "input": 0.
-      "output": 1
-      "cacheRead": 0,
+      "input": 0.15,
+      "output": 1,
+      "cacheRead": 0.049999999999999996,
       "cacheWrite": 0
     },
     "contextWindow": 262144,
-    "maxTokens":
+    "maxTokens": 262144,
     "thinking": {
       "mode": "effort",
       "minLevel": "minimal",
@@ -43047,6 +43191,31 @@
       "maxLevel": "high"
     }
   },
+  "qwen/qwen3.6-35b-a3b": {
+    "id": "qwen/qwen3.6-35b-a3b",
+    "name": "Qwen: Qwen3.6 35B A3B",
+    "api": "openai-completions",
+    "provider": "openrouter",
+    "baseUrl": "https://openrouter.ai/api/v1",
+    "reasoning": true,
+    "input": [
+      "text",
+      "image"
+    ],
+    "cost": {
+      "input": 0.15,
+      "output": 1,
+      "cacheRead": 0.049999999999999996,
+      "cacheWrite": 0
+    },
+    "contextWindow": 262144,
+    "maxTokens": 262144,
+    "thinking": {
+      "mode": "effort",
+      "minLevel": "minimal",
+      "maxLevel": "high"
+    }
+  },
   "qwen/qwen3.6-flash": {
     "id": "qwen/qwen3.6-flash",
     "name": "Qwen: Qwen3.6 Flash",
@@ -51988,7 +52157,7 @@
   },
   "glm-5v-turbo": {
     "id": "glm-5v-turbo",
-    "name": "
+    "name": "GLM-5V-Turbo",
     "api": "anthropic-messages",
     "provider": "zai",
     "baseUrl": "https://api.z.ai/api/anthropic",
package/src/providers/amazon-bedrock.ts
CHANGED
@@ -464,13 +464,14 @@ function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boolean {
 }
 
 function buildSystemPrompt(
-  systemPrompt: string | undefined,
+  systemPrompt: readonly string[] | undefined,
   model: Model<"bedrock-converse-stream">,
   cacheRetention: CacheRetention,
 ): SystemContentBlock[] | undefined {
-
+  const prompts = systemPrompt?.map(prompt => prompt.toWellFormed()).filter(prompt => prompt.length > 0) ?? [];
+  if (prompts.length === 0) return undefined;
 
-  const blocks: SystemContentBlock[] =
+  const blocks: SystemContentBlock[] = prompts.map(prompt => ({ text: prompt }));
 
   // Add cache point for supported Claude models
   if (cacheRetention !== "none" && supportsPromptCaching(model)) {
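For illustration, the reshaped `buildSystemPrompt` yields one `SystemContentBlock` per surviving prompt; a sketch assuming the same normalization as `normalizeSystemPrompts` in utils.ts (cache-point handling elided):

```ts
// Hypothetical input and the blocks it produces:
const systemPrompt = ["Base instructions", "", "Project rules"];
const prompts = systemPrompt.map(p => p.toWellFormed()).filter(p => p.length > 0);
const blocks = prompts.map(prompt => ({ text: prompt }));
// -> [{ text: "Base instructions" }, { text: "Project rules" }]
// A cache point is appended afterwards for supported Claude models.
```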
package/src/providers/anthropic.ts
CHANGED
@@ -33,7 +33,13 @@ import type {
   ToolResultMessage,
   Usage,
 } from "../types";
-import {
+import {
+  isAnthropicOAuthToken,
+  isRecord,
+  normalizeSystemPrompts,
+  normalizeToolCallId,
+  resolveCacheRetention,
+} from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { isFoundryEnabled } from "../utils/foundry";
@@ -1417,18 +1423,18 @@ type SystemBlockOptions = {
 };
 
 export function buildAnthropicSystemBlocks(
-  systemPrompt: string | undefined,
+  systemPrompt: readonly string[] | undefined,
   options: SystemBlockOptions = {},
 ): AnthropicSystemBlock[] | undefined {
   const { includeClaudeCodeInstruction = false, extraInstructions = [], billingPayload, cacheControl } = options;
   const blocks: AnthropicSystemBlock[] = [];
-  const
+  const sanitizedPrompts = normalizeSystemPrompts(systemPrompt);
   const trimmedInstructions = extraInstructions.map(instruction => instruction.trim()).filter(Boolean);
-  const hasBillingHeader =
+  const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.includes(CLAUDE_BILLING_HEADER_PREFIX));
 
   if (includeClaudeCodeInstruction && !hasBillingHeader) {
     const payloadSeed = billingPayload ?? {
-      system:
+      system: sanitizedPrompts,
       extraInstructions: trimmedInstructions,
     };
     blocks.push(
@@ -1441,19 +1447,19 @@ export function buildAnthropicSystemBlocks(
   }
 
   for (const instruction of trimmedInstructions) {
-    blocks.push({
-      type: "text",
-      text: instruction,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
-    });
+    blocks.push({ type: "text", text: instruction });
   }
 
-
-  blocks.push({
-
-
-
-
+  for (const systemPrompt of sanitizedPrompts) {
+    blocks.push({ type: "text", text: systemPrompt });
+  }
+
+  // Attach cache_control to the LAST emitted block only. Anthropic breakpoints are cumulative
+  // prefix cuts, so a single trailing breakpoint covers every preceding block; spreading
+  // cache_control across N blocks wastes slots against the 4-breakpoint cap.
+  const lastIndex = blocks.length - 1;
+  if (cacheControl && lastIndex >= 0) {
+    blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
   }
 
   return blocks.length > 0 ? blocks : undefined;
@@ -1921,10 +1927,11 @@ function buildParams(
   }
 
   const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
+  const billingSystemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const billingPayload = shouldInjectClaudeCodeInstruction
     ? {
         ...params,
-        ...(
+        ...(billingSystemPrompts.length > 0 ? { system: billingSystemPrompts } : {}),
       }
     : undefined;
   const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
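The trailing-breakpoint comment above is easiest to see on a concrete call; a hypothetical two-prompt invocation (block shapes per the code above, `cache_control` value per Anthropic's ephemeral cache type):

```ts
const blocks = buildAnthropicSystemBlocks(["Base instructions", "Project rules"], {
  cacheControl: { type: "ephemeral" },
});
// -> [
//      { type: "text", text: "Base instructions" },
//      { type: "text", text: "Project rules", cache_control: { type: "ephemeral" } },
//    ]
// One breakpoint on the last block caches the entire prefix before it, leaving
// the rest of Anthropic's four-breakpoint budget for message-level markers.
```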
package/src/providers/azure-openai-responses.ts
CHANGED
@@ -18,6 +18,7 @@ import {
   type Tool,
   type ToolChoice,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
@@ -28,7 +29,7 @@ import {
   iterateWithIdleTimeout,
 } from "../utils/idle-iterator";
 import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
-import { supportsDeveloperRole } from "./openai-responses";
+import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
 import {
   appendResponsesToolResultMessages,
   convertResponsesAssistantMessage,
@@ -273,7 +274,7 @@ function buildParams(
     model: deploymentName,
     input: messages,
     stream: true,
-    prompt_cache_key: options?.sessionId,
+    prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
   };
 
   if (options?.maxTokens) {
@@ -350,12 +351,12 @@ function convertMessages(
   const transformedMessages = transformMessages(context.messages, model, normalizeResponsesToolCallIdForTransform);
   const knownCallIds = new Set<string>();
 
-
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
     const role = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
-
-      role,
-
-    });
+    for (const systemPrompt of systemPrompts) {
+      messages.push({ role, content: systemPrompt });
+    }
   }
 
   let msgIndex = 0;
package/src/providers/cursor.ts
CHANGED
@@ -26,6 +26,7 @@ import type {
   ToolCall,
   ToolResultMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson } from "../utils/json-parse";
 import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
@@ -2145,12 +2146,29 @@ function findLastUserMessageIndex(messages: Message[]): number {
  * only an empty placeholder where historical user turns should be.
  * The last user message is excluded because it is sent in the action.
  */
+/**
+ * Build one Cursor system-message JSON blob per ordered system prompt. Emitting separate blobs
+ * (rather than a single `\n\n`-joined string) lets Cursor's blob cache hit independently per
+ * entry: changing only the last prompt does not invalidate earlier blob ids, so the prefix
+ * up to the changed prompt remains cached on the server side.
+ *
+ * When no system prompts are provided, returns a single default greeting so we never emit
+ * an empty `rootPromptMessagesJson` head.
+ */
+export function buildCursorSystemPromptJsons(systemPrompt: readonly string[] | undefined): string[] {
+  const systemPrompts = normalizeSystemPrompts(systemPrompt);
+  if (systemPrompts.length === 0) {
+    return [JSON.stringify({ role: "system", content: "You are a helpful assistant." })];
+  }
+  return systemPrompts.map(content => JSON.stringify({ role: "system", content }));
+}
+
 function buildRootPromptMessagesJson(
   messages: Message[],
-
+  systemPromptIds: Uint8Array[],
   blobStore: Map<string, Uint8Array>,
 ): Uint8Array[] {
-  const entries: Uint8Array[] = [
+  const entries: Uint8Array[] = [...systemPromptIds];
   const lastUserIdx = findLastUserMessageIndex(messages);
 
   const pushJson = (obj: unknown) => {
@@ -2299,12 +2317,9 @@ function buildGrpcRequest(
 } {
   const blobStore = state.blobStore;
 
-  const
-
-
-  });
-  const systemPromptBytes = new TextEncoder().encode(systemPromptJson);
-  const systemPromptId = storeCursorBlob(blobStore, systemPromptBytes);
+  const systemPromptIds = buildCursorSystemPromptJsons(context.systemPrompt).map(json =>
+    storeCursorBlob(blobStore, new TextEncoder().encode(json)),
+  );
 
   const lastMessage = context.messages[context.messages.length - 1];
   const userText =
@@ -2339,18 +2354,19 @@ function buildGrpcRequest(
   // field (not `turns[]`) to construct the actual model prompt; if we only send the
   // system prompt here, multi-turn conversations lose prior context and the model
   // sees only the current user message.
-  const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages,
+  const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptIds, blobStore);
 
   // Preserve cached non-history state fields (todos, file states, summaries, etc.)
   // when the system prompt is unchanged; otherwise start fresh.
-  const
-
-
+  const cachedPromptHead = state.conversationState?.rootPromptMessagesJson?.slice(0, systemPromptIds.length) ?? [];
+  const hasMatchingPrompt =
+    cachedPromptHead.length === systemPromptIds.length &&
+    systemPromptIds.every((id, idx) => Buffer.from(cachedPromptHead[idx]).equals(id));
   const baseState =
     state.conversationState && hasMatchingPrompt
       ? state.conversationState
       : create(ConversationStateStructureSchema, {
-          rootPromptMessagesJson:
+          rootPromptMessagesJson: systemPromptIds,
           turns: [],
           todos: [],
           pendingToolCalls: [],
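What `buildCursorSystemPromptJsons` emits, per the implementation above:

```ts
buildCursorSystemPromptJsons(["Base instructions", "Project rules"]);
// -> ['{"role":"system","content":"Base instructions"}',
//     '{"role":"system","content":"Project rules"}']

buildCursorSystemPromptJsons(undefined);
// -> ['{"role":"system","content":"You are a helpful assistant."}']
```

Each JSON string is then stored as its own blob, so editing the second prompt leaves the first blob id — and the server-side cache entry behind it — untouched.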
package/src/providers/google-gemini-cli.ts
CHANGED
@@ -18,6 +18,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
 import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
@@ -865,8 +866,8 @@ export function buildRequest(
   options: GoogleGeminiCliOptions = {},
   isAntigravity = false,
 ): CloudCodeAssistRequest {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
-
   const generationConfig: CloudCodeAssistRequest["request"]["generationConfig"] = {};
   if (options.temperature !== undefined) {
     generationConfig.temperature = options.temperature;
@@ -913,9 +914,9 @@ export function buildRequest(
   }
 
   // System instruction must be object with parts, not plain string
-  if (
+  if (systemPrompts.length > 0) {
     request.systemInstruction = {
-      parts:
+      parts: systemPrompts.map(text => ({ text })),
     };
   }
 
package/src/providers/google-vertex.ts
CHANGED
@@ -18,6 +18,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import type { GoogleThinkingLevel } from "./google-gemini-cli";
@@ -369,6 +370,7 @@ function buildParams(
   context: Context,
   options: GoogleVertexOptions = {},
 ): GenerateContentParameters {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
 
   const generationConfig: GoogleVertexSamplingConfig = {};
@@ -396,7 +398,7 @@ function buildParams(
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
-    ...(
+    ...(systemPrompts.length > 0 && { systemInstruction: { parts: systemPrompts.map(text => ({ text })) } }),
     ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools, model) }),
   };
 
package/src/providers/google.ts
CHANGED
@@ -17,6 +17,7 @@ import type {
   ThinkingContent,
   ToolCall,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import type { GoogleThinkingLevel } from "./google-gemini-cli";
@@ -313,6 +314,7 @@ function buildParams(
   context: Context,
   options: GoogleOptions = {},
 ): GenerateContentParameters {
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
   const contents = convertMessages(model, context);
 
   const generationConfig: GoogleSamplingConfig = {};
@@ -340,7 +342,7 @@ function buildParams(
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
-    ...(
+    ...(systemPrompts.length > 0 && { systemInstruction: { parts: systemPrompts.map(text => ({ text })) } }),
     ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools, model) }),
   };
 
package/src/providers/ollama.ts
CHANGED
@@ -14,6 +14,7 @@ import type {
   ToolResultMessage,
   UserMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import { parseStreamingJson } from "../utils/json-parse";
@@ -186,10 +187,14 @@ function convertMessage(message: Message): OllamaMessage {
 
 function convertMessages(model: Model<"ollama-chat">, context: Context): OllamaMessage[] {
   const messages: Message[] = [];
-
+  // Emit one developer message per ordered system prompt. The wire role is mapped to "system"
+  // by `convertMessage`, but keeping the prompts separate preserves prefix-cache stability:
+  // if only the trailing prompt changes between calls, the leading system messages keep
+  // their identical token prefix so KV-cache reuse covers them.
+  for (const systemPrompt of normalizeSystemPrompts(context.systemPrompt)) {
     messages.push({
       role: "developer",
-      content:
+      content: systemPrompt,
       timestamp: Date.now(),
     });
   }
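A sketch of the messages the new Ollama loop produces for two prompts (values illustrative):

```ts
const messages: Array<{ role: string; content: string; timestamp: number }> = [];
for (const systemPrompt of ["Base instructions", "Project rules"]) {
  messages.push({ role: "developer", content: systemPrompt, timestamp: Date.now() });
}
// Two separate developer messages; convertMessage rewrites the role to "system"
// on the wire. Because the leading prompts keep a byte-identical prefix across
// calls, Ollama's KV cache can reuse them when only the trailing prompt changes.
```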
package/src/providers/openai-codex/request-transformer.ts
CHANGED
@@ -77,7 +77,7 @@ export async function transformRequestBody(
   body: RequestBody,
   model: Model<Api>,
   options: CodexRequestOptions = {},
-  prompt?: {
+  prompt?: { developerMessages: string[] },
 ): Promise<RequestBody> {
   body.store = false;
   body.stream = true;
package/src/providers/openai-codex-responses.ts
CHANGED
@@ -36,6 +36,7 @@ import {
   getOpenAIResponsesHistoryItems,
   getOpenAIResponsesHistoryPayload,
   normalizeResponsesToolCallId,
+  normalizeSystemPrompts,
 } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
@@ -51,6 +52,7 @@ import {
   transformRequestBody,
 } from "./openai-codex/request-transformer";
 import { parseCodexError } from "./openai-codex/response-handler";
+import { normalizeOpenAIResponsesPromptCacheKey } from "./openai-responses";
 import {
   encodeResponsesToolCallId,
   encodeTextSignatureV1,
@@ -476,6 +478,7 @@ async function buildCodexRequestContext(
   const accountId = getAccountId(apiKey);
   const baseUrl = model.baseUrl || CODEX_BASE_URL;
   const url = resolveCodexResponsesUrl(baseUrl);
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
   const transformedBody = await buildTransformedCodexRequestBody(model, context, options);
   options?.onPayload?.(transformedBody);
 
@@ -490,8 +493,8 @@ async function buildCodexRequestContext(
   };
 
   const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
-  const sessionKey = getCodexWebSocketSessionKey(
-  const publicSessionKey = getCodexPublicSessionKey(
+  const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
+  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
   if (sessionKey && publicSessionKey) {
     providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
   }
@@ -520,7 +523,7 @@ async function buildTransformedCodexRequestBody(
     model: model.id,
     input: [...convertMessages(model, context)],
     stream: true,
-    prompt_cache_key: options?.sessionId,
+    prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
   };
 
   if (options?.maxTokens) {
@@ -567,8 +570,11 @@ async function buildTransformedCodexRequestBody(
     }
   }
 
-
-
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
+    params.instructions = systemPrompts[0];
+  }
+  const developerMessages = systemPrompts.slice(1);
   const codexOptions: CodexRequestOptions = {
     reasoningEffort: options?.reasoning,
     reasoningSummary: options?.reasoningSummary ?? "auto",
@@ -576,7 +582,7 @@ async function buildTransformedCodexRequestBody(
     include: options?.include,
   };
 
-  return transformRequestBody(params, model, codexOptions);
+  return transformRequestBody(params, model, codexOptions, { developerMessages });
 }
 
 async function openInitialCodexEventStream(
@@ -628,7 +634,7 @@ async function openInitialCodexEventStream(
 async function openCodexWebSocketTransport(
   requestContext: CodexRequestContext,
   requestSetup: CodexRequestSetup,
-
+  _options: OpenAICodexResponsesOptions | undefined,
   websocketState: CodexWebSocketSessionState,
   retry: number,
 ): Promise<{
@@ -641,7 +647,7 @@ async function openCodexWebSocketTransport(
     requestContext.requestHeaders,
     requestContext.accountId,
     requestContext.apiKey,
-
+    requestContext.transformedBody.prompt_cache_key,
     "websocket",
     websocketState,
   );
@@ -670,7 +676,7 @@ async function openCodexWebSocketTransport(
 async function openCodexSseTransport(
   requestContext: CodexRequestContext,
   requestSetup: CodexRequestSetup,
-
+  _options: OpenAICodexResponsesOptions | undefined,
   state: CodexWebSocketSessionState | undefined,
   body = requestContext.transformedBody,
 ): Promise<{
@@ -684,7 +690,7 @@ async function openCodexSseTransport(
     requestContext.requestHeaders,
     requestContext.accountId,
     requestContext.apiKey,
-
+    body.prompt_cache_key,
     body,
     state,
     requestSetup.requestSignal,
@@ -1559,9 +1565,10 @@ export async function prewarmOpenAICodexResponses(
   const accountId = getAccountId(apiKey);
   const baseUrl = model.baseUrl || CODEX_BASE_URL;
   const url = resolveCodexResponsesUrl(baseUrl);
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
   const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
-  const sessionKey = getCodexWebSocketSessionKey(
-  const publicSessionKey = getCodexPublicSessionKey(
+  const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
+  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
   if (publicSessionKey && sessionKey) {
     providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
   }
@@ -1574,7 +1581,7 @@ export async function prewarmOpenAICodexResponses(
     { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
     accountId,
     apiKey,
-
+    promptCacheKey,
     "websocket",
     state,
   );
@@ -1595,8 +1602,9 @@ function getCodexWebSocketSessionKey(
   accountId: string,
   baseUrl: string,
 ): string | undefined {
-
-  return
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
+  if (!promptCacheKey) return undefined;
+  return `${accountId}:${baseUrl}:${model.id}:${promptCacheKey}`;
 }
 
 function getCodexPublicSessionKey(
@@ -1604,8 +1612,9 @@ function getCodexPublicSessionKey(
   model: Model<"openai-codex-responses">,
   baseUrl: string,
 ): string | undefined {
-
-  return
+  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
+  if (!promptCacheKey) return undefined;
+  return `${baseUrl}:${model.id}:${promptCacheKey}`;
 }
 
 function getCodexWebSocketSessionState(
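The prompt split above is worth spelling out: the first normalized prompt becomes the Responses-level `instructions` field and the rest travel to the transformer as developer messages. A sketch (values illustrative):

```ts
const systemPrompts = ["Base instructions", "Project rules", "Tool policy"];

// params.instructions -> "Base instructions"
// developerMessages   -> ["Project rules", "Tool policy"]
const instructions = systemPrompts[0];
const developerMessages = systemPrompts.slice(1);
// transformRequestBody(params, model, codexOptions, { developerMessages })
// receives the remainder via its new `prompt` parameter.
```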
package/src/providers/openai-completions.ts
CHANGED
@@ -33,6 +33,7 @@ import {
   type ToolChoice,
   type ToolResultMessage,
 } from "../types";
+import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { toFireworksWireModelId } from "../utils/fireworks-model-id";
@@ -1178,10 +1179,13 @@ export function convertMessages(
     return generateFallbackToolCallId(seed);
   };
 
-
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
     const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
     const role = useDeveloperRole ? "developer" : "system";
-
+    for (const systemPrompt of systemPrompts) {
+      params.push({ role, content: systemPrompt });
+    }
   }
 
   let lastRole: string | null = null;
package/src/providers/openai-responses.ts
CHANGED
@@ -25,6 +25,7 @@ import {
   createOpenAIResponsesHistoryPayload,
   getOpenAIResponsesHistoryItems,
   getOpenAIResponsesHistoryPayload,
+  normalizeSystemPrompts,
   resolveCacheRetention,
   sanitizeOpenAIResponsesHistoryItemsForReplay,
 } from "../utils";
@@ -73,6 +74,13 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
   return undefined;
 }
 
+export function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined {
+  if (!sessionId || sessionId.length === 0) return undefined;
+  const wellFormed = sessionId.toWellFormed();
+  if (wellFormed.length <= 64) return wellFormed;
+  return `pc_${Bun.hash(wellFormed).toString(36)}`;
+}
+
 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
   reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -331,7 +339,9 @@ function createClient(
 function getOpenAIResponsesCacheSessionId(
   options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId"> | undefined,
 ): string | undefined {
-  return resolveCacheRetention(options?.cacheRetention) === "none"
+  return resolveCacheRetention(options?.cacheRetention) === "none"
+    ? undefined
+    : normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
 }
 
 function buildParams(
@@ -352,12 +362,11 @@ function buildParams(
   );
   const messages: ResponseInput = [...conversationMessages];
 
-
-
-
-
-
-  });
+  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
+  if (systemPrompts.length > 0) {
+    const role: "developer" | "system" =
+      model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
+    messages.unshift(...systemPrompts.map(systemPrompt => ({ role, content: systemPrompt })));
   }
 
   const cacheRetention = resolveCacheRetention(options?.cacheRetention);
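`normalizeOpenAIResponsesPromptCacheKey` passes short session ids through and replaces anything past its 64-character cutoff with a stable hash, so every transport (Responses, Azure, Codex SSE/WebSocket) derives the same `prompt_cache_key` from the same session. Roughly:

```ts
normalizeOpenAIResponsesPromptCacheKey(undefined);       // -> undefined
normalizeOpenAIResponsesPromptCacheKey("session-42");    // -> "session-42" (≤ 64 chars, passed through)
normalizeOpenAIResponsesPromptCacheKey("x".repeat(80));  // -> "pc_" + Bun.hash(...) rendered in
                                                         //    base 36, a short deterministic stand-in
```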
package/src/types.ts
CHANGED
package/src/utils.ts
CHANGED
@@ -5,6 +5,9 @@ import type { CacheRetention, OpenAIResponsesHistoryPayload, ProviderPayload } f
 type OpenAIResponsesReplayItem = ResponseInput[number];
 
 export { isRecord } from "@oh-my-pi/pi-utils";
+export function normalizeSystemPrompts(systemPrompt: readonly string[] | undefined): string[] {
+  return systemPrompt?.map(prompt => prompt.toWellFormed()).filter(prompt => prompt.length > 0) ?? [];
+}
 
 export function toNumber(value: unknown): number | undefined {
   if (typeof value === "number" && Number.isFinite(value)) return value;
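The shared helper in brief — `toWellFormed()` replaces lone surrogates with U+FFFD, and entries that normalize to the empty string are dropped:

```ts
normalizeSystemPrompts(undefined);              // -> []
normalizeSystemPrompts(["Base", "", "Rules"]);  // -> ["Base", "Rules"]
normalizeSystemPrompts(["bad\uD800pair"]);      // -> ["bad\uFFFDpair"]
```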