@oh-my-pi/pi-ai 5.5.0 → 5.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@oh-my-pi/pi-ai",
- "version": "5.5.0",
+ "version": "5.6.7",
  "description": "Unified LLM API with automatic model discovery and provider configuration",
  "type": "module",
  "main": "./src/index.ts",
package/src/cli.ts CHANGED
@@ -1,5 +1,5 @@
- #!/usr/bin/env node
-
+ #!/usr/bin/env bun
+ import "./utils/migrate-env";
  import { existsSync, readFileSync, writeFileSync } from "node:fs";
  import { createInterface } from "readline";
  import { loginAnthropic } from "./utils/oauth/anthropic";
package/src/index.ts CHANGED
@@ -1,3 +1,5 @@
+ import "./utils/migrate-env";
+
  export * from "./models";
  export * from "./providers/anthropic";
  export * from "./providers/cursor";
@@ -1335,23 +1335,6 @@ export const MODELS = {
  contextWindow: 131000,
  maxTokens: 32000,
  } satisfies Model<"openai-completions">,
- "zai-glm-4.6": {
- id: "zai-glm-4.6",
- name: "Z.AI GLM-4.6",
- api: "openai-completions",
- provider: "cerebras",
- baseUrl: "https://api.cerebras.ai/v1",
- reasoning: false,
- input: ["text"],
- cost: {
- input: 0,
- output: 0,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 131072,
- maxTokens: 40960,
- } satisfies Model<"openai-completions">,
  "zai-glm-4.7": {
  id: "zai-glm-4.7",
  name: "Z.AI GLM-4.7",
@@ -1946,24 +1929,6 @@ export const MODELS = {
  contextWindow: 128000,
  maxTokens: 64000,
  } satisfies Model<"openai-completions">,
- "oswe-vscode-prime": {
- id: "oswe-vscode-prime",
- name: "Raptor Mini (Preview)",
- api: "openai-responses",
- provider: "github-copilot",
- baseUrl: "https://api.individual.githubcopilot.com",
- headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"},
- reasoning: true,
- input: ["text", "image"],
- cost: {
- input: 0,
- output: 0,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 200000,
- maxTokens: 64000,
- } satisfies Model<"openai-responses">,
  },
  "google": {
  "gemini-1.5-flash": {
@@ -4657,23 +4622,6 @@ export const MODELS = {
  contextWindow: 131072,
  maxTokens: 131072,
  } satisfies Model<"openai-completions">,
- "allenai/olmo-3-7b-instruct": {
- id: "allenai/olmo-3-7b-instruct",
- name: "AllenAI: Olmo 3 7B Instruct",
- api: "openai-completions",
- provider: "openrouter",
- baseUrl: "https://openrouter.ai/api/v1",
- reasoning: false,
- input: ["text"],
- cost: {
- input: 0.09999999999999999,
- output: 0.19999999999999998,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 65536,
- maxTokens: 65536,
- } satisfies Model<"openai-completions">,
  "allenai/olmo-3.1-32b-instruct": {
  id: "allenai/olmo-3.1-32b-instruct",
  name: "AllenAI: Olmo 3.1 32B Instruct",
@@ -5228,12 +5176,12 @@ export const MODELS = {
  input: ["text"],
  cost: {
  input: 0.7,
- output: 2.4,
+ output: 2.5,
  cacheRead: 0,
  cacheWrite: 0,
  },
- contextWindow: 163840,
- maxTokens: 163840,
+ contextWindow: 64000,
+ maxTokens: 16000,
  } satisfies Model<"openai-completions">,
  "deepseek/deepseek-r1-0528": {
  id: "deepseek/deepseek-r1-0528",
@@ -5244,13 +5192,13 @@ export const MODELS = {
  reasoning: true,
  input: ["text"],
  cost: {
- input: 0.44999999999999996,
- output: 2.1500000000000004,
+ input: 0.39999999999999997,
+ output: 1.75,
  cacheRead: 0,
  cacheWrite: 0,
  },
- contextWindow: 131072,
- maxTokens: 32768,
+ contextWindow: 163840,
+ maxTokens: 65536,
  } satisfies Model<"openai-completions">,
  "deepseek/deepseek-r1-distill-llama-70b": {
  id: "deepseek/deepseek-r1-distill-llama-70b",
@@ -5349,7 +5297,7 @@ export const MODELS = {
  input: 0.09999999999999999,
  output: 0.39999999999999997,
  cacheRead: 0.024999999999999998,
- cacheWrite: 0.18330000000000002,
+ cacheWrite: 0.0833,
  },
  contextWindow: 1048576,
  maxTokens: 8192,
@@ -5400,7 +5348,7 @@ export const MODELS = {
  input: 0.3,
  output: 2.5,
  cacheRead: 0.03,
- cacheWrite: 0.3833,
+ cacheWrite: 0.08333333333333334,
  },
  contextWindow: 1048576,
  maxTokens: 65535,
@@ -5417,7 +5365,7 @@ export const MODELS = {
  input: 0.09999999999999999,
  output: 0.39999999999999997,
  cacheRead: 0.01,
- cacheWrite: 0.18330000000000002,
+ cacheWrite: 0.0833,
  },
  contextWindow: 1048576,
  maxTokens: 65535,
@@ -5433,12 +5381,29 @@ export const MODELS = {
  cost: {
  input: 0.09999999999999999,
  output: 0.39999999999999997,
- cacheRead: 0,
- cacheWrite: 0,
+ cacheRead: 0.01,
+ cacheWrite: 0.0833,
  },
  contextWindow: 1048576,
  maxTokens: 65536,
  } satisfies Model<"openai-completions">,
+ "google/gemini-2.5-flash-preview-09-2025": {
+ id: "google/gemini-2.5-flash-preview-09-2025",
+ name: "Google: Gemini 2.5 Flash Preview 09-2025",
+ api: "openai-completions",
+ provider: "openrouter",
+ baseUrl: "https://openrouter.ai/api/v1",
+ reasoning: true,
+ input: ["text", "image"],
+ cost: {
+ input: 0.3,
+ output: 2.5,
+ cacheRead: 0.075,
+ cacheWrite: 0.0833,
+ },
+ contextWindow: 1048576,
+ maxTokens: 65535,
+ } satisfies Model<"openai-completions">,
  "google/gemini-2.5-pro": {
  id: "google/gemini-2.5-pro",
  name: "Google: Gemini 2.5 Pro",
@@ -5451,7 +5416,7 @@ export const MODELS = {
  input: 1.25,
  output: 10,
  cacheRead: 0.125,
- cacheWrite: 1.625,
+ cacheWrite: 0.375,
  },
  contextWindow: 1048576,
  maxTokens: 65536,
@@ -5468,7 +5433,7 @@ export const MODELS = {
  input: 1.25,
  output: 10,
  cacheRead: 0.31,
- cacheWrite: 1.625,
+ cacheWrite: 0.375,
  },
  contextWindow: 1048576,
  maxTokens: 65536,
@@ -5485,7 +5450,7 @@ export const MODELS = {
  input: 1.25,
  output: 10,
  cacheRead: 0.31,
- cacheWrite: 1.625,
+ cacheWrite: 0.375,
  },
  contextWindow: 1048576,
  maxTokens: 65535,
@@ -5519,7 +5484,7 @@ export const MODELS = {
  input: 2,
  output: 12,
  cacheRead: 0.19999999999999998,
- cacheWrite: 2.375,
+ cacheWrite: 0.375,
  },
  contextWindow: 1048576,
  maxTokens: 65536,
@@ -5609,23 +5574,6 @@ export const MODELS = {
  contextWindow: 256000,
  maxTokens: 128000,
  } satisfies Model<"openai-completions">,
- "meta-llama/llama-3-70b-instruct": {
- id: "meta-llama/llama-3-70b-instruct",
- name: "Meta: Llama 3 70B Instruct",
- api: "openai-completions",
- provider: "openrouter",
- baseUrl: "https://openrouter.ai/api/v1",
- reasoning: false,
- input: ["text"],
- cost: {
- input: 0.3,
- output: 0.39999999999999997,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 8192,
- maxTokens: 16384,
- } satisfies Model<"openai-completions">,
  "meta-llama/llama-3-8b-instruct": {
  id: "meta-llama/llama-3-8b-instruct",
  name: "Meta: Llama 3 8B Instruct",
@@ -5890,12 +5838,12 @@ export const MODELS = {
  reasoning: false,
  input: ["text"],
  cost: {
- input: 0.07,
- output: 0.28,
+ input: 0.09999999999999999,
+ output: 0.3,
  cacheRead: 0,
  cacheWrite: 0,
  },
- contextWindow: 128000,
+ contextWindow: 131072,
  maxTokens: 4096,
  } satisfies Model<"openai-completions">,
  "mistralai/ministral-14b-2512": {
@@ -5983,40 +5931,6 @@ export const MODELS = {
  contextWindow: 262144,
  maxTokens: 4096,
  } satisfies Model<"openai-completions">,
- "mistralai/mistral-7b-instruct": {
- id: "mistralai/mistral-7b-instruct",
- name: "Mistral: Mistral 7B Instruct",
- api: "openai-completions",
- provider: "openrouter",
- baseUrl: "https://openrouter.ai/api/v1",
- reasoning: false,
- input: ["text"],
- cost: {
- input: 0.028,
- output: 0.054,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 32768,
- maxTokens: 16384,
- } satisfies Model<"openai-completions">,
- "mistralai/mistral-7b-instruct:free": {
- id: "mistralai/mistral-7b-instruct:free",
- name: "Mistral: Mistral 7B Instruct (free)",
- api: "openai-completions",
- provider: "openrouter",
- baseUrl: "https://openrouter.ai/api/v1",
- reasoning: false,
- input: ["text"],
- cost: {
- input: 0,
- output: 0,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 32768,
- maxTokens: 16384,
- } satisfies Model<"openai-completions">,
  "mistralai/mistral-large": {
  id: "mistralai/mistral-large",
  name: "Mistral Large",
@@ -6134,7 +6048,7 @@ export const MODELS = {
  cacheWrite: 0,
  },
  contextWindow: 131072,
- maxTokens: 16384,
+ maxTokens: 131072,
  } satisfies Model<"openai-completions">,
  "mistralai/mistral-saba": {
  id: "mistralai/mistral-saba",
@@ -6578,23 +6492,6 @@ export const MODELS = {
  contextWindow: 128000,
  maxTokens: 4096,
  } satisfies Model<"openai-completions">,
- "openai/codex-mini": {
- id: "openai/codex-mini",
- name: "OpenAI: Codex Mini",
- api: "openai-completions",
- provider: "openrouter",
- baseUrl: "https://openrouter.ai/api/v1",
- reasoning: true,
- input: ["text", "image"],
- cost: {
- input: 1.5,
- output: 6,
- cacheRead: 0.375,
- cacheWrite: 0,
- },
- contextWindow: 200000,
- maxTokens: 100000,
- } satisfies Model<"openai-completions">,
  "openai/gpt-3.5-turbo": {
  id: "openai/gpt-3.5-turbo",
  name: "OpenAI: GPT-3.5 Turbo",
@@ -7641,13 +7538,13 @@ export const MODELS = {
  reasoning: true,
  input: ["text"],
  cost: {
- input: 0.18,
- output: 0.54,
+ input: 0.19999999999999998,
+ output: 0.6,
  cacheRead: 0,
  cacheWrite: 0,
  },
  contextWindow: 40960,
- maxTokens: 40960,
+ maxTokens: 4096,
  } satisfies Model<"openai-completions">,
  "qwen/qwen3-235b-a22b-2507": {
  id: "qwen/qwen3-235b-a22b-2507",
@@ -7921,6 +7818,23 @@ export const MODELS = {
  contextWindow: 262144,
  maxTokens: 4096,
  } satisfies Model<"openai-completions">,
+ "qwen/qwen3-next-80b-a3b-instruct:free": {
+ id: "qwen/qwen3-next-80b-a3b-instruct:free",
+ name: "Qwen: Qwen3 Next 80B A3B Instruct (free)",
+ api: "openai-completions",
+ provider: "openrouter",
+ baseUrl: "https://openrouter.ai/api/v1",
+ reasoning: false,
+ input: ["text"],
+ cost: {
+ input: 0,
+ output: 0,
+ cacheRead: 0,
+ cacheWrite: 0,
+ },
+ contextWindow: 262144,
+ maxTokens: 4096,
+ } satisfies Model<"openai-completions">,
  "qwen/qwen3-next-80b-a3b-thinking": {
  id: "qwen/qwen3-next-80b-a3b-thinking",
  name: "Qwen: Qwen3 Next 80B A3B Thinking",
@@ -7935,8 +7849,8 @@ export const MODELS = {
  cacheRead: 0,
  cacheWrite: 0,
  },
- contextWindow: 262144,
- maxTokens: 262144,
+ contextWindow: 128000,
+ maxTokens: 4096,
  } satisfies Model<"openai-completions">,
  "qwen/qwen3-vl-235b-a22b-instruct": {
  id: "qwen/qwen3-vl-235b-a22b-instruct",
@@ -7955,23 +7869,6 @@ export const MODELS = {
  contextWindow: 262144,
  maxTokens: 4096,
  } satisfies Model<"openai-completions">,
- "qwen/qwen3-vl-235b-a22b-thinking": {
- id: "qwen/qwen3-vl-235b-a22b-thinking",
- name: "Qwen: Qwen3 VL 235B A22B Thinking",
- api: "openai-completions",
- provider: "openrouter",
- baseUrl: "https://openrouter.ai/api/v1",
- reasoning: true,
- input: ["text", "image"],
- cost: {
- input: 0.44999999999999996,
- output: 3.5,
- cacheRead: 0,
- cacheWrite: 0,
- },
- contextWindow: 262144,
- maxTokens: 262144,
- } satisfies Model<"openai-completions">,
  "qwen/qwen3-vl-30b-a3b-instruct": {
  id: "qwen/qwen3-vl-30b-a3b-instruct",
  name: "Qwen: Qwen3 VL 30B A3B Instruct",
@@ -8355,13 +8252,13 @@ export const MODELS = {
  reasoning: true,
  input: ["text"],
  cost: {
- input: 0.09999999999999999,
- output: 0.3,
- cacheRead: 0.02,
+ input: 0.09,
+ output: 0.29,
+ cacheRead: 0,
  cacheWrite: 0,
  },
  contextWindow: 262144,
- maxTokens: 32000,
+ maxTokens: 4096,
  } satisfies Model<"openai-completions">,
  "xiaomi/mimo-v2-flash:free": {
  id: "xiaomi/mimo-v2-flash:free",
@@ -10516,8 +10413,8 @@ export const MODELS = {
  reasoning: true,
  input: ["text"],
  cost: {
- input: 0.098,
- output: 0.293,
+ input: 0.09,
+ output: 0.29,
  cacheRead: 0,
  cacheWrite: 0,
  },
@@ -54,6 +54,11 @@ export interface BedrockOptions extends StreamOptions {

  type Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };

+ function sanitizeToolCallId(id: string): string {
+ const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
+ return sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
+ }
+
  export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
  model: Model<"bedrock-converse-stream">,
  context: Context,
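A minimal sketch of what the new sanitizer does (inputs hypothetical; the character class and 64-character cap mirror the toolUseId constraints implied by the patch):

sanitizeToolCallId("call_abc-123");     // "call_abc-123" - already valid, returned unchanged
sanitizeToolCallId("toolu_01X!@#bad");  // "toolu_01X___bad" - disallowed characters become "_"
sanitizeToolCallId("x".repeat(80));     // truncated to the first 64 characters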
@@ -165,7 +170,7 @@ function handleContentBlockStart(
  if (start?.toolUse) {
  const block: Block = {
  type: "toolCall",
- id: start.toolUse.toolUseId || "",
+ id: sanitizeToolCallId(start.toolUse.toolUseId || ""),
  name: start.toolUse.name || "",
  arguments: {},
  partialJson: "",
@@ -290,6 +295,17 @@ function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean
  return false;
  }

+ /**
+ * Check if the model supports thinking signatures in reasoningContent.
+ * Only Anthropic Claude models support the signature field.
+ * Other models (Nova, Titan, Mistral, Llama, etc.) reject it with:
+ * "This model doesn't support the reasoningContent.reasoningText.signature field"
+ */
+ function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boolean {
+ const id = model.id.toLowerCase();
+ return id.includes("anthropic.claude") || id.includes("anthropic/claude");
+ }
+
  function buildSystemPrompt(
  systemPrompt: string | undefined,
  model: Model<"bedrock-converse-stream">,
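A minimal sketch of the gate, assuming model records shaped like the library's Model type (IDs hypothetical):

const claude = { id: "us.anthropic.claude-sonnet-4" } as Model<"bedrock-converse-stream">;
const nova = { id: "amazon.nova-pro-v1:0" } as Model<"bedrock-converse-stream">;
supportsThinkingSignature(claude); // true  -> reasoningText keeps its signature
supportsThinkingSignature(nova);   // false -> signature omitted, avoiding the validation error above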
@@ -348,17 +364,32 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
  break;
  case "toolCall":
  contentBlocks.push({
- toolUse: { toolUseId: c.id, name: c.name, input: c.arguments },
+ toolUse: {
+ toolUseId: sanitizeToolCallId(c.id),
+ name: c.name,
+ input: c.arguments,
+ },
  });
  break;
  case "thinking":
  // Skip empty thinking blocks
  if (c.thinking.trim().length === 0) continue;
- contentBlocks.push({
- reasoningContent: {
- reasoningText: { text: sanitizeSurrogates(c.thinking), signature: c.thinkingSignature },
- },
- });
+ // Only Anthropic models support the signature field in reasoningText.
+ // For other models, we omit the signature to avoid errors like:
+ // "This model doesn't support the reasoningContent.reasoningText.signature field"
+ if (supportsThinkingSignature(model)) {
+ contentBlocks.push({
+ reasoningContent: {
+ reasoningText: { text: sanitizeSurrogates(c.thinking), signature: c.thinkingSignature },
+ },
+ });
+ } else {
+ contentBlocks.push({
+ reasoningContent: {
+ reasoningText: { text: sanitizeSurrogates(c.thinking) },
+ },
+ });
+ }
  break;
  default:
  throw new Error("Unknown assistant content type");
@@ -382,7 +413,7 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
  // Add current tool result with all content blocks combined
  toolResults.push({
  toolResult: {
- toolUseId: m.toolCallId,
+ toolUseId: sanitizeToolCallId(m.toolCallId),
  content: m.content.map((c) =>
  c.type === "image"
  ? { image: createImageBlock(c.mimeType, c.data) }
@@ -398,7 +429,7 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
  const nextMsg = transformedMessages[j] as ToolResultMessage;
  toolResults.push({
  toolResult: {
- toolUseId: nextMsg.toolCallId,
+ toolUseId: sanitizeToolCallId(nextMsg.toolCallId),
  content: nextMsg.content.map((c) =>
  c.type === "image"
  ? { image: createImageBlock(c.mimeType, c.data) }
@@ -47,15 +47,59 @@ export const claudeCodeHeaders = {
  } as const;

  export const applyClaudeToolPrefix = (name: string) => {
- if (!claudeToolPrefix || name.startsWith(claudeToolPrefix)) return name;
+ if (!claudeToolPrefix) return name;
+ const prefix = claudeToolPrefix.toLowerCase();
+ if (name.toLowerCase().startsWith(prefix)) return name;
  return `${claudeToolPrefix}${name}`;
  };

  export const stripClaudeToolPrefix = (name: string) => {
- if (!claudeToolPrefix || !name.startsWith(claudeToolPrefix)) return name;
+ if (!claudeToolPrefix) return name;
+ const prefix = claudeToolPrefix.toLowerCase();
+ if (!name.toLowerCase().startsWith(prefix)) return name;
  return name.slice(claudeToolPrefix.length);
  };

+ // Claude Code 2.x tool names (canonical casing)
+ // Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
+ // To update: https://github.com/badlogic/cchistory
+ const claudeCodeTools = [
+ "Read",
+ "Write",
+ "Edit",
+ "Bash",
+ "Grep",
+ "Glob",
+ "AskUserQuestion",
+ "EnterPlanMode",
+ "ExitPlanMode",
+ "KillShell",
+ "NotebookEdit",
+ "Skill",
+ "Task",
+ "TaskOutput",
+ "TodoWrite",
+ "WebFetch",
+ "WebSearch",
+ ];
+
+ const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));
+
+ // Convert tool name to CC canonical casing if it matches (case-insensitive), fallback to prefix
+ const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? applyClaudeToolPrefix(name);
+
+ // Convert CC tool name back to original, checking provided tools for case-insensitive match
+ const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
+ // First try to find by case-insensitive match in provided tools
+ if (tools && tools.length > 0) {
+ const lowerName = name.toLowerCase();
+ const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);
+ if (matchedTool) return matchedTool.name;
+ }
+ // Fall back to stripping prefix if no match found
+ return stripClaudeToolPrefix(name);
+ };
+
  /**
  * Convert content blocks to Anthropic API format
  */
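A minimal sketch of the round trip, with a hypothetical locally registered tool named "read":

const tools = [{ name: "read" }] as unknown as Tool[];
toClaudeCodeName("read");           // "Read" - canonical Claude Code casing via ccToolLookup
toClaudeCodeName("myCustomTool");   // no canonical match, falls back to applyClaudeToolPrefix
fromClaudeCodeName("Read", tools);  // "read" - restored to the caller's original casing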
@@ -187,7 +231,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
  const block: Block = {
  type: "toolCall",
  id: event.content_block.id,
- name: isOAuthToken ? stripClaudeToolPrefix(event.content_block.name) : event.content_block.name,
+ name: isOAuthToken
+ ? fromClaudeCodeName(event.content_block.name, context.tools)
+ : event.content_block.name,
  arguments: event.content_block.input as Record<string, any>,
  partialJson: "",
  index: event.index,
@@ -631,7 +677,7 @@ function convertMessages(
  blocks.push({
  type: "tool_use",
  id: sanitizeToolCallId(block.id),
- name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
+ name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
  input: block.arguments,
  });
  }
@@ -704,7 +750,7 @@ function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.
  const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema

  return {
- name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
+ name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
  description: tool.description,
  input_schema: {
  type: "object" as const,
@@ -65,6 +65,10 @@ export function requiresToolCallId(modelId: string): boolean {
  return modelId.startsWith("claude-");
  }

+ function isGemini3Model(modelId: string): boolean {
+ return modelId.includes("gemini-3");
+ }
+
  /**
  * Convert internal messages to Gemini Content[] format.
  */
@@ -131,6 +135,17 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
  });
  }
  } else if (block.type === "toolCall") {
+ const thoughtSignature = resolveThoughtSignature(isSameProviderAndModel, block.thoughtSignature);
+ if (isGemini3Model(model.id) && !thoughtSignature) {
+ const argsStr = JSON.stringify(block.arguments, null, 2);
+ parts.push({
+ text: sanitizeSurrogates(
+ `[Historical context: a different model called tool "${block.name}" with arguments: ${argsStr}. Do not mimic this format - use proper function calling.]`,
+ ),
+ });
+ continue;
+ }
+
  const part: Part = {
  functionCall: {
  name: block.name,
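A minimal sketch of the fallback, assuming a cross-provider history replayed to a gemini-3 model (tool name and arguments hypothetical):

// A toolCall block carried over from another provider arrives with no thoughtSignature,
// so instead of a functionCall part it is downgraded to a plain text part:
// { text: '[Historical context: a different model called tool "get_weather" with arguments: {
//   "city": "Berlin"
// }. Do not mimic this format - use proper function calling.]' }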
@@ -141,7 +156,6 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
  if (model.provider === "google-vertex" && part?.functionCall?.id) {
  delete part.functionCall.id; // Vertex AI does not support 'id' in functionCall
  }
- const thoughtSignature = resolveThoughtSignature(isSameProviderAndModel, block.thoughtSignature);
  if (thoughtSignature) {
  part.thoughtSignature = thoughtSignature;
  }
@@ -49,7 +49,10 @@ export interface OpenAICodexResponsesOptions extends StreamOptions {
  codexMode?: boolean;
  }

- const CODEX_DEBUG = process.env.PI_CODEX_DEBUG === "1" || process.env.PI_CODEX_DEBUG === "true";
+ const CODEX_DEBUG = process.env.OMP_CODEX_DEBUG === "1" || process.env.OMP_CODEX_DEBUG === "true";
+ const CODEX_MAX_RETRIES = 2;
+ const CODEX_RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504]);
+ const CODEX_RETRY_DELAY_MS = 500;

  export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"> = (
  model: Model<"openai-codex-responses">,
@@ -134,12 +137,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
  headers: redactHeaders(headers),
  });

- const response = await fetch(url, {
- method: "POST",
- headers,
- body: JSON.stringify(transformedBody),
- signal: options?.signal,
- });
+ const response = await fetchWithRetry(
+ url,
+ {
+ method: "POST",
+ headers,
+ body: JSON.stringify(transformedBody),
+ },
+ options?.signal,
+ );

  logCodexDebug("codex response", {
  url: response.url,
@@ -409,6 +415,43 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
  console.error(`[codex] ${message}`);
  }

+ function getRetryDelayMs(response: Response | null, attempt: number): number {
+ const retryAfter = response?.headers?.get("retry-after") || null;
+ if (retryAfter) {
+ const seconds = Number(retryAfter);
+ if (Number.isFinite(seconds)) {
+ return Math.max(0, seconds * 1000);
+ }
+ const parsedDate = Date.parse(retryAfter);
+ if (!Number.isNaN(parsedDate)) {
+ return Math.max(0, parsedDate - Date.now());
+ }
+ }
+ return CODEX_RETRY_DELAY_MS * (attempt + 1);
+ }
+
+ async function fetchWithRetry(url: string, init: RequestInit, signal?: AbortSignal): Promise<Response> {
+ let attempt = 0;
+ while (true) {
+ try {
+ const response = await fetch(url, { ...init, signal: signal ?? init.signal });
+ if (!CODEX_RETRYABLE_STATUS.has(response.status) || attempt >= CODEX_MAX_RETRIES) {
+ return response;
+ }
+ if (signal?.aborted) return response;
+ const delay = getRetryDelayMs(response, attempt);
+ await new Promise((resolve) => setTimeout(resolve, delay));
+ } catch (error) {
+ if (attempt >= CODEX_MAX_RETRIES || signal?.aborted) {
+ throw error;
+ }
+ const delay = CODEX_RETRY_DELAY_MS * (attempt + 1);
+ await new Promise((resolve) => setTimeout(resolve, delay));
+ }
+ attempt += 1;
+ }
+ }
+
  function redactHeaders(headers: Headers): Record<string, string> {
  const redacted: Record<string, string> = {};
  for (const [key, value] of headers.entries()) {
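A minimal sketch of the resulting backoff (the Response construction here is hypothetical test scaffolding):

getRetryDelayMs(new Response(null, { status: 429 }), 0); // 500  - no Retry-After, linear base delay
getRetryDelayMs(new Response(null, { status: 429 }), 1); // 1000 - delay grows with the attempt number
getRetryDelayMs(new Response(null, { status: 429, headers: { "retry-after": "3" } }), 0); // 3000 - seconds form; an HTTP-date value is also parsed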
@@ -680,24 +680,33 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
  }

  /**
- * Detect compatibility settings from baseUrl for known providers.
+ * Detect compatibility settings from provider and baseUrl for known providers.
+ * Provider takes precedence over URL-based detection since it's explicitly configured.
  * Returns a fully resolved OpenAICompat object with all fields set.
  */
- function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
- const isZai = baseUrl.includes("api.z.ai");
+ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
+ const provider = model.provider;
+ const baseUrl = model.baseUrl;
+
+ const isZai = provider === "zai" || baseUrl.includes("api.z.ai");

  const isNonStandard =
+ provider === "cerebras" ||
  baseUrl.includes("cerebras.ai") ||
+ provider === "xai" ||
  baseUrl.includes("api.x.ai") ||
+ provider === "mistral" ||
  baseUrl.includes("mistral.ai") ||
  baseUrl.includes("chutes.ai") ||
- isZai;
+ isZai ||
+ provider === "opencode" ||
+ baseUrl.includes("opencode.ai");

- const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
+ const useMaxTokens = provider === "mistral" || baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");

- const isGrok = baseUrl.includes("api.x.ai");
+ const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");

- const isMistral = baseUrl.includes("mistral.ai");
+ const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");

  return {
  supportsStore: !isNonStandard,
@@ -715,10 +724,10 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {

  /**
  * Get resolved compatibility settings for a model.
- * Uses explicit model.compat if provided, otherwise auto-detects from URL.
+ * Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
  */
  function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
- const detected = detectCompatFromUrl(model.baseUrl);
+ const detected = detectCompat(model);
  if (!model.compat) return detected;

  return {
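A minimal sketch of why provider now takes precedence, assuming a model routed through a gateway URL (values hypothetical):

const model = {
  provider: "mistral",
  baseUrl: "https://my-gateway.example.com/v1", // URL alone matches no known provider
} as Model<"openai-completions">;
detectCompat(model).useMaxTokens; // true - the explicit provider field wins over URL sniffing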
@@ -14,19 +14,19 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
  const toolCallIdMap = new Map<string, string>();

  // First pass: transform messages (thinking blocks, tool call ID normalization)
- const transformed = messages.map((msg) => {
+ const transformed = messages.flatMap<Message>((msg): Message[] => {
  // User messages pass through unchanged
  if (msg.role === "user") {
- return msg;
+ return [msg];
  }

  // Handle toolResult messages - normalize toolCallId if we have a mapping
  if (msg.role === "toolResult") {
  const normalizedId = toolCallIdMap.get(msg.toolCallId);
  if (normalizedId && normalizedId !== msg.toolCallId) {
- return { ...msg, toolCallId: normalizedId };
+ return [{ ...msg, toolCallId: normalizedId }];
  }
- return msg;
+ return [msg];
  }

  // Assistant messages need transformation check
@@ -35,7 +35,10 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:

  // If message is from the same provider and API, keep as is
  if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) {
- return msg;
+ if (assistantMsg.stopReason === "error" && assistantMsg.content.length === 0) {
+ return [];
+ }
+ return [msg];
  }

  // Check if we need to normalize tool call IDs
@@ -73,13 +76,19 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
  return block;
  });

+ if (assistantMsg.stopReason === "error" && transformedContent.length === 0) {
+ return [];
+ }
+
  // Return transformed assistant message
- return {
- ...assistantMsg,
- content: transformedContent,
- };
+ return [
+ {
+ ...assistantMsg,
+ content: transformedContent,
+ },
+ ];
  }
- return msg;
+ return [msg];
  });

  // Second pass: insert synthetic empty tool results for orphaned tool calls
@@ -110,15 +119,30 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
  existingToolResultIds = new Set();
  }

- // Track tool calls from this assistant message
  const assistantMsg = msg as AssistantMessage;
+ const isErroredAssistant = assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted";
  const toolCalls = assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[];
- if (toolCalls.length > 0) {
+
+ result.push(msg);
+
+ // For errored/aborted messages with tool calls, insert synthetic results immediately
+ // to maintain tool_use/tool_result pairing required by the API
+ if (isErroredAssistant && toolCalls.length > 0) {
+ for (const tc of toolCalls) {
+ result.push({
+ role: "toolResult",
+ toolCallId: tc.id,
+ toolName: tc.name,
+ content: [{ type: "text", text: "Tool execution was aborted" }],
+ isError: true,
+ timestamp: Date.now(),
+ } as ToolResultMessage);
+ }
+ } else if (!isErroredAssistant && toolCalls.length > 0) {
+ // Track tool calls to check for orphaned calls later
  pendingToolCalls = toolCalls;
  existingToolResultIds = new Set();
  }
-
- result.push(msg);
  } else if (msg.role === "toolResult") {
  existingToolResultIds.add(msg.toolCallId);
  result.push(msg);
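A minimal sketch of the pairing repair (message shapes abbreviated, IDs hypothetical):

// Input: an aborted assistant turn whose tool call never received a result
// [{ role: "assistant", stopReason: "aborted", content: [{ type: "toolCall", id: "tc_1", name: "calc" }] }]
// Output: the same message immediately followed by a synthetic, error-flagged result
// [assistant, { role: "toolResult", toolCallId: "tc_1", toolName: "calc",
//   content: [{ type: "text", text: "Tool execution was aborted" }], isError: true }]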
@@ -146,5 +170,22 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
  }
  }

+ // Handle orphaned tool calls at the end of the message array
+ // This can happen if the last message is an assistant with tool calls that never got results
+ if (pendingToolCalls.length > 0) {
+ for (const tc of pendingToolCalls) {
+ if (!existingToolResultIds.has(tc.id)) {
+ result.push({
+ role: "toolResult",
+ toolCallId: tc.id,
+ toolName: tc.name,
+ content: [{ type: "text", text: "No result provided" }],
+ isError: true,
+ timestamp: Date.now(),
+ } as ToolResultMessage);
+ }
+ }
+ }
+
  return result;
  }
package/src/stream.ts CHANGED
@@ -223,6 +223,25 @@ const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
  xhigh: 24575,
  };

+ const BEDROCK_CLAUDE_THINKING: Record<ThinkingLevel, number> = {
+ minimal: 1024,
+ low: 2048,
+ medium: 8192,
+ high: 16384,
+ xhigh: 16384,
+ };
+
+ function resolveBedrockThinkingBudget(
+ model: Model<"bedrock-converse-stream">,
+ options?: SimpleStreamOptions,
+ ): { budget: number; level: ThinkingLevel } | null {
+ if (!options?.reasoning || !model.reasoning) return null;
+ if (!model.id.includes("anthropic.claude")) return null;
+ const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
+ const budget = options.thinkingBudgets?.[level] ?? BEDROCK_CLAUDE_THINKING[level];
+ return { budget, level };
+ }
+
  function mapOptionsForApi<TApi extends Api>(
  model: Model<TApi>,
  options?: SimpleStreamOptions,
@@ -282,12 +301,28 @@ function mapOptionsForApi<TApi extends Api>(
  }
  }

- case "bedrock-converse-stream":
- return {
+ case "bedrock-converse-stream": {
+ const bedrockBase: BedrockOptions = {
  ...base,
  reasoning: options?.reasoning,
  thinkingBudgets: options?.thinkingBudgets,
- } satisfies BedrockOptions;
+ };
+ const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
+ if (!budgetInfo) return bedrockBase as OptionsForApi<TApi>;
+ let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
+ let thinkingBudgets = bedrockBase.thinkingBudgets;
+ if (maxTokens <= budgetInfo.budget) {
+ const desiredMaxTokens = Math.min(model.maxTokens, budgetInfo.budget + MIN_OUTPUT_TOKENS);
+ if (desiredMaxTokens > maxTokens) {
+ maxTokens = desiredMaxTokens;
+ }
+ }
+ if (maxTokens <= budgetInfo.budget) {
+ const adjustedBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
+ thinkingBudgets = { ...(thinkingBudgets ?? {}), [budgetInfo.level]: adjustedBudget };
+ }
+ return { ...bedrockBase, maxTokens, thinkingBudgets } as OptionsForApi<TApi>;
+ }

  case "openai-completions":
  return {
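A worked sketch of the clamp, assuming MIN_OUTPUT_TOKENS = 1024 (hypothetical; the real constant is defined elsewhere in stream.ts):

// reasoning: "high" on a Bedrock Claude model -> budget = 16384
// requested maxTokens = 8192 (<= budget)
// step 1: maxTokens = Math.min(model.maxTokens, 16384 + 1024) -> raised to 17408 if the model allows
// step 2: if the model cap still leaves maxTokens <= 16384, the thinking budget is
//         clamped to maxTokens - 1024 so some non-thinking output tokens remain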
@@ -0,0 +1,8 @@
+ for (const [key, value] of Object.entries(process.env)) {
+ if (key.startsWith("PI_") && value !== undefined) {
+ const ompKey = `OMP_${key.slice(3)}`; // PI_FOO -> OMP_FOO
+ if (process.env[ompKey] === undefined) {
+ process.env[ompKey] = value;
+ }
+ }
+ }
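A minimal sketch of the shim's effect (variable names hypothetical):

// before: PI_CODEX_DEBUG=1, OMP_CODEX_DEBUG unset
// after:  process.env.OMP_CODEX_DEBUG === "1"  - legacy PI_* values are mirrored to OMP_*
// before: PI_API_KEY=old, OMP_API_KEY=new
// after:  process.env.OMP_API_KEY === "new"    - an explicitly set OMP_* value is never overwritten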
@@ -29,6 +29,9 @@ import type { Tool, ToolCall } from "../types";
  /** Regex matching valid JSON number literals (integers, decimals, scientific notation) */
  const JSON_NUMBER_PATTERN = /^[+-]?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

+ /** Regex matching numeric strings (allows leading zeros) */
+ const NUMERIC_STRING_PATTERN = /^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/;
+
  /**
  * Normalizes AJV's `params.type` into a consistent string array.
  * AJV may report the expected type as a single string or an array of strings
@@ -69,6 +72,28 @@ function matchesExpectedType(value: unknown, expectedTypes: string[]): boolean {
  });
  }

+ function tryParseNumberString(value: string, expectedTypes: string[]): { value: unknown; changed: boolean } {
+ if (!expectedTypes.includes("number") && !expectedTypes.includes("integer")) {
+ return { value, changed: false };
+ }
+
+ const trimmed = value.trim();
+ if (!trimmed || !NUMERIC_STRING_PATTERN.test(trimmed)) {
+ return { value, changed: false };
+ }
+
+ const parsed = Number(trimmed);
+ if (!Number.isFinite(parsed)) {
+ return { value, changed: false };
+ }
+
+ if (!matchesExpectedType(parsed, expectedTypes)) {
+ return { value, changed: false };
+ }
+
+ return { value: parsed, changed: true };
+ }
+
  /**
  * Attempts to parse a string as JSON if it looks like a JSON literal and
  * the parsed result matches one of the expected types.
@@ -86,6 +111,11 @@ function tryParseJsonForTypes(value: string, expectedTypes: string[]): { value:
  const trimmed = value.trim();
  if (!trimmed) return { value, changed: false };

+ const numberCoercion = tryParseNumberString(trimmed, expectedTypes);
+ if (numberCoercion.changed) {
+ return numberCoercion;
+ }
+
  // Quick syntactic checks to avoid unnecessary parse attempts
  const looksJsonObject = trimmed.startsWith("{") && trimmed.endsWith("}");
  const looksJsonArray = trimmed.startsWith("[") && trimmed.endsWith("]");
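A minimal sketch of the coercion path (inputs hypothetical; the "1.5" case assumes matchesExpectedType rejects non-integers for "integer"):

tryParseNumberString("007", ["integer"]); // { value: 7, changed: true } - leading zeros allowed, unlike JSON_NUMBER_PATTERN
tryParseNumberString("1e3", ["number"]);  // { value: 1000, changed: true }
tryParseNumberString("1.5", ["integer"]); // { value: "1.5", changed: false }
tryParseNumberString("abc", ["number"]);  // { value: "abc", changed: false } - left for the JSON fallback in tryParseJsonForTypes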