@oh-my-pi/pi-ai 11.2.2 → 11.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "11.2.2",
3
+ "version": "11.2.3",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -63,7 +63,7 @@
63
63
  "@connectrpc/connect-node": "^2.1.1",
64
64
  "@google/genai": "^1.39.0",
65
65
  "@mistralai/mistralai": "^1.13.0",
66
- "@oh-my-pi/pi-utils": "11.2.2",
66
+ "@oh-my-pi/pi-utils": "11.2.3",
67
67
  "@sinclair/typebox": "^0.34.48",
68
68
  "@smithy/node-http-handler": "^4.4.9",
69
69
  "ajv": "^8.17.1",
@@ -107,6 +107,23 @@ export const MODELS = {
107
107
  contextWindow: 200000,
108
108
  maxTokens: 4096,
109
109
  } satisfies Model<"bedrock-converse-stream">,
110
+ "anthropic.claude-opus-4-6-v1:0": {
111
+ id: "anthropic.claude-opus-4-6-v1:0",
112
+ name: "Claude Opus 4.6",
113
+ api: "bedrock-converse-stream",
114
+ provider: "amazon-bedrock",
115
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
116
+ reasoning: true,
117
+ input: ["text", "image"],
118
+ cost: {
119
+ input: 5,
120
+ output: 25,
121
+ cacheRead: 0.5,
122
+ cacheWrite: 6.25,
123
+ },
124
+ contextWindow: 200000,
125
+ maxTokens: 128000,
126
+ } satisfies Model<"bedrock-converse-stream">,
110
127
  "cohere.command-r-plus-v1:0": {
111
128
  id: "cohere.command-r-plus-v1:0",
112
129
  name: "Command R+",
@@ -192,6 +209,23 @@ export const MODELS = {
192
209
  contextWindow: 200000,
193
210
  maxTokens: 64000,
194
211
  } satisfies Model<"bedrock-converse-stream">,
212
+ "eu.anthropic.claude-opus-4-6-v1:0": {
213
+ id: "eu.anthropic.claude-opus-4-6-v1:0",
214
+ name: "Claude Opus 4.6 (EU)",
215
+ api: "bedrock-converse-stream",
216
+ provider: "amazon-bedrock",
217
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
218
+ reasoning: true,
219
+ input: ["text", "image"],
220
+ cost: {
221
+ input: 5,
222
+ output: 25,
223
+ cacheRead: 1.5,
224
+ cacheWrite: 18.75,
225
+ },
226
+ contextWindow: 200000,
227
+ maxTokens: 128000,
228
+ } satisfies Model<"bedrock-converse-stream">,
195
229
  "eu.anthropic.claude-sonnet-4-20250514-v1:0": {
196
230
  id: "eu.anthropic.claude-sonnet-4-20250514-v1:0",
197
231
  name: "Claude Sonnet 4 (EU)",
@@ -277,6 +311,23 @@ export const MODELS = {
277
311
  contextWindow: 200000,
278
312
  maxTokens: 64000,
279
313
  } satisfies Model<"bedrock-converse-stream">,
314
+ "global.anthropic.claude-opus-4-6-v1:0": {
315
+ id: "global.anthropic.claude-opus-4-6-v1:0",
316
+ name: "Claude Opus 4.6 (Global)",
317
+ api: "bedrock-converse-stream",
318
+ provider: "amazon-bedrock",
319
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
320
+ reasoning: true,
321
+ input: ["text", "image"],
322
+ cost: {
323
+ input: 5,
324
+ output: 25,
325
+ cacheRead: 0.5,
326
+ cacheWrite: 6.25,
327
+ },
328
+ contextWindow: 200000,
329
+ maxTokens: 128000,
330
+ } satisfies Model<"bedrock-converse-stream">,
280
331
  "global.anthropic.claude-sonnet-4-20250514-v1:0": {
281
332
  id: "global.anthropic.claude-sonnet-4-20250514-v1:0",
282
333
  name: "Claude Sonnet 4",
@@ -855,6 +906,23 @@ export const MODELS = {
855
906
  contextWindow: 200000,
856
907
  maxTokens: 64000,
857
908
  } satisfies Model<"bedrock-converse-stream">,
909
+ "us.anthropic.claude-opus-4-6-v1:0": {
910
+ id: "us.anthropic.claude-opus-4-6-v1:0",
911
+ name: "Claude Opus 4.6 (US)",
912
+ api: "bedrock-converse-stream",
913
+ provider: "amazon-bedrock",
914
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
915
+ reasoning: true,
916
+ input: ["text", "image"],
917
+ cost: {
918
+ input: 5,
919
+ output: 25,
920
+ cacheRead: 1.5,
921
+ cacheWrite: 18.75,
922
+ },
923
+ contextWindow: 200000,
924
+ maxTokens: 128000,
925
+ } satisfies Model<"bedrock-converse-stream">,
858
926
  "us.anthropic.claude-sonnet-4-20250514-v1:0": {
859
927
  id: "us.anthropic.claude-sonnet-4-20250514-v1:0",
860
928
  name: "Claude Sonnet 4 (US)",
@@ -1316,6 +1384,40 @@ export const MODELS = {
1316
1384
  contextWindow: 200000,
1317
1385
  maxTokens: 64000,
1318
1386
  } satisfies Model<"anthropic-messages">,
1387
+ "claude-opus-4-6": {
1388
+ id: "claude-opus-4-6",
1389
+ name: "Claude Opus 4.6",
1390
+ api: "anthropic-messages",
1391
+ provider: "anthropic",
1392
+ baseUrl: "https://api.anthropic.com",
1393
+ reasoning: true,
1394
+ input: ["text", "image"],
1395
+ cost: {
1396
+ input: 5,
1397
+ output: 25,
1398
+ cacheRead: 0.5,
1399
+ cacheWrite: 6.25,
1400
+ },
1401
+ contextWindow: 1000000,
1402
+ maxTokens: 128000,
1403
+ } satisfies Model<"anthropic-messages">,
1404
+ "claude-opus-4-6-20260205": {
1405
+ id: "claude-opus-4-6-20260205",
1406
+ name: "Claude Opus 4.6",
1407
+ api: "anthropic-messages",
1408
+ provider: "anthropic",
1409
+ baseUrl: "https://api.anthropic.com",
1410
+ reasoning: true,
1411
+ input: ["text", "image"],
1412
+ cost: {
1413
+ input: 5,
1414
+ output: 25,
1415
+ cacheRead: 0.5,
1416
+ cacheWrite: 6.25,
1417
+ },
1418
+ contextWindow: 200000,
1419
+ maxTokens: 128000,
1420
+ } satisfies Model<"anthropic-messages">,
1319
1421
  "claude-sonnet-4-0": {
1320
1422
  id: "claude-sonnet-4-0",
1321
1423
  name: "Claude Sonnet 4 (latest)",
@@ -1700,6 +1802,25 @@ export const MODELS = {
1700
1802
  contextWindow: 128000,
1701
1803
  maxTokens: 16000,
1702
1804
  } satisfies Model<"openai-completions">,
1805
+ "claude-opus-4.6": {
1806
+ id: "claude-opus-4.6",
1807
+ name: "Claude Opus 4.6",
1808
+ api: "openai-completions",
1809
+ provider: "github-copilot",
1810
+ baseUrl: "https://api.individual.githubcopilot.com",
1811
+ headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"},
1812
+ compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false},
1813
+ reasoning: true,
1814
+ input: ["text", "image"],
1815
+ cost: {
1816
+ input: 0,
1817
+ output: 0,
1818
+ cacheRead: 0,
1819
+ cacheWrite: 0,
1820
+ },
1821
+ contextWindow: 128000,
1822
+ maxTokens: 16000,
1823
+ } satisfies Model<"openai-completions">,
1703
1824
  "claude-sonnet-4": {
1704
1825
  id: "claude-sonnet-4",
1705
1826
  name: "Claude Sonnet 4",
@@ -3030,63 +3151,6 @@ export const MODELS = {
3030
3151
  contextWindow: 262144,
3031
3152
  maxTokens: 32000,
3032
3153
  } satisfies Model<"openai-completions">,
3033
- "kimi-k2": {
3034
- id: "kimi-k2",
3035
- name: "Kimi K2",
3036
- api: "openai-completions",
3037
- provider: "kimi-code",
3038
- baseUrl: "https://api.kimi.com/coding/v1",
3039
- headers: {"User-Agent":"KimiCLI/1.0","X-Msh-Platform":"kimi_cli"},
3040
- compat: {"thinkingFormat":"zai","reasoningContentField":"reasoning_content","supportsDeveloperRole":false},
3041
- reasoning: true,
3042
- input: ["text"],
3043
- cost: {
3044
- input: 0,
3045
- output: 0,
3046
- cacheRead: 0,
3047
- cacheWrite: 0,
3048
- },
3049
- contextWindow: 262144,
3050
- maxTokens: 32000,
3051
- } satisfies Model<"openai-completions">,
3052
- "kimi-k2-turbo-preview": {
3053
- id: "kimi-k2-turbo-preview",
3054
- name: "Kimi K2 Turbo Preview",
3055
- api: "openai-completions",
3056
- provider: "kimi-code",
3057
- baseUrl: "https://api.kimi.com/coding/v1",
3058
- headers: {"User-Agent":"KimiCLI/1.0","X-Msh-Platform":"kimi_cli"},
3059
- compat: {"thinkingFormat":"zai","reasoningContentField":"reasoning_content","supportsDeveloperRole":false},
3060
- reasoning: true,
3061
- input: ["text"],
3062
- cost: {
3063
- input: 0,
3064
- output: 0,
3065
- cacheRead: 0,
3066
- cacheWrite: 0,
3067
- },
3068
- contextWindow: 262144,
3069
- maxTokens: 32000,
3070
- } satisfies Model<"openai-completions">,
3071
- "kimi-k2.5": {
3072
- id: "kimi-k2.5",
3073
- name: "Kimi K2.5",
3074
- api: "openai-completions",
3075
- provider: "kimi-code",
3076
- baseUrl: "https://api.kimi.com/coding/v1",
3077
- headers: {"User-Agent":"KimiCLI/1.0","X-Msh-Platform":"kimi_cli"},
3078
- compat: {"thinkingFormat":"zai","reasoningContentField":"reasoning_content","supportsDeveloperRole":false},
3079
- reasoning: true,
3080
- input: ["text", "image"],
3081
- cost: {
3082
- input: 0,
3083
- output: 0,
3084
- cacheRead: 0,
3085
- cacheWrite: 0,
3086
- },
3087
- contextWindow: 262144,
3088
- maxTokens: 32000,
3089
- } satisfies Model<"openai-completions">,
3090
3154
  },
3091
3155
  "minimax": {
3092
3156
  "MiniMax-M2": {
@@ -4030,6 +4094,23 @@ export const MODELS = {
4030
4094
  contextWindow: 400000,
4031
4095
  maxTokens: 128000,
4032
4096
  } satisfies Model<"openai-responses">,
4097
+ "gpt-5.3-codex": {
4098
+ id: "gpt-5.3-codex",
4099
+ name: "GPT-5.3 Codex",
4100
+ api: "openai-responses",
4101
+ provider: "openai",
4102
+ baseUrl: "https://api.openai.com/v1",
4103
+ reasoning: true,
4104
+ input: ["text", "image"],
4105
+ cost: {
4106
+ input: 1.75,
4107
+ output: 14,
4108
+ cacheRead: 0.175,
4109
+ cacheWrite: 0,
4110
+ },
4111
+ contextWindow: 400000,
4112
+ maxTokens: 128000,
4113
+ } satisfies Model<"openai-responses">,
4033
4114
  "o1": {
4034
4115
  id: "o1",
4035
4116
  name: "o1",
@@ -4253,6 +4334,23 @@ export const MODELS = {
4253
4334
  contextWindow: 272000,
4254
4335
  maxTokens: 128000,
4255
4336
  } satisfies Model<"openai-codex-responses">,
4337
+ "gpt-5.3-codex": {
4338
+ id: "gpt-5.3-codex",
4339
+ name: "GPT-5.3 Codex",
4340
+ api: "openai-codex-responses",
4341
+ provider: "openai-codex",
4342
+ baseUrl: "https://chatgpt.com/backend-api",
4343
+ reasoning: true,
4344
+ input: ["text", "image"],
4345
+ cost: {
4346
+ input: 1.75,
4347
+ output: 14,
4348
+ cacheRead: 0.175,
4349
+ cacheWrite: 0,
4350
+ },
4351
+ contextWindow: 400000,
4352
+ maxTokens: 128000,
4353
+ } satisfies Model<"openai-codex-responses">,
4256
4354
  },
4257
4355
  "opencode": {
4258
4356
  "big-pickle": {
@@ -4340,6 +4438,23 @@ export const MODELS = {
4340
4438
  contextWindow: 200000,
4341
4439
  maxTokens: 64000,
4342
4440
  } satisfies Model<"anthropic-messages">,
4441
+ "claude-opus-4-6": {
4442
+ id: "claude-opus-4-6",
4443
+ name: "Claude Opus 4.6",
4444
+ api: "anthropic-messages",
4445
+ provider: "opencode",
4446
+ baseUrl: "https://opencode.ai/zen",
4447
+ reasoning: true,
4448
+ input: ["text", "image"],
4449
+ cost: {
4450
+ input: 5,
4451
+ output: 25,
4452
+ cacheRead: 0.5,
4453
+ cacheWrite: 6.25,
4454
+ },
4455
+ contextWindow: 1000000,
4456
+ maxTokens: 128000,
4457
+ } satisfies Model<"anthropic-messages">,
4343
4458
  "claude-sonnet-4": {
4344
4459
  id: "claude-sonnet-4",
4345
4460
  name: "Claude Sonnet 4",
@@ -5060,6 +5175,23 @@ export const MODELS = {
5060
5175
  contextWindow: 200000,
5061
5176
  maxTokens: 64000,
5062
5177
  } satisfies Model<"openai-completions">,
5178
+ "anthropic/claude-opus-4.6": {
5179
+ id: "anthropic/claude-opus-4.6",
5180
+ name: "Anthropic: Claude Opus 4.6",
5181
+ api: "openai-completions",
5182
+ provider: "openrouter",
5183
+ baseUrl: "https://openrouter.ai/api/v1",
5184
+ reasoning: true,
5185
+ input: ["text", "image"],
5186
+ cost: {
5187
+ input: 5,
5188
+ output: 25,
5189
+ cacheRead: 0.5,
5190
+ cacheWrite: 6.25,
5191
+ },
5192
+ contextWindow: 1000000,
5193
+ maxTokens: 128000,
5194
+ } satisfies Model<"openai-completions">,
5063
5195
  "anthropic/claude-sonnet-4": {
5064
5196
  id: "anthropic/claude-sonnet-4",
5065
5197
  name: "Anthropic: Claude Sonnet 4",
@@ -5265,57 +5397,6 @@ export const MODELS = {
5265
5397
  contextWindow: 128000,
5266
5398
  maxTokens: 4000,
5267
5399
  } satisfies Model<"openai-completions">,
5268
- "deepcogito/cogito-v2-preview-llama-109b-moe": {
5269
- id: "deepcogito/cogito-v2-preview-llama-109b-moe",
5270
- name: "Cogito V2 Preview Llama 109B",
5271
- api: "openai-completions",
5272
- provider: "openrouter",
5273
- baseUrl: "https://openrouter.ai/api/v1",
5274
- reasoning: true,
5275
- input: ["text", "image"],
5276
- cost: {
5277
- input: 0.18,
5278
- output: 0.59,
5279
- cacheRead: 0,
5280
- cacheWrite: 0,
5281
- },
5282
- contextWindow: 32767,
5283
- maxTokens: 4096,
5284
- } satisfies Model<"openai-completions">,
5285
- "deepcogito/cogito-v2-preview-llama-405b": {
5286
- id: "deepcogito/cogito-v2-preview-llama-405b",
5287
- name: "Deep Cogito: Cogito V2 Preview Llama 405B",
5288
- api: "openai-completions",
5289
- provider: "openrouter",
5290
- baseUrl: "https://openrouter.ai/api/v1",
5291
- reasoning: true,
5292
- input: ["text"],
5293
- cost: {
5294
- input: 3.5,
5295
- output: 3.5,
5296
- cacheRead: 0,
5297
- cacheWrite: 0,
5298
- },
5299
- contextWindow: 32768,
5300
- maxTokens: 4096,
5301
- } satisfies Model<"openai-completions">,
5302
- "deepcogito/cogito-v2-preview-llama-70b": {
5303
- id: "deepcogito/cogito-v2-preview-llama-70b",
5304
- name: "Deep Cogito: Cogito V2 Preview Llama 70B",
5305
- api: "openai-completions",
5306
- provider: "openrouter",
5307
- baseUrl: "https://openrouter.ai/api/v1",
5308
- reasoning: true,
5309
- input: ["text"],
5310
- cost: {
5311
- input: 0.88,
5312
- output: 0.88,
5313
- cacheRead: 0,
5314
- cacheWrite: 0,
5315
- },
5316
- contextWindow: 32768,
5317
- maxTokens: 4096,
5318
- } satisfies Model<"openai-completions">,
5319
5400
  "deepseek/deepseek-chat": {
5320
5401
  id: "deepseek/deepseek-chat",
5321
5402
  name: "DeepSeek: DeepSeek V3",
@@ -5412,7 +5493,7 @@ export const MODELS = {
5412
5493
  cost: {
5413
5494
  input: 0.21,
5414
5495
  output: 0.7899999999999999,
5415
- cacheRead: 0.16799999999999998,
5496
+ cacheRead: 0.1300000002,
5416
5497
  cacheWrite: 0,
5417
5498
  },
5418
5499
  contextWindow: 163840,
@@ -9042,6 +9123,23 @@ export const MODELS = {
9042
9123
  contextWindow: 200000,
9043
9124
  maxTokens: 64000,
9044
9125
  } satisfies Model<"anthropic-messages">,
9126
+ "anthropic/claude-opus-4.6": {
9127
+ id: "anthropic/claude-opus-4.6",
9128
+ name: "Claude Opus 4.6",
9129
+ api: "anthropic-messages",
9130
+ provider: "vercel-ai-gateway",
9131
+ baseUrl: "https://ai-gateway.vercel.sh",
9132
+ reasoning: true,
9133
+ input: ["text", "image"],
9134
+ cost: {
9135
+ input: 5,
9136
+ output: 25,
9137
+ cacheRead: 0.5,
9138
+ cacheWrite: 6.25,
9139
+ },
9140
+ contextWindow: 1000000,
9141
+ maxTokens: 128000,
9142
+ } satisfies Model<"anthropic-messages">,
9045
9143
  "anthropic/claude-sonnet-4": {
9046
9144
  id: "anthropic/claude-sonnet-4",
9047
9145
  name: "Claude Sonnet 4",
@@ -9799,13 +9897,13 @@ export const MODELS = {
9799
9897
  reasoning: true,
9800
9898
  input: ["text", "image"],
9801
9899
  cost: {
9802
- input: 0.44999999999999996,
9900
+ input: 0.5,
9803
9901
  output: 2.8,
9804
9902
  cacheRead: 0,
9805
9903
  cacheWrite: 0,
9806
9904
  },
9807
- contextWindow: 262144,
9808
- maxTokens: 252144,
9905
+ contextWindow: 256000,
9906
+ maxTokens: 256000,
9809
9907
  } satisfies Model<"anthropic-messages">,
9810
9908
  "nvidia/nemotron-nano-12b-v2-vl": {
9811
9909
  id: "nvidia/nemotron-nano-12b-v2-vl",
@@ -5,7 +5,7 @@
5
5
  */
6
6
  import { createHash } from "node:crypto";
7
7
  import type { Content, ThinkingConfig } from "@google/genai";
8
- import { abortableSleep } from "@oh-my-pi/pi-utils";
8
+ import { abortableSleep, readSseJson } from "@oh-my-pi/pi-utils";
9
9
  import { calculateCost } from "../models";
10
10
  import type {
11
11
  Api,
@@ -523,211 +523,168 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
523
523
  const blocks = output.content;
524
524
  const blockIndex = () => blocks.length - 1;
525
525
 
526
- // Read SSE stream
527
- const reader = activeResponse.body.getReader();
528
- const decoder = new TextDecoder();
529
- let buffer = "";
530
- let jsonlBuffer = "";
531
-
532
- // Set up abort handler to cancel reader when signal fires
533
- const abortHandler = () => {
534
- void reader.cancel().catch(() => {});
535
- };
536
- options?.signal?.addEventListener("abort", abortHandler);
537
-
538
- try {
539
- while (true) {
540
- // Check abort signal before each read
541
- if (options?.signal?.aborted) {
542
- throw new Error("Request was aborted");
543
- }
544
-
545
- const { done, value } = await reader.read();
546
- if (done) break;
547
-
548
- buffer += decoder.decode(value, { stream: true });
549
- const lines = buffer.split("\n");
550
- buffer = lines.pop() || "";
551
-
552
- for (const line of lines) {
553
- if (!line.startsWith("data:")) continue;
554
-
555
- const jsonStr = line.slice(5).trim();
556
- if (!jsonStr) continue;
557
- jsonlBuffer += `${jsonStr}\n`;
558
- const parsed = Bun.JSONL.parseChunk(jsonlBuffer);
559
- jsonlBuffer = jsonlBuffer.slice(parsed.read);
560
- if (parsed.error) {
561
- jsonlBuffer = "";
562
- continue;
563
- }
564
-
565
- const chunk = parsed.values[0] as CloudCodeAssistResponseChunk | undefined;
566
- if (!chunk) continue;
567
-
568
- // Unwrap the response
569
- const responseData = chunk.response;
570
- if (!responseData) continue;
571
-
572
- const candidate = responseData.candidates?.[0];
573
- if (candidate?.content?.parts) {
574
- for (const part of candidate.content.parts) {
575
- if (part.text !== undefined) {
576
- hasContent = true;
577
- const isThinking = isThinkingPart(part);
578
- if (
579
- !currentBlock ||
580
- (isThinking && currentBlock.type !== "thinking") ||
581
- (!isThinking && currentBlock.type !== "text")
582
- ) {
583
- if (currentBlock) {
584
- if (currentBlock.type === "text") {
585
- stream.push({
586
- type: "text_end",
587
- contentIndex: blocks.length - 1,
588
- content: currentBlock.text,
589
- partial: output,
590
- });
591
- } else {
592
- stream.push({
593
- type: "thinking_end",
594
- contentIndex: blockIndex(),
595
- content: currentBlock.thinking,
596
- partial: output,
597
- });
598
- }
599
- }
600
- if (isThinking) {
601
- currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
602
- output.content.push(currentBlock);
603
- ensureStarted();
604
- stream.push({
605
- type: "thinking_start",
606
- contentIndex: blockIndex(),
607
- partial: output,
608
- });
609
- } else {
610
- currentBlock = { type: "text", text: "" };
611
- output.content.push(currentBlock);
612
- ensureStarted();
613
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
614
- }
615
- }
616
- if (currentBlock.type === "thinking") {
617
- currentBlock.thinking += part.text;
618
- currentBlock.thinkingSignature = retainThoughtSignature(
619
- currentBlock.thinkingSignature,
620
- part.thoughtSignature,
621
- );
526
+ for await (const chunk of readSseJson<CloudCodeAssistResponseChunk>(
527
+ activeResponse.body!,
528
+ options?.signal,
529
+ )) {
530
+ const responseData = chunk.response;
531
+ if (!responseData) continue;
532
+
533
+ const candidate = responseData.candidates?.[0];
534
+ if (candidate?.content?.parts) {
535
+ for (const part of candidate.content.parts) {
536
+ if (part.text !== undefined) {
537
+ hasContent = true;
538
+ const isThinking = isThinkingPart(part);
539
+ if (
540
+ !currentBlock ||
541
+ (isThinking && currentBlock.type !== "thinking") ||
542
+ (!isThinking && currentBlock.type !== "text")
543
+ ) {
544
+ if (currentBlock) {
545
+ if (currentBlock.type === "text") {
622
546
  stream.push({
623
- type: "thinking_delta",
624
- contentIndex: blockIndex(),
625
- delta: part.text,
547
+ type: "text_end",
548
+ contentIndex: blocks.length - 1,
549
+ content: currentBlock.text,
626
550
  partial: output,
627
551
  });
628
552
  } else {
629
- currentBlock.text += part.text;
630
- currentBlock.textSignature = retainThoughtSignature(
631
- currentBlock.textSignature,
632
- part.thoughtSignature,
633
- );
634
553
  stream.push({
635
- type: "text_delta",
554
+ type: "thinking_end",
636
555
  contentIndex: blockIndex(),
637
- delta: part.text,
556
+ content: currentBlock.thinking,
638
557
  partial: output,
639
558
  });
640
559
  }
641
560
  }
642
-
643
- if (part.functionCall) {
644
- hasContent = true;
645
- if (currentBlock) {
646
- if (currentBlock.type === "text") {
647
- stream.push({
648
- type: "text_end",
649
- contentIndex: blockIndex(),
650
- content: currentBlock.text,
651
- partial: output,
652
- });
653
- } else {
654
- stream.push({
655
- type: "thinking_end",
656
- contentIndex: blockIndex(),
657
- content: currentBlock.thinking,
658
- partial: output,
659
- });
660
- }
661
- currentBlock = null;
662
- }
663
-
664
- const providedId = part.functionCall.id;
665
- const needsNewId =
666
- !providedId || output.content.some(b => b.type === "toolCall" && b.id === providedId);
667
- const toolCallId = needsNewId
668
- ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
669
- : providedId;
670
-
671
- const toolCall: ToolCall = {
672
- type: "toolCall",
673
- id: toolCallId,
674
- name: part.functionCall.name || "",
675
- arguments: part.functionCall.args as Record<string, unknown>,
676
- ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
677
- };
678
-
679
- output.content.push(toolCall);
561
+ if (isThinking) {
562
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
563
+ output.content.push(currentBlock);
680
564
  ensureStarted();
681
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
682
565
  stream.push({
683
- type: "toolcall_delta",
566
+ type: "thinking_start",
684
567
  contentIndex: blockIndex(),
685
- delta: JSON.stringify(toolCall.arguments),
686
568
  partial: output,
687
569
  });
570
+ } else {
571
+ currentBlock = { type: "text", text: "" };
572
+ output.content.push(currentBlock);
573
+ ensureStarted();
574
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
575
+ }
576
+ }
577
+ if (currentBlock.type === "thinking") {
578
+ currentBlock.thinking += part.text;
579
+ currentBlock.thinkingSignature = retainThoughtSignature(
580
+ currentBlock.thinkingSignature,
581
+ part.thoughtSignature,
582
+ );
583
+ stream.push({
584
+ type: "thinking_delta",
585
+ contentIndex: blockIndex(),
586
+ delta: part.text,
587
+ partial: output,
588
+ });
589
+ } else {
590
+ currentBlock.text += part.text;
591
+ currentBlock.textSignature = retainThoughtSignature(
592
+ currentBlock.textSignature,
593
+ part.thoughtSignature,
594
+ );
595
+ stream.push({
596
+ type: "text_delta",
597
+ contentIndex: blockIndex(),
598
+ delta: part.text,
599
+ partial: output,
600
+ });
601
+ }
602
+ }
603
+
604
+ if (part.functionCall) {
605
+ hasContent = true;
606
+ if (currentBlock) {
607
+ if (currentBlock.type === "text") {
688
608
  stream.push({
689
- type: "toolcall_end",
609
+ type: "text_end",
690
610
  contentIndex: blockIndex(),
691
- toolCall,
611
+ content: currentBlock.text,
612
+ partial: output,
613
+ });
614
+ } else {
615
+ stream.push({
616
+ type: "thinking_end",
617
+ contentIndex: blockIndex(),
618
+ content: currentBlock.thinking,
692
619
  partial: output,
693
620
  });
694
621
  }
622
+ currentBlock = null;
695
623
  }
696
- }
697
-
698
- if (candidate?.finishReason) {
699
- output.stopReason = mapStopReasonString(candidate.finishReason);
700
- if (output.content.some(b => b.type === "toolCall")) {
701
- output.stopReason = "toolUse";
702
- }
703
- }
704
624
 
705
- if (responseData.usageMetadata) {
706
- // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
707
- const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
708
- const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
709
- output.usage = {
710
- input: promptTokens - cacheReadTokens,
711
- output:
712
- (responseData.usageMetadata.candidatesTokenCount || 0) +
713
- (responseData.usageMetadata.thoughtsTokenCount || 0),
714
- cacheRead: cacheReadTokens,
715
- cacheWrite: 0,
716
- totalTokens: responseData.usageMetadata.totalTokenCount || 0,
717
- cost: {
718
- input: 0,
719
- output: 0,
720
- cacheRead: 0,
721
- cacheWrite: 0,
722
- total: 0,
723
- },
625
+ const providedId = part.functionCall.id;
626
+ const needsNewId =
627
+ !providedId || output.content.some(b => b.type === "toolCall" && b.id === providedId);
628
+ const toolCallId = needsNewId
629
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
630
+ : providedId;
631
+
632
+ const toolCall: ToolCall = {
633
+ type: "toolCall",
634
+ id: toolCallId,
635
+ name: part.functionCall.name || "",
636
+ arguments: part.functionCall.args as Record<string, unknown>,
637
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
724
638
  };
725
- calculateCost(model, output.usage);
639
+
640
+ output.content.push(toolCall);
641
+ ensureStarted();
642
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
643
+ stream.push({
644
+ type: "toolcall_delta",
645
+ contentIndex: blockIndex(),
646
+ delta: JSON.stringify(toolCall.arguments),
647
+ partial: output,
648
+ });
649
+ stream.push({
650
+ type: "toolcall_end",
651
+ contentIndex: blockIndex(),
652
+ toolCall,
653
+ partial: output,
654
+ });
726
655
  }
727
656
  }
728
657
  }
729
- } finally {
730
- options?.signal?.removeEventListener("abort", abortHandler);
658
+
659
+ if (candidate?.finishReason) {
660
+ output.stopReason = mapStopReasonString(candidate.finishReason);
661
+ if (output.content.some(b => b.type === "toolCall")) {
662
+ output.stopReason = "toolUse";
663
+ }
664
+ }
665
+
666
+ if (responseData.usageMetadata) {
667
+ // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
668
+ const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
669
+ const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
670
+ output.usage = {
671
+ input: promptTokens - cacheReadTokens,
672
+ output:
673
+ (responseData.usageMetadata.candidatesTokenCount || 0) +
674
+ (responseData.usageMetadata.thoughtsTokenCount || 0),
675
+ cacheRead: cacheReadTokens,
676
+ cacheWrite: 0,
677
+ totalTokens: responseData.usageMetadata.totalTokenCount || 0,
678
+ cost: {
679
+ input: 0,
680
+ output: 0,
681
+ cacheRead: 0,
682
+ cacheWrite: 0,
683
+ total: 0,
684
+ },
685
+ };
686
+ calculateCost(model, output.usage);
687
+ }
731
688
  }
732
689
 
733
690
  if (currentBlock) {