@ssweens/pi-vertex 1.0.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +19 -22
- package/TEST_COVERAGE.md +13 -0
- package/index.ts +2 -2
- package/models/claude.ts +21 -75
- package/models/gemini.ts +39 -31
- package/models/index.ts +1 -1
- package/models/maas.ts +39 -76
- package/package.json +4 -1
- package/streaming/gemini.ts +198 -89
- package/streaming/maas.ts +350 -53
- package/types.ts +24 -35
- package/utils.ts +163 -58
package/models/maas.ts
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* MaaS (Model-as-a-Service) open model definitions for Vertex AI
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
|
|
4
|
+
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
|
|
5
|
+
* All prices per 1M tokens
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
import type { VertexModelConfig } from "../types.js";
|
|
8
9
|
|
|
9
10
|
export const MAAS_MODELS: VertexModelConfig[] = [
|
|
10
|
-
// Llama
|
|
11
|
+
// --- Meta Llama ---
|
|
11
12
|
{
|
|
12
13
|
id: "llama-4-maverick",
|
|
13
14
|
name: "Llama 4 Maverick",
|
|
@@ -66,7 +67,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
66
67
|
region: "global",
|
|
67
68
|
},
|
|
68
69
|
|
|
69
|
-
// Mistral
|
|
70
|
+
// --- Mistral AI ---
|
|
70
71
|
{
|
|
71
72
|
id: "mistral-medium-3",
|
|
72
73
|
name: "Mistral Medium 3",
|
|
@@ -106,45 +107,45 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
106
107
|
region: "global",
|
|
107
108
|
},
|
|
108
109
|
{
|
|
109
|
-
id: "
|
|
110
|
-
name: "
|
|
111
|
-
apiId: "mistralai/
|
|
110
|
+
id: "codestral-2",
|
|
111
|
+
name: "Codestral 2",
|
|
112
|
+
apiId: "mistralai/codestral-2",
|
|
112
113
|
publisher: "mistralai",
|
|
113
114
|
endpointType: "maas",
|
|
114
|
-
contextWindow:
|
|
115
|
+
contextWindow: 256000,
|
|
115
116
|
maxTokens: 32000,
|
|
116
|
-
input: ["text"
|
|
117
|
+
input: ["text"],
|
|
117
118
|
reasoning: false,
|
|
118
|
-
tools:
|
|
119
|
+
tools: true,
|
|
119
120
|
cost: {
|
|
120
|
-
input: 0.
|
|
121
|
-
output: 0.
|
|
121
|
+
input: 0.30,
|
|
122
|
+
output: 0.90,
|
|
122
123
|
cacheRead: 0,
|
|
123
124
|
cacheWrite: 0,
|
|
124
125
|
},
|
|
125
126
|
region: "global",
|
|
126
127
|
},
|
|
127
128
|
{
|
|
128
|
-
id: "
|
|
129
|
-
name: "
|
|
130
|
-
apiId: "mistralai/
|
|
129
|
+
id: "mistral-ocr",
|
|
130
|
+
name: "Mistral OCR",
|
|
131
|
+
apiId: "mistralai/mistral-ocr-2505",
|
|
131
132
|
publisher: "mistralai",
|
|
132
133
|
endpointType: "maas",
|
|
133
|
-
contextWindow:
|
|
134
|
+
contextWindow: 128000,
|
|
134
135
|
maxTokens: 32000,
|
|
135
|
-
input: ["text"],
|
|
136
|
+
input: ["text", "image"],
|
|
136
137
|
reasoning: false,
|
|
137
|
-
tools:
|
|
138
|
+
tools: false,
|
|
138
139
|
cost: {
|
|
139
|
-
input: 0.
|
|
140
|
-
output: 0.
|
|
140
|
+
input: 0.0005,
|
|
141
|
+
output: 0.0005,
|
|
141
142
|
cacheRead: 0,
|
|
142
143
|
cacheWrite: 0,
|
|
143
144
|
},
|
|
144
145
|
region: "global",
|
|
145
146
|
},
|
|
146
147
|
|
|
147
|
-
// DeepSeek
|
|
148
|
+
// --- DeepSeek ---
|
|
148
149
|
{
|
|
149
150
|
id: "deepseek-v3.2",
|
|
150
151
|
name: "DeepSeek V3.2",
|
|
@@ -202,48 +203,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
202
203
|
},
|
|
203
204
|
region: "global",
|
|
204
205
|
},
|
|
205
|
-
|
|
206
|
-
// AI21 Labs models
|
|
207
|
-
{
|
|
208
|
-
id: "jamba-1.5-large",
|
|
209
|
-
name: "Jamba 1.5 Large",
|
|
210
|
-
apiId: "ai21/jamba-1.5-large",
|
|
211
|
-
publisher: "ai21",
|
|
212
|
-
endpointType: "maas",
|
|
213
|
-
contextWindow: 256000,
|
|
214
|
-
maxTokens: 256000,
|
|
215
|
-
input: ["text"],
|
|
216
|
-
reasoning: false,
|
|
217
|
-
tools: true,
|
|
218
|
-
cost: {
|
|
219
|
-
input: 2.00,
|
|
220
|
-
output: 8.00,
|
|
221
|
-
cacheRead: 0,
|
|
222
|
-
cacheWrite: 0,
|
|
223
|
-
},
|
|
224
|
-
region: "global",
|
|
225
|
-
},
|
|
226
206
|
{
|
|
227
|
-
id: "
|
|
228
|
-
name: "
|
|
229
|
-
apiId: "
|
|
230
|
-
publisher: "
|
|
207
|
+
id: "deepseek-ocr",
|
|
208
|
+
name: "DeepSeek OCR",
|
|
209
|
+
apiId: "deepseek-ai/deepseek-ocr-maas",
|
|
210
|
+
publisher: "deepseek-ai",
|
|
231
211
|
endpointType: "maas",
|
|
232
|
-
contextWindow:
|
|
233
|
-
maxTokens:
|
|
234
|
-
input: ["text"],
|
|
212
|
+
contextWindow: 163840,
|
|
213
|
+
maxTokens: 32000,
|
|
214
|
+
input: ["text", "image"],
|
|
235
215
|
reasoning: false,
|
|
236
|
-
tools:
|
|
216
|
+
tools: false,
|
|
237
217
|
cost: {
|
|
238
|
-
input: 0.
|
|
239
|
-
output:
|
|
218
|
+
input: 0.30,
|
|
219
|
+
output: 1.20,
|
|
240
220
|
cacheRead: 0,
|
|
241
221
|
cacheWrite: 0,
|
|
242
222
|
},
|
|
243
223
|
region: "global",
|
|
244
224
|
},
|
|
245
225
|
|
|
246
|
-
// OpenAI
|
|
226
|
+
// --- OpenAI (gpt-oss) ---
|
|
247
227
|
{
|
|
248
228
|
id: "gpt-oss-120b",
|
|
249
229
|
name: "GPT-OSS 120B",
|
|
@@ -283,28 +263,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
283
263
|
region: "global",
|
|
284
264
|
},
|
|
285
265
|
|
|
286
|
-
//
|
|
287
|
-
{
|
|
288
|
-
id: "deepseek-ocr",
|
|
289
|
-
name: "DeepSeek OCR",
|
|
290
|
-
apiId: "deepseek-ai/deepseek-ocr-maas",
|
|
291
|
-
publisher: "deepseek-ai",
|
|
292
|
-
endpointType: "maas",
|
|
293
|
-
contextWindow: 163840,
|
|
294
|
-
maxTokens: 32000,
|
|
295
|
-
input: ["text", "image"],
|
|
296
|
-
reasoning: false,
|
|
297
|
-
tools: false,
|
|
298
|
-
cost: {
|
|
299
|
-
input: 0.30, // Per page: $0.0003/page
|
|
300
|
-
output: 1.20, // Per page pricing
|
|
301
|
-
cacheRead: 0,
|
|
302
|
-
cacheWrite: 0,
|
|
303
|
-
},
|
|
304
|
-
region: "global",
|
|
305
|
-
},
|
|
306
|
-
|
|
307
|
-
// Qwen models
|
|
266
|
+
// --- Qwen ---
|
|
308
267
|
{
|
|
309
268
|
id: "qwen3-235b",
|
|
310
269
|
name: "Qwen 3 235B",
|
|
@@ -382,7 +341,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
382
341
|
region: "global",
|
|
383
342
|
},
|
|
384
343
|
|
|
385
|
-
//
|
|
344
|
+
// --- Moonshot ---
|
|
386
345
|
{
|
|
387
346
|
id: "kimi-k2-thinking",
|
|
388
347
|
name: "Kimi K2 Thinking",
|
|
@@ -402,6 +361,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
402
361
|
},
|
|
403
362
|
region: "global",
|
|
404
363
|
},
|
|
364
|
+
|
|
365
|
+
// --- MiniMax ---
|
|
405
366
|
{
|
|
406
367
|
id: "minimax-m2",
|
|
407
368
|
name: "MiniMax M2",
|
|
@@ -421,6 +382,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
421
382
|
},
|
|
422
383
|
region: "global",
|
|
423
384
|
},
|
|
385
|
+
|
|
386
|
+
// --- GLM (Zhipu AI) ---
|
|
424
387
|
{
|
|
425
388
|
id: "glm-5",
|
|
426
389
|
name: "GLM 5",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ssweens/pi-vertex",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.3",
|
|
4
4
|
"description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
"models/",
|
|
14
14
|
"streaming/",
|
|
15
15
|
"README.md",
|
|
16
|
+
"CHANGELOG.md",
|
|
17
|
+
"TEST_COVERAGE.md",
|
|
16
18
|
"LICENSE",
|
|
17
19
|
"screenshot.png"
|
|
18
20
|
],
|
|
@@ -22,6 +24,7 @@
|
|
|
22
24
|
"check": "echo 'nothing to check'"
|
|
23
25
|
},
|
|
24
26
|
"dependencies": {
|
|
27
|
+
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
|
25
28
|
"@google/genai": "^1.42.0",
|
|
26
29
|
"google-auth-library": "^9.0.0"
|
|
27
30
|
},
|
package/streaming/gemini.ts
CHANGED
|
@@ -1,20 +1,49 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Gemini streaming handler using @google/genai SDK
|
|
3
|
+
*
|
|
4
|
+
* Aligned with pi-mono's google-vertex.ts for consistent handling of:
|
|
5
|
+
* - Thinking content (thought blocks with signatures)
|
|
6
|
+
* - Tool calls with unique IDs and deduplication
|
|
7
|
+
* - Thinking configuration (levels for Gemini 3, budgets for Gemini 2.5)
|
|
8
|
+
* - Usage tracking including thinking tokens
|
|
3
9
|
*/
|
|
4
10
|
|
|
5
|
-
import { GoogleGenAI } from "@google/genai";
|
|
6
|
-
import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
|
|
11
|
+
import { GoogleGenAI, FinishReason, ThinkingLevel } from "@google/genai";
|
|
12
|
+
import type { VertexModelConfig, Context, StreamOptions, AssistantMessage } from "../types.js";
|
|
7
13
|
import { getAuthConfig, resolveLocation } from "../auth.js";
|
|
8
|
-
import { sanitizeText, convertToGeminiMessages, calculateCost } from "../utils.js";
|
|
9
|
-
import { createAssistantMessageEventStream, type AssistantMessageEventStream
|
|
14
|
+
import { sanitizeText, convertToGeminiMessages, convertToolsForGemini, retainThoughtSignature, calculateCost } from "../utils.js";
|
|
15
|
+
import { createAssistantMessageEventStream, type AssistantMessageEventStream } from "@mariozechner/pi-ai";
|
|
16
|
+
|
|
17
|
+
// Module-level counter used when generating unique tool call IDs (matches pi-mono pattern).
// It lives at module scope, so its value is shared by every caller in this module.
let toolCallCounter = 0;

// Lookup table from a lowercase reasoning-effort name to the corresponding
// @google/genai ThinkingLevel enum member. Names that are not listed here
// resolve to `undefined` when indexed.
const THINKING_LEVEL_MAP: Record<string, ThinkingLevel> = {
  minimal: ThinkingLevel.MINIMAL,
  low: ThinkingLevel.LOW,
  medium: ThinkingLevel.MEDIUM,
  high: ThinkingLevel.HIGH,
};
|
|
26
|
+
|
|
27
|
+
/**
 * Translate a Gemini finish reason into the stop reason used on assistant messages.
 *
 * @param reason - Finish reason string reported for a response candidate.
 * @returns "stop" for FinishReason.STOP, "length" for FinishReason.MAX_TOKENS,
 *   and "error" for everything else (including SAFETY and RECITATION).
 *   This function itself never produces "toolUse".
 */
function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
  // Normal completion.
  if (reason === FinishReason.STOP) {
    return "stop";
  }

  // Output was cut off by the token limit.
  if (reason === FinishReason.MAX_TOKENS) {
    return "length";
  }

  // SAFETY, RECITATION and any other or unrecognised reason are all reported as errors.
  return "error";
}
|
|
10
39
|
|
|
11
40
|
export function streamGemini(
|
|
12
41
|
model: VertexModelConfig,
|
|
13
42
|
context: Context,
|
|
14
|
-
options?: StreamOptions
|
|
43
|
+
options?: StreamOptions,
|
|
15
44
|
): AssistantMessageEventStream {
|
|
16
45
|
const stream = createAssistantMessageEventStream();
|
|
17
|
-
|
|
46
|
+
|
|
18
47
|
(async () => {
|
|
19
48
|
const output: AssistantMessage = {
|
|
20
49
|
role: "assistant",
|
|
@@ -33,123 +62,203 @@ export function streamGemini(
|
|
|
33
62
|
stopReason: "stop",
|
|
34
63
|
timestamp: Date.now(),
|
|
35
64
|
};
|
|
36
|
-
|
|
65
|
+
|
|
37
66
|
try {
|
|
38
67
|
// Priority: config file > env var > model region > default
|
|
39
68
|
const location = resolveLocation(model.region);
|
|
40
69
|
const auth = getAuthConfig(location);
|
|
41
70
|
|
|
42
|
-
// Create client
|
|
71
|
+
// Create client with explicit API version (matches pi-mono)
|
|
43
72
|
const client = new GoogleGenAI({
|
|
44
73
|
vertexai: true,
|
|
45
74
|
project: auth.projectId,
|
|
46
75
|
location: auth.location,
|
|
76
|
+
apiVersion: "v1",
|
|
47
77
|
});
|
|
48
|
-
|
|
49
|
-
// Convert messages
|
|
50
|
-
const contents = convertToGeminiMessages(context.messages);
|
|
51
|
-
|
|
52
|
-
// Build config
|
|
78
|
+
|
|
79
|
+
// Convert messages with model ID for proper thinking/tool handling
|
|
80
|
+
const contents = convertToGeminiMessages(context.messages, model.apiId);
|
|
81
|
+
|
|
82
|
+
// Build config — only set temperature when explicitly provided
|
|
53
83
|
const config: any = {
|
|
54
84
|
maxOutputTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
|
|
55
|
-
temperature: options
|
|
85
|
+
...(options?.temperature !== undefined && { temperature: options.temperature }),
|
|
56
86
|
};
|
|
57
|
-
|
|
87
|
+
|
|
58
88
|
// Add system prompt if present
|
|
59
89
|
if (context.systemPrompt) {
|
|
60
90
|
config.systemInstruction = sanitizeText(context.systemPrompt);
|
|
61
91
|
}
|
|
62
|
-
|
|
63
|
-
// Add tools if present
|
|
92
|
+
|
|
93
|
+
// Add tools if present (using parametersJsonSchema for full JSON Schema support)
|
|
64
94
|
if (context.tools && context.tools.length > 0) {
|
|
65
|
-
config.tools =
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
95
|
+
config.tools = convertToolsForGemini(context.tools);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Add thinking configuration (matches pi-mono's buildParams logic)
|
|
99
|
+
if (model.reasoning && options?.reasoning) {
|
|
100
|
+
const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
|
101
|
+
const isGemini3 = model.apiId.startsWith("gemini-3");
|
|
102
|
+
|
|
103
|
+
const thinkingConfig: any = { includeThoughts: true };
|
|
104
|
+
|
|
105
|
+
if (isGemini3) {
|
|
106
|
+
// Gemini 3 models use thinking levels (MINIMAL/LOW/MEDIUM/HIGH)
|
|
107
|
+
thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[effort];
|
|
108
|
+
} else {
|
|
109
|
+
// Gemini 2.5 models use thinking budgets (token counts)
|
|
110
|
+
const budgets: Record<string, number> = {
|
|
111
|
+
minimal: 128,
|
|
112
|
+
low: 2048,
|
|
113
|
+
medium: 8192,
|
|
114
|
+
high: model.apiId.includes("2.5-pro") ? 32768 : 24576,
|
|
115
|
+
};
|
|
116
|
+
thinkingConfig.thinkingBudget = budgets[effort] ?? 8192;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
config.thinkingConfig = thinkingConfig;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Pass abort signal to SDK for in-flight cancellation
|
|
123
|
+
if (options?.signal) {
|
|
124
|
+
if (options.signal.aborted) {
|
|
125
|
+
throw new Error("Request aborted");
|
|
126
|
+
}
|
|
127
|
+
config.abortSignal = options.signal;
|
|
74
128
|
}
|
|
75
|
-
|
|
129
|
+
|
|
76
130
|
stream.push({ type: "start", partial: output });
|
|
77
|
-
|
|
131
|
+
|
|
78
132
|
// Start streaming
|
|
79
133
|
const response = await client.models.generateContentStream({
|
|
80
134
|
model: model.apiId,
|
|
81
135
|
contents,
|
|
82
136
|
config,
|
|
83
137
|
});
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
let
|
|
87
|
-
|
|
138
|
+
|
|
139
|
+
// Track current content block for thinking/text transitions
|
|
140
|
+
let currentBlock: any = null;
|
|
141
|
+
let currentBlockType: "text" | "thinking" | null = null;
|
|
142
|
+
|
|
88
143
|
for await (const chunk of response) {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
144
|
+
const candidate = chunk.candidates?.[0];
|
|
145
|
+
|
|
146
|
+
// Process individual parts (handles thinking vs text detection)
|
|
147
|
+
if (candidate?.content?.parts) {
|
|
148
|
+
for (const part of candidate.content.parts) {
|
|
149
|
+
if (part.text !== undefined) {
|
|
150
|
+
const isThinking = part.thought === true;
|
|
151
|
+
const targetType = isThinking ? "thinking" : "text";
|
|
152
|
+
|
|
153
|
+
// Check if we need to transition to a new block
|
|
154
|
+
if (currentBlockType !== targetType) {
|
|
155
|
+
// End previous block
|
|
156
|
+
if (currentBlock && currentBlockType) {
|
|
157
|
+
if (currentBlockType === "text") {
|
|
158
|
+
stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
|
|
159
|
+
} else {
|
|
160
|
+
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Start new block
|
|
165
|
+
if (isThinking) {
|
|
166
|
+
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
|
167
|
+
output.content.push(currentBlock);
|
|
168
|
+
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
|
|
169
|
+
} else {
|
|
170
|
+
currentBlock = { type: "text", text: "", textSignature: undefined };
|
|
171
|
+
output.content.push(currentBlock);
|
|
172
|
+
stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
|
|
173
|
+
}
|
|
174
|
+
currentBlockType = targetType;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Accumulate content
|
|
178
|
+
if (currentBlockType === "thinking") {
|
|
179
|
+
currentBlock.thinking += part.text;
|
|
180
|
+
currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
|
|
181
|
+
stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
|
|
182
|
+
} else {
|
|
183
|
+
currentBlock.text += part.text;
|
|
184
|
+
currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
|
|
185
|
+
stream.push({ type: "text_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
if (part.functionCall) {
|
|
190
|
+
// End current text/thinking block before tool call
|
|
191
|
+
if (currentBlock && currentBlockType) {
|
|
192
|
+
if (currentBlockType === "text") {
|
|
193
|
+
stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
|
|
194
|
+
} else {
|
|
195
|
+
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
196
|
+
}
|
|
197
|
+
currentBlock = null;
|
|
198
|
+
currentBlockType = null;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// Generate unique tool call ID with dedup (matches pi-mono pattern)
|
|
202
|
+
const providedId = part.functionCall.id;
|
|
203
|
+
const needsNewId =
|
|
204
|
+
!providedId || output.content.some((b: any) => b.type === "toolCall" && b.id === providedId);
|
|
205
|
+
const toolCallId = needsNewId
|
|
206
|
+
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
|
207
|
+
: providedId;
|
|
208
|
+
|
|
209
|
+
const toolCall = {
|
|
210
|
+
type: "toolCall" as const,
|
|
211
|
+
id: toolCallId,
|
|
212
|
+
name: part.functionCall.name || "",
|
|
213
|
+
arguments: (part.functionCall.args as Record<string, any>) ?? {},
|
|
214
|
+
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
output.content.push(toolCall);
|
|
218
|
+
const idx = output.content.length - 1;
|
|
219
|
+
stream.push({ type: "toolcall_start", contentIndex: idx, partial: output });
|
|
220
|
+
stream.push({ type: "toolcall_delta", contentIndex: idx, delta: JSON.stringify(toolCall.arguments), partial: output });
|
|
221
|
+
stream.push({ type: "toolcall_end", contentIndex: idx, toolCall, partial: output });
|
|
222
|
+
}
|
|
131
223
|
}
|
|
132
224
|
}
|
|
133
|
-
|
|
225
|
+
|
|
134
226
|
// Handle finish reason
|
|
135
|
-
if (
|
|
136
|
-
|
|
137
|
-
if (
|
|
138
|
-
output.stopReason = "stop";
|
|
139
|
-
} else if (reason === "MAX_TOKENS") {
|
|
140
|
-
output.stopReason = "length";
|
|
141
|
-
} else if (reason === "SAFETY") {
|
|
142
|
-
output.stopReason = "error";
|
|
227
|
+
if (candidate?.finishReason) {
|
|
228
|
+
output.stopReason = mapGeminiStopReason(candidate.finishReason);
|
|
229
|
+
if (candidate.finishReason === FinishReason.SAFETY) {
|
|
143
230
|
output.errorMessage = "Content blocked by safety filters";
|
|
144
231
|
}
|
|
232
|
+
// Override to toolUse if any tool calls are present (matches pi-mono)
|
|
233
|
+
if (output.content.some((b: any) => b.type === "toolCall")) {
|
|
234
|
+
output.stopReason = "toolUse";
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
// Update usage — include thoughtsTokenCount in output (matches pi-mono)
|
|
239
|
+
if (chunk.usageMetadata) {
|
|
240
|
+
const meta = chunk.usageMetadata as any;
|
|
241
|
+
output.usage = {
|
|
242
|
+
input: meta.promptTokenCount || 0,
|
|
243
|
+
output: (meta.candidatesTokenCount || 0) + (meta.thoughtsTokenCount || 0),
|
|
244
|
+
cacheRead: meta.cachedContentTokenCount || 0,
|
|
245
|
+
cacheWrite: 0,
|
|
246
|
+
totalTokens: meta.totalTokenCount || 0,
|
|
247
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
248
|
+
};
|
|
249
|
+
calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
|
|
145
250
|
}
|
|
146
251
|
}
|
|
147
|
-
|
|
148
|
-
// End
|
|
149
|
-
if (
|
|
150
|
-
|
|
252
|
+
|
|
253
|
+
// End final block
|
|
254
|
+
if (currentBlock && currentBlockType) {
|
|
255
|
+
if (currentBlockType === "text") {
|
|
256
|
+
stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
|
|
257
|
+
} else {
|
|
258
|
+
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
259
|
+
}
|
|
151
260
|
}
|
|
152
|
-
|
|
261
|
+
|
|
153
262
|
stream.push({ type: "done", reason: output.stopReason as any, message: output });
|
|
154
263
|
stream.end();
|
|
155
264
|
} catch (error) {
|
|
@@ -159,6 +268,6 @@ export function streamGemini(
|
|
|
159
268
|
stream.end();
|
|
160
269
|
}
|
|
161
270
|
})();
|
|
162
|
-
|
|
271
|
+
|
|
163
272
|
return stream;
|
|
164
273
|
}
|