@ssweens/pi-vertex 1.0.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/models/maas.ts CHANGED
@@ -1,13 +1,14 @@
1
1
  /**
2
2
  * MaaS (Model-as-a-Service) open model definitions for Vertex AI
3
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#open-models
4
- * All prices per 1M tokens (as of Feb 2025)
3
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
4
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
5
+ * All prices per 1M tokens
5
6
  */
6
7
 
7
8
  import type { VertexModelConfig } from "../types.js";
8
9
 
9
10
  export const MAAS_MODELS: VertexModelConfig[] = [
10
- // Llama models (Meta)
11
+ // --- Meta Llama ---
11
12
  {
12
13
  id: "llama-4-maverick",
13
14
  name: "Llama 4 Maverick",
@@ -66,7 +67,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
66
67
  region: "global",
67
68
  },
68
69
 
69
- // Mistral models
70
+ // --- Mistral AI ---
70
71
  {
71
72
  id: "mistral-medium-3",
72
73
  name: "Mistral Medium 3",
@@ -106,45 +107,45 @@ export const MAAS_MODELS: VertexModelConfig[] = [
106
107
  region: "global",
107
108
  },
108
109
  {
109
- id: "mistral-ocr",
110
- name: "Mistral OCR",
111
- apiId: "mistralai/mistral-ocr-2505",
110
+ id: "codestral-2",
111
+ name: "Codestral 2",
112
+ apiId: "mistralai/codestral-2",
112
113
  publisher: "mistralai",
113
114
  endpointType: "maas",
114
- contextWindow: 128000,
115
+ contextWindow: 256000,
115
116
  maxTokens: 32000,
116
- input: ["text", "image"],
117
+ input: ["text"],
117
118
  reasoning: false,
118
- tools: false,
119
+ tools: true,
119
120
  cost: {
120
- input: 0.50, // Per page: $0.0005/page, shown as approx per 1K pages
121
- output: 0.50, // Per page pricing
121
+ input: 0.30,
122
+ output: 0.90,
122
123
  cacheRead: 0,
123
124
  cacheWrite: 0,
124
125
  },
125
126
  region: "global",
126
127
  },
127
128
  {
128
- id: "codestral-2",
129
- name: "Codestral 2",
130
- apiId: "mistralai/codestral-2",
129
+ id: "mistral-ocr",
130
+ name: "Mistral OCR",
131
+ apiId: "mistralai/mistral-ocr-2505",
131
132
  publisher: "mistralai",
132
133
  endpointType: "maas",
133
- contextWindow: 256000,
134
+ contextWindow: 128000,
134
135
  maxTokens: 32000,
135
- input: ["text"],
136
+ input: ["text", "image"],
136
137
  reasoning: false,
137
- tools: true,
138
+ tools: false,
138
139
  cost: {
139
- input: 0.30,
140
- output: 0.90,
140
+ input: 0.0005,
141
+ output: 0.0005,
141
142
  cacheRead: 0,
142
143
  cacheWrite: 0,
143
144
  },
144
145
  region: "global",
145
146
  },
146
147
 
147
- // DeepSeek models
148
+ // --- DeepSeek ---
148
149
  {
149
150
  id: "deepseek-v3.2",
150
151
  name: "DeepSeek V3.2",
@@ -202,48 +203,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
202
203
  },
203
204
  region: "global",
204
205
  },
205
-
206
- // AI21 Labs models
207
- {
208
- id: "jamba-1.5-large",
209
- name: "Jamba 1.5 Large",
210
- apiId: "ai21/jamba-1.5-large",
211
- publisher: "ai21",
212
- endpointType: "maas",
213
- contextWindow: 256000,
214
- maxTokens: 256000,
215
- input: ["text"],
216
- reasoning: false,
217
- tools: true,
218
- cost: {
219
- input: 2.00,
220
- output: 8.00,
221
- cacheRead: 0,
222
- cacheWrite: 0,
223
- },
224
- region: "global",
225
- },
226
206
  {
227
- id: "jamba-1.5-mini",
228
- name: "Jamba 1.5 Mini",
229
- apiId: "ai21/jamba-1.5-mini",
230
- publisher: "ai21",
207
+ id: "deepseek-ocr",
208
+ name: "DeepSeek OCR",
209
+ apiId: "deepseek-ai/deepseek-ocr-maas",
210
+ publisher: "deepseek-ai",
231
211
  endpointType: "maas",
232
- contextWindow: 256000,
233
- maxTokens: 256000,
234
- input: ["text"],
212
+ contextWindow: 163840,
213
+ maxTokens: 32000,
214
+ input: ["text", "image"],
235
215
  reasoning: false,
236
- tools: true,
216
+ tools: false,
237
217
  cost: {
238
- input: 0.20,
239
- output: 0.40,
218
+ input: 0.30,
219
+ output: 1.20,
240
220
  cacheRead: 0,
241
221
  cacheWrite: 0,
242
222
  },
243
223
  region: "global",
244
224
  },
245
225
 
246
- // OpenAI models (gpt-oss)
226
+ // --- OpenAI (gpt-oss) ---
247
227
  {
248
228
  id: "gpt-oss-120b",
249
229
  name: "GPT-OSS 120B",
@@ -283,28 +263,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
283
263
  region: "global",
284
264
  },
285
265
 
286
- // DeepSeek OCR
287
- {
288
- id: "deepseek-ocr",
289
- name: "DeepSeek OCR",
290
- apiId: "deepseek-ai/deepseek-ocr-maas",
291
- publisher: "deepseek-ai",
292
- endpointType: "maas",
293
- contextWindow: 163840,
294
- maxTokens: 32000,
295
- input: ["text", "image"],
296
- reasoning: false,
297
- tools: false,
298
- cost: {
299
- input: 0.30, // Per page: $0.0003/page
300
- output: 1.20, // Per page pricing
301
- cacheRead: 0,
302
- cacheWrite: 0,
303
- },
304
- region: "global",
305
- },
306
-
307
- // Qwen models
266
+ // --- Qwen ---
308
267
  {
309
268
  id: "qwen3-235b",
310
269
  name: "Qwen 3 235B",
@@ -382,7 +341,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
382
341
  region: "global",
383
342
  },
384
343
 
385
- // Other models
344
+ // --- Moonshot ---
386
345
  {
387
346
  id: "kimi-k2-thinking",
388
347
  name: "Kimi K2 Thinking",
@@ -402,6 +361,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
402
361
  },
403
362
  region: "global",
404
363
  },
364
+
365
+ // --- MiniMax ---
405
366
  {
406
367
  id: "minimax-m2",
407
368
  name: "MiniMax M2",
@@ -421,6 +382,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
421
382
  },
422
383
  region: "global",
423
384
  },
385
+
386
+ // --- GLM (Zhipu AI) ---
424
387
  {
425
388
  id: "glm-5",
426
389
  name: "GLM 5",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ssweens/pi-vertex",
3
- "version": "1.0.1",
3
+ "version": "1.1.3",
4
4
  "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -13,6 +13,8 @@
13
13
  "models/",
14
14
  "streaming/",
15
15
  "README.md",
16
+ "CHANGELOG.md",
17
+ "TEST_COVERAGE.md",
16
18
  "LICENSE",
17
19
  "screenshot.png"
18
20
  ],
@@ -22,6 +24,7 @@
22
24
  "check": "echo 'nothing to check'"
23
25
  },
24
26
  "dependencies": {
27
+ "@anthropic-ai/vertex-sdk": "^0.14.4",
25
28
  "@google/genai": "^1.42.0",
26
29
  "google-auth-library": "^9.0.0"
27
30
  },
@@ -1,20 +1,49 @@
1
1
  /**
2
2
  * Gemini streaming handler using @google/genai SDK
3
+ *
4
+ * Aligned with pi-mono's google-vertex.ts for consistent handling of:
5
+ * - Thinking content (thought blocks with signatures)
6
+ * - Tool calls with unique IDs and deduplication
7
+ * - Thinking configuration (levels for Gemini 3, budgets for Gemini 2.5)
8
+ * - Usage tracking including thinking tokens
3
9
  */
4
10
 
5
- import { GoogleGenAI } from "@google/genai";
6
- import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
11
+ import { GoogleGenAI, FinishReason, ThinkingLevel } from "@google/genai";
12
+ import type { VertexModelConfig, Context, StreamOptions, AssistantMessage } from "../types.js";
7
13
  import { getAuthConfig, resolveLocation } from "../auth.js";
8
- import { sanitizeText, convertToGeminiMessages, calculateCost } from "../utils.js";
9
- import { createAssistantMessageEventStream, type AssistantMessageEventStream, type AssistantMessage } from "@mariozechner/pi-ai";
14
+ import { sanitizeText, convertToGeminiMessages, convertToolsForGemini, retainThoughtSignature, calculateCost } from "../utils.js";
15
+ import { createAssistantMessageEventStream, type AssistantMessageEventStream } from "@mariozechner/pi-ai";
16
+
17
+ // Module-level counter for generating unique tool call IDs (matches pi-mono pattern)
18
+ let toolCallCounter = 0;
19
+
20
+ const THINKING_LEVEL_MAP: Record<string, ThinkingLevel> = {
21
+ minimal: ThinkingLevel.MINIMAL,
22
+ low: ThinkingLevel.LOW,
23
+ medium: ThinkingLevel.MEDIUM,
24
+ high: ThinkingLevel.HIGH,
25
+ };
26
+
27
+ function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
28
+ switch (reason) {
29
+ case FinishReason.STOP:
30
+ return "stop";
31
+ case FinishReason.MAX_TOKENS:
32
+ return "length";
33
+ case FinishReason.SAFETY:
34
+ case FinishReason.RECITATION:
35
+ default:
36
+ return "error";
37
+ }
38
+ }
10
39
 
11
40
  export function streamGemini(
12
41
  model: VertexModelConfig,
13
42
  context: Context,
14
- options?: StreamOptions
43
+ options?: StreamOptions,
15
44
  ): AssistantMessageEventStream {
16
45
  const stream = createAssistantMessageEventStream();
17
-
46
+
18
47
  (async () => {
19
48
  const output: AssistantMessage = {
20
49
  role: "assistant",
@@ -33,123 +62,203 @@ export function streamGemini(
33
62
  stopReason: "stop",
34
63
  timestamp: Date.now(),
35
64
  };
36
-
65
+
37
66
  try {
38
67
  // Priority: config file > env var > model region > default
39
68
  const location = resolveLocation(model.region);
40
69
  const auth = getAuthConfig(location);
41
70
 
42
- // Create client
71
+ // Create client with explicit API version (matches pi-mono)
43
72
  const client = new GoogleGenAI({
44
73
  vertexai: true,
45
74
  project: auth.projectId,
46
75
  location: auth.location,
76
+ apiVersion: "v1",
47
77
  });
48
-
49
- // Convert messages
50
- const contents = convertToGeminiMessages(context.messages);
51
-
52
- // Build config
78
+
79
+ // Convert messages with model ID for proper thinking/tool handling
80
+ const contents = convertToGeminiMessages(context.messages, model.apiId);
81
+
82
+ // Build config — only set temperature when explicitly provided
53
83
  const config: any = {
54
84
  maxOutputTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
55
- temperature: options?.temperature ?? 0.7,
85
+ ...(options?.temperature !== undefined && { temperature: options.temperature }),
56
86
  };
57
-
87
+
58
88
  // Add system prompt if present
59
89
  if (context.systemPrompt) {
60
90
  config.systemInstruction = sanitizeText(context.systemPrompt);
61
91
  }
62
-
63
- // Add tools if present
92
+
93
+ // Add tools if present (using parametersJsonSchema for full JSON Schema support)
64
94
  if (context.tools && context.tools.length > 0) {
65
- config.tools = [
66
- {
67
- functionDeclarations: context.tools.map((tool) => ({
68
- name: tool.name,
69
- description: tool.description,
70
- parameters: tool.parameters,
71
- })),
72
- },
73
- ];
95
+ config.tools = convertToolsForGemini(context.tools);
96
+ }
97
+
98
+ // Add thinking configuration (matches pi-mono's buildParams logic)
99
+ if (model.reasoning && options?.reasoning) {
100
+ const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
101
+ const isGemini3 = model.apiId.startsWith("gemini-3");
102
+
103
+ const thinkingConfig: any = { includeThoughts: true };
104
+
105
+ if (isGemini3) {
106
+ // Gemini 3 models use thinking levels (MINIMAL/LOW/MEDIUM/HIGH)
107
+ thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[effort];
108
+ } else {
109
+ // Gemini 2.5 models use thinking budgets (token counts)
110
+ const budgets: Record<string, number> = {
111
+ minimal: 128,
112
+ low: 2048,
113
+ medium: 8192,
114
+ high: model.apiId.includes("2.5-pro") ? 32768 : 24576,
115
+ };
116
+ thinkingConfig.thinkingBudget = budgets[effort] ?? 8192;
117
+ }
118
+
119
+ config.thinkingConfig = thinkingConfig;
120
+ }
121
+
122
+ // Pass abort signal to SDK for in-flight cancellation
123
+ if (options?.signal) {
124
+ if (options.signal.aborted) {
125
+ throw new Error("Request aborted");
126
+ }
127
+ config.abortSignal = options.signal;
74
128
  }
75
-
129
+
76
130
  stream.push({ type: "start", partial: output });
77
-
131
+
78
132
  // Start streaming
79
133
  const response = await client.models.generateContentStream({
80
134
  model: model.apiId,
81
135
  contents,
82
136
  config,
83
137
  });
84
-
85
- let textContent = "";
86
- let textIndex = 0;
87
-
138
+
139
+ // Track current content block for thinking/text transitions
140
+ let currentBlock: any = null;
141
+ let currentBlockType: "text" | "thinking" | null = null;
142
+
88
143
  for await (const chunk of response) {
89
- if (options?.signal?.aborted) {
90
- throw new Error("Request was aborted");
91
- }
92
-
93
- // Update usage
94
- if (chunk.usageMetadata) {
95
- output.usage.input = chunk.usageMetadata.promptTokenCount || output.usage.input;
96
- output.usage.output = chunk.usageMetadata.candidatesTokenCount || output.usage.output;
97
- output.usage.totalTokens = chunk.usageMetadata.totalTokenCount ||
98
- (output.usage.input + output.usage.output);
99
- calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
100
- }
101
-
102
- // Handle text
103
- const text = chunk.text;
104
- if (text) {
105
- if (!textContent) {
106
- // First text chunk
107
- output.content.push({ type: "text", text: "" });
108
- textIndex = output.content.length - 1;
109
- stream.push({ type: "text_start", contentIndex: textIndex, partial: output });
110
- }
111
- textContent += text;
112
- (output.content[textIndex] as any).text = textContent;
113
- stream.push({ type: "text_delta", contentIndex: textIndex, delta: text, partial: output });
114
- }
115
-
116
- // Handle function calls (tools)
117
- if (chunk.functionCalls && chunk.functionCalls.length > 0) {
118
- for (const call of chunk.functionCalls) {
119
- output.content.push({
120
- type: "toolCall",
121
- id: call.id || `call_${Date.now()}`,
122
- name: call.name,
123
- arguments: call.args || {},
124
- });
125
- stream.push({
126
- type: "toolcall_end",
127
- contentIndex: output.content.length - 1,
128
- toolCall: output.content[output.content.length - 1] as any,
129
- partial: output,
130
- });
144
+ const candidate = chunk.candidates?.[0];
145
+
146
+ // Process individual parts (handles thinking vs text detection)
147
+ if (candidate?.content?.parts) {
148
+ for (const part of candidate.content.parts) {
149
+ if (part.text !== undefined) {
150
+ const isThinking = part.thought === true;
151
+ const targetType = isThinking ? "thinking" : "text";
152
+
153
+ // Check if we need to transition to a new block
154
+ if (currentBlockType !== targetType) {
155
+ // End previous block
156
+ if (currentBlock && currentBlockType) {
157
+ if (currentBlockType === "text") {
158
+ stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
159
+ } else {
160
+ stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
161
+ }
162
+ }
163
+
164
+ // Start new block
165
+ if (isThinking) {
166
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
167
+ output.content.push(currentBlock);
168
+ stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
169
+ } else {
170
+ currentBlock = { type: "text", text: "", textSignature: undefined };
171
+ output.content.push(currentBlock);
172
+ stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
173
+ }
174
+ currentBlockType = targetType;
175
+ }
176
+
177
+ // Accumulate content
178
+ if (currentBlockType === "thinking") {
179
+ currentBlock.thinking += part.text;
180
+ currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
181
+ stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
182
+ } else {
183
+ currentBlock.text += part.text;
184
+ currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
185
+ stream.push({ type: "text_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
186
+ }
187
+ }
188
+
189
+ if (part.functionCall) {
190
+ // End current text/thinking block before tool call
191
+ if (currentBlock && currentBlockType) {
192
+ if (currentBlockType === "text") {
193
+ stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
194
+ } else {
195
+ stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
196
+ }
197
+ currentBlock = null;
198
+ currentBlockType = null;
199
+ }
200
+
201
+ // Generate unique tool call ID with dedup (matches pi-mono pattern)
202
+ const providedId = part.functionCall.id;
203
+ const needsNewId =
204
+ !providedId || output.content.some((b: any) => b.type === "toolCall" && b.id === providedId);
205
+ const toolCallId = needsNewId
206
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
207
+ : providedId;
208
+
209
+ const toolCall = {
210
+ type: "toolCall" as const,
211
+ id: toolCallId,
212
+ name: part.functionCall.name || "",
213
+ arguments: (part.functionCall.args as Record<string, any>) ?? {},
214
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
215
+ };
216
+
217
+ output.content.push(toolCall);
218
+ const idx = output.content.length - 1;
219
+ stream.push({ type: "toolcall_start", contentIndex: idx, partial: output });
220
+ stream.push({ type: "toolcall_delta", contentIndex: idx, delta: JSON.stringify(toolCall.arguments), partial: output });
221
+ stream.push({ type: "toolcall_end", contentIndex: idx, toolCall, partial: output });
222
+ }
131
223
  }
132
224
  }
133
-
225
+
134
226
  // Handle finish reason
135
- if (chunk.candidates && chunk.candidates[0]?.finishReason) {
136
- const reason = chunk.candidates[0].finishReason;
137
- if (reason === "STOP") {
138
- output.stopReason = "stop";
139
- } else if (reason === "MAX_TOKENS") {
140
- output.stopReason = "length";
141
- } else if (reason === "SAFETY") {
142
- output.stopReason = "error";
227
+ if (candidate?.finishReason) {
228
+ output.stopReason = mapGeminiStopReason(candidate.finishReason);
229
+ if (candidate.finishReason === FinishReason.SAFETY) {
143
230
  output.errorMessage = "Content blocked by safety filters";
144
231
  }
232
+ // Override to toolUse if any tool calls are present (matches pi-mono)
233
+ if (output.content.some((b: any) => b.type === "toolCall")) {
234
+ output.stopReason = "toolUse";
235
+ }
236
+ }
237
+
238
+ // Update usage — include thoughtsTokenCount in output (matches pi-mono)
239
+ if (chunk.usageMetadata) {
240
+ const meta = chunk.usageMetadata as any;
241
+ output.usage = {
242
+ input: meta.promptTokenCount || 0,
243
+ output: (meta.candidatesTokenCount || 0) + (meta.thoughtsTokenCount || 0),
244
+ cacheRead: meta.cachedContentTokenCount || 0,
245
+ cacheWrite: 0,
246
+ totalTokens: meta.totalTokenCount || 0,
247
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
248
+ };
249
+ calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
145
250
  }
146
251
  }
147
-
148
- // End text if we had any
149
- if (textContent) {
150
- stream.push({ type: "text_end", contentIndex: textIndex, content: textContent, partial: output });
252
+
253
+ // End final block
254
+ if (currentBlock && currentBlockType) {
255
+ if (currentBlockType === "text") {
256
+ stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
257
+ } else {
258
+ stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
259
+ }
151
260
  }
152
-
261
+
153
262
  stream.push({ type: "done", reason: output.stopReason as any, message: output });
154
263
  stream.end();
155
264
  } catch (error) {
@@ -159,6 +268,6 @@ export function streamGemini(
159
268
  stream.end();
160
269
  }
161
270
  })();
162
-
271
+
163
272
  return stream;
164
273
  }