@ssweens/pi-vertex 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/models/maas.ts ADDED
@@ -0,0 +1,462 @@
1
+ /**
2
+ * MaaS (Model-as-a-Service) open model definitions for Vertex AI
3
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#open-models
4
+ * All prices per 1M tokens (as of Feb 2025), except the OCR models, which are priced per page
5
+ */
6
+
7
+ import type { VertexModelConfig } from "../types.js";
8
+
9
+ export const MAAS_MODELS: VertexModelConfig[] = [
10
+ // Llama models (Meta)
11
+ {
12
+ id: "llama-4-maverick",
13
+ name: "Llama 4 Maverick",
14
+ apiId: "meta/llama-4-maverick-17b-128e-instruct-maas",
15
+ publisher: "meta",
16
+ endpointType: "maas",
17
+ contextWindow: 524288,
18
+ maxTokens: 32000,
19
+ input: ["text"],
20
+ reasoning: true,
21
+ tools: true,
22
+ cost: {
23
+ input: 0.35,
24
+ output: 1.15,
25
+ cacheRead: 0,
26
+ cacheWrite: 0,
27
+ },
28
+ region: "global",
29
+ },
30
+ {
31
+ id: "llama-4-scout",
32
+ name: "Llama 4 Scout",
33
+ apiId: "meta/llama-4-scout-17b-16e-instruct-maas",
34
+ publisher: "meta",
35
+ endpointType: "maas",
36
+ contextWindow: 1310720,
37
+ maxTokens: 32000,
38
+ input: ["text"],
39
+ reasoning: true,
40
+ tools: true,
41
+ cost: {
42
+ input: 0.25,
43
+ output: 0.70,
44
+ cacheRead: 0,
45
+ cacheWrite: 0,
46
+ },
47
+ region: "global",
48
+ },
49
+ {
50
+ id: "llama-3.3-70b",
51
+ name: "Llama 3.3 70B",
52
+ apiId: "meta/llama-3.3-70b-instruct-maas",
53
+ publisher: "meta",
54
+ endpointType: "maas",
55
+ contextWindow: 128000,
56
+ maxTokens: 8192,
57
+ input: ["text"],
58
+ reasoning: false,
59
+ tools: true,
60
+ cost: {
61
+ input: 0.72,
62
+ output: 0.72,
63
+ cacheRead: 0,
64
+ cacheWrite: 0,
65
+ },
66
+ region: "global",
67
+ },
68
+
69
+ // Mistral models
70
+ {
71
+ id: "mistral-medium-3",
72
+ name: "Mistral Medium 3",
73
+ apiId: "mistralai/mistral-medium-3",
74
+ publisher: "mistralai",
75
+ endpointType: "maas",
76
+ contextWindow: 128000,
77
+ maxTokens: 32000,
78
+ input: ["text"],
79
+ reasoning: false,
80
+ tools: true,
81
+ cost: {
82
+ input: 0.40,
83
+ output: 2.00,
84
+ cacheRead: 0,
85
+ cacheWrite: 0,
86
+ },
87
+ region: "global",
88
+ },
89
+ {
90
+ id: "mistral-small-3.1",
91
+ name: "Mistral Small 3.1",
92
+ apiId: "mistralai/mistral-small-2503",
93
+ publisher: "mistralai",
94
+ endpointType: "maas",
95
+ contextWindow: 128000,
96
+ maxTokens: 32000,
97
+ input: ["text"],
98
+ reasoning: false,
99
+ tools: true,
100
+ cost: {
101
+ input: 0.10,
102
+ output: 0.30,
103
+ cacheRead: 0,
104
+ cacheWrite: 0,
105
+ },
106
+ region: "global",
107
+ },
108
+ {
109
+ id: "mistral-ocr",
110
+ name: "Mistral OCR",
111
+ apiId: "mistralai/mistral-ocr-2505",
112
+ publisher: "mistralai",
113
+ endpointType: "maas",
114
+ contextWindow: 128000,
115
+ maxTokens: 32000,
116
+ input: ["text", "image"],
117
+ reasoning: false,
118
+ tools: false,
119
+ cost: {
120
+ input: 0.50, // Per page: $0.0005/page, shown as approx per 1K pages
121
+ output: 0.50, // Per page pricing
122
+ cacheRead: 0,
123
+ cacheWrite: 0,
124
+ },
125
+ region: "global",
126
+ },
127
+ {
128
+ id: "codestral-2",
129
+ name: "Codestral 2",
130
+ apiId: "mistralai/codestral-2",
131
+ publisher: "mistralai",
132
+ endpointType: "maas",
133
+ contextWindow: 256000,
134
+ maxTokens: 32000,
135
+ input: ["text"],
136
+ reasoning: false,
137
+ tools: true,
138
+ cost: {
139
+ input: 0.30,
140
+ output: 0.90,
141
+ cacheRead: 0,
142
+ cacheWrite: 0,
143
+ },
144
+ region: "global",
145
+ },
146
+
147
+ // DeepSeek models
148
+ {
149
+ id: "deepseek-v3.2",
150
+ name: "DeepSeek V3.2",
151
+ apiId: "deepseek-ai/deepseek-v3.2-maas",
152
+ publisher: "deepseek-ai",
153
+ endpointType: "maas",
154
+ contextWindow: 163840,
155
+ maxTokens: 32000,
156
+ input: ["text"],
157
+ reasoning: true,
158
+ tools: true,
159
+ cost: {
160
+ input: 0.56,
161
+ output: 1.68,
162
+ cacheRead: 0.056,
163
+ cacheWrite: 0,
164
+ },
165
+ region: "global",
166
+ },
167
+ {
168
+ id: "deepseek-v3.1",
169
+ name: "DeepSeek V3.1",
170
+ apiId: "deepseek-ai/deepseek-v3.1-maas",
171
+ publisher: "deepseek-ai",
172
+ endpointType: "maas",
173
+ contextWindow: 163840,
174
+ maxTokens: 32000,
175
+ input: ["text"],
176
+ reasoning: true,
177
+ tools: true,
178
+ cost: {
179
+ input: 0.60,
180
+ output: 1.70,
181
+ cacheRead: 0.06,
182
+ cacheWrite: 0,
183
+ },
184
+ region: "global",
185
+ },
186
+ {
187
+ id: "deepseek-r1",
188
+ name: "DeepSeek R1",
189
+ apiId: "deepseek-ai/deepseek-r1-0528-maas",
190
+ publisher: "deepseek-ai",
191
+ endpointType: "maas",
192
+ contextWindow: 163840,
193
+ maxTokens: 32000,
194
+ input: ["text"],
195
+ reasoning: true,
196
+ tools: true,
197
+ cost: {
198
+ input: 1.35,
199
+ output: 5.40,
200
+ cacheRead: 0,
201
+ cacheWrite: 0,
202
+ },
203
+ region: "global",
204
+ },
205
+
206
+ // AI21 Labs models
207
+ {
208
+ id: "jamba-1.5-large",
209
+ name: "Jamba 1.5 Large",
210
+ apiId: "ai21/jamba-1.5-large",
211
+ publisher: "ai21",
212
+ endpointType: "maas",
213
+ contextWindow: 256000,
214
+ maxTokens: 256000,
215
+ input: ["text"],
216
+ reasoning: false,
217
+ tools: true,
218
+ cost: {
219
+ input: 2.00,
220
+ output: 8.00,
221
+ cacheRead: 0,
222
+ cacheWrite: 0,
223
+ },
224
+ region: "global",
225
+ },
226
+ {
227
+ id: "jamba-1.5-mini",
228
+ name: "Jamba 1.5 Mini",
229
+ apiId: "ai21/jamba-1.5-mini",
230
+ publisher: "ai21",
231
+ endpointType: "maas",
232
+ contextWindow: 256000,
233
+ maxTokens: 256000,
234
+ input: ["text"],
235
+ reasoning: false,
236
+ tools: true,
237
+ cost: {
238
+ input: 0.20,
239
+ output: 0.40,
240
+ cacheRead: 0,
241
+ cacheWrite: 0,
242
+ },
243
+ region: "global",
244
+ },
245
+
246
+ // OpenAI models (gpt-oss)
247
+ {
248
+ id: "gpt-oss-120b",
249
+ name: "GPT-OSS 120B",
250
+ apiId: "openai/gpt-oss-120b-maas",
251
+ publisher: "openai",
252
+ endpointType: "maas",
253
+ contextWindow: 131072,
254
+ maxTokens: 32000,
255
+ input: ["text"],
256
+ reasoning: true,
257
+ tools: true,
258
+ cost: {
259
+ input: 0.09,
260
+ output: 0.36,
261
+ cacheRead: 0,
262
+ cacheWrite: 0,
263
+ },
264
+ region: "global",
265
+ },
266
+ {
267
+ id: "gpt-oss-20b",
268
+ name: "GPT-OSS 20B",
269
+ apiId: "openai/gpt-oss-20b-maas",
270
+ publisher: "openai",
271
+ endpointType: "maas",
272
+ contextWindow: 131072,
273
+ maxTokens: 32000,
274
+ input: ["text"],
275
+ reasoning: false,
276
+ tools: true,
277
+ cost: {
278
+ input: 0.07,
279
+ output: 0.25,
280
+ cacheRead: 0.007,
281
+ cacheWrite: 0,
282
+ },
283
+ region: "global",
284
+ },
285
+
286
+ // DeepSeek OCR
287
+ {
288
+ id: "deepseek-ocr",
289
+ name: "DeepSeek OCR",
290
+ apiId: "deepseek-ai/deepseek-ocr-maas",
291
+ publisher: "deepseek-ai",
292
+ endpointType: "maas",
293
+ contextWindow: 163840,
294
+ maxTokens: 32000,
295
+ input: ["text", "image"],
296
+ reasoning: false,
297
+ tools: false,
298
+ cost: {
299
+ input: 0.30, // Per page: $0.0003/page
300
+ output: 1.20, // Per page pricing
301
+ cacheRead: 0,
302
+ cacheWrite: 0,
303
+ },
304
+ region: "global",
305
+ },
306
+
307
+ // Qwen models
308
+ {
309
+ id: "qwen3-235b",
310
+ name: "Qwen 3 235B",
311
+ apiId: "qwen/qwen3-235b-a22b-instruct-2507-maas",
312
+ publisher: "qwen",
313
+ endpointType: "maas",
314
+ contextWindow: 262144,
315
+ maxTokens: 32000,
316
+ input: ["text"],
317
+ reasoning: true,
318
+ tools: true,
319
+ cost: {
320
+ input: 0.22,
321
+ output: 0.88,
322
+ cacheRead: 0,
323
+ cacheWrite: 0,
324
+ },
325
+ region: "global",
326
+ },
327
+ {
328
+ id: "qwen3-next-instruct",
329
+ name: "Qwen 3 Next Instruct",
330
+ apiId: "qwen/qwen3-next-instruct-80b-maas",
331
+ publisher: "qwen",
332
+ endpointType: "maas",
333
+ contextWindow: 262144,
334
+ maxTokens: 32000,
335
+ input: ["text"],
336
+ reasoning: true,
337
+ tools: true,
338
+ cost: {
339
+ input: 0.15,
340
+ output: 1.20,
341
+ cacheRead: 0,
342
+ cacheWrite: 0,
343
+ },
344
+ region: "global",
345
+ },
346
+ {
347
+ id: "qwen3-next-thinking",
348
+ name: "Qwen 3 Next Thinking",
349
+ apiId: "qwen/qwen3-next-thinking-80b-maas",
350
+ publisher: "qwen",
351
+ endpointType: "maas",
352
+ contextWindow: 262144,
353
+ maxTokens: 32000,
354
+ input: ["text"],
355
+ reasoning: true,
356
+ tools: true,
357
+ cost: {
358
+ input: 0.15,
359
+ output: 1.20,
360
+ cacheRead: 0,
361
+ cacheWrite: 0,
362
+ },
363
+ region: "global",
364
+ },
365
+ {
366
+ id: "qwen3-coder",
367
+ name: "Qwen 3 Coder",
368
+ apiId: "qwen/qwen3-coder-480b-a35b-instruct-maas",
369
+ publisher: "qwen",
370
+ endpointType: "maas",
371
+ contextWindow: 262144,
372
+ maxTokens: 32000,
373
+ input: ["text"],
374
+ reasoning: true,
375
+ tools: true,
376
+ cost: {
377
+ input: 0.22,
378
+ output: 1.80,
379
+ cacheRead: 0.022,
380
+ cacheWrite: 0,
381
+ },
382
+ region: "global",
383
+ },
384
+
385
+ // Other models
386
+ {
387
+ id: "kimi-k2-thinking",
388
+ name: "Kimi K2 Thinking",
389
+ apiId: "moonshotai/kimi-k2-thinking-maas",
390
+ publisher: "moonshotai",
391
+ endpointType: "maas",
392
+ contextWindow: 262144,
393
+ maxTokens: 32000,
394
+ input: ["text"],
395
+ reasoning: true,
396
+ tools: true,
397
+ cost: {
398
+ input: 0.60,
399
+ output: 2.50,
400
+ cacheRead: 0.06,
401
+ cacheWrite: 0,
402
+ },
403
+ region: "global",
404
+ },
405
+ {
406
+ id: "minimax-m2",
407
+ name: "MiniMax M2",
408
+ apiId: "minimaxai/minimax-m2-maas",
409
+ publisher: "minimaxai",
410
+ endpointType: "maas",
411
+ contextWindow: 196608,
412
+ maxTokens: 32000,
413
+ input: ["text"],
414
+ reasoning: true,
415
+ tools: true,
416
+ cost: {
417
+ input: 0.30,
418
+ output: 1.20,
419
+ cacheRead: 0.03,
420
+ cacheWrite: 0,
421
+ },
422
+ region: "global",
423
+ },
424
+ {
425
+ id: "glm-5",
426
+ name: "GLM 5",
427
+ apiId: "zai-org/glm-5-maas",
428
+ publisher: "zai-org",
429
+ endpointType: "maas",
430
+ contextWindow: 200000,
431
+ maxTokens: 32000,
432
+ input: ["text"],
433
+ reasoning: true,
434
+ tools: true,
435
+ cost: {
436
+ input: 1.00,
437
+ output: 3.20,
438
+ cacheRead: 0.10,
439
+ cacheWrite: 0,
440
+ },
441
+ region: "global",
442
+ },
443
+ {
444
+ id: "glm-4.7",
445
+ name: "GLM 4.7",
446
+ apiId: "zai-org/glm-4.7-maas",
447
+ publisher: "zai-org",
448
+ endpointType: "maas",
449
+ contextWindow: 200000,
450
+ maxTokens: 32000,
451
+ input: ["text"],
452
+ reasoning: true,
453
+ tools: true,
454
+ cost: {
455
+ input: 0.60,
456
+ output: 2.20,
457
+ cacheRead: 0,
458
+ cacheWrite: 0,
459
+ },
460
+ region: "global",
461
+ },
462
+ ];
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@ssweens/pi-vertex",
3
+ "version": "1.0.0",
4
+ "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
+ "type": "module",
6
+ "main": "index.ts",
7
+ "files": [
8
+ "index.ts",
9
+ "auth.ts",
10
+ "config.ts",
11
+ "types.ts",
12
+ "utils.ts",
13
+ "models/",
14
+ "streaming/",
15
+ "README.md",
16
+ "LICENSE",
17
+ "screenshot.png"
18
+ ],
19
+ "scripts": {
20
+ "clean": "echo 'nothing to clean'",
21
+ "build": "echo 'nothing to build'",
22
+ "check": "echo 'nothing to check'"
23
+ },
24
+ "dependencies": {
25
+ "@google/genai": "^1.42.0",
26
+ "google-auth-library": "^9.0.0"
27
+ },
28
+ "peerDependencies": {
29
+ "@mariozechner/pi-ai": "*",
30
+ "@mariozechner/pi-coding-agent": "*"
31
+ },
32
+ "pi": {
33
+ "extensions": [
34
+ "./index.ts"
35
+ ]
36
+ },
37
+ "keywords": [
38
+ "pi-package",
39
+ "pi-extension",
40
+ "vertex-ai",
41
+ "gemini",
42
+ "claude",
43
+ "maas"
44
+ ],
45
+ "author": "ssweens",
46
+ "license": "MIT"
47
+ }
package/screenshot.png ADDED
Binary file
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Gemini streaming handler using @google/genai SDK
3
+ */
4
+
5
+ import { GoogleGenAI } from "@google/genai";
6
+ import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
7
+ import { getAuthConfig, resolveLocation } from "../auth.js";
8
+ import { sanitizeText, convertToGeminiMessages, calculateCost } from "../utils.js";
9
+ import { createAssistantMessageEventStream, type AssistantMessageEventStream, type AssistantMessage } from "@mariozechner/pi-ai";
10
+
11
+ export function streamGemini(
12
+ model: VertexModelConfig,
13
+ context: Context,
14
+ options?: StreamOptions
15
+ ): AssistantMessageEventStream {
16
+ const stream = createAssistantMessageEventStream();
17
+
18
+ (async () => {
19
+ const output: AssistantMessage = {
20
+ role: "assistant",
21
+ content: [],
22
+ api: "google-generative-ai",
23
+ provider: "vertex",
24
+ model: model.id,
25
+ usage: {
26
+ input: 0,
27
+ output: 0,
28
+ cacheRead: 0,
29
+ cacheWrite: 0,
30
+ totalTokens: 0,
31
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
32
+ },
33
+ stopReason: "stop",
34
+ timestamp: Date.now(),
35
+ };
36
+
37
+ try {
38
+ // Priority: config file > env var > model region > default
39
+ const location = resolveLocation(model.region);
40
+ const auth = getAuthConfig(location);
41
+
42
+ // Create client
43
+ const client = new GoogleGenAI({
44
+ vertexai: true,
45
+ project: auth.projectId,
46
+ location: auth.location,
47
+ });
48
+
49
+ // Convert messages
50
+ const contents = convertToGeminiMessages(context.messages);
51
+
52
+ // Build config
53
+ const config: any = {
54
+ maxOutputTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
55
+ temperature: options?.temperature ?? 0.7,
56
+ };
57
+
58
+ // Add system prompt if present
59
+ if (context.systemPrompt) {
60
+ config.systemInstruction = sanitizeText(context.systemPrompt);
61
+ }
62
+
63
+ // Add tools if present
64
+ if (context.tools && context.tools.length > 0) {
65
+ config.tools = [
66
+ {
67
+ functionDeclarations: context.tools.map((tool) => ({
68
+ name: tool.name,
69
+ description: tool.description,
70
+ parameters: tool.parameters,
71
+ })),
72
+ },
73
+ ];
74
+ }
75
+
76
+ stream.push({ type: "start", partial: output });
77
+
78
+ // Start streaming
79
+ const response = await client.models.generateContentStream({
80
+ model: model.apiId,
81
+ contents,
82
+ config,
83
+ });
84
+
85
+ let textContent = "";
86
+ let textIndex = 0;
87
+
88
+ for await (const chunk of response) {
89
+ if (options?.signal?.aborted) {
90
+ throw new Error("Request was aborted");
91
+ }
92
+
93
+ // Update usage
94
+ if (chunk.usageMetadata) {
95
+ output.usage.input = chunk.usageMetadata.promptTokenCount || output.usage.input;
96
+ output.usage.output = chunk.usageMetadata.candidatesTokenCount || output.usage.output;
97
+ output.usage.totalTokens = chunk.usageMetadata.totalTokenCount ||
98
+ (output.usage.input + output.usage.output);
99
+ calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
100
+ }
101
+
102
+ // Handle text
103
+ const text = chunk.text;
104
+ if (text) {
105
+ if (!textContent) {
106
+ // First text chunk
107
+ output.content.push({ type: "text", text: "" });
108
+ textIndex = output.content.length - 1;
109
+ stream.push({ type: "text_start", contentIndex: textIndex, partial: output });
110
+ }
111
+ textContent += text;
112
+ (output.content[textIndex] as any).text = textContent;
113
+ stream.push({ type: "text_delta", contentIndex: textIndex, delta: text, partial: output });
114
+ }
115
+
116
+ // Handle function calls (tools)
117
+ if (chunk.functionCalls && chunk.functionCalls.length > 0) {
118
+ for (const call of chunk.functionCalls) {
119
+ output.content.push({
120
+ type: "toolCall",
121
+ id: call.id || `call_${Date.now()}`,
122
+ name: call.name,
123
+ arguments: call.args || {},
124
+ });
125
+ stream.push({
126
+ type: "toolcall_end",
127
+ contentIndex: output.content.length - 1,
128
+ toolCall: output.content[output.content.length - 1] as any,
129
+ partial: output,
130
+ });
131
+ }
132
+ }
133
+
134
+ // Handle finish reason
135
+ if (chunk.candidates && chunk.candidates[0]?.finishReason) {
136
+ const reason = chunk.candidates[0].finishReason;
137
+ if (reason === "STOP") {
138
+ output.stopReason = "stop";
139
+ } else if (reason === "MAX_TOKENS") {
140
+ output.stopReason = "length";
141
+ } else if (reason === "SAFETY") {
142
+ output.stopReason = "error";
143
+ output.errorMessage = "Content blocked by safety filters";
144
+ }
145
+ }
146
+ }
147
+
148
+ // End text if we had any
149
+ if (textContent) {
150
+ stream.push({ type: "text_end", contentIndex: textIndex, content: textContent, partial: output });
151
+ }
152
+
153
+ stream.push({ type: "done", reason: output.stopReason as any, message: output });
154
+ stream.end();
155
+ } catch (error) {
156
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
157
+ output.errorMessage = error instanceof Error ? error.message : String(error);
158
+ stream.push({ type: "error", reason: output.stopReason, error: output });
159
+ stream.end();
160
+ }
161
+ })();
162
+
163
+ return stream;
164
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Streaming handler dispatcher
3
+ */
4
+
5
+ import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
6
+ import type { AssistantMessageEventStream } from "@mariozechner/pi-ai";
7
+ import { streamGemini } from "./gemini.js";
8
+ import { streamMaaS } from "./maas.js";
9
+
10
+ export function streamVertex(
11
+ model: VertexModelConfig,
12
+ context: Context,
13
+ options?: StreamOptions
14
+ ): AssistantMessageEventStream {
15
+ switch (model.endpointType) {
16
+ case "gemini":
17
+ return streamGemini(model, context, options);
18
+ case "maas":
19
+ return streamMaaS(model, context, options);
20
+ default:
21
+ throw new Error(`Unknown endpoint type: ${(model as any).endpointType}`);
22
+ }
23
+ }
24
+
25
+ export { streamGemini, streamMaaS };