@ssweens/pi-vertex 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/models/maas.ts ADDED
@@ -0,0 +1,462 @@
1
+ /**
2
+ * MaaS (Model-as-a-Service) open model definitions for Vertex AI
3
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#open-models
4
+ * All prices per 1M tokens (as of Feb 2025), except the OCR models, which are priced per page
5
+ */
6
+
7
+ import type { VertexModelConfig } from "../types.js";
8
+
9
+ export const MAAS_MODELS: VertexModelConfig[] = [
10
+ // Llama models (Meta)
11
+ {
12
+ id: "llama-4-maverick",
13
+ name: "Llama 4 Maverick",
14
+ apiId: "meta/llama-4-maverick-17b-128e-instruct-maas",
15
+ publisher: "meta",
16
+ endpointType: "maas",
17
+ contextWindow: 524288,
18
+ maxTokens: 32000,
19
+ input: ["text"],
20
+ reasoning: true,
21
+ tools: true,
22
+ cost: {
23
+ input: 0.35,
24
+ output: 1.15,
25
+ cacheRead: 0,
26
+ cacheWrite: 0,
27
+ },
28
+ region: "global",
29
+ },
30
+ {
31
+ id: "llama-4-scout",
32
+ name: "Llama 4 Scout",
33
+ apiId: "meta/llama-4-scout-17b-16e-instruct-maas",
34
+ publisher: "meta",
35
+ endpointType: "maas",
36
+ contextWindow: 1310720,
37
+ maxTokens: 32000,
38
+ input: ["text"],
39
+ reasoning: true,
40
+ tools: true,
41
+ cost: {
42
+ input: 0.25,
43
+ output: 0.70,
44
+ cacheRead: 0,
45
+ cacheWrite: 0,
46
+ },
47
+ region: "global",
48
+ },
49
+ {
50
+ id: "llama-3.3-70b",
51
+ name: "Llama 3.3 70B",
52
+ apiId: "meta/llama-3.3-70b-instruct-maas",
53
+ publisher: "meta",
54
+ endpointType: "maas",
55
+ contextWindow: 128000,
56
+ maxTokens: 8192,
57
+ input: ["text"],
58
+ reasoning: false,
59
+ tools: true,
60
+ cost: {
61
+ input: 0.72,
62
+ output: 0.72,
63
+ cacheRead: 0,
64
+ cacheWrite: 0,
65
+ },
66
+ region: "global",
67
+ },
68
+
69
+ // Mistral models
70
+ {
71
+ id: "mistral-medium-3",
72
+ name: "Mistral Medium 3",
73
+ apiId: "mistralai/mistral-medium-3",
74
+ publisher: "mistralai",
75
+ endpointType: "maas",
76
+ contextWindow: 128000,
77
+ maxTokens: 32000,
78
+ input: ["text"],
79
+ reasoning: false,
80
+ tools: true,
81
+ cost: {
82
+ input: 0.40,
83
+ output: 2.00,
84
+ cacheRead: 0,
85
+ cacheWrite: 0,
86
+ },
87
+ region: "global",
88
+ },
89
+ {
90
+ id: "mistral-small-3.1",
91
+ name: "Mistral Small 3.1",
92
+ apiId: "mistralai/mistral-small-2503",
93
+ publisher: "mistralai",
94
+ endpointType: "maas",
95
+ contextWindow: 128000,
96
+ maxTokens: 32000,
97
+ input: ["text"],
98
+ reasoning: false,
99
+ tools: true,
100
+ cost: {
101
+ input: 0.10,
102
+ output: 0.30,
103
+ cacheRead: 0,
104
+ cacheWrite: 0,
105
+ },
106
+ region: "global",
107
+ },
108
+ {
109
+ id: "mistral-ocr",
110
+ name: "Mistral OCR",
111
+ apiId: "mistralai/mistral-ocr-2505",
112
+ publisher: "mistralai",
113
+ endpointType: "maas",
114
+ contextWindow: 128000,
115
+ maxTokens: 32000,
116
+ input: ["text", "image"],
117
+ reasoning: false,
118
+ tools: false,
119
+ cost: {
120
+ input: 0.50, // Per page: $0.0005/page, shown as approx per 1K pages
121
+ output: 0.50, // Per page pricing
122
+ cacheRead: 0,
123
+ cacheWrite: 0,
124
+ },
125
+ region: "global",
126
+ },
127
+ {
128
+ id: "codestral-2",
129
+ name: "Codestral 2",
130
+ apiId: "mistralai/codestral-2",
131
+ publisher: "mistralai",
132
+ endpointType: "maas",
133
+ contextWindow: 256000,
134
+ maxTokens: 32000,
135
+ input: ["text"],
136
+ reasoning: false,
137
+ tools: true,
138
+ cost: {
139
+ input: 0.30,
140
+ output: 0.90,
141
+ cacheRead: 0,
142
+ cacheWrite: 0,
143
+ },
144
+ region: "global",
145
+ },
146
+
147
+ // DeepSeek models
148
+ {
149
+ id: "deepseek-v3.2",
150
+ name: "DeepSeek V3.2",
151
+ apiId: "deepseek-ai/deepseek-v3.2-maas",
152
+ publisher: "deepseek-ai",
153
+ endpointType: "maas",
154
+ contextWindow: 163840,
155
+ maxTokens: 32000,
156
+ input: ["text"],
157
+ reasoning: true,
158
+ tools: true,
159
+ cost: {
160
+ input: 0.56,
161
+ output: 1.68,
162
+ cacheRead: 0.056,
163
+ cacheWrite: 0,
164
+ },
165
+ region: "global",
166
+ },
167
+ {
168
+ id: "deepseek-v3.1",
169
+ name: "DeepSeek V3.1",
170
+ apiId: "deepseek-ai/deepseek-v3.1-maas",
171
+ publisher: "deepseek-ai",
172
+ endpointType: "maas",
173
+ contextWindow: 163840,
174
+ maxTokens: 32000,
175
+ input: ["text"],
176
+ reasoning: true,
177
+ tools: true,
178
+ cost: {
179
+ input: 0.60,
180
+ output: 1.70,
181
+ cacheRead: 0.06,
182
+ cacheWrite: 0,
183
+ },
184
+ region: "global",
185
+ },
186
+ {
187
+ id: "deepseek-r1",
188
+ name: "DeepSeek R1",
189
+ apiId: "deepseek-ai/deepseek-r1-0528-maas",
190
+ publisher: "deepseek-ai",
191
+ endpointType: "maas",
192
+ contextWindow: 163840,
193
+ maxTokens: 32000,
194
+ input: ["text"],
195
+ reasoning: true,
196
+ tools: true,
197
+ cost: {
198
+ input: 1.35,
199
+ output: 5.40,
200
+ cacheRead: 0,
201
+ cacheWrite: 0,
202
+ },
203
+ region: "global",
204
+ },
205
+
206
+ // AI21 Labs models
207
+ {
208
+ id: "jamba-1.5-large",
209
+ name: "Jamba 1.5 Large",
210
+ apiId: "ai21/jamba-1.5-large",
211
+ publisher: "ai21",
212
+ endpointType: "maas",
213
+ contextWindow: 256000,
214
+ maxTokens: 256000,
215
+ input: ["text"],
216
+ reasoning: false,
217
+ tools: true,
218
+ cost: {
219
+ input: 2.00,
220
+ output: 8.00,
221
+ cacheRead: 0,
222
+ cacheWrite: 0,
223
+ },
224
+ region: "global",
225
+ },
226
+ {
227
+ id: "jamba-1.5-mini",
228
+ name: "Jamba 1.5 Mini",
229
+ apiId: "ai21/jamba-1.5-mini",
230
+ publisher: "ai21",
231
+ endpointType: "maas",
232
+ contextWindow: 256000,
233
+ maxTokens: 256000,
234
+ input: ["text"],
235
+ reasoning: false,
236
+ tools: true,
237
+ cost: {
238
+ input: 0.20,
239
+ output: 0.40,
240
+ cacheRead: 0,
241
+ cacheWrite: 0,
242
+ },
243
+ region: "global",
244
+ },
245
+
246
+ // OpenAI models (gpt-oss)
247
+ {
248
+ id: "gpt-oss-120b",
249
+ name: "GPT-OSS 120B",
250
+ apiId: "openai/gpt-oss-120b-maas",
251
+ publisher: "openai",
252
+ endpointType: "maas",
253
+ contextWindow: 131072,
254
+ maxTokens: 32000,
255
+ input: ["text"],
256
+ reasoning: true,
257
+ tools: true,
258
+ cost: {
259
+ input: 0.09,
260
+ output: 0.36,
261
+ cacheRead: 0,
262
+ cacheWrite: 0,
263
+ },
264
+ region: "global",
265
+ },
266
+ {
267
+ id: "gpt-oss-20b",
268
+ name: "GPT-OSS 20B",
269
+ apiId: "openai/gpt-oss-20b-maas",
270
+ publisher: "openai",
271
+ endpointType: "maas",
272
+ contextWindow: 131072,
273
+ maxTokens: 32000,
274
+ input: ["text"],
275
+ reasoning: false,
276
+ tools: true,
277
+ cost: {
278
+ input: 0.07,
279
+ output: 0.25,
280
+ cacheRead: 0.007,
281
+ cacheWrite: 0,
282
+ },
283
+ region: "global",
284
+ },
285
+
286
+ // DeepSeek OCR
287
+ {
288
+ id: "deepseek-ocr",
289
+ name: "DeepSeek OCR",
290
+ apiId: "deepseek-ai/deepseek-ocr-maas",
291
+ publisher: "deepseek-ai",
292
+ endpointType: "maas",
293
+ contextWindow: 163840,
294
+ maxTokens: 32000,
295
+ input: ["text", "image"],
296
+ reasoning: false,
297
+ tools: false,
298
+ cost: {
299
+ input: 0.30, // Per page: $0.0003/page
300
+ output: 1.20, // Per page pricing
301
+ cacheRead: 0,
302
+ cacheWrite: 0,
303
+ },
304
+ region: "global",
305
+ },
306
+
307
+ // Qwen models
308
+ {
309
+ id: "qwen3-235b",
310
+ name: "Qwen 3 235B",
311
+ apiId: "qwen/qwen3-235b-a22b-instruct-2507-maas",
312
+ publisher: "qwen",
313
+ endpointType: "maas",
314
+ contextWindow: 262144,
315
+ maxTokens: 32000,
316
+ input: ["text"],
317
+ reasoning: true,
318
+ tools: true,
319
+ cost: {
320
+ input: 0.22,
321
+ output: 0.88,
322
+ cacheRead: 0,
323
+ cacheWrite: 0,
324
+ },
325
+ region: "global",
326
+ },
327
+ {
328
+ id: "qwen3-next-instruct",
329
+ name: "Qwen 3 Next Instruct",
330
+ apiId: "qwen/qwen3-next-instruct-80b-maas",
331
+ publisher: "qwen",
332
+ endpointType: "maas",
333
+ contextWindow: 262144,
334
+ maxTokens: 32000,
335
+ input: ["text"],
336
+ reasoning: true,
337
+ tools: true,
338
+ cost: {
339
+ input: 0.15,
340
+ output: 1.20,
341
+ cacheRead: 0,
342
+ cacheWrite: 0,
343
+ },
344
+ region: "global",
345
+ },
346
+ {
347
+ id: "qwen3-next-thinking",
348
+ name: "Qwen 3 Next Thinking",
349
+ apiId: "qwen/qwen3-next-thinking-80b-maas",
350
+ publisher: "qwen",
351
+ endpointType: "maas",
352
+ contextWindow: 262144,
353
+ maxTokens: 32000,
354
+ input: ["text"],
355
+ reasoning: true,
356
+ tools: true,
357
+ cost: {
358
+ input: 0.15,
359
+ output: 1.20,
360
+ cacheRead: 0,
361
+ cacheWrite: 0,
362
+ },
363
+ region: "global",
364
+ },
365
+ {
366
+ id: "qwen3-coder",
367
+ name: "Qwen 3 Coder",
368
+ apiId: "qwen/qwen3-coder-480b-a35b-instruct-maas",
369
+ publisher: "qwen",
370
+ endpointType: "maas",
371
+ contextWindow: 262144,
372
+ maxTokens: 32000,
373
+ input: ["text"],
374
+ reasoning: true,
375
+ tools: true,
376
+ cost: {
377
+ input: 0.22,
378
+ output: 1.80,
379
+ cacheRead: 0.022,
380
+ cacheWrite: 0,
381
+ },
382
+ region: "global",
383
+ },
384
+
385
+ // Other models
386
+ {
387
+ id: "kimi-k2-thinking",
388
+ name: "Kimi K2 Thinking",
389
+ apiId: "moonshotai/kimi-k2-thinking-maas",
390
+ publisher: "moonshotai",
391
+ endpointType: "maas",
392
+ contextWindow: 262144,
393
+ maxTokens: 32000,
394
+ input: ["text"],
395
+ reasoning: true,
396
+ tools: true,
397
+ cost: {
398
+ input: 0.60,
399
+ output: 2.50,
400
+ cacheRead: 0.06,
401
+ cacheWrite: 0,
402
+ },
403
+ region: "global",
404
+ },
405
+ {
406
+ id: "minimax-m2",
407
+ name: "MiniMax M2",
408
+ apiId: "minimaxai/minimax-m2-maas",
409
+ publisher: "minimaxai",
410
+ endpointType: "maas",
411
+ contextWindow: 196608,
412
+ maxTokens: 32000,
413
+ input: ["text"],
414
+ reasoning: true,
415
+ tools: true,
416
+ cost: {
417
+ input: 0.30,
418
+ output: 1.20,
419
+ cacheRead: 0.03,
420
+ cacheWrite: 0,
421
+ },
422
+ region: "global",
423
+ },
424
+ {
425
+ id: "glm-5",
426
+ name: "GLM 5",
427
+ apiId: "zai-org/glm-5-maas",
428
+ publisher: "zai-org",
429
+ endpointType: "maas",
430
+ contextWindow: 200000,
431
+ maxTokens: 32000,
432
+ input: ["text"],
433
+ reasoning: true,
434
+ tools: true,
435
+ cost: {
436
+ input: 1.00,
437
+ output: 3.20,
438
+ cacheRead: 0.10,
439
+ cacheWrite: 0,
440
+ },
441
+ region: "global",
442
+ },
443
+ {
444
+ id: "glm-4.7",
445
+ name: "GLM 4.7",
446
+ apiId: "zai-org/glm-4.7-maas",
447
+ publisher: "zai-org",
448
+ endpointType: "maas",
449
+ contextWindow: 200000,
450
+ maxTokens: 32000,
451
+ input: ["text"],
452
+ reasoning: true,
453
+ tools: true,
454
+ cost: {
455
+ input: 0.60,
456
+ output: 2.20,
457
+ cacheRead: 0,
458
+ cacheWrite: 0,
459
+ },
460
+ region: "global",
461
+ },
462
+ ];
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@ssweens/pi-vertex",
3
+ "version": "1.0.0",
4
+ "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
+ "type": "module",
6
+ "main": "index.ts",
7
+ "files": [
8
+ "index.ts",
9
+ "auth.ts",
10
+ "config.ts",
11
+ "types.ts",
12
+ "utils.ts",
13
+ "models/",
14
+ "streaming/",
15
+ "README.md",
16
+ "LICENSE",
17
+ "screenshot.png"
18
+ ],
19
+ "scripts": {
20
+ "clean": "echo 'nothing to clean'",
21
+ "build": "echo 'nothing to build'",
22
+ "check": "echo 'nothing to check'"
23
+ },
24
+ "dependencies": {
25
+ "@google/genai": "^1.42.0",
26
+ "google-auth-library": "^9.0.0"
27
+ },
28
+ "peerDependencies": {
29
+ "@mariozechner/pi-ai": "*",
30
+ "@mariozechner/pi-coding-agent": "*"
31
+ },
32
+ "pi": {
33
+ "extensions": [
34
+ "./index.ts"
35
+ ]
36
+ },
37
+ "keywords": [
38
+ "pi-package",
39
+ "pi-extension",
40
+ "vertex-ai",
41
+ "gemini",
42
+ "claude",
43
+ "maas"
44
+ ],
45
+ "author": "ssweens",
46
+ "license": "MIT"
47
+ }
package/screenshot.png ADDED
Binary file
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Gemini streaming handler using @google/genai SDK
3
+ */
4
+
5
+ import { GoogleGenAI } from "@google/genai";
6
+ import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
7
+ import { getAuthConfig, resolveLocation } from "../auth.js";
8
+ import { sanitizeText, convertToGeminiMessages, calculateCost } from "../utils.js";
9
+ import { createAssistantMessageEventStream, type AssistantMessageEventStream, type AssistantMessage } from "@mariozechner/pi-ai";
10
+
11
+ export function streamGemini(
12
+ model: VertexModelConfig,
13
+ context: Context,
14
+ options?: StreamOptions
15
+ ): AssistantMessageEventStream {
16
+ const stream = createAssistantMessageEventStream();
17
+
18
+ (async () => {
19
+ const output: AssistantMessage = {
20
+ role: "assistant",
21
+ content: [],
22
+ api: "google-generative-ai",
23
+ provider: "vertex",
24
+ model: model.id,
25
+ usage: {
26
+ input: 0,
27
+ output: 0,
28
+ cacheRead: 0,
29
+ cacheWrite: 0,
30
+ totalTokens: 0,
31
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
32
+ },
33
+ stopReason: "stop",
34
+ timestamp: Date.now(),
35
+ };
36
+
37
+ try {
38
+ // Priority: config file > env var > model region > default
39
+ const location = resolveLocation(model.region);
40
+ const auth = getAuthConfig(location);
41
+
42
+ // Create client
43
+ const client = new GoogleGenAI({
44
+ vertexai: true,
45
+ project: auth.projectId,
46
+ location: auth.location,
47
+ });
48
+
49
+ // Convert messages
50
+ const contents = convertToGeminiMessages(context.messages);
51
+
52
+ // Build config
53
+ const config: any = {
54
+ maxOutputTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
55
+ temperature: options?.temperature ?? 0.7,
56
+ };
57
+
58
+ // Add system prompt if present
59
+ if (context.systemPrompt) {
60
+ config.systemInstruction = sanitizeText(context.systemPrompt);
61
+ }
62
+
63
+ // Add tools if present
64
+ if (context.tools && context.tools.length > 0) {
65
+ config.tools = [
66
+ {
67
+ functionDeclarations: context.tools.map((tool) => ({
68
+ name: tool.name,
69
+ description: tool.description,
70
+ parameters: tool.parameters,
71
+ })),
72
+ },
73
+ ];
74
+ }
75
+
76
+ stream.push({ type: "start", partial: output });
77
+
78
+ // Start streaming
79
+ const response = await client.models.generateContentStream({
80
+ model: model.apiId,
81
+ contents,
82
+ config,
83
+ });
84
+
85
+ let textContent = "";
86
+ let textIndex = 0;
87
+
88
+ for await (const chunk of response) {
89
+ if (options?.signal?.aborted) {
90
+ throw new Error("Request was aborted");
91
+ }
92
+
93
+ // Update usage
94
+ if (chunk.usageMetadata) {
95
+ output.usage.input = chunk.usageMetadata.promptTokenCount || output.usage.input;
96
+ output.usage.output = chunk.usageMetadata.candidatesTokenCount || output.usage.output;
97
+ output.usage.totalTokens = chunk.usageMetadata.totalTokenCount ||
98
+ (output.usage.input + output.usage.output);
99
+ calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
100
+ }
101
+
102
+ // Handle text
103
+ const text = chunk.text;
104
+ if (text) {
105
+ if (!textContent) {
106
+ // First text chunk
107
+ output.content.push({ type: "text", text: "" });
108
+ textIndex = output.content.length - 1;
109
+ stream.push({ type: "text_start", contentIndex: textIndex, partial: output });
110
+ }
111
+ textContent += text;
112
+ (output.content[textIndex] as any).text = textContent;
113
+ stream.push({ type: "text_delta", contentIndex: textIndex, delta: text, partial: output });
114
+ }
115
+
116
+ // Handle function calls (tools)
117
+ if (chunk.functionCalls && chunk.functionCalls.length > 0) {
118
+ for (const call of chunk.functionCalls) {
119
+ output.content.push({
120
+ type: "toolCall",
121
+ id: call.id || `call_${Date.now()}`,
122
+ name: call.name,
123
+ arguments: call.args || {},
124
+ });
125
+ stream.push({
126
+ type: "toolcall_end",
127
+ contentIndex: output.content.length - 1,
128
+ toolCall: output.content[output.content.length - 1] as any,
129
+ partial: output,
130
+ });
131
+ }
132
+ }
133
+
134
+ // Handle finish reason
135
+ if (chunk.candidates && chunk.candidates[0]?.finishReason) {
136
+ const reason = chunk.candidates[0].finishReason;
137
+ if (reason === "STOP") {
138
+ output.stopReason = "stop";
139
+ } else if (reason === "MAX_TOKENS") {
140
+ output.stopReason = "length";
141
+ } else if (reason === "SAFETY") {
142
+ output.stopReason = "error";
143
+ output.errorMessage = "Content blocked by safety filters";
144
+ }
145
+ }
146
+ }
147
+
148
+ // End text if we had any
149
+ if (textContent) {
150
+ stream.push({ type: "text_end", contentIndex: textIndex, content: textContent, partial: output });
151
+ }
152
+
153
+ stream.push({ type: "done", reason: output.stopReason as any, message: output });
154
+ stream.end();
155
+ } catch (error) {
156
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
157
+ output.errorMessage = error instanceof Error ? error.message : String(error);
158
+ stream.push({ type: "error", reason: output.stopReason, error: output });
159
+ stream.end();
160
+ }
161
+ })();
162
+
163
+ return stream;
164
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Streaming handler dispatcher
3
+ */
4
+
5
+ import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
6
+ import type { AssistantMessageEventStream } from "@mariozechner/pi-ai";
7
+ import { streamGemini } from "./gemini.js";
8
+ import { streamMaaS } from "./maas.js";
9
+
10
+ export function streamVertex(
11
+ model: VertexModelConfig,
12
+ context: Context,
13
+ options?: StreamOptions
14
+ ): AssistantMessageEventStream {
15
+ switch (model.endpointType) {
16
+ case "gemini":
17
+ return streamGemini(model, context, options);
18
+ case "maas":
19
+ return streamMaaS(model, context, options);
20
+ default:
21
+ throw new Error(`Unknown endpoint type: ${(model as any).endpointType}`);
22
+ }
23
+ }
24
+
25
+ export { streamGemini, streamMaaS };