npm - @ssweens/pi-vertex - Versions diffs - 1.0.1 → 1.1.3 - Mend

@ssweens/pi-vertex 1.0.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/models/maas.ts CHANGED

@@ -1,13 +1,14 @@
 /**
  * MaaS (Model-as-a-Service) open model definitions for Vertex AI
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#open-models
- * All prices per 1M tokens (as of Feb 2025)
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
+ * All prices per 1M tokens
  */
 import type { VertexModelConfig } from "../types.js";
 export const MAAS_MODELS: VertexModelConfig[] = [
-  // Llama models (Meta)
+  // --- Meta Llama ---
   {
     id: "llama-4-maverick",
     name: "Llama 4 Maverick",
@@ -66,7 +67,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
-  // Mistral models
+  // --- Mistral AI ---
   {
     id: "mistral-medium-3",
     name: "Mistral Medium 3",
@@ -106,45 +107,45 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
   {
-    id: "mistral-ocr",
-    name: "Mistral OCR",
-    apiId: "mistralai/mistral-ocr-2505",
+    id: "codestral-2",
+    name: "Codestral 2",
+    apiId: "mistralai/codestral-2",
     publisher: "mistralai",
     endpointType: "maas",
-    contextWindow: 128000,
+    contextWindow: 256000,
     maxTokens: 32000,
-    input: ["text", "image"],
+    input: ["text"],
     reasoning: false,
-    tools: false,
+    tools: true,
     cost: {
-      input: 0.50,  // Per page: $0.0005/page, shown as approx per 1K pages
-      output: 0.50,  // Per page pricing
+      input: 0.30,
+      output: 0.90,
       cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
   },
   {
-    id: "codestral-2",
-    name: "Codestral 2",
-    apiId: "mistralai/codestral-2",
+    id: "mistral-ocr",
+    name: "Mistral OCR",
+    apiId: "mistralai/mistral-ocr-2505",
     publisher: "mistralai",
     endpointType: "maas",
-    contextWindow: 256000,
+    contextWindow: 128000,
     maxTokens: 32000,
-    input: ["text"],
+    input: ["text", "image"],
     reasoning: false,
-    tools: true,
+    tools: false,
     cost: {
-      input: 0.30,
-      output: 0.90,
+      input: 0.0005,
+      output: 0.0005,
       cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
   },
-  // DeepSeek models
+  // --- DeepSeek ---
   {
     id: "deepseek-v3.2",
     name: "DeepSeek V3.2",
@@ -202,48 +203,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
-  // AI21 Labs models
-  {
-    id: "jamba-1.5-large",
-    name: "Jamba 1.5 Large",
-    apiId: "ai21/jamba-1.5-large",
-    publisher: "ai21",
-    endpointType: "maas",
-    contextWindow: 256000,
-    maxTokens: 256000,
-    input: ["text"],
-    reasoning: false,
-    tools: true,
-    cost: {
-      input: 2.00,
-      output: 8.00,
-      cacheRead: 0,
-      cacheWrite: 0,
-    },
-    region: "global",
-  },
   {
-    id: "jamba-1.5-mini",
-    name: "Jamba 1.5 Mini",
-    apiId: "ai21/jamba-1.5-mini",
-    publisher: "ai21",
+    id: "deepseek-ocr",
+    name: "DeepSeek OCR",
+    apiId: "deepseek-ai/deepseek-ocr-maas",
+    publisher: "deepseek-ai",
     endpointType: "maas",
-    contextWindow: 256000,
-    maxTokens: 256000,
-    input: ["text"],
+    contextWindow: 163840,
+    maxTokens: 32000,
+    input: ["text", "image"],
     reasoning: false,
-    tools: true,
+    tools: false,
     cost: {
-      input: 0.20,
-      output: 0.40,
+      input: 0.30,
+      output: 1.20,
       cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
   },
-  // OpenAI models (gpt-oss)
+  // --- OpenAI (gpt-oss) ---
   {
     id: "gpt-oss-120b",
     name: "GPT-OSS 120B",
@@ -283,28 +263,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
-  // DeepSeek OCR
-  {
-    id: "deepseek-ocr",
-    name: "DeepSeek OCR",
-    apiId: "deepseek-ai/deepseek-ocr-maas",
-    publisher: "deepseek-ai",
-    endpointType: "maas",
-    contextWindow: 163840,
-    maxTokens: 32000,
-    input: ["text", "image"],
-    reasoning: false,
-    tools: false,
-    cost: {
-      input: 0.30,  // Per page: $0.0003/page
-      output: 1.20,  // Per page pricing
-      cacheRead: 0,
-      cacheWrite: 0,
-    },
-    region: "global",
-  },
-  // Qwen models
+  // --- Qwen ---
   {
     id: "qwen3-235b",
     name: "Qwen 3 235B",
@@ -382,7 +341,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
-  // Other models
+  // --- Moonshot ---
   {
     id: "kimi-k2-thinking",
     name: "Kimi K2 Thinking",
@@ -402,6 +361,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
+  // --- MiniMax ---
   {
     id: "minimax-m2",
     name: "MiniMax M2",
@@ -421,6 +382,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
+  // --- GLM (Zhipu AI) ---
   {
     id: "glm-5",
     name: "GLM 5",

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@ssweens/pi-vertex",
-  "version": "1.0.1",
+  "version": "1.1.3",
   "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
   "type": "module",
   "main": "index.ts",
@@ -13,6 +13,8 @@
     "models/",
     "streaming/",
     "README.md",
+    "CHANGELOG.md",
+    "TEST_COVERAGE.md",
     "LICENSE",
     "screenshot.png"
   ],
@@ -22,6 +24,7 @@
     "check": "echo 'nothing to check'"
   },
   "dependencies": {
+    "@anthropic-ai/vertex-sdk": "^0.14.4",
     "@google/genai": "^1.42.0",
     "google-auth-library": "^9.0.0"
   },

package/streaming/gemini.ts CHANGED

@@ -1,20 +1,49 @@
 /**
  * Gemini streaming handler using @google/genai SDK
+ *
+ * Aligned with pi-mono's google-vertex.ts for consistent handling of:
+ * - Thinking content (thought blocks with signatures)
+ * - Tool calls with unique IDs and deduplication
+ * - Thinking configuration (levels for Gemini 3, budgets for Gemini 2.5)
+ * - Usage tracking including thinking tokens
  */
-import { GoogleGenAI } from "@google/genai";
-import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
+import { GoogleGenAI, FinishReason, ThinkingLevel } from "@google/genai";
+import type { VertexModelConfig, Context, StreamOptions, AssistantMessage } from "../types.js";
 import { getAuthConfig, resolveLocation } from "../auth.js";
-import { sanitizeText, convertToGeminiMessages, calculateCost } from "../utils.js";
-import { createAssistantMessageEventStream, type AssistantMessageEventStream, type AssistantMessage } from "@mariozechner/pi-ai";
+import { sanitizeText, convertToGeminiMessages, convertToolsForGemini, retainThoughtSignature, calculateCost } from "../utils.js";
+import { createAssistantMessageEventStream, type AssistantMessageEventStream } from "@mariozechner/pi-ai";
+// Module-level counter for generating unique tool call IDs (matches pi-mono pattern)
+let toolCallCounter = 0;
+const THINKING_LEVEL_MAP: Record<string, ThinkingLevel> = {
+  minimal: ThinkingLevel.MINIMAL,
+  low: ThinkingLevel.LOW,
+  medium: ThinkingLevel.MEDIUM,
+  high: ThinkingLevel.HIGH,
+};
+function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
+  switch (reason) {
+    case FinishReason.STOP:
+      return "stop";
+    case FinishReason.MAX_TOKENS:
+      return "length";
+    case FinishReason.SAFETY:
+    case FinishReason.RECITATION:
+    default:
+      return "error";
+  }
+}
 export function streamGemini(
   model: VertexModelConfig,
   context: Context,
-  options?: StreamOptions
+  options?: StreamOptions,
 ): AssistantMessageEventStream {
   const stream = createAssistantMessageEventStream();
   (async () => {
     const output: AssistantMessage = {
       role: "assistant",
@@ -33,123 +62,203 @@ export function streamGemini(
       stopReason: "stop",
       timestamp: Date.now(),
     };
     try {
       // Priority: config file > env var > model region > default
       const location = resolveLocation(model.region);
       const auth = getAuthConfig(location);
-      // Create client
+      // Create client with explicit API version (matches pi-mono)
       const client = new GoogleGenAI({
         vertexai: true,
         project: auth.projectId,
         location: auth.location,
+        apiVersion: "v1",
       });
-      // Convert messages
-      const contents = convertToGeminiMessages(context.messages);
-      // Build config
+      // Convert messages with model ID for proper thinking/tool handling
+      const contents = convertToGeminiMessages(context.messages, model.apiId);
+      // Build config — only set temperature when explicitly provided
       const config: any = {
         maxOutputTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
-        temperature: options?.temperature ?? 0.7,
+        ...(options?.temperature !== undefined && { temperature: options.temperature }),
       };
       // Add system prompt if present
       if (context.systemPrompt) {
         config.systemInstruction = sanitizeText(context.systemPrompt);
       }
-      // Add tools if present
+      // Add tools if present (using parametersJsonSchema for full JSON Schema support)
       if (context.tools && context.tools.length > 0) {
-        config.tools = [
-          {
-            functionDeclarations: context.tools.map((tool) => ({
-              name: tool.name,
-              description: tool.description,
-              parameters: tool.parameters,
-            })),
-          },
-        ];
+        config.tools = convertToolsForGemini(context.tools);
+      }
+      // Add thinking configuration (matches pi-mono's buildParams logic)
+      if (model.reasoning && options?.reasoning) {
+        const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
+        const isGemini3 = model.apiId.startsWith("gemini-3");
+        const thinkingConfig: any = { includeThoughts: true };
+        if (isGemini3) {
+          // Gemini 3 models use thinking levels (MINIMAL/LOW/MEDIUM/HIGH)
+          thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[effort];
+        } else {
+          // Gemini 2.5 models use thinking budgets (token counts)
+          const budgets: Record<string, number> = {
+            minimal: 128,
+            low: 2048,
+            medium: 8192,
+            high: model.apiId.includes("2.5-pro") ? 32768 : 24576,
+          };
+          thinkingConfig.thinkingBudget = budgets[effort] ?? 8192;
+        }
+        config.thinkingConfig = thinkingConfig;
+      }
+      // Pass abort signal to SDK for in-flight cancellation
+      if (options?.signal) {
+        if (options.signal.aborted) {
+          throw new Error("Request aborted");
+        }
+        config.abortSignal = options.signal;
       }
       stream.push({ type: "start", partial: output });
       // Start streaming
       const response = await client.models.generateContentStream({
         model: model.apiId,
         contents,
         config,
       });
-      let textContent = "";
-      let textIndex = 0;
+      // Track current content block for thinking/text transitions
+      let currentBlock: any = null;
+      let currentBlockType: "text" | "thinking" | null = null;
       for await (const chunk of response) {
-        if (options?.signal?.aborted) {
-          throw new Error("Request was aborted");
-        }
-        // Update usage
-        if (chunk.usageMetadata) {
-          output.usage.input = chunk.usageMetadata.promptTokenCount || output.usage.input;
-          output.usage.output = chunk.usageMetadata.candidatesTokenCount || output.usage.output;
-          output.usage.totalTokens = chunk.usageMetadata.totalTokenCount ||
-            (output.usage.input + output.usage.output);
-          calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
-        }
-        // Handle text
-        const text = chunk.text;
-        if (text) {
-          if (!textContent) {
-            // First text chunk
-            output.content.push({ type: "text", text: "" });
-            textIndex = output.content.length - 1;
-            stream.push({ type: "text_start", contentIndex: textIndex, partial: output });
-          }
-          textContent += text;
-          (output.content[textIndex] as any).text = textContent;
-          stream.push({ type: "text_delta", contentIndex: textIndex, delta: text, partial: output });
-        }
-        // Handle function calls (tools)
-        if (chunk.functionCalls && chunk.functionCalls.length > 0) {
-          for (const call of chunk.functionCalls) {
-            output.content.push({
-              type: "toolCall",
-              id: call.id || `call_${Date.now()}`,
-              name: call.name,
-              arguments: call.args || {},
-            });
-            stream.push({
-              type: "toolcall_end",
-              contentIndex: output.content.length - 1,
-              toolCall: output.content[output.content.length - 1] as any,
-              partial: output,
-            });
+        const candidate = chunk.candidates?.[0];
+        // Process individual parts (handles thinking vs text detection)
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if (part.text !== undefined) {
+              const isThinking = part.thought === true;
+              const targetType = isThinking ? "thinking" : "text";
+              // Check if we need to transition to a new block
+              if (currentBlockType !== targetType) {
+                // End previous block
+                if (currentBlock && currentBlockType) {
+                  if (currentBlockType === "text") {
+                    stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
+                  } else {
+                    stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
+                  }
+                }
+                // Start new block
+                if (isThinking) {
+                  currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
+                  output.content.push(currentBlock);
+                  stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
+                } else {
+                  currentBlock = { type: "text", text: "", textSignature: undefined };
+                  output.content.push(currentBlock);
+                  stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
+                }
+                currentBlockType = targetType;
+              }
+              // Accumulate content
+              if (currentBlockType === "thinking") {
+                currentBlock.thinking += part.text;
+                currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
+                stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
+              } else {
+                currentBlock.text += part.text;
+                currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
+                stream.push({ type: "text_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
+              }
+            }
+            if (part.functionCall) {
+              // End current text/thinking block before tool call
+              if (currentBlock && currentBlockType) {
+                if (currentBlockType === "text") {
+                  stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
+                } else {
+                  stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
+                }
+                currentBlock = null;
+                currentBlockType = null;
+              }
+              // Generate unique tool call ID with dedup (matches pi-mono pattern)
+              const providedId = part.functionCall.id;
+              const needsNewId =
+                !providedId || output.content.some((b: any) => b.type === "toolCall" && b.id === providedId);
+              const toolCallId = needsNewId
+                ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
+                : providedId;
+              const toolCall = {
+                type: "toolCall" as const,
+                id: toolCallId,
+                name: part.functionCall.name || "",
+                arguments: (part.functionCall.args as Record<string, any>) ?? {},
+                ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
+              };
+              output.content.push(toolCall);
+              const idx = output.content.length - 1;
+              stream.push({ type: "toolcall_start", contentIndex: idx, partial: output });
+              stream.push({ type: "toolcall_delta", contentIndex: idx, delta: JSON.stringify(toolCall.arguments), partial: output });
+              stream.push({ type: "toolcall_end", contentIndex: idx, toolCall, partial: output });
+            }
           }
         }
         // Handle finish reason
-        if (chunk.candidates && chunk.candidates[0]?.finishReason) {
-          const reason = chunk.candidates[0].finishReason;
-          if (reason === "STOP") {
-            output.stopReason = "stop";
-          } else if (reason === "MAX_TOKENS") {
-            output.stopReason = "length";
-          } else if (reason === "SAFETY") {
-            output.stopReason = "error";
+        if (candidate?.finishReason) {
+          output.stopReason = mapGeminiStopReason(candidate.finishReason);
+          if (candidate.finishReason === FinishReason.SAFETY) {
             output.errorMessage = "Content blocked by safety filters";
           }
+          // Override to toolUse if any tool calls are present (matches pi-mono)
+          if (output.content.some((b: any) => b.type === "toolCall")) {
+            output.stopReason = "toolUse";
+          }
+        }
+        // Update usage — include thoughtsTokenCount in output (matches pi-mono)
+        if (chunk.usageMetadata) {
+          const meta = chunk.usageMetadata as any;
+          output.usage = {
+            input: meta.promptTokenCount || 0,
+            output: (meta.candidatesTokenCount || 0) + (meta.thoughtsTokenCount || 0),
+            cacheRead: meta.cachedContentTokenCount || 0,
+            cacheWrite: 0,
+            totalTokens: meta.totalTokenCount || 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+          };
+          calculateCost(model.cost.input, model.cost.output, model.cost.cacheRead, model.cost.cacheWrite, output.usage);
         }
       }
-      // End text if we had any
-      if (textContent) {
-        stream.push({ type: "text_end", contentIndex: textIndex, content: textContent, partial: output });
+      // End final block
+      if (currentBlock && currentBlockType) {
+        if (currentBlockType === "text") {
+          stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
+        } else {
+          stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
+        }
       }
       stream.push({ type: "done", reason: output.stopReason as any, message: output });
       stream.end();
     } catch (error) {
@@ -159,6 +268,6 @@ export function streamGemini(
       stream.end();
     }
   })();
   return stream;
 }