npm - @ssweens/pi-vertex - Versions diffs - 1.0.0 → 1.1.1 - Mend

@ssweens/pi-vertex 1.0.0 → 1.1.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md CHANGED Viewed

@@ -22,10 +22,11 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
   - **Other MaaS** (20): AI21 Jamba, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
 - **Unified streaming**: Single provider, multiple model families
-- **Full tool calling support**: All models marked with tools support
+- **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
+- **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
 - **Automatic auth**: Uses Google Application Default Credentials
 - **Region awareness**: Global endpoints where supported, regional where required
-- **Pricing tracking**: Built-in cost per token for all models
+- **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
 ## Installation
@@ -115,14 +116,15 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
 | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) |
 |-------|---------|------------|-------|-----------|----------------|
-| gemini-3.1-pro | 1M | 64,000 | text, image | ✅ | $2.00/$12.00 |
-| gemini-3-pro | 2M | 8,192 | text, image | ✅ | $1.25/$10.00 |
-| gemini-3-flash | 1M | 8,192 | text, image | ✅ | $0.15/$0.60 |
-| gemini-2.5-pro | 1M | 64,000 | text, image | ✅ | $1.25/$10.00 |
-| gemini-2.5-flash | 1M | 64,000 | text, image | ✅ | $0.30/$2.50 |
-| gemini-2.5-flash-lite | 1M | 64,000 | text, image | ✅ | $0.10/$0.40 |
+| gemini-3.1-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
+| gemini-3.1-flash-lite | 1M | 65,535 | text, image | ✅ | $0.25/$1.50 |
+| gemini-3-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
+| gemini-3-flash | 1M | 65,536 | text, image | ✅ | $0.50/$3.00 |
+| gemini-2.5-pro | 1M | 65,536 | text, image | ✅ | $1.25/$10.00 |
+| gemini-2.5-flash | 1M | 65,536 | text, image | ✅ | $0.30/$2.50 |
+| gemini-2.5-flash-lite | 1M | 65,536 | text, image | ✅ | $0.10/$0.40 |
 | gemini-2.0-flash | 1M | 8,192 | text, image | ❌ | $0.15/$0.60 |
-| gemini-2.0-flash-lite | 1M | 8,192 | text | ❌ | $0.075/$0.30 |
+| gemini-2.0-flash-lite | 1M | 8,192 | text, image | ❌ | $0.075/$0.30 |
 ### Claude Models
@@ -133,13 +135,10 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
 | claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
 | claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
 | claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
-| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
-| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
-| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | us-east5 |
-| claude-3-7-sonnet | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | us-east5 |
-| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
-| claude-3-5-sonnet | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
-| claude-3-haiku | 200K | 4,096 | text | ❌ | $0.25/$1.25 | us-east5 |
+| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
+| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
+| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
+| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | global |
 ### Llama Models
@@ -153,8 +152,6 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
 | Model | Context | Publisher | Price (in/out) | Region |
 |-------|---------|-----------|----------------|--------|
-| jamba-1.5-large | 256K | ai21 | $2.00/$8.00 | global |
-| jamba-1.5-mini | 256K | ai21 | $0.20/$0.40 | global |
 | mistral-medium-3 | 128K | mistralai | $0.40/$2.00 | global |
 | mistral-small-3.1 | 128K | mistralai | $0.10/$0.30 | global |
 | mistral-ocr | 30 pages | mistralai | $0.0005/page | global |
@@ -183,8 +180,7 @@ Models use different endpoints based on availability:
 Default regions by model:
 - Gemini: `global`
-- Claude 4.6/4.5: `global`
-- Claude 4/4.1/3.7/3.5/3: `us-east5`
+- Claude (all): `global`
 - MaaS: `global`
 Override with:

package/models/claude.ts CHANGED Viewed

@@ -1,14 +1,15 @@
 /**
  * Claude model definitions for Vertex AI
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
  * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
- * All prices per 1M tokens (as of Feb 2025)
- * Cache write prices shown are for 5-minute TTL
+ * All prices per 1M tokens (global endpoint, <= 200K input tokens)
+ * Cache write prices are for 5-minute TTL
  */
 import type { VertexModelConfig } from "../types.js";
 export const CLAUDE_MODELS: VertexModelConfig[] = [
-  // Claude 4.6 series - latest, supports global endpoint
+  // Claude 4.6 series
   {
     id: "claude-opus-4-6",
     name: "Claude Opus 4.6",
@@ -47,7 +48,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
-  // Claude 4.5 series - supports global endpoint
+  // Claude 4.5 series
   {
     id: "claude-opus-4-5",
     name: "Claude Opus 4.5",
@@ -105,7 +107,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
-  // Claude 4.1 series - regional pricing
+  // Claude 4.1 series
   {
     id: "claude-opus-4-1",
     name: "Claude Opus 4.1",
@@ -123,9 +126,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
       cacheRead: 1.50,
       cacheWrite: 18.75,
     },
-    region: "us-east5",
+    region: "global",
   },
-  // Claude 4.0 series - regional pricing
+  // Claude 4.0 series
   {
     id: "claude-opus-4",
     name: "Claude Opus 4",
@@ -143,7 +147,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
       cacheRead: 1.50,
       cacheWrite: 18.75,
     },
-    region: "us-east5",
+    region: "global",
   },
   {
     id: "claude-sonnet-4",
@@ -162,29 +166,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
       cacheRead: 0.30,
       cacheWrite: 3.75,
     },
-    region: "us-east5",
-  },
-  // Claude 3.7 series - regional pricing
-  {
-    id: "claude-3-7-sonnet",
-    name: "Claude 3.7 Sonnet",
-    apiId: "claude-3-7-sonnet@20250219",
-    publisher: "anthropic",
-    endpointType: "maas",
-    contextWindow: 200000,
-    maxTokens: 64000,
-    input: ["text", "image"],
-    reasoning: true,
-    tools: true,
-    cost: {
-      input: 3.0,
-      output: 15.0,
-      cacheRead: 0.3,
-      cacheWrite: 3.75,
-    },
-    region: "us-east5",
+    region: "global",
   },
-  // Claude 3.5 series - regional pricing
+  // Claude 3.5 series
   {
     id: "claude-3-5-sonnet-v2",
     name: "Claude 3.5 Sonnet v2",
@@ -197,50 +182,11 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
     reasoning: false,
     tools: true,
     cost: {
-      input: 3.0,
-      output: 15.0,
-      cacheRead: 0.3,
-      cacheWrite: 3.75,
-    },
-    region: "us-east5",
-  },
-  {
-    id: "claude-3-5-sonnet",
-    name: "Claude 3.5 Sonnet",
-    apiId: "claude-3-5-sonnet@20240620",
-    publisher: "anthropic",
-    endpointType: "maas",
-    contextWindow: 200000,
-    maxTokens: 8192,
-    input: ["text", "image"],
-    reasoning: false,
-    tools: true,
-    cost: {
-      input: 3.0,
-      output: 15.0,
-      cacheRead: 0.3,
+      input: 3.00,
+      output: 15.00,
+      cacheRead: 0.30,
       cacheWrite: 3.75,
     },
-    region: "us-east5",
-  },
-  // Claude 3 Haiku - regional pricing
-  {
-    id: "claude-3-haiku",
-    name: "Claude 3 Haiku",
-    apiId: "claude-3-haiku@20240307",
-    publisher: "anthropic",
-    endpointType: "maas",
-    contextWindow: 200000,
-    maxTokens: 4096,
-    input: ["text"],
-    reasoning: false,
-    tools: true,
-    cost: {
-      input: 0.25,
-      output: 1.25,
-      cacheRead: 0.03,
-      cacheWrite: 0.3,
-    },
-    region: "us-east5",
+    region: "global",
   },
 ];

package/models/gemini.ts CHANGED Viewed

@@ -1,46 +1,69 @@
 /**
  * Gemini model definitions for Vertex AI
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models
- * All prices per 1M tokens (Standard tier pricing, as of Feb 2026)
+ * Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing
+ * All prices per 1M tokens (standard tier, <= 200K input tokens)
  */
 import type { VertexModelConfig } from "../types.js";
 export const GEMINI_MODELS: VertexModelConfig[] = [
+  // --- Gemini 3.1 (Preview) ---
   {
     id: "gemini-3.1-pro",
     name: "Gemini 3.1 Pro",
     apiId: "gemini-3.1-pro-preview",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
-    maxTokens: 64000,
+    contextWindow: 1048576,
+    maxTokens: 65536,
     input: ["text", "image"],
     reasoning: true,
     tools: true,
     cost: {
       input: 2.00,
       output: 12.00,
-      cacheRead: 0,
+      cacheRead: 0.20,
       cacheWrite: 0,
     },
     region: "global",
   },
+  {
+    id: "gemini-3.1-flash-lite",
+    name: "Gemini 3.1 Flash Lite",
+    apiId: "gemini-3.1-flash-lite-preview",
+    publisher: "google",
+    endpointType: "gemini",
+    contextWindow: 1048576,
+    maxTokens: 65535,
+    input: ["text", "image"],
+    reasoning: true,
+    tools: true,
+    cost: {
+      input: 0.25,
+      output: 1.50,
+      cacheRead: 0.025,
+      cacheWrite: 0,
+    },
+    region: "global",
+  },
+  // --- Gemini 3 (Preview) ---
   {
     id: "gemini-3-pro",
     name: "Gemini 3 Pro",
     apiId: "gemini-3-pro-preview",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 2000000,
-    maxTokens: 8192,
+    contextWindow: 1048576,
+    maxTokens: 65536,
     input: ["text", "image"],
     reasoning: true,
     tools: true,
     cost: {
-      input: 1.25,
-      output: 10.00,
-      cacheRead: 0.125,
+      input: 2.00,
+      output: 12.00,
+      cacheRead: 0.20,
       cacheWrite: 0,
     },
     region: "global",
@@ -51,27 +74,29 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
     apiId: "gemini-3-flash-preview",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
-    maxTokens: 8192,
+    contextWindow: 1048576,
+    maxTokens: 65536,
     input: ["text", "image"],
     reasoning: true,
     tools: true,
     cost: {
-      input: 0.15,
-      output: 0.60,
-      cacheRead: 0.0375,
+      input: 0.50,
+      output: 3.00,
+      cacheRead: 0.05,
       cacheWrite: 0,
     },
     region: "global",
   },
+  // --- Gemini 2.5 (GA) ---
   {
     id: "gemini-2.5-pro",
     name: "Gemini 2.5 Pro",
     apiId: "gemini-2.5-pro",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
-    maxTokens: 64000,
+    contextWindow: 1048576,
+    maxTokens: 65536,
     input: ["text", "image"],
     reasoning: true,
     tools: true,
@@ -89,15 +114,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
     apiId: "gemini-2.5-flash",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
-    maxTokens: 64000,
+    contextWindow: 1048576,
+    maxTokens: 65536,
     input: ["text", "image"],
     reasoning: true,
     tools: true,
     cost: {
       input: 0.30,
       output: 2.50,
-      cacheRead: 0.030,
+      cacheRead: 0.03,
       cacheWrite: 0,
     },
     region: "global",
@@ -108,26 +133,28 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
     apiId: "gemini-2.5-flash-lite",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
-    maxTokens: 64000,
+    contextWindow: 1048576,
+    maxTokens: 65536,
     input: ["text", "image"],
     reasoning: true,
     tools: true,
     cost: {
       input: 0.10,
       output: 0.40,
-      cacheRead: 0.010,
+      cacheRead: 0.01,
       cacheWrite: 0,
     },
     region: "global",
   },
+  // --- Gemini 2.0 (GA) ---
   {
     id: "gemini-2.0-flash",
     name: "Gemini 2.0 Flash",
     apiId: "gemini-2.0-flash",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
+    contextWindow: 1048576,
     maxTokens: 8192,
     input: ["text", "image"],
     reasoning: false,
@@ -135,7 +162,7 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
     cost: {
       input: 0.15,
       output: 0.60,
-      cacheRead: 0.025,
+      cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
@@ -146,15 +173,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
     apiId: "gemini-2.0-flash-lite",
     publisher: "google",
     endpointType: "gemini",
-    contextWindow: 1000000,
+    contextWindow: 1048576,
     maxTokens: 8192,
-    input: ["text"],
+    input: ["text", "image"],
     reasoning: false,
     tools: true,
     cost: {
       input: 0.075,
       output: 0.30,
-      cacheRead: 0.01875,
+      cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",

package/models/index.ts CHANGED Viewed

@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
   ...GEMINI_MODELS,
   ...CLAUDE_MODELS,
   ...MAAS_MODELS,
-];
+].sort((a, b) => a.name.localeCompare(b.name));
 export function getModelById(id: string): VertexModelConfig | undefined {
   return ALL_MODELS.find((m) => m.id === id);

package/models/maas.ts CHANGED Viewed

@@ -1,13 +1,14 @@
 /**
  * MaaS (Model-as-a-Service) open model definitions for Vertex AI
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#open-models
- * All prices per 1M tokens (as of Feb 2025)
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
+ * All prices per 1M tokens
  */
 import type { VertexModelConfig } from "../types.js";
 export const MAAS_MODELS: VertexModelConfig[] = [
-  // Llama models (Meta)
+  // --- Meta Llama ---
   {
     id: "llama-4-maverick",
     name: "Llama 4 Maverick",
@@ -66,7 +67,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
-  // Mistral models
+  // --- Mistral AI ---
   {
     id: "mistral-medium-3",
     name: "Mistral Medium 3",
@@ -106,45 +107,45 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
   {
-    id: "mistral-ocr",
-    name: "Mistral OCR",
-    apiId: "mistralai/mistral-ocr-2505",
+    id: "codestral-2",
+    name: "Codestral 2",
+    apiId: "mistralai/codestral-2",
     publisher: "mistralai",
     endpointType: "maas",
-    contextWindow: 128000,
+    contextWindow: 256000,
     maxTokens: 32000,
-    input: ["text", "image"],
+    input: ["text"],
     reasoning: false,
-    tools: false,
+    tools: true,
     cost: {
-      input: 0.50,  // Per page: $0.0005/page, shown as approx per 1K pages
-      output: 0.50,  // Per page pricing
+      input: 0.30,
+      output: 0.90,
       cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
   },
   {
-    id: "codestral-2",
-    name: "Codestral 2",
-    apiId: "mistralai/codestral-2",
+    id: "mistral-ocr",
+    name: "Mistral OCR",
+    apiId: "mistralai/mistral-ocr-2505",
     publisher: "mistralai",
     endpointType: "maas",
-    contextWindow: 256000,
+    contextWindow: 128000,
     maxTokens: 32000,
-    input: ["text"],
+    input: ["text", "image"],
     reasoning: false,
-    tools: true,
+    tools: false,
     cost: {
-      input: 0.30,
-      output: 0.90,
+      input: 0.0005,
+      output: 0.0005,
       cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
   },
-  // DeepSeek models
+  // --- DeepSeek ---
   {
     id: "deepseek-v3.2",
     name: "DeepSeek V3.2",
@@ -202,48 +203,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
-  // AI21 Labs models
-  {
-    id: "jamba-1.5-large",
-    name: "Jamba 1.5 Large",
-    apiId: "ai21/jamba-1.5-large",
-    publisher: "ai21",
-    endpointType: "maas",
-    contextWindow: 256000,
-    maxTokens: 256000,
-    input: ["text"],
-    reasoning: false,
-    tools: true,
-    cost: {
-      input: 2.00,
-      output: 8.00,
-      cacheRead: 0,
-      cacheWrite: 0,
-    },
-    region: "global",
-  },
   {
-    id: "jamba-1.5-mini",
-    name: "Jamba 1.5 Mini",
-    apiId: "ai21/jamba-1.5-mini",
-    publisher: "ai21",
+    id: "deepseek-ocr",
+    name: "DeepSeek OCR",
+    apiId: "deepseek-ai/deepseek-ocr-maas",
+    publisher: "deepseek-ai",
     endpointType: "maas",
-    contextWindow: 256000,
-    maxTokens: 256000,
-    input: ["text"],
+    contextWindow: 163840,
+    maxTokens: 32000,
+    input: ["text", "image"],
     reasoning: false,
-    tools: true,
+    tools: false,
     cost: {
-      input: 0.20,
-      output: 0.40,
+      input: 0.30,
+      output: 1.20,
       cacheRead: 0,
       cacheWrite: 0,
     },
     region: "global",
   },
-  // OpenAI models (gpt-oss)
+  // --- OpenAI (gpt-oss) ---
   {
     id: "gpt-oss-120b",
     name: "GPT-OSS 120B",
@@ -283,28 +263,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
-  // DeepSeek OCR
-  {
-    id: "deepseek-ocr",
-    name: "DeepSeek OCR",
-    apiId: "deepseek-ai/deepseek-ocr-maas",
-    publisher: "deepseek-ai",
-    endpointType: "maas",
-    contextWindow: 163840,
-    maxTokens: 32000,
-    input: ["text", "image"],
-    reasoning: false,
-    tools: false,
-    cost: {
-      input: 0.30,  // Per page: $0.0003/page
-      output: 1.20,  // Per page pricing
-      cacheRead: 0,
-      cacheWrite: 0,
-    },
-    region: "global",
-  },
-  // Qwen models
+  // --- Qwen ---
   {
     id: "qwen3-235b",
     name: "Qwen 3 235B",
@@ -382,7 +341,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     region: "global",
   },
-  // Other models
+  // --- Moonshot ---
   {
     id: "kimi-k2-thinking",
     name: "Kimi K2 Thinking",
@@ -402,6 +361,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
+  // --- MiniMax ---
   {
     id: "minimax-m2",
     name: "MiniMax M2",
@@ -421,6 +382,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
     },
     region: "global",
   },
+  // --- GLM (Zhipu AI) ---
   {
     id: "glm-5",
     name: "GLM 5",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ssweens/pi-vertex",
-  "version": "1.0.0",
+  "version": "1.1.1",
   "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
   "type": "module",
   "main": "index.ts",