@ssweens/pi-vertex 1.0.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.1.3] - 2026-03-26
6
+ ### Fixed
7
+ - Hardened Claude-on-Vertex replay for mid-session model switching (tool ID normalization, tool result adjacency, thinking signature validation).
8
+ - Prevented Anthropic tool replay errors by inserting synthetic tool results when missing.
9
+
10
+ ### Changed
11
+ - Claude 4.6 models use native Anthropic Vertex SDK streaming.
12
+ - Claude 4.6 context window updated to 1M.
13
+ - Model list order in the selector is now alphabetized by ID.
14
+
15
+ ## [1.1.2] - 2026-03-24
16
+ ### Added
17
+ - Initial Claude 4.x support on Vertex.
package/README.md CHANGED
@@ -22,10 +22,11 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
22
22
  - **Other MaaS** (20): AI21 Jamba, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
23
23
 
24
24
  - **Unified streaming**: Single provider, multiple model families
25
- - **Full tool calling support**: All models marked with tools support
25
+ - **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
26
+ - **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
26
27
  - **Automatic auth**: Uses Google Application Default Credentials
27
28
  - **Region awareness**: Global endpoints where supported, regional where required
28
- - **Pricing tracking**: Built-in cost per token for all models
29
+ - **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
29
30
 
30
31
  ## Installation
31
32
 
@@ -115,31 +116,29 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
115
116
 
116
117
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) |
117
118
  |-------|---------|------------|-------|-----------|----------------|
118
- | gemini-3.1-pro | 1M | 64,000 | text, image | ✅ | $2.00/$12.00 |
119
- | gemini-3-pro | 2M | 8,192 | text, image | ✅ | $1.25/$10.00 |
120
- | gemini-3-flash | 1M | 8,192 | text, image | ✅ | $0.15/$0.60 |
121
- | gemini-2.5-pro | 1M | 64,000 | text, image | ✅ | $1.25/$10.00 |
122
- | gemini-2.5-flash | 1M | 64,000 | text, image | ✅ | $0.30/$2.50 |
123
- | gemini-2.5-flash-lite | 1M | 64,000 | text, image | ✅ | $0.10/$0.40 |
119
+ | gemini-3.1-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
120
+ | gemini-3.1-flash-lite | 1M | 65,535 | text, image | ✅ | $0.25/$1.50 |
121
+ | gemini-3-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
122
+ | gemini-3-flash | 1M | 65,536 | text, image | ✅ | $0.50/$3.00 |
123
+ | gemini-2.5-pro | 1M | 65,536 | text, image | ✅ | $1.25/$10.00 |
124
+ | gemini-2.5-flash | 1M | 65,536 | text, image | ✅ | $0.30/$2.50 |
125
+ | gemini-2.5-flash-lite | 1M | 65,536 | text, image | ✅ | $0.10/$0.40 |
124
126
  | gemini-2.0-flash | 1M | 8,192 | text, image | ❌ | $0.15/$0.60 |
125
- | gemini-2.0-flash-lite | 1M | 8,192 | text | ❌ | $0.075/$0.30 |
127
+ | gemini-2.0-flash-lite | 1M | 8,192 | text, image | ❌ | $0.075/$0.30 |
126
128
 
127
129
  ### Claude Models
128
130
 
129
131
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) | Region |
130
132
  |-------|---------|------------|-------|-----------|----------------|--------|
131
- | claude-opus-4-6 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
132
- | claude-sonnet-4-6 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
133
+ | claude-opus-4-6 | 1M | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
+ | claude-sonnet-4-6 | 1M | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
133
135
  | claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
136
  | claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
135
137
  | claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
136
- | claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
137
- | claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
138
- | claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | us-east5 |
139
- | claude-3-7-sonnet | 200K | 64,000 | text, image | | $3.00/$15.00 | us-east5 |
140
- | claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
141
- | claude-3-5-sonnet | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
142
- | claude-3-haiku | 200K | 4,096 | text | ❌ | $0.25/$1.25 | us-east5 |
138
+ | claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
139
+ | claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
140
+ | claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
141
+ | claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | global |
143
142
 
144
143
  ### Llama Models
145
144
 
@@ -153,8 +152,6 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
153
152
 
154
153
  | Model | Context | Publisher | Price (in/out) | Region |
155
154
  |-------|---------|-----------|----------------|--------|
156
- | jamba-1.5-large | 256K | ai21 | $2.00/$8.00 | global |
157
- | jamba-1.5-mini | 256K | ai21 | $0.20/$0.40 | global |
158
155
  | mistral-medium-3 | 128K | mistralai | $0.40/$2.00 | global |
159
156
  | mistral-small-3.1 | 128K | mistralai | $0.10/$0.30 | global |
160
157
  | mistral-ocr | 30 pages | mistralai | $0.0005/page | global |
@@ -183,8 +180,7 @@ Models use different endpoints based on availability:
183
180
 
184
181
  Default regions by model:
185
182
  - Gemini: `global`
186
- - Claude 4.6/4.5: `global`
187
- - Claude 4/4.1/3.7/3.5/3: `us-east5`
183
+ - Claude (all): `global`
188
184
  - MaaS: `global`
189
185
 
190
186
  Override with:
@@ -217,6 +213,7 @@ export GOOGLE_CLOUD_LOCATION=us-central1
217
213
  ## Dependencies
218
214
 
219
215
  - `@google/genai`: Google GenAI SDK for Gemini models
216
+ - `@anthropic-ai/vertex-sdk`: Official Anthropic-on-Vertex SDK for Claude models (native streaming)
220
217
  - `google-auth-library`: ADC authentication for all models
221
218
  - `@mariozechner/pi-ai`: Peer dependency
222
219
  - `@mariozechner/pi-coding-agent`: Peer dependency
package/TESTING.md ADDED
@@ -0,0 +1,13 @@
1
+ # Test Coverage
2
+
3
+ ## Current Status
4
+ - Automated tests: not yet implemented in this package.
5
+ - Lint/type checks: `npm run check` (currently a no-op placeholder).
6
+
7
+ ## Manual Verification
8
+ - Claude 4.6 streaming verified via Anthropic Vertex SDK.
9
+ - Mid-session model switching (tool call replay) verified interactively in pi.
10
+
11
+ ## Gaps / Next Steps
12
+ - Add automated integration tests for Anthropic Vertex streaming and tool replay.
13
+ - Add unit tests for message normalization and replay sequencing.
package/index.ts CHANGED
@@ -112,8 +112,8 @@ export default function (pi: ExtensionAPI) {
112
112
 
113
113
  // Show startup info as a widget that clears on first user input
114
114
  const vertexStartupLines = [
115
- `[pi-vertex] Initializing with project: ${projectId}`,
116
- `[pi-vertex] Registered ${ALL_MODELS.length} models`,
115
+ ` [pi-vertex] Initializing with project: ${projectId}`,
116
+ ` [pi-vertex] Registered ${ALL_MODELS.length} models`,
117
117
  ];
118
118
  pi.on("session_start", async (_event, ctx) => {
119
119
  ctx.ui.setWidget("pi-vertex-startup", (_tui, theme) => ({
package/models/claude.ts CHANGED
@@ -1,21 +1,22 @@
1
1
  /**
2
2
  * Claude model definitions for Vertex AI
3
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
3
4
  * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
4
- * All prices per 1M tokens (as of Feb 2025)
5
- * Cache write prices shown are for 5-minute TTL
5
+ * All prices per 1M tokens (global endpoint, <= 200K input tokens)
6
+ * Cache write prices are for 5-minute TTL
6
7
  */
7
8
 
8
9
  import type { VertexModelConfig } from "../types.js";
9
10
 
10
11
  export const CLAUDE_MODELS: VertexModelConfig[] = [
11
- // Claude 4.6 series - latest, supports global endpoint
12
+ // Claude 4.6 series
12
13
  {
13
14
  id: "claude-opus-4-6",
14
15
  name: "Claude Opus 4.6",
15
16
  apiId: "claude-opus-4-6",
16
17
  publisher: "anthropic",
17
18
  endpointType: "maas",
18
- contextWindow: 200000,
19
+ contextWindow: 1000000,
19
20
  maxTokens: 32000,
20
21
  input: ["text", "image"],
21
22
  reasoning: true,
@@ -34,7 +35,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
34
35
  apiId: "claude-sonnet-4-6",
35
36
  publisher: "anthropic",
36
37
  endpointType: "maas",
37
- contextWindow: 200000,
38
+ contextWindow: 1000000,
38
39
  maxTokens: 64000,
39
40
  input: ["text", "image"],
40
41
  reasoning: true,
@@ -47,7 +48,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
47
48
  },
48
49
  region: "global",
49
50
  },
50
- // Claude 4.5 series - supports global endpoint
51
+
52
+ // Claude 4.5 series
51
53
  {
52
54
  id: "claude-opus-4-5",
53
55
  name: "Claude Opus 4.5",
@@ -105,7 +107,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
105
107
  },
106
108
  region: "global",
107
109
  },
108
- // Claude 4.1 series - regional pricing
110
+
111
+ // Claude 4.1 series
109
112
  {
110
113
  id: "claude-opus-4-1",
111
114
  name: "Claude Opus 4.1",
@@ -123,9 +126,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
123
126
  cacheRead: 1.50,
124
127
  cacheWrite: 18.75,
125
128
  },
126
- region: "us-east5",
129
+ region: "global",
127
130
  },
128
- // Claude 4.0 series - regional pricing
131
+
132
+ // Claude 4.0 series
129
133
  {
130
134
  id: "claude-opus-4",
131
135
  name: "Claude Opus 4",
@@ -143,7 +147,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
143
147
  cacheRead: 1.50,
144
148
  cacheWrite: 18.75,
145
149
  },
146
- region: "us-east5",
150
+ region: "global",
147
151
  },
148
152
  {
149
153
  id: "claude-sonnet-4",
@@ -162,29 +166,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
162
166
  cacheRead: 0.30,
163
167
  cacheWrite: 3.75,
164
168
  },
165
- region: "us-east5",
166
- },
167
- // Claude 3.7 series - regional pricing
168
- {
169
- id: "claude-3-7-sonnet",
170
- name: "Claude 3.7 Sonnet",
171
- apiId: "claude-3-7-sonnet@20250219",
172
- publisher: "anthropic",
173
- endpointType: "maas",
174
- contextWindow: 200000,
175
- maxTokens: 64000,
176
- input: ["text", "image"],
177
- reasoning: true,
178
- tools: true,
179
- cost: {
180
- input: 3.0,
181
- output: 15.0,
182
- cacheRead: 0.3,
183
- cacheWrite: 3.75,
184
- },
185
- region: "us-east5",
169
+ region: "global",
186
170
  },
187
- // Claude 3.5 series - regional pricing
171
+
172
+ // Claude 3.5 series
188
173
  {
189
174
  id: "claude-3-5-sonnet-v2",
190
175
  name: "Claude 3.5 Sonnet v2",
@@ -197,50 +182,11 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
197
182
  reasoning: false,
198
183
  tools: true,
199
184
  cost: {
200
- input: 3.0,
201
- output: 15.0,
202
- cacheRead: 0.3,
203
- cacheWrite: 3.75,
204
- },
205
- region: "us-east5",
206
- },
207
- {
208
- id: "claude-3-5-sonnet",
209
- name: "Claude 3.5 Sonnet",
210
- apiId: "claude-3-5-sonnet@20240620",
211
- publisher: "anthropic",
212
- endpointType: "maas",
213
- contextWindow: 200000,
214
- maxTokens: 8192,
215
- input: ["text", "image"],
216
- reasoning: false,
217
- tools: true,
218
- cost: {
219
- input: 3.0,
220
- output: 15.0,
221
- cacheRead: 0.3,
185
+ input: 3.00,
186
+ output: 15.00,
187
+ cacheRead: 0.30,
222
188
  cacheWrite: 3.75,
223
189
  },
224
- region: "us-east5",
225
- },
226
- // Claude 3 Haiku - regional pricing
227
- {
228
- id: "claude-3-haiku",
229
- name: "Claude 3 Haiku",
230
- apiId: "claude-3-haiku@20240307",
231
- publisher: "anthropic",
232
- endpointType: "maas",
233
- contextWindow: 200000,
234
- maxTokens: 4096,
235
- input: ["text"],
236
- reasoning: false,
237
- tools: true,
238
- cost: {
239
- input: 0.25,
240
- output: 1.25,
241
- cacheRead: 0.03,
242
- cacheWrite: 0.3,
243
- },
244
- region: "us-east5",
190
+ region: "global",
245
191
  },
246
192
  ];
package/models/gemini.ts CHANGED
@@ -1,77 +1,83 @@
1
1
  /**
2
2
  * Gemini model definitions for Vertex AI
3
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models
4
- * All prices per 1M tokens (Standard tier pricing, as of Feb 2026)
3
+ * Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models
4
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing
5
+ * All prices per 1M tokens (standard tier, <= 200K input tokens)
5
6
  */
6
7
 
7
8
  import type { VertexModelConfig } from "../types.js";
8
9
 
9
10
  export const GEMINI_MODELS: VertexModelConfig[] = [
11
+ // --- Gemini 3.1 (Preview) ---
10
12
  {
11
13
  id: "gemini-3.1-pro",
12
14
  name: "Gemini 3.1 Pro",
13
15
  apiId: "gemini-3.1-pro-preview",
14
16
  publisher: "google",
15
17
  endpointType: "gemini",
16
- contextWindow: 1000000,
17
- maxTokens: 64000,
18
+ contextWindow: 1048576,
19
+ maxTokens: 65536,
18
20
  input: ["text", "image"],
19
21
  reasoning: true,
20
22
  tools: true,
21
23
  cost: {
22
24
  input: 2.00,
23
25
  output: 12.00,
24
- cacheRead: 0,
26
+ cacheRead: 0.20,
25
27
  cacheWrite: 0,
26
28
  },
27
29
  region: "global",
28
30
  },
29
31
  {
30
- id: "gemini-3-pro",
31
- name: "Gemini 3 Pro",
32
- apiId: "gemini-3-pro-preview",
32
+ id: "gemini-3.1-flash-lite",
33
+ name: "Gemini 3.1 Flash Lite",
34
+ apiId: "gemini-3.1-flash-lite-preview",
33
35
  publisher: "google",
34
36
  endpointType: "gemini",
35
- contextWindow: 2000000,
36
- maxTokens: 8192,
37
+ contextWindow: 1048576,
38
+ maxTokens: 65535,
37
39
  input: ["text", "image"],
38
40
  reasoning: true,
39
41
  tools: true,
40
42
  cost: {
41
- input: 1.25,
42
- output: 10.00,
43
- cacheRead: 0.125,
43
+ input: 0.25,
44
+ output: 1.50,
45
+ cacheRead: 0.025,
44
46
  cacheWrite: 0,
45
47
  },
46
48
  region: "global",
47
49
  },
50
+
51
+ // --- Gemini 3 (Preview) ---
48
52
  {
49
53
  id: "gemini-3-flash",
50
54
  name: "Gemini 3 Flash",
51
55
  apiId: "gemini-3-flash-preview",
52
56
  publisher: "google",
53
57
  endpointType: "gemini",
54
- contextWindow: 1000000,
55
- maxTokens: 8192,
58
+ contextWindow: 1048576,
59
+ maxTokens: 65536,
56
60
  input: ["text", "image"],
57
61
  reasoning: true,
58
62
  tools: true,
59
63
  cost: {
60
- input: 0.15,
61
- output: 0.60,
62
- cacheRead: 0.0375,
64
+ input: 0.50,
65
+ output: 3.00,
66
+ cacheRead: 0.05,
63
67
  cacheWrite: 0,
64
68
  },
65
69
  region: "global",
66
70
  },
71
+
72
+ // --- Gemini 2.5 (GA) ---
67
73
  {
68
74
  id: "gemini-2.5-pro",
69
75
  name: "Gemini 2.5 Pro",
70
76
  apiId: "gemini-2.5-pro",
71
77
  publisher: "google",
72
78
  endpointType: "gemini",
73
- contextWindow: 1000000,
74
- maxTokens: 64000,
79
+ contextWindow: 1048576,
80
+ maxTokens: 65536,
75
81
  input: ["text", "image"],
76
82
  reasoning: true,
77
83
  tools: true,
@@ -89,15 +95,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
89
95
  apiId: "gemini-2.5-flash",
90
96
  publisher: "google",
91
97
  endpointType: "gemini",
92
- contextWindow: 1000000,
93
- maxTokens: 64000,
98
+ contextWindow: 1048576,
99
+ maxTokens: 65536,
94
100
  input: ["text", "image"],
95
101
  reasoning: true,
96
102
  tools: true,
97
103
  cost: {
98
104
  input: 0.30,
99
105
  output: 2.50,
100
- cacheRead: 0.030,
106
+ cacheRead: 0.03,
101
107
  cacheWrite: 0,
102
108
  },
103
109
  region: "global",
@@ -108,26 +114,28 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
108
114
  apiId: "gemini-2.5-flash-lite",
109
115
  publisher: "google",
110
116
  endpointType: "gemini",
111
- contextWindow: 1000000,
112
- maxTokens: 64000,
117
+ contextWindow: 1048576,
118
+ maxTokens: 65536,
113
119
  input: ["text", "image"],
114
120
  reasoning: true,
115
121
  tools: true,
116
122
  cost: {
117
123
  input: 0.10,
118
124
  output: 0.40,
119
- cacheRead: 0.010,
125
+ cacheRead: 0.01,
120
126
  cacheWrite: 0,
121
127
  },
122
128
  region: "global",
123
129
  },
130
+
131
+ // --- Gemini 2.0 (GA) ---
124
132
  {
125
133
  id: "gemini-2.0-flash",
126
134
  name: "Gemini 2.0 Flash",
127
135
  apiId: "gemini-2.0-flash",
128
136
  publisher: "google",
129
137
  endpointType: "gemini",
130
- contextWindow: 1000000,
138
+ contextWindow: 1048576,
131
139
  maxTokens: 8192,
132
140
  input: ["text", "image"],
133
141
  reasoning: false,
@@ -135,7 +143,7 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
135
143
  cost: {
136
144
  input: 0.15,
137
145
  output: 0.60,
138
- cacheRead: 0.025,
146
+ cacheRead: 0,
139
147
  cacheWrite: 0,
140
148
  },
141
149
  region: "global",
@@ -146,15 +154,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
146
154
  apiId: "gemini-2.0-flash-lite",
147
155
  publisher: "google",
148
156
  endpointType: "gemini",
149
- contextWindow: 1000000,
157
+ contextWindow: 1048576,
150
158
  maxTokens: 8192,
151
- input: ["text"],
159
+ input: ["text", "image"],
152
160
  reasoning: false,
153
161
  tools: true,
154
162
  cost: {
155
163
  input: 0.075,
156
164
  output: 0.30,
157
- cacheRead: 0.01875,
165
+ cacheRead: 0,
158
166
  cacheWrite: 0,
159
167
  },
160
168
  region: "global",
package/models/index.ts CHANGED
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
11
11
  ...GEMINI_MODELS,
12
12
  ...CLAUDE_MODELS,
13
13
  ...MAAS_MODELS,
14
- ];
14
+ ].sort((a, b) => a.id.localeCompare(b.id));
15
15
 
16
16
  export function getModelById(id: string): VertexModelConfig | undefined {
17
17
  return ALL_MODELS.find((m) => m.id === id);