@ssweens/pi-vertex 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.1.3] - 2026-03-26
6
+ ### Fixed
7
+ - Hardened Claude-on-Vertex replay for mid-session model switching (tool ID normalization, tool result adjacency, thinking signature validation).
8
+ - Prevented Anthropic tool replay errors by inserting synthetic tool results when missing.
9
+
10
+ ### Updated
11
+ - Claude 4.6 models use native Anthropic Vertex SDK streaming.
12
+ - Claude 4.6 context window updated to 1M.
13
+ - Model list order in the selector is now alphabetized by ID.
14
+
15
+ ## [1.1.2] - 2026-03-24
16
+ ### Changed
17
+ - Initial Claude 4.x support on Vertex.
package/README.md CHANGED
@@ -130,8 +130,8 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
130
130
 
131
131
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) | Region |
132
132
  |-------|---------|------------|-------|-----------|----------------|--------|
133
- | claude-opus-4-6 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
- | claude-sonnet-4-6 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
133
+ | claude-opus-4-6 | 1M | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
+ | claude-sonnet-4-6 | 1M | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
135
135
  | claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
136
136
  | claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
137
137
  | claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
@@ -213,6 +213,7 @@ export GOOGLE_CLOUD_LOCATION=us-central1
213
213
  ## Dependencies
214
214
 
215
215
  - `@google/genai`: Google GenAI SDK for Gemini models
216
+ - `@anthropic-ai/vertex-sdk`: Official Anthropic-on-Vertex SDK for Claude models (native streaming)
216
217
  - `google-auth-library`: ADC authentication for all models
217
218
  - `@mariozechner/pi-ai`: Peer dependency
218
219
  - `@mariozechner/pi-coding-agent`: Peer dependency
package/TEST_COVERAGE.md ADDED
@@ -0,0 +1,13 @@
1
+ # Test Coverage
2
+
3
+ ## Current Status
4
+ - Automated tests: not yet implemented in this package.
5
+ - Lint/type checks: `npm run check` (currently a no-op placeholder).
6
+
7
+ ## Manual Verification
8
+ - Claude 4.6 streaming verified via Anthropic Vertex SDK.
9
+ - Mid-session model switching (tool call replay) verified interactively in pi.
10
+
11
+ ## Gaps / Next Steps
12
+ - Add automated integration tests for Anthropic Vertex streaming and tool replay.
13
+ - Add unit tests for message normalization and replay sequencing.
package/index.ts CHANGED
@@ -112,8 +112,8 @@ export default function (pi: ExtensionAPI) {
112
112
 
113
113
  // Show startup info as a widget that clears on first user input
114
114
  const vertexStartupLines = [
115
- `[pi-vertex] Initializing with project: ${projectId}`,
116
- `[pi-vertex] Registered ${ALL_MODELS.length} models`,
115
+ ` [pi-vertex] Initializing with project: ${projectId}`,
116
+ ` [pi-vertex] Registered ${ALL_MODELS.length} models`,
117
117
  ];
118
118
  pi.on("session_start", async (_event, ctx) => {
119
119
  ctx.ui.setWidget("pi-vertex-startup", (_tui, theme) => ({
package/models/claude.ts CHANGED
@@ -16,7 +16,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
16
16
  apiId: "claude-opus-4-6",
17
17
  publisher: "anthropic",
18
18
  endpointType: "maas",
19
- contextWindow: 200000,
19
+ contextWindow: 1000000,
20
20
  maxTokens: 32000,
21
21
  input: ["text", "image"],
22
22
  reasoning: true,
@@ -35,7 +35,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
35
35
  apiId: "claude-sonnet-4-6",
36
36
  publisher: "anthropic",
37
37
  endpointType: "maas",
38
- contextWindow: 200000,
38
+ contextWindow: 1000000,
39
39
  maxTokens: 64000,
40
40
  input: ["text", "image"],
41
41
  reasoning: true,
package/models/gemini.ts CHANGED
@@ -49,25 +49,6 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
49
49
  },
50
50
 
51
51
  // --- Gemini 3 (Preview) ---
52
- {
53
- id: "gemini-3-pro",
54
- name: "Gemini 3 Pro",
55
- apiId: "gemini-3-pro-preview",
56
- publisher: "google",
57
- endpointType: "gemini",
58
- contextWindow: 1048576,
59
- maxTokens: 65536,
60
- input: ["text", "image"],
61
- reasoning: true,
62
- tools: true,
63
- cost: {
64
- input: 2.00,
65
- output: 12.00,
66
- cacheRead: 0.20,
67
- cacheWrite: 0,
68
- },
69
- region: "global",
70
- },
71
52
  {
72
53
  id: "gemini-3-flash",
73
54
  name: "Gemini 3 Flash",
package/models/index.ts CHANGED
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
11
11
  ...GEMINI_MODELS,
12
12
  ...CLAUDE_MODELS,
13
13
  ...MAAS_MODELS,
14
- ].sort((a, b) => a.name.localeCompare(b.name));
14
+ ].sort((a, b) => a.id.localeCompare(b.id));
15
15
 
16
16
  export function getModelById(id: string): VertexModelConfig | undefined {
17
17
  return ALL_MODELS.find((m) => m.id === id);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ssweens/pi-vertex",
3
- "version": "1.1.1",
3
+ "version": "1.1.3",
4
4
  "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -13,6 +13,8 @@
13
13
  "models/",
14
14
  "streaming/",
15
15
  "README.md",
16
+ "CHANGELOG.md",
17
+ "TEST_COVERAGE.md",
16
18
  "LICENSE",
17
19
  "screenshot.png"
18
20
  ],
@@ -22,6 +24,7 @@
22
24
  "check": "echo 'nothing to check'"
23
25
  },
24
26
  "dependencies": {
27
+ "@anthropic-ai/vertex-sdk": "^0.14.4",
25
28
  "@google/genai": "^1.42.0",
26
29
  "google-auth-library": "^9.0.0"
27
30
  },
package/streaming/maas.ts CHANGED
@@ -1,15 +1,327 @@
1
1
  /**
2
- * MaaS streaming handler for Claude and all other models
3
- * Uses OpenAI-compatible Chat Completions endpoint
2
+ * MaaS streaming handler for Claude and all other models.
4
3
  *
5
- * Delegates to pi-ai's built-in OpenAI streaming implementation.
6
- * Uses model.apiId directly in the request (no global fetch interceptor)
7
- * and patches the model ID back to the friendly name in response events.
4
+ * - Anthropic models: native AnthropicVertex SDK streaming
5
+ * - Other MaaS models: Vertex OpenAI-compatible Chat Completions endpoint
8
6
  */
9
7
 
10
8
  import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
11
9
  import { getAuthConfig, buildBaseUrl, getAccessToken, resolveLocation } from "../auth.js";
12
- import { createAssistantMessageEventStream, type AssistantMessageEventStream, type Model, streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
10
+ import {
11
+ createAssistantMessageEventStream,
12
+ type AssistantMessageEventStream,
13
+ type Model,
14
+ streamSimpleOpenAICompletions,
15
+ calculateCost,
16
+ } from "@mariozechner/pi-ai";
17
+ import { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
18
+
19
+ function mapAnthropicEffort(reasoning?: string): "low" | "medium" | "high" | "max" | undefined {
20
+ if (!reasoning) return undefined;
21
+ if (reasoning === "minimal" || reasoning === "low") return "low";
22
+ if (reasoning === "medium") return "medium";
23
+ if (reasoning === "xhigh") return "max";
24
+ return "high";
25
+ }
26
+
27
+ /**
28
+ * Sanitize an ID to match Anthropic's pattern: ^[a-zA-Z0-9_-]+$
29
+ * Replaces invalid characters with underscores.
30
+ */
31
+ function sanitizeToolId(id: string): string {
32
+ // Replace any character that's not alphanumeric, underscore, or hyphen.
33
+ const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
34
+ // Deterministic fallback for empty/invalid IDs.
35
+ return sanitized || "tool_id";
36
+ }
37
+
38
+ function isValidThinkingSignature(signature?: string): boolean {
39
+ if (!signature) return false;
40
+ // Anthropic signatures are base64-like encrypted payloads.
41
+ return /^[A-Za-z0-9+/]+={0,2}$/.test(signature) && signature.length % 4 === 0;
42
+ }
43
+
44
+ /**
45
+ * Stream a Claude model via the native AnthropicVertex SDK.
46
+ */
47
+ async function streamAnthropic(
48
+ model: VertexModelConfig,
49
+ context: Context,
50
+ options: StreamOptions | undefined,
51
+ stream: ReturnType<typeof createAssistantMessageEventStream>,
52
+ ): Promise<void> {
53
+ const location = resolveLocation(model.region);
54
+ const auth = getAuthConfig(location);
55
+
56
+ const client = new AnthropicVertex({
57
+ projectId: auth.projectId,
58
+ region: auth.location,
59
+ });
60
+
61
+ // Build messages with Anthropic-compatible tool-use/tool-result sequencing.
62
+ const sourceMessages = (context.messages as any[]) ?? [];
63
+
64
+ // Pass 1: normalize tool call IDs and propagate mapping to tool results.
65
+ const normalized: any[] = [];
66
+ const toolIdMap = new Map<string, string>();
67
+ for (const msg of sourceMessages) {
68
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
69
+ const content = msg.content.map((block: any) => {
70
+ if (block?.type !== "toolCall") return block;
71
+ const normalizedId = sanitizeToolId(String(block.id ?? ""));
72
+ if (block.id && normalizedId !== block.id) toolIdMap.set(block.id, normalizedId);
73
+ return { ...block, id: normalizedId };
74
+ });
75
+ normalized.push({ ...msg, content });
76
+ } else if (msg.role === "toolResult") {
77
+ const mapped = toolIdMap.get(msg.toolCallId);
78
+ normalized.push({ ...msg, toolCallId: sanitizeToolId(String(mapped ?? msg.toolCallId ?? "")) });
79
+ } else {
80
+ normalized.push(msg);
81
+ }
82
+ }
83
+
84
+ // Pass 2: enforce Anthropic adjacency rule:
85
+ // assistant(tool_use...) MUST be immediately followed by user(tool_result...)
86
+ const replayable: any[] = [];
87
+ for (let i = 0; i < normalized.length; i++) {
88
+ const msg = normalized[i];
89
+
90
+ if (msg.role === "assistant") {
91
+ if (msg.stopReason === "error" || msg.stopReason === "aborted") continue;
92
+
93
+ const toolCalls = Array.isArray(msg.content)
94
+ ? msg.content.filter((b: any) => b?.type === "toolCall" && b?.id && b?.name)
95
+ : [];
96
+
97
+ replayable.push(msg);
98
+
99
+ if (toolCalls.length > 0) {
100
+ const collectedToolResults: any[] = [];
101
+ let j = i + 1;
102
+ while (j < normalized.length && normalized[j]?.role === "toolResult") {
103
+ collectedToolResults.push(normalized[j]);
104
+ j++;
105
+ }
106
+
107
+ const existingIds = new Set(collectedToolResults.map((tr: any) => tr.toolCallId));
108
+ for (const tc of toolCalls) {
109
+ if (!existingIds.has(tc.id)) {
110
+ collectedToolResults.push({
111
+ role: "toolResult",
112
+ toolCallId: tc.id,
113
+ toolName: tc.name,
114
+ content: [{ type: "text", text: "No result provided" }],
115
+ isError: true,
116
+ timestamp: Date.now(),
117
+ });
118
+ }
119
+ }
120
+
121
+ replayable.push(...collectedToolResults);
122
+ i = j - 1;
123
+ }
124
+ continue;
125
+ }
126
+
127
+ // Drop orphan tool results (invalid for Anthropic if not immediately after tool_use assistant msg).
128
+ if (msg.role === "toolResult") continue;
129
+
130
+ replayable.push(msg);
131
+ }
132
+
133
+ // Final pass: convert replayable internal messages to Anthropic message blocks.
134
+ const messages: Array<{ role: "user" | "assistant"; content: any }> = [];
135
+ for (let i = 0; i < replayable.length; i++) {
136
+ const msg = replayable[i];
137
+
138
+ if (msg.role === "user") {
139
+ if (typeof msg.content === "string") {
140
+ messages.push({ role: "user", content: [{ type: "text", text: msg.content }] });
141
+ } else if (Array.isArray(msg.content)) {
142
+ const blocks = msg.content
143
+ .map((c: any) => {
144
+ if (c.type === "text") return { type: "text", text: c.text };
145
+ if (c.type === "image") {
146
+ return { type: "image", source: { type: "base64", media_type: c.mimeType, data: c.data } };
147
+ }
148
+ return null;
149
+ })
150
+ .filter(Boolean);
151
+ if (blocks.length > 0) messages.push({ role: "user", content: blocks });
152
+ }
153
+ continue;
154
+ }
155
+
156
+ if (msg.role === "assistant") {
157
+ const blocks: any[] = [];
158
+ const isSameModel = msg.provider === "vertex" && msg.api === "anthropic-messages" && msg.model === model.id;
159
+
160
+ if (Array.isArray(msg.content)) {
161
+ for (const block of msg.content) {
162
+ if (block.type === "text" && block.text?.trim()) {
163
+ blocks.push({ type: "text", text: block.text });
164
+ } else if (block.type === "toolCall") {
165
+ blocks.push({ type: "tool_use", id: sanitizeToolId(String(block.id ?? "")), name: block.name, input: block.arguments ?? {} });
166
+ } else if (block.type === "thinking" && block.thinking?.trim()) {
167
+ if (isSameModel && isValidThinkingSignature(block.thinkingSignature)) {
168
+ blocks.push({ type: "thinking", thinking: block.thinking, signature: block.thinkingSignature });
169
+ } else {
170
+ // Cross-model/provider replay: convert thinking to plain text to avoid signature errors.
171
+ blocks.push({ type: "text", text: block.thinking });
172
+ }
173
+ }
174
+ }
175
+ }
176
+ if (blocks.length > 0) messages.push({ role: "assistant", content: blocks });
177
+ continue;
178
+ }
179
+
180
+ if (msg.role === "toolResult") {
181
+ // Group consecutive tool results into one user message (Anthropic expects this shape).
182
+ const toolResultBlocks: any[] = [];
183
+ let j = i;
184
+ while (j < replayable.length && replayable[j]?.role === "toolResult") {
185
+ const tr = replayable[j];
186
+ const text = typeof tr.content === "string"
187
+ ? tr.content
188
+ : Array.isArray(tr.content)
189
+ ? tr.content.filter((c: any) => c?.type === "text").map((c: any) => c.text).join("\n")
190
+ : JSON.stringify(tr.content ?? "");
191
+
192
+ toolResultBlocks.push({
193
+ type: "tool_result",
194
+ tool_use_id: sanitizeToolId(String(tr.toolCallId ?? "")),
195
+ content: text || "",
196
+ ...(tr.isError ? { is_error: true } : {}),
197
+ });
198
+ j++;
199
+ }
200
+
201
+ if (toolResultBlocks.length > 0) {
202
+ messages.push({ role: "user", content: toolResultBlocks });
203
+ }
204
+ i = j - 1;
205
+ }
206
+ }
207
+
208
+ // Build tools
209
+ const tools = context.tools?.map((t: any) => ({
210
+ name: t.name,
211
+ description: t.description,
212
+ input_schema: {
213
+ type: "object" as const,
214
+ properties: t.parameters?.properties ?? {},
215
+ required: t.parameters?.required ?? [],
216
+ },
217
+ }));
218
+
219
+ const params: any = {
220
+ model: model.apiId,
221
+ max_tokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
222
+ messages,
223
+ ...(context.systemPrompt ? { system: context.systemPrompt } : {}),
224
+ ...(tools && tools.length > 0 ? { tools } : {}),
225
+ ...(options?.temperature !== undefined && !options?.reasoning ? { temperature: options.temperature } : {}),
226
+ };
227
+
228
+ // Thinking
229
+ if (model.reasoning && options?.reasoning) {
230
+ const effort = mapAnthropicEffort(options.reasoning);
231
+ if (effort) {
232
+ params.thinking = { type: "adaptive" };
233
+ params.output_config = { effort };
234
+ }
235
+ }
236
+
237
+ const output: any = {
238
+ role: "assistant",
239
+ content: [],
240
+ api: "anthropic-messages",
241
+ provider: "vertex",
242
+ model: model.id,
243
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
244
+ stopReason: "stop",
245
+ timestamp: Date.now(),
246
+ };
247
+
248
+ stream.push({ type: "start", partial: output });
249
+
250
+ const anthropicStream = client.messages.stream(params, { signal: options?.signal });
251
+
252
+ for await (const event of anthropicStream) {
253
+ if (event.type === "message_start") {
254
+ output.responseId = event.message.id;
255
+ output.usage.input = event.message.usage.input_tokens || 0;
256
+ output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
257
+ output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
258
+
259
+ } else if (event.type === "content_block_start") {
260
+ const cb = event.content_block;
261
+ if (cb.type === "text") {
262
+ output.content.push({ type: "text", text: "", index: event.index });
263
+ stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
264
+ } else if (cb.type === "thinking") {
265
+ output.content.push({ type: "thinking", thinking: "", thinkingSignature: "", index: event.index });
266
+ stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
267
+ } else if (cb.type === "tool_use") {
268
+ output.content.push({ type: "toolCall", id: cb.id, name: cb.name, arguments: {}, partialArgs: "", index: event.index });
269
+ stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
270
+ }
271
+
272
+ } else if (event.type === "content_block_delta") {
273
+ const idx = output.content.findIndex((b: any) => b.index === event.index);
274
+ const block = output.content[idx];
275
+ if (!block) continue;
276
+
277
+ const delta = event.delta;
278
+ if (delta.type === "text_delta" && block.type === "text") {
279
+ block.text += delta.text;
280
+ stream.push({ type: "text_delta", contentIndex: idx, delta: delta.text, partial: output });
281
+ } else if (delta.type === "thinking_delta" && block.type === "thinking") {
282
+ block.thinking += delta.thinking;
283
+ stream.push({ type: "thinking_delta", contentIndex: idx, delta: delta.thinking, partial: output });
284
+ } else if (delta.type === "signature_delta" && block.type === "thinking") {
285
+ block.thinkingSignature = (block.thinkingSignature || "") + delta.signature;
286
+ } else if (delta.type === "input_json_delta" && block.type === "toolCall") {
287
+ block.partialArgs += delta.partial_json;
288
+ stream.push({ type: "toolcall_delta", contentIndex: idx, delta: delta.partial_json, partial: output });
289
+ }
290
+
291
+ } else if (event.type === "content_block_stop") {
292
+ const idx = output.content.findIndex((b: any) => b.index === event.index);
293
+ const block = output.content[idx];
294
+ if (!block) continue;
295
+ delete block.index;
296
+
297
+ if (block.type === "text") {
298
+ stream.push({ type: "text_end", contentIndex: idx, content: block.text, partial: output });
299
+ } else if (block.type === "thinking") {
300
+ stream.push({ type: "thinking_end", contentIndex: idx, content: block.thinking, partial: output });
301
+ } else if (block.type === "toolCall") {
302
+ try { block.arguments = JSON.parse(block.partialArgs); } catch { block.arguments = {}; }
303
+ delete block.partialArgs;
304
+ stream.push({ type: "toolcall_end", contentIndex: idx, toolCall: block, partial: output });
305
+ }
306
+
307
+ } else if (event.type === "message_delta") {
308
+ if (event.delta.stop_reason) {
309
+ const r = event.delta.stop_reason;
310
+ output.stopReason = r === "end_turn" ? "stop" : r === "max_tokens" ? "length" : r === "tool_use" ? "toolUse" : "stop";
311
+ }
312
+ if (event.usage?.output_tokens != null) output.usage.output = event.usage.output_tokens;
313
+ }
314
+ }
315
+
316
+ output.usage.totalTokens = output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
317
+ calculateCost(model as any, output.usage);
318
+
319
+ if (output.content.some((b: any) => b.type === "toolCall")) {
320
+ output.stopReason = "toolUse";
321
+ }
322
+
323
+ stream.push({ type: "done", reason: output.stopReason, message: output });
324
+ }
13
325
 
14
326
  export function streamMaaS(
15
327
  model: VertexModelConfig,
@@ -19,20 +331,24 @@ export function streamMaaS(
19
331
  const stream = createAssistantMessageEventStream();
20
332
 
21
333
  (async () => {
334
+ const apiModelId = model.apiId.includes("/") ? model.apiId : `${model.publisher}/${model.apiId}`;
335
+
22
336
  try {
23
- // Priority: config file > env var > model region > default
337
+ if (model.publisher === "anthropic") {
338
+ await streamAnthropic(model, context, options, stream);
339
+ stream.end();
340
+ return;
341
+ }
342
+
343
+ // Non-Anthropic MaaS models: Vertex OpenAI-compatible endpoint.
24
344
  const location = resolveLocation(model.region);
25
345
  const auth = getAuthConfig(location);
26
346
  const accessToken = await getAccessToken();
27
-
28
347
  const baseUrl = buildBaseUrl(auth.projectId, auth.location);
29
348
  const endpoint = `${baseUrl}/endpoints/openapi`;
30
349
 
31
- // Create a model object compatible with pi-ai's OpenAI streaming.
32
- // Use model.apiId directly so the correct model name goes in the request body.
33
- // The friendly model.id is patched back into response events below for session persistence.
34
350
  const modelForPi: Model<"openai-completions"> = {
35
- id: model.apiId,
351
+ id: apiModelId,
36
352
  name: model.name,
37
353
  api: "openai-completions",
38
354
  provider: "vertex",
@@ -52,53 +368,41 @@ export function streamMaaS(
52
368
  },
53
369
  };
54
370
 
55
- // Delegate to pi-ai's built-in OpenAI streaming
56
- const innerStream = streamSimpleOpenAICompletions(
57
- modelForPi,
58
- context as any,
59
- {
60
- ...options,
61
- apiKey: accessToken,
62
- maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
63
- temperature: options?.temperature,
64
- },
65
- );
371
+ const innerStream = streamSimpleOpenAICompletions(modelForPi, context as any, {
372
+ ...options,
373
+ apiKey: accessToken,
374
+ maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
375
+ temperature: options?.temperature,
376
+ });
66
377
 
67
- // Forward all events, patching model ID back to the friendly name
68
- // so pi-coding-agent can restore sessions correctly.
69
378
  for await (const event of innerStream) {
70
- if ("partial" in event && event.partial) {
71
- event.partial.model = model.id;
72
- }
73
- if ("message" in event && event.message) {
74
- event.message.model = model.id;
75
- }
379
+ if ("partial" in event && event.partial) event.partial.model = model.id;
380
+ if ("message" in event && event.message) event.message.model = model.id;
76
381
  if ("error" in event && event.error && typeof event.error === "object") {
77
- (event.error as any).model = model.id;
382
+ const err = event.error as any;
383
+ err.model = model.id;
384
+ if (typeof err.errorMessage === "string" && /^400\s*(status code)?\s*\(no body\)/i.test(err.errorMessage)) {
385
+ err.errorMessage = `Vertex MaaS HTTP 400 (no body) for model "${apiModelId}". Not automatically treated as context overflow.`;
386
+ }
78
387
  }
79
388
  stream.push(event);
80
389
  }
81
390
  stream.end();
391
+
82
392
  } catch (error) {
393
+ const rawMessage = error instanceof Error ? error.message : String(error);
83
394
  stream.push({
84
395
  type: "error",
85
396
  reason: options?.signal?.aborted ? "aborted" : "error",
86
397
  error: {
87
398
  role: "assistant",
88
399
  content: [],
89
- api: "openai-completions",
400
+ api: model.publisher === "anthropic" ? "anthropic-messages" : "openai-completions",
90
401
  provider: "vertex",
91
402
  model: model.id,
92
- usage: {
93
- input: 0,
94
- output: 0,
95
- cacheRead: 0,
96
- cacheWrite: 0,
97
- totalTokens: 0,
98
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
99
- },
403
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
100
404
  stopReason: options?.signal?.aborted ? "aborted" : "error",
101
- errorMessage: error instanceof Error ? error.message : String(error),
405
+ errorMessage: rawMessage,
102
406
  timestamp: Date.now(),
103
407
  },
104
408
  });