npm - @ssweens/pi-vertex - Versions diffs - 1.1.7 → 1.1.9 - Mend

@ssweens/pi-vertex 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,15 @@
 All notable changes to this project will be documented in this file.
+## [1.1.9] - 2026-05-19
+### Fixed
+- **Gemini 3/3.5 default thinking preserved** — previously, if no reasoning effort level was specified, we silently overrode Gemini 3/3.5 models to use their lowest thinking level (`MINIMAL` for Flash models, `LOW` for Pro models). This severely degraded the intelligence of Gemini 3.5 Flash (effectively defaulting it to no real reasoning/thinking) and of Gemini 3 Pro/Flash. We now omit `thinkingConfig` entirely when `options.reasoning` is undefined, allowing Gemini 3/3.5 models to use their native GA defaults (`MEDIUM` for 3.5 Flash, `HIGH` for others).
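+- **Gemini 2.5 default thinking budget** — Gemini 2.5 has thinking disabled by default on Vertex, so when `options.reasoning` is undefined we now send a default thinking budget with `includeThoughts: true`: `2048` tokens for 2.5 Pro and `1024` tokens for 2.5 Flash (the `1024` default also applies to any other reasoning model whose API id does not start with `gemini-3`).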
+## [1.1.8] - 2026-05-19
+### Added
+- **Gemini 3.5 Flash** (`gemini-3.5-flash`) — GA release from Google I/O 2026. Most intelligent Flash model; optimized for agentic execution, coding, and long-horizon tasks. 1M context, 65K max output, text/image/video/audio input, reasoning (thinking levels: minimal/low/medium/high), tools. $1.50/$9.00 per 1M tokens (global), $0.15/1M cache read.
 ## [1.1.7] - 2026-05-16
 ### Added
 - **Regional pricing for Claude models** — non-global Vertex endpoints (us-east5, europe-west1, asia-southeast1, us/eu multi-region) carry a 10% price premium per GCP's published rates. The streaming layer now automatically selects the correct cost tier based on the resolved endpoint at call time. No config change required — if your `GOOGLE_CLOUD_LOCATION` or config resolves to any non-`global` location, cost tracking reflects the regional rate.

package/models/gemini.ts CHANGED Viewed

@@ -8,6 +8,27 @@
 import type { VertexModelConfig } from "../types.js";
 export const GEMINI_MODELS: VertexModelConfig[] = [
+  // --- Gemini 3.5 (GA) ---
+  {
+    id: "gemini-3.5-flash",
+    name: "Gemini 3.5 Flash",
+    apiId: "gemini-3.5-flash",
+    publisher: "google",
+    endpointType: "gemini",
+    contextWindow: 1048576,
+    maxTokens: 65536,
+    input: ["text", "image"],
+    reasoning: true,
+    tools: true,
+    cost: {
+      input: 1.50,
+      output: 9.00,
+      cacheRead: 0.15,
+      cacheWrite: 0,
+    },
+    region: "global",
+  },
   // --- Gemini 3.1 (Preview) ---
   {
     id: "gemini-3.1-pro",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ssweens/pi-vertex",
-  "version": "1.1.7",
+  "version": "1.1.9",
   "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
   "type": "module",
   "main": "index.ts",

package/streaming/gemini.ts CHANGED Viewed

@@ -52,19 +52,6 @@ function getGemini3ThinkingLevel(effort: string, modelId: string): ThinkingLevel
   return THINKING_LEVEL_MAP[effort];
 }
-function getLowestThinkingConfig(modelId: string): GeminiThinkingConfig {
-  if (isGemini3ProModel(modelId)) {
-    return { thinkingLevel: ThinkingLevel.LOW };
-  }
-  if (isGemini3FlashModel(modelId)) {
-    return { thinkingLevel: ThinkingLevel.MINIMAL };
-  }
-  if (isGemini25ProModel(modelId)) {
-    return { thinkingBudget: 128 };
-  }
-  return { thinkingBudget: 0 };
-}
 function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
   switch (reason) {
     case FinishReason.STOP:
@@ -137,8 +124,6 @@ export function streamGemini(
       }
       // Add thinking configuration (matches pi-mono's buildParams logic).
-      // For reasoning models: always set a minimum thinking config so the model
-      // doesn't silently suppress thoughts when no effort level is specified.
       if (model.reasoning) {
         if (options?.reasoning) {
           const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
@@ -161,7 +146,18 @@ export function streamGemini(
           config.thinkingConfig = thinkingConfig;
         } else {
-          config.thinkingConfig = getLowestThinkingConfig(model.apiId);
+          // If no reasoning level is specified:
+          // - For Gemini 3.x/3.5 models, omit thinkingConfig entirely so Vertex AI uses
+          //   the model's native default level (e.g. MEDIUM for 3.5, HIGH for others).
+          // - For Gemini 2.5 models, apply a healthy thinking budget floor (thinking is
+          //   disabled by default on 2.5).
+          const isGemini3 = model.apiId.startsWith("gemini-3");
+          if (!isGemini3) {
+            config.thinkingConfig = {
+              includeThoughts: true,
+              thinkingBudget: model.apiId.includes("2.5-pro") ? 2048 : 1024,
+            };
+          }
         }
       }