@ssweens/pi-vertex 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [1.1.9] - 2026-05-19
6
+ ### Fixed
7
+ - **Gemini 3/3.5 default thinking preserved** — previously, if no reasoning effort level was specified, we silently forced Gemini 3/3.5 models to their lowest thinking level (`LOW` for Pro, `MINIMAL` for Flash). This severely degraded the intelligence of Gemini 3.5 Flash (leaving it with effectively no reasoning/thinking) and of Gemini 3 Pro/Flash. We now omit `thinkingConfig` entirely when `options.reasoning` is not set, allowing Gemini 3/3.5 models to use their native GA defaults (`MEDIUM` for 3.5 Flash, `HIGH` for others).
8
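+ - **Gemini 2.5 default thinking budget** — Gemini 2.5 has thinking disabled by default on Vertex, so when `options.reasoning` is not set we now apply a healthy thinking budget floor (`2048` tokens for Pro, `1024` for Flash and any other reasoning model whose API id does not start with `gemini-3`) with `includeThoughts` enabled. This replaces the previous fallback of `128` tokens for 2.5 Pro and `0` (thinking off) for everything else.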
9
+
10
+ ## [1.1.8] - 2026-05-19
11
+ ### Added
12
+ - **Gemini 3.5 Flash** (`gemini-3.5-flash`) — GA release from Google I/O 2026. Most intelligent Flash model; optimized for agentic execution, coding, and long-horizon tasks. 1M context, 65K max output, text/image/video/audio input (the model entry in this package currently declares `text` and `image` only), reasoning (thinking levels: minimal/low/medium/high), tools. Pricing (global): $1.50 input / $9.00 output per 1M tokens, $0.15 per 1M tokens for cache reads.
13
+
5
14
  ## [1.1.7] - 2026-05-16
6
15
  ### Added
7
16
  - **Regional pricing for Claude models** — non-global Vertex endpoints (us-east5, europe-west1, asia-southeast1, us/eu multi-region) carry a 10% price premium per GCP's published rates. The streaming layer now automatically selects the correct cost tier based on the resolved endpoint at call time. No config change required — if your `GOOGLE_CLOUD_LOCATION` or config resolves to any non-`global` location, cost tracking reflects the regional rate.
package/models/gemini.ts CHANGED
@@ -8,6 +8,27 @@
8
8
  import type { VertexModelConfig } from "../types.js";
9
9
 
10
10
  export const GEMINI_MODELS: VertexModelConfig[] = [
11
+ // --- Gemini 3.5 (GA) ---
12
+ {
13
+ id: "gemini-3.5-flash",
14
+ name: "Gemini 3.5 Flash",
15
+ apiId: "gemini-3.5-flash",
16
+ publisher: "google",
17
+ endpointType: "gemini",
18
+ contextWindow: 1048576,
19
+ maxTokens: 65536,
20
+ input: ["text", "image"],
21
+ reasoning: true,
22
+ tools: true,
23
+ cost: {
24
+ input: 1.50,
25
+ output: 9.00,
26
+ cacheRead: 0.15,
27
+ cacheWrite: 0,
28
+ },
29
+ region: "global",
30
+ },
31
+
11
32
  // --- Gemini 3.1 (Preview) ---
12
33
  {
13
34
  id: "gemini-3.1-pro",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ssweens/pi-vertex",
3
- "version": "1.1.7",
3
+ "version": "1.1.9",
4
4
  "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -52,19 +52,6 @@ function getGemini3ThinkingLevel(effort: string, modelId: string): ThinkingLevel
52
52
  return THINKING_LEVEL_MAP[effort];
53
53
  }
54
54
 
55
- function getLowestThinkingConfig(modelId: string): GeminiThinkingConfig {
56
- if (isGemini3ProModel(modelId)) {
57
- return { thinkingLevel: ThinkingLevel.LOW };
58
- }
59
- if (isGemini3FlashModel(modelId)) {
60
- return { thinkingLevel: ThinkingLevel.MINIMAL };
61
- }
62
- if (isGemini25ProModel(modelId)) {
63
- return { thinkingBudget: 128 };
64
- }
65
- return { thinkingBudget: 0 };
66
- }
67
-
68
55
  function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
69
56
  switch (reason) {
70
57
  case FinishReason.STOP:
@@ -137,8 +124,6 @@ export function streamGemini(
137
124
  }
138
125
 
139
126
  // Add thinking configuration (matches pi-mono's buildParams logic).
140
- // For reasoning models: always set a minimum thinking config so the model
141
- // doesn't silently suppress thoughts when no effort level is specified.
142
127
  if (model.reasoning) {
143
128
  if (options?.reasoning) {
144
129
  const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
@@ -161,7 +146,18 @@ export function streamGemini(
161
146
 
162
147
  config.thinkingConfig = thinkingConfig;
163
148
  } else {
164
- config.thinkingConfig = getLowestThinkingConfig(model.apiId);
149
+ // If no reasoning level is specified:
150
+ // - For Gemini 3.x/3.5 models, omit thinkingConfig entirely so Vertex AI uses
151
+ // the model's native default level (e.g. MEDIUM for 3.5, HIGH for others).
152
+ // - For Gemini 2.5 models, apply a healthy thinking budget floor (thinking is
153
+ // disabled by default on 2.5).
154
+ const isGemini3 = model.apiId.startsWith("gemini-3");
155
+ if (!isGemini3) {
156
+ config.thinkingConfig = {
157
+ includeThoughts: true,
158
+ thinkingBudget: model.apiId.includes("2.5-pro") ? 2048 : 1024,
159
+ };
160
+ }
165
161
  }
166
162
  }
167
163