@ssweens/pi-vertex 1.1.7 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/models/gemini.ts +21 -0
- package/package.json +1 -1
- package/streaming/gemini.ts +12 -16
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.1.9] - 2026-05-19
|
|
6
|
+
### Fixed
|
|
7
|
+
- **Gemini 3/3.5 default thinking preserved** — previously, if no reasoning effort level was specified, we silently overrode Gemini 3/3.5 models to use their lowest thinking level (`LOW` for Pro models, `MINIMAL` for Flash models). This severely degraded the intelligence of Gemini 3.5 Flash (leaving it with no real reasoning/thinking by default) and of Gemini 3 Pro/Flash. We now omit `thinkingConfig` entirely when `options.reasoning` is undefined, allowing Gemini 3/3.5 models to use their native GA defaults (`MEDIUM` for 3.5 Flash, `HIGH` for others).
|
|
8
|
+
- **Gemini 2.5 default thinking budget** — Gemini 2.5 has thinking disabled by default on Vertex, so when `options.reasoning` is undefined we now apply a default thinking budget (`2048` tokens for Pro, `1024` tokens for Flash).
|
|
9
|
+
|
|
10
|
+
## [1.1.8] - 2026-05-19
|
|
11
|
+
### Added
|
|
12
|
+
- **Gemini 3.5 Flash** (`gemini-3.5-flash`) — GA release from Google I/O 2026. Most intelligent Flash model; optimized for agentic execution, coding, and long-horizon tasks. 1M context, 65K max output, text/image/video/audio input, reasoning (thinking levels: minimal/low/medium/high), tools. $1.50/$9.00 per 1M tokens (global), $0.15/1M cache read.
|
|
13
|
+
|
|
5
14
|
## [1.1.7] - 2026-05-16
|
|
6
15
|
### Added
|
|
7
16
|
- **Regional pricing for Claude models** — non-global Vertex endpoints (us-east5, europe-west1, asia-southeast1, us/eu multi-region) carry a 10% price premium per GCP's published rates. The streaming layer now automatically selects the correct cost tier based on the resolved endpoint at call time. No config change required — if your `GOOGLE_CLOUD_LOCATION` or config resolves to any non-`global` location, cost tracking reflects the regional rate.
|
package/models/gemini.ts
CHANGED
|
@@ -8,6 +8,27 @@
|
|
|
8
8
|
import type { VertexModelConfig } from "../types.js";
|
|
9
9
|
|
|
10
10
|
export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
11
|
+
// --- Gemini 3.5 (GA) ---
|
|
12
|
+
{
|
|
13
|
+
id: "gemini-3.5-flash",
|
|
14
|
+
name: "Gemini 3.5 Flash",
|
|
15
|
+
apiId: "gemini-3.5-flash",
|
|
16
|
+
publisher: "google",
|
|
17
|
+
endpointType: "gemini",
|
|
18
|
+
contextWindow: 1048576,
|
|
19
|
+
maxTokens: 65536,
|
|
20
|
+
input: ["text", "image"],
|
|
21
|
+
reasoning: true,
|
|
22
|
+
tools: true,
|
|
23
|
+
cost: {
|
|
24
|
+
input: 1.50,
|
|
25
|
+
output: 9.00,
|
|
26
|
+
cacheRead: 0.15,
|
|
27
|
+
cacheWrite: 0,
|
|
28
|
+
},
|
|
29
|
+
region: "global",
|
|
30
|
+
},
|
|
31
|
+
|
|
11
32
|
// --- Gemini 3.1 (Preview) ---
|
|
12
33
|
{
|
|
13
34
|
id: "gemini-3.1-pro",
|
package/package.json
CHANGED
package/streaming/gemini.ts
CHANGED
|
@@ -52,19 +52,6 @@ function getGemini3ThinkingLevel(effort: string, modelId: string): ThinkingLevel
|
|
|
52
52
|
return THINKING_LEVEL_MAP[effort];
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
function getLowestThinkingConfig(modelId: string): GeminiThinkingConfig {
|
|
56
|
-
if (isGemini3ProModel(modelId)) {
|
|
57
|
-
return { thinkingLevel: ThinkingLevel.LOW };
|
|
58
|
-
}
|
|
59
|
-
if (isGemini3FlashModel(modelId)) {
|
|
60
|
-
return { thinkingLevel: ThinkingLevel.MINIMAL };
|
|
61
|
-
}
|
|
62
|
-
if (isGemini25ProModel(modelId)) {
|
|
63
|
-
return { thinkingBudget: 128 };
|
|
64
|
-
}
|
|
65
|
-
return { thinkingBudget: 0 };
|
|
66
|
-
}
|
|
67
|
-
|
|
68
55
|
function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
|
|
69
56
|
switch (reason) {
|
|
70
57
|
case FinishReason.STOP:
|
|
@@ -137,8 +124,6 @@ export function streamGemini(
|
|
|
137
124
|
}
|
|
138
125
|
|
|
139
126
|
// Add thinking configuration (matches pi-mono's buildParams logic).
|
|
140
|
-
// For reasoning models: always set a minimum thinking config so the model
|
|
141
|
-
// doesn't silently suppress thoughts when no effort level is specified.
|
|
142
127
|
if (model.reasoning) {
|
|
143
128
|
if (options?.reasoning) {
|
|
144
129
|
const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
|
@@ -161,7 +146,18 @@ export function streamGemini(
|
|
|
161
146
|
|
|
162
147
|
config.thinkingConfig = thinkingConfig;
|
|
163
148
|
} else {
|
|
164
|
-
|
|
149
|
+
// If no reasoning level is specified:
|
|
150
|
+
// - For Gemini 3.x/3.5 models, omit thinkingConfig entirely so Vertex AI uses
|
|
151
|
+
// the model's native default level (e.g. MEDIUM for 3.5, HIGH for others).
|
|
152
|
+
// - For Gemini 2.5 models, apply a healthy thinking budget floor (thinking is
|
|
153
|
+
// disabled by default on 2.5).
|
|
154
|
+
const isGemini3 = model.apiId.startsWith("gemini-3");
|
|
155
|
+
if (!isGemini3) {
|
|
156
|
+
config.thinkingConfig = {
|
|
157
|
+
includeThoughts: true,
|
|
158
|
+
thinkingBudget: model.apiId.includes("2.5-pro") ? 2048 : 1024,
|
|
159
|
+
};
|
|
160
|
+
}
|
|
165
161
|
}
|
|
166
162
|
}
|
|
167
163
|
|