@ssweens/pi-vertex 1.0.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.1.3] - 2026-03-26
6
+ ### Fixed
7
+ - Hardened Claude-on-Vertex replay for mid-session model switching (tool ID normalization, tool result adjacency, thinking signature validation).
8
+ - Prevented Anthropic tool replay errors by inserting synthetic tool results when missing.
9
+
10
+ ### Changed
11
+ - Claude 4.6 models use native Anthropic Vertex SDK streaming.
12
+ - Claude 4.6 context window updated to 1M.
13
+ - Model list order in the selector is now alphabetized by ID.
14
+
15
+ ## [1.1.2] - 2026-03-24
16
+ ### Added
17
+ - Initial Claude 4.x support on Vertex.
package/README.md CHANGED
@@ -22,10 +22,11 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
22
22
  - **Other MaaS** (20): AI21 Jamba, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
23
23
 
24
24
  - **Unified streaming**: Single provider, multiple model families
25
- - **Full tool calling support**: All models marked with tools support
25
+ - **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
26
+ - **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
26
27
  - **Automatic auth**: Uses Google Application Default Credentials
27
28
  - **Region awareness**: Global endpoints where supported, regional where required
28
- - **Pricing tracking**: Built-in cost per token for all models
29
+ - **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
29
30
 
30
31
  ## Installation
31
32
 
@@ -115,31 +116,29 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
115
116
 
116
117
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) |
117
118
  |-------|---------|------------|-------|-----------|----------------|
118
- | gemini-3.1-pro | 1M | 64,000 | text, image | ✅ | $2.00/$12.00 |
119
- | gemini-3-pro | 2M | 8,192 | text, image | ✅ | $1.25/$10.00 |
120
- | gemini-3-flash | 1M | 8,192 | text, image | ✅ | $0.15/$0.60 |
121
- | gemini-2.5-pro | 1M | 64,000 | text, image | ✅ | $1.25/$10.00 |
122
- | gemini-2.5-flash | 1M | 64,000 | text, image | ✅ | $0.30/$2.50 |
123
- | gemini-2.5-flash-lite | 1M | 64,000 | text, image | ✅ | $0.10/$0.40 |
119
+ | gemini-3.1-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
120
+ | gemini-3.1-flash-lite | 1M | 65,535 | text, image | ✅ | $0.25/$1.50 |
121
+ | gemini-3-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
122
+ | gemini-3-flash | 1M | 65,536 | text, image | ✅ | $0.50/$3.00 |
123
+ | gemini-2.5-pro | 1M | 65,536 | text, image | ✅ | $1.25/$10.00 |
124
+ | gemini-2.5-flash | 1M | 65,536 | text, image | ✅ | $0.30/$2.50 |
125
+ | gemini-2.5-flash-lite | 1M | 65,536 | text, image | ✅ | $0.10/$0.40 |
124
126
  | gemini-2.0-flash | 1M | 8,192 | text, image | ❌ | $0.15/$0.60 |
125
- | gemini-2.0-flash-lite | 1M | 8,192 | text | ❌ | $0.075/$0.30 |
127
+ | gemini-2.0-flash-lite | 1M | 8,192 | text, image | ❌ | $0.075/$0.30 |
126
128
 
127
129
  ### Claude Models
128
130
 
129
131
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) | Region |
130
132
  |-------|---------|------------|-------|-----------|----------------|--------|
131
- | claude-opus-4-6 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
132
- | claude-sonnet-4-6 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
133
+ | claude-opus-4-6 | 1M | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
+ | claude-sonnet-4-6 | 1M | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
133
135
  | claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
136
  | claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
135
137
  | claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
136
- | claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
137
- | claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
138
- | claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | us-east5 |
139
- | claude-3-7-sonnet | 200K | 64,000 | text, image | | $3.00/$15.00 | us-east5 |
140
- | claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
141
- | claude-3-5-sonnet | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
142
- | claude-3-haiku | 200K | 4,096 | text | ❌ | $0.25/$1.25 | us-east5 |
138
+ | claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
139
+ | claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
140
+ | claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
141
+ | claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | global |
143
142
 
144
143
  ### Llama Models
145
144
 
@@ -153,8 +152,6 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
153
152
 
154
153
  | Model | Context | Publisher | Price (in/out) | Region |
155
154
  |-------|---------|-----------|----------------|--------|
156
- | jamba-1.5-large | 256K | ai21 | $2.00/$8.00 | global |
157
- | jamba-1.5-mini | 256K | ai21 | $0.20/$0.40 | global |
158
155
  | mistral-medium-3 | 128K | mistralai | $0.40/$2.00 | global |
159
156
  | mistral-small-3.1 | 128K | mistralai | $0.10/$0.30 | global |
160
157
  | mistral-ocr | 30 pages | mistralai | $0.0005/page | global |
@@ -183,8 +180,7 @@ Models use different endpoints based on availability:
183
180
 
184
181
  Default regions by model:
185
182
  - Gemini: `global`
186
- - Claude 4.6/4.5: `global`
187
- - Claude 4/4.1/3.7/3.5/3: `us-east5`
183
+ - Claude (all): `global`
188
184
  - MaaS: `global`
189
185
 
190
186
  Override with:
@@ -217,6 +213,7 @@ export GOOGLE_CLOUD_LOCATION=us-central1
217
213
  ## Dependencies
218
214
 
219
215
  - `@google/genai`: Google GenAI SDK for Gemini models
216
+ - `@anthropic-ai/vertex-sdk`: Official Anthropic-on-Vertex SDK for Claude models (native streaming)
220
217
  - `google-auth-library`: ADC authentication for all models
221
218
  - `@mariozechner/pi-ai`: Peer dependency
222
219
  - `@mariozechner/pi-coding-agent`: Peer dependency
package/TESTING.md ADDED
@@ -0,0 +1,13 @@
1
+ # Test Coverage
2
+
3
+ ## Current Status
4
+ - Automated tests: not yet implemented in this package.
5
+ - Lint/type checks: `npm run check` (currently a no-op placeholder).
6
+
7
+ ## Manual Verification
8
+ - Claude 4.6 streaming verified via Anthropic Vertex SDK.
9
+ - Mid-session model switching (tool call replay) verified interactively in pi.
10
+
11
+ ## Gaps / Next Steps
12
+ - Add automated integration tests for Anthropic Vertex streaming and tool replay.
13
+ - Add unit tests for message normalization and replay sequencing.
package/index.ts CHANGED
@@ -112,8 +112,8 @@ export default function (pi: ExtensionAPI) {
112
112
 
113
113
  // Show startup info as a widget that clears on first user input
114
114
  const vertexStartupLines = [
115
- `[pi-vertex] Initializing with project: ${projectId}`,
116
- `[pi-vertex] Registered ${ALL_MODELS.length} models`,
115
+ ` [pi-vertex] Initializing with project: ${projectId}`,
116
+ ` [pi-vertex] Registered ${ALL_MODELS.length} models`,
117
117
  ];
118
118
  pi.on("session_start", async (_event, ctx) => {
119
119
  ctx.ui.setWidget("pi-vertex-startup", (_tui, theme) => ({
package/models/claude.ts CHANGED
@@ -1,21 +1,22 @@
1
1
  /**
2
2
  * Claude model definitions for Vertex AI
3
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
3
4
  * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
4
- * All prices per 1M tokens (as of Feb 2025)
5
- * Cache write prices shown are for 5-minute TTL
5
+ * All prices per 1M tokens (global endpoint, <= 200K input tokens)
6
+ * Cache write prices are for 5-minute TTL
6
7
  */
7
8
 
8
9
  import type { VertexModelConfig } from "../types.js";
9
10
 
10
11
  export const CLAUDE_MODELS: VertexModelConfig[] = [
11
- // Claude 4.6 series - latest, supports global endpoint
12
+ // Claude 4.6 series
12
13
  {
13
14
  id: "claude-opus-4-6",
14
15
  name: "Claude Opus 4.6",
15
16
  apiId: "claude-opus-4-6",
16
17
  publisher: "anthropic",
17
18
  endpointType: "maas",
18
- contextWindow: 200000,
19
+ contextWindow: 1000000,
19
20
  maxTokens: 32000,
20
21
  input: ["text", "image"],
21
22
  reasoning: true,
@@ -34,7 +35,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
34
35
  apiId: "claude-sonnet-4-6",
35
36
  publisher: "anthropic",
36
37
  endpointType: "maas",
37
- contextWindow: 200000,
38
+ contextWindow: 1000000,
38
39
  maxTokens: 64000,
39
40
  input: ["text", "image"],
40
41
  reasoning: true,
@@ -47,7 +48,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
47
48
  },
48
49
  region: "global",
49
50
  },
50
- // Claude 4.5 series - supports global endpoint
51
+
52
+ // Claude 4.5 series
51
53
  {
52
54
  id: "claude-opus-4-5",
53
55
  name: "Claude Opus 4.5",
@@ -105,7 +107,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
105
107
  },
106
108
  region: "global",
107
109
  },
108
- // Claude 4.1 series - regional pricing
110
+
111
+ // Claude 4.1 series
109
112
  {
110
113
  id: "claude-opus-4-1",
111
114
  name: "Claude Opus 4.1",
@@ -123,9 +126,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
123
126
  cacheRead: 1.50,
124
127
  cacheWrite: 18.75,
125
128
  },
126
- region: "us-east5",
129
+ region: "global",
127
130
  },
128
- // Claude 4.0 series - regional pricing
131
+
132
+ // Claude 4.0 series
129
133
  {
130
134
  id: "claude-opus-4",
131
135
  name: "Claude Opus 4",
@@ -143,7 +147,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
143
147
  cacheRead: 1.50,
144
148
  cacheWrite: 18.75,
145
149
  },
146
- region: "us-east5",
150
+ region: "global",
147
151
  },
148
152
  {
149
153
  id: "claude-sonnet-4",
@@ -162,29 +166,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
162
166
  cacheRead: 0.30,
163
167
  cacheWrite: 3.75,
164
168
  },
165
- region: "us-east5",
166
- },
167
- // Claude 3.7 series - regional pricing
168
- {
169
- id: "claude-3-7-sonnet",
170
- name: "Claude 3.7 Sonnet",
171
- apiId: "claude-3-7-sonnet@20250219",
172
- publisher: "anthropic",
173
- endpointType: "maas",
174
- contextWindow: 200000,
175
- maxTokens: 64000,
176
- input: ["text", "image"],
177
- reasoning: true,
178
- tools: true,
179
- cost: {
180
- input: 3.0,
181
- output: 15.0,
182
- cacheRead: 0.3,
183
- cacheWrite: 3.75,
184
- },
185
- region: "us-east5",
169
+ region: "global",
186
170
  },
187
- // Claude 3.5 series - regional pricing
171
+
172
+ // Claude 3.5 series
188
173
  {
189
174
  id: "claude-3-5-sonnet-v2",
190
175
  name: "Claude 3.5 Sonnet v2",
@@ -197,50 +182,11 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
197
182
  reasoning: false,
198
183
  tools: true,
199
184
  cost: {
200
- input: 3.0,
201
- output: 15.0,
202
- cacheRead: 0.3,
203
- cacheWrite: 3.75,
204
- },
205
- region: "us-east5",
206
- },
207
- {
208
- id: "claude-3-5-sonnet",
209
- name: "Claude 3.5 Sonnet",
210
- apiId: "claude-3-5-sonnet@20240620",
211
- publisher: "anthropic",
212
- endpointType: "maas",
213
- contextWindow: 200000,
214
- maxTokens: 8192,
215
- input: ["text", "image"],
216
- reasoning: false,
217
- tools: true,
218
- cost: {
219
- input: 3.0,
220
- output: 15.0,
221
- cacheRead: 0.3,
185
+ input: 3.00,
186
+ output: 15.00,
187
+ cacheRead: 0.30,
222
188
  cacheWrite: 3.75,
223
189
  },
224
- region: "us-east5",
225
- },
226
- // Claude 3 Haiku - regional pricing
227
- {
228
- id: "claude-3-haiku",
229
- name: "Claude 3 Haiku",
230
- apiId: "claude-3-haiku@20240307",
231
- publisher: "anthropic",
232
- endpointType: "maas",
233
- contextWindow: 200000,
234
- maxTokens: 4096,
235
- input: ["text"],
236
- reasoning: false,
237
- tools: true,
238
- cost: {
239
- input: 0.25,
240
- output: 1.25,
241
- cacheRead: 0.03,
242
- cacheWrite: 0.3,
243
- },
244
- region: "us-east5",
190
+ region: "global",
245
191
  },
246
192
  ];
package/models/gemini.ts CHANGED
@@ -1,77 +1,83 @@
1
1
  /**
2
2
  * Gemini model definitions for Vertex AI
3
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models
4
- * All prices per 1M tokens (Standard tier pricing, as of Feb 2026)
3
+ * Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models
4
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing
5
+ * All prices per 1M tokens (standard tier, <= 200K input tokens)
5
6
  */
6
7
 
7
8
  import type { VertexModelConfig } from "../types.js";
8
9
 
9
10
  export const GEMINI_MODELS: VertexModelConfig[] = [
11
+ // --- Gemini 3.1 (Preview) ---
10
12
  {
11
13
  id: "gemini-3.1-pro",
12
14
  name: "Gemini 3.1 Pro",
13
15
  apiId: "gemini-3.1-pro-preview",
14
16
  publisher: "google",
15
17
  endpointType: "gemini",
16
- contextWindow: 1000000,
17
- maxTokens: 64000,
18
+ contextWindow: 1048576,
19
+ maxTokens: 65536,
18
20
  input: ["text", "image"],
19
21
  reasoning: true,
20
22
  tools: true,
21
23
  cost: {
22
24
  input: 2.00,
23
25
  output: 12.00,
24
- cacheRead: 0,
26
+ cacheRead: 0.20,
25
27
  cacheWrite: 0,
26
28
  },
27
29
  region: "global",
28
30
  },
29
31
  {
30
- id: "gemini-3-pro",
31
- name: "Gemini 3 Pro",
32
- apiId: "gemini-3-pro-preview",
32
+ id: "gemini-3.1-flash-lite",
33
+ name: "Gemini 3.1 Flash Lite",
34
+ apiId: "gemini-3.1-flash-lite-preview",
33
35
  publisher: "google",
34
36
  endpointType: "gemini",
35
- contextWindow: 2000000,
36
- maxTokens: 8192,
37
+ contextWindow: 1048576,
38
+ maxTokens: 65535,
37
39
  input: ["text", "image"],
38
40
  reasoning: true,
39
41
  tools: true,
40
42
  cost: {
41
- input: 1.25,
42
- output: 10.00,
43
- cacheRead: 0.125,
43
+ input: 0.25,
44
+ output: 1.50,
45
+ cacheRead: 0.025,
44
46
  cacheWrite: 0,
45
47
  },
46
48
  region: "global",
47
49
  },
50
+
51
+ // --- Gemini 3 (Preview) ---
48
52
  {
49
53
  id: "gemini-3-flash",
50
54
  name: "Gemini 3 Flash",
51
55
  apiId: "gemini-3-flash-preview",
52
56
  publisher: "google",
53
57
  endpointType: "gemini",
54
- contextWindow: 1000000,
55
- maxTokens: 8192,
58
+ contextWindow: 1048576,
59
+ maxTokens: 65536,
56
60
  input: ["text", "image"],
57
61
  reasoning: true,
58
62
  tools: true,
59
63
  cost: {
60
- input: 0.15,
61
- output: 0.60,
62
- cacheRead: 0.0375,
64
+ input: 0.50,
65
+ output: 3.00,
66
+ cacheRead: 0.05,
63
67
  cacheWrite: 0,
64
68
  },
65
69
  region: "global",
66
70
  },
71
+
72
+ // --- Gemini 2.5 (GA) ---
67
73
  {
68
74
  id: "gemini-2.5-pro",
69
75
  name: "Gemini 2.5 Pro",
70
76
  apiId: "gemini-2.5-pro",
71
77
  publisher: "google",
72
78
  endpointType: "gemini",
73
- contextWindow: 1000000,
74
- maxTokens: 64000,
79
+ contextWindow: 1048576,
80
+ maxTokens: 65536,
75
81
  input: ["text", "image"],
76
82
  reasoning: true,
77
83
  tools: true,
@@ -89,15 +95,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
89
95
  apiId: "gemini-2.5-flash",
90
96
  publisher: "google",
91
97
  endpointType: "gemini",
92
- contextWindow: 1000000,
93
- maxTokens: 64000,
98
+ contextWindow: 1048576,
99
+ maxTokens: 65536,
94
100
  input: ["text", "image"],
95
101
  reasoning: true,
96
102
  tools: true,
97
103
  cost: {
98
104
  input: 0.30,
99
105
  output: 2.50,
100
- cacheRead: 0.030,
106
+ cacheRead: 0.03,
101
107
  cacheWrite: 0,
102
108
  },
103
109
  region: "global",
@@ -108,26 +114,28 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
108
114
  apiId: "gemini-2.5-flash-lite",
109
115
  publisher: "google",
110
116
  endpointType: "gemini",
111
- contextWindow: 1000000,
112
- maxTokens: 64000,
117
+ contextWindow: 1048576,
118
+ maxTokens: 65536,
113
119
  input: ["text", "image"],
114
120
  reasoning: true,
115
121
  tools: true,
116
122
  cost: {
117
123
  input: 0.10,
118
124
  output: 0.40,
119
- cacheRead: 0.010,
125
+ cacheRead: 0.01,
120
126
  cacheWrite: 0,
121
127
  },
122
128
  region: "global",
123
129
  },
130
+
131
+ // --- Gemini 2.0 (GA) ---
124
132
  {
125
133
  id: "gemini-2.0-flash",
126
134
  name: "Gemini 2.0 Flash",
127
135
  apiId: "gemini-2.0-flash",
128
136
  publisher: "google",
129
137
  endpointType: "gemini",
130
- contextWindow: 1000000,
138
+ contextWindow: 1048576,
131
139
  maxTokens: 8192,
132
140
  input: ["text", "image"],
133
141
  reasoning: false,
@@ -135,7 +143,7 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
135
143
  cost: {
136
144
  input: 0.15,
137
145
  output: 0.60,
138
- cacheRead: 0.025,
146
+ cacheRead: 0,
139
147
  cacheWrite: 0,
140
148
  },
141
149
  region: "global",
@@ -146,15 +154,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
146
154
  apiId: "gemini-2.0-flash-lite",
147
155
  publisher: "google",
148
156
  endpointType: "gemini",
149
- contextWindow: 1000000,
157
+ contextWindow: 1048576,
150
158
  maxTokens: 8192,
151
- input: ["text"],
159
+ input: ["text", "image"],
152
160
  reasoning: false,
153
161
  tools: true,
154
162
  cost: {
155
163
  input: 0.075,
156
164
  output: 0.30,
157
- cacheRead: 0.01875,
165
+ cacheRead: 0,
158
166
  cacheWrite: 0,
159
167
  },
160
168
  region: "global",
package/models/index.ts CHANGED
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
11
11
  ...GEMINI_MODELS,
12
12
  ...CLAUDE_MODELS,
13
13
  ...MAAS_MODELS,
14
- ];
14
+ ].sort((a, b) => a.id.localeCompare(b.id));
15
15
 
16
16
  export function getModelById(id: string): VertexModelConfig | undefined {
17
17
  return ALL_MODELS.find((m) => m.id === id);