@ssweens/pi-vertex 1.0.1 → 1.1.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +19 -22
- package/TEST_COVERAGE.md +13 -0
- package/index.ts +2 -2
- package/models/claude.ts +21 -75
- package/models/gemini.ts +39 -31
- package/models/index.ts +1 -1
- package/models/maas.ts +39 -76
- package/package.json +4 -1
- package/streaming/gemini.ts +198 -89
- package/streaming/maas.ts +350 -53
- package/types.ts +24 -35
- package/utils.ts +163 -58
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [1.1.3] - 2026-03-26
|
|
6
|
+
### Fixed
|
|
7
|
+
- Hardened Claude-on-Vertex replay for mid-session model switching (tool ID normalization, tool result adjacency, thinking signature validation).
|
|
8
|
+
- Prevented Anthropic tool replay errors by inserting a synthetic tool result wherever a replayed tool call is missing one.
|
|
9
|
+
|
|
10
|
+
### Updated
|
|
11
|
+
- Claude 4.6 models use native Anthropic Vertex SDK streaming.
|
|
12
|
+
- Claude 4.6 context window updated to 1M.
|
|
13
|
+
- Model list order in the selector is now alphabetized by ID.
|
|
14
|
+
|
|
15
|
+
## [1.1.2] - 2026-03-24
|
|
16
|
+
### Changed
|
|
17
|
+
- Initial Claude 4.x support on Vertex.
|
package/README.md
CHANGED
|
@@ -22,10 +22,11 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
|
|
|
22
22
|
- **Other MaaS** (20): AI21 Jamba, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
|
|
23
23
|
|
|
24
24
|
- **Unified streaming**: Single provider, multiple model families
|
|
25
|
-
- **Full tool calling support**: All models
|
|
25
|
+
- **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
|
|
26
|
+
- **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
|
|
26
27
|
- **Automatic auth**: Uses Google Application Default Credentials
|
|
27
28
|
- **Region awareness**: Global endpoints where supported, regional where required
|
|
28
|
-
- **Pricing tracking**: Built-in cost per token for all models
|
|
29
|
+
- **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
|
|
29
30
|
|
|
30
31
|
## Installation
|
|
31
32
|
|
|
@@ -115,31 +116,29 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
115
116
|
|
|
116
117
|
| Model | Context | Max Tokens | Input | Reasoning | Price (in/out) |
|
|
117
118
|
|-------|---------|------------|-------|-----------|----------------|
|
|
118
|
-
| gemini-3.1-pro | 1M |
|
|
119
|
-
| gemini-3-
|
|
120
|
-
| gemini-3-
|
|
121
|
-
| gemini-
|
|
122
|
-
| gemini-2.5-
|
|
123
|
-
| gemini-2.5-flash
|
|
119
|
+
| gemini-3.1-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
|
|
120
|
+
| gemini-3.1-flash-lite | 1M | 65,535 | text, image | ✅ | $0.25/$1.50 |
|
|
121
|
+
| gemini-3-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
|
|
122
|
+
| gemini-3-flash | 1M | 65,536 | text, image | ✅ | $0.50/$3.00 |
|
|
123
|
+
| gemini-2.5-pro | 1M | 65,536 | text, image | ✅ | $1.25/$10.00 |
|
|
124
|
+
| gemini-2.5-flash | 1M | 65,536 | text, image | ✅ | $0.30/$2.50 |
|
|
125
|
+
| gemini-2.5-flash-lite | 1M | 65,536 | text, image | ✅ | $0.10/$0.40 |
|
|
124
126
|
| gemini-2.0-flash | 1M | 8,192 | text, image | ❌ | $0.15/$0.60 |
|
|
125
|
-
| gemini-2.0-flash-lite | 1M | 8,192 | text | ❌ | $0.075/$0.30 |
|
|
127
|
+
| gemini-2.0-flash-lite | 1M | 8,192 | text, image | ❌ | $0.075/$0.30 |
|
|
126
128
|
|
|
127
129
|
### Claude Models
|
|
128
130
|
|
|
129
131
|
| Model | Context | Max Tokens | Input | Reasoning | Price (in/out) | Region |
|
|
130
132
|
|-------|---------|------------|-------|-----------|----------------|--------|
|
|
131
|
-
| claude-opus-4-6 |
|
|
132
|
-
| claude-sonnet-4-6 |
|
|
133
|
+
| claude-opus-4-6 | 1M | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
|
|
134
|
+
| claude-sonnet-4-6 | 1M | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
133
135
|
| claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
|
|
134
136
|
| claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
135
137
|
| claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
|
|
136
|
-
| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 |
|
|
137
|
-
| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 |
|
|
138
|
-
| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 |
|
|
139
|
-
| claude-3-
|
|
140
|
-
| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
|
|
141
|
-
| claude-3-5-sonnet | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
|
|
142
|
-
| claude-3-haiku | 200K | 4,096 | text | ❌ | $0.25/$1.25 | us-east5 |
|
|
138
|
+
| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
|
|
139
|
+
| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
|
|
140
|
+
| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
141
|
+
| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | global |
|
|
143
142
|
|
|
144
143
|
### Llama Models
|
|
145
144
|
|
|
@@ -153,8 +152,6 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
153
152
|
|
|
154
153
|
| Model | Context | Publisher | Price (in/out) | Region |
|
|
155
154
|
|-------|---------|-----------|----------------|--------|
|
|
156
|
-
| jamba-1.5-large | 256K | ai21 | $2.00/$8.00 | global |
|
|
157
|
-
| jamba-1.5-mini | 256K | ai21 | $0.20/$0.40 | global |
|
|
158
155
|
| mistral-medium-3 | 128K | mistralai | $0.40/$2.00 | global |
|
|
159
156
|
| mistral-small-3.1 | 128K | mistralai | $0.10/$0.30 | global |
|
|
160
157
|
| mistral-ocr | 30 pages | mistralai | $0.0005/page | global |
|
|
@@ -183,8 +180,7 @@ Models use different endpoints based on availability:
|
|
|
183
180
|
|
|
184
181
|
Default regions by model:
|
|
185
182
|
- Gemini: `global`
|
|
186
|
-
- Claude
|
|
187
|
-
- Claude 4/4.1/3.7/3.5/3: `us-east5`
|
|
183
|
+
- Claude (all): `global`
|
|
188
184
|
- MaaS: `global`
|
|
189
185
|
|
|
190
186
|
Override with:
|
|
@@ -217,6 +213,7 @@ export GOOGLE_CLOUD_LOCATION=us-central1
|
|
|
217
213
|
## Dependencies
|
|
218
214
|
|
|
219
215
|
- `@google/genai`: Google GenAI SDK for Gemini models
|
|
216
|
+
- `@anthropic-ai/vertex-sdk`: Official Anthropic-on-Vertex SDK for Claude models (native streaming)
|
|
220
217
|
- `google-auth-library`: ADC authentication for all models
|
|
221
218
|
- `@mariozechner/pi-ai`: Peer dependency
|
|
222
219
|
- `@mariozechner/pi-coding-agent`: Peer dependency
|
package/TEST_COVERAGE.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Test Coverage
|
|
2
|
+
|
|
3
|
+
## Current Status
|
|
4
|
+
- Automated tests: not yet implemented in this package.
|
|
5
|
+
- Lint/type checks: `npm run check` (currently a no-op placeholder).
|
|
6
|
+
|
|
7
|
+
## Manual Verification
|
|
8
|
+
- Claude 4.6 streaming verified via Anthropic Vertex SDK.
|
|
9
|
+
- Mid-session model switching (tool call replay) verified interactively in pi.
|
|
10
|
+
|
|
11
|
+
## Gaps / Next Steps
|
|
12
|
+
- Add automated integration tests for Anthropic Vertex streaming and tool replay.
|
|
13
|
+
- Add unit tests for message normalization and replay sequencing.
|
package/index.ts
CHANGED
|
@@ -112,8 +112,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
112
112
|
|
|
113
113
|
// Show startup info as a widget that clears on first user input
|
|
114
114
|
const vertexStartupLines = [
|
|
115
|
-
`[pi-vertex] Initializing with project: ${projectId}`,
|
|
116
|
-
`[pi-vertex] Registered ${ALL_MODELS.length} models`,
|
|
115
|
+
` [pi-vertex] Initializing with project: ${projectId}`,
|
|
116
|
+
` [pi-vertex] Registered ${ALL_MODELS.length} models`,
|
|
117
117
|
];
|
|
118
118
|
pi.on("session_start", async (_event, ctx) => {
|
|
119
119
|
ctx.ui.setWidget("pi-vertex-startup", (_tui, theme) => ({
|
package/models/claude.ts
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Claude model definitions for Vertex AI
|
|
3
|
+
* Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
|
|
3
4
|
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
|
|
4
|
-
* All prices per 1M tokens (
|
|
5
|
-
* Cache write prices
|
|
5
|
+
* All prices per 1M tokens (global endpoint, <= 200K input tokens)
|
|
6
|
+
* Cache write prices are for 5-minute TTL
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import type { VertexModelConfig } from "../types.js";
|
|
9
10
|
|
|
10
11
|
export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
11
|
-
// Claude 4.6 series
|
|
12
|
+
// Claude 4.6 series
|
|
12
13
|
{
|
|
13
14
|
id: "claude-opus-4-6",
|
|
14
15
|
name: "Claude Opus 4.6",
|
|
15
16
|
apiId: "claude-opus-4-6",
|
|
16
17
|
publisher: "anthropic",
|
|
17
18
|
endpointType: "maas",
|
|
18
|
-
contextWindow:
|
|
19
|
+
contextWindow: 1000000,
|
|
19
20
|
maxTokens: 32000,
|
|
20
21
|
input: ["text", "image"],
|
|
21
22
|
reasoning: true,
|
|
@@ -34,7 +35,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
34
35
|
apiId: "claude-sonnet-4-6",
|
|
35
36
|
publisher: "anthropic",
|
|
36
37
|
endpointType: "maas",
|
|
37
|
-
contextWindow:
|
|
38
|
+
contextWindow: 1000000,
|
|
38
39
|
maxTokens: 64000,
|
|
39
40
|
input: ["text", "image"],
|
|
40
41
|
reasoning: true,
|
|
@@ -47,7 +48,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
47
48
|
},
|
|
48
49
|
region: "global",
|
|
49
50
|
},
|
|
50
|
-
|
|
51
|
+
|
|
52
|
+
// Claude 4.5 series
|
|
51
53
|
{
|
|
52
54
|
id: "claude-opus-4-5",
|
|
53
55
|
name: "Claude Opus 4.5",
|
|
@@ -105,7 +107,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
105
107
|
},
|
|
106
108
|
region: "global",
|
|
107
109
|
},
|
|
108
|
-
|
|
110
|
+
|
|
111
|
+
// Claude 4.1 series
|
|
109
112
|
{
|
|
110
113
|
id: "claude-opus-4-1",
|
|
111
114
|
name: "Claude Opus 4.1",
|
|
@@ -123,9 +126,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
123
126
|
cacheRead: 1.50,
|
|
124
127
|
cacheWrite: 18.75,
|
|
125
128
|
},
|
|
126
|
-
region: "
|
|
129
|
+
region: "global",
|
|
127
130
|
},
|
|
128
|
-
|
|
131
|
+
|
|
132
|
+
// Claude 4.0 series
|
|
129
133
|
{
|
|
130
134
|
id: "claude-opus-4",
|
|
131
135
|
name: "Claude Opus 4",
|
|
@@ -143,7 +147,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
143
147
|
cacheRead: 1.50,
|
|
144
148
|
cacheWrite: 18.75,
|
|
145
149
|
},
|
|
146
|
-
region: "
|
|
150
|
+
region: "global",
|
|
147
151
|
},
|
|
148
152
|
{
|
|
149
153
|
id: "claude-sonnet-4",
|
|
@@ -162,29 +166,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
162
166
|
cacheRead: 0.30,
|
|
163
167
|
cacheWrite: 3.75,
|
|
164
168
|
},
|
|
165
|
-
region: "
|
|
166
|
-
},
|
|
167
|
-
// Claude 3.7 series - regional pricing
|
|
168
|
-
{
|
|
169
|
-
id: "claude-3-7-sonnet",
|
|
170
|
-
name: "Claude 3.7 Sonnet",
|
|
171
|
-
apiId: "claude-3-7-sonnet@20250219",
|
|
172
|
-
publisher: "anthropic",
|
|
173
|
-
endpointType: "maas",
|
|
174
|
-
contextWindow: 200000,
|
|
175
|
-
maxTokens: 64000,
|
|
176
|
-
input: ["text", "image"],
|
|
177
|
-
reasoning: true,
|
|
178
|
-
tools: true,
|
|
179
|
-
cost: {
|
|
180
|
-
input: 3.0,
|
|
181
|
-
output: 15.0,
|
|
182
|
-
cacheRead: 0.3,
|
|
183
|
-
cacheWrite: 3.75,
|
|
184
|
-
},
|
|
185
|
-
region: "us-east5",
|
|
169
|
+
region: "global",
|
|
186
170
|
},
|
|
187
|
-
|
|
171
|
+
|
|
172
|
+
// Claude 3.5 series
|
|
188
173
|
{
|
|
189
174
|
id: "claude-3-5-sonnet-v2",
|
|
190
175
|
name: "Claude 3.5 Sonnet v2",
|
|
@@ -197,50 +182,11 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
197
182
|
reasoning: false,
|
|
198
183
|
tools: true,
|
|
199
184
|
cost: {
|
|
200
|
-
input: 3.
|
|
201
|
-
output: 15.
|
|
202
|
-
cacheRead: 0.
|
|
203
|
-
cacheWrite: 3.75,
|
|
204
|
-
},
|
|
205
|
-
region: "us-east5",
|
|
206
|
-
},
|
|
207
|
-
{
|
|
208
|
-
id: "claude-3-5-sonnet",
|
|
209
|
-
name: "Claude 3.5 Sonnet",
|
|
210
|
-
apiId: "claude-3-5-sonnet@20240620",
|
|
211
|
-
publisher: "anthropic",
|
|
212
|
-
endpointType: "maas",
|
|
213
|
-
contextWindow: 200000,
|
|
214
|
-
maxTokens: 8192,
|
|
215
|
-
input: ["text", "image"],
|
|
216
|
-
reasoning: false,
|
|
217
|
-
tools: true,
|
|
218
|
-
cost: {
|
|
219
|
-
input: 3.0,
|
|
220
|
-
output: 15.0,
|
|
221
|
-
cacheRead: 0.3,
|
|
185
|
+
input: 3.00,
|
|
186
|
+
output: 15.00,
|
|
187
|
+
cacheRead: 0.30,
|
|
222
188
|
cacheWrite: 3.75,
|
|
223
189
|
},
|
|
224
|
-
region: "
|
|
225
|
-
},
|
|
226
|
-
// Claude 3 Haiku - regional pricing
|
|
227
|
-
{
|
|
228
|
-
id: "claude-3-haiku",
|
|
229
|
-
name: "Claude 3 Haiku",
|
|
230
|
-
apiId: "claude-3-haiku@20240307",
|
|
231
|
-
publisher: "anthropic",
|
|
232
|
-
endpointType: "maas",
|
|
233
|
-
contextWindow: 200000,
|
|
234
|
-
maxTokens: 4096,
|
|
235
|
-
input: ["text"],
|
|
236
|
-
reasoning: false,
|
|
237
|
-
tools: true,
|
|
238
|
-
cost: {
|
|
239
|
-
input: 0.25,
|
|
240
|
-
output: 1.25,
|
|
241
|
-
cacheRead: 0.03,
|
|
242
|
-
cacheWrite: 0.3,
|
|
243
|
-
},
|
|
244
|
-
region: "us-east5",
|
|
190
|
+
region: "global",
|
|
245
191
|
},
|
|
246
192
|
];
|
package/models/gemini.ts
CHANGED
|
@@ -1,77 +1,83 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Gemini model definitions for Vertex AI
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models
|
|
4
|
+
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing
|
|
5
|
+
* All prices per 1M tokens (standard tier, <= 200K input tokens)
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
import type { VertexModelConfig } from "../types.js";
|
|
8
9
|
|
|
9
10
|
export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
11
|
+
// --- Gemini 3.1 (Preview) ---
|
|
10
12
|
{
|
|
11
13
|
id: "gemini-3.1-pro",
|
|
12
14
|
name: "Gemini 3.1 Pro",
|
|
13
15
|
apiId: "gemini-3.1-pro-preview",
|
|
14
16
|
publisher: "google",
|
|
15
17
|
endpointType: "gemini",
|
|
16
|
-
contextWindow:
|
|
17
|
-
maxTokens:
|
|
18
|
+
contextWindow: 1048576,
|
|
19
|
+
maxTokens: 65536,
|
|
18
20
|
input: ["text", "image"],
|
|
19
21
|
reasoning: true,
|
|
20
22
|
tools: true,
|
|
21
23
|
cost: {
|
|
22
24
|
input: 2.00,
|
|
23
25
|
output: 12.00,
|
|
24
|
-
cacheRead: 0,
|
|
26
|
+
cacheRead: 0.20,
|
|
25
27
|
cacheWrite: 0,
|
|
26
28
|
},
|
|
27
29
|
region: "global",
|
|
28
30
|
},
|
|
29
31
|
{
|
|
30
|
-
id: "gemini-3-
|
|
31
|
-
name: "Gemini 3
|
|
32
|
-
apiId: "gemini-3-
|
|
32
|
+
id: "gemini-3.1-flash-lite",
|
|
33
|
+
name: "Gemini 3.1 Flash Lite",
|
|
34
|
+
apiId: "gemini-3.1-flash-lite-preview",
|
|
33
35
|
publisher: "google",
|
|
34
36
|
endpointType: "gemini",
|
|
35
|
-
contextWindow:
|
|
36
|
-
maxTokens:
|
|
37
|
+
contextWindow: 1048576,
|
|
38
|
+
maxTokens: 65535,
|
|
37
39
|
input: ["text", "image"],
|
|
38
40
|
reasoning: true,
|
|
39
41
|
tools: true,
|
|
40
42
|
cost: {
|
|
41
|
-
input:
|
|
42
|
-
output:
|
|
43
|
-
cacheRead: 0.
|
|
43
|
+
input: 0.25,
|
|
44
|
+
output: 1.50,
|
|
45
|
+
cacheRead: 0.025,
|
|
44
46
|
cacheWrite: 0,
|
|
45
47
|
},
|
|
46
48
|
region: "global",
|
|
47
49
|
},
|
|
50
|
+
|
|
51
|
+
// --- Gemini 3 (Preview) ---
|
|
48
52
|
{
|
|
49
53
|
id: "gemini-3-flash",
|
|
50
54
|
name: "Gemini 3 Flash",
|
|
51
55
|
apiId: "gemini-3-flash-preview",
|
|
52
56
|
publisher: "google",
|
|
53
57
|
endpointType: "gemini",
|
|
54
|
-
contextWindow:
|
|
55
|
-
maxTokens:
|
|
58
|
+
contextWindow: 1048576,
|
|
59
|
+
maxTokens: 65536,
|
|
56
60
|
input: ["text", "image"],
|
|
57
61
|
reasoning: true,
|
|
58
62
|
tools: true,
|
|
59
63
|
cost: {
|
|
60
|
-
input: 0.
|
|
61
|
-
output:
|
|
62
|
-
cacheRead: 0.
|
|
64
|
+
input: 0.50,
|
|
65
|
+
output: 3.00,
|
|
66
|
+
cacheRead: 0.05,
|
|
63
67
|
cacheWrite: 0,
|
|
64
68
|
},
|
|
65
69
|
region: "global",
|
|
66
70
|
},
|
|
71
|
+
|
|
72
|
+
// --- Gemini 2.5 (GA) ---
|
|
67
73
|
{
|
|
68
74
|
id: "gemini-2.5-pro",
|
|
69
75
|
name: "Gemini 2.5 Pro",
|
|
70
76
|
apiId: "gemini-2.5-pro",
|
|
71
77
|
publisher: "google",
|
|
72
78
|
endpointType: "gemini",
|
|
73
|
-
contextWindow:
|
|
74
|
-
maxTokens:
|
|
79
|
+
contextWindow: 1048576,
|
|
80
|
+
maxTokens: 65536,
|
|
75
81
|
input: ["text", "image"],
|
|
76
82
|
reasoning: true,
|
|
77
83
|
tools: true,
|
|
@@ -89,15 +95,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
89
95
|
apiId: "gemini-2.5-flash",
|
|
90
96
|
publisher: "google",
|
|
91
97
|
endpointType: "gemini",
|
|
92
|
-
contextWindow:
|
|
93
|
-
maxTokens:
|
|
98
|
+
contextWindow: 1048576,
|
|
99
|
+
maxTokens: 65536,
|
|
94
100
|
input: ["text", "image"],
|
|
95
101
|
reasoning: true,
|
|
96
102
|
tools: true,
|
|
97
103
|
cost: {
|
|
98
104
|
input: 0.30,
|
|
99
105
|
output: 2.50,
|
|
100
|
-
cacheRead: 0.
|
|
106
|
+
cacheRead: 0.03,
|
|
101
107
|
cacheWrite: 0,
|
|
102
108
|
},
|
|
103
109
|
region: "global",
|
|
@@ -108,26 +114,28 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
108
114
|
apiId: "gemini-2.5-flash-lite",
|
|
109
115
|
publisher: "google",
|
|
110
116
|
endpointType: "gemini",
|
|
111
|
-
contextWindow:
|
|
112
|
-
maxTokens:
|
|
117
|
+
contextWindow: 1048576,
|
|
118
|
+
maxTokens: 65536,
|
|
113
119
|
input: ["text", "image"],
|
|
114
120
|
reasoning: true,
|
|
115
121
|
tools: true,
|
|
116
122
|
cost: {
|
|
117
123
|
input: 0.10,
|
|
118
124
|
output: 0.40,
|
|
119
|
-
cacheRead: 0.
|
|
125
|
+
cacheRead: 0.01,
|
|
120
126
|
cacheWrite: 0,
|
|
121
127
|
},
|
|
122
128
|
region: "global",
|
|
123
129
|
},
|
|
130
|
+
|
|
131
|
+
// --- Gemini 2.0 (GA) ---
|
|
124
132
|
{
|
|
125
133
|
id: "gemini-2.0-flash",
|
|
126
134
|
name: "Gemini 2.0 Flash",
|
|
127
135
|
apiId: "gemini-2.0-flash",
|
|
128
136
|
publisher: "google",
|
|
129
137
|
endpointType: "gemini",
|
|
130
|
-
contextWindow:
|
|
138
|
+
contextWindow: 1048576,
|
|
131
139
|
maxTokens: 8192,
|
|
132
140
|
input: ["text", "image"],
|
|
133
141
|
reasoning: false,
|
|
@@ -135,7 +143,7 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
135
143
|
cost: {
|
|
136
144
|
input: 0.15,
|
|
137
145
|
output: 0.60,
|
|
138
|
-
cacheRead: 0
|
|
146
|
+
cacheRead: 0,
|
|
139
147
|
cacheWrite: 0,
|
|
140
148
|
},
|
|
141
149
|
region: "global",
|
|
@@ -146,15 +154,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
146
154
|
apiId: "gemini-2.0-flash-lite",
|
|
147
155
|
publisher: "google",
|
|
148
156
|
endpointType: "gemini",
|
|
149
|
-
contextWindow:
|
|
157
|
+
contextWindow: 1048576,
|
|
150
158
|
maxTokens: 8192,
|
|
151
|
-
input: ["text"],
|
|
159
|
+
input: ["text", "image"],
|
|
152
160
|
reasoning: false,
|
|
153
161
|
tools: true,
|
|
154
162
|
cost: {
|
|
155
163
|
input: 0.075,
|
|
156
164
|
output: 0.30,
|
|
157
|
-
cacheRead: 0
|
|
165
|
+
cacheRead: 0,
|
|
158
166
|
cacheWrite: 0,
|
|
159
167
|
},
|
|
160
168
|
region: "global",
|
package/models/index.ts
CHANGED
|
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
|
|
|
11
11
|
...GEMINI_MODELS,
|
|
12
12
|
...CLAUDE_MODELS,
|
|
13
13
|
...MAAS_MODELS,
|
|
14
|
-
];
|
|
14
|
+
].sort((a, b) => a.id.localeCompare(b.id));
|
|
15
15
|
|
|
16
16
|
export function getModelById(id: string): VertexModelConfig | undefined {
|
|
17
17
|
return ALL_MODELS.find((m) => m.id === id);
|