@ssweens/pi-vertex 1.0.0 → 1.1.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +16 -20
- package/models/claude.ts +19 -73
- package/models/gemini.ts +55 -28
- package/models/index.ts +1 -1
- package/models/maas.ts +39 -76
- package/package.json +1 -1
- package/streaming/gemini.ts +198 -89
- package/streaming/maas.ts +22 -9
- package/types.ts +24 -35
- package/utils.ts +163 -58
package/README.md
CHANGED
|
@@ -22,10 +22,11 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
|
|
|
22
22
|
- **Other MaaS** (20): AI21 Jamba, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
|
|
23
23
|
|
|
24
24
|
- **Unified streaming**: Single provider, multiple model families
|
|
25
|
-
- **Full tool calling support**: All models
|
|
25
|
+
- **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
|
|
26
|
+
- **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
|
|
26
27
|
- **Automatic auth**: Uses Google Application Default Credentials
|
|
27
28
|
- **Region awareness**: Global endpoints where supported, regional where required
|
|
28
|
-
- **Pricing tracking**: Built-in cost per token for all models
|
|
29
|
+
- **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
|
|
29
30
|
|
|
30
31
|
## Installation
|
|
31
32
|
|
|
@@ -115,14 +116,15 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
115
116
|
|
|
116
117
|
| Model | Context | Max Tokens | Input | Reasoning | Price (in/out) |
|
|
117
118
|
|-------|---------|------------|-------|-----------|----------------|
|
|
118
|
-
| gemini-3.1-pro | 1M |
|
|
119
|
-
| gemini-3-
|
|
120
|
-
| gemini-3-
|
|
121
|
-
| gemini-
|
|
122
|
-
| gemini-2.5-
|
|
123
|
-
| gemini-2.5-flash
|
|
119
|
+
| gemini-3.1-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
|
|
120
|
+
| gemini-3.1-flash-lite | 1M | 65,535 | text, image | ✅ | $0.25/$1.50 |
|
|
121
|
+
| gemini-3-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
|
|
122
|
+
| gemini-3-flash | 1M | 65,536 | text, image | ✅ | $0.50/$3.00 |
|
|
123
|
+
| gemini-2.5-pro | 1M | 65,536 | text, image | ✅ | $1.25/$10.00 |
|
|
124
|
+
| gemini-2.5-flash | 1M | 65,536 | text, image | ✅ | $0.30/$2.50 |
|
|
125
|
+
| gemini-2.5-flash-lite | 1M | 65,536 | text, image | ✅ | $0.10/$0.40 |
|
|
124
126
|
| gemini-2.0-flash | 1M | 8,192 | text, image | ❌ | $0.15/$0.60 |
|
|
125
|
-
| gemini-2.0-flash-lite | 1M | 8,192 | text | ❌ | $0.075/$0.30 |
|
|
127
|
+
| gemini-2.0-flash-lite | 1M | 8,192 | text, image | ❌ | $0.075/$0.30 |
|
|
126
128
|
|
|
127
129
|
### Claude Models
|
|
128
130
|
|
|
@@ -133,13 +135,10 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
133
135
|
| claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
|
|
134
136
|
| claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
135
137
|
| claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
|
|
136
|
-
| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 |
|
|
137
|
-
| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 |
|
|
138
|
-
| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 |
|
|
139
|
-
| claude-3-
|
|
140
|
-
| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
|
|
141
|
-
| claude-3-5-sonnet | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
|
|
142
|
-
| claude-3-haiku | 200K | 4,096 | text | ❌ | $0.25/$1.25 | us-east5 |
|
|
138
|
+
| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
|
|
139
|
+
| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
|
|
140
|
+
| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
141
|
+
| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | global |
|
|
143
142
|
|
|
144
143
|
### Llama Models
|
|
145
144
|
|
|
@@ -153,8 +152,6 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
153
152
|
|
|
154
153
|
| Model | Context | Publisher | Price (in/out) | Region |
|
|
155
154
|
|-------|---------|-----------|----------------|--------|
|
|
156
|
-
| jamba-1.5-large | 256K | ai21 | $2.00/$8.00 | global |
|
|
157
|
-
| jamba-1.5-mini | 256K | ai21 | $0.20/$0.40 | global |
|
|
158
155
|
| mistral-medium-3 | 128K | mistralai | $0.40/$2.00 | global |
|
|
159
156
|
| mistral-small-3.1 | 128K | mistralai | $0.10/$0.30 | global |
|
|
160
157
|
| mistral-ocr | 30 pages | mistralai | $0.0005/page | global |
|
|
@@ -183,8 +180,7 @@ Models use different endpoints based on availability:
|
|
|
183
180
|
|
|
184
181
|
Default regions by model:
|
|
185
182
|
- Gemini: `global`
|
|
186
|
-
- Claude
|
|
187
|
-
- Claude 4/4.1/3.7/3.5/3: `us-east5`
|
|
183
|
+
- Claude (all): `global`
|
|
188
184
|
- MaaS: `global`
|
|
189
185
|
|
|
190
186
|
Override with:
|
package/models/claude.ts
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Claude model definitions for Vertex AI
|
|
3
|
+
* Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
|
|
3
4
|
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
|
|
4
|
-
* All prices per 1M tokens (
|
|
5
|
-
* Cache write prices
|
|
5
|
+
* All prices per 1M tokens (global endpoint, <= 200K input tokens)
|
|
6
|
+
* Cache write prices are for 5-minute TTL
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import type { VertexModelConfig } from "../types.js";
|
|
9
10
|
|
|
10
11
|
export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
11
|
-
// Claude 4.6 series
|
|
12
|
+
// Claude 4.6 series
|
|
12
13
|
{
|
|
13
14
|
id: "claude-opus-4-6",
|
|
14
15
|
name: "Claude Opus 4.6",
|
|
@@ -47,7 +48,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
47
48
|
},
|
|
48
49
|
region: "global",
|
|
49
50
|
},
|
|
50
|
-
|
|
51
|
+
|
|
52
|
+
// Claude 4.5 series
|
|
51
53
|
{
|
|
52
54
|
id: "claude-opus-4-5",
|
|
53
55
|
name: "Claude Opus 4.5",
|
|
@@ -105,7 +107,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
105
107
|
},
|
|
106
108
|
region: "global",
|
|
107
109
|
},
|
|
108
|
-
|
|
110
|
+
|
|
111
|
+
// Claude 4.1 series
|
|
109
112
|
{
|
|
110
113
|
id: "claude-opus-4-1",
|
|
111
114
|
name: "Claude Opus 4.1",
|
|
@@ -123,9 +126,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
123
126
|
cacheRead: 1.50,
|
|
124
127
|
cacheWrite: 18.75,
|
|
125
128
|
},
|
|
126
|
-
region: "
|
|
129
|
+
region: "global",
|
|
127
130
|
},
|
|
128
|
-
|
|
131
|
+
|
|
132
|
+
// Claude 4.0 series
|
|
129
133
|
{
|
|
130
134
|
id: "claude-opus-4",
|
|
131
135
|
name: "Claude Opus 4",
|
|
@@ -143,7 +147,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
143
147
|
cacheRead: 1.50,
|
|
144
148
|
cacheWrite: 18.75,
|
|
145
149
|
},
|
|
146
|
-
region: "
|
|
150
|
+
region: "global",
|
|
147
151
|
},
|
|
148
152
|
{
|
|
149
153
|
id: "claude-sonnet-4",
|
|
@@ -162,29 +166,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
162
166
|
cacheRead: 0.30,
|
|
163
167
|
cacheWrite: 3.75,
|
|
164
168
|
},
|
|
165
|
-
region: "
|
|
166
|
-
},
|
|
167
|
-
// Claude 3.7 series - regional pricing
|
|
168
|
-
{
|
|
169
|
-
id: "claude-3-7-sonnet",
|
|
170
|
-
name: "Claude 3.7 Sonnet",
|
|
171
|
-
apiId: "claude-3-7-sonnet@20250219",
|
|
172
|
-
publisher: "anthropic",
|
|
173
|
-
endpointType: "maas",
|
|
174
|
-
contextWindow: 200000,
|
|
175
|
-
maxTokens: 64000,
|
|
176
|
-
input: ["text", "image"],
|
|
177
|
-
reasoning: true,
|
|
178
|
-
tools: true,
|
|
179
|
-
cost: {
|
|
180
|
-
input: 3.0,
|
|
181
|
-
output: 15.0,
|
|
182
|
-
cacheRead: 0.3,
|
|
183
|
-
cacheWrite: 3.75,
|
|
184
|
-
},
|
|
185
|
-
region: "us-east5",
|
|
169
|
+
region: "global",
|
|
186
170
|
},
|
|
187
|
-
|
|
171
|
+
|
|
172
|
+
// Claude 3.5 series
|
|
188
173
|
{
|
|
189
174
|
id: "claude-3-5-sonnet-v2",
|
|
190
175
|
name: "Claude 3.5 Sonnet v2",
|
|
@@ -197,50 +182,11 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
197
182
|
reasoning: false,
|
|
198
183
|
tools: true,
|
|
199
184
|
cost: {
|
|
200
|
-
input: 3.
|
|
201
|
-
output: 15.
|
|
202
|
-
cacheRead: 0.
|
|
203
|
-
cacheWrite: 3.75,
|
|
204
|
-
},
|
|
205
|
-
region: "us-east5",
|
|
206
|
-
},
|
|
207
|
-
{
|
|
208
|
-
id: "claude-3-5-sonnet",
|
|
209
|
-
name: "Claude 3.5 Sonnet",
|
|
210
|
-
apiId: "claude-3-5-sonnet@20240620",
|
|
211
|
-
publisher: "anthropic",
|
|
212
|
-
endpointType: "maas",
|
|
213
|
-
contextWindow: 200000,
|
|
214
|
-
maxTokens: 8192,
|
|
215
|
-
input: ["text", "image"],
|
|
216
|
-
reasoning: false,
|
|
217
|
-
tools: true,
|
|
218
|
-
cost: {
|
|
219
|
-
input: 3.0,
|
|
220
|
-
output: 15.0,
|
|
221
|
-
cacheRead: 0.3,
|
|
185
|
+
input: 3.00,
|
|
186
|
+
output: 15.00,
|
|
187
|
+
cacheRead: 0.30,
|
|
222
188
|
cacheWrite: 3.75,
|
|
223
189
|
},
|
|
224
|
-
region: "
|
|
225
|
-
},
|
|
226
|
-
// Claude 3 Haiku - regional pricing
|
|
227
|
-
{
|
|
228
|
-
id: "claude-3-haiku",
|
|
229
|
-
name: "Claude 3 Haiku",
|
|
230
|
-
apiId: "claude-3-haiku@20240307",
|
|
231
|
-
publisher: "anthropic",
|
|
232
|
-
endpointType: "maas",
|
|
233
|
-
contextWindow: 200000,
|
|
234
|
-
maxTokens: 4096,
|
|
235
|
-
input: ["text"],
|
|
236
|
-
reasoning: false,
|
|
237
|
-
tools: true,
|
|
238
|
-
cost: {
|
|
239
|
-
input: 0.25,
|
|
240
|
-
output: 1.25,
|
|
241
|
-
cacheRead: 0.03,
|
|
242
|
-
cacheWrite: 0.3,
|
|
243
|
-
},
|
|
244
|
-
region: "us-east5",
|
|
190
|
+
region: "global",
|
|
245
191
|
},
|
|
246
192
|
];
|
package/models/gemini.ts
CHANGED
|
@@ -1,46 +1,69 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Gemini model definitions for Vertex AI
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models
|
|
4
|
+
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing
|
|
5
|
+
* All prices per 1M tokens (standard tier, <= 200K input tokens)
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
import type { VertexModelConfig } from "../types.js";
|
|
8
9
|
|
|
9
10
|
export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
11
|
+
// --- Gemini 3.1 (Preview) ---
|
|
10
12
|
{
|
|
11
13
|
id: "gemini-3.1-pro",
|
|
12
14
|
name: "Gemini 3.1 Pro",
|
|
13
15
|
apiId: "gemini-3.1-pro-preview",
|
|
14
16
|
publisher: "google",
|
|
15
17
|
endpointType: "gemini",
|
|
16
|
-
contextWindow:
|
|
17
|
-
maxTokens:
|
|
18
|
+
contextWindow: 1048576,
|
|
19
|
+
maxTokens: 65536,
|
|
18
20
|
input: ["text", "image"],
|
|
19
21
|
reasoning: true,
|
|
20
22
|
tools: true,
|
|
21
23
|
cost: {
|
|
22
24
|
input: 2.00,
|
|
23
25
|
output: 12.00,
|
|
24
|
-
cacheRead: 0,
|
|
26
|
+
cacheRead: 0.20,
|
|
25
27
|
cacheWrite: 0,
|
|
26
28
|
},
|
|
27
29
|
region: "global",
|
|
28
30
|
},
|
|
31
|
+
{
|
|
32
|
+
id: "gemini-3.1-flash-lite",
|
|
33
|
+
name: "Gemini 3.1 Flash Lite",
|
|
34
|
+
apiId: "gemini-3.1-flash-lite-preview",
|
|
35
|
+
publisher: "google",
|
|
36
|
+
endpointType: "gemini",
|
|
37
|
+
contextWindow: 1048576,
|
|
38
|
+
maxTokens: 65535,
|
|
39
|
+
input: ["text", "image"],
|
|
40
|
+
reasoning: true,
|
|
41
|
+
tools: true,
|
|
42
|
+
cost: {
|
|
43
|
+
input: 0.25,
|
|
44
|
+
output: 1.50,
|
|
45
|
+
cacheRead: 0.025,
|
|
46
|
+
cacheWrite: 0,
|
|
47
|
+
},
|
|
48
|
+
region: "global",
|
|
49
|
+
},
|
|
50
|
+
|
|
51
|
+
// --- Gemini 3 (Preview) ---
|
|
29
52
|
{
|
|
30
53
|
id: "gemini-3-pro",
|
|
31
54
|
name: "Gemini 3 Pro",
|
|
32
55
|
apiId: "gemini-3-pro-preview",
|
|
33
56
|
publisher: "google",
|
|
34
57
|
endpointType: "gemini",
|
|
35
|
-
contextWindow:
|
|
36
|
-
maxTokens:
|
|
58
|
+
contextWindow: 1048576,
|
|
59
|
+
maxTokens: 65536,
|
|
37
60
|
input: ["text", "image"],
|
|
38
61
|
reasoning: true,
|
|
39
62
|
tools: true,
|
|
40
63
|
cost: {
|
|
41
|
-
input:
|
|
42
|
-
output:
|
|
43
|
-
cacheRead: 0.
|
|
64
|
+
input: 2.00,
|
|
65
|
+
output: 12.00,
|
|
66
|
+
cacheRead: 0.20,
|
|
44
67
|
cacheWrite: 0,
|
|
45
68
|
},
|
|
46
69
|
region: "global",
|
|
@@ -51,27 +74,29 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
51
74
|
apiId: "gemini-3-flash-preview",
|
|
52
75
|
publisher: "google",
|
|
53
76
|
endpointType: "gemini",
|
|
54
|
-
contextWindow:
|
|
55
|
-
maxTokens:
|
|
77
|
+
contextWindow: 1048576,
|
|
78
|
+
maxTokens: 65536,
|
|
56
79
|
input: ["text", "image"],
|
|
57
80
|
reasoning: true,
|
|
58
81
|
tools: true,
|
|
59
82
|
cost: {
|
|
60
|
-
input: 0.
|
|
61
|
-
output:
|
|
62
|
-
cacheRead: 0.
|
|
83
|
+
input: 0.50,
|
|
84
|
+
output: 3.00,
|
|
85
|
+
cacheRead: 0.05,
|
|
63
86
|
cacheWrite: 0,
|
|
64
87
|
},
|
|
65
88
|
region: "global",
|
|
66
89
|
},
|
|
90
|
+
|
|
91
|
+
// --- Gemini 2.5 (GA) ---
|
|
67
92
|
{
|
|
68
93
|
id: "gemini-2.5-pro",
|
|
69
94
|
name: "Gemini 2.5 Pro",
|
|
70
95
|
apiId: "gemini-2.5-pro",
|
|
71
96
|
publisher: "google",
|
|
72
97
|
endpointType: "gemini",
|
|
73
|
-
contextWindow:
|
|
74
|
-
maxTokens:
|
|
98
|
+
contextWindow: 1048576,
|
|
99
|
+
maxTokens: 65536,
|
|
75
100
|
input: ["text", "image"],
|
|
76
101
|
reasoning: true,
|
|
77
102
|
tools: true,
|
|
@@ -89,15 +114,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
89
114
|
apiId: "gemini-2.5-flash",
|
|
90
115
|
publisher: "google",
|
|
91
116
|
endpointType: "gemini",
|
|
92
|
-
contextWindow:
|
|
93
|
-
maxTokens:
|
|
117
|
+
contextWindow: 1048576,
|
|
118
|
+
maxTokens: 65536,
|
|
94
119
|
input: ["text", "image"],
|
|
95
120
|
reasoning: true,
|
|
96
121
|
tools: true,
|
|
97
122
|
cost: {
|
|
98
123
|
input: 0.30,
|
|
99
124
|
output: 2.50,
|
|
100
|
-
cacheRead: 0.
|
|
125
|
+
cacheRead: 0.03,
|
|
101
126
|
cacheWrite: 0,
|
|
102
127
|
},
|
|
103
128
|
region: "global",
|
|
@@ -108,26 +133,28 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
108
133
|
apiId: "gemini-2.5-flash-lite",
|
|
109
134
|
publisher: "google",
|
|
110
135
|
endpointType: "gemini",
|
|
111
|
-
contextWindow:
|
|
112
|
-
maxTokens:
|
|
136
|
+
contextWindow: 1048576,
|
|
137
|
+
maxTokens: 65536,
|
|
113
138
|
input: ["text", "image"],
|
|
114
139
|
reasoning: true,
|
|
115
140
|
tools: true,
|
|
116
141
|
cost: {
|
|
117
142
|
input: 0.10,
|
|
118
143
|
output: 0.40,
|
|
119
|
-
cacheRead: 0.
|
|
144
|
+
cacheRead: 0.01,
|
|
120
145
|
cacheWrite: 0,
|
|
121
146
|
},
|
|
122
147
|
region: "global",
|
|
123
148
|
},
|
|
149
|
+
|
|
150
|
+
// --- Gemini 2.0 (GA) ---
|
|
124
151
|
{
|
|
125
152
|
id: "gemini-2.0-flash",
|
|
126
153
|
name: "Gemini 2.0 Flash",
|
|
127
154
|
apiId: "gemini-2.0-flash",
|
|
128
155
|
publisher: "google",
|
|
129
156
|
endpointType: "gemini",
|
|
130
|
-
contextWindow:
|
|
157
|
+
contextWindow: 1048576,
|
|
131
158
|
maxTokens: 8192,
|
|
132
159
|
input: ["text", "image"],
|
|
133
160
|
reasoning: false,
|
|
@@ -135,7 +162,7 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
135
162
|
cost: {
|
|
136
163
|
input: 0.15,
|
|
137
164
|
output: 0.60,
|
|
138
|
-
cacheRead: 0
|
|
165
|
+
cacheRead: 0,
|
|
139
166
|
cacheWrite: 0,
|
|
140
167
|
},
|
|
141
168
|
region: "global",
|
|
@@ -146,15 +173,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
146
173
|
apiId: "gemini-2.0-flash-lite",
|
|
147
174
|
publisher: "google",
|
|
148
175
|
endpointType: "gemini",
|
|
149
|
-
contextWindow:
|
|
176
|
+
contextWindow: 1048576,
|
|
150
177
|
maxTokens: 8192,
|
|
151
|
-
input: ["text"],
|
|
178
|
+
input: ["text", "image"],
|
|
152
179
|
reasoning: false,
|
|
153
180
|
tools: true,
|
|
154
181
|
cost: {
|
|
155
182
|
input: 0.075,
|
|
156
183
|
output: 0.30,
|
|
157
|
-
cacheRead: 0
|
|
184
|
+
cacheRead: 0,
|
|
158
185
|
cacheWrite: 0,
|
|
159
186
|
},
|
|
160
187
|
region: "global",
|
package/models/index.ts
CHANGED
|
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
|
|
|
11
11
|
...GEMINI_MODELS,
|
|
12
12
|
...CLAUDE_MODELS,
|
|
13
13
|
...MAAS_MODELS,
|
|
14
|
-
];
|
|
14
|
+
].sort((a, b) => a.name.localeCompare(b.name));
|
|
15
15
|
|
|
16
16
|
export function getModelById(id: string): VertexModelConfig | undefined {
|
|
17
17
|
return ALL_MODELS.find((m) => m.id === id);
|
package/models/maas.ts
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* MaaS (Model-as-a-Service) open model definitions for Vertex AI
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
|
|
4
|
+
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
|
|
5
|
+
* All prices per 1M tokens
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
import type { VertexModelConfig } from "../types.js";
|
|
8
9
|
|
|
9
10
|
export const MAAS_MODELS: VertexModelConfig[] = [
|
|
10
|
-
// Llama
|
|
11
|
+
// --- Meta Llama ---
|
|
11
12
|
{
|
|
12
13
|
id: "llama-4-maverick",
|
|
13
14
|
name: "Llama 4 Maverick",
|
|
@@ -66,7 +67,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
66
67
|
region: "global",
|
|
67
68
|
},
|
|
68
69
|
|
|
69
|
-
// Mistral
|
|
70
|
+
// --- Mistral AI ---
|
|
70
71
|
{
|
|
71
72
|
id: "mistral-medium-3",
|
|
72
73
|
name: "Mistral Medium 3",
|
|
@@ -106,45 +107,45 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
106
107
|
region: "global",
|
|
107
108
|
},
|
|
108
109
|
{
|
|
109
|
-
id: "
|
|
110
|
-
name: "
|
|
111
|
-
apiId: "mistralai/
|
|
110
|
+
id: "codestral-2",
|
|
111
|
+
name: "Codestral 2",
|
|
112
|
+
apiId: "mistralai/codestral-2",
|
|
112
113
|
publisher: "mistralai",
|
|
113
114
|
endpointType: "maas",
|
|
114
|
-
contextWindow:
|
|
115
|
+
contextWindow: 256000,
|
|
115
116
|
maxTokens: 32000,
|
|
116
|
-
input: ["text"
|
|
117
|
+
input: ["text"],
|
|
117
118
|
reasoning: false,
|
|
118
|
-
tools:
|
|
119
|
+
tools: true,
|
|
119
120
|
cost: {
|
|
120
|
-
input: 0.
|
|
121
|
-
output: 0.
|
|
121
|
+
input: 0.30,
|
|
122
|
+
output: 0.90,
|
|
122
123
|
cacheRead: 0,
|
|
123
124
|
cacheWrite: 0,
|
|
124
125
|
},
|
|
125
126
|
region: "global",
|
|
126
127
|
},
|
|
127
128
|
{
|
|
128
|
-
id: "
|
|
129
|
-
name: "
|
|
130
|
-
apiId: "mistralai/
|
|
129
|
+
id: "mistral-ocr",
|
|
130
|
+
name: "Mistral OCR",
|
|
131
|
+
apiId: "mistralai/mistral-ocr-2505",
|
|
131
132
|
publisher: "mistralai",
|
|
132
133
|
endpointType: "maas",
|
|
133
|
-
contextWindow:
|
|
134
|
+
contextWindow: 128000,
|
|
134
135
|
maxTokens: 32000,
|
|
135
|
-
input: ["text"],
|
|
136
|
+
input: ["text", "image"],
|
|
136
137
|
reasoning: false,
|
|
137
|
-
tools:
|
|
138
|
+
tools: false,
|
|
138
139
|
cost: {
|
|
139
|
-
input: 0.
|
|
140
|
-
output: 0.
|
|
140
|
+
input: 0.0005,
|
|
141
|
+
output: 0.0005,
|
|
141
142
|
cacheRead: 0,
|
|
142
143
|
cacheWrite: 0,
|
|
143
144
|
},
|
|
144
145
|
region: "global",
|
|
145
146
|
},
|
|
146
147
|
|
|
147
|
-
// DeepSeek
|
|
148
|
+
// --- DeepSeek ---
|
|
148
149
|
{
|
|
149
150
|
id: "deepseek-v3.2",
|
|
150
151
|
name: "DeepSeek V3.2",
|
|
@@ -202,48 +203,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
202
203
|
},
|
|
203
204
|
region: "global",
|
|
204
205
|
},
|
|
205
|
-
|
|
206
|
-
// AI21 Labs models
|
|
207
|
-
{
|
|
208
|
-
id: "jamba-1.5-large",
|
|
209
|
-
name: "Jamba 1.5 Large",
|
|
210
|
-
apiId: "ai21/jamba-1.5-large",
|
|
211
|
-
publisher: "ai21",
|
|
212
|
-
endpointType: "maas",
|
|
213
|
-
contextWindow: 256000,
|
|
214
|
-
maxTokens: 256000,
|
|
215
|
-
input: ["text"],
|
|
216
|
-
reasoning: false,
|
|
217
|
-
tools: true,
|
|
218
|
-
cost: {
|
|
219
|
-
input: 2.00,
|
|
220
|
-
output: 8.00,
|
|
221
|
-
cacheRead: 0,
|
|
222
|
-
cacheWrite: 0,
|
|
223
|
-
},
|
|
224
|
-
region: "global",
|
|
225
|
-
},
|
|
226
206
|
{
|
|
227
|
-
id: "
|
|
228
|
-
name: "
|
|
229
|
-
apiId: "
|
|
230
|
-
publisher: "
|
|
207
|
+
id: "deepseek-ocr",
|
|
208
|
+
name: "DeepSeek OCR",
|
|
209
|
+
apiId: "deepseek-ai/deepseek-ocr-maas",
|
|
210
|
+
publisher: "deepseek-ai",
|
|
231
211
|
endpointType: "maas",
|
|
232
|
-
contextWindow:
|
|
233
|
-
maxTokens:
|
|
234
|
-
input: ["text"],
|
|
212
|
+
contextWindow: 163840,
|
|
213
|
+
maxTokens: 32000,
|
|
214
|
+
input: ["text", "image"],
|
|
235
215
|
reasoning: false,
|
|
236
|
-
tools:
|
|
216
|
+
tools: false,
|
|
237
217
|
cost: {
|
|
238
|
-
input: 0.
|
|
239
|
-
output:
|
|
218
|
+
input: 0.30,
|
|
219
|
+
output: 1.20,
|
|
240
220
|
cacheRead: 0,
|
|
241
221
|
cacheWrite: 0,
|
|
242
222
|
},
|
|
243
223
|
region: "global",
|
|
244
224
|
},
|
|
245
225
|
|
|
246
|
-
// OpenAI
|
|
226
|
+
// --- OpenAI (gpt-oss) ---
|
|
247
227
|
{
|
|
248
228
|
id: "gpt-oss-120b",
|
|
249
229
|
name: "GPT-OSS 120B",
|
|
@@ -283,28 +263,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
283
263
|
region: "global",
|
|
284
264
|
},
|
|
285
265
|
|
|
286
|
-
//
|
|
287
|
-
{
|
|
288
|
-
id: "deepseek-ocr",
|
|
289
|
-
name: "DeepSeek OCR",
|
|
290
|
-
apiId: "deepseek-ai/deepseek-ocr-maas",
|
|
291
|
-
publisher: "deepseek-ai",
|
|
292
|
-
endpointType: "maas",
|
|
293
|
-
contextWindow: 163840,
|
|
294
|
-
maxTokens: 32000,
|
|
295
|
-
input: ["text", "image"],
|
|
296
|
-
reasoning: false,
|
|
297
|
-
tools: false,
|
|
298
|
-
cost: {
|
|
299
|
-
input: 0.30, // Per page: $0.0003/page
|
|
300
|
-
output: 1.20, // Per page pricing
|
|
301
|
-
cacheRead: 0,
|
|
302
|
-
cacheWrite: 0,
|
|
303
|
-
},
|
|
304
|
-
region: "global",
|
|
305
|
-
},
|
|
306
|
-
|
|
307
|
-
// Qwen models
|
|
266
|
+
// --- Qwen ---
|
|
308
267
|
{
|
|
309
268
|
id: "qwen3-235b",
|
|
310
269
|
name: "Qwen 3 235B",
|
|
@@ -382,7 +341,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
382
341
|
region: "global",
|
|
383
342
|
},
|
|
384
343
|
|
|
385
|
-
//
|
|
344
|
+
// --- Moonshot ---
|
|
386
345
|
{
|
|
387
346
|
id: "kimi-k2-thinking",
|
|
388
347
|
name: "Kimi K2 Thinking",
|
|
@@ -402,6 +361,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
402
361
|
},
|
|
403
362
|
region: "global",
|
|
404
363
|
},
|
|
364
|
+
|
|
365
|
+
// --- MiniMax ---
|
|
405
366
|
{
|
|
406
367
|
id: "minimax-m2",
|
|
407
368
|
name: "MiniMax M2",
|
|
@@ -421,6 +382,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
421
382
|
},
|
|
422
383
|
region: "global",
|
|
423
384
|
},
|
|
385
|
+
|
|
386
|
+
// --- GLM (Zhipu AI) ---
|
|
424
387
|
{
|
|
425
388
|
id: "glm-5",
|
|
426
389
|
name: "GLM 5",
|