@ssweens/pi-vertex 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,10 +22,11 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
22
22
  - **Other MaaS** (20): AI21 Jamba, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
23
23
 
24
24
  - **Unified streaming**: Single provider, multiple model families
25
- - **Full tool calling support**: All models marked with tools support
25
+ - **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
26
+ - **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
26
27
  - **Automatic auth**: Uses Google Application Default Credentials
27
28
  - **Region awareness**: Global endpoints where supported, regional where required
28
- - **Pricing tracking**: Built-in cost per token for all models
29
+ - **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
29
30
 
30
31
  ## Installation
31
32
 
@@ -115,14 +116,15 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
115
116
 
116
117
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) |
117
118
  |-------|---------|------------|-------|-----------|----------------|
118
- | gemini-3.1-pro | 1M | 64,000 | text, image | ✅ | $2.00/$12.00 |
119
- | gemini-3-pro | 2M | 8,192 | text, image | ✅ | $1.25/$10.00 |
120
- | gemini-3-flash | 1M | 8,192 | text, image | ✅ | $0.15/$0.60 |
121
- | gemini-2.5-pro | 1M | 64,000 | text, image | ✅ | $1.25/$10.00 |
122
- | gemini-2.5-flash | 1M | 64,000 | text, image | ✅ | $0.30/$2.50 |
123
- | gemini-2.5-flash-lite | 1M | 64,000 | text, image | ✅ | $0.10/$0.40 |
119
+ | gemini-3.1-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
120
+ | gemini-3.1-flash-lite | 1M | 65,535 | text, image | ✅ | $0.25/$1.50 |
121
+ | gemini-3-pro | 1M | 65,536 | text, image | ✅ | $2.00/$12.00 |
122
+ | gemini-3-flash | 1M | 65,536 | text, image | ✅ | $0.50/$3.00 |
123
+ | gemini-2.5-pro | 1M | 65,536 | text, image | ✅ | $1.25/$10.00 |
124
+ | gemini-2.5-flash | 1M | 65,536 | text, image | ✅ | $0.30/$2.50 |
125
+ | gemini-2.5-flash-lite | 1M | 65,536 | text, image | ✅ | $0.10/$0.40 |
124
126
  | gemini-2.0-flash | 1M | 8,192 | text, image | ❌ | $0.15/$0.60 |
125
- | gemini-2.0-flash-lite | 1M | 8,192 | text | ❌ | $0.075/$0.30 |
127
+ | gemini-2.0-flash-lite | 1M | 8,192 | text, image | ❌ | $0.075/$0.30 |
126
128
 
127
129
  ### Claude Models
128
130
 
@@ -133,13 +135,10 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
133
135
  | claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
136
  | claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
135
137
  | claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
136
- | claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
137
- | claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | us-east5 |
138
- | claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | us-east5 |
139
- | claude-3-7-sonnet | 200K | 64,000 | text, image | | $3.00/$15.00 | us-east5 |
140
- | claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
141
- | claude-3-5-sonnet | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | us-east5 |
142
- | claude-3-haiku | 200K | 4,096 | text | ❌ | $0.25/$1.25 | us-east5 |
138
+ | claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
139
+ | claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | global |
140
+ | claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
141
+ | claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | global |
143
142
 
144
143
  ### Llama Models
145
144
 
@@ -153,8 +152,6 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
153
152
 
154
153
  | Model | Context | Publisher | Price (in/out) | Region |
155
154
  |-------|---------|-----------|----------------|--------|
156
- | jamba-1.5-large | 256K | ai21 | $2.00/$8.00 | global |
157
- | jamba-1.5-mini | 256K | ai21 | $0.20/$0.40 | global |
158
155
  | mistral-medium-3 | 128K | mistralai | $0.40/$2.00 | global |
159
156
  | mistral-small-3.1 | 128K | mistralai | $0.10/$0.30 | global |
160
157
  | mistral-ocr | 30 pages | mistralai | $0.0005/page | global |
@@ -183,8 +180,7 @@ Models use different endpoints based on availability:
183
180
 
184
181
  Default regions by model:
185
182
  - Gemini: `global`
186
- - Claude 4.6/4.5: `global`
187
- - Claude 4/4.1/3.7/3.5/3: `us-east5`
183
+ - Claude (all): `global`
188
184
  - MaaS: `global`
189
185
 
190
186
  Override with:
package/models/claude.ts CHANGED
@@ -1,14 +1,15 @@
1
1
  /**
2
2
  * Claude model definitions for Vertex AI
3
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
3
4
  * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
4
- * All prices per 1M tokens (as of Feb 2025)
5
- * Cache write prices shown are for 5-minute TTL
5
+ * All prices per 1M tokens (global endpoint, <= 200K input tokens)
6
+ * Cache write prices are for 5-minute TTL
6
7
  */
7
8
 
8
9
  import type { VertexModelConfig } from "../types.js";
9
10
 
10
11
  export const CLAUDE_MODELS: VertexModelConfig[] = [
11
- // Claude 4.6 series - latest, supports global endpoint
12
+ // Claude 4.6 series
12
13
  {
13
14
  id: "claude-opus-4-6",
14
15
  name: "Claude Opus 4.6",
@@ -47,7 +48,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
47
48
  },
48
49
  region: "global",
49
50
  },
50
- // Claude 4.5 series - supports global endpoint
51
+
52
+ // Claude 4.5 series
51
53
  {
52
54
  id: "claude-opus-4-5",
53
55
  name: "Claude Opus 4.5",
@@ -105,7 +107,8 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
105
107
  },
106
108
  region: "global",
107
109
  },
108
- // Claude 4.1 series - regional pricing
110
+
111
+ // Claude 4.1 series
109
112
  {
110
113
  id: "claude-opus-4-1",
111
114
  name: "Claude Opus 4.1",
@@ -123,9 +126,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
123
126
  cacheRead: 1.50,
124
127
  cacheWrite: 18.75,
125
128
  },
126
- region: "us-east5",
129
+ region: "global",
127
130
  },
128
- // Claude 4.0 series - regional pricing
131
+
132
+ // Claude 4.0 series
129
133
  {
130
134
  id: "claude-opus-4",
131
135
  name: "Claude Opus 4",
@@ -143,7 +147,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
143
147
  cacheRead: 1.50,
144
148
  cacheWrite: 18.75,
145
149
  },
146
- region: "us-east5",
150
+ region: "global",
147
151
  },
148
152
  {
149
153
  id: "claude-sonnet-4",
@@ -162,29 +166,10 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
162
166
  cacheRead: 0.30,
163
167
  cacheWrite: 3.75,
164
168
  },
165
- region: "us-east5",
166
- },
167
- // Claude 3.7 series - regional pricing
168
- {
169
- id: "claude-3-7-sonnet",
170
- name: "Claude 3.7 Sonnet",
171
- apiId: "claude-3-7-sonnet@20250219",
172
- publisher: "anthropic",
173
- endpointType: "maas",
174
- contextWindow: 200000,
175
- maxTokens: 64000,
176
- input: ["text", "image"],
177
- reasoning: true,
178
- tools: true,
179
- cost: {
180
- input: 3.0,
181
- output: 15.0,
182
- cacheRead: 0.3,
183
- cacheWrite: 3.75,
184
- },
185
- region: "us-east5",
169
+ region: "global",
186
170
  },
187
- // Claude 3.5 series - regional pricing
171
+
172
+ // Claude 3.5 series
188
173
  {
189
174
  id: "claude-3-5-sonnet-v2",
190
175
  name: "Claude 3.5 Sonnet v2",
@@ -197,50 +182,11 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
197
182
  reasoning: false,
198
183
  tools: true,
199
184
  cost: {
200
- input: 3.0,
201
- output: 15.0,
202
- cacheRead: 0.3,
203
- cacheWrite: 3.75,
204
- },
205
- region: "us-east5",
206
- },
207
- {
208
- id: "claude-3-5-sonnet",
209
- name: "Claude 3.5 Sonnet",
210
- apiId: "claude-3-5-sonnet@20240620",
211
- publisher: "anthropic",
212
- endpointType: "maas",
213
- contextWindow: 200000,
214
- maxTokens: 8192,
215
- input: ["text", "image"],
216
- reasoning: false,
217
- tools: true,
218
- cost: {
219
- input: 3.0,
220
- output: 15.0,
221
- cacheRead: 0.3,
185
+ input: 3.00,
186
+ output: 15.00,
187
+ cacheRead: 0.30,
222
188
  cacheWrite: 3.75,
223
189
  },
224
- region: "us-east5",
225
- },
226
- // Claude 3 Haiku - regional pricing
227
- {
228
- id: "claude-3-haiku",
229
- name: "Claude 3 Haiku",
230
- apiId: "claude-3-haiku@20240307",
231
- publisher: "anthropic",
232
- endpointType: "maas",
233
- contextWindow: 200000,
234
- maxTokens: 4096,
235
- input: ["text"],
236
- reasoning: false,
237
- tools: true,
238
- cost: {
239
- input: 0.25,
240
- output: 1.25,
241
- cacheRead: 0.03,
242
- cacheWrite: 0.3,
243
- },
244
- region: "us-east5",
190
+ region: "global",
245
191
  },
246
192
  ];
package/models/gemini.ts CHANGED
@@ -1,46 +1,69 @@
1
1
  /**
2
2
  * Gemini model definitions for Vertex AI
3
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models
4
- * All prices per 1M tokens (Standard tier pricing, as of Feb 2026)
3
+ * Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models
4
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing
5
+ * All prices per 1M tokens (standard tier, <= 200K input tokens)
5
6
  */
6
7
 
7
8
  import type { VertexModelConfig } from "../types.js";
8
9
 
9
10
  export const GEMINI_MODELS: VertexModelConfig[] = [
11
+ // --- Gemini 3.1 (Preview) ---
10
12
  {
11
13
  id: "gemini-3.1-pro",
12
14
  name: "Gemini 3.1 Pro",
13
15
  apiId: "gemini-3.1-pro-preview",
14
16
  publisher: "google",
15
17
  endpointType: "gemini",
16
- contextWindow: 1000000,
17
- maxTokens: 64000,
18
+ contextWindow: 1048576,
19
+ maxTokens: 65536,
18
20
  input: ["text", "image"],
19
21
  reasoning: true,
20
22
  tools: true,
21
23
  cost: {
22
24
  input: 2.00,
23
25
  output: 12.00,
24
- cacheRead: 0,
26
+ cacheRead: 0.20,
25
27
  cacheWrite: 0,
26
28
  },
27
29
  region: "global",
28
30
  },
31
+ {
32
+ id: "gemini-3.1-flash-lite",
33
+ name: "Gemini 3.1 Flash Lite",
34
+ apiId: "gemini-3.1-flash-lite-preview",
35
+ publisher: "google",
36
+ endpointType: "gemini",
37
+ contextWindow: 1048576,
38
+ maxTokens: 65535,
39
+ input: ["text", "image"],
40
+ reasoning: true,
41
+ tools: true,
42
+ cost: {
43
+ input: 0.25,
44
+ output: 1.50,
45
+ cacheRead: 0.025,
46
+ cacheWrite: 0,
47
+ },
48
+ region: "global",
49
+ },
50
+
51
+ // --- Gemini 3 (Preview) ---
29
52
  {
30
53
  id: "gemini-3-pro",
31
54
  name: "Gemini 3 Pro",
32
55
  apiId: "gemini-3-pro-preview",
33
56
  publisher: "google",
34
57
  endpointType: "gemini",
35
- contextWindow: 2000000,
36
- maxTokens: 8192,
58
+ contextWindow: 1048576,
59
+ maxTokens: 65536,
37
60
  input: ["text", "image"],
38
61
  reasoning: true,
39
62
  tools: true,
40
63
  cost: {
41
- input: 1.25,
42
- output: 10.00,
43
- cacheRead: 0.125,
64
+ input: 2.00,
65
+ output: 12.00,
66
+ cacheRead: 0.20,
44
67
  cacheWrite: 0,
45
68
  },
46
69
  region: "global",
@@ -51,27 +74,29 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
51
74
  apiId: "gemini-3-flash-preview",
52
75
  publisher: "google",
53
76
  endpointType: "gemini",
54
- contextWindow: 1000000,
55
- maxTokens: 8192,
77
+ contextWindow: 1048576,
78
+ maxTokens: 65536,
56
79
  input: ["text", "image"],
57
80
  reasoning: true,
58
81
  tools: true,
59
82
  cost: {
60
- input: 0.15,
61
- output: 0.60,
62
- cacheRead: 0.0375,
83
+ input: 0.50,
84
+ output: 3.00,
85
+ cacheRead: 0.05,
63
86
  cacheWrite: 0,
64
87
  },
65
88
  region: "global",
66
89
  },
90
+
91
+ // --- Gemini 2.5 (GA) ---
67
92
  {
68
93
  id: "gemini-2.5-pro",
69
94
  name: "Gemini 2.5 Pro",
70
95
  apiId: "gemini-2.5-pro",
71
96
  publisher: "google",
72
97
  endpointType: "gemini",
73
- contextWindow: 1000000,
74
- maxTokens: 64000,
98
+ contextWindow: 1048576,
99
+ maxTokens: 65536,
75
100
  input: ["text", "image"],
76
101
  reasoning: true,
77
102
  tools: true,
@@ -89,15 +114,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
89
114
  apiId: "gemini-2.5-flash",
90
115
  publisher: "google",
91
116
  endpointType: "gemini",
92
- contextWindow: 1000000,
93
- maxTokens: 64000,
117
+ contextWindow: 1048576,
118
+ maxTokens: 65536,
94
119
  input: ["text", "image"],
95
120
  reasoning: true,
96
121
  tools: true,
97
122
  cost: {
98
123
  input: 0.30,
99
124
  output: 2.50,
100
- cacheRead: 0.030,
125
+ cacheRead: 0.03,
101
126
  cacheWrite: 0,
102
127
  },
103
128
  region: "global",
@@ -108,26 +133,28 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
108
133
  apiId: "gemini-2.5-flash-lite",
109
134
  publisher: "google",
110
135
  endpointType: "gemini",
111
- contextWindow: 1000000,
112
- maxTokens: 64000,
136
+ contextWindow: 1048576,
137
+ maxTokens: 65536,
113
138
  input: ["text", "image"],
114
139
  reasoning: true,
115
140
  tools: true,
116
141
  cost: {
117
142
  input: 0.10,
118
143
  output: 0.40,
119
- cacheRead: 0.010,
144
+ cacheRead: 0.01,
120
145
  cacheWrite: 0,
121
146
  },
122
147
  region: "global",
123
148
  },
149
+
150
+ // --- Gemini 2.0 (GA) ---
124
151
  {
125
152
  id: "gemini-2.0-flash",
126
153
  name: "Gemini 2.0 Flash",
127
154
  apiId: "gemini-2.0-flash",
128
155
  publisher: "google",
129
156
  endpointType: "gemini",
130
- contextWindow: 1000000,
157
+ contextWindow: 1048576,
131
158
  maxTokens: 8192,
132
159
  input: ["text", "image"],
133
160
  reasoning: false,
@@ -135,7 +162,7 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
135
162
  cost: {
136
163
  input: 0.15,
137
164
  output: 0.60,
138
- cacheRead: 0.025,
165
+ cacheRead: 0,
139
166
  cacheWrite: 0,
140
167
  },
141
168
  region: "global",
@@ -146,15 +173,15 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
146
173
  apiId: "gemini-2.0-flash-lite",
147
174
  publisher: "google",
148
175
  endpointType: "gemini",
149
- contextWindow: 1000000,
176
+ contextWindow: 1048576,
150
177
  maxTokens: 8192,
151
- input: ["text"],
178
+ input: ["text", "image"],
152
179
  reasoning: false,
153
180
  tools: true,
154
181
  cost: {
155
182
  input: 0.075,
156
183
  output: 0.30,
157
- cacheRead: 0.01875,
184
+ cacheRead: 0,
158
185
  cacheWrite: 0,
159
186
  },
160
187
  region: "global",
package/models/index.ts CHANGED
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
11
11
  ...GEMINI_MODELS,
12
12
  ...CLAUDE_MODELS,
13
13
  ...MAAS_MODELS,
14
- ];
14
+ ].sort((a, b) => a.name.localeCompare(b.name));
15
15
 
16
16
  export function getModelById(id: string): VertexModelConfig | undefined {
17
17
  return ALL_MODELS.find((m) => m.id === id);
package/models/maas.ts CHANGED
@@ -1,13 +1,14 @@
1
1
  /**
2
2
  * MaaS (Model-as-a-Service) open model definitions for Vertex AI
3
- * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#open-models
4
- * All prices per 1M tokens (as of Feb 2025)
3
+ * Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
4
+ * Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
5
+ * All prices per 1M tokens
5
6
  */
6
7
 
7
8
  import type { VertexModelConfig } from "../types.js";
8
9
 
9
10
  export const MAAS_MODELS: VertexModelConfig[] = [
10
- // Llama models (Meta)
11
+ // --- Meta Llama ---
11
12
  {
12
13
  id: "llama-4-maverick",
13
14
  name: "Llama 4 Maverick",
@@ -66,7 +67,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
66
67
  region: "global",
67
68
  },
68
69
 
69
- // Mistral models
70
+ // --- Mistral AI ---
70
71
  {
71
72
  id: "mistral-medium-3",
72
73
  name: "Mistral Medium 3",
@@ -106,45 +107,45 @@ export const MAAS_MODELS: VertexModelConfig[] = [
106
107
  region: "global",
107
108
  },
108
109
  {
109
- id: "mistral-ocr",
110
- name: "Mistral OCR",
111
- apiId: "mistralai/mistral-ocr-2505",
110
+ id: "codestral-2",
111
+ name: "Codestral 2",
112
+ apiId: "mistralai/codestral-2",
112
113
  publisher: "mistralai",
113
114
  endpointType: "maas",
114
- contextWindow: 128000,
115
+ contextWindow: 256000,
115
116
  maxTokens: 32000,
116
- input: ["text", "image"],
117
+ input: ["text"],
117
118
  reasoning: false,
118
- tools: false,
119
+ tools: true,
119
120
  cost: {
120
- input: 0.50, // Per page: $0.0005/page, shown as approx per 1K pages
121
- output: 0.50, // Per page pricing
121
+ input: 0.30,
122
+ output: 0.90,
122
123
  cacheRead: 0,
123
124
  cacheWrite: 0,
124
125
  },
125
126
  region: "global",
126
127
  },
127
128
  {
128
- id: "codestral-2",
129
- name: "Codestral 2",
130
- apiId: "mistralai/codestral-2",
129
+ id: "mistral-ocr",
130
+ name: "Mistral OCR",
131
+ apiId: "mistralai/mistral-ocr-2505",
131
132
  publisher: "mistralai",
132
133
  endpointType: "maas",
133
- contextWindow: 256000,
134
+ contextWindow: 128000,
134
135
  maxTokens: 32000,
135
- input: ["text"],
136
+ input: ["text", "image"],
136
137
  reasoning: false,
137
- tools: true,
138
+ tools: false,
138
139
  cost: {
139
- input: 0.30,
140
- output: 0.90,
140
+ input: 0.0005,
141
+ output: 0.0005,
141
142
  cacheRead: 0,
142
143
  cacheWrite: 0,
143
144
  },
144
145
  region: "global",
145
146
  },
146
147
 
147
- // DeepSeek models
148
+ // --- DeepSeek ---
148
149
  {
149
150
  id: "deepseek-v3.2",
150
151
  name: "DeepSeek V3.2",
@@ -202,48 +203,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
202
203
  },
203
204
  region: "global",
204
205
  },
205
-
206
- // AI21 Labs models
207
- {
208
- id: "jamba-1.5-large",
209
- name: "Jamba 1.5 Large",
210
- apiId: "ai21/jamba-1.5-large",
211
- publisher: "ai21",
212
- endpointType: "maas",
213
- contextWindow: 256000,
214
- maxTokens: 256000,
215
- input: ["text"],
216
- reasoning: false,
217
- tools: true,
218
- cost: {
219
- input: 2.00,
220
- output: 8.00,
221
- cacheRead: 0,
222
- cacheWrite: 0,
223
- },
224
- region: "global",
225
- },
226
206
  {
227
- id: "jamba-1.5-mini",
228
- name: "Jamba 1.5 Mini",
229
- apiId: "ai21/jamba-1.5-mini",
230
- publisher: "ai21",
207
+ id: "deepseek-ocr",
208
+ name: "DeepSeek OCR",
209
+ apiId: "deepseek-ai/deepseek-ocr-maas",
210
+ publisher: "deepseek-ai",
231
211
  endpointType: "maas",
232
- contextWindow: 256000,
233
- maxTokens: 256000,
234
- input: ["text"],
212
+ contextWindow: 163840,
213
+ maxTokens: 32000,
214
+ input: ["text", "image"],
235
215
  reasoning: false,
236
- tools: true,
216
+ tools: false,
237
217
  cost: {
238
- input: 0.20,
239
- output: 0.40,
218
+ input: 0.30,
219
+ output: 1.20,
240
220
  cacheRead: 0,
241
221
  cacheWrite: 0,
242
222
  },
243
223
  region: "global",
244
224
  },
245
225
 
246
- // OpenAI models (gpt-oss)
226
+ // --- OpenAI (gpt-oss) ---
247
227
  {
248
228
  id: "gpt-oss-120b",
249
229
  name: "GPT-OSS 120B",
@@ -283,28 +263,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
283
263
  region: "global",
284
264
  },
285
265
 
286
- // DeepSeek OCR
287
- {
288
- id: "deepseek-ocr",
289
- name: "DeepSeek OCR",
290
- apiId: "deepseek-ai/deepseek-ocr-maas",
291
- publisher: "deepseek-ai",
292
- endpointType: "maas",
293
- contextWindow: 163840,
294
- maxTokens: 32000,
295
- input: ["text", "image"],
296
- reasoning: false,
297
- tools: false,
298
- cost: {
299
- input: 0.30, // Per page: $0.0003/page
300
- output: 1.20, // Per page pricing
301
- cacheRead: 0,
302
- cacheWrite: 0,
303
- },
304
- region: "global",
305
- },
306
-
307
- // Qwen models
266
+ // --- Qwen ---
308
267
  {
309
268
  id: "qwen3-235b",
310
269
  name: "Qwen 3 235B",
@@ -382,7 +341,7 @@ export const MAAS_MODELS: VertexModelConfig[] = [
382
341
  region: "global",
383
342
  },
384
343
 
385
- // Other models
344
+ // --- Moonshot ---
386
345
  {
387
346
  id: "kimi-k2-thinking",
388
347
  name: "Kimi K2 Thinking",
@@ -402,6 +361,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
402
361
  },
403
362
  region: "global",
404
363
  },
364
+
365
+ // --- MiniMax ---
405
366
  {
406
367
  id: "minimax-m2",
407
368
  name: "MiniMax M2",
@@ -421,6 +382,8 @@ export const MAAS_MODELS: VertexModelConfig[] = [
421
382
  },
422
383
  region: "global",
423
384
  },
385
+
386
+ // --- GLM (Zhipu AI) ---
424
387
  {
425
388
  id: "glm-5",
426
389
  name: "GLM 5",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ssweens/pi-vertex",
3
- "version": "1.0.1",
3
+ "version": "1.1.1",
4
4
  "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
5
  "type": "module",
6
6
  "main": "index.ts",