@ssweens/pi-vertex 1.1.4 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/README.md +26 -16
- package/models/claude.ts +62 -3
- package/models/maas.ts +61 -0
- package/package.json +1 -1
- package/streaming/gemini.ts +111 -55
- package/streaming/maas.ts +8 -3
- package/types.ts +10 -0
- package/utils.ts +123 -48
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,47 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.1.7] - 2026-05-16
|
|
6
|
+
### Added
|
|
7
|
+
- **Regional pricing for Claude models** — non-global Vertex endpoints (us-east5, europe-west1, asia-southeast1, us/eu multi-region) carry a 10% price premium per GCP's published rates. The streaming layer now automatically selects the correct cost tier based on the resolved endpoint at call time. No config change required — if your `GOOGLE_CLOUD_LOCATION` or config resolves to any non-`global` location, cost tracking reflects the regional rate.
|
|
8
|
+
- Claude Opus 4.7/4.6/4.5: global $5.00/$25.00 → regional $5.50/$27.50
|
|
9
|
+
- Claude Sonnet 4.6/4.5: global $3.00/$15.00 → regional $3.30/$16.50
|
|
10
|
+
- Claude Haiku 4.5: global $1.00/$5.00 → regional $1.10/$5.50
|
|
11
|
+
- Claude Opus 4.1, Opus 4, Sonnet 4: uniform pricing (no regional variant on GCP)
|
|
12
|
+
- **`costRegional?: ModelCost` field on `VertexModelConfig`** — optional cost tier used when the resolved GCP location is non-global. Models without this field use `cost` for all regions.
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- **Grok cache read pricing** — previously 0 for both xAI models; corrected to GCP official rates:
|
|
16
|
+
- `grok-4.20-reasoning`: cacheRead $0.20/1M
|
|
17
|
+
- `grok-4.1-fast-reasoning`: cacheRead $0.05/1M
|
|
18
|
+
|
|
19
|
+
## [1.1.6] - 2026-05-16
|
|
20
|
+
### Fixed
|
|
21
|
+
- **`maxTokens / 2` halving removed** — both the Anthropic and OpenAI-compat MaaS streaming paths were silently capping requests at half the model's stated `maxTokens`. Requests now use the full `maxTokens` value unless the caller explicitly overrides it.
|
|
22
|
+
- **Gemini cached token double-counting** — `promptTokenCount` includes cached tokens, so input cost was inflated. Input usage is now `promptTokenCount − cachedContentTokenCount` (clamped at 0), matching the actual billable amount; the cached tokens are reported separately as `cacheRead`.
|
|
23
|
+
- **`sanitizeText` corrupted emoji** — the previous regex replaced all surrogate code units, including valid pairs (most emoji lie outside the Basic Multilingual Plane and are encoded in UTF-16 as a high/low surrogate pair). Now only unpaired (lone) surrogates are stripped.
|
|
24
|
+
- **Gemini 3 Pro can't use `MINIMAL` thinking level** — `ThinkingLevel.MINIMAL` is only valid for Flash models; Pro supports only `LOW`/`MEDIUM`/`HIGH`. Gemini 3 Pro requests with `minimal` or `low` effort now both resolve to `ThinkingLevel.LOW`, the lowest level Pro accepts.
|
|
25
|
+
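- **Reasoning models always get a minimum thinking config** — previously thinking was only configured when an explicit `reasoning` effort was passed. For reasoning-capable Gemini models, a minimum config is now always set when no effort is given — `ThinkingLevel.LOW` for Gemini 3 Pro, `ThinkingLevel.MINIMAL` for Gemini 3 Flash, a 128-token `thinkingBudget` for Gemini 2.5 Pro, and a `thinkingBudget` of 0 for all other models — matching pi-mono behavior and preventing silent thought suppression.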
|
|
26
|
+
- **`convertToGeminiMessages`: missing tool results injected** — if an assistant turn with tool calls has no matching `toolResult` message, a synthetic error result (`"No result provided"`) is flushed before the next turn. Prevents Gemini 400 errors from dangling tool calls.
|
|
27
|
+
- **`convertToGeminiMessages`: image tool results supported** — `toolResult` messages containing image content are now forwarded correctly. Gemini 3+ models receive them as `functionResponse.parts`; older models get a separate user image turn.
|
|
28
|
+
- **`convertToGeminiMessages`: tighter same-model guard** — thought signature replay now also requires `api === "google-generative-ai"` so signatures from non-Gemini providers (e.g. Claude) are never incorrectly forwarded.
|
|
29
|
+
- **`convertToGeminiMessages`: removed `id` from `functionCall` parts** — the `requiresToolCallId` heuristic was wrong; Gemini does not use tool call IDs in `functionCall` parts.
|
|
30
|
+
|
|
31
|
+
### Updated
|
|
32
|
+
- `claude-opus-4-6`: `maxTokens` corrected to `128000` (was `32000`)
|
|
33
|
+
- `claude-sonnet-4-6`: `maxTokens` corrected to `128000` (was `64000`)
|
|
34
|
+
- `convertToolsForGemini` / `convertTools`: signatures tightened from `any[]` to typed `Tool[]`
|
|
35
|
+
|
|
36
|
+
*Bug fixes co-discovered with [lhl/pi-vertex](https://github.com/lhl/pi-vertex), a respected community fork. Credit: @lhl.*
|
|
37
|
+
|
|
38
|
+
## [1.1.5] - 2026-05-16
|
|
39
|
+
### Added
|
|
40
|
+
- **xAI Grok models** (new publisher on Vertex MaaS OpenAI-compat endpoint):
|
|
41
|
+
- `grok-4.20-reasoning` — flagship model, 200K context, text+image input, reasoning+tools, $1.25/$2.50 per 1M tokens
|
|
42
|
+
- `grok-4.1-fast-reasoning` — cost-effective model, 128K context, text+image input, reasoning+tools, $0.20/$0.50 per 1M tokens
|
|
43
|
+
- **Claude Opus 4.7** (`claude-opus-4-7`) — 1M context, 128K max output tokens (Opus 4.6 was listed at 32K in this release; its limit was corrected to 128K in 1.1.6), $5.00/$25.00 per 1M, same cache pricing as Opus 4.6
|
|
44
|
+
- **Gemma 4 26B A4B IT** (`gemma-4-26b-a4b-it`) — Google's MoE instruction-tuned model via MaaS, 262K context, 128K max output, text+image input, $0.15/$0.60 per 1M tokens
|
|
45
|
+
|
|
5
46
|
## [1.1.4] - 2026-03-30
|
|
6
47
|
### Fixed
|
|
7
48
|
- Removed error message override for `400 (no body)` responses from Vertex MaaS models. The original message now passes through to `isContextOverflow()` which already handles this pattern, enabling proper auto-compact instead of showing a raw error to the user.
|
package/README.md
CHANGED
|
@@ -15,18 +15,18 @@ Set your GCP project and credentials. Vertex AI models (Gemini, Claude, Llama, D
|
|
|
15
15
|
|
|
16
16
|
## Features
|
|
17
17
|
|
|
18
|
-
- **
|
|
19
|
-
- **Gemini** (
|
|
20
|
-
- **Claude** (
|
|
18
|
+
- **48 models** across 4 categories:
|
|
19
|
+
- **Gemini** (9): 3.1 Pro, 3.1 Flash-Lite, 3 Flash, 2.5 Pro, 2.5 Flash, 2.5 Flash-Lite, 2.0 Flash, 2.0 Flash-Lite
|
|
20
|
+
- **Claude** (10): Opus 4.7, Opus 4.6, Sonnet 4.6, Opus/Sonnet/Haiku 4.5, Opus 4.1, Opus 4, Sonnet 4, 3.5 Sonnet v2
|
|
21
21
|
- **Llama** (3): 4 Maverick, 4 Scout, 3.3 70B
|
|
22
|
-
- **Other MaaS** (
|
|
22
|
+
- **Other MaaS** (26): Grok, Gemma, Mistral, DeepSeek, Qwen, OpenAI GPT-OSS, Kimi, MiniMax, GLM
|
|
23
23
|
|
|
24
24
|
- **Unified streaming**: Single provider, multiple model families
|
|
25
25
|
- **Full tool calling support**: All models with multi-turn tool use and proper tool result handling
|
|
26
26
|
- **Thinking/reasoning**: Gemini 3 thinking levels, Gemini 2.5 thinking budgets, thought signature preservation
|
|
27
27
|
- **Automatic auth**: Uses Google Application Default Credentials
|
|
28
28
|
- **Region awareness**: Global endpoints where supported, regional where required
|
|
29
|
-
- **Pricing tracking**: Built-in cost per token for all models (including thinking tokens)
|
|
29
|
+
- **Pricing tracking**: Built-in cost per token for all models (including thinking tokens and regional endpoint premiums)
|
|
30
30
|
|
|
31
31
|
## Installation
|
|
32
32
|
|
|
@@ -128,17 +128,20 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
128
128
|
|
|
129
129
|
### Claude Models
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
|
134
|
-
|
|
135
|
-
| claude-opus-4-
|
|
136
|
-
| claude-
|
|
137
|
-
| claude-
|
|
138
|
-
| claude-opus-4-
|
|
139
|
-
| claude-
|
|
140
|
-
| claude-
|
|
141
|
-
| claude-
|
|
131
|
+
Prices shown are for the **global** endpoint. Non-global regions (us-east5, europe-west1, asia-southeast1, us/eu multi-region) carry a 10% premium — cost tracking adjusts automatically based on your configured `GOOGLE_CLOUD_LOCATION`.
|
|
132
|
+
|
|
133
|
+
| Model | Context | Max Tokens | Input | Reasoning | Price global (in/out) | Price regional (in/out) |
|
|
134
|
+
|-------|---------|------------|-------|-----------|----------------------|------------------------|
|
|
135
|
+
| claude-opus-4-7 | 1M | 128,000 | text, image | ✅ | $5.00/$25.00 | $5.50/$27.50 |
|
|
136
|
+
| claude-opus-4-6 | 1M | 128,000 | text, image | ✅ | $5.00/$25.00 | $5.50/$27.50 |
|
|
137
|
+
| claude-sonnet-4-6 | 1M | 128,000 | text, image | ✅ | $3.00/$15.00 | $3.30/$16.50 |
|
|
138
|
+
| claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | $5.50/$27.50 |
|
|
139
|
+
| claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | $3.30/$16.50 |
|
|
140
|
+
| claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | $1.10/$5.50 |
|
|
141
|
+
| claude-opus-4-1 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | (uniform) |
|
|
142
|
+
| claude-opus-4 | 200K | 32,000 | text, image | ✅ | $15.00/$75.00 | (uniform) |
|
|
143
|
+
| claude-sonnet-4 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | (uniform) |
|
|
144
|
+
| claude-3-5-sonnet-v2 | 200K | 8,192 | text, image | ❌ | $3.00/$15.00 | (uniform) |
|
|
142
145
|
|
|
143
146
|
### Llama Models
|
|
144
147
|
|
|
@@ -170,6 +173,9 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
170
173
|
| minimax-m2 | 196K | minimaxai | $0.30/$1.20 | global |
|
|
171
174
|
| glm-5 | 200K | zai-org | $1.00/$3.20 | global |
|
|
172
175
|
| glm-4.7 | 200K | zai-org | $0.60/$2.20 | global |
|
|
176
|
+
| grok-4.20-reasoning | 200K | xai | $1.25/$2.50 | global |
|
|
177
|
+
| grok-4.1-fast-reasoning | 128K | xai | $0.20/$0.50 | global |
|
|
178
|
+
| gemma-4-26b-a4b-it | 262K | google | $0.15/$0.60 | global |
|
|
173
179
|
|
|
174
180
|
## Regional Endpoints
|
|
175
181
|
|
|
@@ -218,6 +224,10 @@ export GOOGLE_CLOUD_LOCATION=us-central1
|
|
|
218
224
|
- `@mariozechner/pi-ai`: Peer dependency
|
|
219
225
|
- `@mariozechner/pi-coding-agent`: Peer dependency
|
|
220
226
|
|
|
227
|
+
## Acknowledgments
|
|
228
|
+
|
|
229
|
+
[lhl](https://github.com/lhl) maintains [lhl/pi-vertex](https://github.com/lhl/pi-vertex), an independent fork that added comprehensive unit tests and CI, and identified several important bugs. Several fixes in v1.1.6 were co-discovered through review of that work, including the `maxTokens/2` halving bug, Gemini cached-token double-counting, `sanitizeText` emoji corruption, missing tool result flushing, and image tool result forwarding. Kudos.
|
|
230
|
+
|
|
221
231
|
## License
|
|
222
232
|
|
|
223
233
|
MIT
|
package/models/claude.ts
CHANGED
|
@@ -2,13 +2,42 @@
|
|
|
2
2
|
* Claude model definitions for Vertex AI
|
|
3
3
|
* Source: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
|
|
4
4
|
* Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models
|
|
5
|
-
* All prices per 1M tokens (
|
|
5
|
+
* All prices per 1M tokens (<=200K input tokens)
|
|
6
|
+
* `cost` = global endpoint; `costRegional` = non-global (us-east5, europe-west1,
|
|
7
|
+
* asia-southeast1, us/eu multi-region) — uniformly 10% above global.
|
|
6
8
|
* Cache write prices are for 5-minute TTL
|
|
7
9
|
*/
|
|
8
10
|
|
|
9
11
|
import type { VertexModelConfig } from "../types.js";
|
|
10
12
|
|
|
11
13
|
export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
14
|
+
// Claude 4.7 series
|
|
15
|
+
{
|
|
16
|
+
id: "claude-opus-4-7",
|
|
17
|
+
name: "Claude Opus 4.7",
|
|
18
|
+
apiId: "claude-opus-4-7",
|
|
19
|
+
publisher: "anthropic",
|
|
20
|
+
endpointType: "maas",
|
|
21
|
+
contextWindow: 1000000,
|
|
22
|
+
maxTokens: 128000,
|
|
23
|
+
input: ["text", "image"],
|
|
24
|
+
reasoning: true,
|
|
25
|
+
tools: true,
|
|
26
|
+
cost: {
|
|
27
|
+
input: 5.00,
|
|
28
|
+
output: 25.00,
|
|
29
|
+
cacheRead: 0.50,
|
|
30
|
+
cacheWrite: 6.25,
|
|
31
|
+
},
|
|
32
|
+
costRegional: {
|
|
33
|
+
input: 5.50,
|
|
34
|
+
output: 27.50,
|
|
35
|
+
cacheRead: 0.55,
|
|
36
|
+
cacheWrite: 6.875,
|
|
37
|
+
},
|
|
38
|
+
region: "global",
|
|
39
|
+
},
|
|
40
|
+
|
|
12
41
|
// Claude 4.6 series
|
|
13
42
|
{
|
|
14
43
|
id: "claude-opus-4-6",
|
|
@@ -17,7 +46,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
17
46
|
publisher: "anthropic",
|
|
18
47
|
endpointType: "maas",
|
|
19
48
|
contextWindow: 1000000,
|
|
20
|
-
maxTokens:
|
|
49
|
+
maxTokens: 128000,
|
|
21
50
|
input: ["text", "image"],
|
|
22
51
|
reasoning: true,
|
|
23
52
|
tools: true,
|
|
@@ -27,6 +56,12 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
27
56
|
cacheRead: 0.50,
|
|
28
57
|
cacheWrite: 6.25,
|
|
29
58
|
},
|
|
59
|
+
costRegional: {
|
|
60
|
+
input: 5.50,
|
|
61
|
+
output: 27.50,
|
|
62
|
+
cacheRead: 0.55,
|
|
63
|
+
cacheWrite: 6.875,
|
|
64
|
+
},
|
|
30
65
|
region: "global",
|
|
31
66
|
},
|
|
32
67
|
{
|
|
@@ -36,7 +71,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
36
71
|
publisher: "anthropic",
|
|
37
72
|
endpointType: "maas",
|
|
38
73
|
contextWindow: 1000000,
|
|
39
|
-
maxTokens:
|
|
74
|
+
maxTokens: 128000,
|
|
40
75
|
input: ["text", "image"],
|
|
41
76
|
reasoning: true,
|
|
42
77
|
tools: true,
|
|
@@ -46,6 +81,12 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
46
81
|
cacheRead: 0.30,
|
|
47
82
|
cacheWrite: 3.75,
|
|
48
83
|
},
|
|
84
|
+
costRegional: {
|
|
85
|
+
input: 3.30,
|
|
86
|
+
output: 16.50,
|
|
87
|
+
cacheRead: 0.33,
|
|
88
|
+
cacheWrite: 4.125,
|
|
89
|
+
},
|
|
49
90
|
region: "global",
|
|
50
91
|
},
|
|
51
92
|
|
|
@@ -67,6 +108,12 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
67
108
|
cacheRead: 0.50,
|
|
68
109
|
cacheWrite: 6.25,
|
|
69
110
|
},
|
|
111
|
+
costRegional: {
|
|
112
|
+
input: 5.50,
|
|
113
|
+
output: 27.50,
|
|
114
|
+
cacheRead: 0.55,
|
|
115
|
+
cacheWrite: 6.875,
|
|
116
|
+
},
|
|
70
117
|
region: "global",
|
|
71
118
|
},
|
|
72
119
|
{
|
|
@@ -86,6 +133,12 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
86
133
|
cacheRead: 0.30,
|
|
87
134
|
cacheWrite: 3.75,
|
|
88
135
|
},
|
|
136
|
+
costRegional: {
|
|
137
|
+
input: 3.30,
|
|
138
|
+
output: 16.50,
|
|
139
|
+
cacheRead: 0.33,
|
|
140
|
+
cacheWrite: 4.125,
|
|
141
|
+
},
|
|
89
142
|
region: "global",
|
|
90
143
|
},
|
|
91
144
|
{
|
|
@@ -105,6 +158,12 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
105
158
|
cacheRead: 0.10,
|
|
106
159
|
cacheWrite: 1.25,
|
|
107
160
|
},
|
|
161
|
+
costRegional: {
|
|
162
|
+
input: 1.10,
|
|
163
|
+
output: 5.50,
|
|
164
|
+
cacheRead: 0.11,
|
|
165
|
+
cacheWrite: 1.375,
|
|
166
|
+
},
|
|
108
167
|
region: "global",
|
|
109
168
|
},
|
|
110
169
|
|
package/models/maas.ts
CHANGED
|
@@ -8,6 +8,46 @@
|
|
|
8
8
|
import type { VertexModelConfig } from "../types.js";
|
|
9
9
|
|
|
10
10
|
export const MAAS_MODELS: VertexModelConfig[] = [
|
|
11
|
+
// --- xAI Grok ---
|
|
12
|
+
{
|
|
13
|
+
id: "grok-4.20-reasoning",
|
|
14
|
+
name: "Grok 4.20 Reasoning",
|
|
15
|
+
apiId: "grok-4.20-reasoning",
|
|
16
|
+
publisher: "xai",
|
|
17
|
+
endpointType: "maas",
|
|
18
|
+
contextWindow: 200000,
|
|
19
|
+
maxTokens: 32000,
|
|
20
|
+
input: ["text", "image"],
|
|
21
|
+
reasoning: true,
|
|
22
|
+
tools: true,
|
|
23
|
+
cost: {
|
|
24
|
+
input: 1.25,
|
|
25
|
+
output: 2.50,
|
|
26
|
+
cacheRead: 0.20,
|
|
27
|
+
cacheWrite: 0,
|
|
28
|
+
},
|
|
29
|
+
region: "global",
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
id: "grok-4.1-fast-reasoning",
|
|
33
|
+
name: "Grok 4.1 Fast Reasoning",
|
|
34
|
+
apiId: "grok-4.1-fast-reasoning",
|
|
35
|
+
publisher: "xai",
|
|
36
|
+
endpointType: "maas",
|
|
37
|
+
contextWindow: 128000,
|
|
38
|
+
maxTokens: 32000,
|
|
39
|
+
input: ["text", "image"],
|
|
40
|
+
reasoning: true,
|
|
41
|
+
tools: true,
|
|
42
|
+
cost: {
|
|
43
|
+
input: 0.20,
|
|
44
|
+
output: 0.50,
|
|
45
|
+
cacheRead: 0.05,
|
|
46
|
+
cacheWrite: 0,
|
|
47
|
+
},
|
|
48
|
+
region: "global",
|
|
49
|
+
},
|
|
50
|
+
|
|
11
51
|
// --- Meta Llama ---
|
|
12
52
|
{
|
|
13
53
|
id: "llama-4-maverick",
|
|
@@ -383,6 +423,27 @@ export const MAAS_MODELS: VertexModelConfig[] = [
|
|
|
383
423
|
region: "global",
|
|
384
424
|
},
|
|
385
425
|
|
|
426
|
+
// --- Google Gemma (MaaS) ---
|
|
427
|
+
{
|
|
428
|
+
id: "gemma-4-26b-a4b-it",
|
|
429
|
+
name: "Gemma 4 26B A4B IT",
|
|
430
|
+
apiId: "gemma-4-26b-a4b-it-maas",
|
|
431
|
+
publisher: "google",
|
|
432
|
+
endpointType: "maas",
|
|
433
|
+
contextWindow: 262144,
|
|
434
|
+
maxTokens: 128000,
|
|
435
|
+
input: ["text", "image"],
|
|
436
|
+
reasoning: false,
|
|
437
|
+
tools: false,
|
|
438
|
+
cost: {
|
|
439
|
+
input: 0.15,
|
|
440
|
+
output: 0.60,
|
|
441
|
+
cacheRead: 0,
|
|
442
|
+
cacheWrite: 0,
|
|
443
|
+
},
|
|
444
|
+
region: "global",
|
|
445
|
+
},
|
|
446
|
+
|
|
386
447
|
// --- GLM (Zhipu AI) ---
|
|
387
448
|
{
|
|
388
449
|
id: "glm-5",
|
package/package.json
CHANGED
package/streaming/gemini.ts
CHANGED
|
@@ -24,14 +24,53 @@ const THINKING_LEVEL_MAP: Record<string, ThinkingLevel> = {
|
|
|
24
24
|
high: ThinkingLevel.HIGH,
|
|
25
25
|
};
|
|
26
26
|
|
|
27
|
+
interface GeminiThinkingConfig {
|
|
28
|
+
includeThoughts?: boolean;
|
|
29
|
+
thinkingBudget?: number;
|
|
30
|
+
thinkingLevel?: ThinkingLevel;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function isGemini3ProModel(modelId: string): boolean {
|
|
34
|
+
return /gemini-3(?:\.\d+)?-pro/.test(modelId.toLowerCase());
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function isGemini3FlashModel(modelId: string): boolean {
|
|
38
|
+
return /gemini-3(?:\.\d+)?-flash/.test(modelId.toLowerCase());
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function isGemini25ProModel(modelId: string): boolean {
|
|
42
|
+
return /gemini-2\.5-pro/.test(modelId.toLowerCase());
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function getGemini3ThinkingLevel(effort: string, modelId: string): ThinkingLevel {
|
|
46
|
+
if (isGemini3ProModel(modelId)) {
|
|
47
|
+
// Pro only supports LOW/MEDIUM/HIGH — floor minimal/low to LOW
|
|
48
|
+
if (effort === "minimal" || effort === "low") return ThinkingLevel.LOW;
|
|
49
|
+
if (effort === "medium") return ThinkingLevel.MEDIUM;
|
|
50
|
+
return ThinkingLevel.HIGH;
|
|
51
|
+
}
|
|
52
|
+
return THINKING_LEVEL_MAP[effort];
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function getLowestThinkingConfig(modelId: string): GeminiThinkingConfig {
|
|
56
|
+
if (isGemini3ProModel(modelId)) {
|
|
57
|
+
return { thinkingLevel: ThinkingLevel.LOW };
|
|
58
|
+
}
|
|
59
|
+
if (isGemini3FlashModel(modelId)) {
|
|
60
|
+
return { thinkingLevel: ThinkingLevel.MINIMAL };
|
|
61
|
+
}
|
|
62
|
+
if (isGemini25ProModel(modelId)) {
|
|
63
|
+
return { thinkingBudget: 128 };
|
|
64
|
+
}
|
|
65
|
+
return { thinkingBudget: 0 };
|
|
66
|
+
}
|
|
67
|
+
|
|
27
68
|
function mapGeminiStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
|
|
28
69
|
switch (reason) {
|
|
29
70
|
case FinishReason.STOP:
|
|
30
71
|
return "stop";
|
|
31
72
|
case FinishReason.MAX_TOKENS:
|
|
32
73
|
return "length";
|
|
33
|
-
case FinishReason.SAFETY:
|
|
34
|
-
case FinishReason.RECITATION:
|
|
35
74
|
default:
|
|
36
75
|
return "error";
|
|
37
76
|
}
|
|
@@ -79,9 +118,11 @@ export function streamGemini(
|
|
|
79
118
|
// Convert messages with model ID for proper thinking/tool handling
|
|
80
119
|
const contents = convertToGeminiMessages(context.messages, model.apiId);
|
|
81
120
|
|
|
82
|
-
// Build config — only set temperature when explicitly provided
|
|
83
|
-
|
|
84
|
-
|
|
121
|
+
// Build config — only set temperature when explicitly provided.
|
|
122
|
+
// The Vertex Gemini config shape is sprawling; use Record to avoid
|
|
123
|
+
// fighting the SDK's incomplete typings.
|
|
124
|
+
const config: Record<string, unknown> = {
|
|
125
|
+
maxOutputTokens: options?.maxTokens || model.maxTokens,
|
|
85
126
|
...(options?.temperature !== undefined && { temperature: options.temperature }),
|
|
86
127
|
};
|
|
87
128
|
|
|
@@ -95,28 +136,33 @@ export function streamGemini(
|
|
|
95
136
|
config.tools = convertToolsForGemini(context.tools);
|
|
96
137
|
}
|
|
97
138
|
|
|
98
|
-
// Add thinking configuration (matches pi-mono's buildParams logic)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
139
|
+
// Add thinking configuration (matches pi-mono's buildParams logic).
|
|
140
|
+
// For reasoning models: always set a minimum thinking config so the model
|
|
141
|
+
// doesn't silently suppress thoughts when no effort level is specified.
|
|
142
|
+
if (model.reasoning) {
|
|
143
|
+
if (options?.reasoning) {
|
|
144
|
+
const effort = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
|
145
|
+
const isGemini3 = model.apiId.startsWith("gemini-3");
|
|
146
|
+
const thinkingConfig: GeminiThinkingConfig = { includeThoughts: true };
|
|
147
|
+
|
|
148
|
+
if (isGemini3) {
|
|
149
|
+
// Gemini 3 Pro doesn't support MINIMAL; Flash models do.
|
|
150
|
+
thinkingConfig.thinkingLevel = getGemini3ThinkingLevel(effort, model.apiId);
|
|
151
|
+
} else {
|
|
152
|
+
// Gemini 2.5 models use thinking budgets (token counts)
|
|
153
|
+
const budgets: Record<string, number> = {
|
|
154
|
+
minimal: 128,
|
|
155
|
+
low: 2048,
|
|
156
|
+
medium: 8192,
|
|
157
|
+
high: model.apiId.includes("2.5-pro") ? 32768 : 24576,
|
|
158
|
+
};
|
|
159
|
+
thinkingConfig.thinkingBudget = budgets[effort] ?? 8192;
|
|
160
|
+
}
|
|
104
161
|
|
|
105
|
-
|
|
106
|
-
// Gemini 3 models use thinking levels (MINIMAL/LOW/MEDIUM/HIGH)
|
|
107
|
-
thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[effort];
|
|
162
|
+
config.thinkingConfig = thinkingConfig;
|
|
108
163
|
} else {
|
|
109
|
-
|
|
110
|
-
const budgets: Record<string, number> = {
|
|
111
|
-
minimal: 128,
|
|
112
|
-
low: 2048,
|
|
113
|
-
medium: 8192,
|
|
114
|
-
high: model.apiId.includes("2.5-pro") ? 32768 : 24576,
|
|
115
|
-
};
|
|
116
|
-
thinkingConfig.thinkingBudget = budgets[effort] ?? 8192;
|
|
164
|
+
config.thinkingConfig = getLowestThinkingConfig(model.apiId);
|
|
117
165
|
}
|
|
118
|
-
|
|
119
|
-
config.thinkingConfig = thinkingConfig;
|
|
120
166
|
}
|
|
121
167
|
|
|
122
168
|
// Pass abort signal to SDK for in-flight cancellation
|
|
@@ -136,8 +182,10 @@ export function streamGemini(
|
|
|
136
182
|
config,
|
|
137
183
|
});
|
|
138
184
|
|
|
139
|
-
// Track current content block for thinking/text transitions
|
|
140
|
-
|
|
185
|
+
// Track current content block for thinking/text transitions.
|
|
186
|
+
type StreamingTextBlock = { type: "text"; text: string; textSignature?: string };
|
|
187
|
+
type StreamingThinkingBlock = { type: "thinking"; thinking: string; thinkingSignature?: string };
|
|
188
|
+
let currentBlock: StreamingTextBlock | StreamingThinkingBlock | null = null;
|
|
141
189
|
let currentBlockType: "text" | "thinking" | null = null;
|
|
142
190
|
|
|
143
191
|
for await (const chunk of response) {
|
|
@@ -152,13 +200,11 @@ export function streamGemini(
|
|
|
152
200
|
|
|
153
201
|
// Check if we need to transition to a new block
|
|
154
202
|
if (currentBlockType !== targetType) {
|
|
155
|
-
// End previous block
|
|
156
|
-
if (currentBlock
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
161
|
-
}
|
|
203
|
+
// End previous block (narrow on type for correct field access)
|
|
204
|
+
if (currentBlock?.type === "text") {
|
|
205
|
+
stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
|
|
206
|
+
} else if (currentBlock?.type === "thinking") {
|
|
207
|
+
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
162
208
|
}
|
|
163
209
|
|
|
164
210
|
// Start new block
|
|
@@ -174,12 +220,12 @@ export function streamGemini(
|
|
|
174
220
|
currentBlockType = targetType;
|
|
175
221
|
}
|
|
176
222
|
|
|
177
|
-
// Accumulate content
|
|
178
|
-
if (
|
|
223
|
+
// Accumulate content (narrow on discriminant for type safety)
|
|
224
|
+
if (currentBlock?.type === "thinking") {
|
|
179
225
|
currentBlock.thinking += part.text;
|
|
180
226
|
currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
|
|
181
227
|
stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
|
|
182
|
-
} else {
|
|
228
|
+
} else if (currentBlock?.type === "text") {
|
|
183
229
|
currentBlock.text += part.text;
|
|
184
230
|
currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
|
|
185
231
|
stream.push({ type: "text_delta", contentIndex: output.content.length - 1, delta: part.text, partial: output });
|
|
@@ -188,12 +234,12 @@ export function streamGemini(
|
|
|
188
234
|
|
|
189
235
|
if (part.functionCall) {
|
|
190
236
|
// End current text/thinking block before tool call
|
|
191
|
-
if (currentBlock
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
237
|
+
if (currentBlock?.type === "text") {
|
|
238
|
+
stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
|
|
239
|
+
} else if (currentBlock?.type === "thinking") {
|
|
240
|
+
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
241
|
+
}
|
|
242
|
+
if (currentBlock) {
|
|
197
243
|
currentBlock = null;
|
|
198
244
|
currentBlockType = null;
|
|
199
245
|
}
|
|
@@ -210,7 +256,7 @@ export function streamGemini(
|
|
|
210
256
|
type: "toolCall" as const,
|
|
211
257
|
id: toolCallId,
|
|
212
258
|
name: part.functionCall.name || "",
|
|
213
|
-
arguments: (part.functionCall.args as Record<string,
|
|
259
|
+
arguments: (part.functionCall.args as Record<string, unknown>) ?? {},
|
|
214
260
|
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
215
261
|
};
|
|
216
262
|
|
|
@@ -230,18 +276,26 @@ export function streamGemini(
|
|
|
230
276
|
output.errorMessage = "Content blocked by safety filters";
|
|
231
277
|
}
|
|
232
278
|
// Override to toolUse if any tool calls are present (matches pi-mono)
|
|
233
|
-
if (output.content.some((b
|
|
279
|
+
if (output.content.some((b) => b.type === "toolCall")) {
|
|
234
280
|
output.stopReason = "toolUse";
|
|
235
281
|
}
|
|
236
282
|
}
|
|
237
283
|
|
|
238
|
-
// Update usage — include thoughtsTokenCount in output (matches pi-mono)
|
|
284
|
+
// Update usage — include thoughtsTokenCount in output (matches pi-mono).
|
|
285
|
+
// Subtract cached tokens from prompt to avoid double-counting in input cost.
|
|
239
286
|
if (chunk.usageMetadata) {
|
|
240
|
-
const meta = chunk.usageMetadata as
|
|
287
|
+
const meta = chunk.usageMetadata as {
|
|
288
|
+
cachedContentTokenCount?: number;
|
|
289
|
+
promptTokenCount?: number;
|
|
290
|
+
candidatesTokenCount?: number;
|
|
291
|
+
thoughtsTokenCount?: number;
|
|
292
|
+
totalTokenCount?: number;
|
|
293
|
+
};
|
|
294
|
+
const cachedTokens = meta.cachedContentTokenCount || 0;
|
|
241
295
|
output.usage = {
|
|
242
|
-
input: meta.promptTokenCount || 0,
|
|
296
|
+
input: Math.max(0, (meta.promptTokenCount || 0) - cachedTokens),
|
|
243
297
|
output: (meta.candidatesTokenCount || 0) + (meta.thoughtsTokenCount || 0),
|
|
244
|
-
cacheRead:
|
|
298
|
+
cacheRead: cachedTokens,
|
|
245
299
|
cacheWrite: 0,
|
|
246
300
|
totalTokens: meta.totalTokenCount || 0,
|
|
247
301
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
@@ -251,15 +305,17 @@ export function streamGemini(
|
|
|
251
305
|
}
|
|
252
306
|
|
|
253
307
|
// End final block
|
|
254
|
-
if (currentBlock
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
308
|
+
if (currentBlock?.type === "text") {
|
|
309
|
+
stream.push({ type: "text_end", contentIndex: output.content.length - 1, content: currentBlock.text, partial: output });
|
|
310
|
+
} else if (currentBlock?.type === "thinking") {
|
|
311
|
+
stream.push({ type: "thinking_end", contentIndex: output.content.length - 1, content: currentBlock.thinking, partial: output });
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
if (options?.signal?.aborted) {
|
|
315
|
+
throw new Error("Request was aborted");
|
|
260
316
|
}
|
|
261
317
|
|
|
262
|
-
stream.push({ type: "done", reason: output.stopReason
|
|
318
|
+
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
263
319
|
stream.end();
|
|
264
320
|
} catch (error) {
|
|
265
321
|
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
package/streaming/maas.ts
CHANGED
|
@@ -53,6 +53,11 @@ async function streamAnthropic(
|
|
|
53
53
|
const location = resolveLocation(model.region);
|
|
54
54
|
const auth = getAuthConfig(location);
|
|
55
55
|
|
|
56
|
+
// Use regional pricing when the resolved endpoint is not the global one.
|
|
57
|
+
// Models without costRegional (e.g. Opus 4.1, Sonnet 4) have uniform pricing.
|
|
58
|
+
const effectiveCost =
|
|
59
|
+
auth.location !== "global" && model.costRegional ? model.costRegional : model.cost;
|
|
60
|
+
|
|
56
61
|
const client = new AnthropicVertex({
|
|
57
62
|
projectId: auth.projectId,
|
|
58
63
|
region: auth.location,
|
|
@@ -218,7 +223,7 @@ async function streamAnthropic(
|
|
|
218
223
|
|
|
219
224
|
const params: any = {
|
|
220
225
|
model: model.apiId,
|
|
221
|
-
max_tokens: options?.maxTokens ||
|
|
226
|
+
max_tokens: options?.maxTokens || model.maxTokens,
|
|
222
227
|
messages,
|
|
223
228
|
...(context.systemPrompt ? { system: context.systemPrompt } : {}),
|
|
224
229
|
...(tools && tools.length > 0 ? { tools } : {}),
|
|
@@ -314,7 +319,7 @@ async function streamAnthropic(
|
|
|
314
319
|
}
|
|
315
320
|
|
|
316
321
|
output.usage.totalTokens = output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
|
317
|
-
calculateCost(model as any, output.usage);
|
|
322
|
+
calculateCost({ ...model, cost: effectiveCost } as any, output.usage);
|
|
318
323
|
|
|
319
324
|
if (output.content.some((b: any) => b.type === "toolCall")) {
|
|
320
325
|
output.stopReason = "toolUse";
|
|
@@ -371,7 +376,7 @@ export function streamMaaS(
|
|
|
371
376
|
const innerStream = streamSimpleOpenAICompletions(modelForPi, context as any, {
|
|
372
377
|
...options,
|
|
373
378
|
apiKey: accessToken,
|
|
374
|
-
maxTokens: options?.maxTokens ||
|
|
379
|
+
maxTokens: options?.maxTokens || model.maxTokens,
|
|
375
380
|
temperature: options?.temperature,
|
|
376
381
|
});
|
|
377
382
|
|
package/types.ts
CHANGED
|
@@ -47,7 +47,17 @@ export interface VertexModelConfig {
|
|
|
47
47
|
input: ModelInputType[];
|
|
48
48
|
reasoning: boolean;
|
|
49
49
|
tools: boolean;
|
|
50
|
+
/** Pricing for the global endpoint (default). */
|
|
50
51
|
cost: ModelCost;
|
|
52
|
+
/**
|
|
53
|
+
* Pricing for non-global regional endpoints (us-east5, europe-west1,
|
|
54
|
+
* asia-southeast1, us/eu multi-region, etc.).
|
|
55
|
+
*
|
|
56
|
+
* When the resolved GCP location is not "global" and this field is set,
|
|
57
|
+
* the streaming layer uses these costs instead of `cost`.
|
|
58
|
+
* Omit for models whose pricing is uniform across all regions.
|
|
59
|
+
*/
|
|
60
|
+
costRegional?: ModelCost;
|
|
51
61
|
region: string;
|
|
52
62
|
}
|
|
53
63
|
|
package/utils.ts
CHANGED
|
@@ -7,18 +7,24 @@
|
|
|
7
7
|
|
|
8
8
|
import type {
|
|
9
9
|
AssistantMessage,
|
|
10
|
+
ImageContent,
|
|
10
11
|
Message,
|
|
11
12
|
TextContent,
|
|
12
13
|
ThinkingContent,
|
|
14
|
+
Tool,
|
|
13
15
|
ToolCall,
|
|
14
16
|
ToolResultMessage,
|
|
15
17
|
} from "./types.js";
|
|
16
18
|
|
|
17
19
|
/**
|
|
18
|
-
* Sanitize text by removing
|
|
20
|
+
* Sanitize text by removing unpaired surrogate code units.
|
|
21
|
+
* Valid surrogate pairs (emoji) are preserved.
|
|
19
22
|
*/
|
|
20
23
|
export function sanitizeText(text: string): string {
|
|
21
|
-
return text.replace(
|
|
24
|
+
return text.replace(
|
|
25
|
+
/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
|
|
26
|
+
"",
|
|
27
|
+
);
|
|
22
28
|
}
|
|
23
29
|
|
|
24
30
|
// --- Thought signature helpers (matching pi-mono google-shared.ts) ---
|
|
@@ -50,12 +56,20 @@ export function retainThoughtSignature(
|
|
|
50
56
|
return existing;
|
|
51
57
|
}
|
|
52
58
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
type GeminiContent = {
|
|
60
|
+
role: "user" | "model";
|
|
61
|
+
parts: Array<Record<string, unknown>>;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
function getGeminiMajorVersion(modelId: string): number | undefined {
|
|
65
|
+
const match = modelId.toLowerCase().match(/^gemini(?:-live)?-(\d+)/);
|
|
66
|
+
return match ? Number.parseInt(match[1], 10) : undefined;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function supportsMultimodalFunctionResponse(modelId: string): boolean {
|
|
70
|
+
const majorVersion = getGeminiMajorVersion(modelId);
|
|
71
|
+
if (majorVersion !== undefined) return majorVersion >= 3;
|
|
72
|
+
return true;
|
|
59
73
|
}
|
|
60
74
|
|
|
61
75
|
/**
|
|
@@ -64,12 +78,77 @@ function requiresToolCallId(modelId: string): boolean {
|
|
|
64
78
|
* Handles the full pi-ai Message union: UserMessage, AssistantMessage (with
|
|
65
79
|
* TextContent, ThinkingContent, ToolCall blocks), and ToolResultMessage.
|
|
66
80
|
*/
|
|
67
|
-
export function convertToGeminiMessages(messages: Message[], modelId: string):
|
|
68
|
-
const result:
|
|
81
|
+
export function convertToGeminiMessages(messages: Message[], modelId: string): GeminiContent[] {
|
|
82
|
+
const result: GeminiContent[] = [];
|
|
69
83
|
const isGemini3 = modelId.startsWith("gemini-3");
|
|
84
|
+
let pendingToolCalls: ToolCall[] = [];
|
|
85
|
+
let existingToolResultIds = new Set<string>();
|
|
86
|
+
|
|
87
|
+
const pushToolResult = (
|
|
88
|
+
toolCallId: string,
|
|
89
|
+
toolName: string,
|
|
90
|
+
content: ToolResultMessage["content"],
|
|
91
|
+
isError: boolean,
|
|
92
|
+
) => {
|
|
93
|
+
const textContent = content.filter((c): c is TextContent => c.type === "text");
|
|
94
|
+
const textResult = textContent.map((c) => c.text).join("\n");
|
|
95
|
+
const imageContent = content.filter((c): c is ImageContent => c.type === "image");
|
|
96
|
+
const hasText = textResult.length > 0;
|
|
97
|
+
const hasImages = imageContent.length > 0;
|
|
98
|
+
const responseValue = hasText
|
|
99
|
+
? sanitizeText(textResult)
|
|
100
|
+
: hasImages
|
|
101
|
+
? "(see attached image)"
|
|
102
|
+
: "";
|
|
103
|
+
|
|
104
|
+
const imageParts = imageContent.map((img) => ({
|
|
105
|
+
inlineData: { mimeType: img.mimeType, data: img.data },
|
|
106
|
+
}));
|
|
107
|
+
|
|
108
|
+
const functionResponsePart: Record<string, unknown> = {
|
|
109
|
+
functionResponse: {
|
|
110
|
+
name: toolName,
|
|
111
|
+
response: isError ? { error: responseValue } : { output: responseValue },
|
|
112
|
+
...(hasImages && supportsMultimodalFunctionResponse(modelId) ? { parts: imageParts } : {}),
|
|
113
|
+
},
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
// Merge consecutive tool results into a single user turn (required by Gemini API)
|
|
117
|
+
const lastContent = result[result.length - 1];
|
|
118
|
+
if (lastContent?.role === "user" && lastContent.parts?.some((p) => "functionResponse" in p)) {
|
|
119
|
+
lastContent.parts.push(functionResponsePart);
|
|
120
|
+
} else {
|
|
121
|
+
result.push({ role: "user", parts: [functionResponsePart] });
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Gemini < 3: carry image tool results as a separate user image turn
|
|
125
|
+
if (hasImages && !supportsMultimodalFunctionResponse(modelId)) {
|
|
126
|
+
result.push({
|
|
127
|
+
role: "user",
|
|
128
|
+
parts: [{ text: "Tool result image:" }, ...imageParts],
|
|
129
|
+
});
|
|
130
|
+
}
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
const flushMissingToolResults = () => {
|
|
134
|
+
if (pendingToolCalls.length === 0) return;
|
|
135
|
+
for (const toolCall of pendingToolCalls) {
|
|
136
|
+
if (!existingToolResultIds.has(toolCall.id)) {
|
|
137
|
+
pushToolResult(
|
|
138
|
+
toolCall.id,
|
|
139
|
+
toolCall.name,
|
|
140
|
+
[{ type: "text", text: "No result provided" }],
|
|
141
|
+
true,
|
|
142
|
+
);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
pendingToolCalls = [];
|
|
146
|
+
existingToolResultIds = new Set<string>();
|
|
147
|
+
};
|
|
70
148
|
|
|
71
149
|
for (const msg of messages) {
|
|
72
150
|
if (msg.role === "user") {
|
|
151
|
+
flushMissingToolResults();
|
|
73
152
|
if (typeof msg.content === "string") {
|
|
74
153
|
if (msg.content.trim()) {
|
|
75
154
|
result.push({
|
|
@@ -78,33 +157,34 @@ export function convertToGeminiMessages(messages: Message[], modelId: string): a
|
|
|
78
157
|
});
|
|
79
158
|
}
|
|
80
159
|
} else {
|
|
81
|
-
const parts = msg.content.map(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
},
|
|
90
|
-
};
|
|
91
|
-
}
|
|
92
|
-
});
|
|
160
|
+
const parts: Array<Record<string, unknown>> = msg.content.map(
|
|
161
|
+
(item: TextContent | ImageContent) => {
|
|
162
|
+
if (item.type === "text") {
|
|
163
|
+
return { text: sanitizeText(item.text) };
|
|
164
|
+
}
|
|
165
|
+
return { inlineData: { mimeType: item.mimeType, data: item.data } };
|
|
166
|
+
},
|
|
167
|
+
);
|
|
93
168
|
if (parts.length > 0) {
|
|
94
169
|
result.push({ role: "user", parts });
|
|
95
170
|
}
|
|
96
171
|
}
|
|
97
172
|
} else if (msg.role === "assistant") {
|
|
98
173
|
const assistantMsg = msg as AssistantMessage;
|
|
174
|
+
flushMissingToolResults();
|
|
99
175
|
|
|
100
176
|
// Skip errored/aborted messages — they're incomplete turns
|
|
101
177
|
if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") {
|
|
102
178
|
continue;
|
|
103
179
|
}
|
|
104
180
|
|
|
181
|
+
// Also require api match so cross-provider thought signatures aren't replayed
|
|
105
182
|
const isSameProviderAndModel =
|
|
106
|
-
assistantMsg.provider === "vertex" &&
|
|
107
|
-
|
|
183
|
+
assistantMsg.provider === "vertex" &&
|
|
184
|
+
assistantMsg.api === "google-generative-ai" &&
|
|
185
|
+
assistantMsg.model === modelId;
|
|
186
|
+
const parts: Array<Record<string, unknown>> = [];
|
|
187
|
+
const toolCalls: ToolCall[] = [];
|
|
108
188
|
|
|
109
189
|
for (const block of assistantMsg.content) {
|
|
110
190
|
if (block.type === "text") {
|
|
@@ -134,13 +214,13 @@ export function convertToGeminiMessages(messages: Message[], modelId: string): a
|
|
|
134
214
|
}
|
|
135
215
|
} else if (block.type === "toolCall") {
|
|
136
216
|
const toolCallBlock = block as ToolCall;
|
|
217
|
+
toolCalls.push(toolCallBlock);
|
|
137
218
|
const thoughtSig = resolveThoughtSignature(isSameProviderAndModel, toolCallBlock.thoughtSignature);
|
|
138
219
|
|
|
139
|
-
const part:
|
|
220
|
+
const part: Record<string, unknown> = {
|
|
140
221
|
functionCall: {
|
|
141
222
|
name: toolCallBlock.name,
|
|
142
223
|
args: toolCallBlock.arguments ?? {},
|
|
143
|
-
...(requiresToolCallId(modelId) ? { id: toolCallBlock.id } : {}),
|
|
144
224
|
},
|
|
145
225
|
};
|
|
146
226
|
if (thoughtSig) {
|
|
@@ -159,31 +239,24 @@ export function convertToGeminiMessages(messages: Message[], modelId: string): a
|
|
|
159
239
|
if (parts.length > 0) {
|
|
160
240
|
result.push({ role: "model", parts });
|
|
161
241
|
}
|
|
242
|
+
if (toolCalls.length > 0) {
|
|
243
|
+
pendingToolCalls = toolCalls;
|
|
244
|
+
existingToolResultIds = new Set<string>();
|
|
245
|
+
}
|
|
162
246
|
} else if (msg.role === "toolResult") {
|
|
163
247
|
const toolResultMsg = msg as ToolResultMessage;
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
name: toolResultMsg.toolName,
|
|
172
|
-
response: toolResultMsg.isError ? { error: responseValue } : { output: responseValue },
|
|
173
|
-
...(includeId ? { id: toolResultMsg.toolCallId } : {}),
|
|
174
|
-
},
|
|
175
|
-
};
|
|
176
|
-
|
|
177
|
-
// Merge consecutive tool results into a single user turn (required by Gemini API)
|
|
178
|
-
const lastContent = result[result.length - 1];
|
|
179
|
-
if (lastContent?.role === "user" && lastContent.parts?.some((p: any) => p.functionResponse)) {
|
|
180
|
-
lastContent.parts.push(functionResponsePart);
|
|
181
|
-
} else {
|
|
182
|
-
result.push({ role: "user", parts: [functionResponsePart] });
|
|
183
|
-
}
|
|
248
|
+
existingToolResultIds.add(toolResultMsg.toolCallId);
|
|
249
|
+
pushToolResult(
|
|
250
|
+
toolResultMsg.toolCallId,
|
|
251
|
+
toolResultMsg.toolName,
|
|
252
|
+
toolResultMsg.content,
|
|
253
|
+
toolResultMsg.isError,
|
|
254
|
+
);
|
|
184
255
|
}
|
|
185
256
|
}
|
|
186
257
|
|
|
258
|
+
flushMissingToolResults();
|
|
259
|
+
|
|
187
260
|
return result;
|
|
188
261
|
}
|
|
189
262
|
|
|
@@ -191,7 +264,9 @@ export function convertToGeminiMessages(messages: Message[], modelId: string): a
|
|
|
191
264
|
* Convert tools to Gemini format using parametersJsonSchema (full JSON Schema support).
|
|
192
265
|
* This differs from OpenAI format — Gemini uses functionDeclarations wrapped in an array.
|
|
193
266
|
*/
|
|
194
|
-
export function convertToolsForGemini(
|
|
267
|
+
export function convertToolsForGemini(
|
|
268
|
+
tools: Tool[],
|
|
269
|
+
): Array<{ functionDeclarations: Array<Record<string, unknown>> }> | undefined {
|
|
195
270
|
if (!tools || tools.length === 0) return undefined;
|
|
196
271
|
return [
|
|
197
272
|
{
|
|
@@ -207,7 +282,7 @@ export function convertToolsForGemini(tools: any[]): any[] | undefined {
|
|
|
207
282
|
/**
|
|
208
283
|
* Convert tools to OpenAI format (for Claude and MaaS models)
|
|
209
284
|
*/
|
|
210
|
-
export function convertTools(tools:
|
|
285
|
+
export function convertTools(tools: Tool[]): Array<Record<string, unknown>> {
|
|
211
286
|
return tools.map((tool) => ({
|
|
212
287
|
type: "function",
|
|
213
288
|
function: {
|