@ssweens/pi-vertex 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +3 -2
- package/TEST_COVERAGE.md +13 -0
- package/index.ts +2 -2
- package/models/claude.ts +2 -2
- package/models/gemini.ts +0 -19
- package/models/index.ts +1 -1
- package/package.json +4 -1
- package/streaming/maas.ts +346 -42
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [1.1.3] - 2026-03-26
|
|
6
|
+
### Fixed
|
|
7
|
+
- Hardened Claude-on-Vertex replay for mid-session model switching (tool ID normalization, tool result adjacency, thinking signature validation).
|
|
8
|
+
- Prevented Anthropic tool replay errors by inserting synthetic tool results when missing.
|
|
9
|
+
|
|
10
|
+
### Updated
|
|
11
|
+
- Claude 4.6 models use native Anthropic Vertex SDK streaming.
|
|
12
|
+
- Claude 4.6 context window updated to 1M.
|
|
13
|
+
- Model list order in the selector is now alphabetized by ID.
|
|
14
|
+
|
|
15
|
+
## [1.1.2] - 2026-03-24
|
|
16
|
+
### Changed
|
|
17
|
+
- Initial Claude 4.x support on Vertex.
|
package/README.md
CHANGED
|
@@ -130,8 +130,8 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
|
|
|
130
130
|
|
|
131
131
|
| Model | Context | Max Tokens | Input | Reasoning | Price (in/out) | Region |
|
|
132
132
|
|-------|---------|------------|-------|-----------|----------------|--------|
|
|
133
|
-
| claude-opus-4-6 |
|
|
134
|
-
| claude-sonnet-4-6 |
|
|
133
|
+
| claude-opus-4-6 | 1M | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
|
|
134
|
+
| claude-sonnet-4-6 | 1M | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
135
135
|
| claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
|
|
136
136
|
| claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
|
|
137
137
|
| claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
|
|
@@ -213,6 +213,7 @@ export GOOGLE_CLOUD_LOCATION=us-central1
|
|
|
213
213
|
## Dependencies
|
|
214
214
|
|
|
215
215
|
- `@google/genai`: Google GenAI SDK for Gemini models
|
|
216
|
+
- `@anthropic-ai/vertex-sdk`: Official Anthropic-on-Vertex SDK for Claude models (native streaming)
|
|
216
217
|
- `google-auth-library`: ADC authentication for all models
|
|
217
218
|
- `@mariozechner/pi-ai`: Peer dependency
|
|
218
219
|
- `@mariozechner/pi-coding-agent`: Peer dependency
|
package/TEST_COVERAGE.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Test Coverage
|
|
2
|
+
|
|
3
|
+
## Current Status
|
|
4
|
+
- Automated tests: not yet implemented in this package.
|
|
5
|
+
- Lint/type checks: `npm run check` (currently a no-op placeholder).
|
|
6
|
+
|
|
7
|
+
## Manual Verification
|
|
8
|
+
- Claude 4.6 streaming verified via Anthropic Vertex SDK.
|
|
9
|
+
- Mid-session model switching (tool call replay) verified interactively in pi.
|
|
10
|
+
|
|
11
|
+
## Gaps / Next Steps
|
|
12
|
+
- Add automated integration tests for Anthropic Vertex streaming and tool replay.
|
|
13
|
+
- Add unit tests for message normalization and replay sequencing.
|
package/index.ts
CHANGED
|
@@ -112,8 +112,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
112
112
|
|
|
113
113
|
// Show startup info as a widget that clears on first user input
|
|
114
114
|
const vertexStartupLines = [
|
|
115
|
-
`[pi-vertex] Initializing with project: ${projectId}`,
|
|
116
|
-
`[pi-vertex] Registered ${ALL_MODELS.length} models`,
|
|
115
|
+
` [pi-vertex] Initializing with project: ${projectId}`,
|
|
116
|
+
` [pi-vertex] Registered ${ALL_MODELS.length} models`,
|
|
117
117
|
];
|
|
118
118
|
pi.on("session_start", async (_event, ctx) => {
|
|
119
119
|
ctx.ui.setWidget("pi-vertex-startup", (_tui, theme) => ({
|
package/models/claude.ts
CHANGED
|
@@ -16,7 +16,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
16
16
|
apiId: "claude-opus-4-6",
|
|
17
17
|
publisher: "anthropic",
|
|
18
18
|
endpointType: "maas",
|
|
19
|
-
contextWindow:
|
|
19
|
+
contextWindow: 1000000,
|
|
20
20
|
maxTokens: 32000,
|
|
21
21
|
input: ["text", "image"],
|
|
22
22
|
reasoning: true,
|
|
@@ -35,7 +35,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
|
|
|
35
35
|
apiId: "claude-sonnet-4-6",
|
|
36
36
|
publisher: "anthropic",
|
|
37
37
|
endpointType: "maas",
|
|
38
|
-
contextWindow:
|
|
38
|
+
contextWindow: 1000000,
|
|
39
39
|
maxTokens: 64000,
|
|
40
40
|
input: ["text", "image"],
|
|
41
41
|
reasoning: true,
|
package/models/gemini.ts
CHANGED
|
@@ -49,25 +49,6 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
|
|
|
49
49
|
},
|
|
50
50
|
|
|
51
51
|
// --- Gemini 3 (Preview) ---
|
|
52
|
-
{
|
|
53
|
-
id: "gemini-3-pro",
|
|
54
|
-
name: "Gemini 3 Pro",
|
|
55
|
-
apiId: "gemini-3-pro-preview",
|
|
56
|
-
publisher: "google",
|
|
57
|
-
endpointType: "gemini",
|
|
58
|
-
contextWindow: 1048576,
|
|
59
|
-
maxTokens: 65536,
|
|
60
|
-
input: ["text", "image"],
|
|
61
|
-
reasoning: true,
|
|
62
|
-
tools: true,
|
|
63
|
-
cost: {
|
|
64
|
-
input: 2.00,
|
|
65
|
-
output: 12.00,
|
|
66
|
-
cacheRead: 0.20,
|
|
67
|
-
cacheWrite: 0,
|
|
68
|
-
},
|
|
69
|
-
region: "global",
|
|
70
|
-
},
|
|
71
52
|
{
|
|
72
53
|
id: "gemini-3-flash",
|
|
73
54
|
name: "Gemini 3 Flash",
|
package/models/index.ts
CHANGED
|
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
|
|
|
11
11
|
...GEMINI_MODELS,
|
|
12
12
|
...CLAUDE_MODELS,
|
|
13
13
|
...MAAS_MODELS,
|
|
14
|
-
].sort((a, b) => a.
|
|
14
|
+
].sort((a, b) => a.id.localeCompare(b.id));
|
|
15
15
|
|
|
16
16
|
export function getModelById(id: string): VertexModelConfig | undefined {
|
|
17
17
|
return ALL_MODELS.find((m) => m.id === id);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ssweens/pi-vertex",
|
|
3
|
-
"version": "1.1.1",
|
|
3
|
+
"version": "1.1.3",
|
|
4
4
|
"description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
"models/",
|
|
14
14
|
"streaming/",
|
|
15
15
|
"README.md",
|
|
16
|
+
"CHANGELOG.md",
|
|
17
|
+
"TEST_COVERAGE.md",
|
|
16
18
|
"LICENSE",
|
|
17
19
|
"screenshot.png"
|
|
18
20
|
],
|
|
@@ -22,6 +24,7 @@
|
|
|
22
24
|
"check": "echo 'nothing to check'"
|
|
23
25
|
},
|
|
24
26
|
"dependencies": {
|
|
27
|
+
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
|
25
28
|
"@google/genai": "^1.42.0",
|
|
26
29
|
"google-auth-library": "^9.0.0"
|
|
27
30
|
},
|
package/streaming/maas.ts
CHANGED
|
@@ -1,15 +1,327 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* MaaS streaming handler for Claude and all other models
|
|
3
|
-
* Uses OpenAI-compatible Chat Completions endpoint
|
|
2
|
+
* MaaS streaming handler for Claude and all other models.
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* and patches the model ID back to the friendly name in response events.
|
|
4
|
+
* - Anthropic models: native AnthropicVertex SDK streaming
|
|
5
|
+
* - Other MaaS models: Vertex OpenAI-compatible Chat Completions endpoint
|
|
8
6
|
*/
|
|
9
7
|
|
|
10
8
|
import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
|
|
11
9
|
import { getAuthConfig, buildBaseUrl, getAccessToken, resolveLocation } from "../auth.js";
|
|
12
|
-
import {
|
|
10
|
+
import {
|
|
11
|
+
createAssistantMessageEventStream,
|
|
12
|
+
type AssistantMessageEventStream,
|
|
13
|
+
type Model,
|
|
14
|
+
streamSimpleOpenAICompletions,
|
|
15
|
+
calculateCost,
|
|
16
|
+
} from "@mariozechner/pi-ai";
|
|
17
|
+
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
|
|
18
|
+
|
|
19
|
+
/**
 * Translate a reasoning level string into the effort value sent to Anthropic.
 *
 * @param reasoning - Requested reasoning level; may be omitted.
 * @returns `undefined` when no level is given; `"low"` for `"minimal"`/`"low"`;
 *   `"medium"` for `"medium"`; `"max"` for `"xhigh"`; `"high"` for any other value.
 */
function mapAnthropicEffort(reasoning?: string): "low" | "medium" | "high" | "max" | undefined {
  // Missing or empty level: no effort is requested at all.
  if (!reasoning) {
    return undefined;
  }

  switch (reasoning) {
    case "minimal":
    case "low":
      return "low";
    case "medium":
      return "medium";
    case "xhigh":
      return "max";
    default:
      // Every remaining value (including "high" itself) collapses to "high".
      return "high";
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Rewrite a tool call ID so that it satisfies Anthropic's ID pattern
 * `^[a-zA-Z0-9_-]+$`.
 *
 * Every character outside that set becomes an underscore, the result is cut
 * to at most 64 characters, and an ID that ends up empty is replaced by the
 * fixed placeholder `"tool_id"`. Applying the function to its own output
 * returns the same string.
 *
 * @param id - Raw tool call ID.
 * @returns The rewritten, non-empty ID.
 */
function sanitizeToolId(id: string): string {
  const cleaned = id.replace(/[^a-zA-Z0-9_-]/g, "_");
  const bounded = cleaned.length > 64 ? cleaned.substring(0, 64) : cleaned;
  // Same placeholder for every empty ID, so the mapping stays deterministic.
  return bounded === "" ? "tool_id" : bounded;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Check whether a thinking signature has the shape of a padded base64 string.
 *
 * The check is purely syntactic: one or more characters from the standard
 * base64 alphabet, followed by at most two `=` padding characters, with a
 * total length that is a multiple of four.
 *
 * @param signature - Signature string to inspect; may be omitted.
 * @returns `true` only when the signature is present and passes both checks.
 */
function isValidThinkingSignature(signature?: string): boolean {
  if (!signature) {
    return false;
  }

  const lengthIsMultipleOfFour = signature.length % 4 === 0;
  const usesBase64Alphabet = /^[A-Za-z0-9+/]+={0,2}$/.test(signature);
  return usesBase64Alphabet && lengthIsMultipleOfFour;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Replay pass 1: rewrite every assistant tool call ID with `sanitizeToolId`
 * and apply the same rewrite to the tool results that reference those calls.
 *
 * @param sourceMessages - Conversation messages in the internal format.
 * @returns A new array; assistant and tool result messages are shallow copies.
 */
function normalizeReplayToolIds(sourceMessages: any[]): any[] {
  const normalized: any[] = [];
  // Original ID -> rewritten ID, recorded only when the rewrite changed it.
  const toolIdMap = new Map<string, string>();

  for (const msg of sourceMessages) {
    if (msg.role === "assistant" && Array.isArray(msg.content)) {
      const content = msg.content.map((block: any) => {
        if (block?.type !== "toolCall") return block;
        const normalizedId = sanitizeToolId(String(block.id ?? ""));
        if (block.id && normalizedId !== block.id) toolIdMap.set(block.id, normalizedId);
        return { ...block, id: normalizedId };
      });
      normalized.push({ ...msg, content });
    } else if (msg.role === "toolResult") {
      const mapped = toolIdMap.get(msg.toolCallId);
      normalized.push({ ...msg, toolCallId: sanitizeToolId(String(mapped ?? msg.toolCallId ?? "")) });
    } else {
      normalized.push(msg);
    }
  }

  return normalized;
}

/**
 * Replay pass 2: make every assistant message that contains tool calls be
 * followed directly by a result for each of those calls.
 *
 * - Assistant messages whose `stopReason` is `"error"` or `"aborted"` are dropped.
 * - Tool results directly following a tool-calling assistant message are kept;
 *   a synthetic error result is appended for each call that has none.
 * - Tool results found anywhere else are dropped.
 *
 * @param normalized - Output of `normalizeReplayToolIds`.
 * @returns The messages that are safe to replay, in order.
 */
function enforceToolResultAdjacency(normalized: any[]): any[] {
  const replayable: any[] = [];

  for (let i = 0; i < normalized.length; i++) {
    const msg = normalized[i];

    if (msg.role === "assistant") {
      if (msg.stopReason === "error" || msg.stopReason === "aborted") continue;

      const toolCalls = Array.isArray(msg.content)
        ? msg.content.filter((b: any) => b?.type === "toolCall" && b?.id && b?.name)
        : [];

      replayable.push(msg);

      if (toolCalls.length > 0) {
        // Gather the run of tool results that immediately follows this message.
        const collectedToolResults: any[] = [];
        let j = i + 1;
        while (j < normalized.length && normalized[j]?.role === "toolResult") {
          collectedToolResults.push(normalized[j]);
          j++;
        }

        // Fill in a placeholder result for every call that was never answered.
        const existingIds = new Set(collectedToolResults.map((tr: any) => tr.toolCallId));
        for (const tc of toolCalls) {
          if (!existingIds.has(tc.id)) {
            collectedToolResults.push({
              role: "toolResult",
              toolCallId: tc.id,
              toolName: tc.name,
              content: [{ type: "text", text: "No result provided" }],
              isError: true,
              timestamp: Date.now(),
            });
          }
        }

        replayable.push(...collectedToolResults);
        // Skip past the results consumed above.
        i = j - 1;
      }
      continue;
    }

    // Drop orphan tool results (not immediately after a tool-calling assistant message).
    if (msg.role === "toolResult") continue;

    replayable.push(msg);
  }

  return replayable;
}

/**
 * Final replay pass: convert internal messages into Anthropic message blocks.
 *
 * - User text and image blocks are forwarded; empty or whitespace-only text
 *   is skipped, matching the filter already applied to assistant text.
 * - Assistant thinking blocks keep their signature only when the message was
 *   produced by this same model via this provider and the signature passes
 *   `isValidThinkingSignature`; otherwise the thinking is sent as plain text.
 * - Consecutive tool results are merged into a single user message.
 * - Messages with any other role are ignored.
 *
 * @param replayable - Output of `enforceToolResultAdjacency`.
 * @param modelId - ID of the model about to be called (compared to `msg.model`).
 * @returns Messages in the shape passed as `messages` to the Anthropic client.
 */
function toAnthropicMessages(
  replayable: any[],
  modelId: string,
): Array<{ role: "user" | "assistant"; content: any }> {
  const messages: Array<{ role: "user" | "assistant"; content: any }> = [];

  for (let i = 0; i < replayable.length; i++) {
    const msg = replayable[i];

    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        // Skip blank prompts instead of sending an empty text block.
        if (msg.content.trim()) {
          messages.push({ role: "user", content: [{ type: "text", text: msg.content }] });
        }
      } else if (Array.isArray(msg.content)) {
        const blocks = msg.content
          .map((c: any) => {
            if (c?.type === "text") {
              return c.text?.trim() ? { type: "text", text: c.text } : null;
            }
            if (c?.type === "image") {
              return { type: "image", source: { type: "base64", media_type: c.mimeType, data: c.data } };
            }
            return null;
          })
          .filter(Boolean);
        if (blocks.length > 0) messages.push({ role: "user", content: blocks });
      }
      continue;
    }

    if (msg.role === "assistant") {
      const blocks: any[] = [];
      const isSameModel = msg.provider === "vertex" && msg.api === "anthropic-messages" && msg.model === modelId;

      if (Array.isArray(msg.content)) {
        for (const block of msg.content) {
          if (block.type === "text" && block.text?.trim()) {
            blocks.push({ type: "text", text: block.text });
          } else if (block.type === "toolCall") {
            blocks.push({
              type: "tool_use",
              id: sanitizeToolId(String(block.id ?? "")),
              name: block.name,
              input: block.arguments ?? {},
            });
          } else if (block.type === "thinking" && block.thinking?.trim()) {
            if (isSameModel && isValidThinkingSignature(block.thinkingSignature)) {
              blocks.push({ type: "thinking", thinking: block.thinking, signature: block.thinkingSignature });
            } else {
              // Cross-model/provider replay: convert thinking to plain text to avoid signature errors.
              blocks.push({ type: "text", text: block.thinking });
            }
          }
        }
      }
      if (blocks.length > 0) messages.push({ role: "assistant", content: blocks });
      continue;
    }

    if (msg.role === "toolResult") {
      // Group consecutive tool results into one user message.
      const toolResultBlocks: any[] = [];
      let j = i;
      while (j < replayable.length && replayable[j]?.role === "toolResult") {
        const tr = replayable[j];
        // Only textual content is forwarded; other block types are omitted.
        const text = typeof tr.content === "string"
          ? tr.content
          : Array.isArray(tr.content)
            ? tr.content.filter((c: any) => c?.type === "text").map((c: any) => c.text).join("\n")
            : JSON.stringify(tr.content ?? "");

        toolResultBlocks.push({
          type: "tool_result",
          tool_use_id: sanitizeToolId(String(tr.toolCallId ?? "")),
          // An error result must carry some text, so substitute a placeholder when there is none.
          content: text || (tr.isError ? "Tool failed without output" : ""),
          ...(tr.isError ? { is_error: true } : {}),
        });
        j++;
      }

      if (toolResultBlocks.length > 0) {
        messages.push({ role: "user", content: toolResultBlocks });
      }
      i = j - 1;
    }
  }

  return messages;
}

/**
 * Stream a Claude model via the native AnthropicVertex SDK.
 *
 * Converts the conversation in `context` into Anthropic messages (tool ID
 * normalization, tool result adjacency, thinking signature handling), starts
 * a streaming request and forwards the response as events on `stream`:
 * `start`, then per-block `*_start` / `*_delta` / `*_end` events, then `done`.
 * The function does not end `stream` and does not catch errors; both are left
 * to the caller.
 *
 * @param model - Model configuration; `apiId` is sent to the API, `id` is
 *   written into the emitted message.
 * @param context - Conversation context (`messages`, optional `systemPrompt`
 *   and `tools`).
 * @param options - Optional `maxTokens`, `temperature`, `reasoning`, `signal`.
 * @param stream - Event stream that receives the assistant message events.
 */
async function streamAnthropic(
  model: VertexModelConfig,
  context: Context,
  options: StreamOptions | undefined,
  stream: ReturnType<typeof createAssistantMessageEventStream>,
): Promise<void> {
  const location = resolveLocation(model.region);
  const auth = getAuthConfig(location);

  const client = new AnthropicVertex({
    projectId: auth.projectId,
    region: auth.location,
  });

  // Build messages with Anthropic-compatible tool-use/tool-result sequencing.
  const sourceMessages = (context.messages as any[]) ?? [];
  const messages = toAnthropicMessages(
    enforceToolResultAdjacency(normalizeReplayToolIds(sourceMessages)),
    model.id,
  );

  // Build tools
  const tools = context.tools?.map((t: any) => ({
    name: t.name,
    description: t.description,
    input_schema: {
      type: "object" as const,
      properties: t.parameters?.properties ?? {},
      required: t.parameters?.required ?? [],
    },
  }));

  const params: any = {
    model: model.apiId,
    // Default to half of the model's maximum when the caller sets no limit.
    max_tokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
    messages,
    ...(context.systemPrompt ? { system: context.systemPrompt } : {}),
    ...(tools && tools.length > 0 ? { tools } : {}),
    // Temperature is only forwarded when no reasoning level is requested.
    ...(options?.temperature !== undefined && !options?.reasoning ? { temperature: options.temperature } : {}),
  };

  // Thinking
  if (model.reasoning && options?.reasoning) {
    const effort = mapAnthropicEffort(options.reasoning);
    if (effort) {
      params.thinking = { type: "adaptive" };
      params.output_config = { effort };
    }
  }

  // Message object that is mutated in place and attached to every emitted event.
  const output: any = {
    role: "assistant",
    content: [],
    api: "anthropic-messages",
    provider: "vertex",
    model: model.id,
    usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
    stopReason: "stop",
    timestamp: Date.now(),
  };

  stream.push({ type: "start", partial: output });

  const anthropicStream = client.messages.stream(params, { signal: options?.signal });

  for await (const event of anthropicStream) {
    if (event.type === "message_start") {
      output.responseId = event.message.id;
      output.usage.input = event.message.usage.input_tokens || 0;
      output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
      output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;

    } else if (event.type === "content_block_start") {
      // Each block temporarily carries the API's block index so later events can find it.
      const cb = event.content_block;
      if (cb.type === "text") {
        output.content.push({ type: "text", text: "", index: event.index });
        stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
      } else if (cb.type === "thinking") {
        output.content.push({ type: "thinking", thinking: "", thinkingSignature: "", index: event.index });
        stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
      } else if (cb.type === "tool_use") {
        output.content.push({ type: "toolCall", id: cb.id, name: cb.name, arguments: {}, partialArgs: "", index: event.index });
        stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
      }

    } else if (event.type === "content_block_delta") {
      const idx = output.content.findIndex((b: any) => b.index === event.index);
      const block = output.content[idx];
      // Deltas for block types that were never registered are ignored.
      if (!block) continue;

      const delta = event.delta;
      if (delta.type === "text_delta" && block.type === "text") {
        block.text += delta.text;
        stream.push({ type: "text_delta", contentIndex: idx, delta: delta.text, partial: output });
      } else if (delta.type === "thinking_delta" && block.type === "thinking") {
        block.thinking += delta.thinking;
        stream.push({ type: "thinking_delta", contentIndex: idx, delta: delta.thinking, partial: output });
      } else if (delta.type === "signature_delta" && block.type === "thinking") {
        // Signature fragments are accumulated silently; no event is emitted for them.
        block.thinkingSignature = (block.thinkingSignature || "") + delta.signature;
      } else if (delta.type === "input_json_delta" && block.type === "toolCall") {
        block.partialArgs += delta.partial_json;
        stream.push({ type: "toolcall_delta", contentIndex: idx, delta: delta.partial_json, partial: output });
      }

    } else if (event.type === "content_block_stop") {
      const idx = output.content.findIndex((b: any) => b.index === event.index);
      const block = output.content[idx];
      if (!block) continue;
      // The bookkeeping index is removed once the block is complete.
      delete block.index;

      if (block.type === "text") {
        stream.push({ type: "text_end", contentIndex: idx, content: block.text, partial: output });
      } else if (block.type === "thinking") {
        stream.push({ type: "thinking_end", contentIndex: idx, content: block.thinking, partial: output });
      } else if (block.type === "toolCall") {
        // Unparseable (including empty) argument JSON falls back to an empty object.
        try { block.arguments = JSON.parse(block.partialArgs); } catch { block.arguments = {}; }
        delete block.partialArgs;
        stream.push({ type: "toolcall_end", contentIndex: idx, toolCall: block, partial: output });
      }

    } else if (event.type === "message_delta") {
      if (event.delta.stop_reason) {
        const r = event.delta.stop_reason;
        // Any stop reason other than the three mapped here is reported as "stop".
        output.stopReason = r === "end_turn" ? "stop" : r === "max_tokens" ? "length" : r === "tool_use" ? "toolUse" : "stop";
      }
      if (event.usage?.output_tokens != null) output.usage.output = event.usage.output_tokens;
    }
  }

  output.usage.totalTokens = output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
  calculateCost(model as any, output.usage);

  // A message containing any tool call is always reported as a tool-use stop.
  if (output.content.some((b: any) => b.type === "toolCall")) {
    output.stopReason = "toolUse";
  }

  stream.push({ type: "done", reason: output.stopReason, message: output });
}
|
|
13
325
|
|
|
14
326
|
export function streamMaaS(
|
|
15
327
|
model: VertexModelConfig,
|
|
@@ -19,20 +331,24 @@ export function streamMaaS(
|
|
|
19
331
|
const stream = createAssistantMessageEventStream();
|
|
20
332
|
|
|
21
333
|
(async () => {
|
|
334
|
+
const apiModelId = model.apiId.includes("/") ? model.apiId : `${model.publisher}/${model.apiId}`;
|
|
335
|
+
|
|
22
336
|
try {
|
|
23
|
-
|
|
337
|
+
if (model.publisher === "anthropic") {
|
|
338
|
+
await streamAnthropic(model, context, options, stream);
|
|
339
|
+
stream.end();
|
|
340
|
+
return;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Non-Anthropic MaaS models: Vertex OpenAI-compatible endpoint.
|
|
24
344
|
const location = resolveLocation(model.region);
|
|
25
345
|
const auth = getAuthConfig(location);
|
|
26
346
|
const accessToken = await getAccessToken();
|
|
27
|
-
|
|
28
347
|
const baseUrl = buildBaseUrl(auth.projectId, auth.location);
|
|
29
348
|
const endpoint = `${baseUrl}/endpoints/openapi`;
|
|
30
349
|
|
|
31
|
-
// Create a model object compatible with pi-ai's OpenAI streaming.
|
|
32
|
-
// Use model.apiId directly so the correct model name goes in the request body.
|
|
33
|
-
// The friendly model.id is patched back into response events below for session persistence.
|
|
34
350
|
const modelForPi: Model<"openai-completions"> = {
|
|
35
|
-
id:
|
|
351
|
+
id: apiModelId,
|
|
36
352
|
name: model.name,
|
|
37
353
|
api: "openai-completions",
|
|
38
354
|
provider: "vertex",
|
|
@@ -52,53 +368,41 @@ export function streamMaaS(
|
|
|
52
368
|
},
|
|
53
369
|
};
|
|
54
370
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
apiKey: accessToken,
|
|
62
|
-
maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
|
|
63
|
-
temperature: options?.temperature,
|
|
64
|
-
},
|
|
65
|
-
);
|
|
371
|
+
const innerStream = streamSimpleOpenAICompletions(modelForPi, context as any, {
|
|
372
|
+
...options,
|
|
373
|
+
apiKey: accessToken,
|
|
374
|
+
maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
|
|
375
|
+
temperature: options?.temperature,
|
|
376
|
+
});
|
|
66
377
|
|
|
67
|
-
// Forward all events, patching model ID back to the friendly name
|
|
68
|
-
// so pi-coding-agent can restore sessions correctly.
|
|
69
378
|
for await (const event of innerStream) {
|
|
70
|
-
if ("partial" in event && event.partial)
|
|
71
|
-
|
|
72
|
-
}
|
|
73
|
-
if ("message" in event && event.message) {
|
|
74
|
-
event.message.model = model.id;
|
|
75
|
-
}
|
|
379
|
+
if ("partial" in event && event.partial) event.partial.model = model.id;
|
|
380
|
+
if ("message" in event && event.message) event.message.model = model.id;
|
|
76
381
|
if ("error" in event && event.error && typeof event.error === "object") {
|
|
77
|
-
|
|
382
|
+
const err = event.error as any;
|
|
383
|
+
err.model = model.id;
|
|
384
|
+
if (typeof err.errorMessage === "string" && /^400\s*(status code)?\s*\(no body\)/i.test(err.errorMessage)) {
|
|
385
|
+
err.errorMessage = `Vertex MaaS HTTP 400 (no body) for model "${apiModelId}". Not automatically treated as context overflow.`;
|
|
386
|
+
}
|
|
78
387
|
}
|
|
79
388
|
stream.push(event);
|
|
80
389
|
}
|
|
81
390
|
stream.end();
|
|
391
|
+
|
|
82
392
|
} catch (error) {
|
|
393
|
+
const rawMessage = error instanceof Error ? error.message : String(error);
|
|
83
394
|
stream.push({
|
|
84
395
|
type: "error",
|
|
85
396
|
reason: options?.signal?.aborted ? "aborted" : "error",
|
|
86
397
|
error: {
|
|
87
398
|
role: "assistant",
|
|
88
399
|
content: [],
|
|
89
|
-
api: "openai-completions",
|
|
400
|
+
api: model.publisher === "anthropic" ? "anthropic-messages" : "openai-completions",
|
|
90
401
|
provider: "vertex",
|
|
91
402
|
model: model.id,
|
|
92
|
-
usage: {
|
|
93
|
-
input: 0,
|
|
94
|
-
output: 0,
|
|
95
|
-
cacheRead: 0,
|
|
96
|
-
cacheWrite: 0,
|
|
97
|
-
totalTokens: 0,
|
|
98
|
-
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
99
|
-
},
|
|
403
|
+
usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
|
100
404
|
stopReason: options?.signal?.aborted ? "aborted" : "error",
|
|
101
|
-
errorMessage:
|
|
405
|
+
errorMessage: rawMessage,
|
|
102
406
|
timestamp: Date.now(),
|
|
103
407
|
},
|
|
104
408
|
});
|