@ssweens/pi-vertex 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.1.3] - 2026-03-26
6
+ ### Fixed
7
+ - Hardened Claude-on-Vertex replay for mid-session model switching (tool ID normalization, tool result adjacency, thinking signature validation).
8
+ - Prevented Anthropic tool replay errors by inserting synthetic tool results when missing.
9
+
10
+ ### Updated
11
+ - Claude 4.6 models use native Anthropic Vertex SDK streaming.
12
+ - Claude 4.6 context window updated to 1M.
13
+ - Model list order in the selector is now alphabetized by ID.
14
+
15
+ ## [1.1.2] - 2026-03-24
16
+ ### Changed
17
+ - Initial Claude 4.x support on Vertex.
package/README.md CHANGED
@@ -130,8 +130,8 @@ alias pil="GOOGLE_CLOUD_PROJECT=your-project pi --provider vertex --model llama-
130
130
 
131
131
  | Model | Context | Max Tokens | Input | Reasoning | Price (in/out) | Region |
132
132
  |-------|---------|------------|-------|-----------|----------------|--------|
133
- | claude-opus-4-6 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
- | claude-sonnet-4-6 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
133
+ | claude-opus-4-6 | 1M | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
134
+ | claude-sonnet-4-6 | 1M | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
135
135
  | claude-opus-4-5 | 200K | 32,000 | text, image | ✅ | $5.00/$25.00 | global |
136
136
  | claude-sonnet-4-5 | 200K | 64,000 | text, image | ✅ | $3.00/$15.00 | global |
137
137
  | claude-haiku-4-5 | 200K | 64,000 | text, image | ✅ | $1.00/$5.00 | global |
@@ -213,6 +213,7 @@ export GOOGLE_CLOUD_LOCATION=us-central1
213
213
  ## Dependencies
214
214
 
215
215
  - `@google/genai`: Google GenAI SDK for Gemini models
216
+ - `@anthropic-ai/vertex-sdk`: Official Anthropic-on-Vertex SDK for Claude models (native streaming)
216
217
  - `google-auth-library`: ADC authentication for all models
217
218
  - `@mariozechner/pi-ai`: Peer dependency
218
219
  - `@mariozechner/pi-coding-agent`: Peer dependency
package/TEST_COVERAGE.md ADDED
@@ -0,0 +1,13 @@
1
+ # Test Coverage
2
+
3
+ ## Current Status
4
+ - Automated tests: not yet implemented in this package.
5
+ - Lint/type checks: `npm run check` (currently a no-op placeholder).
6
+
7
+ ## Manual Verification
8
+ - Claude 4.6 streaming verified via Anthropic Vertex SDK.
9
+ - Mid-session model switching (tool call replay) verified interactively in pi.
10
+
11
+ ## Gaps / Next Steps
12
+ - Add automated integration tests for Anthropic Vertex streaming and tool replay.
13
+ - Add unit tests for message normalization and replay sequencing.
package/index.ts CHANGED
@@ -112,8 +112,8 @@ export default function (pi: ExtensionAPI) {
112
112
 
113
113
  // Show startup info as a widget that clears on first user input
114
114
  const vertexStartupLines = [
115
- `[pi-vertex] Initializing with project: ${projectId}`,
116
- `[pi-vertex] Registered ${ALL_MODELS.length} models`,
115
+ ` [pi-vertex] Initializing with project: ${projectId}`,
116
+ ` [pi-vertex] Registered ${ALL_MODELS.length} models`,
117
117
  ];
118
118
  pi.on("session_start", async (_event, ctx) => {
119
119
  ctx.ui.setWidget("pi-vertex-startup", (_tui, theme) => ({
package/models/claude.ts CHANGED
@@ -16,7 +16,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
16
16
  apiId: "claude-opus-4-6",
17
17
  publisher: "anthropic",
18
18
  endpointType: "maas",
19
- contextWindow: 200000,
19
+ contextWindow: 1000000,
20
20
  maxTokens: 32000,
21
21
  input: ["text", "image"],
22
22
  reasoning: true,
@@ -35,7 +35,7 @@ export const CLAUDE_MODELS: VertexModelConfig[] = [
35
35
  apiId: "claude-sonnet-4-6",
36
36
  publisher: "anthropic",
37
37
  endpointType: "maas",
38
- contextWindow: 200000,
38
+ contextWindow: 1000000,
39
39
  maxTokens: 64000,
40
40
  input: ["text", "image"],
41
41
  reasoning: true,
package/models/gemini.ts CHANGED
@@ -49,25 +49,6 @@ export const GEMINI_MODELS: VertexModelConfig[] = [
49
49
  },
50
50
 
51
51
  // --- Gemini 3 (Preview) ---
52
- {
53
- id: "gemini-3-pro",
54
- name: "Gemini 3 Pro",
55
- apiId: "gemini-3-pro-preview",
56
- publisher: "google",
57
- endpointType: "gemini",
58
- contextWindow: 1048576,
59
- maxTokens: 65536,
60
- input: ["text", "image"],
61
- reasoning: true,
62
- tools: true,
63
- cost: {
64
- input: 2.00,
65
- output: 12.00,
66
- cacheRead: 0.20,
67
- cacheWrite: 0,
68
- },
69
- region: "global",
70
- },
71
52
  {
72
53
  id: "gemini-3-flash",
73
54
  name: "Gemini 3 Flash",
package/models/index.ts CHANGED
@@ -11,7 +11,7 @@ export const ALL_MODELS: VertexModelConfig[] = [
11
11
  ...GEMINI_MODELS,
12
12
  ...CLAUDE_MODELS,
13
13
  ...MAAS_MODELS,
14
- ].sort((a, b) => a.name.localeCompare(b.name));
14
+ ].sort((a, b) => a.id.localeCompare(b.id));
15
15
 
16
16
  export function getModelById(id: string): VertexModelConfig | undefined {
17
17
  return ALL_MODELS.find((m) => m.id === id);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ssweens/pi-vertex",
3
- "version": "1.1.1",
3
+ "version": "1.1.3",
4
4
  "description": "Google Vertex AI provider for Pi coding agent - supports Gemini, Claude, and all MaaS models",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -13,6 +13,8 @@
13
13
  "models/",
14
14
  "streaming/",
15
15
  "README.md",
16
+ "CHANGELOG.md",
17
+ "TEST_COVERAGE.md",
16
18
  "LICENSE",
17
19
  "screenshot.png"
18
20
  ],
@@ -22,6 +24,7 @@
22
24
  "check": "echo 'nothing to check'"
23
25
  },
24
26
  "dependencies": {
27
+ "@anthropic-ai/vertex-sdk": "^0.14.4",
25
28
  "@google/genai": "^1.42.0",
26
29
  "google-auth-library": "^9.0.0"
27
30
  },
package/streaming/maas.ts CHANGED
@@ -1,15 +1,327 @@
1
1
  /**
2
- * MaaS streaming handler for Claude and all other models
3
- * Uses OpenAI-compatible Chat Completions endpoint
2
+ * MaaS streaming handler for Claude and all other models.
4
3
  *
5
- * Delegates to pi-ai's built-in OpenAI streaming implementation.
6
- * Uses model.apiId directly in the request (no global fetch interceptor)
7
- * and patches the model ID back to the friendly name in response events.
4
+ * - Anthropic models: native AnthropicVertex SDK streaming
5
+ * - Other MaaS models: Vertex OpenAI-compatible Chat Completions endpoint
8
6
  */
9
7
 
10
8
  import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
11
9
  import { getAuthConfig, buildBaseUrl, getAccessToken, resolveLocation } from "../auth.js";
12
- import { createAssistantMessageEventStream, type AssistantMessageEventStream, type Model, streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
10
+ import {
11
+ createAssistantMessageEventStream,
12
+ type AssistantMessageEventStream,
13
+ type Model,
14
+ streamSimpleOpenAICompletions,
15
+ calculateCost,
16
+ } from "@mariozechner/pi-ai";
17
+ import { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
18
+
19
+ function mapAnthropicEffort(reasoning?: string): "low" | "medium" | "high" | "max" | undefined {
20
+ if (!reasoning) return undefined;
21
+ if (reasoning === "minimal" || reasoning === "low") return "low";
22
+ if (reasoning === "medium") return "medium";
23
+ if (reasoning === "xhigh") return "max";
24
+ return "high";
25
+ }
26
+
27
+ /**
28
+ * Sanitize an ID to match Anthropic's pattern: ^[a-zA-Z0-9_-]+$
29
+ * Replaces invalid characters with underscores.
30
+ */
31
+ function sanitizeToolId(id: string): string {
32
+ // Replace any character that's not alphanumeric, underscore, or hyphen.
33
+ const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
34
+ // Deterministic fallback for empty/invalid IDs.
35
+ return sanitized || "tool_id";
36
+ }
37
+
38
+ function isValidThinkingSignature(signature?: string): boolean {
39
+ if (!signature) return false;
40
+ // Anthropic signatures are base64-like encrypted payloads.
41
+ return /^[A-Za-z0-9+/]+={0,2}$/.test(signature) && signature.length % 4 === 0;
42
+ }
43
+
44
+ /**
45
+ * Stream a Claude model via the native AnthropicVertex SDK.
46
+ */
47
+ async function streamAnthropic(
48
+ model: VertexModelConfig,
49
+ context: Context,
50
+ options: StreamOptions | undefined,
51
+ stream: ReturnType<typeof createAssistantMessageEventStream>,
52
+ ): Promise<void> {
53
+ const location = resolveLocation(model.region);
54
+ const auth = getAuthConfig(location);
55
+
56
+ const client = new AnthropicVertex({
57
+ projectId: auth.projectId,
58
+ region: auth.location,
59
+ });
60
+
61
+ // Build messages with Anthropic-compatible tool-use/tool-result sequencing.
62
+ const sourceMessages = (context.messages as any[]) ?? [];
63
+
64
+ // Pass 1: normalize tool call IDs and propagate mapping to tool results.
65
+ const normalized: any[] = [];
66
+ const toolIdMap = new Map<string, string>();
67
+ for (const msg of sourceMessages) {
68
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
69
+ const content = msg.content.map((block: any) => {
70
+ if (block?.type !== "toolCall") return block;
71
+ const normalizedId = sanitizeToolId(String(block.id ?? ""));
72
+ if (block.id && normalizedId !== block.id) toolIdMap.set(block.id, normalizedId);
73
+ return { ...block, id: normalizedId };
74
+ });
75
+ normalized.push({ ...msg, content });
76
+ } else if (msg.role === "toolResult") {
77
+ const mapped = toolIdMap.get(msg.toolCallId);
78
+ normalized.push({ ...msg, toolCallId: sanitizeToolId(String(mapped ?? msg.toolCallId ?? "")) });
79
+ } else {
80
+ normalized.push(msg);
81
+ }
82
+ }
83
+
84
+ // Pass 2: enforce Anthropic adjacency rule:
85
+ // assistant(tool_use...) MUST be immediately followed by user(tool_result...)
86
+ const replayable: any[] = [];
87
+ for (let i = 0; i < normalized.length; i++) {
88
+ const msg = normalized[i];
89
+
90
+ if (msg.role === "assistant") {
91
+ if (msg.stopReason === "error" || msg.stopReason === "aborted") continue;
92
+
93
+ const toolCalls = Array.isArray(msg.content)
94
+ ? msg.content.filter((b: any) => b?.type === "toolCall" && b?.id && b?.name)
95
+ : [];
96
+
97
+ replayable.push(msg);
98
+
99
+ if (toolCalls.length > 0) {
100
+ const collectedToolResults: any[] = [];
101
+ let j = i + 1;
102
+ while (j < normalized.length && normalized[j]?.role === "toolResult") {
103
+ collectedToolResults.push(normalized[j]);
104
+ j++;
105
+ }
106
+
107
+ const existingIds = new Set(collectedToolResults.map((tr: any) => tr.toolCallId));
108
+ for (const tc of toolCalls) {
109
+ if (!existingIds.has(tc.id)) {
110
+ collectedToolResults.push({
111
+ role: "toolResult",
112
+ toolCallId: tc.id,
113
+ toolName: tc.name,
114
+ content: [{ type: "text", text: "No result provided" }],
115
+ isError: true,
116
+ timestamp: Date.now(),
117
+ });
118
+ }
119
+ }
120
+
121
+ replayable.push(...collectedToolResults);
122
+ i = j - 1;
123
+ }
124
+ continue;
125
+ }
126
+
127
+ // Drop orphan tool results (invalid for Anthropic if not immediately after tool_use assistant msg).
128
+ if (msg.role === "toolResult") continue;
129
+
130
+ replayable.push(msg);
131
+ }
132
+
133
+ // Final pass: convert replayable internal messages to Anthropic message blocks.
134
+ const messages: Array<{ role: "user" | "assistant"; content: any }> = [];
135
+ for (let i = 0; i < replayable.length; i++) {
136
+ const msg = replayable[i];
137
+
138
+ if (msg.role === "user") {
139
+ if (typeof msg.content === "string") {
140
+ messages.push({ role: "user", content: [{ type: "text", text: msg.content }] });
141
+ } else if (Array.isArray(msg.content)) {
142
+ const blocks = msg.content
143
+ .map((c: any) => {
144
+ if (c.type === "text") return { type: "text", text: c.text };
145
+ if (c.type === "image") {
146
+ return { type: "image", source: { type: "base64", media_type: c.mimeType, data: c.data } };
147
+ }
148
+ return null;
149
+ })
150
+ .filter(Boolean);
151
+ if (blocks.length > 0) messages.push({ role: "user", content: blocks });
152
+ }
153
+ continue;
154
+ }
155
+
156
+ if (msg.role === "assistant") {
157
+ const blocks: any[] = [];
158
+ const isSameModel = msg.provider === "vertex" && msg.api === "anthropic-messages" && msg.model === model.id;
159
+
160
+ if (Array.isArray(msg.content)) {
161
+ for (const block of msg.content) {
162
+ if (block.type === "text" && block.text?.trim()) {
163
+ blocks.push({ type: "text", text: block.text });
164
+ } else if (block.type === "toolCall") {
165
+ blocks.push({ type: "tool_use", id: sanitizeToolId(String(block.id ?? "")), name: block.name, input: block.arguments ?? {} });
166
+ } else if (block.type === "thinking" && block.thinking?.trim()) {
167
+ if (isSameModel && isValidThinkingSignature(block.thinkingSignature)) {
168
+ blocks.push({ type: "thinking", thinking: block.thinking, signature: block.thinkingSignature });
169
+ } else {
170
+ // Cross-model/provider replay: convert thinking to plain text to avoid signature errors.
171
+ blocks.push({ type: "text", text: block.thinking });
172
+ }
173
+ }
174
+ }
175
+ }
176
+ if (blocks.length > 0) messages.push({ role: "assistant", content: blocks });
177
+ continue;
178
+ }
179
+
180
+ if (msg.role === "toolResult") {
181
+ // Group consecutive tool results into one user message (Anthropic expects this shape).
182
+ const toolResultBlocks: any[] = [];
183
+ let j = i;
184
+ while (j < replayable.length && replayable[j]?.role === "toolResult") {
185
+ const tr = replayable[j];
186
+ const text = typeof tr.content === "string"
187
+ ? tr.content
188
+ : Array.isArray(tr.content)
189
+ ? tr.content.filter((c: any) => c?.type === "text").map((c: any) => c.text).join("\n")
190
+ : JSON.stringify(tr.content ?? "");
191
+
192
+ toolResultBlocks.push({
193
+ type: "tool_result",
194
+ tool_use_id: sanitizeToolId(String(tr.toolCallId ?? "")),
195
+ content: text || "",
196
+ ...(tr.isError ? { is_error: true } : {}),
197
+ });
198
+ j++;
199
+ }
200
+
201
+ if (toolResultBlocks.length > 0) {
202
+ messages.push({ role: "user", content: toolResultBlocks });
203
+ }
204
+ i = j - 1;
205
+ }
206
+ }
207
+
208
+ // Build tools
209
+ const tools = context.tools?.map((t: any) => ({
210
+ name: t.name,
211
+ description: t.description,
212
+ input_schema: {
213
+ type: "object" as const,
214
+ properties: t.parameters?.properties ?? {},
215
+ required: t.parameters?.required ?? [],
216
+ },
217
+ }));
218
+
219
+ const params: any = {
220
+ model: model.apiId,
221
+ max_tokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
222
+ messages,
223
+ ...(context.systemPrompt ? { system: context.systemPrompt } : {}),
224
+ ...(tools && tools.length > 0 ? { tools } : {}),
225
+ ...(options?.temperature !== undefined && !options?.reasoning ? { temperature: options.temperature } : {}),
226
+ };
227
+
228
+ // Thinking
229
+ if (model.reasoning && options?.reasoning) {
230
+ const effort = mapAnthropicEffort(options.reasoning);
231
+ if (effort) {
232
+ params.thinking = { type: "adaptive" };
233
+ params.output_config = { effort };
234
+ }
235
+ }
236
+
237
+ const output: any = {
238
+ role: "assistant",
239
+ content: [],
240
+ api: "anthropic-messages",
241
+ provider: "vertex",
242
+ model: model.id,
243
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
244
+ stopReason: "stop",
245
+ timestamp: Date.now(),
246
+ };
247
+
248
+ stream.push({ type: "start", partial: output });
249
+
250
+ const anthropicStream = client.messages.stream(params, { signal: options?.signal });
251
+
252
+ for await (const event of anthropicStream) {
253
+ if (event.type === "message_start") {
254
+ output.responseId = event.message.id;
255
+ output.usage.input = event.message.usage.input_tokens || 0;
256
+ output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
257
+ output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
258
+
259
+ } else if (event.type === "content_block_start") {
260
+ const cb = event.content_block;
261
+ if (cb.type === "text") {
262
+ output.content.push({ type: "text", text: "", index: event.index });
263
+ stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
264
+ } else if (cb.type === "thinking") {
265
+ output.content.push({ type: "thinking", thinking: "", thinkingSignature: "", index: event.index });
266
+ stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
267
+ } else if (cb.type === "tool_use") {
268
+ output.content.push({ type: "toolCall", id: cb.id, name: cb.name, arguments: {}, partialArgs: "", index: event.index });
269
+ stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
270
+ }
271
+
272
+ } else if (event.type === "content_block_delta") {
273
+ const idx = output.content.findIndex((b: any) => b.index === event.index);
274
+ const block = output.content[idx];
275
+ if (!block) continue;
276
+
277
+ const delta = event.delta;
278
+ if (delta.type === "text_delta" && block.type === "text") {
279
+ block.text += delta.text;
280
+ stream.push({ type: "text_delta", contentIndex: idx, delta: delta.text, partial: output });
281
+ } else if (delta.type === "thinking_delta" && block.type === "thinking") {
282
+ block.thinking += delta.thinking;
283
+ stream.push({ type: "thinking_delta", contentIndex: idx, delta: delta.thinking, partial: output });
284
+ } else if (delta.type === "signature_delta" && block.type === "thinking") {
285
+ block.thinkingSignature = (block.thinkingSignature || "") + delta.signature;
286
+ } else if (delta.type === "input_json_delta" && block.type === "toolCall") {
287
+ block.partialArgs += delta.partial_json;
288
+ stream.push({ type: "toolcall_delta", contentIndex: idx, delta: delta.partial_json, partial: output });
289
+ }
290
+
291
+ } else if (event.type === "content_block_stop") {
292
+ const idx = output.content.findIndex((b: any) => b.index === event.index);
293
+ const block = output.content[idx];
294
+ if (!block) continue;
295
+ delete block.index;
296
+
297
+ if (block.type === "text") {
298
+ stream.push({ type: "text_end", contentIndex: idx, content: block.text, partial: output });
299
+ } else if (block.type === "thinking") {
300
+ stream.push({ type: "thinking_end", contentIndex: idx, content: block.thinking, partial: output });
301
+ } else if (block.type === "toolCall") {
302
+ try { block.arguments = JSON.parse(block.partialArgs); } catch { block.arguments = {}; }
303
+ delete block.partialArgs;
304
+ stream.push({ type: "toolcall_end", contentIndex: idx, toolCall: block, partial: output });
305
+ }
306
+
307
+ } else if (event.type === "message_delta") {
308
+ if (event.delta.stop_reason) {
309
+ const r = event.delta.stop_reason;
310
+ output.stopReason = r === "end_turn" ? "stop" : r === "max_tokens" ? "length" : r === "tool_use" ? "toolUse" : "stop";
311
+ }
312
+ if (event.usage?.output_tokens != null) output.usage.output = event.usage.output_tokens;
313
+ }
314
+ }
315
+
316
+ output.usage.totalTokens = output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
317
+ calculateCost(model as any, output.usage);
318
+
319
+ if (output.content.some((b: any) => b.type === "toolCall")) {
320
+ output.stopReason = "toolUse";
321
+ }
322
+
323
+ stream.push({ type: "done", reason: output.stopReason, message: output });
324
+ }
13
325
 
14
326
  export function streamMaaS(
15
327
  model: VertexModelConfig,
@@ -19,20 +331,24 @@ export function streamMaaS(
19
331
  const stream = createAssistantMessageEventStream();
20
332
 
21
333
  (async () => {
334
+ const apiModelId = model.apiId.includes("/") ? model.apiId : `${model.publisher}/${model.apiId}`;
335
+
22
336
  try {
23
- // Priority: config file > env var > model region > default
337
+ if (model.publisher === "anthropic") {
338
+ await streamAnthropic(model, context, options, stream);
339
+ stream.end();
340
+ return;
341
+ }
342
+
343
+ // Non-Anthropic MaaS models: Vertex OpenAI-compatible endpoint.
24
344
  const location = resolveLocation(model.region);
25
345
  const auth = getAuthConfig(location);
26
346
  const accessToken = await getAccessToken();
27
-
28
347
  const baseUrl = buildBaseUrl(auth.projectId, auth.location);
29
348
  const endpoint = `${baseUrl}/endpoints/openapi`;
30
349
 
31
- // Create a model object compatible with pi-ai's OpenAI streaming.
32
- // Use model.apiId directly so the correct model name goes in the request body.
33
- // The friendly model.id is patched back into response events below for session persistence.
34
350
  const modelForPi: Model<"openai-completions"> = {
35
- id: model.apiId,
351
+ id: apiModelId,
36
352
  name: model.name,
37
353
  api: "openai-completions",
38
354
  provider: "vertex",
@@ -52,53 +368,41 @@ export function streamMaaS(
52
368
  },
53
369
  };
54
370
 
55
- // Delegate to pi-ai's built-in OpenAI streaming
56
- const innerStream = streamSimpleOpenAICompletions(
57
- modelForPi,
58
- context as any,
59
- {
60
- ...options,
61
- apiKey: accessToken,
62
- maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
63
- temperature: options?.temperature,
64
- },
65
- );
371
+ const innerStream = streamSimpleOpenAICompletions(modelForPi, context as any, {
372
+ ...options,
373
+ apiKey: accessToken,
374
+ maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
375
+ temperature: options?.temperature,
376
+ });
66
377
 
67
- // Forward all events, patching model ID back to the friendly name
68
- // so pi-coding-agent can restore sessions correctly.
69
378
  for await (const event of innerStream) {
70
- if ("partial" in event && event.partial) {
71
- event.partial.model = model.id;
72
- }
73
- if ("message" in event && event.message) {
74
- event.message.model = model.id;
75
- }
379
+ if ("partial" in event && event.partial) event.partial.model = model.id;
380
+ if ("message" in event && event.message) event.message.model = model.id;
76
381
  if ("error" in event && event.error && typeof event.error === "object") {
77
- (event.error as any).model = model.id;
382
+ const err = event.error as any;
383
+ err.model = model.id;
384
+ if (typeof err.errorMessage === "string" && /^400\s*(status code)?\s*\(no body\)/i.test(err.errorMessage)) {
385
+ err.errorMessage = `Vertex MaaS HTTP 400 (no body) for model "${apiModelId}". Not automatically treated as context overflow.`;
386
+ }
78
387
  }
79
388
  stream.push(event);
80
389
  }
81
390
  stream.end();
391
+
82
392
  } catch (error) {
393
+ const rawMessage = error instanceof Error ? error.message : String(error);
83
394
  stream.push({
84
395
  type: "error",
85
396
  reason: options?.signal?.aborted ? "aborted" : "error",
86
397
  error: {
87
398
  role: "assistant",
88
399
  content: [],
89
- api: "openai-completions",
400
+ api: model.publisher === "anthropic" ? "anthropic-messages" : "openai-completions",
90
401
  provider: "vertex",
91
402
  model: model.id,
92
- usage: {
93
- input: 0,
94
- output: 0,
95
- cacheRead: 0,
96
- cacheWrite: 0,
97
- totalTokens: 0,
98
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
99
- },
403
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
100
404
  stopReason: options?.signal?.aborted ? "aborted" : "error",
101
- errorMessage: error instanceof Error ? error.message : String(error),
405
+ errorMessage: rawMessage,
102
406
  timestamp: Date.now(),
103
407
  },
104
408
  });