@ssweens/pi-vertex 1.0.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/streaming/maas.ts CHANGED
@@ -1,37 +1,354 @@
1
1
  /**
2
- * MaaS streaming handler for Claude and all other models
3
- * Uses OpenAI-compatible Chat Completions endpoint
4
- *
5
- * Delegates to pi-ai's built-in OpenAI streaming implementation
2
+ * MaaS streaming handler for Claude and all other models.
3
+ *
4
+ * - Anthropic models: native AnthropicVertex SDK streaming
5
+ * - Other MaaS models: Vertex OpenAI-compatible Chat Completions endpoint
6
6
  */
7
7
 
8
8
  import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
9
9
  import { getAuthConfig, buildBaseUrl, getAccessToken, resolveLocation } from "../auth.js";
10
- import { createAssistantMessageEventStream, type AssistantMessageEventStream, type Model, streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
10
+ import {
11
+ createAssistantMessageEventStream,
12
+ type AssistantMessageEventStream,
13
+ type Model,
14
+ streamSimpleOpenAICompletions,
15
+ calculateCost,
16
+ } from "@mariozechner/pi-ai";
17
+ import { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
18
+
19
/**
 * Translate a reasoning level into the effort value sent to Anthropic.
 *
 * Matching ignores surrounding whitespace and letter case, so "LOW" or " low "
 * is treated like "low" instead of silently falling through to "high".
 *
 * @param reasoning Requested reasoning level ("minimal", "low", "medium", "high", "xhigh").
 * @returns "low" for "minimal"/"low", "medium" for "medium", "max" for "xhigh",
 *          "high" for any other non-blank value, and `undefined` when no level is given.
 */
function mapAnthropicEffort(reasoning?: string): "low" | "medium" | "high" | "max" | undefined {
  const level = reasoning?.trim().toLowerCase();
  if (!level) return undefined;

  switch (level) {
    case "minimal":
    case "low":
      return "low";
    case "medium":
      return "medium";
    case "xhigh":
      return "max";
    default:
      // "high" and any unrecognized level keep the original fallback.
      return "high";
  }
}
26
+
27
/**
 * Coerce a tool-call ID into the shape Anthropic accepts: ^[a-zA-Z0-9_-]+$.
 *
 * Every UTF-16 code unit outside that set becomes an underscore, the result is
 * capped at 64 characters, and an empty result falls back to a fixed
 * placeholder so the returned ID is never empty.
 *
 * @param id Raw tool-call ID.
 * @returns The sanitized ID, or "tool_id" when nothing is left.
 */
function sanitizeToolId(id: string): string {
  const allowedUnit = /^[a-zA-Z0-9_-]$/;
  let safe = "";

  // Walk code units (not code points) so a surrogate pair yields two underscores.
  for (let pos = 0; pos < id.length && safe.length < 64; pos++) {
    const unit = id.charAt(pos);
    safe += allowedUnit.test(unit) ? unit : "_";
  }

  return safe.length > 0 ? safe : "tool_id";
}
37
+
38
/**
 * Decide whether a thinking-block signature looks like a padded base64 payload.
 *
 * @param signature Signature string attached to a thinking block, if any.
 * @returns `true` only for a non-empty string made of base64 characters with at
 *          most two trailing "=" and a length that is a multiple of four.
 */
function isValidThinkingSignature(signature?: string): boolean {
  if (!signature) {
    return false;
  }

  const lengthIsAligned = signature.length % 4 === 0;
  const looksLikeBase64 = /^[A-Za-z0-9+/]+={0,2}$/.test(signature);
  return looksLikeBase64 && lengthIsAligned;
}
43
+
44
/**
 * Stream a Claude model via the native AnthropicVertex SDK.
 *
 * Converts the internal conversation in `context.messages` into Anthropic
 * message blocks, issues a streaming request, and translates the SDK events
 * into assistant-message events pushed onto `stream`.
 *
 * This function pushes the final `done` event but neither calls `stream.end()`
 * nor catches failures; the caller is expected to do both.
 *
 * @param model   Vertex model configuration; `apiId` is sent as the model name and
 *                `id` is recorded on the output message.
 * @param context Conversation to replay: optional `systemPrompt`, `messages`, optional `tools`.
 * @param options Optional max tokens, temperature, reasoning level and abort signal.
 * @param stream  Event stream that receives the start/delta/end/done events.
 * @throws Whatever the SDK throws, and an Error when the abort signal has fired by the
 *         time the SDK stream finishes.
 */
async function streamAnthropic(
  model: VertexModelConfig,
  context: Context,
  options: StreamOptions | undefined,
  stream: ReturnType<typeof createAssistantMessageEventStream>,
): Promise<void> {
  const location = resolveLocation(model.region);
  const auth = getAuthConfig(location);

  const client = new AnthropicVertex({
    projectId: auth.projectId,
    region: auth.location,
  });

  // Build messages with Anthropic-compatible tool-use/tool-result sequencing.
  const sourceMessages = (context.messages as any[]) ?? [];

  // Pass 1: normalize tool call IDs and propagate the mapping to tool results.
  const normalized: any[] = [];
  const toolIdMap = new Map<string, string>();
  for (const msg of sourceMessages) {
    if (msg.role === "assistant" && Array.isArray(msg.content)) {
      const content = msg.content.map((block: any) => {
        if (block?.type !== "toolCall") return block;
        const normalizedId = sanitizeToolId(String(block.id ?? ""));
        if (block.id && normalizedId !== block.id) toolIdMap.set(block.id, normalizedId);
        return { ...block, id: normalizedId };
      });
      normalized.push({ ...msg, content });
    } else if (msg.role === "toolResult") {
      const mapped = toolIdMap.get(msg.toolCallId);
      normalized.push({ ...msg, toolCallId: sanitizeToolId(String(mapped ?? msg.toolCallId ?? "")) });
    } else {
      normalized.push(msg);
    }
  }

  // Pass 2: enforce Anthropic adjacency rule:
  // assistant(tool_use...) MUST be immediately followed by user(tool_result...)
  const replayable: any[] = [];
  for (let i = 0; i < normalized.length; i++) {
    const msg = normalized[i];

    if (msg.role === "assistant") {
      // Failed or aborted turns are not replayed.
      if (msg.stopReason === "error" || msg.stopReason === "aborted") continue;

      const toolCalls = Array.isArray(msg.content)
        ? msg.content.filter((b: any) => b?.type === "toolCall" && b?.id && b?.name)
        : [];

      replayable.push(msg);

      if (toolCalls.length > 0) {
        // Gather the run of tool results that directly follows this assistant message.
        const collectedToolResults: any[] = [];
        let j = i + 1;
        while (j < normalized.length && normalized[j]?.role === "toolResult") {
          collectedToolResults.push(normalized[j]);
          j++;
        }

        // Synthesize an error result for every tool call that has none.
        const existingIds = new Set(collectedToolResults.map((tr: any) => tr.toolCallId));
        for (const tc of toolCalls) {
          if (!existingIds.has(tc.id)) {
            collectedToolResults.push({
              role: "toolResult",
              toolCallId: tc.id,
              toolName: tc.name,
              content: [{ type: "text", text: "No result provided" }],
              isError: true,
              timestamp: Date.now(),
            });
          }
        }

        replayable.push(...collectedToolResults);
        // Skip past the tool results that were just consumed.
        i = j - 1;
      }
      continue;
    }

    // Drop orphan tool results (invalid for Anthropic if not immediately after tool_use assistant msg).
    if (msg.role === "toolResult") continue;

    replayable.push(msg);
  }

  // Final pass: convert replayable internal messages to Anthropic message blocks.
  const messages: Array<{ role: "user" | "assistant"; content: any }> = [];
  for (let i = 0; i < replayable.length; i++) {
    const msg = replayable[i];

    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        // Skip blank text: an empty text block makes the whole request invalid.
        if (msg.content.trim()) {
          messages.push({ role: "user", content: [{ type: "text", text: msg.content }] });
        }
      } else if (Array.isArray(msg.content)) {
        const blocks = msg.content
          .map((c: any) => {
            if (c.type === "text") {
              // Same blank-text filter the assistant branch below applies.
              return typeof c.text === "string" && c.text.trim() ? { type: "text", text: c.text } : null;
            }
            if (c.type === "image") {
              return { type: "image", source: { type: "base64", media_type: c.mimeType, data: c.data } };
            }
            return null;
          })
          .filter(Boolean);
        if (blocks.length > 0) messages.push({ role: "user", content: blocks });
      }
      continue;
    }

    if (msg.role === "assistant") {
      const blocks: any[] = [];
      const isSameModel = msg.provider === "vertex" && msg.api === "anthropic-messages" && msg.model === model.id;

      if (Array.isArray(msg.content)) {
        for (const block of msg.content) {
          if (block.type === "text" && block.text?.trim()) {
            blocks.push({ type: "text", text: block.text });
          } else if (block.type === "toolCall") {
            blocks.push({ type: "tool_use", id: sanitizeToolId(String(block.id ?? "")), name: block.name, input: block.arguments ?? {} });
          } else if (block.type === "thinking" && block.thinking?.trim()) {
            if (isSameModel && isValidThinkingSignature(block.thinkingSignature)) {
              blocks.push({ type: "thinking", thinking: block.thinking, signature: block.thinkingSignature });
            } else {
              // Cross-model/provider replay: convert thinking to plain text to avoid signature errors.
              blocks.push({ type: "text", text: block.thinking });
            }
          }
        }
      }
      if (blocks.length > 0) messages.push({ role: "assistant", content: blocks });
      continue;
    }

    if (msg.role === "toolResult") {
      // Group consecutive tool results into one user message (Anthropic expects this shape).
      const toolResultBlocks: any[] = [];
      let j = i;
      while (j < replayable.length && replayable[j]?.role === "toolResult") {
        const tr = replayable[j];
        // Only text parts of a tool result are forwarded; other parts are dropped.
        const text = typeof tr.content === "string"
          ? tr.content
          : Array.isArray(tr.content)
            ? tr.content.filter((c: any) => c?.type === "text").map((c: any) => c.text).join("\n")
            : JSON.stringify(tr.content ?? "");

        toolResultBlocks.push({
          type: "tool_result",
          tool_use_id: sanitizeToolId(String(tr.toolCallId ?? "")),
          content: text || "",
          ...(tr.isError ? { is_error: true } : {}),
        });
        j++;
      }

      if (toolResultBlocks.length > 0) {
        messages.push({ role: "user", content: toolResultBlocks });
      }
      i = j - 1;
    }
  }

  // Build tools
  const tools = context.tools?.map((t: any) => ({
    name: t.name,
    description: t.description,
    input_schema: {
      type: "object" as const,
      properties: t.parameters?.properties ?? {},
      required: t.parameters?.required ?? [],
    },
  }));

  const params: any = {
    model: model.apiId,
    max_tokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
    messages,
    ...(context.systemPrompt ? { system: context.systemPrompt } : {}),
    ...(tools && tools.length > 0 ? { tools } : {}),
    // Temperature is only forwarded when no reasoning level is requested.
    ...(options?.temperature !== undefined && !options?.reasoning ? { temperature: options.temperature } : {}),
  };

  // Thinking
  if (model.reasoning && options?.reasoning) {
    const effort = mapAnthropicEffort(options.reasoning);
    if (effort) {
      params.thinking = { type: "adaptive" };
      params.output_config = { effort };
    }
  }

  const output: any = {
    role: "assistant",
    content: [],
    api: "anthropic-messages",
    provider: "vertex",
    model: model.id,
    usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
    stopReason: "stop",
    timestamp: Date.now(),
  };

  stream.push({ type: "start", partial: output });

  const anthropicStream = client.messages.stream(params, { signal: options?.signal });

  for await (const event of anthropicStream) {
    if (event.type === "message_start") {
      output.responseId = event.message.id;
      output.usage.input = event.message.usage.input_tokens || 0;
      output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
      output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;

    } else if (event.type === "content_block_start") {
      // Each block temporarily carries the SDK's `index` so later deltas can find it.
      const cb = event.content_block;
      if (cb.type === "text") {
        output.content.push({ type: "text", text: "", index: event.index });
        stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
      } else if (cb.type === "thinking") {
        output.content.push({ type: "thinking", thinking: "", thinkingSignature: "", index: event.index });
        stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
      } else if (cb.type === "tool_use") {
        output.content.push({ type: "toolCall", id: cb.id, name: cb.name, arguments: {}, partialArgs: "", index: event.index });
        stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
      }

    } else if (event.type === "content_block_delta") {
      const idx = output.content.findIndex((b: any) => b.index === event.index);
      const block = output.content[idx];
      if (!block) continue;

      const delta = event.delta;
      if (delta.type === "text_delta" && block.type === "text") {
        block.text += delta.text;
        stream.push({ type: "text_delta", contentIndex: idx, delta: delta.text, partial: output });
      } else if (delta.type === "thinking_delta" && block.type === "thinking") {
        block.thinking += delta.thinking;
        stream.push({ type: "thinking_delta", contentIndex: idx, delta: delta.thinking, partial: output });
      } else if (delta.type === "signature_delta" && block.type === "thinking") {
        block.thinkingSignature = (block.thinkingSignature || "") + delta.signature;
      } else if (delta.type === "input_json_delta" && block.type === "toolCall") {
        block.partialArgs += delta.partial_json;
        stream.push({ type: "toolcall_delta", contentIndex: idx, delta: delta.partial_json, partial: output });
      }

    } else if (event.type === "content_block_stop") {
      const idx = output.content.findIndex((b: any) => b.index === event.index);
      const block = output.content[idx];
      if (!block) continue;
      // The bookkeeping index is removed once the block is complete.
      delete block.index;

      if (block.type === "text") {
        stream.push({ type: "text_end", contentIndex: idx, content: block.text, partial: output });
      } else if (block.type === "thinking") {
        stream.push({ type: "thinking_end", contentIndex: idx, content: block.thinking, partial: output });
      } else if (block.type === "toolCall") {
        // Unparseable (including empty) argument JSON falls back to an empty object.
        try { block.arguments = JSON.parse(block.partialArgs); } catch { block.arguments = {}; }
        delete block.partialArgs;
        stream.push({ type: "toolcall_end", contentIndex: idx, toolCall: block, partial: output });
      }

    } else if (event.type === "message_delta") {
      if (event.delta.stop_reason) {
        const r = event.delta.stop_reason;
        output.stopReason = r === "end_turn" ? "stop" : r === "max_tokens" ? "length" : r === "tool_use" ? "toolUse" : "stop";
      }
      if (event.usage?.output_tokens != null) output.usage.output = event.usage.output_tokens;
    }
  }

  // Surface a cancellation as a failure so the caller reports it as aborted
  // instead of emitting a normal `done` event for a partial message.
  if (options?.signal?.aborted) {
    throw new Error("Request was aborted");
  }

  output.usage.totalTokens = output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
  calculateCost(model as any, output.usage);

  // Any tool call in the output forces the stop reason to "toolUse".
  if (output.content.some((b: any) => b.type === "toolCall")) {
    output.stopReason = "toolUse";
  }

  stream.push({ type: "done", reason: output.stopReason, message: output });
}
11
325
 
12
326
  export function streamMaaS(
13
327
  model: VertexModelConfig,
14
328
  context: Context,
15
- options?: StreamOptions
329
+ options?: StreamOptions,
16
330
  ): AssistantMessageEventStream {
17
331
  const stream = createAssistantMessageEventStream();
18
332
 
19
333
  (async () => {
20
- const originalFetch = globalThis.fetch;
334
+ const apiModelId = model.apiId.includes("/") ? model.apiId : `${model.publisher}/${model.apiId}`;
335
+
21
336
  try {
22
- // Priority: config file > env var > model region > default
337
+ if (model.publisher === "anthropic") {
338
+ await streamAnthropic(model, context, options, stream);
339
+ stream.end();
340
+ return;
341
+ }
342
+
343
+ // Non-Anthropic MaaS models: Vertex OpenAI-compatible endpoint.
23
344
  const location = resolveLocation(model.region);
24
345
  const auth = getAuthConfig(location);
25
346
  const accessToken = await getAccessToken();
26
-
27
347
  const baseUrl = buildBaseUrl(auth.projectId, auth.location);
28
348
  const endpoint = `${baseUrl}/endpoints/openapi`;
29
- // Create a model object compatible with pi-ai's OpenAI streaming.
30
- // Note: baseUrl must point to the OpenAPI root; pi-ai appends /chat/completions.
31
- // Use model.id (registered name like "glm-5") so pi can restore sessions correctly.
32
- // The actual API model name (apiId like "zai-org/glm-5-maas") is injected via fetch interceptor below.
349
+
33
350
  const modelForPi: Model<"openai-completions"> = {
34
- id: model.id,
351
+ id: apiModelId,
35
352
  name: model.name,
36
353
  api: "openai-completions",
37
354
  provider: "vertex",
@@ -51,61 +368,41 @@ export function streamMaaS(
51
368
  },
52
369
  };
53
370
 
54
- // Intercept fetch to replace model.id with the actual API model name (apiId)
55
- // pi-ai's streaming uses model.id in the request body, but Vertex MaaS needs the full publisher-prefixed name
56
- globalThis.fetch = async (input: any, init?: any) => {
57
- if (init?.body && typeof init.body === "string") {
58
- try {
59
- const body = JSON.parse(init.body);
60
- if (body.model === model.id) {
61
- body.model = model.apiId;
62
- init = { ...init, body: JSON.stringify(body) };
63
- }
64
- } catch {}
65
- }
66
- return originalFetch(input, init);
67
- };
68
-
69
- // Delegate to pi-ai's built-in OpenAI streaming
70
- const innerStream = streamSimpleOpenAICompletions(
71
- modelForPi,
72
- context as any,
73
- {
74
- ...options,
75
- apiKey: accessToken,
76
- maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
77
- temperature: options?.temperature ?? 0.7,
78
- }
79
- );
371
+ const innerStream = streamSimpleOpenAICompletions(modelForPi, context as any, {
372
+ ...options,
373
+ apiKey: accessToken,
374
+ maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
375
+ temperature: options?.temperature,
376
+ });
80
377
 
81
- // Forward all events from inner stream to outer stream
82
378
  for await (const event of innerStream) {
379
+ if ("partial" in event && event.partial) event.partial.model = model.id;
380
+ if ("message" in event && event.message) event.message.model = model.id;
381
+ if ("error" in event && event.error && typeof event.error === "object") {
382
+ const err = event.error as any;
383
+ err.model = model.id;
384
+ if (typeof err.errorMessage === "string" && /^400\s*(status code)?\s*\(no body\)/i.test(err.errorMessage)) {
385
+ err.errorMessage = `Vertex MaaS HTTP 400 (no body) for model "${apiModelId}". Not automatically treated as context overflow.`;
386
+ }
387
+ }
83
388
  stream.push(event);
84
389
  }
85
- globalThis.fetch = originalFetch;
86
390
  stream.end();
87
391
 
88
392
  } catch (error) {
89
- globalThis.fetch = originalFetch;
393
+ const rawMessage = error instanceof Error ? error.message : String(error);
90
394
  stream.push({
91
395
  type: "error",
92
396
  reason: options?.signal?.aborted ? "aborted" : "error",
93
397
  error: {
94
398
  role: "assistant",
95
399
  content: [],
96
- api: "openai-completions",
400
+ api: model.publisher === "anthropic" ? "anthropic-messages" : "openai-completions",
97
401
  provider: "vertex",
98
402
  model: model.id,
99
- usage: {
100
- input: 0,
101
- output: 0,
102
- cacheRead: 0,
103
- cacheWrite: 0,
104
- totalTokens: 0,
105
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
106
- },
403
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
107
404
  stopReason: options?.signal?.aborted ? "aborted" : "error",
108
- errorMessage: error instanceof Error ? error.message : String(error),
405
+ errorMessage: rawMessage,
109
406
  timestamp: Date.now(),
110
407
  },
111
408
  });
package/types.ts CHANGED
@@ -1,7 +1,31 @@
1
1
  /**
2
2
  * Type definitions for pi-vertex extension
3
+ *
4
+ * Core message/content types are re-exported from pi-ai to ensure pi-vertex
5
+ * handles the full message structure (thinking blocks, tool calls, tool results)
6
+ * that pi-coding-agent passes through the streamSimple callback.
3
7
  */
4
8
 
9
+ // Re-export core types from pi-ai
10
+ export type {
11
+ AssistantMessage,
12
+ AssistantMessageEvent,
13
+ AssistantMessageEventStream,
14
+ Context,
15
+ ImageContent,
16
+ Message,
17
+ StopReason,
18
+ TextContent,
19
+ ThinkingContent,
20
+ Tool,
21
+ ToolCall,
22
+ ToolResultMessage,
23
+ Usage,
24
+ UserMessage,
25
+ } from "@mariozechner/pi-ai";
26
+
27
+ // Vertex-specific types
28
+
5
29
  export type ModelInputType = "text" | "image";
6
30
  export type EndpointType = "gemini" | "maas";
7
31
 
@@ -33,44 +57,9 @@ export interface AuthConfig {
33
57
  credentials?: string;
34
58
  }
35
59
 
36
- export type MessageRole = "user" | "assistant" | "system";
37
-
38
- export interface TextContent {
39
- type: "text";
40
- text: string;
41
- }
42
-
43
- export interface ImageContent {
44
- type: "image";
45
- mimeType: string;
46
- data: string;
47
- }
48
-
49
- export type MessageContent = TextContent | ImageContent;
50
-
51
- export interface Message {
52
- role: MessageRole;
53
- content: string | MessageContent[];
54
- }
55
-
56
- export interface Tool {
57
- name: string;
58
- description: string;
59
- parameters: Record<string, unknown>;
60
- }
61
-
62
- export interface Context {
63
- systemPrompt?: string;
64
- messages: Message[];
65
- tools?: Tool[];
66
- }
67
-
68
60
  /**
   * Per-request options accepted by the streaming handlers.
   * Every field is optional.
   */
  export interface StreamOptions {
69
61
  /** Cap on the number of tokens to generate for the request. */
  maxTokens?: number;
70
62
  /** Sampling temperature to request from the model. */
  temperature?: number;
71
63
  /** Requested reasoning level; presumably ordered from least ("minimal") to most ("xhigh") effort. */
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
72
64
  /** Signal used to cancel an in-flight request. */
  signal?: AbortSignal;
73
65
  }
74
-
75
- // Re-export types from pi-ai for convenience
76
- export type { AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream } from "@mariozechner/pi-ai";