@ssweens/pi-vertex 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ /**
2
+ * MaaS streaming handler for Claude and all other models
3
+ * Uses OpenAI-compatible Chat Completions endpoint
4
+ *
5
+ * Delegates to pi-ai's built-in OpenAI streaming implementation
6
+ */
7
+
8
+ import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
9
+ import { getAuthConfig, buildBaseUrl, getAccessToken, resolveLocation } from "../auth.js";
10
+ import { createAssistantMessageEventStream, type AssistantMessageEventStream, type Model, streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
11
+
12
+ export function streamMaaS(
13
+ model: VertexModelConfig,
14
+ context: Context,
15
+ options?: StreamOptions
16
+ ): AssistantMessageEventStream {
17
+ const stream = createAssistantMessageEventStream();
18
+
19
+ (async () => {
20
+ try {
21
+ // Priority: config file > env var > model region > default
22
+ const location = resolveLocation(model.region);
23
+ const auth = getAuthConfig(location);
24
+ const accessToken = await getAccessToken();
25
+
26
+ const baseUrl = buildBaseUrl(auth.projectId, auth.location);
27
+ const endpoint = `${baseUrl}/endpoints/openapi`;
28
+ // Create a model object compatible with pi-ai's OpenAI streaming.
29
+ // Note: baseUrl must point to the OpenAPI root; pi-ai appends /chat/completions.
30
+ const modelForPi: Model<"openai-completions"> = {
31
+ id: model.apiId, // Use the full API ID with publisher prefix
32
+ name: model.name,
33
+ api: "openai-completions",
34
+ provider: "vertex",
35
+ baseUrl: endpoint,
36
+ reasoning: model.reasoning,
37
+ input: model.input,
38
+ cost: model.cost,
39
+ contextWindow: model.contextWindow,
40
+ maxTokens: model.maxTokens,
41
+ headers: {},
42
+ compat: {
43
+ supportsStore: false,
44
+ supportsDeveloperRole: false,
45
+ supportsReasoningEffort: false,
46
+ maxTokensField: "max_tokens",
47
+ thinkingFormat: model.publisher === "qwen" ? "qwen" : "openai",
48
+ },
49
+ };
50
+
51
+ // Delegate to pi-ai's built-in OpenAI streaming
52
+ const innerStream = streamSimpleOpenAICompletions(
53
+ modelForPi,
54
+ context as any,
55
+ {
56
+ ...options,
57
+ apiKey: accessToken,
58
+ maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
59
+ temperature: options?.temperature ?? 0.7,
60
+ }
61
+ );
62
+
63
+ // Forward all events from inner stream to outer stream
64
+ for await (const event of innerStream) {
65
+ stream.push(event);
66
+ }
67
+ stream.end();
68
+
69
+ } catch (error) {
70
+ stream.push({
71
+ type: "error",
72
+ reason: options?.signal?.aborted ? "aborted" : "error",
73
+ error: {
74
+ role: "assistant",
75
+ content: [],
76
+ api: "openai-completions",
77
+ provider: "vertex",
78
+ model: model.id,
79
+ usage: {
80
+ input: 0,
81
+ output: 0,
82
+ cacheRead: 0,
83
+ cacheWrite: 0,
84
+ totalTokens: 0,
85
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
86
+ },
87
+ stopReason: options?.signal?.aborted ? "aborted" : "error",
88
+ errorMessage: error instanceof Error ? error.message : String(error),
89
+ timestamp: Date.now(),
90
+ },
91
+ });
92
+ stream.end();
93
+ }
94
+ })();
95
+
96
+ return stream;
97
+ }
package/types.ts ADDED
@@ -0,0 +1,76 @@
1
/**
 * Type definitions for pi-vertex extension
 */

/** Input modality a model accepts. */
export type ModelInputType = "text" | "image";
/** Which Vertex surface serves the model: native Gemini API or Model-as-a-Service (OpenAI-compatible). */
export type EndpointType = "gemini" | "maas";

/**
 * Per-token pricing rates for a model.
 * NOTE(review): units appear to be USD per 1M tokens (rates are divided by
 * 1,000,000 in `calculateCost`) — confirm against the pricing source.
 */
export interface ModelCost {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
}

/** Static configuration describing one Vertex AI model entry. */
export interface VertexModelConfig {
  id: string; // internal model identifier
  name: string; // human-readable display name
  apiId: string; // full API id sent to Vertex (includes publisher prefix)
  publisher: string; // model publisher, e.g. "qwen" (affects thinking format)
  endpointType: EndpointType;
  contextWindow: number; // max input context size, in tokens
  maxTokens: number; // max output tokens
  input: ModelInputType[]; // supported input modalities
  reasoning: boolean; // whether the model emits reasoning/thinking output
  tools: boolean; // whether the model supports tool calling
  cost: ModelCost;
  region: string; // preferred Vertex region for this model
}

/** Resolved Google Cloud authentication settings. */
export interface AuthConfig {
  projectId: string;
  location: string;
  credentials?: string; // optional path/value for explicit credentials
}

/** Chat roles supported by the local message model. */
export type MessageRole = "user" | "assistant" | "system";

/** Plain-text message part. */
export interface TextContent {
  type: "text";
  text: string;
}

/** Inline image message part; `data` is base64-encoded. */
export interface ImageContent {
  type: "image";
  mimeType: string;
  data: string;
}

/** Discriminated union of message parts (tagged by `type`). */
export type MessageContent = TextContent | ImageContent;

/** One chat turn; content is either plain text or a list of parts. */
export interface Message {
  role: MessageRole;
  content: string | MessageContent[];
}

/** Tool definition; `parameters` is a JSON-Schema-style object. */
export interface Tool {
  name: string;
  description: string;
  parameters: Record<string, unknown>;
}

/** Full conversation context passed to a streaming handler. */
export interface Context {
  systemPrompt?: string;
  messages: Message[];
  tools?: Tool[];
}

/** Per-request generation options. */
export interface StreamOptions {
  maxTokens?: number; // output-token cap for this request
  temperature?: number;
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh"; // reasoning effort level
  signal?: AbortSignal; // cancels the in-flight request
}

// Re-export types from pi-ai for convenience
export type { AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream } from "@mariozechner/pi-ai";
package/utils.ts ADDED
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Utility functions for pi-vertex extension
3
+ */
4
+
5
+ import type { Message, MessageContent, TextContent, ToolCall, AssistantMessage } from "./types.js";
6
+
7
+ /**
8
+ * Sanitize text by removing invalid surrogate pairs
9
+ */
10
+ export function sanitizeText(text: string): string {
11
+ return text.replace(/[\uD800-\uDFFF]/g, "\uFFFD");
12
+ }
13
+
14
+ /**
15
+ * Convert messages to Gemini format
16
+ */
17
+ export function convertToGeminiMessages(messages: Message[]): any[] {
18
+ const result: any[] = [];
19
+
20
+ for (const msg of messages) {
21
+ if (msg.role === "user") {
22
+ if (typeof msg.content === "string") {
23
+ if (msg.content.trim()) {
24
+ result.push({
25
+ role: "user",
26
+ parts: [{ text: sanitizeText(msg.content) }],
27
+ });
28
+ }
29
+ } else {
30
+ const parts = msg.content.map((item) => {
31
+ if (item.type === "text") {
32
+ return { text: sanitizeText(item.text) };
33
+ } else {
34
+ return {
35
+ inlineData: {
36
+ mimeType: item.mimeType,
37
+ data: item.data,
38
+ },
39
+ };
40
+ }
41
+ });
42
+ result.push({ role: "user", parts });
43
+ }
44
+ } else if (msg.role === "assistant") {
45
+ // Gemini doesn't have a separate assistant role in the same way
46
+ // We'll handle this in the conversation history
47
+ if (typeof msg.content === "string") {
48
+ if (msg.content.trim()) {
49
+ result.push({
50
+ role: "model",
51
+ parts: [{ text: sanitizeText(msg.content) }],
52
+ });
53
+ }
54
+ }
55
+ }
56
+ }
57
+
58
+ return result;
59
+ }
60
+
61
+ /**
62
+ * Convert messages to OpenAI-compatible format (for Claude and MaaS)
63
+ */
64
+ export function convertToOpenAIMessages(messages: Message[]): any[] {
65
+ const result: any[] = [];
66
+
67
+ for (const msg of messages) {
68
+ if (msg.role === "user") {
69
+ if (typeof msg.content === "string") {
70
+ if (msg.content.trim()) {
71
+ result.push({
72
+ role: "user",
73
+ content: sanitizeText(msg.content),
74
+ });
75
+ }
76
+ } else {
77
+ const content = msg.content.map((item) => {
78
+ if (item.type === "text") {
79
+ return { type: "text", text: sanitizeText(item.text) };
80
+ } else {
81
+ return {
82
+ type: "image_url",
83
+ image_url: {
84
+ url: `data:${item.mimeType};base64,${item.data}`,
85
+ },
86
+ };
87
+ }
88
+ });
89
+ result.push({ role: "user", content });
90
+ }
91
+ } else if (msg.role === "assistant") {
92
+ if (typeof msg.content === "string") {
93
+ if (msg.content.trim()) {
94
+ result.push({
95
+ role: "assistant",
96
+ content: sanitizeText(msg.content),
97
+ });
98
+ }
99
+ }
100
+ } else if (msg.role === "system") {
101
+ // System messages handled separately
102
+ }
103
+ }
104
+
105
+ return result;
106
+ }
107
+
108
+ /**
109
+ * Convert tools to OpenAI format
110
+ */
111
+ export function convertTools(tools: any[]): any[] {
112
+ return tools.map((tool) => ({
113
+ type: "function",
114
+ function: {
115
+ name: tool.name,
116
+ description: tool.description,
117
+ parameters: tool.parameters,
118
+ },
119
+ }));
120
+ }
121
+
122
+ /**
123
+ * Parse SSE (Server-Sent Events) stream
124
+ */
125
+ export async function* parseSSEStream(response: Response): AsyncGenerator<string> {
126
+ const reader = response.body?.getReader();
127
+ if (!reader) {
128
+ throw new Error("No response body");
129
+ }
130
+
131
+ const decoder = new TextDecoder();
132
+ let buffer = "";
133
+
134
+ try {
135
+ while (true) {
136
+ const { done, value } = await reader.read();
137
+ if (done) break;
138
+
139
+ buffer += decoder.decode(value, { stream: true });
140
+ const lines = buffer.split("\n");
141
+ buffer = lines.pop() || "";
142
+
143
+ for (const line of lines) {
144
+ const trimmed = line.trim();
145
+ if (trimmed.startsWith("data: ")) {
146
+ const data = trimmed.slice(6);
147
+ if (data === "[DONE]") return;
148
+ yield data;
149
+ }
150
+ }
151
+ }
152
+
153
+ // Process remaining buffer
154
+ const trimmed = buffer.trim();
155
+ if (trimmed.startsWith("data: ")) {
156
+ const data = trimmed.slice(6);
157
+ if (data !== "[DONE]") {
158
+ yield data;
159
+ }
160
+ }
161
+ } finally {
162
+ reader.releaseLock();
163
+ }
164
+ }
165
+
166
+ /**
167
+ * Map stop reason to standard format
168
+ */
169
+ export function mapStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
170
+ switch (reason) {
171
+ case "stop":
172
+ case "end_turn":
173
+ return "stop";
174
+ case "length":
175
+ case "max_tokens":
176
+ return "length";
177
+ case "tool_calls":
178
+ case "tool_use":
179
+ return "toolUse";
180
+ default:
181
+ return "error";
182
+ }
183
+ }
184
+
185
+ /**
186
+ * Calculate cost based on usage and model cost config
187
+ */
188
+ export function calculateCost(inputCost: number, outputCost: number, cacheReadCost: number, cacheWriteCost: number, usage: AssistantMessage["usage"]): void {
189
+ usage.cost.input = (inputCost / 1000000) * usage.input;
190
+ usage.cost.output = (outputCost / 1000000) * usage.output;
191
+ usage.cost.cacheRead = (cacheReadCost / 1000000) * usage.cacheRead;
192
+ usage.cost.cacheWrite = (cacheWriteCost / 1000000) * usage.cacheWrite;
193
+ usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
194
+ }