@ssweens/pi-vertex 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +226 -0
- package/auth.ts +108 -0
- package/config.ts +63 -0
- package/index.ts +134 -0
- package/models/claude.ts +246 -0
- package/models/gemini.ts +162 -0
- package/models/index.ts +24 -0
- package/models/maas.ts +462 -0
- package/package.json +47 -0
- package/screenshot.png +0 -0
- package/streaming/gemini.ts +164 -0
- package/streaming/index.ts +25 -0
- package/streaming/maas.ts +97 -0
- package/types.ts +76 -0
- package/utils.ts +194 -0
package/streaming/maas.ts
ADDED
@@ -0,0 +1,97 @@
+/**
+ * MaaS streaming handler for Claude and all other models
+ * Uses OpenAI-compatible Chat Completions endpoint
+ *
+ * Delegates to pi-ai's built-in OpenAI streaming implementation
+ */
+
+import type { VertexModelConfig, Context, StreamOptions } from "../types.js";
+import { getAuthConfig, buildBaseUrl, getAccessToken, resolveLocation } from "../auth.js";
+import { createAssistantMessageEventStream, type AssistantMessageEventStream, type Model, streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
+
+export function streamMaaS(
+  model: VertexModelConfig,
+  context: Context,
+  options?: StreamOptions
+): AssistantMessageEventStream {
+  const stream = createAssistantMessageEventStream();
+
+  (async () => {
+    try {
+      // Priority: config file > env var > model region > default
+      const location = resolveLocation(model.region);
+      const auth = getAuthConfig(location);
+      const accessToken = await getAccessToken();
+
+      const baseUrl = buildBaseUrl(auth.projectId, auth.location);
+      const endpoint = `${baseUrl}/endpoints/openapi`;
+      // Create a model object compatible with pi-ai's OpenAI streaming.
+      // Note: baseUrl must point to the OpenAPI root; pi-ai appends /chat/completions.
+      const modelForPi: Model<"openai-completions"> = {
+        id: model.apiId, // Use the full API ID with publisher prefix
+        name: model.name,
+        api: "openai-completions",
+        provider: "vertex",
+        baseUrl: endpoint,
+        reasoning: model.reasoning,
+        input: model.input,
+        cost: model.cost,
+        contextWindow: model.contextWindow,
+        maxTokens: model.maxTokens,
+        headers: {},
+        compat: {
+          supportsStore: false,
+          supportsDeveloperRole: false,
+          supportsReasoningEffort: false,
+          maxTokensField: "max_tokens",
+          thinkingFormat: model.publisher === "qwen" ? "qwen" : "openai",
+        },
+      };
+
+      // Delegate to pi-ai's built-in OpenAI streaming
+      const innerStream = streamSimpleOpenAICompletions(
+        modelForPi,
+        context as any,
+        {
+          ...options,
+          apiKey: accessToken,
+          maxTokens: options?.maxTokens || Math.floor(model.maxTokens / 2),
+          temperature: options?.temperature ?? 0.7,
+        }
+      );
+
+      // Forward all events from inner stream to outer stream
+      for await (const event of innerStream) {
+        stream.push(event);
+      }
+      stream.end();
+
+    } catch (error) {
+      stream.push({
+        type: "error",
+        reason: options?.signal?.aborted ? "aborted" : "error",
+        error: {
+          role: "assistant",
+          content: [],
+          api: "openai-completions",
+          provider: "vertex",
+          model: model.id,
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+          },
+          stopReason: options?.signal?.aborted ? "aborted" : "error",
+          errorMessage: error instanceof Error ? error.message : String(error),
+          timestamp: Date.now(),
+        },
+      });
+      stream.end();
+    }
+  })();
+
+  return stream;
+}
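For context, a minimal sketch of how a caller might consume `streamMaaS` (the model literal below is illustrative; real catalog entries live in `package/models/maas.ts`, and the error-event shape is the one the handler above pushes):

```ts
import { streamMaaS } from "./streaming/maas.js";
import type { VertexModelConfig, Context } from "./types.js";

// Hypothetical Claude-on-Vertex entry; see models/maas.ts for the real ones.
const model: VertexModelConfig = {
  id: "claude-sonnet",
  name: "Claude Sonnet",
  apiId: "anthropic/claude-sonnet", // assumed publisher-prefixed API ID
  publisher: "anthropic",
  endpointType: "maas",
  contextWindow: 200_000,
  maxTokens: 8_192,
  input: ["text", "image"],
  reasoning: false,
  tools: true,
  cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, // $/M tokens, illustrative
  region: "us-east5",
};

const context: Context = { messages: [{ role: "user", content: "Hello!" }] };

for await (const event of streamMaaS(model, context, { maxTokens: 1024 })) {
  if (event.type === "error") {
    console.error(event.error.errorMessage); // error event as pushed in the catch block above
  }
}
```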
package/types.ts
ADDED
@@ -0,0 +1,76 @@
+/**
+ * Type definitions for pi-vertex extension
+ */
+
+export type ModelInputType = "text" | "image";
+export type EndpointType = "gemini" | "maas";
+
+export interface ModelCost {
+  input: number;
+  output: number;
+  cacheRead: number;
+  cacheWrite: number;
+}
+
+export interface VertexModelConfig {
+  id: string;
+  name: string;
+  apiId: string;
+  publisher: string;
+  endpointType: EndpointType;
+  contextWindow: number;
+  maxTokens: number;
+  input: ModelInputType[];
+  reasoning: boolean;
+  tools: boolean;
+  cost: ModelCost;
+  region: string;
+}
+
+export interface AuthConfig {
+  projectId: string;
+  location: string;
+  credentials?: string;
+}
+
+export type MessageRole = "user" | "assistant" | "system";
+
+export interface TextContent {
+  type: "text";
+  text: string;
+}
+
+export interface ImageContent {
+  type: "image";
+  mimeType: string;
+  data: string;
+}
+
+export type MessageContent = TextContent | ImageContent;
+
+export interface Message {
+  role: MessageRole;
+  content: string | MessageContent[];
+}
+
+export interface Tool {
+  name: string;
+  description: string;
+  parameters: Record<string, unknown>;
+}
+
+export interface Context {
+  systemPrompt?: string;
+  messages: Message[];
+  tools?: Tool[];
+}
+
+export interface StreamOptions {
+  maxTokens?: number;
+  temperature?: number;
+  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
+  signal?: AbortSignal;
+}
+
+// Re-export types from pi-ai for convenience
+export type { AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream } from "@mariozechner/pi-ai";
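To make these shapes concrete, a `Context` mixing a plain-text turn, an image turn, and an assistant reply would look like this (a sketch with illustrative values only):

```ts
import type { Context } from "./types.js";

const ctx: Context = {
  systemPrompt: "You are a concise assistant.",
  messages: [
    { role: "user", content: "What is in this image?" },
    {
      role: "user",
      content: [
        { type: "text", text: "Here it is:" },
        { type: "image", mimeType: "image/png", data: "<base64 payload elided>" },
      ],
    },
    { role: "assistant", content: "It appears to be a screenshot." },
  ],
};
```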
package/utils.ts
ADDED
@@ -0,0 +1,194 @@
+/**
+ * Utility functions for pi-vertex extension
+ */
+
+import type { Message, AssistantMessage } from "./types.js";
+
+/**
+ * Sanitize text by replacing unpaired surrogates with U+FFFD
+ */
+export function sanitizeText(text: string): string {
+  return text.replace(/[\uD800-\uDFFF]/gu, "\uFFFD"); // u flag: valid surrogate pairs stay intact
+}
+
+/**
+ * Convert messages to Gemini format
+ */
+export function convertToGeminiMessages(messages: Message[]): any[] {
+  const result: any[] = [];
+
+  for (const msg of messages) {
+    if (msg.role === "user") {
+      if (typeof msg.content === "string") {
+        if (msg.content.trim()) {
+          result.push({
+            role: "user",
+            parts: [{ text: sanitizeText(msg.content) }],
+          });
+        }
+      } else {
+        const parts = msg.content.map((item) => {
+          if (item.type === "text") {
+            return { text: sanitizeText(item.text) };
+          } else {
+            return {
+              inlineData: {
+                mimeType: item.mimeType,
+                data: item.data,
+              },
+            };
+          }
+        });
+        result.push({ role: "user", parts });
+      }
+    } else if (msg.role === "assistant") {
+      // Gemini has no "assistant" role; assistant turns map to the
+      // "model" role in the conversation history.
+      if (typeof msg.content === "string") {
+        if (msg.content.trim()) {
+          result.push({
+            role: "model",
+            parts: [{ text: sanitizeText(msg.content) }],
+          });
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Convert messages to OpenAI-compatible format (for Claude and MaaS)
+ */
+export function convertToOpenAIMessages(messages: Message[]): any[] {
+  const result: any[] = [];
+
+  for (const msg of messages) {
+    if (msg.role === "user") {
+      if (typeof msg.content === "string") {
+        if (msg.content.trim()) {
+          result.push({
+            role: "user",
+            content: sanitizeText(msg.content),
+          });
+        }
+      } else {
+        const content = msg.content.map((item) => {
+          if (item.type === "text") {
+            return { type: "text", text: sanitizeText(item.text) };
+          } else {
+            return {
+              type: "image_url",
+              image_url: {
+                url: `data:${item.mimeType};base64,${item.data}`,
+              },
+            };
+          }
+        });
+        result.push({ role: "user", content });
+      }
+    } else if (msg.role === "assistant") {
+      if (typeof msg.content === "string") {
+        if (msg.content.trim()) {
+          result.push({
+            role: "assistant",
+            content: sanitizeText(msg.content),
+          });
+        }
+      }
+    } else if (msg.role === "system") {
+      // System messages are handled separately (via systemPrompt)
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Convert tools to OpenAI format
+ */
+export function convertTools(tools: any[]): any[] {
+  return tools.map((tool) => ({
+    type: "function",
+    function: {
+      name: tool.name,
+      description: tool.description,
+      parameters: tool.parameters,
+    },
+  }));
+}
+
+/**
+ * Parse SSE (Server-Sent Events) stream
+ */
+export async function* parseSSEStream(response: Response): AsyncGenerator<string> {
+  const reader = response.body?.getReader();
+  if (!reader) {
+    throw new Error("No response body");
+  }
+
+  const decoder = new TextDecoder();
+  let buffer = "";
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split("\n");
+      buffer = lines.pop() || "";
+
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (trimmed.startsWith("data: ")) {
+          const data = trimmed.slice(6);
+          if (data === "[DONE]") return;
+          yield data;
+        }
+      }
+    }
+
+    // Process remaining buffer
+    const trimmed = buffer.trim();
+    if (trimmed.startsWith("data: ")) {
+      const data = trimmed.slice(6);
+      if (data !== "[DONE]") {
+        yield data;
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
+
+/**
+ * Map stop reason to standard format
+ */
+export function mapStopReason(reason: string): "stop" | "length" | "toolUse" | "error" {
+  switch (reason) {
+    case "stop":
+    case "end_turn":
+      return "stop";
+    case "length":
+    case "max_tokens":
+      return "length";
+    case "tool_calls":
+    case "tool_use":
+      return "toolUse";
+    default:
+      return "error";
+  }
+}
+
+/**
+ * Calculate cost based on usage and model cost config (prices are per million tokens)
+ */
+export function calculateCost(inputCost: number, outputCost: number, cacheReadCost: number, cacheWriteCost: number, usage: AssistantMessage["usage"]): void {
+  usage.cost.input = (inputCost / 1000000) * usage.input;
+  usage.cost.output = (outputCost / 1000000) * usage.output;
+  usage.cost.cacheRead = (cacheReadCost / 1000000) * usage.cacheRead;
+  usage.cost.cacheWrite = (cacheWriteCost / 1000000) * usage.cacheWrite;
+  usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
+}
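A short worked example of the cost math and SSE parsing above (the fetch URL and the chunk shape are placeholder assumptions, not part of this package): `calculateCost` treats prices as dollars per million tokens, so 1,000 input tokens at $3/M cost $0.003.

```ts
import { calculateCost, mapStopReason, parseSSEStream } from "./utils.js";

// calculateCost mutates usage.cost in place (prices in $ per million tokens).
const usage = {
  input: 1000,
  output: 500,
  cacheRead: 0,
  cacheWrite: 0,
  totalTokens: 1500,
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
calculateCost(3, 15, 0.3, 3.75, usage as any); // cast: assumes pi-ai's usage type has these fields
// usage.cost.input  === (3 / 1_000_000) * 1000 === 0.003
// usage.cost.output === (15 / 1_000_000) * 500 === 0.0075
// usage.cost.total  === 0.0105

// parseSSEStream yields each `data:` payload until [DONE]; the JSON shape
// below assumes an OpenAI-style Chat Completions stream.
const res = await fetch("https://example.invalid/v1/chat/completions"); // placeholder URL
for await (const chunk of parseSSEStream(res)) {
  const finish = JSON.parse(chunk).choices?.[0]?.finish_reason;
  if (finish) console.log(mapStopReason(finish)); // "stop" | "length" | "toolUse" | "error"
}
```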