@oh-my-pi/pi-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
3
+ */
4
+
5
+ import { type Content, FinishReason, FunctionCallingConfigMode, type Part, type Schema } from "@google/genai";
6
+ import type { Context, ImageContent, Model, StopReason, TextContent, Tool } from "../types";
7
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
8
+ import { transformMessages } from "./transorm-messages";
9
+
10
/** The two Google API flavors these shared helpers serve. */
type GoogleApiType = "google-generative-ai" | "google-gemini-cli";
11
+
12
/**
 * Convert internal messages to Gemini Content[] format.
 *
 * Role mapping:
 * - "user": strings become a single sanitized text part; content arrays map
 *   text items to text parts and image items to inlineData parts. Image parts
 *   are dropped when the model does not accept image input, and a message that
 *   reduces to zero parts is skipped.
 * - "assistant": text / thinking / toolCall blocks are collected into one
 *   "model" turn; empty turns are skipped.
 * - "toolResult": emitted as a functionResponse part inside a "user" turn,
 *   merged into the previous user turn when that turn already carries function
 *   responses (the Cloud Code Assist API requires one user turn per batch).
 *
 * @param model Gates behavior: `model.input` controls image forwarding,
 *   `model.id` ("gemini-3") controls multimodal function responses.
 * @param context Conversation whose messages are converted (after
 *   transformMessages normalization — see ./transorm-messages).
 * @returns Gemini-ready contents; may contain fewer entries than the input.
 */
export function convertMessages<T extends GoogleApiType>(model: Model<T>, context: Context): Content[] {
	const contents: Content[] = [];
	const transformedMessages = transformMessages(context.messages, model);

	for (const msg of transformedMessages) {
		if (msg.role === "user") {
			if (typeof msg.content === "string") {
				contents.push({
					role: "user",
					// All outbound text passes through sanitizeSurrogates (see utils/sanitize-unicode).
					parts: [{ text: sanitizeSurrogates(msg.content) }],
				});
			} else {
				const parts: Part[] = msg.content.map((item) => {
					if (item.type === "text") {
						return { text: sanitizeSurrogates(item.text) };
					} else {
						// Non-text items are forwarded as base64 inline data.
						return {
							inlineData: {
								mimeType: item.mimeType,
								data: item.data,
							},
						};
					}
				});
				// Text-only models: strip inlineData parts; skip the message entirely
				// if nothing remains after filtering.
				const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
				if (filteredParts.length === 0) continue;
				contents.push({
					role: "user",
					parts: filteredParts,
				});
			}
		} else if (msg.role === "assistant") {
			const parts: Part[] = [];

			for (const block of msg.content) {
				if (block.type === "text") {
					// Skip empty text blocks - they can cause issues with some models (e.g. Claude via Antigravity)
					if (!block.text || block.text.trim() === "") continue;
					parts.push({ text: sanitizeSurrogates(block.text) });
				} else if (block.type === "thinking") {
					// Thinking blocks require signatures for Claude via Antigravity.
					// If signature is missing (e.g. from GPT-OSS), convert to regular text with delimiters.
					if (block.thinkingSignature) {
						parts.push({
							thought: true,
							text: sanitizeSurrogates(block.thinking),
							thoughtSignature: block.thinkingSignature,
						});
					} else {
						parts.push({
							text: `<thinking>\n${sanitizeSurrogates(block.thinking)}\n</thinking>`,
						});
					}
				} else if (block.type === "toolCall") {
					const part: Part = {
						functionCall: {
							id: block.id,
							name: block.name,
							args: block.arguments,
						},
					};
					// Preserve the thought signature captured during streaming, if any.
					if (block.thoughtSignature) {
						part.thoughtSignature = block.thoughtSignature;
					}
					parts.push(part);
				}
			}

			// An assistant turn with no emittable parts is dropped.
			if (parts.length === 0) continue;
			contents.push({
				role: "model",
				parts,
			});
		} else if (msg.role === "toolResult") {
			// Extract text and image content
			const textContent = msg.content.filter((c): c is TextContent => c.type === "text");
			const textResult = textContent.map((c) => c.text).join("\n");
			const imageContent = model.input.includes("image")
				? msg.content.filter((c): c is ImageContent => c.type === "image")
				: [];

			const hasText = textResult.length > 0;
			const hasImages = imageContent.length > 0;

			// Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts
			// See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
			// Older models don't support this, so we put images in a separate user message.
			const supportsMultimodalFunctionResponse = model.id.includes("gemini-3");

			// Use "output" key for success, "error" key for errors as per SDK documentation
			const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "";

			const imageParts: Part[] = imageContent.map((imageBlock) => ({
				inlineData: {
					mimeType: imageBlock.mimeType,
					data: imageBlock.data,
				},
			}));

			const functionResponsePart: Part = {
				functionResponse: {
					id: msg.toolCallId,
					name: msg.toolName,
					response: msg.isError ? { error: responseValue } : { output: responseValue },
					// Nest images inside functionResponse.parts for Gemini 3
					...(hasImages && supportsMultimodalFunctionResponse && { parts: imageParts }),
				},
			};

			// Cloud Code Assist API requires all function responses to be in a single user turn.
			// Check if the last content is already a user turn with function responses and merge.
			const lastContent = contents[contents.length - 1];
			if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) {
				lastContent.parts.push(functionResponsePart);
			} else {
				contents.push({
					role: "user",
					parts: [functionResponsePart],
				});
			}

			// For older models, add images in a separate user message
			if (hasImages && !supportsMultimodalFunctionResponse) {
				contents.push({
					role: "user",
					parts: [{ text: "Tool result image:" }, ...imageParts],
				});
			}
		}
	}

	return contents;
}
148
+
149
+ /**
150
+ * Convert tools to Gemini function declarations format.
151
+ */
152
+ export function convertTools(
153
+ tools: Tool[],
154
+ ): { functionDeclarations: { name: string; description?: string; parameters: Schema }[] }[] | undefined {
155
+ if (tools.length === 0) return undefined;
156
+ return [
157
+ {
158
+ functionDeclarations: tools.map((tool) => ({
159
+ name: tool.name,
160
+ description: tool.description,
161
+ parameters: tool.parameters as Schema,
162
+ })),
163
+ },
164
+ ];
165
+ }
166
+
167
+ /**
168
+ * Map tool choice string to Gemini FunctionCallingConfigMode.
169
+ */
170
+ export function mapToolChoice(choice: string): FunctionCallingConfigMode {
171
+ switch (choice) {
172
+ case "auto":
173
+ return FunctionCallingConfigMode.AUTO;
174
+ case "none":
175
+ return FunctionCallingConfigMode.NONE;
176
+ case "any":
177
+ return FunctionCallingConfigMode.ANY;
178
+ default:
179
+ return FunctionCallingConfigMode.AUTO;
180
+ }
181
+ }
182
+
183
+ /**
184
+ * Map Gemini FinishReason to our StopReason.
185
+ */
186
+ export function mapStopReason(reason: FinishReason): StopReason {
187
+ switch (reason) {
188
+ case FinishReason.STOP:
189
+ return "stop";
190
+ case FinishReason.MAX_TOKENS:
191
+ return "length";
192
+ case FinishReason.BLOCKLIST:
193
+ case FinishReason.PROHIBITED_CONTENT:
194
+ case FinishReason.SPII:
195
+ case FinishReason.SAFETY:
196
+ case FinishReason.IMAGE_SAFETY:
197
+ case FinishReason.IMAGE_PROHIBITED_CONTENT:
198
+ case FinishReason.IMAGE_RECITATION:
199
+ case FinishReason.IMAGE_OTHER:
200
+ case FinishReason.RECITATION:
201
+ case FinishReason.FINISH_REASON_UNSPECIFIED:
202
+ case FinishReason.OTHER:
203
+ case FinishReason.LANGUAGE:
204
+ case FinishReason.MALFORMED_FUNCTION_CALL:
205
+ case FinishReason.UNEXPECTED_TOOL_CALL:
206
+ case FinishReason.NO_IMAGE:
207
+ return "error";
208
+ default: {
209
+ const _exhaustive: never = reason;
210
+ throw new Error(`Unhandled stop reason: ${_exhaustive}`);
211
+ }
212
+ }
213
+ }
214
+
215
+ /**
216
+ * Map string finish reason to our StopReason (for raw API responses).
217
+ */
218
+ export function mapStopReasonString(reason: string): StopReason {
219
+ switch (reason) {
220
+ case "STOP":
221
+ return "stop";
222
+ case "MAX_TOKENS":
223
+ return "length";
224
+ default:
225
+ return "error";
226
+ }
227
+ }
@@ -0,0 +1,324 @@
1
+ import {
2
+ type GenerateContentConfig,
3
+ type GenerateContentParameters,
4
+ GoogleGenAI,
5
+ type ThinkingConfig,
6
+ } from "@google/genai";
7
+ import { calculateCost } from "../models";
8
+ import { getEnvApiKey } from "../stream";
9
+ import type {
10
+ Api,
11
+ AssistantMessage,
12
+ Context,
13
+ Model,
14
+ StreamFunction,
15
+ StreamOptions,
16
+ TextContent,
17
+ ThinkingContent,
18
+ ToolCall,
19
+ } from "../types";
20
+ import { AssistantMessageEventStream } from "../utils/event-stream";
21
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
22
+ import type { GoogleThinkingLevel } from "./google-gemini-cli";
23
+ import { convertMessages, convertTools, mapStopReason, mapToolChoice } from "./google-shared";
24
+
25
/**
 * Options for the Google Generative AI stream function.
 * Extends the provider-agnostic StreamOptions with Gemini-specific settings.
 */
export interface GoogleOptions extends StreamOptions {
	/** Maps to Gemini's FunctionCallingConfigMode: AUTO / NONE / ANY. */
	toolChoice?: "auto" | "none" | "any";
	/** Reasoning ("thinking") configuration; only applied when the model supports reasoning. */
	thinking?: {
		enabled: boolean;
		budgetTokens?: number; // -1 for dynamic, 0 to disable
		// When set, level takes precedence over budgetTokens (see buildParams).
		level?: GoogleThinkingLevel;
	};
}
33
+
34
// Module-level monotonic counter used to synthesize unique tool call IDs when
// the API omits an ID or returns a duplicate within the same response.
let toolCallCounter = 0;
36
+
37
+ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
38
+ model: Model<"google-generative-ai">,
39
+ context: Context,
40
+ options?: GoogleOptions,
41
+ ): AssistantMessageEventStream => {
42
+ const stream = new AssistantMessageEventStream();
43
+
44
+ (async () => {
45
+ const output: AssistantMessage = {
46
+ role: "assistant",
47
+ content: [],
48
+ api: "google-generative-ai" as Api,
49
+ provider: model.provider,
50
+ model: model.id,
51
+ usage: {
52
+ input: 0,
53
+ output: 0,
54
+ cacheRead: 0,
55
+ cacheWrite: 0,
56
+ totalTokens: 0,
57
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
58
+ },
59
+ stopReason: "stop",
60
+ timestamp: Date.now(),
61
+ };
62
+
63
+ try {
64
+ const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
65
+ const client = createClient(model, apiKey);
66
+ const params = buildParams(model, context, options);
67
+ const googleStream = await client.models.generateContentStream(params);
68
+
69
+ stream.push({ type: "start", partial: output });
70
+ let currentBlock: TextContent | ThinkingContent | null = null;
71
+ const blocks = output.content;
72
+ const blockIndex = () => blocks.length - 1;
73
+ for await (const chunk of googleStream) {
74
+ const candidate = chunk.candidates?.[0];
75
+ if (candidate?.content?.parts) {
76
+ for (const part of candidate.content.parts) {
77
+ if (part.text !== undefined) {
78
+ const isThinking = part.thought === true;
79
+ if (
80
+ !currentBlock ||
81
+ (isThinking && currentBlock.type !== "thinking") ||
82
+ (!isThinking && currentBlock.type !== "text")
83
+ ) {
84
+ if (currentBlock) {
85
+ if (currentBlock.type === "text") {
86
+ stream.push({
87
+ type: "text_end",
88
+ contentIndex: blocks.length - 1,
89
+ content: currentBlock.text,
90
+ partial: output,
91
+ });
92
+ } else {
93
+ stream.push({
94
+ type: "thinking_end",
95
+ contentIndex: blockIndex(),
96
+ content: currentBlock.thinking,
97
+ partial: output,
98
+ });
99
+ }
100
+ }
101
+ if (isThinking) {
102
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
103
+ output.content.push(currentBlock);
104
+ stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
105
+ } else {
106
+ currentBlock = { type: "text", text: "" };
107
+ output.content.push(currentBlock);
108
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
109
+ }
110
+ }
111
+ if (currentBlock.type === "thinking") {
112
+ currentBlock.thinking += part.text;
113
+ currentBlock.thinkingSignature = part.thoughtSignature;
114
+ stream.push({
115
+ type: "thinking_delta",
116
+ contentIndex: blockIndex(),
117
+ delta: part.text,
118
+ partial: output,
119
+ });
120
+ } else {
121
+ currentBlock.text += part.text;
122
+ stream.push({
123
+ type: "text_delta",
124
+ contentIndex: blockIndex(),
125
+ delta: part.text,
126
+ partial: output,
127
+ });
128
+ }
129
+ }
130
+
131
+ if (part.functionCall) {
132
+ if (currentBlock) {
133
+ if (currentBlock.type === "text") {
134
+ stream.push({
135
+ type: "text_end",
136
+ contentIndex: blockIndex(),
137
+ content: currentBlock.text,
138
+ partial: output,
139
+ });
140
+ } else {
141
+ stream.push({
142
+ type: "thinking_end",
143
+ contentIndex: blockIndex(),
144
+ content: currentBlock.thinking,
145
+ partial: output,
146
+ });
147
+ }
148
+ currentBlock = null;
149
+ }
150
+
151
+ // Generate unique ID if not provided or if it's a duplicate
152
+ const providedId = part.functionCall.id;
153
+ const needsNewId =
154
+ !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
155
+ const toolCallId = needsNewId
156
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
157
+ : providedId;
158
+
159
+ const toolCall: ToolCall = {
160
+ type: "toolCall",
161
+ id: toolCallId,
162
+ name: part.functionCall.name || "",
163
+ arguments: part.functionCall.args as Record<string, any>,
164
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
165
+ };
166
+
167
+ output.content.push(toolCall);
168
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
169
+ stream.push({
170
+ type: "toolcall_delta",
171
+ contentIndex: blockIndex(),
172
+ delta: JSON.stringify(toolCall.arguments),
173
+ partial: output,
174
+ });
175
+ stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
176
+ }
177
+ }
178
+ }
179
+
180
+ if (candidate?.finishReason) {
181
+ output.stopReason = mapStopReason(candidate.finishReason);
182
+ if (output.content.some((b) => b.type === "toolCall")) {
183
+ output.stopReason = "toolUse";
184
+ }
185
+ }
186
+
187
+ if (chunk.usageMetadata) {
188
+ output.usage = {
189
+ input: chunk.usageMetadata.promptTokenCount || 0,
190
+ output:
191
+ (chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
192
+ cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
193
+ cacheWrite: 0,
194
+ totalTokens: chunk.usageMetadata.totalTokenCount || 0,
195
+ cost: {
196
+ input: 0,
197
+ output: 0,
198
+ cacheRead: 0,
199
+ cacheWrite: 0,
200
+ total: 0,
201
+ },
202
+ };
203
+ calculateCost(model, output.usage);
204
+ }
205
+ }
206
+
207
+ if (currentBlock) {
208
+ if (currentBlock.type === "text") {
209
+ stream.push({
210
+ type: "text_end",
211
+ contentIndex: blockIndex(),
212
+ content: currentBlock.text,
213
+ partial: output,
214
+ });
215
+ } else {
216
+ stream.push({
217
+ type: "thinking_end",
218
+ contentIndex: blockIndex(),
219
+ content: currentBlock.thinking,
220
+ partial: output,
221
+ });
222
+ }
223
+ }
224
+
225
+ if (options?.signal?.aborted) {
226
+ throw new Error("Request was aborted");
227
+ }
228
+
229
+ if (output.stopReason === "aborted" || output.stopReason === "error") {
230
+ throw new Error("An unkown error ocurred");
231
+ }
232
+
233
+ stream.push({ type: "done", reason: output.stopReason, message: output });
234
+ stream.end();
235
+ } catch (error) {
236
+ // Remove internal index property used during streaming
237
+ for (const block of output.content) {
238
+ if ("index" in block) {
239
+ delete (block as { index?: number }).index;
240
+ }
241
+ }
242
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
243
+ output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
244
+ stream.push({ type: "error", reason: output.stopReason, error: output });
245
+ stream.end();
246
+ }
247
+ })();
248
+
249
+ return stream;
250
+ };
251
+
252
+ function createClient(model: Model<"google-generative-ai">, apiKey?: string): GoogleGenAI {
253
+ const httpOptions: { baseUrl?: string; apiVersion?: string; headers?: Record<string, string> } = {};
254
+ if (model.baseUrl) {
255
+ httpOptions.baseUrl = model.baseUrl;
256
+ httpOptions.apiVersion = ""; // baseUrl already includes version path, don't append
257
+ }
258
+ if (model.headers) {
259
+ httpOptions.headers = model.headers;
260
+ }
261
+
262
+ return new GoogleGenAI({
263
+ apiKey,
264
+ httpOptions: Object.keys(httpOptions).length > 0 ? httpOptions : undefined,
265
+ });
266
+ }
267
+
268
+ function buildParams(
269
+ model: Model<"google-generative-ai">,
270
+ context: Context,
271
+ options: GoogleOptions = {},
272
+ ): GenerateContentParameters {
273
+ const contents = convertMessages(model, context);
274
+
275
+ const generationConfig: GenerateContentConfig = {};
276
+ if (options.temperature !== undefined) {
277
+ generationConfig.temperature = options.temperature;
278
+ }
279
+ if (options.maxTokens !== undefined) {
280
+ generationConfig.maxOutputTokens = options.maxTokens;
281
+ }
282
+
283
+ const config: GenerateContentConfig = {
284
+ ...(Object.keys(generationConfig).length > 0 && generationConfig),
285
+ ...(context.systemPrompt && { systemInstruction: sanitizeSurrogates(context.systemPrompt) }),
286
+ ...(context.tools && context.tools.length > 0 && { tools: convertTools(context.tools) }),
287
+ };
288
+
289
+ if (context.tools && context.tools.length > 0 && options.toolChoice) {
290
+ config.toolConfig = {
291
+ functionCallingConfig: {
292
+ mode: mapToolChoice(options.toolChoice),
293
+ },
294
+ };
295
+ } else {
296
+ config.toolConfig = undefined;
297
+ }
298
+
299
+ if (options.thinking?.enabled && model.reasoning) {
300
+ const thinkingConfig: ThinkingConfig = { includeThoughts: true };
301
+ if (options.thinking.level !== undefined) {
302
+ // Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values
303
+ thinkingConfig.thinkingLevel = options.thinking.level as any;
304
+ } else if (options.thinking.budgetTokens !== undefined) {
305
+ thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
306
+ }
307
+ config.thinkingConfig = thinkingConfig;
308
+ }
309
+
310
+ if (options.signal) {
311
+ if (options.signal.aborted) {
312
+ throw new Error("Request aborted");
313
+ }
314
+ config.abortSignal = options.signal;
315
+ }
316
+
317
+ const params: GenerateContentParameters = {
318
+ model: model.id,
319
+ contents,
320
+ config,
321
+ };
322
+
323
+ return params;
324
+ }