@oh-my-pi/pi-ai 6.9.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "6.9.0",
3
+ "version": "7.0.0",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -17,7 +17,7 @@
17
17
  "test": "bun test"
18
18
  },
19
19
  "dependencies": {
20
- "@oh-my-pi/pi-utils": "6.9.0",
20
+ "@oh-my-pi/pi-utils": "7.0.0",
21
21
  "@anthropic-ai/sdk": "0.71.2",
22
22
  "@aws-sdk/client-bedrock-runtime": "^3.968.0",
23
23
  "@bufbuild/protobuf": "^2.10.2",
package/src/index.ts CHANGED
@@ -5,11 +5,18 @@ export * from "./providers/anthropic";
5
5
  export * from "./providers/cursor";
6
6
  export * from "./providers/google";
7
7
  export * from "./providers/google-gemini-cli";
8
+ export * from "./providers/google-gemini-cli-usage";
8
9
  export * from "./providers/google-vertex";
9
10
  export * from "./providers/openai-completions";
10
11
  export * from "./providers/openai-responses";
11
12
  export * from "./stream";
12
13
  export * from "./types";
14
+ export * from "./usage";
15
+ export * from "./usage/claude";
16
+ export * from "./usage/github-copilot";
17
+ export * from "./usage/google-antigravity";
18
+ export * from "./usage/openai-codex";
19
+ export * from "./usage/zai";
13
20
  export * from "./utils/event-stream";
14
21
  export * from "./utils/oauth/index";
15
22
  export * from "./utils/overflow";
@@ -67,6 +67,9 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
67
67
  const stream = new AssistantMessageEventStream();
68
68
 
69
69
  (async () => {
70
+ const startTime = Date.now();
71
+ let firstTokenTime: number | undefined;
72
+
70
73
  const output: AssistantMessage = {
71
74
  role: "assistant",
72
75
  content: [],
@@ -113,8 +116,10 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
113
116
  }
114
117
  stream.push({ type: "start", partial: output });
115
118
  } else if (item.contentBlockStart) {
119
+ if (!firstTokenTime) firstTokenTime = Date.now();
116
120
  handleContentBlockStart(item.contentBlockStart, blocks, output, stream);
117
121
  } else if (item.contentBlockDelta) {
122
+ if (!firstTokenTime) firstTokenTime = Date.now();
118
123
  handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);
119
124
  } else if (item.contentBlockStop) {
120
125
  handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
@@ -143,6 +148,8 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
143
148
  throw new Error("An unknown error occurred");
144
149
  }
145
150
 
151
+ output.duration = Date.now() - startTime;
152
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
146
153
  stream.push({ type: "done", reason: output.stopReason, message: output });
147
154
  stream.end();
148
155
  } catch (error) {
@@ -152,6 +159,8 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
152
159
  }
153
160
  output.stopReason = options.signal?.aborted ? "aborted" : "error";
154
161
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
162
+ output.duration = Date.now() - startTime;
163
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
155
164
  stream.push({ type: "error", reason: output.stopReason, error: output });
156
165
  stream.end();
157
166
  }
@@ -333,22 +342,39 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
333
342
 
334
343
  switch (m.role) {
335
344
  case "user":
336
- result.push({
337
- role: ConversationRole.USER,
338
- content:
339
- typeof m.content === "string"
340
- ? [{ text: sanitizeSurrogates(m.content) }]
341
- : m.content.map((c) => {
342
- switch (c.type) {
343
- case "text":
344
- return { text: sanitizeSurrogates(c.text) };
345
- case "image":
346
- return { image: createImageBlock(c.mimeType, c.data) };
347
- default:
348
- throw new Error("Unknown user content type");
349
- }
350
- }),
351
- });
345
+ if (typeof m.content === "string") {
346
+ // Skip empty user messages
347
+ if (!m.content || m.content.trim() === "") continue;
348
+ result.push({
349
+ role: ConversationRole.USER,
350
+ content: [{ text: sanitizeSurrogates(m.content) }],
351
+ });
352
+ } else {
353
+ const contentBlocks = m.content
354
+ .map((c) => {
355
+ switch (c.type) {
356
+ case "text":
357
+ return { text: sanitizeSurrogates(c.text) };
358
+ case "image":
359
+ return { image: createImageBlock(c.mimeType, c.data) };
360
+ default:
361
+ throw new Error("Unknown user content type");
362
+ }
363
+ })
364
+ .filter((block) => {
365
+ // Filter out empty text blocks
366
+ if ("text" in block && block.text) {
367
+ return block.text.trim().length > 0;
368
+ }
369
+ return true; // Keep non-text blocks (images)
370
+ });
371
+ // Skip message if all blocks filtered out
372
+ if (contentBlocks.length === 0) continue;
373
+ result.push({
374
+ role: ConversationRole.USER,
375
+ content: contentBlocks,
376
+ });
377
+ }
352
378
  break;
353
379
  case "assistant": {
354
380
  // Skip assistant messages with empty content (e.g., from aborted requests)
@@ -140,6 +140,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
140
140
  const stream = new AssistantMessageEventStream();
141
141
 
142
142
  (async () => {
143
+ const startTime = Date.now();
144
+ let firstTokenTime: number | undefined;
145
+
143
146
  const output: AssistantMessage = {
144
147
  role: "assistant",
145
148
  content: [],
@@ -183,6 +186,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
183
186
  output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
184
187
  calculateCost(model, output.usage);
185
188
  } else if (event.type === "content_block_start") {
189
+ if (!firstTokenTime) firstTokenTime = Date.now();
186
190
  if (event.content_block.type === "text") {
187
191
  const block: Block = {
188
192
  type: "text",
@@ -321,12 +325,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
321
325
  throw new Error("An unkown error ocurred");
322
326
  }
323
327
 
328
+ output.duration = Date.now() - startTime;
329
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
324
330
  stream.push({ type: "done", reason: output.stopReason, message: output });
325
331
  stream.end();
326
332
  } catch (error) {
327
333
  for (const block of output.content) delete (block as any).index;
328
334
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
329
335
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
336
+ output.duration = Date.now() - startTime;
337
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
330
338
  stream.push({ type: "error", reason: output.stopReason, error: output });
331
339
  stream.end();
332
340
  }
@@ -294,6 +294,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
294
294
  const stream = new AssistantMessageEventStream();
295
295
 
296
296
  (async () => {
297
+ const startTime = Date.now();
298
+ let firstTokenTime: number | undefined;
299
+
297
300
  const output: AssistantMessage = {
298
301
  role: "assistant",
299
302
  content: [],
@@ -369,6 +372,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
369
372
  get currentToolCall() {
370
373
  return currentToolCall;
371
374
  },
375
+ get firstTokenTime() {
376
+ return firstTokenTime;
377
+ },
372
378
  setTextBlock: (b) => {
373
379
  currentTextBlock = b;
374
380
  },
@@ -378,6 +384,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
378
384
  setToolCall: (t) => {
379
385
  currentToolCall = t;
380
386
  },
387
+ setFirstTokenTime: () => {
388
+ if (!firstTokenTime) firstTokenTime = Date.now();
389
+ },
381
390
  };
382
391
 
383
392
  const onConversationCheckpoint = (checkpoint: ConversationStateStructure) => {
@@ -502,6 +511,8 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
502
511
 
503
512
  calculateCost(model, output.usage);
504
513
 
514
+ output.duration = Date.now() - startTime;
515
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
505
516
  stream.push({
506
517
  type: "done",
507
518
  reason: output.stopReason as "stop" | "length" | "toolUse",
@@ -511,6 +522,8 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
511
522
  } catch (error) {
512
523
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
513
524
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
525
+ output.duration = Date.now() - startTime;
526
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
514
527
  stream.push({ type: "error", reason: output.stopReason, error: output });
515
528
  stream.end();
516
529
  } finally {
@@ -532,9 +545,11 @@ interface BlockState {
532
545
  currentTextBlock: (TextContent & { index: number }) | null;
533
546
  currentThinkingBlock: (ThinkingContent & { index: number }) | null;
534
547
  currentToolCall: ToolCallState | null;
548
+ firstTokenTime: number | undefined;
535
549
  setTextBlock: (b: (TextContent & { index: number }) | null) => void;
536
550
  setThinkingBlock: (b: (ThinkingContent & { index: number }) | null) => void;
537
551
  setToolCall: (t: ToolCallState | null) => void;
552
+ setFirstTokenTime: () => void;
538
553
  }
539
554
 
540
555
  interface UsageState {
@@ -1645,6 +1660,7 @@ function processInteractionUpdate(
1645
1660
  log("interactionUpdate", updateCase, update.message?.value);
1646
1661
 
1647
1662
  if (updateCase === "textDelta") {
1663
+ state.setFirstTokenTime();
1648
1664
  const delta = update.message.value.text || "";
1649
1665
  if (!state.currentTextBlock) {
1650
1666
  const block: TextContent & { index: number } = {
@@ -1660,6 +1676,7 @@ function processInteractionUpdate(
1660
1676
  const idx = output.content.indexOf(state.currentTextBlock!);
1661
1677
  stream.push({ type: "text_delta", contentIndex: idx, delta, partial: output });
1662
1678
  } else if (updateCase === "thinkingDelta") {
1679
+ state.setFirstTokenTime();
1663
1680
  const delta = update.message.value.text || "";
1664
1681
  if (!state.currentThinkingBlock) {
1665
1682
  const block: ThinkingContent & { index: number } = {
@@ -1821,11 +1838,12 @@ function buildMcpToolDefinitions(tools: Tool[] | undefined): McpToolDefinition[]
1821
1838
  function extractUserMessageText(msg: Message): string {
1822
1839
  if (msg.role !== "user") return "";
1823
1840
  const content = msg.content;
1824
- if (typeof content === "string") return content;
1825
- return content
1841
+ if (typeof content === "string") return content.trim();
1842
+ const text = content
1826
1843
  .filter((c): c is TextContent => c.type === "text")
1827
1844
  .map((c) => c.text)
1828
1845
  .join("\n");
1846
+ return text.trim();
1829
1847
  }
1830
1848
 
1831
1849
  /**
@@ -1874,7 +1892,7 @@ function buildConversationTurns(messages: Message[]): Uint8Array[] {
1874
1892
 
1875
1893
  // Create and serialize user message
1876
1894
  const userText = extractUserMessageText(msg);
1877
- if (!userText) {
1895
+ if (!userText || userText.length === 0) {
1878
1896
  i++;
1879
1897
  continue;
1880
1898
  }
@@ -1965,10 +1983,15 @@ function buildGrpcRequest(
1965
1983
  const userText =
1966
1984
  lastMessage?.role === "user"
1967
1985
  ? typeof lastMessage.content === "string"
1968
- ? lastMessage.content
1986
+ ? lastMessage.content.trim()
1969
1987
  : extractText(lastMessage.content)
1970
1988
  : "";
1971
1989
 
1990
+ // Validate that we have non-empty user text for the action
1991
+ if (!userText || userText.trim().length === 0) {
1992
+ throw new Error("Cannot send empty user message to Cursor API");
1993
+ }
1994
+
1972
1995
  const userMessage = create(UserMessageSchema, {
1973
1996
  text: userText,
1974
1997
  messageId: crypto.randomUUID(),
@@ -0,0 +1,271 @@
1
+ import type {
2
+ UsageAmount,
3
+ UsageFetchContext,
4
+ UsageFetchParams,
5
+ UsageLimit,
6
+ UsageProvider,
7
+ UsageReport,
8
+ UsageWindow,
9
+ } from "../usage";
10
+ import { refreshGoogleCloudToken } from "../utils/oauth/google-gemini-cli";
11
+
12
/** Base URL used when the caller does not supply `params.baseUrl`. */
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";

/** How long a fetched usage report (or a failed fetch) stays cached, in milliseconds. */
const CACHE_TTL_MS = 60 * 1000;

/** Client descriptor that is serialised into the `Client-Metadata` request header. */
const GEMINI_CLIENT_METADATA = {
  ideType: "IDE_UNSPECIFIED",
  platform: "PLATFORM_UNSPECIFIED",
  pluginType: "GEMINI",
};

/** Static headers attached to every request this module sends. */
const GEMINI_CLI_HEADERS = {
  "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
  "X-Goog-Api-Client": "gl-node/22.17.0",
  "Client-Metadata": JSON.stringify(GEMINI_CLIENT_METADATA),
};

/**
 * Tier label for each known model id.
 * Entries are scanned in order by `getModelTier`.
 */
const GEMINI_TIER_MAP: { tier: string; models: string[] }[] = [
  { tier: "3-Flash", models: ["gemini-3-flash-preview", "gemini-3-flash"] },
  {
    tier: "Flash",
    models: ["gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash", "gemini-1.5-flash"],
  },
  {
    tier: "Pro",
    models: ["gemini-2.5-pro", "gemini-3-pro-preview", "gemini-3-pro", "gemini-1.5-pro"],
  },
];
39
+
40
/**
 * Subset of the `v1internal:loadCodeAssist` response that this module reads.
 * All fields are optional; readers must tolerate their absence.
 */
interface LoadCodeAssistResponse {
  /** Project reference: either a bare id string or an object carrying the id. */
  cloudaicompanionProject?: string | { id?: string };
  /** Tier descriptor copied into the usage report metadata. */
  currentTier?: {
    id?: string;
    name?: string;
  };
}
44
+
45
/**
 * Subset of the `v1internal:retrieveUserQuota` response that this module reads.
 * Each bucket becomes one usage limit in the final report.
 */
interface RetrieveUserQuotaResponse {
  buckets?: {
    /** Model the bucket applies to, when reported. */
    modelId?: string;
    /** Remaining share of the quota; clamped to [0, 1] by `buildAmount`. */
    remainingFraction?: number;
    /** Reset timestamp string; parsed with `Date.parse` by `parseWindow`. */
    resetTime?: string;
  }[];
}
52
+
53
/**
 * Pulls the project id out of a loadCodeAssist payload.
 *
 * The project can be reported as a bare string or as an object with an `id`
 * field; any other shape, or a missing payload, yields `undefined`.
 *
 * @param payload - Parsed loadCodeAssist response, if the call succeeded.
 * @returns The project id, or `undefined` when none is present.
 */
function getProjectId(payload: LoadCodeAssistResponse | undefined): string | undefined {
  const project = payload?.cloudaicompanionProject;
  if (typeof project === "string") return project;
  // `typeof null` is "object", so the truthiness test keeps null out of the `.id` access.
  return project && typeof project === "object" ? project.id : undefined;
}
63
+
64
/**
 * Maps a model id to its quota tier label.
 *
 * Resolution order:
 *  1. exact match of the normalised (trimmed, lower-cased) id against
 *     `GEMINI_TIER_MAP`;
 *  2. first table entry with a listed model id contained in the normalised
 *     id, so prefixed or suffixed variants such as
 *     `models/gemini-3-flash-preview-0925` keep their family's tier instead of
 *     falling into the generic "Flash" bucket;
 *  3. generic substring fallback ("flash" → "Flash", "pro" → "Pro").
 *
 * @param modelId - Model identifier as reported by the quota endpoint.
 * @returns The tier label, or `undefined` when the id matches nothing.
 */
function getModelTier(modelId: string): string | undefined {
  const normalized = modelId.trim().toLowerCase();

  const exact = GEMINI_TIER_MAP.find((entry) => entry.models.includes(normalized));
  if (exact) return exact.tier;

  // Table-driven family match: avoids hard-coding tier names in the fallback.
  const family = GEMINI_TIER_MAP.find((entry) => entry.models.some((model) => normalized.includes(model)));
  if (family) return family.tier;

  if (normalized.includes("flash")) return "Flash";
  if (normalized.includes("pro")) return "Pro";
  return undefined;
}
75
+
76
/**
 * Builds the usage window for a quota bucket from its reset timestamp.
 *
 * @param resetTime - Timestamp string understood by `Date.parse`, if reported.
 * @param now - Current time in epoch milliseconds.
 * @returns A generic "quota" window when the timestamp is missing or
 *          unparseable; otherwise a window keyed by the reset instant with the
 *          remaining time floored at zero.
 */
function parseWindow(resetTime: string | undefined, now: number): UsageWindow {
  // A missing timestamp and an unparseable one share the same fallback.
  const resetsAt = resetTime ? Date.parse(resetTime) : Number.NaN;
  if (Number.isNaN(resetsAt)) {
    return { id: "quota", label: "Quota window" };
  }

  const resetInMs = Math.max(0, resetsAt - now);
  return { id: `reset-${resetsAt}`, label: "Quota window", resetsAt, resetInMs };
}
97
+
98
/**
 * Converts a remaining-quota fraction into a percent-based usage amount.
 *
 * @param remainingFraction - Remaining share of the quota; values outside
 *        [0, 1] are clamped.
 * @returns Only `{ unit: "percent" }` when the fraction is missing or not
 *          finite; otherwise used/remaining percentages rounded to one decimal
 *          place, plus the clamped fractions themselves.
 */
function buildAmount(remainingFraction: number | undefined): UsageAmount {
  if (typeof remainingFraction !== "number" || !Number.isFinite(remainingFraction)) {
    return { unit: "percent" };
  }

  const clampToUnit = (value: number): number => Math.min(Math.max(value, 0), 1);
  // Fraction → percentage with a single decimal digit.
  const toPercent = (fraction: number): number => Math.round(fraction * 1000) / 10;

  const remainingShare = clampToUnit(remainingFraction);
  const usedShare = clampToUnit(1 - remainingShare);

  return {
    unit: "percent",
    used: toPercent(usedShare),
    remaining: toPercent(remainingShare),
    limit: 100,
    usedFraction: usedShare,
    remainingFraction: remainingShare,
  };
}
113
+
114
/**
 * Picks the access token to use for usage requests, refreshing it when needed.
 *
 * - Non-OAuth credentials yield `undefined`.
 * - A token with no expiry, or one expiring more than 60 s from now, is
 *   returned as is.
 * - Otherwise a refresh is attempted when both a refresh token and a project
 *   id are available. Without them, or when the refresh throws, the existing
 *   (possibly stale) token is returned; a refresh failure is logged as a
 *   warning.
 *
 * @param params - Fetch parameters carrying the credential.
 * @param ctx - Fetch context providing the clock and an optional logger.
 * @returns The token to send, or `undefined` if none is available.
 */
async function resolveAccessToken(params: UsageFetchParams, ctx: UsageFetchContext): Promise<string | undefined> {
  const oauth = params.credential;
  if (oauth.type !== "oauth") return undefined;

  const freshnessDeadline = ctx.now() + 60_000;
  const current = oauth.accessToken;
  const stillFresh = !oauth.expiresAt || oauth.expiresAt > freshnessDeadline;
  if (current && stillFresh) return current;

  // Refreshing needs both pieces; without them fall back to whatever token we hold.
  if (!oauth.refreshToken || !oauth.projectId) return current;

  try {
    const { access } = await refreshGoogleCloudToken(oauth.refreshToken, oauth.projectId);
    return access;
  } catch (error) {
    ctx.logger?.warn("Gemini CLI token refresh failed", { error: String(error) });
    return current;
  }
}
130
+
131
/**
 * POSTs to `v1internal:loadCodeAssist` and returns the parsed response.
 *
 * @param params - Fetch parameters; only `signal` is used, for cancellation.
 * @param ctx - Fetch context providing `fetch` and an optional logger.
 * @param accessToken - Bearer token for the request.
 * @param baseUrl - Endpoint base URL without a trailing slash.
 * @param projectId - Sent as `cloudaicompanionProject` when provided.
 * @returns The response body, or `undefined` (after logging a warning with the
 *          status and body text) when the HTTP status is not OK.
 */
async function loadCodeAssist(
  params: UsageFetchParams,
  ctx: UsageFetchContext,
  accessToken: string,
  baseUrl: string,
  projectId?: string,
): Promise<LoadCodeAssistResponse | undefined> {
  // Insertion order matters for the serialised body: project first, then metadata.
  const requestBody: Record<string, unknown> = {};
  if (projectId) requestBody.cloudaicompanionProject = projectId;
  requestBody.metadata = {
    ideType: "IDE_UNSPECIFIED",
    platform: "PLATFORM_UNSPECIFIED",
    pluginType: "GEMINI",
  };

  const headers = {
    Authorization: `Bearer ${accessToken}`,
    "Content-Type": "application/json",
    ...GEMINI_CLI_HEADERS,
  };

  const response = await ctx.fetch(`${baseUrl}/v1internal:loadCodeAssist`, {
    method: "POST",
    headers,
    body: JSON.stringify(requestBody),
    signal: params.signal,
  });

  if (response.ok) {
    return (await response.json()) as LoadCodeAssistResponse;
  }

  const errorText = await response.text();
  ctx.logger?.warn("Gemini CLI loadCodeAssist failed", {
    status: response.status,
    error: errorText,
  });
  return undefined;
}
167
+
168
/**
 * POSTs to `v1internal:retrieveUserQuota` and returns the parsed response.
 *
 * @param params - Fetch parameters; only `signal` is used, for cancellation.
 * @param ctx - Fetch context providing `fetch` and an optional logger.
 * @param accessToken - Bearer token for the request.
 * @param baseUrl - Endpoint base URL without a trailing slash.
 * @param projectId - Sent as `project` when provided; otherwise the body is `{}`.
 * @returns The response body, or `undefined` (after logging a warning with the
 *          status and body text) when the HTTP status is not OK.
 */
async function fetchQuota(
  params: UsageFetchParams,
  ctx: UsageFetchContext,
  accessToken: string,
  baseUrl: string,
  projectId?: string,
): Promise<RetrieveUserQuotaResponse | undefined> {
  const requestBody = projectId ? { project: projectId } : {};
  const headers = {
    Authorization: `Bearer ${accessToken}`,
    "Content-Type": "application/json",
    ...GEMINI_CLI_HEADERS,
  };

  const response = await ctx.fetch(`${baseUrl}/v1internal:retrieveUserQuota`, {
    method: "POST",
    headers,
    body: JSON.stringify(requestBody),
    signal: params.signal,
  });

  if (response.ok) {
    return (await response.json()) as RetrieveUserQuotaResponse;
  }

  const errorText = await response.text();
  ctx.logger?.warn("Gemini CLI retrieveUserQuota failed", {
    status: response.status,
    error: errorText,
  });
  return undefined;
}
197
+
198
/**
 * Usage provider for Gemini CLI (Cloud Code) OAuth accounts.
 *
 * `fetchUsage` resolves an access token, consults the cache, then calls
 * loadCodeAssist (for the project id and current tier) followed by
 * retrieveUserQuota. Each returned bucket becomes one `UsageLimit`. Both a
 * successful report and a failed quota fetch (`null`) are cached for
 * `CACHE_TTL_MS`.
 */
export const googleGeminiCliUsageProvider: UsageProvider = {
  id: "google-gemini-cli",

  /** Only OAuth credentials that already hold an access token are supported. */
  supports: ({ credential }) => credential.type === "oauth" && Boolean(credential.accessToken),

  async fetchUsage(params, ctx) {
    const { credential } = params;
    if (credential.type !== "oauth") return null;

    const accessToken = await resolveAccessToken(params, ctx);
    if (!accessToken) return null;

    const now = ctx.now();
    const expiresAt = now + CACHE_TTL_MS;

    // Normalise the endpoint: blank → default, and drop a single trailing slash.
    const configuredUrl = params.baseUrl?.trim() || DEFAULT_ENDPOINT;
    const baseUrl = configuredUrl.replace(/\/$/, "");

    const accountKey = credential.accountId ?? credential.email ?? "default";
    const projectKey = credential.projectId ?? "default";
    const cacheKey = `usage:${params.provider}:${accountKey}:${baseUrl}:${projectKey}`;

    const cached = await ctx.cache.get(cacheKey);
    if (cached && cached.expiresAt > now) return cached.value;

    const loadResponse = await loadCodeAssist(params, ctx, accessToken, baseUrl, credential.projectId);
    // Prefer the credential's project; otherwise use the one the server reports.
    const projectId = credential.projectId ?? getProjectId(loadResponse);

    const quotaResponse = await fetchQuota(params, ctx, accessToken, baseUrl, projectId);
    if (!quotaResponse) {
      // Cache the failure as well, so repeated calls within the TTL skip the network.
      await ctx.cache.set(cacheKey, { value: null, expiresAt });
      return null;
    }

    const limits: UsageLimit[] = (quotaResponse.buckets ?? []).map((bucket) => {
      const { modelId } = bucket;
      const window = parseWindow(bucket.resetTime, now);
      const amount = buildAmount(bucket.remainingFraction);
      const tier = modelId ? getModelTier(modelId) : undefined;

      return {
        // parseWindow always supplies an id, so no positional fallback is needed.
        id: `${modelId ?? "unknown"}:${window.id}`,
        label: modelId ? `Gemini ${modelId}` : "Gemini quota",
        scope: {
          provider: params.provider,
          accountId: credential.accountId,
          projectId,
          modelId,
          tier,
          windowId: window.id,
        },
        window,
        amount,
      };
    });

    const currentTier = loadResponse?.currentTier;
    const report: UsageReport = {
      provider: params.provider,
      fetchedAt: now,
      limits,
      metadata: {
        currentTierId: currentTier?.id,
        currentTierName: currentTier?.name,
      },
      raw: quotaResponse,
    };

    await ctx.cache.set(cacheKey, { value: report, expiresAt });
    return report;
  },
};
@@ -365,6 +365,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
365
365
  const stream = new AssistantMessageEventStream();
366
366
 
367
367
  (async () => {
368
+ const startTime = Date.now();
369
+ let firstTokenTime: number | undefined;
370
+
368
371
  const output: AssistantMessage = {
369
372
  role: "assistant",
370
373
  content: [],
@@ -489,6 +492,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
489
492
  let started = false;
490
493
  const ensureStarted = () => {
491
494
  if (!started) {
495
+ if (!firstTokenTime) firstTokenTime = Date.now();
492
496
  stream.push({ type: "start", partial: output });
493
497
  started = true;
494
498
  }
@@ -802,6 +806,8 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
802
806
  throw new Error("An unknown error occurred");
803
807
  }
804
808
 
809
+ output.duration = Date.now() - startTime;
810
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
805
811
  stream.push({ type: "done", reason: output.stopReason, message: output });
806
812
  stream.end();
807
813
  } catch (error) {
@@ -812,6 +818,8 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
812
818
  }
813
819
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
814
820
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
821
+ output.duration = Date.now() - startTime;
822
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
815
823
  stream.push({ type: "error", reason: output.stopReason, error: output });
816
824
  stream.end();
817
825
  }
@@ -79,6 +79,8 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
79
79
  for (const msg of transformedMessages) {
80
80
  if (msg.role === "user") {
81
81
  if (typeof msg.content === "string") {
82
+ // Skip empty user messages
83
+ if (!msg.content || msg.content.trim() === "") continue;
82
84
  contents.push({
83
85
  role: "user",
84
86
  parts: [{ text: sanitizeSurrogates(msg.content) }],
@@ -96,7 +98,14 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
96
98
  };
97
99
  }
98
100
  });
99
- const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
101
+ // Filter out images if model doesn't support them, and empty text blocks
102
+ let filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
103
+ filteredParts = filteredParts.filter((p) => {
104
+ if (p.text !== undefined) {
105
+ return p.text.trim().length > 0;
106
+ }
107
+ return true; // Keep non-text parts (images)
108
+ });
100
109
  if (filteredParts.length === 0) continue;
101
110
  contents.push({
102
111
  role: "user",
@@ -62,6 +62,9 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
62
62
  const stream = new AssistantMessageEventStream();
63
63
 
64
64
  (async () => {
65
+ const startTime = Date.now();
66
+ let firstTokenTime: number | undefined;
67
+
65
68
  const output: AssistantMessage = {
66
69
  role: "assistant",
67
70
  content: [],
@@ -97,6 +100,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
97
100
  if (candidate?.content?.parts) {
98
101
  for (const part of candidate.content.parts) {
99
102
  if (part.text !== undefined) {
103
+ if (!firstTokenTime) firstTokenTime = Date.now();
100
104
  const isThinking = isThinkingPart(part);
101
105
  if (
102
106
  !currentBlock ||
@@ -258,6 +262,8 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
258
262
  throw new Error("An unknown error occurred");
259
263
  }
260
264
 
265
+ output.duration = Date.now() - startTime;
266
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
261
267
  stream.push({ type: "done", reason: output.stopReason, message: output });
262
268
  stream.end();
263
269
  } catch (error) {
@@ -269,6 +275,8 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
269
275
  }
270
276
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
271
277
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
278
+ output.duration = Date.now() - startTime;
279
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
272
280
  stream.push({ type: "error", reason: output.stopReason, error: output });
273
281
  stream.end();
274
282
  }
@@ -53,6 +53,9 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
53
53
  const stream = new AssistantMessageEventStream();
54
54
 
55
55
  (async () => {
56
+ const startTime = Date.now();
57
+ let firstTokenTime: number | undefined;
58
+
56
59
  const output: AssistantMessage = {
57
60
  role: "assistant",
58
61
  content: [],
@@ -88,6 +91,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
88
91
  for (const part of candidate.content.parts) {
89
92
  if (part.text !== undefined) {
90
93
  const isThinking = isThinkingPart(part);
94
+ if (!firstTokenTime) firstTokenTime = Date.now();
91
95
  if (
92
96
  !currentBlock ||
93
97
  (isThinking && currentBlock.type !== "thinking") ||
@@ -245,6 +249,8 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
245
249
  throw new Error("An unkown error ocurred");
246
250
  }
247
251
 
252
+ output.duration = Date.now() - startTime;
253
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
248
254
  stream.push({ type: "done", reason: output.stopReason, message: output });
249
255
  stream.end();
250
256
  } catch (error) {
@@ -256,6 +262,8 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
256
262
  }
257
263
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
258
264
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
265
+ output.duration = Date.now() - startTime;
266
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
259
267
  stream.push({ type: "error", reason: output.stopReason, error: output });
260
268
  stream.end();
261
269
  }