@oh-my-pi/pi-ai 6.9.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/index.ts +7 -0
- package/src/providers/amazon-bedrock.ts +42 -16
- package/src/providers/anthropic.ts +8 -0
- package/src/providers/cursor.ts +27 -4
- package/src/providers/google-gemini-cli-usage.ts +271 -0
- package/src/providers/google-gemini-cli.ts +8 -0
- package/src/providers/google-shared.ts +10 -1
- package/src/providers/google-vertex.ts +8 -0
- package/src/providers/google.ts +8 -0
- package/src/providers/openai-codex/request-transformer.ts +4 -0
- package/src/providers/openai-codex-responses.ts +18 -1
- package/src/providers/openai-completions.ts +8 -0
- package/src/providers/openai-responses.ts +18 -1
- package/src/types.ts +2 -0
- package/src/usage/claude.ts +355 -0
- package/src/usage/github-copilot.ts +479 -0
- package/src/usage/google-antigravity.ts +218 -0
- package/src/usage/openai-codex.ts +393 -0
- package/src/usage/zai.ts +292 -0
- package/src/usage.ts +133 -0
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@oh-my-pi/pi-ai",
-  "version": "6.9.0",
+  "version": "7.0.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "type": "module",
   "main": "./src/index.ts",
@@ -17,7 +17,7 @@
     "test": "bun test"
   },
   "dependencies": {
-    "@oh-my-pi/pi-utils": "6.9.0",
+    "@oh-my-pi/pi-utils": "7.0.0",
     "@anthropic-ai/sdk": "0.71.2",
     "@aws-sdk/client-bedrock-runtime": "^3.968.0",
     "@bufbuild/protobuf": "^2.10.2",

package/src/index.ts
CHANGED
@@ -5,11 +5,18 @@ export * from "./providers/anthropic";
 export * from "./providers/cursor";
 export * from "./providers/google";
 export * from "./providers/google-gemini-cli";
+export * from "./providers/google-gemini-cli-usage";
 export * from "./providers/google-vertex";
 export * from "./providers/openai-completions";
 export * from "./providers/openai-responses";
 export * from "./stream";
 export * from "./types";
+export * from "./usage";
+export * from "./usage/claude";
+export * from "./usage/github-copilot";
+export * from "./usage/google-antigravity";
+export * from "./usage/openai-codex";
+export * from "./usage/zai";
 export * from "./utils/event-stream";
 export * from "./utils/oauth/index";
 export * from "./utils/overflow";

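The seven new export lines make the usage modules part of the package's public surface. A minimal import sketch; `googleGeminiCliUsageProvider` is the one usage export actually visible in this diff (via the new "./providers/google-gemini-cli-usage" re-export), and the other usage modules' export names are not shown here:

// Minimal sketch of consuming the new 7.0.0 usage surface.
import { googleGeminiCliUsageProvider, type UsageProvider } from "@oh-my-pi/pi-ai";

const usageProviders: UsageProvider[] = [googleGeminiCliUsageProvider];
console.log(usageProviders.map((p) => p.id)); // ["google-gemini-cli"]
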
package/src/providers/amazon-bedrock.ts
CHANGED
@@ -67,6 +67,9 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
   const stream = new AssistantMessageEventStream();
 
   (async () => {
+    const startTime = Date.now();
+    let firstTokenTime: number | undefined;
+
     const output: AssistantMessage = {
       role: "assistant",
       content: [],
@@ -113,8 +116,10 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
         }
         stream.push({ type: "start", partial: output });
       } else if (item.contentBlockStart) {
+        if (!firstTokenTime) firstTokenTime = Date.now();
         handleContentBlockStart(item.contentBlockStart, blocks, output, stream);
       } else if (item.contentBlockDelta) {
+        if (!firstTokenTime) firstTokenTime = Date.now();
         handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);
       } else if (item.contentBlockStop) {
         handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
@@ -143,6 +148,8 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
         throw new Error("An unknown error occurred");
       }
 
+      output.duration = Date.now() - startTime;
+      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
       stream.push({ type: "done", reason: output.stopReason, message: output });
       stream.end();
     } catch (error) {
@@ -152,6 +159,8 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
       }
       output.stopReason = options.signal?.aborted ? "aborted" : "error";
       output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
+      output.duration = Date.now() - startTime;
+      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
       stream.push({ type: "error", reason: output.stopReason, error: output });
       stream.end();
     }
@@ -333,22 +342,39 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
 
     switch (m.role) {
       case "user":
-        …(16 removed lines; their content is not captured in this diff view)…
+        if (typeof m.content === "string") {
+          // Skip empty user messages
+          if (!m.content || m.content.trim() === "") continue;
+          result.push({
+            role: ConversationRole.USER,
+            content: [{ text: sanitizeSurrogates(m.content) }],
+          });
+        } else {
+          const contentBlocks = m.content
+            .map((c) => {
+              switch (c.type) {
+                case "text":
+                  return { text: sanitizeSurrogates(c.text) };
+                case "image":
+                  return { image: createImageBlock(c.mimeType, c.data) };
+                default:
+                  throw new Error("Unknown user content type");
+              }
+            })
+            .filter((block) => {
+              // Filter out empty text blocks
+              if ("text" in block && block.text) {
+                return block.text.trim().length > 0;
+              }
+              return true; // Keep non-text blocks (images)
+            });
+          // Skip message if all blocks filtered out
+          if (contentBlocks.length === 0) continue;
+          result.push({
+            role: ConversationRole.USER,
+            content: contentBlocks,
+          });
+        }
         break;
       case "assistant": {
         // Skip assistant messages with empty content (e.g., from aborted requests)

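The instrumentation above is the template every provider in this release follows: stamp `startTime` when the async worker begins, latch `firstTokenTime` on the first content event, and write `duration`/`ttft` onto the message on both the done and error paths. A condensed, self-contained sketch of the pattern; the `emitChunks` callback stands in for each provider's event loop and is illustrative only:

type TimedMessage = { duration?: number; ttft?: number };

async function runWithTimings(
  emitChunks: (onToken: () => void) => Promise<void>,
): Promise<TimedMessage> {
  const startTime = Date.now();
  let firstTokenTime: number | undefined;
  const output: TimedMessage = {};
  try {
    await emitChunks(() => {
      // Latch only the first token's arrival; later calls are no-ops.
      if (!firstTokenTime) firstTokenTime = Date.now();
    });
  } finally {
    // Success and error paths stamp the same two fields, as in the diffs.
    output.duration = Date.now() - startTime;
    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
  }
  return output;
}
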
package/src/providers/anthropic.ts
CHANGED
@@ -140,6 +140,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
   const stream = new AssistantMessageEventStream();
 
   (async () => {
+    const startTime = Date.now();
+    let firstTokenTime: number | undefined;
+
     const output: AssistantMessage = {
       role: "assistant",
       content: [],
@@ -183,6 +186,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
           output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
         calculateCost(model, output.usage);
       } else if (event.type === "content_block_start") {
+        if (!firstTokenTime) firstTokenTime = Date.now();
         if (event.content_block.type === "text") {
           const block: Block = {
             type: "text",
@@ -321,12 +325,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
         throw new Error("An unkown error ocurred");
       }
 
+      output.duration = Date.now() - startTime;
+      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
       stream.push({ type: "done", reason: output.stopReason, message: output });
       stream.end();
     } catch (error) {
       for (const block of output.content) delete (block as any).index;
       output.stopReason = options?.signal?.aborted ? "aborted" : "error";
       output.errorMessage = formatErrorMessageWithRetryAfter(error);
+      output.duration = Date.now() - startTime;
+      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
       stream.push({ type: "error", reason: output.stopReason, error: output });
       stream.end();
     }

package/src/providers/cursor.ts
CHANGED
@@ -294,6 +294,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
   const stream = new AssistantMessageEventStream();
 
   (async () => {
+    const startTime = Date.now();
+    let firstTokenTime: number | undefined;
+
     const output: AssistantMessage = {
       role: "assistant",
       content: [],
@@ -369,6 +372,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
     get currentToolCall() {
       return currentToolCall;
     },
+    get firstTokenTime() {
+      return firstTokenTime;
+    },
     setTextBlock: (b) => {
       currentTextBlock = b;
     },
@@ -378,6 +384,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
     setToolCall: (t) => {
       currentToolCall = t;
     },
+    setFirstTokenTime: () => {
+      if (!firstTokenTime) firstTokenTime = Date.now();
+    },
   };
 
   const onConversationCheckpoint = (checkpoint: ConversationStateStructure) => {
@@ -502,6 +511,8 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
 
     calculateCost(model, output.usage);
 
+    output.duration = Date.now() - startTime;
+    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
     stream.push({
       type: "done",
       reason: output.stopReason as "stop" | "length" | "toolUse",
@@ -511,6 +522,8 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
   } catch (error) {
     output.stopReason = options?.signal?.aborted ? "aborted" : "error";
     output.errorMessage = formatErrorMessageWithRetryAfter(error);
+    output.duration = Date.now() - startTime;
+    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
     stream.push({ type: "error", reason: output.stopReason, error: output });
     stream.end();
   } finally {
@@ -532,9 +545,11 @@ interface BlockState {
   currentTextBlock: (TextContent & { index: number }) | null;
   currentThinkingBlock: (ThinkingContent & { index: number }) | null;
   currentToolCall: ToolCallState | null;
+  firstTokenTime: number | undefined;
   setTextBlock: (b: (TextContent & { index: number }) | null) => void;
   setThinkingBlock: (b: (ThinkingContent & { index: number }) | null) => void;
   setToolCall: (t: ToolCallState | null) => void;
+  setFirstTokenTime: () => void;
 }
 
 interface UsageState {
@@ -1645,6 +1660,7 @@ function processInteractionUpdate(
   log("interactionUpdate", updateCase, update.message?.value);
 
   if (updateCase === "textDelta") {
+    state.setFirstTokenTime();
     const delta = update.message.value.text || "";
     if (!state.currentTextBlock) {
       const block: TextContent & { index: number } = {
@@ -1660,6 +1676,7 @@ function processInteractionUpdate(
     const idx = output.content.indexOf(state.currentTextBlock!);
     stream.push({ type: "text_delta", contentIndex: idx, delta, partial: output });
   } else if (updateCase === "thinkingDelta") {
+    state.setFirstTokenTime();
     const delta = update.message.value.text || "";
     if (!state.currentThinkingBlock) {
       const block: ThinkingContent & { index: number } = {
@@ -1821,11 +1838,12 @@ function buildMcpToolDefinitions(tools: Tool[] | undefined): McpToolDefinition[]
 function extractUserMessageText(msg: Message): string {
   if (msg.role !== "user") return "";
   const content = msg.content;
-  if (typeof content === "string") return content;
-  return content
+  if (typeof content === "string") return content.trim();
+  const text = content
     .filter((c): c is TextContent => c.type === "text")
     .map((c) => c.text)
     .join("\n");
+  return text.trim();
 }
 
 /**
@@ -1874,7 +1892,7 @@ function buildConversationTurns(messages: Message[]): Uint8Array[] {
 
   // Create and serialize user message
   const userText = extractUserMessageText(msg);
-  if (!userText) {
+  if (!userText || userText.length === 0) {
     i++;
     continue;
   }
@@ -1965,10 +1983,15 @@ function buildGrpcRequest(
   const userText =
     lastMessage?.role === "user"
       ? typeof lastMessage.content === "string"
-        ? lastMessage.content
+        ? lastMessage.content.trim()
         : extractText(lastMessage.content)
       : "";
 
+  // Validate that we have non-empty user text for the action
+  if (!userText || userText.trim().length === 0) {
+    throw new Error("Cannot send empty user message to Cursor API");
+  }
+
   const userMessage = create(UserMessageSchema, {
     text: userText,
     messageId: crypto.randomUUID(),

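Unlike the other providers, cursor.ts cannot touch `firstTokenTime` directly from `processInteractionUpdate`, so it extends `BlockState` with a read-only getter and an idempotent setter. A standalone sketch of that plumbing; the getter/setter names mirror the diff, while the factory around them is scaffolding:

interface FirstTokenState {
  firstTokenTime: number | undefined;
  setFirstTokenTime: () => void;
}

function makeFirstTokenState(): FirstTokenState {
  let firstTokenTime: number | undefined;
  return {
    get firstTokenTime() {
      return firstTokenTime;
    },
    setFirstTokenTime: () => {
      // Only the first call records a time; textDelta/thinkingDelta fire often.
      if (!firstTokenTime) firstTokenTime = Date.now();
    },
  };
}

const state = makeFirstTokenState();
state.setFirstTokenTime(); // first textDelta
state.setFirstTokenTime(); // later deltas: no-op
console.log(state.firstTokenTime);
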
package/src/providers/google-gemini-cli-usage.ts
ADDED
@@ -0,0 +1,271 @@
+import type {
+  UsageAmount,
+  UsageFetchContext,
+  UsageFetchParams,
+  UsageLimit,
+  UsageProvider,
+  UsageReport,
+  UsageWindow,
+} from "../usage";
+import { refreshGoogleCloudToken } from "../utils/oauth/google-gemini-cli";
+
+const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
+const CACHE_TTL_MS = 60_000;
+
+const GEMINI_CLI_HEADERS = {
+  "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
+  "X-Goog-Api-Client": "gl-node/22.17.0",
+  "Client-Metadata": JSON.stringify({
+    ideType: "IDE_UNSPECIFIED",
+    platform: "PLATFORM_UNSPECIFIED",
+    pluginType: "GEMINI",
+  }),
+};
+
+const GEMINI_TIER_MAP: Array<{ tier: string; models: string[] }> = [
+  {
+    tier: "3-Flash",
+    models: ["gemini-3-flash-preview", "gemini-3-flash"],
+  },
+  {
+    tier: "Flash",
+    models: ["gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash", "gemini-1.5-flash"],
+  },
+  {
+    tier: "Pro",
+    models: ["gemini-2.5-pro", "gemini-3-pro-preview", "gemini-3-pro", "gemini-1.5-pro"],
+  },
+];
+
+interface LoadCodeAssistResponse {
+  cloudaicompanionProject?: string | { id?: string };
+  currentTier?: { id?: string; name?: string };
+}
+
+interface RetrieveUserQuotaResponse {
+  buckets?: Array<{
+    modelId?: string;
+    remainingFraction?: number;
+    resetTime?: string;
+  }>;
+}
+
+function getProjectId(payload: LoadCodeAssistResponse | undefined): string | undefined {
+  if (!payload) return undefined;
+  if (typeof payload.cloudaicompanionProject === "string") {
+    return payload.cloudaicompanionProject;
+  }
+  if (payload.cloudaicompanionProject && typeof payload.cloudaicompanionProject === "object") {
+    return payload.cloudaicompanionProject.id;
+  }
+  return undefined;
+}
+
+function getModelTier(modelId: string): string | undefined {
+  for (const entry of GEMINI_TIER_MAP) {
+    if (entry.models.includes(modelId)) {
+      return entry.tier;
+    }
+  }
+  const normalized = modelId.toLowerCase();
+  if (normalized.includes("flash")) return "Flash";
+  if (normalized.includes("pro")) return "Pro";
+  return undefined;
+}
+
+function parseWindow(resetTime: string | undefined, now: number): UsageWindow {
+  if (!resetTime) {
+    return {
+      id: "quota",
+      label: "Quota window",
+    };
+  }
+  const resetsAt = Date.parse(resetTime);
+  if (Number.isNaN(resetsAt)) {
+    return {
+      id: "quota",
+      label: "Quota window",
+    };
+  }
+  return {
+    id: `reset-${resetsAt}`,
+    label: "Quota window",
+    resetsAt,
+    resetInMs: Math.max(0, resetsAt - now),
+  };
+}
+
+function buildAmount(remainingFraction: number | undefined): UsageAmount {
+  if (remainingFraction === undefined || !Number.isFinite(remainingFraction)) {
+    return { unit: "percent" };
+  }
+  const remaining = Math.min(Math.max(remainingFraction, 0), 1);
+  const used = Math.min(Math.max(1 - remaining, 0), 1);
+  return {
+    unit: "percent",
+    used: Math.round(used * 1000) / 10,
+    remaining: Math.round(remaining * 1000) / 10,
+    limit: 100,
+    usedFraction: used,
+    remainingFraction: remaining,
+  };
+}
+
+async function resolveAccessToken(params: UsageFetchParams, ctx: UsageFetchContext): Promise<string | undefined> {
+  const { credential } = params;
+  if (credential.type !== "oauth") return undefined;
+  const now = ctx.now();
+  if (credential.accessToken && (!credential.expiresAt || credential.expiresAt > now + 60_000)) {
+    return credential.accessToken;
+  }
+  if (!credential.refreshToken || !credential.projectId) return credential.accessToken;
+  try {
+    const refreshed = await refreshGoogleCloudToken(credential.refreshToken, credential.projectId);
+    return refreshed.access;
+  } catch (error) {
+    ctx.logger?.warn("Gemini CLI token refresh failed", { error: String(error) });
+    return credential.accessToken;
+  }
+}
+
+async function loadCodeAssist(
+  params: UsageFetchParams,
+  ctx: UsageFetchContext,
+  accessToken: string,
+  baseUrl: string,
+  projectId?: string,
+): Promise<LoadCodeAssistResponse | undefined> {
+  const response = await ctx.fetch(`${baseUrl}/v1internal:loadCodeAssist`, {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${accessToken}`,
+      "Content-Type": "application/json",
+      ...GEMINI_CLI_HEADERS,
+    },
+    body: JSON.stringify({
+      ...(projectId ? { cloudaicompanionProject: projectId } : {}),
+      metadata: {
+        ideType: "IDE_UNSPECIFIED",
+        platform: "PLATFORM_UNSPECIFIED",
+        pluginType: "GEMINI",
+      },
+    }),
+    signal: params.signal,
+  });
+
+  if (!response.ok) {
+    const errorText = await response.text();
+    ctx.logger?.warn("Gemini CLI loadCodeAssist failed", {
+      status: response.status,
+      error: errorText,
+    });
+    return undefined;
+  }
+
+  return (await response.json()) as LoadCodeAssistResponse;
+}
+
+async function fetchQuota(
+  params: UsageFetchParams,
+  ctx: UsageFetchContext,
+  accessToken: string,
+  baseUrl: string,
+  projectId?: string,
+): Promise<RetrieveUserQuotaResponse | undefined> {
+  const response = await ctx.fetch(`${baseUrl}/v1internal:retrieveUserQuota`, {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${accessToken}`,
+      "Content-Type": "application/json",
+      ...GEMINI_CLI_HEADERS,
+    },
+    body: JSON.stringify(projectId ? { project: projectId } : {}),
+    signal: params.signal,
+  });
+
+  if (!response.ok) {
+    const errorText = await response.text();
+    ctx.logger?.warn("Gemini CLI retrieveUserQuota failed", {
+      status: response.status,
+      error: errorText,
+    });
+    return undefined;
+  }
+
+  return (await response.json()) as RetrieveUserQuotaResponse;
+}
+
+export const googleGeminiCliUsageProvider: UsageProvider = {
+  id: "google-gemini-cli",
+  supports: ({ credential }) => credential.type === "oauth" && !!credential.accessToken,
+  async fetchUsage(params, ctx) {
+    const { credential } = params;
+    if (credential.type !== "oauth") {
+      return null;
+    }
+    const accessToken = await resolveAccessToken(params, ctx);
+    if (!accessToken) {
+      return null;
+    }
+
+    const now = ctx.now();
+    const baseUrl = (params.baseUrl?.trim() || DEFAULT_ENDPOINT).replace(/\/$/, "");
+    const cacheKey = `usage:${params.provider}:${credential.accountId ?? credential.email ?? "default"}:${baseUrl}:${
+      credential.projectId ?? "default"
+    }`;
+    const cached = await ctx.cache.get(cacheKey);
+    if (cached && cached.expiresAt > now) {
+      return cached.value;
+    }
+
+    const loadResponse = await loadCodeAssist(params, ctx, accessToken, baseUrl, credential.projectId);
+    const projectId = credential.projectId ?? getProjectId(loadResponse);
+    const quotaResponse = await fetchQuota(params, ctx, accessToken, baseUrl, projectId);
+    if (!quotaResponse) {
+      const entry = { value: null, expiresAt: now + CACHE_TTL_MS };
+      await ctx.cache.set(cacheKey, entry);
+      return null;
+    }
+
+    const limits: UsageLimit[] = [];
+    const buckets = quotaResponse.buckets ?? [];
+
+    buckets.forEach((bucket, index) => {
+      const modelId = bucket.modelId;
+      const window = parseWindow(bucket.resetTime, now);
+      const amount = buildAmount(bucket.remainingFraction);
+      const tier = modelId ? getModelTier(modelId) : undefined;
+      const label = modelId ? `Gemini ${modelId}` : "Gemini quota";
+      const id = `${modelId ?? "unknown"}:${window?.id ?? index}`;
+
+      limits.push({
+        id,
+        label,
+        scope: {
+          provider: params.provider,
+          accountId: credential.accountId,
+          projectId,
+          modelId,
+          tier,
+          windowId: window?.id,
+        },
+        window,
+        amount,
+      });
+    });
+
+    const report: UsageReport = {
+      provider: params.provider,
+      fetchedAt: now,
+      limits,
+      metadata: {
+        currentTierId: loadResponse?.currentTier?.id,
+        currentTierName: loadResponse?.currentTier?.name,
+      },
+      raw: quotaResponse,
+    };
+
+    await ctx.cache.set(cacheKey, { value: report, expiresAt: now + CACHE_TTL_MS });
+    return report;
+  },
+};

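The provider is driven through `fetchUsage(params, ctx)` with a 60-second cache keyed by account, endpoint, and project. A hedged sketch of calling it directly: the exact shapes of `UsageFetchParams` and `UsageFetchContext` are not shown in this diff, so the stubs below are inferred from how the file uses them (`fetch`, `now`, `cache.get`/`cache.set`, `logger`) and cast with `as any` where fields may be missing:

import { googleGeminiCliUsageProvider } from "@oh-my-pi/pi-ai";

// In-memory stand-in for ctx.cache; entries carry { value, expiresAt }.
const store = new Map<string, { value: unknown; expiresAt: number }>();
const ctx = {
  fetch: globalThis.fetch.bind(globalThis),
  now: () => Date.now(),
  cache: {
    get: async (key: string) => store.get(key),
    set: async (key: string, entry: { value: unknown; expiresAt: number }) => {
      store.set(key, entry);
    },
  },
  logger: console,
};

const report = await googleGeminiCliUsageProvider.fetchUsage(
  {
    provider: "google-gemini-cli",
    // Hypothetical credential; real ones come from the gemini-cli OAuth flow.
    credential: { type: "oauth", accessToken: process.env.GEMINI_TOKEN },
  } as any,
  ctx as any,
);

for (const limit of report?.limits ?? []) {
  console.log(limit.label, limit.amount.remaining, "% remaining");
}
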
package/src/providers/google-gemini-cli.ts
CHANGED
@@ -365,6 +365,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
   const stream = new AssistantMessageEventStream();
 
   (async () => {
+    const startTime = Date.now();
+    let firstTokenTime: number | undefined;
+
     const output: AssistantMessage = {
       role: "assistant",
       content: [],
@@ -489,6 +492,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
     let started = false;
     const ensureStarted = () => {
       if (!started) {
+        if (!firstTokenTime) firstTokenTime = Date.now();
         stream.push({ type: "start", partial: output });
         started = true;
       }
@@ -802,6 +806,8 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
        throw new Error("An unknown error occurred");
      }
 
+     output.duration = Date.now() - startTime;
+     if (firstTokenTime) output.ttft = firstTokenTime - startTime;
      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
@@ -812,6 +818,8 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
      }
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
+     output.duration = Date.now() - startTime;
+     if (firstTokenTime) output.ttft = firstTokenTime - startTime;
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }

package/src/providers/google-shared.ts
CHANGED
@@ -79,6 +79,8 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
   for (const msg of transformedMessages) {
     if (msg.role === "user") {
       if (typeof msg.content === "string") {
+        // Skip empty user messages
+        if (!msg.content || msg.content.trim() === "") continue;
         contents.push({
           role: "user",
           parts: [{ text: sanitizeSurrogates(msg.content) }],
@@ -96,7 +98,14 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
           };
         }
       });
-      const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
+      // Filter out images if model doesn't support them, and empty text blocks
+      let filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
+      filteredParts = filteredParts.filter((p) => {
+        if (p.text !== undefined) {
+          return p.text.trim().length > 0;
+        }
+        return true; // Keep non-text parts (images)
+      });
       if (filteredParts.length === 0) continue;
       contents.push({
         role: "user",

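The net effect of the google-shared change: whitespace-only text parts are dropped, while non-text parts always survive. A tiny self-contained check of that rule; the `Part` shape and sample data are illustrative, not from the diff:

type Part = { text?: string; inlineData?: { mimeType: string; data: string } };

const parts: Part[] = [
  { text: "   " }, // whitespace-only: dropped
  { text: "hello" }, // kept
  { inlineData: { mimeType: "image/png", data: "..." } }, // non-text: kept
];

const filteredParts = parts.filter((p) => {
  if (p.text !== undefined) return p.text.trim().length > 0;
  return true; // Keep non-text parts (images)
});
console.log(filteredParts.length); // 2
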
package/src/providers/google-vertex.ts
CHANGED
@@ -62,6 +62,9 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
   const stream = new AssistantMessageEventStream();
 
   (async () => {
+    const startTime = Date.now();
+    let firstTokenTime: number | undefined;
+
     const output: AssistantMessage = {
       role: "assistant",
       content: [],
@@ -97,6 +100,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
     if (candidate?.content?.parts) {
       for (const part of candidate.content.parts) {
         if (part.text !== undefined) {
+          if (!firstTokenTime) firstTokenTime = Date.now();
           const isThinking = isThinkingPart(part);
           if (
             !currentBlock ||
@@ -258,6 +262,8 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
       throw new Error("An unknown error occurred");
     }
 
+    output.duration = Date.now() - startTime;
+    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
     stream.push({ type: "done", reason: output.stopReason, message: output });
     stream.end();
   } catch (error) {
@@ -269,6 +275,8 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
     }
     output.stopReason = options?.signal?.aborted ? "aborted" : "error";
     output.errorMessage = formatErrorMessageWithRetryAfter(error);
+    output.duration = Date.now() - startTime;
+    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
     stream.push({ type: "error", reason: output.stopReason, error: output });
     stream.end();
   }

package/src/providers/google.ts
CHANGED
@@ -53,6 +53,9 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
   const stream = new AssistantMessageEventStream();
 
   (async () => {
+    const startTime = Date.now();
+    let firstTokenTime: number | undefined;
+
     const output: AssistantMessage = {
       role: "assistant",
       content: [],
@@ -88,6 +91,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
     for (const part of candidate.content.parts) {
       if (part.text !== undefined) {
         const isThinking = isThinkingPart(part);
+        if (!firstTokenTime) firstTokenTime = Date.now();
         if (
           !currentBlock ||
           (isThinking && currentBlock.type !== "thinking") ||
@@ -245,6 +249,8 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
       throw new Error("An unkown error ocurred");
     }
 
+    output.duration = Date.now() - startTime;
+    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
     stream.push({ type: "done", reason: output.stopReason, message: output });
     stream.end();
   } catch (error) {
@@ -256,6 +262,8 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
     }
     output.stopReason = options?.signal?.aborted ? "aborted" : "error";
     output.errorMessage = formatErrorMessageWithRetryAfter(error);
+    output.duration = Date.now() - startTime;
+    if (firstTokenTime) output.ttft = firstTokenTime - startTime;
     stream.push({ type: "error", reason: output.stopReason, error: output });
     stream.end();
   }

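Downstream, every terminal event now carries the two new fields; types.ts (+2 in the file list above) presumably gains the matching `duration` and `ttft` properties on `AssistantMessage`. A hedged consumer-side sketch; event payloads are typed as `any` here because only the `stream.push` calls are visible in this diff:

async function logTimings(stream: AsyncIterable<any>) {
  for await (const event of stream) {
    if (event.type === "done" || event.type === "error") {
      const msg = event.type === "done" ? event.message : event.error;
      // duration is always stamped; ttft only if at least one token arrived.
      console.log(`total ${msg.duration} ms, first token after ${msg.ttft ?? "n/a"} ms`);
    }
  }
}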